def dividetask(data, task, silent=True):
    """Apply ``task`` to every element of ``data``, sharing the work over MPI ranks.

    ``data`` is first broadcast from rank 0, so every rank works on the root's
    copy.  The leading ``len(data) % size`` elements are processed by rank 0;
    the rest is cut into equal contiguous chunks, one per rank.  The per-rank
    result lists are gathered on rank 0 and concatenated in rank order, which
    reproduces the order of ``data``.

    Arguments:
      data   -- indexable sequence of work items (rank 0's value is used)
      task   -- callable applied to each work item
      silent -- when False, log a progress message after every finished item

    Returns the list of all results on rank 0 and an empty list on every
    other rank.
    """
    data = mpi.broadcast(mpi.world, data, 0)
    rank = mpi.world.rank
    nProcs = mpi.world.size
    chunkSize, extraBits = divmod(len(data), nProcs)

    # Every rank owns one contiguous chunk located after the extra pieces.
    start = extraBits + rank * chunkSize
    positions = list(range(start, start + chunkSize))
    if rank == 0:
        # the root node handles the extra pieces before its own chunk
        positions = list(range(extraBits)) + positions

    res = []
    for pos in positions:
        res.append(task(data[pos]))
        if not silent:
            # Log the running total so the counter does not restart at 1 when
            # the root moves from the extra pieces to its regular chunk.
            logger.info('task(%d) done %d' % (rank, len(res)))

    # gather() is a collective operation: every rank has to call it.
    gathered = mpi.gather(mpi.world, res, 0)

    allRes = []
    if rank == 0:
        for partial in gathered:
            allRes.extend(partial)
    return allRes
def dividetask(data, task, silent=True):
    """Apply ``task`` to every element of ``data``, sharing the work over MPI ranks.

    ``data`` is first broadcast from rank 0, so every rank works on the root's
    copy.  The leading ``len(data) % size`` elements are processed by rank 0;
    the rest is cut into equal contiguous chunks, one per rank.  The per-rank
    result lists are gathered on rank 0 and concatenated in rank order, which
    reproduces the order of ``data``.

    Arguments:
      data   -- indexable sequence of work items (rank 0's value is used)
      task   -- callable applied to each work item
      silent -- when False, log a progress message after every finished item

    Returns the list of all results on rank 0 and an empty list on every
    other rank.
    """
    data = mpi.broadcast(mpi.world, data, 0)
    rank = mpi.world.rank
    nProcs = mpi.world.size
    chunkSize, extraBits = divmod(len(data), nProcs)

    # Every rank owns one contiguous chunk located after the extra pieces.
    start = extraBits + rank * chunkSize
    positions = list(range(start, start + chunkSize))
    if rank == 0:
        # the root node handles the extra pieces before its own chunk
        positions = list(range(extraBits)) + positions

    res = []
    for pos in positions:
        res.append(task(data[pos]))
        if not silent:
            # Log the running total so the counter does not restart at 1 when
            # the root moves from the extra pieces to its regular chunk.
            logger.info('task(%d) done %d' % (rank, len(res)))

    # gather() is a collective operation: every rank has to call it.
    gathered = mpi.gather(mpi.world, res, 0)

    allRes = []
    if rank == 0:
        for partial in gathered:
            allRes.extend(partial)
    return allRes
def RandomA(N, zeroRatio=0.9): A = rand(N*N).reshape(N,N) for row in range(N): for col in range(N): if A[row,col] < zeroRatio and row!=col: A[row, col] = 0 A = mpi.broadcast(mpi.world, A, 0) if mpi.world.rank == 0: print "A = ", A return A
def tick(self):
    """Sample every due L{LogQuantity} and advance the tick counter.

    Each gather descriptor whose interval divides the current tick count is
    evaluated, and its non-None values are appended to the matching table
    (and to C{self.db_conn} when one is attached).  After the counter has
    been incremented, the watch printout and C{self.save()} are triggered
    when their scheduled tick is reached; the next save tick is then
    rescheduled and, when an MPI communicator is present, taken from the
    head rank.  The wall time spent in this call is stored in C{self.t_log}.
    """
    t_begin = time()

    def store(name, value):
        # Quantities that returned None for this tick are not recorded.
        if value is not None:
            self.get_table(name).insert_row(
                    (self.tick_count, self.rank, value))
            if self.db_conn is not None:
                self.db_conn.execute("insert into %s values (?,?,?)" % name,
                        (self.tick_count, self.rank, value))

    for gd in self.gather_descriptors:
        if self.tick_count % gd.interval != 0:
            continue

        q_value = gd.quantity()
        if isinstance(gd.quantity, MultiLogQuantity):
            # a multi-quantity yields one value per name
            samples = zip(gd.quantity.names, q_value)
        else:
            samples = [(gd.quantity.name, q_value)]

        for name, value in samples:
            store(name, value)

    self.tick_count += 1

    t_sampled = time()

    # print watches
    if self.tick_count == self.next_watch_tick:
        self._watch_tick()

    if self.tick_count == self.next_sync_tick:
        # sync every few seconds:
        self.save()

        # figure out next sync tick, broadcast to peers
        ticks_per_10_sec = 10*self.tick_count/max(1, t_sampled-self.start_time)
        self.next_sync_tick = self.tick_count + int(max(50, ticks_per_10_sec))
        if self.mpi_comm is not None:
            from boost.mpi import broadcast
            self.next_sync_tick = broadcast(
                    self.mpi_comm, self.next_sync_tick, self.head_rank)

    self.t_log = time() - t_begin
def do_training(indices, training_blob, heldout_blob, weights, weights_out, debiasing_weights):
    """
    Driver for parallel perceptron training.

    Runs FLAGS.maxepochs epochs. In every epoch the (optionally shuffled)
    instance order is broadcast from rank 0, perceptron_parallel is run over
    the training data, rank 0 pickles the averaged weights to weights_out,
    and, if FLAGS.decodeheldout is set, the held-out data is decoded with
    those weights. Rank 0 closes weights_out once all epochs are done.
    """
    # Under de-biasing mode, only features present in the given initial weight
    # vector are allowed, i.e. features that were "selected" by a previously
    # run regularized training scheme.
    if FLAGS.debiasing:
        valid_feature_names = getFeatureNames(debiasing_weights)
    else:
        valid_feature_names = None

    for epoch in range(FLAGS.maxepochs):
        # Randomize the order of examples on the root and hand the same order
        # to every process; the subset a process trains on in this epoch
        # depends on this ordering.
        if myRank == 0 and FLAGS.shuffle:
            random.shuffle(indices)
        indices = mpi.broadcast(value=indices, root=0)

        # SEARCH: one epoch over the training data under the current model
        io_helper.write_master("===EPOCH %d TRAINING===\n" %(epoch))
        averaged_weights = perceptron_parallel(epoch, indices, training_blob,
                                               weights, valid_feature_names)

        # Dump weights for this iteration (root only)
        if myRank == 0:
            cPickle.dump(averaged_weights, weights_out,
                         protocol=cPickle.HIGHEST_PROTOCOL)
            # Push the pickle out to disk right away.
            weights_out.flush()

        # Decode the held-out data with the weights learned in this epoch
        if FLAGS.decodeheldout:
            io_helper.write_master("===EPOCH %d DECODE HELDOUT===\n" %(epoch))
            decode_parallel(averaged_weights, indices_dev, heldout_blob, "dev")

    if myRank == 0:
        weights_out.close()
def _watch_tick(self): def get_last_value(table): if table: return table.data[-1][2] else: return 0 data_block = dict((qname, get_last_value(self.get_table(qname))) for qname in self.quantity_data.iterkeys()) if self.mpi_comm is not None: from boost.mpi import broadcast, gather gathered_data = gather(self.mpi_comm, data_block, self.head_rank) else: gathered_data = [data_block] if self.rank == self.head_rank: values = {} for data_block in gathered_data: for name, value in data_block.iteritems(): values.setdefault(name, []).append(value) if self.watches: print " | ".join( "%s=%g" % (watch.expr, watch.compiled( *[dd.agg_func(values[dd.name]) for dd in watch.dep_data])) for watch in self.watches ) ticks_per_sec = self.tick_count/max(1, time()-self.start_time) self.next_watch_tick = self.tick_count + int(max(1, ticks_per_sec)) if self.mpi_comm is not None: self.next_watch_tick = broadcast(self.mpi_comm, self.next_watch_tick, self.head_rank)
numbersjob.args = caps, numbers, caps in ["B", "D"] numbersjob["indict"] = "indicted" numbersjob["something"] = "else" # loads dictionary else: job_dictionary = jobs.load(options.loadme, comm=world) # saves dictionary if options.saveme is not None: jobs.save(jobfolder=job_dictionary, path=options.saveme, overwrite=True, comm=world) # writes pbs stuff. if options.pbs is not None and world.rank == 0: jobs.pbs_script( outdir="results", jobfolder=job_dictionary, pools=options.pools,\ queue="regular", mppwidth=world.size, python_path=getcwd() ) # Computes all jobs. if options.loadme is None and options.saveme is None and options.pbs is None: for outdir, job in job_dictionary.iteritems(): # launch jobs and stores result result = job.compute(outdir=join('results', outdir)) # Root process of pool prints result. if local_comm.rank == 0: print result, "\n" # Executes jobs using jobs.bleed elif options.loadme is not None: for job, outdir in jobs.bleed(options.loadme, outdir=options.pbs, comm=local_comm): # decides on the same waittime for all processes. waittime = broadcast(local_comm, random.randint(0,2), 0) # launch jobs and stores result result = job.compute(outdir=outdir, wait=waittime) # Root process of pool prints result. if local_comm.rank == 0: print result, "\n"
def perceptron_parallel(epoch, indices, blob, weights = None, valid_feature_names = None):
    """
    Implements parallelized version of perceptron training for structured outputs
    (Collins, 2002; McDonald, 2010).

    Every rank trains on the instances assigned to it round-robin
    (position in indices[:FLAGS.subset] modulo the number of ranks), keeps a
    running sum of its weight vectors in <tmpdir>/training.<rank> and its last
    weight vector in <tmpdir>/training-restart.<rank>.  The master rank then
    adds up all per-rank sums, averages them, and writes <tmpdir>/weights,
    which every rank reads back.

    Arguments:
      epoch               -- zero-based epoch number; used in the averaging denominator
      indices             -- instance ids in the order used for this epoch
      blob                -- dict holding the instance lists ('f_instances',
                             'e_instances', ...), feature sets and the
                             'pef'/'pfe' tables handed to the model
      weights             -- starting weights; ignored when a restart file
                             from a previous epoch exists for this rank
      valid_feature_names -- passed to validate_features when FLAGS.debiasing is set

    Returns the averaged weight vector loaded from <tmpdir>/weights.
    """
    # Which processor am I?
    myRank = mpi.rank
    # Let processor 0 be the master.
    masterRank = 0
    # How many processors are there?
    nProcs = mpi.size

    # Keep track of time to train this epoch
    startTime = time.time()

    # Restart with weights from last epoch or 0.
    # A restart file takes precedence over weights passed during function call.
    weights_restart_filename = '%s/training-restart.%s' % (tmpdir, str(mpi.rank))
    if os.path.isfile(weights_restart_filename):
        weights = _load_pickle(weights_restart_filename)
    elif weights is None or len(weights) == 0:
        # Nothing usable was passed in either: start with empty weights.
        weights = svector.Vector()

    # Restart with previous running weight sum, also.
    weights_sum_filename = '%s/training.%s' % (tmpdir, str(mpi.rank))
    if os.path.isfile(weights_sum_filename):
        weights_sum = _load_pickle(weights_sum_filename)
    else:
        weights_sum = svector.Vector()

    LEARNING_RATE = FLAGS.learningrate
    numChanged = 0
    for i, instanceID in enumerate(indices[:FLAGS.subset]):
        # Instances are dealt out to the ranks round-robin.
        if myRank != i % nProcs:
            continue

        # Assign the current instances we will look at
        f = blob['f_instances'][instanceID]
        e = blob['e_instances'][instanceID]
        etree = blob['etree_instances'][instanceID]
        gold_str = blob['gold_instances'][instanceID]

        inverse = None
        if FLAGS.inverse is not None:
            inverse = blob['inverse_instances'][instanceID]

        a1 = None
        if FLAGS.a1 is not None:
            a1 = blob['a1_instances'][instanceID]

        a2 = None
        if FLAGS.a2 is not None:
            a2 = blob['a2_instances'][instanceID]

        ftree = None
        if FLAGS.ftrees is not None:
            ftree = blob['ftree_instances'][instanceID]

        # Preprocess input data
        # f, e are sequences of words
        f = f.split()
        e = e.split()

        # gold is a sequence of f-e link pairs
        gold = Alignment.Alignment(gold_str)

        # Initialize model for this instance
        model = GridAlign.Model(f, e, etree, ftree, instanceID, weights, a1, a2,
                                inverse,
                                LOCAL_FEATURES=blob['localFeatures'],
                                NONLOCAL_FEATURES=blob['nonlocalFeatures'],
                                FLAGS=FLAGS)
        model.gold = gold

        # Initialize model with data tables
        model.pef = blob['pef']
        model.pfe = blob['pfe']

        # Align the current training instance
        model.align()

        ######################################################################
        # Weight updating
        ######################################################################
        # Set the oracle item
        # NOTE(review): an unknown class only writes a message; oracle/hyp stay
        # None and the attribute accesses below will then fail.
        oracle = None
        if FLAGS.oracle == 'gold':
            oracle = model.oracle
        elif FLAGS.oracle == 'hope':
            oracle = model.hope
        else:
            sys.stderr.write("ERROR: Unknown oracle class: %s\n" %(FLAGS.oracle))

        # Set the hypothesis item
        hyp = None
        if FLAGS.hyp == '1best':
            hyp = model.modelBest
        elif FLAGS.hyp == 'fear':
            hyp = model.fear
        else:
            sys.stderr.write("ERROR: Unknown hyp class: %s\n" %(FLAGS.hyp))

        # Debiasing
        if FLAGS.debiasing:
            validate_features(oracle.scoreVector, valid_feature_names)
            validate_features(hyp.scoreVector, valid_feature_names)

        if set(hyp.links) != set(oracle.links):
            numChanged += 1
            # WEIGHT UPDATES: move towards the oracle, away from the hypothesis
            deltas = oracle.scoreVector - hyp.scoreVector
            weights = weights + LEARNING_RATE*deltas

        # Even if we didnt update, the current weight vector should count towards the sum!
        weights_sum += weights

        # L1 Projection step
        # if w in [-tau, tau], w -> 0
        # else, move w closer to 0 by tau.
        # NOTE(review): applied after every processed instance, which is how
        # the statement order of the flattened original reads -- confirm.
        if FLAGS.tau is not None:
            # Iterate over a snapshot: entries are deleted from weights_sum
            # inside the loop, which must not happen while iterating it.
            for index, w in list(weights_sum.iteritems()):
                if w == 0:
                    del weights_sum[index]
                    continue
                if index[-3:] == '_nb':
                    # features whose name ends in '_nb' are never regularized
                    continue
                if w > 0 and w <= FLAGS.tau and not FLAGS.negreg:
                    del weights_sum[index]
                elif w < 0 and w >= (FLAGS.tau * -1):
                    del weights_sum[index]
                elif w > 0 and w > FLAGS.tau and not FLAGS.negreg:
                    weights_sum[index] -= FLAGS.tau
                elif w < 0 and w < (FLAGS.tau * -1):
                    weights_sum[index] += FLAGS.tau

    # Set uniq pickled output file for this process
    # Holds sum of weights over each iteration for this process;
    # to be averaged by master along with the others.
    _dump_pickle(weights_sum, "%s/training.%s" %(tmpdir, str(mpi.rank)))

    # Remember just the last weights used for this process; start here next epoch.
    _dump_pickle(weights, "%s/training-restart.%s" %(tmpdir, str(mpi.rank)))

    #############################################
    # Gather "done" messages from workers
    #############################################
    # Synchronize: the result is not needed, only the collective call.
    mpi.gather(value=True, root=0)

    masterWeights = svector.Vector()

    if myRank == masterRank:
        # Read pickled output of every rank
        for rank in range(nProcs):
            masterWeights += _load_pickle(tmpdir+'/training.'+str(rank))
        sys.stderr.write("Done reading data.\n")
        sys.stderr.write("len(masterWeights)= %d\n"%(len(masterWeights)))
        sys.stderr.flush()

        ######################################################
        # AVERAGED WEIGHTS
        ######################################################
        sys.stderr.write("[%d] Averaging weights.\n" %(mpi.rank))
        sys.stderr.flush()
        # NOTE(review): the denominator uses len(indices) although only
        # indices[:FLAGS.subset] is trained on -- confirm this is intended.
        masterWeights = masterWeights / (len(indices) * (epoch+1))

        # Dump master weights to file
        # There is only one weight vector in this file at a time.
        mw = robustWrite(tmpdir+'/weights')
        try:
            cPickle.dump(masterWeights,mw,protocol=cPickle.HIGHEST_PROTOCOL)
        finally:
            mw.close()

    ######################################################################
    # All processes read and load new averaged weights
    ######################################################################
    # But make sure worker nodes don't attempt to read from the weights
    # file before the root node has written it.
    # Sync-up with a blocking broadcast call
    mpi.broadcast(value=True, root=0)
    mw = robustRead(tmpdir+'/weights')
    try:
        masterWeights = cPickle.load(mw)
    finally:
        mw.close()

    ######################################################################
    # Print report for this iteration
    ######################################################################
    elapsedTime = time.time() - startTime
    if myRank == masterRank:
        # masterRank is printing elapsed time.
        # May differ at each node.
        sys.stderr.write("Time: %0.2f\n" %(elapsedTime))
        sys.stderr.write("[%d] Finished training.\n" %(mpi.rank))

    return masterWeights


def _load_pickle(path):
    """Return the object pickled in the file at path; the file is always closed."""
    # NOTE: unpickling executes arbitrary code; these files must only ever be
    # the ones this program wrote into its own temporary directory.
    handle = open(path, 'rb')
    try:
        return cPickle.load(handle)
    finally:
        handle.close()


def _dump_pickle(obj, path):
    """Pickle obj into the file at path (binary mode); the file is always closed."""
    handle = open(path, 'wb')
    try:
        cPickle.dump(obj, handle, protocol=cPickle.HIGHEST_PROTOCOL)
    finally:
        handle.close()
# Containers for the held-out (dev) instances.
a2_dev_instances = []
gold_dev_instances = []
inverse_dev_instances = []

# Rank 0 creates one uniquely named scratch directory; its path is then
# broadcast so that all ranks use the same directory.
tmpdir = None
if mpi.rank == 0:
    base_tempdir = None
    if FLAGS.tempdir is not None:
        # an explicitly configured location wins
        base_tempdir = FLAGS.tempdir
    else:
        base_tempdir = tempfile.gettempdir()
    if base_tempdir is None:
        # last resort: the current working directory
        base_tempdir = "."
    # The directory name carries this process id: align-<pid>-<random suffix>
    tmpdir = tempfile.mkdtemp(prefix='align-'+str(os.getpid())+'-', dir=base_tempdir)
tmpdir = mpi.broadcast(value=tmpdir, root=0)

################################################
# Load training examples
################################################
# The three inputs are read in lockstep, one stripped line per instance.
for f, e, etree in izip(file_handles['f'], file_handles['e'], file_handles['etrees']):
    f_instances.append(f.strip())
    e_instances.append(e.strip())
    etree_instances.append(etree.strip())
# One index per loaded training instance.
indices = range(len(e_instances))

################################################
# Load held-out dev examples
################################################
''' Created on Nov 11, 2010 @author: joel ''' import boost.mpi as mpi import numpy as np from time import time N = 1000000 #a = np.arange(N, dtype=float) a = [np.arange(10) for i in range(N/10)] if mpi.world.rank == 0: ts = [] ts.append(time()) mpi.broadcast(mpi.world, a, root=0) ts.append(time()) g = mpi.gather(mpi.world, a, root=0) ts.append(time()) ta = np.array(ts) print "Times taken: ",(ta[1:] - ta[:-1]) else: mpi.broadcast(mpi.world, a, root=0) mpi.gather(mpi.world, a, root=0)
def test():
    """Compare a distributed matrix-vector product against a serial reference.

    A small fixed matrix is multiplied with a random complex vector, once with
    dot() and once through pyprop.core.TensorPotentialMultiply_SimpD on the
    slice owned by this process.  Every process prints, in rank order, the norm
    of its reference slice, the norm of its computed slice, and the norm of
    their difference.
    """
    # NOTE(review): the matrix is symmetric except for A[3,5] = 15 versus
    # A[5,3] = 16 -- confirm whether that is intended.
    A =[[ 1, 2, 3, 0, 4, 0 ], \
        [ 2, 5, 6, 7, 8, 9 ], \
        [ 3, 6, 10, 11, 12, 0 ], \
        [ 0, 7, 11, 13, 14, 15 ], \
        [ 4, 8, 12, 14, 16, 17 ], \
        [ 0, 9, 0, 16, 17, 18 ]]
    #A = RandomA(100, zeroRatio=0.95)
    #A = A + A.transpose()
    A = array(A)
    N = A.shape[0]

    # Collect the (row, col) positions of all nonzero entries.
    indexPairs = []
    for row in range(N):
        for col in range(N):
            if A[row, col] != 0:
                indexPairs.append([row, col])
    indexPairs = array(indexPairs)

    # Element accessor handed to SetupSimpleDistributedMatrix.
    def matrix(row, col):
        return A[row, col]

    #Create in-vector
    psi = rand(N) + 0.0j
    #psi = r_[:N] + 0.0j

    #send psi from proc0 to everyone
    psi = mpi.broadcast(mpi.world, psi, 0)

    #output (serial reference result)
    refOutput = dot(A, psi)

    #Create local vectors and matrices
    localSize = GetDistributedShape(N, ProcCount, ProcId)
    globalStartIndex = GetGlobalStartIndex(N, ProcCount, ProcId)
    globalEndIndex = globalStartIndex+localSize
    # This process' slice of the input vector and of the reference result.
    localPsi = psi[globalStartIndex:globalEndIndex]
    localRefOutput = refOutput[globalStartIndex:globalEndIndex]

    distrib = DistributedModelTest()
    # NOTE(review): a literal 0 is passed as rank here, not ProcId -- confirm.
    rank = 0
    localMatrix, stepList = SetupSimpleDistributedMatrix(matrix, N, indexPairs, distrib, rank)
    # Unpack the step list into the separate arrays the multiply routine takes.
    localMatrixIndex, globalRow, globalCol, globalSendProc, recvProcList, recvLocalRowList, recvCount = StepListToArray(stepList)

    print "SHAPE= ", recvLocalRowList.shape
    if ProcId == 10:
        print recvProcList
    #if ProcId == 0:
    #    print indexPairs

    # Output buffer passed to the distributed multiply below.
    localTestOutput = zeros(localSize, dtype=complex)

    # Print per-process setup information, one process at a time.
    for i in range(ProcCount):
        mpi.world.barrier()
        if i == ProcId:
            #if i == 0:
            #    print "psi =", psi
            print "ProcId == %i" % (i)
            print localSize
            print globalStartIndex, " -> ", globalEndIndex
            print "%i steps, %i nonzero steps" % (len(stepList), len([1 for step in stepList if step.LocalMatrixIndex!=-1]))
            print "max recv length = %i" % (max([len(step.RecvProcList) for step in stepList]))
            #for step in stepList:
            #    print step
            print ""
    # NOTE(review): placed after the loop; the flattened original does not show
    # whether this barrier belongs inside the loop body -- confirm.
    mpi.world.barrier()

    #SimpleDistributedMatrixVectorMultiply(localMatrix, N, stepList, localPsi, localTestOutput, mpi.world)
    pyprop.core.TensorPotentialMultiply_SimpD(localMatrix, 1.0, localPsi, localTestOutput, N, localMatrixIndex, globalRow, globalCol, globalSendProc, recvProcList, recvLocalRowList, recvCount)

    #the verdict
    # Each process in turn prints |reference|, |computed| and |difference|.
    for i in range(ProcCount):
        if i == ProcId:
            #if i == 0:
            #    print ""
            #    print refOutput
            #    print ""
            print "ProcId == %i" % (i)
            print sqrt(sum(abs(localRefOutput)**2))
            print sqrt(sum(abs(localTestOutput)**2))
            print sqrt(sum(abs(localRefOutput - localTestOutput)**2))
            #print localTestOutput
            #print localRefOutput
            print ""
        mpi.world.barrier()