def main_parallel_sim(comm, absind, abstracts, type, mattype):
    '''
    Dispatch one MPI process into the parallel similarity computation.

    The computation finds the similarity, for the measure named by mattype
    (cosine or jaccard distance), between the abstract identified by absind
    and all abstracts, based on their "type" values.

    comm      -- communicator; only Get_rank() is called on it here before
                 it is handed on to Similar.master / Similar.slave
    absind    -- id of the abstract to compare against all the others
    abstracts -- the abstracts, passed through to Similar.master unchanged
    type      -- which "type" values of the abstracts to compare on (the
                 name shadows the builtin but is kept for existing callers)
    mattype   -- which measure to compute (cosine or jaccard distance)

    Returns whatever Similar.master returns on rank 0; on every other rank
    Similar.slave(comm) is run and None is returned.
    '''
    is_master = comm.Get_rank() == 0
    if not is_master:
        # Worker ranks have no result of their own to hand back.
        Similar.slave(comm)
        return None
    return Similar.master(comm, absind, abstracts, type, mattype)
print "Send abstract time: %f secs" % (pabsend - pabsstart) print "Frequency time: %f secs" % (pfreqend - pfreqstart) print "Send abs, terms time: %f secs" % (psendend - psendstart) print "TF-IDF time: %f secs" % (ptfidfend - ptfidfstart) print "Topic modelling time: %f secs" % (ptopicend - ptopicstart) print "Cosine similarity, bag of words time: %f secs" % (psimbowend - psimbowstart) print "Cosine similarity, bigrams time: %f secs" % (psimbigramend - psimbigramstart) print "Jaccard similarity, bag of words time: %f secs" % (psimjacend - psimjacstart) print "\n" target = open(filename[:-4]+"processed", 'w') abstractpickle = pickle.dumps(abstracts) target.write(abstractpickle) else: Process.slave(comm) Similar.slave(comm) Similar.slave(comm) Similar.slave(comm) # Test scatter-gather implementation if rank == 0: starttime = MPI.Wtime() abstracts = Process.main_mpi(comm, filename) if rank == 0: endtime = MPI.Wtime() print "Scatter-gather MPI time: %f secs" % (starttime - endtime) target = open(filename[:-4]+"processed", 'w') abstractpickle = pickle.dumps(abstracts) target.write(abstractpickle) # Serial testing