# Calculate similarity values for the given article.
#
# Control protocol between ranks (as used in this section):
#   * rank 0 sends 1 to every other rank to make it join the parallel
#     similarity computation;
#   * rank 0 sends 0 to every other rank right before it exits; a rank that
#     receives 0 at the recv below terminates.
#
# sim_matrix ends up (on rank 0 only) as a list of (index, value) pairs,
# sorted in ascending order of value.
# NOTE(review): ascending order puts the smallest values first; whether that
# means "most similar" depends on what the Process.*_sim functions return --
# confirm.
sim_matrix = []
if version.lower() == 'p':
    if rank == 0:
        # Wake up all worker ranks, then take part in the computation.
        for i in range(1, size):
            comm.send(1, dest=i)
        sim_matrix = sorted(
            enumerate(Process.main_parallel_sim(comm, ind, abstracts, type, mattype)),
            key=lambda pair: pair[1])
    else:
        tosend = comm.recv(source=0)
        if tosend == 1:
            # Worker side of the parallel computation; result is not kept here.
            Process.main_parallel_sim(comm, ind, abstracts, type, mattype)
        elif tosend == 0:
            sys.exit()
else:
    if rank == 0:
        sim_matrix = sorted(
            enumerate(Process.main_serial_sim(comm, ind, abstracts, type, mattype)),
            key=lambda pair: pair[1])
    else:
        # Serial mode needs only rank 0.
        sys.exit()

# Print the 5 most similar articles, one page of 5 at a time.
if rank == 0:
    # Single-argument parenthesised print behaves identically under
    # Python 2 and Python 3.
    print("Similar articles:\n")
    setabs = 0  # index of the page (group of 5) currently being shown
    while True:
        # Floor division keeps the Python 2 integer semantics explicit.
        # NOTE(review): this guard stops whenever len(sim_matrix) // 5 is not
        # greater than setabs + 1, so with fewer than 10 entries nothing is
        # ever shown -- confirm that is intended.
        if len(sim_matrix) // 5 <= setabs + 1:
            print("No more articles to see ...")
            if version.lower() == 'p':
                # Tell the worker ranks to shut down.
                for i in range(1, size):
                    comm.send(0, dest=i)
            sys.exit("Thanks for visiting!")
        for i in range(5):
            # The +1 skips the first entry of the sorted list (presumably the
            # query article itself -- confirm). Note that this rebinds `ind`,
            # which was the query article index above.
            ind, val = sim_matrix[i + setabs * 5 + 1]
# End of the frequency-counting phase (its start stamp is taken earlier).
sfreqend = time.time()

# Create dict of tf-idf values for both representations.
stfidfstart = time.time()
# NOTE(review): the 'bow' call is passed len(bigramdict) while the 'bigram'
# call passes no length -- confirm this is the intended argument.
Process.serial_tfidf(abstracts, 'bow', termbow, len(bigramdict))
Process.serial_tfidf(abstracts, 'bigram', termbigram)
stfidfend = time.time()

# Do some topic modeling.
stopicstart = time.time()
Process.serial_topics(abstracts, Process.numtopics)
stopicend = time.time()

# Test similarity: each measure is run against article 0 and timed on its own.
# These phases are stamped with MPI.Wtime() whereas the phases above use
# time.time(); keep each start/end pair on the same clock when differencing.
ssimbowstart = MPI.Wtime()
Process.main_serial_sim(comm, 0, abstracts, 'bow', 'cossim')
ssimbowend = MPI.Wtime()

ssimbigramstart = MPI.Wtime()
Process.main_serial_sim(comm, 0, abstracts, 'bigram', 'cossim')
ssimbigramend = MPI.Wtime()

ssimjacstart = MPI.Wtime()
Process.main_serial_sim(comm, 0, abstracts, 'bow', 'jaccard')
ssimjacend = MPI.Wtime()

# Print times. Single-argument parenthesised print behaves identically under
# Python 2 and Python 3.
print("Serial times")
print("Load time: %f secs" % (sloadend - sloadstart))
print("Create dictionary time: %f secs" % (sdictend - sdictstart))
print("Clean text time: %f secs" % (scleanend - scleanstart))
print("Frequency time: %f secs" % (sfreqend - sfreqstart))
print("TF-IDF time: %f secs" % (stfidfend - stfidfstart))