Example #1
0
 # Calculate similarity values for given article
 # On rank 0, sim_matrix becomes a list of (position, value) pairs produced by
 # enumerate() over the similarity routine's result, sorted ascending by value.
 # On every other rank it stays an empty list.
 # NOTE(review): ascending order puts the smallest values first -- confirm that
 # this is the intended ranking for the selected mattype.
 sim_matrix = []
 if version.lower() == 'p':
     if rank == 0:
         # Tell every other rank (1 .. size-1) to take part in this computation.
         for i in range(1,size):
             comm.send(1, dest = i)
         # The lambda parameter is also called `ind`; it only shadows the outer
         # `ind` inside the lambda body.
         sim_matrix = sorted(enumerate(Process.main_parallel_sim(comm, ind, abstracts, type, mattype)), key=lambda ind:ind[1])
     else:
         # Non-root ranks block here until rank 0 sends a command.
         tosend = comm.recv(source = 0)
         if tosend == 1:
             # Join the parallel computation; the return value is discarded on
             # non-root ranks.
             Process.main_parallel_sim(comm, ind, abstracts, type, mattype)
         elif tosend == 0:
             # 0 is the command rank 0 sends when it is about to exit.
             sys.exit()
 else:
     if rank == 0:
         # Serial version: only rank 0 computes.
         sim_matrix = sorted(enumerate(Process.main_serial_sim(comm, ind, abstracts, type, mattype)), key=lambda ind:ind[1])
     else:
         # Other ranks have nothing to do in the serial version.
         sys.exit()
 # print 5 most similar articles
 # Only rank 0 prints. Results are shown in pages of 5 entries taken from
 # sim_matrix, and `setabs` is the zero-based page number.
 if rank == 0:
     print "Similar articles:\n"
     setabs = 0
     while True:
         # Stop when the number of whole groups of five (integer division under
         # Python 2) no longer exceeds setabs+1; this keeps the index
         # i+setabs*5+1 used below inside sim_matrix.
         if len(sim_matrix)/5 <= setabs+1:
             print "No more articles to see ..."
             if version.lower() == 'p':
                 # Send 0 to every other rank before rank 0 exits.
                 for i in range(1,size):
                     comm.send(0, dest = i)
             # sys.exit with a string argument prints it to stderr and exits
             # with status 1.
             sys.exit("Thanks for visiting!")
         for i in range(5):
             # The +1 offset skips entry 0 of the sorted list.
             # NOTE(review): presumably entry 0 is the query article itself --
             # confirm. This also rebinds the outer `ind`.
             ind, val = sim_matrix[i+setabs*5+1]
Example #2
0
        # End of the frequency timing interval (sfreqstart is set earlier,
        # outside this excerpt).
        sfreqend = time.time()

        # create dict of tfidf
        # NOTE(review): the 'bow' call passes len(bigramdict) as a fourth
        # argument while the 'bigram' call passes none -- confirm this is
        # intended.
        stfidfstart = time.time()
        Process.serial_tfidf(abstracts, 'bow', termbow, len(bigramdict))
        Process.serial_tfidf(abstracts, 'bigram', termbigram)
        stfidfend = time.time()

        # do some topic modeling
        stopicstart = time.time()
        Process.serial_topics(abstracts, Process.numtopics)
        stopicend = time.time()

        # test similarity
        # Three timed runs against index 0: bag-of-words with 'cossim', bigram
        # with 'cossim', and bag-of-words with 'jaccard'. Return values are
        # discarded; only the elapsed time is of interest here.
        # These intervals use MPI.Wtime(), whereas the stages above use
        # time.time().
        ssimbowstart = MPI.Wtime()
        Process.main_serial_sim(comm, 0, abstracts, 'bow', 'cossim')
        ssimbowend = MPI.Wtime()
        ssimbigramstart = MPI.Wtime()
        Process.main_serial_sim(comm, 0, abstracts, 'bigram', 'cossim')
        ssimbigramend = MPI.Wtime()
        ssimjacstart = MPI.Wtime()
        Process.main_serial_sim(comm, 0, abstracts, 'bow', 'jaccard')
        ssimjacend = MPI.Wtime()

        # print times
        # Each line reports end - start in seconds for one stage; the load,
        # dictionary, clean and frequency start/end values come from code
        # before this excerpt.
        print "Serial times"
        print "Load time: %f secs" % (sloadend - sloadstart)
        print "Create dictionary time: %f secs" % (sdictend - sdictstart)
        print "Clean text time: %f secs" % (scleanend - scleanstart)
        print "Frequency time: %f secs" % (sfreqend - sfreqstart)
        print "TF-IDF time: %f secs" % (stfidfend - stfidfstart)