예제 #1
0
파일: process.py 프로젝트: wsun/abstracts
def main_parallel_sim(comm, absind, abstracts, type, mattype):
    '''
    Entry point for the MPI similarity computation between one abstract
    (identified by absind) and all abstracts, using the representation named
    by "type" and the measure named by mattype (cosine or jaccard distance).

    Every process in comm is expected to call this function. The process with
    rank 0 runs Similar.master and returns whatever it produces; every other
    rank runs Similar.slave and returns None.
    '''
    # Worker ranks only serve the master and have no result of their own.
    if comm.Get_rank() != 0:
        Similar.slave(comm)
        return None

    # Rank 0 drives the computation and hands back the similarity values.
    return Similar.master(comm, absind, abstracts, type, mattype)
예제 #2
0
        print "Send abstract time: %f secs" % (pabsend - pabsstart)
        print "Frequency time: %f secs" % (pfreqend - pfreqstart)
        print "Send abs, terms time: %f secs" % (psendend - psendstart)
        print "TF-IDF time: %f secs" % (ptfidfend - ptfidfstart)
        print "Topic modelling time: %f secs" % (ptopicend - ptopicstart)
        print "Cosine similarity, bag of words time: %f secs" % (psimbowend - psimbowstart)
        print "Cosine similarity, bigrams time: %f secs" % (psimbigramend - psimbigramstart)
        print "Jaccard similarity, bag of words time: %f secs" % (psimjacend - psimjacstart)
        print "\n"

        target = open(filename[:-4]+"processed", 'w')
        abstractpickle = pickle.dumps(abstracts)
        target.write(abstractpickle)
    else:    
        Process.slave(comm)
        Similar.slave(comm)
        Similar.slave(comm)
        Similar.slave(comm)

    # Test scatter-gather implementation
    if rank == 0:
        starttime = MPI.Wtime()
    abstracts = Process.main_mpi(comm, filename)
    if rank == 0:
        endtime = MPI.Wtime()
        print "Scatter-gather MPI time: %f secs" % (starttime - endtime)
        target = open(filename[:-4]+"processed", 'w')
        abstractpickle = pickle.dumps(abstracts)
        target.write(abstractpickle)

    # Serial testing