コード例 #1
0
ファイル: DNAParallel.py プロジェクト: aerinzhang/15-440-lab4
def kMeans(k, e, i, o):
    """Cluster the strands in file `i` into `k` groups across all MPI ranks.

    Every rank in MPI.COMM_WORLD must call this function, because it takes
    part in the collective bcast/scatter/gather calls below. Rank 0 reads the
    input, splits it into one chunk per rank, chooses the starting centroids
    and, after each round, recomputes the centroids from the gathered
    memberships. All ranks assign their own chunk to the current centroids.

    Args:
        k: number of centroids.
        e: threshold; iteration continues while
            DS.diffCentroids(oldCen, newCen) is greater than this value.
        i: path of the input file, one strand per line (each line is
            stripped of surrounding whitespace).
        o: path of the output file; rank 0 writes one centroid per line.

    Returns:
        None. The final centroids are written to `o` by rank 0 only.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Only the root reads the input, partitions it and picks the centroids.
    if rank == 0:
        with open(i, "r") as f:
            strands = [line.strip() for line in f]
        # Split into exactly `size` contiguous chunks. divmod keeps the slice
        # bounds integral, and the first `extra` ranks take one additional
        # strand so nothing is dropped when len(strands) is not a multiple
        # of size.
        base, extra = divmod(len(strands), size)
        chunk = []
        start = 0
        for r in range(size):
            stop = start + base + (1 if r < extra else 0)
            chunk.append(strands[start:stop])
            start = stop
        newCen = DS.getInitialCentroids(strands, k)
    else:
        newCen = None
        chunk = None

    # Every rank receives the root's centroids and its own chunk of strands.
    newCen = comm.bcast(newCen, root=0)
    chunk = comm.scatter(chunk, root=0)

    # Placeholder "previous" centroids for the first comparison.
    # NOTE(review): presumably chosen so the first diff exceeds e and at
    # least one round runs -- depends on DS.diffCentroids; confirm.
    oldCen = [""] * k

    # Repeat until the centroids move by no more than e.
    while DS.diffCentroids(oldCen, newCen) > e:
        oldCen = newCen[:]
        # Each rank labels its own strands against the current centroids.
        membership = assignMembership(chunk, newCen)
        allMembers = comm.gather(membership, root=0)
        if rank == 0:
            # gather returns one list per rank; flatten before updating.
            allMembership = []
            for member in allMembers:
                allMembership.extend(member)
            newCen = updateCentroids(allMembership, k)
        # Non-root ranks pass their stale value here; bcast replaces it with
        # the root's freshly computed centroids.
        newCen = comm.bcast(newCen, root=0)

    # Only the root writes the result, one centroid per line.
    if rank == 0:
        with open(o, "w+") as fo:
            for c in newCen:
                fo.write(c + '\n')
コード例 #2
0
ファイル: DNAParallel.py プロジェクト: aerinzhang/15-440-lab4
def assignMembership(strands, centroids):
    """Pair every strand with the index of its closest centroid.

    Distances come from DS.distance(strand, centroid). Ties keep the
    lowest centroid index because only a strictly smaller distance replaces
    the current best.

    Args:
        strands: iterable of strands to label.
        centroids: sequence of centroids to compare against.

    Returns:
        A list of (centroid_index, strand) tuples, in the order of `strands`.
    """
    # NOTE(review): `nearest` is deliberately not reset per strand, matching
    # the existing behaviour: if no distance is below infinity for a strand,
    # the previous strand's index is reused (or, for the first strand, the
    # name is unbound and the append raises).
    labelled = []
    for strand in strands:
        best = float("inf")
        for idx, centroid in enumerate(centroids):
            dist = DS.distance(strand, centroid)
            if dist < best:
                best, nearest = dist, idx
        labelled.append((nearest, strand))
    return labelled