예제 #1
0
    points = np.array(points)
    centers = np.array(centers)
    logger.debug("**closestPoint - Point: " + str(points) + " Centers: " + str(centers))
    for i in range(len(centers)):
        #dist = sum([(m-k)**2 for k,m in zip(points,centers[i]) ])
        dist = np.sum((points - centers[i]) ** 2)
        if dist < closest:
            closest = dist
            bestIndex = i
            logger.debug("Map point " + str(points) + " to index " + str(bestIndex))
    return bestIndex


if __name__ == "__main__":
    # Initialize Map Job
    mapJob = Mapper(sys.argv)

    # Parse the data points of this chunk and the center vectors (the second
    # argument handed to closestPoint below). Real lists are built on purpose:
    # on Python 3 map() returns a lazy iterator, which has no len() and would
    # be exhausted after the first pass over it. The with-blocks also make
    # sure both input files are closed once they have been read.
    with open(mapJob.chunkFile) as chunkFile:
        pVectors = [parseVector(line) for line in chunkFile]
    with open(mapJob.mapArgs[0]) as centersFile:
        cVectors = [parseVector(line) for line in centersFile]
    print("Total number of datapoints/chunk is %s " % len(pVectors))

    tst = time.time()
    for point in pVectors:
        st = time.time()
        bestIndex = closestPoint(point, cVectors)
        # Emit "<index>,<coord>,<coord>,..." keyed by the index returned by
        # closestPoint for this point.
        mapJob.emit(bestIndex, "%s,%s" % (bestIndex, ",".join([str(x) for x in point])))
        print("Time taken - %s" % round(time.time() - st, 2))
    print("Total Time taken - %s" % round(time.time() - tst, 2))
    
    ## Finalize map job  
예제 #2
0
import sys
from pmr.mapper import Mapper
    
if __name__ == "__main__":
    # Set up the map job from the command-line arguments.
    mapJob = Mapper(sys.argv)

    # Map function: read the whole chunk file and break it into
    # whitespace-separated tokens.
    with open(mapJob.chunkFile) as chunk:
        tokens = chunk.read().split()

    # Emit one "<word>,1" record per token, keyed by the token itself.
    for token in tokens:
        mapJob.emit(token, "%s,%s" % (token, 1))

    # Finalize map job
    mapJob.finalize()