コード例 #1
0
ファイル: kmeans.py プロジェクト: SiNZeRo/wormhole
def main():
  """Run distributed k-means on a libsvm dataset using rabit.

  Command-line arguments (read from ``sys.argv``):
    argv[1]: URI of the input data, opened in 'libsvm' format.
    argv[2]: number of clusters.
    argv[3]: maximum number of iterations.

  In every iteration each row is assigned to the centroid with the largest
  (dot product / row norm) score. Per-cluster accumulators are then summed
  across workers with an allreduce and divided by the per-cluster row count
  to produce the new centroids.

  Raises:
    AssertionError: if a cluster ends up with no assigned rows.
  """
  num_cluster = int(sys.argv[2])
  max_iter = int(sys.argv[3])
  rabit.init(sys.argv)
  world_size = rabit.get_world_size()
  rank = rabit.get_rank()
  data_iter = dmlc_core.RowBlockIter()
  # presumably rank/world_size select this worker's shard -- TODO confirm
  data_iter.CreateFromUri(sys.argv[1], rank, world_size, 'libsvm')
  iter_cnt = 0
  fdim_array = np.array([0])
  fdim_array[0] = data_iter.NumCol()
  if iter_cnt == 0:
    # Take the maximum column count over all workers so that every worker
    # builds a model with the same feature dimension.
    fdim_array = rabit.allreduce(fdim_array, rabit.MAX)
    model = Model(num_cluster, int(fdim_array[0]))
    model.InitCentroids(data_iter)
  num_feat = fdim_array[0]
  data_iter.setNumFeat(num_feat)
  for it in range(iter_cnt, max_iter):
    if rabit.get_rank() == 0:
      # Single-argument form prints the same text on Python 2 and Python 3.
      print('iter =  %s' % it)

    # Columns [0, num_feat) are the per-cluster accumulators filled by
    # CSRReduceSum; the extra last column holds the per-cluster row count.
    temp = np.zeros((num_cluster, num_feat + 1), dtype=np.float32)

    def preparefun(temp):
      """Fill ``temp`` with this worker's local sums and counts."""
      data_iter.BeforeFirst()
      while data_iter.Next():
        spmat = data_iter.ValueCSR()

        vnorm = np.sqrt(spmat.multiply(spmat).sum(axis=1))
        dotp = spmat.dot(model.centroid.T)
        dist = dotp / vnorm
        max_id = np.argmax(dist, axis=1)
        # Count how many rows went to each cluster and add that count to the
        # matching row only. Flattening first makes this work whether
        # max_id is a 1-D array or an (n, 1) matrix.
        counts = np.bincount(np.asarray(max_id).ravel(),
                             minlength=num_cluster)
        temp[:, num_feat] += counts
        data_iter.CSRReduceSum(max_id, temp)

    # NOTE(review): the return value is ignored, so this relies on allreduce
    # reducing `temp` in place -- confirm against the rabit Python API.
    rabit.allreduce(temp, rabit.SUM, preparefun)

    cluster_sizes = temp[:, num_feat]
    # An empty cluster would cause a division by zero below.
    assert (cluster_sizes > 0).all(), 'a cluster has no assigned rows'
    model.centroid = temp[:, 0:num_feat] / cluster_sizes[:, np.newaxis]
  rabit.finalize()
コード例 #2
0
#!/usr/bin/python
"""
demo python script of rabit
"""
from __future__ import print_function
from builtins import range
import os
import sys
import numpy as np
# import rabit; the tracker script will set up the lib path correctly.
# For a normal run without the tracker script, add the following line:
# sys.path.append(os.path.dirname(__file__) + '/../python')
import rabit

# Demo: build a small per-node vector, then combine it across all nodes
# with a MAX allreduce followed by a SUM allreduce, printing each stage.
rabit.init()
n = 3
rank = rabit.get_rank()
# Element i of the local vector is rank + i, stored as float64.
a = np.arange(n, dtype=np.float64) + rank

print('@node[%d] before-allreduce: a=%s' % (rank, str(a)))

# Each stage feeds the previous result into the next reduction.
stages = (
    (rabit.MAX, '@node[%d] after-allreduce-max: a=%s'),
    (rabit.SUM, '@node[%d] after-allreduce-sum: a=%s'),
)
for op, template in stages:
    a = rabit.allreduce(a, op)
    print(template % (rank, str(a)))

rabit.finalize()