Example #1
0
def main():
  """Cluster a libsvm data set across rabit workers with k-means-style updates.

  Configuration is read from ``sys.argv``:
    argv[1] -- data URI handed to ``RowBlockIter.CreateFromUri``
    argv[2] -- number of clusters
    argv[3] -- iteration limit

  Every iteration assigns each row to the centroid with the largest dot
  product (scaled by the row's L2 norm), accumulates per-cluster row sums
  (through ``CSRReduceSum``) and per-cluster row counts, combines them across
  workers with ``rabit.allreduce`` and finally divides each cluster's summed
  row by that cluster's row count.

  Raises:
    AssertionError: if a cluster received no rows during an iteration.
  """
  num_cluster = int(sys.argv[2])
  max_iter = int(sys.argv[3])
  rabit.init(sys.argv)
  world_size = rabit.get_world_size()
  rank = rabit.get_rank()
  data_iter = dmlc_core.RowBlockIter()
  data_iter.CreateFromUri(sys.argv[1], rank, world_size, 'libsvm')
  iter_cnt = 0
  fdim_array = np.array([0])
  fdim_array[0] = data_iter.NumCol()
  if iter_cnt == 0:
    # Use the largest column count reported by any worker so that every
    # worker builds a model of the same width.
    fdim_array = rabit.allreduce(fdim_array, rabit.MAX)
    model = Model(num_cluster, int(fdim_array[0]))
    model.InitCentroids(data_iter)
    # NOTE: centroid normalisation (model.normalize()) is left disabled here,
    # exactly as in the original code.
  num_feat = fdim_array[0]
  data_iter.setNumFeat(num_feat)

  def preparefun(temp):
    """Add this worker's per-cluster sums and counts into ``temp`` in place.

    ``temp`` has one row per cluster; columns ``0..num_feat-1`` receive the
    feature sums and column ``num_feat`` receives the row count.
    """
    data_iter.BeforeFirst()
    while data_iter.Next():
      spmat = data_iter.ValueCSR()
      # L2 norm of every row of the current block.
      vnorm = np.sqrt(spmat.multiply(spmat).sum(axis=1))
      dotp = spmat.dot(model.centroid.T)
      dist = dotp / vnorm
      # Index of the best-scoring centroid for every row.
      max_id = np.argmax(dist, axis=1)
      for k in range(num_cluster):
        # Fix: credit the count to cluster k only.  The original added it to
        # the whole column (temp[:, num_feat]), so every cluster ended up
        # with the total row count and the per-cluster check below could
        # never fail.
        # NOTE(review): assumes CSRReduceSum does not maintain the count
        # column itself -- confirm against its implementation.
        temp[k, num_feat] += np.count_nonzero(max_id == k)
      data_iter.CSRReduceSum(max_id, temp)

  for it in range(iter_cnt, max_iter):
    if rabit.get_rank() == 0:
      # Single pre-formatted argument: prints the same text as the original
      # Python 2 statement and also runs under Python 3.
      print('iter =  %d' % it)

    temp = np.zeros((num_cluster, num_feat + 1), dtype=np.float32)
    # preparefun fills temp; the return value is ignored, so this relies on
    # allreduce leaving the reduced values in temp itself.
    rabit.allreduce(temp, rabit.SUM, preparefun)
    model.centroid = temp[:, 0:num_feat]
    for k in range(num_cluster):
      # An empty cluster would make the division below meaningless.
      assert temp[k, num_feat] > 0
      model.centroid[k, :] /= temp[k, num_feat]
  rabit.finalize()
Example #2
0
#!/usr/bin/python
"""
demo python script of rabit
"""
from __future__ import print_function
from builtins import range
import os
import sys
import numpy as np
# import rabit, the tracker script will setup the lib path correctly
# for normal run without tracker script, add following line
# sys.path.append(os.path.dirname(__file__) + '/../python')
import rabit

# Bring up the rabit engine before issuing any collective call.
rabit.init()
n = 3
rank = rabit.get_rank()
# Element i starts out as rank + i (float64, like the np.zeros buffer it replaces).
a = np.arange(n, dtype=np.float64) + rank

print('@node[%d] before-allreduce: a=%s' % (rank, a))
# Reduce with MAX first, then reduce that result with SUM, reporting after each.
for op, fmt in ((rabit.MAX, '@node[%d] after-allreduce-max: a=%s'),
                (rabit.SUM, '@node[%d] after-allreduce-sum: a=%s')):
    a = rabit.allreduce(a, op)
    print(fmt % (rank, a))
rabit.finalize()
Example #3
0
#!/usr/bin/python
import rabit
import numpy as np

# Checkpoint/restart demo: repeatedly allreduce a vector and checkpoint the
# result so that a restarted worker can resume from the saved round.
rabit.init(lib='mock')
rank = rabit.get_rank()
n = 10
nround = 3
data = np.ones(n) * rank

# NOTE(review): the True argument presumably asks for the local model as
# well -- confirm against rabit.load_checkpoint.
version, model, local = rabit.load_checkpoint(True)
if version == 0:
    # Version 0 is treated as a fresh start: build the initial state.
    model = np.zeros(n)
    local = np.ones(n)
else:
    # print() with one pre-formatted argument behaves the same on Python 2
    # and Python 3 (the former print statement was Python 2 only).
    print('[%d] restart from version %d' % (rank, version))

# range instead of the Python 2-only xrange; resumes at the loaded version.
for i in range(version, nround):
    res = rabit.allreduce(data + model + local, rabit.SUM)
    print('[%d] iter=%d: %s' % (rank, i, str(res)))
    model = res
    local[:] = i
    rabit.checkpoint(model, local)

rabit.finalize()
Example #4
0
"""
demo python script of rabit: Lazy preparation function
"""
import os
import sys
import numpy as np
# import rabit, the tracker script will setup the lib path correctly
# for normal run without tracker script, add following line
# sys.path.append(os.path.dirname(__file__) + '/../wrapper')
import rabit


# use mock library so that we can run failure test
rabit.init(lib='mock')
n = 3
rank = rabit.get_rank()
a = np.zeros(n)


def prepare(a):
    """Fill ``a`` in place so that ``a[i] == rank + i`` for every ``i < n``.

    Handed to ``rabit.allreduce`` as ``prepare_fun``; the buffer must be
    modified through the reference, not rebound.
    """
    print('@node[%d] run prepare function' % rank)
    # must take in reference and modify the reference
    # range instead of xrange: xrange does not exist on Python 3, and this
    # script already uses the print() function form.
    for i in range(n):
        a[i] = rank + i


print('@node[%d] before-allreduce: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.MAX, prepare_fun=prepare)
print('@node[%d] after-allreduce-max: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.SUM)
print('@node[%d] after-allreduce-sum: a=%s' % (rank, str(a)))
rabit.finalize()
"""
demo python script of rabit: Lazy preparation function
"""
import os
import sys
import numpy as np
# import rabit, the tracker script will setup the lib path correctly
# for normal run without tracker script, add following line
# sys.path.append(os.path.dirname(__file__) + '/../wrapper')
import rabit


# use mock library so that we can run failure test
rabit.init(lib='mock')
n = 3
rank = rabit.get_rank()
a = np.zeros(n)


def prepare(a):
    """Fill ``a`` in place so that ``a[i] == rank + i`` for every ``i < n``.

    Handed to ``rabit.allreduce`` as ``prepare_fun``; the buffer must be
    modified through the reference, not rebound.
    """
    # print() with one pre-formatted argument produces the same output on
    # Python 2 and Python 3; the former print statement was Python 2 only.
    print('@node[%d] run prepare function' % rank)
    # must take in reference and modify the reference
    # range instead of the Python 2-only xrange.
    for i in range(n):
        a[i] = rank + i


print('@node[%d] before-allreduce: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.MAX, prepare_fun=prepare)
print('@node[%d] after-allreduce-max: a=%s' % (rank, str(a)))
a = rabit.allreduce(a, rabit.SUM)
print('@node[%d] after-allreduce-sum: a=%s' % (rank, str(a)))
rabit.finalize()