예제 #1
0
def assign_labels(model, data_file, cols):
  """Classify the rows of ``data_file`` in batches and collect the labels.

  Rows are read from ``prep.gen_file_stream(data_file, cols)`` and buffered.
  Every 100000 rows the buffer is loaded into a fresh ``mx.DataSet`` (via
  ``fromArray(np.array(...))``), passed to
  ``model.classify(dataset, None, None, 1)``, and then emptied. Rows left
  over after the stream ends are classified the same way.

  Args:
    model: object exposing ``classify(dataset, None, None, 1)``.
    data_file: forwarded unchanged to ``prep.gen_file_stream``.
    cols: forwarded unchanged to ``prep.gen_file_stream``.

  Returns:
    A list holding the items produced by every ``model.classify`` call,
    in the order the batches were processed.
  """
  batch_size = 100000
  pending = []
  result = []

  # The single-argument print(...) form below behaves the same as the
  # Python 2 print statement.
  for seen, row in enumerate(prep.gen_file_stream(data_file, cols), 1):
    pending.append(row)
    if seen % batch_size != 0:
      continue

    # A full batch has accumulated: report progress, classify, reset.
    print(seen)
    dataset = mx.DataSet()
    print('table size: %d' % len(pending))
    dataset.fromArray(np.array(pending))
    result.extend(model.classify(dataset, None, None, 1))
    del dataset
    # Clear the buffer in place so the same list object is reused.
    del pending[:]

  # Classify whatever did not fill a complete batch.
  if pending:
    dataset = mx.DataSet()
    dataset.fromArray(np.array(pending))
    result.extend(model.classify(dataset, None, None, 1))

  return result
예제 #2
0
def assign_labels(model, data_file, cols):
    """Classify the rows of ``data_file`` in batches and collect the labels.

    Rows are read from ``prep.gen_file_stream(data_file, cols)`` and
    buffered. Every 100000 rows the buffer is loaded into a fresh
    ``mx.DataSet`` (via ``fromArray(np.array(...))``), passed to
    ``model.classify(dataset, None, None, 1)``, and then emptied. Rows left
    over after the stream ends are classified the same way.

    Args:
        model: object exposing ``classify(dataset, None, None, 1)``.
        data_file: forwarded unchanged to ``prep.gen_file_stream``.
        cols: forwarded unchanged to ``prep.gen_file_stream``.

    Returns:
        A list holding the items produced by every ``model.classify`` call,
        in the order the batches were processed.
    """
    batch_size = 100000
    pending = []
    result = []

    # The single-argument print(...) form below behaves the same as the
    # Python 2 print statement.
    for seen, row in enumerate(prep.gen_file_stream(data_file, cols), 1):
        pending.append(row)
        if seen % batch_size != 0:
            continue

        # A full batch has accumulated: report progress, classify, reset.
        print(seen)
        dataset = mx.DataSet()
        print('table size: %d' % len(pending))
        dataset.fromArray(np.array(pending))
        result.extend(model.classify(dataset, None, None, 1))
        del dataset
        # Clear the buffer in place so the same list object is reused.
        del pending[:]

    # Classify whatever did not fill a complete batch.
    if pending:
        dataset = mx.DataSet()
        dataset.fromArray(np.array(pending))
        result.extend(model.classify(dataset, None, None, 1))

    return result
예제 #3
0
def classify_data_kmeans(k, cols, path, centers):
  """Write a label file for each ``.train`` file listed under ``path``.

  For every name yielded by ``prep.gen_file_list(path)`` that ends with
  ``'.train'``, each row from ``prep.gen_file_stream(f, cols)`` is passed to
  ``assign_center(row, centers)`` and the result is written, one per line,
  to ``<directory of f>/.<k>.labels``.

  Args:
    k: used only to build the output file name (``str(k)``).
    cols: forwarded unchanged to ``prep.gen_file_stream``.
    path: forwarded unchanged to ``prep.gen_file_list``.
    centers: forwarded unchanged to ``assign_center``.

  Returns:
    None. Output is written to the label files and to stdout.
  """
  for f in prep.gen_file_list(path):
    if not f.endswith('.train'):
      continue
    print('classifying %s' % f)

    # rfind() returns -1 when the name has no '/', and f[:-1] would drop
    # the last character of the file name instead of yielding a directory;
    # fall back to the current directory in that case.
    slash = f.rfind('/')
    directory = f[:slash] if slash != -1 else '.'
    # NOTE(review): the output name depends only on the directory and k, so
    # several .train files in one directory write to the same label file --
    # confirm this is intended.
    label_path = directory + '/.' + str(k) + '.labels'

    # 'with' closes the file even if reading or center assignment raises.
    with open(label_path, 'w') as fw:
      for prog, row in enumerate(prep.gen_file_stream(f, cols)):
        if prog % 10000 == 0:
          print('progress: %d' % prog)
        label = assign_center(row, centers)
        fw.write(str(label) + '\n')
예제 #4
0
def classify_data_kmeans(k, cols, path, centers):
    """Write a label file for each ``.train`` file listed under ``path``.

    For every name yielded by ``prep.gen_file_list(path)`` that ends with
    ``'.train'``, each row from ``prep.gen_file_stream(f, cols)`` is passed
    to ``assign_center(row, centers)`` and the result is written, one per
    line, to ``<directory of f>/.<k>.labels``.

    Args:
        k: used only to build the output file name (``str(k)``).
        cols: forwarded unchanged to ``prep.gen_file_stream``.
        path: forwarded unchanged to ``prep.gen_file_list``.
        centers: forwarded unchanged to ``assign_center``.

    Returns:
        None. Output is written to the label files and to stdout.
    """
    for f in prep.gen_file_list(path):
        if not f.endswith('.train'):
            continue
        print('classifying %s' % f)

        # rfind() returns -1 when the name has no '/', and f[:-1] would
        # drop the last character of the file name instead of yielding a
        # directory; fall back to the current directory in that case.
        slash = f.rfind('/')
        directory = f[:slash] if slash != -1 else '.'
        # NOTE(review): the output name depends only on the directory and
        # k, so several .train files in one directory write to the same
        # label file -- confirm this is intended.
        label_path = directory + '/.' + str(k) + '.labels'

        # 'with' closes the file even if reading or assignment raises.
        with open(label_path, 'w') as fw:
            for prog, row in enumerate(prep.gen_file_stream(f, cols)):
                if prog % 10000 == 0:
                    print('progress: %d' % prog)
                label = assign_center(row, centers)
                fw.write(str(label) + '\n')