Exemple #1
0
def prepDataTest(data, fields, fillna = True, fillVal = {}, dump = False, dumpPrefix = 'set'):
    """Select ``fields`` from ``data``, optionally fill gaps and dump to CSV.

    Args:
        data: Container indexed as ``data[fields]``.
        fields: Key(s) used to index ``data``.
        fillna: When true, the selection is passed through
            ``analysis.fillnaDict`` together with ``fillVal``.
        fillVal: Forwarded unchanged to ``analysis.fillnaDict``
            (presumably per-field fill values -- confirm against that helper).
        dump: When true, the result is written with ``np.savetxt`` to
            ``"<dumpPrefix>_testActual.csv"`` using a comma delimiter.
        dumpPrefix: Prefix of the dump file name.

    Returns:
        The selected (and, if requested, filled) data.
    """
    selected = data[fields]

    if fillna:
        selected = analysis.fillnaDict(selected, fillVal)

    if dump:
        outPath = "%s_testActual.csv" % (dumpPrefix)
        np.savetxt(outPath, selected, delimiter=",")

    return selected
Exemple #2
0
def prepDataTrain(data,
                  label='Label',
                  fields=[],
                  split=True,
                  splitPercent=10,
                  shuffle=False,
                  fillna=True,
                  typeSub='mean',
                  dump=True,
                  dumpPrefix='set'):
    """Prepare a training set: shuffle, split, fill gaps, dump, list features.

    Args:
        data: Sliceable sequence of rows. When ``fields`` is empty it must
            expose ``data.dtype.names`` (e.g. a NumPy structured array).
        label: Name of the label column; it is removed from the returned
            feature names.
        fields: Column names to use. Empty (or ``None``) means
            ``data.dtype.names``. The default list is never mutated.
        split: When true, the trailing ``splitPercent`` percent of the rows
            is held out as a test set.
        splitPercent: Size of the held-out tail, in percent of ``len(data)``.
        shuffle: When true, ``data`` is shuffled IN PLACE with
            ``np.random.shuffle`` before splitting (the caller's array is
            reordered).
        fillna: When true, ``analysis.fillna`` is applied to the training
            rows and the fill values it returns are applied to the test rows
            through ``analysis.fillnaDict``.
        typeSub: Forwarded to ``analysis.fillna`` (presumably the
            substitution strategy -- confirm against that helper).
        dump: When true, the training rows are written to
            ``"<dumpPrefix>_train.csv"`` and, if split, the test rows to
            ``"<dumpPrefix>_test.csv"`` (comma-delimited ``np.savetxt``).
        dumpPrefix: Prefix of the dump file names.

    Returns:
        Tuple ``(data, test, features, fillVal)``. ``test`` is ``[]`` when
        ``split`` is false and ``fillVal`` is ``{}`` when ``fillna`` is
        false. ``features`` is ``fields`` itself when ``label`` is not among
        them, otherwise the array returned by ``np.delete``.
    """
    if fields is None or len(fields) == 0:
        fields = data.dtype.names

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    if shuffle:
        np.random.shuffle(data)
        print('Shuffled')

    test = []
    if split:
        # Hold out the last splitPercent percent of the rows.
        cut = len(data) - int(len(data) * (splitPercent / 100.0))
        test = data[cut:]
        data = data[:cut]
        print('Split')

    fillVal = {}
    if fillna:
        # NOTE(review): the meaning of the trailing -1 is defined by
        # analysis.fillna and is not visible from here.
        data, fillVal = analysis.fillna(data, typeSub, fields, -1)
        if split:
            # Reuse the values derived from the training rows.
            test = analysis.fillnaDict(test, fillVal)
        print('Filled NaN')

    if dump:
        np.savetxt("%s_train.csv" % (dumpPrefix), data, delimiter=",")
        if split:
            np.savetxt("%s_test.csv" % (dumpPrefix), test, delimiter=",")
        print('Data dumped')

    # Drop the label column from the feature names. Each deletion starts
    # from the full ``fields``, so the last match decides the result.
    features = fields
    for index, feature in enumerate(fields):
        if feature == label:
            features = np.delete(fields, index, 0)

    return data, test, features, fillVal
Exemple #3
0
def prepDataTest(data,
                 fields,
                 fillna=True,
                 fillVal={},
                 dump=False,
                 dumpPrefix='set'):
    """Extract ``data[fields]`` for testing, with optional fill and CSV dump.

    Args:
        data: Container indexed as ``data[fields]``.
        fields: Key(s) used to index ``data``.
        fillna: If true, run the extracted data through
            ``analysis.fillnaDict`` with ``fillVal``.
        fillVal: Passed as-is to ``analysis.fillnaDict`` (presumably a
            mapping of fill values -- confirm against that helper).
        dump: If true, save the result to ``"<dumpPrefix>_testActual.csv"``
            with ``np.savetxt`` and a comma delimiter.
        dumpPrefix: Prefix of the dump file name.

    Returns:
        The extracted data, filled when ``fillna`` is true.
    """
    picked = data[fields]
    prepared = analysis.fillnaDict(picked, fillVal) if fillna else picked

    if not dump:
        return prepared

    np.savetxt("%s_testActual.csv" % (dumpPrefix), prepared, delimiter=",")
    return prepared
Exemple #4
0
def prepDataTrain(data, label = 'Label', fields = [], split = True, splitPercent = 10, shuffle = False, fillna = True, typeSub = 'mean', dump = True, dumpPrefix = 'set'):
    """Build the training set and the list of feature names.

    Steps, in order: optional in-place shuffle, optional tail split into a
    test set, optional NaN filling, optional CSV dump, then removal of
    ``label`` from the feature names.

    Args:
        data: Sliceable sequence of rows. It must provide
            ``data.dtype.names`` when ``fields`` is empty.
        label: Column name to exclude from the returned features.
        fields: Column names to use; empty (or ``None``) selects
            ``data.dtype.names``. The default list is never mutated.
        split: If true, the last ``splitPercent`` percent of rows becomes
            the test set.
        splitPercent: Percentage of ``len(data)`` to hold out.
        shuffle: If true, ``np.random.shuffle`` reorders ``data`` in place,
            so the caller's array changes too.
        fillna: If true, ``analysis.fillna`` fills the training rows and
            ``analysis.fillnaDict`` applies the returned values to the
            test rows.
        typeSub: Passed to ``analysis.fillna`` (presumably the substitution
            strategy -- confirm against that helper).
        dump: If true, writes ``"<dumpPrefix>_train.csv"`` and, when split,
            ``"<dumpPrefix>_test.csv"`` with ``np.savetxt``.
        dumpPrefix: Prefix of the dump file names.

    Returns:
        ``(data, test, features, fillVal)``; ``test`` is ``[]`` without a
        split and ``fillVal`` is ``{}`` without filling. ``features`` is
        ``fields`` unchanged if ``label`` is absent, else the ``np.delete``
        result.
    """
    if fields is None or len(fields) == 0:
        fields = data.dtype.names

    # print(...) with one argument is valid and prints the same text on
    # both Python 2 and Python 3.
    if shuffle:
        np.random.shuffle(data)
        print('Shuffled')

    test = []
    if split:
        # Index where the held-out tail begins.
        boundary = len(data) - int(len(data) * (splitPercent / 100.0))
        test = data[boundary:]
        data = data[:boundary]
        print('Split')

    fillVal = {}
    if fillna:
        # NOTE(review): what the final -1 argument means is decided by
        # analysis.fillna; it cannot be determined from this block.
        data, fillVal = analysis.fillna(data, typeSub, fields, -1)
        if split:
            # Fill the test rows with the values obtained on the train rows.
            test = analysis.fillnaDict(test, fillVal)
        print('Filled NaN')

    if dump:
        np.savetxt("%s_train.csv" % (dumpPrefix), data, delimiter=",")
        if split:
            np.savetxt("%s_test.csv" % (dumpPrefix), test, delimiter=",")
        print('Data dumped')

    # Remove the label from the feature names. Deletion always starts from
    # the complete ``fields``, so only the last match takes effect.
    features = fields
    for position, name in enumerate(fields):
        if name == label:
            features = np.delete(fields, position, 0)

    return data, test, features, fillVal