Exemple #1
0
def main():
    """Score a dataset against a saved model, one field at a time.

    Expected command line: ``<dataset path> <model file> <field> [field ...]``.
    When fewer than three arguments follow the program name, the usage text
    is printed and nothing else happens.
    """
    args = sys.argv
    if len(args) < 4:
        print_usage()
        return

    dataset_path, model_fname = args[1], args[2]
    data = data_io.load_from_files(dataset_path)

    # Everything after the model file name is a field to score.
    for field in args[3:]:
        score_dataset(data, model_fname, field)
Exemple #2
0
def main():
    """Train one model per requested field and save them all to one pickle.

    Expected command line: ``<dataset path> <models file> [field ...]``.

    Models already stored in the models file are loaded first, so entries
    for fields that are not retrained in this run are carried over; each
    retrained field replaces its previous entry. If the file is missing or
    cannot be read, training starts from an empty collection.
    """
    if len(sys.argv) < 3:
        print_usage()
        return

    data = data_io.load_from_files(sys.argv[1])
    models_fname = sys.argv[2]
    fields = sys.argv[3:]

    # Seed from the same file we write to below, so repeated runs accumulate
    # models instead of silently starting over.
    # NOTE: unpickling can execute arbitrary code; only point this at model
    # files you trust.
    try:
        with open(models_fname, 'rb') as model_file:
            models = pickle.load(model_file)
    except Exception:
        # Best effort: a missing or unreadable file just means there is
        # nothing to carry over. Unlike a bare ``except``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        models = {}

    print('before training.........')

    for field in fields:
        models[field] = train_model(data, field)
        # Two spaces after the colon reproduce the historical output of
        # ``print 'field: ', field``.
        print('field:  %s' % field)

    # Pickle streams are binary data, so the file must be opened in binary mode.
    with open(models_fname, 'wb') as model_file:
        pickle.dump(models, model_file)
##############
#dir_name = '/home/evgeniy/ReceiptBank/rb-engine/text-training-old/'
# match_file = 'data.csv'

# Dataset locations.
# NOTE(review): both directories currently point at the same small dataset,
# so the score printed at the end is computed on the data used for training.
dir_train = '/home/evgeniy/ReceiptBank/rb-engine/text-training-small/'
dir_test = '/home/evgeniy/ReceiptBank/rb-engine/text-training-small/'

# Separate train/test directories, currently disabled:
#dir_train = '/home/evgeniy/ReceiptBank/rb-engine/text-training/'
#dir_test = '/home/evgeniy/ReceiptBank/rb-engine/text-test/'

# Not referenced in the visible part of this script.
match_file = 'data.csv'
##############

# Field the model is trained on and later scored against.
amount_name = 'total_amount'

print('Before training...........')

data = data_io.load_from_files(dir_train)
model = train_model(data, amount_name)

print('After training...........')

# `data` is rebound here: from this point on it holds the test set.
data = data_io.load_from_files(dir_test)
res = model.score(data, data[amount_name])

print("Score: %.4f" % res)
Exemple #4
0
def main():
    """Score the dataset whose path is the first command-line argument.

    When no argument is supplied, a one-line usage message is printed
    instead and nothing is loaded.
    """
    arguments = sys.argv
    if len(arguments) <= 1:
        print("Usage: {} <dataset path>".format(arguments[0]))
        return

    dataset = data_io.load_from_files(arguments[1])
    score_dataset(dataset)
Exemple #5
0
#
# train_indexes = [it[0] for it in X_train ]
# test_indexes = [it[0] for it in X_test ]
#
# extract_directory(dir_name,
#                  '/home/evgeniy/ReceiptBank/rb-engine/text-training-old/',
#                  train_indexes,
#                  data00
#                  )
#
# extract_directory(dir_name,
#                  '/home/evgeniy/ReceiptBank/rb-engine/text-test-old/',
#                  test_indexes,
#                  data00
#                  )
# ==============================================================================

# Train one model per target field and store them together in one pickle.
# NOTE(review): `dir_name` is not defined in the visible part of this script;
# presumably it is set earlier -- confirm before running.
data = data_io.load_from_files(dir_name)
fields = ["total_amount"]

print("before training...........")

# Maps each field name to the result of train_model for that field.
models = {}

for field in fields:
    models[field] = train_model(data, field)

# Pickle streams are binary data, so the file must be opened in binary mode
# ("w" fails on Python 3 and can corrupt the stream on Windows).
with open("models_est45.p", "wb") as model_file:
    pickle.dump(models, model_file)
############