Code Example #1
    print("Saving cleaned data")
    dataset.data.to_csv(os.path.join(config.model_dir, args.dataset+'_clean.csv'), index=False)
    print("Done")

trainer = Trainer(dataset=dataset, models=args.models, transforms=args.feats, cfg=config, grid=False)
# Either load a previously trained model (test-only mode) or train from scratch
if args.test_only:
    trainer.load_model(args.models[0], args.feats[0], args.load_path)
else:
    trainer.train()
# Get the evaluation metrics. In test-only mode GridSearchCV is never
# fitted, which is why grid=False is passed to the Trainer above; the
# call is the same in both modes.
metrics = trainer.evaluate()

# Save results
if args.save_results:
    print("Saving results")
    metrics.to_csv(os.path.join(config.model_dir, args.dataset+'_results.csv'), index=False)
    print("Done")

# Save best
# Save the best model (only meaningful when we actually trained)
if not args.test_only:
    trainer.save_best(metrics)
    print("Training done")
else:
    print("Test results:")
    print(metrics)
Code Example #2
File: run.py  Project: pierrel/document-labeling
import numpy

from training.dataprep.airbnb_example import AirbnbExample
from training.trainer import Trainer
from training.model import DocLabelingModel

example = AirbnbExample(file_path="/path/to/file.csv")
# Materialize the example's text and label generators as numpy arrays
texts = numpy.array(list(example.texts()))
labels = numpy.array(list(example.labels()))

# Hold out the last 50 examples for evaluation (a shuffled alternative
# is sketched after this example)
eval_train_index = len(texts) - 50

train_texts = texts[:eval_train_index]
eval_texts = texts[eval_train_index:]
train_labels = labels[:eval_train_index]
eval_labels = labels[eval_train_index:]

# The model's output width matches the length of one label vector
model = DocLabelingModel(len(labels[0]))
trainer = Trainer(model.model)

model = trainer.train(train_texts, train_labels)
print(trainer.evaluate(eval_texts, eval_labels))

# Simple experiment: decode each prediction and its gold label for side-by-side comparison
predictions = model.predict(eval_texts)
results = [(example.read_prediction(i), example.read_prediction(j))
           for (i, j) in zip(predictions, eval_labels)]
for result in results:
    print(result, "\n")
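
The tail-slice split above assumes the CSV rows are already in random order. If they are not, a shuffled hold-out via scikit-learn (an added dependency, not something the original run.py uses) is a drop-in alternative:

from sklearn.model_selection import train_test_split

# Shuffle before holding out 50 examples, in case the file is ordered
train_texts, eval_texts, train_labels, eval_labels = train_test_split(
    texts, labels, test_size=50, random_state=0)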