# Persist the cleaned dataset next to the model artifacts.
print("Saving cleaned data")
dataset.data.to_csv(
    os.path.join(config.model_dir, args.dataset + '_clean.csv'),
    index=False,
)
print("Done")

trainer = Trainer(
    dataset=dataset,
    models=args.models,
    transforms=args.feats,
    cfg=config,
    grid=False,
)

if args.test_only:
    # Test-only run: load a previously trained model instead of training.
    # NOTE(review): assumes args.models / args.feats are non-empty — confirm
    # the argument parser enforces that.
    trainer.load_model(args.models[0], args.feats[0], args.load_path)
else:
    trainer.train()

# Evaluate. The original code branched on args.test_only here but both
# branches called trainer.evaluate() identically (grid is always False),
# so a single unconditional call is equivalent.
metrics = trainer.evaluate()

if args.save_results:
    print("Saving results")
    metrics.to_csv(
        os.path.join(config.model_dir, args.dataset + '_results.csv'),
        index=False,
    )
    print("Done")

if not args.test_only:
    # Training mode: persist the best model according to the metrics.
    trainer.save_best(metrics)
    print("Simultaneously training done")
else:
    print("Test result : ")
import numpy

from training.dataprep.airbnb_example import AirbnbExample
from training.trainer import Trainer
from training.model import DocLabelingModel

# Load the example dataset and materialize texts/labels as numpy arrays.
# (The identity comprehensions `[i for i in ...]` were replaced with
# list(...); the unused `import itertools` was dropped.)
example = AirbnbExample(file_path="/path/to/file.csv")
texts = numpy.array(list(example.texts()))
labels = numpy.array(list(example.labels()))

# Hold out the last 50 samples for evaluation.
# NOTE(review): assumes the dataset has more than 50 rows — confirm, or
# the training split is empty/negative-indexed.
eval_train_index = len(texts) - 50
train_texts = texts[:eval_train_index]
eval_texts = texts[eval_train_index:]
train_labels = labels[:eval_train_index]
eval_labels = labels[eval_train_index:]

# Build and train the model; label width is inferred from the first label.
model = DocLabelingModel(len(labels[0]))
trainer = Trainer(model.model)
model = trainer.train(train_texts, train_labels)
print(trainer.evaluate(eval_texts, eval_labels))

# simple experiment: print (predicted, true) label pairs side by side
predictions = model.predict(eval_texts)
results = [
    (example.read_prediction(pred), example.read_prediction(true))
    for pred, true in zip(predictions, eval_labels)
]
for result in results:
    print(result, "\n")