from functools import partial

import pandas as pd


def train(model, dataset, optim, loss_fn, epochs, device, lr):
    # run a single training configuration and return its per-epoch metrics,
    # tagged with the configuration names, as a DataFrame
    metrics = train_model(
        dl_train=dataset.param.train(),
        dl_val=dataset.param.test(),
        model=model.param(),
        opt_func=partial(optim.param, lr=lr),
        loss_fn=loss_fn.param(),
        epochs=epochs,
        device=device,
        show_progress=False,
    )
    rows = [
        {
            "model_name": model.name,
            "optim": optim.name,
            "loss_fn": loss_fn.name,
            "dataset": dataset.name,
            "lr": lr,
            "epoch": r["epoch"],
            "train_loss": r["train_loss"],
            "val_loss": r["val_loss"],
            "train_loss_avg": r["train_loss_avg"],
            "val_loss_avg": r["val_loss_avg"],
            "pct_error_avg": r["pct_error_avg"],
        }
        for r in metrics
    ]
    return pd.DataFrame(rows)
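# The train() wrapper above expects lightweight config objects: each exposes a
# human-readable .name for the results table plus the real object or factory
# under .param. A hypothetical shape, assuming simple namespaces (all names
# and classes here are illustrative, not taken from the original code):
from types import SimpleNamespace

import torch.nn as nn
import torch.optim

model_cfg = SimpleNamespace(name="linear", param=lambda: nn.Linear(8, 1))
optim_cfg = SimpleNamespace(name="adam", param=torch.optim.Adam)
loss_cfg = SimpleNamespace(name="mse", param=nn.MSELoss)
# dataset.param is assumed to expose .train() and .test() dataloader factories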
import numpy as np


def test_on_positively_correlated_data(self):
    test_argument = np.array([
        [1.0, 4.0],
        [2.0, 4.0],
        [3.0, 9.0],
        [4.0, 10.0],
        [5.0, 7.0],
        [6.0, 13.0],
    ])
    actual_slope, actual_intercept = train_model(test_argument)
    assert actual_slope > 0, (
        "Expected slope: > 0, Actual slope: {0}".format(actual_slope))
import numpy as np
import pytest


def test_on_linear_data(self):
    test_argument = np.array([[1.0, 3.0], [2.0, 5.0], [3.0, 7.0]])
    expected_slope = 2.0
    expected_intercept = 1.0
    actual_slope, actual_intercept = train_model(test_argument)
    slope_message = (
        "train_model({0}) should return slope {1}, "
        "but it actually returned slope {2}".format(
            test_argument, expected_slope, actual_slope))
    intercept_message = (
        "train_model({0}) should return intercept {1}, "
        "but it actually returned intercept {2}".format(
            test_argument, expected_intercept, actual_intercept))
    assert actual_slope == pytest.approx(expected_slope), slope_message
    assert actual_intercept == pytest.approx(
        expected_intercept), intercept_message
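# Both tests above target a train_model that fits a straight line to an
# (n, 2) array of [x, y] rows and returns (slope, intercept). That
# implementation is not shown in this section; a minimal sketch, assuming an
# ordinary least-squares fit via np.polyfit:
import numpy as np


def train_model(data):
    # polyfit with deg=1 returns coefficients highest degree first:
    # [slope, intercept]
    slope, intercept = np.polyfit(data[:, 0], data[:, 1], deg=1)
    return slope, intercept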
import logging
import time


# method of a trainer class; the heavy lifting happens in train_model
def train(self):
    start_time = time.time()
    trained_model, acc = train_model(self.loaders, self.model, self.loss_fn,
                                     self.activated_features, self.acc_fn,
                                     self.optimizer, self.scheduler,
                                     self.epochs, name=self.name,
                                     classOfInterest=self.classOfInterest)
    logging.info('Training time: {:10f} minutes'.format(
        (time.time() - start_time) / 60))
    return acc
def main():
    paths = setup_paths()
    # get the raw data from /datasets/
    raw_data_path = paths.get("raw_data_path") + "train.csv"
    df = load_data(raw_data_path)
    # clean/transform the raw data into the correct types
    df = preprocess_data(df)
    # build the relevant feature set
    X, y, features = extract_features(df)
    X_train, X_eval, y_train, y_eval = split_train_test(X, y)
    model = train_model(X_train, y_train, features)
    evaluate_model(model, X_eval, y_eval)
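# split_train_test is used above but not defined in this section. A minimal
# sketch, assuming a thin wrapper around scikit-learn's train_test_split (the
# real helper may use a different split ratio or seeding):
from sklearn.model_selection import train_test_split


def split_train_test(X, y, test_size=0.2, seed=42):
    return train_test_split(X, y, test_size=test_size, random_state=seed)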
from argparse import ArgumentParser


def main():
    parser = ArgumentParser()
    action_parser = parser.add_subparsers(title="actions", dest="action",
                                          required=True,
                                          help="select action to execute")

    # args for preprocessing
    preprocess_parser = action_parser.add_parser("preprocess",
                                                 help="preprocess data")
    preprocess_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")

    # args for feature extraction
    feature_extractor_parser = action_parser.add_parser(
        "feature_extractor", help="feature extractor")
    feature_extractor_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")

    # args for training
    training_parser = action_parser.add_parser("train", help="Train the model")
    training_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")
    training_parser.add_argument(
        "-m", "--model-name", dest="model_key", required=True,
        help="key to determine the model to be trained")

    # args for testing
    test_parser = action_parser.add_parser("test", help="Test the model")
    test_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the common voice dataset")
    test_parser.add_argument(
        "-m", "--model-name", dest="model_key", required=True,
        help="key to determine the model to be tested")
    test_parser.add_argument(
        "-c", "--checkpoint-dir", dest="checkpoint_path", required=True,
        help="root directory of the saved models")

    # args for inference
    inference_parser = action_parser.add_parser(
        "inference", help="Run inference on the model")
    inference_parser.add_argument(
        "-r", "--root-dir", dest="root_dir", required=True,
        help="root directory of the audio files")
    inference_parser.add_argument(
        "-m", "--model-path", dest="model_path", required=True,
        help="path of the model")

    action, args = clean_args(parser.parse_args())
    if action == 'preprocess':
        preprocess(**args)
    elif action == 'feature_extractor':
        extract_features(**args)
    elif action == 'train':
        train_model(**args)
    elif action == 'test':
        test_model(**args)
    elif action == 'inference':
        inference(**args)
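# clean_args is referenced above but not defined in this section. A minimal
# sketch, assuming it splits the parsed namespace into the chosen action and
# a kwargs dict for the matching handler (the real helper may do more):
def clean_args(namespace):
    args = vars(namespace).copy()
    action = args.pop("action")
    return action, args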
# these class weights give better accuracy than the ones computed with sklearn
class_weight = [1., 1.5]

tokenized, key_word_map = pipeline.fit_transform(loader.sms_data)
model = build_convolutional_model(filters=32, kernel_size=3, padding="valid",
                                  strides=1, data_format=None, classes=2,
                                  layers=3, fc=True, fc_dropout=0.5,
                                  pooling='max', pool_size=2,
                                  conv_dropout=False)
model = train_model(
    model=model,
    X=tokenized,
    y=loader.labels,
    save_model=args.save,
    model_path='model_conv_drop_false_15_new_data_1.json',
    weights_path='model_weights_conv_drop_false_15_new_data_1.h5',
    epochs=4,
    batch_size=16,
    class_weight=class_weight,
    shuffle_data=True)
import torch

from models import train
from models.crnn import CRNN
from models.res_lstm import ResidualLSTM, Resblock
from utils.dataloader import get_data

train_data, test_data = get_data()
model = ResidualLSTM(Resblock, [2])

if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
model.to(device)

train.train_model(model, train_data)
# save the trained weights; the checkpoint path here is a placeholder
torch.save(model.state_dict(), 'res_lstm.pt')
train.test_model(model, test_data)
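# Reloading the checkpoint later might look like this; the file name matches
# the placeholder used above and is purely illustrative:
reloaded = ResidualLSTM(Resblock, [2])
reloaded.load_state_dict(torch.load('res_lstm.pt'))
reloaded.eval()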