def train(
    sentences_train,
    labels_train,
    sentences_valid,
    labels_valid,
    batch_size=128,
    n_epochs=10,
):
    train_dataset = data.TensorDataset(sentences_train, labels_train)
    valid_dataset = data.TensorDataset(sentences_valid, labels_valid)

    model = Network()

    train_loader = data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, pin_memory=False
    )
    valid_loader = data.DataLoader(
        valid_dataset, batch_size=batch_size, shuffle=False, pin_memory=False
    )

    databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)
    learn = Learner(databunch, model, loss_func=loss)
    if torch.cuda.is_available():
        learn = learn.to_fp16()
    learn.fit_one_cycle(n_epochs)

    return learn.model
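# Hedged, self-contained sketch (not from the original source): the minimal
# fastai v1 pattern the snippet above relies on -- wrap plain PyTorch
# DataLoaders in a DataBunch, hand it to Learner with a loss function, and
# call fit_one_cycle. The model and data below are toy stand-ins.
import torch
import torch.nn as nn
from torch.utils import data
from fastai.basic_data import DataBunch
from fastai.train import Learner

x = torch.randn(256, 20)                      # toy features
y = torch.randint(0, 2, (256, 1)).float()     # toy binary labels
ds = data.TensorDataset(x, y)
train_dl = data.DataLoader(ds, batch_size=32, shuffle=True)
valid_dl = data.DataLoader(ds, batch_size=32, shuffle=False)

bunch = DataBunch(train_dl=train_dl, valid_dl=valid_dl)
toy_model = nn.Sequential(nn.Linear(20, 16), nn.ReLU(), nn.Linear(16, 1))
toy_learn = Learner(bunch, toy_model, loss_func=nn.BCEWithLogitsLoss())
toy_learn.fit_one_cycle(1)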
def load_base_model_if_needed(learner: Learner,
                              lm_training_config: LMTrainingConfig,
                              model_file='best') -> None:
    if lm_training_config.base_model:
        model = os.path.join(lm_training_config.base_model, model_file)
        logger.info(f"Using pretrained model: {model}.pth")
        # not setting purge to True raises a pickle serialization error
        learner.load(model, purge=False)
    else:
        logger.info("Training from scratch")
def run(ini_file='tinyimg.ini',
        data_in_dir='./../../dataset',
        model_cfg='../cfg/vgg-tiny.cfg',
        model_out_dir='./models',
        epochs=30,
        lr=3.0e-5,
        batch_sz=256,
        num_worker=4,
        log_freq=20,
        use_gpu=True):
    # Step 1: parse config
    cfg = parse_cfg(ini_file,
                    data_in_dir=data_in_dir,
                    model_cfg=model_cfg,
                    model_out_dir=model_out_dir,
                    epochs=epochs,
                    lr=lr,
                    batch_sz=batch_sz,
                    log_freq=log_freq,
                    num_worker=num_worker,
                    use_gpu=use_gpu)
    print_cfg(cfg)

    # Step 2: create data sets and loaders
    train_ds, val_ds = build_train_val_datasets(cfg, in_memory=True)
    train_loader, val_loader = DLFactory.create_train_val_dataloader(
        cfg, train_ds, val_ds)

    # Step 3: create model
    model = MFactory.create_model(cfg)

    # Step 4: train/valid
    # This demos how our approach can be easily integrated with our app framework
    device = get_device(cfg)
    data = DataBunch(train_loader, val_loader, device=device)
    learn = Learner(data,
                    model,
                    loss_func=torch.nn.CrossEntropyLoss(),
                    metrics=accuracy)
    # callback_fns=[partial(EarlyStoppingCallback, monitor='accuracy', min_delta=0.01, patience=2)])

    # lr_find(learn, start_lr=1e-7, end_lr=10)
    # learn.recorder.plot()
    # lrs_losses = [(lr, loss) for lr, loss in zip(learn.recorder.lrs, learn.recorder.losses)]
    # min_lr = min(lrs_losses[10:-5], key=lambda x: x[1])[0]
    # lr = min_lr/10.0
    # plt.show()
    # print(f'Minimal lr rate is {min_lr} propose init lr {lr}')
    # fit_one_cycle(learn, epochs, lr)

    learn.fit(epochs, lr)
def train_(self, x_train, y_train, y_aux_train, x_test):
    y_train_torch = torch.tensor(np.hstack([y_train, y_aux_train]),
                                 dtype=torch.float32)

    test_dataset = data.TensorDataset(x_test, self.test_lengths)
    train_dataset = data.TensorDataset(x_train, self.train_lengths,
                                       y_train_torch)
    valid_dataset = data.Subset(train_dataset, indices=[0, 1])

    del x_train, x_test
    gc.collect()

    train_collator = SequenceBucketCollator(lambda lengths: lengths.max(),
                                            sequence_index=0,
                                            length_index=1,
                                            label_index=2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   collate_fn=train_collator)
    valid_loader = data.DataLoader(valid_dataset,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   collate_fn=train_collator)

    databunch = DataBunch(train_dl=train_loader,
                          valid_dl=valid_loader,
                          collate_fn=train_collator)

    del train_dataset, valid_dataset
    gc.collect()

    # accumulate predictions across all models so the final mean is an ensemble
    all_test_preds = []
    for model_idx in range(NUM_MODELS):
        print('Model ', model_idx)
        self.seed_everything(1234 + model_idx)
        model = NeuralNet(self.embedding_matrix, y_aux_train.shape[-1],
                          y_train.shape[-1] - 1)
        if y_train.shape[-1] > 2:
            learn = Learner(databunch, model, loss_func=self.custom_loss1)
        else:
            learn = Learner(databunch, model, loss_func=self.custom_loss)
        test_preds = self.train_model(learn,
                                      test_dataset,
                                      output_dim=y_train.shape[-1] +
                                      y_aux_train.shape[-1] - 1)
        all_test_preds.append(test_preds)

    preds = np.mean(all_test_preds, axis=0)
    return preds
def make_embedings():
    model = NeuralNet(embedding_matrix, y_aux_train.shape[-1])
    learn = Learner(databunch, model, loss_func=custom_loss)
    val_preds, test_preds = train_model(learn,
                                        output_dim=y_aux_train.shape[-1] + 1,
                                        batch_size=BATCH_SIZE,
                                        n_epochs=N_EPOCH)
    return val_preds, test_preds
def create_cnn(data, arch, pretrained=False, is_mono_input=True, **kwargs):
    meta = cnn_config(arch)
    body = create_body(arch, pretrained)

    # sum up the weights over the in_channels axis, to reduce to a single input channel
    # Suggestion by David Gutman
    # https://forums.fast.ai/t/black-and-white-images-on-vgg16/2479/2
    if is_mono_input:
        first_conv_layer = body[0]
        first_conv_weights = first_conv_layer.state_dict()['weight']
        assert first_conv_weights.size(1) == 3  # RGB channels dim
        summed_weights = torch.sum(first_conv_weights, dim=1, keepdim=True)
        first_conv_layer.weight.data = summed_weights
        first_conv_layer.in_channels = 1
    else:
        # In this case, the input is stereo
        first_conv_layer = body[0]
        first_conv_weights = first_conv_layer.state_dict()['weight']
        assert first_conv_weights.size(1) == 3  # RGB channels dim
        summed_weights = torch.sum(first_conv_weights, dim=1, keepdim=True)
        first_conv_layer.weight.data = first_conv_weights[:, :2, :, :]  # keep only 2 channels of the weights
        first_conv_layer.in_channels = 2

    nf = num_features_model(body) * 2
    head = create_head(nf, data.c, None, 0.5)
    model = nn.Sequential(body, head)
    learn = Learner(data, model, **kwargs)
    learn.split(meta['split'])
    if pretrained:
        learn.freeze()
    apply_init(model[1], nn.init.kaiming_normal_)
    return learn
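# Hedged, self-contained illustration (not from the original source) of the
# first-conv adaptation used above: summing a pretrained-style RGB kernel over
# its input-channel axis yields a 1-channel kernel of the same spatial size.
# The conv layer here is a stand-in, not a fastai body.
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
w = conv.weight.data                        # shape: (64, 3, 7, 7)
mono_w = torch.sum(w, dim=1, keepdim=True)  # shape: (64, 1, 7, 7)

mono_conv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
mono_conv.weight.data = mono_w
print(mono_conv(torch.randn(2, 1, 224, 224)).shape)  # torch.Size([2, 64, 112, 112])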
def get_score():
    print('Make Train Features.')
    with open(args.temporary_file, 'rb') as f:
        x_train, x_feat_train, y_train_o, y_aux_train, embedding_matrix = pickle.load(f)

    def power_mean(series, p=-5):
        total = sum(np.power(series, p))
        return np.power(total / len(series), 1 / p)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # all, sub, s&t, !s&t, s&!t, !s&!t
    weight_factor = list(map(float, args.weight_factor.split(',')))
    identity_factor_1 = list(map(float, args.identity_factor_1.split(',')))
    identity_factor_2 = list(map(float, args.identity_factor_2.split(',')))
    model_factor = list(map(int, args.model_factor.split(',')))
    print('weight_factor =', weight_factor)
    print('identity_factor_1 = ', identity_factor_1)
    print('identity_factor_2 = ', identity_factor_2)
    print('model_factor = ', model_factor)

    train = read_competision_file(train=True)
    identity_columns = [
        'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
        'muslim', 'black', 'white', 'psychiatric_or_mental_illness'
    ]

    # Index masks for the bias metrics: subgroup, BPSN and BNSP examples.
    index_subgroup, index_bpsn, index_bnsp = dict(), dict(), dict()
    for col in identity_columns:
        index_subgroup[col] = (train[col].fillna(0).values >= 0.5).astype(bool)
        index_bpsn[col] = ((
            (train['target'].values < 0.5).astype(bool).astype(np.int) +
            (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int)
        ) > 1).astype(bool) + ((
            (train['target'].values >= 0.5).astype(bool).astype(np.int) +
            (train[col].fillna(0).values < 0.5).astype(bool).astype(np.int)
        ) > 1).astype(bool)
        index_bnsp[col] = ((
            (train['target'].values >= 0.5).astype(bool).astype(np.int) +
            (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int)
        ) > 1).astype(bool) + ((
            (train['target'].values < 0.5).astype(bool).astype(np.int) +
            (train[col].fillna(0).values < 0.5).astype(bool).astype(np.int)
        ) > 1).astype(bool)

    # Per-sample loss weights.
    # Overall
    weights = np.ones((len(x_train), )) * weight_factor[0]
    # Subgroup
    weights += (train[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(np.int) * weight_factor[1]
    weights += ((
        (train['target'].values >= 0.5).astype(bool).astype(np.int) +
        (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int)
    ) > 1).astype(bool).astype(np.int) * weight_factor[2]
    weights += ((
        (train['target'].values >= 0.5).astype(bool).astype(np.int) +
        (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).astype(bool).astype(np.int)
    ) > 1).astype(bool).astype(np.int) * weight_factor[3]
    weights += ((
        (train['target'].values < 0.5).astype(bool).astype(np.int) +
        (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int)
    ) > 1).astype(bool).astype(np.int) * weight_factor[4]
    weights += ((
        (train['target'].values < 0.5).astype(bool).astype(np.int) +
        (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).astype(bool).astype(np.int)
    ) > 1).astype(bool).astype(np.int) * weight_factor[5]

    index_id1, index_id2 = dict(), dict()
    for col in identity_columns:
        index_id1[col] = ((
            (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int) +
            (train['target'].values >= 0.5).astype(bool).astype(np.int)
        ) > 1).astype(bool)
        index_id2[col] = ((
            (train[col].fillna(0).values >= 0.5).astype(bool).astype(np.int) +
            (train['target'].values < 0.5).astype(bool).astype(np.int)
        ) > 1).astype(bool)
    for col, id1 in zip(identity_columns, identity_factor_1):
        weights[index_id1[col]] += id1
    for col, id2 in zip(identity_columns, identity_factor_2):
        weights[index_id2[col]] += id2

    loss_weight = 1.0 / weights.mean()

    aux_impact_factor = list(map(float, args.aux_impact_factor.split(',')))
    aux_identity_factor = list(map(float, args.aux_identity_factor.split(',')))
    print('aux_impact_factor =', aux_impact_factor)
    print('aux_identity_factor =', aux_identity_factor)

    # Per-sample weights for the auxiliary targets.
    weights_aux = np.ones((len(x_train), ))
    weights_aux[
        (train['target'].values >= 0.5).astype(np.int) +
        (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).astype(bool).astype(np.int) > 1
    ] = aux_identity_factor[0]
    weights_aux[
        (train['target'].values >= 0.5).astype(np.int) +
        (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int) > 1
    ] = aux_identity_factor[1]
    weights_aux[
        (train['target'].values < 0.5).astype(np.int) +
        (train[identity_columns].fillna(0).values < 0.5).sum(axis=1).astype(bool).astype(np.int) > 1
    ] = aux_identity_factor[2]
    weights_aux[
        (train['target'].values < 0.5).astype(np.int) +
        (train[identity_columns].fillna(0).values >= 0.5).sum(axis=1).astype(bool).astype(np.int) > 1
    ] = aux_identity_factor[3]

    y_train = np.vstack([y_train_o, weights, weights_aux]).T
    del train

    def custom_loss_aux(data, targets):
        ''' Define custom loss function for weighted BCE on 'target' column '''
        bce_loss_1 = nn.BCEWithLogitsLoss(weight=targets[:, 1:2])(
            data[:, :1], targets[:, :1])
        bce_loss_aux_1 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 1:2], targets[:, 3:4])
        bce_loss_aux_2 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 2:3], targets[:, 4:5])
        bce_loss_aux_3 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 3:4], targets[:, 5:6])
        bce_loss_aux_4 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 4:5], targets[:, 6:7])
        bce_loss_aux_5 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 5:6], targets[:, 7:8])
        bce_loss_aux_6 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 6:7], targets[:, 8:9])
        return ((bce_loss_1 * loss_weight) +
                (bce_loss_aux_1 * aux_impact_factor[0]) +
                (bce_loss_aux_2 * aux_impact_factor[1]) +
                (bce_loss_aux_3 * aux_impact_factor[2]) +
                (bce_loss_aux_4 * aux_impact_factor[3]) +
                (bce_loss_aux_5 * aux_impact_factor[4]) +
                (bce_loss_aux_6 * aux_impact_factor[5]))

    from sklearn.model_selection import KFold, train_test_split
    from sklearn.metrics import classification_report, roc_auc_score

    batch_size = args.batch_size
    lr = args.learning_ratio
    max_features = np.max(x_train)

    kf = KFold(n_splits=5, random_state=12, shuffle=True)
    final_epoch_score_cv = dict()
    final_fold_count = 0
    for fold_id, (big_index, small_index) in enumerate(kf.split(y_train)):
        final_fold_count += 1
        if args.minimize == 1:
            train_index, test_index = train_test_split(np.arange(len(y_train)),
                                                       test_size=0.5,
                                                       random_state=1234,
                                                       shuffle=True)
        elif args.minimize == 2:
            train_index, test_index = train_test_split(np.arange(len(y_train)),
                                                       test_size=0.666,
                                                       random_state=1234,
                                                       shuffle=True)
        elif args.minimize == 3:
            train_index, test_index = big_index[:25600], small_index[:12800]
        else:
            train_index, test_index = big_index, small_index
        if len(args.model_file) > 0:
            train_index = np.arange(len(x_train))

        if args.use_feats_url:
            x_train_train = np.hstack(
                [x_feat_train[train_index], x_train[train_index]])
            x_train_test = np.hstack(
                [x_feat_train[test_index], x_train[test_index]])
            feats_nums = x_feat_train.shape[1]
        else:
            x_train_train = x_train[train_index]
            x_train_test = x_train[test_index]
            feats_nums = 0

        x_train_torch = torch.tensor(x_train_train, dtype=torch.long)
        x_test_torch = torch.tensor(x_train_test, dtype=torch.long)
        y_train_torch = torch.tensor(np.hstack([y_train, y_aux_train])[train_index],
                                     dtype=torch.float32)
        y_test_torch = torch.tensor(np.hstack([y_train, y_aux_train])[test_index],
                                    dtype=torch.float32)

        train_dataset = data.TensorDataset(x_train_torch, y_train_torch)
        valid_dataset = data.TensorDataset(x_test_torch, y_test_torch)

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)
        databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)

        checkpoint_predictions = []
        weights = []

        seed_everything(args.random_seed + fold_id)
        num_units = list(map(int, args.num_units.split(',')))
        model = get_model(model_factor, num_units[0], num_units[1],
                          embedding_matrix, max_features,
                          y_aux_train.shape[-1], args.num_words, feats_nums)
        model = model.cuda(device=cuda)
        if args.optimizer == 'Nadam':
            from NadamLocal import Nadam
            learn = Learner(databunch, model,
                            loss_func=custom_loss_aux, opt_func=Nadam)
        else:
            learn = Learner(databunch, model, loss_func=custom_loss_aux)

        all_test_preds = []
        checkpoint_weights = [2**epoch for epoch in range(args.num_epochs)]
        test_loader = valid_loader

        # One TrainingPhase per epoch with the learning rate decayed by 0.6 each epoch.
        n = len(learn.data.train_dl)
        phases = [(TrainingPhase(n).schedule_hp('lr', lr * (0.6**(i))))
                  for i in range(args.num_epochs)]
        sched = GeneralScheduler(learn, phases)
        learn.callbacks.append(sched)

        final_epoch_score = 0
        for global_epoch in range(args.num_epochs):
            print("Fold#", fold_id, "epoch#", global_epoch)
            learn.fit(1)
            if args.minimize < 2 or (args.minimize >= 2 and
                                     global_epoch == int(args.num_epochs - 1)):
                test_preds = np.zeros((len(test_index), 7))
                for i, x_batch in enumerate(test_loader):
                    X = x_batch[0].cuda()
                    y_pred = sigmoid(learn.model(X).detach().cpu().numpy())
                    test_preds[i * batch_size:(i + 1) * batch_size, :] = y_pred
                all_test_preds.append(test_preds)

                prediction_one = test_preds[:, 0].flatten()
                checkpoint_predictions.append(prediction_one)
                weights.append(2**global_epoch)
                predictions = np.average(checkpoint_predictions,
                                         weights=weights, axis=0)

                y_true = (y_train[test_index, 0]).reshape((-1, )).astype(np.int)
                roc_sub, roc_bpsn, roc_bnsp = [], [], []
                roc_sub_one, roc_bpsn_one, roc_bnsp_one = [], [], []
                for col in identity_columns:
                    if args.vervose:
                        print("Subgroup#", col, ":")
                        print(classification_report(
                            y_true[index_subgroup[col][test_index]],
                            (predictions[index_subgroup[col][test_index]] >= 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_sub.append(roc_auc_score(
                            y_true[index_subgroup[col][test_index]],
                            predictions[index_subgroup[col][test_index]]))
                    roc_sub_one.append(roc_auc_score(
                        y_true[index_subgroup[col][test_index]],
                        prediction_one[index_subgroup[col][test_index]]))
                    if args.vervose:
                        print("BPSN#", col, ":")
                        print(classification_report(
                            y_true[index_bpsn[col][test_index]],
                            (predictions[index_bpsn[col][test_index]] >= 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_bpsn.append(roc_auc_score(
                            y_true[index_bpsn[col][test_index]],
                            predictions[index_bpsn[col][test_index]]))
                    roc_bpsn_one.append(roc_auc_score(
                        y_true[index_bpsn[col][test_index]],
                        prediction_one[index_bpsn[col][test_index]]))
                    if args.vervose:
                        print("BNSP#", col, ":")
                        print(classification_report(
                            y_true[index_bnsp[col][test_index]],
                            (predictions[index_bnsp[col][test_index]] >= 0.5).astype(np.int)))
                    if args.minimize < 2:
                        roc_bnsp.append(roc_auc_score(
                            y_true[index_bnsp[col][test_index]],
                            predictions[index_bnsp[col][test_index]]))
                    roc_bnsp_one.append(roc_auc_score(
                        y_true[index_bnsp[col][test_index]],
                        prediction_one[index_bnsp[col][test_index]]))

                if args.minimize < 2:
                    roc_all = roc_auc_score(y_true, predictions)
                    pm_roc_sub = power_mean(roc_sub)
                    pm_roc_bpsn = power_mean(roc_bpsn)
                    pm_roc_bnsp = power_mean(roc_bnsp)
                    final_epoch_score = (roc_all + pm_roc_sub + pm_roc_bpsn +
                                         pm_roc_bnsp) / 4

                roc_all_one = roc_auc_score(y_true, prediction_one)
                pm_roc_sub_one = power_mean(roc_sub_one)
                pm_roc_bpsn_one = power_mean(roc_bpsn_one)
                pm_roc_bnsp_one = power_mean(roc_bnsp_one)
                final_epoch_score_one = (roc_all_one + pm_roc_sub_one +
                                         pm_roc_bpsn_one + pm_roc_bnsp_one) / 4
                if args.minimize >= 2:
                    return final_epoch_score_one

                if args.vervose:
                    print("roc_sub:", pm_roc_sub)
                    print("roc_bpsn:", pm_roc_bpsn)
                    print("roc_bnsp:", pm_roc_bnsp)
                    print("final score:",
                          (roc_all + pm_roc_sub + pm_roc_bpsn + pm_roc_bnsp) / 4)

                if global_epoch not in final_epoch_score_cv.keys():
                    final_epoch_score_cv[global_epoch] = []
                final_epoch_score_cv[global_epoch].append(
                    (final_epoch_score, final_epoch_score_one))

        if len(args.model_file) > 0:
            if args.model_file.endswith('.bz2'):
                model_file = args.model_file
            else:
                model_file = args.model_file + '.bz2'
            model_json_file = model_file[:-4] + '.json'
            model.save_model(model_file)
            with open(model_json_file, 'w') as pf:
                pf.write('{')
                pf.write('\"model_factor\":[' +
                         ','.join(list(map(str, model_factor))) + ']')
                pf.write(',')
                pf.write('\"num_units\":[' +
                         ','.join(list(map(str, num_units))) + ']')
                pf.write(',')
                pf.write('\"num_aux_targets\":%d' % y_aux_train.shape[-1])
                pf.write(',')
                pf.write('\"feats_nums\":%d' % feats_nums)
                pf.write(',')
                pf.write('\"max_seq_len\":%d' % args.num_words)
                pf.write('}')
            break
        if args.minimize > 0:
            break
    return final_epoch_score_cv
        self.features = nn.Sequential(*layers)

    def forward(self, x):
        return self.features(x)


def wrn_22():
    return WideResNet(n_groups=3, N=3, n_classes=10, k=6)


model = wrn_22()

from fastai.basic_data import DataBunch
from fastai.train import Learner
from fastai.metrics import accuracy

data = DataBunch.create(train_ds, valid_ds, bs=batch_size, path='./data/cifar10')
learner = Learner(data, model, loss_func=F.cross_entropy, metrics=[accuracy])
learner.clip = 0.1  # gradient is clipped to be in range of [-0.1, 0.1]

# Find best learning rate
learner.lr_find()
learner.recorder.plot()  # select lr with largest negative gradient (about 5e-3)

# Training
epochs = 1
lr = 5e-3
wd = 1e-4

import time
t0 = time.time()

learner.fit_one_cycle(epochs, lr, wd=wd)  # wd is the lambda in l2 regularization
        collate_fn=train_collator,
    )

    return DataBunch(train_dl=train_loader,
                     valid_dl=valid_loader,
                     collate_fn=train_collator)


y_train_torch = get_y_train_torch(weights)
databunch = get_databunch(y_train_torch)

logging.info("training model 1: para, crawl, w2v...")
embedding_matrix = np.concatenate(
    [para_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)

seed_everything(42)
model = NeuralNet(embedding_matrix, output_aux_sub=subgroup_target.shape[1])
learn = Learner(databunch, model, loss_func=custom_loss)
cb = OneCycleScheduler(learn, lr_max=0.001)
learn.callbacks.append(cb)
learn.fit(EPOCHS)
save_nn_without_embedding_weights(learn.model, "./models/Notebook_100_1.bin")

logging.info("training model 2: glove, crawl, w2v...")
embedding_matrix = np.concatenate(
    [glove_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)

seed_everything(43)
model = NeuralNet(embedding_matrix, output_aux_sub=subgroup_target.shape[1])
learn = Learner(databunch, model, loss_func=custom_loss)
cb = OneCycleScheduler(learn, lr_max=0.001)
learn.callbacks.append(cb)
cv_train_dataset = data.TensorDataset(x_train_torch, y_train_torch)
cv_fake_val_dataset = data.TensorDataset(x_train_torch[:1000],
                                         y_train_torch[:1000])
cv_val_dataset = data.TensorDataset(x_val_torch, y_val_torch)

cv_train_loader = data.DataLoader(cv_train_dataset,
                                  batch_size=512,
                                  shuffle=True)
cv_fake_val_loader = data.DataLoader(cv_fake_val_dataset,
                                     batch_size=512,
                                     shuffle=False)

cv_databunch = DataBunch(train_dl=cv_train_loader,
                         valid_dl=cv_fake_val_loader)

cv_model = MLP(y_aux_train.shape[-1])
cv_learn = Learner(cv_databunch, cv_model, loss_func=custom_loss)
cv_predictions, _, _, _, _ = train_model_per_epoch(cv_learn,
                                                   cv_val_dataset,
                                                   output_dim=7,
                                                   model_idx=0,
                                                   lr=0.001,
                                                   lr_decay=1,
                                                   n_epochs=10,
                                                   save_models='last',
                                                   model_name='mlp_stacking')
# cv_test_predictions = cv_model.predict(x_test)

y_train_predictions[val_index] = cv_predictions
# test_predictions += cv_test_predictions / float(N_SPLITS)
    '.',
    bert_train.iloc[trn_idx, :].sample(frac=1, random_state=SEED + CUR_STEP),
    bert_train.iloc[val_idx, :],
    bert_test,
    tokenizer=fastai_tokenizer,
    vocab=fastai_bert_vocab,
    include_bos=False,
    include_eos=False,
    text_cols='comment_text',
    label_cols=label_cols,
    bs=BATCH_SIZE,
    collate_fn=partial(pad_collate, pad_first=False, pad_idx=0),
)

learner = Learner(databunch, bert_model, loss_func=bert_custom_loss)

if CUR_STEP != 1:
    learner.load('/kaggle/input/freeze-bert-1-s-uc-260ml-3e-8f-s-' +
                 str(CUR_STEP - 1) + '-f-' + str(MAKE_FOLD) + '/models/' +
                 FILE_NAME)

learner.fit_one_cycle(N_EPOCH, max_lr=MAX_LR)

oof[val_idx] = get_preds_as_nparray(DatasetType.Valid).astype(np.float32)
predictions += get_preds_as_nparray(DatasetType.Test).astype(np.float32) / NFOLDS

validate_df(train.iloc[val_idx], oof[val_idx, 0], verbose=True)
learner.save(FILE_NAME)
def build_learner(params, project_dir, pindex=0, comm_file=None, queues=None):
    """
    Builds a fastai `Learner` object containing the model and data specified
    by `params`. It is configured to run on GPU `device_id`, assuming it is
    GPU `pindex` of `world_size` total GPUs. If more than one GPU is being
    used, a file named `comm_file` is used to communicate between processes.
    """
    # For user-friendly error messages, check that these parameters exist.
    check_params(params, [
        'cpu', 'data.batch_size', 'data.dir', 'data.epoch_size',
        'data.max_length', 'data.max_val_size', 'data.src', 'data.tgt',
        'data.vocab', 'decoder.embedding_dim', 'decoder.embedding_dropout',
        'decoder.prediction_dropout', 'encoder.embedding_dim',
        'encoder.embedding_dropout', 'network.bias', 'network.block_sizes',
        'network.division_factor', 'network.dropout', 'network.efficient',
        'network.growth_rate', 'network.kernel_size',
    ])
    model_name = params['model_name']

    # Try to make the directory for saving models.
    model_dir = os.path.join(project_dir, 'model', model_name)
    os.makedirs(model_dir, exist_ok=True)

    # Configure GPU/CPU device settings.
    cpu = params['cpu']
    gpu_ids = params['gpu_ids'] if not cpu else []
    world_size = len(gpu_ids) if len(gpu_ids) > 0 else 1
    distributed = world_size > 1
    if gpu_ids:
        device_id = gpu_ids[pindex]
        device = torch.device(device_id)
        torch.cuda.set_device(device_id)
    else:
        device_id = None
        device = torch.device('cpu')

    # If distributed, initialize inter-process communication using a shared file.
    if distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             world_size=world_size,
                                             rank=pindex,
                                             init_method=f'file://{comm_file}')

    # Load vocabulary.
    vocab_path = os.path.join(params['data']['dir'], params['data']['vocab'])
    vocab = VocabData(vocab_path)

    # Load data.
    src_l = params['data']['src']
    tgt_l = params['data']['tgt']
    loader = PervasiveDataLoader(os.path.join(params['data']['dir'], f'{src_l}.h5'),
                                 os.path.join(params['data']['dir'], f'{tgt_l}.h5'),
                                 vocab,
                                 vocab,
                                 params['data']['batch_size'] // world_size,
                                 params['data']['max_length'],
                                 epoch_size=params['data']['epoch_size'],
                                 max_val_size=params['data']['max_val_size'],
                                 distributed=distributed,
                                 world_size=world_size,
                                 pindex=pindex)

    # Define the neural network.
    # Max length is 1 more than the setting to account for BOS.
    if params['network']['type'] == 'pervasive-embeddings':
        model = PervasiveEmbedding(
            params['network']['block_sizes'], vocab.bos, loader.max_length,
            loader.max_length, loader.datasets['train'].arrays[0].shape[2],
            params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'])
        # Rescale loss by 100 for easier display in training output.
        loss_func = scaled_mse_loss
    elif params['network']['type'] == 'pervasive-downsample':
        model = PervasiveDownsample(
            params['network']['block_sizes'], vocab.bos, loader.max_length,
            loader.max_length, params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy
    elif params['network']['type'] == 'pervasive-bert':
        model = PervasiveBert(
            params['network']['block_sizes'], vocab.bos, loader.max_length,
            loader.max_length, params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy
    elif params['network']['type'] == 'pervasive-original':
        model = PervasiveOriginal(
            params['network']['block_sizes'], len(vocab), vocab.bos,
            loader.max_length, loader.max_length,
            params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy
    elif params['network']['type'] == 'pervasive':
        model = Pervasive(
            params['network']['block_sizes'], len(vocab), vocab.bos,
            loader.max_length, loader.max_length,
            params['encoder']['initial_emb_dim'],
            params['encoder']['embedding_dim'],
            params['encoder']['embedding_dropout'],
            params['network']['dropout'],
            params['decoder']['prediction_dropout'],
            params['network']['division_factor'],
            params['network']['growth_rate'], params['network']['bias'],
            params['network']['efficient'], params['network']['kernel_size'])
        loss_func = F.cross_entropy

    model.init_weights()

    if device_id is not None:
        if not torch.cuda.is_available():
            raise ValueError(
                f'Request to train on GPU {device_id}, but no GPU found.')
        model.cuda(device_id)
        if distributed:
            model = DistributedDataParallel(model, device_ids=[device_id])

    data = DataBunch(loader.loaders['train'],
                     loader.loaders['valid'],
                     loader.loaders['valid'],
                     device=device)

    # Create Learner with Adam optimizer.
    learn = Learner(data, model, loss_func=loss_func, model_dir=model_dir)
    AdamP = partial(torch.optim.Adam,
                    betas=(params['optim']['beta1'], params['optim']['beta2']))
    learn.opt_func = AdamP
    learn.wd = params['optim']['wd']

    return (learn, loader.loaders['train'].src_vocab,
            loader.loaders['train'].tgt_vocab)
def train(train_dataset: torch.utils.data.Dataset,
          test_dataset: torch.utils.data.Dataset,
          training_config: dict = train_config,
          global_config: dict = global_config):
    """
    Template training routine. Takes a training and a test dataset wrapped
    as torch.utils.data.Dataset type and two corresponding generic configs
    for both global path settings and training settings. Returns the fitted
    fastai.train.Learner object which can be used to assess the resulting
    metrics and error curves etc.
    """
    for path in global_config.values():
        create_dirs(path)

    # wrap datasets with Dataloader classes
    train_loader = torch.utils.data.DataLoader(
        train_dataset, **train_config["DATA_LOADER_CONFIG"])
    test_loader = torch.utils.data.DataLoader(
        test_dataset, **train_config["DATA_LOADER_CONFIG"])
    databunch = DataBunch(train_loader, test_loader)

    # instantiate model and learner
    if training_config["WEIGHTS"] is None:
        model = training_config["MODEL"](**training_config["MODEL_CONFIG"])
    else:
        model = load_model(training_config["MODEL"],
                           training_config["MODEL_CONFIG"],
                           training_config["WEIGHTS"],
                           training_config["DEVICE"])

    learner = Learner(databunch,
                      model,
                      metrics=train_config["METRICS"],
                      path=global_config["ROOT_PATH"],
                      model_dir=global_config["WEIGHT_DIR"],
                      loss_func=train_config["LOSS"])

    # model name & paths
    name = "_".join([train_config["DATE"], train_config["SESSION_NAME"]])
    modelpath = os.path.join(global_config["WEIGHT_DIR"], name)

    if train_config["MIXED_PRECISION"]:
        learner.to_fp16()

    learner.save(modelpath)

    torch.backends.cudnn.benchmark = True

    cbs = [
        SaveModelCallback(learner),
        LearnerTensorboardWriter(
            learner,
            Path(os.path.join(global_config["LOG_DIR"]), "tensorboardx"),
            name),
        TerminateOnNaNCallback()
    ]

    # perform training iteration
    try:
        if train_config["ONE_CYCLE"]:
            learner.fit_one_cycle(train_config["EPOCHS"],
                                  max_lr=train_config["LR"],
                                  callbacks=cbs)
        else:
            learner.fit(train_config["EPOCHS"],
                        lr=train_config["LR"],
                        callbacks=cbs)
    # save model files on interrupt
    except KeyboardInterrupt:
        learner.save(modelpath)
        raise KeyboardInterrupt

    learner.save(modelpath)
    val_loss = min(learner.recorder.val_losses)
    val_metrics = learner.recorder.metrics

    # log using the logging tool
    logger = log.Log(train_config, run_name=train_config['SESSION_NAME'])
    logger.log_metric('Validation Loss', val_loss)
    logger.log_metrics(val_metrics)
    logger.end_run()

    # write csv log file
    log_content = train_config.copy()
    log_content["VAL_LOSS"] = val_loss
    log_content["VAL_METRICS"] = val_metrics
    log_path = os.path.join(global_config["LOG_DIR"], train_config["LOGFILE"])
    write_log(log_path, log_content)

    return learner, log_content, name
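# Hedged sketch (not from the original source) of the shape of the two config
# dicts that the template `train` routine above expects. Every key below
# appears in the function body; the concrete values and `MyModel` are
# illustrative stand-ins only, so the block is left commented out.
#
# train_config = {
#     "DATA_LOADER_CONFIG": {"batch_size": 32, "shuffle": True, "num_workers": 4},
#     "MODEL": MyModel,                       # hypothetical nn.Module subclass
#     "MODEL_CONFIG": {"n_classes": 10},
#     "WEIGHTS": None,                        # or a path to a saved checkpoint
#     "DEVICE": "cuda",
#     "METRICS": [accuracy],
#     "LOSS": torch.nn.CrossEntropyLoss(),
#     "MIXED_PRECISION": False,
#     "ONE_CYCLE": True,
#     "EPOCHS": 10,
#     "LR": 1e-3,
#     "DATE": "2020-01-01",
#     "SESSION_NAME": "baseline",
#     "LOGFILE": "runs.csv",
# }
# global_config = {
#     "ROOT_PATH": "./",
#     "WEIGHT_DIR": "weights",
#     "LOG_DIR": "logs",
# }
# learner, log_content, name = train(train_dataset, test_dataset,
#                                    train_config, global_config)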
import config
from dataset import Dataset
from model import WideResNet22

from fastai.train import Learner
from fastai.metrics import accuracy
from torch.nn import functional as f
from fastai.basic_data import DataBunch

cifar10 = Dataset()
# cifar10.download_dataset()
train_dataloader, valid_dataloader = cifar10.get_dataloader()

model = WideResNet22(3, 10)

data = DataBunch(train_dataloader, valid_dataloader)
learner = Learner(data, model, loss_func=f.cross_entropy, metrics=[accuracy])
learner.clip = 0.1

learner.fit_one_cycle(config.EPOCHS, config.LEARNING_RATE, wd=1e-4)
for images, labels in train_dl:
    print('images.shape:', images.shape)
    out = model(images)
    print('out.shape:', out.shape)
    break

# now we will use the fastai library to help us out (I still need to download this module)
from fastai.basic_data import DataBunch
from fastai.train import Learner
from fastai.metrics import accuracy

data = DataBunch.create(train_ds, valid_ds, bs=batch_size, path='./data/cifar10')
learner = Learner(data, model, loss_func=F.cross_entropy, metrics=[accuracy])
learner.clip = 0.1

# this starts with a low lr, then adjusts it and tracks the loss
learner.lr_find()
# plot the marked lr that gives the fastest reduction in loss
learner.recorder.plot()

learner.fit_one_cycle(9, 5e-3, wd=1e-4)  # epochs, lr, weight decay

# plot all the weights, losses and accuracy of the model
                                           shuffle=False)
databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)


def custom_loss(data, targets):
    ''' Define custom loss function for weighted BCE on 'target' column '''
    bce_loss_1 = nn.BCEWithLogitsLoss(weight=targets[:, 1:2])(data[:, :1],
                                                              targets[:, :1])
    bce_loss_2 = nn.BCEWithLogitsLoss()(data[:, 1:], targets[:, 2:])
    return (bce_loss_1 * loss_weight) + bce_loss_2


all_test_preds = []

for model_idx in range(NUM_MODELS):
    print('Model ', model_idx)
    seed_everything(1234 + model_idx)
    model = NeuralNet(embedding_matrix, out_shape)
    learn = Learner(databunch, model, loss_func=custom_loss)
    test_preds = train_model(learn, test_dataset, output_dim=3)
    all_test_preds.append(test_preds)

submission = pd.DataFrame.from_dict({
    'id': testid,
    'prediction': np.mean(all_test_preds, axis=0)[:, 0]
})

submission.to_csv('submission.csv', index=False)
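# Hedged, self-contained check (not from the original source) of the target
# layout that `custom_loss` above assumes: column 0 is the main target,
# column 1 is the per-sample weight, and the remaining columns are auxiliary
# targets. Tensors and loss_weight below are illustrative stand-ins.
import torch
import torch.nn as nn

logits = torch.randn(4, 3)                                    # [main, aux1, aux2]
targets = torch.cat([torch.randint(0, 2, (4, 1)).float(),     # main target
                     torch.rand(4, 1) + 0.5,                  # per-sample weight
                     torch.randint(0, 2, (4, 2)).float()],    # aux targets
                    dim=1)
loss_weight = 1.0  # stand-in for the value derived from the weights' mean
loss = (nn.BCEWithLogitsLoss(weight=targets[:, 1:2])(logits[:, :1], targets[:, :1])
        * loss_weight) + nn.BCEWithLogitsLoss()(logits[:, 1:], targets[:, 2:])
print(loss)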
DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS
embed_size = 50  # Note: these parameters can be tuned to improve model performance

# targets
num_targets = 12

# init LSTM model
LUT = ModelEmbedding(max_features, embed_size, dropout=0.3)
NET = NeuralNet(embed_size, LSTM_UNITS, DENSE_HIDDEN_UNITS, num_targets)
model = LSTM_model(LUT, NET)

# fastai learner
learn = Learner(databunch, model, loss_func=loss_function)

# train model
LSTM_valid_raw_preds = train_model(learn, output_dim=num_targets, lr=1.0e-3)

# test set prediction
LSTM_pred_raw = torch.zeros(len(X_test), num_targets)
test_preds = np.zeros((len(X_test)))
learn.model.eval()
for i, x_batch in enumerate(test_loader):
    X = x_batch[0].cuda()