def model_train_small(model_type='fc'):
    """
    Train on in-memory numpy arrays; use for fast iteration.
    :param model_type: network to build, 'fc' or 'cnn'
    :return:
    """
    start = timer()
    if model_type == 'fc':
        model_pair = build_fc_network()
    elif model_type == 'cnn':
        model_pair = build_cnn_network()
    else:
        print("Model type not supported: {}".format(model_type))
        return
    end = timer()
    print("{} min taken for generating networks".format((end - start) / 60.0))

    start = timer()
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=1.1)
    tg = build_numpy(file_list=train_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS,
                     ytx=None, is_csv=IS_CSV)
    val_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=1.1)
    vg = build_numpy(file_list=val_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS,
                     ytx=None, is_csv=IS_CSV)
    end = timer()
    print("{} min taken for generating input".format((end - start) / 60.0))

    count = np.sum(tg[1], axis=0)
    print("Number of Positive Training Windows: {}".format(count[0]))
    print("Number of Negative Training Windows: {}".format(len(tg[1]) - count[0]))

    key = model_pair[0] + '_small'
    model = model_pair[1]
    print(key)
    model.summary()  # summary() prints itself; wrapping it in print() emits a stray 'None'

    file_path = MODEL_CHECKPOINT + key + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    history = History()
    log_dir = LOG_DIR + key + '/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensor_board = TensorBoard(log_dir, histogram_freq=5,
                               write_grads=False, write_graph=False)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=4, min_lr=1e-5)
    checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min', period=5)
    callbacks_list = [history, tensor_board, reduce_lr, checkpoint]

    adam_wn = Adam(lr=LR)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=adam_wn, metrics=['accuracy'])
    model.fit(x=tg[0], y=tg[1], validation_data=vg,
              batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1, shuffle=False,
              callbacks=callbacks_list, class_weight={0: 0.1, 1: 1.0})
    model.save(file_path)
    return
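# The hard-coded class_weight={0: 0.1, 1: 1.0} above compensates for the
# positive/negative imbalance reported by the window counts. Below is a
# sketch of deriving the weights from the labels instead; compute_class_weight
# comes from scikit-learn, and whether 'balanced' weights outperform the
# manual values here is an assumption, not something this module establishes.
def balanced_class_weight(y):
    """Return a {label: weight} dict with weights inversely proportional to
    label frequency, e.g. for model.fit(..., class_weight=...)."""
    from sklearn.utils.class_weight import compute_class_weight
    labels = np.unique(y.reshape(-1))
    weights = compute_class_weight(class_weight='balanced',
                                   classes=labels, y=y.reshape(-1))
    return {int(label): float(w) for label, w in zip(labels, weights)}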
def model_train(model_type='fc'):
    """
    Full training run that streams batches from generators.
    :param model_type: network to build, 'fc' or 'cnn'
    :return:
    """
    start = timer()
    if model_type == 'fc':
        model_pair = build_fc_network()
    elif model_type == 'cnn':
        model_pair = build_cnn_network()
    else:
        print("Model type not supported: {}".format(model_type))
        return
    end = timer()
    print("{} min taken for generating networks".format((end - start) / 60.0))

    start = timer()
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=TRAIN_PORTION)
    tg = build_generator(file_list=train_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    v_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=VALIDATION_PORTION)
    vg = build_generator(file_list=v_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    end = timer()
    print("{} min taken for generating input".format((end - start) / 60.0))

    key = model_pair[0]
    model = model_pair[1]
    print(key)
    model.summary()

    history = History()
    log_dir = LOG_DIR + key + '/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    file_path = MODEL_CHECKPOINT + key + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensor_board = TensorBoard(log_dir, histogram_freq=5, write_grads=False,
                               write_graph=False, profile_batch=0)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=4, min_lr=1e-5)
    checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min', period=5)
    callbacks_list = [history, tensor_board, reduce_lr, checkpoint]

    adam_wn = Adam(lr=LR)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=adam_wn, metrics=['accuracy'])
    model.fit_generator(generator=tg,
                        steps_per_epoch=int(NUM_TRAIN_SAMPLES / BATCH_SIZE),
                        epochs=EPOCHS, verbose=1, shuffle=True,
                        use_multiprocessing=True, workers=10,
                        callbacks=callbacks_list,
                        max_queue_size=len(train_file_list),
                        validation_data=vg,
                        validation_steps=int(NUM_VALI_SAMPLES / BATCH_SIZE),
                        class_weight={0: 0.1, 1: 1.0})
    model.save(file_path)
    return
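# fit_generator is deprecated in TensorFlow 2.1+, where Model.fit accepts
# generators and Sequence objects directly. A sketch of the equivalent call
# for model_train under that assumption; the helper name is illustrative and
# the arguments mirror the fit_generator call above.
def fit_with_generator_tf2(model, tg, vg, train_file_list, callbacks_list):
    model.fit(tg,
              steps_per_epoch=int(NUM_TRAIN_SAMPLES / BATCH_SIZE),
              epochs=EPOCHS, verbose=1, shuffle=True,
              use_multiprocessing=True, workers=10,
              callbacks=callbacks_list,
              max_queue_size=len(train_file_list),
              validation_data=vg,
              validation_steps=int(NUM_VALI_SAMPLES / BATCH_SIZE),
              class_weight={0: 0.1, 1: 1.0})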
def model_train_resume(model_path):
    """
    Resume training a saved model.
    :param model_path: path to a model previously written by model.save()
    :return:
    """
    # load model (restores architecture, weights, and optimizer state)
    model = load_model(model_path)

    start = timer()
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=TRAIN_PORTION)
    tg = build_generator(file_list=train_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    v_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=VALIDATION_PORTION)
    vg = build_generator(file_list=v_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    end = timer()
    print("{} min taken for generating input".format((end - start) / 60.0))

    print(model_path)
    model.summary()

    new_path = model_path + '_resume_{}'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    checkpoint = ModelCheckpoint(new_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min', period=5)
    history = History()
    log_dir = LOG_DIR + model_path.split('/')[-1] \
        + '_resume_{}'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensor_board = TensorBoard(log_dir, histogram_freq=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=3, min_lr=1e-5)
    callbacks_list = [history, tensor_board, checkpoint, reduce_lr]

    # Hack: recompile with a fresh Adam to force the learning rate back to LR.
    # Note this also discards the optimizer state restored by load_model.
    print(K.eval(model.optimizer.lr))
    adam_wn = Adam(lr=LR)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=adam_wn, metrics=['accuracy'])
    print(K.eval(model.optimizer.lr))

    model.fit_generator(generator=tg,
                        steps_per_epoch=int(NUM_TRAIN_SAMPLES / BATCH_SIZE),
                        epochs=EPOCHS, verbose=1, shuffle=True,
                        use_multiprocessing=True, workers=10,
                        callbacks=callbacks_list,
                        max_queue_size=len(train_file_list),
                        validation_data=vg,
                        validation_steps=int(NUM_VALI_SAMPLES / BATCH_SIZE),
                        class_weight={0: 0.1, 1: 1.0})
    model.save(new_path)
    return
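# Alternative to the recompile hack in model_train_resume: load_model already
# restores the optimizer together with its accumulated state, so the learning
# rate can be overwritten in place and Adam's moment estimates kept. Sketch
# only; assumes the Keras backend is imported as K, as used above.
def set_learning_rate(model, lr):
    """Set the restored optimizer's learning rate without recompiling
    (recompiling replaces the optimizer and discards its state)."""
    K.set_value(model.optimizer.lr, lr)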
def xgboost_train():
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=1)
    tg = build_numpy(file_list=train_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS, ytx=None,
                     skip_header=1, shuffle=False, is_csv=IS_CSV)
    val_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=1)
    vg = build_numpy(file_list=val_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS, ytx=None,
                     skip_header=1, shuffle=False, is_csv=IS_CSV)

    x_train = copy.deepcopy(tg[0])
    y_train = copy.deepcopy(tg[1].reshape(-1))
    x_val = copy.deepcopy(vg[0])
    y_val = copy.deepcopy(vg[1].reshape(-1))
    del tg
    del vg

    count = np.sum(y_train)
    print("Number of Positive Training Windows: {}".format(count))
    print("Number of Negative Training Windows: {}".format(len(y_train) - count))

    eval_set = [(x_train, y_train), (x_val, y_val)]
    # eta is an alias of learning_rate; both are passed here, so keep them in
    # sync if either is changed
    my_model = XGBClassifier(base_score=0.5, booster='gbtree',
                             colsample_bylevel=1, colsample_bynode=1,
                             colsample_bytree=0.8, eta=0.03, gamma=0.1,
                             learning_rate=0.1, max_delta_step=0, max_depth=6,
                             min_child_weight=3, missing=None, n_estimators=600,
                             n_jobs=1, nthread=None, objective='binary:logistic',
                             random_state=0, reg_alpha=0, reg_lambda=1,
                             scale_pos_weight=XGBOOST_POSITIVE_WEIGHT, seed=1234,
                             subsample=0.8, verbosity=2, tree_method='hist')
    my_model.get_xgb_params()
    # logloss here is binary cross-entropy, the XGBoost counterpart of the
    # cross-entropy loss used by the TensorFlow models above
    trained = my_model.fit(x_train, y_train, early_stopping_rounds=15,
                           eval_metric=["logloss", "error"],
                           eval_set=eval_set, verbose=True)

    key = "xgboost-withClassWeight"
    file_path = MODEL_CHECKPOINT + key + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    trained.save_model(file_path)
    return trained
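# Usage sketch: reload the checkpoint written by xgboost_train and score the
# validation set. Assumes xgboost >= 1.0, where the sklearn wrapper
# round-trips cleanly through save_model/load_model; the helper name and
# arguments are illustrative, not part of the original module.
def xgboost_eval(model_path, x_val, y_val):
    clf = XGBClassifier()
    clf.load_model(model_path)
    preds = clf.predict(x_val)
    print("Validation accuracy: {}".format(float(np.mean(preds == y_val))))
    return preds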