def model_train_small(model_type='fc'):
    """
    Train on in-memory numpy arrays; use for fast iteration.
    :param model_type: network to build, 'fc' or 'cnn'
    :return:
    """
    start = timer()
    if model_type == 'fc':
        model_pair = build_fc_network()
    elif model_type == 'cnn':
        model_pair = build_cnn_network()
    else:
        print("Model type not supported: {}".format(model_type))
        return
    end = timer()
    print("{} min taken for generating networks".format((end - start) / 60.0))

    start = timer()
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=1.1)
    tg = build_numpy(file_list=train_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS,
                     ytx=None, is_csv=IS_CSV)
    val_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=1.1)
    vg = build_numpy(file_list=val_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS,
                     ytx=None, is_csv=IS_CSV)
    end = timer()
    print("{} min taken for generating input".format((end - start) / 60.0))

    count = np.sum(tg[1], axis=0)
    print("Number of Positive Training Windows: {}".format(count[0]))
    print("Number of Negative Training Windows: {}".format(len(tg[1]) - count[0]))

    key = model_pair[0] + '_small'
    model = model_pair[1]
    print(key)
    model.summary()  # summary() prints itself; wrapping it in print() emits a stray 'None'

    file_path = MODEL_CHECKPOINT + key + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    history = History()
    log_dir = LOG_DIR + key + '/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensor_board = TensorBoard(log_dir, histogram_freq=5,
                               write_grads=False, write_graph=False)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=4, min_lr=1e-5)
    checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min', period=5)
    callbacks_list = [history, tensor_board, reduce_lr, checkpoint]

    adam_wn = Adam(lr=LR)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=adam_wn, metrics=['accuracy'])
    model.fit(x=tg[0], y=tg[1], validation_data=vg,
              batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1, shuffle=False,
              callbacks=callbacks_list, class_weight={0: 0.1, 1: 1.0})
    model.save(file_path)
    return
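# The hard-coded class_weight={0: 0.1, 1: 1.0} above compensates for the
# positive/negative imbalance reported by the window counts. Below is a
# sketch of deriving the weights from the labels instead; compute_class_weight
# comes from scikit-learn, and whether 'balanced' weights outperform the
# manual values here is an assumption, not something this module establishes.
def balanced_class_weight(y):
    """Return a {label: weight} dict with weights inversely proportional to
    label frequency, e.g. for model.fit(..., class_weight=...)."""
    from sklearn.utils.class_weight import compute_class_weight
    labels = np.unique(y.reshape(-1))
    weights = compute_class_weight(class_weight='balanced',
                                   classes=labels, y=y.reshape(-1))
    return {int(label): float(w) for label, w in zip(labels, weights)}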
def model_train(model_type='fc'):
    """
    Full training run that streams batches from generators.
    :param model_type: network to build, 'fc' or 'cnn'
    :return:
    """
    start = timer()
    if model_type == 'fc':
        model_pair = build_fc_network()
    elif model_type == 'cnn':
        model_pair = build_cnn_network()
    else:
        print("Model type not supported: {}".format(model_type))
        return
    end = timer()
    print("{} min taken for generating networks".format((end - start) / 60.0))

    start = timer()
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=TRAIN_PORTION)
    tg = build_generator(file_list=train_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    v_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=VALIDATION_PORTION)
    vg = build_generator(file_list=v_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    end = timer()
    print("{} min taken for generating input".format((end - start) / 60.0))

    key = model_pair[0]
    model = model_pair[1]
    print(key)
    model.summary()

    history = History()
    log_dir = LOG_DIR + key + '/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    file_path = MODEL_CHECKPOINT + key + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensor_board = TensorBoard(log_dir, histogram_freq=5, write_grads=False,
                               write_graph=False, profile_batch=0)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=4, min_lr=1e-5)
    checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min', period=5)
    callbacks_list = [history, tensor_board, reduce_lr, checkpoint]

    adam_wn = Adam(lr=LR)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=adam_wn, metrics=['accuracy'])
    model.fit_generator(generator=tg,
                        steps_per_epoch=int(NUM_TRAIN_SAMPLES / BATCH_SIZE),
                        epochs=EPOCHS, verbose=1, shuffle=True,
                        use_multiprocessing=True, workers=10,
                        callbacks=callbacks_list,
                        max_queue_size=len(train_file_list),
                        validation_data=vg,
                        validation_steps=int(NUM_VALI_SAMPLES / BATCH_SIZE),
                        class_weight={0: 0.1, 1: 1.0})
    model.save(file_path)
    return
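# fit_generator is deprecated in TensorFlow 2.1+, where Model.fit accepts
# generators and Sequence objects directly. A sketch of the equivalent call
# for model_train under that assumption; the helper name is illustrative and
# the arguments mirror the fit_generator call above.
def fit_with_generator_tf2(model, tg, vg, train_file_list, callbacks_list):
    model.fit(tg,
              steps_per_epoch=int(NUM_TRAIN_SAMPLES / BATCH_SIZE),
              epochs=EPOCHS, verbose=1, shuffle=True,
              use_multiprocessing=True, workers=10,
              callbacks=callbacks_list,
              max_queue_size=len(train_file_list),
              validation_data=vg,
              validation_steps=int(NUM_VALI_SAMPLES / BATCH_SIZE),
              class_weight={0: 0.1, 1: 1.0})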
def model_train_resume(model_path):
    """
    Resume training a saved model.
    :param model_path: path to a model previously written by model.save()
    :return:
    """
    # load model (restores architecture, weights, and optimizer state)
    model = load_model(model_path)

    start = timer()
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=TRAIN_PORTION)
    tg = build_generator(file_list=train_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    v_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=VALIDATION_PORTION)
    vg = build_generator(file_list=v_file_list, xcolumns=X_COLUMNS,
                         ycolumns=Y_COLUMNS, ytx=None, is_csv=IS_CSV)
    end = timer()
    print("{} min taken for generating input".format((end - start) / 60.0))

    print(model_path)
    model.summary()

    new_path = model_path + '_resume_{}'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    checkpoint = ModelCheckpoint(new_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min', period=5)
    history = History()
    log_dir = LOG_DIR + model_path.split('/')[-1] \
        + '_resume_{}'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensor_board = TensorBoard(log_dir, histogram_freq=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=3, min_lr=1e-5)
    callbacks_list = [history, tensor_board, checkpoint, reduce_lr]

    # Hack: recompile with a fresh Adam to force the learning rate back to LR.
    # Note this also discards the optimizer state restored by load_model.
    print(K.eval(model.optimizer.lr))
    adam_wn = Adam(lr=LR)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=adam_wn, metrics=['accuracy'])
    print(K.eval(model.optimizer.lr))

    model.fit_generator(generator=tg,
                        steps_per_epoch=int(NUM_TRAIN_SAMPLES / BATCH_SIZE),
                        epochs=EPOCHS, verbose=1, shuffle=True,
                        use_multiprocessing=True, workers=10,
                        callbacks=callbacks_list,
                        max_queue_size=len(train_file_list),
                        validation_data=vg,
                        validation_steps=int(NUM_VALI_SAMPLES / BATCH_SIZE),
                        class_weight={0: 0.1, 1: 1.0})
    model.save(new_path)
    return
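# Alternative to the recompile hack in model_train_resume: load_model already
# restores the optimizer together with its accumulated state, so the learning
# rate can be overwritten in place and Adam's moment estimates kept. Sketch
# only; assumes the Keras backend is imported as K, as used above.
def set_learning_rate(model, lr):
    """Set the restored optimizer's learning rate without recompiling
    (recompiling replaces the optimizer and discards its state)."""
    K.set_value(model.optimizer.lr, lr)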
def xgboost_train():
    train_file_list = fetch_file_list(data_dir=TRAINING_DATA_DIR, portion=1)
    tg = build_numpy(file_list=train_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS, ytx=None,
                     skip_header=1, shuffle=False, is_csv=IS_CSV)
    val_file_list = fetch_file_list(data_dir=VALIDATION_DATA_DIR, portion=1)
    vg = build_numpy(file_list=val_file_list, num_samples=None,
                     xcolumns=X_COLUMNS, ycolumns=Y_COLUMNS, ytx=None,
                     skip_header=1, shuffle=False, is_csv=IS_CSV)

    x_train = copy.deepcopy(tg[0])
    y_train = copy.deepcopy(tg[1].reshape(-1))
    x_val = copy.deepcopy(vg[0])
    y_val = copy.deepcopy(vg[1].reshape(-1))
    del tg
    del vg

    count = np.sum(y_train)
    print("Number of Positive Training Windows: {}".format(count))
    print("Number of Negative Training Windows: {}".format(len(y_train) - count))

    eval_set = [(x_train, y_train), (x_val, y_val)]
    # eta is an alias of learning_rate; both are passed here, so keep them in
    # sync if either is changed
    my_model = XGBClassifier(base_score=0.5, booster='gbtree',
                             colsample_bylevel=1, colsample_bynode=1,
                             colsample_bytree=0.8, eta=0.03, gamma=0.1,
                             learning_rate=0.1, max_delta_step=0, max_depth=6,
                             min_child_weight=3, missing=None, n_estimators=600,
                             n_jobs=1, nthread=None, objective='binary:logistic',
                             random_state=0, reg_alpha=0, reg_lambda=1,
                             scale_pos_weight=XGBOOST_POSITIVE_WEIGHT, seed=1234,
                             subsample=0.8, verbosity=2, tree_method='hist')
    my_model.get_xgb_params()
    # logloss here is binary cross-entropy, the XGBoost counterpart of the
    # cross-entropy loss used by the TensorFlow models above
    trained = my_model.fit(x_train, y_train, early_stopping_rounds=15,
                           eval_metric=["logloss", "error"],
                           eval_set=eval_set, verbose=True)

    key = "xgboost-withClassWeight"
    file_path = MODEL_CHECKPOINT + key + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    trained.save_model(file_path)
    return trained
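# Usage sketch: reload the checkpoint written by xgboost_train and score the
# validation set. Assumes xgboost >= 1.0, where the sklearn wrapper
# round-trips cleanly through save_model/load_model; the helper name and
# arguments are illustrative, not part of the original module.
def xgboost_eval(model_path, x_val, y_val):
    clf = XGBClassifier()
    clf.load_model(model_path)
    preds = clf.predict(x_val)
    print("Validation accuracy: {}".format(float(np.mean(preds == y_val))))
    return preds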