Example #1
def train_eval():
    """ train and eval the model
    """

    global trainloader
    global testloader
    global net

    (x_train, y_train) = trainloader
    (x_test, y_test) = testloader

    # train procedure
    net.fit(
        x=x_train,
        y=y_train,
        batch_size=args.batch_size,
        validation_data=(x_test, y_test),
        epochs=args.epochs,
        shuffle=True,
        callbacks=[
            SendMetrics(),
            EarlyStopping(min_delta=0.001, patience=10),
            TensorBoard(log_dir=TENSORBOARD_DIR),
        ],
    )

    # trial report final acc to tuner
    _, acc = net.evaluate(x_test, y_test)
    logger.debug("Final result is: %d", acc)
    nni.report_final_result(acc)
Example #2
def train(args, params):
    '''
    Train model
    '''
    model = build_model(params)
    X_train, Y1_train, Y2_train, X_test, Y1_test, Y2_test = load_dataset(
        args.dataset_name, args.num_train)

    print('Fitting model...')
    results = model.fit(
        X_train, [Y1_train, Y2_train],
        epochs=args.epochs,
        verbose=1,
        validation_data=(X_test, [Y1_test, Y2_test]),
        callbacks=[SendMetrics(),
                   TensorBoard(log_dir=TENSORBOARD_DIR)])

    _, _, _, cat_acc, subcat_acc = model.evaluate(X_test, [Y1_test, Y2_test],
                                                  verbose=0)
    LOG.debug('Final result is: %g', subcat_acc)
    nni.report_final_result(subcat_acc)
    print('Final result is:', subcat_acc)

    model_id = nni.get_sequence_id()
    # serialize model to JSON
    model_json = model.to_json()
    with open("model-{}.json".format(model_id), "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("model-{}.h5".format(model_id))
    print("Saved model to disk")
Example #3
def run(kf, data, model, label='flag_y'):
    """
    Train model, predict on test set and get model performance.

    :param kf: cross-validation splitter (e.g. KFold)
    :param data: DataFrame holding the features and the label column
    :param model: model to fit on each fold
    :param label: name of the label column
    :return: reports the averaged metrics to NNI
    """
    defaults, gini_trains, gini_tests, auc_trains, auc_tests = [], [], [], [], []
    ks_trains, ks_tests, psis, models = [], [], [], []
    # cross-validation
    kf_list = list(kf.split(data))
    for i, index in enumerate(kf_list):
        # train
        X = data.drop(columns=label)
        y = data[label]
        train_index, test_index = index[0], index[1]
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]
        X_test, y_test = X.iloc[test_index], y.iloc[test_index]
        model.fit(X_train, y_train)
        y_pred = model.predict_proba(X_train)[:, 1]
        y_test_pred = model.predict_proba(X_test)[:, 1]
        # compute metrics
        ks_train, ks_test = calc_ks(y_pred, y_train), calc_ks(y_test_pred, y_test)
        auc_train, auc_test = calc_auc(y_pred, y_train), calc_auc(y_test_pred, y_test)
        gini_train, gini_test = calc_gini(y_pred, y_train), calc_gini(y_test_pred, y_test)
        psi = calc_psi(y_pred, y_test_pred)
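        # composite objective: reward a high test KS while penalizing the gap between train and test KS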
        default = 1.8 * ks_test - 0.8 * abs(ks_train - ks_test)
        defaults.append(default)
        gini_trains.append(gini_train)
        gini_tests.append(gini_test)
        auc_trains.append(auc_train)
        auc_tests.append(auc_test)
        ks_trains.append(ks_train)
        ks_tests.append(ks_test)
        psis.append(psi)
        models.append(model)
    # aggregate results
    metrics = {
        'gini_train': float(np.mean(gini_trains)),
        'gini_test': float(np.mean(gini_tests)),
        'auc_train': float(np.mean(auc_trains)),
        'auc_test': float(np.mean(auc_tests)),
        'ks_train': float(np.mean(ks_trains)),
        'ks_test': float(np.mean(ks_tests)),
        'psi': float(np.mean(psis)),
        'default': float(np.mean(defaults))
    }
    # save the best model for this set of hyper-parameters
    best_model_idx = np.argmax(defaults)
    dump_pkl(models[best_model_idx])
    # generate the train/test sets of the best fold
    train = data.iloc[kf_list[best_model_idx][0]]
    write_data(train, 'train.fea')
    test = data.iloc[kf_list[best_model_idx][1]]
    write_data(test, 'test.fea')
    LOG.debug(metrics)
    nni.report_final_result(metrics)
Example #4
def run(X_train, X_test, y_train, y_test, model):
    '''Train model and predict result'''
    model.fit(X_train, y_train)
    testProba = model.predict_proba(X_test)[:, 1]  # positive-class probability for ROC-AUC
    score = roc_auc_score(y_test, testProba)
    LOG.debug('ROC-AUC score: %s' % score)
    nni.report_final_result(score)
def svd_training(params):
    """
    Train Surprise SVD using the given hyper-parameters
    """
    logger.debug("Start training...")
    train_data = pd.read_pickle(path=os.path.join(params['datastore'], params['train_datapath']))
    validation_data = pd.read_pickle(path=os.path.join(params['datastore'], params['validation_datapath']))

    svd_params = {p: params[p] for p in ['random_state', 'n_epochs', 'verbose', 'biased', 'n_factors', 'init_mean',
                                         'init_std_dev', 'lr_all', 'reg_all', 'lr_bu', 'lr_bi', 'lr_pu', 'lr_qi',
                                         'reg_bu', 'reg_bi', 'reg_pu', 'reg_qi']}
    svd = surprise.SVD(**svd_params)

    train_set = surprise.Dataset.load_from_df(train_data, reader=surprise.Reader(params['surprise_reader'])) \
        .build_full_trainset()
    svd.fit(train_set)

    logger.debug("Evaluating...")

    metrics_dict = {}
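    # NNI treats the 'default' key of a reported dict as the value the tuner optimizes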
    rating_metrics = params['rating_metrics']
    if len(rating_metrics) > 0:
        predictions = compute_rating_predictions(svd, validation_data, usercol=params['usercol'],
                                                 itemcol=params['itemcol'])
        for metric in rating_metrics:
            result = getattr(evaluation, metric)(validation_data, predictions)
            logger.debug("%s = %g", metric, result)
            if metric == params['primary_metric']:
                metrics_dict['default'] = result
            else:
                metrics_dict[metric] = result

    ranking_metrics = params['ranking_metrics']
    if len(ranking_metrics) > 0:
        all_predictions = compute_ranking_predictions(svd, train_data, usercol=params['usercol'],
                                                      itemcol=params['itemcol'],
                                                      recommend_seen=params['recommend_seen'])
        k = params['k']
        for metric in ranking_metrics:
            result = getattr(evaluation, metric)(validation_data, all_predictions, col_prediction='prediction', k=k)
            logger.debug("%s@%d = %g", metric, k, result)
            if metric == params['primary_metric']:
                metrics_dict['default'] = result
            else:
                metrics_dict[metric] = result

    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")

    # Report the metrics
    nni.report_final_result(metrics_dict)

    # Save the metrics in a JSON file
    output_dir = os.environ.get('NNI_OUTPUT_DIR')
    with open(os.path.join(output_dir, 'metrics.json'), 'w') as fp:
        temp_dict = metrics_dict.copy()
        temp_dict[params['primary_metric']] = temp_dict.pop('default')
        json.dump(temp_dict, fp)

    return svd
Example #6
def test(final=False):
    model.eval()
    z = model(data.x, data.edge_index)

    evaluator = MulticlassEvaluator()
    if args.dataset == 'WikiCS':
        accs = []
        for i in range(20):
            acc = log_regression(z,
                                 dataset,
                                 evaluator,
                                 split=f'wikics:{i}',
                                 num_epochs=800)['acc']
            accs.append(acc)
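        # average accuracy over the 20 prescribed WikiCS splits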
        acc = sum(accs) / len(accs)
    else:
        acc = log_regression(z,
                             dataset,
                             evaluator,
                             split='rand:0.1',
                             num_epochs=3000,
                             preload_split=split)['acc']

    if final and use_nni:
        nni.report_final_result(acc)
    elif use_nni:
        nni.report_intermediate_result(acc)

    return acc
def run(X_train, X_test, y_train, y_test, model):
    """
    Train model, predict on test set and get model performance.

    :param X_train: train data
    :param X_test: test data
    :param y_train: train label
    :param y_test: test label
    :param model: specific model
    :return: report final result to nni
    """
    # train
    model.fit(X_train, y_train)
    y_pred = model.predict_proba(X_train)[:, 1]
    y_test_pred = model.predict_proba(X_test)[:, 1]
    # compute metrics
    ks_train, ks_test = calc_ks(y_pred, y_train), calc_ks(y_test_pred, y_test)
    auc_train, auc_test = calc_auc(y_pred, y_train), calc_auc(y_test_pred, y_test)
    gini_train, gini_test = calc_gini(y_pred, y_train), calc_gini(y_test_pred, y_test)
    psi = calc_psi(y_pred, y_test_pred)
    # aggregate results
    metrics = {
        'gini_train': gini_train,
        'gini_test': gini_test,
        'auc_train': auc_train,
        'auc_test': auc_test,
        'ks_train': ks_train,
        'ks_test': ks_test,
        'psi': psi,
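        # composite objective: reward a high test KS while penalizing the train/test KS gap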
        'default': 1.8 * ks_test - 0.8 * abs(ks_train - ks_test)
    }
    dump_pkl(model)
    LOG.debug(metrics)
    nni.report_final_result(metrics)
Example #8
def main(params):
    """
    Main program:
      - Build network
      - Prepare dataset
      - Train the model
      - Report accuracy to tuner
    """
    model = MnistModel(conv_size=params['conv_size'],
                       hidden_size=params['hidden_size'],
                       dropout_rate=params['dropout_rate'])
    optimizer = Adam(learning_rate=params['learning_rate'])
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    _logger.info('Model built')

    (x_train, y_train), (x_test, y_test) = load_dataset()
    _logger.info('Dataset loaded')

    model.fit(x_train,
              y_train,
              batch_size=params['batch_size'],
              epochs=10,
              verbose=0,
              callbacks=[ReportIntermediates()],
              validation_data=(x_test, y_test))
    _logger.info('Training completed')

    loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
    nni.report_final_result(
        accuracy)  # send final accuracy to NNI tuner and web UI
    _logger.info('Final accuracy reported: %s', accuracy)
Example #9
def train(params):
    '''
    Train model
    '''
    x_train, y_train, x_test, y_test = load_mnist_data()
    model = create_mnist_model(params)

    epochs = 10
    model.fit(x_train,
              y_train,
              batch_size=params['batch_size'],
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[SendMetrics()])

    _, acc = model.evaluate(x_test, y_test, verbose=0)
    logger.debug('Final result is: %g', acc)
    nni.report_final_result(acc)

    model_id = nni.get_sequence_id()
    model_json = model.to_json()
    with open('./ckpt/model-{}.json'.format(model_id), 'w') as json_file:
        json_file.write(model_json)
    model.save_weights('./ckpt/model-{}.h5'.format(model_id))
def run(x_train_res, y_train_res, x_train, y_train, params, model_num):
    bagging_predict_result = []
    data_index = permutation(len(y_train))
    if model_num == 1:
        model = CatBoostClassifier(learning_rate=params["learning_rate"],
                                   l2_leaf_reg=params["l2_leaf_reg"],
                                   depth=params["depth"],
                                   iterations=params["iterations"],
                                   border_count=params["border_count"],
                                   verbose=False)
        model.fit(X=x_train_res.iloc[data_index],
                  y=y_train_res.iloc[data_index])
    elif model_num == 2:
        model = XGBClassifier(use_label_encoder=False,
                              n_estimators=params["n_estimators"],
                              learning_rate=params["learning_rate"],
                              min_child_weight=params["min_child_weight"],
                              reg_lambda=params["reg_lambda"],
                              gamma=params["gamma"],
                              depth=params["depth"],
                              verbosity=0)
        model.fit(x_train_res.iloc[data_index], y_train_res.iloc[data_index])
    else:
        model = LGBMClassifier(n_estimators=params['n_estimators'],
                               learning_rate=params['learning_rate'],
                               subsample=params['subsample'],
                               min_child_weight=params['min_child_weight'],
                               reg_lambda=params['reg_lambda'],
                               silent=True)
        model.fit(x_train_res.iloc[data_index], y_train_res.iloc[data_index])
    y_pred = model.predict_proba(x_train)
    bagging_predict_result.append(y_pred)
    lloss = log_loss(y_train, y_pred)
    print(
        f'log loss:{lloss}\naccuracy: {accuracy_score(y_train, model.predict(x_train))}'
    )
    nni.report_final_result(lloss)
def evaluate_mlp(agent, env, max_steps, use_nni=False, report_avg=None, eval_repeat=1):
    print("Evaluating agent over {} episodes".format(eval_repeat))
    evaluation_returns = []
    for _ in range(eval_repeat):
        state = env.reset()
        episode_reward = 0.
        for _ in range(max_steps):
            with torch.no_grad():
                action, _, _, _ = agent.act(state, True)
                next_state, reward, done, _ = env.step(action)

                state = next_state
                episode_reward += reward
            if done:  # currently all situations end with a done
                break

        evaluation_returns.append(episode_reward)
    eval_avg = sum(evaluation_returns) / len(evaluation_returns)
    print("Ave. evaluation return =", eval_avg)

    if use_nni:
        if eval_repeat == 1:
            nni.report_intermediate_result(eval_avg)
        elif eval_repeat > 1 and report_avg is not None:
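            # blend the previously reported running average with this evaluation's average return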
            metric = (report_avg + eval_avg) / 2
            nni.report_final_result(metric)
    return eval_avg
def execute_runner(runners, is_nni=False):
    train_losses = []
    train_accuracies = []
    test_intermediate_results = []
    test_losses = []
    test_accuracies = []
    for idx_r, runner in enumerate(runners):
        rs = runner.run(verbose=2)
        train_losses.append(rs[0])
        train_accuracies.append(rs[1])
        test_intermediate_results.append(rs[2])
        test_losses.append(rs[3]["loss"])
        test_accuracies.append(rs[3]["acc"])
        '''if idx_r == 0:
            plot_graphs(rs)'''
    if is_nni:
        mean_intermediate_res = np.mean(test_intermediate_results, axis=0)
        for i in mean_intermediate_res:
            nni.report_intermediate_result(i)
        nni.report_final_result(np.mean(test_accuracies))

    runners[-1].logger.info("*" * 15 + "Final accuracy train: %3.4f" %
                            np.mean(train_accuracies))
    runners[-1].logger.info("*" * 15 + "Std accuracy train: %3.4f" %
                            np.std(train_accuracies))
    runners[-1].logger.info("*" * 15 + "Final accuracy test: %3.4f" %
                            np.mean(test_accuracies))
    runners[-1].logger.info("*" * 15 + "Std accuracy train: %3.4f" %
                            np.std(train_accuracies))
    runners[-1].logger.info("Finished")
    return
Example #13
def main():
    data_dir = '/tmp/tensorflow/mnist/input_data'
    mnist = input_data.read_data_sets(data_dir, one_hot=True)
    logger.debug('MNIST data download done.')
    mnist_network = MnistNetwork()
    mnist_network.build_network()
    logger.debug('MNIST network built.')
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())
    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch_num = 200
        for i in range(batch_num):
            batch_size = nni.choice(50, 250, 500, name='batch_size')
            batch = mnist.train.next_batch(batch_size)
            dropout_rate = nni.choice(1, 5, name='dropout_rate')
            mnist_network.train_step.run(feed_dict={
                mnist_network.x: batch[0],
                mnist_network.y: batch[1],
                mnist_network.keep_prob: dropout_rate,
            })
            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(feed_dict={
                    mnist_network.x: mnist.test.images,
                    mnist_network.y: mnist.test.labels,
                    mnist_network.keep_prob: 1.0,
                })
                nni.report_intermediate_result(test_acc)
        test_acc = mnist_network.accuracy.eval(feed_dict={
            mnist_network.x: mnist.test.images,
            mnist_network.y: mnist.test.labels,
            mnist_network.keep_prob: 1.0,
        })
        nni.report_final_result(test_acc)
Example #14
def execute_runner(runners, is_nni=False):
    res = [runner.run(verbose=2) for runner in runners]
    all_final_results = [r[1] for r in res]
    if is_nni:
        # NNI reporting. now reporting -losses, trying to maximize this. It can also be done for AUCs.
        final_loss = np.mean([all_final_results[it]["all_loss_test"] for it in range(len(all_final_results))])
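        # exp(-loss) turns "lower loss is better" into "higher value is better" for a maximizing tuner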
        nni.report_final_result(np.exp(-final_loss))

        # Reporting results to loggers
        aggr_final_results = {}
        for new_name, old_name in zip(
                ["auc_train", "loss_train", "auc_eval", "loss_eval", "auc_test", "loss_test", "epochs_done"],
                ["auc_train", "all_loss_train", "auc_eval", "all_loss_eval", "auc_test", "all_loss_test",
                 "epochs_done"]):
            aggr_final_results[new_name] = [d[old_name] for d in all_final_results]
        runners[-1].logger.info("\nAggregated final results:")
        for name, vals in aggr_final_results.items():
            runners[-1].logger.info("*" * 15 + f"mean {name}: {np.mean(vals):.4f}")
            runners[-1].logger.info("*" * 15 + f"std {name}: {np.std(vals):.4f}")
            runners[-1].logger.info("Finished")

    # If NNI is not running, only the mean-results dictionary is built; no special plots.
    all_results = {
        "all_final_output_labels_train": [d["training_output_labels"] for d in all_final_results],
        "all_final_output_labels_eval": [d["eval_output_labels"] for d in all_final_results],
        "all_final_output_labels_test": [d["test_output_labels"] for d in all_final_results],
        "final_auc_train": np.mean([d["auc_train"] for d in all_final_results]),
        "final_loss_train": np.mean([d["all_loss_train"] for d in all_final_results]),
        "final_auc_eval": np.mean([d["auc_eval"] for d in all_final_results]),
        "final_loss_eval": np.mean([d["all_loss_eval"] for d in all_final_results]),
        "final_auc_test": np.mean([d["auc_test"] for d in all_final_results]),
        "final_loss_test": np.mean([d["all_loss_test"] for d in all_final_results]),
        "average_epochs_done": np.mean([d["epochs_done"] for d in all_final_results])
    }
    return all_results
Example #15
def run_trial(parameters):
    print("Starting Trial")
    print(parameters)
    model = Model(parameters)
    model.load_data()
    acc = []
    for _ in range(parameters['trials']):
        # try:
        model.build_architecture()
        model.train()
        acc.append(model.test())
        # except:
        #     model.set_device("cpu")
        #     # model.build_topological_edges()
        #     model.build_architecture()
        #     model.train()
        #     acc.append(model.test())

    avg_acc = np.mean(acc)
    Logger.info("average test acc: {:.3f}% \n std is: {}".format(avg_acc * 100, np.std(acc) * 100))
    # output for nni - auto ml
    if parameters['is_nni']:
        avg_acc = max(avg_acc, MinValForNNI)
        nni.report_final_result(avg_acc)

    return
Example #16
def evaluate_model(model_cls):
    # "model_cls" is a class, need to instantiate
    model = model_cls()

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    transf = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    train_loader = DataLoader(MNIST('data/mnist',
                                    download=True,
                                    transform=transf),
                              batch_size=64,
                              shuffle=True)
    test_loader = DataLoader(MNIST('data/mnist',
                                   download=True,
                                   train=False,
                                   transform=transf),
                             batch_size=64)

    for epoch in range(3):
        # train the model for one epoch
        train_epoch(model, device, train_loader, optimizer, epoch)
        # test the model for one epoch
        accuracy = test_epoch(model, device, test_loader)
        # call report intermediate result. Result can be float or dict
        nni.report_intermediate_result(accuracy)

    # report final test result
    nni.report_final_result(accuracy)
Example #17
    def fit(self, train_loader, validation_loader):
        for e in range(self.config.n_epochs):
            if self.config.verbose:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.datetime.now().utcnow().isoformat()
                self.log(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            summary_loss = self.train_one_epoch(train_loader)

            self.log(
                f'[RESULT]: Train. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            self.save(f'{self.base_dir}/last-checkpoint.bin')

            t = time.time()
            summary_loss = self.validation(validation_loader)

            self.log(
                f'[RESULT]: Val. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            nni.report_intermediate_result(summary_loss.avg)
            # logger.debug(summary_loss.avg)
            if summary_loss.avg < self.best_summary_loss:
                self.best_summary_loss = summary_loss.avg
                self.model.eval()
                self.save(f'{self.base_dir}/best-checkpoint-{str(self.epoch).zfill(3)}epoch.bin')
                # for path in sorted(glob(f'{self.base_dir}/best-checkpoint-*epoch.bin'))[:-3]:
                #     os.remove(path)

            if self.config.validation_scheduler:
                self.scheduler.step(metrics=summary_loss.avg)

            self.epoch += 1
        nni.report_final_result(summary_loss.avg)
Example #18
    def __call__(self):
        while self.cur_epoch < self.hp.max_epoch:
            try:
                if self.optimizer_config.lr_update:
                    utils.adjust_learning_rate(self.optimizer, self.cur_epoch,
                                               self.hp.max_epoch,
                                               self.optimizer_config.lr)
                else:
                    utils.set_learning_rate(self.optimizer, self.hp.lr, False)
                _ = self.train_epoch_dataset_first(self.cur_epoch, 'trn')
            except KeyboardInterrupt:
                self.handle_exception()
                print('Exit control menu.')
            # nni.report_intermediate_result(0.5)
            # if self.cur_epoch % self.config.val_interval == 0:
            try:
                ret_val, val_loss, val_eval = self.train_epoch_dataset_first(
                    self.cur_epoch, 'val')
                ret_tst, tst_loss, tst_eval = self.train_epoch_dataset_first(
                    self.cur_epoch, 'tst')
                if self.hp.evaluation == 'loss':
                    self.undec = self.recoder.push_loss(
                        self.cur_epoch, self.undec, val_loss, ret_tst)
                    if self.hp.nni:
                        nni.report_intermediate_result(tst_loss)
                elif self.hp.evaluation == 'acc':
                    self.undec = self.recoder.push_eval(
                        self.cur_epoch, self.undec, val_eval, ret_tst)
                    if self.hp.nni:
                        nni.report_intermediate_result(tst_eval)
                else:
                    raise ValueError('Unknown evaluation.')
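                # undec presumably counts epochs without improvement; checkpoint whenever it resets to 0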
                if self.undec == 0: self.save_checkpoint()
            except KeyboardInterrupt:
                print('Skipping val and test because Ctrl+C was detected.')

            self.cur_epoch += 1
            if self.undec >= self.hp.stop_val_dec:
                print(
                    'Val_loss hasn\'t decreased in the last [{}] epochs, stopping training early.'
                    .format(self.hp.stop_val_dec))
                break

        if self.hp.evaluation == 'loss':
            fin_epoch, fin_loss = self.recoder.pop_via_loss()
            if self.hp.nni:
                nni.report_final_result(fin_loss)
            print(
                f'[{self.cur_epoch}] epochs completed, output results = {fin_loss} at epoch [{fin_epoch}], seed = {self.hp.seed}.'
            )
        elif self.hp.evaluation == 'acc':
            fin_epoch, fin_eval = self.recoder.pop_via_eval()
            if self.hp.nni:
                nni.report_final_result(fin_eval)
            print(
                f'[{self.cur_epoch}] epochs completed, output results = {fin_eval} at epoch [{fin_epoch}], seed = {self.hp.seed}.'
            )
        else:
            raise ValueError('Unknown evaluation.')
        self.evaluate()
def run(x_train, y_train, x_test, y_test, model):

    eval_set = [(x_test, y_test)]
    model.fit(x_train,
              y_train,
              early_stopping_rounds=10,
              eval_metric='auc',
              eval_set=eval_set,
              verbose=True)
    y_pred = model.predict_proba(x_test, ntree_limit=model.best_ntree_limit)[:, 1]
    auc = roc_auc_score(y_test, y_pred)
    score = roc_auc_score(
        y_train,
        model.predict_proba(x_train, ntree_limit=model.best_ntree_limit)[:, 1])
    experiment_id = get_experiment_id()
    LOG.info('experiment id: {}\ttrain auc: {}\ttest auc: {}'.format(experiment_id, score, auc))
    save_dir = args.save_dir
    model_best = glob.glob('{}/*.model'.format(save_dir))
    if len(model_best) != 0:
        model_best = model_best[0]
        model_best_auc = float(model_best.strip('.model').split('_')[-1])
        if auc > model_best_auc:
            model.save_model('{}/xgb_{}.model'.format(save_dir, auc))
            os.system('rm {}'.format(model_best))
    else:
        model.save_model('{}/xgb_{}.model'.format(save_dir, auc))
    nni.report_final_result(auc)
def train(args, params):
    '''
    Train model
    '''
    data = data_read.yushan_data()
    train_x, train_y, test_x, test_y = data.get_train_data(tr_path='../data/yushan/yushan_tr_index.npy',
                                                           tt_path='../data/yushan/yushan_tt_index.npy',
                                                           data_type='3D')
#    data_aug_generator=data_read.data_aug(train_x,train_y)
#    rot_x,rot_y=data_aug_generator.rotate(train_x,train_y)
#    flip_x,flip_y=data_aug_generator.flip(train_x,train_y)
#    noise_x,noise_y=data_aug_generator.add_noise(train_x,train_y,sigma=0.5)
#    aug_data=get_mixup_data(train_x,train_y,[flip_x,flip_y],[rot_x,rot_y],[noise_x,noise_y],params)
#    train_x,train_y=aug_data
    train_y = data_read.label_to_onehot(train_y)
    test_y = data_read.label_to_onehot(test_y)
    model = create_model(params)
    send_metric = SendMetrics(validation_data=(test_x, test_y))
    model.fit(train_x, train_y, batch_size=args.batch_size, epochs=args.epochs, verbose=1,
              validation_data=(test_x, test_y), callbacks=[send_metric, TensorBoard(log_dir=TENSORBOARD_DIR)])
    y_pred = model.predict_proba(test_x)
    score = roc_auc_score(test_y[:, 1], y_pred[:, 1])
#    _, acc = model.evaluate(x_test, y_test, verbose=0)
    LOG.debug('Final result is: %g', score)
    nni.report_final_result(score)
Example #21
def main(params):
    """
    Main program:
      - Build network
      - Prepare dataset
      - Train the model
      - Report accuracy to tuner
    """
    model = dnnmodel(hidden_size=params['hidden_size'])
    optimizer = Adam(learning_rate=params['learning_rate'])
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    _logger.info('Model built')

    train_x, train_y, test_x, test_y = load_data()
    _logger.info('Data loaded')

    model.fit(train_x,
              train_y,
              batch_size=params['batch_size'],
              epochs=100,
              verbose=0,
              callbacks=[ReportIntermediates()],
              validation_data=(test_x, test_y))
    _logger.info('Training completed')

    loss, accuracy = model.evaluate(test_x, test_y, verbose=0)
    nni.report_final_result(
        accuracy)  # send final accuracy to NNI tuner and web UI
    _logger.info('Final accuracy reported: %s', accuracy)
Example #22
    async def query_trial_metrics(self):
        start_t = getattr(self, "_trial_start_time", None)
        if start_t is None:
            logger.info(f"Trial({self.cfg_hash}) is not started!")
        else:
            logger.info(
                f"Trial({self.cfg_hash}) has started {(datetime.now() - start_t).total_seconds()} secs"
            )

        curr_latest_epoch, intermediate_metrics, final_val = self.metrics_reporter.query_metrics(
            self.latest_epoch)
        if curr_latest_epoch is None:
            return
        if intermediate_metrics is not None:
            for metrics in intermediate_metrics:
                logger.info(f"report_intermediate_result:{metrics}")
                if os.getenv(ENV_KEY_TRIAL_IN_NNI, None):
                    nni.report_intermediate_result(metrics)  # currently in the testing phase; cannot call the NNI interface yet
            self.latest_epoch = curr_latest_epoch
        if final_val is not None and self.final_val is None:  # the final val is read for the first time
            self.final_val = final_val
            logger.info(f"report_final_result:{self.final_val}")
            if os.getenv(ENV_KEY_TRIAL_IN_NNI, None):
                nni.report_final_result(self.final_val)
            self._trial_finished_future.set_result(self.final_val)
def main(args, experiment_id, trial_id):
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()
    torch.set_num_threads(4)
    torch.manual_seed(args['seed'])
    device = torch.device("cuda" if use_cuda else "cpu")

    batch_size = args['batch_size']
    hidden_size = args['hidden_size']

    train_loader, test_loader = data_loader(batch_size)

    model = Net(hidden_size=hidden_size).to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args['lr'],
                          momentum=args['momentum'])

    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)

        # report intermediate result
        nni.report_intermediate_result(test_acc)
        logger.debug('test accuracy %g', test_acc)
        logger.debug('Pipe send intermediate result done.')
        torch.save(
            model.state_dict(),
            f'{os.path.join(os.getcwd())}/model_outputs/{experiment_id}-{trial_id}-model.pth'
        )

    test_acc = test(args, model, device, test_loader)
    # report final result
    nni.report_final_result(test_acc)
    logger.debug('Final result is %g', test_acc)
    output_logger.info(f'{experiment_id}|{trial_id}|{params}|{test_acc:0.6f}')
    logger.debug('Send final result done.')
Example #24
def run(params):
    """ Distributed Synchronous SGD Example """
    rank = dist.get_rank()
    torch.manual_seed(1234)
    train_set, bsz = partition_dataset()
    model = Net()
    optimizer = optim.SGD(model.parameters(), lr=params['learning_rate'], momentum=params['momentum'])

    num_batches = ceil(len(train_set.dataset) / float(bsz))
    total_loss = 0.0
    for epoch in range(3):
        epoch_loss = 0.0
        for data, target in train_set:
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            epoch_loss += loss.item()
            loss.backward()
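            # average_gradients presumably all-reduces gradients across workers (synchronous SGD)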
            average_gradients(model)
            optimizer.step()
        #logger.debug('Rank: ', rank, ', epoch: ', epoch, ': ', epoch_loss / num_batches)
        if rank == 0:
            nni.report_intermediate_result(epoch_loss / num_batches)
        total_loss += (epoch_loss / num_batches)
    total_loss /= 3
    logger.debug('Final loss: {}'.format(total_loss))
    if rank == 0:
        nni.report_final_result(total_loss)
Example #25
def main_nni(X, y, title, folder, result_type):
    # params = nni.get_next_parameter()
    params = {
        "hid_dim_0": 100,
        "hid_dim_1": 10,
        "reg": 0.68,
        "dims": [20, 40, 60, 2],
        "lr": 0.001,
        "test_size": 0.15,
        "batch_size": 4,
        "shuffle": 1,
        "num_workers": 4,
        "epochs": 150,
        "optimizer": 'SGD',
        "loss": 'MSE'
    }

    print(params)
    auc, loss = nn_nni_main(X, y, params, title, folder)

    if result_type == "loss":
        nni.report_final_result(loss)
    if result_type == "auc":
        nni.report_final_result(auc)
    else:
        raise Exception
Example #26
def train_ml_flow_predictor(pre_model, dataset, test_set_choice,
                            early_stopping_round, **kwargs):
    train_set = dataset.gen_flow_session(set_choice=0, **kwargs)
    eval_set = dataset.gen_flow_session(set_choice=1, **kwargs)
    test_set = dataset.gen_flow_session(set_choice=test_set_choice, **kwargs)

    def _split_set(input_set):
        _, seq = zip(*input_set)
        seq = np.array(seq)
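        # per item: everything but the last step is the history, the last step is the label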
        history_seq, label = seq[:, :-1], seq[:, -1]
        return history_seq, label

    train_history, train_label = _split_set(train_set)
    eval_history, eval_label = _split_set(eval_set)
    test_history, test_label = _split_set(test_set)
    try:
        pre_model = pre_model.fit(train_history,
                                  train_label,
                                  eval_set=[(eval_history, eval_label)],
                                  early_stopping_rounds=early_stopping_round)
    except TypeError:
        pre_model = pre_model.fit(train_history, train_label)
    pre = pre_model.predict(test_history)

    score_series = cal_regression_metric(pre, test_label)
    print(score_series)
    nni.report_final_result(score_series.loc['mae'])
    return score_series
Example #27
def run(X_train, X_test, y_train, y_test, model_):
    '''Train model and predict result'''
    y_train_log = np.log1p(y_train)
    model_.fit(X_train, y_train_log)

    predict_y_train = model_.predict(X_train)
    predict_y_train = np.expm1(predict_y_train)

    predict_y_test = model_.predict(X_test)
    predict_y_test = np.expm1(predict_y_test)

    score_train = int(mean_absolute_error(y_train, predict_y_train))
    r2_train = r2_score(y_train, predict_y_train)
    LOG.info('mean_absolute_error on train : %s' % score_train)
    LOG.info('r2 on train : %s' % r2_train)

    score_test = int(mean_absolute_error(y_test, predict_y_test))
    r2_test = r2_score(y_test, predict_y_test)
    LOG.info('mean_absolute_error on test : %s' % score_test)
    LOG.info('r2 on test : %s' % r2_test)

    # add a penalty if the train r2 score is larger than the test r2 score
    if r2_train/r2_test > 1:
        score_rate = (r2_train/r2_test)**2
    else:
        score_rate = 1

    LOG.info('score rate: %s' % score_rate)

    score = score_test * score_rate

    LOG.info('corrected mean_absolute_error on test score: %s' % score)
    nni.report_final_result(score)
Example #28
def main(params):
    '''
    Main function, build mnist network, run and send result to NNI.
    '''
    # Import data
    mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
    print('MNIST data download done.')
    logger.debug('MNIST data download done.')

    # Create the model
    # Build the graph for the deep net
    mnist_network = MnistNetwork(channel_1_num=params['channel_1_num'],
                                 channel_2_num=params['channel_2_num'],
                                 conv_size=params['conv_size'],
                                 hidden_size=params['hidden_size'],
                                 pool_size=params['pool_size'],
                                 learning_rate=params['learning_rate'])
    mnist_network.build_network()
    logger.debug('MNIST network built.')

    # Write log
    graph_location = tempfile.mkdtemp()
    logger.debug('Saving graph to: %s', graph_location)
    train_writer = tf.summary.FileWriter(graph_location)
    train_writer.add_graph(tf.get_default_graph())

    test_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(params['batch_num']):
            batch = mnist.train.next_batch(params['batch_size'])
            mnist_network.train_step.run(
                feed_dict={
                    mnist_network.images: batch[0],
                    mnist_network.labels: batch[1],
                    mnist_network.keep_prob: 1 - params['dropout_rate']
                })

            if i % 100 == 0:
                test_acc = mnist_network.accuracy.eval(
                    feed_dict={
                        mnist_network.images: mnist.test.images,
                        mnist_network.labels: mnist.test.labels,
                        mnist_network.keep_prob: 1.0
                    })

                nni.report_intermediate_result(test_acc)
                logger.debug('test accuracy %g', test_acc)
                logger.debug('Pipe send intermediate result done.')

        test_acc = mnist_network.accuracy.eval(
            feed_dict={
                mnist_network.images: mnist.test.images,
                mnist_network.labels: mnist.test.labels,
                mnist_network.keep_prob: 1.0
            })

        nni.report_final_result(test_acc)
        logger.debug('Final result is %g', test_acc)
        logger.debug('Send final result done.')
Example #29
def train(params):
    num_units = params.get('num_units')
    dropout_rate = params.get('dropout_rate')
    lr = params.get('lr')
    activationOne = params.get('activationOne')
    activationTwo = params.get('activationTwo')
    batch_size = params.get('batch_size')
    test_size = params.get('test_size')
    vocab_size = params.get('vocab_size')
    embedding_dim = params.get('embedding_dim')
    lossF = params.get('lossF')
    model = EmbeddingNN(test_size, num_units, dropout_rate, lr, activationOne,
                        activationTwo, vocab_size, embedding_dim, lossF)
    X_train, y_train, X_test, y_test, class_weights, input_shape = load_dataset(
        test_size)

    #Model fit
    model.fit(X_train,
              y_train,
              validation_data=(X_test, y_test),
              class_weight=class_weights,
              verbose=2,
              batch_size=int(batch_size))
    threshold = true_acc(model, X_test, y_test)
    y_pred = model.predict(X_test)
    y_pred = np.array([1 if row > threshold else 0 for row in y_pred])
    y_test = np.array(y_test)
    acc = np.mean(y_pred == y_test)
    # print(acc)
    nni.report_final_result(acc)
Example #30
def run(X_train, X_test, y_train, y_test, model):
    '''Train model and predict result'''
    model.fit(X_train, y_train)
    predict_y = model.predict(X_test)
    score = r2_score(y_test, predict_y)
    LOG.debug('r2 score: %s' % score)
    nni.report_final_result(score)
Example #31
def main():
    _params = {
        "data_name": "ia-retweet-pol",
        "graphs_cutoff_number": 2,
        "net": GCNNet,
        "l1_lambda": 0,
        "epochs": 500,
        "gcn_dropout_rate": 0.7,
        "lstm_dropout_rate": 0,
        "gcn_hidden_sizes": [10, 10, 10, 10, 10, 10, 10, 10, 10],
        "learning_rate": 0.001,
        "weight_decay": 0,
        "gcn_latent_dim": 5,
        "lstm_hidden_size": 10,
        "lstm_num_layers": 1,
        "learned_label": DEFAULT_LABEL_TO_LEARN,
    }
    l1_lambda = [0, 1e-7]
    epochs = [500]
    gcn_dropout_rate = [0.3, 0.5]
    gcn_hidden_sizes = [[100, 100], [200, 200]]
    learning_rate = [1e-3, 1e-2, 3e-2]
    weight_decay = [5e-2, 1e-2]
    gcn_latent_dim = [50, 100]
    lstm_hidden_size = [50, 100]
    results = []

    argparser = argparse.ArgumentParser()
    argparser.add_argument("--nni", action='store_true')

    args = argparser.parse_args()

    NNI = args.nni

    if NNI:
        p = nni.get_next_parameter()
        p["gcn_hidden_sizes"] = ast.literal_eval(p["gcn_hidden_sizes"])
        _params.update(p)

    (
        model_loss,
        model_accuracy,
        model_tot_accuracy,
        zero_model_tot_loss,
        zero_model_tot_accuracy,
        first_order_tot_loss,
        first_order_tot_accuracy,
        zero_model_diff_loss,
        zero_model_diff_accuracy,
        first_order_diff_loss,
        first_order_diff_accuracy
    ) = run_trial(_params)

    if NNI:
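        # report the learned model's accuracy minus the zero-model baseline's accuracy as the final metric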
        nni.report_final_result(
            model_tot_accuracy[0]-zero_model_tot_accuracy[-1])
    else:
        print(f"Final result: model tot accuracy: {model_tot_accuracy}, zero_model_tot_accuracy: {zero_model_tot_accuracy}, "
              f"first order tot accuracy: {first_order_tot_accuracy}, zero model diff accuracy: {zero_model_diff_accuracy}, "
              f"first order diff accuracy: {first_order_diff_accuracy}")