Example #1
def handler(context):
    print(
        f'start training with parameters : {Parameters.as_dict()}, context : {context}'
    )

    X_train, y_train, cols_train = train_data_loader(DATALAKE_CHANNEL_ID,
                                                     DATALAKE_TRAIN_FILE_ID,
                                                     LABEL_FIELD, INPUT_FIELDS)
    dtrain = lgb.Dataset(X_train, y_train)

    if DATALAKE_VAL_FILE_ID:
        X_val, y_val, _ = train_data_loader(DATALAKE_CHANNEL_ID,
                                            DATALAKE_VAL_FILE_ID, LABEL_FIELD,
                                            INPUT_FIELDS)
    else:
        X_val, y_val = None, None

    extraction_cb = ModelExtractionCallback()
    tensorboard_cb = TensorBoardCallback(statistics, writer)
    tensorboard_cb.set_valid(X_val, y_val, Parameters.IS_CLASSIFICATION,
                             IS_MULTI, Parameters.NUM_CLASS)
    callbacks = [
        extraction_cb,
        tensorboard_cb,
    ]

    lgb.cv(PARAMS,
           dtrain,
           nfold=Parameters.NFOLD,
           early_stopping_rounds=Parameters.EARLY_STOPPING_ROUNDS,
           verbose_eval=Parameters.VERBOSE_EVAL,
           stratified=STRATIFIED,
           callbacks=callbacks,
           metrics=Parameters.METRIC,
           seed=Parameters.SEED)

    models = extraction_cb.raw_boosters
    for i, model in enumerate(models):
        model.save_model(
            os.path.join(ABEJA_TRAINING_RESULT_DIR, f'model_{i}.txt'))

    di = {**(Parameters.as_dict()), 'cols_train': cols_train}
    with open(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'lgb_env.json'),
              'w') as lgb_env:
        json.dump(di, lgb_env)
    writer.close()
Example #2
def predict():
    weight = np.load("./result/weight.npy")
    tr_data_loader = train_data_loader()
    te_data_loader = test_data_loader(tr_data_loader.mean, tr_data_loader.std)
    question = te_data_loader.get_data()
    # predict
    pre = np.dot(question, weight)
    pre = (pre * te_data_loader.std[9]) + te_data_loader.mean[9]
    for i in range(len(pre)):
        print("id:", i, pre[i])
    # save file
    with open("./result/predict.csv", "w") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["id", "value"])
        for i in range(len(pre)):
            id_name = 'id_' + str(i)
            answer = float(pre[i])
            if answer < 0:
                answer = 0
            writer.writerow([id_name, answer])
Example #3
hyper_params = {
	"num_epochs" : config.num_epochs,
	"batch_size" : config.batch_size,
	"learning_rate" : config.learning_rate,
	"hidden_size" : config.hidden_size,
	"pretrained" : config.pretrained
}


# define a path to save experiment logs
experiment_path = "./{}".format(config.exp)
if not os.path.exists(experiment_path):
    os.mkdir(experiment_path)

#create data loaders
train_dataloader = data_loader.train_data_loader()
test_dataloader = data_loader.test_data_loader()

Model = model.newModel()
Model.to(config.device)

#define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adadelta(Model.parameters(), lr = config.learning_rate)

def train(dataloader, model, loss_fn, optimizer):
	size = len(dataloader.dataset)
	for batch, (X, y) in enumerate(dataloader):
		X, y = X.to(config.device), y.to(config.device)

		#compute prediction error
Example #4
    bind_model(model)
    #model.summary()
    """ Load data """
    print('dataset path', DATASET_PATH)
    output_path = ['./img_list.pkl', './label_list.pkl']
    train_dataset_path = DATASET_PATH + '/train/train_data'
    if nsml.IS_ON_NSML:
        # Caching file
        nsml.cache(train_data_loader,
                   data_path=train_dataset_path,
                   img_size=input_shape[:2],
                   output_path=output_path)
    else:
        # When running locally, please provide the local path to the dataset.
        train_data_loader(train_dataset_path,
                          input_shape[:2],
                          output_path=output_path)

    with open(output_path[0], 'rb') as img_f:
        img_list = pickle.load(img_f)
    with open(output_path[1], 'rb') as label_f:
        label_list = pickle.load(label_f)

    mean_arr = None  # np.zeros(input_shape)
    #for img in img_list:
    #    mean_arr += img.astype('float32')
    #mean_arr /= len(img_list)
    #print('mean shape:',mean_arr.shape, 'mean mean:',mean_arr.mean(), 'mean max:',mean_arr.max())
    #mean_arr /= 255
    #np.save('./mean.npy', mean_arr)
Example #5
    bTrainmode = False
    if config.mode == 'train':
        bTrainmode = True
        """ Load data """
        print(DATASET_PATH)
        output_path = ['./img_list.pkl', './label_list.pkl']
        train_dataset_path = DATASET_PATH + '/train/train_data'

        if nsml.IS_ON_NSML:
            # Caching file
            nsml.cache(train_data_loader,
                       data_path=train_dataset_path,
                       output_path=output_path)
        else:
            train_dataset_path = config.debug_data
            train_data_loader(train_dataset_path, output_path=output_path)

        with open(output_path[0], 'rb') as img_f:
            img_list = pickle.load(img_f)
        with open(output_path[1], 'rb') as label_f:
            label_list = pickle.load(label_f)

        queries, references, queries_img, reference_img \
            = convert_to_query_db_data_for_generator(img_list, label_list, input_shape, config.dev_querynum, config.dev_referencenum)
        print("mAP devset : query(%d), reference(%d) " %
              (len(queries), len(references)))

        dataset = get_triplet_dataset(train_dataset_path,
                                      batch_size,
                                      nb_epoch,
                                      num_classes=num_classes)
Example #6
def train(model,
          train_inp_tuple,
          validation_inp_tuple,
          checkpoint_dir,
          checkpoint_prefix,
          device,
          epoches=5,
          batch_size=1024,
          logger=None,
          epoch_start=0,
          max_seq_len=100,
          lr=1e-3):
    """
	: model (torch.nn.module): model to be trained
	: train_inp_tuple (list[tuple(str, list[str], list[str])]): list of input for train_data_loader
		: str: path to label data
		: list[str]: list of embedding variables
		: list[str]: list of paths to a pkl file 
	: validation_inp_tuple (list[tuple(str, list[str], list[str])]): list of input for train_data_loader
		: str: path to label data
		: list[str]: list of embedding variables
		: list[str]: list of paths to a pkl file
	: checkpoint_dir (str): path to checkpoint directory
	: checkpoint_prefix (str): prefix of checkpoint file
	: device (str): device to train the model
	: epoches (int): number of epoches to train
	: batch_size (int): size of mini batch
	: epoch_start (int): if = 0 then train a new model, else load an existing model and continue to train, default 0
	: max_seq_len (int): max length for sequence input, default 100 
	: lr (float): learning rate for Adam, default 1e-3
	"""
    global w2v_registry, model_path
    gc.enable()

    # Check checkpoint directory
    if not os.path.isdir(checkpoint_dir): os.mkdir(checkpoint_dir)

    # Load model if not train from scratch
    if epoch_start != 0:
        model_artifact_path = os.path.join(
            checkpoint_dir, '{}_{}.pth'.format(checkpoint_prefix, epoch_start))
        model.load_state_dict(torch.load(model_artifact_path))
        if logger:
            logger.info('Start retraining from epoch {}'.format(epoch_start))

    # Set up loss function and optimizer
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, amsgrad=True)

    div, mod = divmod(810000, batch_size)
    n_batch_estimate = div + min(mod, 1)

    # Main Loop
    for epoch in range(1 + epoch_start, epoches + 1 + epoch_start):
        if logger:
            logger.info('=========================')
            logger.info('Processing Epoch {}/{}'.format(
                epoch, epoches + epoch_start))
            logger.info('=========================')

        # Train model
        model.train()
        train_running_loss, train_n_batch = 0, 0

        for index, (label_artifact_path, seq_inp_target,
                    seq_inp_path) in enumerate(train_inp_tuple, start=1):
            train_loader = train_data_loader(label_artifact_path,
                                             seq_inp_target,
                                             seq_inp_path,
                                             w2v_registry,
                                             batch_size=batch_size,
                                             max_seq_len=max_seq_len)
            train_iterator = iter(train_loader)
            while True:
                try:
                    y, x_seq, x_last_idx = next(train_iterator)
                    y = torch.from_numpy(y).long().to(device)
                    x = []
                    for s in x_seq:
                        x.append(s.to(device))
                    x.append(x_last_idx)
                    optimizer.zero_grad()
                    yp = F.softmax(model(*x), dim=1)
                    loss = loss_fn(yp, y)

                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   max_norm=100)
                    optimizer.step()

                    train_running_loss += loss.item()
                    train_n_batch += 1

                    if train_n_batch % 100 == 0 and logger:
                        logger.info(
                            'Epoch {}/{} - Batch {}/{} Done - Train Loss: {:.6f}'
                            .format(epoch, epoches + epoch_start,
                                    train_n_batch, n_batch_estimate,
                                    train_running_loss / train_n_batch))
                    del x, y, yp, x_seq, x_last_idx
                    _ = gc.collect()
                    torch.cuda.empty_cache()

                except StopIteration:
                    break

            del train_loader, train_iterator
            _ = gc.collect()
            torch.cuda.empty_cache()

            if logger:
                logger.info(
                    'Epoch {}/{} - Batch {}/{} Done - Train Loss: {:.6f}'.
                    format(epoch, epoches + epoch_start, train_n_batch,
                           n_batch_estimate,
                           train_running_loss / train_n_batch))

        # Evaluate model
        model.eval()
        test_running_loss, test_n_batch = 0, 0
        true_y, pred_y = [], []

        for index, (label_artifact_path, seq_inp_target,
                    seq_inp_path) in enumerate(validation_inp_tuple, start=1):
            train_loader = train_data_loader(label_artifact_path,
                                             seq_inp_target,
                                             seq_inp_path,
                                             w2v_registry,
                                             batch_size=batch_size,
                                             max_seq_len=max_seq_len)
            train_iterator = iter(train_loader)
            while True:
                try:
                    y, x_seq, x_last_idx = next(train_iterator)
                    y = torch.from_numpy(y).long().to(device)
                    x = []
                    for s in x_seq:
                        x.append(s.to(device))
                    x.append(x_last_idx)
                    yp = F.softmax(model(*x), dim=1)
                    loss = loss_fn(yp, y)

                    pred_y.extend(list(yp.cpu().detach().numpy()))
                    true_y.extend(list(y.cpu().detach().numpy()))

                    test_running_loss += loss.item()
                    test_n_batch += 1

                    del x, y, yp, x_seq, x_last_idx
                    _ = gc.collect()
                    torch.cuda.empty_cache()

                except StopIteration:
                    break

            del train_loader, train_iterator
            _ = gc.collect()
            torch.cuda.empty_cache()

        pred = np.argmax(np.array(pred_y), 1)
        true = np.array(true_y).reshape((-1, ))
        acc_score = accuracy_score(true, pred)

        del pred, true, pred_y, true_y
        _ = gc.collect()
        torch.cuda.empty_cache()

        if logger:
            logger.info(
                'Epoch {}/{} Done - Test Loss: {:.6f}, Test Accuracy: {:.6f}'.
                format(epoch, epoches + epoch_start,
                       test_running_loss / test_n_batch, acc_score))

        # Save model state dict
        ck_file_name = '{}_{}.pth'.format(checkpoint_prefix, epoch)
        ck_file_path = os.path.join(checkpoint_dir, ck_file_name)

        torch.save(model.state_dict(), ck_file_path)
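
The docstring above describes the expected shape of `train_inp_tuple` and `validation_inp_tuple`. The following is a minimal invocation sketch; the file paths, the `DemoModel` class, and the contents of the module-level `w2v_registry` are illustrative assumptions, not part of the original example.

# Hypothetical usage sketch for train() above; all paths, DemoModel, and the
# w2v_registry contents are placeholder assumptions.
import logging

import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('train')

# train() reads this module-level registry of word2vec artifacts.
w2v_registry = {'creative_id': './w2v/creative_id.model'}

# Each tuple: (path to label data, embedding variables, paths to pkl files).
train_inp_tuple = [('./data/train_labels.pkl', ['creative_id'],
                    ['./data/train_creative_seq.pkl'])]
validation_inp_tuple = [('./data/valid_labels.pkl', ['creative_id'],
                         ['./data/valid_creative_seq.pkl'])]

model = DemoModel()  # placeholder torch.nn.Module
device = 'cuda' if torch.cuda.is_available() else 'cpu'

train(model,
      train_inp_tuple,
      validation_inp_tuple,
      checkpoint_dir='./checkpoints',
      checkpoint_prefix='demo',
      device=device,
      epoches=5,
      batch_size=1024,
      logger=logger)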
Example #7
    if config.model_to_test:
        model = load(file_path=config.model_to_test)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    if config.mode == 'train':

        #torch.autograd.set_detect_anomaly(True)

        """ Load data """
        print('dataset path', dataset_path)
        train_dataset_path = dataset_path #+ '/train/train_data'

        img_dataset = train_data_loader(data_path=train_dataset_path, img_size=input_size,
                                        use_augment=use_augmentation)

        # Balanced batch sampler and online train loader
        train_batch_sampler = BalancedBatchSampler(img_dataset, n_classes=num_classes, n_samples=num_samples)
        #train_batch_sampler = NegativeClassMiningBatchSampler(img_dataset, n_classes=num_classes, n_samples=num_samples)
        online_train_loader = torch.utils.data.DataLoader(img_dataset,
                                                          batch_sampler=train_batch_sampler,
                                                          num_workers=4,
                                                          pin_memory=True)

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # Gather the parameters to be optimized/updated.
        params_to_update = model.parameters()
        print("Params to learn:")
        if feature_extracting:
Example #8
def main():
    t_data_loader = train_data_loader()
    question, answer = t_data_loader.load_all_data()
    train(question, answer, 300000, 1, True)
Example #9
            t2 = time.time()
            print(res.history)
            print('Training time for one epoch : %.1f' % ((t2 - t1)))
            train_loss, train_acc = res.history['loss'][0], res.history['acc'][0]
            nsml.report(summary=True, epoch=epoch, epoch_total=nb_epoch, loss=train_loss, acc=train_acc)
            nsml.save(epoch+1)

        print('Total training time : %.1f' % (time.time() - t0))

        """ Test with a subset of training data """
        print('dataset path', DATASET_PATH)
        output_path = ['./img_list.pkl', './label_list.pkl']
        train_dataset_path = DATASET_PATH + '/train/train_data'

        train_data_loader(train_dataset_path,
                          input_shape[:2],
                          output_path=output_path,
                          num_samples=5000)

        with open(output_path[0], 'rb') as img_f:
            img_list = pickle.load(img_f)
        with open(output_path[1], 'rb') as label_f:
            label_list = pickle.load(label_f)

        x_train = np.asarray(img_list)
        labels = np.asarray(label_list)
        label_binarizer = LabelBinarizer()
        y_train = label_binarizer.fit_transform(labels)
        x_train = x_train.astype('float32')
        # x_train /= 255
        x_train = preprocess_input(x_train)
        print(len(labels), 'validation samples')
Example #10
        model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'])

        """ Load data """
        print('dataset path', DATASET_PATH)
        output_path = ['./triplets.txt']
        train_dataset_path = DATASET_PATH + '/train/train_data'

        if nsml.IS_ON_NSML:
            # Caching file
            nsml.cache(tripletSampler, data_path=train_dataset_path, img_size=input_shape[:2],
                       output_path=output_path)
        else:
            # When running locally, please provide the local path to the dataset.
            train_data_loader('/home/donghoon/Downloads/image-similarity-deep-ranking/dataset', input_shape[:2], output_path=output_path)

        with open(output_path[0], 'rb') as img_f:
            img_list = pickle.load(img_f)


        # x_train = np.asarray(img_list)
        # labels = np.asarray(label_list)
        # y_train = keras.utils.to_categorical(labels, num_classes=num_classes)
        # x_train = x_train.astype('float32')
        # x_train /= 255
        # print(len(labels), 'train samples')
        #
        # """ Callback """
        # monitor = 'acc'
        # reduce_lr = ReduceLROnPlateau(monitor=monitor, patience=3)
Example #11
def main_training(log_tuple,
                  validation_set=0,
                  threshold=0.5,
                  layers=3,
                  lr=1e-2,
                  nb_epoch=5,
                  nb_samples_per_epoch=100,
                  nb_val_samples=20,
                  patience=20,
                  path='models/weights'):
    best_val_loss = np.inf
    not_done_looping = True
    nb_perf_not_improved = 0
    demo_dict = {}
    log_train, log_valid = log_tuple
    for epoch in range(nb_epoch):
        print("Epoch: {}/{}".format(epoch + 1, nb_epoch))
        if not_done_looping:
            progbar = Progbar(target=nb_samples_per_epoch)
            seen = 0
            count_train_samples = 0
            decay = math.pow(0.5, epoch / 50)
            lr = lr * decay
            set_lr(lr)
            mean_accuracy = 0
            mean_val_loss = 0
            mean_dice_score = 0
            mean_precision = 0
            mean_recall = 0
            count_valid_samples = 0
            no_of_patches_seen = 0
            mean_train_loss = 0
            mean_train_recall = 0
            mean_train_precision = 0
            mean_train_dice_score = 0

            for X_train, Y_train, weights in train_data_loader(
                    train_batch_size, combine_label):
                if count_train_samples == nb_samples_per_epoch:
                    break
                if seen < nb_samples_per_epoch:
                    log_values = []
                xs = X_train.shape[2]
                ys = Y_train.shape[3]
                Y_train = Y_train.reshape((train_batch_size * xs * ys, ))
                weights = weights.reshape((train_batch_size * xs * ys, ))
                train_loss = train_fn(X_train.astype('float32'),
                                      Y_train.astype('int32'),
                                      weights.astype('float32'))
                Y_pred = predict_fn(X_train.astype('float32'))
                Y_pred_class = np.argmax(Y_pred, axis=1)
                dice_score = get_dice_score(Y_train, Y_pred_class)
                mean_train_loss += train_loss
                mean_train_dice_score += dice_score
                count_train_samples += X_train.shape[0]
                seen += X_train.shape[0]
                log_values.append(('train_loss', train_loss))
                if seen < nb_samples_per_epoch:
                    progbar.update(seen, log_values)
            log_values.append(('train_loss', train_loss))
            progbar.update(seen, log_values, force=True)
            mean_train_loss = mean_train_loss / (nb_samples_per_epoch /
                                                 train_batch_size)
            mean_train_dice_score = mean_train_dice_score / (
                nb_samples_per_epoch / train_batch_size)
            log_train.post('train_loss', mean_train_loss, epoch)
            log_train.post("mean_train_dice_score", mean_train_dice_score,
                           epoch)

            if epoch % 5 == 0:
                validation_start = time.time()
                count_valid_samples = 0
                for X_valid, Y_valid in valid_data_loader(
                        nb_val_samples, valid_batch_size, combine_label):
                    xs = X_valid.shape[2]
                    ys = Y_valid.shape[3]
                    Y_valid = Y_valid.reshape((valid_batch_size * xs * ys, ))
                    Y_pred = test_predict_fn(X_valid.astype('float32'))
                    val_loss = loss(
                        Y_pred.astype('float32'), Y_valid.astype('int32'),
                        np.ones(
                            (Y_valid.shape[0], )).astype('float32')).eval()
                    Y_pred_class = np.argmax(Y_pred, axis=1)
                    dice_score = get_dice_score(Y_valid, Y_pred_class)
                    Y_pred = Y_pred_class.reshape(valid_batch_size, 1, xs, ys)
                    Y_valid = Y_valid.reshape(valid_batch_size, 1, xs, ys)
                    save_image_path = os.path.join(
                        save_path, str(epoch),
                        '{}.png'.format(count_valid_samples))
                    if not os.path.exists(os.path.join(save_path, str(epoch))):
                        os.makedirs(os.path.join(save_path, str(epoch)))
                    vis_detections(X_valid[5][0], Y_valid[5][0], Y_pred[5][0],
                                   save_image_path)
                    mean_val_loss += val_loss
                    mean_dice_score += dice_score
                    count_valid_samples += 1

                mean_val_loss = mean_val_loss / (nb_val_samples /
                                                 valid_batch_size)
                mean_dice_score = mean_dice_score / (nb_val_samples /
                                                     valid_batch_size)
                print(mean_val_loss, mean_dice_score)

                log_valid.post("val_loss", mean_val_loss, epoch)
                log_valid.post("mean_val_dice_score", mean_dice_score, epoch)

                print("mean_val_loss: {} , mean_dice_score: {}".format(
                    mean_val_loss, mean_dice_score))
                validation_end = time.time()
                validation_time = validation_end - validation_start
                print('validation time : %ds' % validation_time)
                if mean_val_loss < best_val_loss:
                    best_val_loss = mean_val_loss
                    best_epoch = epoch
                    nb_perf_not_improved = 0
                    dpath = os.path.join(
                        path, "Unet_vald_set_{}_val_loss_{}_epoch_{}".format(
                            validation_set, best_val_loss, best_epoch))
                    save_params(dpath)
                else:
                    nb_perf_not_improved += 1
                    if nb_perf_not_improved > patience:
                        print(
                            "Exiting training as performance  not improving for {} loops"
                            .format(patience))
                        not_done_looping = False

    return best_val_loss, best_epoch
Example #12
STRATIFIED = Parameters.STRATIFIED and Parameters.IS_CLASSIFICATION
IS_MULTI = Parameters.OBJECTIVE.startswith("multi")

statistics = Statistics(Parameters.NUM_ITERATIONS)

log_path = os.path.join(ABEJA_TRAINING_RESULT_DIR, 'logs')
writer = SummaryWriter(log_dir=log_path)

# In[4]:

print(f'start training with parameters : {Parameters.as_dict()}')

# In[5]:

X_train, y_train, cols_train = train_data_loader(DATALAKE_CHANNEL_ID,
                                                 DATALAKE_TRAIN_FILE_ID,
                                                 LABEL_FIELD, INPUT_FIELDS)

# In[8]:

dtrain = lgb.Dataset(X_train, y_train)

if DATALAKE_VAL_FILE_ID:
    X_val, y_val, _ = train_data_loader(DATALAKE_CHANNEL_ID,
                                        DATALAKE_VAL_FILE_ID, LABEL_FIELD,
                                        INPUT_FIELDS)
else:
    X_val, y_val = None, None

extraction_cb = ModelExtractionCallback()
tensorboard_cb = TensorBoardCallback(statistics, writer)
Example #13
def handler(context):
    print(
        f'start training with parameters : {Parameters.as_dict()}, context : {context}'
    )

    X_train, y_train, cols_train = train_data_loader(DATALAKE_CHANNEL_ID,
                                                     DATALAKE_TRAIN_FILE_ID,
                                                     LABEL_FIELD, INPUT_FIELDS)
    models = []
    pred = np.zeros(len(X_train))

    if DATALAKE_VAL_FILE_ID:
        X_val, y_val, _ = train_data_loader(DATALAKE_CHANNEL_ID,
                                            DATALAKE_VAL_FILE_ID, LABEL_FIELD,
                                            INPUT_FIELDS)
        if IS_MULTI:
            pred_val = np.zeros((len(X_val), NUM_CLASS))
        else:
            pred_val = np.zeros(len(X_val))
    else:
        X_val, y_val, pred_val = None, None, None

    for i, (train_index, valid_index) in enumerate(skf.split(X_train,
                                                             y_train)):
        model = classifier(**PARAMS)
        model.fit(X_train.iloc[train_index], y_train[train_index])
        pred[valid_index] = model.predict(X_train.iloc[valid_index])

        score, loss = evaluator(y_train[valid_index], pred[valid_index])
        score_val = 0.0
        loss_val = 0.0

        filename = os.path.join(ABEJA_TRAINING_RESULT_DIR, f'model_{i}.pkl')
        with open(filename, 'wb') as f:
            pickle.dump(model, f)

        models.append(model)

        if DATALAKE_VAL_FILE_ID:
            pred_val_cv = model.predict(X_val)
            if IS_MULTI:
                pred_val += np.identity(NUM_CLASS)[pred_val_cv]
            else:
                pred_val += pred_val_cv
            score_val, loss_val = evaluator(y_val, pred_val_cv)

        print('-------------')
        print(
            'cv {} || score:{:.4f} || loss:{:.4f} || val_score:{:.4f} || val_loss:{:.4f}'
            .format(i + 1, score, loss, score_val, loss_val))
        writer.add_scalar('main/acc', score, i + 1)
        writer.add_scalar('main/loss', loss, i + 1)
        writer.add_scalar('test/acc', score_val, i + 1)
        writer.add_scalar('test/loss', loss_val, i + 1)
        statistics(i + 1, loss, score, loss_val, score_val)
        writer.flush()

    score, loss = evaluator(y_train, pred)
    score_val = 0.0
    loss_val = 0.0

    if DATALAKE_VAL_FILE_ID:
        if IS_MULTI:
            pred_val = np.argmax(pred_val, axis=1)
        else:
            pred_val /= len(models)
        score_val, loss_val = evaluator(y_val, pred_val)

    print('-------------')
    print(
        'cv total score:{:.4f} || cv total loss:{:.4f} || cv total val_score:{:.4f} || cv total val_loss:{:.4f}'
        .format(score, loss, score_val, loss_val))
    statistics(Parameters.NFOLD, None, score, None, score_val)
    writer.add_scalar('main/acc', score, Parameters.NFOLD)
    writer.add_scalar('main/loss', loss, Parameters.NFOLD)
    writer.add_scalar('test/acc', score_val, Parameters.NFOLD)
    writer.add_scalar('test/loss', loss_val, Parameters.NFOLD)
    writer.close()

    di = {**(Parameters.as_dict()), 'cols_train': cols_train}
    with open(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'skf_env.json'),
              'w') as skf_env:
        json.dump(di, skf_env)
    return
Example #14
from sklearn import metrics
from matplotlib import pyplot as plt

from model import Model
from hyperparams import Hyperparams
from data_loader import train_data_loader, test_data_loader, prediction_dataframe, hist_data, untransformed_price


#logger configuration
FORMAT = "[%(filename)s: %(lineno)3s] %(levelname)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)

H = Hyperparams()

train_batch_generator = train_data_loader(H.train_batch_size, H.num_train)
test_batch_generator = test_data_loader(H.test_batch_size, H.num_train)
prediction_dataframe_gen = prediction_dataframe()
scaler = prediction_dataframe_gen.get_scaler()
logger.info("Generators instantiated")

model = Model().get_model()
logger.info("Model loaded")

model.compile(optimizer='RMSProp', loss='mean_squared_error')
logger.info("Model compiled")

logger.info("Beginning training")
train_num_batch = H.num_train//H.train_batch_size
train_shuffled_batch = np.array([np.random.choice(train_num_batch, size=(train_num_batch), replace=False) for _ in range(H.num_epochs)])