Example #1
def select_train_data(filename, methods=['POST']):
    global train_data

    try:
        with open("./datasets/" + filename['filename'], 'r') as file:
            train_data = TrainData()
            train_data.from_metadata(json.load(file))
        
            socketio.emit('selected_train_data', 1)

    except Exception as ex:
        print(ex)
        socketio.emit('selected_train_data', 0)
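The handler above only shows the consuming side; the TrainData class it relies on is not part of this listing. As a rough orientation only, a minimal container compatible with this call pattern might look like the hypothetical sketch below (the class body is an assumption, not the project's actual implementation):

class TrainData:
    """Hypothetical minimal stand-in for the TrainData used above."""

    def __init__(self):
        self.metadata = None

    def from_metadata(self, metadata):
        # keep the parsed JSON so later steps can use it
        self.metadata = metadata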
Example #2
def main():
    learning_rate = 5e-2
    num_epoch = 20
    save_dir = 'models'
    h5file = '/home/song/workspace/datasets/recog-alzheimer/train/train_pre_data.h5'
    csvfile = '/home/song/workspace/datasets/recog-alzheimer/train/train_pre_label.csv'
    dataset = TrainData(h5file, csvfile, img_size=48)
    train_loader = DataLoader(dataset, batch_size=5, shuffle=True)
    solver = Solver(learning_rate, num_epoch, save_dir)
    solver.train(train_loader)
Example #3
def traintest():
    if not os.path.isfile(train_data_pickle):
        # training data
        train_features, train_labels = get_features(['train_data'], "train")
        traindata = TrainData(train_features, train_labels)
        with open(train_data_pickle, mode='wb') as f:
            pickle.dump(traindata, f)
    else:
        print("loading: %s" % (train_data_pickle))
        with open(train_data_pickle, mode='rb') as f:
            traindata = pickle.load(f)
            train_features = traindata.train_inputs
            train_labels = traindata.train_targets

    if not os.path.isfile(test_data_pickle):
        # testing data
        test_features, _ = get_features(['test_data'], "test")
        testdata = TestData(test_features)
        with open(test_data_pickle, mode='wb') as f:
            pickle.dump(testdata, f)
    else:
        print("loading: %s" % (test_data_pickle))
        with open(test_data_pickle, mode='rb') as f:
            testdata = pickle.load(f)
            test_features = testdata.test_inputs

    train_labels = one_hot_encode(train_labels)

    n_dim = train_features.shape[1]
    print("input dim: %s" % (n_dim))

    # random train and test sets.
    '''
    train_test_split = np.random.rand(len(train_features)) < 0.80
    Xtr = train_features[train_test_split]
    Ytr = train_labels[train_test_split]
    Xte = train_features[~train_test_split]
    Yte = train_labels[~train_test_split]
    '''
    Xtr = train_features
    Ytr = train_labels
    Xte = test_features

    knn(n_dim, Xtr, Ytr, Xte)
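one_hot_encode is referenced above but not defined in this snippet. A typical NumPy implementation, assuming integer class labels starting at 0, could be sketched as follows (an assumption, not the project's code):

import numpy as np

def one_hot_encode(labels):
    # assumes labels is a 1-D array of integer class ids starting at 0
    labels = np.asarray(labels, dtype=int)
    n_classes = labels.max() + 1
    encoded = np.zeros((labels.shape[0], n_classes), dtype=np.float32)
    encoded[np.arange(labels.shape[0]), labels] = 1.0
    return encoded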
Example #4
def add_train():
    status = 400  # default: failure (Bad Request)
    try:
        content = request.files['file']
        # get the file name without its extension
        filename = (content.filename).replace(".json", "")
        
        content = content.read().decode('utf-8')
        content = json.loads(content)

        global train_data
        train_data = TrainData(filename)
        
        
        if not train_data.filter_json(content) : 
            message = "incorrect data structure (1)"
            print(message)
            return make_response(jsonify({"message" : message}), status)
        
        if not train_data.is_correct() :
            message = "incorrect data structure (2)"
            print(message)
            return make_response(jsonify({"message" : message}), status)

        if not train_data.metadata() :
            message = "failed to create metadata"
            print(message)
            return make_response(jsonify({"message" : message}), status)
        
        # save metafile
        train_data.create_metafile()
        status = 200
        return make_response(jsonify({"message" : "JSON received"}), status) #200 = success

    except Exception as ex:
        print(ex)
        return make_response(jsonify({"message" : str(ex)}), status)
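The function reads request.files and returns make_response/jsonify, so it is presumably registered as a Flask POST route; the decorator is not shown in the listing. A plausible registration, with a hypothetical route path, would look like this:

from flask import Flask, request, jsonify, make_response

app = Flask(__name__)

@app.route("/add_train", methods=["POST"])  # hypothetical route path
def add_train():
    ...  # body as shown in the example above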
Example #5
def main():
    input_dir = "/amit/kaggle/tgs"
    output_dir = "/artifacts"
    image_size_target = 128
    batch_size = 32
    epochs_to_train = 300
    bce_loss_weight_gamma = 0.98
    sgdr_min_lr = 0.0001  # 0.0001, 0.001
    sgdr_max_lr = 0.001  # 0.001, 0.03
    sgdr_cycle_epochs = 20
    sgdr_cycle_epoch_prolongation = 3
    sgdr_cycle_end_patience = 3
    train_abort_epochs_without_improval = 30
    ensemble_model_count = 3
    swa_epoch_to_start = 30

    model_dir = sys.argv[1] if len(sys.argv) > 1 else None

    train_data = TrainData(input_dir)

    train_set = TrainDataset(train_data.train_set_df, image_size_target, augment=True)
    train_set_data_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)

    val_set = TrainDataset(train_data.val_set_df, image_size_target, augment=False)
    val_set_data_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=2)

    if model_dir:
        model = create_model(pretrained=False).to(device)
        model.load_state_dict(torch.load("{}/model.pth".format(model_dir), map_location=device))
    else:
        model = create_model(pretrained=True).to(device)

    torch.save(model.state_dict(), "{}/model.pth".format(output_dir))

    swa_model = create_model(pretrained=False).to(device)

    print("train_set_samples: %d, val_set_samples: %d" % (len(train_set), len(val_set)))

    global_val_precision_best_avg = float("-inf")
    global_swa_val_precision_best_avg = float("-inf")
    sgdr_cycle_val_precision_best_avg = float("-inf")

    epoch_iterations = len(train_set) // batch_size

    # optimizer = optim.SGD(model.parameters(), lr=sgdr_max_lr, weight_decay=0, momentum=0.9, nesterov=True)
    optimizer = optim.Adam(model.parameters(), lr=sgdr_max_lr)
    lr_scheduler = CosineAnnealingLR(optimizer, T_max=sgdr_cycle_epochs, eta_min=sgdr_min_lr)

    optim_summary_writer = SummaryWriter(log_dir="{}/logs/optim".format(output_dir))
    train_summary_writer = SummaryWriter(log_dir="{}/logs/train".format(output_dir))
    val_summary_writer = SummaryWriter(log_dir="{}/logs/val".format(output_dir))
    swa_val_summary_writer = SummaryWriter(log_dir="{}/logs/swa_val".format(output_dir))

    sgdr_iterations = 0
    sgdr_reset_count = 0
    batch_count = 0
    epoch_of_last_improval = 0
    sgdr_next_cycle_end_epoch = sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
    swa_update_count = 0

    ensemble_model_index = 0
    for model_file_path in glob.glob("{}/model-*.pth".format(output_dir)):
        model_file_name = os.path.basename(model_file_path)
        model_index = int(model_file_name.replace("model-", "").replace(".pth", ""))
        ensemble_model_index = max(ensemble_model_index, model_index + 1)

    print('{"chart": "best_val_precision", "axis": "epoch"}')
    print('{"chart": "val_precision", "axis": "epoch"}')
    print('{"chart": "val_loss", "axis": "epoch"}')
    print('{"chart": "sgdr_reset", "axis": "epoch"}')
    print('{"chart": "precision", "axis": "epoch"}')
    print('{"chart": "loss", "axis": "epoch"}')
    print('{"chart": "swa_val_precision", "axis": "epoch"}')
    print('{"chart": "swa_val_loss", "axis": "epoch"}')

    train_start_time = time.time()

    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs_to_train):
        epoch_start_time = time.time()
        model.train()

        train_loss_sum = 0.0
        train_precision_sum = 0.0
        train_step_count = 0
        for batch in train_set_data_loader:
            images, masks, mask_weights = \
                batch[0].to(device, non_blocking=True), \
                batch[1].to(device, non_blocking=True), \
                batch[2].to(device, non_blocking=True)

            lr_scheduler.step(epoch=min(sgdr_cycle_epochs, sgdr_iterations / epoch_iterations))

            optimizer.zero_grad()
            prediction_logits = model(images)
            predictions = torch.sigmoid(prediction_logits)
            criterion.weight = mask_weights
            loss = criterion(prediction_logits, masks)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()
            train_precision_sum += np.mean(precision_batch(predictions, masks))
            sgdr_iterations += 1
            train_step_count += 1
            batch_count += 1

            optim_summary_writer.add_scalar("lr", get_learning_rate(optimizer), batch_count + 1)

        train_loss_avg = train_loss_sum / train_step_count
        train_precision_avg = train_precision_sum / train_step_count

        val_loss_avg, val_precision_avg = evaluate(model, val_set_data_loader, criterion)

        model_improved_within_sgdr_cycle = val_precision_avg > sgdr_cycle_val_precision_best_avg
        if model_improved_within_sgdr_cycle:
            torch.save(model.state_dict(), "{}/model-{}.pth".format(output_dir, ensemble_model_index))
            sgdr_cycle_val_precision_best_avg = val_precision_avg

        model_improved = val_precision_avg > global_val_precision_best_avg
        ckpt_saved = False
        if model_improved:
            torch.save(model.state_dict(), "{}/model.pth".format(output_dir))
            global_val_precision_best_avg = val_precision_avg
            ckpt_saved = True

        swa_model_improved = False
        if epoch + 1 >= swa_epoch_to_start:
            if model_improved_within_sgdr_cycle:
                swa_update_count += 1
                moving_average(swa_model, model, 1.0 / swa_update_count)
                bn_update(train_set_data_loader, swa_model)

            swa_model_improved = val_precision_avg > global_swa_val_precision_best_avg
            if swa_model_improved:
                torch.save(swa_model.state_dict(), "{}/swa_model.pth".format(output_dir))
                global_swa_val_precision_best_avg = val_precision_avg

        if model_improved or swa_model_improved:
            epoch_of_last_improval = epoch

        sgdr_reset = False
        if (epoch + 1 >= sgdr_next_cycle_end_epoch) and (epoch - epoch_of_last_improval >= sgdr_cycle_end_patience):
            sgdr_iterations = 0
            sgdr_next_cycle_end_epoch = epoch + 1 + sgdr_cycle_epochs + sgdr_cycle_epoch_prolongation
            ensemble_model_index += 1
            sgdr_cycle_val_precision_best_avg = float("-inf")
            sgdr_reset_count += 1
            sgdr_reset = True

        swa_val_loss_avg, swa_val_precision_avg = evaluate(swa_model, val_set_data_loader, criterion)

        optim_summary_writer.add_scalar("sgdr_reset", sgdr_reset_count, epoch + 1)

        train_summary_writer.add_scalar("loss", train_loss_avg, epoch + 1)
        train_summary_writer.add_scalar("precision", train_precision_avg, epoch + 1)

        val_summary_writer.add_scalar("loss", val_loss_avg, epoch + 1)
        val_summary_writer.add_scalar("precision", val_precision_avg, epoch + 1)

        swa_val_summary_writer.add_scalar("loss", swa_val_loss_avg, epoch + 1)
        swa_val_summary_writer.add_scalar("precision", swa_val_precision_avg, epoch + 1)

        epoch_end_time = time.time()
        epoch_duration_time = epoch_end_time - epoch_start_time

        print(
            "[%03d/%03d] %ds, lr: %.6f, loss: %.3f, val_loss: %.3f|%.3f, prec: %.3f, val_prec: %.3f|%.3f, ckpt: %d, rst: %d" % (
                epoch + 1,
                epochs_to_train,
                epoch_duration_time,
                get_learning_rate(optimizer),
                train_loss_avg,
                val_loss_avg,
                swa_val_loss_avg,
                train_precision_avg,
                val_precision_avg,
                swa_val_precision_avg,
                int(ckpt_saved),
                int(sgdr_reset)),
            flush=True)

        print('{"chart": "best_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, global_val_precision_best_avg))
        print('{"chart": "val_precision", "x": %d, "y": %.3f}' % (epoch + 1, val_precision_avg))
        print('{"chart": "val_loss", "x": %d, "y": %.3f}' % (epoch + 1, val_loss_avg))
        print('{"chart": "sgdr_reset", "x": %d, "y": %.3f}' % (epoch + 1, sgdr_reset_count))
        print('{"chart": "precision", "x": %d, "y": %.3f}' % (epoch + 1, train_precision_avg))
        print('{"chart": "loss", "x": %d, "y": %.3f}' % (epoch + 1, train_loss_avg))
        print('{"chart": "swa_val_precision", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_precision_avg))
        print('{"chart": "swa_val_loss", "x": %d, "y": %.3f}' % (epoch + 1, swa_val_loss_avg))

        if sgdr_reset and sgdr_reset_count >= ensemble_model_count and epoch - epoch_of_last_improval >= train_abort_epochs_without_improval:
            print("early abort")
            break

    optim_summary_writer.close()
    train_summary_writer.close()
    val_summary_writer.close()
    swa_val_summary_writer.close()  # also close the SWA validation writer

    train_end_time = time.time()
    print()
    print("Train time: %s" % str(datetime.timedelta(seconds=train_end_time - train_start_time)))

    eval_start_time = time.time()

    print()
    print("evaluation of the training model")

    model.load_state_dict(torch.load("{}/model.pth".format(output_dir), map_location=device))

    analyze(Ensemble([model]), train_data.val_set_df, use_tta=False)
    analyze(Ensemble([model]), train_data.val_set_df, use_tta=True)

    score_to_model = {}
    ensemble_model_candidates = glob.glob("{}/model-*.pth".format(output_dir))
    ensemble_model_candidates.append("{}/swa_model.pth".format(output_dir))
    for model_file_path in ensemble_model_candidates:
        model_file_name = os.path.basename(model_file_path)
        m = create_model(pretrained=False).to(device)
        m.load_state_dict(torch.load(model_file_path, map_location=device))
        val_loss_avg, val_precision_avg = evaluate(m, val_set_data_loader, criterion)
        print("ensemble '%s': val_loss=%.3f, val_precision=%.3f" % (model_file_name, val_loss_avg, val_precision_avg))
        if len(score_to_model) < ensemble_model_count:
            score_to_model[val_precision_avg] = m
        elif min(score_to_model.keys()) < val_precision_avg:
            # evict the weakest member before adding the better-scoring model
            del score_to_model[min(score_to_model.keys())]
            score_to_model[val_precision_avg] = m

    ensemble_models = list(score_to_model.values())
    for ensemble_model in ensemble_models:
        val_loss_avg, val_precision_avg = evaluate(ensemble_model, val_set_data_loader, criterion)
        print("ensemble: val_loss=%.3f, val_precision=%.3f" % (val_loss_avg, val_precision_avg))

    model = Ensemble(ensemble_models)
    mask_threshold_global, mask_threshold_per_cc = analyze(model, train_data.val_set_df, use_tta=True)

    eval_end_time = time.time()
    print()
    print("Eval time: %s" % str(datetime.timedelta(seconds=eval_end_time - eval_start_time)))

    print()
    print("submission preparation")

    submission_start_time = time.time()

    test_data = TestData(input_dir)
    calculate_predictions(test_data.df, model, use_tta=True)
    calculate_prediction_masks(test_data.df, mask_threshold_global)

    print()
    print(test_data.df.groupby("predictions_cc").agg({"predictions_cc": "count"}))

    write_submission(test_data.df, "prediction_masks", "{}/{}".format(output_dir, "submission.csv"))
    write_submission(test_data.df, "prediction_masks_best", "{}/{}".format(output_dir, "submission_best.csv"))

    submission_end_time = time.time()
    print()
    print("Submission time: %s" % str(datetime.timedelta(seconds=submission_end_time - submission_start_time)))
Example #6
def main():
    args = argparser.parse_args()
    print("Arguments:")
    for arg in vars(args):
        print("  {}: {}".format(arg, getattr(args, arg)))
    print()

    input_dir = args.input_dir
    output_dir = args.output_dir
    base_model_dir = args.base_model_dir
    image_size = args.image_size
    augment = args.augment
    use_dummy_image = args.use_dummy_image
    use_progressive_image_sizes = args.use_progressive_image_sizes
    progressive_image_size_min = args.progressive_image_size_min
    progressive_image_size_step = args.progressive_image_size_step
    progressive_image_epoch_step = args.progressive_image_epoch_step
    batch_size = args.batch_size
    batch_iterations = args.batch_iterations
    test_size = args.test_size
    train_on_unrecognized = args.train_on_unrecognized
    num_category_shards = args.num_category_shards
    category_shard = args.category_shard
    eval_train_mapk = args.eval_train_mapk
    mapk_topk = args.mapk_topk
    num_shard_preload = args.num_shard_preload
    num_shard_loaders = args.num_shard_loaders
    num_workers = args.num_workers
    pin_memory = args.pin_memory
    epochs_to_train = args.epochs
    lr_scheduler_type = args.lr_scheduler
    lr_patience = args.lr_patience
    lr_min = args.lr_min
    lr_max = args.lr_max
    lr_min_decay = args.lr_min_decay
    lr_max_decay = args.lr_max_decay
    optimizer_type = args.optimizer
    loss_type = args.loss
    loss2_type = args.loss2
    loss2_start_sgdr_cycle = args.loss2_start_sgdr_cycle
    model_type = args.model
    patience = args.patience
    sgdr_cycle_epochs = args.sgdr_cycle_epochs
    sgdr_cycle_epochs_mult = args.sgdr_cycle_epochs_mult
    sgdr_cycle_end_prolongation = args.sgdr_cycle_end_prolongation
    sgdr_cycle_end_patience = args.sgdr_cycle_end_patience
    max_sgdr_cycles = args.max_sgdr_cycles

    use_extended_stroke_channels = model_type in [
        "cnn", "residual_cnn", "fc_cnn", "hc_fc_cnn"
    ]

    train_data = TrainData(data_dir=input_dir,
                           shard=0,
                           test_size=test_size,
                           train_on_unrecognized=train_on_unrecognized,
                           confusion_set=None,
                           num_category_shards=num_category_shards,
                           category_shard=category_shard)

    val_set = TrainDataset(train_data.val_set_df, image_size,
                           use_extended_stroke_channels, False,
                           use_dummy_image)
    val_set_data_loader = \
        DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
    categories = train_data.categories
    criterion = create_criterion(loss_type, len(categories))
    model_dir = "/storage/models/quickdraw/seresnext50"
    model = load_ensemble_model(model_dir, 3, val_set_data_loader, criterion,
                                model_type, image_size, len(categories))

    cs_entry_categories = [
        'angel', 'arm', 'bat', 'bathtub', 'bottlecap', 'hospital',
        'police car', 'spider', 'sun', 'tent', 'triangle', 'windmill'
    ]
    cs_categories = read_confusion_set(
        "/storage/models/quickdraw/seresnext50_confusion/confusion_set_{}.txt".
        format(0))

    predicted_words = predict(model, val_set_data_loader, categories, tta=True)
    prediction_mask = []
    cs_entry_match_count = 0
    cs_match_count = 0
    for i, p in enumerate(predicted_words):
        predicted_word = p.split(" ")[0].replace("_", " ")
        cond1 = predicted_word in cs_entry_categories
        prediction_mask.append(cond1)
        if cond1 and categories[train_data.val_set_df["category"]
                                [i]] in cs_entry_categories:
            cs_entry_match_count += 1
        if cond1 and categories[train_data.val_set_df["category"]
                                [i]] in cs_categories:
            cs_match_count += 1
    print("matched {} of {}".format(sum(prediction_mask),
                                    len(prediction_mask)),
          flush=True)
    print("cs_entry_match_count: {}".format(cs_entry_match_count), flush=True)
    print("cs_match_count: {}".format(cs_match_count), flush=True)
    df = {
        "category": train_data.val_set_df["category"][prediction_mask],
        "drawing": train_data.val_set_df["drawing"][prediction_mask]
    }
    val_set = TrainDataset(df, image_size, use_extended_stroke_channels, False,
                           use_dummy_image)
    val_set_data_loader = \
        DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

    loss_avg, mapk_avg, accuracy_top1_avg, accuracy_top3_avg, accuracy_top5_avg, accuracy_top10_avg = \
        evaluate(model, val_set_data_loader, criterion, mapk_topk)
    print(
        "loss: {:.3f}, map@3: {:.3f}, acc@1: {:.3f}, acc@3: {:.3f}, acc@5: {:.3f}, acc@10: {:.3f}"
        .format(loss_avg, mapk_avg, accuracy_top1_avg, accuracy_top3_avg,
                accuracy_top5_avg, accuracy_top10_avg),
        flush=True)

    predicted_words = predict(model, val_set_data_loader, categories, tta=True)
    match_count = 0
    for i, p in enumerate(predicted_words):
        predicted_word = p.split(" ")[0].replace("_", " ")
        true_word = categories[df["category"][i]]
        if predicted_word == true_word:
            match_count += 1
        if predicted_word not in cs_entry_categories:
            print("predicted unexpected word: '{}'".format(predicted_word),
                  flush=True)
    print("acc@1: {}".format(match_count / len(predicted_words)), flush=True)

    criterion = create_criterion(loss_type, len(cs_categories))
    model_dir = "/storage/models/quickdraw/seresnext50_cs_0"
    model = load_ensemble_model(model_dir, 3, val_set_data_loader,
                                criterion, "seresnext50_cs", image_size,
                                len(cs_categories))
    predicted_words = predict(model,
                              val_set_data_loader,
                              cs_categories,
                              tta=True)
    match_count = 0
    for i, p in enumerate(predicted_words):
        predicted_word = p.split(" ")[0].replace("_", " ")
        true_word = categories[df["category"][i]]
        if predicted_word == true_word:
            match_count += 1
    print("acc@1: {}".format(match_count / len(predicted_words)), flush=True)
Example #7
def create_datasets(args):

    train_data = TrainData(args.train_path)
    dev_data = ValidData(args.validation_path)

    return dev_data, train_data
Example #8
def train():
    if not os.path.isfile(train_data_pickle):
        # training data
        train_features, train_labels = features(['fold0', 'fold1', 'fold2'])
        traindata = TrainData(train_features, train_labels)
        with open(train_data_pickle, mode='wb') as f:
            pickle.dump(traindata, f)
    else:
        print("loading: %s" % (train_data_pickle))
        with open(train_data_pickle, mode='rb') as f:
            traindata = pickle.load(f)
            train_features = traindata.train_inputs
            train_labels = traindata.train_targets

    if not os.path.isfile(test_data_pickle):
        test_features, test_labels = features(['fold3'])
        testdata = TestData(test_features, test_labels)
        with open(test_data_pickle, mode='wb') as f:
            pickle.dump(testdata, f)
    else:
        print("loading: %s" % (test_data_pickle))
        with open(test_data_pickle, mode='rb') as f:
            testdata = pickle.load(f)
            test_features = testdata.test_inputs
            test_labels = testdata.test_targets

    # TODO change to use train and test
    train_labels = one_hot_encode(train_labels)
    test_labels = one_hot_encode(test_labels)

    # random train and test sets.
    train_test_split = np.random.rand(len(train_features)) < 0.70
    train_x = train_features[train_test_split]
    train_y = train_labels[train_test_split]
    test_x = train_features[~train_test_split]
    test_y = train_labels[~train_test_split]

    n_dim = train_features.shape[1]
    print("input dim: %s" % (n_dim))

    # create placeholder
    X = tf.placeholder(tf.float32, [None, n_dim])
    Y = tf.placeholder(tf.float32, [None, FLAGS.num_classes])
    # build graph
    logits = model.inference(X, n_dim)

    weights = tf.all_variables()
    saver = tf.train.Saver(weights)

    # create loss
    loss = model.loss(logits, Y)
    tf.scalar_summary('loss', loss)

    accuracy = model.accuracy(logits, Y)
    tf.scalar_summary('test accuracy', accuracy)

    # train operation
    train_op = model.train_op(loss)

    # variable initializer
    init = tf.initialize_all_variables()

    # get Session
    sess = tf.Session()

    # summary merge and writer
    merged = tf.merge_all_summaries()
    train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir)

    # initialize
    sess.run(init)

    for step in xrange(MAX_STEPS):

        t_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: train_features})
        t_true = sess.run(tf.argmax(train_labels, 1))
        print("train samples pred: %s" % t_pred[:30])
        print("train samples target: %s" % t_true[:30])
        print('Train accuracy: ',
              sess.run(accuracy, feed_dict={
                  X: train_x,
                  Y: train_y
              }))
        for epoch in xrange(training_epochs):
            summary, logits_val, _, loss_val = sess.run(
                [merged, logits, train_op, loss],
                feed_dict={
                    X: train_x,
                    Y: train_y
                })
        train_writer.add_summary(summary, step)

        print("step:%d, loss: %s" % (step, loss_val))
        y_pred = sess.run(tf.argmax(logits, 1), feed_dict={X: test_x})
        y_true = sess.run(tf.argmax(test_y, 1))
        print("test samples pred: %s" % y_pred[:10])
        print("test samples target: %s" % y_true[:10])
        accuracy_val = sess.run([accuracy], feed_dict={X: test_x, Y: test_y})
        # print('Test accuracy: ', accuracy_val)
        # train_writer.add_summary(accuracy_val, step)
        p, r, f, s = precision_recall_fscore_support(y_true,
                                                     y_pred,
                                                     average='micro')
        print("F-score: %s" % f)

        if step % 1000 == 0:
            saver.save(sess, FLAGS.ckpt_dir, global_step=step)
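This example uses the pre-1.0 TensorFlow API. If you want to run it on TensorFlow 1.x, the deprecated calls map roughly as follows (the example itself is left unchanged above):

# tf.scalar_summary(name, value)   -> tf.summary.scalar(name, value)
# tf.merge_all_summaries()         -> tf.summary.merge_all()
# tf.train.SummaryWriter(logdir)   -> tf.summary.FileWriter(logdir)
# tf.initialize_all_variables()    -> tf.global_variables_initializer()
# tf.all_variables()               -> tf.global_variables()
# xrange                           -> range (Python 3)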
Example #9
def main():
    args = argparser.parse_args()
    log_args(args)

    input_dir = args.input_dir
    output_dir = args.output_dir
    base_model_dir = args.base_model_dir
    image_size = args.image_size
    crop_images = args.crop_images
    augment = args.augment
    use_progressive_image_sizes = args.use_progressive_image_sizes
    progressive_image_size_min = args.progressive_image_size_min
    progressive_image_size_step = args.progressive_image_size_step
    progressive_image_epoch_step = args.progressive_image_epoch_step
    batch_size = args.batch_size
    batch_iterations = args.batch_iterations
    num_workers = args.num_workers
    pin_memory = args.pin_memory
    epochs_to_train = args.epochs
    lr_scheduler_type = args.lr_scheduler
    lr_patience = args.lr_patience
    lr_min = args.lr_min
    lr_max = args.lr_max
    lr_min_decay = args.lr_min_decay
    lr_max_decay = args.lr_max_decay
    optimizer_type = args.optimizer
    loss_type = args.loss
    focal_loss_gamma = args.focal_loss_gamma
    use_class_weights = args.use_class_weights
    use_weighted_sampling = args.use_weighted_sampling
    model_type = args.model
    patience = args.patience
    sgdr_cycle_epochs = args.sgdr_cycle_epochs
    sgdr_cycle_epochs_mult = args.sgdr_cycle_epochs_mult
    sgdr_cycle_end_prolongation = args.sgdr_cycle_end_prolongation
    sgdr_cycle_end_patience = args.sgdr_cycle_end_patience
    max_sgdr_cycles = args.max_sgdr_cycles

    if optimizer_type == "adam":
        lr_scheduler_type = "adam"

    progressive_image_sizes = list(
        range(progressive_image_size_min, image_size + 1,
              progressive_image_size_step))

    train_data = TrainData(input_dir)

    train_set = TrainDataset(train_data.train_set_df, input_dir, 28,
                             image_size, crop_images, augment)

    balance_weights, balance_class_weights = calculate_balance_weights(
        train_data.df, train_data.train_set_df, 28)
    train_set_sampler = WeightedRandomSampler(balance_weights,
                                              len(balance_weights))

    train_set_data_loader = DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=not use_weighted_sampling,
        sampler=train_set_sampler if use_weighted_sampling else None,
        num_workers=num_workers,
        pin_memory=pin_memory)

    val_set = TrainDataset(train_data.val_set_df, input_dir, 28, image_size,
                           crop_images, False)
    val_set_data_loader = \
        DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

    if base_model_dir:
        for base_file_path in glob.glob("{}/*.pth".format(base_model_dir)):
            shutil.copyfile(
                base_file_path,
                "{}/{}".format(output_dir, os.path.basename(base_file_path)))
        model = create_model(type=model_type, num_classes=28).to(device)
        model.load_state_dict(
            torch.load("{}/model.pth".format(output_dir), map_location=device))
        optimizer = create_optimizer(optimizer_type, model, lr_max)
        if os.path.isfile("{}/optimizer.pth".format(output_dir)):
            try:
                optimizer.load_state_dict(
                    torch.load("{}/optimizer.pth".format(output_dir)))
                adjust_initial_learning_rate(optimizer, lr_max)
                adjust_learning_rate(optimizer, lr_max)
            except Exception:
                log("Failed to load the optimizer weights")
    else:
        model = create_model(type=model_type, num_classes=28).to(device)
        optimizer = create_optimizer(optimizer_type, model, lr_max)

    torch.save(model.state_dict(), "{}/model.pth".format(output_dir))

    ensemble_model_index = 0
    for model_file_path in glob.glob("{}/model-*.pth".format(output_dir)):
        model_file_name = os.path.basename(model_file_path)
        model_index = int(
            model_file_name.replace("model-", "").replace(".pth", ""))
        ensemble_model_index = max(ensemble_model_index, model_index + 1)

    epoch_iterations = ceil(len(train_set) / batch_size)

    log("train_set_samples: {}, val_set_samples: {}".format(
        len(train_set), len(val_set)))
    log()

    global_val_score_best_avg = float("-inf")
    sgdr_cycle_val_score_best_avg = float("-inf")

    lr_scheduler = CosineAnnealingLR(optimizer,
                                     T_max=sgdr_cycle_epochs,
                                     eta_min=lr_min)

    optim_summary_writer = SummaryWriter(
        log_dir="{}/logs/optim".format(output_dir))
    train_summary_writer = SummaryWriter(
        log_dir="{}/logs/train".format(output_dir))
    val_summary_writer = SummaryWriter(
        log_dir="{}/logs/val".format(output_dir))

    current_sgdr_cycle_epochs = sgdr_cycle_epochs
    sgdr_next_cycle_end_epoch = current_sgdr_cycle_epochs + sgdr_cycle_end_prolongation
    sgdr_iterations = 0
    sgdr_cycle_count = 0
    batch_count = 0
    epoch_of_last_improval = 0

    lr_scheduler_plateau = \
        ReduceLROnPlateau(optimizer, mode="max", min_lr=lr_min, patience=lr_patience, factor=0.5, threshold=1e-4)

    lr_scheduler_step = StepLR(optimizer, step_size=10, gamma=0.1)

    log('{"chart": "best_val_score", "axis": "epoch"}')
    log('{"chart": "val_score", "axis": "epoch"}')
    log('{"chart": "val_loss", "axis": "epoch"}')
    log('{"chart": "sgdr_cycle", "axis": "epoch"}')
    log('{"chart": "score", "axis": "epoch"}')
    log('{"chart": "loss", "axis": "epoch"}')
    log('{"chart": "lr_scaled", "axis": "epoch"}')
    log('{"chart": "mem_used", "axis": "epoch"}')
    log('{"chart": "epoch_time", "axis": "epoch"}')

    train_start_time = time.time()

    loss_weight = CLASS_WEIGHTS_TENSOR if use_class_weights else None
    criterion = create_criterion(loss_type, loss_weight, focal_loss_gamma)

    for epoch in range(epochs_to_train):
        epoch_start_time = time.time()

        log("memory used: {:.2f} GB".format(psutil.virtual_memory().used /
                                            2**30))

        if use_progressive_image_sizes:
            next_image_size = \
                progressive_image_sizes[min(epoch // progressive_image_epoch_step, len(progressive_image_sizes) - 1)]

            if train_set.image_size != next_image_size:
                log("changing image size to {}".format(next_image_size))
                train_set.image_size = next_image_size
                val_set.image_size = next_image_size

        model.train()

        train_loss_sum_t = zero_item_tensor()

        epoch_batch_iter_count = 0

        if lr_scheduler_type == "lr_finder":
            new_lr = lr_max * 0.5**(sgdr_cycle_epochs - min(
                sgdr_cycle_epochs, sgdr_iterations / epoch_iterations))
            adjust_learning_rate(optimizer, new_lr)

        all_predictions = []
        all_targets = []
        for b, batch in enumerate(train_set_data_loader):
            images, categories = \
                batch[0].to(device, non_blocking=True), \
                batch[1].to(device, non_blocking=True)

            if lr_scheduler_type == "cosine_annealing":
                lr_scheduler.step(
                    epoch=min(current_sgdr_cycle_epochs, sgdr_iterations /
                              epoch_iterations))

            if b % batch_iterations == 0:
                optimizer.zero_grad()

            prediction_logits = model(images)
            criterion.weight = CLASS_WEIGHTS_TENSOR
            loss = criterion(prediction_logits, categories)
            loss.backward()

            with torch.no_grad():
                train_loss_sum_t += loss
                all_predictions.extend(
                    torch.sigmoid(prediction_logits).cpu().data.numpy())
                all_targets.extend(categories.cpu().data.numpy())

            if (b + 1) % batch_iterations == 0 or (
                    b + 1) == len(train_set_data_loader):
                optimizer.step()

            sgdr_iterations += 1
            batch_count += 1
            epoch_batch_iter_count += 1

            optim_summary_writer.add_scalar("lr", get_learning_rate(optimizer),
                                            batch_count + 1)

        train_loss_avg = train_loss_sum_t.item() / epoch_batch_iter_count
        train_score_avg = f1_score_from_probs(torch.tensor(all_predictions),
                                              torch.tensor(all_targets))

        val_loss_avg, val_score_avg = evaluate(model, val_set_data_loader,
                                               criterion)

        if lr_scheduler_type == "reduce_on_plateau":
            lr_scheduler_plateau.step(val_score_avg)
        elif lr_scheduler_type == "step":
            lr_scheduler_step.step(epoch)

        model_improved_within_sgdr_cycle = check_model_improved(
            sgdr_cycle_val_score_best_avg, val_score_avg)
        if model_improved_within_sgdr_cycle:
            torch.save(
                model.state_dict(),
                "{}/model-{}.pth".format(output_dir, ensemble_model_index))
            sgdr_cycle_val_score_best_avg = val_score_avg

        model_improved = check_model_improved(global_val_score_best_avg,
                                              val_score_avg)
        ckpt_saved = False
        if model_improved:
            torch.save(model.state_dict(), "{}/model.pth".format(output_dir))
            torch.save(optimizer.state_dict(),
                       "{}/optimizer.pth".format(output_dir))
            np.save("{}/train_predictions.npy".format(output_dir),
                    all_predictions)
            np.save("{}/train_targets.npy".format(output_dir), all_targets)
            global_val_score_best_avg = val_score_avg
            epoch_of_last_improval = epoch
            ckpt_saved = True

        sgdr_reset = False
        if (lr_scheduler_type == "cosine_annealing") \
                and (epoch + 1 >= sgdr_next_cycle_end_epoch) \
                and (epoch - epoch_of_last_improval >= sgdr_cycle_end_patience):
            sgdr_iterations = 0
            current_sgdr_cycle_epochs = int(current_sgdr_cycle_epochs *
                                            sgdr_cycle_epochs_mult)
            sgdr_next_cycle_end_epoch = epoch + 1 + current_sgdr_cycle_epochs + sgdr_cycle_end_prolongation

            ensemble_model_index += 1
            sgdr_cycle_val_score_best_avg = float("-inf")
            sgdr_cycle_count += 1
            sgdr_reset = True

            new_lr_min = lr_min * (lr_min_decay**sgdr_cycle_count)
            new_lr_max = lr_max * (lr_max_decay**sgdr_cycle_count)
            new_lr_max = max(new_lr_max, new_lr_min)

            adjust_learning_rate(optimizer, new_lr_max)
            lr_scheduler = CosineAnnealingLR(optimizer,
                                             T_max=current_sgdr_cycle_epochs,
                                             eta_min=new_lr_min)

        optim_summary_writer.add_scalar("sgdr_cycle", sgdr_cycle_count,
                                        epoch + 1)

        train_summary_writer.add_scalar("loss", train_loss_avg, epoch + 1)
        train_summary_writer.add_scalar("score", train_score_avg, epoch + 1)
        val_summary_writer.add_scalar("loss", val_loss_avg, epoch + 1)
        val_summary_writer.add_scalar("score", val_score_avg, epoch + 1)

        epoch_end_time = time.time()
        epoch_duration_time = epoch_end_time - epoch_start_time

        log("[%03d/%03d] %ds, lr: %.6f, loss: %.4f, val_loss: %.4f, score: %.4f, val_score: %.4f, ckpt: %d, rst: %d"
            %
            (epoch + 1, epochs_to_train, epoch_duration_time,
             get_learning_rate(optimizer), train_loss_avg, val_loss_avg,
             train_score_avg, val_score_avg, int(ckpt_saved), int(sgdr_reset)))

        log('{"chart": "best_val_score", "x": %d, "y": %.4f}' %
            (epoch + 1, global_val_score_best_avg))
        log('{"chart": "val_loss", "x": %d, "y": %.4f}' %
            (epoch + 1, val_loss_avg))
        log('{"chart": "val_score", "x": %d, "y": %.4f}' %
            (epoch + 1, val_score_avg))
        log('{"chart": "sgdr_cycle", "x": %d, "y": %d}' %
            (epoch + 1, sgdr_cycle_count))
        log('{"chart": "loss", "x": %d, "y": %.4f}' %
            (epoch + 1, train_loss_avg))
        log('{"chart": "score", "x": %d, "y": %.4f}' %
            (epoch + 1, train_score_avg))
        log('{"chart": "lr_scaled", "x": %d, "y": %.4f}' %
            (epoch + 1, 1000 * get_learning_rate(optimizer)))
        log('{"chart": "mem_used", "x": %d, "y": %.2f}' %
            (epoch + 1, psutil.virtual_memory().used / 2**30))
        log('{"chart": "epoch_time", "x": %d, "y": %d}' %
            (epoch + 1, epoch_duration_time))

        if (sgdr_reset or lr_scheduler_type in ("reduce_on_plateau", "step")) \
                and epoch - epoch_of_last_improval >= patience:
            log("early abort due to lack of improval")
            break

        if max_sgdr_cycles is not None and sgdr_cycle_count >= max_sgdr_cycles:
            log("early abort due to maximum number of sgdr cycles reached")
            break

    optim_summary_writer.close()
    train_summary_writer.close()
    val_summary_writer.close()

    train_end_time = time.time()
    log()
    log("Train time: %s" %
        str(datetime.timedelta(seconds=train_end_time - train_start_time)))

    model.load_state_dict(
        torch.load("{}/model.pth".format(output_dir), map_location=device))

    val_predictions, val_targets = predict(model, val_set_data_loader)
    np.save("{}/val_predictions.npy".format(output_dir), val_predictions)
    np.save("{}/val_targets.npy".format(output_dir), val_targets)

    best_threshold, best_threshold_score, all_threshold_scores = calculate_best_threshold(
        val_predictions, val_targets)
    log("All threshold scores: {}".format(all_threshold_scores))
    log("Best threshold / score: {} / {}".format(best_threshold,
                                                 best_threshold_score))

    test_data = TestData(input_dir)
    test_set = TestDataset(test_data.test_set_df, input_dir, image_size,
                           crop_images)
    test_set_data_loader = \
        DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

    test_predictions, _ = predict(model, test_set_data_loader)
    np.save("{}/test_predictions.npy".format(output_dir), test_predictions)

    predicted_categories = calculate_categories_from_predictions(
        test_predictions, threshold=best_threshold)

    submission_df = test_data.test_set_df.copy()
    submission_df["Predicted"] = [
        " ".join(map(str, pc)) for pc in predicted_categories
    ]
    submission_df.to_csv("{}/submission.csv".format(output_dir))
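calculate_categories_from_predictions is project code that is not shown here. For this multi-label setup, a common approach (assumed, not confirmed by the source) is to take all classes whose probability exceeds the threshold and fall back to the top class when none do:

import numpy as np

def calculate_categories_from_predictions(predictions, threshold):
    # predictions: (num_samples, num_classes) array of probabilities
    categories = []
    for probs in np.asarray(predictions):
        selected = np.nonzero(probs > threshold)[0]
        if selected.size == 0:
            selected = np.array([probs.argmax()])  # fall back to the most likely class
        categories.append(selected.tolist())
    return categories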
Example #10
def main(args):
    """main function for training DHS net"""

    # print(args) # uncomment to test arg inputs
    bsize = args.batch_size
    train_dir = args.train_dir
    test_dir = args.test_dir
    model_dir = args.ckpt_dir
    tensorboard_dir = args.tensorboard_dir
    device = args.device
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_loader = torch.utils.data.DataLoader(TrainData(train_dir,
                                                         transform=True),
                                               batch_size=bsize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(TrainData(test_dir,
                                                       transform=True),
                                             batch_size=bsize,
                                             shuffle=True,
                                             num_workers=4,
                                             pin_memory=True)

    model = SRM()
    if device == 'gpu':
        model.cuda()

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    train_loss = []
    evaluation = []
    result = {'epoch': [], 'F_measure': [], 'MAE': []}

    progress = tqdm(range(0, args.epochs + 1),
                    miniters=1,
                    ncols=100,
                    desc='Overall Progress',
                    leave=True,
                    position=0)
    offset = 1
    best = 0

    writer = SummaryWriter(tensorboard_dir)

    for epoch in progress:
        if epoch != 0:
            print("load parameters")
            model.load_state_dict(torch.load(model_dir +
                                             'current_network.pth'))
            optimizer.load_state_dict(
                torch.load(model_dir + 'current_optimizer.pth'))
        title = 'Training Epoch {}'.format(epoch)
        progress_epoch = tqdm(train_loader,
                              ncols=120,
                              total=len(train_loader),
                              smoothing=0.9,
                              miniters=1,
                              leave=True,
                              position=offset,
                              desc=title)

        for ib, (img, gt) in enumerate(progress_epoch):
            # keep the inputs on the same device as the model
            if device == 'gpu':
                inputs = Variable(img).cuda()
                gt = Variable(gt.unsqueeze(1)).cuda()
            else:
                inputs = Variable(img)
                gt = Variable(gt.unsqueeze(1))
            output1, output2 = model.forward(inputs)
            output1 = get_pred(output1)
            output2 = get_pred(output2)
            loss = criterion(output1, gt) + criterion(output2, gt)
            model.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss.append(round(float(loss.data.cpu()), 3))
            title = '{} Epoch {}/{}'.format('Training', epoch, args.epochs)
            progress_epoch.set_description(title + ' ' + 'loss:' +
                                           str(loss.data.cpu().numpy()))
            writer.add_scalar('Train/Loss', loss.data.cpu(), epoch)

        filename = model_dir + 'current_network.pth'
        filename_opti = model_dir + 'current_optimizer.pth'
        torch.save(model.state_dict(), filename)  # save current model params
        torch.save(optimizer.state_dict(),
                   filename_opti)  # save current optimizer params

        if epoch % args.val_rate == 0:  # start validation
            params = model_dir + 'current_network.pth'
            model.load_state_dict(torch.load(params))
            pred_list = []
            gt_list = []
            for img, gt in val_loader:
                # match the model's device during validation as well
                inputs = Variable(img).cuda() if device == 'gpu' else Variable(img)
                _, output = model.forward(inputs)
                output = get_pred(output)
                out = output.data.cpu().numpy()
                pred_list.extend(out)
                gt = gt.numpy()
                gt_list.extend(gt)
            pred_list = np.array(pred_list)
            pred_list = np.squeeze(pred_list)
            gt_list = np.array(gt_list)
            F_measure = get_f_measure(pred_list, gt_list)
            mae = get_mae(pred_list, gt_list)
            evaluation.append([int(epoch), float(F_measure), float(mae)])
            result['epoch'].append(int(epoch))
            result['F_measure'].append(round(float(F_measure), 3))
            result['MAE'].append(round(float(mae), 3))
            df = pd.DataFrame(result).set_index('epoch')
            df.to_csv('./eval.csv')

            if epoch == 0:
                best = F_measure - mae
            elif F_measure - mae > best:  # save model with best performance
                best = F_measure - mae
                filename = ('%s/best_network.pth' % model_dir)
                filename_opti = ('%s/best_optimizer.pth' % model_dir)
                torch.save(model.state_dict(), filename)
                torch.save(optimizer.state_dict(), filename_opti)
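get_f_measure and get_mae come from the project's evaluation utilities and are not shown. As a rough, simplified sketch (fixed 0.5 threshold and the beta^2 = 0.3 weighting customary in saliency detection; the original likely works per image and may use an adaptive threshold):

import numpy as np

def get_mae(pred, gt):
    # mean absolute error between predicted and ground-truth saliency maps
    return np.abs(pred - gt).mean()

def get_f_measure(pred, gt, beta2=0.3, threshold=0.5):
    # simplified F-measure with a fixed binarization threshold
    pred_bin = (pred >= threshold).astype(np.float32)
    gt_bin = (gt >= 0.5).astype(np.float32)
    tp = (pred_bin * gt_bin).sum()
    precision = tp / (pred_bin.sum() + 1e-8)
    recall = tp / (gt_bin.sum() + 1e-8)
    return (1 + beta2) * precision * recall / (beta2 * precision + recall + 1e-8)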
Example #11
File: train.py, Project: khlee369/SRGAN
def main():
    batch_size = 16
    generator = Generator().cuda()
    discriminator = Discriminator(96, 96).cuda()
    optimizer_G = optim.Adam(generator.parameters(), lr=1e-4)
    optimizer_D = optim.Adam(discriminator.parameters(), lr=1e-4)
    # dataset = FaceData('train')
    dataset = TrainData()
    data_loader = DataLoader(dataset,
                             batch_size,
                             shuffle=True,
                             num_workers=0,
                             pin_memory=True,
                             drop_last=True)
    MSE = nn.MSELoss()
    BCE = nn.BCELoss()

    # content loss, perceptual loss vgg / i,j == 5,4
    vgg_net = vgg19(pretrained=True).features[:36].cuda()
    vgg_net.eval()
    for param in vgg_net.parameters():
        param.requires_grad = False

    discriminator.train()
    generator.train()
    optimizer_G.zero_grad()
    optimizer_D.zero_grad()

    print("Start Training")
    current_epoch = 0
    for epoch in range(current_epoch, 100):
        for step, (img_Input, img_GT) in tqdm(enumerate(data_loader)):

            img_GT = img_GT.cuda()
            img_Input = img_Input.cuda()

            # # Discriminator update
            # img_SR = generator(img_Input)
            # fake = discriminator(img_SR)
            # real = discriminator(img_GT)
            # loss_Dfake = 0.001 * BCE(fake, torch.zeros(batch_size, 1).cuda())
            # loss_Dreal = 0.001 * BCE(real, torch.ones(batch_size, 1).cuda())
            # loss_D = 0.001 * (loss_Dfake + loss_Dreal)
            # # if epoch > 0:
            # discriminator.zero_grad()
            # loss_D.backward(retain_graph=True)
            # optimizer_D.step()

            # # Generator update
            # img_SR = generator(img_Input)
            # loss_content = MSE(img_SR, img_GT)
            # loss_vgg = 0.006 * MSE(vgg_net(img_SR), vgg_net(img_GT))
            # fake = discriminator(img_SR)
            # loss_Dfake = 0.001 * BCE(fake, torch.zeros(batch_size, 1).cuda())

            # loss_G = loss_content + loss_vgg + loss_Dfake
            # generator.zero_grad()
            # loss_G.backward()
            # # loss_Dfake.backward()
            # optimizer_G.step()
            if epoch < 10:
                # SRResnet Initialize Generator update
                generator.zero_grad()
                img_SR = generator(img_Input)
                loss_content = MSE(img_SR, img_GT)
                loss_content.backward()
                optimizer_G.step()

                if step % 100 == 0:
                    print()
                    print("Loss_content : {}".format(loss_content.item()))
                continue

            # Discriminator update
            discriminator.zero_grad()
            D_real = discriminator(img_GT)
            loss_Dreal = 0.1 * BCE(D_real, torch.ones(batch_size, 1).cuda())
            loss_Dreal.backward()
            D_x = D_real.mean().item()

            img_SR = generator(img_Input)
            D_fake = discriminator(img_SR.detach())
            loss_Dfake = 0.1 * BCE(D_fake, torch.zeros(batch_size, 1).cuda())
            loss_Dfake.backward()
            DG_z = D_fake.mean().item()

            loss_D = (loss_Dfake + loss_Dreal)
            optimizer_D.step()

            # Generator update
            generator.zero_grad()
            loss_content = MSE(img_SR, img_GT)
            loss_vgg = MSE(vgg_net(img_SR), vgg_net(img_GT))

            # img_SR = generator(img_Input)
            G_fake = discriminator(img_SR)
            # adversarial loss for the generator: push D(G(x)) toward the "real" label
            loss_Gfake = BCE(G_fake, torch.ones(batch_size, 1).cuda())

            loss_G = loss_content + 0.006 * loss_vgg + 0.001 * loss_Gfake
            loss_G.backward()
            # loss_Dfake.backward()
            optimizer_G.step()

            if step % 100 == 0:
                # :.10f
                print()
                print("fake out : {}".format(DG_z))
                print("real out : {}".format(D_x))
                print("Loss_Dfake :   {}".format(loss_Dfake.item()))
                print("Loss_Dreal :   {}".format(loss_Dreal.item()))
                print("Loss_D :       {}".format(loss_D.item()))
                print("Loss_content : {}".format(loss_content.item()))
                print("Loss_vgg :     {}".format(0.006 * loss_vgg.item()))
                print("Loss_Gfake :   {}".format(0.001 * loss_Gfake.item()))
                print("Loss_G :       {}".format(loss_G.item()))
                print("Loss_Total :   {}".format((loss_G + loss_D).item()))
                # print("Loss_D : {:.4f}".format(loss_D.item()))
                # print("Loss : {:.4f}".format(loss_total.item()))

        with torch.no_grad():
            generator.eval()
            save_image(denorm(img_SR[0].cpu()),
                       "./Result/{0}_SR.png".format(epoch))
            save_image(denorm(img_GT[0].cpu()),
                       "./Result/{0}_GT.png".format(epoch))
            generator.train()
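denorm is not defined in this file; since the generator output is saved with save_image, it presumably maps tensors normalized to [-1, 1] back to [0, 1]. A hedged sketch:

def denorm(tensor):
    # assumption: inputs were normalized to [-1, 1]; map back to [0, 1] for saving
    return (tensor + 1.0).div(2.0).clamp(0.0, 1.0)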