Code example #1
def train_results(model, train_test_data):
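    # Build one row per vulnerability: train metrics, the flattened test
    # confusion matrix, test metrics, and three (feature, influence) slots.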
    results = pd.DataFrame(
        {},
        columns=[
            'train_size', 'train_support', 'train_accuracy', 'train_precision',
            'train_recall', 'train_f1', 'train_roc', 'TrueNeg', 'FalsePos',
            'FalseNeg', 'TruePos', 'size', 'support', 'accuracy', 'precision',
            'recall', 'f1', 'roc', 'top_ft_1', 'top_inf_1', 'top_ft_2',
            'top_inf_2', 'top_ft_3', 'top_inf_3'
        ])

    (x_train, y_train), (x_test, y_test) = train_test_data
    y_train_pred = model_predict(model, x_train)
    y_test_pred = model_predict(model, x_test)

    for i, vul in enumerate(ALL_VULS):
        try:
            train_score_ = trainer.evaluate(y_train[:, i], y_train_pred[:, i])
            test_score_ = trainer.evaluate(y_test[:, i], y_test_pred[:, i])
            confusion_matrix_ = confusion_matrix(y_test[:, i],
                                                 y_test_pred[:, i])
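            # Placeholder for the top-3 feature names and influence scores.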
            influence_ = np.array([['', 0], ['', 0], ['', 0]])
            result = [
                *train_score_, *confusion_matrix_.reshape(-1), *test_score_,
                *influence_.reshape(6)
            ]

            results.loc[vul] = result

        except Exception as ex:
            print(vul, '\terror: %s' % ex)

    return results
Code example #2
def run(model, val_loader, clusters, prob_thresh, nms_thresh, predictions_file, multiscale=False):
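    # Remove any stale predictions file before re-running the evaluation.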
    if osp.exists(predictions_file):
        os.remove(predictions_file)

    if multiscale:
        trainer.evaluate_multiscale(model, val_loader, clusters, prob_thresh=prob_thresh, nms_thresh=nms_thresh)
    else:
        trainer.evaluate(model, val_loader, clusters, prob_thresh=prob_thresh, nms_thresh=nms_thresh)
Code example #3
def evaluate_single(run_name, model_name):
    # Get model path
    cur_dir = os.getcwd()
    model_path = "%s\\Runs\\%s\\Models\\%s.h5" % (cur_dir, run_name,
                                                  model_name)
    assert os.path.exists(model_path), "Model does not exist."

    # Load Test Data
    print("Loading test data...", end="", flush=True)
    test_ids, test_images_raw = fileutils.read_test_data_raw('./Data/test.csv')
    print("done.")

    # Normalize
    print("Normalizing data...", end="", flush=True)
    reshaped_images = processing.reshape_images(test_images_raw)
    test_images = processing.normalize_images(reshaped_images)
    print("done.")

    # Load best
    print("Evaluating test set...")
    _, test_labels = trainer.evaluate(test_images, model_path)

    print("Generating classification CSV...", end="", flush=True)
    fileutils.generate_classification(test_ids, test_labels, run_name)
    print("done.")
Code example #4
File: main.py Project: smileyenot983/ADDA-pytorch
def main(config):
    logger = prepare_logger(config)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # get loaders
    if not config.is_train_source:
        target_loader = get_loader(type="MNIST",
                                   train=False,
                                   batch_size=config.batch_size)

    source_train_loader = get_loader(type="SVHN",
                                     train=True,
                                     batch_size=config.batch_size)
    source_test_loader = get_loader(type="SVHN",
                                    train=False,
                                    batch_size=config.batch_size)

    # build source classifier
    model_src = LeNet(config.num_gpus).to(device)
    if (not config.is_train_source) or config.is_finetune:
        model_src.load_state_dict(torch.load(config.model_dir))

    # train source classifier
    if config.is_train_source:
        logger.info("train source classifier..")
        train_source(model_src, source_train_loader, source_test_loader,
                     config, logger)
        logger.info("evaluate source classifier..")
        logger.info("test accurracy in source domain: %f\n" %
                    (evaluate(model_src, source_test_loader)))

    else:
        # initialize target classifier with source classifier
        model_trg = torch.load(open("./pretrained/lenet-source.pth", "rb"))

        # build discriminator
        D = Discriminator(config.num_gpus)

        # adaptation process
        logger.info("start adaptation process..")
        adapt_target_domain(D, model_src, model_trg, source_train_loader,
                            target_loader, config)
        logger.info("evaluate target classifier..")
        logger.info("accurracy in target domain: %f\n" %
                    (evaluate(model_trg, target_loader)))
Code example #5
def main(args):
    model = RCNN(vocab_size=args.vocab_size,
                 embedding_dim=args.embedding_dim,
                 hidden_size=args.hidden_size,
                 hidden_size_linear=args.hidden_size_linear,
                 class_num=args.class_num,
                 dropout=args.dropout).to(args.device)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model, dim=0)

    train_texts, train_labels = read_file(args.train_file_path)
    word2idx = build_dictionary(train_texts, vocab_size=args.vocab_size)
    logger.info('Dictionary Finished!')

    full_dataset = CustomTextDataset(train_texts, train_labels, word2idx)
    num_train_data = len(full_dataset) - args.num_val_data
    train_dataset, val_dataset = random_split(
        full_dataset, [num_train_data, args.num_val_data])
    train_dataloader = DataLoader(dataset=train_dataset,
                                  collate_fn=lambda x: collate_fn(x, args),
                                  batch_size=args.batch_size,
                                  shuffle=True)

    valid_dataloader = DataLoader(dataset=val_dataset,
                                  collate_fn=lambda x: collate_fn(x, args),
                                  batch_size=args.batch_size,
                                  shuffle=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    train(model, optimizer, train_dataloader, valid_dataloader, args)
    logger.info('******************** Train Finished ********************')

    # Test
    if args.test_set:
        test_texts, test_labels = read_file(args.test_file_path)
        test_dataset = CustomTextDataset(test_texts, test_labels, word2idx)
        test_dataloader = DataLoader(dataset=test_dataset,
                                     collate_fn=lambda x: collate_fn(x, args),
                                     batch_size=args.batch_size,
                                     shuffle=True)

        model.load_state_dict(
            torch.load(os.path.join(args.model_save_path, "best.pt")))
        _, accuracy, precision, recall, f1, cm = evaluate(
            model, test_dataloader, args)
        logger.info('-' * 50)
        logger.info(
            f'|* TEST SET *| |ACC| {accuracy:>.4f} |PRECISION| {precision:>.4f} |RECALL| {recall:>.4f} |F1| {f1:>.4f}'
        )
        logger.info('-' * 50)
        logger.info('---------------- CONFUSION MATRIX ----------------')
        for row in cm:
            logger.info(row)
        logger.info('--------------------------------------------------')
Code example #6
def run_baseline(output_dir,
                 num_models=3,
                 max_train_samples=None,
                 epochs=3,
                 finetune=False,
                 uncertainty_strategy='best'):

    started = time.time()
    model_dirs = []
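    # One output sub-directory per ensemble member (m1, m2, ...).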
    for i in range(num_models):
        model_dir = os.path.join(output_dir, f'm{i + 1}')
        model_dirs.append(model_dir)

    for model_dir in model_dirs:
        print("=== Training model", model_dir, "===")
        trainer = Trainer(max_train_samples=max_train_samples,
                          epochs=epochs,
                          finetune=finetune,
                          uncertainty_strategy=uncertainty_strategy,
                          output_path=model_dir,
                          arch="resnet",
                          layers=18)
        trainer.train()
        y_true, y_pred = trainer.evaluate()
        plot_roc_auc(y_true, y_pred, save_to_file=True, output_path=model_dir)

        print("=== Completed training of", model_dir, "===")
        display_elapsed_time(started, "Total elapsed")
        print()

    ensemble = load_ensemble_from_dirs(model_dirs)
    results = evaluate(model=ensemble,
                       dataloader=get_val_loader(),
                       device=get_device())
    labels = results['labels']
    preds = results['predictions']
    final_auc = mt.roc_auc_score(labels, preds)
    print("Ensemble Validation AUC Score", final_auc)
    plot_roc_auc(labels, preds, save_to_file=True, output_path=output_dir)
    display_elapsed_time(started, "Total time taken")
Code example #7
    started = time.time()
    model_dirs = args.output_path.split(",")
    for model_dir in model_dirs:
        print("=== Training model", model_dir, "===")
        trainer = Trainer(max_train_samples=args.max_train_samples,
                          epochs=args.epochs,
                          finetune=args.finetune,
                          uncertainty_strategy=args.uncertainty_strategy,
                          output_path=model_dir)
        trainer.train()
        y_true, y_pred = trainer.evaluate()
        plot_roc_auc(y_true, y_pred, save_to_file=True, output_path=model_dir)

        print("=== Completed training of", model_dir, "===")
        display_elapsed_time(started, "Total elapsed")
        print()

    ensemble = load_ensemble_from_dirs(model_dirs)
    results = evaluate(model=ensemble,
                       dataloader=get_val_loader(),
                       device=get_device())
    labels = results['labels']
    preds = results['predictions']
    final_auc = mt.roc_auc_score(labels, preds)
    print("Ensemble Validation AUC Score", final_auc)
    plot_roc_auc(labels,
                 preds,
                 save_to_file=True,
                 output_path="./models/baseline")
    display_elapsed_time(started, "Total time taken")
Code example #8
def run(model, val_loader, templates, prob_thresh, nms_thresh, device):
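    # Evaluate on the validation set and return the resulting detections.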
    dets = trainer.evaluate(model, val_loader, templates,
                            prob_thresh, nms_thresh, device)
    return dets
Code example #9
        dev_dataloader = DataLoader(utils.MyDataset(dev_features),
                                    batch_size=args.eval_batch_size,
                                    shuffle=False,
                                    collate_fn=utils.TextPairCollate())
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        best_model, best_val_result = trainer.train(
            model, args.num_train_epochs, train_dataloader, dev_dataloader,
            loss_fn, optimizer, utils.acc_p_r_f1, device)
        utils.save_experiment(config, best_model,
                              best_val_result, args.output_dir)
        tokenizer.save_vocab(args.output_dir)
    if args.do_eval:
        if args.do_train:
            args.model_dir = args.output_dir
        test_examples = processor.get_test_examples()
        test_features = utils.lcqmc_examples_to_features(
            test_examples, label2id, tokenizer,
            max_len=args.max_len, verbose=True)
        test_dataloader = DataLoader(utils.MyDataset(test_features),
                                     batch_size=args.eval_batch_size,
                                     shuffle=False,
                                     collate_fn=utils.TextPairCollate())
        model.load_state_dict(torch.load(
            os.path.join(args.model_dir, 'pytorch_model.bin')))
        test_result = trainer.evaluate(model, test_dataloader,
                                       loss_fn, utils.acc_p_r_f1,
                                       device)
        logger.info('*** Test result ***')
        for k, v in test_result.items():
            logger.info('{}: {}'.format(k, v))
Code example #10
def main(args):
    acc_list = []
    f1_score_list = []
    prec_list = []
    recall_list = []
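    # Repeat the full train/evaluate cycle 10 times and average the test metrics.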
    for i in range(10):
        setup_data()
        model = RCNN(vocab_size=args.vocab_size,
                     embedding_dim=args.embedding_dim,
                     hidden_size=args.hidden_size,
                     hidden_size_linear=args.hidden_size_linear,
                     class_num=args.class_num,
                     dropout=args.dropout).to(args.device)

        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model, dim=0)

        train_texts, train_labels = read_file(args.train_file_path)
        word2idx, embedding = build_dictionary(train_texts, args.vocab_size,
                                               args.lexical, args.syntactic,
                                               args.semantic)

        logger.info('Dictionary Finished!')

        full_dataset = CustomTextDataset(train_texts, train_labels, word2idx,
                                         args)
        num_train_data = len(full_dataset) - args.num_val_data
        train_dataset, val_dataset = random_split(
            full_dataset, [num_train_data, args.num_val_data])
        train_dataloader = DataLoader(dataset=train_dataset,
                                      collate_fn=lambda x: collate_fn(x, args),
                                      batch_size=args.batch_size,
                                      shuffle=True)

        valid_dataloader = DataLoader(dataset=val_dataset,
                                      collate_fn=lambda x: collate_fn(x, args),
                                      batch_size=args.batch_size,
                                      shuffle=True)

        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        train(model, optimizer, train_dataloader, valid_dataloader, embedding,
              args)
        logger.info('******************** Train Finished ********************')

        # Test
        if args.test_set:
            test_texts, test_labels = read_file(args.test_file_path)
            test_dataset = CustomTextDataset(test_texts, test_labels, word2idx,
                                             args)
            test_dataloader = DataLoader(
                dataset=test_dataset,
                collate_fn=lambda x: collate_fn(x, args),
                batch_size=args.batch_size,
                shuffle=True)

            model.load_state_dict(
                torch.load(os.path.join(args.model_save_path, "best.pt")))
            _, accuracy, precision, recall, f1, cm = evaluate(
                model, test_dataloader, embedding, args)
            logger.info('-' * 50)
            logger.info(
                f'|* TEST SET *| |ACC| {accuracy:>.4f} |PRECISION| {precision:>.4f} |RECALL| {recall:>.4f} |F1| {f1:>.4f}'
            )
            logger.info('-' * 50)
            logger.info('---------------- CONFUSION MATRIX ----------------')
            for row in cm:
                logger.info(row)
            logger.info('--------------------------------------------------')
            acc_list.append(accuracy / 100)
            prec_list.append(precision)
            recall_list.append(recall)
            f1_score_list.append(f1)

    avg_acc = sum(acc_list) / len(acc_list)
    avg_prec = sum(prec_list) / len(prec_list)
    avg_recall = sum(recall_list) / len(recall_list)
    avg_f1_score = sum(f1_score_list) / len(f1_score_list)
    logger.info('--------------------------------------------------')
    logger.info(
        f'|* TEST SET *| |Avg ACC| {avg_acc:>.4f} |Avg PRECISION| {avg_prec:>.4f} |Avg RECALL| {avg_recall:>.4f} |Avg F1| {avg_f1_score:>.4f}'
    )
    logger.info('--------------------------------------------------')
    plot_df = pd.DataFrame({
        'x_values': range(10),
        'avg_acc': acc_list,
        'avg_prec': prec_list,
        'avg_recall': recall_list,
        'avg_f1_score': f1_score_list
    })
    plt.plot('x_values',
             'avg_acc',
             data=plot_df,
             marker='o',
             markerfacecolor='blue',
             markersize=12,
             color='skyblue',
             linewidth=4)
    plt.plot('x_values',
             'avg_prec',
             data=plot_df,
             marker='',
             color='olive',
             linewidth=2)
    plt.plot('x_values',
             'avg_recall',
             data=plot_df,
             marker='',
             color='olive',
             linewidth=2,
             linestyle='dashed')
    plt.plot('x_values',
             'avg_f1_score',
             data=plot_df,
             marker='',
             color='olive',
             linewidth=2,
             linestyle='dashed')
    plt.legend()
    fname = 'lexical-semantic-syntactic.png' if args.lexical and args.semantic and args.syntactic \
        else 'semantic-syntactic.png' if args.semantic and args.syntactic \
        else 'lexical-semantic.png' if args.lexical and args.semantic \
        else 'lexical-syntactic.png' if args.lexical and args.syntactic \
        else 'lexical.png' if args.lexical \
        else 'syntactic.png' if args.syntactic \
        else 'semantic.png' if args.semantic \
        else 'plain.png'
    if not (path.exists('./images')):
        mkdir('./images')
    plt.savefig(path.join('./images', fname))
Code example #11
File: main.py Project: shubhampachori12110095/VQA-5
def main():
    args = parse_args()

    # Set the GPU to use
    torch.cuda.set_device(args.gpu)

    annotations = osp.expanduser(args.annotations)
    questions = osp.expanduser(args.questions)

    vqa_loader = dataset.get_train_dataloader(annotations, questions,
                                              args.images, args)
    # We always use the vocab from the training set
    vocab = vqa_loader.dataset.vocab

    maps = {
        "word_to_wid": vqa_loader.dataset.word_to_wid,
        "wid_to_word": vqa_loader.dataset.wid_to_word,
        "ans_to_aid": vqa_loader.dataset.ans_to_aid,
        "aid_to_ans": vqa_loader.dataset.aid_to_ans,
    }
    val_loader = dataset.get_val_dataloader(osp.expanduser(
        args.val_annotations),
                                            osp.expanduser(args.val_questions),
                                            args.val_images,
                                            args,
                                            maps=maps,
                                            vocab=vocab,
                                            shuffle=False)

    arch = Models[args.arch].value
    model = arch(len(vocab), output_dim=args.top_answer_limit)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=tuple(args.betas),
                           weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.decay_interval,
                                    gamma=args.lr_decay)

    vis = visualize.Visualizer(args.port)

    print("Beginning training")
    print("#" * 80)

    for epoch in range(args.start_epoch, args.epochs):
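        # scheduler.step() at the start of the epoch follows the pre-1.1.0
        # PyTorch convention; newer releases expect it after optimizer.step().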
        scheduler.step()

        trainer.train(model,
                      vqa_loader,
                      criterion,
                      optimizer,
                      epoch,
                      args,
                      vis=vis)
        trainer.evaluate(model, val_loader, criterion, epoch, args, vis=vis)

    print("Training complete!")
Code example #12
def evaluate_multiple(run_names, model_names, categories, backup_model):
    runs_len = len(run_names)
    models_len = len(model_names)
    categories_len = len(categories)

    assert runs_len == models_len and models_len == categories_len, "Runs, models, and categories must be the same length."

    # Get model paths
    cur_dir = os.getcwd()
    model_paths = list()
    for i in range(runs_len):
        run_name = run_names[i]
        model_name = model_names[i]
        model_path = "%s\\Runs\\%s\\Models\\%s.h5" % (cur_dir, run_name,
                                                      model_name)
        assert os.path.exists(
            model_path), "Model for %s does not exist." % run_name
        model_paths.append(model_path)

    # Load Test Data
    print("Loading test data...", end="", flush=True)
    test_ids, test_images_raw = fileutils.read_test_data_raw('./Data/test.csv')
    print("done.")

    # Normalize
    print("Normalizing data...", end="", flush=True)
    test_images = processing.normalize_images(test_images_raw)
    print("done.")

    # Load best
    print("Creating evaluations...")
    eval_labels = list()
    for i in range(runs_len):
        predictions, _ = trainer.evaluate(test_images, model_paths[i])
        print("Creating evaluations... (%s/%s)" % (i + 1, runs_len))
        eval_labels.append(predictions)

    print("Creating backup evaluations...")
    backup_confidence, backup_predictions = trainer.evaluate(
        test_images, backup_model)
    print("Generating labels...")
    test_predictions = [0] * test_ids.shape[0]
    for j in range(len(test_predictions)):

        # Check each prediction and get highest confidence, also count conflicts
        conflicts = 0
        highest_confidence = -1
        highest_confidence_label = -1
        for k in range(runs_len):
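            # eval_labels[k][j] appears to hold [confidence, not-confidence]
            # scores for model k on test sample j.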
            cur_conf = eval_labels[k][j][0]
            cur_not_conf = eval_labels[k][j][1]
            if cur_conf > cur_not_conf and cur_conf > highest_confidence:
                # A conflict: another model had already claimed this sample.
                if highest_confidence_label != -1:
                    conflicts = conflicts + 1
                highest_confidence = cur_conf
                highest_confidence_label = categories[k]

        # Maybe do something with predictions based on conflicts
        if highest_confidence_label == -1:
            # None of the individual models claims this classification; fall back to the backup model
            highest_confidence_label = backup_predictions[j]
            highest_confidence = backup_confidence[j][highest_confidence_label]

        test_predictions[j] = highest_confidence_label

    print("Generating classification CSV...", end="", flush=True)
    fileutils.generate_classification(test_ids, test_predictions, run_names[0])
    print("done.")
Code example #13
def run_train(seed, train_dataset, valid_dataset, test_dataset, param_dict):
    reset_seed(seed)
    training_param_dict = param_dict['training_param']

    train_dataloader = get_dataloader(train_dataset,
                                      shuffle=True,
                                      param_dict=param_dict)
    valid_dataloader = get_dataloader(valid_dataset,
                                      shuffle=False,
                                      param_dict=param_dict)
    test_dataloader = get_dataloader(test_dataset,
                                     shuffle=False,
                                     param_dict=param_dict)

    model = MloClassifier(param_dict)
    print(model)
    model = model.cuda()

    criterion = torch.nn.BCEWithLogitsLoss()
    criterion = criterion.cuda()

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=training_param_dict['learning_rate'],
        weight_decay=training_param_dict['weight_decay'])
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=1)

    optimizer.zero_grad()

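    # Keep a copy of the model with the best validation Pred AUC.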
    max_auc = -1
    max_model = None
    for epoch in tqdm(range(training_param_dict['num_epochs'])):
        model, optimizer, scheduler, train_loss = train(
            model, optimizer, scheduler, criterion, train_dataloader)
        #print(f'{epoch}th Epoch Train Loss: {train_loss}')
        eval_loss, pred_auc, prob_auc = evaluate(model,
                                                 criterion,
                                                 valid_dataloader,
                                                 test=False)
        #print(f'{epoch}th Epoch Valid Loss: {eval_loss}, Pred AUC: {pred_auc}, Prob AUC: {prob_auc}')

        if pred_auc > max_auc:
            max_auc = pred_auc
            max_model = copy.deepcopy(model)

    eval_loss, pred_auc, prob_auc = evaluate(max_model,
                                             criterion,
                                             test_dataloader,
                                             test=True)
    print(
        f'Evaluation on Test Dataset: Pred AUC: {pred_auc:.04f}, Prob AUC: {prob_auc:.04f}'
    )

    rd = param_dict['rep_dim']
    mp = param_dict['model_param']
    tp = param_dict['training_param']
    save_str = 'model'
    save_str += f'_feature_dim_{rd["feature"]}'
    save_str += f'_electra_dim_{rd["electra"]}'
    save_str += f'_liwc_dim_{rd["liwc"]}'
    save_str += f'_vader_dim_{rd["vader"]}'
    save_str += f'_liwc_leaves_dim_{rd["liwc_leaves"]}'
    save_str += f'_vader_leaves_dim_{rd["vader_leaves"]}'
    save_str += f'_final_dim_{rd["final"]}'
    save_str += f'_use_attention_{mp["use_attention"]}'
    save_str += f'_dropout_{mp["dropout"]}'
    save_str += f'_seed_{tp["seed"]}'
    save_str += f'_learning_rate_{tp["learning_rate"]}'
    save_str += f'_batch_size_{tp["batch_size"]}'
    save_str += f'_weight_decay_{tp["weight_decay"]}'
    save_str += f'_num_epochs_{tp["num_epochs"]}'
    # Save the weights of the best validation model, not the last epoch.
    torch.save(max_model.state_dict(), f'./saved_models/{save_str}')

    return pred_auc, prob_auc, max_model
Code example #14
File: test.py Project: bobycv06fpm/forest-prediction
def main(config):
    logger = config.get_logger('test')
    # setup data_loader instances
    batch_size = 1
    if config['data_loader_val']['args']['max_dataset_size'] == 'inf':
        max_dataset_size = float('inf')
    else:
        max_dataset_size = config['data_loader_val']['args'][
            'max_dataset_size']
    data_loader = getattr(module_data, config['data_loader_val']['type'])(
        img_dir=config['data_loader_val']['args']['img_dir'],
        label_dir=config['data_loader_val']['args']['label_dir'],
        batch_size=batch_size,
        years=config['data_loader_val']['args']['years'],
        max_dataset_size=max_dataset_size,
        shuffle=False,
        num_workers=1,
    )
    landsat_mean, landsat_std = (0.3326, 0.3570, 0.2224), (0.1059, 0.1086,
                                                           0.1283)
    # build model architecture
    model = config.initialize('arch', module_arch)
    logger.info(model)

    # get function handles of loss and metrics
    loss_fn = config.initialize('loss', module_loss)
    # loss_fn = getattr(module_loss, config['loss'])
    metric_fns = [getattr(module_metric, met) for met in config['metrics']]

    logger.info('Loading checkpoint: {} ...'.format(config.resume))
    checkpoint = torch.load(config.resume)
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # prepare model for testing
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    total_loss = 0.0
    total_metrics = torch.zeros(len(metric_fns))
    pred_dir = '/'.join(str(config.resume.absolute()).split('/')[:-1])
    out_dir = os.path.join(
        pred_dir, get_output_dir(config['data_loader_val']['args']['img_dir']))

    if not os.path.isdir(pred_dir):
        os.makedirs(pred_dir)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

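    # 2x2 pixel-level confusion histogram, accumulated over all batches.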
    hist = np.zeros((2, 2))
    with torch.no_grad():
        for i, batch in enumerate(tqdm(data_loader)):
            # for i, (data, target) in enumerate(tqdm(data_loader)):
            init_time = time.time()
            loss = None
            data, target = batch
            udata = normalize_inverse(data, landsat_mean, landsat_std)

            data, target = data.to(device, dtype=torch.float), target.to(
                device, dtype=torch.float)
            output = model(data)

            output_probs = torch.sigmoid(output)
            binary_target = threshold_outputs(
                target.data.cpu().numpy().flatten())
            output_binary = threshold_outputs(
                output_probs.data.cpu().numpy().flatten())

            np.save('planet_prediction_{}.npy'.format(i),
                    output_binary.reshape(-1, 1, 256, 256)[0, 0, :, :])
            # print(output_binary.shape, 'SHAPEEE')
            hist += fast_hist(output_binary, binary_target)
            images = {
                'img': udata.cpu().numpy(),
                'gt': target.cpu().numpy(),
                'pred': output_binary.reshape(-1, 1, 256, 256),
            }

            # Save single images (C=3) or double images (C=6)
            if images['img'].shape[1] == 3:
                save_simple_images(3, images, out_dir, i * batch_size)
            else:
                save_double_images(3, images, out_dir, i * batch_size)
            # computing loss, metrics on test set
            loss = loss_fn(output, target)
            batch_size = data.shape[0]
            total_loss += loss.item() * batch_size

    # Update binary segmentation metrics
    acc, acc_cls, mean_iu, fwavacc, precision, recall, f1_score = \
        evaluate(hist=hist)
    n_samples = len(data_loader.sampler)
    log = {
        'loss': total_loss / n_samples,
        'acc': acc,
        'mean_iu': mean_iu,
        'fwavacc': fwavacc,
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score
    }
    logger.info(log)
Code example #15
def main():
    args = parse_args()

    # Set the GPU to use
    torch.cuda.set_device(args.gpu)

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    vqa_loader = dataset.get_train_dataloader(osp.expanduser(args.annotations),
                                              osp.expanduser(args.questions),
                                              args.images,
                                              args,
                                              raw_images=args.raw_images,
                                              transforms=transform)
    # We always use the vocab from the training set
    vocab = vqa_loader.dataset.vocab

    maps = {
        "vocab": vocab,
        "word_to_wid": vqa_loader.dataset.word_to_wid,
        "wid_to_word": vqa_loader.dataset.wid_to_word,
        "ans_to_aid": vqa_loader.dataset.ans_to_aid,
        "aid_to_ans": vqa_loader.dataset.aid_to_ans,
    }
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    val_loader = dataset.get_val_dataloader(osp.expanduser(
        args.val_annotations),
                                            osp.expanduser(args.val_questions),
                                            args.val_images,
                                            args,
                                            raw_images=args.raw_images,
                                            maps=maps,
                                            vocab=vocab,
                                            shuffle=False,
                                            transforms=val_transform)

    arch = Models[args.arch].value
    model = arch(len(vocab),
                 output_dim=args.top_answer_limit,
                 raw_images=args.raw_images)

    if args.resume:
        state = torch.load(args.resume)
        model.load_state_dict(state["model"])

    model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           betas=tuple(args.betas),
                           weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.decay_interval,
                                    gamma=args.lr_decay)

    if args.visualize:
        vis = visualize.Visualizer(args.port)
    else:
        vis = None

    print("Beginning training")
    print("#" * 80)

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()

        trainer.train(model,
                      vqa_loader,
                      criterion,
                      optimizer,
                      epoch,
                      args,
                      vis=vis)
        trainer.evaluate(model, val_loader, criterion, epoch, args, vis=vis)

    print("Training complete!")
Code example #16
    logger.info("***** Running predictions *****")
    logger.info("  Num orig examples = %d", len(examples))
    logger.info("  Num split examples = %d", len(features))
    logger.info("  Batch size = %d", args.batch_size)

    # Run prediction for full data
    dataloader = DataLoader(dataset,
                            batch_size=args.batch_size,
                            num_workers=os.cpu_count())

    model.frozen_bert = False

    metrics = evaluate(
        args,
        model,
        tqdm(dataloader, desc="Prediction"),
        output_composer=output_composer,
        sequence_metrics=SequenceMetrics([]),  # Empty metrics
        reset=True,
    )

    # Get predictions for all examples
    all_y_pred_raw = output_composer.get_outputs()
    # Filter invalid predictions
    all_y_pred = [
        tag_encoder.decode_valid(y_pred) for y_pred in all_y_pred_raw
    ]

    # Write predictions to output file
    if args.output_format == 'conll':
        write_conll_prediction_file(args.output_file, examples, all_y_pred)
Code example #17
File: main.py Project: wy-ei/text-classification
    #                  embed_size=len(dct),
    #                  device=device)

    # model = Bi_RNN_ATTN(class_num=len(CATEGIRY_LIST),
    #                     embed_size=len(dct),
    #                     embed_dim=64,
    #                     device=device)

    lr = 0.001
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # train
    logger.info('training...')
    history = trainer.train(model,
                            optimizer,
                            train_dl,
                            valid_dl,
                            device=device,
                            epochs=5)

    # evaluate
    loss, acc = trainer.evaluate(model, valid_dl, device=device)

    # predict
    logger.info('predicting...')
    y_pred = trainer.predict(model, test_dl, device=device)

    y_true = test_ds.labels
    test_acc = (y_true == y_pred).sum() / y_pred.shape[0]
    logger.info('test - acc: {}'.format(test_acc))
Code example #18
import math
import time

# criterion = cal_loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
train_losses = []
test_losses = []

for epoch in range(1, N_EPOCHS + 1):
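    # Rebuild the BPTT iterators each epoch (generators are consumed after one pass).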
    train_iter = gen_bptt_iter(train_data, BATCH_SIZE, SEQ_LEN, DEVICE)
    valid_iter = gen_bptt_iter(valid_data, BATCH_SIZE, SEQ_LEN, DEVICE)

    start_time = time.time()
    train_loss = train(model, train_iter, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iter, criterion)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    print(f'Epoch: {epoch:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(
        f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}'
    )
    print(
        f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}'
    )
    train_losses.append(train_loss)
    test_losses.append(valid_loss)

test_iter = gen_bptt_iter(test_data, BATCH_SIZE, SEQ_LEN, DEVICE)
test_loss = evaluate(model, test_iter, criterion)
Code example #19
def main(config):
    logger = config.get_logger('test')
    # setup data_loader instances
    batch_size = 1
    if config['data_loader_val']['args']['max_dataset_size'] == 'inf':
        max_dataset_size = float('inf')
    else:
        max_dataset_size = config['data_loader_val']['args'][
            'max_dataset_size']
    data_loader = getattr(module_data, config['data_loader_val']['type'])(
        img_dir=config['data_loader_val']['args']['img_dir'],
        label_dir=config['data_loader_val']['args']['label_dir'],
        video_dir=config['data_loader_val']['args']['video_dir'],
        batch_size=batch_size,
        max_dataset_size=max_dataset_size,
        shuffle=False,
        num_workers=1)
    landsat_mean, landsat_std = (0.3326, 0.3570, 0.2224), (0.1059, 0.1086,
                                                           0.1283)
    # build model architecture
    model = config.initialize('arch', module_arch)
    logger.info(model)

    # get function handles of loss and metrics
    loss_fn = config.initialize('loss', module_loss)
    # loss_fn = getattr(module_loss, config['loss'])
    metric_fns = [getattr(module_metric, met) for met in config['metrics']]

    logger.info('Loading checkpoint: {} ...'.format(config.resume))
    checkpoint = torch.load(config.resume)
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    # prepare model for testing
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()

    total_loss = 0.0
    total_metrics = torch.zeros(len(metric_fns))
    pred_dir = '/'.join(str(config.resume.absolute()).split('/')[:-1])
    # pred_dir = os.path.join(pred_dir, 'predictions')
    # out_dir = os.path.join(pred_dir, 'video_loss_last_three')
    out_dir = os.path.join(pred_dir, 'rm')
    if not os.path.isdir(pred_dir):
        os.makedirs(pred_dir)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # out_dir = '/'.join(str(config.resume.absolute()).split('/')[:-1])
    # out_dir = os.path.join(out_dir, 'predictions')
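    # Per-year 2x2 confusion histograms; `hist` itself is not updated in this snippet.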
    hist = np.zeros((2, 2))
    hist2013 = np.zeros((2, 2))
    hist2014 = np.zeros((2, 2))
    hist2015 = np.zeros((2, 2))
    hist2016 = np.zeros((2, 2))
    hist2017 = np.zeros((2, 2))

    # This script only supports batch=1
    with torch.no_grad():
        for i, batch in enumerate(tqdm(data_loader)):
            if i not in [0, 84, 55]:
                continue
            # if i not in [18, 43, 51, 61, 73, 84, 85, 88, 116, 124, 198, 201, 214, 245, 325, 330]:
            # if i not in [84, 85, 88, 116, 124, 198, 201, 214, 245, 325]:
            #     continue
        # for i, (data, target) in enumerate(tqdm(data_loader)):
            init_time = time.time()
            loss = None
            img_arr2013, mask_arr2013 = batch['2013']['img_arr'], batch[
                '2013']['mask_arr']
            img_arr2014, mask_arr2014 = batch['2014']['img_arr'], batch[
                '2014']['mask_arr']
            img_arr2015, mask_arr2015 = batch['2015']['img_arr'], batch[
                '2015']['mask_arr']
            img_arr2016, mask_arr2016 = batch['2016']['img_arr'], batch[
                '2016']['mask_arr']
            img_arr2017, mask_arr2017 = batch['2017']['img_arr'], batch[
                '2017']['mask_arr']

            img_arr2015p, _ = batch['2015p']['img_arr'], batch['2015'][
                'mask_arr']
            img_arr2016p, _ = batch['2016p']['img_arr'], batch['2016'][
                'mask_arr']
            img_arr2017p, _ = batch['2017p']['img_arr'], batch['2017'][
                'mask_arr']

            uimg_arr2013, uimg_arr2014, uimg_arr2015, uimg_arr2016, uimg_arr2017 = \
                normalize_inverse(img_arr2013, landsat_mean, landsat_std), \
                normalize_inverse(img_arr2014, landsat_mean, landsat_std), \
                normalize_inverse(img_arr2015, landsat_mean, landsat_std), \
                normalize_inverse(img_arr2016, landsat_mean, landsat_std), \
                normalize_inverse(img_arr2017, landsat_mean, landsat_std)
            uimg_arr2015p, uimg_arr2016p, uimg_arr2017p = normalize_inverse(img_arr2015p, landsat_mean, landsat_std), \
                normalize_inverse(img_arr2016p, landsat_mean, landsat_std), \
                normalize_inverse(img_arr2017p, landsat_mean, landsat_std)

            pred2013 = update_individual_hists(img_arr2013, mask_arr2013,
                                               hist2013, device, model)
            pred2014 = update_individual_hists(img_arr2014, mask_arr2014,
                                               hist2014, device, model)
            pred2015 = update_individual_hists(img_arr2015, mask_arr2015,
                                               hist2015, device, model)
            pred2016 = update_individual_hists(img_arr2016, mask_arr2016,
                                               hist2016, device, model)
            pred2017 = update_individual_hists(img_arr2017, mask_arr2017,
                                               hist2017, device, model)

            pred2015p = update_individual_hists(img_arr2015p, mask_arr2015,
                                                hist2015, device, model)
            pred2016p = update_individual_hists(img_arr2016p, mask_arr2016,
                                                hist2016, device, model)
            pred2017p = update_individual_hists(img_arr2017p, mask_arr2017,
                                                hist2017, device, model)

            images = {
                '2013': {
                    'img': uimg_arr2013.cpu().numpy(),
                    'gt': mask_arr2013.cpu().numpy(),
                    'pred': pred2013
                },
                '2014': {
                    'img': uimg_arr2014.cpu().numpy(),
                    'gt': mask_arr2014.cpu().numpy(),
                    'pred': pred2014
                },
                '2015': {
                    'img': uimg_arr2015.cpu().numpy(),
                    'gt': mask_arr2015.cpu().numpy(),
                    'pred': pred2015
                },
                '2016': {
                    'img': uimg_arr2016.cpu().numpy(),
                    'gt': mask_arr2016.cpu().numpy(),
                    'pred': pred2016
                },
                '2017': {
                    'img': uimg_arr2017.cpu().numpy(),
                    'gt': mask_arr2017.cpu().numpy(),
                    'pred': pred2017
                },
                '2015p': {
                    'img': uimg_arr2015p.cpu().numpy(),
                    'gt': mask_arr2015.cpu().numpy(),
                    'pred': pred2015p
                },
                '2016p': {
                    'img': uimg_arr2016p.cpu().numpy(),
                    'gt': mask_arr2016.cpu().numpy(),
                    'pred': pred2016p
                },
                '2017p': {
                    'img': uimg_arr2017p.cpu().numpy(),
                    'gt': mask_arr2017.cpu().numpy(),
                    'pred': pred2017p
                },
            }
            save_video_images256(images, out_dir, i * batch_size)

    acc2013, acc_cls2013, mean_iu2013, fwavacc2013, precision2013, recall2013, f1_score2013 = \
        evaluate(hist=hist2013)

    acc2014, acc_cls2014, mean_iu2014, fwavacc2014, precision2014, recall2014, f1_score2014 = \
        evaluate(hist=hist2014)

    acc2015, acc_cls2015, mean_iu2015, fwavacc2015, precision2015, recall2015, f1_score2015 = \
        evaluate(hist=hist2015)

    acc2016, acc_cls2016, mean_iu2016, fwavacc2016, precision2016, recall2016, f1_score2016 = \
        evaluate(hist=hist2016)
    acc2017, acc_cls2017, mean_iu2017, fwavacc2017, precision2017, recall2017, f1_score2017 = \
        evaluate(hist=hist2017)
    n_samples = len(data_loader.sampler)

    log2013 = {
        'loss2013': -1,
        'acc': acc2013,
        'mean_iu': mean_iu2013,
        'fwavacc': fwavacc2013,
        'precision': precision2013,
        'recall': recall2013,
        'f1_score': f1_score2013
    }

    log2014 = {
        'loss2014': -1,
        'acc': acc2014,
        'mean_iu': mean_iu2014,
        'fwavacc': fwavacc2014,
        'precision': precision2014,
        'recall': recall2014,
        'f1_score': f1_score2014
    }

    log2015 = {
        'loss2015': -1,
        'acc': acc2015,
        'mean_iu': mean_iu2015,
        'fwavacc': fwavacc2015,
        'precision': precision2015,
        'recall': recall2015,
        'f1_score': f1_score2015
    }

    log2016 = {
        'loss2016': -1,
        'acc': acc2016,
        'mean_iu': mean_iu2016,
        'fwavacc': fwavacc2016,
        'precision': precision2016,
        'recall': recall2016,
        'f1_score': f1_score2016
    }

    log2017 = {
        'loss2017': -1,
        'acc': acc2017,
        'mean_iu': mean_iu2017,
        'fwavacc': fwavacc2017,
        'precision': precision2017,
        'recall': recall2017,
        'f1_score': f1_score2017
    }

    logger.info(log2013)
    logger.info(log2014)
    logger.info(log2015)
    logger.info(log2016)
    logger.info(log2017)
Code example #20
File: main.py Project: huitangtang/DisClusterDA
def main():
    global args, best_prec1
    args = opts()

    current_epoch = 0

    # define base model
    model = resnet(args)
    # define multi-GPU
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_afem = AdaptiveFilteringEMLossForTarget(eps=args.eps).cuda()

    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.device_count() > 1:
        torch.cuda.manual_seed_all(args.seed)

    # apply different learning rates to different layers
    lr_fe = args.lr * 0.1 if args.pretrained else args.lr
    if args.arch.find('resnet') != -1:
        params_list = [
            {
                'params': model.module.conv1.parameters(),
                'lr': lr_fe
            },
            {
                'params': model.module.bn1.parameters(),
                'lr': lr_fe
            },
            {
                'params': model.module.layer1.parameters(),
                'lr': lr_fe
            },
            {
                'params': model.module.layer2.parameters(),
                'lr': lr_fe
            },
            {
                'params': model.module.layer3.parameters(),
                'lr': lr_fe
            },
            {
                'params': model.module.layer4.parameters(),
                'lr': lr_fe
            },
            {
                'params': model.module.fc1.parameters()
            },
            {
                'params': model.module.fc2.parameters()
            },
        ]
        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(params_list,
                                        lr=args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay,
                                        nesterov=args.nesterov)
        if args.lr_scheduler == 'dann':
            lr_lambda = lambda epoch: 1 / pow(
                (1 + 10 * epoch / args.epochs), 0.75)
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                          lr_lambda,
                                                          last_epoch=-1)
        elif args.lr_scheduler == 'cosine':
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=args.epochs, eta_min=0, last_epoch=-1)
        elif args.lr_scheduler == 'step':
            lr_lambda = lambda epoch: args.gamma**(
                epoch + 1 > args.decay_epoch[
                    1] and 2 or epoch + 1 > args.decay_epoch[0] and 1 or 0)
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                          lr_lambda,
                                                          last_epoch=-1)
    else:
        raise ValueError('Unavailable model architecture!!!')

    if args.resume:
        print("==> loading checkpoints '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        current_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        print("==> loaded checkpoint '{}'(epoch {})".format(
            args.resume, checkpoint['epoch']))
    if not os.path.isdir(args.log):
        os.makedirs(args.log)
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    state = {k: v for k, v in args._get_kwargs()}
    log.write(json.dumps(state) + '\n')
    log.close()

    # start time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------')
    log.close()

    cudnn.benchmark = True
    # process data and prepare dataloaders
    train_loader_source, train_loader_target, val_loader_target, val_loader_source = generate_dataloader(
        args)

    if args.eval_only:
        prec1 = evaluate(val_loader_target, model, criterion, -1, args)
        print(' * Eval acc@1: {:.3f}'.format(prec1))
        return

    print('begin training')
    train_loader_source_batch = enumerate(train_loader_source)
    train_loader_target_batch = enumerate(train_loader_target)
    batch_number = count_epoch_on_large_dataset(train_loader_target,
                                                train_loader_source)
    num_itern_total = args.epochs * batch_number
    test_freq = int(num_itern_total / 200)
    print('test_freq: ', test_freq)
    args.start_epoch = current_epoch
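    # cs_*/ct_* look like running class-center estimates for the source and
    # target domains at the two feature levels (feat1_dim / feat2_dim).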
    cs_1 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat1_dim).fill_(0))
    ct_1 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat1_dim).fill_(0))
    cs_2 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat2_dim).fill_(0))
    ct_2 = Variable(
        torch.cuda.FloatTensor(args.num_classes,
                               model.module.feat2_dim).fill_(0))
    for itern in range(args.start_epoch * batch_number, num_itern_total):
        # train for one iteration
        train_loader_source_batch, train_loader_target_batch, cs_1, ct_1, cs_2, ct_2 = train_compute_class_mean(
            train_loader_source, train_loader_source_batch,
            train_loader_target, train_loader_target_batch, model, criterion,
            criterion_afem, optimizer, itern, current_epoch, cs_1, ct_1, cs_2,
            ct_2, args)
        # evaluate on target
        if (itern + 1) % batch_number == 0 or (itern + 1) % test_freq == 0:
            prec1 = evaluate(val_loader_target, model, criterion,
                             current_epoch, args)
            # record the best prec1
            is_best = prec1 > best_prec1
            if is_best:
                best_prec1 = prec1
                log = open(os.path.join(args.log, 'log.txt'), 'a')
                log.write(
                    '\n                                                                         best acc: %3f'
                    % (best_prec1))
                log.close()

            # update learning rate
            if (itern + 1) % batch_number == 0:
                scheduler.step()
                current_epoch += 1

            # save checkpoint
            save_checkpoint(
                {
                    'epoch': current_epoch,
                    'arch': args.arch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, args)

        if current_epoch > args.stop_epoch:
            break

    # end time
    log = open(os.path.join(args.log, 'log.txt'), 'a')
    log.write('\n * best acc: %3f' % best_prec1)
    log.write('\n-------------------------------------------\n')
    log.write(time.asctime(time.localtime(time.time())))
    log.write('\n-------------------------------------------\n')
    log.close()
Code example #21
File: run.py Project: ajcai/tf-estimator-template
import argparse
from trainer import train, evaluate, predict

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run train, eval or predict.')
    parser.add_argument('mode', help='train, eval or predict')
    args = parser.parse_args()

    if args.mode == "train":
        train()
    if args.mode == "eval":
        evaluate()
    if args.mode == "predict":
        predict()
Code example #22
def main(config):

	if config.mod_split == 'computation_split' and config.dataset in ['boston', 'wine', 'kin8nm', 'naval', 'protein']:
		config.hc_threshold = 0.5
	elif config.mod_split == 'computation_split' and config.dataset in ['cement', 'energy_efficiency', 'power_plant', 'yacht']:
		config.hc_threshold = 0.75

	data = dataset.load_dataset(config)

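	# data maps feature-set indices '0'..'n-1' to their matrices; targets are under 'y'.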
	n_feature_sets = len(data.keys()) - 1
	X = [np.array(data['{}'.format(i)]) for i in range(n_feature_sets)]
	y = np.array(data['y'])

	config.n_feature_sets = n_feature_sets
	config.feature_split_lengths = [i.shape[1] for i in X]

	print('Dataset used ', config.dataset)
	print('Number of feature sets ', n_feature_sets)
	[print('Shape of feature set {} {}'.format(e, np.array(i).shape)) for e,i in enumerate(X)]

	utils.make_model_dir(config.model_dir)

	if config.build_model == 'mc_dropout':
		config.n_models = 1

	if config.dataset in ['boston', 'cement', 'power_plant', 'wine', 'yacht', 'kin8nm', 'energy_efficiency', 'naval']:
		config.units = 50
	elif config.dataset in ['msd', 'protein', 'toy']:
		config.units = 100
	elif config.dataset in ['alzheimers', 'alzheimers_test']:
		config.units = 100
		config.feature_split_lengths[-1] = 21 # COMPARE features after PCA
		config.n_folds = 5

	if config.build_model == 'combined_pog' and config.dataset in ['cement', 'protein', 'yacht', 'power_plant']:
		config.y_scaling = 1

	if config.dataset == 'protein':
		config.n_folds = 5

	if config.dataset == 'msd':
		config.n_models = 2

	if config.mod_split == 'none':
		config.n_feature_sets = 1

	if config.task == 'train':
		print('Training..')
		trainer.train(X, y, config)

	elif config.task == 'evaluate':
		print('Evaluating..')
		trainer.evaluate(X, y, config)

	elif config.task == 'experiment':

		config.plot_name = os.path.join(config.plot_path, '{}_{}.png'.format(config.dataset, config.exp_name))
		
		if config.exp_name == 'defer_simulation':
			print('Plotting Calibration..')
			experiments.plot_defer_simulation(X, y, config)

		elif config.exp_name == 'toy_regression':
			print('Toy regression ..')
			experiments.plot_toy_regression(config)

		elif config.exp_name == 'clusterwise_ood':
			print('Plotting OOD..')
			experiments.plot_ood(X, y, config)

		elif config.exp_name == 'kl_mode':
			print('Plotting KL..')
			experiments.plot_kl(X, y, config)

		elif config.exp_name == 'show_summary':
			print('Showing..')
			experiments.show_model_summary(X, y, config)

		elif config.exp_name == 'empirical_rule_test':
			print('Empirical rule tests..')
			experiments.empirical_rule_test(X, y, config)
Code example #23
File: main.py Project: aiedward/Bi-LSTM-CRF-1
        name = "hidden_512_embed_150"
        hidden_size = 512
        num_tags = len(TAG_MAP)
        embed_dim = 300
        embed_size = len(dct)
        dropout = 0.5
        device = device
        condtraints = condtraints

    model = BiLSTM_CRF(Config())
    model = model.to(device)
    lr = 0.001
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # train model
    train(model,
          optimizer,
          train_dl,
          val_dl,
          device=device,
          epochs=20,
          early_stop=True,
          save_every_n_epochs=3)

    # evaluate
    test_corpus = read_corpus(test_corpus_path)
    test_ds = NER_DataSet(test_corpus, dct)
    test_dl = DataLoader(test_ds, batch_size=64)

    metric = evaluate(model, test_dl, device)
    print(metric.report())
Code example #24
def main():
    # logging configuration
    logging.basicConfig(level=logging.INFO,
                        format="[%(asctime)s]: %(message)s")

    # parse command line input
    opt = utils.parse_arg()

    # Set GPU
    opt.cuda = opt.gpuid >= 0
    if opt.cuda:
        torch.cuda.set_device(opt.gpuid)
    else:
        # CPU training is not supported for now; please use a GPU
        raise NotImplementedError("CPU training is not supported")

    # prepare training and validation dataset
    db = data_prepare.prepare_db(opt)

    # sanity check for the FG-NET dataset:
    # the eval set lists should contain 1002 images in total
    total_eval_imgs = sum(len(db['eval'][i]) for i in range(len(db['eval'])))
    print(total_eval_imgs)
    if db['train'][0].name == 'FGNET':
        assert total_eval_imgs == 1002, 'The preparation of the evalset is incorrect.'

    # training
    if opt.train:
        best_MAEs = []
        last_MAEs = []
        # record the current time
        opt.save_dir += time.asctime(time.localtime(time.time()))
        # for FG-NET, do training multiple times for leave-one-out validation
        # for CACD, do training just once
        for exp_id in range(len(db['train'])):
            # initialize the model
            model_train = model.prepare_model(opt)
            #print("model shape:")
            #  print(db['train'].head)

            #print( np.array(db['eval']).shape)
            # configurate the optimizer and learning rate scheduler
            optim, sche = optimizer.prepare_optim(model_train, opt)

            # train the model and record mean average error (MAE)
            model_train, MAE, last_MAE = trainer.train(model_train, optim,
                                                       sche, db, opt, exp_id)
            best_MAEs += MAE
            last_MAEs.append(last_MAE.data.item())

            # remove the trained model for leave-one-out validation
            if exp_id != len(db['train']) - 1:
                del model_train

        #np.save('./MAE.npy', np.array(best_MAEs))
        #np.save('./Last_MAE.npy', np.array(last_MAEs))
        # save the final trained model
        #utils.save_model(model_train, opt)

    # testing a pre-trained model
    elif opt.evaluate:
        # path to the pre-trained model
        save_dir = opt.test_model_path
        #example: save_dir = '../model/CACD_MAE_4.59.pth'
        model_loaded = torch.load(save_dir)
        # test the model on the evaluation set
        # the last subject is the test set (compatible with FG-NET)
        trainer.evaluate(model_loaded, db['eval'][-1], opt)
    return
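
The training branch above runs once per subject for FG-NET's leave-one-out validation. A minimal sketch of that protocol in isolation follows; `train_once` and `mae_for` are hypothetical stand-ins for the project's trainer.

# Leave-one-out sketch; train_once and mae_for are hypothetical helpers.
import numpy as np

def leave_one_out(subjects, train_once, mae_for):
    maes = []
    for held_out in range(len(subjects)):
        train_set = [s for i, s in enumerate(subjects) if i != held_out]
        model = train_once(train_set)          # fresh model per fold
        maes.append(mae_for(model, subjects[held_out]))
    return float(np.mean(maes))                # average MAE across folds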
Code example #25
def main(config):
    logger = config.get_logger('train', config['trainer']['verbosity'])
    '''===== Data Loader ====='''
    logger.info('preparing data loader')

    # setup data_loader instances
    data_loader = config.initialize('data_loader', module_data)
    valid_data_loader = data_loader.split_validation()
    '''===== Generator ====='''
    logger.info('preparing Generator')

    # build model architecture, then print to console
    model = config.initialize('arch', model_arch, 'generator')
    logger.info(model)

    # get function handles of loss and metrics
    loss_fn = getattr(module_loss, config['generator']['loss'])
    metric_fns = [
        getattr(module_metric, met) for met in config['generator']['metrics']
    ]

    # build the optimizer and learning rate scheduler; to disable the
    # scheduler, delete every line that references lr_scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.initialize('optimizer', torch.optim, 'generator',
                                  trainable_params)

    lr_scheduler = config.initialize('lr_scheduler', torch.optim.lr_scheduler,
                                     'generator', optimizer)

    generator = {
        'model': model,
        'loss_fn': loss_fn,
        'metric_fns': metric_fns,
        'optimizer': optimizer,
        'lr_scheduler': lr_scheduler
    }
    '''===== Discriminator ====='''
    logger.info('preparing Discriminator')

    # build model architecture, then print to console
    model = config.initialize('arch', model_arch, 'discriminator')
    logger.info(model)

    # get function handles of loss and metrics
    loss_fn = getattr(module_loss, config['discriminator']['loss'])
    metric_fns = [
        getattr(module_metric, met)
        for met in config['discriminator']['metrics']
    ]

    # build the optimizer and learning rate scheduler; to disable the
    # scheduler, delete every line that references lr_scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.initialize('optimizer', torch.optim, 'discriminator',
                                  trainable_params)

    lr_scheduler = config.initialize('lr_scheduler', torch.optim.lr_scheduler,
                                     'discriminator', optimizer)

    discriminator = {
        'model': model,
        'loss_fn': loss_fn,
        'metric_fns': metric_fns,
        'optimizer': optimizer,
        'lr_scheduler': lr_scheduler
    }

    gif_generator = GifGenerator(str(config.output_dir / "progress.gif"))
    '''===== Training ====='''

    trainer = Trainer(config, logger, generator, discriminator, gif_generator,
                      data_loader, valid_data_loader)

    trainer.train()
    '''===== Testing ====='''

    log = evaluate(generator, discriminator, config, valid_data_loader)

    log_msg = '< Evaluation >\n'
    log_msg += '    Generator :\n'
    for key, value in log['generator'].items():
        if isinstance(value, float):
            value = round(value, 6)
        log_msg += '        {:15s}: {}'.format(str(key), value) + '\n'

    log_msg += '    Discriminator :\n'
    for key, value in log['discriminator'].items():
        if isinstance(value, float):
            value = round(value, 6)
        log_msg += '        {:15s}: {}'.format(str(key), value) + '\n'

    logger.info(log_msg)
    '''===== Generate samples ====='''

    num_samples = 16
    if trainer.default_z:
        z = trainer.default_z
    else:
        if num_samples != trainer.num_samples and trainer.num_samples > 0:
            num_samples = trainer.num_samples
        z = torch.randn(num_samples, trainer.z_size)  # Variable is a no-op since PyTorch 0.4

    z = z.to(trainer.device)

    generator['model'].eval()
    samples = generator['model'](z).detach().cpu()
    output_file = str(config.output_dir / "final.png")

    img = save_generated_images(samples, output_file)  # samples is already detached above
    logger.info("saved generated images at {}".format(output_file))

    gif_generator.add_image(img)
    gif_generator.save()

    logger.info("saved progress as a gif at {}".format(gif_generator.gif_name))
Code example #26
    # the snippet begins mid-script; a standard argparse setup is assumed here
    import argparse
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument('--epochs', type=str, default='200')
    arg_parser.add_argument('--generator', type=str, default='gan')
    arg_parser.add_argument('--optim', type=str, default='adam')
    arg_parser.add_argument('--proxy_dataset', type=str, default='cifar10')
    arg_parser.add_argument('--sample_optimization', type=str, default='class')
    arg_parser.add_argument('--samples', type=str, default='optimized')
    arg_parser.add_argument('--size', type=int, default=32)
    arg_parser.add_argument('--student', type=str, default='half_lenet')
    arg_parser.add_argument('--teacher', type=str, default='lenet')
    arg_parser.add_argument('--true_dataset', type=str, default='split_fmnist')

    env = arg_parser.parse_args()

    teacher, teacher_dataset, student = setup.prepare_teacher_student(env)
    trainer.evaluate(teacher, teacher_dataset)
    generator = setup.prepare_generator(env)

    student_dataset = setup.prepare_student_dataset(env, teacher,
                                                    teacher_dataset, student,
                                                    generator)

    if env.optim == 'sgd':
        trainer.train_or_restore_predictor(student,
                                           student_dataset,
                                           loss_type='binary',
                                           n_epochs=int(env.epochs))
    else:
        trainer.train_or_restore_predictor_adam(student,
                                                student_dataset,
                                                loss_type='binary',
                                                # the original call is truncated here; the closing
                                                # argument is assumed to mirror the SGD branch above
                                                n_epochs=int(env.epochs))
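
A rough sketch of what a helper like `setup.prepare_student_dataset` could do under the hood: draw samples from the generator, label them with the frozen teacher, and package the result as a dataset. Everything below, including the function and argument names, is an assumption about the project's internals.

# Hypothetical sketch of teacher-labelled dataset construction.
import torch
from torch.utils.data import TensorDataset

def make_student_dataset(generator, teacher, n_samples=10000,
                         z_dim=100, batch=256, device='cpu'):
    xs, ys = [], []
    generator.eval()
    teacher.eval()
    with torch.no_grad():
        for _ in range(0, n_samples, batch):
            z = torch.randn(batch, z_dim, device=device)
            x = generator(z)
            y = teacher(x).argmax(dim=1)   # hard teacher labels
            xs.append(x.cpu())
            ys.append(y.cpu())
    return TensorDataset(torch.cat(xs), torch.cat(ys))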
Code example #27
def main(config):
    logger = config.get_logger('test')
    '''===== Data Loader ====='''
    logger.info('preparing data loader')

    # setup data_loader instances
    data_loader = getattr(module_data, config['data_loader']['type'])(
        config['data_loader']['args']['data_dir'],
        batch_size=512,
        shuffle=False,
        validation_split=0.0,
        training=False,
        num_workers=2)
    '''===== Generator ====='''
    logger.info('preparing Generator')

    # build model architecture, then print to console
    model = config.initialize('arch', model_arch, 'generator')

    logger.info('Loading checkpoint for Generator: {} ...'.format(
        config.resume['generator']))
    checkpoint = torch.load(config.resume['generator'])
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    logger.info(model)

    # get function handles of loss and metrics
    loss_fn = getattr(module_loss, config['generator']['loss'])
    metric_fns = [
        getattr(module_metric, met) for met in config['generator']['metrics']
    ]

    generator = {'model': model, 'loss_fn': loss_fn, 'metric_fns': metric_fns}
    '''===== Discriminator ====='''
    logger.info('preparing Discriminator')

    # build model architecture, then print to console
    model = config.initialize('arch', model_arch, 'discriminator')

    logger.info('Loading checkpoint for Discriminator: {} ...'.format(
        config.resume['discriminator']))
    checkpoint = torch.load(config.resume['discriminator'])
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        model = torch.nn.DataParallel(model)
    model.load_state_dict(state_dict)

    logger.info(model)

    # get function handles of loss and metrics
    loss_fn = getattr(module_loss, config['discriminator']['loss'])
    metric_fns = [
        getattr(module_metric, met)
        for met in config['discriminator']['metrics']
    ]

    discriminator = {
        'model': model,
        'loss_fn': loss_fn,
        'metric_fns': metric_fns
    }
    '''===== Testing ====='''

    log = evaluate(generator, discriminator, config, data_loader)

    log_msg = '< Evaluation >\n'
    log_msg += '    Generator :\n'
    for key, value in log['generator'].items():
        if isinstance(value, float):
            value = round(value, 6)
        log_msg += '        {:15s}: {}'.format(str(key), value) + '\n'

    log_msg += '    Discriminator :\n'
    for key, value in log['discriminator'].items():
        if isinstance(value, float):
            value = round(value, 6)
        log_msg += '        {:15s}: {}'.format(str(key), value) + '\n'

    logger.info(log_msg)
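
One detail worth noting in the loading code above: the model is wrapped in `DataParallel` before `load_state_dict` when `n_gpu > 1`, because checkpoints saved from a `DataParallel` model prefix every key with `module.`. When such a checkpoint has to be loaded into a plain single-GPU model instead, the usual workaround is to strip the prefix; a minimal sketch:

# Strip the 'module.' prefix that DataParallel adds to state_dict keys.
def strip_module_prefix(state_dict):
    return {k[len('module.'):] if k.startswith('module.') else k: v
            for k, v in state_dict.items()}

# usage: model.load_state_dict(strip_module_prefix(checkpoint['state_dict']))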