Ejemplo n.º 1
0
def main(cmd=None, stdout=True):
    """Run an active-learning dialogue experiment end to end.

    Parses arguments, configures file+stdout logging, optionally trains (or
    loads) a seed model, then loops: solicit labels via the chosen strategy,
    step the environment, refit, and finally runs a last fit and logs metrics.

    Args:
        cmd: Optional argument list forwarded to ``get_args`` (None -> argv).
        stdout: Forwarded to ``get_args``; presumably controls console
            output -- confirm against get_args' signature.
    """
    args = get_args(cmd, stdout)

    # Unique run identifier: encodes every hyperparameter so log files and
    # checkpoints from different configurations never collide.
    model_id = "seed_{}_strat_{}_noise_fn_{}_noise_fp_{}_num_passes_{}_seed_size_{}_model_{}_batch_size_{}_gamma_{}_label_budget_{}_epochs_{}".format(
        args.seed, args.strategy, args.noise_fn, args.noise_fp, args.num_passes, args.seed_size, args.model, args.batch_size, args.gamma, args.label_budget, args.epochs)

    # Log to a per-run file and mirror everything to stdout.
    logging.basicConfig(
        filename="{}/{}.txt".format(args.dout, model_id),
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logger = Experiment(comet_ml_key, project_name="ActiveDialogue")
    logger.log_parameters(vars(args))

    if args.model == "glad":
        model_arch = GLAD
    elif args.model == "gce":
        model_arch = GCE
    else:
        # Fail fast: previously an unrecognized model name left model_arch
        # unbound and surfaced as a confusing NameError below.
        raise ValueError("Unsupported model: {}".format(args.model))

    env = PartialEnv(load_dataset, model_arch, args)
    if args.seed_size:
        # Train a seed model unless one is already cached (or retrain anyway
        # when --force_seed is set), then reload the best seed checkpoint.
        with logger.train():
            if not env.load('seed'):
                logging.info("No loaded seed. Training now.")
                env.seed_fit(args.seed_epochs, prefix="seed")
                logging.info("Seed completed.")
            else:
                logging.info("Loaded seed.")
                if args.force_seed:
                    logging.info("Training seed regardless.")
                    env.seed_fit(args.seed_epochs, prefix="seed")
        env.load('seed')

    # Threshold-wrapped strategies ("entropy"/"bald") need extra plumbing;
    # "aggressive"/"random"/"passive" are handled inline in the loop below.
    use_strategy = False
    if args.strategy == "entropy":
        use_strategy = True
        strategy = partial_entropy
    elif args.strategy == "bald":
        use_strategy = True
        strategy = partial_bald

    if use_strategy:
        if args.threshold_strategy == "fixed":
            strategy = FixedThresholdStrategy(strategy, args, True)
        elif args.threshold_strategy == "variable":
            strategy = VariableThresholdStrategy(strategy, args, True)
        elif args.threshold_strategy == "randomvariable":
            strategy = StochasticVariableThresholdStrategy(
                strategy, args, True)

    ended = False
    i = 0

    # Log metrics of the (possibly seed-trained) model before any labeling.
    initial_metrics = env.metrics(True)
    logger.log_current_epoch(i)
    logging.info("Initial metrics: {}".format(initial_metrics))
    for k, v in initial_metrics.items():
        logger.log_metric(k, v)

    with logger.train():
        while not ended:
            i += 1

            # Observe environment state
            logger.log_current_epoch(i)

            if env.can_label:
                # BALD needs multiple stochastic forward passes (20); the
                # other strategies use a single deterministic prediction.
                obs, preds = env.observe(20 if args.strategy ==
                                         "bald" else 1)
                if args.strategy != "bald":
                    preds = preds[0]
                if args.strategy == "aggressive":
                    label_request = aggressive(preds)
                elif args.strategy == "random":
                    label_request = random(preds)
                elif args.strategy == "passive":
                    label_request = passive(preds)
                elif use_strategy:
                    label_request = strategy.observe(preds)
                else:
                    raise ValueError()

                # Label solicitation
                labeled = env.label(label_request)
                if use_strategy:
                    # Report (requested-label count, total slot count) so the
                    # threshold strategy can adapt toward its label budget.
                    strategy.update(
                        sum([
                            np.sum(s.flatten())
                            for s in label_request.values()
                        ]),
                        sum([
                            np.sum(np.ones_like(s).flatten())
                            for s in label_request.values()
                        ]))
            else:
                # No label budget left: stop soliciting and go to final fit.
                break

            # Environment stepping
            ended = env.step()
            # Fit every al_batch of items, from a fresh model each time, then
            # reload the best checkpoint produced by this fit.
            best = env.fit(prefix=model_id, reset_model=True)
            for k, v in best.items():
                logger.log_metric(k, v)
            env.load(prefix=model_id)

    # Final fit
    final_metrics = env.fit(epochs=args.final_epochs,
                            prefix="final_fit_" + model_id,
                            reset_model=True)
    for k, v in final_metrics.items():
        logger.log_metric("Final " + k, v)
        logging.info("Final " + k + ": " + str(v))
    logging.info("Run finished.")
Ejemplo n.º 2
0
print(out2.shape)
exit()"""

# Binary cross-entropy loss shared by both GAN objectives below.
criterion = nn.BCELoss()
# Establish convention for real and fake labels during training
real_label = 1
fake_label = 0

# Setup Adam optimizers for both G and D
# lr and beta1 are defined earlier in this script (outside this excerpt);
# 0.999 is the conventional Adam beta2.
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# Global batch counter, passed to experiment.set_step in the training loop.
steps = 0
for epoch in range(num_epochs):
    experiment.log_current_epoch(epoch)
    for i, data in enumerate(dataloader, 0):
        experiment.set_step(steps)

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
Ejemplo n.º 3
0
def run(args, train, sparse_evidences, claims_dict):
    """Train the CDSSM claim-verification model with an 80/20 train/val split.

    Each epoch trains on the first 80% of ``train``, validates on the rest,
    logs metrics to both Comet and a local TensorBoard-style Logger, and
    checkpoints whenever validation loss improves.

    Args:
        args: Parsed CLI namespace (batch_size, learning_rate, data_sampling,
            epochs, model, no_randomize, print, data, sparse_evidences).
        train: Full list of training examples, split 80/20 here.
        sparse_evidences: Precomputed sparse evidence features for the dataset.
        claims_dict: Mapping used by WikiDataset to resolve claim text.
    """
    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate
    DATA_SAMPLING = args.data_sampling
    NUM_EPOCHS = args.epochs
    MODEL = args.model  # path to a pretrained checkpoint, or falsy
    RANDOMIZE = args.no_randomize
    PRINT = args.print

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # NOTE(review): time.localtime() embeds a struct_time repr in the log
    # directory name; a strftime format would be cleaner -- confirm intent.
    logger = Logger('./logs/{}'.format(time.localtime()))

    if MODEL:
        print("Loading pretrained model...")
        # Load the checkpoint once. The previous code called torch.load(MODEL)
        # a second time only to copy the model's own weights into itself.
        model = torch.load(MODEL)
    else:
        model = cdssm.CDSSM()
        model = model.cuda()
        model = model.to(device)

    if torch.cuda.device_count() > 0:
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
        model = nn.DataParallel(model)

    print("Created model with {:,} parameters.".format(
        putils.count_parameters(model)))

    print("Created dataset...")

    # use an 80/20 train/validate split!
    train_size = int(len(train) * 0.80)
    train_dataset = pytorch_data_loader.WikiDataset(
        train[:train_size],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)
    val_dataset = pytorch_data_loader.WikiDataset(
        train[train_size:],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=pytorch_data_loader.PadCollate())
    val_dataloader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                num_workers=0,
                                shuffle=True,
                                collate_fn=pytorch_data_loader.PadCollate())

    # Loss and optimizer. NLLLoss expects log-probabilities from the model
    # (the PRINT branch below applies torch.exp to recover probabilities).
    criterion = torch.nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=1e-3)

    # Emit metrics every ~2% of an epoch, but at least every 20 batches.
    OUTPUT_FREQ = max(int((len(train_dataset) / BATCH_SIZE) * 0.02), 20)
    parameters = {
        "batch size": BATCH_SIZE,
        "epochs": NUM_EPOCHS,
        "learning rate": LEARNING_RATE,
        "optimizer": optimizer.__class__.__name__,
        "loss": criterion.__class__.__name__,
        "training size": train_size,
        "data sampling rate": DATA_SAMPLING,
        "data": args.data,
        "sparse_evidences": args.sparse_evidences,
        "randomize": RANDOMIZE,
        "model": MODEL
    }
    experiment = Experiment(api_key="YLsW4AvRTYGxzdDqlWRGCOhee",
                            project_name="clsm",
                            workspace="moinnadeem")
    experiment.add_tag("train")
    experiment.log_asset("cdssm.py")
    experiment.log_dataset_info(name=args.data)
    experiment.log_parameters(parameters)

    # Encode every hyperparameter (except the model path) into the checkpoint
    # file name so runs with different configs never overwrite each other.
    model_checkpoint_dir = "models/saved_model"
    for key, value in parameters.items():
        if type(value) == str:
            value = value.replace("/", "-")
        if key != "model":
            model_checkpoint_dir += "_{}-{}".format(key.replace(" ", "_"),
                                                    value)

    print("Training...")
    beginning_time = time.time()
    best_loss = torch.tensor(float("inf"),
                             dtype=torch.float)  # begin loss at infinity

    for epoch in range(NUM_EPOCHS):
        beginning_time = time.time()
        train_running_loss = 0.0
        train_running_accuracy = 0.0
        model.train()
        experiment.log_current_epoch(epoch)

        with experiment.train():
            for train_batch_num, inputs in enumerate(train_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = inputs

                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()

                y_pred = model(claims_tensors, evidences_tensors)

                y = (labels)
                y_pred = y_pred.squeeze()

                # Labels arrive one-hot; torch.max(y, 1)[1] converts them to
                # the class indices NLLLoss expects.
                loss = criterion(y_pred, torch.max(y, 1)[1])

                y = y.float()
                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                accuracy = (binary_y == binary_pred).to("cuda")
                accuracy = accuracy.float()
                accuracy = accuracy.mean()
                train_running_accuracy += accuracy.item()
                train_running_loss += loss.item()

                if PRINT:
                    # Debug dump: torch.exp turns log-probs into probabilities.
                    for idx in range(len(y)):
                        print(
                            "Claim: {}, Evidence: {}, Prediction: {}, Label: {}"
                            .format(claims_text[0], evidences_text[idx],
                                    torch.exp(y_pred[idx]), y[idx]))

                if (train_batch_num %
                        OUTPUT_FREQ) == 0 and train_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    binary_y = torch.max(y, 1)[1]
                    binary_pred = torch.max(y_pred, 1)[1]
                    # NOTE(review): recall here is computed on the current
                    # batch only, while loss/accuracy are window averages.
                    print(
                        "[{}:{}:{:3f}s] training loss: {}, training accuracy: {}, training recall: {}"
                        .format(
                            epoch, train_batch_num /
                            (len(train_dataset) / BATCH_SIZE), elapsed_time,
                            train_running_loss / OUTPUT_FREQ,
                            train_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {
                        'train_loss': train_running_loss / OUTPUT_FREQ,
                        'train_accuracy': train_running_accuracy / OUTPUT_FREQ
                    }

                    for tag, value in info.items():
                        experiment.log_metric(tag,
                                              value,
                                              step=train_batch_num *
                                              (epoch + 1))
                        logger.scalar_summary(tag, value, train_batch_num + 1)

                    ## 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             train_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(),
                                             train_batch_num + 1)

                    # Reset the running window for the next OUTPUT_FREQ batches.
                    train_running_loss = 0.0
                    beginning_time = time.time()
                    train_running_accuracy = 0.0
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        print("Running validation...")
        model.eval()
        pred = []
        true = []
        avg_loss = 0.0
        val_running_accuracy = 0.0
        val_running_loss = 0.0
        beginning_time = time.time()
        with experiment.validate():
            for val_batch_num, val_inputs in enumerate(val_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = val_inputs

                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()

                y_pred = model(claims_tensors, evidences_tensors)

                y = (labels)
                y_pred = y_pred.squeeze()

                loss = criterion(y_pred, torch.max(y, 1)[1])

                y = y.float()

                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                # Accumulate epoch-level predictions for the report below.
                true.extend(binary_y.tolist())
                pred.extend(binary_pred.tolist())

                accuracy = (binary_y == binary_pred).to("cuda")

                accuracy = accuracy.float().mean()
                val_running_accuracy += accuracy.item()
                val_running_loss += loss.item()
                avg_loss += loss.item()

                if (val_batch_num % OUTPUT_FREQ) == 0 and val_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    print(
                        "[{}:{}:{:3f}s] validation loss: {}, accuracy: {}, recall: {}"
                        .format(
                            epoch,
                            val_batch_num / (len(val_dataset) / BATCH_SIZE),
                            elapsed_time, val_running_loss / OUTPUT_FREQ,
                            val_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {'val_accuracy': val_running_accuracy / OUTPUT_FREQ}

                    for tag, value in info.items():
                        experiment.log_metric(tag,
                                              value,
                                              step=val_batch_num * (epoch + 1))
                        logger.scalar_summary(tag, value, val_batch_num + 1)

                    ## 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             val_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(),
                                             val_batch_num + 1)

                    val_running_accuracy = 0.0
                    val_running_loss = 0.0
                    beginning_time = time.time()

        accuracy = accuracy_score(true, pred)
        print("[{}] mean accuracy: {}, mean loss: {}".format(
            epoch, accuracy, avg_loss / len(val_dataloader)))

        true = np.array(true).astype("int")
        pred = np.array(pred).astype("int")
        print(classification_report(true, pred))

        # Track the best (lowest) mean validation loss; is_best is True iff
        # this epoch's loss matched or beat every previous epoch.
        epoch_val_loss = avg_loss / len(val_dataloader)
        is_best = bool(epoch_val_loss <= best_loss.cpu().numpy())
        best_loss = torch.tensor(
            min(epoch_val_loss, best_loss.cpu().numpy()))

        putils.save_checkpoint(
            {
                "epoch": epoch,
                "model": model,
                "best_loss": best_loss
            },
            is_best,
            filename="{}_loss_{}".format(model_checkpoint_dir,
                                         best_loss.cpu().numpy()))
Ejemplo n.º 4
0
def main(_):
    """Train the RPN3D detector, validating and checkpointing as it goes.

    Restores from the latest checkpoint in ``save_model_dir`` when one
    exists, otherwise initializes fresh parameters. Every 10 epochs it dumps
    predictions on the validation set (optionally with visualizations) and
    runs the external KITTI evaluation script.

    Args:
        _: Unused; required by the tf.app.run / absl entry-point convention.
    """
    experiment = Experiment(api_key="xXtJguCo8yFdU7dpjEpo6YbHw",
                            project_name=args.experiment_name)
    hyper_params = {
        "learning_rate": args.lr,
        "num_epochs": args.max_epoch,
        "batch_size": args.single_batch_size,
        "alpha": args.alpha,
        "beta": args.beta,
        "gamma": args.gamma,
        "loss": args.loss
    }
    experiment.log_multiple_params(hyper_params)

    # TODO: split file support
    with tf.Graph().as_default():
        global save_model_dir
        start_epoch = 0
        global_counter = 0

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=cfg.GPU_MEMORY_FRACTION,
            visible_device_list=cfg.GPU_AVAILABLE,
            allow_growth=True)
        config = tf.ConfigProto(
            gpu_options=gpu_options,
            device_count={
                "GPU": cfg.GPU_USE_COUNT,
            },
            allow_soft_placement=True,
            log_device_placement=False,
        )
        with tf.Session(config=config) as sess:
            model = RPN3D(cls=cfg.DETECT_OBJ,
                          single_batch_size=args.single_batch_size,
                          learning_rate=args.lr,
                          max_gradient_norm=5.0,
                          alpha=args.alpha,
                          beta=args.beta,
                          gamma=args.gamma,
                          loss_type=args.loss,
                          avail_gpus=cfg.GPU_AVAILABLE.split(','))
            # param init/restore: resume epoch and step counters from the
            # checkpoint when one exists.
            if tf.train.get_checkpoint_state(save_model_dir):
                print("Reading model parameters from %s" % save_model_dir)
                model.saver.restore(sess,
                                    tf.train.latest_checkpoint(save_model_dir))
                start_epoch = model.epoch.eval() + 1
                global_counter = model.global_step.eval() + 1
            else:
                print("Created model with fresh parameters.")
                tf.global_variables_initializer().run()

            # train and validate
            summary_interval = 5
            summary_val_interval = 10
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            experiment.set_model_graph(sess.graph)

            # Loop-invariant: total epoch count, used only for progress output.
            epochs = args.max_epoch

            # training
            with experiment.train():
                for epoch in range(start_epoch, args.max_epoch):
                    counter = 0
                    batch_time = time.time()
                    experiment.log_current_epoch(epoch)

                    for batch in iterate_data(
                            train_dir,
                            shuffle=True,
                            aug=True,
                            is_testset=False,
                            batch_size=args.single_batch_size *
                            cfg.GPU_USE_COUNT,
                            multi_gpu_sum=cfg.GPU_USE_COUNT):

                        counter += 1
                        global_counter += 1
                        experiment.set_step(global_counter)
                        # Only request a TF summary every summary_interval
                        # batches to keep the event file small.
                        is_summary = counter % summary_interval == 0
                        start_time = time.time()
                        ret = model.train_step(sess,
                                               batch,
                                               train=True,
                                               summary=is_summary)
                        forward_time = time.time() - start_time
                        batch_time = time.time() - batch_time
                        # train_step returns (loss, cls_loss, cls_pos_loss,
                        # cls_neg_loss, ..., summary-if-requested).
                        param = ret
                        params = {
                            "loss": param[0],
                            "cls_loss": param[1],
                            "cls_pos_loss": param[2],
                            "cls_neg_loss": param[3]
                        }
                        experiment.log_multiple_metrics(params)
                        print(
                            'train: {} @ epoch:{}/{} loss: {:.4f} cls_loss: {:.4f} cls_pos_loss: {:.4f} cls_neg_loss: {:.4f} forward time: {:.4f} batch time: {:.4f}'
                            .format(counter, epoch, epochs, ret[0], ret[1],
                                    ret[2], ret[3], forward_time, batch_time))

                        if counter % summary_interval == 0:
                            print("summary_interval now")
                            summary_writer.add_summary(ret[-1], global_counter)

                        # Periodic validation on a sampled batch.
                        if counter % summary_val_interval == 0:
                            print("summary_val_interval now")
                            batch = sample_test_data(
                                val_dir,
                                args.single_batch_size * cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT)

                            ret = model.validate_step(sess,
                                                      batch,
                                                      summary=True)
                            summary_writer.add_summary(ret[-1], global_counter)

                            # Prediction on the sampled batch is best-effort;
                            # was a bare `except:` which also swallowed
                            # KeyboardInterrupt/SystemExit -- narrowed.
                            try:
                                ret = model.predict_step(sess,
                                                         batch,
                                                         summary=True)
                                summary_writer.add_summary(
                                    ret[-1], global_counter)
                            except Exception:
                                print("prediction skipped due to error")

                        # External pause flag: checkpoint and exit cleanly.
                        if check_if_should_pause(args.tag):
                            model.saver.save(sess,
                                             os.path.join(
                                                 save_model_dir, 'checkpoint'),
                                             global_step=model.global_step)
                            print('pause and save model @ {} steps:{}'.format(
                                save_model_dir, model.global_step.eval()))
                            sys.exit(0)

                        batch_time = time.time()
                    experiment.log_epoch_end(epoch)
                    sess.run(model.epoch_add_op)

                    model.saver.save(sess,
                                     os.path.join(save_model_dir,
                                                  'checkpoint'),
                                     global_step=model.global_step)

                    # dump test data every 10 epochs
                    if (epoch + 1) % 10 == 0:
                        # create output folder
                        os.makedirs(os.path.join(args.output_path, str(epoch)),
                                    exist_ok=True)
                        os.makedirs(os.path.join(args.output_path, str(epoch),
                                                 'data'),
                                    exist_ok=True)
                        if args.vis:
                            os.makedirs(os.path.join(args.output_path,
                                                     str(epoch), 'vis'),
                                        exist_ok=True)

                        for batch in iterate_data(
                                val_dir,
                                shuffle=False,
                                aug=False,
                                is_testset=False,
                                batch_size=args.single_batch_size *
                                cfg.GPU_USE_COUNT,
                                multi_gpu_sum=cfg.GPU_USE_COUNT):

                            if args.vis:
                                tags, results, front_images, bird_views, heatmaps = model.predict_step(
                                    sess, batch, summary=False, vis=True)
                            else:
                                tags, results = model.predict_step(
                                    sess, batch, summary=False, vis=False)

                            # Write one KITTI-format label file per sample.
                            for tag, result in zip(tags, results):
                                of_path = os.path.join(args.output_path,
                                                       str(epoch), 'data',
                                                       tag + '.txt')
                                with open(of_path, 'w+') as f:
                                    labels = box3d_to_label(
                                        [result[:, 1:8]], [result[:, 0]],
                                        [result[:, -1]],
                                        coordinate='lidar')[0]
                                    for line in labels:
                                        f.write(line)
                                    print('write out {} objects to {}'.format(
                                        len(labels), tag))
                            # dump visualizations
                            if args.vis:
                                for tag, front_image, bird_view, heatmap in zip(
                                        tags, front_images, bird_views,
                                        heatmaps):
                                    front_img_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_front.jpg')
                                    bird_view_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_bv.jpg')
                                    heatmap_path = os.path.join(
                                        args.output_path, str(epoch), 'vis',
                                        tag + '_heatmap.jpg')
                                    cv2.imwrite(front_img_path, front_image)
                                    cv2.imwrite(bird_view_path, bird_view)
                                    cv2.imwrite(heatmap_path, heatmap)

                        # execute evaluation code
                        cmd_1 = "./kitti_eval/launch_test.sh"
                        cmd_2 = os.path.join(args.output_path, str(epoch))
                        cmd_3 = os.path.join(args.output_path, str(epoch),
                                             'log')
                        os.system(" ".join([cmd_1, cmd_2, cmd_3]))

            print('train done. total epoch:{} iter:{}'.format(
                epoch, model.global_step.eval()))

            # finally save model
            model.saver.save(sess,
                             os.path.join(save_model_dir, 'checkpoint'),
                             global_step=model.global_step)
Ejemplo n.º 5
0
def main():
    """Train ResNet18 on (optionally imbalanced) CIFAR-10 with a selectable loss.

    Parses command-line options, records hyper-parameters to Comet.ml,
    builds the (down-sampled) train loader plus majority/minority/all test
    loaders, then runs the train/eval loop under a MultiStepLR schedule,
    checkpointing every 10 epochs when --save-model is set.
    """
    # Training settings
    arg_parser = argparse.ArgumentParser(description='Cifar10 Example')
    arg_parser.add_argument('--batch-size', type=int, default=128, metavar='N', help='input batch size for training (default: 128)')
    arg_parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', help='input batch size for testing (default: 1000)')
    arg_parser.add_argument('--epochs', type=int, default=25, metavar='N', help='number of epochs to train (default: 25)')
    arg_parser.add_argument('--lr', type=float, default=0.1, metavar='LR', help='learning rate (default: 0.1)')
    arg_parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum (default: 0.9)')
    arg_parser.add_argument('--model-path', type=str, default='', metavar='M', help='model param path')
    arg_parser.add_argument('--loss-type', type=str, default='CE', metavar='L', help='B or CE or F or ICF_CE or ICF_F or CB_CE or CB_F')
    arg_parser.add_argument('--beta', type=float, default=0.999, metavar='B', help='Beta for ClassBalancedLoss')
    arg_parser.add_argument('--gamma', type=float, default=2.0, metavar='G', help='Gamma for FocalLoss')
    arg_parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training')
    arg_parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)')
    arg_parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status')
    arg_parser.add_argument('--balanced-data', action='store_true', default=False, help='For sampling rate. Default is Imbalanced-data.')
    arg_parser.add_argument('--save-model', action='store_true', default=False, help='For Saving the current Model')
    args = arg_parser.parse_args()

    # NOTE(review): the Comet API key is hard-coded in source — should be
    # moved to an environment variable or config file.
    experiment = Experiment(api_key="5Yl3Rxz9S3E0PUKQTBpA0QJPi", project_name="imbalanced-cifar-10", workspace="tancoro")

    # Open the experiment page in a browser (disabled):
    # experiment.display(clear=True, wait=True, new=0, autoraise=True)
    # Fetch the experiment key (uniquely identifies this run).
    exp_key = experiment.get_key()
    print('KEY: ' + exp_key)

    # Record the hyper-parameters for this run.
    hyper_params = {
        'batch_size': args.batch_size,
        'epoch': args.epochs,
        'learning_rate': args.lr,
        'sgd_momentum': args.momentum,
        'model_path': args.model_path,
        'loss_type': args.loss_type,
        'beta': args.beta,
        'gamma': args.gamma,
        'torch_manual_seed': args.seed,
        'balanced_data': args.balanced_data,
    }
    experiment.log_parameters(hyper_params)

    use_cuda = torch.cuda.is_available() and not args.no_cuda
    print('use_cuda {}'.format(use_cuda))

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    loader_kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Training dataset with standard CIFAR-10 augmentation.
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_set = datasets.CIFAR10('./data', train=True, download=True,
                                 transform=train_transform)

    # Per-class sampling rates: empty map keeps the data balanced; otherwise
    # classes 1, 4 and 6 are reduced to 5% to simulate imbalance.
    sampling_rate = {} if args.balanced_data else {1: 0.05, 4: 0.05, 6: 0.05}
    print(sampling_rate)
    train_sampler = ReductionSampler(train_set, sampling_rate=sampling_rate)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, sampler=train_sampler, **loader_kwargs)

    # Test dataset (no augmentation, same normalization).
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    test_set = datasets.CIFAR10('./data', train=False, transform=test_transform)

    # Majority-only loader (drops the minority classes 1/4/6 entirely).
    test_majority_sampler = ReductionSampler(test_set, sampling_rate={1: 0, 4: 0, 6: 0})
    test_majority_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.test_batch_size, sampler=test_majority_sampler, **loader_kwargs)
    # Minority-only loader (drops every majority class).
    test_minority_sampler = ReductionSampler(
        test_set, sampling_rate={0: 0, 2: 0, 3: 0, 5: 0, 7: 0, 8: 0, 9: 0})
    test_minority_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.test_batch_size, sampler=test_minority_sampler, **loader_kwargs)
    # Full test-set loader.
    test_alldata_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.test_batch_size, shuffle=True, **loader_kwargs)

    model = ResNet18().to(device)

    # Loss selection: fall back to the plain cross-entropy when --loss-type
    # is unrecognized (e.g. 'B'), mirroring the help text.
    train_loss = BasicCrossEntropyLoss()
    loss_builders = {
        'CE': lambda: CrossEntropyLoss(train_sampler.get_data_count_map(), device),
        'F': lambda: FocalLoss(train_sampler.get_data_count_map(), device, gamma=args.gamma),
        'ICF_CE': lambda: InverseClassFrequencyCrossEntropyLoss(train_sampler.get_data_count_map(), device),
        'ICF_F': lambda: InverseClassFrequencyFocalLoss(train_sampler.get_data_count_map(), device, gamma=args.gamma),
        'CB_CE': lambda: ClassBalancedCrossEntropyLoss(train_sampler.get_data_count_map(), device, beta=args.beta),
        'CB_F': lambda: ClassBalancedFocalLoss(train_sampler.get_data_count_map(), device, beta=args.beta, gamma=args.gamma),
    }
    if args.loss_type in loss_builders:
        train_loss = loss_builders[args.loss_type]()
    print('Train Loss Type: {}'.format(type(train_loss)))

    # Optionally resume from a saved checkpoint.
    if len(args.model_path) > 0:
        model.load_state_dict(torch.load(args.model_path))

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
    # Step schedule: lr = 0.1 for epochs < 15, 0.01 for 15-19, 0.001 for 20-24.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[15, 20], gamma=0.1)

    for epoch in range(1, args.epochs + 1):
        with experiment.train():
            experiment.log_current_epoch(epoch)
            train(args, model, device, train_loader, len(train_sampler), optimizer, epoch, experiment, lossfunc=train_loss)
        with experiment.test():
            test(args, model, device, test_minority_loader, len(test_minority_sampler), epoch, experiment, pref='minority')
            test(args, model, device, test_majority_loader, len(test_majority_sampler), epoch, experiment, pref='majority')
            test(args, model, device, test_alldata_loader, len(test_alldata_loader.dataset), epoch, experiment, pref='all')
        if (args.save_model) and (epoch % 10 == 0):
            print('saving model to ./model/cifar10_{0}_{1:04d}.pt'.format(exp_key, epoch))
            torch.save(model.state_dict(), "./model/cifar10_{0}_{1:04d}.pt".format(exp_key, epoch))
        scheduler.step()
Ejemplo n.º 6
0
def main():
    """Entry point for the single-label active-learning loop (BagEnv).

    Optionally (re)trains a seed model, selects a labelling strategy —
    optionally wrapped in a threshold schedule — then alternates between
    soliciting labels from the environment and fitting the model until the
    environment signals that the run is over.
    """
    args = get_args()
    logger = Experiment(comet_ml_key, project_name="ActiveDialogue")
    logger.log_parameters(vars(args))

    # Select the model architecture by name.
    if args.model == "glad":
        model_arch = GLAD
    elif args.model == "gce":
        model_arch = GCE

    env = BagEnv(load_dataset, model_arch, args, logger)
    if args.seed_size:
        with logger.train():
            if not env.load_seed():
                logging.debug("No loaded seed. Training now.")
                env.seed_fit(args.seed_epochs, prefix="seed")
                logging.debug("Seed completed.")
            else:
                logging.debug("Loaded seed.")
                if args.force_seed:
                    logging.debug("Training seed regardless.")
                    env.seed_fit(args.seed_epochs, prefix="seed")
        # Reload the (possibly re)trained seed weights before continuing.
        env.load_seed()
        logging.debug("Current seed metrics: {}".format(env.metrics(True)))

    # Uncertainty-based strategies that can be wrapped by a threshold schedule;
    # the remaining strategies ("epsiloncheat", "randomsinglets", "passive")
    # are dispatched per-observation inside the loop below.
    use_strategy = False
    if args.strategy == "lc":
        use_strategy = True
        strategy = lc_singlet
    elif args.strategy == "bald":
        use_strategy = True
        strategy = bald_singlet

    if use_strategy:
        if args.threshold_strategy == "fixed":
            strategy = FixedThresholdStrategy(strategy, args)
        elif args.threshold_strategy == "variable":
            strategy = VariableThresholdStrategy(strategy, args)
        elif args.threshold_strategy == "randomvariable":
            strategy = StochasticVariableThresholdStrategy(strategy, args)

    ended = False
    i = 0
    while not ended:
        i += 1

        # Observe environment state
        logger.log_current_epoch(i)

        for j in range(args.label_timeout):
            if env.can_label:
                # Obtain label request from strategy
                obs, preds = env.observe()
                if args.strategy == "epsiloncheat":
                    label_request = epsilon_cheat(preds, env.leak_labels())
                elif args.strategy == "randomsinglets":
                    label_request = random_singlets(preds)
                elif args.strategy == "passive":
                    label_request = passive(preds)
                elif use_strategy:
                    label_request = strategy.observe(preds)
                else:
                    raise ValueError(
                        "Unsupported strategy: {}".format(args.strategy))

                # Label solicitation (the env tracks its budget internally;
                # the return value is not needed here).
                env.label(label_request)
                if use_strategy:
                    # Report requested vs. total items so the threshold
                    # schedule can adapt.
                    strategy.update(
                        np.sum(label_request.flatten()),
                        np.sum(np.ones_like(label_request.flatten())))

        # Environment stepping
        ended = env.step()
        # Fit every al_batch of items
        env.fit()

    # BUG FIX: previously `logging.debug("Final fit: ", value)` passed the
    # result as a %-format argument with no placeholder in the message, so
    # logging raised an internal formatting error and the value was lost.
    logging.debug("Final fit: %s", env.seed_fit(100, "final_fit", True))
Ejemplo n.º 7
0
    def train(self):
        """Run the policy-gradient training loop with Comet.ml logging.

        Each outer iteration plays ``batch_size`` full games, collecting
        actions, action probabilities, and rewards, then discounts and
        normalizes the rewards and performs one PPO update.  Per-game
        cumulative reward is logged to Comet under the metric "reward".

        Assumes ``self.env`` is a Gym-style environment whose observations
        are preprocessed by ``prepro`` into frame-difference states, and
        that a nonzero reward marks the end of a rally (Pong-like) —
        TODO confirm against the environment used.
        """
        # --- hard-coded hyper-parameters ---
        epochs = 100000
        batch_size = 10
        gamma = 0.99
        # -----------------------------------
        
        # Comet.ml experiment for metric logging.
        # NOTE(review): the API key is hard-coded in source — should be
        # moved to an environment variable or config file.
        experiment = Experiment(api_key="DFqdpuCkMgoVhT4sJyOXBYRRN") #DFqdpuCkMgoVhT4sJyOXBYRRN
        # step counts finished games across all epochs (x-axis for logging).
        step = 0
        # r_vis accumulates the current game's reward for visualization.
        r_vis = 0
        
        with experiment.train():
            for e in range(epochs):
                experiment.log_current_epoch(e)
                # Per-batch trajectories: actions taken, their probabilities,
                # and the raw rewards received.
                act_dic = []
                act_p_dic = []
                r_dic = []
                
                # init
                num_game = 1
                done = self._init_env()
                
                while True:
                    if done and num_game == batch_size:
                        # Last game of the batch finished: log its reward
                        # and leave the collection loop.
                        step = step + 1
                        experiment.log_metric("reward", r_vis, step=step)
                        r_vis = 0
                        #################
                        break # finish
                    elif done:
                        # A game finished mid-batch: log its reward and
                        # reset the environment for the next game.
                        step = step + 1
                        experiment.log_metric("reward", r_vis, step=step)
                        r_vis = 0
                        #################
                        num_game = num_game + 1 
                        done = self._init_env() # new game
                    else:
                        # In-game step: act on the frame-difference state.
                        self.diff_state = torch.from_numpy(self.diff_state).float().to(self.device)
                        act, act_p = self.ppo.get_action(self.diff_state)
                        state, reward, done, _ = self.env.step(act)
                        state = prepro(state)
                        # Next difference state = current frame - previous frame.
                        self.diff_state = state - self.pre_state
                        self.pre_state = state
                        # remember action you take, and reward
                        act_dic.append(act)
                        act_p_dic.append(act_p)
                        r_dic.append(reward)
                        # Accumulate reward for per-game logging.
                        r_vis = r_vis + reward
                        #################
                # Convert collected lists to tensors: stacked probabilities,
                # actions as a column vector, rewards as a flat tensor.
                act_p_dic = torch.stack(act_p_dic)
                
                act_dic = np.array(act_dic)
                act_dic = torch.from_numpy(act_dic).float().to(self.device).view(-1, 1)
                
                r_dic = np.array(r_dic)
                r_dic = torch.from_numpy(r_dic).float().to(self.device)
                
                # Checkpoint when mean reward per game reaches the target.
                # NOTE(review): this saves but does not stop training despite
                # the "early stop" intent — confirm whether a break is wanted.
                if (r_dic.sum() / batch_size) >= 2:
                    self.ppo.save(step) 

                # suitable reward 
                """
                change reward like below
                [0, 0, 0, 0, 1] -> [0.99^4, 0.99^3, 0.99^2, 0.99, 1]
                """
                # Walk backwards, carrying each terminal reward back through
                # the preceding zero-reward steps with discount factor gamma.
                r = 0
                for i in range(len(r_dic) - 1, -1, -1):
                    if r_dic[i] != 0:
                        r = r_dic[i]
                    else:
                        r = r * gamma
                        r_dic[i] = r
                # Standardize returns (epsilon guards against zero std).
                r_dic = (r_dic - r_dic.mean()) / (r_dic.std() + 1e-8)
                
                self.ppo.update(act_p_dic, act_dic, r_dic)
                # Periodic checkpoint every 100 epochs (skipping epoch 0).
                if(e % 100 == 0 and e != 0):
                    self.ppo.save(e) 
                    print("save:", e)