Code example #1
File: building_blocks.py  Project: JonghwanMun/MCL-KD
    def __init__(self, config):
        super(KLDLoss, self).__init__()  # Must call super __init__()

        self.tau = utils.get_value_from_dict(config["model"], "tau", 1)
        self.reduce = utils.get_value_from_dict(config["model"], "loss_reduce",
                                                True)
        self.apply_softmax_on_teacher = True
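For reference, a minimal sketch of a config fragment covering the two keys this constructor reads; the values are illustrative, not taken from the project.

# illustrative only: the constructor above reads config["model"]["tau"] and
# config["model"]["loss_reduce"], with defaults of 1 and True respectively
kld_config = {"model": {"tau": 2.0, "loss_reduce": True}}
kld_loss = KLDLoss(kld_config)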
Code example #2
    def __init__(self, config, verbose=True):
        # Must call super __init__()
        super(SimpleMLP, self).__init__(config, verbose)

        self.classname = "SimpleMLP"
        self.use_gpu = utils.get_value_from_dict(config["model"], "use_gpu",
                                                 True)
        loss_reduce = utils.get_value_from_dict(config["model"], "loss_reduce",
                                                True)

        # options for applying l2-norm
        self.apply_l2_norm = utils.get_value_from_dict(config["model"],
                                                       "apply_l2_norm", False)

        # build layers
        self.img_emb_net = building_blocks.ResBlock2D(config["model"],
                                                      "img_emb")
        self.qst_emb_net = building_blocks.QuestionEmbedding(config["model"])
        self.classifier = building_blocks.MLP(config["model"], "answer")
        self.criterion = nn.CrossEntropyLoss(reduce=loss_reduce)

        # set layer names (all and to be updated)
        self.model_list = [
            "img_emb_net", "qst_emb_net", "classifier", "criterion"
        ]
        self.models_to_update = [
            "img_emb_net", "qst_emb_net", "classifier", "criterion"
        ]

        self.config = config
Code example #3
File: building_blocks.py  Project: JonghwanMun/MCL-KD
    def __init__(self, config):
        super(StackedAttention, self).__init__()  # Must call super __init__()

        # get SAN configurations
        self.num_stacks = utils.get_value_from_dict(config, "num_stacks", 2)
        self.qst_feat_dim = utils.get_value_from_dict(config, "qst_emb_dim",
                                                      256)
        img_feat_dim = utils.get_value_from_dict(config, "img_emb_dim", 256)
        self.att_emb_dim = utils.get_value_from_dict(config, "att_emb_dim",
                                                     512)
        att_dropout_prob = utils.get_value_from_dict(config,
                                                     "att_dropout_prob", 0.5)

        assert self.num_stacks > 0, "# of stacks {} < 1.".format(
            self.num_stacks)

        # build layers
        layers = []
        if att_dropout_prob > 0:
            layers.append(nn.Dropout(p=att_dropout_prob))
        layers.append(
            nn.Conv2d(img_feat_dim + self.qst_feat_dim, self.att_emb_dim, 1,
                      1))
        layers.append(nn.ReLU())
        if att_dropout_prob > 0:
            layers.append(nn.Dropout(p=att_dropout_prob))
        layers.append(nn.Conv2d(self.att_emb_dim, self.num_stacks, 1, 1))
        self.att_encoder = nn.Sequential(*layers)
        self.att_softmax = nn.Softmax(dim=2)
Code example #4
    def __init__(self, config):
        super(Ensemble, self).__init__(config)  # Must call super __init__()
        self.classname = "ENSEMBLE"

        # options for loading model
        self.base_model_type = utils.get_value_from_dict(
            config["model"], "base_model_type", "san")
        self.M = cmf.get_model(self.base_model_type)
        self.use_gpu = utils.get_value_from_dict(config["model"], "use_gpu",
                                                 True)
        self.num_models = utils.get_value_from_dict(config["model"],
                                                    "num_models", 5)

        # options for knowledge distillation
        self.use_knowledge_distillation = utils.get_value_from_dict(
            config["model"], "use_knowledge_distillation", False)
        base_model_ckpt_path = utils.get_value_from_dict(
            config["model"], "base_model_ckpt_path", "None")
        if self.use_knowledge_distillation:
            assert base_model_ckpt_path != "None", \
                "checkpoint path for base model should be given"

        # build and load base models if knowledge distillation is used
        if self.use_knowledge_distillation:
            self.base_model = []
            num_base_models = len(base_model_ckpt_path)
            for i in range(num_base_models):
                base_config = copy.deepcopy(config)
                base_config["use_knowledge_distillation"] = False
                self.base_model.append(self.M(base_config))
                self.base_model[i].load_checkpoint(base_model_ckpt_path[i])
                if self.use_gpu and torch.cuda.is_available():
                    self.base_model[i].cuda()
                self.base_model[i].eval()  # set to eval mode for base model
                self.logger["train"].info( \
                        "{}th base-net is initialized from {}".format( \
                        i, base_model_ckpt_path[i]))

        # build specialized models
        self.net_list = nn.ModuleList()
        for m in range(self.num_models):
            self.net_list.append(self.M(config))

            # load pre-trained base models if checkpoint paths are given
            if base_model_ckpt_path != "None":
                self.net_list[m].load_checkpoint(base_model_ckpt_path[m])
                self.logger["train"].info("{}th net is initialized from {}".format( \
                        m, base_model_ckpt_path[m]))
        self.criterion = building_blocks.EnsembleLoss(config["model"])

        # set is_main_net flag of base networks as False
        for m in range(self.num_models):
            self.net_list[m].set_is_main_net(False)

        # set models to update
        self.model_list = ["net_list", "criterion"]
        self.models_to_update = ["net_list", "criterion"]
Code example #5
    def __init__(self, config):

        # get configurations
        print(json.dumps(config, indent=4))
        self.hdf5_path = utils.get_value_from_dict(config, "encoded_hdf5_path", \
                "data/CLEVR_v1.0/preprocess/encoded_qa/vocab_train_raw/" \
                + "all_questions_use_zero_token/qa_train.h5")
        self.json_path = utils.get_value_from_dict(config, "encoded_json_path", \
                "data/CLEVR_v1.0/preprocess/encoded_qa/vocab_train_raw/" \
                + "all_questions_use_zero_token/qa_train.json")
        self.img_size = utils.get_value_from_dict(config, "img_size", 224)
        self.batch_size = utils.get_value_from_dict(config, "batch_size", 32)
        self.use_img = utils.get_value_from_dict(config, "use_img", False)
        self.use_gpu = utils.get_value_from_dict(config, "use_gpu", True)
        if self.use_img:
            self.img_dir = utils.get_value_from_dict(config, "img_dir",
                                                     "data/CLEVR_v1.0/images")
            self.prepro = trn.Compose([
                trn.Resize(self.img_size),
                trn.CenterCrop(self.img_size),
                trn.ToTensor(),
                trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        else:
            self.feat_dir = utils.get_value_from_dict(config, "feat_dir",
                                                      "data/CLEVR_v1.0/feats")

        # load hdf5 file including question_labels, question_length,
        # answer_labels
        hdf5_file = io_utils.load_hdf5(self.hdf5_path)
        self.max_time_steps = hdf5_file["question_labels"].shape[1]

        # load json file including wtoi, itow, atoi, itoa, splits, vocab_info,
        # question_ids, image_filenames
        self.json_file = io_utils.load_json(self.json_path)

        # set path of pre-computed assignments
        # NOTE: DEPRECATED
        self.assignment_path = utils.get_value_from_dict(
            config, "assignment_path", "")

        # set path of pre-computed logits of base models
        self.base_logits_path = utils.get_value_from_dict(
            config, "base_logits_path", "")

        self.fetching_answer_option = "simple"

        self.vis_mode = config.get("vis_mode", False)
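A minimal loader config sketch covering only the keys read above; the values mirror the defaults in the snippet and are illustrative, and the instantiation assumes this __init__ belongs to the DataSet class used in the other examples (and that the preprocessed CLEVR files exist on disk).

# illustrative config; every key below has a default in the constructor above
loader_config = {
    "img_size": 224,
    "batch_size": 32,
    "use_img": False,   # fall back to pre-extracted features in feat_dir
    "feat_dir": "data/CLEVR_v1.0/feats",
    "use_gpu": True,
}
dset = DataSet(loader_config)   # requires the default hdf5/json files to exist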
Code example #6
File: building_blocks.py  Project: skynbe/JH-settings
    def __init__(self, config, name=""):
        super(Embedding2D, self).__init__() # Must call super __init__()
        if name != "":
            name = name + "_"

        inp_dim = utils.get_value_from_dict(
            config, name+"emb2d_inp_dim", 1024)
        out_dim = utils.get_value_from_dict(
            config, name+"emb2d_out_dim", 256)
        dropout_prob = utils.get_value_from_dict(
            config, name+"emb2d_dropout_prob", 0.0)
        nonlinear = utils.get_value_from_dict(
            config, name+"emb2d_nonlinear_fn", "None")
        batchnorm = utils.get_value_from_dict(
            config, name+"emb2d_use_batchnorm", False)
        self.apply_l2_norm = \
            utils.get_value_from_dict(config, name+"emb2d_apply_l2_norm", False)
        self.only_l2_norm = utils.get_value_from_dict(
            config, name+"emb2d_only_l2_norm", False)

        assert not ((self.apply_l2_norm == False) and (self.only_l2_norm == True)), \
            "You set only_l2_norm as True, but also set apply_l2_norm as False"

        # define layers
        if not self.only_l2_norm:
            self.embedding_2d = get_conv2d(inp_dim, out_dim, 1, 1,
                dropout=dropout_prob, nonlinear=nonlinear, use_batchnorm=batchnorm)
Code example #7
File: building_blocks.py  Project: skynbe/JH-settings
    def __init__(self, config, name=""):
        super(WordEmbedding, self).__init__() # Must call super __init__()
        if name != "":
            name = name + "_"

        # get configuration
        inp_dim = utils.get_value_from_dict(
            config, name+"word_emb_vocab_size", 256)
        out_dim = utils.get_value_from_dict(
            config, name+"word_emb_dim", 52)
        dropout_prob = utils.get_value_from_dict(
            config, name+"word_emb_dropout_prob", 0)
        nonlinear = utils.get_value_from_dict(
            config, name+"word_emb_nonlinear_fn", "ReLU")

        # set layers
        self.word_emb = get_embedding(inp_dim, out_dim,
                dropout=dropout_prob, nonlinear=nonlinear)
Code example #8
File: building_blocks.py  Project: skynbe/JH-settings
    def __init__(self, config, name=""):
        super(ResBlock2D, self).__init__() # Must call super __init__()
        if name != "":
            name = name + "_"

        # get configuration
        inp_dim = utils.get_value_from_dict(
            config, name+"res_block_2d_inp_dim", 1024)
        out_dim = utils.get_value_from_dict(
            config, name+"res_block_2d_out_dim", 1024)
        hidden_dim = utils.get_value_from_dict(
            config, name+"res_block_2d_hidden_dim", 512)
        self.num_blocks = utils.get_value_from_dict(
            config, name+"num_blocks", 1)
        self.use_downsample = utils.get_value_from_dict(
            config, name+"use_downsample", False)
        self.use_attention_transfer = utils.get_value_from_dict(
            config, "use_attention_transfer", False)

        # set layers
        if self.use_downsample:
            self.downsample = nn.Sequential(
                nn.Conv2d(inp_dim, out_dim, kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(out_dim),
            )
        self.blocks = nn.ModuleList()
        for i in range(self.num_blocks):
            self.blocks.append(get_res_block_2d(inp_dim, out_dim, hidden_dim))
            if (i == 0) and self.use_downsample:
                inp_dim = out_dim
Code example #9
def factory_model(config, M, dset, ckpt_path):
    net = M(config)
    net.bring_loader_info(dset)
    # ship network to gpu
    if config["model"]["use_gpu"]:
        net.gpu_mode()

    # load checkpoint
    epoch = 0
    if len(ckpt_path) > 0:
        if not (net.classname == "ENSEMBLE"
                and config["model"]["version"] == "IE"):
            assert os.path.exists(ckpt_path), \
                "Checkpoint does not exist ({})".format(ckpt_path)
            net.load_checkpoint(ckpt_path)
        # recover the epoch from the checkpoint filename, consistent with
        # how the train.py and eval.py examples on this page parse it
        epoch = int(utils.get_filename_from_path(ckpt_path).split("_")[-1])

    # If the checkpoint was already trained with curriculum learning,
    # re-apply it before returning the network
    apply_cc_after = utils.get_value_from_dict(
        config["model"], "apply_curriculum_learning_after", -1)
    if (apply_cc_after > 0) and (epoch >= apply_cc_after):
        net.apply_curriculum_learning()
    return net
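A hedged usage sketch for factory_model; cmf.get_model and dataset.DataSet follow the other snippets on this page, while the checkpoint path is a placeholder.

# hypothetical usage; the checkpoint path is a placeholder that follows the
# checkpoint naming used in the eval.py example on this page
M = cmf.get_model("san")   # "san" is the default base_model_type in the Ensemble snippet
dset = dataset.DataSet(config["test_loader"])
ckpt_path = "results/clevr/ensemble/my_exp/checkpoints/checkpoint_epoch_010.pkl"
net = factory_model(config, M, dset, ckpt_path)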
Code example #10
File: building_blocks.py  Project: skynbe/JH-settings
    def __init__(self, config, name=""):
        super(MLP, self).__init__() # Must call super __init__()
        if name != "":
            name = name + "_"

        # get configuration
        inp_dim = utils.get_value_from_dict(
            config, name+"mlp_inp_dim", 256)
        out_dim = utils.get_value_from_dict(
            config, name+"mlp_out_dim", 52)
        dropout_prob = utils.get_value_from_dict(
            config, name+"mlp_dropout_prob", 0)
        hidden_dim = utils.get_value_from_dict(
            config, name+"mlp_hidden_dim", (1024,))
        use_batchnorm = utils.get_value_from_dict(
            config, name+"mlp_use_batchnorm", False)
        nonlinear = utils.get_value_from_dict(
            config, name+"mlp_nonlinear_fn", "ReLU")

        # set layers
        self.mlp_1d = get_mlp(inp_dim, out_dim, hidden_dim, \
                dropout=dropout_prob, nonlinear=nonlinear, use_batchnorm=use_batchnorm)
Code example #11
File: train.py  Project: JonghwanMun/MCL-KD
def train(config):
    """ Build data loader """
    dsets = {}
    dsets["train"] = dataset.DataSet(config["train_loader"])
    dsets["test"] = dataset.DataSet(config["test_loader"])
    L = {}
    L["train"] = data.DataLoader( \
            dsets["train"], batch_size=config["train_loader"]["batch_size"], \
            num_workers=config["misc"]["num_workers"], \
            shuffle=True, collate_fn=dataset.collate_fn)
    L["test"] = data.DataLoader( \
            dsets["test"], batch_size=config["test_loader"]["batch_size"], \
            num_workers=config["misc"]["num_workers"], \
            shuffle=True, collate_fn=dataset.collate_fn)
    config = M.override_config_from_loader(config, dsets["train"])
    """ Build network """
    net = M(config)
    net.bring_loader_info(dsets)
    logger["train"].info(str(net))
    apply_cc_after = utils.get_value_from_dict(
        config["model"], "apply_curriculum_learning_after", -1)
    # load checkpoint if exists
    if len(config["model"]["checkpoint_path"]) > 0:
        net.load_checkpoint(config["model"]["checkpoint_path"])
        start_epoch = int(
            utils.get_filename_from_path(
                config["model"]["checkpoint_path"]).split("_")[-1])
        # If the checkpoint was already trained with curriculum learning
        if (apply_cc_after > 0) and (start_epoch >= apply_cc_after):
            net.apply_curriculum_learning()
    else:
        start_epoch = 0

    # ship network to use gpu
    if config["model"]["use_gpu"]:
        net.gpu_mode()

    # Prepare tensorboard
    net.create_tensorboard_summary(config["misc"]["tensorboard_dir"])
    """ Run training network """
    ii = 0
    tm = timer.Timer()  # tm: timer
    iter_per_epoch = dsets["train"].get_iter_per_epoch()
    min_lr = config["optimize"].get("min_lr", 0.0002)
    for epoch in range(start_epoch, config["optimize"]["num_epoch"]):
        net.train_mode()  # set network as train mode
        net.reset_status()  # initialize status
        for batch in L["train"]:
            data_load_duration = tm.get_duration()

            # maintain sample data to observe learning status
            if ii == 0:
                sample_data = dsets["train"].get_samples(5)
                """ TODO: get samples from both training/test set
                test_sample_data = dsets["test"].get_samples(5))
                """

            # Forward and update the network
            # Note that the 1st and 2nd item of outputs from forward() should be
            # loss and logits. The others would change depending on the network
            tm.reset()
            lr = utils.adjust_lr(ii + 1, iter_per_epoch, config["optimize"],
                                 min_lr)
            outputs = net.forward_update(batch, lr)
            run_duration = tm.get_duration()

            # Compute status for current batch: loss, evaluation scores, etc
            net.compute_status(outputs[1], batch[0][-1])

            # print learning status
            if (ii + 1) % config["misc"]["print_every"] == 0:
                net.print_status(epoch + 1, ii + 1)
                txt = "fetching for {:.3f}s, optimizing for {:.3f}s, lr = {:.5f}"
                logger["train"].debug(
                    txt.format(data_load_duration, run_duration, lr))
                logger["train"].info("\n")

            # visualize results
            if (config["misc"]["vis_every"] > 0) \
                    and ((ii+1) % config["misc"]["vis_every"] == 0):
                if config["misc"]["model_type"] == "ensemble":
                    net.save_results(sample_data,
                                     "iteration_{}".format(ii + 1),
                                     mode="train")

            ii += 1
            tm.reset()

            if config["misc"]["debug"]:
                if ii % 100 == 0:
                    break
            # epoch done

        # save network every epoch
        net.save_checkpoint(epoch + 1)

        # visualize results
        net.save_results(sample_data,
                         "epoch_{:03d}".format(epoch + 1),
                         mode="train")

        # print status (metric) accumulated over each epoch
        net.print_counters_info(epoch + 1, logger_name="epoch", mode="Train")

        # validate network
        if (epoch + 1) % config["evaluation"]["every_eval"] == 0:
            cmf.evaluate(config,
                         L["test"],
                         net,
                         epoch,
                         logger_name="epoch",
                         mode="Valid")

        # curriculum learning
        if (apply_cc_after >= 0) and ((epoch + 1) == apply_cc_after):
            net.apply_curriculum_learning()

        # reset reference time to compute duration of loading data
        tm.reset()
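A config skeleton listing only the sections and keys train() references above; the values are illustrative (in the project the config is loaded from YAML, as in the eval.py and ensemble.py examples).

# illustrative skeleton: only keys referenced in train() are listed;
# train_loader/test_loader also take the dataset keys shown in code example #5
config = {
    "train_loader": {"batch_size": 32},
    "test_loader": {"batch_size": 32},
    "model": {"checkpoint_path": "", "use_gpu": True,
              "apply_curriculum_learning_after": -1},
    "optimize": {"num_epoch": 50, "min_lr": 0.0002},
    "evaluation": {"every_eval": 1},
    "misc": {"num_workers": 4, "tensorboard_dir": "tensorboard",
             "print_every": 100, "vis_every": -1,
             "model_type": "ensemble", "debug": False},
}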
Code example #12
File: ensemble.py  Project: JonghwanMun/MCL-KD
def ensemble(config):

    """ Build data loader """
    dset = dataset.DataSet(config["test_loader"])
    L = data.DataLoader( \
            dset, batch_size=config["test_loader"]["batch_size"], \
            num_workers=config["num_workers"], \
            shuffle=False, collate_fn=dataset.collate_fn)

    """ Load assignments if exists """
    with_assignment = False
    if config["assignment_path"] != "None":
        with_assignment = True
        assignment_file = io_utils.load_hdf5(config["assignment_path"], verbose=False)
        assignments = assignment_file["assignments"][:]
        cnt_mapping = np.zeros((3,3))

    """ Build network """
    nets = []
    net_configs = []
    for i in range(len(config["checkpoint_paths"])):
        net_configs.append(io_utils.load_yaml(config["config_paths"][i]))
        net_configs[i] = M.override_config_from_loader(net_configs[i], dset)
        nets.append(M(net_configs[i]))
        nets[i].bring_loader_info(dset)
        apply_cc_after = utils.get_value_from_dict(
                net_configs[i]["model"], "apply_curriculum_learning_after", -1)
        # load checkpoint if exists
        nets[i].load_checkpoint(config["checkpoint_paths"][i])
        start_epoch = int(utils.get_filename_from_path(
                config["checkpoint_paths"][i]).split("_")[-1])
        # If the checkpoint was already trained with curriculum learning
        if (apply_cc_after > 0) and (start_epoch >= apply_cc_after):
            nets[i].apply_curriculum_learning()

    # ship network to use gpu
    if config["use_gpu"]:
        for i in range(len(nets)):
            nets[i].gpu_mode()
    for i in range(len(nets)):
        nets[i].eval_mode()

    # initialize counters for different tau
    metrics = ["top1-avg", "top1-max", "oracle"]
    for i in range(len(nets)):
        modelname = "M{}".format(i)
        metrics.append(modelname)
    tau = [1.0, 1.2, 1.5, 2.0, 5.0, 10.0, 50.0, 100.0]
    counters = OrderedDict()
    for T in tau:
        tau_name = "tau-"+str(T)
        counters[tau_name] = OrderedDict()
        for mt in metrics:
            counters[tau_name][mt] = accumulator.Accumulator(mt)

    """ Run training network """
    ii = 0
    itoa = dset.get_itoa()
    predictions = []
    for batch in tqdm(L):
        # Forward networks
        probs = 0
        B = batch[0][0].size(0)
        if isinstance(batch[0][-1], list):
            gt = batch[0][-1][0]
        else:
            gt = batch[0][-1]

        correct = 0
        probs = {}
        for T in tau:
            tau_name = "tau-"+str(T)
            probs[tau_name] = 0

        prob_list = []
        for i in range(len(nets)):
            outputs = nets[i].evaluate(batch)
            prob_list.append(outputs[1]) # m*[B,A]

        if config["save_logits"]:
            TODO = True

        for T in tau:
            tau_name = "tau-"+str(T)
            probs = [net_utils.get_data(F.softmax(logits/T, dim=1)) \
                     for logits in prob_list] # m*[B,A]

            # count correct numbers for each model
            for i in range(len(nets)):
                val, idx = probs[i].max(dim=1)
                correct = torch.eq(idx, gt)
                num_correct = torch.sum(correct)
                modelname = "M{}".format(i)
                counters[tau_name][modelname].add(num_correct, B)

                # add prob of each model
                if i == 0:
                    oracle_correct = correct
                else:
                    oracle_correct = oracle_correct + correct


            # top1-max accuracy for ensemble
            ens_probs, ens_idx = torch.stack(probs,0).max(0) # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-max"].add(num_correct, B)

            # top1-avg accuracy for ensemble
            ens_probs = sum(probs) # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-avg"].add(num_correct, B)

            # oracle accuracy for ensemble
            num_oracle_correct = torch.sum(torch.ge(oracle_correct, 1))
            counters[tau_name]["oracle"].add(num_oracle_correct, B)

            # attach predictions
            for i in range(len(batch[1])):
                qid = batch[1][i]
                predictions.append({
                    "question_id": qid,
                    "answer": utils.label2string(itoa, max_idx[i])
                })

        # epoch done

    # print accuracy
    for cnt_k,cnt_v in counters.items():
        txt = cnt_k + " "
        for k,v in cnt_v.items():
            txt += ", {} = {:.5f}".format(v.get_name(), v.get_average())
        print(txt)

    save_dir = os.path.join("results", "ensemble_predictions")
    io_utils.check_and_create_dir(save_dir)
    io_utils.write_json(os.path.join(save_dir, config["out"]+".json"), predictions)
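To make the tau sweep above concrete, here is a small self-contained sketch of the temperature-scaled averaging on dummy logits (shapes and values are illustrative).

# illustrative only: two models, batch of 3, 4 answer classes
import torch
import torch.nn.functional as F

logits = [torch.randn(3, 4), torch.randn(3, 4)]    # m * [B, A]
T = 2.0                                            # softmax temperature (tau)
probs = [F.softmax(l / T, dim=1) for l in logits]  # temperature-scaled probabilities
avg_probs = sum(probs)                             # basis of the top1-avg metric
max_probs, _ = torch.stack(probs, 0).max(0)        # basis of the top1-max metric
pred_avg = avg_probs.max(dim=1)[1]                 # predicted answer indices
pred_max = max_probs.max(dim=1)[1]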
Code example #13
File: eval.py  Project: JonghwanMun/MCL-KD
def main(params):

    # load configuration of pre-trained models
    exp_path = os.path.join("results", params["dataset"],
                            params["model_type"], params["exp"])
    config_path = os.path.join(exp_path, "config.yml")
    config = io_utils.load_yaml(config_path)
    params["config_path"] = config_path
    config = M.override_config_from_params(config, params)
    config["exp_path"] = exp_path
    cmf.create_save_dirs(config["misc"])

    # create logger
    logger_path = os.path.join(config["exp_path"], "evaluation.log")
    logger = io_utils.get_logger("Evaluate", log_file_path=logger_path)

    """ Build data loader """
    loader_config = io_utils.load_yaml(params["loader_config_path"])
    dset = dataset.DataSet(loader_config)
    L = data.DataLoader(dset, batch_size=loader_config["batch_size"], \
                        num_workers=params["num_workers"], \
                        shuffle=False, collate_fn=dataset.collate_fn)
    config = M.override_config_from_loader(config, dset)

    if params["mode"] == "eval":

        """ Evaluating networks """
        e0 = params["start_epoch"]
        e1 = params["end_epoch"]
        e_stride = params["epoch_stride"]
        sample_data = dset.get_samples(5)
        for epoch in range(e0, e1+1, e_stride):
            """ Build network """
            net = M(config)
            net.bring_loader_info(dset)
            # ship network to use gpu
            if config["model"]["use_gpu"]:
                net.gpu_mode()

            # load checkpoint
            if not (net.classname == "ENSEMBLE" and config["model"]["version"] == "IE"):
                ckpt_path = os.path.join(exp_path, "checkpoints",
                                         "checkpoint_epoch_{:03d}.pkl".format(epoch))
                assert os.path.exists(ckpt_path), \
                    "Checkpoint does not exists ({})".format(ckpt_path)
                net.load_checkpoint(ckpt_path)

            # If the checkpoint was already trained with curriculum learning
            apply_cc_after = utils.get_value_from_dict(
                    config["model"], "apply_curriculum_learning_after", -1)
            if (apply_cc_after > 0) and (epoch >= apply_cc_after):
                net.apply_curriculum_learning()

            cmf.evaluate(config, L, net, epoch-1, logger_name="eval",
                         mode="Evaluation", verbose_every=100)

    elif params["mode"] == "selection":
        epoch = params["start_epoch"]
        """ Build network """
        net = M(config)
        net.bring_loader_info(dset)
        # ship network to use gpu
        if config["model"]["use_gpu"]:
            net.gpu_mode()

        # load checkpoint
        ckpt_path = os.path.join(exp_path, "checkpoints", "checkpoint_epoch_{:03d}.pkl".format(epoch))
        assert os.path.exists(ckpt_path), "Checkpoint does not exist ({})".format(ckpt_path)
        net.load_checkpoint(ckpt_path)
        apply_cc_after = utils.get_value_from_dict(
                config["model"], "apply_curriculum_learning_after", -1)
        # If the checkpoint was already trained with curriculum learning
        if (apply_cc_after > 0) and (epoch >= apply_cc_after):
            net.apply_curriculum_learning()

        cmf.get_selection_values(config, L, net, epoch-1, logger_name="eval", mode="Evaluation", verbose_every=100)
Code example #14
File: building_blocks.py  Project: JonghwanMun/MCL-KD
    def __init__(self, config):
        super(EnsembleLoss, self).__init__()  # Must call super __init__()

        self.logger = io_utils.get_logger("Train")

        # common options
        self.version = utils.get_value_from_dict(config, "version", "KD-MCL")
        self.use_gpu = utils.get_value_from_dict(config, "use_gpu", True)
        self.m = utils.get_value_from_dict(config, "num_models", 5)
        self.num_labels = utils.get_value_from_dict(config, "num_labels", 28)
        self.print_every = 20
        self.log_every = 500

        # options for computing assignments
        self.k = utils.get_value_from_dict(config, "num_overlaps", 2)
        self.tau = utils.get_value_from_dict(config, "tau", -1)
        self.beta = utils.get_value_from_dict(config, "beta", 0.75)

        # options for margin-MCL
        self.margin_threshold = utils.get_value_from_dict(
            config, "margin_threshold", 1.0)
        self.use_logit = utils.get_value_from_dict(config, "margin_in_logit",
                                                   True)

        # options for attention transfer
        self.use_attention_transfer = utils.get_value_from_dict(
            config, "use_attention_transfer", False)
        if self.use_attention_transfer:
            self.att_transfer_beta = utils.get_value_from_dict(
                config, "att_transfer_beta", 1000)

        # options for assignment model
        self.use_assignment_model = utils.get_value_from_dict(
            config, "use_assignment_model", False)

        if self.use_assignment_model:
            self.assignment_criterion = nn.CrossEntropyLoss()

        self.iteration = 0
Code example #15
File: building_blocks.py  Project: JonghwanMun/MCL-KD
    def __init__(self, config, name=""):
        super(QuestionEmbedding, self).__init__()  # Must call super __init__()
        if name != "":
            name = name + "_"

        # get configurations
        self.use_gpu = utils.get_value_from_dict(config, "use_gpu", True)

        # options for word embedding
        word_emb_dim = utils.get_value_from_dict(config, name + "word_emb_dim",
                                                 300)
        padding_idx = utils.get_value_from_dict(config,
                                                name + "word_emb_padding_idx",
                                                0)
        self.apply_nonlinear = utils.get_value_from_dict(
            config, name + "apply_word_emb_nonlinear", False)
        self.word_emb_dropout_prob = utils.get_value_from_dict(
            config, name + "word_emb_dropout_prob", 0.0)

        # options for rnn
        self.rnn_type = utils.get_value_from_dict(config, name + "rnn_type",
                                                  "LSTM")
        self.num_layers = utils.get_value_from_dict(config,
                                                    name + "rnn_num_layers", 2)
        self.rnn_dim = utils.get_value_from_dict(config,
                                                 name + "rnn_hidden_dim", 256)
        rnn_dropout_prob = utils.get_value_from_dict(config,
                                                     name + "rnn_dropout_prob",
                                                     0)
        self.bidirectional = utils.get_value_from_dict(config,
                                                       name + "bidirectional",
                                                       False)
        vocab_size = utils.get_value_from_dict(config, name + "vocab_size", 10)

        assert self.rnn_type in ("GRU", "LSTM"), \
            "Not supported RNN type: {} (neither GRU nor LSTM)".format(
                self.rnn_type)

        # word embedding layers
        self.lookuptable = nn.Embedding(vocab_size,
                                        word_emb_dim,
                                        padding_idx=padding_idx)
        if self.word_emb_dropout_prob > 0:
            self.word_emb_dropout = nn.Dropout(self.word_emb_dropout_prob)

        # RNN layers
        self.rnn = getattr(nn, self.rnn_type)(input_size=word_emb_dim,
                                              hidden_size=self.rnn_dim,
                                              num_layers=self.num_layers,
                                              bias=True,
                                              batch_first=True,
                                              dropout=rnn_dropout_prob,
                                              bidirectional=self.bidirectional)
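A minimal instantiation sketch using the keys read above with an empty name prefix; the values are illustrative and every key has a default in __init__.

# illustrative values; each key below is read in __init__ above (name="")
qst_config = {
    "use_gpu": False,
    "word_emb_dim": 300,
    "word_emb_padding_idx": 0,
    "rnn_type": "LSTM",
    "rnn_num_layers": 2,
    "rnn_hidden_dim": 256,
    "rnn_dropout_prob": 0,
    "bidirectional": False,
    "vocab_size": 10000,
}
qst_emb_net = QuestionEmbedding(qst_config)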