Example #1
    def __init__(self, cfg):
        cfg = deepcopy(cfg)
        cfg["model"]["arch"] = "joint_segmentation_depth"
        cfg["model"]["segmentation_name"] = None
        cfg["model"]["disable_monodepth"] = False
        cfg["model"]["disable_pose"] = True
        cfg['data']['augmentations'] = {}
        cfg['data'].pop('crop_h', None)
        cfg['data'].pop('crop_w', None)
        assert not (cfg["data"].get("depth_teacher") is not None and
                    cfg['model'].get("depth_estimator_weights") is not None)
        if cfg["data"].get("depth_teacher") is not None:
            cfg['model']['backbone_name'] = "resnet101"
            cfg, load_backbone = decoder_variant(cfg, 6, (512, 512))
            cfg['model']['depth_pretraining'] = cfg["data"]["depth_teacher"]
            cfg['model']['backbone_pretraining'] = cfg["data"]["depth_teacher"]
        if cfg['model'].get("depth_estimator_weights") is not None:
            cfg['model']['backbone_pretraining'] = cfg['model'][
                'depth_estimator_weights']
            cfg['model']['depth_pretraining'] = cfg['model'][
                'depth_estimator_weights']

        self.cfg = cfg
        assert cfg['model']['depth_pretraining'] == cfg['model'][
            'backbone_pretraining']
        self.depth_dir = cfg["data"]["generated_depth_dir"]
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.monodepth_loss_calculator = get_monodepth_loss(self.cfg,
                                                            is_train=False)

        unrestricted_cfg = deepcopy(self.cfg["data"])
        unrestricted_cfg.update({
            "restrict_to_subset": None,
            "generated_depth_dir": None
        })
        self.train_loader = build_loader(unrestricted_cfg,
                                         "train",
                                         load_labels=False,
                                         load_sequence=False)
        self.val_loader = build_loader(unrestricted_cfg,
                                       "val",
                                       load_labels=False,
                                       load_sequence=False)
        self.loader = data.ConcatDataset([self.train_loader, self.val_loader])
        self.n_classes = self.train_loader.n_classes

        batch_size = 4
        self.data_loader = data.DataLoader(
            self.loader,
            batch_size=batch_size,
            num_workers=self.cfg["training"]["n_workers"],
            pin_memory=True,
        )

        self.model = get_model(cfg["model"], self.n_classes).to(self.device)
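Note: the ConcatDataset call above chains the train and val datasets so a single DataLoader can iterate over both. A minimal, self-contained sketch of the same pattern, using stand-in TensorDatasets instead of the project's build_loader datasets:

import torch
from torch.utils import data

train_set = data.TensorDataset(torch.randn(8, 3))   # stand-in for the train split
val_set = data.TensorDataset(torch.randn(4, 3))     # stand-in for the val split
combined = data.ConcatDataset([train_set, val_set])
loader = data.DataLoader(combined, batch_size=4, pin_memory=True)
for (x,) in loader:
    print(x.shape)  # batches come from the train split first, then val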
Example #2
File: test.py Project: xy09Player/rd
def test(gen_result, config):
    time0 = time.time()

    # prepare
    if config.is_true_test:
        preprocess_data.gen_pre_file_for_test()

    # load w2v
    embedding_np_train = loader.load_w2v(config.train_embedding + '.npy')
    if config.is_true_test:
        embedding_np_test = loader.load_w2v(config.test_embedding + '.npy')

    # prepare: test_df
    if config.is_true_test and (os.path.isfile(config.test_df) is False):
        preprocess_data.gen_test_datafile(config.test_data, config.test_df)

    if (config.is_true_test is False) and not os.path.isfile(config.test_val_df):
        raise FileNotFoundError('test_val_df is missing; run gen_train_val_datafile() again')

    # load data
    if config.is_true_test is False:
        if os.path.isfile(config.test_val_pkl):
            with open(config.test_val_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = loader.load_data(config.test_val_df,
                                         config.train_vocab_path,
                                         config.tag_path)
            with open(config.test_val_pkl, 'wb') as file:
                pickle.dump(test_data, file)

    else:
        if os.path.isfile(config.test_pkl):
            with open(config.test_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = loader.load_data(config.test_df,
                                         config.test_vocab_path,
                                         config.tag_path)
            with open(config.test_pkl, 'wb') as file:
                pickle.dump(test_data, file)

    # build test dataloader; note only the first six fields of test_data are passed
    test_loader = loader.build_loader(dataset=test_data[:6],
                                      batch_size=config.test_batch_size,
                                      shuffle=False,
                                      drop_last=False)

    # model initial
    param = {
        'embedding': embedding_np_train,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn
    }

    # resolve the model module named by config.model_name
    model = eval(config.model_name).Model(param)

    # load model param, and training state
    model_path = os.path.join('model', config.model_test)
    print('load model, ', model_path)
    state = torch.load(model_path)
    model.load_state_dict(state['best_model_state'])

    # swap embedding_fix for the test-vocabulary embeddings
    if config.is_true_test:
        model.embedding.sd_embedding.embedding_fix = nn.Embedding(
            num_embeddings=embedding_np_test.shape[0],
            embedding_dim=embedding_np_test.shape[1],
            padding_idx=0,
            _weight=torch.Tensor(embedding_np_test))
        model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
        model.embedding.sd_embedding.vocab_size = embedding_np_test.shape[0]
    model = model.cuda()

    best_loss = state['best_loss']
    best_epoch = state['best_epoch']
    best_step = state['best_step']
    best_time = state['best_time']
    use_time = state['time']
    print(
        'best_epoch:%2d, best_step:%5d, best_loss:%.4f, best_time:%d, use_time:%d'
        % (best_epoch, best_step, best_loss, best_time, use_time))

    # gen result
    result_start = []
    result_end = []
    result_start_p = []
    result_end_p = []

    model.eval()
    with torch.no_grad():
        cc = 0
        cc_total = len(test_loader)
        print('total iter_num:%d' % cc_total)
        for batch in test_loader:
            # cuda, cut
            batch = utils.deal_batch(batch)
            outputs = model(batch)
            start, end = utils.answer_search(outputs)

            start = start.reshape(-1).cpu().numpy().tolist()
            end = end.reshape(-1).cpu().numpy().tolist()

            result_start = result_start + start
            result_end = result_end + end

            start_p = outputs[0].cpu().numpy().tolist()
            end_p = outputs[1].cpu().numpy().tolist()

            result_start_p += start_p
            result_end_p += end_p

            cc += 1
            if cc % 100 == 0:
                print('processing: %d/%d' % (cc, cc_total))

    # generate the final results only if requested
    if gen_result:
        if config.is_true_test:
            df = pd.read_csv(config.test_df)
        else:
            df = pd.read_csv(config.test_val_df)

        # build the answer strings
        titles = df['title']
        shorten_content = df['shorten_content']
        question = df['question']
        assert len(titles) == len(shorten_content) == len(result_start) == len(
            result_end)
        result = utils.gen_str(titles,
                               shorten_content,
                               question,
                               result_start,
                               result_end,
                               add_liangci=config.is_true_test)

        # gen a submission
        if config.is_true_test:
            articled_ids = df['article_id'].astype(str).values.tolist()
            question_ids = df['question_id'].values
            submission = []
            temp_a_id = articled_ids[0]
            temp_qa = []
            for a_id, q_id, a in zip(articled_ids, question_ids, result):
                if a_id == temp_a_id:
                    sub = {'questions_id': q_id, 'answer': a}
                    temp_qa.append(sub)
                else:
                    submission.append({
                        'article_id': temp_a_id,
                        'questions': temp_qa
                    })
                    temp_a_id = a_id
                    temp_qa = [{'questions_id': q_id, 'answer': a}]
            submission.append({'article_id': temp_a_id, 'questions': temp_qa})

            submission_article = [s['article_id'] for s in submission]
            submission_questions = [s['questions'] for s in submission]
            submission_dict = dict(
                zip(submission_article, submission_questions))

            with open(config.test_data, 'r') as file:
                all_data = json.load(file)
            all_article = [d['article_id'] for d in all_data]

            submission = []
            for a_id in all_article:
                if a_id in submission_dict:
                    submission.append({
                        'article_id': a_id,
                        'questions': submission_dict[a_id]
                    })
                else:
                    submission.append({'article_id': a_id, 'questions': []})

            with open(config.submission, mode='w', encoding='utf-8') as f:
                json.dump(submission, f, ensure_ascii=False)

        # my_metrics
        if config.is_true_test is False:
            answer_true = df['answer'].values
            assert len(result) == len(answer_true)
            blue_score = blue.Bleu()
            rouge_score = rouge_test.RougeL()
            for a, r in zip(answer_true, result):
                if a == a:  # NaN != NaN, so this skips rows without a ground-truth answer
                    blue_score.add_inst(r, a)
                    rouge_score.add_inst(r, a)
            print('rouge_L score: %.4f, bleu score: %.4f' %
                  (rouge_score.get_score(), blue_score.get_score()))

        # to .csv
        if config.is_true_test is False:
            df['answer_pred'] = result
            df['answer_start_pred'] = result_start
            df['answer_end_pred'] = result_end
            csv_path = os.path.join('result', config.model_test + '_val.csv')
            df.to_csv(csv_path, index=False)

    # save result_ans_range
    if config.is_true_test:
        save_path = os.path.join('result/ans_range',
                                 config.model_test + '_submission.pkl')
    else:
        save_path = os.path.join('result/ans_range',
                                 config.model_test + '_val.pkl')

    result_ans_range = {'start_p': result_start_p, 'end_p': result_end_p}
    torch.save(result_ans_range, save_path)
    print('time:%d' % (time.time() - time0))
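Note: the submission-grouping loop above tracks temp_a_id/temp_qa by hand and assumes each article's questions arrive consecutively. A hypothetical condensation under the same assumption, keyed by article_id (a sketch with toy stand-in data, not the project's code):

from collections import OrderedDict

articled_ids = ['1', '1', '2']                 # toy stand-in data
question_ids = [10, 11, 12]
answers = ['ans a', 'ans b', 'ans c']

grouped = OrderedDict()
for a_id, q_id, a in zip(articled_ids, question_ids, answers):
    grouped.setdefault(a_id, []).append({'questions_id': q_id, 'answer': a})
submission = [{'article_id': a_id, 'questions': qa}
              for a_id, qa in grouped.items()]
print(submission)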
Example #3
def train():
    time_start = time.time()

    # prepare
    preprocess_data.gen_pre_file_for_train()

    # load w2v
    embedding_np = loader.load_w2v(config.train_embedding + '.npy')

    # prepare: train_df
    preprocess_data.gen_train_val_datafile()

    # load data
    print('load data...')
    time0 = time.time()
    # load train data
    if os.path.isfile(config.train_pkl):
        with open(config.train_pkl, 'rb') as file:
            train_data = pickle.load(file)
    else:
        train_data = build_dataset.CustomDataset(
            df_file=config.train_df,
            vocab_path=config.train_vocab_path,
            tag_path=config.tag_path)
        with open(config.train_pkl, 'wb') as file:
            pickle.dump(train_data, file)

    # load val data
    if os.path.isfile(config.val_pkl):
        with open(config.val_pkl, 'rb') as file:
            val_data = pickle.load(file)
    else:
        val_data = build_dataset.CustomDataset(
            df_file=config.val_df,
            vocab_path=config.train_vocab_path,
            tag_path=config.tag_path)
        with open(config.val_pkl, 'wb') as file:
            pickle.dump(val_data, file)
    print('train data size:%d, val data size:%d, time:%d' %
          (len(train_data), len(val_data), time.time() - time0))

    # build train, val dataloader
    train_loader = loader.build_loader(dataset=train_data,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       drop_last=True)
    val_loader = loader.build_loader(dataset=val_data,
                                     batch_size=config.batch_size,
                                     shuffle=False,
                                     drop_last=True)

    # model:
    param = {
        'embedding': embedding_np,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn,
        'k': config.k,
        'num_align_hops': config.num_align_hops
    }
    model = eval(config.model_name).Model(param)
    # replace embedding_fix with the pretrained w2v weights, frozen below
    model.embedding.sd_embedding.embedding_fix = nn.Embedding(
        num_embeddings=embedding_np.shape[0],
        embedding_dim=embedding_np.shape[1],
        padding_idx=0,
        _weight=torch.Tensor(embedding_np))
    model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
    model = model.cuda()

    # loss
    criterion = loss.LossJoin()

    # optimizer
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(optimizer_param,
                           lr=config.lr,
                           weight_decay=config.weight_decay)

    # load model param, optimizer param, train param
    model_path = os.path.join('model', config.model_save)
    if os.path.isfile(model_path):
        print('load training param, ', model_path)
        state = torch.load(model_path)
        model.load_state_dict(state['cur_model_state'])
        optimizer.load_state_dict(state['cur_opt_state'])
        epoch_list = range(state['cur_epoch'] + 1,
                           state['cur_epoch'] + 1 + config.epoch)
        train_loss_list = state['train_loss']
        val_loss_list = state['val_loss']
        val_accuracy = state['val_accuracy']
        steps = state['steps']
        time_use = state['time']
    else:
        state = None
        epoch_list = range(config.epoch)
        train_loss_list = []
        val_loss_list = []
        val_accuracy = []
        steps = []
        time_use = 0

    # train
    model_param_num = 0
    for param in model.parameters():
        if param.requires_grad:
            model_param_num += param.nelement()

    print('starting training: %s' % config.model_name)
    if state is None:
        print('start_epoch:0, end_epoch:%d, num_params:%d' %
              (config.epoch - 1, model_param_num))
    else:
        print('start_epoch:%d, end_epoch:%d, num_params:%d' %
              (state['cur_epoch'] + 1, state['cur_epoch'] + config.epoch,
               model_param_num))

    plt.ion()
    train_loss = 0
    train_c = 0
    flag = False
    cc = 0
    grade_1 = False
    grade_2 = False
    grade_num1 = len(train_data) // config.batch_size
    grade_num2 = grade_num1 // 20

    for e in epoch_list:
        for i, batch in enumerate(train_loader):
            # cuda
            batch = utils.deal_batch(batch)
            model.train()
            optimizer.zero_grad()
            outputs = model(batch)
            loss_value = criterion(outputs, batch[-1].view(-1))
            loss_value.backward()

            nn.utils.clip_grad_norm_(model.parameters(), config.max_grad)
            optimizer.step()

            train_loss += loss_value.item()
            train_c += 1

            if config.val_mean:
                flag = (train_c % config.val_every == 0)
            else:
                if (train_c % (config.val_every // 2) == 0) and (cc <= 0):
                    cc += 1
                    flag = True
                elif grade_1 and (train_c % grade_num1 == 0):
                    flag = True
                elif grade_2 and (train_c % grade_num2 == 0):
                    flag = True

            if flag:
                flag = False
                val_loss = 0
                val_c = 0
                correct_num = 0
                sum_num = 0
                with torch.no_grad():
                    model.eval()
                    for val_batch in val_loader:
                        # cut, cuda
                        val_batch = utils.deal_batch(val_batch)
                        outputs = model(val_batch)

                        loss_value = criterion(outputs, val_batch[-1].view(-1))
                        _, k = torch.max(outputs, dim=1)

                        k = k.view(-1)
                        correct_num += torch.sum(
                            k == val_batch[-1].view(-1)).item()
                        sum_num += val_batch[-1].size(0)

                        val_loss += loss_value.item()
                        val_c += 1

                train_loss_list.append(train_loss / train_c)
                val_loss_list.append(val_loss / val_c)
                steps.append(train_c)
                val_accuracy.append(correct_num * 1.0 / sum_num)

                print(
                    'training, epochs:%2d, steps:%5d, train_loss:%.4f, val_loss:%.4f, val_accuracy:%.4f, time:%4ds'
                    % (e, sum(steps), train_loss / train_c,
                       val_loss / val_c, correct_num * 1.0 / sum_num,
                       time.time() - time_start + time_use))

                if val_loss / val_c > 0.65:
                    grade_1 = True
                    grade_2 = False
                else:
                    grade_1 = False
                    grade_2 = True

                train_loss = 0
                train_c = 0

                # draw
                plt.cla()
                x = np.cumsum(steps)
                plt.plot(x, train_loss_list, color='r', label='train')
                plt.plot(x, val_loss_list, color='b', label='val')
                # plt.plot(
                #     x,
                #     val_accuracy,
                #     color='black',
                #     label='accuracy'
                # )

                plt.xlabel('steps')
                plt.ylabel('loss/accuracy')
                plt.legend()
                plt.pause(0.0000001)

                fig_path = os.path.join('model', config.model_save + '.png')
                plt.savefig(fig_path)
                plt.show()

                # save model
                if os.path.isfile(model_path):
                    state = torch.load(model_path)
                else:
                    state = {}

                if state == {} or (val_loss / val_c <= 0.61
                                   and state['best_val_accuracy'] <=
                                   correct_num * 1.0 / sum_num):
                    state['best_model_state'] = model.state_dict()
                    state['best_opt_state'] = optimizer.state_dict()
                    state['best_loss'] = val_loss / val_c
                    state['best_val_accuracy'] = correct_num * 1.0 / sum_num
                    state['best_epoch'] = e
                    state['best_step'] = sum(steps)
                    state['best_time'] = time_use + time.time() - time_start

                state['cur_model_state'] = model.state_dict()
                state['cur_opt_state'] = optimizer.state_dict()
                state['cur_epoch'] = e
                state['train_loss'] = train_loss_list
                state['val_loss'] = val_loss_list
                state['val_accuracy'] = val_accuracy
                state['steps'] = steps
                state['time'] = time_use + time.time() - time_start

                torch.save(state, model_path)
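Note: the embedding_fix replacement in the example above builds an nn.Embedding from a precomputed weight matrix and then freezes it. Standard PyTorch offers the same thing in one call; a minimal sketch with a random stand-in for the w2v matrix:

import numpy as np
import torch
import torch.nn as nn

embedding_np = np.random.rand(1000, 300).astype('float32')  # stand-in w2v matrix
embedding_fix = nn.Embedding.from_pretrained(
    torch.from_numpy(embedding_np), freeze=True, padding_idx=0)
print(embedding_fix.weight.requires_grad)  # False: the weights stay fixed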
Example #4
File: run.py Project: joelmfonseca/COM-503
import numpy as np
import torch

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

from loader import build_loader
from model import LSTM
from utils import snapshot, plot_prediction

# parameters
test_ratio = 0.13  # to get exactly one test sample based on how we built test samples
batch_size = 10

learning_rate = 0.001
look_back = 168
look_ahead = 574

train_loader, test_loader, scaler = build_loader(test_ratio, look_back,
                                                 look_ahead, batch_size)
model = LSTM(batch_size, learning_rate)

resume_training = True
if resume_training:
    # load previous model
    checkpoint = torch.load('saved_models/lstm_adam_b10_lb168_model')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
else:
    epoch = 0
    loss = np.inf

train = False
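Note: the resume block above expects a checkpoint dict with the keys model_state_dict, optimizer_state_dict, epoch, and loss, which is PyTorch's usual checkpoint convention. A sketch of the matching save side, continuing the names from the script above (the LSTM model holds its own optimizer):

# save a checkpoint the resume block can read back
torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': model.optimizer.state_dict(),
    'loss': loss,
}, 'saved_models/lstm_adam_b10_lb168_model')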
Example #5
    def __init__(self, cfg, logdir, run_id):
        # Copy shared config fields
        if "monodepth_options" in cfg:
            cfg["data"].update(cfg["monodepth_options"])
            cfg["model"].update(cfg["monodepth_options"])
            cfg["training"]["monodepth_loss"].update(cfg["monodepth_options"])
            cfg['model']['depth_args']['max_scale_size'] = (
                cfg["monodepth_options"]["crop_h"],
                cfg["monodepth_options"]["crop_w"])

        # Setup seeds
        setup_seeds(cfg.get("seed", 1337))
        if cfg["data"]["dataset_seed"] == "same":
            cfg["data"]["dataset_seed"] = cfg["seed"]

        # Setup device
        torch.backends.cudnn.benchmark = cfg["training"].get("benchmark", True)
        self.cfg = cfg
        self.logdir = logdir
        self.run_id = run_id
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # Prepare depth estimates
        do_precalculate_depth = False
        if do_precalculate_depth:
            print("Prepare depth estimates")
            depth_estimator = DepthEstimator(cfg)
            depth_estimator.prepare_depth_estimates()
            del depth_estimator
            torch.cuda.empty_cache()
        else:
            self.cfg["data"]["generated_depth_dir"] = None

        # Setup Dataloader
        self.val_loader = build_loader(self.cfg["data"],
                                       "val",
                                       load_labels=False,
                                       load_sequence=False)
        self.n_classes = self.val_loader.n_classes

        self.val_batch_size = self.cfg["training"]["val_batch_size"]
        self.val_data_loader = data.DataLoader(
            self.val_loader,
            batch_size=self.val_batch_size,
            num_workers=self.cfg["training"]["n_workers"],
            pin_memory=True,
            # If using a dataset with odd number of samples (CamVid), the memory consumption suddenly increases for the
            # last batch. This can be circumvented by dropping the last batch. Only do that if it is necessary for your
            # system as it will result in an incomplete validation set.
            # drop_last=True,
        )

        # Setup Model
        self.model = get_model(cfg["model"], self.n_classes).to(self.device)
        # print(self.model)

        self.monodepth_loss_calculator_val = get_monodepth_loss(
            self.cfg, is_train=False, batch_size=self.val_batch_size)

        if self.cfg["training"]["resume"] is not None:
            self.load_resume(strict=False)
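Note: the commented-out drop_last above trades completeness of the validation set against memory spikes on a small final batch. A self-contained illustration of what drop_last changes, with a toy odd-sized dataset:

import torch
from torch.utils import data

dataset = data.TensorDataset(torch.randn(101, 3))  # odd number of samples
keep_last = data.DataLoader(dataset, batch_size=4, drop_last=False)
drop_all = data.DataLoader(dataset, batch_size=4, drop_last=True)
print(len(keep_last))  # 26 batches; the last one holds a single sample
print(len(drop_all))   # 25 batches; one sample is never validated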
Example #6
def train():
    time_start = time.time()

    # prepare
    preprocess_data.gen_pre_file_for_train()

    # load w2v
    embedding_np = loader.load_w2v(config.train_embedding + '.npy')

    # prepare: train_df, val_df
    preprocess_data.gen_train_datafile()

    # load data
    print('load data...')
    time0 = time.time()
    # load train data
    if os.path.isfile(config.train_pkl):
        with open(config.train_pkl, 'rb') as file:
            train_data = pickle.load(file)
    else:
        train_data = loader.load_data(config.train_df, config.train_vocab_path, config.tag_path)
        with open(config.train_pkl, 'wb') as file:
            pickle.dump(train_data, file)

    # load val data
    if os.path.isfile(config.val_pkl):
        with open(config.val_pkl, 'rb') as file:
            val_data = pickle.load(file)
    else:
        val_data = loader.load_data(config.val_df, config.train_vocab_path, config.tag_path)
        with open(config.val_pkl, 'wb') as file:
            pickle.dump(val_data, file)

    print('load data finished, time:%d' % (time.time()-time0))

    # build train, val dataloader
    train_loader = loader.build_loader(
        dataset=train_data,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True
    )
    val_loader = loader.build_loader(
        dataset=val_data,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=True
    )

    # model:
    param = {
        'embedding': embedding_np,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn
    }
    model = eval(config.model_name).Model(param)
    # freeze the embedding_fix weights
    model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
    model = model.cuda()

    # loss
    if config.criterion == 'RougeLoss':
        criterion = eval(config.criterion)(lam=config.lamda)
    else:
        criterion = eval(config.criterion)()

    # optimizer
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(optimizer_param, lr=config.lr)

    # load model param, optimizer param, train param
    if config.is_for_rouge:
        model_path = os.path.join('model', config.model_save+'_mrt')
        if os.path.isfile(model_path):
            print('load training param, ', model_path)
            state = torch.load(model_path)
            model.load_state_dict(state['cur_model_state'])
            optimizer.load_state_dict(state['cur_opt_state'])
            train_loss_list = state['train_loss']
            val_loss_list = state['val_loss']
            steps = state['steps']
            time_use = state['time']

        else:
            model_path = os.path.join('model', config.model_save)
            assert os.path.isfile(model_path)
            state = torch.load(model_path)
            model.load_state_dict(state['best_model_state'])
            optimizer.load_state_dict(state['best_opt_state'])
            train_loss_list = []
            val_loss_list = []
            steps = []
            time_use = 0
        epoch_list = range(3)

    else:
        model_path = os.path.join('model', config.model_save)
        if os.path.isfile(model_path):
            print('load training param, ', model_path)
            state = torch.load(model_path)
            model.load_state_dict(state['cur_model_state'])
            optimizer.load_state_dict(state['cur_opt_state'])
            epoch_list = range(state['cur_epoch']+1, state['cur_epoch']+1+config.epoch)
            train_loss_list = state['train_loss']
            val_loss_list = state['val_loss']
            steps = state['steps']
            time_use = state['time']
        else:
            state = None
            epoch_list = range(config.epoch)
            train_loss_list = []
            val_loss_list = []
            steps = []
            time_use = 0

    # train
    model_param_num = 0
    for param in model.parameters():
        model_param_num += param.nelement()
    print('starting training: %s' % config.model_name)
    if state is None:
        print('start_epoch:0, end_epoch:%d, num_params:%d, num_params_except_embedding:%d' %
              (config.epoch-1, model_param_num, model_param_num-embedding_np.shape[0]*embedding_np.shape[1]))
    else:
        print('start_epoch:%d, end_epoch:%d, num_params:%d, num_params_except_embedding:%d' %
              (state['cur_epoch']+1, state['cur_epoch']+config.epoch, model_param_num,
               model_param_num-embedding_np.shape[0]*embedding_np.shape[1]))

    plt.ion()
    train_loss = 0
    train_c = 0
    flag = False
    cc = 0
    grade_1 = False
    grade_2 = False

    for e in epoch_list:
        for i, batch in enumerate(train_loader):
            # cut, cuda
            batch = utils.deal_batch(batch)

            model.train()
            optimizer.zero_grad()
            outputs = model(batch)
            loss_value = criterion(outputs, batch)
            loss_value.backward()

            nn.utils.clip_grad_norm_(model.parameters(), config.max_grad)
            optimizer.step()

            train_loss += loss_value.item()
            train_c += 1

            if config.val_mean:
                flag = (train_c % config.val_every == 0)
            else:
                if (train_c % (config.val_every//2) == 0) and (cc <= 1):
                    cc += 1
                    flag = True
                elif grade_1 and (train_c % (config.val_every*5) == 0):
                    flag = True
                elif grade_2 and (train_c % config.val_every == 0):
                    flag = True

            if flag:
                flag = False
                val_loss = 0
                val_c = 0
                with torch.no_grad():
                    model.eval()
                    for val_batch in val_loader:
                        # cut, cuda
                        val_batch = utils.deal_batch(val_batch)
                        outputs = model(val_batch)
                        loss_value = criterion(outputs, val_batch)

                        val_loss += loss_value.item()
                        val_c += 1

                train_loss_list.append(train_loss/train_c)
                val_loss_list.append(val_loss/val_c)
                steps.append(train_c)

                print('training, epochs:%2d, steps:%5d, train_loss:%.4f, val_loss:%.4f, time:%4ds' %
                      (e, sum(steps), train_loss/train_c, val_loss/val_c, time.time()-time_start+time_use))

                if val_loss/val_c >= 0.97:
                    grade_1 = True
                    grade_2 = False
                else:
                    grade_1 = False
                    grade_2 = True

                train_loss = 0
                train_c = 0

                # draw
                plt.cla()
                x = np.cumsum(steps)
                plt.plot(
                    x,
                    train_loss_list,
                    color='r',
                    label='train'
                )
                plt.plot(
                    x,
                    val_loss_list,
                    color='b',
                    label='val'
                )
                plt.xlabel('steps')
                plt.ylabel('loss')
                plt.legend()
                plt.pause(0.0000001)

                if config.is_for_rouge:
                    fig_path = os.path.join('model', config.model_save+'_mrt.png')
                else:
                    fig_path = os.path.join('model', config.model_save+'.png')

                plt.savefig(fig_path)
                plt.show()

                # save model
                if config.is_for_rouge:
                    model_path = os.path.join('model', config.model_save+'_mrt')

                if os.path.isfile(model_path):
                    state = torch.load(model_path)
                else:
                    state = {}

                if state == {} or state['best_loss'] > (val_loss/val_c):
                    state['best_model_state'] = model.state_dict()
                    state['best_opt_state'] = optimizer.state_dict()
                    state['best_loss'] = val_loss/val_c
                    state['best_epoch'] = e
                    state['best_step'] = sum(steps)
                    state['best_time'] = time_use + time.time() - time_start

                state['cur_model_state'] = model.state_dict()
                state['cur_opt_state'] = optimizer.state_dict()
                state['cur_epoch'] = e
                state['train_loss'] = train_loss_list
                state['val_loss'] = val_loss_list
                state['steps'] = steps
                state['time'] = time_use + time.time() - time_start

                torch.save(state, model_path)
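Note: the training step in the examples above clips the global gradient norm between backward() and optimizer.step(). A minimal, self-contained sketch of that step with toy stand-in names:

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
x, y = torch.randn(8, 4), torch.randint(0, 2, (8,))

optimizer.zero_grad()
loss_value = criterion(model(x), y)
loss_value.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)  # cap the gradient norm
optimizer.step()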
Example #7
    def __init__(self, cfg, writer, img_writer, logger, run_id):
        # Copy shared config fields
        if "monodepth_options" in cfg:
            cfg["data"].update(cfg["monodepth_options"])
            cfg["model"].update(cfg["monodepth_options"])
            cfg["training"]["monodepth_loss"].update(cfg["monodepth_options"])
        if "generated_depth_dir" in cfg["data"]:
            dataset_name = f"{cfg['data']['dataset']}_" \
                           f"{cfg['data']['width']}x{cfg['data']['height']}"
            depth_teacher = cfg["data"].get("depth_teacher", None)
            assert not (depth_teacher and cfg['model'].get('depth_estimator_weights') is not None)
            if depth_teacher is not None:
                cfg["data"]["generated_depth_dir"] += dataset_name + "/" + depth_teacher + "/"
            else:
                cfg["data"]["generated_depth_dir"] += dataset_name + "/" + cfg['model']['depth_estimator_weights'] + "/"

        # Setup seeds
        setup_seeds(cfg.get("seed", 1337))
        if cfg["data"]["dataset_seed"] == "same":
            cfg["data"]["dataset_seed"] = cfg["seed"]

        # Setup device
        torch.backends.cudnn.benchmark = cfg["training"].get("benchmark", True)
        self.cfg = cfg
        self.writer = writer
        self.img_writer = img_writer
        self.logger = logger
        self.run_id = run_id
        self.mIoU = 0
        self.fwAcc = 0
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.setup_segmentation_unlabeled()

        self.unlabeled_require_depth = (
            self.cfg["training"]["unlabeled_segmentation"] is not None and
            self.cfg["training"]["unlabeled_segmentation"]["mix_mask"] in
            ("depth", "depthcomp", "depthhist"))

        # Prepare depth estimates
        do_precalculate_depth = self.cfg["training"]["segmentation_lambda"] != 0 and self.unlabeled_require_depth and \
                                self.cfg['model']['segmentation_name'] != 'mtl_pad'
        use_depth_teacher = cfg["data"].get("depth_teacher", None) is not None
        if do_precalculate_depth or use_depth_teacher:
            assert not (do_precalculate_depth and use_depth_teacher)
            if not self.cfg["training"].get("disable_depth_estimator", False):
                print("Prepare depth estimates")
                depth_estimator = DepthEstimator(cfg)
                depth_estimator.prepare_depth_estimates()
                del depth_estimator
                torch.cuda.empty_cache()
        else:
            self.cfg["data"]["generated_depth_dir"] = None

        # Setup Dataloader
        load_labels, load_sequence = True, True
        if self.cfg["training"]["monodepth_lambda"] == 0:
            load_sequence = False
        if self.cfg["training"]["segmentation_lambda"] == 0:
            load_labels = False
        train_data_cfg = deepcopy(self.cfg["data"])
        if not do_precalculate_depth and not use_depth_teacher:
            train_data_cfg["generated_depth_dir"] = None
        self.train_loader = build_loader(train_data_cfg, "train", load_labels=load_labels, load_sequence=load_sequence)
        if self.cfg["training"].get("minimize_entropy_unlabeled", False) or self.enable_unlabled_segmentation:
            unlabeled_segmentation_cfg = deepcopy(self.cfg["data"])
            if not self.only_unlabeled and self.mix_use_gt:
                unlabeled_segmentation_cfg["load_onehot"] = True
            if self.only_unlabeled:
                unlabeled_segmentation_cfg.update({"load_unlabeled": True, "load_labeled": False})
            elif self.only_labeled:
                unlabeled_segmentation_cfg.update({"load_unlabeled": False, "load_labeled": True})
            else:
                unlabeled_segmentation_cfg.update({"load_unlabeled": True, "load_labeled": True})
            if self.mix_video:
                assert not self.mix_use_gt and not self.only_labeled and not self.only_unlabeled, \
                    "Video sample indices are not compatible with non-video indices."
                unlabeled_segmentation_cfg.update({"only_sequences_with_segmentation": not self.mix_video,
                                                   "restrict_to_subset": None})
            self.unlabeled_loader = build_loader(unlabeled_segmentation_cfg, "train",
                                                 load_labels=load_labels if not self.mix_video else False,
                                                 load_sequence=load_sequence)
        else:
            self.unlabeled_loader = None
        self.val_loader = build_loader(self.cfg["data"], "val", load_labels=load_labels,
                                       load_sequence=load_sequence)
        self.n_classes = self.train_loader.n_classes

        # The monodepth dataloader settings use drop_last=True and shuffle=True even for val.
        self.train_data_loader = data.DataLoader(
            self.train_loader,
            batch_size=self.cfg["training"]["batch_size"],
            num_workers=self.cfg["training"]["n_workers"],
            shuffle=self.cfg["data"]["shuffle_trainset"],
            pin_memory=True,
            # Setting this to False causes a crash at the end of an epoch
            drop_last=True,
        )
        if self.unlabeled_loader is not None:
            self.unlabeled_data_loader = infinite_iterator(data.DataLoader(
                self.unlabeled_loader,
                batch_size=self.cfg["training"]["batch_size"],
                num_workers=self.cfg["training"]["n_workers"],
                shuffle=self.cfg["data"]["shuffle_trainset"],
                pin_memory=True,
                # Setting this to False causes a crash at the end of an epoch
                drop_last=True,
            ))

        self.val_batch_size = self.cfg["training"]["val_batch_size"]
        self.val_data_loader = data.DataLoader(
            self.val_loader,
            batch_size=self.val_batch_size,
            num_workers=self.cfg["training"]["n_workers"],
            pin_memory=True,
            # If using a dataset with odd number of samples (CamVid), the memory consumption suddenly increases for the
            # last batch. This can be circumvented by dropping the last batch. Only do that if it is necessary for your
            # system as it will result in an incomplete validation set.
            # drop_last=True,
        )

        # Setup Model
        self.model = get_model(cfg["model"], self.n_classes).to(self.device)
        # print(self.model)
        assert not (self.enable_unlabled_segmentation and self.cfg["training"]["save_monodepth_ema"])
        if self.enable_unlabled_segmentation and not self.only_labeled:
            print("Create segmentation ema model.")
            self.ema_model = self.create_ema_model(self.model).to(self.device)
        elif self.cfg["training"]["save_monodepth_ema"]:
            print("Create depth ema model.")
            # TODO: Try to remove unnecessary components and fit into gpu for better performance
            self.ema_model = self.create_ema_model(self.model)  # .to(self.device)
        else:
            self.ema_model = None

        # Setup optimizer, lr_scheduler and loss function
        optimizer_cls = get_optimizer(cfg)
        optimizer_params = {k: v for k, v in cfg["training"]["optimizer"].items() if
                            k not in ["name", "backbone_lr", "pose_lr", "depth_lr", "segmentation_lr"]}
        train_params = get_train_params(self.model, self.cfg)
        self.optimizer = optimizer_cls(train_params, **optimizer_params)

        self.scheduler = get_scheduler(self.optimizer, self.cfg["training"]["lr_schedule"])

        # Creates a GradScaler once at the beginning of training.
        self.scaler = GradScaler(enabled=self.cfg["training"]["amp"])

        self.loss_fn = get_segmentation_loss_function(self.cfg)
        self.monodepth_loss_calculator_train = get_monodepth_loss(self.cfg, is_train=True)
        self.monodepth_loss_calculator_val = get_monodepth_loss(self.cfg, is_train=False, batch_size=self.val_batch_size)

        if cfg["training"]["early_stopping"] is None:
            logger.info("Using No Early Stopping")
            self.earlyStopping = None
        else:
            self.earlyStopping = EarlyStopping(
                patience=round(cfg["training"]["early_stopping"]["patience"] / cfg["training"]["val_interval"]),
                min_delta=cfg["training"]["early_stopping"]["min_delta"],
                cumulative_delta=cfg["training"]["early_stopping"]["cum_delta"],
                logger=logger
            )
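Note: the GradScaler created above is one half of PyTorch's mixed-precision recipe; the other half is wrapping the forward pass in autocast. A minimal sketch of the standard pattern (requires a CUDA device; not the project's training loop):

import torch
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler(enabled=True)
model = torch.nn.Linear(4, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
x = torch.randn(8, 4, device='cuda')
y = torch.randint(0, 2, (8,), device='cuda')

optimizer.zero_grad()
with autocast(enabled=True):      # forward pass in mixed precision
    loss = torch.nn.functional.cross_entropy(model(x), y)
scaler.scale(loss).backward()     # scale the loss to avoid fp16 gradient underflow
scaler.step(optimizer)            # unscales gradients, then calls optimizer.step()
scaler.update()                   # adjust the scale factor for the next iteration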
Example #8
def test(config):
    time0 = time.time()

    # prepare
    preprocess_data.gen_pre_file_for_test()

    # load w2v
    embedding_np_train = loader.load_w2v(config.train_embedding + '.npy')
    embedding_np_test = loader.load_w2v(config.test_embedding + '.npy')

    # prepare: test_df
    if config.is_true_test:
        preprocess_data.gen_test_datafile()

    # load data
    if config.is_true_test is False:
        if os.path.isfile(config.val_true_pkl):
            with open(config.val_true_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = build_dataset.CustomDataset(
                df_file=config.val_df,
                vocab_path=config.train_vocab_path,
                tag_path=config.tag_path,
                is_test=True)
            with open(config.val_true_pkl, 'wb') as file:
                pickle.dump(test_data, file)

    else:
        if os.path.isfile(config.test_pkl):
            with open(config.test_pkl, 'rb') as file:
                test_data = pickle.load(file)
        else:
            test_data = build_dataset.CustomDataset(
                df_file=config.test_df,
                vocab_path=config.test_vocab_path,
                tag_path=config.tag_path,
                is_test=True)
            with open(config.test_pkl, 'wb') as file:
                pickle.dump(test_data, file)

    # build test dataloader
    test_loader = loader.build_loader(dataset=test_data,
                                      batch_size=config.test_batch_size,
                                      shuffle=False,
                                      drop_last=False)

    # model initial
    param = {
        'embedding': embedding_np_train,
        'mode': config.mode,
        'hidden_size': config.hidden_size,
        'dropout_p': config.dropout_p,
        'encoder_dropout_p': config.encoder_dropout_p,
        'encoder_bidirectional': config.encoder_bidirectional,
        'encoder_layer_num': config.encoder_layer_num,
        'is_bn': config.is_bn,
        'k': config.k,
        'num_align_hops': config.num_align_hops
    }

    model = eval(config.model_name).Model(param)

    # load model param, and training state
    model_path = os.path.join('model', config.model_test)
    print('load model, ', model_path)
    state = torch.load(model_path)
    model.load_state_dict(state['best_model_state'])

    # swap embedding_fix for the test-vocabulary embeddings
    if config.is_true_test:
        model.embedding.sd_embedding.embedding_fix = nn.Embedding(
            num_embeddings=embedding_np_test.shape[0],
            embedding_dim=embedding_np_test.shape[1],
            padding_idx=0,
            _weight=torch.Tensor(embedding_np_test))
        model.embedding.sd_embedding.embedding_fix.weight.requires_grad = False
        model.embedding.sd_embedding.vocab_size = embedding_np_test.shape[0]
    model = model.cuda()

    best_loss = state['best_loss']
    best_val_accuracy = state['best_val_accuracy']
    best_epoch = state['best_epoch']
    best_step = state['best_step']
    best_time = state['best_time']
    use_time = state['time']
    print(
        'best_epoch:%2d, best_step:%5d, best_loss:%.4f, val_accuracy:%.4f, best_time:%d, use_time:%d'
        % (best_epoch, best_step, best_loss, best_val_accuracy, best_time,
           use_time))

    # gen result
    result = []
    result_range = []

    model.eval()
    with torch.no_grad():
        cc = 0
        cc_total = len(test_loader)
        print('total iter_num:%d' % cc_total)
        for batch in test_loader:
            # cuda, cut
            batch = utils.deal_batch(batch)
            outputs = model(batch)  # (batch_size, 3)
            _, k = torch.max(outputs, dim=1)
            k = k.cpu().numpy().tolist()
            result = result + k

            outputs = outputs.cpu().numpy().tolist()
            result_range = result_range + outputs

            cc += 1
            if cc % 100 == 0:
                print('processing: %d/%d' % (cc, cc_total))

    if config.is_true_test:
        df = pd.read_csv(config.test_df, encoding='utf-8')
    else:
        df = pd.read_csv(config.val_df, encoding='utf-8')

    # generate the answer strings
    a_items = df['a_item'].values
    b_items = df['b_item'].values
    c_items = df['c_item'].values
    alts = df['alternatives'].values
    tmp = []
    for r, a, b, c, alt in zip(result, a_items, b_items, c_items, alts):
        alt_list = alt.split('|')
        if r == 0:
            if a == alt_list[0].strip():
                tmp.append(alt_list[0])
            elif a == alt_list[1].strip():
                tmp.append(alt_list[1])
            elif a == alt_list[2].strip():
                tmp.append(alt_list[2])
            else:
                tmp.append('xxx')
                print('r==0, meet wrong data')
        elif r == 1:
            if b == alt_list[0].strip():
                tmp.append(alt_list[0])
            elif b == alt_list[1].strip():
                tmp.append(alt_list[1])
            elif b == alt_list[2].strip():
                tmp.append(alt_list[2])
            else:
                tmp.append('xxx')
                print('r==1, meet wrong data')
        else:
            if c == alt_list[0].strip():
                tmp.append(alt_list[0])
            elif c == alt_list[1].strip():
                tmp.append(alt_list[1])
            elif c == alt_list[2].strip():
                tmp.append(alt_list[2])
            else:
                tmp.append('xxx')
                print('r==2, meet wrong data')

    # gen a submission
    if config.is_true_test:
        query_ids = df['query_id']
        with open(config.submission, 'w') as file:
            for i, r in zip(query_ids, tmp):
                file.writelines(str(i) + '\t' + r + '\n')

    # my_metrics
    if config.is_true_test is False:
        answers = df['answer']
        flag = []
        for a, r in zip(answers, tmp):
            if a == r:
                flag.append(True)
            else:
                flag.append(False)
        print('accuracy:%.4f' % (sum(flag) / len(answers)))

    # to .csv
    if config.is_true_test is False:
        df['answer_pred'] = tmp
        df = df[[
            'query_id', 'query', 'passage', 'alternatives', 'answer',
            'answer_pred'
        ]]
        csv_path = os.path.join('result', config.model_test + '_val.csv')
        df.to_csv(csv_path, index=False)

    # save result_ans_range
    if config.is_true_test:
        save_path = os.path.join('result/ans_range',
                                 config.model_test + '_submission.pkl')
    else:
        save_path = os.path.join('result/ans_range',
                                 config.model_test + '_val.pkl')
    torch.save(result_range, save_path)
    print('time:%d' % (time.time() - time0))
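Note: the a/b/c mapping loop in this example repeats the same three-way comparison once per class. A hypothetical, behavior-preserving condensation (keeping the same names and the same 'xxx' fallback):

tmp = []
for r, a, b, c, alt in zip(result, a_items, b_items, c_items, alts):
    alt_list = alt.split('|')
    chosen = {0: a, 1: b}.get(r, c)      # r==0 -> a, r==1 -> b, anything else -> c
    for cand in alt_list[:3]:
        if chosen == cand.strip():
            tmp.append(cand)
            break
    else:                                # no alternative matched
        tmp.append('xxx')
        print('r==%d, meet wrong data' % r)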