Example 1
def infer(model, rank=0):
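    # Run inference over the COCO iterator and collect per-batch detections
    # (scores, boxes, classes) together with image ids and resize ratios.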
    model = model.cuda()
    model = DataParallel(model)
    model.load_state_dict(torch.load(model_state_dict))
    model.eval()
    if rank == 0:
        print('preparing dataset...')
    data_iterator = DataIterator(coco_dir,
                                 resize=resize,
                                 max_size=max_size,
                                 batch_size=batch_size,
                                 stride=stride,
                                 training=training,
                                 dist=dist)
    if rank == 0:
        print('finish loading dataset!')

    results = []
    with torch.no_grad():
        for i, (data, ids, ratios) in enumerate(data_iterator, start=1):
            scores, boxes, classes = model(data)
            results.append([scores, boxes, classes, ids, ratios])
            if rank == 0:
                size = len(data_iterator.ids)
                msg = '[{:{len}}/{}]'.format(min(i * batch_size, size),
                                             size,
                                             len=len(str(size)))
                print(msg, flush=True)

    results = [torch.cat(r, dim=0) for r in zip(*results)]
    results = [r.cpu() for r in results]
Example 2
def make_data():
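    # Build (or load cached) head-word and entity-type vocabularies over the
    # parsed corpus, then wrap everything in a DataIterator.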
    base_dirs = [setting["parsed_data_path"]["test"],
                 setting["parsed_data_path"]["dev"],
                 setting["parsed_data_path"]["unlabeled"]]
    print("base_dirs are", base_dirs)
    corpus = ParsedCorpus(base_dirs)

    vocab = HeadWordVocabulary()
    if os.path.exists("./voc.txt"):
        vocab.load()
    else:
        vocab.make_vocabulary(corpus, "headWord")
        vocab.save()
    print("vocab length is", len(vocab.stoi))

    entity_vocab = HeadWordVocabulary()
    if os.path.exists("./evoc.txt"):
        entity_vocab.load("./evoc.txt")
    else:
        entity_vocab.make_vocabulary(corpus, "entityType")
        entity_vocab.save("./evoc.txt")
    print("entity label vocab length is", len(entity_vocab.stoi))

    data_iterator = DataIterator(corpus, vocab, entity_vocab)
    return data_iterator, vocab, entity_vocab
Example 3
File: main.py Project: jdddog/GTS
def train(args):

    # load dataset
    train_sentence_packs = json.load(open(args.prefix + args.dataset + '/train.json'))
    random.shuffle(train_sentence_packs)
    dev_sentence_packs = json.load(open(args.prefix + args.dataset + '/dev.json'))
    instances_train = load_data_instances(train_sentence_packs, args)
    instances_dev = load_data_instances(dev_sentence_packs, args)
    random.shuffle(instances_train)
    trainset = DataIterator(instances_train, args)
    devset = DataIterator(instances_dev, args)

    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)
    model = MultiInferBert(args).to(args.device)

    optimizer = torch.optim.Adam([
        {'params': model.bert.parameters(), 'lr': 5e-5},
        {'params': model.cls_linear.parameters()}
    ], lr=5e-5)

    best_joint_f1 = 0
    best_joint_epoch = 0
    for i in range(args.epochs):
        print('Epoch:{}'.format(i))
        for j in trange(trainset.batch_count):
            _, tokens, lengths, masks, _, _, aspect_tags, tags = trainset.get_batch(j)
            preds = model(tokens, masks)

            preds_flatten = preds.reshape([-1, preds.shape[3]])
            tags_flatten = tags.reshape([-1])
            loss = F.cross_entropy(preds_flatten, tags_flatten, ignore_index=-1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        joint_precision, joint_recall, joint_f1 = eval(model, devset, args)

        if joint_f1 > best_joint_f1:
            model_path = args.model_dir + 'bert' + args.task + '.pt'
            torch.save(model, model_path)
            best_joint_f1 = joint_f1
            best_joint_epoch = i
    print('best epoch: {}\tbest dev {} f1: {:.5f}\n\n'.format(best_joint_epoch, args.task, best_joint_f1))
Example 4
def infer(img_path, batch_size=64, image_height=60, image_width=180, image_channel=1, checkpoint_dir="../checkpoint/"):
    # Read the image file names
    file_names = os.listdir(img_path)
    file_names = [t for t in file_names if t.find("label") < 0]
    file_names.sort(key=lambda x: int(x.split('.')[0]))
    file_names = np.asarray([os.path.join(img_path, file_name) for file_name in file_names])

    # Build the model
    model = cnn_lstm_otc_ocr.LSTMOCR(num_classes=NumClasses, batch_size=batch_size, is_train=False)
    model.build_graph()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        # Initialize the model
        sess.run(tf.global_variables_initializer())

        # Load the model checkpoint
        ckpt = tf.train.latest_checkpoint(checkpoint_dir)
        if ckpt:
            print('restore from ckpt{}'.format(ckpt))
            tf.train.Saver(tf.global_variables(), max_to_keep=100).restore(sess, ckpt)
        else:
            print('cannot restore')
            raise Exception("cannot restore")

        results = []
        for curr_step in range(len(file_names) // batch_size):

            # Read the image data
            images_input = []
            for img in file_names[curr_step * batch_size: (curr_step + 1) * batch_size]:
                image_data = np.asarray(Image.open(img).convert("L"), dtype=np.float32) / 255.
                image_data = np.reshape(image_data, [image_height, image_width, image_channel])
                images_input.append(image_data)
            images_input = np.asarray(images_input)

            # Run the graph to get the dense decoded output for this batch
            net_results = sess.run(model.dense_decoded, {model.inputs: images_input})

            # Decode the network output, one result per image
            for item in net_results:
                result = DataIterator.get_result(item)
                results.append(result)
                print(result)

        # Save the results
        with open('./result.txt', 'a') as f:
            for code in results:
                f.write(code + '\n')
Example 5
    def _predict(self):
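        # Feed the dense training matrix through the network in batches and
        # collect the predicted ratings for all users.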
        all_context = DataIterator(self.train_matrix_dense.tolist(),
                                   batch_size=self.batch_size)
        all_rating = []
        for users in all_context:
            r_hat = self.sess.run(self.r_hat,
                                  feed_dict={self.user_context: users})
            all_rating.extend(r_hat)

        return np.array(all_rating)
Example 6
File: main.py Project: jdddog/GTS
def test(args):
    print("Evaluation on testset:")
    model_path = args.model_dir + 'bert' + args.task + '.pt'
    model = torch.load(model_path).to(args.device)
    model.eval()

    sentence_packs = json.load(open(args.prefix + args.dataset + '/test.json'))
    instances = load_data_instances(sentence_packs, args)
    testset = DataIterator(instances, args)
    eval(model, testset, args)
Example 7
File: main.py Project: jdddog/GTS
def test(args):
    print("Evaluation on testset:")
    model_path = args.model_dir + args.model + args.task + '.pt'
    model = torch.load(model_path).to(args.device)
    model.eval()

    word2index = json.load(open(args.prefix + 'doubleembedding/word_idx.json'))
    sentence_packs = json.load(open(args.prefix + args.dataset + '/test.json'))
    instances = load_data_instances(sentence_packs, word2index, args)
    testset = DataIterator(instances, args)
    eval(model, testset, args)
Example 8
    def get_train_data(self):
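        # Flatten the user -> positive-item mapping into parallel lists and
        # return a shuffled DataIterator over (user, item) pairs.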
        users_list = []
        items_list = []
        for user, items in self.user_pos_train.items():
            users_list.extend([user] * len(items))
            items_list.extend(items)

        dataloader = DataIterator(users_list,
                                  items_list,
                                  batch_size=self.batch_size,
                                  shuffle=True)
        return dataloader
Example 9
    def get_train_data(self):
        users_list, pos_items, neg_items = [], [], []
        train_users = list(self.user_pos_train.keys())
        with ThreadPoolExecutor() as executor:
            data = executor.map(self.get_train_data_one_user, train_users)
        data = list(data)
        for users, pos, neg in data:
            users_list.extend(users)
            pos_items.extend(pos)
            neg_items.extend(neg)

        dataloader = DataIterator(users_list, pos_items, neg_items, batch_size=self.batch_size, shuffle=True)
        return dataloader
Example 10
    def get_train_data(self):
        users_list, items_list, labels_list = [], [], []
        train_users = list(self.user_pos_train.keys())
        with ThreadPoolExecutor() as executor:
            data = executor.map(self.get_train_data_one_user, train_users)
        data = list(data)
        for users, items, labels in data:
            users_list.extend(users)
            items_list.extend(items)
            labels_list.extend(labels)

        dataloader = DataIterator(users_list,
                                  items_list,
                                  labels_list,
                                  batch_size=self.batch_size,
                                  shuffle=True)
        return dataloader
Example 11
    def get_training_data(self):
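        # Sample one negative item per positive interaction and return a
        # shuffled DataIterator over (user, pos_item, neg_item) triples.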
        users = []
        pos_items = []
        neg_items = []
        for u, pos in self.user_pos_train.items():
            pos_len = len(pos)
            neg = random_choice(self.all_items, size=pos_len, exclusion=pos)

            users.extend([u] * pos_len)
            pos_items.extend(pos.tolist())
            neg_items.extend(neg.tolist())

        return DataIterator(users,
                            pos_items,
                            neg_items,
                            batch_size=self.batch_size,
                            shuffle=True)
Example 12
    def get_training_data(self):
        users_list = []
        pos_items_list = []
        neg_items_list = []
        users = self.user_pos_train.keys()
        with ThreadPoolExecutor() as executor:
            batch_result = executor.map(self._get_neg_items, users)
        for user, pos, neg in batch_result:
            users_list.extend(user)
            pos_items_list.extend(pos)
            neg_items_list.extend(neg)

        return DataIterator(users_list,
                            pos_items_list,
                            neg_items_list,
                            batch_size=self.batch_size,
                            shuffle=True)
Example 13
    def get_train_data(self):
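        # Build the _mask and _N_zr indicator matrices (positives plus sampled
        # negatives per user) and pair them with the dense rating rows.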
        self._mask = np.zeros([self.users_num, self.items_num])
        self._N_zr = np.zeros([self.users_num, self.items_num])

        for user, pos_items in self.user_pos_train.items():
            self._mask[user][pos_items] = 1
            neg = random_choice(self.all_items,
                                size=int(self.s_pm * self.items_num),
                                replace=False,
                                exclusion=pos_items)
            self._mask[user][neg] = 1

            neg = random_choice(self.all_items,
                                size=int(self.s_zr * self.items_num),
                                replace=False,
                                exclusion=pos_items)
            self._N_zr[user][neg] = 1

        return DataIterator(self.train_matrix_dense.tolist(),
                            self._mask,
                            self._N_zr,
                            batch_size=self.batch_size,
                            shuffle=True)
Example 14
def infer(args, model, val_iterator=None):
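    # Standalone mode (no val_iterator passed) builds the Bitcoin price/tweet
    # dataset and iterator itself; otherwise the caller's iterator is reused.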
    training = val_iterator is not None
    if not training:
        model = model.cuda()
        val_dataset = BitcoinDataset(args.price_path,
                                     args.tweet_path,
                                     date_from=args.start_date,
                                     date_to=args.end_date)
        val_iterator = DataIterator(val_dataset, args.batch_size, val=True)
    criterion = L1Loss(reduction='none')
    count = len(val_iterator)

    print('Running inference on {} datapoints...'.format(count))
    profiler = Profiler(['infer', 'fw'])
    results, losses, losses_daily = [], [], []
    model.eval()
    for i, (price, tweet, trgt) in enumerate(val_iterator):
        profiler.start('fw')
        with torch.no_grad():
            out = model(price, tweet)
            loss = criterion(out, trgt)
        results.append([out, loss])
        loss_daily = loss.mean(axis=0).view(7, 24).mean(axis=1)
        loss = loss.mean()
        losses_daily.append(loss_daily)
        losses.append(loss)
        profiler.stop('fw')
        profiler.bump('infer')

        if not training and (profiler.totals['infer'] > 60
                             or i == count // args.batch_size):
            avg_loss = torch.stack(losses).mean().item()
            avg_loss_daily = torch.stack(losses_daily).mean(axis=0).tolist()

            print(' | '.join([
                f'[{min((i+1) * args.batch_size, count):{len(str(count))}}/{count}] loss: {avg_loss:.4f}',
                ('loss-daily: [' + ', '.join(['{:.4f}']*7) + ']').format(*avg_loss_daily),
                f'{profiler.means["infer"]:.3f}s/{args.batch_size}-batch' + \
                f'(fw: {profiler.means["fw"]:.3f}s, bw: {profiler.means["bw"]:.3f}s)',
            ]), flush=True)

            profiler.reset()

    results = [torch.cat(r, dim=0).cpu() for r in zip(*results)]
    if not training:
        take = 2
        mean, std = 7.9078, 1.5308
        out = (results[0].numpy() * std) + mean
        out = diags(out.T[:take],
                    offsets=np.arange(take),
                    shape=(out.shape[0], out.shape[0] + take))
        out = np.asarray(out.sum(axis=0)).T[take - 1:-take - 1] / take
        trgt = (val_dataset.price_trgt.numpy()[take // 2:-take // 2, 0] *
                std) + mean
        date_from = pd.Timestamp('2020-09-01') + pd.Timedelta(days=30,
                                                              hours=take // 2)
        date_to = pd.Timestamp('2021-02-01') - pd.Timedelta(
            days=7, hours=take // 2 + 1)
        date_range = pd.date_range(date_from, date_to, freq='H')
        pd.DataFrame(data=np.concatenate([trgt, out], axis=-1),
                     index=date_range,
                     columns=['actual',
                              'forecast']).to_csv(args.output,
                                                  index_label='timestamp')
    loss = results[1].mean().item()
    loss_daily = results[1].mean(axis=0).view(7, 24).mean(axis=1).tolist()
    print(' | '.join([
        f'[Inference] loss: {loss:.4f}',
        ('loss-daily: [' + ', '.join(['{:.4f}'] * 7) + ']').format(*loss_daily)
    ]),
          flush=True)
    return loss, loss_daily
Example 15
def main():
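    # Jointly fine-tune a shared BERT encoder on two GLUE tasks, re-weighting
    # the two task losses by their ratio at every step.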
    #training_args = GlueTraingArgs(do_train=True)
    data_args_task0 = GlueDataArgs(task_name=task0)
    data_args_task1 = GlueDataArgs(task_name=task1)

    if use_gpu:
        print("Training on GPU.")

    # logging
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    logger.info("Tasks:" + task0 + "," + task1)

    config_task0 = BertConfig.from_pretrained(
        bert_path,
        num_labels=glue_tasks_num_labels[data_args_task0.task_name],
        finetuning_task=data_args_task0.task_name,
        cache_dir=cache_dir)

    config_task1 = BertConfig.from_pretrained(
        bert_path,
        num_labels=glue_tasks_num_labels[data_args_task1.task_name],
        finetuning_task=data_args_task1.task_name,
        cache_dir=cache_dir)

    # Model preparation: the BERT encoder loads pretrained weights, while the
    # downstream classification heads are initialized randomly.
    # TODO: add a random seed (refer to Trainer.train()).

    if use_gpu:
        model_Bert = BertModel.from_pretrained(bert_path,
                                               return_dict=True).cuda()
        model_task0 = SequenceClassification(config_task0).cuda()
        model_task1 = SequenceClassification(config_task1).cuda()
    else:
        model_Bert = BertModel.from_pretrained(bert_path, return_dict=True)
        model_task0 = SequenceClassification(config_task0)
        model_task1 = SequenceClassification(config_task1)

    # print(model_Bert)
    # print(model_task0)
    # print(model_task1)

    # return
    # Data prepare
    tokenizer = BertTokenizer.from_pretrained(bert_path, cache_dir=cache_dir)
    data_iterator_train_task0 = DataIterator(data_args_task0,
                                             tokenizer=tokenizer,
                                             mode="train",
                                             cache_dir=cache_dir,
                                             batch_size=batch_size)
    data_iterator_train_task1 = DataIterator(data_args_task1,
                                             tokenizer=tokenizer,
                                             mode="train",
                                             cache_dir=cache_dir,
                                             batch_size=batch_size)
    data_iterator_eval_task0 = DataIterator(data_args_task0,
                                            tokenizer=tokenizer,
                                            mode="dev",
                                            cache_dir=cache_dir,
                                            batch_size=batch_size)
    data_iterator_eval_task1 = DataIterator(data_args_task1,
                                            tokenizer=tokenizer,
                                            mode="dev",
                                            cache_dir=cache_dir,
                                            batch_size=batch_size)
    logger.info("*** DataSet Ready ***")

    # data0 = data_iterator_train_task0.next()
    # print(data0)

    # input_ids0=data0['input_ids']
    # attention_mask0=data0['attention_mask']
    # token_type_ids0=data0['token_type_ids']
    # label0=data0['labels']

    # print(input_ids0)
    # print(input_ids0.size())
    # print(input_ids0.type())
    # print(attention_mask0)
    # print(attention_mask0.size())
    # print(attention_mask0.type())
    # print(token_type_ids0)
    # print(token_type_ids0.size())
    # print(token_type_ids0.type())
    # print(label0)
    # print(label0.size())
    # print(label0.type())

    # Optimizer and lr_scheduler
    opt_bert = torch.optim.AdamW(model_Bert.parameters(), lr=learning_rate)
    opt_task0 = torch.optim.AdamW(model_task0.parameters(), lr=learning_rate)
    opt_task1 = torch.optim.AdamW(model_task1.parameters(), lr=learning_rate)

    metrics_task0 = ComputeMetrics(data_args_task0)
    metrics_task1 = ComputeMetrics(data_args_task1)

    iterations = (epochs * len(data_iterator_train_task1) // batch_size) + 1
    print(iterations)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        opt_bert, lambda step: (1.0 - step / iterations))
    all_iters = 0

    for i in range(1, iterations + 1):

        all_iters += 1
        model_Bert.train()
        model_task0.train()
        model_task1.train()
        data0 = data_iterator_train_task0.next()
        data1 = data_iterator_train_task1.next()

        if use_gpu:
            input_ids0 = data0['input_ids'].cuda()
            attention_mask0 = data0['attention_mask'].cuda()
            token_type_ids0 = data0['token_type_ids'].cuda()
            label0 = data0['labels'].cuda()
            input_ids1 = data1['input_ids'].cuda()
            attention_mask1 = data1['attention_mask'].cuda()
            token_type_ids1 = data1['token_type_ids'].cuda()
            label1 = data1['labels'].cuda()
        else:
            input_ids0 = data0['input_ids']
            attention_mask0 = data0['attention_mask']
            token_type_ids0 = data0['token_type_ids']
            label0 = data0['labels']
            input_ids1 = data1['input_ids']
            attention_mask1 = data1['attention_mask']
            token_type_ids1 = data1['token_type_ids']
            label1 = data1['labels']

        output_inter0 = model_Bert(input_ids=input_ids0,
                                   attention_mask=attention_mask0,
                                   token_type_ids=token_type_ids0,
                                   return_dict=True)
        output_inter1 = model_Bert(input_ids=input_ids1,
                                   attention_mask=attention_mask1,
                                   token_type_ids=token_type_ids1,
                                   return_dict=True)

        loss0 = model_task0(input=output_inter0, labels=label0)[0]
        loss1 = model_task1(input=output_inter1, labels=label1)[0]

        # balance the losses of sub-tasks
        ratio = loss0 / loss1
        weight0 = (2 * ratio) / (1 + ratio)
        weight1 = 2 - weight0
        loss = loss0 * weight0 + loss1 * weight1

        printInfo = 'TOTAL/Train {}/{} - lr:{}, sl={:.6f}, l0/w0-{:.6f}/{:.6f}, l1/w1-{:.6f}/{:.6f}'.format(
            all_iters, iterations, scheduler.get_lr(), loss, loss0, weight0,
            loss1, weight1)
        logging.info(printInfo)

        # print(loss)
        # print(all_iters)

        opt_bert.zero_grad()
        opt_task0.zero_grad()
        opt_task1.zero_grad()
        # loss0.backward()
        loss.backward()

        opt_bert.step()
        opt_task0.step()
        opt_task1.step()

        scheduler.step()

        if (i % eval_interval == 0):
            evaluate(model_Bert, model_task0, data_iterator_eval_task0,
                     metrics_task0)
            evaluate(model_Bert, model_task1, data_iterator_eval_task1,
                     metrics_task1)

    evaluate(model_Bert, model_task0, data_iterator_eval_task0, metrics_task0)
    evaluate(model_Bert, model_task1, data_iterator_eval_task1, metrics_task1)

    # Saving models
    model_Bert.save_pretrained(model_save_dir + "main")
    model_task0.save_pretrained(model_save_dir + "task0")
    model_task1.save_pretrained(model_save_dir + "task1")
Example 16
def main():
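    # Multi-task GLUE training with a shared DistilBERT encoder that is kept
    # frozen for the first `frozen` iterations, then released.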
    ntasks = len(tasks)
    
    data_args = list()
    configuration = list()
    sub_models = list()
    train_iter = list()
    dev_iter = list()
    test_iter = list()
    sub_optimizer = list()
    metrics = list()
    tokenizer = DistilBertTokenizer.from_pretrained(bert_path, cache_dir=cache_dir)
    
    for i in range(ntasks):    
        logger.info("Tasks:" + tasks[i])
        data_args.append(GlueDataArgs(task_name=tasks[i]))
        configuration.append(DistilBertConfig.from_pretrained(bert_path, num_labels=glue_tasks_num_labels[data_args[i].task_name], 
                                finetuning_task=data_args[i].task_name, cache_dir = cache_dir))
        if use_gpu:
            sub_models.append(SequenceClassification(configuration[i]).cuda())
        else: 
            sub_models.append(SequenceClassification(configuration[i]))
            
        train_iter.append(DataIterator(data_args[i], tokenizer=tokenizer, mode="train", cache_dir=cache_dir, batch_size=batch_size[i]))
        dev_iter.append(DataIterator(data_args[i], tokenizer=tokenizer, mode="dev", cache_dir=cache_dir, batch_size=batch_size_val[i]))
        
        sub_optimizer.append(torch.optim.AdamW(sub_models[i].parameters(), lr=learning_rate_0))
        
        metrics.append(ComputeMetrics(data_args[i]))
        
        logger.info("*** DataSet Ready ***")
    
    if use_gpu:
        Bert_model = DistilBertModel.from_pretrained(bert_path, return_dict=True).cuda()
    else:
        Bert_model = DistilBertModel.from_pretrained(bert_path, return_dict=True)
    
    bert_optimizer = torch.optim.AdamW(Bert_model.parameters(), lr=learning_rate_0)
    
    
    # balanced dataset
    train_num = list()    
    for i in range(ntasks):
        train_num.append(len(train_iter[i]))
    #train_nummax = 
    #train_num = [x/train_nummax for x in train_num]
    #print(train_num)
    iterations = (epochs * max(train_num) // bs) + 1
    #print(iterations)
    
    sub_scheduler = list()
    for i in range(ntasks):
        sub_scheduler.append(torch.optim.lr_scheduler.LambdaLR(sub_optimizer[i], lambda step: (1.0-step/iterations) if step <= frozen else learning_rate_1))    
    Bert_scheduler = torch.optim.lr_scheduler.LambdaLR(bert_optimizer, lambda step: (1.0-step/iterations) if step <= frozen else learning_rate_1)
    
    
    for i in range(1, iterations+1):
        
        
        if i > frozen:
            for p in Bert_model.parameters():
                p.requires_grad = True
            Bert_model.train()
        elif i == frozen:
            for p in Bert_model.parameters():
                p.requires_grad = True
            Bert_model.train()   
            logging.info("#####################################")
            logging.info("Release the Traing of the Main Model.")
            logging.info("#####################################")
        else:
            for p in Bert_model.parameters():
                p.requires_grad = False
            Bert_model.eval()
        
        losses = list()
        loss_rates = list()
        for j in range(ntasks):
            sub_models[j].train()
            data = train_iter[j].next()

            if use_gpu:
                input_ids = data['input_ids'].cuda()
                attention_mask = data['attention_mask'].cuda()
                # token_type_ids = data['token_type_ids'].cuda()
                label = data['labels'].cuda()
            else:
                input_ids = data['input_ids']
                attention_mask = data['attention_mask']
                # token_type_ids = data['token_type_ids']
                label = data['labels']

            output_inter = Bert_model(input_ids=input_ids,
                                      attention_mask=attention_mask,
                                      return_dict=True)  # token_type_ids=token_type_ids,
            losses.append(sub_models[j](input=output_inter, labels=label)[0])

        losssum = sum(losses).item()
        for j in range(ntasks):
            loss_rates.append(losses[j].item() / losssum)

        loss = 0
        printInfo = 'TOTAL/Train {}/{}, lr:{}'.format(i, iterations, Bert_scheduler.get_lr())
        for j in range(ntasks):
            loss += losses[j] * batch_size[j] * loss_rates[j]
            printInfo += ', loss{}-{:.6f}'.format(j, losses[j])
            sub_optimizer[j].zero_grad()

        logging.info(printInfo)
        
        if i > frozen:
            bert_optimizer.zero_grad()
        loss.backward()
        
        if i > frozen:
            bert_optimizer.step()
            
        for j in range(ntasks):
            sub_optimizer[j].step()
            sub_scheduler[j].step()
        
        Bert_scheduler.step()
        
        if (i % eval_interval == 0):
            for j in range(ntasks):
                evaluate(Bert_model, sub_models[j], dev_iter[j], batch_size_val[j], metrics[j])
                sub_models[j].save_pretrained(os.path.join(model_save_dir, "{}-checkpoint-{:06}.pth.tar".format(tasks[j], i)))
            Bert_model.save_pretrained(os.path.join(model_save_dir, "{}-checkpoint-{:06}.pth.tar".format("main", i)))
    
    
    for i in range(ntasks):
        evaluate(Bert_model, sub_models[i], dev_iter[i], batch_size_val[i], metrics[i])
        sub_models[i].save_pretrained(os.path.join(model_save_dir, "{}-checkpoint-{:06}.pth.tar".format(tasks[i], iterations)))
            
    Bert_model.save_pretrained(os.path.join(model_save_dir, "{}-checkpoint-{:06}.pth.tar".format("main", iterations)))    
Example 17
def train(model: BaseModel, config, train_dataset, val_dataset, step=0):
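    # Open-ended training loop: alternate discriminator/generator (or predictor)
    # updates, checkpoint periodically, and write TensorBoard summaries.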
    train_iterator = DataIterator(train_dataset,
                                  batch_size=config.batch_size,
                                  num_workers=config.data.num_workers,
                                  sampler=InfiniteRandomSampler(train_dataset))

    # Prepare for summary
    writer = SummaryWriter(config.log_dir)
    config_str = yaml.dump(namedtuple_to_dict(config))
    writer.add_text('config', config_str)
    train_sampler = SubsetSequentialSampler(train_dataset,
                                            config.summary.train_samples)
    val_sampler = SubsetSequentialSampler(val_dataset,
                                          config.summary.val_samples)
    train_sample_iterator = DataIterator(train_dataset.for_summary(),
                                         sampler=train_sampler,
                                         num_workers=2)
    val_sample_iterator = DataIterator(val_dataset.for_summary(),
                                       sampler=val_sampler,
                                       num_workers=2)

    # Training loop
    start_time = time.time()
    start_step = step
    while True:
        step += 1
        save_summary = step % config.summary_step == 0
        d_summary, g_summary, p_summary = None, None, None
        if config.mode == MODE_PRED:
            if model.lr_sched_p is not None:
                model.lr_sched_p.step()
            x, y = next(train_iterator)
            p_summary = model.optimize_p(x,
                                         y,
                                         step=step,
                                         summarize=save_summary)

        else:
            if model.lr_sched_d is not None:
                model.lr_sched_d.step()

            x, y = next(train_iterator)
            summarize_d = save_summary and config.d_updates_per_step == 1
            d_summary = model.optimize_d(x,
                                         y,
                                         step=step,
                                         summarize=summarize_d)
            for i in range(config.d_updates_per_step - 1):
                x, y = next(train_iterator)
                summarize_d = save_summary and (
                    i == config.d_updates_per_step - 2)
                d_summary = model.optimize_d(x,
                                             y,
                                             step=step,
                                             summarize=summarize_d)

            if model.lr_sched_g is not None:
                model.lr_sched_g.step()

            summarize_g = save_summary and config.g_updates_per_step == 1
            g_summary = model.optimize_g(x,
                                         y,
                                         step=step,
                                         summarize=summarize_g)
            for i in range(config.g_updates_per_step - 1):
                x, y = next(train_iterator)
                summarize_g = save_summary and (
                    i == config.g_updates_per_step - 2)
                g_summary = model.optimize_g(x,
                                             y,
                                             step=step,
                                             summarize=summarize_g)

        # Print status
        elapsed_time = time.time() - start_time
        elapsed_step = step - start_step
        print('\r[Step %d] %s' %
              (step, time.strftime('%H:%M:%S', time.gmtime(elapsed_time))),
              end='')
        if elapsed_time > elapsed_step:
            print(' | %.2f s/it' % (elapsed_time / elapsed_step), end='')
        else:
            print(' | %.2f it/s' % (elapsed_step / elapsed_time), end='')

        if step % config.ckpt_step == 0:
            model.save(step)

        if save_summary:
            # Save summaries from optimization process
            for summary in [p_summary, d_summary, g_summary]:
                if summary is None:
                    continue
                model.write_summary(writer, summary, step)

            # Summarize learning rates and gradients
            for component, optimizer in [
                ('d', model.optim_d),
                ('g', model.optim_g),
                ('p', model.optim_p),
            ]:
                if optimizer is None:
                    continue

                for i, group in enumerate(optimizer.param_groups):
                    writer.add_scalar('lr/%s/%d' % (component, i), group['lr'],
                                      step)
                    grads = []
                    for param in group['params']:
                        if param.grad is not None:
                            grads.append(param.grad.data.view([-1]))
                    if grads:
                        grads = torch.cat(grads, 0)
                        writer.add_histogram('grad/%s/%d' % (component, i),
                                             grads, step)

            # Custom summaries
            model.summarize(writer, step, train_sample_iterator,
                            val_sample_iterator)
Example 18
def train(model, rank=0):
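    # Distributed detection training: mixed precision via apex.amp, DDP, warmup
    # plus milestone LR schedules, and loss all-reduce across ranks for logging.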

    model.cuda()
    optimizer = SGD(model.parameters(),
                    lr=lr,
                    weight_decay=weight_decay,
                    momentum=momentem)

    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level='O0',
                                      loss_scale=loss_scale)

    model = DistributedDataParallel(model)
    model.train()
    if rank == 0:
        print('preparing dataset...')
    data_iterator = DataIterator(path=coco_dir,
                                 batch_size=batch_size,
                                 stride=stride,
                                 shuffle=shuffle,
                                 resize=resize,
                                 dist=dist,
                                 world_size=world_size)
    if rank == 0:
        print('finish loading dataset!')

    def schedule_warmup(i):
        return warmup_ratio if i < warmup else 1

    def schedule(epoch):
        return gamma**len([m for m in milestores if m <= epoch])

    scheduler_warmup = LambdaLR(optimizer, schedule_warmup)
    scheduler = LambdaLR(optimizer, schedule)
    if rank == 0:
        print('starting training...')

    for epoch in range(1, epochs + 1):
        cls_losses, box_losses, centerness_losses = [], [], []
        if epoch != 1:
            scheduler.step(epoch)
        for i, (data, target) in enumerate(data_iterator, start=1):
            optimizer.zero_grad()
            cls_loss, box_loss, centerness_loss = model([data, target])

            with amp.scale_loss(cls_loss + box_loss + centerness_loss,
                                optimizer) as scaled_loss:
                scaled_loss.backward()

            # torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_norm)
            optimizer.step()
            if epoch == 1 and i <= warmup:
                scheduler_warmup.step(i)

            cls_loss = cls_loss.mean().clone()
            box_loss = box_loss.mean().clone()
            centerness_loss = centerness_loss.mean().clone()
            torch.distributed.all_reduce(cls_loss)
            torch.distributed.all_reduce(box_loss)
            torch.distributed.all_reduce(centerness_loss)
            cls_loss /= world_size
            box_loss /= world_size
            centerness_loss /= world_size
            if rank == 0:
                cls_losses.append(cls_loss)
                box_losses.append(box_loss)
                centerness_losses.append(centerness_loss)

            if rank == 0 and not isfinite(cls_loss + box_loss +
                                          centerness_loss):
                raise RuntimeError('Loss is diverging!')

            del cls_loss, box_loss, centerness_loss, target, data

            if rank == 0 and i % 10 == 0:
                focal_loss = torch.FloatTensor(cls_losses).mean().item()
                box_loss = torch.FloatTensor(box_losses).mean().item()
                centerness_loss = torch.FloatTensor(
                    centerness_losses).mean().item()
                learning_rate = optimizer.param_groups[0]['lr']

                msg = '[{:{len}}/{}]'.format(epoch,
                                             epochs,
                                             len=len(str(epochs)))
                msg += '[{:{len}}/{}]'.format(i,
                                              len(data_iterator),
                                              len=len(str(len(data_iterator))))
                msg += ' focal loss: {:.3f}'.format(focal_loss)
                msg += ', box loss: {:.3f}'.format(box_loss)
                msg += ', centerness loss: {:.3f}'.format(centerness_loss)
                msg += ', lr: {:.2g}'.format(learning_rate)
                msg += ', cuda_memory: {:.3g} GB'.format(
                    torch.cuda.memory_cached() / mb_to_gb_factor)
                print(msg, flush=True)
                del cls_losses[:], box_losses[:], centerness_losses[:], focal_loss, box_loss, centerness_loss

        if rank == 0:
            print('saving model for epoch {}'.format(epoch))
            torch.save(model.state_dict(),
                       './checkpoints/epoch-{}.pth'.format(epoch))

    if rank == 0:
        print('finish training, saving the final model...')
        torch.save(model.state_dict(), './checkpoints/final.pth')
        print('-' * 10 + 'completed!' + '-' * 10)
Example 19
def train(data_path, model, optimizer, criterion, device, logger, args):
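    # Split the data, then run one masked-loss training pass and one test pass
    # per epoch, tracking error and cosine-similarity accuracy.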
    data_loader = DataLoader(data_path, args.verbose)

    X, y, seq = data_loader.run_pipeline(args.split_rate)

    train_iter = DataIterator(X[0], y[0], seq[0], batch_size=args.batch_size)
    test_iter = DataIterator(X[1], y[1], seq[1], batch_size=args.batch_size)

    train_err, test_err = [], []
    train_acc, test_acc = [], []

    logger.info(model)

    for epoch in range(args.epoch):

        logger.info("Epoch: {} / {}".format(epoch + 1, args.epoch))

        ### TRAIN LOOP ###
        err = []
        acc = []
        model.train()
        for proteins, sequence_lengths, targets in (tqdm(
                train_iter,
                ascii=False,
                desc="Training",
                total=int(len(X[0]) / args.batch_size),
                unit="batch") if args.verbose else train_iter):

            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)

            predictions = model(inputs, seq_lens)

            mask = build_mask(sequence_lengths).to(device)

            optimizer.zero_grad()
            batch_loss = criterion(predictions, targets, mask)
            batch_loss.backward()
            optimizer.step()

            cos_sim = cosine_similarity(predictions, targets, mask)

            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_training_error = sum(err) / len(err)
        epoch_training_accuracy = sum(acc) / len(acc)
        train_err.append(epoch_training_error)
        train_acc.append(epoch_training_accuracy)

        ### TEST LOOP ###
        err = []
        acc = []
        model.eval()
        for proteins, sequence_lengths, targets in (tqdm(
                test_iter,
                ascii=False,
                desc="Testing",
                total=int(len(X[1]) / args.batch_size),
                unit="batch") if args.verbose else test_iter):

            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)

            predictions = model(inputs, seq_lens)

            mask = build_mask(sequence_lengths).to(device)

            batch_loss = criterion(predictions, targets, mask)

            cos_sim = cosine_similarity(predictions, targets, mask)

            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_test_error = sum(err) / len(err)
        epoch_test_accuracy = sum(acc) / len(acc)
        test_err.append(epoch_test_error)
        test_acc.append(epoch_test_accuracy)

        logger.info(
            "Training error: {0:.4f},\tTest error: {1:.4f}\t\tTraining accuracy: {2:.4f}\tTest accuracy: {3:.4f}"
            .format(epoch_training_error, epoch_test_error,
                    epoch_training_accuracy, epoch_test_accuracy))

    return (train_err, test_err), (train_acc, test_acc)
Example 20
    def __init__(self,
                 train_data_dir=None,
                 val_data_dir=None,
                 log_dir=None,
                 batch_size=None,
                 step_set0=None,
                 restore=False,
                 checkpoint_dir=None,
                 train_epochs=None,
                 save_step_interval=None,
                 validation_interval_steps=None,
                 image_height=None,
                 image_width=None,
                 image_channel=None,
                 out_channels=None,
                 leakiness=None,
                 num_hidden=None,
                 output_keep_prob=None,
                 num_classes=None,
                 initial_learning_rate=1e-3,
                 decay_epoch=50,
                 decay_rate=0.1,
                 beta1=None,
                 beta2=None,
                 device="/cpu:0",
                 data_type=2,
                 augment_factor=20):
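        # Build the train/val input pipelines on the requested device, assemble
        # the LSTM-OCR graph, and prepare the session, saver and summary writer.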
        self.restore = restore
        self.step_set0 = step_set0
        self.batch_size = batch_size
        self.checkpoint_dir = checkpoint_dir
        self.train_epochs = train_epochs
        self.save_step_interval = save_step_interval
        self.validation_interval_steps = validation_interval_steps

        self.train_data_dir = train_data_dir
        if not os.path.exists(self.train_data_dir):
            raise KeyError("train data path don't exists'")
        self.val_data_dir = val_data_dir
        if not os.path.exists(self.val_data_dir):
            raise KeyError("val data path don't exists'")

        self.log_dir = os.path.join(log_dir, "train")
        if os.path.exists(self.log_dir):
            log_files = os.listdir(self.log_dir)
            for log_file in log_files:
                shutil.rmtree(os.path.join(self.log_dir, log_file))
        self.device = device
        print("Loading train data, please wait---------")
        with tf.device(self.device):
            if data_type == 2:
                self.train_feeder = DataIterator.DataIterator2(
                    self.train_data_dir, image_width, image_height,
                    image_channel, self.batch_size, augment_factor)
            elif data_type == 3:
                self.train_feeder = DataIterator.DataIterator3(
                    self.train_data_dir, image_width, image_height,
                    image_channel, self.batch_size)
            elif data_type == 5:
                self.train_feeder = DataIterator.DataIterator5(
                    self.train_data_dir, image_width, image_height,
                    image_channel, self.batch_size)
            self.train_batch_inputs, self.train_batch_sparse_labels, self.train_batch_encode_labels =\
                    self.train_feeder.feed_tensor()
            print("Load train data done, load {} images.".format(
                self.train_feeder.size))
            print("Loading val data, please wait---------")
            if data_type == 2:
                self.val_feeder = DataIterator.DataIterator2(
                    self.val_data_dir, image_width, image_height,
                    image_channel, self.batch_size, augment_factor)
            elif data_type == 3:
                self.val_feeder = DataIterator.DataIterator3(
                    self.val_data_dir, image_width, image_height,
                    image_channel, self.batch_size)
            elif data_type == 5:
                self.val_feeder = DataIterator.DataIterator5(
                    self.val_data_dir, image_width, image_height,
                    image_channel, self.batch_size)

            self.val_batch_inputs, self.val_batch_sparse_labels, self.val_batch_encode_labels =\
                    self.val_feeder.feed_tensor()
            print("Load val data done, load {} images.".format(
                self.val_feeder.size))

            self.train_val_flag = tf.placeholder(dtype=bool, shape=())
            batch_inputs, batch_sparse_labels, batch_encode_labels =\
                    tf.cond(self.train_val_flag,
                            lambda:[self.train_batch_inputs,
                                self.train_batch_sparse_labels, self.train_batch_encode_labels],
                            lambda:[self.val_batch_inputs,
                                self.val_batch_sparse_labels, self.val_batch_encode_labels])
            self.vin_rec_model = model.LSTMOCR(
                'train', batch_inputs, batch_sparse_labels,
                batch_encode_labels, image_height, image_width, image_channel,
                out_channels, leakiness, num_hidden, output_keep_prob,
                num_classes)
            self.train_num_batches_per_epoch = int(self.train_feeder.size /
                                                   self.batch_size)
            self.val_num_batches_per_epoch = int(self.val_feeder.size /
                                                 self.batch_size)
            self.vin_rec_model.build_graph(self.train_num_batches_per_epoch,
                                           initial_learning_rate, decay_epoch,
                                           decay_rate, beta1, beta2,
                                           batch_size)
        with tf.device("/cpu:0"):
            gpu_options = tf.GPUOptions(allow_growth=True)
            config = tf.ConfigProto(allow_soft_placement=True,
                                    gpu_options=gpu_options)
            self.sess = tf.Session(config=config)
            self.sess.run(tf.global_variables_initializer())
            self.sess.run(tf.local_variables_initializer())
            all_variables_list = tf.global_variables()
            #train step from 0
            restore_variables_list = []
            if self.step_set0:
                for item in all_variables_list:
                    if item.name != "global_step:0":
                        restore_variables_list.append(item)
            else:
                restore_variables_list = all_variables_list
            self.saver = tf.train.Saver(restore_variables_list,
                                        max_to_keep=100)
            self.tb_writer = tf.summary.FileWriter(self.log_dir + '/train',
                                                   self.sess.graph)
            if self.restore:
                ckpt = tf.train.latest_checkpoint(self.checkpoint_dir)
                if ckpt:
                    self.saver.restore(self.sess, ckpt)
                    print("Restore from checkpoint {}".format(ckpt))
            self.coord = tf.train.Coordinator()
            self.threads = tf.train.start_queue_runners(sess=self.sess,
                                                        coord=self.coord)
            self.timer = timer.Timer()
Example 21
def infer(model, args):
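    # Load the requested checkpoint, run distributed inference over COCO,
    # all-gather the results, and evaluate rank 0's detections with COCOeval.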
    rank = args.local_rank
    epoch_name = args.epoch
    model_state_dict_dir = 'checkpoints/final.pth' if epoch_name == 'final' else 'checkpoints/epoch-{}.pth'.format(
        epoch_name)

    load = torch.load(model_state_dict_dir, map_location='cpu')
    load = {k.replace('module.', ''): v for k, v in load.items()}
    model_state_dict = load
    model.load_state_dict(model_state_dict)

    model = model.cuda()
    model = amp.initialize(model,
                           opt_level='O2',
                           keep_batchnorm_fp32=True,
                           verbosity=0)
    # model = DistributedDataParallel(model)
    model.eval()
    if rank == 0:
        print('preparing dataset...')
    data_iterator = DataIterator(coco_dir,
                                 resize=resize,
                                 max_size=max_size,
                                 batch_size=batch_size,
                                 stride=stride,
                                 training=training,
                                 dist=dist,
                                 world_size=world_size)
    if rank == 0:
        print('finish loading dataset!')
    results = []
    with torch.no_grad():
        for i, (data, ids, ratios) in enumerate(data_iterator, start=1):
            scores, boxes, classes = model(data)
            results.append([scores, boxes, classes, ids, ratios])
            if rank == 0:
                size = len(data_iterator.ids)
                msg = '[{:{len}}/{}]'.format(min(i * batch_size, size),
                                             size,
                                             len=len(str(size)))
                print(msg, flush=True)
    if rank == 0:
        print('gathering results...')
    results = [torch.cat(r, dim=0) for r in zip(*results)]

    for r, result in enumerate(results):
        all_result = [
            torch.ones_like(result, device=result.device)
            for _ in range(world_size)
        ]
        torch.distributed.all_gather(list(all_result), result)
        results[r] = torch.cat(all_result, dim=0)

    if rank == 0:
        results = [r.cpu() for r in results]
        detections = []
        processed_ids = set()
        for scores, boxes, classes, image_id, ratios in zip(*results):
            image_id = image_id.item()
            if image_id in processed_ids:
                continue
            processed_ids.add(image_id)

            keep = (scores > 0).nonzero()
            scores = scores[keep].view(-1)
            boxes = boxes[keep, :].view(-1, 4) / ratios
            classes = classes[keep].view(-1).int()

            for score, box, cat in zip(scores, boxes, classes):
                x1, y1, x2, y2 = box.data.tolist()
                cat = cat.item()

                cat = data_iterator.coco.getCatIds()[cat]
                detections.append({
                    'image_id': image_id,
                    'score': score.item(),
                    'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
                    'category_id': cat
                })
        if detections:

            print('writing {}...'.format(detection_file))
            detections = {'annotations': detections}
            detections['images'] = data_iterator.coco.dataset['images']
            detections['categories'] = [
                data_iterator.coco.dataset['categories']
            ]
            json.dump(detections, open(detection_file, 'w'), indent=4)

            print('evaluating model...')
            coco_pred = data_iterator.coco.loadRes(detections['annotations'])
            coco_eval = COCOeval(data_iterator.coco, coco_pred, 'bbox')
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
        else:
            print('no detections!')
Example 22
        p5 = self.smooth5(p5)

        return p3, p4, p5, p6, p7


def ResNet50FPN(
        state_dict_path='/Users/nick/.cache/torch/checkpoints/resnet50-19c8e357.pth',
        stride=128):
    return FPN(ResNet(layers=[3, 4, 6, 3],
                      outputs=[3, 4, 5],
                      state_dict_path=state_dict_path),
               stride=stride)


if __name__ == '__main__':
    net = ResNet50FPN()
    net.initialize()
    from data import DataIterator

    dataiter = DataIterator()
    net.initialize()
    i = 0
    for data, target in dataiter:
        i += 1
        if i == 5:
            break
        y = net(data)
        for item in y:
            print(item.shape, end=' ')
        print()
Example 23
def train_running_save(data_path,
                       model,
                       optimizer,
                       criterion,
                       device,
                       logger,
                       args,
                       step=10):
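    # Same loop as train(), but additionally pickles the error/accuracy curves
    # and saves a model checkpoint every `step` epochs.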

    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    data_loader = DataLoader(data_path, args.verbose)

    X, y, seq = data_loader.run_pipeline(args.split_rate)

    train_iter = DataIterator(X[0], y[0], seq[0], batch_size=args.batch_size)
    test_iter = DataIterator(X[1], y[1], seq[1], batch_size=args.batch_size)

    train_err, test_err = [], []
    train_acc, test_acc = [], []

    logger.info(model)

    for epoch in range(args.epoch):

        logger.info("Epoch: {} / {}".format(epoch + 1, args.epoch))

        ### TRAIN LOOP ###
        err = []
        acc = []
        model.train()
        for proteins, sequence_lengths, targets in (tqdm(
                train_iter,
                ascii=False,
                desc="Training",
                total=int(len(X[0]) / args.batch_size),
                unit="batch") if args.verbose else train_iter):

            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)

            predictions = model(inputs, seq_lens)

            mask = build_mask(sequence_lengths).to(device)

            optimizer.zero_grad()
            batch_loss = criterion(predictions, targets, mask)
            batch_loss.backward()
            optimizer.step()

            cos_sim = cosine_similarity(predictions, targets, mask)

            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_training_error = sum(err) / len(err)
        epoch_training_accuracy = sum(acc) / len(acc)
        train_err.append(epoch_training_error)
        train_acc.append(epoch_training_accuracy)

        ### TEST LOOP ###
        err = []
        acc = []
        model.eval()
        for proteins, sequence_lengths, targets in (tqdm(
                test_iter,
                ascii=False,
                desc="Testing",
                total=int(len(X[1]) / args.batch_size),
                unit="batch") if args.verbose else test_iter):

            inputs = proteins.to(device)
            seq_lens = sequence_lengths.to(device)
            targets = targets.to(device)

            predictions = model(inputs, seq_lens)

            mask = build_mask(sequence_lengths).to(device)

            batch_loss = criterion(predictions, targets, mask)

            cos_sim = cosine_similarity(predictions, targets, mask)

            err.append(batch_loss.cpu().item())
            acc.append(cos_sim.cpu().item())

        epoch_test_error = sum(err) / len(err)
        epoch_test_accuracy = sum(acc) / len(acc)
        test_err.append(epoch_test_error)
        test_acc.append(epoch_test_accuracy)

        logger.info(
            "Training error: {0:.4f},\tTest error: {1:.4f}\t\tTraining accuracy: {2:.4f}\tTest accuracy: {3:.4f}"
            .format(epoch_training_error, epoch_test_error,
                    epoch_training_accuracy, epoch_test_accuracy))

        if epoch % step == 0:

            logger.info("Saving checkpoint")

            performance_path = os.path.join("results", "{}-epoch{}.pk".format(
                args.results_name.split(".")[0], epoch))  # temporary name
            checkpoint_name = "{}-epoch{}.pt".format(
                args.checkpoint_name.split(".")[0], epoch)  # temporary name
            results = (train_err, test_err), (train_acc, test_acc)
            with open(performance_path, "wb") as file:
                pickle.dump(results, file)
            torch.save(
                {
                    "epoch": args.epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict()
                }, os.path.join(args.checkpoint_dir, checkpoint_name))

    return (train_err, test_err), (train_acc, test_acc)
Example 24
from config import ConfigBinaryClassification
from config import ConfigTripleClassification

if __name__ == "__main__":
    args = get_args()
    print_args(args)

    if args.class_num == 2:
        cfg = ConfigBinaryClassification()
    elif args.class_num == 3:
        cfg = ConfigTripleClassification()
    else:
        raise ValueError("wrong class num")

    device = torch.device("cuda:%d" % args.cuda)
    Data = DataIterator(config=cfg, train_batchsize=args.batch_size)
    model = CNN(vocab_size=len(Data.vocab),
                embedding_dim=100,
                n_filters=100,
                filter_sizes=range(2, 5),
                output_dim=args.class_num,
                dropout=0.5,
                pad_idx=1).to(device)

    optimizer = Adam(model.parameters(), lr=args.lr)
    criterion = FocalLoss(classes=args.class_num, device=device).to(device)

    for epoch in range(args.epoch_num):
        print(epoch)
        for sample in Data.train_iter:
            model.train()
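
The snippet above is cut off by the listing, and FocalLoss is a project module it does not show. A minimal sketch of a multi-class focal loss with the same constructor arguments (gamma and the internals are assumptions, not the original implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, classes, device, gamma=2.0):
        super().__init__()
        self.classes = classes  # kept only to mirror the call above
        self.device = device
        self.gamma = gamma

    def forward(self, logits, targets):
        # logits: [batch, classes], targets: [batch] of class indices
        log_probs = F.log_softmax(logits, dim=-1)
        target_log_probs = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
        target_probs = target_log_probs.exp()
        # down-weight well-classified examples by (1 - p)^gamma
        loss = -((1.0 - target_probs) ** self.gamma) * target_log_probs
        return loss.mean()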
Esempio n. 25
0
File: main.py Progetto: jdddog/GTS
def train(args):

    # load double embedding
    word2index = json.load(open(args.prefix + 'doubleembedding/word_idx.json'))
    general_embedding = numpy.load(args.prefix + 'doubleembedding/gen.vec.npy')
    general_embedding = torch.from_numpy(general_embedding)
    domain_embedding = numpy.load(args.prefix + 'doubleembedding/' +
                                  args.dataset + '_emb.vec.npy')
    domain_embedding = torch.from_numpy(domain_embedding)

    # load dataset
    train_sentence_packs = json.load(
        open(args.prefix + args.dataset + '/train.json'))
    random.shuffle(train_sentence_packs)
    dev_sentence_packs = json.load(
        open(args.prefix + args.dataset + '/dev.json'))

    instances_train = load_data_instances(train_sentence_packs, word2index,
                                          args)
    instances_dev = load_data_instances(dev_sentence_packs, word2index, args)

    random.shuffle(instances_train)
    trainset = DataIterator(instances_train, args)
    devset = DataIterator(instances_dev, args)

    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)

    # build model
    if args.model == 'bilstm':
        model = MultiInferRNNModel(general_embedding, domain_embedding,
                                   args).to(args.device)
    elif args.model == 'cnn':
        model = MultiInferCNNModel(general_embedding, domain_embedding,
                                   args).to(args.device)

    parameters = list(model.parameters())
    parameters = filter(lambda x: x.requires_grad, parameters)
    optimizer = torch.optim.Adam(parameters, lr=args.lr)

    # training
    best_joint_f1 = 0
    best_joint_epoch = 0
    for i in range(args.epochs):
        print('Epoch:{}'.format(i))
        for j in trange(trainset.batch_count):
            _, sentence_tokens, lengths, masks, aspect_tags, _, tags = trainset.get_batch(
                j)
            predictions = model(sentence_tokens, lengths, masks)

            loss = 0.
            tags_flatten = tags[:, :lengths[0], :lengths[0]].reshape([-1])
            for k in range(len(predictions)):
                prediction_flatten = predictions[k].reshape(
                    [-1, predictions[k].shape[3]])
                loss = loss + F.cross_entropy(
                    prediction_flatten, tags_flatten, ignore_index=-1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        joint_precision, joint_recall, joint_f1 = eval(model, devset, args)

        if joint_f1 > best_joint_f1:
            model_path = args.model_dir + args.model + args.task + '.pt'
            torch.save(model, model_path)
            best_joint_f1 = joint_f1
            best_joint_epoch = i
    print('best epoch: {}\tbest dev {} f1: {:.5f}\n\n'.format(
        best_joint_epoch, args.task, best_joint_f1))
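
The training step above flattens the [len, len] tag grid before calling F.cross_entropy and relies on ignore_index=-1 to drop padded cells from the loss. A toy illustration of that pattern (shapes and values are made up for the example):

import torch
import torch.nn.functional as F

# toy grid: batch of 1 sentence, 3x3 tag grid, 4 tag classes
predictions = torch.randn(1, 3, 3, 4)            # [batch, len, len, classes]
tags = torch.tensor([[[0, 1, -1],
                      [1, 2, -1],
                      [-1, -1, -1]]])            # -1 marks padded cells

loss = F.cross_entropy(predictions.reshape(-1, predictions.shape[3]),
                       tags.reshape(-1),
                       ignore_index=-1)          # padded cells contribute nothing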
Esempio n. 26
0
def train(train_dir, batch_size=64, image_height=60, image_width=180, image_channel=1,
          checkpoint_dir="../checkpoint/", num_epochs=100):

    # load the training and validation data
    train_data = DataIterator(data_dir=train_dir, batch_size=batch_size, begin=0, end=800)
    valid_data = DataIterator(data_dir=train_dir, batch_size=batch_size, begin=800, end=1000)
    print('train data batch number: {}'.format(train_data.number_batch))
    print('valid data batch number: {}'.format(valid_data.number_batch))

    # build the model
    model = cnn_lstm_otc_ocr.LSTMOCR(NumClasses, batch_size, image_height=image_height,
                                     image_width=image_width, image_channel=image_channel, is_train=True)
    model.build_graph()

    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True), allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        # initialize variables
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        train_writer = tf.summary.FileWriter(checkpoint_dir + 'train', sess.graph)

        # restore from the latest checkpoint if one exists
        ckpt = tf.train.latest_checkpoint(checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from checkpoint{0}'.format(ckpt))
        else:
            print('no checkpoint to restore')
            pass

        print('=======begin training=======')
        for cur_epoch in range(num_epochs):
            start_time = time.time()
            batch_time = time.time()

            # training
            train_cost = 0
            for cur_batch in range(train_data.number_batch):
                if cur_batch % 100 == 0:
                    print('batch {}/{} time: {}'.format(cur_batch, train_data.number_batch, time.time() - batch_time))
                batch_time = time.time()

                batch_inputs, _, sparse_labels = train_data.next_train_batch()

                summary, cost, step, _ = sess.run([model.merged_summay, model.cost, model.global_step, model.train_op],
                                                  {model.inputs: batch_inputs, model.labels: sparse_labels})
                train_cost += cost
                train_writer.add_summary(summary, step)
                pass
            print("loss is {}".format(train_cost / train_data.number_batch))

            # save the model
            if cur_epoch % 1 == 0:
                if not os.path.isdir(checkpoint_dir):
                    os.mkdir(checkpoint_dir)
                saver.save(sess, os.path.join(checkpoint_dir, 'ocr-model'), global_step=cur_epoch)
                pass

            # validation
            if cur_epoch % 1 == 0:
                lr = 0
                acc_batch_total = 0
                for j in range(valid_data.number_batch):
                    val_inputs, _, sparse_labels, ori_labels = valid_data.next_test_batch(j)
                    dense_decoded, lr = sess.run([model.dense_decoded, model.lrn_rate],
                                                 {model.inputs: val_inputs, model.labels: sparse_labels})
                    acc_batch_total += accuracy_calculation(ori_labels, dense_decoded, -1)
                    pass

                accuracy = acc_batch_total / valid_data.number_batch

                now = datetime.datetime.now()
                log = "{}/{} {}:{}:{} Epoch {}/{}, accuracy = {:.3f}, time = {:.3f},lr={:.8f}"
                print(log.format(now.month, now.day, now.hour, now.minute, now.second,
                                 cur_epoch + 1, num_epochs, accuracy, time.time() - start_time, lr))

            pass
        pass

    pass
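
next_train_batch above already returns labels in the sparse form that a CTC-based model expects. For reference, a common way to build that (indices, values, dense_shape) triple from dense label sequences; this helper is an assumption, not part of the project:

import numpy as np

def dense_to_sparse(label_batch):
    # Convert a list of integer label sequences into the (indices, values, shape)
    # triple that can be fed to a sparse placeholder / used for CTC loss.
    indices, values = [], []
    for row, seq in enumerate(label_batch):
        for col, label in enumerate(seq):
            indices.append((row, col))
            values.append(label)
    shape = (len(label_batch), max(len(seq) for seq in label_batch))
    return (np.asarray(indices, dtype=np.int64),
            np.asarray(values, dtype=np.int32),
            np.asarray(shape, dtype=np.int64))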
Esempio n. 27
0
threshold = 0.5

corpus = ParsedCorpus(base_dirs)
vocab = HeadWordVocabulary()
vocab.load()
entity_vocab = HeadWordVocabulary()
entity_vocab.load("./evoc.txt")
net_arch = args
net_arch.num_input = len(vocab)
model = Extractor(net_arch)
model.load_cpu_model(args.model_path, None)
model.cuda()
model.eval()

iterator = DataIterator(corpus, vocab, entity_vocab)
iterator.reset()

slot_word_dist = F.log_softmax(torch.FloatTensor(model.get_unnormalized_phi()),
                               dim=-1)  # tensor [K, V]
assert torch.isnan(slot_word_dist).sum().item() == 0
slot_mean_dist = torch.FloatTensor(model.get_beta_mean())  # tensor [K, D + 1]
slot_stdvar_dist = torch.FloatTensor(
    model.get_beta_logvar()).exp().sqrt()  # tensor [K, D + 1]
if not args.nogpu:
    slot_word_dist = slot_word_dist.cuda()
    slot_mean_dist = slot_mean_dist.cuda()
    slot_stdvar_dist = slot_stdvar_dist.cuda()
dists = [
    MultivariateNormal(loc=slot_mean_dist[k],
                       covariance_matrix=torch.diag_embed(slot_stdvar_dist[k]))
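
The snippet is truncated by the listing here, but the pattern it starts, one diagonal-covariance Gaussian per slot built from a mean and a standard-deviation vector, can be illustrated in isolation (the tensors below are toy values, not the model's):

import torch
from torch.distributions import MultivariateNormal

K, D = 3, 4                                   # toy: 3 slots, 4-dim descriptors
slot_mean_dist = torch.randn(K, D)            # per-slot means
slot_stdvar_dist = torch.rand(K, D) + 0.1     # per-slot standard deviations

# One diagonal Gaussian per slot; the covariance diagonal holds variances,
# i.e. the squared standard deviations.
dists = [
    MultivariateNormal(loc=slot_mean_dist[k],
                       covariance_matrix=torch.diag_embed(slot_stdvar_dist[k] ** 2))
    for k in range(K)
]

log_probs = torch.stack([d.log_prob(torch.zeros(D)) for d in dists])  # [K]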
Esempio n. 28
0
    # negative = random.sample(negative, len(positive))
    # train_dataset = positive + negative
    train_dataset = positive
    random.shuffle(train_dataset)

    positive = [data for data in test_dataset if np.max(data.other_need) > 0]
    negative = [data for data in test_dataset if np.max(data.other_need) == 0]
    print("[player Ting Pai] positive : negative = %d : %d" %
          (len(positive), len(negative)))
    # negative = random.sample(negative, len(positive))
    # test_dataset = positive + negative
    test_dataset = positive
    random.shuffle(test_dataset)

    augmentator = DataAugmentator()
    train_iter = DataIterator(train_dataset,
                              x_names=cfg.x_names,
                              y_names=cfg.y_names,
                              batch_size=128,
                              augmentator=augmentator)
    test_iter = DataIterator(test_dataset,
                             x_names=cfg.x_names,
                             y_names=cfg.y_names,
                             batch_size=128)
    model = BuildModel(cfg.x_names, cfg.y_names)
    model.summary()
    model.fit_generator(train_iter,
                        steps_per_epoch=train_iter.steps_per_epoch,
                        epochs=200,
                        validation_data=test_iter,
                        validation_steps=test_iter.steps_per_epoch)
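
DataIterator is used here as a Keras generator exposing a steps_per_epoch attribute. A minimal Sequence-style sketch with the same constructor arguments (the augment() call and the attribute-based field access are assumptions about the project, not its actual code):

import math
import numpy as np
from tensorflow.keras.utils import Sequence  # plain keras works the same way

class DataIteratorSketch(Sequence):
    def __init__(self, dataset, x_names, y_names, batch_size, augmentator=None):
        self.dataset = dataset
        self.x_names = x_names
        self.y_names = y_names
        self.batch_size = batch_size
        self.augmentator = augmentator
        self.steps_per_epoch = math.ceil(len(dataset) / batch_size)

    def __len__(self):
        return self.steps_per_epoch

    def __getitem__(self, idx):
        batch = self.dataset[idx * self.batch_size:(idx + 1) * self.batch_size]
        if self.augmentator is not None:
            batch = [self.augmentator.augment(item) for item in batch]
        xs = [np.stack([getattr(item, name) for item in batch]) for name in self.x_names]
        ys = [np.stack([getattr(item, name) for item in batch]) for name in self.y_names]
        return xs, ys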
Esempio n. 29
0
                text = sample.text.permute(1,0).to(device)
                output = model(text)
                p = output.argmax(1).cpu().tolist()
                l = sample.label.tolist()
                preds += p
                labels += l
        report = classification_report(labels, preds)  # sklearn expects (y_true, y_pred)
        print(report)


if __name__ == "__main__":
    device = torch.device("cuda:0")
    save_dir = "./checkpoints"
    config = "CNN-debias-"
    cfg = ConfigBinaryClassification()
    Data = DataIterator(config=cfg)
    print("loading model")
    model = torch.load("checkpoints/CNN-distill-26").to(device)

    print("loading tokenizer")
    tokenizer = Data.tokenizer
    PAD_IND = tokenizer.vocab.stoi['<pad>']
    seq_length = 256
    token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)
    lig = LayerIntegratedGradients(model, model.embedding)
    reference_tokens = token_reference.generate_reference(seq_length, device=device).unsqueeze(0).to(device)
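    # Sketch of how these objects are typically used next (an assumption, not
    # shown in this snippet): attribute a prediction for one padded input to its
    # tokens, using the all-<pad> sequence as the integration baseline.
    # attributions, delta = lig.attribute(input_ids, reference_tokens,
    #                                     target=predicted_class,
    #                                     return_convergence_delta=True)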

    #black_list = {0:["克罗恩病"], 1:["肠结核"]}
    black_list = {0:["克罗恩病",
        "循腔",
        "进镜",