def test_dataset(self):
    """Smoke-test the transform/dataset builders, then drop into IPython to inspect.

    NOTE(review): a second ``test_dataset`` definition follows in this file
    and shadows this one — consider removing one of them.
    """
    # Pass the is_train flag explicitly for both pipelines (the original
    # relied on a default for the train pipeline).
    train_transform = build_transforms(cfg, True)
    val_transform = build_transforms(cfg, False)
    # Fix: build_dataset takes cfg first (see train(cfg) below, which calls
    # build_dataset(cfg)) plus the is_train flag; both were omitted here.
    train_set = build_dataset(cfg, train_transform, True)
    val_test = build_dataset(cfg, val_transform, False)
    # Interactive inspection of the constructed datasets.
    from IPython import embed
    embed()
def test_dataset(self):
    """Build train/val transforms and datasets, then open an IPython shell.

    ``embed()`` exposes this frame's locals (train_transform, val_transform,
    train_set, val_test) for manual inspection.
    """
    # Build both transform pipelines with an explicit is_train flag.
    train_transform = build_transforms(cfg, True)
    val_transform = build_transforms(cfg, False)
    # Construct the corresponding datasets; cfg is passed through to the builder.
    train_set = build_dataset(cfg, train_transform, True)
    val_test = build_dataset(cfg, val_transform, False)
    # Drop into an interactive shell for manual checks (debug helper).
    from IPython import embed
    embed()
def train(cfg):
    """Run the clustering training loop over the whole dataset, one segment at a time.

    :param cfg: config object; must provide ``SOLVER.BATCH_SIZE`` and whatever
        the helper constructors below read from it.
    :return: None
    """
    datasets = build_dataset(cfg)
    algo = TFIDFClustring(cfg)
    vocab = Vocab(cfg)
    summary = SummaryTxt(cfg)
    keyword = Keyword(cfg, summary)
    processed_news_num = 0
    batch_size = cfg.SOLVER.BATCH_SIZE
    print('start training:')
    # Walk the corpus in strides of batch_size files; each stride forms one segment.
    for seg_id in trange(0, datasets.file_num, batch_size):
        seg = []
        for batch_idx in range(batch_size):
            # NOTE(review): on the last stride, seg_id + batch_idx may exceed
            # datasets.file_num - 1 when file_num is not a multiple of
            # batch_size — confirm getitem tolerates out-of-range indices.
            batch, seg_size = datasets.getitem(seg_id + batch_idx)
            seg.extend(batch)
            processed_news_num += seg_size
        # One clustering pass over the accumulated segment.
        algo.run(segments=seg, vocab=vocab, seg_id=seg_id,
                 keyword=keyword, summary=summary)
        # TODO: re-enable per-segment keyword updates once new_updated_topic
        # is available here: keyword.update_per_seg(new_updated_topic=...)
        print("seg idx: {}. processed news: {}".format(seg_id, processed_news_num))