"""Structure generator components in PCG""" def __init__(self, encoder=None, decoder=None, outViewN=8, outW=128, outH=128, renderDepth=1.0): super(Structure_Generator, self).__init__() if encoder: self.encoder = encoder else: self.encoder = Encoder() if decoder: self.decoder = decoder else: self.decoder = Decoder(outViewN, outW, outH, renderDepth) def forward(self, x): latent = self.encoder(x) XYZ, maskLogit = self.decoder(latent) return XYZ, maskLogit # TESTING if __name__ == '__main__': import options cfg = options.get_arguments() encoder = Encoder() decoder = Decoder(cfg.outViewN, cfg.outW, cfg.outH, cfg.renderDepth) model = Structure_Generator()
import options import utils from trainer import TrainerStage2 if __name__ == "__main__": print("=======================================================") print("Train structure generator with joint 2D optimization from novel viewpoints") print("=======================================================") cfg = options.get_arguments() EXPERIMENT = f"{cfg.model}_{cfg.experiment}" MODEL_PATH = f"models/{EXPERIMENT}" LOG_PATH = f"logs/{EXPERIMENT}" utils.make_folder(MODEL_PATH) utils.make_folder(LOG_PATH) criterions = utils.define_losses() dataloaders = utils.make_data_novel(cfg) model = utils.build_structure_generator(cfg).to(cfg.device) optimizer = utils.make_optimizer(cfg, model) scheduler = utils.make_lr_scheduler(cfg, optimizer) logger = utils.make_logger(LOG_PATH) writer = utils.make_summary_writer(EXPERIMENT) def on_after_epoch(model, df_hist, images, epoch, saveEpoch): utils.save_best_model(MODEL_PATH, model, df_hist)
.astype(np.int) # Convert to Tensor images = torch.from_numpy(images).permute((0, 3, 1, 2)) depthGT = torch.from_numpy(depthGT).permute((0, 3, 1, 2)) maskGT = torch.from_numpy(maskGT).permute((0, 3, 1, 2)) return { "inputImage": images, "depthGT": depthGT, "maskGT": maskGT, } if __name__ == "__main__": import options CFG = options.get_arguments() ds_fixed = PointCloud2dDataset(CFG) dl_fixed = DataLoader(ds_fixed, batch_size=CFG.chunkSize, shuffle=False, collate_fn=ds_fixed.collate_fn_fixed) ds_novel = PointCloud2dDataset(CFG, loadNovel=True) dl_novel = DataLoader(ds_novel, batch_size=CFG.chunkSize, shuffle=False, collate_fn=ds_novel.collate_fn)
images = batch_n["image_in"][modelIdx, angleIdx] depthGT = np.transpose(batch_n["depth"][modelIdx], axes=[0, 2, 3, 1]) maskGT = np.transpose(batch_n["mask"][modelIdx], axes=[0, 2, 3, 1])\ .astype(np.int) # Convert to Tensor images = torch.from_numpy(images).permute((0,3,1,2)) depthGT = torch.from_numpy(depthGT).permute((0,3,1,2)) maskGT = torch.from_numpy(maskGT).permute((0,3,1,2)) return { "inputImage": images, "depthGT": depthGT, "maskGT": maskGT, } if __name__ == "__main__": import options CFG = options.get_arguments() ds_fixed = PointCloud2dDataset(CFG) dl_fixed = DataLoader(ds_fixed, batch_size=CFG.chunkSize, shuffle=False, collate_fn=ds_fixed.collate_fn_fixed) ds_novel = PointCloud2dDataset(CFG, loadNovel=True) dl_novel = DataLoader(ds_novel, batch_size=CFG.chunkSize, shuffle=False, collate_fn=ds_novel.collate_fn)
# d = compute_distance(sketch_features.copy(), shape_features.copy(), l2=False) # scio.savemat('test/example.mat',{'d':d, 'feat':dataset_features, 'labels':dataset_labels}) # AUC, mAP = map_and_auc(sketch_labels.copy(), shape_labels.copy(), d) # print(' * Feature AUC {0:.5} mAP {0:.5}'.format(AUC, mAP)) d_feat = compute_distance(sketch_features.copy(), shape_features.copy(), l2=False) d_feat_norm = compute_distance(sketch_features.copy(), shape_features.copy(), l2=True) mAP_feat = compute_map(sketch_labels.copy(), shape_labels.copy(), d_feat) mAP_feat_norm = compute_map(sketch_labels.copy(), shape_labels.copy(), d_feat_norm) print(' * Feature mAP {0:.5%}\tNorm Feature mAP {1:.5%}'.format(mAP_feat, mAP_feat_norm)) d_score = compute_distance(sketch_scores.copy(), shape_scores.copy(), l2=False) mAP_score = compute_map(sketch_labels.copy(), shape_labels.copy(), d_score) d_score_norm = compute_distance(sketch_scores.copy(), shape_scores.copy(), l2=True) mAP_score_norm = compute_map(sketch_labels.copy(), shape_labels.copy(), d_score_norm) if opt.sf: shape_paths = [img[0] for img in shape_dataloader.dataset.shape_target_path_list] sketch_paths = [img[0] for img in sketch_dataloader.dataset.sketch_target_path_list] scio.savemat('{}/test_feat_temp.mat'.format(opt.checkpoint_folder), {'score_dist':d_score, 'score_dist_norm': d_score_norm, 'feat_dist': d_feat, 'feat_dist_norm': d_feat_norm,'sketch_features':sketch_features, 'sketch_labels':sketch_labels, 'sketch_scores': sketch_scores, 'shape_features':shape_features, 'shape_labels':shape_labels, 'sketch_paths':sketch_paths, 'shape_paths':shape_paths}) print(' * Score mAP {0:.5%}\tNorm Score mAP {1:.5%}'.format(mAP_score, mAP_score_norm)) return [sketch_top1.avg, shape_top1.avg, mAP_feat, mAP_feat_norm, mAP_score, mAP_score_norm] if __name__ == '__main__': from options import get_arguments opt = get_arguments() main(opt)
def main(): args = get_arguments() random.seed(args.random_seed) np.random.seed(args.random_seed) # sklearn use np to generate random value # Create folders and set logging format args.model_dir = os.path.join(args.out_dir, 'ckpt-{}'.format(args.class_weight_scheme)) args.log_dir = os.path.join(args.out_dir, 'log') args.ensemble_dir = os.path.join(args.out_dir, 'ensemble-{}'.format(args.class_weight_scheme)) if args.class_weight_scheme == 'customize': args.model_dir = os.path.join(args.model_dir, 'weight{}'.format(args.additional_weight)) args.ensemble_dir = os.path.join(args.ensemble_dir, 'weight{}'.format(args.additional_weight)) prepare_folders(args) logger = set_logging(args) logger.info("Here is the arguments of this running:") logger.info("{}".format(args)) utils.check_args_conflict(args) # Set files which contain data for training and test. If use "trecis2019-A", it means we want to tune parameters. args.data_prefix = "trecis2019-B" # Note that for 2019-B submission, all '2019' means '2019-B' and '2018' means '2018 + 2019-A' label_file = os.path.join(args.data_dir, 'ITR-H.types.v{}.json'.format( 4 if args.data_prefix == "trecis2019-B" else 3)) tweet_file_list = [os.path.join(args.data_dir, 'all-tweets.txt')] tweet_file_list_2019 = [os.path.join(args.data_dir, 'all-tweets-2019.txt')] train_file_list = [os.path.join(args.data_dir, 'TRECIS-CTIT-H-Training.json')] train_file_list += [os.path.join(args.data_dir, 'TRECIS-2018-TestEvents-Labels', 'assr{}.test'.format(i)) for i in range(1, 7)] if args.data_prefix == "trecis2019-B": train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019A-assr{}.json'.format(i)) for i in range(1, 6)] train_file_list += [os.path.join(args.data_dir, '2019ALabels', '2019-assr2.json')] test_raw_tweets_json_folder = 'download_tweets' # Some output files which has been formalized for further usages. formal_train_file = os.path.join(args.data_dir, 'train.txt{}'.format('_small' if args.sanity_check else '')) formal_test_file = os.path.join(args.data_dir, 'test.txt{}') tweet_text_out_file = os.path.join(args.out_dir, 'tweets-clean-text.txt') tweet_id_out_file = os.path.join(args.out_dir, 'tweets-id.txt') tweet_text_out_file_2019 = os.path.join(args.out_dir, 'tweets-clean-text-2019.txt') tweet_id_out_file_2019 = os.path.join(args.out_dir, 'tweets-id-2019.txt') predict_priority_score_out_file = os.path.join(args.out_dir, 'predict_priority_score.txt') # Set files for submission. args.model_name = '{0}{1}'.format(args.model, '-event' if args.event_wise else '') args.dev_label_file = os.path.join(args.ensemble_dir, 'dev_label.txt') args.dev_predict_file = os.path.join(args.ensemble_dir, 'dev_predict_{}.txt'.format(args.model_name)) args.test_predict_file = os.path.join(args.ensemble_dir, 'test_predict_{}.txt'.format(args.model_name)) args.submission_folder = utils.prepare_submission_folder(args) args.submission_file = os.path.join(args.submission_folder, 'submission_{}'.format(args.model_name)) # As the original files provided by TREC is quite messy, we formalize them into train and test file. utils.formalize_files(train_file_list, formal_train_file, args) utils.formalize_test_file(test_raw_tweets_json_folder, formal_test_file, prefix=args.data_prefix) logger.info("The training data file is {0} and testing data file is {1}".format( formal_train_file, formal_test_file)) # Step0. Extract some info which can be used later (also useful for generating submission files). label2id, majority_label, short2long_label = utils.get_label2id(label_file, formal_train_file, args.cv_num) id2label = utils.get_id2label(label2id) class_weight = utils.get_class_weight(args, label2id, id2label, formal_train_file) # When get submission, there is no need to run all following steps, but only read the `test_predict_file` and # pick some classes as final output according to policy (such as top-2 or auto-threshold). # You MUST run `--predict_mode` in advance to get the `test_predict_file` prepared. if args.get_submission: postpro = PostProcess(args, label2id, id2label, class_weight, majority_label, short2long_label, formal_train_file, formal_test_file, test_raw_tweets_json_folder, predict_priority_score_out_file) postpro.pick_labels_and_write_final_result() quit() # Step1. Preprocess and extract features for all tweets tweetid_list, tweet_content_list = utils.get_tweetid_content(tweet_file_list) utils.write_tweet_and_ids(tweetid_list, tweet_content_list, tweet_text_out_file, tweet_id_out_file) tweetid_list_2019, tweet_content_list_2019 = utils.get_tweetid_content(tweet_file_list_2019) utils.write_tweet_and_ids(tweetid_list_2019, tweet_content_list_2019, tweet_text_out_file_2019, tweet_id_out_file_2019) # Note that before `extract_features()`, we should manually run the `extract_features.sh` in `feature_tools`. # quit() # The `extract_features.sh` only need to be run once for the same dataset. preprocess = Preprocess(args, tweetid_list, tweet_content_list, label2id, tweet_id_out_file) preprocess.extract_features() preprocess_2019 = Preprocess(args, tweetid_list_2019, tweet_content_list_2019, label2id, tweet_id_out_file_2019, test=True) preprocess_2019.extract_features() if args.train_regression: data_x, data_score = preprocess.extract_train_data(formal_train_file, get_score=True) train_regression = TrainRegression(args, data_x, data_score) if args.cross_validate: train_regression.cross_validate() quit() if args.cross_validate: # Step2. Train and Cross-validation (for tuning hyper-parameters). # If we want to do ensemble in the future, we need the prediction on dev data by setting `--cross_validate`. if args.event_wise: data_x, data_y, event2idx_list, line_num = preprocess.extract_train_data(formal_train_file) data_predict_collect = np.zeros([line_num, len(label2id)]) metrics_collect = [] metric_names = None for event_type in utils.idx2event_type: it_data_x, it_data_y = data_x[event_type], data_y[event_type] train = Train(args, it_data_x, it_data_y, id2label, preprocess.feature_len, class_weight, event_type) metrics, predict_score = train.train() for i, idx in enumerate(event2idx_list[event_type]): data_predict_collect[idx] = predict_score[i] metrics_collect.append((metrics, it_data_x.shape[0])) if metric_names is None: metric_names = train.metric_names utils.get_final_metrics(metrics_collect, metric_names) else: data_x, data_y = preprocess.extract_train_data(formal_train_file) train = Train(args, data_x, data_y, id2label, preprocess.feature_len, class_weight) _, data_predict_collect = train.train() if args.predict_mode: utils.write_predict_and_label(args, formal_train_file, label2id, data_predict_collect) if args.predict_mode: # Step3. Get the 2019 test data, and retrain the model on all training data, then predict on the 2019-test if args.event_wise: data_x, data_y, _, _ = preprocess.extract_train_data(formal_train_file) test_x, event2idx_list, line_num = preprocess_2019.extract_formalized_test_data(formal_test_file) test_predict_collect = np.zeros([line_num, len(label2id)]) for event_type in utils.idx2event_type: it_data_x, it_data_y, it_test_x = data_x[event_type], data_y[event_type], test_x[event_type] if len(it_test_x) == 0: print("[WARNING] There are no event belongs to {} for the test data".format(event_type)) continue train = Train(args, it_data_x, it_data_y, id2label, preprocess_2019.feature_len, class_weight, event_type) train.train_on_all() predict_score = train.predict_on_test(it_test_x) for i, idx in enumerate(event2idx_list[event_type]): test_predict_collect[idx] = predict_score[i] else: data_x, data_y = preprocess.extract_train_data(formal_train_file) test_x = preprocess_2019.extract_formalized_test_data(formal_test_file) train = Train(args, data_x, data_y, id2label, preprocess_2019.feature_len, class_weight) train.train_on_all() test_predict_collect = train.predict_on_test(test_x) utils.write_predict_res_to_file(args, test_predict_collect) if args.train_regression: test_x = preprocess_2019.extract_formalized_test_data(formal_test_file) if args.event_wise: # For event_wise setting, there will be many additional things extracted, what we need is only test_x. test_x = test_x[0] train_regression.train() predict_priority_score = train_regression.predict_on_test(test_x) utils.write_predict_score_to_file(predict_priority_score, predict_priority_score_out_file) if args.ensemble is not None: # TODO(junpeiz): Average the priority score for ensemble. # Step4 (optional). Do the ensemble of different model if args.event_wise: raise NotImplementedError("We don't want to ensemble for event-wise models") else: out_file = os.path.join(args.out_dir, 'ensemble_out.txt') # Note the file list contains predictions from all models with and without the '-event' suffix. # So, we need to train both event-wise and not event-wise models or just delete those files in the folder. dev_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'dev_predict_') test_predict_file_list = utils.get_predict_file_list(args.ensemble_dir, 'test_predict_') train_x = utils.get_ensemble_feature(dev_predict_file_list) train_y = utils.get_ensemble_label(args.dev_label_file) print("The shape of ensemble train_x is {0}".format(train_x.shape)) utils.ensemble_cross_validate(train_x, train_y, id2label, train.mlb, args.ensemble) test_x = utils.get_ensemble_feature(test_predict_file_list) predict = utils.ensemble_train_and_predict(train_x, train.mlb.transform(train_y), test_x, id2label, args.ensemble) predict = [id2label[x] for x in predict] with open(out_file, 'w', encoding='utf8') as f: for it_predict in predict: f.write("{}\n".format(it_predict)) print("The ensemble result has been written to {}".format(out_file))