# NOTE(review): this fragment was recovered from a whitespace-collapsed line,
# so the nesting below is reconstructed -- confirm against the intended layout.
# The first statements presumably run once per corpus inside a loop whose
# header lies outside this fragment.

# Restrict copies of the original and the shuffled corpus to the test split.
test = copy.copy(whole_corpus).set_filter(split['test'])
shuffle_test = copy.copy(shuffle_corpus).set_filter(split['test'])

if args.model_type == 'single':
    # In 'single' mode both datasets are converted with to_col().
    test, shuffle_test = test.to_col(), shuffle_test.to_col()

org_tests.append(test)
shuffle_tests.append(shuffle_test)

# Only the un-shuffled test sets are concatenated into the validation dataset.
val_dataset = ConcatDataset(org_tests)
t2 = time()
print("Done ({} sec.)".format(int(t2 - t1)))

# create models
n_valid_types = len(valid_types)
classifier = build_sherlock(
    sherlock_feature_groups,
    num_classes=n_valid_types,
    topic_dim=topic_dim,
).to(device)
model = CRF(n_valid_types, batch_first=True).to(device)

if args.model_type == 'single':
    # load pre-trained model
    model_loc = join(os.environ['BASEPATH'], 'model', 'pre_trained_sherlock', TYPENAME)
    sherlock_weights = torch.load(join(model_loc, args.model_path),
                                  map_location=device)
    classifier.load_state_dict(sherlock_weights)
    classifier.eval()

    # eval (gradient tracking disabled for everything inside this block)
    with torch.no_grad():
        result_list = []
sherlock_feature_groups = ['char', 'word', 'par', 'rest'] for f_g in sherlock_feature_groups: feature_group_cols[f_g] = list( pd.read_csv(join(os.environ['BASEPATH'], 'configs', 'feature_groups', "{}_col.tsv".format(f_g)), sep='\t', header=None, index_col=0)[1]) pad_vec = lambda x: np.pad(x, (0, topic_dim - len(x)), 'constant', constant_values=(0.0, 1 / topic_dim)) # load models classifier = models_sherlock.build_sherlock(sherlock_feature_groups, num_classes=len(valid_types), topic_dim=topic_dim, dropout_ratio=0.35) #classifier.load_state_dict(torch.load(join(pre_trained_loc, 'sherlock_None.pt'), map_location=device)) model = CRF(len(valid_types), batch_first=True).to(device) #model.load_state_dict(torch.load(join(pre_trained_loc, 'model.pt'), map_location=device)) loaded_params = torch.load(join(pre_trained_loc, 'model.pt'), map_location=device) classifier.load_state_dict(loaded_params['col_classifier']) model.load_state_dict(loaded_params['CRF_model']) classifier.eval() model.eval() def extract(df):