# NOTE(review): the next line is a notebook export whose cells were collapsed
# onto one physical line. It begins with "#", so Python reads the entire line
# as a comment and none of the statements in it run as written.
# Cells visible in the text:
#   In[7]: spreadsheet_names maps 'dev' and 'test' to .xlsx paths under
#          data/sentences/; the 'train' entry is prefixed with "#"
#          (presumably commented out in the original cell -- confirm).
#   In[8]: candidate_dfs calls load_candidate_dataframes(path, "curated_cbg")
#          for every key of spreadsheet_names, then prints shape[0] of each
#          result as the size of that set.
#   In[9]: lfs concatenates list(CG_LFS["CbG"].values()) with slices of the
#          value lists of DG_LFS["DaG"] ([7:37]), CD_LFS["CtD"] ([3:25]) and
#          GG_LFS["GiG"] ([9:37]). The opening "(" is never closed on this
#          line, so the expression is truncated here.
# TODO(review): restore the original line breaks (and the missing tail of the
# lfs expression) before relying on this code.
# In[7]: spreadsheet_names = { #'train': 'data/sentences/sentence_labels_train.xlsx', 'dev': 'data/sentences/sentence_labels_dev.xlsx', 'test': 'data/sentences/sentence_labels_test.xlsx' } # In[8]: candidate_dfs = { key:load_candidate_dataframes(spreadsheet_names[key], "curated_cbg") for key in spreadsheet_names } for key in candidate_dfs: print("Size of {} set: {}".format(key, candidate_dfs[key].shape[0])) # In[9]: lfs = ( list(CG_LFS["CbG"].values()) + list(DG_LFS["DaG"].values())[7:37] + list(CD_LFS["CtD"].values())[3:25] + list(GG_LFS["GiG"].values())[9:37]
# NOTE(review): the next line is a notebook export whose cells were collapsed
# onto one physical line. Only `quick_load = True` is live code; everything
# after the first "#" on the line is comment text and does not run as written.
# Cells visible in the text (under the heading "Load the data for Generative
# Model Experiments"):
#   In[6]: spreadsheet_names maps 'train', 'dev' and 'test' to
#          ../../sentence_labels_{train,dev,test}.xlsx.
#   In[7]: candidate_dfs calls load_candidate_dataframes(path) for every key
#          of spreadsheet_names, then prints shape[0] of each result as the
#          size of that set.
#   In[8]: label_functions = list(DG_LFS["DaG"].values()). When quick_load is
#          true, label_matricies is read with pickle.load from
#          "label_matricies.pkl"; otherwise a dict literal is started, but it
#          is cut off at the end of this line (the LabelAnnotator line before
#          it is itself commented out).
# NOTE(review): the file object passed to pickle.load comes from a bare open()
# and is never closed explicitly; a `with open(...)` block would release it.
# NOTE(review): pickle.load can execute arbitrary code -- only load
# label_matricies.pkl if it comes from a trusted source.
# NOTE(review): "label_matricies" looks like a misspelling of "label_matrices";
# the same spelling is used for the pickle file name, so rename both together
# or not at all.
quick_load = True # ## Load the data for Generative Model Experiments # In[6]: spreadsheet_names = { 'train': '../../sentence_labels_train.xlsx', 'dev': '../../sentence_labels_dev.xlsx', 'test': '../../sentence_labels_test.xlsx' } # In[7]: candidate_dfs = { key: load_candidate_dataframes(spreadsheet_names[key]) for key in spreadsheet_names } for key in candidate_dfs: print("Size of {} set: {}".format(key, candidate_dfs[key].shape[0])) # In[8]: label_functions = (list(DG_LFS["DaG"].values())) if quick_load: label_matricies = pickle.load(open("label_matricies.pkl", "rb")) else: #labeler = LabelAnnotator(lfs=label_functions) label_matricies = {