import random
import string

def feature_to_torch(self, all_data):
    """Convert raw string features to vocabulary ids and wrap them in a batch iterator."""
    for data in all_data:
        # Assign a random 8-character id to examples that lack one.
        if "example_id" not in data:
            data["example_id"] = ''.join(
                random.choice(string.ascii_letters + string.digits) for _ in range(8))
        # Map token, lemma, POS, and NER sequences to their vocabulary ids.
        data["snt_id"] = seq_to_id(self.dicts["word_dict"], data["tok"])[0]
        data["lemma_id"] = seq_to_id(self.dicts["lemma_dict"], data["lem"])[0]
        data["pos_id"] = seq_to_id(self.dicts["pos_dict"], data["pos"])[0]
        data["ner_id"] = seq_to_id(self.dicts["ner_dict"], data["ner"])[0]

    # When all_data is passed directly, the batch size is the whole data set.
    if self.opt.bert_model:
        data_iterator = BertDataIterator([], self.opt, self.dicts["ucca_rel_dict"],
                                         all_data=all_data)
        order, idsBatch, srcBatch, src_charBatch, sourceBatch, srcBertBatch = data_iterator[0]
    else:
        data_iterator = DataIterator([], self.opt, self.dicts["ucca_rel_dict"],
                                     all_data=all_data)
        order, idsBatch, srcBatch, src_charBatch, sourceBatch = data_iterator[0]
        srcBertBatch = None
    return order, idsBatch, srcBatch, src_charBatch, sourceBatch, srcBertBatch, data_iterator
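# Illustrative usage sketch, not from the source: feature_to_torch expects a
# list of per-sentence feature dicts whose "tok", "lem", "pos", and "ner"
# sequences were already filled in by an upstream annotator. `processor` is a
# hypothetical instance of the class this method belongs to.
def _demo_feature_to_torch(processor):
    all_data = [{
        "tok": ["John", "runs", "."],
        "lem": ["john", "run", "."],
        "pos": ["NNP", "VBZ", "."],
        "ner": ["PERSON", "O", "O"],
    }]
    (order, idsBatch, srcBatch, src_charBatch,
     sourceBatch, srcBertBatch, data_iterator) = processor.feature_to_torch(all_data)
    return srcBatch, srcBertBatch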
def main(opt):
    dicts = read_dicts()
    assert opt.train_from, "a trained model checkpoint (train_from) is required"

    # Build the per-split file lists from the processed pickle files.
    with_jamr = "_with_jamr" if opt.jamr else "_without_jamr"
    suffix = ".pickle" + with_jamr + "_processed"
    trainingFilesPath = folder_to_files_path(opt.folder + "/training/", suffix)
    devFilesPath = folder_to_files_path(opt.folder + "/dev/", suffix)
    testFilesPath = folder_to_files_path(opt.folder + "/test/", suffix)

    AmrDecoder = parser.AMRProcessors.AMRDecoder(opt, dicts)
    AmrDecoder.eval()
    AmrModel, parameters, optt = load_old_model(dicts, opt, True)
    opt.start_epoch = 1

    # Record the checkpoint path, model, and options next to the data folders.
    out = "/".join(testFilesPath[0].split("/")[:-2]) + "/model"
    with open(out, 'w') as outfile:
        outfile.write(opt.train_from + "\n")
        outfile.write(str(AmrModel) + "\n")
        outfile.write(str(optt) + "\n")
        outfile.write(str(opt))

    # Decode all three splits with one loop instead of three identical copies.
    splits = [("testing", testFilesPath),
              ("validation", devFilesPath),
              ("training", trainingFilesPath)]
    for name, files in splits:
        print('processing ' + name)
        for file in files:
            dev_data = DataIterator([file], opt, dicts["rel_dict"], volatile=True)
            concept_scores, rel_scores, output = generate_graph(
                AmrModel, AmrDecoder, dev_data, dicts, file)
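# Illustrative entry point, an assumption rather than part of the source: main
# expects an `opt` namespace carrying at least the fields used above
# (train_from, jamr, folder). The argparse wiring below is one plausible way
# to build it; the flag names mirror the attribute names.
if __name__ == "__main__":
    import argparse
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--train_from", required=True,
                            help="path to a trained model checkpoint")
    arg_parser.add_argument("--folder", required=True,
                            help="root folder containing training/dev/test")
    arg_parser.add_argument("--jamr", action="store_true",
                            help="use the _with_jamr processed pickles")
    main(arg_parser.parse_args())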
def feature_to_torch(self, all_data):
    """Non-BERT variant: convert string features to ids and build one full batch."""
    for data in all_data:
        data["snt_id"] = seq_to_id(self.dicts["word_dict"], data["tok"])[0]
        data["lemma_id"] = seq_to_id(self.dicts["lemma_dict"], data["lem"])[0]
        data["pos_id"] = seq_to_id(self.dicts["pos_dict"], data["pos"])[0]
        data["ner_id"] = seq_to_id(self.dicts["ner_dict"], data["ner"])[0]
    data_iterator = DataIterator([], self.opt, self.dicts["rel_dict"],
                                 volatile=True, all_data=all_data)
    order, srcBatch, src_sourceBatch = data_iterator[0]
    return order, srcBatch, src_sourceBatch, data_iterator
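# Minimal sketch of the seq_to_id contract assumed above; this is a guess at
# the interface, not the source implementation. Callers take element [0], so
# seq_to_id presumably returns the id list first; the UNK fallback, the
# returned mask, and the dict-like `vocab` are all assumptions here.
UNK_ID = 1  # hypothetical unknown-token index

def seq_to_id_sketch(vocab, seq):
    ids = [vocab.get(tok, UNK_ID) for tok in seq]     # one id per token, UNK fallback
    mask = [tok in vocab for tok in seq]              # True where the token was known
    return ids, mask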