def test(self, test_file, model_file):
    """Load a saved model and run prediction over every instance in *test_file*."""
    self.loadModel(model_file)
    inst_reader = Reader()
    insts = inst_reader.readInstances(test_file, self.hyperParams.maxInstance)
    examples = self.instance2Example(insts)
    for example in examples:
        self.predict(example)
def train(self, train_file, dev_file, test_file):
    """Train the CRF-topped RNNLabeler one example at a time with Adagrad,
    then report P/R/F on the dev and test sets after every iteration.
    """
    self.hyperParams.show()
    torch.set_num_threads(self.hyperParams.thread)
    reader = Reader(self.hyperParams.maxInstance)
    trainInsts = reader.readInstances(train_file)
    devInsts = reader.readInstances(dev_file)
    testInsts = reader.readInstances(test_file)
    print("Training Instance: ", len(trainInsts))
    print("Dev Instance: ", len(devInsts))
    print("Test Instance: ", len(testInsts))
    # Alphabets are built from all three splits before feature extraction.
    self.createAlphabet(trainInsts, devInsts, testInsts)
    trainExamples = self.instance2Example(trainInsts)
    devExamples = self.instance2Example(devInsts)
    testExamples = self.instance2Example(testInsts)
    self.model = RNNLabeler(self.hyperParams)
    # Only optimize parameters that are not frozen (e.g. fixed embeddings).
    parameters = filter(lambda p: p.requires_grad, self.model.parameters())
    optimizer = torch.optim.Adagrad(parameters, lr=self.hyperParams.learningRate)
    indexes = []
    for idx in range(len(trainExamples)):
        indexes.append(idx)
    for iter in range(self.hyperParams.maxIter):
        print('###Iteration' + str(iter) + "###")
        random.shuffle(indexes)  # fresh example order each iteration
        for idx in range(len(trainExamples)):
            self.model.zero_grad()
            # Reset the LSTM state: each sentence is scored independently.
            self.model.LSTMHidden = self.model.init_hidden()
            exam = trainExamples[indexes[idx]]
            lstm_feats = self.model(exam.feat)
            loss = self.model.crf.neg_log_likelihood(lstm_feats, exam.labelIndexs)
            loss.backward()
            optimizer.step()
            if (idx + 1) % self.hyperParams.verboseIter == 0:
                print('current: ', idx + 1, ", cost:", loss.data[0])
        # devInsts/devExamples are index-aligned, so idx addresses both.
        eval_dev = Eval()
        for idx in range(len(devExamples)):
            predictLabels = self.predict(devExamples[idx])
            devInsts[idx].evalPRF(predictLabels, eval_dev)
        print('Dev: ', end="")
        eval_dev.getFscore()
        eval_test = Eval()
        for idx in range(len(testExamples)):
            predictLabels = self.predict(testExamples[idx])
            testInsts[idx].evalPRF(predictLabels, eval_test)
        print('Test: ', end="")
        eval_test.getFscore()
def main():
    # Interactive REPL (Python 2): bootstrap the environment from the bundled
    # libraries, then read-eval-print until EOF or Ctrl-C.
    try:
        env = {}
        for lib in ['stdlib.lisp', 'bq.lisp']:
            with open(lib) as f:
                try:
                    env.update(load(f, lib, env))
                except (LoadError, LoadWarning) as e:
                    print_exception(e)
                    exit(2)
        while True:
            try:
                expr = raw_input('* ')
            except (KeyboardInterrupt, EOFError):
                print
                break
            if not expr:
                continue
            try:
                form = Reader(expr, '<string>').read()
                result = eval_form(form, env)
            except ReadError as e:
                print_exception(e)
                continue
            # NOTE(review): the original source is collapsed onto one line,
            # so it is ambiguous whether the blank print/pprint belong to the
            # error branch; this reading prints the error, then the value,
            # and always records it as `_`. Confirm against upstream history.
            if isinstance(result, Error):
                print_error(result)
            print
            pprint(result)
            # Bind the last result to `_`, REPL-style.
            env[Symbol('_')] = result
    except KeyboardInterrupt:
        print
def getAEOcount(self, path):
    """Count A/E/O tags among the 3-character labels in the file at *path*.

    Returns a tuple ``(a_count, e_count, o_count)``.
    """
    a = 0
    e = 0
    o = 0
    reader = Reader()
    Inst = reader.readfiles(path)
    for i in Inst:
        for j in range(len(i.labels)):
            # Only 3-char labels carry a tag character in position 2.
            if len(i.labels[j]) == 3:
                if i.labels[j][2] == 'A':
                    a = a + 1
                elif i.labels[j][2] == 'E':
                    e = e + 1
                elif i.labels[j][2] == 'O':
                    # BUG FIX: previously compared the whole label to 'O'
                    # (`i.labels[j] == 'O'`), which can never be true inside
                    # the len == 3 branch, so o was always 0. Compare the
                    # tag character, matching the A/E branches.
                    o += 1
    return a, e, o
def __init__(self, hyperParams):
    """Post encoder: word embeddings (random or pre-trained) feeding a GRU."""
    super(Encoder, self).__init__()
    self.hyperParams = hyperParams
    if hyperParams.wordEmbFile != "":
        # Pre-trained table; the reader also reports its dimensionality.
        pretrain_reader = Reader()
        self.wordEmb, self.wordDim = pretrain_reader.load_pretrain(
            hyperParams.wordEmbFile, hyperParams.postWordAlpha,
            hyperParams.unk)
    else:
        # No file configured: randomly initialised lookup table.
        self.wordEmb = nn.Embedding(hyperParams.postWordNum,
                                    hyperParams.wordEmbSize)
        self.wordDim = hyperParams.wordEmbSize
    self.wordEmb.weight.requires_grad = hyperParams.wordFineTune
    self.dropOut = nn.Dropout(hyperParams.dropProb)
    self.gru = nn.GRU(input_size=self.wordDim,
                      hidden_size=hyperParams.rnnHiddenSize,
                      batch_first=True,
                      dropout=hyperParams.dropProb)
def __init__(self, hyperParams):
    """Response decoder built on pre-trained word embeddings."""
    super(Decoder, self).__init__()
    emb_reader = Reader()
    self.wordEmb, self.wordDim = emb_reader.load_pretrain(
        hyperParams.wordEmbFile, hyperParams.wordAlpha, hyperParams.unk)
    self.wordEmb.weight.requires_grad = hyperParams.wordFineTune
    self.dropOut = torch.nn.Dropout(hyperParams.dropProb)
    self.lastWords = []
    self.hyperParams = hyperParams
    # Projects [bi-RNN hidden; previous-word embedding] onto label scores.
    self.linearLayer = nn.Linear(
        hyperParams.rnnHiddenSize * 2 + self.wordDim,
        hyperParams.labelSize)
    self.softmax = nn.LogSoftmax()
def train(self, train_file, dev_file, test_file):
    """Train the RNNLabeler per-example with cross-entropy and Adagrad,
    reporting dev-set F-score after each iteration.

    NOTE(review): test_file is accepted but never read in this overload —
    confirm whether test evaluation was intentionally dropped.
    NOTE(review): createAlphabet() runs AFTER instance2Example() here,
    unlike the sibling train() that builds alphabets first — verify the
    intended ordering.
    """
    self.hyperParams.show()
    torch.set_num_threads(self.hyperParams.thread)
    reader = Reader(self.hyperParams.maxInstance)
    trainInsts = reader.readInstances(train_file)
    devInsts = reader.readInstances(dev_file)
    trainExamples = self.instance2Example(trainInsts)
    devExamples = self.instance2Example(devInsts)
    print("Training Instance: ", len(trainInsts))
    print("Dev Instance: ", len(devInsts))
    self.createAlphabet(trainInsts)
    self.model = RNNLabeler(self.hyperParams)
    optimizer = torch.optim.Adagrad(self.model.parameters(),
                                    lr=self.hyperParams.learningRate)
    indexes = []
    for idx in range(len(trainExamples)):
        indexes.append(idx)
    for iter in range(self.hyperParams.maxIter):
        print('###Iteration' + str(iter) + "###")
        random.shuffle(indexes)  # fresh example order each iteration
        for idx in range(len(trainExamples)):
            self.model.zero_grad()
            # Reset recurrent state: sentences are scored independently.
            self.model.LSTMHidden = self.model.init_hidden()
            exam = trainExamples[indexes[idx]]
            tag_scores = self.model(exam.feat)
            loss = torch.nn.functional.cross_entropy(tag_scores,
                                                     exam.labelIndexs)
            loss.backward()
            optimizer.step()
            if (idx + 1) % self.hyperParams.verboseIter == 0:
                print('current: ', idx + 1, ", cost:", loss.data[0])
        # devInsts/devExamples are index-aligned, so idx addresses both.
        eval_dev = Eval()
        for idx in range(len(devExamples)):
            predictLabels = self.predict(devExamples[idx])
            devInsts[idx].evalPRF(predictLabels, eval_dev)
        eval_dev.getFscore()
def run_tests(f, filename, env):
    # Read forms from *f* one at a time and treat each as a test case:
    # #t counts as passed, #f as failed, an Error (or any other value)
    # as an error. Returns (passed, failed, error). Python 2 source.
    passed = 0
    failed = 0
    error = 0
    reader = Reader(f, filename)
    while True:
        form = reader.read()
        if form == None:  # end of input
            break
        result = eval_form(form, env)
        if result == False:
            failed += 1
            print 'Test failed:'
            display_form(form, False)
        elif result == True:
            passed += 1
            print 'Test passed.'
        elif isinstance(result, Error):
            error += 1
            try:
                # Best-effort: pull a human-readable message off the error.
                desc = assoc(result.attrs, Symbol(':msg'))
                print 'Test error ({}): {}'.format(result.type.name, desc)
            except (KeyError, InvalidAssocList) as e:
                print 'Test error ({}).'.format(result.type.name)
            try:
                # NOTE(review): this looks up Symbol(':form') on `result`,
                # while the lookup above used `result.attrs` — confirm which
                # is the intended assoc list.
                form = assoc(result, Symbol(':form'))
                display_form(form, False)
            except (KeyError, InvalidAssocList):
                print 'No location information for the error.'
        else:
            error += 1
            print 'Test cases must return either #t or #f; got {}.'.format(
                result)
            display_form(form, False)
    return passed, failed, error
def prediction(self, path_test, path_train, path_bestModel):
    """Restore the best saved BiLSTM model and evaluate it on the test set,
    collecting the extracted A and E entities into self.Attr / self.Eval.
    """
    self.hyperpara = Hyperparameter()
    eval_test = Eval()
    ner = NER()
    reader = Reader()
    traininsts = reader.readfiles(path_train)
    testinsts = reader.readfiles(path_test)
    # Rebuild the alphabets from the training data so label/word ids match
    # the ones the saved model was trained with.
    ner.create_alphabet(traininsts)
    self.hyperpara.tag_size = ner.hyperpara.tag_size
    self.hyperpara.embedding_num = ner.hyperpara.embedding_num
    self.model = BiLstm(self.hyperpara)  # BiLSTM model
    # load_pattern == 1 restores parameters only; == 0 restores the whole
    # pickled model object.
    if self.hyperpara.loadModel == 1 and\
       self.hyperpara.load_pattern == 1:
        try:
            self.model.load_state_dict(torch.load(path_bestModel))
        except Exception:
            print('模型参数不匹配')
        else:
            pass
    elif self.hyperpara.loadModel == 1 and\
         self.hyperpara.load_pattern == 0:
        try:
            self.model = torch.load(path_bestModel)
        except Exception:
            print('模型参数不匹配')
        else:
            pass
    testExamples = ner.change(testinsts)
    # Evaluate one example at a time (batch of 1).
    for idx in range(len(testExamples)):
        test_list = []
        test_list.append(testExamples[idx])
        x, y = ner.variable(test_list)
        lstm_feats = self.model(x)
        predict = ner.getMaxIndex(lstm_feats)
        predictLabels = []
        for idy in range(len(predict)):
            predictLabels.append(ner.label_AlphaBet.list[predict[idy]])
        testinsts[idx].evalPRF(predictLabels, eval_test)
        # Accumulate the attribute (A) and evaluation (E) entities.
        a, e = testinsts[idx].extractA_and_E()
        self.Attr.append(a)
        self.Eval.append(e)
def __init__(self, hyperParams):
    """Char-level encoder: char + bichar embeddings into a 2-layer BiLSTM."""
    super(Encoder, self).__init__()
    self.hyperParams = hyperParams
    # Character embeddings: pre-trained when a file is configured,
    # otherwise a randomly initialised lookup table.
    if hyperParams.charEmbFile != "":
        char_reader = Reader()
        self.charEmb, self.charDim = char_reader.load_pretrain(
            hyperParams.charEmbFile, hyperParams.charAlpha, hyperParams.unk)
    else:
        self.charEmb = nn.Embedding(hyperParams.charNUM,
                                    hyperParams.charEmbSize)
        self.charDim = hyperParams.charEmbSize
    self.charEmb.weight.requires_grad = hyperParams.charFineTune
    # Bigram-character embeddings, same policy as above.
    if hyperParams.bicharEmbFile != "":
        bichar_reader = Reader()
        self.bicharEmb, self.bicharDim = bichar_reader.load_pretrain(
            hyperParams.bicharEmbFile, hyperParams.bicharAlpha,
            hyperParams.unk)
    else:
        self.bicharEmb = nn.Embedding(hyperParams.bicharNUM,
                                      hyperParams.bicharEmbSize)
        self.bicharDim = hyperParams.bicharEmbSize
    self.bicharEmb.weight.requires_grad = hyperParams.bicharFineTune
    self.dropOut = nn.Dropout(hyperParams.dropProb)
    self.bilstm = nn.LSTM(input_size=self.charDim + self.bicharDim,
                          hidden_size=hyperParams.rnnHiddenSize,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=2,
                          dropout=hyperParams.dropProb)
def load(f, filename, env):
    """Read top-level ``(define name expr)`` forms from *f*, evaluate each
    definition into *env*, and return *env*.

    Raises LoadWarning for an unrecognized top-level form and LoadError for
    a malformed define; exits(2) when evaluating a definition yields an
    Error value.
    """
    reader = Reader(f, filename)
    while True:
        form = reader.read()
        # Idiom fix: identity test for the end-of-stream sentinel instead
        # of `== None` (avoids any custom __eq__ on form objects).
        if form is None:
            break
        expanded, _ = macro_expand(form, env)
        # Only (define symbol expr) is accepted at top level. The exact
        # type() comparisons are kept deliberately: subclasses of
        # List/Symbol are rejected, as in the original.
        if type(expanded) != List or len(expanded) != 3 or \
                expanded[0] != Symbol('define'):
            raise LoadWarning('Unrecognized top-level form.', expanded)
        if type(expanded[1]) != Symbol:
            raise LoadError('Invalid top-level form.', form)
        val = eval_form(expanded[2], env)
        if isinstance(val, Error):
            print_error(val)
            exit(2)
        env[expanded[1]] = val
    return env
def time_since(since, percent):
    """Return "<elapsed> (- <remaining>)" given a start timestamp and the
    fraction of work completed (*percent*, expected in (0, 1])."""
    now = time.time()
    s = now - since
    es = s / (percent)  # estimated total time at the current rate
    rs = es - s  # estimated time remaining
    return '%s (- %s)' % (as_minutes(s), as_minutes(rs))


if __name__ == '__main__':
    # Script entry: load config, read train/test corpora, and start
    # building the alphabets. (Continues beyond this chunk.)
    start_time = time.time()
    print('begin time:', as_minutes(time.time() - start_time))
    config_path = './config.cfg'
    config = Configer(config_path)
    print('reading ', config.train_path)
    train_reader = Reader(config.train_path, max_len=config.max_len)
    print('done, using time:', as_minutes(time.time() - start_time))
    print('reading ', config.test_path)
    test_reader = Reader(config.test_path, max_len=config.max_len)
    print('done, using time:', as_minutes(time.time() - start_time))
    train_texts, train_labels = train_reader.getData()
    test_texts, test_labels = test_reader.getData()
    print('train:', len(train_texts))
    print('test:', len(test_texts))
    # print(train_texts[:10])
    # print(train_labels[:10])
    text_alpha = Alphabet()
    label_alpha = Alphabet()
    text_dic = OrderedDict()
def train(self, train_file, dev_file, test_file, model_file):
    """Train the encoder/decoder response generator with teacher-free
    decoding (greedy feedback of the argmax word), saving a checkpoint
    every 10th iteration.
    """
    self.hyperParams.show()
    torch.set_num_threads(self.hyperParams.thread)
    reader = Reader()
    trainInsts = reader.readInstances(train_file, self.hyperParams.maxInstance)
    devInsts = reader.readInstances(dev_file, self.hyperParams.maxInstance)
    testInsts = reader.readInstances(test_file, self.hyperParams.maxInstance)
    print("Training Instance: ", len(trainInsts))
    print("Dev Instance: ", len(devInsts))
    print("Test Instance: ", len(testInsts))
    self.createAlphabet(trainInsts)
    trainExamples = self.instance2Example(trainInsts)
    devExamples = self.instance2Example(devInsts)
    testExamples = self.instance2Example(testInsts)
    self.encoder = Encoder(self.hyperParams)
    self.decoder = Decoder(self.hyperParams)
    indexes = []
    for idx in range(len(trainExamples)):
        indexes.append(idx)
    # Optimize only trainable parameters (frozen embeddings excluded).
    encoder_parameters = filter(lambda p: p.requires_grad,
                                self.encoder.parameters())
    encoder_optimizer = torch.optim.Adam(encoder_parameters,
                                         lr=self.hyperParams.learningRate)
    decoder_parameters = filter(lambda p: p.requires_grad,
                                self.decoder.parameters())
    decoder_optimizer = torch.optim.Adam(decoder_parameters,
                                         lr=self.hyperParams.learningRate)
    # Trailing partial batch is dropped (floor division).
    batchBlock = len(trainExamples) // self.hyperParams.batch
    for iter in range(self.hyperParams.maxIter):
        print('###Iteration' + str(iter) + "###")
        random.shuffle(indexes)
        self.encoder.train()
        self.decoder.train()
        for updateIter in range(batchBlock):
            exams = []
            start_pos = updateIter * self.hyperParams.batch
            end_pos = (updateIter + 1) * self.hyperParams.batch
            for idx in range(start_pos, end_pos):
                exams.append(trainExamples[indexes[idx]])
            postFeats, responseFeats = self.getBatchFeatLabel(exams)
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            encoder_hidden = self.encoder.init_hidden(self.hyperParams.batch)
            encoder_output, encoder_hidden = self.encoder(postFeats,
                                                          encoder_hidden)
            decoder_hidden = self.decoder.initHidden(self.hyperParams.batch)
            response_len = responseFeats.size()[1]
            # Seed decoding with the response start token for every example.
            last_word = torch.autograd.Variable(
                torch.LongTensor(1, self.hyperParams.batch))
            for idx in range(self.hyperParams.batch):
                last_word.data[0][idx] = self.hyperParams.responseStartID
            loss = 0
            for idx in range(response_len):
                decoder_output, decoder_hidden = self.decoder(
                    encoder_hidden, decoder_hidden, last_word)
                loss += torch.nn.functional.nll_loss(
                    decoder_output, responseFeats.permute(1, 0)[idx])
                # Greedy decoding: feed the argmax word back as input.
                for idy in range(self.hyperParams.batch):
                    last_word.data[0][idy] = self.getMaxIndex(
                        decoder_output[idy])
            loss.backward()
            print('Current: ', updateIter + 1, ", Cost:", loss.data[0])
            encoder_optimizer.step()
            decoder_optimizer.step()
        # BUG FIX: the original condition was `iter + 1 % 10 == 0`, which
        # parses as `iter + (1 % 10) == 0` and is never true for iter >= 0,
        # so the model was never saved. Save every 10th iteration.
        if (iter + 1) % 10 == 0:
            self.encoder.eval()
            self.decoder.eval()
            print("Save model .....")
            self.saveModel(model_file + str(iter))
            print("Model model ok")
from read import Reader
from models import model
from keras.preprocessing.image import ImageDataGenerator
from generator import BSONIterator

# Build train/validation iterators over the BSON product-image dataset.
r = Reader(full_prep=True)  # need the generator
train_bson_file = open(r.train_bson_path, "rb")
# num_classes = len(train_offsets_df["category_id"].unique())
# presumably the fixed category count for this dataset — TODO confirm
num_classes = 5270
print("Total number of categories in train {}".format(num_classes))
num_train_images = len(r.train_images_df)
num_val_images = len(r.val_images_df)
batch_size = 16
# Tip: use ImageDataGenerator for data augmentation and preprocessing.
train_datagen = ImageDataGenerator()
train_gen = BSONIterator(train_bson_file,
                         r.train_images_df,
                         r.train_offsets_df,
                         num_classes,
                         train_datagen,
                         batch_size=batch_size,
                         shuffle=True)
val_datagen = ImageDataGenerator()
# NOTE(review): this call is truncated at the chunk boundary and continues
# in the next chunk of the file.
val_gen = BSONIterator(train_bson_file, r.val_images_df,
def train(self, train_file, dev_file, test_file, model_file):
    """Train the char/bichar encoder + decoder tagger in mini-batches,
    tracking train accuracy per batch and P/R/F on dev and test per
    iteration.

    NOTE(review): model_file is accepted but never used in the visible
    body — confirm whether checkpointing was removed intentionally.
    """
    self.hyperParams.show()
    torch.set_num_threads(self.hyperParams.thread)
    reader = Reader()
    trainInsts = reader.readInstances(train_file, self.hyperParams.maxInstance)
    devInsts = reader.readInstances(dev_file, self.hyperParams.maxInstance)
    testInsts = reader.readInstances(test_file, self.hyperParams.maxInstance)
    print("Training Instance: ", len(trainInsts))
    print("Dev Instance: ", len(devInsts))
    print("Test Instance: ", len(testInsts))
    self.createAlphabet(trainInsts, devInsts, testInsts)
    trainExamples = self.instance2Example(trainInsts)
    devExamples = self.instance2Example(devInsts)
    testExamples = self.instance2Example(testInsts)
    self.encoder = Encoder(self.hyperParams)
    self.decoder = Decoder(self.hyperParams)
    indexes = []
    for idx in range(len(trainExamples)):
        indexes.append(idx)
    # Optimize only trainable parameters (frozen embeddings excluded).
    encoder_parameters = filter(lambda p: p.requires_grad,
                                self.encoder.parameters())
    encoder_optimizer = torch.optim.Adam(encoder_parameters,
                                         lr=self.hyperParams.learningRate)
    decoder_parameters = filter(lambda p: p.requires_grad,
                                self.decoder.parameters())
    decoder_optimizer = torch.optim.Adam(decoder_parameters,
                                         lr=self.hyperParams.learningRate)
    train_num = len(trainExamples)
    # Round the batch count up so the trailing partial batch is trained.
    batchBlock = train_num // self.hyperParams.batch
    if train_num % self.hyperParams.batch != 0:
        batchBlock += 1
    for iter in range(self.hyperParams.maxIter):
        print('###Iteration' + str(iter) + "###")
        random.shuffle(indexes)
        self.encoder.train()
        self.decoder.train()
        train_eval = Eval()
        for updateIter in range(batchBlock):
            exams = []
            start_pos = updateIter * self.hyperParams.batch
            end_pos = (updateIter + 1) * self.hyperParams.batch
            if end_pos > train_num:
                end_pos = train_num
            for idx in range(start_pos, end_pos):
                exams.append(trainExamples[indexes[idx]])
            batchCharFeats, batchBiCharFeats, batchLabel, batch, maxSentSize = \
                self.getBatchFeatLabel(exams)
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            encoderHidden = self.encoder.init_hidden(batch)
            encoderOutput, encoderHidden = self.encoder(batchCharFeats,
                                                        batchBiCharFeats,
                                                        encoderHidden)
            loss = 0
            decoderOutput = self.decoder(batch, encoderOutput, exams,
                                         bTrain=True)
            # Decoder output is flattened: row idx * maxSentSize + idy holds
            # the scores for character idy of example idx.
            for idx in range(batch):
                exam = exams[idx]
                for idy in range(exam.size):
                    labelID = getMaxIndex(
                        self.hyperParams,
                        decoderOutput[idx * maxSentSize + idy])
                    if labelID == exam.labelIndexes[idy]:
                        train_eval.correct_num += 1
                    train_eval.gold_num += 1
            loss += torch.nn.functional.nll_loss(decoderOutput, batchLabel)
            loss.backward()
            if (updateIter + 1) % self.hyperParams.verboseIter == 0:
                print('Current: ', updateIter + 1, ", Cost:", loss.data[0],
                      ", ACC:", train_eval.acc())
            encoder_optimizer.step()
            decoder_optimizer.step()
        # Per-iteration evaluation; insts and examples are index-aligned.
        self.encoder.eval()
        self.decoder.eval()
        dev_eval = Eval()
        for idx in range(len(devExamples)):
            exam = devExamples[idx]
            predict_labels = self.predict(exam)
            devInsts[idx].evalPRF(predict_labels, dev_eval)
        p, r, f = dev_eval.getFscore()
        print("precision: ", p, ", recall: ", r, ", fscore: ", f)
        test_eval = Eval()
        for idx in range(len(testExamples)):
            exam = testExamples[idx]
            predict_labels = self.predict(exam)
            testInsts[idx].evalPRF(predict_labels, test_eval)
        p, r, f = test_eval.getFscore()
        print("precision: ", p, ", recall: ", r, ", fscore: ", f)
# NOTE(review): the triple-quote below opens a commented-out region that
# continues past this chunk; left in place.
'''
from __future__ import absolute_import, division, print_function
from builtins import super, range, zip, round, map

import logging

from read import Reader

from ditto.store import Store

logger = logging.getLogger(__name__)

# Parse the sample CSV into an in-memory model Store and log what we find.
m = Store()
reader = Reader()
reader.parse(m, "test_input.csv")
for i in m.models:
    logger.debug(i)
for obj_name in m.model_names:
    logger.debug(obj_name)
# Inspect the traits of the "load1" model; for List traits, log the class
# they contain.
for i in m.model_names["load1"].traits():
    # logger.debug(i,type(m.model_names['load1'].traits()[i]))
    # Extract the trait's class name (e.g. "List") from its type repr.
    class_name = (str(type(
        m.model_names["load1"].traits()[i])).strip("<>'").split(".")[-1])
    if class_name == "List":
        logger.debug(m.model_names["load1"].traits()[i]._trait.klass)
def createReader():
    """Spawn and start the reader thread that feeds Tidy.queue from fileName."""
    file_reader = Reader("reader", fileName, Tidy.queue)
    file_reader.start()
def train(self, train_file, dev_file, test_file):
    """Train the RNNLabeler in mini-batches with Adam / cross-entropy,
    reporting dev and test accuracy after every iteration.
    """
    self.hyperParams.show()
    torch.set_num_threads(self.hyperParams.thread)
    reader = Reader()
    trainInsts = reader.readInstances(train_file, self.hyperParams.maxInstance)
    devInsts = reader.readInstances(dev_file, self.hyperParams.maxInstance)
    testInsts = reader.readInstances(test_file, self.hyperParams.maxInstance)
    print("Training Instance: ", len(trainInsts))
    print("Dev Instance: ", len(devInsts))
    print("Test Instance: ", len(testInsts))
    self.createAlphabet(trainInsts, devInsts, testInsts)
    trainExamples = self.instance2Example(trainInsts)
    devExamples = self.instance2Example(devInsts)
    testExamples = self.instance2Example(testInsts)
    self.model = RNNLabeler(self.hyperParams)
    # Optimize only trainable parameters (frozen embeddings excluded).
    parameters = filter(lambda p: p.requires_grad, self.model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=self.hyperParams.learningRate)
    indexes = []
    for idx in range(len(trainExamples)):
        indexes.append(idx)
    # Trailing partial batch is dropped (floor division).
    batchBlock = len(trainExamples) // self.hyperParams.batch
    for iter in range(self.hyperParams.maxIter):
        print('###Iteration' + str(iter) + "###")
        random.shuffle(indexes)
        self.model.train()
        for updateIter in range(batchBlock):
            optimizer.zero_grad()
            exams = []
            start_pos = updateIter * self.hyperParams.batch
            end_pos = (updateIter + 1) * self.hyperParams.batch
            for idx in range(start_pos, end_pos):
                exams.append(trainExamples[indexes[idx]])
            feats, labels = self.getBatchFeatLabel(exams)
            output = self.model(feats, self.hyperParams.batch)
            loss = torch.nn.functional.cross_entropy(output, labels)
            loss.backward()
            optimizer.step()
            if (updateIter + 1) % self.hyperParams.verboseIter == 0:
                # BUG FIX: the progress message printed `idx + 1` — the
                # index of the last example copied into the batch — instead
                # of the batch counter. Report updateIter + 1, matching the
                # sibling batched train() in this file.
                print('current: ', updateIter + 1, ", cost:", loss.data[0])
        # Per-iteration evaluation; insts and examples are index-aligned.
        self.model.eval()
        eval_dev = Eval()
        for idx in range(len(devExamples)):
            predictLabel = self.predict(devExamples[idx])
            devInsts[idx].evalACC(predictLabel, eval_dev)
        print("dev: ", end='')
        eval_dev.getACC()
        eval_test = Eval()
        for idx in range(len(testExamples)):
            predictLabel = self.predict(testExamples[idx])
            testInsts[idx].evalACC(predictLabel, eval_test)
        print("test: ", end='')
        eval_test.getACC()
def main(): import argparse parser = argparse.ArgumentParser( description='Loads and evaluates parable files and expressions.') parser.add_argument('-l', '--load', type=str, dest='load_files', nargs='+', metavar='FILES', help='Load one or more files.') parser.add_argument('-t', '--test', type=str, dest='test_files', nargs='+', metavar='FILES', help='run one or more test files.') parser.add_argument('-e', '--eval', type=str, dest='eval_expression', metavar='EXPR', help='Evaluate the given expression.') parser.add_argument('-m', '--macro-expand', type=str, dest='expand_expression', metavar='EXPR', help='Macro-expand the given expression.') args = parser.parse_args() count = len( list(1 for i in ('eval_expression', 'expand_expression', 'test_files') if getattr(args, i) != None)) if count > 1: print 'Only one of -t, -m and -e can be used.' exit(1) elif count == 0: print 'Either -t, -m or -e must be used.' exit(1) env = {} for lib in args.load_files: with open(lib) as f: try: env.update(load(f, lib, env)) except (LoadError, LoadWarning) as e: print_exception(e) exit(2) if args.eval_expression: try: form = Reader(args.eval_expression, '<string>').read() result = eval_form(form, env) except ReadError as e: print_exception(e) exit(2) if isinstance(result, Error): print_error(result) exit(2) print 'Evaluation Result:', pprint(result) elif args.expand_expression: try: form = Reader(args.expand_expression, '<string>').read() result = macro_expand(form, env) except ReadError as e: print_exception(e) exit(2) if isinstance(result, Error): print_error(result) exit(2) print 'Macro Expansion Result:', result[1], pprint(result[0]) elif args.test_files: passed = failed = error = 0 for test_file in args.test_files: try: with open(test_file) as f: p, f, e = run_tests(f, test_file, env) passed += p failed += f error += e except ReadError as e: print_exception(e) exit(2) print 'Total tests:', passed + failed + error print ' Successful:', passed print ' Failed:', failed print ' Error:', 
error if failed != 0 or error != 0: exit(3)
def train(self, path_train, path_dev, path_test, path_PRF, path_model,
          path_bestModel):
    """Train the BiLSTM tagger, evaluating P/R/F (overall and for A/E
    entities) on dev and test each epoch, saving the model and appending
    the scores to the PRF log file.
    """
    # Read train/dev/test sets and build the alphabets.
    reader = Reader()
    traininsts = reader.readfiles(path_train)
    devinsts = reader.readfiles(path_dev)
    testinsts = reader.readfiles(path_test)
    print('Training Instance:', len(traininsts))
    print('Dev Instance:', len(devinsts))
    print('Test Instance:', len(testinsts))
    self.create_alphabet(traininsts)
    # Convert strings into index features.
    trainExamples = self.change(traininsts)  # e_train
    devExamples = self.change(devinsts)
    testExamples = self.change(testinsts)
    self.model = BiLstm(self.hyperpara)  # BiLSTM model
    # Optionally restore a saved model: load_pattern == 1 restores
    # parameters only, == 0 restores the whole pickled model object.
    if self.hyperpara.loadModel == 1 and\
       self.hyperpara.load_pattern == 1:
        try:
            self.model.load_state_dict(torch.load(path_bestModel))
        except Exception:
            print('模型参数不匹配')
        else:
            pass
    elif self.hyperpara.loadModel == 1 and\
         self.hyperpara.load_pattern == 0:
        try:
            self.model = torch.load(path_bestModel)
        except Exception:
            print('模型参数不匹配')
        else:
            pass
    optimizer = torch.optim.Adam(self.model.parameters(),
                                 lr=self.hyperpara.lr)  # optimizer
    total_num = len(trainExamples)
    for epoch in range(1, self.hyperpara.epochs):
        print("————————第{}轮迭代,共{}轮————————".format(
            epoch, self.hyperpara.epochs))
        total = 0
        # Shuffling the training order helps accuracy.
        random.shuffle(trainExamples)
        try:
            # Round the batch count up to cover the trailing partial batch.
            part = total_num // self.hyperpara.batch
            if total_num % self.hyperpara.batch != 0:
                part += 1
        except ZeroDivisionError:
            print('batch数为0,除0错误')
        else:
            # Start training.
            self.model.train()
            for idx in range(part):
                begin = idx * self.hyperpara.batch
                end = (idx + 1) * self.hyperpara.batch
                if end > total_num:
                    end = total_num
                batch_list = []
                # batch_list_len = []
                for idy in range(begin, end):
                    batch_list.append(trainExamples[idy])
                    # batch_list_len.append(len(trainExamples[idy].wordIndexs))
                optimizer.zero_grad()
                x, y = self.variable(batch_list)
                lstm_feats = self.model(x)
                loss = F.cross_entropy(lstm_feats, y)
                total += 1
                loss.backward()
                optimizer.step()
                print('current:', total, ", loss:", loss.data[0])
            # Evaluate on the dev set: overall plus A and E entity scores.
            eval_dev = Eval()
            eval_dev_A = Eval()
            eval_dev_E = Eval()
            for idx in range(len(devExamples)):
                dev_list = []
                dev_list.append(devExamples[idx])
                x, y = self.variable(dev_list)
                lstm_feats = self.model(x)
                predict = self.getMaxIndex(lstm_feats)
                predictLabels = []
                for idy in range(len(predict)):
                    predictLabels.append(
                        self.label_AlphaBet.list[predict[idy]])
                gold_ent, predict_ent = devinsts[idx].evalPRF(
                    predictLabels, eval_dev)
                # Split the entities into A(ttribute) and E(valuation) sets.
                gold_ent_A, gold_ent_E, predict_ent_A, predict_ent_E = devinsts[
                    idx].getAE(gold_ent, predict_ent)
                devinsts[idx].evalAEPRF(gold_ent_A, predict_ent_A, eval_dev_A)
                devinsts[idx].evalAEPRF(gold_ent_E, predict_ent_E, eval_dev_E)
            # `line` accumulates this epoch's P/R/F summary for the log file.
            line = ''
            print('Dev: ', end="")
            d_precision, d_recall, d_fscore = eval_dev.getFscore()
            line = line + str(epoch) + '.dev:\nP:' + (
                '%.2f' % (d_precision * 100)) + ' R:' + (
                '%.2f' % (d_recall * 100)) + ' F:' + ('%.2f' % (
                    d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)
            d_precision, d_recall, d_fscore = eval_dev_A.getFscore()
            line = line + 'A_P:' + ('%.2f' % (d_precision * 100)) + ' A_R:' + (
                '%.2f' % (d_recall * 100)) + ' A_F:' + ('%.2f' % (
                    d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)
            d_precision, d_recall, d_fscore = eval_dev_E.getFscore()
            line = line + 'E_P:' + ('%.2f' % (d_precision * 100)) + ' E_R:' + (
                '%.2f' % (d_recall * 100)) + ' E_F:' + ('%.2f' % (
                    d_fscore * 100)) + '\n'
            print("precision:", d_precision * 100, ", recall: ",
                  d_recall * 100, ", fscore:", d_fscore * 100)
            # Evaluate on the test set the same way.
            eval_test = Eval()
            eval_test_A = Eval()
            eval_test_E = Eval()
            for idx in range(len(testExamples)):
                test_list = []
                test_list.append(testExamples[idx])
                x, y = self.variable(test_list)
                lstm_feats = self.model(x)
                predict = self.getMaxIndex(lstm_feats)
                predictLabels = []
                for idy in range(len(predict)):
                    predictLabels.append(
                        self.label_AlphaBet.list[predict[idy]])
                gold_ent, predict_ent = testinsts[idx].evalPRF(
                    predictLabels, eval_test)
                gold_ent_A, gold_ent_E, predict_ent_A, predict_ent_E = testinsts[
                    idx].getAE(gold_ent, predict_ent)
                testinsts[idx].evalAEPRF(gold_ent_A, predict_ent_A,
                                         eval_test_A)
                testinsts[idx].evalAEPRF(gold_ent_E, predict_ent_E,
                                         eval_test_E)
            print('Test: ', end="")
            t_precision, t_recall, t_fscore = eval_test.getFscore()
            line = line + 'test:\nP:' + (
                '%.2f' % (t_precision * 100)) + ' R:' + (
                '%.2f' % (t_recall * 100)) + ' F:' + ('%.2f' % (
                    t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)
            t_precision, t_recall, t_fscore = eval_test_A.getFscore()
            line = line + 'A_P:' + ('%.2f' % (t_precision * 100)) + ' A_R:' + (
                '%.2f' % (t_recall * 100)) + ' A_F:' + ('%.2f' % (
                    t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)
            t_precision, t_recall, t_fscore = eval_test_E.getFscore()
            line = line + 'E_P:' + ('%.2f' % (t_precision * 100)) + ' E_R:' + (
                '%.2f' % (t_recall * 100)) + ' E_F:' + ('%.2f' % (
                    t_fscore * 100)) + '\n'
            print("precision:", t_precision * 100, ", recall: ",
                  t_recall * 100, ", fscore:", t_fscore * 100)
            # Save the model: save_pattern 0 = state dict, 1 = whole model.
            if self.hyperpara.save_pattern == 0:
                torch.save(self.model.state_dict(),
                           path_model + str(epoch) + '.pkl')
            elif self.hyperpara.save_pattern == 1:
                torch.save(self.model, path_model + str(epoch) + '.pkl')
            # Append this epoch's summary to the PRF log file.
            try:
                file = open(path_PRF, 'a+', encoding='utf-8')
            except IOError:
                print('文件读取异常')
            else:
                file.write(line)
                file.close()
def __init__(self, config):
    """Initialise the base distributor and the mailbox reader it polls."""
    super(OnlineDistributor, self).__init__(config)
    self._reader = Reader(config)
class OnlineDistributor(Distributor):
    '''
    Distributes mails in "real-time".

    There is one public method, update. When it is called, the server is
    polled. If new mails have arrived, they are processed and resent to
    the members of the list. Afterward, the mails are deleted, but only
    if the resend process finished successfully. If the subject is in a
    special format, instead of resending the mail, a DownloadMessage is
    generated and sent back.
    '''

    def __init__(self, config):
        super(OnlineDistributor, self).__init__(config)
        self._reader = Reader(config)

    def update(self):
        '''
        Update the distribution list.

        Every new message in the server is processed and resent to the
        members of the list. If the resend is successful the new messages
        are deleted. Returns False if the mailbox connection fails,
        otherwise whether any new message ids were found.
        '''
        logger.debug('update is called')
        try:
            self._reader.connect()
        except Exception as e:
            # Connection problems are logged and reported, not raised.
            logger.info('connect failed with the exception: %s', e)
            return False
        ids = self._reader.new_messages()
        for id in ids:
            msg = self._reader.get(id)
            # Delete only messages that were actually processed.
            if self._isvalid(msg):
                self._process(msg)
                self._reader.delete(id)
        self._reader.disconnect()
        logger.debug('update is finished')
        return len(ids) != 0

    def _process(self, msg):
        '''
        Redirects to the correct action based on the subject of the
        message: "get ..." subjects request a download, anything else is
        resent to the list.
        '''
        subject = msg['Subject']
        if subject.lower().startswith('get'):
            logger.debug('calling _download_and_send')
            self._download_and_send(subject, msg)
        else:
            logger.debug('calling _resend')
            self._resend(msg)

    def _download_and_send(self, subject, msg):
        '''
        Creates a new DownloadMessage based on the subject and sends it
        back to the sender. The format of the subject must be: GET 'url'.
        '''
        id = self._store.archive(msg)
        sender = self._find_sender_email(msg)
        url = self._get_download_url(subject)
        if url is not None:
            logger.info('Downloading message for %s with url %s',
                        sender, url)
            self._sender.send(DownloadMessage(url), sender)
            self._store.mark_as_sent(id)

    def _get_download_url(self, subject):
        '''
        Returns the url to download from the subject of the message, or
        None if no url could be found.
        '''
        subject = subject.lower().strip(' ')
        # Expect exactly two whitespace-separated parts: "get <url>".
        parts = re.split(r'\s+', subject)
        if len(parts) != 2:
            logger.error('_get_download_url, %s has no valid url', subject)
            return None
        return parts[1]

    def _resend(self, msg):
        '''
        Sends a message to the appropriate members of the list after
        processing it.
        '''
        self._edit_msg(msg)
        id = self._store.archive(msg)
        sender = self._find_sender_email(msg)
        self._sender.send(msg, *self._mgr.active_members(sender))
        # Digest members receive the archived copy later.
        self._store.digest(id, *self._mgr.digest_members(sender))
        self._store.mark_as_sent(id)

    def _edit_msg(self, msg):
        '''
        Processes a message in place (returns None).

        The following steps are taken for each part of the message that
        can be interpreted as text:

        - A header and a footer are added, both using the encoding of the
          payload.
        - The payload has all the email hosts removed.

        The parts are separated with newlines, which depend on whether the
        message is plain text or other (like HTML).
        '''
        header = self._create_header(msg)
        footer = self._create_footer(msg)
        for editable in self._find_actual_text(msg):
            # Plain-text parts use "\n"; anything else (e.g. HTML) "<br>".
            nl = u'\n' if editable.get_content_subtype() == 'plain' else u'<br>'
            editable.set_payload((nl * 2).join([
                nl.join(header),
                EMAIL.sub(anonymize_email,
                          editable.get_clean_payload(
                              self._cfg['forbidden_words'])),
                nl.join(footer)]))

    def _choose_intro(self):
        '''Randomly chooses an introduction text from the configuration.'''
        return random.choice(self._cfg['introductions'])

    def _create_footer(self, msg):
        '''
        Creates a footer for the message, returned as a list of strings.

        The footer contains the name of the list, a randomly chosen quote
        and the program id.
        '''
        return [FOOTER_DIVIDER,
                self._cfg['real_name'],
                random.choice(self._cfg['quotes']),
                self._powered_by()]

    def _powered_by(self):
        '''
        Returns the program id, which consists of the name, version, and
        description of this sw.
        '''
        name = self._manifest['name']
        version = self._manifest['version']
        description = self._manifest['description']
        return u'Powered by %s %s, %s' % (name, version, description)