def load_static_model_class(config):
    """Import ``StaticModel`` from the configured directory and build it.

    The directory named by ``config['config_abs_dir']`` is added to
    ``sys.path`` so that a ``static_model`` module placed there can be
    imported.

    Args:
        config: Mapping holding the key ``'config_abs_dir'``. The whole
            mapping is also handed to the ``StaticModel`` constructor.

    Returns:
        A new ``StaticModel`` instance created from ``config``.

    Raises:
        KeyError: If ``config`` is a dict without ``'config_abs_dir'``.
        ImportError: If no ``static_model`` module can be imported.
    """
    abs_dir = config['config_abs_dir']
    # Register the directory only once; appending on every call would keep
    # growing sys.path when models are loaded repeatedly.
    if abs_dir not in sys.path:
        sys.path.append(abs_dir)
    # Deferred import: the module is only resolvable once the path is set.
    from static_model import StaticModel
    return StaticModel(config)
# -*- coding: utf-8 -*-
# Manual smoke test (Python 2): run one StaticModel predictor over the first
# lines of ../sensitive.txt and print every line the predictor changed.
from static_model import StaticModel

model = StaticModel()

# The with-block closes the input file even if a predictor raises.
with open("../sensitive.txt") as sensitive_file:
    for i, line in enumerate(sensitive_file):
        # Stop after the lines with index 0..5000 have been processed.
        if i > 5000:
            break
        line = unicode(line, "utf-8").strip()
        text = line
        # Exactly one predictor is active; the others are kept for switching.
        #text = model.predict_email(text)
        text = model.predict_name(text)
        #text = model.predict_name_ann(text)
        #text = model.predict_street(text)
        #text = model.predict_number(text)
        # Only report lines that the predictor actually modified.
        if line != text:
            print("\n#### Before ####")
            print(line.encode("utf-8"))
            print("\n#### After ####")
            print(text.encode("utf-8"))
            print("\n=====\n\n")
predict_sentences = predict_sentences[0:len(test_pairs)] return predict_sentences if __name__ == '__main__': ini_char = '</i>' unk_char = '<unk>' t0 = time.time() print ("loading word2vec...") ctable = W2vCharacterTable(opts.w2v_file,ini_char,unk_char) print(" dict size:",ctable.getDictSize()) print (" emb size:",ctable.getEmbSize()) print (time.time()-t0) print ("") if opts.type_model: model = StaticModel(ctable.getDictSize(),ctable.getEmbSize(),opts.hidden_size,opts.batch_size,opts.dropout, opts.max_senten_len,opts.teach_forcing).cuda() else: model = DynamicModel(ctable.getDictSize(),ctable.getEmbSize(),opts.hidden_size,opts.batch_size,opts.dropout, opts.max_senten_len,opts.teach_forcing).cuda() if opts.weights != None: print ("load model parameters...") model.load_state_dict(torch.load(opts.weights)) else: print ("No model parameters!") exit() test_contexts,test_replys = readingTestCorpus(opts.test_file) print ("len(test_contexts):",len(test_contexts)) print ("len(test_replys):",len(test_replys))
# Training set-up: load the character table, read the corpus, build the
# model and create its optimizer. Relies on `opts`, `ini_char` and `unk_char`
# being defined earlier in the script.
t0 = time.time()
print("loading word2vec...")
ctable = W2vCharacterTable(opts.w2v_file, ini_char, unk_char)
print(" dict size:", ctable.getDictSize())
print(" emb size:", ctable.getEmbSize())
print("")

# readingData hands back the character table together with the corpus pairs.
ctable, corpus_pairs = readingData(ctable, opts.train_file,
                                   opts.max_senten_len, opts.max_context_size)
# Seconds elapsed since t0 (table loading plus corpus reading).
print(time.time() - t0)
print("")

# Both model classes take the same constructor arguments, so choose the
# class first and build it once.
model_class = StaticModel if opts.type_model else DynamicModel
model = model_class(ctable.getDictSize(), ctable.getEmbSize(),
                    opts.hidden_size, opts.batch_size, opts.dropout,
                    opts.max_senten_len, opts.teach_forcing).cuda()

# `is not None` is the identity test for the "no weights given" sentinel.
if opts.weights is not None:
    print("load weights...")
    model.load_state_dict(torch.load(opts.weights))
else:
    # No saved weights: initialise from the character table's embedding matrix.
    model.init_parameters(ctable.getEmbMatrix())

model_optimizer = optim.Adam(model.parameters(), lr=opts.lr,
                             weight_decay=opts.weight_decay)
def test_name(self):
    """Print what ``StaticModel.predict_name`` returns for a sample sentence.

    Nothing is asserted; the prediction is only printed for inspection.
    """
    # Disabled expectation kept for reference:
    #   prediction = model.predict_name(u"hejsan david, mvh ( Henrik).")
    #   self.assertEqual(prediction, u"hejsan <NAME>, mvh ( <NAME>).")
    predictor = StaticModel()
    sample = u"Mitt namn är Kalle, men du kan kalla mig SAS."
    print(predictor.predict_name(sample))
# -*- coding: utf-8 -*-
import os
import sys
import json

HERE = os.path.dirname(os.path.realpath(__file__))
# add dependencies for AWS lambda to detect
sys.path.append(os.path.join(HERE, "libraries"))

from static_model import StaticModel

# Built once at import time and shared by every call in this module.
model = StaticModel()


def to_unicode(s):
    """Return ``s`` decoded from UTF-8 when it is a byte ``str`` (Python 2).

    Any other value is returned unchanged.
    """
    # The result must be returned; without `return` the function always
    # evaluated the expression and then yielded None.
    return unicode(s, "utf-8") if isinstance(s, str) else s


def gateway_response(code, body):
    """Build a response dict with a status code and a JSON-encoded body.

    Args:
        code: Value stored under ``"statusCode"``.
        body: JSON-serialisable object; stored under ``"body"`` as a string.

    Returns:
        ``{"statusCode": code, "body": <json string>}``.
    """
    return {"statusCode": code, "body": json.dumps(body)}


def predict(event, context):
    """Parse the JSON payload found in ``event["body"]``.

    Args:
        event: Mapping whose ``"body"`` entry is a JSON document containing
            a ``"messages"`` key.
        context: Not used by the code visible here.

    Returns:
        A ``gateway_response`` with status 503 and an error message when the
        body cannot be parsed or lacks ``"messages"``.
    """
    try:
        payload = json.loads(event["body"])
        # NOTE(review): `messages` is not consumed by the code visible here.
        messages = payload["messages"]
    except Exception as e:
        # Any failure (missing "body", invalid JSON, missing "messages") is
        # reported to the caller instead of being raised.
        return gateway_response(503, {
            "error": "The payload needs to be a valid JSON. Message: " + str(e)
        })