def __init__(self, args):
    """Build the full translation pipeline from parsed CLI ``args``.

    Loads the pickled state and model weights, builds the encoder-decoder
    graph, compiles either a beam-search graph or a multi-sample sampler,
    and loads both vocabulary mappings (word -> index and index -> word).
    """
    self.args = args
    self.state = prototype_state()
    with open(self.args.state) as src:
        self.state.update(cPickle.load(src))
    # SECURITY: eval() executes arbitrary code from --changes; acceptable
    # only because this is a trusted command-line tool, never on untrusted input.
    self.state.update(eval("dict({})".format(self.args.changes)))
    logging.basicConfig(
        level=getattr(logging, self.state['level']),
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
    rng = numpy.random.RandomState(self.state['seed'])
    enc_dec = RNNEncoderDecoder(self.state, rng, skip_init=True)
    enc_dec.build()
    self.lm_model = enc_dec.create_lm_model()
    self.lm_model.load(self.args.model_path)
    # Context managers close the vocabulary files promptly; the original
    # left both handles open.
    with open(self.state['word_indx'], 'rb') as f:
        self.indx_word = cPickle.load(f)
    self.sampler = None
    self.beam_search = None
    if self.args.beam_search:
        self.beam_search = BeamSearch(enc_dec)
        self.beam_search.compile()
    else:
        self.sampler = enc_dec.create_sampler(many_samples=True)
    with open(self.state['indx_word'], 'r') as f:
        self.idict_src = cPickle.load(f)
def __init__(self, args):
    """Initialize the translator: state, model, search machinery, vocabularies.

    NOTE(review): this is a byte-for-byte duplicate of the other ``__init__``
    in this file — consider deduplicating into a shared base class.
    """
    self.args = args
    self.state = prototype_state()
    with open(self.args.state) as src:
        self.state.update(cPickle.load(src))
    # SECURITY: eval() of --changes runs arbitrary code; trusted CLI only.
    self.state.update(eval("dict({})".format(self.args.changes)))
    logging.basicConfig(
        level=getattr(logging, self.state['level']),
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")
    rng = numpy.random.RandomState(self.state['seed'])
    enc_dec = RNNEncoderDecoder(self.state, rng, skip_init=True)
    enc_dec.build()
    self.lm_model = enc_dec.create_lm_model()
    self.lm_model.load(self.args.model_path)
    # 'with' blocks fix the original's leaked vocabulary file handles.
    with open(self.state['word_indx'], 'rb') as fh:
        self.indx_word = cPickle.load(fh)
    self.sampler = None
    self.beam_search = None
    if self.args.beam_search:
        self.beam_search = BeamSearch(enc_dec)
        self.beam_search.compile()
    else:
        self.sampler = enc_dec.create_sampler(many_samples=True)
    with open(self.state['indx_word'], 'r') as fh:
        self.idict_src = cPickle.load(fh)
def main(): args = parse_args() state = prototype_phrase_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig( level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") server_address = ('', args.port) httpd = ThreadedHTTPServer(server_address, MTReqHandler) #httpd = BaseHTTPServer.HTTPServer(server_address, MTReqHandler) rng = numpy.random.RandomState(state['seed']) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True) enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state['word_indx'], 'rb')) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state['indx_word'], 'r')) tokenizer_cmd = [os.getcwd() + '/tokenizer.perl', '-l', 'en', '-q', '-'] detokenizer_cmd = [ os.getcwd() + '/detokenizer.perl', '-l', 'fr', '-q', '-' ] sampler = Sampler(state, lm_model, indx_word, idict_src, beam_search=beam_search, tokenizer_cmd=tokenizer_cmd, detokenizer_cmd=detokenizer_cmd) httpd.sampler = sampler print 'Server starting..' httpd.serve_forever() '''
def main(): args = parse_args() state = prototype_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig( level=getattr(logging, state["level"]), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s" ) server_address = ("", args.port) httpd = BaseHTTPServer.HTTPServer(server_address, MTReqHandler) rng = numpy.random.RandomState(state["seed"]) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True) enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state["word_indx"], "rb")) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state["indx_word"], "r")) tokenizer_cmd = [os.getcwd() + "/tokenizer.perl", "-l", "en", "-q", "-"] detokenizer_cmd = [os.getcwd() + "/detokenizer.perl", "-l", "fr", "-q", "-"] sampler = Sampler( state, lm_model, indx_word, idict_src, beam_search=beam_search, tokenizer_cmd=tokenizer_cmd, detokenizer_cmd=detokenizer_cmd, ) httpd.sampler = sampler print "Server starting.." httpd.serve_forever() """
def main(): args = parse_args() state = prototype_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig(level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") rng = numpy.random.RandomState(state['seed']) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True) enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state['word_indx'],'rb')) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state['indx_word'],'r')) if args.source and args.trans: # Actually only beam search is currently supported here assert beam_search assert args.beam_size fsrc = open(args.source, 'r') ftrans = open(args.trans, 'w') start_time = time.time() n_samples = args.beam_size total_cost = 0.0 logging.debug("Beam size: {}".format(n_samples)) for i, line in enumerate(fsrc): seqin = line.strip() seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) if args.verbose: print "Parsed Input:", parsed_in trans, costs, _ = sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) try: best = numpy.argmin(costs) print >>ftrans, trans[best] total_cost += costs[best] except: print >> ftrans, "FAIL" if args.verbose: print "Translation:", trans[best] if (i + 1) % 100 == 0: ftrans.flush() logger.debug("Current speed is {} per sentence". format((time.time() - start_time) / (i + 1))) print "Total cost of the translations: {}".format(total_cost) fsrc.close() ftrans.close() else: while True: try: seqin = raw_input('Input Sequence: ') n_samples = int(raw_input('How many samples? ')) alpha = None if not args.beam_search: alpha = float(raw_input('Inverse Temperature? 
')) seq,parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) print "Parsed Input:", parsed_in except Exception: print "Exception while parsing your input:" traceback.print_exc() continue sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize, alpha=alpha, verbose=True)
def main(): args = parse_args() state = prototype_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig( level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") rng = numpy.random.RandomState(state['seed']) ########################################################### # by He Wei #enc_dec = RNNEncoderDecoder(state, rng, skip_init=True) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True, compute_alignment=True) ########################################################### enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state['word_indx'], 'rb')) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state['indx_word'], 'r')) if args.source and args.trans: # Actually only beam search is currently supported here #assert beam_search #assert args.beam_size fsrc = open(args.source, 'r') ftrans = open(args.trans, 'w') start_time = time.time() #n_samples = args.beam_size total_cost = 0.0 #logging.debug("Beam size: {}".format(n_samples)) for i, line in enumerate(fsrc): seqin = line.strip() seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) if args.verbose: print "Parsed Input:", parsed_in if args.beam_search: trans, costs, _, aligns = sample(lm_model, seq, args.beam_size, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) else: trans, costs, _, aligns = sample(lm_model, seq, 1, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) best = numpy.argmin(costs) out_str = trans[best] align_str = [] if args.beam_search and args.alignment: for (idx, _a) in enumerate(aligns[best]): align_str.append("[%s]" % ' '.join(map(str, _a))) 
#align_str.append("[%d-%d:%f,%d-%d:%f]" % (idx, _a[0], _a[1], idx, _a[2], _a[3])) out_str += "\t" + ' '.join(align_str) if args.beam_search and args.nbest: nbest_trans = trans nbest_costs = costs nbest_trans = numpy.array(nbest_trans)[numpy.argsort( nbest_costs)] nbest_costs = numpy.array(sorted(nbest_costs)) nbest_str = ' ||| '.join( "%s | %f" % (t, c) for (t, c) in zip(nbest_trans, nbest_costs)) out_str += "\t" + nbest_str print >> ftrans, out_str if args.verbose: print "[Translation]%s\t[Align]%s" % (trans[best], ' '.join(align_str)) total_cost += costs[best] if (i + 1) % 100 == 0: ftrans.flush() logger.debug("Current speed is {} per sentence".format( (time.time() - start_time) / (i + 1))) print "Total cost of the translations: {}".format(total_cost) print "Total used time: {}".format(time.time() - start_time) fsrc.close() ftrans.close() else: while True: try: seqin = raw_input('Input Sequence: ') n_samples = int(raw_input('How many samples? ')) alpha = None if not args.beam_search: alpha = float(raw_input('Inverse Temperature? ')) seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) print "Parsed Input:", parsed_in except Exception: print "Exception while parsing your input:" traceback.print_exc() continue sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize, alpha=alpha, verbose=True)
def main(): args = parse_args() state = prototype_search_with_coverage_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig( level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") rng = numpy.random.RandomState(state['seed']) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True, compute_alignment=True) enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state['word_indx'], 'rb')) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state['indx_word'], 'r')) if args.source and args.trans: # Actually only beam search is currently supported here assert beam_search assert args.beam_size fsrc = open(args.source, 'r') ftrans = open(args.trans, 'w') start_time = time.time() n_samples = args.beam_size total_cost = 0.0 logging.debug("Beam size: {}".format(n_samples)) for i, line in enumerate(fsrc): seqin = line.strip() seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) if lm_model.maintain_coverage: if lm_model.use_linguistic_coverage and lm_model.use_fertility_model: trans, aligns, costs, coverages, fertility, _ = sample( lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) else: trans, aligns, costs, coverages, _ = sample( lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) else: trans, aligns, costs, _ = sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) if args.verbose: print "Parsed Input:", parsed_in if len(trans) == 0: trans = ['Failed'] costs = [0.0] best = numpy.argmin(costs) print >> ftrans, 
trans[best] if args.verbose: print "Translation:", trans[best] print "Aligns:" # aligns shape: (target_len, source_len) # we reverse it to the shape (source_len, target_len) to show the matrix print numpy.array(aligns[best]).transpose().tolist() if lm_model.maintain_coverage: # since we filtered <eos> from trans[best], thus the index adds 1 coverage = coverages[best] print "Coverage:", words = parsed_in.split() for k in xrange(len(words)): print '%s/%.2f' % (words[k], coverage[k]), print '' if lm_model.use_linguistic_coverage and lm_model.use_fertility_model: print 'Fertility: ', for k in xrange(len(words)): print '%s/%.2f' % (words[k], fertility[k]), print '' print total_cost += costs[best] if (i + 1) % 100 == 0: ftrans.flush() logger.debug("Current speed is {} per sentence".format( (time.time() - start_time) / (i + 1))) print "Total cost of the translations: {}".format(total_cost) fsrc.close() ftrans.close() else: while True: try: seqin = raw_input('Input Sequence: ') n_samples = int(raw_input('How many samples? ')) alpha = None if not args.beam_search: alpha = float(raw_input('Inverse Temperature? ')) seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) print "Parsed Input:", parsed_in except Exception: print "Exception while parsing your input:" traceback.print_exc() continue sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize, alpha=alpha, verbose=True)
def main(): args = parse_args() state = prototype_search_with_coverage_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig(level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") rng = numpy.random.RandomState(state['seed']) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True, compute_alignment=True) enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state['word_indx'],'rb')) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state['indx_word'],'r')) if args.source and args.trans: # Actually only beam search is currently supported here assert beam_search assert args.beam_size fsrc = open(args.source, 'r') ftrans = open(args.trans, 'w') start_time = time.time() n_samples = args.beam_size total_cost = 0.0 logging.debug("Beam size: {}".format(n_samples)) for i, line in enumerate(fsrc): seqin = line.strip() seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) if lm_model.maintain_coverage: if lm_model.use_linguistic_coverage and lm_model.use_fertility_model: trans, aligns, costs, coverages, fertility, _ = sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) else: trans, aligns, costs, coverages, _ = sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) else: trans, aligns, costs, _ = sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) if args.verbose: print "Parsed Input:", parsed_in if len(trans) == 0: trans = ['Failed'] costs = [0.0] best = numpy.argmin(costs) print >>ftrans, 
trans[best] if args.verbose: print "Translation:", trans[best] print "Aligns:" # aligns shape: (target_len, source_len) # we reverse it to the shape (source_len, target_len) to show the matrix print numpy.array(aligns[best]).transpose().tolist() if lm_model.maintain_coverage: # since we filtered <eos> from trans[best], thus the index adds 1 coverage = coverages[best] print "Coverage:", words = parsed_in.split() for k in xrange(len(words)): print '%s/%.2f'%(words[k], coverage[k]), print '' if lm_model.use_linguistic_coverage and lm_model.use_fertility_model: print 'Fertility: ', for k in xrange(len(words)): print '%s/%.2f'%(words[k], fertility[k]), print '' print total_cost += costs[best] if (i + 1) % 100 == 0: ftrans.flush() logger.debug("Current speed is {} per sentence". format((time.time() - start_time) / (i + 1))) print "Total cost of the translations: {}".format(total_cost) fsrc.close() ftrans.close() else: while True: try: seqin = raw_input('Input Sequence: ') n_samples = int(raw_input('How many samples? ')) alpha = None if not args.beam_search: alpha = float(raw_input('Inverse Temperature? ')) seq,parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) print "Parsed Input:", parsed_in except Exception: print "Exception while parsing your input:" traceback.print_exc() continue sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize, alpha=alpha, verbose=True)
def main(): args = parse_args() state = prototype_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig(level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") rng = numpy.random.RandomState(state['seed']) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True) enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state['word_indx'],'rb')) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state['indx_word'],'r')) if args.source and args.trans: # Actually only beam search is currently supported here assert beam_search assert args.beam_size fsrc = open(args.source, 'r') ftrans = open(args.trans, 'w') start_time = time.time() n_samples = args.beam_size total_cost = 0.0 logging.debug("Beam size: {}".format(n_samples)) for i, line in enumerate(fsrc): seqin = line.strip() seq, parsed_in = parse_input(state, indx_word, seqin, idx2word=idict_src) if lm_model.maintain_coverage: trans, costs, coverages, _ = sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) else: trans, costs, _ = sample(lm_model, seq, n_samples, sampler=sampler, beam_search=beam_search, ignore_unk=args.ignore_unk, normalize=args.normalize) if args.verbose: print "Parsed Input:", parsed_in if len(trans) == 0: trans = ['Failed'] costs = [0.0] best = numpy.argmin(costs) print >>ftrans, trans[best] if args.verbose: print "Translation:", trans[best] if lm_model.maintain_coverage: