import runner
import mark
from nltk.tokenize import sent_tokenize

# Prefix handed to ``runner.load`` to locate the saved model.
model_prefix = 'model/current'
# Loaded once, at import time, and shared by every ``predict`` call.
encdec, opt, conf = runner.load(model_prefix)


def split(source_text):
    """Split ``source_text`` into sentences using NLTK's ``sent_tokenize``."""
    return sent_tokenize(source_text)


def _annotate_sentence(source):
    """Return one ``{"source", "annotation"}`` dict per token of ``source``."""
    # Only the hypotheses are used; the returned batch is discarded.
    _batch, hyp = runner.predict(conf, encdec, source)
    tokens = conf.corpus.tokenize(source, cleanup_tag=False)
    # The first hypothesis is a pair; only its second element is decoded.
    _t, y = hyp[0]
    annotations = mark.decoded_vec_to_hash(y)
    # Annotations are looked up by token position (not zipped), so a missing
    # position still surfaces as a lookup error exactly as before.
    return [
        {"source": token, "annotation": annotations[i]}
        for i, token in enumerate(tokens)
    ]


def predict(source_text):
    """Annotate every sentence found in ``source_text``.

    The text is split with :func:`split`; each sentence is run through
    ``runner.predict`` and tokenized with
    ``conf.corpus.tokenize(..., cleanup_tag=False)``. The decoded output of
    the first hypothesis is paired with the tokens by position.

    Returns a list with one entry per sentence. Each entry is a list of
    dicts of the form ``{"source": token, "annotation": annotation}``, one
    per token, in token order.

    A token position that has no entry in the decoded annotations raises
    the lookup error of the annotations container.
    """
    return [_annotate_sentence(source) for source in split(source_text)]
'''
this file collects and runs ``importer``'s testsuite.

:author: Sam Gammon <*****@*****.**>
:license: This software follows the MIT (OSI-approved) license for open
          source software. A truncated version is included here; for full
          licensing details, see ``LICENSE.md`` in the root directory of
          the project.

          Copyright (c) 2013, Keen IO

          The above copyright notice and this permission notice shall be
          included in all copies or substantial portions of the Software.

          THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
          EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
          OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
          NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
          HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
          WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
          FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
          OTHER DEALINGS IN THE SOFTWARE.
'''

# testrunner
import runner

## Run the testsuite! :)
# The path fix runs before anything is loaded; presumably the suite's
# modules are only importable afterwards -- confirm in ``runner.fix_path``.
runner.fix_path()  # fix sys.path
# ``runner.load()`` supplies whatever ``runner.run`` executes.
runner.run(runner.load())
# Entry script: parses the configuration, opens the corpus and then either
# drops into an interactive console, trains a model, or restores a saved
# model before dropping into the console.
#
# NOTE(review): ``sys`` and ``embed`` are used below but are not imported in
# the visible part of this file -- presumably imported above (``embed``
# looks like IPython's); confirm.

# Make the modules under ``lib`` importable before importing them.
sys.path.append('lib')
import corpus
import config
import runner
import mark

conf = config.parse_args()
# NOTE: this rebinds ``corpus``, shadowing the module imported above. The
# console examples at the bottom call methods on this name.
corpus = conf.open_corpus()

if conf.mode() == 'console':
    # Interactive console with the names defined above in scope.
    embed()
elif conf.mode() == 'train':
    train_scores, test_scores = runner.train(conf)
    runner.report_bleu_graph(train_scores, test_scores)
elif conf.mode() == 'restore_console':
    # ``conf`` is replaced by the one returned together with the restored
    # model, so the console sees the restored configuration.
    encdec, opt, conf = runner.load(conf.load_prefix())
    embed()

# usage: ---------------------------------------------------------------
# Example console session (the last input is shown without its output):
#
# source = "this is a pen."
# batch, hyp = runner.predict(conf, encdec, source)
# x = batch.data_at(0)
# t, y = hyp[0]
# mark.decoded_vec_to_str(y)
#
# In [24]: corpus.tokenize(source, cleanup_tag=False)
# Out[24]: [u'<bos>', u'this', u'is', u'a', u'pen', u'.', u'<eos>']
#
# In [20]: corpus.ids_to_tokens(x)
# Out[20]: [u'<bos>', u'this', u'is', u'a', u'<unk>', u'.', u'<eos>']
#
# In [21]: mark.decoded_vec_to_str(y)
'''
Test runner script: fixes ``sys.path`` and runs the testsuite.

:author: Sam Gammon <*****@*****.**>
:license: This software follows the MIT (OSI-approved) license for open
          source software. A truncated version is included here; for full
          licensing details, see ``LICENSE.md`` in the root directory of
          the project.

          Copyright (c) 2013, Keen IO

          The above copyright notice and this permission notice shall be
          included in all copies or substantial portions of the Software.

          THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
          EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
          OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
          NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
          HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
          WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
          FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
          OTHER DEALINGS IN THE SOFTWARE.
'''

# testrunner
import runner

## Run the testsuite! :)
# The path is fixed first, then the loaded suite is handed to the runner.
runner.fix_path()  # fix sys.path
runner.run(runner.load())
import runner
import mark
from nltk.tokenize import sent_tokenize

# Location prefix of the saved model passed to ``runner.load``.
model_prefix = 'model/current'
# The model is loaded a single time when this module is imported.
encdec, opt, conf = runner.load(model_prefix)


def split(source_text):
    """Return the sentences of ``source_text`` as given by ``sent_tokenize``."""
    return sent_tokenize(source_text)


def predict(source_text):
    """Return per-token annotations for each sentence of ``source_text``.

    Every sentence produced by :func:`split` is passed to
    ``runner.predict`` and tokenized with
    ``conf.corpus.tokenize(..., cleanup_tag=False)``. The second element of
    the first hypothesis is converted with ``mark.decoded_vec_to_hash`` and
    its entries are matched to the tokens by position.

    Returns a list of sentences, where each sentence is a list of
    ``{"source": token, "annotation": annotation}`` dicts in token order.

    If the decoded annotations have no entry for some token position, the
    container's own lookup error propagates to the caller.
    """
    sentences = []
    for source in split(source_text):
        # The batch returned alongside the hypotheses is not needed here.
        _, hyp = runner.predict(conf, encdec, source)
        tokens = conf.corpus.tokenize(source, cleanup_tag=False)
        # Unpack the first hypothesis as a pair and keep only its second part.
        _, y = hyp[0]
        annotations = mark.decoded_vec_to_hash(y)
        # Index by position rather than zip: the annotations container type
        # is not visible here, so positional lookup is kept as-is.
        sentences.append([
            {"source": token, "annotation": annotations[position]}
            for position, token in enumerate(tokens)
        ])
    return sentences