def RewriteContext(task_context):
    """Expand a task context into one rewritten context file per input file.

    The text-format TaskSpec at `task_context` is parsed, and every file
    pattern other than '-' is prefixed with FLAGS.resource_dir.  The parts of
    the resource named FLAGS.input are then glob-expanded; for each matched
    file the input part is pointed at that file, every part of the resource
    named FLAGS.output is pointed at '<file without extension>.out<out_pat>',
    and the resulting spec is written to its own temporary file.

    Args:
        task_context: Path of a text-format TaskSpec file.

    Returns:
        A list with the paths of the temporary context files, one per file
        matched by the input patterns.  The files are created with
        delete=False, so they are not removed automatically.
    """
    context = task_spec_pb2.TaskSpec()
    with gfile.FastGFile(task_context, 'rb') as fin:
        text_format.Merge(fin.read(), context)

    # Anchor every real file pattern ('-' is left untouched) at resource_dir.
    for resource in context.input:
        for part in resource.part:
            if part.file_pattern != '-':
                part.file_pattern = os.path.join(FLAGS.resource_dir,
                                                 part.file_pattern)

    # Remember the (already prefixed) pattern of the last output part; it is
    # appended to every generated output name below.  It is only read when an
    # output part exists, so the None placeholder is never concatenated.
    out_pat = None
    for resource in context.input:
        if resource.name == FLAGS.output:
            for part in resource.part:
                out_pat = part.file_pattern

    contexts = []
    for resource in context.input:
        if resource.name != FLAGS.input:
            continue
        for part in resource.part:
            if part.file_pattern == '-':
                continue
            # glob.glob() returns a list, so reassigning the pattern inside
            # the loop does not affect the set of files being iterated.
            for name in glob.glob(part.file_pattern):
                part.file_pattern = name
                stem = os.path.splitext(name)[0]
                for out_resource in context.input:
                    if out_resource.name != FLAGS.output:
                        continue
                    for out_part in out_resource.part:
                        out_part.file_pattern = stem + '.out' + out_pat
                # Text mode: str(context) is text, and the default 'w+b' mode
                # rejects str on Python 3.
                with tempfile.NamedTemporaryFile(mode='w',
                                                 delete=False) as fout:
                    fout.write(str(context))
                contexts.append(fout.name)
    return contexts
def WriteContext(self, corpus_format):
    """Assemble the task spec for this test and write it to self.context_file.

    The spec gets a 'documents' input pointing at self.corpus_file with the
    given format, plus one input per lexical resource located under
    FLAGS.test_tmpdir (registered with an empty format string).

    Args:
        corpus_format: Format string passed to AddInput for the 'documents'
            input.
    """
    resource_names = (
        'word-map', 'lcword-map', 'tag-map', 'category-map', 'label-map',
        'prefix-table', 'suffix-table', 'tag-to-category',
    )
    spec = task_spec_pb2.TaskSpec()
    self.AddInput('documents', self.corpus_file, corpus_format, spec)
    for resource_name in resource_names:
        resource_path = os.path.join(FLAGS.test_tmpdir, resource_name)
        self.AddInput(resource_name, resource_path, '', spec)

    logging.info('Writing context to: %s', self.context_file)
    with open(self.context_file, 'w') as out_file:
        out_file.write(str(spec))
def RewriteContext(task_context):
    """Rewrite a task context so its file patterns live under resource_dir.

    The text-format TaskSpec at `task_context` is parsed, every file pattern
    other than '-' is prefixed with FLAGS.resource_dir, and the result is
    written to a new temporary file.

    Args:
        task_context: Path of a text-format TaskSpec file.

    Returns:
        Path of the temporary file holding the rewritten spec.  The file is
        created with delete=False, so it is not removed automatically.
    """
    context = task_spec_pb2.TaskSpec()
    with gfile.FastGFile(task_context) as fin:
        text_format.Merge(fin.read(), context)

    for resource in context.input:
        for part in resource.part:
            if part.file_pattern != '-':
                part.file_pattern = os.path.join(FLAGS.resource_dir,
                                                 part.file_pattern)

    # Text mode: str(context) is text, and the default 'w+b' mode rejects str
    # on Python 3.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as fout:
        fout.write(str(context))
    return fout.name
def RewriteContext():
    """Redirect this stage's resources to its output paths and save the spec.

    Parses the text-format TaskSpec at FLAGS.task_context.  Every input whose
    creator equals StageName() has its parts replaced by a single part whose
    file pattern is OutputPath(<resource name>).  The updated spec is then
    written to OutputPath('context').
    """
    spec = task_spec_pb2.TaskSpec()
    with gfile.FastGFile(FLAGS.task_context, 'rb') as src:
        text_format.Merge(src.read(), spec)

    for resource in spec.input:
        if resource.creator != StageName():
            continue
        # Drop whatever parts were listed and register exactly one new part.
        del resource.part[:]
        replacement = resource.part.add()
        replacement.file_pattern = os.path.join(OutputPath(resource.name))

    with gfile.FastGFile(OutputPath('context'), 'w') as dst:
        dst.write(str(spec))
def create_lexicon_context(path):
    """Build a TaskSpec listing the standard lexical resources under `path`.

    Args:
        path: Directory joined with each resource name to form its file
            pattern.

    Returns:
        A task_spec_pb2.TaskSpec with one input per lexical resource, each
        holding a single part whose file pattern is `path`/<resource name>.
    """
    lexicon_names = (
        'word-map', 'tag-map', 'tag-to-category', 'lcword-map',
        'category-map', 'char-map', 'char-ngram-map', 'label-map',
        'prefix-table', 'suffix-table',
    )
    spec = task_spec_pb2.TaskSpec()
    for resource_name in lexicon_names:
        resource = spec.input.add(name=resource_name)
        resource.part.add(file_pattern=os.path.join(path, resource_name))
    return spec
def WriteContext(self, corpus_format):
    """Assemble the task spec for this test and write it to self.context_file.

    Two parser parameters (embedding names and features) are registered
    first, followed by a 'documents' input pointing at self.corpus_file with
    the given format, and one input per lexical resource located under
    FLAGS.test_tmpdir (registered with an empty format string).

    Args:
        corpus_format: Format string passed to AddInput for the 'documents'
            input.
    """
    resource_names = (
        'word-map', 'lcword-map', 'tag-map', 'category-map', 'label-map',
        'prefix-table', 'suffix-table', 'tag-to-category', 'char-map',
        'char-ngram-map',
    )
    spec = task_spec_pb2.TaskSpec()
    self.AddParameter('brain_parser_embedding_names', 'words;tags', spec)
    self.AddParameter('brain_parser_features', 'input.token.word;input.tag',
                      spec)
    self.AddInput('documents', self.corpus_file, corpus_format, spec)
    for resource_name in resource_names:
        resource_path = os.path.join(FLAGS.test_tmpdir, resource_name)
        self.AddInput(resource_name, resource_path, '', spec)

    logging.info('Writing context to: %s', self.context_file)
    with open(self.context_file, 'w') as out_file:
        out_file.write(str(spec))
def RewriteContext(self, task_context, in_corpus_name):
    """Rewrite a task context for this model and reserve a corpus temp file.

    The text-format TaskSpec at `task_context` is parsed.  Every file pattern
    other than '-' is prefixed with self.model_dir.  For the resource named
    `in_corpus_name`, parts whose pattern is '-' are pointed at a freshly
    created, empty temporary file instead.  The rewritten spec is written to
    a second temporary file.

    Args:
        task_context: Path of a text-format TaskSpec file.
        in_corpus_name: Name of the input resource whose '-' parts should be
            redirected to the reserved temporary file.

    Returns:
        A tuple (context_path, corpus_path): the path of the temporary file
        holding the rewritten spec, and the path of the reserved temporary
        file.  Both are created with delete=False, so neither is removed
        automatically.
    """
    context = task_spec_pb2.TaskSpec()
    with gfile.FastGFile(task_context, 'rb') as fin:
        text_format.Merge(fin.read(), context)

    # Only the name of this file is needed here, so release the handle right
    # away instead of leaking it.
    # NOTE(review): presumably the caller fills this file with the corpus
    # before running the model - confirm against callers.
    with tempfile.NamedTemporaryFile(delete=False) as tf_in:
        corpus_path = tf_in.name

    for resource in context.input:
        is_corpus = resource.name == in_corpus_name
        for part in resource.part:
            if part.file_pattern != '-':
                part.file_pattern = os.path.join(self.model_dir,
                                                 part.file_pattern)
            elif is_corpus:
                part.file_pattern = corpus_path

    # Text mode: str(context) is text, and the default 'w+b' mode rejects str
    # on Python 3.  Closing via the with-block guarantees the spec is flushed
    # to disk before its path is handed back.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as fout:
        fout.write(str(context))
    return fout.name, corpus_path
def testCreateLexiconContext(self):
    """Check create_lexicon_context('/tmp') against _EXPECTED_CONTEXT."""
    expected_context = task_spec_pb2.TaskSpec()
    text_format.Parse(_EXPECTED_CONTEXT, expected_context)

    actual_context = lexicon.create_lexicon_context('/tmp')

    self.assertProtoEquals(actual_context, expected_context)