def eval_mst(model_path, test_path, out_prefix, lowercase=True, tagger=None, force=False, result_strem=None):
    '''
    Prepare a CoNLL test file, run an MST parser model over it, and evaluate
    the parser output against the original test file.

    Two files are derived from out_prefix:
      * out_prefix + '_eval_tagged.txt' -- the rewritten test file handed to the parser
      * out_prefix + '_out_tagged.txt'  -- the parser output

    The rewritten test file is only (re)generated when it does not exist yet,
    or when force is true.

    @param model_path: Path of the parser model, passed to MSTParser.test().
    @param test_path: Path of the CoNLL test file. It is read to build the
                      rewritten file and is also the reference given to
                      eval_conll_paths().
    @param out_prefix: Prefix (optionally including a directory) for the two
                       generated files.
    @param lowercase: Whether to lowercase the test corpus before parsing.
    @param tagger: If not None, passed to StanfordPOSTagger() and the
                   resulting tagger is used to tag the corpus.
    @param force: Regenerate the rewritten test file even if it already exists.
    @param result_strem: Currently unused; kept for interface compatibility.
    '''
    mp = MSTParser()

    # -------------------------------------------
    # Use the output prefix to create some new files.
    # -------------------------------------------
    eval_path = out_prefix + '_eval_tagged.txt'
    out_path = out_prefix + '_out_tagged.txt'

    # -------------------------------------------
    # Rewrite the test file; POS tag the data
    # with the POS tags from our tagger,
    # and strip features.
    # -------------------------------------------
    if not os.path.exists(eval_path) or force:
        LOG.log(1000, "")

        # Honor the caller's flag; this used to be hard-coded to True, which
        # made the `lowercase` parameter ineffective at read time.
        # NOTE(review): assumes ConllCorpus.read(lowercase=...) lowercases
        # the forms on load -- confirm against ConllCorpus.
        cc = ConllCorpus.read(test_path, lowercase=lowercase)
        if lowercase:
            cc.lower()

        cc.strip_tags()
        cc.strip_feats()

        if tagger is not None:
            LOG.log(1000, "POS Tagging evaluation ")
            cc.tag(StanfordPOSTagger(tagger))

        # os.makedirs('') raises FileNotFoundError, so only create the
        # containing directory when the prefix actually has one.
        eval_dir = os.path.dirname(eval_path)
        if eval_dir:
            os.makedirs(eval_dir, exist_ok=True)
        cc.write(eval_path)

    # -------------------------------------------
    # Parse the rewritten file, then evaluate the output
    # against the original test file.
    # -------------------------------------------
    mp.test(model_path, eval_path, out_path)
    eval_conll_paths(test_path, out_path)
def train_tagger(prefix, slashtags=None, conll=None, tagmap=None, lowercase=False):
    '''
    Train a Stanford POS tagger from one or more CoNLL files.

    Every sentence of every CoNLL file is converted with its slashtags()
    method and written, one sentence per line, to a temporary file. That
    file is handed to stanford_tagger.train_postagger() and removed again
    afterwards, whether or not training succeeded.

    @param prefix: Output prefix; the model path passed to the trainer is
                   prefix + '.tagger'.
    @param slashtags: Slashtag-format training files. Not supported yet:
                      any non-empty value raises NotImplementedError.
    @param conll: Iterable of CoNLL file paths to draw training sentences from.
    @param tagmap: Passed through to ConllCorpus.read().
    @param lowercase: Passed through to ConllCorpus.read().
    @raise NotImplementedError: If slashtags is non-empty.
    '''
    # None instead of mutable [] defaults; behavior for callers is unchanged.
    if slashtags:
        # Fail fast, before any CoNLL file is read.
        raise NotImplementedError('Training from slashtags files is not implemented.')
    conll = conll if conll is not None else ()

    # -------------------------------------------
    # Collect the training sentences from all the CoNLL files.
    # -------------------------------------------
    trainsents = []
    for path in conll:
        corpus = ConllCorpus.read(path, lowercase=lowercase, tagmap=tagmap)
        trainsents.extend(sent.slashtags() for sent in corpus)

    # delete=False so the file survives being closed and can be reopened by
    # path by the trainer. UTF-8 matches how conll_to_slashtags() writes
    # slashtags, rather than depending on the locale's default encoding.
    alldatatrain = NamedTemporaryFile('w', delete=False, encoding='utf-8')
    try:
        # -------------------------------------------
        # Now write all the training sentences out to the temporary file.
        # -------------------------------------------
        with alldatatrain:
            alldatatrain.writelines(trainsent + '\n' for trainsent in trainsents)

        # -------------------------------------------
        # And train the tagger.
        # -------------------------------------------
        stanford_tagger.train_postagger(alldatatrain.name, prefix + '.tagger')
    finally:
        # Always remove the temporary file, even if writing or training failed.
        unlink(alldatatrain.name)
def conll_to_slashtags(infiles, outpath):
    '''
    Merge one or more CONLL files into a single POSCorpus and write that
    corpus to one file in slashtag format ('/' delimiter, lowercased).

    @param infiles: A list of CONLL pathnames to convert.
    @param outpath: A single output pathname for the slashtags.
    '''
    # Kept as a function-local import as in the original (presumably to avoid
    # a circular import -- TODO confirm), but hoisted out of the loop.
    from intent.corpora.conll import ConllCorpus

    main_c = POSCorpus()
    for f in infiles:
        # The original bound the result to `cp` but then used an undefined
        # name `c`; extend the merged corpus with the corpus just read.
        # NOTE(review): assumes POSCorpus.extend() accepts a ConllCorpus -- confirm.
        main_c.extend(ConllCorpus.read(f))

    # Render the merged corpus (all input files), not a single input file.
    st = main_c.slashtags('/', lowercase=True)

    # Create the containing path if it doesn't already exist. Skip this when
    # outpath has no directory part, since os.makedirs('') raises
    # FileNotFoundError.
    out_dir = os.path.dirname(outpath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # The context manager closes the file even if the write fails.
    with open(outpath, 'w', encoding='utf-8') as of:
        of.write(st)