def main():
    """Command-line entry point: build the pipeline pieces, run them, save the result.

    Options are obtained from ``parse_args`` using the module docstring. The
    input collection comes from ``text2bioc.text2collection`` when the
    ``text`` command is set, or from ``bioc.load`` when the ``bioc`` command
    is set. After ``pipeline`` has been called on it, the collection is
    written with ``bioc.dump`` to the ``--output`` path.

    Raises:
        KeyError: if neither the ``text`` nor the ``bioc`` command is set.
    """
    argv = parse_args(__doc__, version='version 2')
    print(argv)

    # The dependency converter is handed the lemmatizer, so build that first.
    lemmatizer = Lemmatizer()
    ptb2dep = NegBioPtb2DepConverter(lemmatizer, universal=True)
    splitter = NegBioSSplitter(newline=argv['--newline_is_sentence_break'])
    parser = NegBioParser(model_dir=argv['--bllip-model'])

    # NOTE(review): get_absolute_path presumably resolves each pattern option
    # against the given package-relative file -- confirm against its definition.
    pattern_options = (
        ('--neg-patterns', 'negbio/patterns/neg_patterns.txt'),
        ('--uncertainty-patterns', 'negbio/patterns/uncertainty_patterns.txt'),
    )
    for option, path in pattern_options:
        argv = get_absolute_path(argv, option, path)

    mm = pymetamap.MetaMap.get_instance(argv['--metamap'])
    neg_detector = negdetect.Detector(
        argv['--neg-patterns'],
        argv['--uncertainty-patterns'],
    )

    # The literal string 'None' on the command line means "no CUI list".
    cuis_option = argv['--cuis']
    cuis = None if cuis_option == 'None' else read_cuis(cuis_option)

    if argv['text']:
        collection = text2bioc.text2collection(argv['SOURCES'])
    else:
        if not argv['bioc']:
            raise KeyError
        with open(argv['SOURCE']) as bioc_in:
            collection = bioc.load(bioc_in)

    pipeline(collection, mm, splitter, parser, ptb2dep, neg_detector, cuis)

    output_path = os.path.expanduser(argv['--output'])
    with open(output_path, 'w') as bioc_out:
        bioc.dump(collection, bioc_out)
def main(argv):
    """Parse the command line and hand the pipeline to ``scan.scan_document``.

    Args:
        argv: argument list forwarded to ``docopt.docopt`` together with the
            module docstring.

    The sentence splitter, parser, dependency converter, lemmatizer and
    negation detector are created once here and passed to
    ``scan.scan_document`` as ``non_sequences``, with ``pipeline`` as ``fn``.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    splitter = ssplit.NltkSSplitter(newline=True)
    parser = parse.Bllip(model_dir=options['--model'])
    ptb2dep = ptb2ud.Ptb2DepConverter(universal=True)
    lemmatizer = ptb2ud.Lemmatizer()
    neg_detector = negdetect.Detector(
        options['--neg-patterns'],
        options['--uncertainty-patterns'],
    )

    # NOTE(review): presumably these are forwarded to `pipeline` for every
    # scanned document -- confirm against scan.scan_document.
    components = [splitter, parser, ptb2dep, lemmatizer, neg_detector]

    scan.scan_document(
        source=options['SOURCE'],
        directory=options['--out'],
        suffix='.neg.xml',
        fn=pipeline,
        non_sequences=components,
    )
def main(argv):
    """Parse the command line, run the pipeline on text input, and save it.

    Args:
        argv: argument list forwarded to ``docopt.docopt`` together with the
            module docstring.

    A collection is built from ``SOURCE`` by ``text2bioc.text2collection``,
    passed to ``pipeline`` along with the components created here, and then
    written with ``bioc.dump`` to the ``--out`` path.
    """
    options = docopt.docopt(__doc__, argv=argv)
    print(options)

    splitter = ssplit.NltkSSplitter(newline=options['--newline_is_sentence_break'])
    parser = parse.Bllip(model_dir=options['--bllip-model'])
    ptb2dep = ptb2ud.Ptb2DepConverter(universal=True)
    lemmatizer = ptb2ud.Lemmatizer()
    mm = pymetamap.MetaMap.get_instance(options['--metamap'])
    neg_detector = negdetect.Detector(
        options['--neg-patterns'],
        options['--uncertainty-patterns'],
    )

    # The literal string 'None' on the command line means "no CUI list".
    cuis_option = options['--cuis']
    cuis = None if cuis_option == 'None' else dner_mm.read_cuis(cuis_option)

    collection = text2bioc.text2collection(
        options['SOURCE'],
        split_document=options['--split-document'],
    )
    pipeline(collection, mm, splitter, parser, ptb2dep, lemmatizer, neg_detector, cuis)

    output_path = os.path.expanduser(options['--out'])
    with open(output_path, 'w') as bioc_out:
        bioc.dump(collection, bioc_out)