def main(): argv = parse_args(__doc__, version='version 2') print(argv) lemmatizer = Lemmatizer() ptb2dep = NegBioPtb2DepConverter(lemmatizer, universal=True) splitter = NegBioSSplitter(newline=argv['--newline_is_sentence_break']) parser = NegBioParser(model_dir=argv['--bllip-model']) argv = get_absolute_path(argv, '--neg-patterns', 'negbio/patterns/neg_patterns.txt') argv = get_absolute_path(argv, '--uncertainty-patterns', 'negbio/patterns/uncertainty_patterns.txt') mm = pymetamap.MetaMap.get_instance(argv['--metamap']) neg_detector = negdetect.Detector(argv['--neg-patterns'], argv['--uncertainty-patterns']) if argv['--cuis'] == 'None': cuis = None else: cuis = read_cuis(argv['--cuis']) if argv['text']: collection = text2bioc.text2collection(argv['SOURCES']) elif argv['bioc']: with open(argv['SOURCE']) as fp: collection = bioc.load(fp) else: raise KeyError pipeline(collection, mm, splitter, parser, ptb2dep, neg_detector, cuis) with open(os.path.expanduser(argv['--output']), 'w') as fp: bioc.dump(collection, fp)
def main(): argv = parse_args(__doc__, version='version 2') print(argv) lemmatizer = Lemmatizer() ptb2dep = NegBioPtb2DepConverter(lemmatizer, universal=True) ssplitter = NegBioSSplitter(newline=argv['--newline_is_sentence_break']) parser = NegBioParser(model_dir=argv['--bllip-model']) argv = get_absolute_path(argv, '--mention_phrases_dir', 'negbio/chexpert/phrases/mention') argv = get_absolute_path(argv, '--unmention_phrases_dir', 'negbio/chexpert/phrases/unmention') argv = get_absolute_path( argv, '--pre-negation-uncertainty-patterns', 'negbio/chexpert/patterns/pre_negation_uncertainty.txt') argv = get_absolute_path( argv, '--post-negation-uncertainty-patterns', 'negbio/chexpert/patterns/post_negation_uncertainty.txt') argv = get_absolute_path(argv, '--neg-patterns', 'negbio/chexpert/patterns/negation.txt') # chexpert loader = NegBioLoader() extractor = NegBioExtractor(Path(argv['--mention_phrases_dir']), Path(argv['--unmention_phrases_dir']), verbose=argv['--verbose']) neg_detector = ModifiedDetector( argv['--pre-negation-uncertainty-patterns'], argv['--neg-patterns'], argv['--post-negation-uncertainty-patterns']) aggregator = NegBioAggregator(CATEGORIES, verbose=argv['--verbose']) if argv['text']: collection = text2bioc.text2collection(argv['SOURCES']) elif argv['bioc']: with open(argv['SOURCE']) as fp: collection = bioc.load(fp) else: raise KeyError pipeline(collection, loader, ssplitter, extractor, parser, ptb2dep, neg_detector, aggregator, verbose=argv['--verbose']) with open(os.path.expanduser(argv['--output']), 'w') as fp: bioc.dump(collection, fp)
def main(argv): argv = docopt.docopt(__doc__, argv=argv) print(argv) splitter = ssplit.NltkSSplitter(newline=argv['--newline_is_sentence_break']) parser = parse.Bllip(model_dir=argv['--bllip-model']) ptb2dep = ptb2ud.Ptb2DepConverter(universal=True) lemmatizer = ptb2ud.Lemmatizer() mm = pymetamap.MetaMap.get_instance(argv['--metamap']) neg_detector = negdetect.Detector(argv['--neg-patterns'], argv['--uncertainty-patterns']) if argv['--cuis'] == 'None': cuis = None else: cuis = dner_mm.read_cuis(argv['--cuis']) collection = text2bioc.text2collection(argv['SOURCE'], split_document=argv['--split-document']) pipeline(collection, mm, splitter, parser, ptb2dep, lemmatizer, neg_detector, cuis) with open(os.path.expanduser(argv['--out']), 'w') as fp: bioc.dump(collection, fp)
def process_text(sources, metamap, splitter, parser, ptb2dep, lemmatizer, neg_detector, cuis, sec_title_patterns): collection = text2bioc.text2collection(*sources) return process_collection(collection, metamap, splitter, parser, ptb2dep, lemmatizer, neg_detector, cuis, sec_title_patterns)
""" Convert text FILEs to the BioC output file Usage: negbio_pipeline text2bioc [options] --output=<file> <file> ... Options: --output=<file> Specify the output file name. --verbose Print more information about progress. """ import bioc from negbio.cli_utils import parse_args from negbio.pipeline.text2bioc import text2collection if __name__ == '__main__': argv = parse_args(__doc__) collection = text2collection(argv['<file>']) with open(argv['--output'], 'w') as fp: bioc.dump(collection, fp)