예제 #1
0
def main():
    args = parse_args(__doc__, version='negbio version 2', options_first=True)
    logging.debug('CWD: %s', os.getcwd())

    argv = [args['<command>']] + args['<args>']
    if args['<command>'] == 'text2bioc':
        exit(call(['python', '-m', 'negbio.negbio_text2bioc'] + argv))
    elif args['<command>'] == 'normalize':
        exit(call(['python', '-m', 'negbio.negbio_normalize'] + argv))
    elif args['<command>'] == 'section_split':
        exit(call(['python', '-m', 'negbio.negbio_section_split'] + argv))
    elif args['<command>'] == 'ssplit':
        exit(call(['python', '-m', 'negbio.negbio_ssplit'] + argv))
    elif args['<command>'] == 'parse':
        exit(call(['python', '-m', 'negbio.negbio_parse'] + argv))
    elif args['<command>'] == 'ptb2ud':
        exit(call(['python', '-m', 'negbio.negbio_ptb2ud'] + argv))
    elif args['<command>'] == 'dner_mm':
        exit(call(['python', '-m', 'negbio.negbio_dner_matamap'] + argv))
    elif args['<command>'] == 'dner_chexpert':
        exit(call(['python', '-m', 'negbio.negbio_dner_chexpert'] + argv))
    elif args['<command>'] == 'neg':
        exit(call(['python', '-m', 'negbio.negbio_neg'] + argv))
    elif args['<command>'] == 'neg_chexpert':
        exit(call(['python', '-m', 'negbio.negbio_neg_chexpert'] + argv))
    elif args['<command>'] == 'cleanup':
        exit(call(['python', '-m', 'negbio.negbio_clean'] + argv))
    elif args['<command>'] in ['help', None]:
        exit(call(['python', '-m', 'negbio.negbio_pipeline', '--help']))
    else:
        exit("%r is not a negbio command. See 'negbio help'." %
             args['<command>'])
예제 #2
0
def main():
    argv = parse_args(__doc__, version='version 2')
    print(argv)

    lemmatizer = Lemmatizer()
    ptb2dep = NegBioPtb2DepConverter(lemmatizer, universal=True)
    splitter = NegBioSSplitter(newline=argv['--newline_is_sentence_break'])
    parser = NegBioParser(model_dir=argv['--bllip-model'])

    argv = get_absolute_path(argv, '--neg-patterns',
                             'negbio/patterns/neg_patterns.txt')
    argv = get_absolute_path(argv, '--uncertainty-patterns',
                             'negbio/patterns/uncertainty_patterns.txt')

    mm = pymetamap.MetaMap.get_instance(argv['--metamap'])
    neg_detector = negdetect.Detector(argv['--neg-patterns'],
                                      argv['--uncertainty-patterns'])

    if argv['--cuis'] == 'None':
        cuis = None
    else:
        cuis = read_cuis(argv['--cuis'])

    if argv['text']:
        collection = text2bioc.text2collection(argv['SOURCES'])
    elif argv['bioc']:
        with open(argv['SOURCE']) as fp:
            collection = bioc.load(fp)
    else:
        raise KeyError

    pipeline(collection, mm, splitter, parser, ptb2dep, neg_detector, cuis)

    with open(os.path.expanduser(argv['--output']), 'w') as fp:
        bioc.dump(collection, fp)
예제 #3
0
def main():
    argv = parse_args(__doc__, version='version 2')
    print(argv)

    lemmatizer = Lemmatizer()
    ptb2dep = NegBioPtb2DepConverter(lemmatizer, universal=True)
    ssplitter = NegBioSSplitter(newline=argv['--newline_is_sentence_break'])
    parser = NegBioParser(model_dir=argv['--bllip-model'])

    argv = get_absolute_path(argv, '--mention_phrases_dir',
                             'negbio/chexpert/phrases/mention')
    argv = get_absolute_path(argv, '--unmention_phrases_dir',
                             'negbio/chexpert/phrases/unmention')
    argv = get_absolute_path(
        argv, '--pre-negation-uncertainty-patterns',
        'negbio/chexpert/patterns/pre_negation_uncertainty.txt')
    argv = get_absolute_path(
        argv, '--post-negation-uncertainty-patterns',
        'negbio/chexpert/patterns/post_negation_uncertainty.txt')
    argv = get_absolute_path(argv, '--neg-patterns',
                             'negbio/chexpert/patterns/negation.txt')

    # chexpert
    loader = NegBioLoader()
    extractor = NegBioExtractor(Path(argv['--mention_phrases_dir']),
                                Path(argv['--unmention_phrases_dir']),
                                verbose=argv['--verbose'])
    neg_detector = ModifiedDetector(
        argv['--pre-negation-uncertainty-patterns'], argv['--neg-patterns'],
        argv['--post-negation-uncertainty-patterns'])
    aggregator = NegBioAggregator(CATEGORIES, verbose=argv['--verbose'])

    if argv['text']:
        collection = text2bioc.text2collection(argv['SOURCES'])
    elif argv['bioc']:
        with open(argv['SOURCE']) as fp:
            collection = bioc.load(fp)
    else:
        raise KeyError

    pipeline(collection,
             loader,
             ssplitter,
             extractor,
             parser,
             ptb2dep,
             neg_detector,
             aggregator,
             verbose=argv['--verbose'])

    with open(os.path.expanduser(argv['--output']), 'w') as fp:
        bioc.dump(collection, fp)
예제 #4
0
from negbio.pipeline2.dner_mm import MetaMapExtractor
from negbio.pipeline2.pipeline import NegBioPipeline
from pymetamap import MetaMap


def read_cuis(pathname):
    cuis = set()
    with open(pathname) as fp:
        for line in fp:
            line = line.strip()
            if line:
                cuis.add(line)
    return cuis


if __name__ == '__main__':
    argv = parse_args(__doc__)
    mm = MetaMap.get_instance(argv['--metamap'])

    if argv['--cuis'] is None:
        cuis = None
    else:
        cuis = read_cuis(argv['--cuis'])

    extractor = MetaMapExtractor(mm, cuis)
    pipeline = NegBioPipeline(pipeline=[('MetaMapExtractor', extractor)])
    pipeline.scan(source=argv['<file>'],
                  directory=argv['--output'],
                  suffix=argv['--suffix'],
                  overwrite=argv['--overwrite'])