예제 #1
0
def test_scan_collection_skip():
    """Check that ``scan`` writes no output for a source file that is missing.

    One of the generated collection files is deleted before scanning; the test
    then verifies that nothing was written for it in the output directory.
    """
    filenames = create_collections()
    output_dir = tempfile.mkdtemp()
    # Remove one input file so the pipeline is handed a source that is gone.
    os.remove(filenames[0])

    p = NegBioPipeline([('fake', FakePipe())])
    p.scan(source=filenames, directory=output_dir, suffix='.xml')

    # Join on the basename, as the sibling scan tests do. Joining the full
    # source path would, when that path is absolute, discard ``output_dir``
    # and merely re-check the file deleted above (a check that always passes).
    skipped_output = os.path.join(output_dir, os.path.basename(filenames[0]))
    assert not os.path.exists(skipped_output)
예제 #2
0
def test_scan_document_error():
    """Scan with a pipe that raises ``KeyError`` and compare outputs to inputs.

    After the scan, every file in the output directory (looked up by the
    source file's basename) must compare equal to its source file.
    """
    class FakePipeError(Pipe):
        # Pipe stand-in whose call always fails with KeyError.
        def __call__(self, doc: bioc.BioCDocument, *args, **kwargs):
            raise KeyError

    sources = create_collections()
    out_dir = tempfile.mkdtemp()

    failing_pipeline = NegBioPipeline([('fake_error', FakePipeError())])
    failing_pipeline.scan(source=sources, directory=out_dir, suffix='.xml')

    for src in sources:
        written = os.path.join(out_dir, os.path.basename(src))
        assert filecmp.cmp(src, written)
예제 #3
0
def test_scan_collection():
    """Scan the generated collections and check each output document.

    Every document loaded from each output file must carry a truthy ``'fake'``
    entry in its ``infons``.
    """
    sources = create_collections()
    out_dir = tempfile.mkdtemp()
    # Delete the freshly created directory so the scan starts without it
    # (presumably to exercise directory creation by ``scan`` -- confirm).
    os.rmdir(out_dir)

    pipeline = NegBioPipeline([('fake', FakePipe())])
    pipeline.scan(source=sources, directory=out_dir, suffix='.xml')

    for src in sources:
        result_path = os.path.join(out_dir, os.path.basename(src))
        with open(result_path) as fp:
            collection = bioc.load(fp)
            for document in collection.documents:
                assert document.infons['fake']
예제 #4
0
from negbio.pipeline2.dner_mm import MetaMapExtractor
from negbio.pipeline2.pipeline import NegBioPipeline
from pymetamap import MetaMap


def read_cuis(pathname):
    """Read a file of identifiers, one per line, into a set.

    Args:
        pathname: Path of the text file to read.

    Returns:
        A set holding every non-empty line with surrounding whitespace
        stripped; duplicates collapse into one entry.
    """
    with open(pathname) as handle:
        stripped_lines = (raw.strip() for raw in handle)
        # Lines that are empty after stripping are left out.
        return {entry for entry in stripped_lines if entry}


if __name__ == '__main__':
    # Script entry point: build a MetaMap-backed extractor and scan the inputs.
    argv = parse_args(__doc__)
    mm = MetaMap.get_instance(argv['--metamap'])

    # Without a --cuis file the extractor receives None instead of a CUI set.
    cuis = None if argv['--cuis'] is None else read_cuis(argv['--cuis'])

    pipeline = NegBioPipeline(
        pipeline=[('MetaMapExtractor', MetaMapExtractor(mm, cuis))])
    pipeline.scan(
        source=argv['<file>'],
        directory=argv['--output'],
        suffix=argv['--suffix'],
        overwrite=argv['--overwrite'],
    )
예제 #5
0
from negbio.cli_utils import parse_args, calls_asynchronously
from negbio.pipeline2.pipeline import NegBioPipeline
from negbio.pipeline2.section_split import SectionSplitter


def read_section_titles(pathname):
    """Compile the section titles listed in a file into one regular expression.

    Each non-blank line of the file, stripped of surrounding whitespace, is
    used as one alternative; the alternatives are joined with ``|``.

    Args:
        pathname: Path of a text file holding one title pattern per line.

    Returns:
        The compiled pattern, with ``re.IGNORECASE`` and ``re.MULTILINE`` set.
    """
    with open(pathname) as fp:
        # Skip blank lines: an empty alternative (``A||B`` or a trailing
        # ``A|``) would let the pattern match the empty string everywhere.
        titles = [title for title in (line.strip() for line in fp) if title]
    p = '|'.join(titles)
    logging.debug('Section patterns: %s', p)
    return re.compile(p, re.IGNORECASE | re.MULTILINE)


if __name__ == '__main__':
    # Script entry point for section splitting.
    argv = parse_args(__doc__)
    workers = int(argv['--workers'])
    if workers != 1:
        # Any worker count other than 1 is handed to calls_asynchronously
        # together with the command for this module.
        calls_asynchronously(argv, 'python -m negbio.negbio_section_split')
    else:
        # Without a --pattern file the splitter receives None.
        pattern = (None if argv['--pattern'] is None
                   else read_section_titles(argv['--pattern']))

        pipeline = NegBioPipeline(
            pipeline=[('SectionSplitter', SectionSplitter(pattern))])
        pipeline.scan(
            source=argv['<file>'],
            suffix=argv['--suffix'],
            directory=argv['--output'],
            overwrite=argv['--overwrite'],
        )
예제 #6
0
Parse sentences

Usage:
    negbio_parse [options] --output=<directory> <file> ...

Options:
    --model=<directory>     Bllip parser model directory.
    --output=<directory>    Specify the output directory.
    --suffix=<suffix>       Append an additional SUFFIX to file names. [default: .bllip.xml]
    --verbose               Print more information about progress.
    --workers=<n>           Number of threads [default: 1]
    --files_per_worker=<n>  Number of input files per worker [default: 8]
    --overwrite             Overwrite the output file.
"""
from negbio.cli_utils import parse_args, calls_asynchronously
from negbio.pipeline2.parse import NegBioParser
from negbio.pipeline2.pipeline import NegBioPipeline

if __name__ == '__main__':
    # Script entry point for parsing.
    argv = parse_args(__doc__)
    workers = int(argv['--workers'])
    if workers != 1:
        # Any worker count other than 1 is handed to calls_asynchronously
        # together with the command for this module.
        calls_asynchronously(argv, 'python -m negbio.negbio_parse')
    else:
        steps = [('NegBioParser', NegBioParser(model_dir=argv['--model']))]
        pipeline = NegBioPipeline(pipeline=steps)
        pipeline.scan(
            source=argv['<file>'],
            directory=argv['--output'],
            suffix=argv['--suffix'],
            overwrite=argv['--overwrite'],
        )
예제 #7
0
"""
Usage:
    negbio_normalize [options] --output=<directory> <file> ...

Options:
    --output=<directory>    Specify the output directory.
    --suffix=<suffix>       Append an additional SUFFIX to file names. [default: .normalized.xml]
    --verbose               Print more information about progress.
    --overwrite             Overwrite the output file.
"""

from negbio.cli_utils import parse_args
from negbio.pipeline2.pipeline import NegBioPipeline
from negbio.pipeline2.normalize_mimiccxr import MIMICCXRNormalizer

if __name__ == '__main__':
    # Script entry point: run the MIMIC-CXR normalizer over the input files.
    argv = parse_args(__doc__)
    steps = [('MIMICCXRNormalizer', MIMICCXRNormalizer())]
    pipeline = NegBioPipeline(pipeline=steps)
    # Collect the scan options from the parsed command line, then run.
    scan_options = {
        'source': argv['<file>'],
        'directory': argv['--output'],
        'suffix': argv['--suffix'],
        'overwrite': argv['--overwrite'],
    }
    pipeline.scan(**scan_options)
예제 #8
0
Clean up sentences

Usage:
    negbio_cleanup [options] --output=<directory> <file> ...

Options:
    --suffix=<suffix>       Append an additional SUFFIX to file names. [default: .negbio.xml]
    --verbose               Print more information about progress.
    --output=<directory>    Specify the output directory.
    --overwrite             Overwrite the output file.
    --workers=<n>           Number of threads [default: 1]
    --files_per_worker=<n>  Number of input files per worker [default: 8]
"""

from negbio.cli_utils import parse_args, calls_asynchronously
from negbio.pipeline2.cleanup import CleanUp
from negbio.pipeline2.pipeline import NegBioPipeline

if __name__ == '__main__':
    # Script entry point for the clean-up step.
    argv = parse_args(__doc__)
    workers = int(argv['--workers'])
    if workers != 1:
        # Any worker count other than 1 is handed to calls_asynchronously
        # together with the command for this module.
        calls_asynchronously(argv, 'python -m negbio.negbio_clean')
    else:
        steps = [('CleanUp', CleanUp())]
        pipeline = NegBioPipeline(pipeline=steps)
        pipeline.scan(
            source=argv['<file>'],
            directory=argv['--output'],
            suffix=argv['--suffix'],
            overwrite=argv['--overwrite'],
        )