Beispiel #1
0
def test_scan_collection_skip():
    filenames = create_collections()
    output_dir = tempfile.mkdtemp()
    # remove one file
    os.remove(filenames[0])

    p = NegBioPipeline([('fake', FakePipe())])
    p.scan(source=filenames, directory=output_dir, suffix='.xml')

    assert not os.path.exists(os.path.join(output_dir, filenames[0]))
Beispiel #2
0
def test_scan_document_error():
    class FakePipeError(Pipe):
        def __call__(self, doc: bioc.BioCDocument, *args, **kwargs):
            raise KeyError

    filenames = create_collections()
    output_dir = tempfile.mkdtemp()

    p = NegBioPipeline([('fake_error', FakePipeError())])
    p.scan(source=filenames, directory=output_dir, suffix='.xml')

    for filename in filenames:
        assert filecmp.cmp(filename, os.path.join(output_dir, os.path.basename(filename)))
Beispiel #3
0
def test_scan_collection():
    filenames = create_collections()
    output_dir = tempfile.mkdtemp()
    os.rmdir(output_dir)

    p = NegBioPipeline([('fake', FakePipe())])
    p.scan(source=filenames, directory=output_dir, suffix='.xml')
    for filename in filenames:
        filename = os.path.join(output_dir, os.path.basename(filename))
        with open(filename) as fp:
            c = bioc.load(fp)
            for doc in c.documents:
                assert doc.infons['fake']
Beispiel #4
0
from negbio.pipeline2.dner_mm import MetaMapExtractor
from negbio.pipeline2.pipeline import NegBioPipeline
from pymetamap import MetaMap


def read_cuis(pathname):
    cuis = set()
    with open(pathname) as fp:
        for line in fp:
            line = line.strip()
            if line:
                cuis.add(line)
    return cuis


if __name__ == '__main__':
    argv = parse_args(__doc__)
    mm = MetaMap.get_instance(argv['--metamap'])

    if argv['--cuis'] is None:
        cuis = None
    else:
        cuis = read_cuis(argv['--cuis'])

    extractor = MetaMapExtractor(mm, cuis)
    pipeline = NegBioPipeline(pipeline=[('MetaMapExtractor', extractor)])
    pipeline.scan(source=argv['<file>'],
                  directory=argv['--output'],
                  suffix=argv['--suffix'],
                  overwrite=argv['--overwrite'])