Exemple #1
0
def vcf_io(request):
    """Return a TSV-to-TSV ``IOManager`` over the ``vcf_input.tsv`` fixture.

    ``request`` is accepted but not used by the body.
    The output target is ``"-"`` (presumably stdout -- confirm against
    ``IOManager``).
    """
    settings = FrozenBox({
        "infile": relative_to_this_test_folder("fixtures/vcf_input.tsv"),
        "outfile": "-",
    })
    return IOManager(settings, IOType.TSV, IOType.TSV)
def variants_io_m(request):
    """Return a TSV-to-TSV ``IOManager`` over the ``input_multi.tsv`` fixture.

    ``request`` is accepted but not used by the body.
    The output target is ``"-"`` (presumably stdout -- confirm against
    ``IOManager``).
    """
    fixture_path = relative_to_this_test_folder("fixtures/input_multi.tsv")
    raw_config = {"infile": fixture_path, "outfile": "-"}
    frozen_config = FrozenBox(raw_config)
    return IOManager(frozen_config, IOType.TSV, IOType.TSV)
Exemple #3
0
 def build(fixture_name, io_options=None):
     """Build a TSV-to-TSV ``IOManager`` for the given fixture file.

     Args:
         fixture_name: Path of the input fixture, resolved through
             ``relative_to_this_test_folder``.
         io_options: Optional mapping of extra options. Its ``"infile"``
             and ``"outfile"`` entries, if any, are overridden. The mapping
             itself is never modified.

     Returns:
         An ``IOManager`` configured with the merged, frozen options.
     """
     # Work on a private copy: the previous ``dict()`` default was shared
     # between calls and was mutated by ``update``, leaking options from
     # one call into the next (and into the caller's own dict).
     merged = dict(io_options) if io_options is not None else {}
     merged.update({
         "infile": relative_to_this_test_folder(fixture_name),
         "outfile": "-",
     })
     frozen_options = FrozenBox(merged)
     return IOManager(frozen_options, IOType.TSV, IOType.TSV)
Exemple #4
0
    def build(fixture_name, options=Box({})):
        """Build a TSV-to-TSV ``IOManager`` for the given fixture file.

        ``options`` is converted to a plain dict, then its ``"infile"`` and
        ``"outfile"`` entries are set to the resolved fixture path and
        ``"-"``. The result is wrapped in a ``Box`` created with
        ``default_box=True`` and ``default_box_attr=None``.
        """
        merged = options.to_dict()
        merged.update(
            infile=relative_to_this_test_folder(fixture_name),
            outfile="-",
        )
        boxed = Box(merged, default_box=True, default_box_attr=None)
        return IOManager(boxed, IOType.TSV, IOType.TSV)
def test_create_file_io():
    """Check that an ``IOManager`` over ``input.tsv`` reads 4 lines
    and exposes a 3-element header."""
    fixture_path = relative_to_this_test_folder("fixtures/input.tsv")
    settings = FrozenBox({"infile": fixture_path, "outfile": "-"})

    with IOManager(settings, IOType.TSV, IOType.TSV) as manager:
        assert manager is not None

        # Materialize the reader so the line count can be checked.
        rows = list(manager.lines_read_iterator())
        print(rows)
        assert len(rows) == 4

        print(manager.header)
        assert len(manager.header) == 3
def pipeline_main(argv):
    """Run the annotation pipeline as a command-line program.

    Args:
        argv: Command-line arguments to parse, without the program name.
            When ``None``, ``sys.argv[1:]`` is used instead.

    The function parses the arguments, configures logging from the
    ``--verbose`` count, builds a ``PipelineAnnotator`` and runs it over an
    ``IOManager``. It then prints processing details to stderr and, if the
    ``tabix`` option is set, calls ``run_tabix`` on the output file.

    Raises:
        AssertionError: If the annotation config file does not exist or the
            pipeline could not be built.
    """
    # Previously ``argv`` was ignored and ``sys.argv`` was always parsed,
    # which made the parameter meaningless for programmatic callers.
    cli_args = sys.argv[1:] if argv is None else list(argv)

    gpf_instance = GPFInstance()
    dae_config = gpf_instance.dae_config
    genomes_db = gpf_instance.genomes_db

    desc = "Program to annotate variants combining multiple annotating tools"
    parser = argparse.ArgumentParser(
        description=desc,
        conflict_handler="resolve",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--verbose', '-V', action='count', default=0)

    for name, args in main_cli_options(gpf_instance):
        parser.add_argument(name, **args)

    options = parser.parse_args(cli_args)

    # Map the number of -V flags to a logging level; no flag means ERROR.
    if options.verbose >= 3:
        log_level = logging.DEBUG
    else:
        log_level = {
            1: logging.WARNING,
            2: logging.INFO,
        }.get(options.verbose, logging.ERROR)
    logging.basicConfig(level=log_level)

    if options.annotation_config is not None:
        config_filename = options.annotation_config
    else:
        config_filename = dae_config.annotation.conf_file

    assert os.path.exists(config_filename), config_filename

    # Re-wrap the parsed namespace in a Box. ``vars`` is the public way to
    # get the namespace attributes as a dict.
    # NOTE(review): with default_box=True / default_box_attr=None, missing
    # options are expected to read as None -- confirm against python-box.
    options = Box(
        dict(vars(options)),
        default_box=True,
        default_box_attr=None,
    )

    # File IO format specification
    reader_type = IOType.Parquet if options.read_parquet else IOType.TSV
    writer_type = IOType.Parquet if options.write_parquet else IOType.TSV

    start = time.time()

    pipeline = PipelineAnnotator.build(
        options,
        config_filename,
        genomes_db,
    )
    assert pipeline is not None

    with IOManager(options, reader_type, writer_type) as io_manager:
        pipeline.annotate_file(io_manager)

    print("# PROCESSING DETAILS:", file=sys.stderr)
    print("#", time.asctime(), file=sys.stderr)
    # Echo the arguments that were actually parsed.
    print("#", " ".join(cli_args), file=sys.stderr)

    print(
        "The program was running for [h:m:s]:",
        str(datetime.timedelta(seconds=round(time.time() - start, 0))),
        file=sys.stderr,
    )

    if options.tabix:
        run_tabix(options.outfile)