def test_async_mappable_regions_50_hisat(tests_config, chrom, hisat2):
    """Compare length-50 mappable regions built with ``hisat2`` to a gold file.

    The pipeline writes its regions into an in-memory text buffer. The buffer
    content is parsed as a headerless, tab-separated table and checked against
    the gzipped reference table for ``chrom``: same column labels, same number
    of rows, and identical ``start`` and ``end`` columns.
    """
    column_names = ['chrom', 'start', 'end']

    gold_path = os.path.join(
        'sgains/tests/data',
        "{}.50mer.mappable.regions.txt.gz".format(chrom))
    expected = pd.read_csv(
        gold_path,
        sep='\t',
        compression='gzip',
        header=None,
        names=column_names)

    with io.StringIO() as buffer:
        regions_pipeline = MappableRegionsPipeline(tests_config, hisat2)
        regions_pipeline.generate_mappable_regions(
            [chrom], 50, outfile=buffer)
        buffer.flush()

        # Re-read what the pipeline produced through a fresh text stream.
        produced = io.StringIO(buffer.getvalue())
        actual = pd.read_csv(
            produced,
            sep='\t',
            header=None,
            names=column_names)
        print(actual.head())

    assert np.all(actual.columns == expected.columns)
    assert len(actual) == len(expected)

    assert np.all(actual.start == expected.start)
    assert np.all(actual.end == expected.end)
Exemple #2
0
async def test_bowtie_mappings(tests_config, event_loop, chrom):
    """Pipe generated reads through the aligner and check flag-0 mappings.

    Reads for ``chrom`` are written to the aligner's stdin by a background
    task while this coroutine parses the SAM lines arriving on its stdout.
    Each read name is split on ``'.'`` into a chromosome and a position; for
    every mapping whose flag is 0 the position must equal the mapped start
    and the chromosome must equal ``chrom``.
    """
    event_loop.set_debug(True)
    pipeline = MappableRegionsPipeline(tests_config)

    bowtie = await pipeline.async_start_bowtie()

    reads_generator = pipeline.generate_reads([chrom], 100)
    # Schedule the writer through ensure_future: the asyncio documentation
    # discourages instantiating asyncio.Task directly.
    writer = asyncio.ensure_future(
        pipeline.async_write_reads_generator(bowtie.stdin, reads_generator))

    infile = bowtie.stdout

    while True:
        line = await infile.readline()
        if not line:
            # Empty read means the aligner closed its stdout.
            break
        line = line.decode()
        if line.startswith('@'):
            # Lines starting with '@' are skipped, not parsed as mappings.
            continue
        mapping = Mapping.parse_sam(line)
        if mapping.flag == 0:
            chromosome, pos = mapping.name.split('.')
            assert int(pos) == mapping.start
            assert chrom == chromosome

    # Wait for the subprocess to exit before finishing, as the sibling
    # mappings-generator test does, so it is not left unreaped.
    await bowtie.wait()
    await writer
Exemple #3
0
def create_pipeline(command, config):
    """Build the pipeline object that corresponds to ``command``.

    Single-stage commands map to one pipeline class instantiated with
    ``config``. The ``prepare`` and ``process`` commands build a list of
    stage pipelines and wrap it in a ``CompositePipeline``.

    Raises:
        ValueError: if ``command`` matches none of the known commands.
    """

    def _prepare():
        stages = [
            GenomeIndexPipeline(config),
            MappableRegionsPipeline(config),
            BinsPipeline(config),
        ]
        return CompositePipeline(config, stages)

    def _process():
        stages = [
            MappingPipeline(config),
            VarbinPipeline(config),
            Rpipeline(config),
        ]
        return CompositePipeline(config, stages)

    # Ordered (command, factory) pairs. Factories are lazy so only the
    # classes needed for the selected command are ever looked up.
    factories = (
        ("genomeindex", lambda: GenomeIndexPipeline(config)),
        ("mappable_regions", lambda: MappableRegionsPipeline(config)),
        ("bins", lambda: BinsPipeline(config)),
        ("mapping", lambda: MappingPipeline(config)),
        ("varbin", lambda: VarbinPipeline(config)),
        ("scclust", lambda: Rpipeline(config)),
        ("extract_10x", lambda: Extract10xPipeline(config)),
        ("varbin_10x", lambda: Varbin10xPipeline(config)),
        ("prepare", _prepare),
        ("process", _process),
    )

    for known_command, build in factories:
        if command == known_command:
            return build()

    raise ValueError(f"Unexpected command: {command}")
Exemple #4
0
async def test_generate_reads(tests_config):
    """Print id and length of at most the first eleven reads for ``chr1``.

    The reads generator is closed explicitly afterwards. The test makes no
    assertions; it only exercises read generation.
    """
    reads = MappableRegionsPipeline(tests_config).generate_reads(['chr1'], 100)

    # Pairing with range(11) stops after the record at index 10 without
    # pulling a further record from the generator.
    for _index, record in zip(range(11), reads):
        print(record.id, len(record))
    reads.close()
def test_reads_generator(tests_config):
    """Exhaust the reads generator for ``chrM`` and expect 16472 reads."""
    reads = MappableRegionsPipeline(tests_config).generate_reads(['chrM'], 100)

    total = sum(1 for _ in reads)
    print(total)
    assert 16472 == total
Exemple #6
0
async def test_async_generate_mappable_regions(tests_config, event_loop):
    """Run asynchronous mappable-region generation for ``chrM``.

    Smoke test: it passes as long as the awaited call does not raise.
    """
    # Switch the event loop into debug mode before any work is scheduled.
    event_loop.set_debug(True)

    chromosomes = ['chrM']
    regions_pipeline = MappableRegionsPipeline(tests_config)

    await regions_pipeline.async_generate_mappable_regions(chromosomes, 100)
Exemple #7
0
async def test_async_mappable_regions_50(tests_config, event_loop, chrom):
    """Compare asynchronously generated length-50 regions to a gold file.

    The pipeline writes its regions into an in-memory text buffer, which is
    parsed as a headerless, tab-separated table and compared with the gzipped
    reference table for ``chrom``: same column labels, same number of rows,
    and identical ``start`` and ``end`` columns.
    """
    event_loop.set_debug(True)
    pipeline = MappableRegionsPipeline(tests_config)

    filename = os.path.join('tests/data',
                            "{}.50mer.mappable.regions.txt.gz".format(chrom))
    gold_df = pd.read_csv(filename,
                          sep='\t',
                          compression='gzip',
                          header=None,
                          names=['chrom', 'start', 'end'])
    with io.StringIO() as outfile:
        await pipeline.async_generate_mappable_regions([chrom],
                                                       50,
                                                       outfile=outfile)
        # Re-read what the pipeline produced through a fresh text stream.
        infile = io.StringIO(outfile.getvalue())
        df = pd.read_csv(infile,
                         sep='\t',
                         header=None,
                         names=['chrom', 'start', 'end'])
        print(df.head())

    assert np.all(df.columns == gold_df.columns)
    # Check the row count first: comparing two Series with different indexes
    # raises ValueError in pandas instead of producing an assertion failure.
    assert len(df) == len(gold_df)
    assert np.all(df.start == gold_df.start)
    assert np.all(df.end == gold_df.end)
Exemple #8
0
async def test_async_mappings_generator(tests_config, event_loop):
    """Stream ``chrM`` reads through the aligner and print what comes back.

    A background task writes the generated reads to the aligner's stdin while
    this coroutine iterates ``async_mappable_regions_generator`` over its
    stdout and prints each yielded item. Afterwards it waits for the
    subprocess to exit and for the writer task to finish. No assertions are
    made on the yielded items.
    """
    # Enable debugging
    event_loop.set_debug(True)
    pipeline = MappableRegionsPipeline(tests_config)

    bowtie = await pipeline.async_start_bowtie()

    reads_generator = pipeline.generate_reads(['chrM'], 100)
    # Schedule the writer through ensure_future: the asyncio documentation
    # discourages instantiating asyncio.Task directly.
    writer = asyncio.ensure_future(
        pipeline.async_write_reads_generator(bowtie.stdin, reads_generator))

    async for mapping in pipeline.async_mappable_regions_generator(
            bowtie.stdout):
        print(mapping)

    await bowtie.wait()
    await writer
 def run(self, args):
     """Handle the mappable-regions subcommand.

     Prints the received ``args``, passes them to ``self.process_args`` and
     then builds a ``MappableRegionsPipeline`` from ``self.config`` and runs
     it.
     """
     banner = "mappable-regions subcommand called with args: {}"
     print(banner.format(args))
     self.process_args(args)
     MappableRegionsPipeline(self.config).run()
def test_generate_mappable_regions_bwa(tests_config, bwa):
    """Smoke-test region generation for ``chrM`` with the supplied ``bwa``.

    Requires ``bwa`` to be non-None; otherwise passes as long as generation
    does not raise.
    """
    assert bwa is not None

    regions_pipeline = MappableRegionsPipeline(tests_config, bwa)
    regions_pipeline.generate_mappable_regions(['chrM'], 100)
def test_generate_mappable_regions_hisat(tests_config, hisat2):
    """Smoke-test region generation for ``chrM`` with the supplied ``hisat2``.

    Requires ``hisat2`` to be non-None; otherwise passes as long as
    generation does not raise.
    """
    assert hisat2 is not None

    regions_pipeline = MappableRegionsPipeline(tests_config, hisat2)
    regions_pipeline.generate_mappable_regions(['chrM'], 100)
def test_generate_mappable_regions(tests_config):
    """Smoke-test region generation for ``chrM`` with the default setup."""
    chromosomes = ['chrM']
    MappableRegionsPipeline(tests_config).generate_mappable_regions(
        chromosomes, 100)
def test_bowtie_command(tests_config):
    """Check that the mappable-regions command starts with ``'hisat2'``.

    NOTE(review): the test name mentions bowtie while the expected first
    element of the command is ``'hisat2'`` -- confirm this is intended.
    """
    aligner = MappableRegionsPipeline(tests_config).genome.aligner
    command = aligner.build_mappable_regions_command()
    print(command)
    assert command[0] == 'hisat2'