コード例 #1
0
    def test_add_rg_to_bam(self):
        """Map reads with bwa mem using a read group and inspect the BAM.

        The test passes when the ``samtools view -h`` output contains the
        ``@RG`` header line for the read group ID and a known read sequence.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        lib_name = 'aa'
        readgroup = {
            'ID': lib_name,
            'PL': 'illumina',
            'LB': lib_name,
            'SM': '{0}_illumina_pe'.format(lib_name),
            'PU': '0'
        }
        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when the mapping
        # or one of the assertions fails.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            log_fhand = NamedTemporaryFile()
            bwa = map_with_bwamem(index_fpath,
                                  unpaired_fpath=reads_fpath,
                                  readgroup=readgroup,
                                  log_fpath=log_fhand.name)
            map_process_to_bam(bwa, bam_fhand.name)
            out = subprocess.check_output(
                [get_binary_path('samtools'), 'view', '-h', bam_fhand.name],
                stderr=log_fhand)
            assert '@RG\tID:aa' in out
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
コード例 #2
0
ファイル: test_mapping.py プロジェクト: JoseBlanca/seq_crumbs
 def test_tophat(self):
     """Map unpaired reads with tophat and check 'accepted_hits.bam' exists."""
     reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
     reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
     directory = TemporaryDir()
     # try/finally so that directory.close() runs even when mapping fails.
     try:
         index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                   directory.name)
         map_with_tophat(index_fpath, directory.name,
                         unpaired_fpath=reads_fpath)
         # The result of os.path.exists() used to be discarded, so a missing
         # output file could never make the test fail.
         assert os.path.exists(os.path.join(directory.name,
                                            'accepted_hits.bam'))
     finally:
         directory.close()
コード例 #3
0
    def test_map_with_bowtie2(self):
        """Smoke test: map unpaired reads with bowtie2 into a temporary BAM.

        No assertion is made on the output; the test only checks that the
        index creation and the mapping run without raising.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when mapping fails.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bowtie2(index_fpath, bam_fhand.name,
                             unpaired_fpaths=[reads_fpath])
        finally:
            directory.close()
コード例 #4
0
 def test_tophat(self):
     """Map unpaired reads with tophat and check 'accepted_hits.bam' exists."""
     reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
     reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
     directory = TemporaryDir()
     # Cleanup is in a finally clause so a failing mapping does not skip it.
     try:
         index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                   directory.name)
         map_with_tophat(index_fpath,
                         directory.name,
                         unpaired_fpath=reads_fpath)
         bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
         # Previously the existence check was computed but never asserted.
         assert os.path.exists(bam_fpath)
     finally:
         directory.close()
コード例 #5
0
    def test_map_with_bowtie2(self):
        """Smoke test: map unpaired reads with bowtie2 into a temporary BAM.

        Nothing is asserted about the BAM content; the call just has to
        complete without raising.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # Cleanup is in a finally clause so a failing mapping does not skip it.
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bowtie2(index_fpath,
                             bam_fhand.name,
                             unpaired_fpaths=[reads_fpath])
        finally:
            directory.close()
コード例 #6
0
    def test_map_with_bwa(self):
        """Map unpaired reads with bwasw and look for a known read in the BAM.

        The BAM is dumped with ``samtools view`` and the test passes when a
        known read sequence appears in that output.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when the mapping
        # or the assertion fails.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bwasw(index_fpath, bam_fhand.name,
                           unpaired_fpath=reads_fpath)
            out = subprocess.check_output([get_binary_path('samtools'),
                                           'view', bam_fhand.name])
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
コード例 #7
0
    def test_map_with_bwa(self):
        """Map unpaired reads with bwasw and look for a known read in the BAM.

        The BAM is dumped with ``samtools view`` and the test passes when a
        known read sequence appears in that output.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        # Cleanup is in a finally clause so a failure does not skip it.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            map_with_bwasw(index_fpath, bam_fhand.name,
                           unpaired_fpath=reads_fpath)
            samtools_cmd = [get_binary_path('samtools'), 'view',
                            bam_fhand.name]
            out = subprocess.check_output(samtools_cmd)
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
コード例 #8
0
ファイル: test_plot.py プロジェクト: JoseBlanca/vcf_crumbs
    def test_plot_window(self):
        """Smoke test for plot_in_genome with two labelled values.

        The input chains ``self.gen_windows()`` and
        ``self.gen_windows('ch2')``; nothing is asserted about the output.
        """
        iterator = itertools.chain(self.gen_windows(),
                                   self.gen_windows('ch2'))
        # Build the OrderedDict from a list of pairs: initialising it from a
        # dict literal inherits the (unordered on Python 2) order of that
        # plain dict, which defeats the purpose of using an OrderedDict.
        labels = OrderedDict([
            ('val1', {'title': 'val1 title', 'ylabel': 'val1 ylabel'}),
            ('val2', {'title': 'val2 title', 'ylabel': 'val2 ylabel'}),
        ])
        tempdir = TemporaryDir()
        # try/finally so that tempdir.close() runs even when plotting fails.
        try:
            out_base = join(tempdir.name, 'out')
            plot_in_genome(iterator, out_base=out_base, labels=labels)
        finally:
            tempdir.close()
コード例 #9
0
    def test_plot_window(self):
        """Smoke test for plot_in_genome with two labelled values.

        The input chains ``self.gen_windows()`` and
        ``self.gen_windows('ch2')``; nothing is asserted about the output.
        """
        iterator = itertools.chain(self.gen_windows(),
                                   self.gen_windows('ch2'))
        # A list of (key, value) pairs keeps the label order explicit; a dict
        # literal passed to OrderedDict has no guaranteed order on Python 2.
        labels = OrderedDict([
            ('val1', {'title': 'val1 title', 'ylabel': 'val1 ylabel'}),
            ('val2', {'title': 'val2 title', 'ylabel': 'val2 ylabel'}),
        ])
        tempdir = TemporaryDir()
        # Cleanup is in a finally clause so a plotting error does not skip it.
        try:
            plot_in_genome(iterator, out_base=join(tempdir.name, 'out'),
                           labels=labels)
        finally:
            tempdir.close()
コード例 #10
0
ファイル: test_plot.py プロジェクト: JoseBlanca/seq_crumbs
    def test_plot_window(self):
        """Smoke test for plot_in_genome with two labelled values.

        The input chains ``self.gen_windows()`` and
        ``self.gen_windows("ch2")``; nothing is asserted about the output.
        """
        iterator = itertools.chain(self.gen_windows(), self.gen_windows("ch2"))
        # Pairs instead of a dict literal: an OrderedDict initialised from a
        # plain dict only preserves whatever order that dict happens to have.
        labels = OrderedDict(
            [
                ("val1", {"title": "val1 title", "ylabel": "val1 ylabel"}),
                ("val2", {"title": "val2 title", "ylabel": "val2 ylabel"}),
            ]
        )
        tempdir = TemporaryDir()
        # try/finally so that tempdir.close() runs even when plotting fails.
        try:
            out_base = join(tempdir.name, "out")
            plot_in_genome(iterator, out_base=out_base, labels=labels)
        finally:
            tempdir.close()
コード例 #11
0
    def test_tophat_paired(self):
        """Check tophat paired-end mapping with and without mate parameters.

        Without ``mate_inner_dist``/``mate_std_dev`` a RuntimeError is
        expected; with them the mapping must write 'accepted_hits.bam'.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_1_fpath = os.path.join(TEST_DATA_DIR, 'reads_1.fastq')
        reads_2_fpath = os.path.join(TEST_DATA_DIR, 'reads_2.fastq')

        # TemporaryDir() is created before the try so that the finally clause
        # never refers to an unbound name if the creation itself fails.
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            # Only the mapping call may raise the expected RuntimeError; one
            # raised while building the index must not pass the test.
            try:
                map_with_tophat(index_fpath,
                                directory.name,
                                paired_fpaths=[reads_1_fpath, reads_2_fpath])
            except RuntimeError:
                pass
            else:
                self.fail('runtimeError expected')
        finally:
            directory.close()

        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            map_with_tophat(index_fpath,
                            directory.name,
                            paired_fpaths=[reads_1_fpath, reads_2_fpath],
                            mate_inner_dist=350,
                            mate_std_dev=50)
            # The existence check used to be computed and thrown away.
            assert os.path.exists(os.path.join(directory.name,
                                               'accepted_hits.bam'))
        finally:
            directory.close()
コード例 #12
0
def get_or_create_bwa_index(fpath, directory=None):
    """Return the path of the bwa index for a reference, creating it if needed.

    Args:
        fpath: path to the reference file; it is made absolute.
        directory: optional directory for the index. When given, the index
            path is ``directory/basename(fpath)``; otherwise it is ``fpath``.

    Returns:
        The index path (the same value whether or not it had to be built).
    """
    fpath = os.path.abspath(fpath)
    if directory is not None:
        index_fpath = os.path.join(directory, os.path.basename(fpath))
    else:
        index_fpath = fpath

    if _bwa_index_exists(index_fpath):
        return index_fpath

    if not os.path.exists(index_fpath):
        # Nothing at the index path yet: link the reference there and index
        # it in place.
        os.symlink(fpath, index_fpath)
        _create_bwa_index(index_fpath)
        return index_fpath

    # A file already sits at index_fpath but has no index: build the index
    # next to a symlink in a scratch directory and copy over everything
    # except the symlink itself.
    temp_dir = TemporaryDir()
    try:
        index_name = os.path.basename(index_fpath)
        tmp_index_fpath = os.path.join(temp_dir.name, index_name)
        os.symlink(fpath, tmp_index_fpath)
        _create_bwa_index(tmp_index_fpath)
        dest_dir = os.path.dirname(index_fpath)
        for file_ in os.listdir(temp_dir.name):
            if file_ == index_name:
                continue
            shutil.copy(os.path.join(temp_dir.name, file_),
                        os.path.join(dest_dir, file_))
    finally:
        # The scratch directory used to be left open; close it once the
        # index files have been copied (or the build has failed).
        temp_dir.close()

    return index_fpath
コード例 #13
0
ファイル: test_blast.py プロジェクト: fastq/seq_crumbs
 def test_blastdb():
     """It creates a blast database.

     Checks the returned path, that the sequence file is readable there and
     that a '.nsq' file was created next to it.
     """
     db_name = 'arabidopsis_genes'
     seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
     db_dir = TemporaryDir(prefix='blast_dbs_')
     try:
         db_path1 = get_or_create_blastdb(seq_fpath, directory=db_dir.name,
                                          dbtype='nucl')
         db_path = os.path.join(db_dir.name, db_name)
         # Read through a context manager so the handle is closed explicitly.
         with open(db_path1) as db_fhand:
             assert 'CATAGGGTCACCAATGGC' in db_fhand.read(100)
         assert db_path1 == db_path
         assert os.path.exists(db_path)
         index_fpath = os.path.join(db_dir.name, db_name + '.nsq')
         assert os.path.exists(index_fpath)
     finally:
         db_dir.close()
コード例 #14
0
    def test_run_binary(self):
        binary = join(VCF_BIN_DIR, 'annotate_snvs')
        assert 'usage' in check_output([binary, '-h'])

        config = '''[1]
    [[CloseToSnv]]
        distance = 60
        max_maf_depth = 0.7
[2]
    [[HighVariableRegion]]
        max_variability = 0.05
        window_in_bp = 101
        ref_fpath = '{sample_fasta}'
[3]
    [[CapEnzyme]]
        all_enzymes = True
        ref_fpath = '{sample_fasta}'

[4]
    [[HeterozigoteInSamples]]
        filter_id = 1
[5]
    [[IsVariableDepthAnnotator]]
        filter_id = 1
        samples = ['pep']
[6]
    [[LowComplexityRegionAnnotator]]
        ref_fpath = '{sample_fasta}'
'''
        config = config.format(sample_fasta=REF_FREEBAYES)

        config_fhand = NamedTemporaryFile(suffix='.config')
        config_fhand.write(config)
        config_fhand.flush()
        tmp_dir = TemporaryDir()
        cmd = [
            binary, FREEBAYES3_VCF_PATH, '-f', config_fhand.name, '-p',
            tmp_dir.name
        ]
        # raw_input(' '.join(cmd))
        result = check_output(cmd)
        tmp_dir.close()
        # print result
        assert 'cs60_0.70\t' in result
        assert 'CAP=MmeI' in result
        assert 'HIS1=True' in result
        assert '\tPASS\t' in result
コード例 #15
0
    def test_get_or_create_index(self):
        """Create a bwa index in a temporary directory, then request it again.

        Both calls must return ``directory/db_name`` and leave an index that
        ``_bwa_index_exists`` recognises.
        """
        db_name = 'arabidopsis_genes'
        seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
        assert not _bwa_index_exists(seq_fpath)

        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when an assertion
        # fails.
        try:
            index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
            expected_index = os.path.join(directory.name,
                                          os.path.basename(db_name))
            assert index_fpath == expected_index
            assert _bwa_index_exists(index_fpath)

            # already exists
            index_fpath = get_or_create_bwa_index(seq_fpath, directory.name)
            assert index_fpath == expected_index
            assert _bwa_index_exists(index_fpath)
        finally:
            directory.close()
コード例 #16
0
ファイル: test_mapping.py プロジェクト: JoseBlanca/seq_crumbs
    def test_get_or_create_index(self):
        """Create a bowtie2 index in a temporary directory, then request it again.

        Both calls must return ``directory/db_name`` and leave an index that
        ``_bowtie2_index_exists`` recognises.
        """
        db_name = 'arabidopsis_genes'
        seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
        assert _bowtie2_index_exists(seq_fpath)

        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when an assertion
        # fails.
        try:
            index_fpath = get_or_create_bowtie2_index(seq_fpath,
                                                      directory.name)
            expected_index = os.path.join(directory.name,
                                          os.path.basename(db_name))
            assert index_fpath == expected_index
            assert _bowtie2_index_exists(index_fpath)

            # already exists
            index_fpath = get_or_create_bowtie2_index(seq_fpath,
                                                      directory.name)
            assert index_fpath == expected_index
            assert _bowtie2_index_exists(index_fpath)
        finally:
            directory.close()
コード例 #17
0
ファイル: test_blast.py プロジェクト: terrycojones/seq_crumbs
 def test_blastdb():
     """It creates a blast database.

     Checks the returned path, that the sequence file is readable there and
     that a '.nsq' file was created next to it.
     """
     db_name = 'arabidopsis_genes'
     seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
     db_dir = TemporaryDir(prefix='blast_dbs_')
     try:
         db_path1 = get_or_create_blastdb(seq_fpath,
                                          directory=db_dir.name,
                                          dbtype='nucl')
         db_path = os.path.join(db_dir.name, db_name)
         # The handle is closed explicitly instead of being left to the
         # garbage collector.
         with open(db_path1) as db_fhand:
             first_chars = db_fhand.read(100)
         assert 'CATAGGGTCACCAATGGC' in first_chars
         assert db_path1 == db_path
         assert os.path.exists(db_path)
         index_fpath = os.path.join(db_dir.name, db_name + '.nsq')
         assert os.path.exists(index_fpath)
     finally:
         db_dir.close()
コード例 #18
0
 def test_blast_search(self):
     """It does a blast search.

     A nucleotide blast database is created in a temporary directory and
     blastn is run against it; the test passes when the output file contains
     the closing '</BlastOutput>' tag.
     """
     db_name = 'arabidopsis_genes'
     seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
     db_dir = TemporaryDir(prefix='blast_dbs_')
     try:
         db_fpath = get_or_create_blastdb(seq_fpath, directory=db_dir.name,
                                          dbtype='nucl')
         query_fhand = NamedTemporaryFile()
         # Close the source handle explicitly once the query is copied.
         with open(seq_fpath) as seq_fhand:
             query_fhand.write(seq_fhand.read(200))
         query_fhand.flush()
         out_fhand = NamedTemporaryFile()
         # NOTE(review): query_fhand is written but never used; the search is
         # run with seq_fpath as the query. Left as is to keep the test's
         # behaviour - confirm which one was intended.
         do_blast(seq_fpath, db_fpath, program='blastn',
                  out_fpath=out_fhand.name)
         with open(out_fhand.name) as result_fhand:
             assert '</BlastOutput>' in result_fhand.read()
     finally:
         db_dir.close()
コード例 #19
0
    def test_run_binary(self):
        binary = join(VCF_BIN_DIR, 'annotate_snvs')
        assert 'usage' in check_output([binary, '-h'])

        config = '''[1]
    [[CloseToSnv]]
        distance = 60
        max_maf_depth = 0.7
[2]
    [[HighVariableRegion]]
        max_variability = 0.05
        window_in_bp = 101
        ref_fpath = '{sample_fasta}'
[3]
    [[CapEnzyme]]
        all_enzymes = True
        ref_fpath = '{sample_fasta}'

[4]
    [[HeterozigoteInSamples]]
        filter_id = 1
[5]
    [[IsVariableDepthAnnotator]]
        filter_id = 1
        samples = ['pep']
[6]
    [[LowComplexityRegionAnnotator]]
        ref_fpath = '{sample_fasta}'
'''
        config = config.format(sample_fasta=REF_FREEBAYES)

        config_fhand = NamedTemporaryFile(suffix='.config')
        config_fhand.write(config)
        config_fhand.flush()
        tmp_dir = TemporaryDir()
        cmd = [binary, FREEBAYES3_VCF_PATH, '-f', config_fhand.name,
               '-p', tmp_dir.name]
        # raw_input(' '.join(cmd))
        result = check_output(cmd)
        tmp_dir.close()
        # print result
        assert 'cs60_0.70\t' in result
        assert 'CAP=MmeI' in result
        assert 'HIS1=True' in result
        assert '\tPASS\t' in result
コード例 #20
0
ファイル: test_blast.py プロジェクト: fastq/seq_crumbs
    def test_get_or_create_blastdb():
        """Create a blast db in a temporary directory and reuse existing ones.

        First the db is created (and requested a second time) inside a
        temporary directory; then a db that already exists under
        'blastdbs' in TEST_DATA_DIR is requested in place.
        """
        blastdb = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')

        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when an assertion
        # fails.
        try:
            assert not _blastdb_exists(blastdb, NUCL)
            get_or_create_blastdb(blastdb, NUCL, directory.name)
            new_blast_path = os.path.join(directory.name,
                                          os.path.basename(blastdb))
            assert _blastdb_exists(new_blast_path, NUCL)
            get_or_create_blastdb(blastdb, NUCL, directory.name)
            assert _blastdb_exists(new_blast_path, NUCL)
        finally:
            directory.close()

        # already exists
        blastdb = os.path.join(TEST_DATA_DIR, 'blastdbs', 'arabidopsis_genes')
        assert _blastdb_exists(blastdb, NUCL)
        get_or_create_blastdb(blastdb, NUCL)
        assert _blastdb_exists(blastdb, NUCL)
コード例 #21
0
ファイル: test_blast.py プロジェクト: terrycojones/seq_crumbs
    def test_get_or_create_blastdb():
        """Create a blast db in a temporary directory and reuse existing ones.

        First the db is created (and requested a second time) inside a
        temporary directory; then a db that already exists under
        'blastdbs' in TEST_DATA_DIR is requested in place.
        """
        blastdb = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        assert not _blastdb_exists(blastdb, NUCL)

        directory = TemporaryDir()
        # Cleanup is in a finally clause so a failed assertion does not
        # skip it.
        try:
            new_blast_path = os.path.join(directory.name,
                                          os.path.basename(blastdb))
            # The second iteration requests a db that already exists.
            for _ in range(2):
                get_or_create_blastdb(blastdb, NUCL, directory.name)
                assert _blastdb_exists(new_blast_path, NUCL)
        finally:
            directory.close()

        # already exists
        blastdb = os.path.join(TEST_DATA_DIR, 'blastdbs', 'arabidopsis_genes')
        assert _blastdb_exists(blastdb, NUCL)
        get_or_create_blastdb(blastdb, NUCL)
        assert _blastdb_exists(blastdb, NUCL)
コード例 #22
0
ファイル: test_filters.py プロジェクト: milw/seq_crumbs
    def test_filter_by_bowtie2_bin():
        """Run the filter_by_bowtie2 binary on fastq and fasta reads.

        For each input the file given with '-o' must contain 'no_arabi' and
        the file given with '-e' must contain 'read1'.
        """
        filter_bin = os.path.join(BIN_DIR, 'filter_by_bowtie2')
        assert 'usage' in check_output([filter_bin, '-h'])
        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when the binary or
        # an assertion fails.
        try:
            reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
            index_fpath = get_or_create_bowtie2_index(
                reference_fpath, directory=directory.name)

            fastq_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fastq')
            fasta_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fasta')
            for reads_fpath in [fastq_fpath, fasta_fpath]:
                out_fhand = NamedTemporaryFile(suffix='.seqs')
                filtered_fhand = NamedTemporaryFile(suffix='.seqs')
                cmd = [filter_bin, '-i', index_fpath, '-o', out_fhand.name,
                       '-e', filtered_fhand.name, reads_fpath]
                check_output(cmd)
                # Re-open by name and close explicitly after reading.
                with open(out_fhand.name) as result_fhand:
                    assert 'no_arabi' in result_fhand.read()
                with open(filtered_fhand.name) as result_fhand:
                    assert 'read1' in result_fhand.read()
        finally:
            directory.close()
コード例 #23
0
ファイル: test_mapping.py プロジェクト: JoseBlanca/seq_crumbs
    def test_add_rg_to_bam(self):
        """Map reads with bwa mem using a read group and inspect the BAM.

        The test passes when the ``samtools view -h`` output contains the
        ``@RG`` header line for the read group ID and a known read sequence.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        lib_name = 'aa'
        readgroup = {'ID': lib_name, 'PL': 'illumina', 'LB': lib_name,
                     'SM': '{0}_illumina_pe'.format(lib_name), 'PU': '0'}
        directory = TemporaryDir()
        # Cleanup is in a finally clause so a failed mapping or assertion
        # does not skip it.
        try:
            index_fpath = get_or_create_bwa_index(reference_fpath,
                                                  directory.name)
            bam_fhand = NamedTemporaryFile(suffix='.bam')
            log_fhand = NamedTemporaryFile()
            bwa = map_with_bwamem(index_fpath, unpaired_fpath=reads_fpath,
                                  readgroup=readgroup,
                                  log_fpath=log_fhand.name)
            map_process_to_bam(bwa, bam_fhand.name)
            samtools_cmd = [get_binary_path('samtools'), 'view', '-h',
                            bam_fhand.name]
            out = subprocess.check_output(samtools_cmd, stderr=log_fhand)
            assert '@RG\tID:aa' in out
            assert 'TTCTGATTCAATCTACTTCAAAGTTGGCTTTATCAATAAG' in out
        finally:
            directory.close()
コード例 #24
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        """Look for the oligos in the given sequence file using blast.

        The sequences are rewritten as fasta into a temporary directory and
        blasted against the oligos with ``self.params`` and ``self.program``.

        Args:
            seq_fpath: path of the sequence file to search in.
            oligos: the oligos, passed through to ``_do_blast_2``.
            seqs_type: passed to ``_do_blast_2`` as ``dbtype``.

        Returns:
            A dict mapping each matched subject name to the list of its
            match parts, accumulated over all the blast results.
        """
        temp_dir = TemporaryDir()
        blast_fhand = None
        # try/finally so the temporary directory and the blast result handle
        # are closed even when blast or the parsing fails.
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both handles are closed (and the output flushed) before blast
            # reads the file written at dbpath.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as db_fhand:
                    seqio([in_fhand],
                          db_fhand,
                          out_format='fasta',
                          copy_if_same_format=False)

            # blast_fhand has to stay open while the blasts are consumed.
            blasts, blast_fhand = _do_blast_2(dbpath,
                                              oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            for blast in blasts:
                oligo = blast['query']
                for match in blast['matches']:
                    read = match['subject']
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match['match_parts'],
                            query_length=oligo['length'],
                            subject_length=read['length'],
                            align_completely=QUERY)

                    # Accumulate into a list owned by the index, so later
                    # matches do not grow the first match's own list.
                    indexed_match_parts.setdefault(read['name'], []).extend(
                        match['match_parts'])
            return indexed_match_parts
        finally:
            temp_dir.close()
            if blast_fhand is not None:
                blast_fhand.close()
コード例 #25
0
ファイル: test_blast.py プロジェクト: terrycojones/seq_crumbs
 def test_blast_search(self):
     """It does a blast search.

     A nucleotide blast database is created in a temporary directory and
     blastn is run against it; the test passes when the output file contains
     the closing '</BlastOutput>' tag.
     """
     db_name = 'arabidopsis_genes'
     seq_fpath = os.path.join(TEST_DATA_DIR, db_name)
     db_dir = TemporaryDir(prefix='blast_dbs_')
     try:
         db_fpath = get_or_create_blastdb(seq_fpath,
                                          directory=db_dir.name,
                                          dbtype='nucl')
         query_fhand = NamedTemporaryFile()
         # Close the source handle explicitly once the query is copied.
         with open(seq_fpath) as seq_fhand:
             query_fhand.write(seq_fhand.read(200))
         query_fhand.flush()
         out_fhand = NamedTemporaryFile()
         # NOTE(review): the query written above is not what gets blasted;
         # do_blast receives seq_fpath. Behaviour kept - confirm the intent.
         do_blast(seq_fpath,
                  db_fpath,
                  program='blastn',
                  out_fpath=out_fhand.name)
         with open(out_fhand.name) as result_fhand:
             blast_output = result_fhand.read()
         assert '</BlastOutput>' in blast_output
     finally:
         db_dir.close()
コード例 #26
0
    def test_filter_by_bowtie2_bin():
        """Run the filter_by_bowtie2 binary on fastq and fasta reads.

        For each input the file given with '-o' must contain 'no_arabi' and
        the file given with '-e' must contain 'read1'.
        """
        filter_bin = os.path.join(BIN_DIR, 'filter_by_bowtie2')
        assert 'usage' in check_output([filter_bin, '-h'])
        directory = TemporaryDir()
        # Cleanup is in a finally clause so a failure does not skip it.
        try:
            index_fpath = get_or_create_bowtie2_index(
                os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
                directory=directory.name)

            reads_fpaths = [
                os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq'),
                os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fasta'),
            ]
            for reads_fpath in reads_fpaths:
                out_fhand = NamedTemporaryFile(suffix='.seqs')
                filtered_fhand = NamedTemporaryFile(suffix='.seqs')
                cmd = [
                    filter_bin, '-i', index_fpath, '-o', out_fhand.name, '-e',
                    filtered_fhand.name, reads_fpath
                ]
                check_output(cmd)
                # Result files are read through context managers so the
                # extra handles are closed explicitly.
                with open(out_fhand.name) as passed_fhand:
                    assert 'no_arabi' in passed_fhand.read()
                with open(filtered_fhand.name) as rejected_fhand:
                    assert 'read1' in rejected_fhand.read()
        finally:
            directory.close()
コード例 #27
0
ファイル: test_filters.py プロジェクト: milw/seq_crumbs
    def test_filter_by_bowtie2():
        """Filter fastq and fasta reads with FilterBowtie2Match.

        For both preferred sequence classes, 'no_arabi' must pass and
        'read1', 'read2' and 'read3' must be filtered out.
        """
        directory = TemporaryDir()
        # try/finally so that directory.close() runs even when an assertion
        # fails.
        try:
            reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
            index_fpath = get_or_create_bowtie2_index(
                reference_fpath, directory=directory.name)
            fastq_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fastq')
            fasta_fpath = os.path.join(TEST_DATA_DIR,
                                       'arabidopsis_reads.fasta')

            passed = ['no_arabi']
            filtered_out = ['read1', 'read2', 'read3']
            for preferred_classes in [[SEQITEM], [SEQRECORD]]:
                for reads_fpath in [fastq_fpath, fasta_fpath]:
                    # Keep the reads file open for the whole check and close
                    # it explicitly afterwards.
                    with open(reads_fpath) as reads_fhand:
                        seq_packets = read_seq_packets(
                            [reads_fhand],
                            prefered_seq_classes=preferred_classes)
                        filter_packets = seq_to_filterpackets(seq_packets)
                        filter_ = FilterBowtie2Match(index_fpath)
                        filter_packet = list(filter_packets)[0]
                        filter_packets = filter_(filter_packet)
                        names = _seqs_to_names(filter_packets[SEQS_PASSED])
                        assert names == passed
                        names = _seqs_to_names(
                            filter_packets[SEQS_FILTERED_OUT])
                        assert names == filtered_out
        finally:
            directory.close()
コード例 #28
0
ファイル: blast.py プロジェクト: charles-plessy/seq_crumbs
    def _look_for_blast_matches(self, seq_fpath, oligos):
        """Look for the oligos in the given sequence file using blast.

        The sequences are rewritten as fasta into a temporary directory and
        blasted against the oligos with ``self.params`` and ``self.program``.

        Args:
            seq_fpath: path of the sequence file to search in.
            oligos: the oligos, passed through to ``_do_blast_2``.

        Returns:
            A dict mapping each matched subject name to its match parts.
            With a single oligo the last match seen for a subject is kept;
            otherwise the match parts of all matches are accumulated.
        """
        temp_dir = TemporaryDir()
        blast_fhand = None
        # try/finally so the temporary directory and the blast result handle
        # are closed even when blast or the parsing fails.
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both handles are closed (and the output flushed) before blast
            # reads the file written at dbpath.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, "w") as db_fhand:
                    seqio(
                        [in_fhand],
                        [db_fhand],
                        out_format="fasta",
                        copy_if_same_format=False,
                    )

            # blast_fhand has to stay open while the blasts are consumed.
            blasts, blast_fhand = _do_blast_2(
                dbpath, oligos, params=self.params, program=self.program
            )
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            one_oligo = len(oligos) == 1
            for blast in blasts:
                oligo = blast["query"]
                for match in blast["matches"]:
                    read = match["subject"]
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match["match_parts"],
                            query_length=oligo["length"],
                            subject_length=read["length"],
                            align_completely=QUERY,
                        )

                    match_parts = match["match_parts"]
                    if one_oligo:
                        indexed_match_parts[read["name"]] = match_parts
                    else:
                        # Accumulate into a list owned by the index, so
                        # later matches do not grow the first match's list.
                        indexed_match_parts.setdefault(
                            read["name"], []
                        ).extend(match_parts)
            return indexed_match_parts
        finally:
            temp_dir.close()
            if blast_fhand is not None:
                blast_fhand.close()
コード例 #29
0
ファイル: test_mapping.py プロジェクト: JoseBlanca/seq_crumbs
    def test_tophat_paired(self):
        """Check tophat paired-end mapping with and without mate parameters.

        Without ``mate_inner_dist``/``mate_std_dev`` a RuntimeError is
        expected; with them the mapping must write 'accepted_hits.bam'.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')
        reads_1_fpath = os.path.join(TEST_DATA_DIR, 'reads_1.fastq')
        reads_2_fpath = os.path.join(TEST_DATA_DIR, 'reads_2.fastq')

        # The directory is created outside the try so the finally clause can
        # always refer to it.
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            # The expected RuntimeError must come from the mapping itself,
            # not from the index creation above.
            try:
                map_with_tophat(index_fpath, directory.name,
                                paired_fpaths=[reads_1_fpath, reads_2_fpath])
            except RuntimeError:
                pass
            else:
                self.fail('runtimeError expected')
        finally:
            directory.close()

        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            map_with_tophat(index_fpath, directory.name,
                            paired_fpaths=[reads_1_fpath, reads_2_fpath],
                            mate_inner_dist=350, mate_std_dev=50)
            bam_fpath = os.path.join(directory.name, 'accepted_hits.bam')
            # Previously the existence check was computed but never asserted.
            assert os.path.exists(bam_fpath)
        finally:
            directory.close()
コード例 #30
0
    def test_rel_symlink(self):
        """It tests various cases of rel symlinks.

        Covered cases: a link to a directory, a link to a file, and a link
        created two directory levels below the temporary directory.
        """
        tempdir = TemporaryDir()
        try:
            hola = os.path.join(tempdir.name, 'hola')
            os.mkdir(hola)
            caracola = os.path.join(tempdir.name, 'caracola')
            rel_symlink(hola, caracola)
            assert os.path.exists(caracola)

            fname = os.path.join(hola, 'fname')
            # Create the empty file and close the handle right away instead
            # of leaving it open.
            open(fname, 'w').close()
            caracola2 = os.path.join(tempdir.name, 'caracola2')
            rel_symlink(fname, caracola2)
            assert os.path.exists(caracola2)

            path2 = os.path.join(tempdir.name, 'dir1', 'dir2')
            os.makedirs(path2)
            caracola3 = os.path.join(path2, 'caracola3')
            rel_symlink(hola, caracola3)
            assert os.path.exists(caracola3)
        finally:
            tempdir.close()
コード例 #31
0
    def test_filter_by_bowtie2():
        """Filter fastq and fasta reads with FilterBowtie2Match.

        For both preferred sequence classes, 'no_arabi' must pass and
        'read1', 'read2' and 'read3' must be filtered out.
        """
        directory = TemporaryDir()
        # Cleanup is in a finally clause so a failed assertion does not
        # skip it.
        try:
            index_fpath = get_or_create_bowtie2_index(
                os.path.join(TEST_DATA_DIR, 'arabidopsis_genes'),
                directory=directory.name)
            reads_fpaths = [
                os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq'),
                os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fasta'),
            ]

            passed = ['no_arabi']
            for preferred_classes in [[SEQITEM], [SEQRECORD]]:
                for reads_fpath in reads_fpaths:
                    # The reads file stays open during the whole check and
                    # is closed explicitly when the block exits.
                    with open(reads_fpath) as reads_fhand:
                        seq_packets = read_seq_packets(
                            [reads_fhand],
                            prefered_seq_classes=preferred_classes)
                        filter_packets = seq_to_filterpackets(seq_packets)
                        filter_ = FilterBowtie2Match(index_fpath)
                        filter_packet = list(filter_packets)[0]
                        filter_packets = filter_(filter_packet)
                        assert _seqs_to_names(
                            filter_packets[SEQS_PASSED]) == passed
                        assert _seqs_to_names(
                            filter_packets[SEQS_FILTERED_OUT]) == [
                                'read1', 'read2', 'read3'
                            ]
        finally:
            directory.close()
コード例 #32
0
ファイル: test_utils.py プロジェクト: radaniba/seq_crumbs
    def test_rel_symlink(self):
        """It tests various cases of rel symlinks.

        Covered cases: a link to a directory, a link to a file, and a link
        created two directory levels below the temporary directory.
        """
        tempdir = TemporaryDir()
        try:
            hola = os.path.join(tempdir.name, "hola")
            os.mkdir(hola)
            caracola = os.path.join(tempdir.name, "caracola")
            rel_symlink(hola, caracola)
            assert os.path.exists(caracola)

            fname = os.path.join(hola, "fname")
            # The file only needs to exist; the context manager closes the
            # handle that used to be left open.
            with open(fname, "w"):
                pass
            caracola2 = os.path.join(tempdir.name, "caracola2")
            rel_symlink(fname, caracola2)
            assert os.path.exists(caracola2)

            path2 = os.path.join(tempdir.name, "dir1", "dir2")
            os.makedirs(path2)
            caracola3 = os.path.join(path2, "caracola3")
            rel_symlink(hola, caracola3)
            assert os.path.exists(caracola3)
        finally:
            tempdir.close()
コード例 #33
0
ファイル: test_utils.py プロジェクト: bharatpatel/seq_crumbs
    def test_rel_symlink(self):
        """It tests various cases of rel symlinks.

        Covered cases: a link to a directory, a link to a file, and a link
        created two directory levels below the temporary directory.
        """
        tempdir = TemporaryDir()
        try:
            base_dir = tempdir.name

            hola = os.path.join(base_dir, 'hola')
            os.mkdir(hola)
            caracola = os.path.join(base_dir, 'caracola')
            rel_symlink(hola, caracola)
            assert os.path.exists(caracola)

            fname = os.path.join(hola, 'fname')
            # Create the empty target file without leaving its handle open.
            open(fname, 'w').close()
            caracola2 = os.path.join(base_dir, 'caracola2')
            rel_symlink(fname, caracola2)
            assert os.path.exists(caracola2)

            path2 = os.path.join(base_dir, 'dir1', 'dir2')
            os.makedirs(path2)
            caracola3 = os.path.join(path2, 'caracola3')
            rel_symlink(hola, caracola3)
            assert os.path.exists(caracola3)
        finally:
            tempdir.close()
コード例 #34
0
ファイル: blast.py プロジェクト: JoseBlanca/seq_crumbs
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        """Look for the oligos in the given sequence file using blast.

        The sequences are rewritten as fasta into a temporary directory and
        blasted against the oligos with ``self.params`` and ``self.program``.

        Args:
            seq_fpath: path of the sequence file to search in.
            oligos: the oligos, passed through to ``_do_blast_2``.
            seqs_type: passed to ``_do_blast_2`` as ``dbtype``.

        Returns:
            A dict mapping each matched subject name to the list of its
            match parts, accumulated over all the blast results.
        """
        temp_dir = TemporaryDir()
        blast_fhand = None
        # Cleanup lives in a finally clause so that neither the temporary
        # directory nor the blast result handle is leaked on errors.
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Closing the written file before blasting guarantees that its
            # content has been flushed to disk.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as db_fhand:
                    seqio([in_fhand], db_fhand, out_format='fasta',
                          copy_if_same_format=False)

            # blast_fhand has to stay open while the blasts are consumed.
            blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            for blast in blasts:
                oligo = blast['query']
                for match in blast['matches']:
                    read = match['subject']
                    match_parts = match['match_parts']
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match_parts,
                            query_length=oligo['length'],
                            subject_length=read['length'],
                            align_completely=QUERY)

                    # A fresh list per subject: extending the first match's
                    # own list would silently modify the blast result.
                    parts = indexed_match_parts.setdefault(read['name'], [])
                    parts.extend(match_parts)
            return indexed_match_parts
        finally:
            temp_dir.close()
            if blast_fhand is not None:
                blast_fhand.close()
コード例 #35
0
    def test_map_with_bowtie2(self):
        """Map reads with bowtie2 and convert the output to bam.

        Two runs are done against an index built from the same reference:
        one with a single file of unpaired reads and one with a pair of
        forward/reverse read files. There are no asserts on the resulting
        bam; the test passes when none of the calls raises.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')

        # With unpaired_fpath option
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            with NamedTemporaryFile(suffix='.bam') as bam_fhand:
                bowtie2 = map_with_bowtie2(index_fpath,
                                           unpaired_fpath=reads_fpath)
                map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            # Remove the index directory even when the mapping fails
            directory.close()

        # With paired_fpaths option
        forward_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_1.fastq')
        reverse_fpath = os.path.join(TEST_DATA_DIR, 'arabreads_2.fastq')
        paired_fpaths = (forward_fpath, reverse_fpath)
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            with NamedTemporaryFile(suffix='.bam') as bam_fhand:
                bowtie2 = map_with_bowtie2(index_fpath,
                                           paired_fpaths=paired_fpaths)
                map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()
コード例 #36
0
    def test_downgrade_read_edges_binary(self):
        """Run the downgrade_bam_edge_qual binary and check its output.

        Three scenarios are covered:
          1. writing the result to a separate output file given with -o,
          2. running on a copy of the bam without -o and checking that the
             copy itself has been modified,
          3. running again on that already processed bam, which must fail
             with an 'Edge qualities already downgraded' error.
        """
        binary = os.path.join(BAM_BIN_DIR, 'downgrade_bam_edge_qual')
        bam_fpath = os.path.join(TEST_DATA_DIR, 'sample_rev.bam')

        def check_first_read(fpath):
            'It checks the qualities and the dl/dr tags of the first read'
            sam = AlignmentFile(fpath)
            try:
                # next() works with both the py2 and py3 iterator protocols
                read = next(sam)
                assert list(read.query_qualities[:2]) == [0, 0]
                assert read.get_tag('dl') == '8)5B'
                assert read.get_tag('dr') == '8?>>'
            finally:
                sam.close()

        with NamedTemporaryFile() as out_fhand:
            check_call([binary, '-o', out_fhand.name, bam_fpath])
            check_first_read(out_fhand.name)

        # check bam substitution
        with TemporaryDir() as tmp_dir:
            dirname = tmp_dir.name
            shutil.copy(bam_fpath, dirname)
            bam_fpath = os.path.join(dirname, os.path.basename(bam_fpath))
            # NOTE(review): '-t' looks like the binary's temporary directory
            # option - confirm. The test's own temporary directory is used
            # instead of a developer's home directory, so that the test can
            # run on any machine.
            check_call([binary, bam_fpath, '-t', dirname])
            check_first_read(bam_fpath)

            # we can not downgrade an already downgraded bam
            with NamedTemporaryFile() as stderr_fhand:
                try:
                    check_call([binary, bam_fpath], stderr=stderr_fhand)
                except CalledProcessError:
                    stderr_fhand.flush()
                    msg = 'RuntimeError: Edge qualities already downgraded'
                    with open(stderr_fhand.name) as fhand:
                        stderr = fhand.read()
                    # Any other failure is unexpected: let it propagate
                    if msg not in stderr:
                        raise
                else:
                    self.fail('CalledProcessError expected')
コード例 #37
0
ファイル: test_mapping.py プロジェクト: JoseBlanca/seq_crumbs
    def test_map_with_bowtie2(self):
        """Map reads with bowtie2 and convert the output to bam.

        Two runs are done against an index built from the same reference:
        one with a single file of unpaired reads and one with the
        paired_fpaths option. There are no asserts on the resulting bam;
        the test passes when none of the calls raises.
        """
        reference_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_genes')

        # With unpaired_fpath option
        reads_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            with NamedTemporaryFile(suffix='.bam') as bam_fhand:
                bowtie2 = map_with_bowtie2(index_fpath,
                                           unpaired_fpath=reads_fpath)
                map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            # Remove the index directory even when the mapping fails
            directory.close()

        # With paired_fpaths option
        forward_fpath = os.path.join(TEST_DATA_DIR, 'arabidopsis_reads.fastq')
        # NOTE(review): the temporary file object is discarded right after
        # taking its name, so the file has probably been deleted by the time
        # bowtie2 runs - confirm that a missing reverse file is intended.
        reverse_fpath = NamedTemporaryFile().name
        paired_fpaths = (forward_fpath, reverse_fpath)
        directory = TemporaryDir()
        try:
            index_fpath = get_or_create_bowtie2_index(reference_fpath,
                                                      directory.name)
            with NamedTemporaryFile(suffix='.bam') as bam_fhand:
                bowtie2 = map_with_bowtie2(index_fpath,
                                           paired_fpaths=paired_fpaths)
                map_process_to_bam(bowtie2, bam_fhand.name)
        finally:
            directory.close()