Esempio n. 1
0
def get_or_create_blastdb(blastdb_or_path, dbtype=None, directory=None):
    '''It returns the path of a blast database, building it if it is missing.

    blastdb_or_path is resolved with _get_abs_blastdb_path. When a directory
    is given the database is looked for, and created if needed, inside it
    under the base name of the resolved path; otherwise the resolved path
    itself is used as the database path.

    If the database does not exist the sequence file is required. It is
    rewritten as fasta in the database path (only when both paths differ),
    the dbtype is guessed from those sequences when it was not given and the
    database is built with _makeblastdb_plus.

    It raises a RuntimeError if the database does not exist and there is no
    sequence file to build it from.
    '''
    seq_fpath = _get_abs_blastdb_path(blastdb_or_path, dbtype)
    if directory:
        dbpath = os.path.join(directory, os.path.basename(seq_fpath))
    else:
        dbpath = seq_fpath

    if _blastdb_exists(dbpath, dbtype=dbtype):
        return dbpath

    if not os.path.exists(seq_fpath):
        msg = 'An input sequence is required to create a blastdb'
        raise RuntimeError(msg)

    if seq_fpath != dbpath:
        # The handles are closed on leaving the with blocks, so the fasta is
        # completely written to disk before it is read again below.
        with open(seq_fpath) as in_fhand:
            with open(dbpath, 'w') as out_fhand:
                seqio([in_fhand], out_fhand, out_format='fasta',
                      copy_if_same_format=False)
    if dbtype is None:
        with open(dbpath) as db_fhand:
            dbtype = guess_seq_type(db_fhand)
    _makeblastdb_plus(dbpath, dbtype)
    return dbpath
Esempio n. 2
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        '''It looks for the oligos in the given sequence file.

        The sequences are written as fasta into a temporary directory and the
        oligos are blasted against that copy with _do_blast_2, using
        self.params and self.program. If self.filters is set the alignments
        are filtered with filter_alignments.

        It returns a dict with the subject names as keys and, as values, a
        list with all the match parts found for that subject.
        '''
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # The handles are closed before blasting, so the fasta is
            # completely written to disk when it is used as the database.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as out_fhand:
                    seqio([in_fhand],
                          out_fhand,
                          out_format='fasta',
                          copy_if_same_format=False)

            # we need to keep the blast_fhand until the blasts have been
            # read, because it is a temp file and otherwise it might be
            # removed
            blasts, blast_fhand = _do_blast_2(dbpath,
                                              oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        if self.elongate_for_global:
                            # The return value is not used, so presumably
                            # the match parts are elongated in place.
                            elongate_match_parts_till_global(
                                match['match_parts'],
                                query_length=oligo['length'],
                                subject_length=read['length'],
                                align_completely=QUERY)

                        # A new list is created for every subject, so the
                        # lists that belong to the blast result are not
                        # extended with the parts of other matches.
                        parts = indexed_match_parts.setdefault(read['name'],
                                                               [])
                        parts.extend(match['match_parts'])
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Esempio n. 3
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        '''It looks for the oligos in the given sequence file.

        The sequences are written as fasta into a temporary directory and the
        oligos are blasted against that copy with _do_blast_2, using
        self.params and self.program. If self.filters is set the alignments
        are filtered with filter_alignments.

        It returns a dict with the subject names as keys and, as values, a
        list with all the match parts found for that subject.
        '''
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # The handles are closed before blasting, so the fasta is
            # completely written to disk when it is used as the database.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as out_fhand:
                    seqio([in_fhand], out_fhand, out_format='fasta',
                          copy_if_same_format=False)

            # we need to keep the blast_fhand until the blasts have been
            # read, because it is a temp file and otherwise it might be
            # removed
            blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        match_parts = match['match_parts']
                        if self.elongate_for_global:
                            # The return value is not used, so presumably
                            # the match parts are elongated in place.
                            elongate_match_parts_till_global(
                                match_parts,
                                query_length=oligo['length'],
                                subject_length=read['length'],
                                align_completely=QUERY)

                        # A new list is created for every subject, so the
                        # lists that belong to the blast result are not
                        # extended with the parts of other matches.
                        indexed_match_parts.setdefault(
                            read['name'], []).extend(match_parts)
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Esempio n. 4
0
def get_or_create_blastdb(blastdb_or_path, dbtype=None, directory=None):
    '''It returns the path of a blast database, building it if it is missing.

    blastdb_or_path is resolved with _get_abs_blastdb_path. When a directory
    is given the database is looked for, and created if needed, inside it
    under the base name of the resolved path; otherwise the resolved path
    itself is used as the database path.

    If the database does not exist the sequence file is required. It is
    rewritten as fasta in the database path (only when both paths differ),
    the dbtype is guessed from those sequences when it was not given and the
    database is built with _makeblastdb_plus.

    It raises a RuntimeError if the database does not exist and there is no
    sequence file to build it from.
    '''
    seq_fpath = _get_abs_blastdb_path(blastdb_or_path, dbtype)
    if directory:
        dbname = os.path.basename(seq_fpath)
        dbpath = os.path.join(directory, dbname)
    else:
        dbpath = seq_fpath

    if not _blastdb_exists(dbpath, dbtype=dbtype):
        if not os.path.exists(seq_fpath):
            msg = 'An input sequence is required to create a blastdb'
            raise RuntimeError(msg)
        if seq_fpath != dbpath:
            # Closing both handles here guarantees that the fasta is
            # completely on disk before it is read again below.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as out_fhand:
                    seqio([in_fhand], out_fhand, out_format='fasta',
                          copy_if_same_format=False)
        if dbtype is None:
            with open(dbpath) as db_fhand:
                dbtype = guess_seq_type(db_fhand)
        _makeblastdb_plus(dbpath, dbtype)
    return dbpath
Esempio n. 5
0
    def test_seqio(self):
        'It tests the seqio function'

        def read_output(fhand):
            'It returns the content written to the file of the given fhand'
            # The file is reopened by name, as a reader of the result would
            # do, and the with block closes the handle once it has been read.
            with open(fhand.name) as written:
                return written.read()

        # fastq to fasta
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fasta')
        assert ">seq1\natcgt" in read_output(out_fhand)

        # fastq to fastq-illumina
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')
        assert "@seq1\natcgt\n+\n^^^^" in read_output(out_fhand)

        # two fastq files into one fastq-illumina
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ),
               self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')

        assert "@seq3\natcgt\n+\n^^^^^\n@seq1" in read_output(out_fhand)

        # fasta to fastq
        out_fhand = NamedTemporaryFile()
        try:
            seqio([self._make_fhand(FASTA)], out_fhand, 'fastq')
            self.fail("error previously expected")
        except IncompatibleFormatError as error:
            assert 'No qualities available' in str(error)

        # bad_format fastq
        bad_fastq_fhand = self._make_fhand(FASTQ + 'aklsjhdas')
        try:
            seqio([bad_fastq_fhand], out_fhand, 'fasta')
            self.fail("error previously expected")
        except MalformedFile as error:
            assert 'Lengths of sequence and quality' in str(error)

        # genbank to fasta
        out_fhand = NamedTemporaryFile()
        genbank_fpath = os.path.join(TEST_DATA_DIR, 'sequence.gb')
        with open(genbank_fpath) as genbank_fhand:
            seqio([genbank_fhand], out_fhand, 'fasta')
        result = read_output(out_fhand)
        assert '>NM_019354.2' in result
Esempio n. 6
0
    def test_seqio(self):
        'It tests the seqio function'

        def read_output(fhand):
            'It returns the content written to the file of the given fhand'
            # The file is reopened by name, as a reader of the result would
            # do, and the with block closes the handle once it has been read.
            with open(fhand.name) as written:
                return written.read()

        # fastq to fasta
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fasta')
        assert ">seq1\natcgt" in read_output(out_fhand)

        # fastq to fastq-illumina
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')
        assert "@seq1\natcgt\n+\n^^^^" in read_output(out_fhand)

        # two fastq files into one fastq-illumina
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ), self._make_fhand(FASTQ)],
              out_fhand, 'fastq-illumina')

        assert "@seq3\natcgt\n+\n^^^^^\n@seq1" in read_output(out_fhand)

        # fasta to fastq
        out_fhand = NamedTemporaryFile()
        try:
            seqio([self._make_fhand(FASTA)], out_fhand, 'fastq')
            self.fail("error previously expected")
        except IncompatibleFormatError as error:
            assert 'No qualities available' in str(error)

        # bad_format fastq
        bad_fastq_fhand = self._make_fhand(FASTQ + 'aklsjhdas')
        try:
            seqio([bad_fastq_fhand], out_fhand, 'fasta')
            self.fail("error previously expected")
        except MalformedFile as error:
            assert 'Lengths of sequence and quality' in str(error)

        # genbank to fasta
        out_fhand = NamedTemporaryFile()
        with open(os.path.join(TEST_DATA_DIR, 'sequence.gb')) as genbank_fhand:
            seqio([genbank_fhand], out_fhand, 'fasta')
        result = read_output(out_fhand)
        assert '>NM_019354.2' in result