Example #1
0
def get_or_create_blastdb(blastdb_or_path, dbtype=None, directory=None):
    '''It returns the path to a blast database, creating it if needed.

    The sequence file path is resolved with _get_abs_blastdb_path. If a
    directory is given the database is looked for, and built, inside that
    directory using the same file name; otherwise the database path is the
    sequence file path itself.

    When the database does not exist the sequences are written as fasta to
    the database path (only when it differs from the sequence path) and
    _makeblastdb_plus is run on it. If no dbtype is given it is obtained
    with guess_seq_type from the file found in the database path.

    It raises a RuntimeError if the database has to be created and the
    sequence file does not exist.
    '''
    seq_fpath = _get_abs_blastdb_path(blastdb_or_path, dbtype)
    if directory:
        dbpath = os.path.join(directory, os.path.basename(seq_fpath))
    else:
        dbpath = seq_fpath

    # nothing to do if the database is already there
    if _blastdb_exists(dbpath, dbtype=dbtype):
        return dbpath

    if not os.path.exists(seq_fpath):
        msg = 'An input sequence is required to create a blastdb'
        raise RuntimeError(msg)

    if seq_fpath != dbpath:
        # The handles are closed explicitly, so the fasta is completely
        # written to disk before it is read again below
        with open(seq_fpath) as in_fhand:
            with open(dbpath, 'w') as out_fhand:
                seqio([in_fhand],
                      out_fhand,
                      out_format='fasta',
                      copy_if_same_format=False)
    if dbtype is None:
        with open(dbpath) as db_fhand:
            dbtype = guess_seq_type(db_fhand)
    _makeblastdb_plus(dbpath, dbtype)
    return dbpath
    def test_seqio(self):
        'It tests the seqio function'

        def read_output(fhand):
            'It returns the content found in the path of the given fhand'
            # the file is closed as soon as it has been read
            with open(fhand.name) as written:
                return written.read()

        #fasta-qual to fastq
        in_fhands = (self._make_fhand(FASTA), self._make_fhand(QUAL))
        out_fhands = (self._make_fhand(),)
        out_format = 'fastq'
        seqio(in_fhands, out_fhands, out_format)
        assert "@seq1\natctagtc\n+\n???????" in read_output(out_fhands[0])

        #fastq to fasta-qual
        out_fhands = [self._make_fhand(), self._make_fhand()]
        seqio([self._make_fhand(FASTQ)], out_fhands, 'fasta')
        assert ">seq1\natcgt" in read_output(out_fhands[0])
        assert ">seq1\n30 30 30" in read_output(out_fhands[1])

        #fastq to fasta
        out_fhands = [self._make_fhand()]
        seqio([self._make_fhand(FASTQ)], out_fhands, 'fasta')
        assert ">seq1\natcgt" in read_output(out_fhands[0])

        #fastq to fastq-illumina
        out_fhands = [self._make_fhand()]
        seqio([self._make_fhand(FASTQ)], out_fhands, 'fastq-illumina')
        assert "@seq1\natcgt\n+\n^^^^" in read_output(out_fhands[0])

        #fasta-qual to fasta-qual, the output should match the input
        in_fhands = (self._make_fhand(FASTA), self._make_fhand(QUAL))
        out_fhands = (self._make_fhand(), self._make_fhand())
        out_format = 'fasta'
        seqio(in_fhands, out_fhands, out_format)
        assert FASTA == read_output(out_fhands[0])
        assert QUAL == read_output(out_fhands[1])
Example #3
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        '''It looks for the oligos in the given sequence file.

        The sequences are written as fasta into a temporary directory and
        the oligos are blasted against that file with _do_blast_2. The
        alignments are filtered when self.filters is not None.

        It returns a dict indexed by the subject name whose values are the
        lists with the match parts found for that subject.
        '''
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # the handles are closed before blast reads the fasta file
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as out_fhand:
                    seqio([in_fhand],
                          out_fhand,
                          out_format='fasta',
                          copy_if_same_format=False)

            # we need to keep the blast_fhand, because it is a temp file and
            # otherwise it might be removed
            blasts, blast_fhand = _do_blast_2(dbpath,
                                              oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                match['match_parts'],
                                query_length=oligo['length'],
                                subject_length=read['length'],
                                align_completely=QUERY)

                        # The parts are accumulated in a list owned by the
                        # result, so the match_parts list of a match is not
                        # extended with the parts of the following matches
                        parts = indexed_match_parts.setdefault(read['name'],
                                                               [])
                        parts.extend(match['match_parts'])
            finally:
                blast_fhand.close()
        finally:
            # the temporary directory is released even if something fails
            temp_dir.close()
        return indexed_match_parts
Example #4
0
def get_or_create_blastdb(blastdb_or_path, dbtype, directory=None):
    """It returns the path to a blast database, creating it if needed.

    The sequence file path is resolved with _get_abs_blastdb_path. If a
    directory is given the database is looked for, and built, inside that
    directory using the same file name; otherwise the database path is the
    sequence file path itself.

    When the database does not exist the sequences are written as fasta to
    the database path (only when it differs from the sequence path) and
    _makeblastdb_plus is run on it.

    It raises a RuntimeError if the database has to be created and the
    sequence file does not exist.
    """
    seq_fpath = _get_abs_blastdb_path(blastdb_or_path, dbtype)
    if directory:
        dbpath = os.path.join(directory, os.path.basename(seq_fpath))
    else:
        dbpath = seq_fpath

    # nothing to do if the database is already there
    if _blastdb_exists(dbpath, dbtype):
        return dbpath

    if not os.path.exists(seq_fpath):
        msg = "An input sequence is required to create a blastdb"
        raise RuntimeError(msg)

    if seq_fpath != dbpath:
        # The handles are closed explicitly, so the fasta is completely
        # written to disk before _makeblastdb_plus is run on it
        with open(seq_fpath) as in_fhand:
            with open(dbpath, "w") as out_fhand:
                seqio([in_fhand], [out_fhand], out_format="fasta",
                      copy_if_same_format=False)
    _makeblastdb_plus(dbpath, dbtype)
    return dbpath
Example #5
0
    def _look_for_blast_matches(self, seq_fpath, oligos):
        """It looks for the oligos in the given sequence file.

        The sequences are written as fasta into a temporary directory and
        the oligos are blasted against that file with _do_blast_2. The
        alignments are filtered when self.filters is not None.

        It returns a dict indexed by the subject name whose values are the
        lists with the match parts found for that subject. When there is
        only one oligo the match parts of a subject are stored as they are,
        otherwise the parts of every match on the subject are accumulated.
        """
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # the handles are closed before blast reads the fasta file
            with open(seq_fpath) as in_fhand:
                with open(dbpath, "w") as out_fhand:
                    seqio([in_fhand], [out_fhand], out_format="fasta",
                          copy_if_same_format=False)

            # we need to keep the blast_fhand, because it is a temp file and
            # otherwise it might be removed
            blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                              params=self.params,
                                              program=self.program)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                one_oligo = len(oligos) == 1
                for blast in blasts:
                    oligo = blast["query"]
                    for match in blast["matches"]:
                        read = match["subject"]
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                match["match_parts"],
                                query_length=oligo["length"],
                                subject_length=read["length"],
                                align_completely=QUERY,
                            )

                        match_parts = match["match_parts"]
                        if one_oligo:
                            indexed_match_parts[read["name"]] = match_parts
                        else:
                            # The parts are accumulated in a list owned by
                            # the result, so the match_parts list of a match
                            # is not extended with the parts of other matches
                            parts = indexed_match_parts.setdefault(
                                read["name"], [])
                            parts.extend(match_parts)
            finally:
                blast_fhand.close()
        finally:
            # the temporary directory is released even if something fails
            temp_dir.close()
        return indexed_match_parts
Example #6
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        '''It looks for the oligos in the given sequence file.

        The sequences are written as fasta into a temporary directory and
        the oligos are blasted against that file with _do_blast_2. The
        alignments are filtered when self.filters is not None.

        It returns a dict indexed by the subject name whose values are the
        lists with the match parts found for that subject.
        '''
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # the handles are closed before blast reads the fasta file
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as out_fhand:
                    seqio([in_fhand], out_fhand, out_format='fasta',
                          copy_if_same_format=False)

            # we need to keep the blast_fhand, because it is a temp file and
            # otherwise it might be removed
            blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                                 match['match_parts'],
                                                 query_length=oligo['length'],
                                                 subject_length=read['length'],
                                                 align_completely=QUERY)

                        # The parts are accumulated in a list owned by the
                        # result, so the match_parts list of a match is not
                        # extended with the parts of the following matches
                        parts = indexed_match_parts.setdefault(read['name'],
                                                               [])
                        parts.extend(match['match_parts'])
            finally:
                blast_fhand.close()
        finally:
            # the temporary directory is released even if something fails
            temp_dir.close()
        return indexed_match_parts
Example #7
0
    def test_seqio(self):
        'It tests the seqio function'

        def read_output(fhand):
            'It returns the content found in the path of the given fhand'
            # the file is closed as soon as it has been read
            with open(fhand.name) as written:
                return written.read()

        # fastq to fasta
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fasta')
        assert ">seq1\natcgt" in read_output(out_fhand)

        # fastq to fastq-illumina
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')
        assert "@seq1\natcgt\n+\n^^^^" in read_output(out_fhand)

        # two fastq inputs into one fastq-illumina output
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ),
               self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')

        assert "@seq3\natcgt\n+\n^^^^^\n@seq1" in read_output(out_fhand)

        # fasta to fastq
        out_fhand = NamedTemporaryFile()
        try:
            seqio([self._make_fhand(FASTA)], out_fhand, 'fastq')
            self.fail("error previously expected")
        except IncompatibleFormatError as error:
            assert 'No qualities available' in str(error)

        # bad_format fastq
        bad_fastq_fhand = self._make_fhand(FASTQ + 'aklsjhdas')
        try:
            seqio([bad_fastq_fhand], out_fhand, 'fasta')
            self.fail("error previously expected")
        except MalformedFile as error:
            assert 'Lengths of sequence and quality' in str(error)

        # genbank to fasta
        out_fhand = NamedTemporaryFile()
        genbank_fpath = os.path.join(TEST_DATA_DIR, 'sequence.gb')
        with open(genbank_fpath) as genbank_fhand:
            seqio([genbank_fhand], out_fhand, 'fasta')
        result = read_output(out_fhand)
        assert '>NM_019354.2' in result
0
    def test_seqio(self):
        'It tests the seqio function'

        def read_output(fhand):
            'It returns the content found in the path of the given fhand'
            # the file is closed as soon as it has been read
            with open(fhand.name) as written:
                return written.read()

        # fastq to fasta
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fasta')
        assert ">seq1\natcgt" in read_output(out_fhand)

        # fastq to fastq-illumina
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')
        assert "@seq1\natcgt\n+\n^^^^" in read_output(out_fhand)

        # two fastq inputs into one fastq-illumina output
        out_fhand = NamedTemporaryFile()
        seqio([self._make_fhand(FASTQ), self._make_fhand(FASTQ)],
              out_fhand, 'fastq-illumina')

        assert "@seq3\natcgt\n+\n^^^^^\n@seq1" in read_output(out_fhand)

        # fasta to fastq
        out_fhand = NamedTemporaryFile()
        try:
            seqio([self._make_fhand(FASTA)], out_fhand, 'fastq')
            self.fail("error previously expected")
        except IncompatibleFormatError as error:
            assert 'No qualities available' in str(error)

        # bad_format fastq
        bad_fastq_fhand = self._make_fhand(FASTQ + 'aklsjhdas')
        try:
            seqio([bad_fastq_fhand], out_fhand, 'fasta')
            self.fail("error previously expected")
        except MalformedFile as error:
            assert 'Lengths of sequence and quality' in str(error)

        # genbank to fasta
        out_fhand = NamedTemporaryFile()
        genbank_fpath = os.path.join(TEST_DATA_DIR, 'sequence.gb')
        with open(genbank_fpath) as genbank_fhand:
            seqio([genbank_fhand], out_fhand, 'fasta')
        result = read_output(out_fhand)
        assert '>NM_019354.2' in result