def get_or_create_blastdb(blastdb_or_path, dbtype=None, directory=None):
    '''It returns the path of a blast database, creating it when missing.

    The sequence file path is resolved with _get_abs_blastdb_path. If a
    directory is given, the database is placed in that directory under the
    base name of the sequence file; otherwise it lives next to the sequence
    file itself.

    When the database does not exist yet:
      - the sequence file is rewritten as fasta into the target path (only
        if the target differs from the sequence file),
      - dbtype is guessed from the database file when it was not given,
      - the database is built with _makeblastdb_plus.

    It returns the database path.

    It raises a RuntimeError if the database is missing and the sequence
    file needed to build it does not exist.
    '''
    seq_fpath = _get_abs_blastdb_path(blastdb_or_path, dbtype)
    if directory:
        dbname = os.path.basename(seq_fpath)
        dbpath = os.path.join(directory, dbname)
    else:
        dbpath = seq_fpath

    if not _blastdb_exists(dbpath, dbtype=dbtype):
        if not os.path.exists(seq_fpath):
            msg = 'An input sequence is required to create a blastdb'
            raise RuntimeError(msg)
        if seq_fpath != dbpath:
            # Both handles are closed on leaving the with block, so the
            # fasta file is closed before dbpath is opened again below.
            with open(seq_fpath) as in_fhand, open(dbpath, 'w') as out_fhand:
                seqio([in_fhand], out_fhand, out_format='fasta',
                      copy_if_same_format=False)
        if dbtype is None:
            with open(dbpath) as db_fhand:
                dbtype = guess_seq_type(db_fhand)
        _makeblastdb_plus(dbpath, dbtype)
    return dbpath
def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
    '''It looks for the oligos in the given sequence file.

    The sequences are rewritten as fasta into a temporary directory and
    that copy is passed to _do_blast_2 together with the oligos. The
    resulting alignments are optionally filtered with self.filters and,
    if self.elongate_for_global is set, every match is elongated with
    elongate_match_parts_till_global.

    It returns a dict that maps each subject (read) name to the list of
    all the match parts found for it.
    '''
    temp_dir = TemporaryDir()
    # We need to keep the blast_fhand until the blasts have been consumed,
    # because it is a temp file and otherwise it might be removed.
    blast_fhand = None
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # The handles are closed before dbpath is handed to the blast.
        with open(seq_fpath) as in_fhand, open(dbpath, 'w') as out_fhand:
            seqio([in_fhand], out_fhand, out_format='fasta',
                  copy_if_same_format=False)

        blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                          params=self.params,
                                          program=self.program,
                                          dbtype=seqs_type)
        if self.filters is not None:
            blasts = filter_alignments(blasts, config=self.filters)

        # Which are the regions covered in each sequence?
        indexed_match_parts = {}
        for blast in blasts:
            oligo = blast['query']
            for match in blast['matches']:
                read = match['subject']
                if self.elongate_for_global:
                    elongate_match_parts_till_global(
                        match['match_parts'],
                        query_length=oligo['length'],
                        subject_length=read['length'],
                        align_completely=QUERY)
                # The parts are accumulated in a list owned by the result,
                # so the match's own list is never extended afterwards.
                read_parts = indexed_match_parts.setdefault(read['name'], [])
                read_parts.extend(match['match_parts'])
    finally:
        # The temporary resources are released even if something failed.
        temp_dir.close()
        if blast_fhand is not None:
            blast_fhand.close()
    return indexed_match_parts
def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
    '''It looks for the oligos in the given sequence file.

    The sequences are rewritten as fasta into a temporary directory and
    that copy is passed to _do_blast_2 together with the oligos. The
    resulting alignments are optionally filtered with self.filters and,
    if self.elongate_for_global is set, every match is elongated with
    elongate_match_parts_till_global.

    It returns a dict that maps each subject (read) name to the list of
    all the match parts found for it.
    '''
    temp_dir = TemporaryDir()
    # We need to keep the blast_fhand until the blasts have been consumed,
    # because it is a temp file and otherwise it might be removed.
    blast_fhand = None
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # The handles are closed before dbpath is handed to the blast.
        with open(seq_fpath) as in_fhand, open(dbpath, 'w') as out_fhand:
            seqio([in_fhand], out_fhand, out_format='fasta',
                  copy_if_same_format=False)

        blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                          params=self.params,
                                          program=self.program,
                                          dbtype=seqs_type)
        if self.filters is not None:
            blasts = filter_alignments(blasts, config=self.filters)

        # Which are the regions covered in each sequence?
        indexed_match_parts = {}
        for blast in blasts:
            oligo = blast['query']
            for match in blast['matches']:
                read = match['subject']
                if self.elongate_for_global:
                    elongate_match_parts_till_global(
                        match['match_parts'],
                        query_length=oligo['length'],
                        subject_length=read['length'],
                        align_completely=QUERY)
                # The parts are accumulated in a list owned by the result,
                # so the match's own list is never extended afterwards.
                read_parts = indexed_match_parts.setdefault(read['name'], [])
                read_parts.extend(match['match_parts'])
    finally:
        # The temporary resources are released even if something failed.
        temp_dir.close()
        if blast_fhand is not None:
            blast_fhand.close()
    return indexed_match_parts
def test_seqio(self):
    'It tests the seqio function'
    # fastq to fasta
    out_fhand = NamedTemporaryFile()
    seqio([self._make_fhand(FASTQ)], out_fhand, 'fasta')
    with open(out_fhand.name) as result_fhand:
        assert ">seq1\natcgt" in result_fhand.read()

    # fastq to fastq-illumina
    out_fhand = NamedTemporaryFile()
    seqio([self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')
    with open(out_fhand.name) as result_fhand:
        assert "@seq1\natcgt\n+\n^^^^" in result_fhand.read()

    # two fastq inputs written one after the other
    out_fhand = NamedTemporaryFile()
    seqio([self._make_fhand(FASTQ), self._make_fhand(FASTQ)], out_fhand,
          'fastq-illumina')
    with open(out_fhand.name) as result_fhand:
        assert "@seq3\natcgt\n+\n^^^^^\n@seq1" in result_fhand.read()

    # fasta to fastq
    out_fhand = NamedTemporaryFile()
    try:
        seqio([self._make_fhand(FASTA)], out_fhand, 'fastq')
    except IncompatibleFormatError as error:
        assert 'No qualities available' in str(error)
    else:
        self.fail("error previously expected")

    # bad_format fastq
    bad_fastq_fhand = self._make_fhand(FASTQ + 'aklsjhdas')
    try:
        seqio([bad_fastq_fhand], out_fhand, 'fasta')
    except MalformedFile as error:
        assert 'Lengths of sequence and quality' in str(error)
    else:
        self.fail("error previously expected")

    # genbank to fasta
    out_fhand = NamedTemporaryFile()
    genbank_fpath = os.path.join(TEST_DATA_DIR, 'sequence.gb')
    with open(genbank_fpath) as genbank_fhand:
        seqio([genbank_fhand], out_fhand, 'fasta')
    with open(out_fhand.name) as result_fhand:
        result = result_fhand.read()
    assert '>NM_019354.2' in result