def get_or_create_blastdb(blastdb_or_path, dbtype=None, directory=None):
    '''Return the path of a blast database, building it when it is missing.

    The sequence file path is resolved with _get_abs_blastdb_path. When a
    directory is given, the database is looked for (and built, if needed) in
    that directory under the same file name as the sequence file; otherwise
    the database path is the sequence file path itself.

    Args:
        blastdb_or_path: value handed to _get_abs_blastdb_path to locate the
            sequence file.
        dbtype: sequence type of the database. When it is None and the
            database has to be built, it is guessed with guess_seq_type.
        directory: optional directory in which the database should live.

    Returns:
        The path of the blast database.

    Raises:
        RuntimeError: if the database does not exist and there is no
            sequence file to build it from.
    '''
    seq_fpath = _get_abs_blastdb_path(blastdb_or_path, dbtype)
    if directory:
        dbpath = os.path.join(directory, os.path.basename(seq_fpath))
    else:
        dbpath = seq_fpath

    # Nothing to build: the database is already there.
    if _blastdb_exists(dbpath, dbtype=dbtype):
        return dbpath

    if not os.path.exists(seq_fpath):
        msg = 'An input sequence is required to create a blastdb'
        raise RuntimeError(msg)

    if seq_fpath != dbpath:
        # Both handles are closed as soon as seqio is done, so nothing is
        # left open or unflushed when dbpath is read back below.
        with open(seq_fpath) as in_fhand, open(dbpath, 'w') as out_fhand:
            seqio([in_fhand], out_fhand, out_format='fasta',
                  copy_if_same_format=False)

    if dbtype is None:
        with open(dbpath) as db_fhand:
            dbtype = guess_seq_type(db_fhand)

    _makeblastdb_plus(dbpath, dbtype)
    return dbpath
def test_seqio(self):
    '''It tests the seqio function.

    Each section converts the module level fixtures (FASTA, QUAL, FASTQ) into
    another format and checks the text found in the output files.
    '''

    def written(fhand):
        'It returns the text found in the file behind the given handle'
        # Reading inside a with block closes the reading handle right away
        # instead of leaving it open until it is garbage collected.
        with open(fhand.name) as result_fhand:
            return result_fhand.read()

    # fasta-qual to fastq
    in_fhands = (self._make_fhand(FASTA), self._make_fhand(QUAL))
    out_fhands = (self._make_fhand(),)
    out_format = 'fastq'
    seqio(in_fhands, out_fhands, out_format)
    assert "@seq1\natctagtc\n+\n???????" in written(out_fhands[0])

    # fastq to fasta-qual
    out_fhands = [self._make_fhand(), self._make_fhand()]
    seqio([self._make_fhand(FASTQ)], out_fhands, 'fasta')
    assert ">seq1\natcgt" in written(out_fhands[0])
    assert ">seq1\n30 30 30" in written(out_fhands[1])

    # fastq to fasta
    out_fhands = [self._make_fhand()]
    seqio([self._make_fhand(FASTQ)], out_fhands, 'fasta')
    assert ">seq1\natcgt" in written(out_fhands[0])

    # fastq to fastq-illumina
    out_fhands = [self._make_fhand()]
    seqio([self._make_fhand(FASTQ)], out_fhands, 'fastq-illumina')
    assert "@seq1\natcgt\n+\n^^^^" in written(out_fhands[0])

    # fasta-qual to fasta-qual
    in_fhands = (self._make_fhand(FASTA), self._make_fhand(QUAL))
    out_fhands = (self._make_fhand(), self._make_fhand())
    out_format = 'fasta'
    seqio(in_fhands, out_fhands, out_format)
    assert FASTA == written(out_fhands[0])
    assert QUAL == written(out_fhands[1])
def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
    '''It looks for the oligos in the given sequence file.

    The sequences are written as fasta into a temporary directory, blasted
    against the oligos with _do_blast_2 and, when self.filters is set, the
    alignments are filtered with filter_alignments.

    Args:
        seq_fpath: path of the sequence file to search in.
        oligos: oligos handed to _do_blast_2.
        seqs_type: passed to _do_blast_2 as the dbtype.

    Returns:
        A dict that maps each matched subject name to the list with all the
        match parts found for that subject.
    '''
    temp_dir = TemporaryDir()
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # The handles are closed before blasting, so nothing is left open or
        # unflushed when dbpath is used.
        with open(seq_fpath) as in_fhand, open(dbpath, 'w') as out_fhand:
            seqio([in_fhand], out_fhand, out_format='fasta',
                  copy_if_same_format=False)

        # we need to keep the blast_fhand, because it is a temp file and
        # otherwise it might be removed before the blasts are consumed
        blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                          params=self.params,
                                          program=self.program,
                                          dbtype=seqs_type)
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            for blast in blasts:
                oligo = blast['query']
                for match in blast['matches']:
                    read = match['subject']
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match['match_parts'],
                            query_length=oligo['length'],
                            subject_length=read['length'],
                            align_completely=QUERY)
                    # The parts are gathered in a list owned by the result,
                    # so the list that belongs to the match is never
                    # extended in place.
                    parts = indexed_match_parts.setdefault(read['name'], [])
                    parts.extend(match['match_parts'])
        finally:
            # Released even if parsing or filtering the blasts fails
            blast_fhand.close()
    finally:
        temp_dir.close()
    return indexed_match_parts
def get_or_create_blastdb(blastdb_or_path, dbtype, directory=None):
    """Return the path of a blast database, building it when it is missing.

    The sequence file path is resolved with _get_abs_blastdb_path. When a
    directory is given, the database is looked for (and built, if needed) in
    that directory under the same file name as the sequence file; otherwise
    the database path is the sequence file path itself.

    Args:
        blastdb_or_path: value handed to _get_abs_blastdb_path to locate the
            sequence file.
        dbtype: sequence type of the database, passed on to the helpers
            that check for and build the database.
        directory: optional directory in which the database should live.

    Returns:
        The path of the blast database.

    Raises:
        RuntimeError: if the database does not exist and there is no
            sequence file to build it from.
    """
    seq_fpath = _get_abs_blastdb_path(blastdb_or_path, dbtype)
    if directory:
        dbpath = os.path.join(directory, os.path.basename(seq_fpath))
    else:
        dbpath = seq_fpath

    # Nothing to build: the database is already there.
    if _blastdb_exists(dbpath, dbtype):
        return dbpath

    if not os.path.exists(seq_fpath):
        msg = "An input sequence is required to create a blastdb"
        raise RuntimeError(msg)

    if seq_fpath != dbpath:
        # Both handles are closed as soon as seqio is done, so nothing is
        # left open or unflushed when the database is built from dbpath.
        with open(seq_fpath) as in_fhand, open(dbpath, "w") as out_fhand:
            seqio(
                [in_fhand],
                [out_fhand],
                out_format="fasta",
                copy_if_same_format=False,
            )

    _makeblastdb_plus(dbpath, dbtype)
    return dbpath
def _look_for_blast_matches(self, seq_fpath, oligos):
    """It looks for the oligos in the given sequence file.

    The sequences are written as fasta into a temporary directory, blasted
    against the oligos with _do_blast_2 and, when self.filters is set, the
    alignments are filtered with filter_alignments.

    Args:
        seq_fpath: path of the sequence file to search in.
        oligos: oligos handed to _do_blast_2; its length decides whether the
            match parts of a subject are replaced or accumulated.

    Returns:
        A dict that maps each matched subject name to a list of match parts.
    """
    temp_dir = TemporaryDir()
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # The handles are closed before blasting, so nothing is left open or
        # unflushed when dbpath is used.
        with open(seq_fpath) as in_fhand, open(dbpath, "w") as out_fhand:
            seqio(
                [in_fhand],
                [out_fhand],
                out_format="fasta",
                copy_if_same_format=False,
            )

        # we need to keep the blast_fhand, because it is a temp file and
        # otherwise it might be removed before the blasts are consumed
        blasts, blast_fhand = _do_blast_2(
            dbpath, oligos, params=self.params, program=self.program
        )
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            one_oligo = len(oligos) == 1
            for blast in blasts:
                oligo = blast["query"]
                for match in blast["matches"]:
                    read = match["subject"]
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match["match_parts"],
                            query_length=oligo["length"],
                            subject_length=read["length"],
                            align_completely=QUERY,
                        )
                    match_parts = match["match_parts"]
                    name = read["name"]
                    if one_oligo:
                        # With a single oligo the parts of the match are
                        # stored as they are, replacing any previous entry.
                        indexed_match_parts[name] = match_parts
                    else:
                        # The parts are gathered in a list owned by the
                        # result, so the list that belongs to the match is
                        # never extended in place.
                        indexed_match_parts.setdefault(name, []).extend(
                            match_parts
                        )
        finally:
            # Released even if parsing or filtering the blasts fails
            blast_fhand.close()
    finally:
        temp_dir.close()
    return indexed_match_parts
def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
    '''It looks for the oligos in the given sequence file.

    A fasta version of the sequences is written into a temporary directory
    and blasted against the oligos with _do_blast_2. The alignments are
    filtered with filter_alignments when self.filters is set.

    Args:
        seq_fpath: path of the sequence file to search in.
        oligos: oligos handed to _do_blast_2.
        seqs_type: passed to _do_blast_2 as the dbtype.

    Returns:
        A dict that maps each matched subject name to the list with all the
        match parts found for that subject.
    '''
    temp_dir = TemporaryDir()
    try:
        dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
        # Closing both handles here guarantees that nothing is left open or
        # unflushed by the time dbpath is blasted.
        with open(seq_fpath) as in_fhand, open(dbpath, 'w') as out_fhand:
            seqio([in_fhand], out_fhand, out_format='fasta',
                  copy_if_same_format=False)

        # we need to keep the blast_fhand, because it is a temp file and
        # otherwise it might be removed before the blasts are consumed
        blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                          params=self.params,
                                          program=self.program,
                                          dbtype=seqs_type)
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            # Which are the regions covered in each sequence?
            indexed_match_parts = {}
            for blast in blasts:
                oligo = blast['query']
                for match in blast['matches']:
                    read = match['subject']
                    match_parts = match['match_parts']
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match_parts,
                            query_length=oligo['length'],
                            subject_length=read['length'],
                            align_completely=QUERY)
                    # A list owned by the result collects the parts, so the
                    # list that belongs to the match is never extended in
                    # place.
                    indexed_match_parts.setdefault(read['name'],
                                                   []).extend(match_parts)
        finally:
            # Released even if parsing or filtering the blasts fails
            blast_fhand.close()
    finally:
        temp_dir.close()
    return indexed_match_parts
def test_seqio(self):
    '''It tests the seqio function.

    It checks several format conversions and that the expected errors are
    raised for an input without qualities and for a malformed fastq.
    '''

    def written(fhand):
        'It returns the text found in the file behind the given handle'
        # Reading inside a with block closes the reading handle right away
        # instead of leaving it open until it is garbage collected.
        with open(fhand.name) as result_fhand:
            return result_fhand.read()

    # fastq to fasta
    out_fhand = NamedTemporaryFile()
    seqio([self._make_fhand(FASTQ)], out_fhand, 'fasta')
    assert ">seq1\natcgt" in written(out_fhand)

    # fastq to fastq-illumina
    out_fhand = NamedTemporaryFile()
    seqio([self._make_fhand(FASTQ)], out_fhand, 'fastq-illumina')
    assert "@seq1\natcgt\n+\n^^^^" in written(out_fhand)

    # two fastq inputs into a single fastq-illumina output
    out_fhand = NamedTemporaryFile()
    seqio([self._make_fhand(FASTQ), self._make_fhand(FASTQ)], out_fhand,
          'fastq-illumina')
    assert "@seq3\natcgt\n+\n^^^^^\n@seq1" in written(out_fhand)

    # fasta to fastq
    out_fhand = NamedTemporaryFile()
    try:
        seqio([self._make_fhand(FASTA)], out_fhand, 'fastq')
        self.fail("error previously expected")
    except IncompatibleFormatError as error:
        assert 'No qualities available' in str(error)

    # bad_format fastq
    # out_fhand is the one created for the previous case
    bad_fastq_fhand = self._make_fhand(FASTQ + 'aklsjhdas')
    try:
        seqio([bad_fastq_fhand], out_fhand, 'fasta')
        self.fail("error previously expected")
    except MalformedFile as error:
        assert 'Lengths of sequence and quality' in str(error)

    # genbank to fasta
    out_fhand = NamedTemporaryFile()
    genbank_fpath = os.path.join(TEST_DATA_DIR, 'sequence.gb')
    with open(genbank_fpath) as genbank_fhand:
        seqio([genbank_fhand], out_fhand, 'fasta')
    result = written(out_fhand)
    assert '>NM_019354.2' in result