def main():
    """Run the program from argument parsing to final status report.

    Parses arguments, checks the BLAST+ dependencies, creates the database
    and stores its path on the arguments object, cleans the reads and then
    performs cleanup.

    Returns:
        The status value returned by `_clean_reads`; 0 is reported as
        success, any other value as "Completed with errors".
    """
    args = _parse_arguments()
    _check_blastplus_dependencies(args)
    print(str(args), end='\n\n')

    # Create the database and record where it lives on the arguments object.
    database_path = _create_database(args)
    args.set_database_path(database_path)

    print('{} - Start.'.format(getwt()))
    result_status = _clean_reads(args)

    # Cleanup runs regardless of the status returned by read cleaning.
    _cleanup(args)

    if result_status != 0:
        print_err('\n\a{} - Completed with errors.'.format(getwt()))
        return result_status
    # end if

    print('\n{} - Completed.'.format(getwt()))
    print(' Output directory: `{}`'.format(args.outdir_path))
    return result_status
def _parse_primers(self):
    """Parse primer pairs and find their annealing coordinates.

    Reads `self.primers_fpath` two lines at a time (via
    `self._parse_primer_pair`), locates each primer on the reference
    sequence read from `self.reference_fpath`, and collects the results.
    As a side effect, `self.max_primer_len` is raised to the length of the
    longest primer seen.

    Returns:
        list of `PrimerPair` objects, one per pair of lines in the file.

    Raises:
        FatalError: if the primers file has an odd number of lines, or if
            a `ValueError` is raised while parsing or locating a pair.
    """
    sep = ','
    primer_pairs = list()

    # Count lines inside a `with` block so the file handle is closed
    #   right away instead of being left to the garbage collector.
    with open(self.primers_fpath, 'rt') as primers_file:
        n_lines = sum(1 for _ in primers_file)
    # end with

    if n_lines % 2 != 0:
        # Report the actual line count in the second placeholder.
        error_msg = (
            '\nError: Cannot parse primers from file `{}`.\n'
            'There are {} lines in this file.\n'
            'There must be even number of lines '
            '(and therefore even number of primers), though.'
        ).format(self.primers_fpath, n_lines)
        raise FatalError(error_msg)
    # end if

    print('{} - Parsing primers...'.format(getwt()))

    reference_seq = src.fasta.read_fasta_sequence(self.reference_fpath)

    # Position on the reference from which the next primer search begins.
    # It is moved to the start of each left primer once that primer is found.
    find_start_pos = 0

    with open(self.primers_fpath, 'rt') as primers_file:
        for _ in range(n_lines // 2):
            try:
                left_primer_seq, right_primer_seq = self._parse_primer_pair(
                    primers_file,
                    sep
                )

                self.max_primer_len = max(
                    self.max_primer_len,
                    len(left_primer_seq),
                    len(right_primer_seq)
                )

                left_start, left_end = self._find_primer_anneal_coords(
                    left_primer_seq,
                    reference_seq,
                    Orientation.LEFT,
                    beg=find_start_pos
                )

                find_start_pos = left_start

                # The right primer is searched as its reverse complement.
                right_start, right_end = self._find_primer_anneal_coords(
                    src.sequences.reverse_complement(right_primer_seq),
                    reference_seq,
                    Orientation.RIGHT,
                    beg=find_start_pos
                )

                primer_pairs.append(
                    PrimerPair(
                        Primer(left_start, left_end),
                        Primer(right_start, right_end),
                    )
                )
            except ValueError as err:
                error_msg = '\nError: cannot parse a line in file `{}`.\n{}' \
                    .format(self.primers_fpath, err)
                # Chain the cause so the original traceback is preserved.
                raise FatalError(error_msg) from err
            # end try
        # end for
    # end with

    print('{} - Primers: found annealing coordinates'.format(getwt()))

    return primer_pairs
def print_status_bar(self):
    """Redraw the one-line progress bar on stdout.

    The line starts with a carriage return so each call overwrites the
    previous one. It shows the current time stamp, a bar made of '='
    characters (with a '>' head while unfinished), the done/total read
    counts and the rounded percentage.

    A total of zero reads is drawn as a complete bar instead of raising
    `ZeroDivisionError`, and the bar is never drawn wider than its
    nominal length even if the done counter exceeds the total.
    """
    curr_num_done_reads = self.get_num_done_reads()
    bar_len = self._get_status_bar_len()

    if self.NUM_READS_TOTAL > 0:
        ratio_done = curr_num_done_reads / self.NUM_READS_TOTAL
    else:
        # Nothing to process: treat the (empty) job as complete.
        ratio_done = 1.0
    # end if
    percent_done = ratio_done * 100

    # Clamp so that an overshooting counter cannot stretch the bar.
    progress_line_len = min(round(bar_len * ratio_done), bar_len)

    print_arrow = progress_line_len != bar_len
    if print_arrow:
        arrow = '>'
    else:
        # A finished bar has no arrow head; one extra '=' takes its place
        #   so the total width between the brackets stays the same.
        arrow = ''
        progress_line_len += 1
    # end if

    sys.stdout.write(
        '\r{} - [{}{}{}] {}/{} ({}%)'.format(
            getwt(),
            '=' * progress_line_len,
            arrow,
            ' ' * (bar_len - progress_line_len),
            curr_num_done_reads,
            self.NUM_READS_TOTAL,
            round(percent_done)
        )
    )
    sys.stdout.flush()
def create_reference_database(kromsatel_args):
    """Create a BLAST database for the reference and optionally index it.

    The database is placed in the `blast_database` subdirectory of
    `kromsatel_args.outdir_path`. Indexing is performed only when
    `kromsatel_args.use_index` is truthy.

    Returns:
        Path of the created database (`.../blast_database/kromsatel_blast_database`).
    """
    database_dir = os.path.join(kromsatel_args.outdir_path, 'blast_database')
    fs.create_dir(database_dir)
    database_path = os.path.join(database_dir, 'kromsatel_blast_database')

    creating_msg = '{} - Creating a reference database for BLAST:\n `{}`...'.format(
        getwt(),
        database_path
    )
    print(creating_msg)
    _make_blast_db(kromsatel_args.reference_fpath, database_path)
    print('{} - Database: created'.format(getwt()))

    if not kromsatel_args.use_index:
        print('Index will not be created for the database.')
        return database_path
    # end if

    print('{} - Indexing the database...'.format(getwt()))
    _index_database(database_path)
    print('{} - Index: created'.format(getwt()))

    return database_path
def _count_paired_reads_verbosely(frw_fastq_fpath):
    """Count reads in the given fastq file, printing progress messages.

    The count obtained from `src.fastq.count_reads` for `frw_fastq_fpath`
    is reported as the number of read pairs.

    Returns:
        The value returned by `src.fastq.count_reads`.
    """
    start_msg = '{} - Counting reads...'.format(getwt())
    print(start_msg)

    read_pair_count = src.fastq.count_reads(frw_fastq_fpath)

    done_msg = '{} - {} read pairs.'.format(getwt(), read_pair_count)
    print(done_msg)

    return read_pair_count