def poisson_clones(mean, line_num):
    """Draw a Poisson-distributed clone count for every line.

    :param float mean: the expected value (lambda) of the distribution
    :param int line_num: how many counts to draw, one per line in the file
    :return: ``line_num`` samples from ``np.random.poisson`` (a NumPy array
        when ``line_num`` is an int)
    """
    # The status message is emitted before sampling, as in the original flow.
    status_message = yellow("[STATUS] List created.")
    print(status_message)
    samples = np.random.poisson(lam=mean, size=line_num)
    return samples
def fastq_clones(opts, line_num):
    """Get per-line clone counts from ``give_clone_list``.

    :param args opts: user-inputted args parsed by argparse
    :param int line_num: the number of lines in the file
    :return list: whatever ``give_clone_list(opts, line_num)`` produces;
        per the original docs, numbers selected from the file's distribution
    """
    clone_counts = give_clone_list(opts, line_num)
    status_message = yellow("[STATUS] List created.")
    print(status_message)
    return clone_counts
def nb_clones(mean, variance, line_num):
    """Draw a negative-binomial clone count for every line.

    The arguments are handed to ``np.random.negative_binomial`` unchanged:
    ``mean`` fills NumPy's ``n`` parameter and ``variance`` fills its ``p``
    parameter.

    NOTE(review): NumPy defines ``n`` as the number of successes and ``p`` as
    a success probability, not a mean and a variance. Confirm with callers
    which parameterization they actually supply before changing this.

    :param float mean: value passed as NumPy's ``n``
    :param float variance: value passed as NumPy's ``p``
    :param int line_num: how many counts to draw, one per line in the file
    :return: ``line_num`` samples from ``np.random.negative_binomial``
    """
    # The status message is emitted before sampling, as in the original flow.
    status_message = yellow("[STATUS] List created.")
    print(status_message)
    samples = np.random.negative_binomial(n=mean, p=variance, size=line_num)
    return samples
def find_line_nums(opts):
    """Count the FASTQ records in ``opts.fastq``.

    A record is recognised by a line that *starts* with '@', which is the
    same test ``create_fastq_dict`` applies when it looks for headers.
    Testing for '@' anywhere in the line would also count quality strings
    that merely contain the character and inflate the result.

    The file is iterated from its current position and is not rewound
    afterwards; callers must ``seek(0)`` before reading it again.

    NOTE(review): a quality line may itself begin with '@', so this is still
    an upper bound on the true record count for such files.

    :param args opts: the user-inputted args parsed by argparse; only
        ``opts.fastq`` (an iterable of lines) is read
    :return int line_num: the number of lines beginning with '@'
    """
    line_num = sum(1 for line in opts.fastq if line.startswith('@'))
    print(yellow("[STATUS] Found number of lines."))
    return line_num
def main(args):
    """Parse the command line, build the FASTQ and error dicts, and report.

    After the dictionaries are built, the user is asked interactively
    whether to print the results and whether to write them to a file.

    :param args: argument list; the first element is skipped and the rest
        is passed to ``parse_cmdline_params``
    :return: None
    """
    print(green('Running...'))
    opts = parse_cmdline_params(args[1:])
    infile = opts.fastq
    fastq_dict = create_fastq_dict(infile, opts)
    error_dict = decide_if_error(fastq_dict)
    print(yellow("[STATUS] Error dict created."))

    # Compare answers by value: ``is 'y'`` tests object identity and only
    # works when the interpreter happens to reuse the same string object.
    # Surrounding whitespace and letter case are ignored.
    print_answer = raw_input(
        blue('Would you like results printed to the terminal? Answer y or n: '))
    if print_answer.strip().lower() == 'y':
        print_results(fastq_dict, error_dict)

    write_answer = raw_input(
        blue('Would you like to write the results to a file? Answer y or n: '))
    if write_answer.strip().lower() == 'y':
        write_results_to_file(fastq_dict, error_dict, opts)

    print(green('Done'))
def create_fastq_dict(infile, opts):
    """Build a name -> sequence mapping from a FASTQ file, with clones.

    ``opts.cloning`` selects how the per-record clone counts are produced:
    1 for poisson, 2 for negative binomial, 3 for the fastq distribution;
    any other value gives a count of 0 for every record (no cloning).

    For a record whose count is ``c``, the original entry is stored under
    the header text (without the leading '@') and ``c - 1`` extra entries
    are stored under ``"<name> (1)"`` ... ``"<name> (c - 1)"``, all with the
    same sequence.

    :param infile: the file to create the fastq from; it is rewound with
        ``seek(0)`` and iterated line by line
    :param args opts: options parsed from command line (``cloning``, and
        ``mean`` / ``variance`` depending on the mode, are read here)
    :return: defaultdict of sequences and names
    """
    line_num = find_line_nums(opts)
    infile.seek(0)

    cloned = opts.cloning
    if cloned == 1:
        clones = poisson_clones(opts.mean, line_num)
    elif cloned == 2:
        clones = nb_clones(opts.mean, opts.variance, line_num)
    elif cloned == 3:
        clones = fastq_clones(opts, line_num)
    else:
        clones = [0] * line_num
    print(clones)

    # Rewind again before the real pass over the file.
    infile.seek(0)
    sequences = defaultdict(str)
    line_counter = 1
    for line in infile:
        line = line.rstrip()
        # startswith() is safe on blank lines (e.g. a trailing newline at
        # the end of the file), where indexing line[0] raises IndexError.
        # NOTE(review): a quality line beginning with '@' is still taken
        # for a header here.
        if not line.startswith('@'):
            continue

        # The sequence is the line right after the header; pulling it from
        # the iterator here means the loop never sees it.
        next_line = next(infile).rstrip()
        name = line[1:].rstrip()
        sequences[name] = next_line
        for clone_num in range(1, clones[line_counter - 1]):
            sequences[name + ' (' + str(clone_num) + ')'] = next_line

        line_counter += 1
        print(yellow("[STATUS] It's still going! {}".format(
            '.' * ((line_counter % 2) + 1))))

    print(yellow("[STATUS] Dictionary created."))
    return sequences
def create_fastq_dict(infile, opts):
    """Build a name -> sequence mapping from a FASTQ file, with clones.

    ``opts.cloning`` selects how the per-record clone counts are produced:
    1 for poisson, 2 for negative binomial, 3 for the fastq distribution;
    any other value gives a count of 0 for every record (no cloning).

    For a record whose count is ``c``, the original entry is stored under
    the header text (without the leading '@') and ``c - 1`` extra entries
    are stored under ``"<name> (1)"`` ... ``"<name> (c - 1)"``, all with the
    same sequence.

    :param infile: the file to create the fastq from; it is rewound with
        ``seek(0)`` and iterated line by line
    :param args opts: options parsed from command line (``cloning``, and
        ``mean`` / ``variance`` depending on the mode, are read here)
    :return: defaultdict of sequences and names
    """
    line_num = find_line_nums(opts)
    infile.seek(0)

    cloned = opts.cloning
    if cloned == 1:
        clones = poisson_clones(opts.mean, line_num)
    elif cloned == 2:
        clones = nb_clones(opts.mean, opts.variance, line_num)
    elif cloned == 3:
        clones = fastq_clones(opts, line_num)
    else:
        clones = [0] * line_num
    print(clones)

    # Rewind again before the real pass over the file.
    infile.seek(0)
    sequences = defaultdict(str)
    line_counter = 1
    for line in infile:
        line = line.rstrip()
        # startswith() is safe on blank lines (e.g. a trailing newline at
        # the end of the file), where indexing line[0] raises IndexError.
        # NOTE(review): a quality line beginning with '@' is still taken
        # for a header here.
        if not line.startswith('@'):
            continue

        # The sequence is the line right after the header; pulling it from
        # the iterator here means the loop never sees it.
        next_line = next(infile).rstrip()
        name = line[1:].rstrip()
        sequences[name] = next_line
        for clone_num in range(1, clones[line_counter - 1]):
            sequences[name + ' (' + str(clone_num) + ')'] = next_line

        line_counter += 1
        print(yellow("[STATUS] It's still going! {}".format(
            '.' * ((line_counter % 2) + 1))))

    print(yellow("[STATUS] Dictionary created."))
    return sequences
def main(args):
    """Parse the command line, build the FASTQ and error dicts, and report.

    After the dictionaries are built, the user is asked interactively
    whether to print the results and whether to write them to a file.

    :param args: argument list; the first element is skipped and the rest
        is passed to ``parse_cmdline_params``
    :return: None
    """
    print(green('Running...'))
    opts = parse_cmdline_params(args[1:])
    infile = opts.fastq
    fastq_dict = create_fastq_dict(infile, opts)
    error_dict = decide_if_error(fastq_dict)
    print(yellow("[STATUS] Error dict created."))

    # Compare answers by value: ``is 'y'`` tests object identity and only
    # works when the interpreter happens to reuse the same string object.
    # Surrounding whitespace and letter case are ignored.
    print_answer = raw_input(
        blue('Would you like results printed to the terminal? Answer y or n: '))
    if print_answer.strip().lower() == 'y':
        print_results(fastq_dict, error_dict)

    write_answer = raw_input(
        blue('Would you like to write the results to a file? Answer y or n: '))
    if write_answer.strip().lower() == 'y':
        write_results_to_file(fastq_dict, error_dict, opts)

    print(green('Done'))