Esempio n. 1
0
def poisson_clones(mean, line_num):
    """Draw one Poisson-distributed clone count for every line of the file.

    :param float mean: the mean of the data, used as the rate of the distribution
    :param int line_num: the number of lines in the file, i.e. how many samples to draw
    :return: the ``line_num`` samples returned by ``np.random.poisson``
    """
    # The status message is emitted before the samples are drawn.
    print(yellow("[STATUS] List created."))
    clone_counts = np.random.poisson(mean, line_num)
    return clone_counts
Esempio n. 2
0
def poisson_clones(mean, line_num):
    """Sample, per line of the file, how many times that line gets cloned.

    The counts follow a Poisson distribution.

    :param float mean: the mean of the data, used as the rate of the distribution
    :param int line_num: the number of lines in the file, i.e. how many samples to draw
    :return: the ``line_num`` samples returned by ``np.random.poisson``
    """
    # The status message is emitted before the samples are drawn.
    print(yellow("[STATUS] List created."))
    samples = np.random.poisson(mean, line_num)
    return samples
Esempio n. 3
0
def fastq_clones(opts, line_num):
    """Get the clone counts derived from the actual fastq distribution.

    This is a thin wrapper: the work is delegated to ``give_clone_list`` and a
    status message is printed once it has returned.

    :param args opts: user-inputted args parsed by argparse
    :param int line_num: the number of lines in the file
    :return list: the value produced by ``give_clone_list(opts, line_num)``
    """
    clone_counts = give_clone_list(opts, line_num)
    print(yellow("[STATUS] List created."))
    return clone_counts
Esempio n. 4
0
def nb_clones(mean, variance, line_num):
    """Draw one negative-binomial clone count for every line of the file.

    The three arguments are forwarded positionally and unchanged to
    ``np.random.negative_binomial``, so they land in its ``n``, ``p`` and
    ``size`` parameters respectively.

    NOTE(review): NumPy documents ``n`` as a number of successes and ``p`` as
    a probability, not as a mean and a variance. Confirm that callers supply
    values in that parametrization; no conversion is done here.

    :param float mean: forwarded as the first distribution parameter
    :param float variance: forwarded as the second distribution parameter
    :param int line_num: the number of lines in the file, i.e. how many samples to draw
    :return: the ``line_num`` samples returned by ``np.random.negative_binomial``
    """
    # The status message is emitted before the samples are drawn.
    print(yellow("[STATUS] List created."))
    clone_counts = np.random.negative_binomial(mean, variance, line_num)
    return clone_counts
Esempio n. 5
0
def fastq_clones(opts, line_num):
    """Produce clone counts that follow the fastq file's own distribution.

    The counts come straight from ``give_clone_list``; this function only adds
    the status message printed after that call returns.

    :param args opts: user-inputted args parsed by argparse
    :param int line_num: the number of lines in the file
    :return list: the value produced by ``give_clone_list(opts, line_num)``
    """
    selected = give_clone_list(opts, line_num)
    print(yellow("[STATUS] List created."))
    return selected
Esempio n. 6
0
def nb_clones(mean, variance, line_num):
    """Sample, per line of the file, a clone count from a negative binomial.

    ``mean``, ``variance`` and ``line_num`` are passed through untouched as the
    first, second and third positional arguments of
    ``np.random.negative_binomial`` (documented by NumPy as ``n``, ``p``,
    ``size``).

    NOTE(review): ``p`` is a probability in NumPy's parametrization, so a raw
    variance is unlikely to be a valid value. Verify what callers pass in; no
    conversion is done here.

    :param float mean: forwarded as the first distribution parameter
    :param float variance: forwarded as the second distribution parameter
    :param int line_num: the number of lines in the file, i.e. how many samples to draw
    :return: the ``line_num`` samples returned by ``np.random.negative_binomial``
    """
    # The status message is emitted before the samples are drawn.
    print(yellow("[STATUS] List created."))
    samples = np.random.negative_binomial(mean, variance, line_num)
    return samples
Esempio n. 7
0
def find_line_nums(opts):
    """Count the sequence records in the fastq file carried by ``opts``.

    A record is recognised by its header line, which starts with ``@``. Only
    the first character is tested: ``@`` is also a legal quality-score
    character, so looking for it anywhere in the line would count quality
    lines as records too. A quality line that happens to *begin* with ``@``
    is still counted; this matches the header test in ``create_fastq_dict``.

    ``opts.fastq`` is iterated to the end and is not rewound here.

    :param args opts: the user-inputted args parsed by argparse
    :return int line_num: the number of lines in ``opts.fastq`` starting with ``@``
    """
    line_num = sum(1 for line in opts.fastq if line.startswith('@'))
    print(yellow("[STATUS] Found number of lines."))
    return line_num
Esempio n. 8
0
def find_line_nums(opts):
    """Count the sequence records in the fastq file carried by ``opts``.

    A record is recognised by its header line, which starts with ``@``. Only
    the first character is tested: ``@`` is also a legal quality-score
    character, so looking for it anywhere in the line would count quality
    lines as records too. A quality line that happens to *begin* with ``@``
    is still counted; this matches the header test in ``create_fastq_dict``.

    ``opts.fastq`` is iterated to the end and is not rewound here.

    :param args opts: the user-inputted args parsed by argparse
    :return int line_num: the number of lines in ``opts.fastq`` starting with ``@``
    """
    line_num = 0
    for line in opts.fastq:
        if line.startswith('@'):
            line_num += 1
    print(yellow("[STATUS] Found number of lines."))
    return line_num
Esempio n. 9
0
def main(args):
    """Main method: parse the arguments, build the dicts and report results.

    The fastq dict is created from ``opts.fastq`` and the error dict is derived
    from it. The user is then asked whether to print the results to the
    terminal and whether to write them to a file. An answer counts as "yes"
    when it is ``y``, ignoring case and surrounding whitespace.

    :param args: arguments; the first element is skipped before parsing
    :return: None
    """
    print(green('Running...'))
    opts = parse_cmdline_params(args[1:])
    infile = opts.fastq
    fastq_dict = create_fastq_dict(infile, opts)
    error_dict = decide_if_error(fastq_dict)
    print(yellow("[STATUS] Error dict created."))

    # Answers are compared by value. ``is`` tests object identity, which only
    # matches a string literal when the interpreter happens to reuse the
    # same object.
    answer = raw_input(blue('Would you like results printed to the terminal? Answer y or n: '))
    if answer.strip().lower() == 'y':
        print_results(fastq_dict, error_dict)

    answer = raw_input(blue('Would you like to write the results to a file? Answer y or n: '))
    if answer.strip().lower() == 'y':
        write_results_to_file(fastq_dict, error_dict, opts)

    print(green('Done'))
Esempio n. 10
0
def create_fastq_dict(infile, opts):
    """Creates a defaultdict of sequence names and sequences, plus clones.

    ``opts.cloning`` selects how the per-record clone counts are generated:
    1 for poisson, 2 for negative binomial, 3 for fastq dist. Any other value
    gives every record a count of 0 (no cloning).

    Every line starting with ``@`` is treated as a header. The line after it
    is stored as the sequence under the header text without the ``@``. For a
    clone count ``k`` the same sequence is also stored under
    ``"<name> (1)"`` up to ``"<name> (k - 1)"``.

    Blank lines are skipped. A header that is the last line of the file is
    stored with an empty sequence.

    :param infile: the file to create the fastq dict from; it is rewound with ``seek(0)``
    :param args opts: options parsed from command line; ``cloning``, ``mean``
        and ``variance`` are read here and ``opts`` is passed on to the
        helpers that count lines and pick clone counts
    :return: defaultdict of sequences and names
    """
    line_num = find_line_nums(opts)
    # Rewind after counting. NOTE(review): presumably the clone-count helpers
    # (e.g. give_clone_list) read the file again -- confirm before removing.
    infile.seek(0)
    cloned = opts.cloning

    if cloned == 1:
        clones = poisson_clones(opts.mean, line_num)
    elif cloned == 2:
        clones = nb_clones(opts.mean, opts.variance, line_num)
    elif cloned == 3:
        clones = fastq_clones(opts, line_num)
    else:
        clones = [0] * line_num

    print(clones)
    infile.seek(0)
    sequences = defaultdict(str)
    records_seen = 0
    for line in infile:
        line = line.rstrip()
        # startswith() is safe on the empty string, unlike line[0].
        if not line.startswith('@'):
            continue
        name = line[1:]
        # The default keeps a truncated file (header on the last line) from
        # raising StopIteration out of this function.
        sequence = next(infile, '').rstrip()
        sequences[name] = sequence
        for clone_num in range(1, clones[records_seen]):
            sequences['{} ({})'.format(name, clone_num)] = sequence
        records_seen += 1
        # Alternates between one and two dots as a simple progress indicator.
        dots = '.' * (((records_seen + 1) % 2) + 1)
        print(yellow("[STATUS] It's still going! {}".format(dots)))
    print(yellow("[STATUS] Dictionary created."))
    return sequences
Esempio n. 11
0
def create_fastq_dict(infile, opts):
    """Creates a defaultdict of sequence names and sequences, plus clones.

    ``opts.cloning`` selects how the per-record clone counts are generated:
    1 for poisson, 2 for negative binomial, 3 for fastq dist. Any other value
    gives every record a count of 0 (no cloning).

    Every line starting with ``@`` is treated as a header. The line after it
    is stored as the sequence under the header text without the ``@``. For a
    clone count ``k`` the same sequence is also stored under
    ``"<name> (1)"`` up to ``"<name> (k - 1)"``.

    Blank lines are skipped. A header that is the last line of the file is
    stored with an empty sequence.

    :param infile: the file to create the fastq dict from; it is rewound with ``seek(0)``
    :param args opts: options parsed from command line; ``cloning``, ``mean``
        and ``variance`` are read here and ``opts`` is passed on to the
        helpers that count lines and pick clone counts
    :return: defaultdict of sequences and names
    """
    line_num = find_line_nums(opts)
    # Rewind after counting. NOTE(review): presumably the clone-count helpers
    # (e.g. give_clone_list) read the file again -- confirm before removing.
    infile.seek(0)
    cloned = opts.cloning

    if cloned == 1:
        clones = poisson_clones(opts.mean, line_num)
    elif cloned == 2:
        clones = nb_clones(opts.mean, opts.variance, line_num)
    elif cloned == 3:
        clones = fastq_clones(opts, line_num)
    else:
        clones = [0] * line_num

    print(clones)
    infile.seek(0)
    sequences = defaultdict(str)
    records_seen = 0
    for line in infile:
        line = line.rstrip()
        # startswith() is safe on the empty string, unlike line[0].
        if not line.startswith('@'):
            continue
        name = line[1:]
        # The default keeps a truncated file (header on the last line) from
        # raising StopIteration out of this function.
        sequence = next(infile, '').rstrip()
        sequences[name] = sequence
        for clone_num in range(1, clones[records_seen]):
            sequences['{} ({})'.format(name, clone_num)] = sequence
        records_seen += 1
        # Alternates between one and two dots as a simple progress indicator.
        dots = '.' * (((records_seen + 1) % 2) + 1)
        print(yellow("[STATUS] It's still going! {}".format(dots)))
    print(yellow("[STATUS] Dictionary created."))
    return sequences
Esempio n. 12
0
def main(args):
    """Main method: parse the arguments, build the dicts and report results.

    The fastq dict is created from ``opts.fastq`` and the error dict is derived
    from it. The user is then asked whether to print the results to the
    terminal and whether to write them to a file. An answer counts as "yes"
    when it is ``y``, ignoring case and surrounding whitespace.

    :param args: arguments; the first element is skipped before parsing
    :return: None
    """
    print(green('Running...'))
    opts = parse_cmdline_params(args[1:])
    infile = opts.fastq
    fastq_dict = create_fastq_dict(infile, opts)
    error_dict = decide_if_error(fastq_dict)
    print(yellow("[STATUS] Error dict created."))

    # Answers are compared by value. ``is`` tests object identity, which only
    # matches a string literal when the interpreter happens to reuse the
    # same object.
    print_answer = raw_input(
        blue('Would you like results printed to the terminal? Answer y or n: '))
    if print_answer.strip().lower() == 'y':
        print_results(fastq_dict, error_dict)

    write_answer = raw_input(
        blue('Would you like to write the results to a file? Answer y or n: '))
    if write_answer.strip().lower() == 'y':
        write_results_to_file(fastq_dict, error_dict, opts)

    print(green('Done'))