def write_cluster(file_name, sequence_names, sequence_dict):
    """
        Writes the sequences listed in sequence_names, looked up in
        sequence_dict, to '<file_name>.fasta'. Spaces in file_name
        are replaced by underscores before the extension is added.
    """
    output_path = file_name.replace(' ', '_') + ".fasta"

    # Materialise the names once so names and sequences stay aligned
    cluster_names = list(sequence_names)
    cluster_sequences = [sequence_dict[name] for name in cluster_names]

    oligo.write_fastas(cluster_names, cluster_sequences, output_path)
Example #2
0
def main():
    """
        Reads the fasta given as the first command-line argument,
        reports how many oligos and how many unique sequences it
        holds, and writes the unique entries to '<input>_unique'.
    """
    fasta_path = sys.argv[1]
    all_names, all_sequences = oligo.read_fasta_lists(fasta_path)

    total_count = len(all_names)
    print("Number of oligos in original design: %d" % total_count)

    unique_count = len(set(all_sequences))
    print("Number of unique oligos:             %d" % unique_count)

    unique_names, unique_sequences = oligo.get_unique_sequences(
        all_names, all_sequences)

    unique_path = fasta_path + "_unique"
    oligo.write_fastas(unique_names, unique_sequences, unique_path)
def write_large_cluster(names_list, sequence_list, file_name):
    """
        Writes a cluster to
        'large_clusters/<file_name>_too_large.fasta', creating the
        'large_clusters' directory if it does not exist yet.
        Spaces in file_name are replaced by underscores.

        The process working directory is changed only for the
        duration of the write and is always restored afterwards,
        even if the write raises.
    """
    file_name = file_name.replace(' ', '_')
    dir_for_clusters = "large_clusters"

    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(dir_for_clusters, exist_ok=True)

    # Remember where we started: chdir("..") would land in the wrong
    # place if 'large_clusters' is a symlink
    starting_dir = os.getcwd()
    os.chdir(dir_for_clusters)
    try:
        oligo.write_fastas(names_list, sequence_list,
                           str(file_name) + "_too_large.fasta")
    finally:
        os.chdir(starting_dir)
Example #4
0
def write_outputs(dict_to_write, filename):
    """
        Writes all name:sequence pairings stored in dict_to_write
        to the filename specified.

        Keys of dict_to_write are used as the names and the matching
        values as the sequences, in the dictionary's iteration order.
    """
    names = list(dict_to_write)
    sequences = [dict_to_write[name] for name in names]

    # write_fastas takes (names, sequences, output name), matching
    # every other call site in this code
    oligo.write_fastas(names, sequences, filename)
def main():
    """
        Command-line entry point: generates args.generate amino-acid
        sequences of length args.length with generate_aa_sequence,
        names them '<prefix>_<index>', and writes them to args.filename.
    """
    parser = argparse.ArgumentParser(description="Randomly generate a fasta "
                                     "containing AA sequences.")
    parser.add_argument('-f', '--filename')
    parser.add_argument('-l', '--length', type=int)
    parser.add_argument('-g', '--generate', type=int)
    parser.add_argument('-p',
                        '--prefix',
                        help="The prefix for each "
                        "generated sequence. ",
                        default="sequence")

    args = parser.parse_args()

    how_many = range(args.generate)

    names = [f'{args.prefix}_{index}' for index in how_many]
    # One generator call per requested sequence, in index order
    sequences = [generate_aa_sequence(args.length) for _ in how_many]

    oligo.write_fastas(names, sequences, output_name=args.filename)
Example #6
0
def main():
    """
        Splits the clusters of a representative map into a sprot
        fasta and a trembl fasta.

        For each cluster returned by parse_map, the first member
        found in the sprot fasta is written to '<map_file>_sprot'.
        If no member is a sprot entry, the first member of the
        cluster is looked up in the trembl fasta and written to
        '<map_file>_trembl'.

        Raises KeyError if a cluster has no sprot member and its
        first member is not present in the trembl fasta.
    """
    arg_parser = argparse.ArgumentParser(
        description=
        "Parse representative output map to produce a sprot/trembl file")

    arg_parser.add_argument('-s', '--sprot', help="Input sprot fasta to parse")
    arg_parser.add_argument('-t',
                            '--trembl',
                            help="Input trembl file to parse")
    arg_parser.add_argument('-m',
                            '--map_file',
                            help="Input map file to parse.")

    args = arg_parser.parse_args()

    out_sprot_name = args.map_file + "_sprot"
    out_trembl_name = args.map_file + "_trembl"

    sprot_names, sprot_seqs = oligo.read_fasta_lists(args.sprot)
    trembl_names, trembl_seqs = oligo.read_fasta_lists(args.trembl)

    in_sprot_seqs = dict(zip(sprot_names, sprot_seqs))
    in_trembl_seqs = dict(zip(trembl_names, trembl_seqs))
    out_sprot_seqs = {}
    out_trembl_seqs = {}

    for current in parse_map(args.map_file):
        for inner in current:
            # Normalise once so the membership test and the lookup
            # use the same key; map entries may carry stray whitespace
            member = inner.strip()
            if member in in_sprot_seqs:
                out_sprot_seqs[member] = in_sprot_seqs[member]
                break
        else:
            # No sprot member in this cluster: fall back to the
            # first member, taken from the trembl input
            representative = current[0].strip()
            out_trembl_seqs[representative] = in_trembl_seqs[representative]

    oligo.write_fastas(list(out_sprot_seqs),
                       list(out_sprot_seqs.values()), out_sprot_name)
    oligo.write_fastas(list(out_trembl_seqs),
                       list(out_trembl_seqs.values()), out_trembl_name)
Example #7
0
def main():
    """
        Command-line entry point for a randomised greedy Ymer design.

        Reads the fasta named by options.query, then runs
        options.iterations independent design attempts. Each attempt:
          - builds a dict of valid Xmers mapping
            sequence -> [remaining score, name], with the score
            starting at options.redundancy
          - builds a dict of valid Ymers mapping sequence -> name
          - repeatedly scores every remaining Ymer with
            calculate_score, picks one of the top scorers at random,
            decrements the score of each Xmer it contains, and moves
            it into the design
          - stops once no Ymers remain or the best score is <= 0

        The attempt that yields the fewest Ymers is kept. Its Xmer
        dict is passed to write_outputs with options.outPut, and its
        Ymers are written to
        '<options.outPut>_R<options.redundancy>.fasta'.
    """
    usage = "usage: %prog [options]"
    option_parser = optparse.OptionParser(usage)

    add_program_options(option_parser)

    options, arguments = option_parser.parse_args()

    names, sequences = oligo.read_fasta_lists(options.query)

    # Very large sentinel so that the first completed attempt is
    # always accepted as the current best
    min_ymers = 999999999999999999999999999999999

    # NOTE(review): best_xmer_seq_dict / best_array_design are only bound
    # inside this loop, so options.iterations == 0 leads to a NameError at
    # the write_outputs call below
    for i in range(options.iterations):

        xmer_seq_dict = {}

        # create list of Xmer sequences
        for index in range(len(sequences)):

            name, sequence = oligo.subset_lists_iter(names[index],
                                                     sequences[index],
                                                     options.XmerWindowSize,
                                                     options.stepSize)

            # Rebinding 'index' here is harmless: the outer loop takes its
            # next value from its own range iterator
            for index in range(len(sequence)):
                if oligo.is_valid_sequence(sequence[index], options.minLength,
                                           options.percentValid):
                    # [remaining score, name]; element 0 is decremented as
                    # Ymers containing this Xmer are chosen
                    value = [options.redundancy, name[index]]
                    xmer_seq_dict[sequence[index]] = value

        # create dict of Ymer sequences
        ymer_seq_dict = {}

        # Break each input sequence up into windows of Ymer size
        for index in range(len(sequences)):

            name, sequence = oligo.subset_lists_iter(names[index],
                                                     sequences[index],
                                                     options.YmerWindowSize,
                                                     options.stepSize)

            for index in range(len(sequence)):

                if oligo.is_valid_sequence(sequence[index], options.minLength,
                                           options.percentValid):
                    ymer_seq_dict[sequence[index]] = name[index]

        # Captured before any Ymer is removed; used for the final percentage
        total_ymers = len(ymer_seq_dict)

        array_design = {}  # chosen Ymer sequence -> name
        array_xmers = {}  # Xmer -> number of times it was in a chosen Ymer
        to_add = []  # Ymers tied for the best score in the current pass
        ymer_xmers = []  # calculate_score's second result, parallel to to_add
        iter_count = 0

        while True:
            #reset max score at the beginning of each iteration
            # NOTE(review): to_add / ymer_xmers are NOT reset here; they are
            # only replaced when some score exceeds 0, so a pass in which
            # every score is 0 appends to the previous pass's candidates
            max_score = 0
            for current_ymer in ymer_seq_dict.keys():
                # calculate the score of this ymer
                score, subset_ymer = calculate_score(current_ymer,
                                                     xmer_seq_dict,
                                                     options.XmerWindowSize, 1)

                if score > max_score:
                    # New best: start a fresh list of tied candidates
                    to_add = list()
                    max_score = score
                    to_add.append(current_ymer)
                    ymer_xmers = [subset_ymer]
                elif score == max_score:
                    to_add.append(current_ymer)
                    ymer_xmers.append(subset_ymer)

            # Break ties at random
            # NOTE(review): random.choice raises IndexError when to_add is
            # empty, e.g. if no valid Ymers were found at all
            random_index = random.choice(range(len(to_add)))
            oligo_to_remove = to_add[random_index]
            chosen_xmers = ymer_xmers[random_index]
            #        array_xmers.update(chosen_xmers)
            # NOTE(review): this runs before the termination check below, so
            # the candidate picked on the final pass is counted in
            # array_xmers although it is never added to array_design
            for each in chosen_xmers:
                array_xmers[each] = array_xmers.get(each, 0) + 1

            # subtract from the score of each xmer within the chosen ymer
            for item in chosen_xmers:
                if item in xmer_seq_dict:
                    # We don't want negative scores
                    if xmer_seq_dict[item][0] > 0:
                        xmer_seq_dict[item][0] -= 1
                else:
                    print("%s - not found in xmer dict!!!" % (item))

            iter_count += 1

            # Done when every Ymer has been used or no remaining Ymer
            # scored above 0 in this pass
            if len(ymer_seq_dict) == 0 or max_score <= 0:
                print("Final design includes %d %d-mers (%.1f%% of total) " %
                      (len(array_design), options.YmerWindowSize,
                       (len(array_design) / float(total_ymers)) * 100))
                #            average_redundancy = sum( xmer_seq_dict[ item ][ 0 ] for item in xmer_seq_dict ) / len( xmer_seq_dict )
                print("%d unique %d-mers in final %d-mers (%.2f%% of total)" %
                      (len(array_xmers), options.XmerWindowSize,
                       options.YmerWindowSize,
                       (float(len(array_xmers)) / len(xmer_seq_dict)) * 100))
                print("Average redundancy of %d-mers in %d-mers: %.2f" %
                      (options.XmerWindowSize, options.YmerWindowSize,
                       sum(array_xmers.values()) / float(len(array_xmers))))
                # Keep this attempt only if it needs strictly fewer Ymers
                # than the best attempt so far
                if len(array_design) < min_ymers:
                    min_ymers = len(array_design)
                    best_xmer_seq_dict = xmer_seq_dict
                    # del only unbinds the local name; the dict lives on
                    # through best_xmer_seq_dict
                    del (xmer_seq_dict)
                    best_array_design = array_design
                    del (array_design)
                break

            # Move the chosen Ymer from the candidate pool into the design
            try:
                array_design[oligo_to_remove] = ymer_seq_dict[oligo_to_remove]
                del ymer_seq_dict[oligo_to_remove]
            except KeyError:
                # Chosen Ymer is no longer in the pool: skip to the next pass
                # (this also skips the progress report below)
                continue

            # Progress report every 250 passes
            if not iter_count % 250:
                print("Current Iteration: " + str(iter_count))
                #            print( "Number of output ymers: " + str( len( array_design ) ) )
                print("Current xmer dictionary score: " +
                      str(sum(item[0] for item in xmer_seq_dict.values())))

    write_outputs(best_xmer_seq_dict, options.outPut)

    names = []
    sequences = []

    # Write resulting oligos to file
    # best_array_design maps sequence -> name, hence the unpacking order
    for sequence, name in best_array_design.items():
        names.append(name)
        sequences.append(sequence)

    oligo.write_fastas(names,
                       sequences,
                       output_name=options.outPut + "_R" +
                       str(options.redundancy) + ".fasta")