Esempio n. 1
0
def outputEnergies(chimera_blocks, contacts, fragments, parents, output_file, output_string, print_E, print_m):
	if not schema.checkChimera(chimera_blocks, fragments, parents):
		output_file.write("# %s is not a valid chimera\n" % chimera_blocks)
		return
	output_vars = [chimera_blocks]
	E = None
	m = None
	if print_E:
		E = schema.getChimeraDisruption(chimera_blocks, contacts, fragments, parents)
		output_vars = output_vars + [E]
	if print_m:
		m = schema.getChimeraShortestDistance(chimera_blocks, fragments, parents)
		output_vars = output_vars + [m]
	#print output_vars
	output_file.write(output_string % tuple(output_vars))
	return (E,m)
Esempio n. 2
0
def outputEnergies(chimera_blocks, contacts, fragments, parents, output_file,
                   output_string, print_E, print_m):
    if not schema.checkChimera(chimera_blocks, fragments, parents):
        output_file.write("# %s is not a valid chimera\n" % chimera_blocks)
        return
    output_vars = [chimera_blocks]
    E = None
    m = None
    if print_E:
        E = schema.getChimeraDisruption(chimera_blocks, contacts, fragments,
                                        parents)
        output_vars = output_vars + [E]
    if print_m:
        m = schema.getChimeraShortestDistance(chimera_blocks, fragments,
                                              parents)
        output_vars = output_vars + [m]
    #print output_vars
    output_file.write(output_string % tuple(output_vars))
    return (E, m)
Esempio n. 3
0
def main(args):
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        if args[0].split(os.path.sep)[-1] == "schemarandom.py":
            print_usage(args)
        return

        # Flags and values
    print_E = False
    print_m = False

    # Inputs:
    #   The alignment/fragment file name.
    msa_file = arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE]

    # Read the alignment file to create a list of parents.
    # The parents will appear in the list in the order in which they appear in the file.
    parent_list = schema.readMultipleSequenceAlignmentFile(file(msa_file, "r"))
    parents = [p for (k, p) in parent_list]

    # Get the contacts
    pdb_contacts = schema.readContactFile(file(arg_dict[ARG_CONTACT_FILE], "r"))

    # Establish connection to output, either file or, if no output file is
    # specified, to standard output.
    if arg_dict.has_key(ARG_OUTPUT_FILE):
        output_file = file(arg_dict[ARG_OUTPUT_FILE], "w")
    else:
        output_file = sys.stdout

        # Get the number of libraries to evaluate.
    if arg_dict.has_key(ARG_NUM_LIBRARIES):
        num_libraries = int(arg_dict[ARG_NUM_LIBRARIES])
    else:
        num_libraries = int(1e3)

        # Get the minimum fragment size.
    if arg_dict.has_key(ARG_MIN_FRAGMENT_SIZE):
        min_length = int(arg_dict[ARG_MIN_FRAGMENT_SIZE])
    else:
        min_length = 4

        # Get the number of fragments -- one more than the number of crossovers.
    num_fragments = int(arg_dict[ARG_NUM_CROSSOVERS]) + 1

    num_parents = len(parents)
    library_size = num_parents ** num_fragments

    if arg_dict.has_key(ARG_MAX_CHIMERAS_PER_LIBRARY):
        max_chimeras = min(library_size, int(arg_dict[ARG_MAX_CHIMERAS_PER_LIBRARY]))
    else:
        max_chimeras = library_size

    if arg_dict.has_key(ARG_RANDOM_SEED):
        random.seed(int(arg_dict[ARG_RANDOM_SEED]))

        # Make libraries consistent with RASPP
    (new_parents, identical_sites) = raspp.collapse_parents(parents)
    if len(new_parents[0]) < num_fragments * min_length:
        error_msg = (
            "Minimum diversity length of %d is too large.\n%d "
            + "fragments with diversity %d cannot be found in a "
            + "sequence of length %d (with identities removed).  Aborting..."
        )
        print error_msg % (min_length, num_fragments, min_length, len(parents[0]))
        return

    start_time = time.clock()

    output_file.write("# <E>\t<m>\tcrossover points\n")
    random_crossovers = []
    for libnum in range(num_libraries):
        crossovers = schema.generateRandomCrossovers(len(new_parents[0]), num_fragments - 1, min_length)
        crossovers = raspp.translate_collapsed_indices(crossovers, identical_sites)
        random_crossovers.append(crossovers)
    for crossovers in random_crossovers:
        fragments = schema.getFragments(crossovers, parents[0])
        filtered_contacts = schema.getSCHEMAContactsWithCrossovers(pdb_contacts, parents, crossovers)
        all_chimeras = []
        if max_chimeras < library_size:
            # Assemble a random sample of chimeras, with replacement
            for n_chim in range(max_chimeras):
                chim_index = random.randint(0, library_size - 1)
                n2c = schema.base(chim_index, num_parents)
                chimera_blocks = "".join(["1"] * (num_fragments - len(n2c)) + ["%d" % (int(x) + 1,) for x in n2c])
                all_chimeras.append(chimera_blocks)
        else:  # We'll be covering all chimeras in the library; might as well get a good sample.
            # The number of parents and fragments specifies all possible chimeras, regardless of
            # crossover point positions, so pre-generate all chimeras.
            max_chimeras = library_size
            for i in range(library_size):
                # The next two lines turn i into a chimera block pattern
                # (e.g., 0 -> '11111111', 1 -> '11111112', 2 -> '11111113'...)
                n2c = schema.base(i, num_parents)
                chimera_blocks = "".join(["1"] * (num_fragments - len(n2c)) + ["%d" % (int(x) + 1,) for x in n2c])
                all_chimeras.append(chimera_blocks)
                # Randomly assort the chimeras
            random.shuffle(all_chimeras)

            # Calculate average E and m for the library or subsample
        E_values = []
        m_values = []

        for chim_index in range(max_chimeras):
            chimera_blocks = all_chimeras[chim_index]
            E = schema.getChimeraDisruption(chimera_blocks, filtered_contacts, fragments, parents)
            m = schema.getChimeraShortestDistance(chimera_blocks, fragments, parents)
            E_values.append(E)
            m_values.append(m)
        average_E = schema.mean(E_values)
        average_m = schema.mean(m_values)
        xover_pat = "%d " * len(crossovers)
        xover_str = xover_pat % tuple(crossovers)
        output_file.write(("%1.4f\t%1.4f\t%s\n") % (average_E, average_m, xover_str))
        output_file.flush()
    total_time = time.clock() - start_time
    output_file.write(
        "# Finished in %1.2f seconds (%d libraries, %d chimeras)\n"
        % (total_time, num_libraries, num_libraries * max_chimeras)
    )
    if arg_dict.has_key(ARG_OUTPUT_FILE):
        output_file.close()