def main(args):
    """Report SCHEMA disruption (E) and mutation (m) values for chimeras.

    Parses ``args``, reads the alignment, crossover and contact files named
    in the parsed arguments, writes a ``# chimera`` header line and then hands
    each chimera block pattern to outputEnergies() together with the output
    stream and the row format string.

    The chimeras argument may be a list of block patterns, the path of a file
    holding one pattern per line, or a single pattern.  When it is absent,
    every possible chimera is enumerated and the averages of the requested
    values are appended as comment lines.

    args -- command-line argument list; args[0] is treated as the script path.

    Returns None.  Returns early (showing usage only when run as
    schemaenergy.py) if confirm_arguments() rejects the arguments.
    """
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        # Only show usage when this module is the script that was invoked.
        if args[0].split(os.path.sep)[-1] == "schemaenergy.py":
            print_usage(args)
        return

    # Flags: which columns does the user want?
    print_E = ARG_PRINT_E in arg_dict
    print_m = ARG_PRINT_M in arg_dict

    # Read the alignment file to create a list of parents.
    # The parents appear in the list in the order in which they appear in the file.
    # Results are consumed inside each 'with' so nothing reads a closed handle.
    with open(arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE], 'r') as msa_handle:
        parent_list = schema.readMultipleSequenceAlignmentFile(msa_handle)
        parents = [p for (k, p) in parent_list]

    with open(arg_dict[ARG_CROSSOVER_FILE], 'r') as crossover_handle:
        crossovers = schema.readCrossoverFile(crossover_handle)
        fragments = schema.getFragments(crossovers, parents[0])

    # Get the contacts
    with open(arg_dict[ARG_CONTACT_FILE], 'r') as contact_handle:
        pdb_contacts = schema.readContactFile(contact_handle)
        contacts = schema.getSCHEMAContactsWithCrossovers(pdb_contacts, parents, crossovers)

    # Write to the requested file, or to standard output when none is given.
    writes_to_file = ARG_OUTPUT_FILE in arg_dict
    if writes_to_file:
        output_file = open(arg_dict[ARG_OUTPUT_FILE], 'w')
    else:
        output_file = sys.stdout

    try:
        # Build the header line and the matching per-chimera format string.
        header = '# chimera'
        output_string = '%s'
        if print_E:
            output_string += '\t%d'
            header += '\tE'
        if print_m:
            output_string += '\t%d'
            header += '\tm'
        output_string += '\n'
        output_file.write(header + '\n')

        if ARG_CHIMERAS in arg_dict:
            # Could be a) a chimera, b) a list of chimeras, or c) a file of chimeras.
            chimeras = arg_dict[ARG_CHIMERAS]
            if isinstance(chimeras, list):
                # It's a list of chimeras
                for chimera_blocks in chimeras:
                    outputEnergies(chimera_blocks, contacts, fragments, parents,
                                   output_file, output_string, print_E, print_m)
            elif os.path.isfile(chimeras):
                # It's a file of chimeras, one block pattern per line
                with open(chimeras, 'r') as chimera_handle:
                    for line in chimera_handle:
                        outputEnergies(line.strip(), contacts, fragments, parents,
                                       output_file, output_string, print_E, print_m)
            else:
                # It's a single chimera sequence
                outputEnergies(chimeras, contacts, fragments, parents,
                               output_file, output_string, print_E, print_m)
        else:
            # Enumerate all possible chimeras and their disruption and mutation values.
            p = len(parents)
            n = len(fragments)
            Es = []
            ms = []
            # xrange keeps the enumeration lazy under Python 2, which this file targets.
            for i in xrange(p ** n):
                # The next two lines turn i into a chimera block pattern
                # (e.g., 0 -> '11111111', 1 -> '11111112', 2 -> '11111113'...)
                n2c = schema.base(i, p)
                chimera_blocks = ''.join(['1'] * (n - len(n2c)) + ['%d' % (int(x) + 1,) for x in n2c])
                (E, m) = outputEnergies(chimera_blocks, contacts, fragments, parents,
                                        output_file, output_string, print_E, print_m)
                if print_E:
                    Es.append(E)
                if print_m:
                    ms.append(m)
            if print_E:
                output_file.write("# Average disruption <E> = %1.4f\n" % schema.mean(Es))
            if print_m:
                output_file.write("# Average mutation <m> = %1.4f\n" % schema.mean(ms))
    finally:
        # Close only a file we opened ourselves; never close sys.stdout.
        if writes_to_file:
            output_file.close()
def main(args):
    """Report average disruption <E> and mutation <m> for random libraries.

    Parses ``args``, reads the alignment and contact files named in the
    parsed arguments, draws random crossover points for the requested number
    of libraries (default 1000, minimum fragment size default 4) and, for
    each library, writes one tab-separated line with the mean E, the mean m
    and the crossover points.  A header comment precedes the rows and a
    timing summary comment follows them.

    Each library is evaluated either on every possible chimera or, when the
    maximum-chimeras argument is smaller than the library size, on a random
    sample of that many chimeras drawn with replacement.

    args -- command-line argument list; args[0] is treated as the script path.

    Returns None.  Returns early (showing usage only when run as
    schemarandom.py) if confirm_arguments() rejects the arguments, and
    prints a message and returns before opening the output file when the
    collapsed alignment is too short for the requested fragments.
    """
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        # Only show usage when this module is the script that was invoked.
        if args[0].split(os.path.sep)[-1] == "schemarandom.py":
            print_usage(args)
        return

    # Read the alignment file to create a list of parents.
    # The parents appear in the list in the order in which they appear in the file.
    with open(arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE], "r") as msa_handle:
        parent_list = schema.readMultipleSequenceAlignmentFile(msa_handle)
        parents = [p for (k, p) in parent_list]

    # Get the contacts
    with open(arg_dict[ARG_CONTACT_FILE], "r") as contact_handle:
        pdb_contacts = schema.readContactFile(contact_handle)

    # Number of libraries to evaluate and the minimum fragment size.
    num_libraries = int(arg_dict.get(ARG_NUM_LIBRARIES, 1000))
    min_length = int(arg_dict.get(ARG_MIN_FRAGMENT_SIZE, 4))

    # Get the number of fragments -- one more than the number of crossovers.
    num_fragments = int(arg_dict[ARG_NUM_CROSSOVERS]) + 1
    num_parents = len(parents)
    library_size = num_parents ** num_fragments
    max_chimeras = library_size
    if ARG_MAX_CHIMERAS_PER_LIBRARY in arg_dict:
        max_chimeras = min(library_size, int(arg_dict[ARG_MAX_CHIMERAS_PER_LIBRARY]))

    if ARG_RANDOM_SEED in arg_dict:
        random.seed(int(arg_dict[ARG_RANDOM_SEED]))

    # Make libraries consistent with RASPP
    (new_parents, identical_sites) = raspp.collapse_parents(parents)
    collapsed_length = len(new_parents[0])
    if collapsed_length < num_fragments * min_length:
        error_msg = (
            "Minimum diversity length of %d is too large.\n%d "
            + "fragments with diversity %d cannot be found in a "
            + "sequence of length %d (with identities removed). Aborting..."
        )
        # Report the collapsed length: that is the value the check above uses
        # and the one the message describes.
        print(error_msg % (min_length, num_fragments, min_length, collapsed_length))
        return

    def block_pattern(index):
        # Turn an index into a chimera block pattern, left-padded with '1'
        # (e.g., 0 -> '11111111', 1 -> '11111112', 2 -> '11111113'...).
        n2c = schema.base(index, num_parents)
        return "".join(["1"] * (num_fragments - len(n2c)) + ["%d" % (int(x) + 1,) for x in n2c])

    # Establish connection to output, either file or, if no output file is
    # specified, to standard output.  Opened only after validation so an
    # aborted run neither truncates nor leaks the output file.
    writes_to_file = ARG_OUTPUT_FILE in arg_dict
    if writes_to_file:
        output_file = open(arg_dict[ARG_OUTPUT_FILE], "w")
    else:
        output_file = sys.stdout

    try:
        start_time = time.clock()
        output_file.write("# <E>\t<m>\tcrossover points\n")

        # Draw the crossover points for all libraries before evaluating any
        # of them; this fixes the order in which the random generator is used.
        random_crossovers = []
        for _ in range(num_libraries):
            crossovers = schema.generateRandomCrossovers(collapsed_length, num_fragments - 1, min_length)
            crossovers = raspp.translate_collapsed_indices(crossovers, identical_sites)
            random_crossovers.append(crossovers)

        # The number of parents and fragments specifies all possible chimeras,
        # regardless of crossover positions, so the full enumeration is built
        # at most once and reused for every library.
        every_chimera = None

        for crossovers in random_crossovers:
            fragments = schema.getFragments(crossovers, parents[0])
            filtered_contacts = schema.getSCHEMAContactsWithCrossovers(pdb_contacts, parents, crossovers)

            if max_chimeras < library_size:
                # Assemble a random sample of chimeras, with replacement
                all_chimeras = [
                    block_pattern(random.randint(0, library_size - 1))
                    for _ in range(max_chimeras)
                ]
            else:
                # Covering all chimeras in the library.
                if every_chimera is None:
                    every_chimera = [block_pattern(i) for i in range(library_size)]
                # Shuffle a fresh copy so each library starts from the same order.
                all_chimeras = list(every_chimera)
                random.shuffle(all_chimeras)

            # Calculate average E and m for the library or subsample
            E_values = []
            m_values = []
            for chimera_blocks in all_chimeras:
                E_values.append(
                    schema.getChimeraDisruption(chimera_blocks, filtered_contacts, fragments, parents)
                )
                m_values.append(schema.getChimeraShortestDistance(chimera_blocks, fragments, parents))
            average_E = schema.mean(E_values)
            average_m = schema.mean(m_values)

            xover_str = "".join("%d " % x for x in crossovers)
            output_file.write("%1.4f\t%1.4f\t%s\n" % (average_E, average_m, xover_str))
            output_file.flush()

        total_time = time.clock() - start_time
        output_file.write(
            "# Finished in %1.2f seconds (%d libraries, %d chimeras)\n"
            % (total_time, num_libraries, num_libraries * max_chimeras)
        )
    finally:
        # Close only a file we opened ourselves; never close sys.stdout.
        if writes_to_file:
            output_file.close()
def main(args):
    """Report SCHEMA disruption (E) and mutation (m) values for chimeras.

    Parses ``args``, reads the alignment, crossover and contact files named
    in the parsed arguments, writes a ``# chimera`` header line and then hands
    each chimera block pattern to outputEnergies() together with the output
    stream and the row format string.

    The chimeras argument may be a list of block patterns, the path of a file
    holding one pattern per line, or a single pattern.  When it is absent,
    every possible chimera is enumerated and the averages of the requested
    values are appended as comment lines.

    args -- command-line argument list; args[0] is treated as the script path.

    Returns None.  Returns early (showing usage only when run as
    schemaenergy.py) if confirm_arguments() rejects the arguments.
    """
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        # Only show usage when this module is the script that was invoked.
        if args[0].split(os.path.sep)[-1] == "schemaenergy.py":
            print_usage(args)
        return

    # Flags: which columns does the user want?
    print_E = ARG_PRINT_E in arg_dict
    print_m = ARG_PRINT_M in arg_dict

    # Read the alignment file to create a list of parents.
    # The parents appear in the list in the order in which they appear in the file.
    # Results are consumed inside each 'with' so nothing reads a closed handle.
    with open(arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE], 'r') as msa_handle:
        parent_list = schema.readMultipleSequenceAlignmentFile(msa_handle)
        parents = [p for (k, p) in parent_list]

    with open(arg_dict[ARG_CROSSOVER_FILE], 'r') as crossover_handle:
        crossovers = schema.readCrossoverFile(crossover_handle)
        fragments = schema.getFragments(crossovers, parents[0])

    # Get the contacts
    with open(arg_dict[ARG_CONTACT_FILE], 'r') as contact_handle:
        pdb_contacts = schema.readContactFile(contact_handle)
        contacts = schema.getSCHEMAContactsWithCrossovers(pdb_contacts, parents, crossovers)

    # Write to the requested file, or to standard output when none is given.
    writes_to_file = ARG_OUTPUT_FILE in arg_dict
    if writes_to_file:
        output_file = open(arg_dict[ARG_OUTPUT_FILE], 'w')
    else:
        output_file = sys.stdout

    try:
        # Build the header line and the matching per-chimera format string.
        header = '# chimera'
        output_string = '%s'
        if print_E:
            output_string += '\t%d'
            header += '\tE'
        if print_m:
            output_string += '\t%d'
            header += '\tm'
        output_string += '\n'
        output_file.write(header + '\n')

        if ARG_CHIMERAS in arg_dict:
            # Could be a) a chimera, b) a list of chimeras, or c) a file of chimeras.
            chimeras = arg_dict[ARG_CHIMERAS]
            if isinstance(chimeras, list):
                # It's a list of chimeras
                for chimera_blocks in chimeras:
                    outputEnergies(chimera_blocks, contacts, fragments, parents,
                                   output_file, output_string, print_E, print_m)
            elif os.path.isfile(chimeras):
                # It's a file of chimeras, one block pattern per line
                with open(chimeras, 'r') as chimera_handle:
                    for line in chimera_handle:
                        outputEnergies(line.strip(), contacts, fragments, parents,
                                       output_file, output_string, print_E, print_m)
            else:
                # It's a single chimera sequence
                outputEnergies(chimeras, contacts, fragments, parents,
                               output_file, output_string, print_E, print_m)
        else:
            # Enumerate all possible chimeras and their disruption and mutation values.
            p = len(parents)
            n = len(fragments)
            Es = []
            ms = []
            # xrange keeps the enumeration lazy under Python 2, which this file targets.
            for i in xrange(p ** n):
                # The next two lines turn i into a chimera block pattern
                # (e.g., 0 -> '11111111', 1 -> '11111112', 2 -> '11111113'...)
                n2c = schema.base(i, p)
                chimera_blocks = ''.join(['1'] * (n - len(n2c)) + ['%d' % (int(x) + 1,) for x in n2c])
                (E, m) = outputEnergies(chimera_blocks, contacts, fragments, parents,
                                        output_file, output_string, print_E, print_m)
                if print_E:
                    Es.append(E)
                if print_m:
                    ms.append(m)
            if print_E:
                output_file.write("# Average disruption <E> = %1.4f\n" % schema.mean(Es))
            if print_m:
                output_file.write("# Average mutation <m> = %1.4f\n" % schema.mean(ms))
    finally:
        # Close only a file we opened ourselves; never close sys.stdout.
        if writes_to_file:
            output_file.close()