# Imports needed by the functions below (the original files declare these at
# the top). collapse_parents, RASPP, translate_collapsed_indices, the
# parse/confirm/usage helpers, and the ARG_* constants are defined elsewhere
# in the SCHEMA-RASPP scripts.
import os
import sys
import time

import schema
import raspp


def RASPP_SCHEMA(contacts, parents, num_crossovers, min_fragment_diversity):
    # Convert raw PDB contacts into SCHEMA contacts for the aligned parents.
    schema_contacts = schema.getSCHEMAContacts(contacts, parents)
    # Collapse alignment columns where all parents are identical; RASPP
    # operates on the collapsed alignment, so keep the identity list for
    # translating crossover indices back afterwards.
    (collapsed_parents, identity_list) = collapse_parents(parents)
    energies = raspp.make_4d_energies(schema_contacts, parents)
    avg_energies = raspp.calc_average_energies(energies, parents)
    results = RASPP(avg_energies, parents, num_crossovers,
                    min_fragment_diversity)
    # Translate each result's crossover points from collapsed-alignment
    # coordinates back to the original alignment coordinates.
    for i in range(len(results)):
        (avg_E, collapsed_crossovers, l_min, l_max) = results[i]
        crossovers = translate_collapsed_indices(collapsed_crossovers,
                                                 identity_list)
        results[i] = (avg_E, crossovers, l_min, l_max)
    return results
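# Illustrative usage sketch (not from the original source): the file names
# and parameter values below are assumptions. It builds the inputs with the
# schema helpers used above, then asks for 7 crossovers with at least 4
# non-identical positions per fragment.
#
#   pdb_contacts = schema.readContactFile(file('contacts.txt', 'r'))
#   parent_list = schema.readMultipleSequenceAlignmentFile(file('msa.txt', 'r'))
#   parents = [p for (k, p) in parent_list]
#   for (avg_E, crossovers, l_min, l_max) in RASPP_SCHEMA(pdb_contacts,
#                                                         parents, 7, 4):
#       print avg_E, crossovers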
def main(args):
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        if args[0].split(os.path.sep)[-1] == "rasppcurve.py":
            print_usage(args)
        return

    # Flags and values (reserved; not used below).
    print_E = False
    print_m = False

    # Inputs:
    # The alignment/fragment file name.
    msa_file = arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE]

    # Read the alignment file to create a list of parents. The parents will
    # appear in the list in the order in which they appear in the file.
    parent_list = schema.readMultipleSequenceAlignmentFile(file(msa_file, 'r'))
    parents = [p for (k, p) in parent_list]

    # Get the contacts.
    pdb_contacts = schema.readContactFile(file(arg_dict[ARG_CONTACT_FILE], 'r'))

    # Establish connection to output, either file or, if no output file is
    # specified, to standard output.
    if arg_dict.has_key(ARG_OUTPUT_FILE):
        output_file = file(arg_dict[ARG_OUTPUT_FILE], 'w')
    else:
        output_file = sys.stdout

    # Get the minimum fragment size.
    if arg_dict.has_key(ARG_MIN_FRAGMENT_SIZE):
        min_length = int(arg_dict[ARG_MIN_FRAGMENT_SIZE])
    else:
        output_file.write("# No minimum fragment length specified; using L=4.\n")
        min_length = 4

    # Get the bin width.
    if arg_dict.has_key(ARG_BIN_WIDTH):
        bin_width = float(arg_dict[ARG_BIN_WIDTH])
    else:
        output_file.write("# No bin width specified; using bin width=1.0.\n")
        bin_width = 1.0

    # Get the number of fragments -- one more than the number of crossovers.
    num_fragments = int(arg_dict[ARG_NUM_CROSSOVERS]) + 1
    num_parents = len(parents)
    library_size = num_parents**num_fragments

    # Make libraries consistent with RASPP: collapse sites where all parents
    # are identical, since crossovers there change nothing.
    (new_parents, identical_sites) = raspp.collapse_parents(parents)
    if len(new_parents[0]) < num_fragments * min_length:
        error_msg = "Minimum fragment length of %d is too large.\n%d " + \
                    "fragments with length %d cannot be found in a " + \
                    "sequence of length %d (with identities removed). Aborting..."
        # Report the collapsed length, which is what the check above uses.
        print error_msg % (min_length, num_fragments, min_length,
                           len(new_parents[0]))
        return

    contacts = schema.getSCHEMAContacts(pdb_contacts, parents)
    energies = raspp.make_4d_energies(contacts, parents)
    avg_energies = raspp.calc_average_energies(energies, parents)

    tstart = time.clock()
    res = raspp.RASPP(avg_energies, parents, num_fragments - 1, min_length)
    output_file.write("# RASPP took %1.2f secs\n" % (time.clock() - tstart,))
    output_file.write("# RASPP found %d results\n" % (len(res),))

    tstart = time.clock()
    curve = raspp.curve(res, parents, bin_width)
    output_file.write("# RASPP found %d unique (<E>,<m>) points\n" % (len(curve),))
    output_file.write("# RASPP curve took %1.2f secs\n" % (time.clock() - tstart,))

    # Write one line per point on the RASPP curve: average SCHEMA energy <E>,
    # average mutation level <m>, and the crossover positions.
    output_file.write("# <E>\t<m>\tcrossover points\n")
    for (average_E, average_m, crossovers) in curve:
        xover_pat = '%d ' * len(crossovers)
        xover_str = xover_pat % tuple(crossovers)
        output_file.write('%1.4f\t%1.4f\t%s\n' % (average_E, average_m, xover_str))

    if arg_dict.has_key(ARG_OUTPUT_FILE):
        output_file.close()
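# Script entry point: a standard sketch, assuming main() takes the raw
# argument list; the original rasppcurve.py may wire this up differently.
if __name__ == "__main__":
    main(sys.argv)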