def wrapperProfileGraph(parentFile, contactFile):
    """Draw a graph of the number of clashes at each recombination point.

    For every parent other than the first, and for every position of the
    first parent used as a single crossover point, the disruption of the
    '21' chimera is computed with ``schema.getChimeraDisruption``. The
    per-position mean and standard deviation over those parents are then
    handed to ``makeBarGraph``.

    Args:
        parentFile: Path to the multiple sequence alignment file.
        contactFile: Path to the contact file. The label passed to
            ``makeBarGraph`` is the last four characters of the part of
            this path that precedes the first '_'.
    """
    pdbName = contactFile.split('_')[0][-4:]

    # Context managers guarantee the input files are closed once parsed.
    # The parent list is built inside the ``with`` so the alignment is fully
    # consumed before its handle is closed.
    with open(parentFile, 'r') as parent_handle:
        parent_list = schema.readMultipleSequenceAlignmentFile(parent_handle)
        parents = [p for (k, p) in parent_list]
    with open(contactFile, 'r') as contact_handle:
        pdb_contacts = schema.readContactFile(contact_handle)

    reference = parents[0]
    # One list of disruption values per position of the first parent.
    clash_data = [[] for _ in reference]

    for i in range(1, len(parents)):
        print(i)  # progress indicator: index of the parent being analysed
        # This reshuffles the alignment to make the first and second
        # sequences the ones analysed. It was needed as SCHEMA is limited
        # to 9 sequences.
        newList = [reference, parents[i]]
        newList.extend(parents[x] for x in range(1, len(parents)) if x != i)

        # Graphs for hotspots
        for residue in range(len(reference)):
            crossovers = [residue]
            contacts = schema.getSCHEMAContactsWithCrossovers(
                pdb_contacts, newList, crossovers)
            fragments = schema.getFragments(crossovers, reference)
            clash_data[residue].append(
                schema.getChimeraDisruption('21', contacts, fragments, newList))

    means = [np.mean(values) for values in clash_data]
    StDev = [np.std(values) for values in clash_data]
    makeBarGraph(means, StDev, pdbName)
def outputEnergies(chimera_blocks, contacts, fragments, parents, output_file, output_string, print_E, print_m):
    """Write the requested SCHEMA values for one chimera to ``output_file``.

    NOTE(review): a second, identical definition of ``outputEnergies``
    follows this one in the file; the later definition is the one bound at
    import time.

    Args:
        chimera_blocks: Chimera block pattern to evaluate.
        contacts: Contacts passed to ``schema.getChimeraDisruption``.
        fragments: Fragments passed to the ``schema`` calls.
        parents: Parent sequences passed to the ``schema`` calls.
        output_file: Object with a ``write`` method that receives the line.
        output_string: %-format template applied to the tuple
            ``(chimera_blocks[, E][, m])``.
        print_E: When true, compute E and include it in the output.
        print_m: When true, compute m and include it in the output.

    Returns:
        ``(E, m)``, where a value that was not requested is ``None``.
        Returns ``None`` (after writing a comment line) when
        ``schema.checkChimera`` rejects the chimera.
    """
    if schema.checkChimera(chimera_blocks, fragments, parents):
        disruption = None
        shortest_distance = None
        fields = [chimera_blocks]

        if print_E:
            disruption = schema.getChimeraDisruption(
                chimera_blocks, contacts, fragments, parents)
            fields.append(disruption)

        if print_m:
            shortest_distance = schema.getChimeraShortestDistance(
                chimera_blocks, fragments, parents)
            fields.append(shortest_distance)

        output_file.write(output_string % tuple(fields))
        return (disruption, shortest_distance)

    # Invalid chimera: report it as a comment line and return nothing.
    output_file.write("# %s is not a valid chimera\n" % chimera_blocks)
    return
def outputEnergies(chimera_blocks, contacts, fragments, parents, output_file, output_string, print_E, print_m):
    """Evaluate one chimera and write its formatted result line.

    NOTE(review): an identical definition of ``outputEnergies`` appears
    immediately before this one in the file; this later definition is the
    one that remains bound.

    Args:
        chimera_blocks: Chimera block pattern to evaluate.
        contacts: Contacts passed to ``schema.getChimeraDisruption``.
        fragments: Fragments passed to the ``schema`` calls.
        parents: Parent sequences passed to the ``schema`` calls.
        output_file: Object with a ``write`` method that receives the line.
        output_string: %-format template applied to the tuple
            ``(chimera_blocks[, E][, m])``.
        print_E: When true, compute E and include it in the output.
        print_m: When true, compute m and include it in the output.

    Returns:
        ``(E, m)``, with ``None`` in place of any value that was not
        requested. Returns ``None`` (after writing a comment line) when
        ``schema.checkChimera`` rejects the chimera.
    """
    chimera_ok = schema.checkChimera(chimera_blocks, fragments, parents)
    if not chimera_ok:
        output_file.write("# %s is not a valid chimera\n" % chimera_blocks)
        return

    # E is evaluated before m, and each only when requested.
    energy = (
        schema.getChimeraDisruption(chimera_blocks, contacts, fragments, parents)
        if print_E else None
    )
    mutation = (
        schema.getChimeraShortestDistance(chimera_blocks, fragments, parents)
        if print_m else None
    )

    row = [chimera_blocks]
    if print_E:
        row.append(energy)
    if print_m:
        row.append(mutation)

    output_file.write(output_string % tuple(row))
    return (energy, mutation)
def main(args):
    """Report average SCHEMA E and m for randomly generated libraries.

    Reads the parent alignment and the contact file named in the parsed
    arguments, generates random crossover sets, and for each set writes one
    line holding the average ``schema.getChimeraDisruption`` value (E), the
    average ``schema.getChimeraShortestDistance`` value (m) and the
    crossover points. Averages are taken over every chimera in the library,
    or over a random sample (with replacement) when a smaller maximum number
    of chimeras per library is requested.

    Defaults when the corresponding argument is absent: 1000 libraries and
    a minimum fragment size of 4.

    Args:
        args: Command-line argument list. ``args[0]`` is treated as the
            script path when deciding whether to print usage.

    Returns:
        None. Results are written to the output file named in the
        arguments, or to standard output when none is given.
    """
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        # Only print usage when invoked as this script itself.
        if args[0].split(os.path.sep)[-1] == "schemarandom.py":
            print_usage(args)
        return

    def int_option(key, default):
        # Integer value of an optional argument, or ``default`` if absent.
        return int(arg_dict[key]) if key in arg_dict else default

    # Read the alignment file to create a list of parents.
    # The parents will appear in the list in the order in which they appear
    # in the file. The list is built inside the ``with`` so the alignment is
    # fully consumed before its handle is closed.
    with open(arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE], "r") as msa_handle:
        parent_list = schema.readMultipleSequenceAlignmentFile(msa_handle)
        parents = [p for (k, p) in parent_list]

    # Get the contacts
    with open(arg_dict[ARG_CONTACT_FILE], "r") as contact_handle:
        pdb_contacts = schema.readContactFile(contact_handle)

    # Establish connection to output, either file or, if no output file is
    # specified, to standard output.
    write_to_file = ARG_OUTPUT_FILE in arg_dict
    if write_to_file:
        output_file = open(arg_dict[ARG_OUTPUT_FILE], "w")
    else:
        output_file = sys.stdout

    # try/finally closes an opened output file on every exit path,
    # including the early return below and any exception.
    try:
        num_libraries = int_option(ARG_NUM_LIBRARIES, int(1e3))
        min_length = int_option(ARG_MIN_FRAGMENT_SIZE, 4)

        # Number of fragments -- one more than the number of crossovers.
        num_fragments = int(arg_dict[ARG_NUM_CROSSOVERS]) + 1
        num_parents = len(parents)
        library_size = num_parents ** num_fragments
        max_chimeras = min(
            library_size,
            int_option(ARG_MAX_CHIMERAS_PER_LIBRARY, library_size),
        )

        if ARG_RANDOM_SEED in arg_dict:
            random.seed(int(arg_dict[ARG_RANDOM_SEED]))

        # Make libraries consistent with RASPP
        (new_parents, identical_sites) = raspp.collapse_parents(parents)
        collapsed_length = len(new_parents[0])
        if collapsed_length < num_fragments * min_length:
            error_msg = (
                "Minimum diversity length of %d is too large.\n%d "
                + "fragments with diversity %d cannot be found in a "
                + "sequence of length %d (with identities removed). Aborting..."
            )
            # Report the collapsed length: that is the length the check
            # above uses and the one the message describes.
            print(error_msg % (min_length, num_fragments, min_length, collapsed_length))
            return

        def block_pattern(index):
            # Turn an index into a chimera block pattern, left-padded with
            # '1' (e.g., 0 -> '11111111', 1 -> '11111112', 2 -> '11111113'...)
            n2c = schema.base(index, num_parents)
            padding = ["1"] * (num_fragments - len(n2c))
            return "".join(padding + ["%d" % (int(x) + 1,) for x in n2c])

        start_time = time.time()

        output_file.write("# <E>\t<m>\tcrossover points\n")

        # All crossover sets are drawn first, before any chimera sampling,
        # so the order of random draws is unchanged for a given seed.
        random_crossovers = []
        for _ in range(num_libraries):
            crossovers = schema.generateRandomCrossovers(
                collapsed_length, num_fragments - 1, min_length)
            crossovers = raspp.translate_collapsed_indices(crossovers, identical_sites)
            random_crossovers.append(crossovers)

        for crossovers in random_crossovers:
            fragments = schema.getFragments(crossovers, parents[0])
            filtered_contacts = schema.getSCHEMAContactsWithCrossovers(
                pdb_contacts, parents, crossovers)

            if max_chimeras < library_size:
                # Assemble a random sample of chimeras, with replacement
                all_chimeras = [
                    block_pattern(random.randint(0, library_size - 1))
                    for _ in range(max_chimeras)
                ]
            else:
                # We'll be covering all chimeras in the library. The number
                # of parents and fragments specifies all possible chimeras,
                # regardless of crossover point positions.
                all_chimeras = [block_pattern(i) for i in range(library_size)]
                # Randomly assort the chimeras
                random.shuffle(all_chimeras)

            # Calculate average E and m for the library or subsample
            E_values = []
            m_values = []
            for chimera_blocks in all_chimeras:
                E_values.append(schema.getChimeraDisruption(
                    chimera_blocks, filtered_contacts, fragments, parents))
                m_values.append(schema.getChimeraShortestDistance(
                    chimera_blocks, fragments, parents))
            average_E = schema.mean(E_values)
            average_m = schema.mean(m_values)

            xover_str = ("%d " * len(crossovers)) % tuple(crossovers)
            output_file.write("%1.4f\t%1.4f\t%s\n" % (average_E, average_m, xover_str))
            output_file.flush()

        total_time = time.time() - start_time
        output_file.write(
            "# Finished in %1.2f seconds (%d libraries, %d chimeras)\n"
            % (total_time, num_libraries, num_libraries * max_chimeras)
        )
    finally:
        # Never close standard output; only a file this function opened.
        if write_to_file:
            output_file.close()