コード例 #1
ファイル: raspp.py プロジェクト: carat64/SCHEMA-RASPP
def RASPP_SCHEMA(contacts, parents, num_crossovers, min_fragment_diversity):
	"""Run RASPP on average SCHEMA energies and translate the crossovers.

	SCHEMA contacts are derived from ``contacts`` and ``parents``, turned
	into 4-D energies and then averaged.  ``RASPP`` is called with those
	averages, and the crossovers of every result are passed through
	``translate_collapsed_indices`` together with the identity list that
	``collapse_parents`` produced.

	Returns the list obtained from ``RASPP``, updated in place so that each
	entry is a tuple ``(avg_E, crossovers, l_min, l_max)``.
	"""
	schema_contacts = schema.getSCHEMAContacts(contacts, parents)
	# Only the identity list is used below; the collapsed parents are not.
	(_collapsed, identity_list) = collapse_parents(parents)
	avg_energies = raspp.calc_average_energies(
		raspp.make_4d_energies(schema_contacts, parents), parents)
	results = RASPP(avg_energies, parents, num_crossovers, min_fragment_diversity)
	# Overwrite each slot in place; the list length never changes, so
	# assigning by index while enumerating is safe.
	for idx, (avg_E, collapsed_xovers, l_min, l_max) in enumerate(results):
		translated = translate_collapsed_indices(collapsed_xovers, identity_list)
		results[idx] = (avg_E, translated, l_min, l_max)
	return results
コード例 #2
ファイル: raspp.py プロジェクト: Gienah/SCHEMA-RASPP
def RASPP_SCHEMA(contacts, parents, num_crossovers, min_fragment_diversity):
    """Run RASPP on average SCHEMA energies and translate the crossovers.

    SCHEMA contacts are derived from ``contacts`` and ``parents``, turned
    into 4-D energies and then averaged.  ``RASPP`` is called with those
    averages, and the crossovers of every result are passed through
    ``translate_collapsed_indices`` together with the identity list that
    ``collapse_parents`` produced.

    Returns the list obtained from ``RASPP``, updated in place so that each
    entry is a tuple ``(avg_E, crossovers, l_min, l_max)``.
    """
    schema_contacts = schema.getSCHEMAContacts(contacts, parents)
    # Only the identity list from collapsing is used below; the energies and
    # the RASPP call are given the original parents.
    (collapsed_parents, identity_list) = collapse_parents(parents)
    energies = raspp.make_4d_energies(schema_contacts, parents)
    avg_energies = raspp.calc_average_energies(energies, parents)
    results = RASPP(avg_energies, parents, num_crossovers,
                    min_fragment_diversity)

    for i in range(len(results)):
        (avg_E, collapsed_crossovers, l_min, l_max) = results[i]
        crossovers = translate_collapsed_indices(collapsed_crossovers,
                                                 identity_list)
        results[i] = (avg_E, crossovers, l_min, l_max)
    return results
コード例 #3
ファイル: rasppcurve.py プロジェクト: carat64/SCHEMA-RASPP
def main(args):
	# Script entry point: reads a multiple sequence alignment and a contact
	# file, runs RASPP on the average SCHEMA energies, and writes the
	# resulting (<E>, <m>) curve with its crossover points to the output.
	#
	# args -- argument list; args[0] is compared against the script name.
	#
	# NOTE(review): this excerpt appears to be missing lines (some branches
	# have no visible body or no visible else:, and msa_file is never
	# assigned here) -- confirm against the complete rasppcurve.py.
	arg_dict = parse_arguments(args)
	if not confirm_arguments(arg_dict):
		if args[0].split(os.path.sep)[-1] == "rasppcurve.py":
			# NOTE(review): no body is visible for this branch in the excerpt.

	# Flags and values
	# (neither flag is read anywhere in the visible part of this function)
	print_E = False
	print_m = False
	# Inputs:
	#   The alignment/fragment file name.
	# NOTE(review): msa_file is used below but not assigned in the visible code.

	# Read the alignment file to create a list of parents.
	# The parents will appear in the list in the order in which they appear in the file.
	parent_list = schema.readMultipleSequenceAlignmentFile(file(msa_file, 'r'))
	# Keep only the second element of each (k, p) pair.
	parents = [p for (k,p) in parent_list]
	# Get the contacts
	pdb_contacts = schema.readContactFile(file(arg_dict[ARG_CONTACT_FILE], 'r'))
	# Establish connection to output, either file or, if no output file is
	# specified, to standard output.
	# NOTE(review): as shown, the sys.stdout assignment sits in the same branch
	# as the file() call and replaces it; an else: appears to be missing.
	if arg_dict.has_key(ARG_OUTPUT_FILE):
		output_file = file(arg_dict[ARG_OUTPUT_FILE], 'w')
		output_file = sys.stdout

	# Get the minimum fragment size.
	# NOTE(review): as shown, the default of 4 overrides the parsed value; an
	# else: appears to be missing before the "No minimum..." message.
	if arg_dict.has_key(ARG_MIN_FRAGMENT_SIZE):
		min_length = int(arg_dict[ARG_MIN_FRAGMENT_SIZE])
		output_file.write("# No minimum fragment length specified; using L=4.\n")
		min_length = 4

	# Get the bin width
	# NOTE(review): same pattern as above -- an else: appears to be missing.
	if arg_dict.has_key(ARG_BIN_WIDTH):
		bin_width = float(arg_dict[ARG_BIN_WIDTH])
		output_file.write("# No bin width specified; using bin width=1.0.\n")
		bin_width = 1.0

	# Get the number of fragments -- one more than the number of crossovers.
	num_fragments = int(arg_dict[ARG_NUM_CROSSOVERS])+1
	num_parents = len(parents)
	# library_size is not read in the visible part of this function.
	library_size = num_parents**num_fragments

	# Make libraries consistent with RASPP
	(new_parents, identical_sites) = raspp.collapse_parents(parents)
	# The check uses the collapsed length len(new_parents[0]), while the
	# message below reports len(parents[0]).
	# NOTE(review): the message says "Aborting..." but no return/exit is
	# visible after the print in this excerpt.
	if len(new_parents[0]) < num_fragments*min_length:
		error_msg = "Minimum fragment length of %d is too large.\n%d " + \
					"fragments with length %d cannot be found in a " + \
					"sequence of length %d (with identities removed).  Aborting..."
		print error_msg % (min_length, num_fragments, min_length, len(parents[0]))

	contacts = schema.getSCHEMAContacts(pdb_contacts, parents)
	energies = raspp.make_4d_energies(contacts, parents)
	avg_energies = raspp.calc_average_energies(energies, parents)

	# Time the RASPP call and report the elapsed time and result count.
	tstart = time.clock()
	res = raspp.RASPP(avg_energies, parents, num_fragments-1, min_length)
	output_file.write("# RASPP took %1.2f secs\n" % (time.clock()-tstart,))
	output_file.write("# RASPP found %d results\n" % (len(res),))

	# Time the curve construction and write one line per curve entry.
	tstart = time.clock()
	curve = raspp.curve(res, parents, bin_width)
	output_file.write("# RASPP found %d unique (<E>,<m>) points\n" % (len(curve),))
	output_file.write("# RASPP curve took %1.2f secs\n" % (time.clock()-tstart,))
	output_file.write("# <E>\t<m>\tcrossover points\n")
	for (average_E, average_m, crossovers) in curve:
		# Build one '%d ' slot per crossover, then fill the slots in.
		xover_pat = '%d '*len(crossovers)
		xover_str = xover_pat % tuple(crossovers)
		output_file.write('%1.4f\t%1.4f\t%s\n' % (average_E, average_m, xover_str))

	# NOTE(review): the body of this final branch is not visible in the excerpt.
	if arg_dict.has_key(ARG_OUTPUT_FILE):
コード例 #4
ファイル: rasppcurve.py プロジェクト: Gienah/SCHEMA-RASPP
def main(args):
    # Script entry point: reads a multiple sequence alignment and a contact
    # file, runs RASPP on the average SCHEMA energies, and writes the
    # resulting (<E>, <m>) curve with its crossover points to the output.
    #
    # args -- argument list; args[0] is compared against the script name.
    #
    # NOTE(review): this excerpt appears to be missing lines (some branches
    # have no visible body or no visible else:, several statements are cut
    # off mid-call, and msa_file is never assigned here) -- confirm against
    # the complete rasppcurve.py.
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        if args[0].split(os.path.sep)[-1] == "rasppcurve.py":
            # NOTE(review): no body is visible for this branch in the excerpt.

    # Flags and values
    # (neither flag is read anywhere in the visible part of this function)
    print_E = False
    print_m = False

    # Inputs:
    #   The alignment/fragment file name.
    # NOTE(review): msa_file is used below but not assigned in the visible code.

    # Read the alignment file to create a list of parents.
    # The parents will appear in the list in the order in which they appear in the file.
    parent_list = schema.readMultipleSequenceAlignmentFile(file(msa_file, 'r'))
    # Keep only the second element of each (k, p) pair.
    parents = [p for (k, p) in parent_list]

    # Get the contacts
    # NOTE(review): the call below is cut off after the first argument of
    # file(); its closing part is not visible in this excerpt.
    pdb_contacts = schema.readContactFile(file(arg_dict[ARG_CONTACT_FILE],

    # Establish connection to output, either file or, if no output file is
    # specified, to standard output.
    # NOTE(review): as shown, the sys.stdout assignment sits in the same branch
    # as the file() call and replaces it; an else: appears to be missing.
    if arg_dict.has_key(ARG_OUTPUT_FILE):
        output_file = file(arg_dict[ARG_OUTPUT_FILE], 'w')
        output_file = sys.stdout

    # Get the minimum fragment size.
    # NOTE(review): the string line below is the tail of a statement whose
    # beginning is not visible, and an else: appears to be missing before it.
    if arg_dict.has_key(ARG_MIN_FRAGMENT_SIZE):
        min_length = int(arg_dict[ARG_MIN_FRAGMENT_SIZE])
            "# No minimum fragment length specified; using L=4.\n")
        min_length = 4

    # Get the bin width
    # NOTE(review): as shown, the default of 1.0 overrides the parsed value;
    # an else: appears to be missing before the "No bin width..." message.
    if arg_dict.has_key(ARG_BIN_WIDTH):
        bin_width = float(arg_dict[ARG_BIN_WIDTH])
        output_file.write("# No bin width specified; using bin width=1.0.\n")
        bin_width = 1.0

    # Get the number of fragments -- one more than the number of crossovers.
    num_fragments = int(arg_dict[ARG_NUM_CROSSOVERS]) + 1

    num_parents = len(parents)
    # library_size is not read in the visible part of this function.
    library_size = num_parents**num_fragments

    # Make libraries consistent with RASPP
    (new_parents, identical_sites) = raspp.collapse_parents(parents)
    # The check uses the collapsed length len(new_parents[0]).
    # NOTE(review): the print statement below is cut off before its last
    # argument, and no return/exit is visible after the "Aborting..." message.
    if len(new_parents[0]) < num_fragments * min_length:
        error_msg = "Minimum fragment length of %d is too large.\n%d " + \
           "fragments with length %d cannot be found in a " + \
           "sequence of length %d (with identities removed).  Aborting..."
        print error_msg % (min_length, num_fragments, min_length,

    contacts = schema.getSCHEMAContacts(pdb_contacts, parents)
    energies = raspp.make_4d_energies(contacts, parents)
    avg_energies = raspp.calc_average_energies(energies, parents)

    # Time the RASPP call and report the elapsed time and result count.
    tstart = time.clock()
    res = raspp.RASPP(avg_energies, parents, num_fragments - 1, min_length)
    output_file.write("# RASPP took %1.2f secs\n" % (time.clock() - tstart, ))
    output_file.write("# RASPP found %d results\n" % (len(res), ))

    # Time the curve construction and write one line per curve entry.
    tstart = time.clock()
    curve = raspp.curve(res, parents, bin_width)
    output_file.write("# RASPP found %d unique (<E>,<m>) points\n" %
                      (len(curve), ))
    output_file.write("# RASPP curve took %1.2f secs\n" %
                      (time.clock() - tstart, ))
    output_file.write("# <E>\t<m>\tcrossover points\n")
    for (average_E, average_m, crossovers) in curve:
        # Build one '%d ' slot per crossover, then fill the slots in.
        xover_pat = '%d ' * len(crossovers)
        xover_str = xover_pat % tuple(crossovers)
        output_file.write('%1.4f\t%1.4f\t%s\n' %
                          (average_E, average_m, xover_str))

    # NOTE(review): the body of this final branch is not visible in the excerpt.
    if arg_dict.has_key(ARG_OUTPUT_FILE):