Esempio n. 1
0
def main(args):
    """Write the residue sequence of the selected chain(s) of a PDB file.

    The arguments are parsed with ``parse_arguments`` and validated with
    ``confirm_arguments``.  When validation fails the function returns
    without doing any work, printing the usage text first if the script
    was invoked as ``pdbseq.py``.

    Args:
        args: Command-line style argument list.  ``args[0]`` is the script
            path; only its final path component is inspected.

    Returns:
        None.  The sequence, preceded by a one-line ``#`` header, is
        written to the requested output file, or to standard output when
        no output file was given.  If no residues match the selected
        chains a message is printed and nothing is written.
    """
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        if args[0].split(os.path.sep)[-1] == "pdbseq.py":
            print_usage(args)
        return

    # Input: the PDB file name.
    pdb_file = arg_dict[ARG_PDB_FILE]

    # The PDB chains.
    # Many PDB files include multiple chains.  chain_identifiers lists the
    # chains belonging to the protein of interest.  The blank chain ' '
    # (used by single-chain files) is always appended, and chain 'A' is the
    # default when no chains were requested.
    if ARG_CHAINS in arg_dict:
        chains = arg_dict[ARG_CHAINS]
        if isinstance(chains, list):
            chain_identifiers = chains + [' ']
        else:
            chain_identifiers = [chains, ' ']
    else:
        chain_identifiers = ['A', ' ']

    # Read in the PDB file to create a list of residues.  The handle is
    # closed as soon as parsing finishes.
    # NOTE(review): assumes pdb.File().read() consumes the handle eagerly
    # and returns a fully built list -- confirm against the pdb module.
    with open(pdb_file, 'r') as pdb_handle:
        residues = pdb.File().read(pdb_handle)

    # Keep only the residues that belong to the selected chains.
    residue_seq = pdb.sequence(residues, chain_identifiers)
    if residue_seq == '':
        print("No residues found for chain(s) %s.  Aborting..." %
              (chain_identifiers,))
        return

    report = '# Residue sequence for chain(s) %s from PDB file %s\n%s' % (
        chain_identifiers, pdb_file, residue_seq)

    # The output file is opened only once there is something to write, so
    # an aborted run neither truncates an existing file nor leaks a handle.
    if ARG_OUTPUT_FILE in arg_dict:
        with open(arg_dict[ARG_OUTPUT_FILE], 'w') as output_file:
            output_file.write(report)
    else:
        sys.stdout.write(report)
Esempio n. 2
0
def main(args):
	"""Write the residue sequence of the selected chain(s) of a PDB file.

	The arguments are parsed with ``parse_arguments`` and validated with
	``confirm_arguments``.  When validation fails the function returns
	without doing any work, printing the usage text first if the script
	was invoked as ``pdbseq.py``.

	Args:
		args: Command-line style argument list.  ``args[0]`` is the script
			path; only its final path component is inspected.

	Returns:
		None.  The sequence, preceded by a one-line ``#`` header, is
		written to the requested output file, or to standard output when
		no output file was given.  If no residues match the selected
		chains a message is printed and nothing is written.
	"""
	arg_dict = parse_arguments(args)
	if not confirm_arguments(arg_dict):
		if args[0].split(os.path.sep)[-1] == "pdbseq.py":
			print_usage(args)
		return

	# Input: the PDB file name.
	pdb_file = arg_dict[ARG_PDB_FILE]

	# The PDB chains.
	# Many PDB files include multiple chains.  chain_identifiers lists the
	# chains belonging to the protein of interest.  The blank chain ' '
	# (used by single-chain files) is always appended, and chain 'A' is the
	# default when no chains were requested.
	if ARG_CHAINS in arg_dict:
		chains = arg_dict[ARG_CHAINS]
		if isinstance(chains, list):
			chain_identifiers = chains + [' ']
		else:
			chain_identifiers = [chains, ' ']
	else:
		chain_identifiers = ['A', ' ']

	# Read in the PDB file to create a list of residues.  The handle is
	# closed as soon as parsing finishes.
	# NOTE(review): assumes pdb.File().read() consumes the handle eagerly
	# and returns a fully built list -- confirm against the pdb module.
	with open(pdb_file, 'r') as pdb_handle:
		residues = pdb.File().read(pdb_handle)

	# Keep only the residues that belong to the selected chains.
	residue_seq = pdb.sequence(residues, chain_identifiers)
	if residue_seq == '':
		print("No residues found for chain(s) %s.  Aborting..." %
			(chain_identifiers,))
		return

	report = '# Residue sequence for chain(s) %s from PDB file %s\n%s' % (
		chain_identifiers, pdb_file, residue_seq)

	# The output file is opened only once there is something to write, so
	# an aborted run neither truncates an existing file nor leaks a handle.
	if ARG_OUTPUT_FILE in arg_dict:
		with open(arg_dict[ARG_OUTPUT_FILE], 'w') as output_file:
			output_file.write(report)
	else:
		sys.stdout.write(report)
Esempio n. 3
0
def main(args):
    """Load a PDB structure and align its residues to a parent sequence.

    The arguments are parsed with ``parse_arguments`` and validated with
    ``confirm_arguments``.  When validation fails the function returns
    without doing any work, printing the usage text first if the script
    was invoked as ``schemacontacts.py``.

    Two modes are supported:

    * No PDB/parent alignment file: the ID code read from the PDB file is
      used as the key into the multiple sequence alignment (MSA), and that
      MSA entry serves as both the parent and the PDB sequence.
    * PDB/parent alignment file given: the key present in both the MSA and
      the alignment file identifies the parent; the other sequence in the
      alignment file is taken as the PDB sequence.

    Every failure path prints a message and returns.

    Args:
        args: Command-line style argument list.  ``args[0]`` is the script
            path; only its final path component is inspected.

    Returns:
        None.
    """
    arg_dict = parse_arguments(args)
    if not confirm_arguments(arg_dict):
        if args[0].split(os.path.sep)[-1] == "schemacontacts.py":
            print_usage(args)
        return

    # Inputs:
    #   The PDB file name.
    pdb_file = arg_dict[ARG_PDB_FILE]
    #   The alignment/fragment file name.
    msa_file = arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE]
    #   The alignment between the reference parent and the target protein
    #   sequence in the provided PDB file.  The amino acids in the aligned
    #   reference parent should correspond exactly to those in msa_file.
    # Without a PDB alignment file, the ID of the PDB structure is assumed
    # to correspond to one of the sequence IDs in the MSA.
    parent_pdb_alignment_file = None
    pdb_key = None
    if ARG_PDB_ALIGNMENT_FILE in arg_dict:
        alignment_path = arg_dict[ARG_PDB_ALIGNMENT_FILE]
        if not os.path.isfile(alignment_path):
            print("  Can't find PDB/parent alignment file %s" %
                  (alignment_path,))
            return
        parent_pdb_alignment_file = alignment_path
    else:
        with open(pdb_file, 'r') as pdb_handle:
            pdb_key = pdb.File().getIDCode(pdb_handle)

    # The PDB chains.
    # Many PDB files include multiple chains.  chain_identifiers lists the
    # chains belonging to the protein whose contacts are being evaluated.
    # The blank chain ' ' (used by single-chain files) is always appended,
    # and chain 'A' is the default when no chains were requested.
    if ARG_CHAINS in arg_dict:
        chains = arg_dict[ARG_CHAINS]
        if isinstance(chains, list):
            chain_identifiers = chains + [' ']
        else:
            chain_identifiers = [chains, ' ']
    else:
        chain_identifiers = ['A', ' ']

    # Read the alignment file to create a list of parents, in file order.
    # NOTE(review): assumes the schema/pdb readers consume their handle
    # eagerly, so it can be closed right after the call -- confirm.
    with open(msa_file, 'r') as msa_handle:
        parent_list = schema.readMultipleSequenceAlignmentFile(msa_handle)
    parent_dict = dict(parent_list)

    # Read in the PDB file to create a list of residues.
    with open(pdb_file, 'r') as pdb_handle:
        residues = pdb.File().read(pdb_handle)

    # Because the PDB file's residue sequence may differ from those of the
    # parents, the PDB residues must be aligned to one parent.
    if not parent_pdb_alignment_file:
        # Just get the PDB sequence from the multiple sequence alignment.
        try:
            aligned_prot = parent_dict[pdb_key]
        except KeyError:
            print("Could not find sequence %s in the multiple sequence alignment file %s.  Aborting..." % (
                pdb_key, msa_file))
            return
        aligned_pdb = aligned_prot
    else:
        # Find the sequence with the same key in both the parent MSA file
        # and the parent/PDB alignment file.
        with open(parent_pdb_alignment_file, 'r') as alignment_handle:
            pdb_parent_seq_list = schema.readMultipleSequenceAlignmentFile(
                alignment_handle)
        pdb_parent_seq_dict = dict(pdb_parent_seq_list)

        # Bail out if there are fewer than 2 sequences.  The empty case is
        # reported separately because there is no key to name.
        if not pdb_parent_seq_dict:
            print("No sequences found in the PDB/parent alignment file %s.  Aborting..." % (
                parent_pdb_alignment_file,))
            return
        if len(pdb_parent_seq_dict) < 2:
            print("Only found one uniquely named sequence in the PDB/parent alignment, %s.  Aborting..." % (
                next(iter(pdb_parent_seq_dict)),))
            return

        # Find the matching key.  If several keys match, the last one
        # visited wins.
        pdb_key = None
        for candidate in parent_dict:
            if candidate in pdb_parent_seq_dict:
                pdb_key = candidate

        # Bail out if no matching key is found.
        if not pdb_key:
            print("Could not find parents %s in PDB/parent aligned sequences %s.  Aborting..." % (
                list(parent_dict.keys()), list(pdb_parent_seq_dict.keys())))
            return
        aligned_prot = pdb_parent_seq_dict[pdb_key]
        # Remove the parent's entry; what remains is the PDB sequence.
        del pdb_parent_seq_dict[pdb_key]
        # Take the first remaining sequence, which should be the PDB one.
        aligned_pdb = next(iter(pdb_parent_seq_dict.values()))

    # Check that the parent sequence from both alignment files matches.
    if aligned_prot.replace('-', '') != parent_dict[pdb_key].replace('-', ''):
        print("The PDB-aligned parent and the named parent, %s, don't match!  Aborting..." % (
            pdb_key,))
        return
    # Check that the aligned PDB sequence matches the residue sequence
    # pulled directly from the PDB file.
    if aligned_pdb.replace('-', '') != pdb.sequence(residues,
                                                    chain_identifiers):
        print("The parent-aligned PDB sequence, %s, and the PDB file sequence, chain(s) %s in %s, don't match!  Aborting..." % (
            pdb_key, chain_identifiers, pdb_file))
        return

    # Align the residues with the parent protein.
    try:
        residues = schema.alignPDBResidues(residues, aligned_prot, aligned_pdb,
                                           parent_dict[pdb_key],
                                           chain_identifiers)
    except ValueError as ve:
        print(ve)
        return
Esempio n. 4
0
def main(args):
	"""Load a PDB structure and align its residues to a parent sequence.

	The arguments are parsed with ``parse_arguments`` and validated with
	``confirm_arguments``.  When validation fails the function returns
	without doing any work, printing the usage text first if the script
	was invoked as ``schemacontacts.py``.

	Two modes are supported:

	* No PDB/parent alignment file: the ID code read from the PDB file is
	  used as the key into the multiple sequence alignment (MSA), and that
	  MSA entry serves as both the parent and the PDB sequence.
	* PDB/parent alignment file given: the key present in both the MSA and
	  the alignment file identifies the parent; the other sequence in the
	  alignment file is taken as the PDB sequence.

	Every failure path prints a message and returns.

	Args:
		args: Command-line style argument list.  ``args[0]`` is the script
			path; only its final path component is inspected.

	Returns:
		None.
	"""
	arg_dict = parse_arguments(args)
	if not confirm_arguments(arg_dict):
		if args[0].split(os.path.sep)[-1] == "schemacontacts.py":
			print_usage(args)
		return

	# Inputs:
	#   The PDB file name.
	pdb_file = arg_dict[ARG_PDB_FILE]
	#   The alignment/fragment file name.
	msa_file = arg_dict[ARG_MULTIPLE_SEQUENCE_ALIGNMENT_FILE]
	#   The alignment between the reference parent and the target protein
	#   sequence in the provided PDB file.  The amino acids in the aligned
	#   reference parent should correspond exactly to those in msa_file.
	# Without a PDB alignment file, the ID of the PDB structure is assumed
	# to correspond to one of the sequence IDs in the MSA.
	parent_pdb_alignment_file = None
	pdb_key = None
	if ARG_PDB_ALIGNMENT_FILE in arg_dict:
		alignment_path = arg_dict[ARG_PDB_ALIGNMENT_FILE]
		if not os.path.isfile(alignment_path):
			print("  Can't find PDB/parent alignment file %s" %
				(alignment_path,))
			return
		parent_pdb_alignment_file = alignment_path
	else:
		with open(pdb_file, 'r') as pdb_handle:
			pdb_key = pdb.File().getIDCode(pdb_handle)

	# The PDB chains.
	# Many PDB files include multiple chains.  chain_identifiers lists the
	# chains belonging to the protein whose contacts are being evaluated.
	# The blank chain ' ' (used by single-chain files) is always appended,
	# and chain 'A' is the default when no chains were requested.
	if ARG_CHAINS in arg_dict:
		chains = arg_dict[ARG_CHAINS]
		if isinstance(chains, list):
			chain_identifiers = chains + [' ']
		else:
			chain_identifiers = [chains, ' ']
	else:
		chain_identifiers = ['A', ' ']

	# Read the alignment file to create a list of parents, in file order.
	# NOTE(review): assumes the schema/pdb readers consume their handle
	# eagerly, so it can be closed right after the call -- confirm.
	with open(msa_file, 'r') as msa_handle:
		parent_list = schema.readMultipleSequenceAlignmentFile(msa_handle)
	parent_dict = dict(parent_list)

	# Read in the PDB file to create a list of residues.
	with open(pdb_file, 'r') as pdb_handle:
		residues = pdb.File().read(pdb_handle)

	# Because the PDB file's residue sequence may differ from those of the
	# parents, the PDB residues must be aligned to one parent.
	if not parent_pdb_alignment_file:
		# Just get the PDB sequence from the multiple sequence alignment.
		try:
			aligned_prot = parent_dict[pdb_key]
		except KeyError:
			print("Could not find sequence %s in the multiple sequence alignment file %s.  Aborting..." % (pdb_key, msa_file))
			return
		aligned_pdb = aligned_prot
	else:
		# Find the sequence with the same key in both the parent MSA file
		# and the parent/PDB alignment file.
		with open(parent_pdb_alignment_file, 'r') as alignment_handle:
			pdb_parent_seq_list = schema.readMultipleSequenceAlignmentFile(alignment_handle)
		pdb_parent_seq_dict = dict(pdb_parent_seq_list)

		# Bail out if there are fewer than 2 sequences.  The empty case is
		# reported separately because there is no key to name.
		if not pdb_parent_seq_dict:
			print("No sequences found in the PDB/parent alignment file %s.  Aborting..." % (parent_pdb_alignment_file,))
			return
		if len(pdb_parent_seq_dict) < 2:
			print("Only found one uniquely named sequence in the PDB/parent alignment, %s.  Aborting..." % (next(iter(pdb_parent_seq_dict)),))
			return

		# Find the matching key.  If several keys match, the last one
		# visited wins.
		pdb_key = None
		for candidate in parent_dict:
			if candidate in pdb_parent_seq_dict:
				pdb_key = candidate

		# Bail out if no matching key is found.
		if not pdb_key:
			print("Could not find parents %s in PDB/parent aligned sequences %s.  Aborting..." % (list(parent_dict.keys()), list(pdb_parent_seq_dict.keys())))
			return
		aligned_prot = pdb_parent_seq_dict[pdb_key]
		# Remove the parent's entry; what remains is the PDB sequence.
		del pdb_parent_seq_dict[pdb_key]
		# Take the first remaining sequence, which should be the PDB one.
		aligned_pdb = next(iter(pdb_parent_seq_dict.values()))

	# Check that the parent sequence from both alignment files matches.
	if aligned_prot.replace('-', '') != parent_dict[pdb_key].replace('-', ''):
		print("The PDB-aligned parent and the named parent, %s, don't match!  Aborting..." % (pdb_key,))
		return
	# Check that the aligned PDB sequence matches the residue sequence
	# pulled directly from the PDB file.
	if aligned_pdb.replace('-', '') != pdb.sequence(residues, chain_identifiers):
		print("The parent-aligned PDB sequence, %s, and the PDB file sequence, chain(s) %s in %s, don't match!  Aborting..." % (pdb_key, chain_identifiers, pdb_file))
		return

	# Align the residues with the parent protein.
	try:
		residues = schema.alignPDBResidues(residues, aligned_prot, aligned_pdb, parent_dict[pdb_key], chain_identifiers)
	except ValueError as ve:
		print(ve)
		return