def all_pdbids_from_file(filename):
    """Collect the 'PDBID:chain' label of every record in a SCOP class file.

    The file is parsed with cla_parse. A record contributes a label only when
    its residue description has at least one fragment and that first fragment
    is non-empty; the label is the record's PDB ID, a colon, and the first
    element of the first fragment (the chain).

    Returns a set of label strings.
    """
    with open(filename, 'r') as handle:
        return {
            entry.residues.pdbid + ':' + entry.residues.fragments[0][0]
            for entry in cla_parse(handle)
            if entry.residues.fragments and entry.residues.fragments[0]
        }
def all_pdbids_from_file_in(filename, target_key, target_value):
    """Collect 'PDBID:chain' labels for records under one SCOP hierarchy node.

    A record from the SCOP classification file is selected when its hierarchy
    contains the pair (target_key, target_value), i.e. it sits at the given
    level with the given SCOP unique identifier. target_key should be one of
    the members of the Keys class.

    Selected records without a usable first fragment are skipped. Returns a
    set of strings built from the PDB ID, a colon, and the chain of the first
    fragment.
    """
    matches = set()
    with open(filename, 'r') as handle:
        for entry in cla_parse(handle):
            for level, ident in entry.hierarchy:
                # only entries classified under the requested node qualify
                if not (level == target_key and ident == target_value):
                    continue
                fragments = entry.residues.fragments
                if fragments and fragments[0]:
                    matches.add(entry.residues.pdbid + ':' + fragments[0][0])
    return matches
def hierarchy_sets_from_file(filename, target_key, target_value):
    """Group the proteins under one SCOP node by the next hierarchy level down.

    Reads the specified SCOP classification file and returns a dict mapping
    each identifier found at the level directly beneath target_key (according
    to Keys.order) to a set of (PDB ID, chain) tuples, one per record that is
    classified under (target_key, target_value).

    For example, if target_key is Keys.SUPERFAMILY and target_value is 50156
    (the PDZ domain superfamily), the result maps family IDs to the sets of
    proteins which are in each family.

    The chain is the first element of the record's first fragment, converted
    to a string, or None when the record has no usable first fragment.

    Raises ValueError if target_key is not in Keys.order, or if it is the last
    (lowest) entry of Keys.order, which has no level beneath it.
    """
    # TODO change this to return the specific chain:range_start-range_end
    # instead of the 'to' in the map being PDB IDs, it should be to (maybe)
    # pdbCHAIN:start-end
    # FOR NOW this returns a tuple of pdbid,chain (chain may be None)

    # Lazy %-style arguments: no string is built unless debug logging is
    # enabled, and non-string arguments cannot break the concatenation.
    logger.debug("opening %s in hierarchy_sets_from_file. target: %s:%s",
                 filename, target_key, target_value)

    # Validate the key before touching the file.
    try:
        target_index = Keys.order.index(target_key)
    except ValueError:
        raise ValueError(
            'Key "' + str(target_key) + '" is not a known hierarchy key')

    # index() returns at most len - 1, so the lowest level is exactly the last
    # position; there is nothing beneath it to group by.
    if target_index >= len(Keys.order) - 1:
        raise ValueError('Cannot get sets for lowest level of hierarchy')

    # The level to group by is fixed for the whole call.
    next_target_key = Keys.order[target_index + 1]

    result = {}
    with open(filename, 'r') as hierarchy_file:
        for record in cla_parse(hierarchy_file):
            # NOTE(review): assumes record.hierarchy iterates as (key, value)
            # pairs and can be iterated more than once - confirm against
            # cla_parse.
            in_target = any(
                key == target_key and value == target_value
                for key, value in record.hierarchy
            )
            if not in_target:
                continue

            # identifiers this record carries at the next level down
            children = [
                value for key, value in record.hierarchy
                if key == next_target_key
            ]
            if not children:
                continue

            fragments = record.residues.fragments
            if fragments and fragments[0]:
                # we have a fragment, use its chain
                chain = str(fragments[0][0])
            else:
                chain = None
            member = (str(record.residues.pdbid), chain)

            for child in children:
                result.setdefault(child, set()).add(member)
    return result