コード例 #1
0
ファイル: scop.py プロジェクト: ndaniels/HomologyTesting
def all_pdbids_from_file(filename):
    """Collect every 'PDBID:chain' string found in a SCOP classification file.

    Each record produced by ``cla_parse`` contributes its PDB ID joined by a
    colon to the first element of its first residue fragment.  Records that
    have no fragments, or whose first fragment is empty, are skipped.

    Returns a set of strings.

    """
    with open(filename, 'r') as cla_file:
        return {
            entry.residues.pdbid + ':' + entry.residues.fragments[0][0]
            for entry in cla_parse(cla_file)
            if entry.residues.fragments and entry.residues.fragments[0]
        }
コード例 #2
0
ファイル: scop.py プロジェクト: ndaniels/HomologyTesting
def all_pdbids_from_file_in(filename, target_key, target_value):
    """Collect 'PDBID:chain' strings for records under one hierarchy node.

    Reads the SCOP classification file ``filename`` and keeps only the
    records whose hierarchy contains the pair (target_key, target_value),
    i.e. the level of the hierarchy and the SCOP unique identifying number
    at that level.  Records without fragments, or with an empty first
    fragment, are ignored.

    target_key should be one of the members of the Keys class.

    Returns a set of strings.

    """
    matches = set()
    with open(filename, 'r') as cla_file:
        for entry in cla_parse(cla_file):
            for level, identifier in entry.hierarchy:
                # skip hierarchy entries that are not the requested node
                if not (level == target_key and identifier == target_value):
                    continue

                # only records that actually carry a first fragment count
                fragments = entry.residues.fragments
                if fragments and fragments[0]:
                    matches.add(entry.residues.pdbid + ':' + fragments[0][0])
    return matches
コード例 #3
0
ファイル: scop.py プロジェクト: ndaniels/HomologyTesting
def hierarchy_sets_from_file(filename, target_key, target_value):
    """Group the proteins under one hierarchy node by the next level down.

    Reads the SCOP classification file ``filename`` and, for every record
    whose hierarchy contains the pair (target_key, target_value), looks up
    the record's value at the hierarchy level immediately below target_key
    (as ordered by ``Keys.order``).  The result maps each such value to a
    set of (PDB ID, chain) tuples.  The chain is taken from the first
    element of the record's first fragment and is None when the record has
    no usable first fragment.

    For example, if target_key is Keys.SUPERFAMILY and target_value is 50156
    (the PDZ domain superfamily), this function returns a map from family IDs
    to sets of (PDB ID, chain) tuples for the proteins in that family.

    Raises ValueError if target_key is not in ``Keys.order``, or if it is
    the last (lowest) entry of ``Keys.order`` and therefore has no level
    beneath it.

    """

    # TODO change this to return the specific chain:range_start-range_end
    # instead of the 'to' in the map being PDB IDs, it should be to (maybe)
    # pdbCHAIN:start-end
    # FOR NOW this returns a tuple of pdbid,chain (chain may be None)

    result = {}
    # lazy %-style arguments: no TypeError when target_key is not a string
    logger.debug("opening %s in hierarchy_sets_from_file. target: %s:%s",
                 filename, target_key, target_value)
    with open(filename, 'r') as hierarchy_file:

        try:
            target_index = Keys.order.index(target_key)
        except ValueError:
            raise ValueError('Key "' + str(target_key) + '" is not a ' +
                             'known hierarchy key')

        # index() is at most len - 1, so the lowest level is the last index
        if target_index == len(Keys.order) - 1:
            raise ValueError('Cannot get sets for lowest level of hierarchy')

        # the key for the next level down is the same for every record
        next_target_key = Keys.order[target_index + 1]

        # iterate over each record in the SCOP Classification file
        for record in cla_parse(hierarchy_file):

            # skip records that are not beneath the requested node
            in_target = any(key == target_key and value == target_value
                            for key, value in record.hierarchy)
            if not in_target:
                continue

            # use the first chain when there is a non-empty first fragment
            fragments = record.residues.fragments
            if fragments and fragments[0]:
                chain = str(fragments[0][0])
            else:
                chain = None
            entry = (str(record.residues.pdbid), chain)

            # file the record under its value at the next level down
            for key, value in record.hierarchy:
                if key == next_target_key:
                    result.setdefault(value, set()).add(entry)

    return result