def get_interactions(self, structureId, structure):
        """Collect polymer residues involved in chain-chain interactions.

        Polymer atoms that pass the (target or query) criteria of
        ``self.filter`` are binned per chain into a ``DistanceBox``. Every
        unordered pair of chains is then examined: the candidate atoms
        returned by ``DistanceBox.getIntersection`` are compared pairwise and,
        for each pair within the distance cutoff, the residue of the atom that
        satisfies the target criteria (while the other atom satisfies the
        query criteria) is recorded.

        Args:
            structureId: Identifier of the structure; it is concatenated with
                ``'.'`` and the chain name to form the first field of a row.
            structure: Structure passed to ``ColumnarStructure``; its
                ``entity_list`` provides the ``'sequence'`` of each entity.

        Returns:
            list: ``Row`` objects with the positional fields
            ``(structureId.chain, partner chain, chain, sorted group numbers,
            sorted sequence indices, sequence)``. A row is emitted for a chain
            of a pair only if the number of interacting residues reaches
            ``self.filter.get_min_interactions()``.
        """
        rows = []

        atomFilter = self.filter
        cutoffDistance = atomFilter.get_distance_cutoff()
        cutoffDistanceSquared = cutoffDistance ** 2
        minInteractions = atomFilter.get_min_interactions()

        arrays = ColumnarStructure(structure, True)

        chainNames = arrays.get_chain_names()
        groupNames = arrays.get_group_names()
        groupNumbers = arrays.get_group_numbers()
        atomNames = arrays.get_atom_names()
        entityIndices = arrays.get_entity_indices()
        elements = arrays.get_elements()
        polymer = arrays.is_polymer()

        sequenceMapIndices = arrays.get_sequence_positions()
        x = arrays.get_x_coords()
        y = arrays.get_y_coords()
        z = arrays.get_z_coords()

        def isTarget(k):
            # atom k satisfies all target criteria (group, atom name, element)
            return atomFilter.is_target_group(groupNames[k]) \
                and atomFilter.is_target_atom_name(atomNames[k]) \
                and atomFilter.is_target_element(elements[k])

        def isQuery(k):
            # atom k satisfies all query criteria (group, atom name, element)
            return atomFilter.is_query_group(groupNames[k]) \
                and atomFilter.is_query_atom_name(atomNames[k]) \
                and atomFilter.is_query_element(elements[k])

        # create one distance box per chain for quick lookup of polymer atoms
        # that can act as target or query atoms
        boxes = {}
        for i in range(arrays.get_num_atoms()):

            # FIX: the element test previously called the non-existent
            # attribute ``self.filter_is_query_element_name``.
            if polymer[i] \
                and (atomFilter.is_target_group(groupNames[i])
                     or atomFilter.is_query_group(groupNames[i])) \
                and (atomFilter.is_target_atom_name(atomNames[i])
                     or atomFilter.is_query_atom_name(atomNames[i])) \
                and (atomFilter.is_target_element(elements[i])
                     or atomFilter.is_query_element(elements[i])) \
                and not atomFilter.is_prohibited_target_group(groupNames[i]):

                chain = chainNames[i]
                if chain not in boxes:
                    boxes[chain] = DistanceBox(cutoffDistance)

                boxes[chain].add_point(np.array([x[i], y[i], z[i]]), i)

        chainBoxes = list(boxes.items())

        # loop over all unordered pairs of polymer chains
        for first, (chainI, boxI) in enumerate(chainBoxes):

            for chainJ, boxJ in chainBoxes[first + 1:]:

                # NOTE(review): camelCase name kept from the original call;
                # confirm it matches the DistanceBox API.
                intersectionI = boxI.getIntersection(boxJ)
                intersectionJ = boxJ.getIntersection(boxI)

                # maps of sequence index -> group number for each chain
                indicesI = {}
                indicesJ = {}

                entityIndexI = -1
                entityIndexJ = -1

                # check every candidate atom pair against the distance cutoff
                # and the interaction filter criteria
                for n in intersectionI:

                    for m in intersectionJ:

                        dx = x[n] - x[m]
                        dy = y[n] - y[m]
                        dz = z[n] - z[m]
                        dSq = dx * dx + dy * dy + dz * dz

                        if dSq > cutoffDistanceSquared:
                            continue

                        # atom n is the target, atom m the query
                        if isTarget(n) and isQuery(m):
                            entityIndexI = entityIndices[n]
                            indicesI[sequenceMapIndices[n]] = groupNumbers[n]

                        # atom m is the target, atom n the query
                        if isTarget(m) and isQuery(n):
                            entityIndexJ = entityIndices[m]
                            indicesJ[sequenceMapIndices[m]] = groupNumbers[m]

                # FIX: rows are emitted per chain pair. Previously this code
                # sat outside the inner loop, so only the last partner chain
                # of every chainI was ever reported.
                if len(indicesI) >= minInteractions:
                    sequenceIndiciesI = sorted(int(k) for k in indicesI)
                    groupNumbersI = sorted(indicesI.values())

                    rows.append(Row(structureId + '.' + chainI, chainJ, chainI,
                                    groupNumbersI, sequenceIndiciesI,
                                    structure.entity_list[entityIndexI]['sequence']))

                if len(indicesJ) >= minInteractions:
                    sequenceIndiciesJ = sorted(int(k) for k in indicesJ)
                    groupNumbersJ = sorted(indicesJ.values())

                    rows.append(Row(structureId + '.' + chainJ, chainI, chainJ,
                                    groupNumbersJ, sequenceIndiciesJ,
                                    structure.entity_list[entityIndexJ]['sequence']))

        return rows
    def __call__(self, t):
        """Extract polymer-polymer interactions from a single structure.

        Args:
            t: Indexable pair; ``t[0]`` is the structure id and ``t[1]`` the
                structure that is wrapped in a ``ColumnarStructure``.

        Returns:
            An empty list as soon as one of the filter stages selects no
            atoms (or when the structure has one chain and only inter-chain
            interactions are requested); otherwise a set of ``Row`` objects
            whose fields depend on ``self.level`` ('chain', 'group' or
            'atom').
        """
        structure_id = t[0]
        structure = t[1]

        arrays = ColumnarStructure(structure, True)

        # a single chain cannot have inter-chain interactions
        if structure.num_chains == 1 and self.inter and not self.intra:
            return []

        flt = self.filter

        # query criteria, bailing out as soon as one of them matches nothing
        group_names = arrays.get_group_names()
        query_group_mask = flt.is_query_group_np(group_names)
        if not np.count_nonzero(query_group_mask):
            return []

        elements = arrays.get_elements()
        query_element_mask = flt.is_query_element_np(elements)
        if not np.count_nonzero(query_element_mask):
            return []

        atom_names = arrays.get_atom_names()
        query_atom_mask = flt.is_query_atom_name_np(atom_names)
        if not np.count_nonzero(query_atom_mask):
            return []

        # polymer atoms that satisfy every query criterion
        polymer = arrays.is_polymer()
        query_mask = polymer & query_group_mask & query_element_mask & query_atom_mask
        if not np.count_nonzero(query_mask):
            return []

        # polymer atoms that satisfy every target criterion
        target_group_mask = flt.is_target_group_np(group_names)
        target_element_mask = flt.is_target_element_np(elements)
        target_atom_mask = flt.is_target_atom_name_np(atom_names)
        target_mask = polymer & target_group_mask & target_element_mask & target_atom_mask
        if not np.count_nonzero(target_mask):
            return []

        chain_names = arrays.get_chain_names()
        group_numbers = arrays.get_group_numbers()
        entity_indices = arrays.get_entity_indices()
        sequence_positions = arrays.get_sequence_positions()

        # one coordinate triple per atom
        # TODO add this to ColumnarStructure
        coords = np.stack(
            (arrays.get_x_coords(), arrays.get_y_coords(), arrays.get_z_coords()),
            axis=-1)

        # per-atom data of the query selection
        q_coords = coords[query_mask]
        q_group = group_names[query_mask]
        q_number = group_numbers[query_mask]
        q_atom = atom_names[query_mask]
        q_chain = chain_names[query_mask]

        # per-atom data of the target selection
        t_coords = coords[target_mask]
        t_group = group_names[target_mask]
        t_number = group_numbers[target_mask]
        t_atom = atom_names[target_mask]
        t_chain = chain_names[target_mask]
        t_entity = entity_indices[target_mask]
        t_seq_pos = sequence_positions[target_mask]

        # all target/query atom pairs within the cutoff, keyed by
        # (target index, query index) with the distance as value
        target_tree = cKDTree(t_coords)
        query_tree = cKDTree(q_coords)
        pairs = target_tree.sparse_distance_matrix(
            query_tree,
            max_distance=flt.get_distance_cutoff(),
            output_type='dict')

        # A set removes the duplicates that appear when several atom pairs
        # collapse onto the same chain- or group-level row.
        rows = set()
        for (i, j), dis in pairs.items():
            # i: target atom index, j: query atom index

            if q_chain[j] == t_chain[i]:
                # same chain: needs intra-chain mode, and the two atoms must
                # not belong to the same group
                if not self.intra or q_number[j] == t_number[i]:
                    continue
            elif not self.inter:
                # different chains, but inter-chain interactions are disabled
                continue

            # exclude self interactions (this can happen if the query and
            # target criteria overlap)
            if dis < 0.001:
                continue

            level = self.level
            if level == 'chain':
                fields = (
                    structure_id + "." + t_chain[i],  # structureChainId
                    q_group[j],  # queryGroupId
                    q_chain[j],  # queryChainId
                    q_number[j],  # queryGroupNumber
                    t_chain[i],  # targetChainId
                )
            elif level == 'group':
                fields = (
                    structure_id + "." + t_chain[i],  # structureChainId
                    q_group[j],  # queryGroupId
                    q_chain[j],  # queryChainId
                    q_number[j],  # queryGroupNumber
                    t_group[i],  # targetGroupId
                    t_chain[i],  # targetChainId
                    t_number[i],  # targetGroupNumber
                    t_seq_pos[i].item(),  # sequenceIndex
                    structure.entity_list[t_entity[i]]['sequence'],  # sequence
                )
            elif level == 'atom':
                fields = (
                    structure_id + "." + t_chain[i],  # structureChainId
                    q_group[j],  # queryGroupId
                    q_chain[j],  # queryChainId
                    q_number[j],  # queryGroupNumber
                    q_atom[j],  # queryAtomName
                    t_group[i],  # targetGroupId
                    t_chain[i],  # targetChainId
                    t_number[i],  # targetGroupNumber
                    t_atom[i],  # targetAtomName
                    dis,  # distance
                    t_seq_pos[i].item(),  # sequenceIndex
                    structure.entity_list[t_entity[i]]['sequence'],  # sequence
                )
            else:
                # any other level produces no rows
                continue

            rows.add(Row(*fields))

        return rows
    def get_interactions(self, structureId, structure):
        """Collect polymer residues that interact with non-polymer groups.

        Polymer atoms that satisfy the target criteria of ``self.filter`` are
        stored in a ``DistanceBox``. For every non-polymer group that
        satisfies the query criteria, the polymer atoms within the distance
        cutoff of any of its query atoms are gathered and grouped by chain.

        Args:
            structureId: Identifier of the structure; it is concatenated with
                ``"."`` and the chain name to form the first field of a row.
            structure: Structure passed to ``ColumnarStructure``; its
                ``entity_list`` provides the ``'sequence'`` of each entity.

        Returns:
            list: ``Row`` objects with the positional fields
            ``(structureId.chain, query group name, query group number,
            query chain, target chain, sorted target group numbers,
            sorted target sequence indices, target sequence,
            number of chains the query group is in contact with)``.
        """
        rows = []

        cutoffDistance = self.filter.get_distance_cutoff()
        cutoffDistanceSquared = cutoffDistance ** 2
        arrays = ColumnarStructure(structure, True)

        chainNames = arrays.get_chain_names()
        groupNames = arrays.get_group_names()
        groupNumbers = arrays.get_group_numbers()
        atomNames = arrays.get_atom_names()
        entityIndices = arrays.get_entity_indices()
        elements = arrays.get_elements()
        polymer = arrays.is_polymer()

        sequenceMapIndices = arrays.get_sequence_positions()
        x = arrays.get_x_coords()
        y = arrays.get_y_coords()
        z = arrays.get_z_coords()

        # create a distance box for quick lookup of the polymer atoms that
        # satisfy the target criteria
        box = DistanceBox(cutoffDistance)
        for i in range(arrays.get_num_atoms()):

            if polymer[i] \
                and self.filter.is_target_group(groupNames[i]) \
                and self.filter.is_target_atom_name(atomNames[i]) \
                and self.filter.is_target_element(elements[i]) \
                and not self.filter.is_prohibited_target_group(groupNames[i]):

                box.add_point(np.array([x[i], y[i], z[i]]), i)

        groupToAtomIndices = arrays.get_group_to_atom_indices()

        for g in range(arrays.get_num_groups()):

            # position of first and last atom +1 in group
            start = groupToAtomIndices[g]
            end = groupToAtomIndices[g + 1]

            # skip polymer groups
            if polymer[start]:
                continue

            # skip groups rejected by the query filter (e.g. water)
            if not self.filter.is_query_group(groupNames[start]):
                continue

            # FIX: a leftover debugging print of the group name was removed
            # here.

            # collect the polymer atoms within the cutoff distance of any
            # query atom of this group
            neighbors = []
            for a in range(start, end):

                if not (self.filter.is_query_atom_name(atomNames[a])
                        and self.filter.is_query_element(elements[a])):
                    continue

                p = np.array([x[a], y[a], z[a]])

                # the box returns candidate neighbors; apply the exact
                # distance check to each of them
                for j in box.get_neighbors(p):
                    dx = x[j] - x[a]
                    dy = y[j] - y[a]
                    dz = z[j] - z[a]
                    dSq = dx * dx + dy * dy + dz * dz

                    if dSq <= cutoffDistanceSquared:
                        neighbors.append(j)

            if not neighbors:
                continue

            # chain name -> list of [sequence position, group number,
            # entity index] of the interacting atoms
            interactions2 = {}
            for neighbor in neighbors:

                # the chain is registered even if no entry is added below, as
                # len(interactions2) is reported in every row
                chainInteractions = interactions2.setdefault(
                    chainNames[neighbor], [])

                # keep track of which group is interacting
                seqPos = sequenceMapIndices[neighbor]

                # non-polymer groups have a negative index and are excluded
                # here. FIX: the test used to be ``> 0``, which also dropped
                # residues at sequence index 0.
                if seqPos >= 0:
                    chainInteractions.append(
                        [seqPos, groupNumbers[neighbor], entityIndices[neighbor]])

            for key, val in interactions2.items():

                sequenceIndices = set()
                residueNames = set()
                sequence = None

                for v in val:
                    sequenceIndices.add(int(v[0]))
                    residueNames.add(int(v[1]))
                    if sequence is None:
                        sequence = structure.entity_list[v[2]]['sequence']

                if len(sequenceIndices) > 0:
                    rows.append(Row(structureId + "." + key, groupNames[start],
                                    groupNumbers[start], chainNames[start],
                                    key, sorted(residueNames),
                                    sorted(sequenceIndices), sequence,
                                    len(interactions2)))
        return rows
# Example no. 4
# 0
    def __call__(self, t):
        """Extract ligand-polymer interactions from a single structure.

        Args:
            t: Indexable pair; ``t[0]`` is the structure id and ``t[1]`` the
                structure that is wrapped in a ``ColumnarStructure``.

        Returns:
            An empty list as soon as one of the filter stages selects no
            atoms; otherwise a set of ``Row`` objects whose fields depend on
            ``self.level`` ('chain', 'group' or 'atom').
        """
        structure_id = t[0]
        structure = t[1]

        arrays = ColumnarStructure(structure, True)
        criteria = self.filter

        # query (ligand) criteria; stop early when one of them matches nothing
        group_names = arrays.get_group_names()
        is_query_group = criteria.is_query_group_np(group_names)
        if not np.count_nonzero(is_query_group):
            return []

        elements = arrays.get_elements()
        is_query_element = criteria.is_query_element_np(elements)
        if not np.count_nonzero(is_query_element):
            return []

        atom_names = arrays.get_atom_names()
        is_query_atom = criteria.is_query_atom_name_np(atom_names)
        if not np.count_nonzero(is_query_atom):
            return []

        # NOTE: prohibited groups are not filtered here (left as an open
        # question in the original code).

        polymer = arrays.is_polymer()

        # ligand atoms: non-polymer atoms passing every query criterion
        ligand_mask = ~polymer & is_query_group & is_query_element & is_query_atom
        if not np.count_nonzero(ligand_mask):
            return []

        # polymer atoms passing every target criterion
        is_target_group = criteria.is_target_group_np(group_names)
        is_target_element = criteria.is_target_element_np(elements)
        is_target_atom = criteria.is_target_atom_name_np(atom_names)
        polymer_mask = polymer & is_target_group & is_target_element & is_target_atom
        if not np.count_nonzero(polymer_mask):
            return []

        chain_names = arrays.get_chain_names()
        group_numbers = arrays.get_group_numbers()
        entity_indices = arrays.get_entity_indices()
        sequence_positions = arrays.get_sequence_positions()

        # one coordinate triple per atom
        # TODO add this to ColumnarStructure
        xyz = np.stack(
            (arrays.get_x_coords(), arrays.get_y_coords(), arrays.get_z_coords()),
            axis=-1)

        # per-atom data of the ligand selection
        ligand_xyz = xyz[ligand_mask]
        ligand_group = group_names[ligand_mask]
        ligand_number = group_numbers[ligand_mask]
        ligand_atom = atom_names[ligand_mask]
        ligand_chain = chain_names[ligand_mask]

        # per-atom data of the polymer selection
        polymer_xyz = xyz[polymer_mask]
        polymer_group = group_names[polymer_mask]
        polymer_number = group_numbers[polymer_mask]
        polymer_atom = atom_names[polymer_mask]
        polymer_chain = chain_names[polymer_mask]
        polymer_entity = entity_indices[polymer_mask]
        polymer_seq_pos = sequence_positions[polymer_mask]

        # all polymer/ligand atom pairs within the cutoff, keyed by
        # (polymer index, ligand index) with the distance as value
        polymer_tree = cKDTree(polymer_xyz)
        ligand_tree = cKDTree(ligand_xyz)
        contacts = polymer_tree.sparse_distance_matrix(
            ligand_tree,
            max_distance=criteria.get_distance_cutoff(),
            output_type='dict')

        # A set removes the duplicates that appear when several atom pairs
        # collapse onto the same chain- or group-level row.
        rows = set()
        for (i, j), dis in contacts.items():
            # i: polymer (target) atom index, j: ligand (query) atom index
            level = self.level

            if level == 'chain':
                fields = (
                    structure_id + "." + polymer_chain[i],  # structureChainId
                    ligand_group[j],  # queryLigandId
                    ligand_chain[j],  # queryLigandChainId
                    ligand_number[j],  # queryLigandNumber
                    polymer_chain[i],  # targetChainId
                )
            elif level == 'group':
                fields = (
                    structure_id + "." + polymer_chain[i],  # structureChainId
                    ligand_group[j],  # queryLigandId
                    ligand_chain[j],  # queryLigandChainId
                    ligand_number[j],  # queryLigandNumber
                    polymer_group[i],  # targetGroupId
                    polymer_chain[i],  # targetChainId
                    polymer_number[i],  # targetGroupNumber
                    polymer_seq_pos[i].item(),  # sequenceIndex
                    structure.entity_list[polymer_entity[i]]['sequence'],  # sequence
                )
            elif level == 'atom':
                fields = (
                    structure_id + "." + polymer_chain[i],  # structureChainId
                    ligand_group[j],  # queryLigandId
                    ligand_chain[j],  # queryLigandChainId
                    ligand_number[j],  # queryLigandNumber
                    ligand_atom[j],  # queryAtomName
                    polymer_group[i],  # targetGroupId
                    polymer_chain[i],  # targetChainId
                    polymer_number[i],  # targetGroupNumber
                    polymer_atom[i],  # targetAtomName
                    dis,  # distance
                    polymer_seq_pos[i].item(),  # sequenceIndex
                    structure.entity_list[polymer_entity[i]]['sequence'],  # sequence
                )
            else:
                # any other level produces no rows
                continue

            rows.add(Row(*fields))

        return rows