def calculate_ss(msaName, chain):
    """Annotate secondary structure elements for one or more chains.

    The structure is loaded through ``pdb2biotite(msaName)`` and hetero
    atoms are discarded before ``struc.annotate_sse`` is called.

    Parameters
    ----------
    msaName
        Identifier handed unchanged to ``pdb2biotite``
        (e.g. ``"1FM0_E_1FM0_D"``).
    chain
        Either a single chain ID or a sequence of chain IDs.  When it holds
        more than one element, every element is treated as a chain ID.

    Returns
    -------
    The result of ``struc.annotate_sse`` for a single chain, or the
    per-chain results joined end to end, in the order given by ``chain``.
    """
    array = pdb2biotite(msaName)
    # Drop hetero atoms; assumes ``hetero`` is a boolean annotation.
    array = array[~array.hetero]

    if len(chain) > 1:
        # Join *all* requested chains; the previous implementation only
        # kept the first two and silently dropped the rest.
        return np.concatenate(
            [struc.annotate_sse(array, chain_id=ch) for ch in chain]
        )
    return struc.annotate_sse(array, chain_id=chain)
Esempio n. 2
0
def test_sse():
    """Check the SSE annotation of chain "A" in ``3o5r.mmtf``.

    The per-element annotation is joined into one string and compared
    against the stored reference string.
    """
    expected = (
        "caaaaaacccccccccccccbbbbbccccccbbbbccccccccccccccc"
        "ccccccccccccbbbbbbcccccccaaaaaaaaaccccccbbbbbccccc"
        "ccccccccccccbbbbbbbccccccccc"
    )
    structure_path = join(data_dir("structure"), "3o5r.mmtf")
    atoms = strucio.load_structure(structure_path)
    annotation = struc.annotate_sse(atoms, "A")
    assert "".join(annotation.tolist()) == expected
Esempio n. 3
0
def psea_sec(file):
    """Return the secondary structure annotation of chain "A".

    Parameters
    ----------
    file
        Object passed to ``mmtf.get_structure``; presumably an already
        parsed MMTF file (confirm against the caller).

    Returns
    -------
    The result of ``struc.annotate_sse`` for chain ``"A"`` of model 1.
    """
    # Only the first model is read.  The amino-acid / chain-A filtered
    # copies built by the previous version were never used, so they are
    # no longer computed.
    array = mmtf.get_structure(file, model=1)
    return struc.annotate_sse(array, chain_id="A")
Esempio n. 4
0
def _atom_feature_row(array, index, sse_by_chain):
    """Build the feature list of the atom at ``index``.

    Layout: x, y, z, atom-name code, element code, residue ID,
    residue-name code, hetero flag, occupancy, B-factor, SSE code.
    Values missing from a lookup table fall back to its ``'Null'`` entry.
    """
    res_id = array.res_id[index]
    sse_label = sse_by_chain[array.chain_id[index]].get(res_id, 'Null')
    return list(array.coord[index]) + [
        atomsDict.get(array.atom_name[index], atomsDict['Null']),
        elementsDict.get(array.element[index], elementsDict['Null']),
        res_id,
        residualesDict.get(array.res_name[index], residualesDict['Null']),
        float(array.hetero[index]),
        array.occupancy[index],
        array.b_factor[index],
        ssesTypeDict.get(sse_label, ssesTypeDict['Null']),
    ]


def pdb2Gdata(dirName, fileName, saveDir=False):
    """Convert a structure file into a graph ``Data`` object.

    Every atom of model 1 becomes a node (see ``_atom_feature_row`` for
    the feature layout).  Two atoms are connected when the adjacency
    matrix built with ``cfg.threshold`` marks them as neighbours; each
    pair is stored once as ``(i, j)`` with ``i < j``.

    Parameters
    ----------
    dirName
        Directory containing the structure file.
    fileName
        Name of the structure file; also used as the output file name
        when ``saveDir`` is given.
    saveDir
        If truthy, directory into which the resulting object is written
        with ``torch.save``.

    Returns
    -------
    ``Data`` with ``x`` (float node features) and ``edge_index``
    (long tensor with two rows).
    """
    array = strucio.load_structure(
        os.path.join(dirName, fileName),
        extra_fields=['b_factor', 'occupancy'],
        model=1)

    # Chain IDs in order of first appearance.
    chain_ids = list(dict.fromkeys(array.chain_id))

    # Per chain: residue ID of each CA atom -> SSE label.
    sse_by_chain = {}
    for chain in chain_ids:
        sse = struc.annotate_sse(array, chain_id=chain)
        ca_res_ids = array[(array.chain_id == chain)
                           & (array.atom_name == 'CA')].res_id
        missing = ca_res_ids.shape[0] - sse.shape[0]
        if missing > 0:
            # Fewer SSE labels than CA atoms: pad so the trailing
            # residues are explicitly labelled 'Null'.
            sse = np.append(sse, ['Null'] * missing)
        sse_by_chain[chain] = dict(zip(ca_res_ids, sse))

    cell_list = struc.CellList(array, cell_size=cfg.threshold)
    adj_matrix = cell_list.create_adjacency_matrix(cfg.threshold)

    # Strict upper triangle -> every pair once, no self loops.  The
    # row-major order of ``np.nonzero`` matches a nested i < j loop.
    rows, cols = np.nonzero(np.triu(adj_matrix, k=1))
    edge_index = np.vstack((rows, cols))

    nodeFeatures = [
        _atom_feature_row(array, i, sse_by_chain)
        for i in range(array.shape[0])
    ]

    nodeFeaturesT = torch.tensor(nodeFeatures, dtype=torch.float)
    edge_indexT = torch.tensor(edge_index, dtype=torch.long)
    data = Data(x=nodeFeaturesT, edge_index=edge_indexT)

    if saveDir:
        torch.save(data, os.path.join(saveDir, fileName))

    return data
Esempio n. 5
0
        feature_plotters=[HelixPlotter(), SheetPlotter()])
    fig.tight_layout()


# Visualize the secondary structure array.
# Since the residues may not start at 1,
# provide the actual first residue ID.
# NOTE(review): ``sse``, ``tk_mono``, ``array`` and ``dssp_to_abc`` are
# not defined in this part of the script - presumably set up earlier.
visualize_secondary_structure(sse, tk_mono.res_id[0])

########################################################################
# Almost the same result can be achieved when we calculate the
# secondary structure ourselves using the DSSP software,
# as the content in ``'secStructList'`` is also calculated by the RCSB.

sse = dssp.DsspApp.annotate_sse(tk_mono)
# Map every DSSP code through ``dssp_to_abc`` and store the result as an
# array of one-character strings.
sse = np.array([dssp_to_abc[e] for e in sse], dtype="U1")
visualize_secondary_structure(sse, tk_mono.res_id[0])
# sphinx_gallery_thumbnail_number = 4

########################################################################
# The one and only difference is that the second helix is slightly
# shorter.
# This is probably caused by different versions of DSSP.
#
# Last but not least we calculate the secondary structure using
# *Biotite*'s built-in method, based on the P-SEA algorithm.

# The annotation runs on ``array`` restricted to chain "A", while the
# first residue ID for the plot is still taken from ``tk_mono``.
sse = struc.annotate_sse(array, chain_id="A")
visualize_secondary_structure(sse, tk_mono.res_id[0])

plt.show()