def box_of_element(element, spacing,
                   xmin, xmax, ymin, ymax, zmin, zmax, outfile):
    """Fill a box with a regular grid of dummy atoms and write it as a PDB.

    Grid positions along each axis come from ``np.arange(lo, hi, spacing)``,
    so the lower bound is included and the upper bound is excluded.

    :param element: value written to both ``atom_name`` and
        ``element_symbol`` of every generated atom.
    :param spacing: grid step; converted with ``float``.
    :param xmin, xmax, ymin, ymax, zmin, zmax: box limits; converted with
        ``float``.
    :param outfile: path passed to ``PandasPdb.to_pdb``.
    :return: the ``pandas.DataFrame`` that was stored as the ATOM record.
    """
    # The builtin ``float`` replaces the ``np.float`` alias, which no longer
    # exists in current NumPy releases.
    step = float(spacing)
    xs = np.arange(float(xmin), float(xmax), step)
    ys = np.arange(float(ymin), float(ymax), step)
    zs = np.arange(float(zmin), float(zmax), step)
    xx, yy, zz = np.meshgrid(xs, ys, zs)
    coords = np.stack([xx.flatten(), yy.flatten(), zz.flatten()], axis=1)
    # Named ``n_atoms`` rather than ``len`` so the builtin is not shadowed.
    n_atoms = coords.shape[0]
    df = pd.DataFrame(data={
        'line_idx': np.arange(n_atoms),
        'record_name': ['ATOM'] * n_atoms,
        'atom_number': np.arange(1, n_atoms + 1),
        'blank_1': [' '] * n_atoms,
        'atom_name': [element] * n_atoms,
        'alt_loc': [' '] * n_atoms,
        'residue_name': ['DUM'] * n_atoms,
        'blank_2': [' '] * n_atoms,
        'chain_id': ['X'] * n_atoms,
        'residue_number': [1] * n_atoms,
        'insertion': [' '] * n_atoms,
        'blank_3': ['   '] * n_atoms,
        'x_coord': coords[:, 0],
        'y_coord': coords[:, 1],
        'z_coord': coords[:, 2],
        'occupancy': [0.50] * n_atoms,
        'b_factor': [35.88] * n_atoms,
        'blank_4': ['      '] * n_atoms,
        'segment_id': ['X1'] * n_atoms,
        'element_symbol': [element] * n_atoms,
        'charge': [0.0] * n_atoms,
    })
    pdb = PandasPdb()
    pdb.df['ATOM'] = df
    pdb.to_pdb(path=outfile, records=['ATOM'], gz=False, append_newline=True)
    return df
Beispiel #2
0
def test_multichain():
    """amino3to1 on the two-chain 5mtn fixture yields the expected sequences.

    The expectation is 88 one-letter residues for chain A followed by 94 for
    chain B.
    """
    fixture_path = os.path.join(os.path.dirname(__file__),
                                'data', '5mtn_multichain.pdb')
    expect_res_a = ['S', 'L', 'E', 'P', 'E', 'P', 'W', 'F', 'F', 'K', 'N', 'L',
                    'S', 'R', 'K', 'D', 'A', 'E', 'R', 'Q', 'L', 'L', 'A', 'P',
                    'G', 'N', 'T', 'H', 'G', 'S', 'F', 'L', 'I', 'R', 'E', 'S',
                    'E', 'S', 'T', 'A', 'G', 'S', 'F', 'S', 'L', 'S', 'V', 'R',
                    'D', 'F', 'D', 'Q', 'G', 'E', 'V', 'V', 'K', 'H', 'Y', 'K',
                    'I', 'R', 'N', 'L', 'D', 'N', 'G', 'G', 'F', 'Y', 'I', 'S',
                    'P', 'R', 'I', 'T', 'F', 'P', 'G', 'L', 'H', 'E', 'L', 'V',
                    'R', 'H', 'Y', 'T']
    expect_res_b = ['S', 'V', 'S', 'S', 'V', 'P', 'T', 'K', 'L', 'E', 'V', 'V',
                    'A', 'A', 'T', 'P', 'T', 'S', 'L', 'L', 'I', 'S', 'W', 'D',
                    'A', 'P', 'A', 'V', 'T', 'V', 'V', 'Y', 'Y', 'L', 'I', 'T',
                    'Y', 'G', 'E', 'T', 'G', 'S', 'P', 'W', 'P', 'G', 'G', 'Q',
                    'A', 'F', 'E', 'V', 'P', 'G', 'S', 'K', 'S', 'T', 'A', 'T',
                    'I', 'S', 'G', 'L', 'K', 'P', 'G', 'V', 'D', 'Y', 'T', 'I',
                    'T', 'V', 'Y', 'A', 'H', 'R', 'S', 'S', 'Y', 'G', 'Y', 'S',
                    'E', 'N', 'P', 'I', 'S', 'I', 'N', 'Y', 'R', 'T']
    expect_chain = ['A'] * 88 + ['B'] * 94

    structure = PandasPdb()
    structure.read_pdb(fixture_path)
    one_letter = structure.amino3to1()

    chain_ids = one_letter['chain_id']
    got_chain = list(chain_ids.values)
    got_res_a = list(one_letter.loc[chain_ids == 'A', 'residue_name'].values)
    got_res_b = list(one_letter.loc[chain_ids == 'B', 'residue_name'].values)

    assert expect_chain == got_chain
    assert expect_res_a == got_res_a
    assert expect_res_b == got_res_b
Beispiel #3
0
def test_rna_and_nonmatching_indices():
    """RMSD between residues 64 and 66 of the RNA fixture equals 10.2007."""
    atom_table = PandasPdb().read_pdb(TESTDATA_rna).df['ATOM']
    residue_numbers = atom_table['residue_number']
    res_64 = atom_table[residue_numbers == 64]
    res_66 = atom_table[residue_numbers == 66]
    rmsd = PandasPdb.rmsd(res_64, res_66)
    assert rmsd == 10.2007, rmsd
Beispiel #4
0
def get_lig_name(PDB, lig_list):
    """Pick the HETATM residue with the most atoms and record all ligands.

    Reads ``PDB + '.pdb'``, skips every residue name listed in
    ``lig_list['Lig_name']`` and selects, among the remaining HETATM residue
    names, the one with the most HETATM rows.

    Side effects (both files are written to the current working directory):

    * ``ligand_name.txt`` receives the selected residue name.
    * ``ligand_list.txt`` receives ``'resname <name> or '`` for every HETATM
      residue name except ``HOH``.

    :param PDB: path of the structure without the ``.pdb`` extension.
    :param lig_list: mapping/DataFrame whose ``'Lig_name'`` entry lists the
        residue names that must never be selected.
    :return: the selected residue name, or ``''`` if none qualified.
    """
    ppdb = PandasPdb()
    ppdb.read_pdb(PDB + '.pdb')
    hetatm_names = ppdb.df['HETATM']['residue_name']

    # Built once: the exclusion list does not change while we iterate.
    excluded = set(lig_list['Lig_name'])
    all_ligands = list(set(hetatm_names))

    _lig_name = ''
    atoms = 0  # largest HETATM row count seen so far
    for name in all_ligands:
        if name in excluded:
            continue
        # The original stored this count under a misspelled variable, so the
        # comparison below always used 0 and the largest ligand was not found.
        lig_res_number = int((hetatm_names == name).values.sum())
        if lig_res_number >= atoms:
            _lig_name = name
            atoms = lig_res_number

    # Water is not a restrained ligand; tolerate structures without water.
    if 'HOH' in all_ligands:
        all_ligands.remove('HOH')

    with open('ligand_name.txt', 'w') as file:
        file.write(_lig_name)
    with open('ligand_list.txt', 'w') as file:
        for name in all_ligands:
            file.write('resname ' + str(name) + ' or ')
    return _lig_name
Beispiel #5
0
def output_centers(centers,
                   element,
                   out_file,
                   xlabel=10,
                   ylabel=11,
                   zlabel=12):
    """Write one dummy ATOM per row of ``centers`` to a PDB file.

    The x/y/z coordinates are taken from the columns of ``centers`` labelled
    ``xlabel``, ``ylabel`` and ``zlabel``. The assembled column dictionary is
    printed before the DataFrame is built.

    :return: the DataFrame stored as the ATOM record.
    """
    n_rows = len(centers.index)

    def repeated(value):
        # Constant column: the same value for every row.
        return [value] * n_rows

    def coordinate(label):
        # Single selected column flattened to a 1-D array of length n_rows.
        return centers[[label]].to_numpy().reshape((n_rows,))

    df_data = {
        'line_idx': np.arange(n_rows),
        'record_name': repeated('ATOM'),
        'atom_number': np.arange(1, n_rows + 1),
        'blank_1': repeated(' '),
        'atom_name': repeated(element),
        'alt_loc': repeated(' '),
        'residue_name': repeated('DUM'),
        'blank_2': repeated(' '),
        'chain_id': repeated('X'),
        'residue_number': repeated(1),
        'insertion': repeated(' '),
        'blank_3': repeated('   '),
        'x_coord': coordinate(xlabel),
        'y_coord': coordinate(ylabel),
        'z_coord': coordinate(zlabel),
        'occupancy': repeated(0.50),
        'b_factor': repeated(35.88),
        'blank_4': repeated('      '),
        'segment_id': repeated('X1'),
        'element_symbol': repeated(element),
        'charge': repeated(0.0),
    }
    print(df_data)
    atom_frame = pd.DataFrame(data=df_data)
    writer = PandasPdb()
    writer.df['ATOM'] = atom_frame
    writer.to_pdb(path=out_file, records=['ATOM'], gz=False,
                  append_newline=True)
    return atom_frame
Beispiel #6
0
def main(args):
    """Load a structure and populate the module-level metadata globals.

    Sets the globals ``model_name``, ``resolution`` and ``header``:

    * ``model_name`` is ``args.input`` for ``'pdb_id'`` input, otherwise the
      trailing word of ``struct.header``.
    * ``resolution`` is parsed from the ``REMARK 2 RESOLUTION.`` line of the
      PDB text, falling back to ``100`` when that line is not found.
    * ``header`` is parsed from the ``COMPND 2 MOLECULE:`` line, falling back
      to ``model_name``.

    :param args: object with ``input_type`` (``'pdb_id'`` or ``'structure'``)
        and ``input`` attributes.
    :raises ValueError: if ``args.input_type`` is neither supported value.
    """
    global model_name, resolution, header

    if args.input_type == 'pdb_id':
        struct = PandasPdb().fetch_pdb(args.input)
        model_name = args.input
    elif args.input_type == 'structure':
        struct = PandasPdb().read_pdb(args.input)
        model_name = re.search(r'[\d\w]+$', struct.header).group()
    else:
        # Previously this fell through with ``struct`` unbound and failed
        # later with an unrelated NameError.
        raise ValueError(
            "unsupported input_type: {!r}".format(args.input_type))

    # Raw strings: the patterns are unchanged, but ``\s``/``\d`` are no longer
    # invalid string escapes.
    resolution_match = re.search(
        r"REMARK\s+2\s+RESOLUTION\.\s+(\d\.\d+)", struct.pdb_text)
    resolution = float(resolution_match.group(1)) if resolution_match else 100

    header_match = re.search(
        r"COMPND\s+2\s+MOLECULE\:\s+(.+)\S+", struct.pdb_text)
    header = header_match.group(1) if header_match else model_name

    # NOTE(review): called without arguments, so this presumably refers to a
    # project-level ``filter`` defined elsewhere in the file, not the builtin
    # (which would raise TypeError here) - confirm.
    permition = filter()
def run_test(prot_dir, protein, data):
    """Run the permutation test for one protein and write its result CSV.

    Looks for a model under
    ``SWISS-MODEL_Repository/<prot_dir>/swissmodel/`` and uses the first file
    listed there. The ATOM table and the mutation rows of ``data`` matching
    ``prot_dir`` are dumped to disk, and a mutation DataFrame is built via
    ``tests.make_dataframe``. When it contains both positive and negative
    scores, ``tests.run_permutation`` is run and its two leading results are
    stored in ``parallelized/<protein>-pval.csv``.

    Any exception raised while processing is printed and swallowed, so one
    failing protein does not abort a batch run.

    :param prot_dir: repository sub-directory; also matched against
        ``data['uniprot_repo']``.
    :param protein: identifier used in the output CSV file name.
    :param data: DataFrame with at least the columns ``uniprot_repo``,
        ``gene``, ``uniprot``, ``mutation``, ``effect_size``, ``p-value`` and
        ``transition``.
    :return: ``None``.
    """
    results_df = pd.DataFrame(columns=[
        'gene_name', 'uniprot_ID', 'permutation risk', 'permutation prot'
    ])
    file_repo = 'SWISS-MODEL_Repository/' + prot_dir + '/swissmodel/'
    if not os.path.isdir(file_repo):
        return

    try:
        print(file_repo)
        pdb_file = file_repo + str(os.listdir(file_repo)[0])

        ppdb = PandasPdb().read_pdb(pdb_file)
        df = pd.DataFrame(ppdb.df['ATOM'])
        sequence = ppdb.amino3to1()

        protein_spec_df = data[data['uniprot_repo'] == prot_dir]
        gene_name = protein_spec_df['gene'].values[0]
        uniprot_ID = protein_spec_df['uniprot'].values[0]
        protein_spec_df = protein_spec_df[[
            'mutation', 'effect_size', 'p-value', 'transition'
        ]]

        df_write = "protein_structs/" + gene_name + '.csv'
        df.to_csv(df_write, header=None, index=None, sep='\t')

        write_to_dir = 'protein_mutation_locs_txts/' + gene_name + '.T2D.txt'
        protein_spec_df.to_csv(write_to_dir,
                               header=None,
                               index=None,
                               sep='\t')
        # Round-trip through the file so the frame has the header-less,
        # positional column layout that was just written.
        protein_df = pd.read_csv(write_to_dir, header=None, sep="\t")
        muts_df = tests.make_dataframe(df, protein_df, sequence)

        print(muts_df)

        has_risk = not muts_df[muts_df['score'] > 0].empty
        has_prot = not muts_df[muts_df['score'] < 0].empty
        if has_risk and has_prot:
            risk = tests.get_dist_vec(muts_df, True)
            prot = tests.get_dist_vec(muts_df, False)

            perm = tests.run_permutation(muts_df, df, np.mean(risk),
                                         np.mean(prot), 1000)
            print(perm)

            new_row = {
                'gene_name': gene_name,
                'uniprot_ID': uniprot_ID,
                "permutation risk": perm[0],
                "permutation prot": perm[1]
            }
            # ``DataFrame.append`` returned a new frame that was discarded
            # (and the method no longer exists in pandas 2), so the row never
            # reached the CSV. Rebind the concatenated result instead.
            results_df = pd.concat(
                [results_df, pd.DataFrame([new_row])], ignore_index=True)

        out_csv = 'parallelized/' + str(protein) + '-pval.csv'
        results_df.to_csv(out_csv)

    except Exception as e:
        # Deliberate best-effort: report and continue with the next protein.
        print(e)
Beispiel #8
0
def test_sameindex():
    """amino3to1 still works when every ATOM row shares the index value 0."""
    fixture_path = os.path.join(os.path.dirname(__file__), 'data',
                                '1t48_995.pdb')
    structure = PandasPdb()
    structure.read_pdb(fixture_path)
    print(structure)

    # Overwrite the index with all zeros so that row labels are not unique.
    atom_table = structure.df['ATOM']
    atom_table.index = np.zeros(atom_table.shape[0], dtype=int)

    expect_res = [
        'M', 'E', 'M', 'E', 'K', 'E', 'F', 'E', 'Q', 'I', 'D', 'K', 'S', 'G',
        'S', 'W', 'A', 'A', 'I', 'Y', 'Q', 'D', 'I', 'R', 'H', 'E', 'A', 'S',
        'D', 'F', 'P', 'C', 'R', 'V', 'A', 'K', 'L', 'P', 'K', 'N', 'K', 'N',
        'R', 'N', 'R', 'Y', 'R', 'D', 'V', 'S', 'P', 'F', 'D', 'H', 'S', 'R',
        'I', 'K', 'L', 'H', 'Q', 'E', 'D', 'N', 'D', 'Y', 'I', 'N', 'A', 'S',
        'L', 'I', 'K', 'M', 'E', 'E', 'A', 'Q', 'R', 'S', 'Y', 'I', 'L', 'T',
        'Q', 'G', 'P', 'L', 'P', 'N', 'T', 'C', 'G', 'H', 'F', 'W', 'E', 'M',
        'V', 'W', 'E', 'Q', 'K', 'S', 'R', 'G', 'V', 'V', 'M', 'L', 'N', 'R',
        'V', 'M', 'E', 'K', 'G', 'S', 'L', 'K'
    ]

    one_letter = structure.amino3to1()
    expect_chain = ['A'] * one_letter.shape[0]
    got_chain = list(one_letter['chain_id'].values)
    got_res = list(one_letter['residue_name'].values)

    assert expect_chain == got_chain
    assert expect_res == got_res
def import_pdb_with_biopandas(fname, label=None):
    """Load a multi-frame PDB file and return coordinates, times and energies.

    Prints the BioPandas citation, then reads ``fname``. The number of atoms
    per frame is the maximum of the second ATOM column, and the frame count is
    that column's length divided by it. Columns 11-13 of the ATOM table are
    reshaped to ``(frames, atoms, 3)``. ``time=`` and ``energy=`` values are
    parsed from every third row of the second OTHERS column.

    ``label`` is accepted but not used.

    :return: tuple ``(coords, time, energy)``.
    """
    from biopandas.pdb import PandasPdb
    cite = '''
    @article{raschkas2017biopandas,
             doi = {10.21105/joss.00279},
             url = {http://dx.doi.org/10.21105/joss.00279},
             year  = {2017},
             month = {jun},
             publisher = {The Open Journal},
             volume = {2},
             number = {14},
             author = {Sebastian Raschka},
             title = {BioPandas: Working with molecular structures in pandas DataFrames},
             journal = {The Journal of Open Source Software}
             }
    '''
    print(cite)

    ppdb = PandasPdb()
    ppdb.read_pdb(fname)

    atom_table = ppdb.df['ATOM']
    atom_columns = atom_table.dtypes.index
    # presumably column 1 is the atom serial number - TODO confirm
    serials = atom_table[atom_columns[1]]
    num_of_atoms = serials.max()
    frame_nums = serials.size // num_of_atoms
    # presumably columns 11..13 are x/y/z - TODO confirm
    xyz = atom_table[atom_columns[11:14]].to_numpy()
    coords = xyz.reshape((frame_nums, num_of_atoms, 3))

    other_table = ppdb.df['OTHERS']
    other_columns = other_table.dtypes.index
    every_third = other_table[other_columns[1]][1::3].values
    tokens = str(every_third).split()

    time = np.asarray(
        [float(tok.replace('time=', '')) for tok in tokens[2::8]])
    energy = np.asarray(
        [float(tok.replace('energy=', '')) for tok in tokens[4::8]])
    return coords, time, energy
Beispiel #10
0
def test_pdb_with_insertion_codes():
    """Rows 50-59 of the 2d7t one-letter sequence spell 'INPKSGDTNY'."""
    fixture_path = os.path.join(os.path.dirname(__file__), 'data', '2d7t.pdb')
    one_letter = PandasPdb().read_pdb(fixture_path).amino3to1()
    window = one_letter[50:60]['residue_name'].values
    assert "".join(window) == 'INPKSGDTNY'
Beispiel #11
0
def test_read_pdb():
    """read_pdb stores the raw text, the PDB code and the source path."""
    reader = PandasPdb()
    reader.read_pdb(TESTDATA_FILENAME)

    loaded_text = reader.pdb_text
    assert loaded_text == three_eiy

    loaded_code = reader.code
    assert loaded_code == '3eiy', loaded_code

    assert reader.pdb_path == TESTDATA_FILENAME
Beispiel #12
0
def parse_ligand_from_pdb(pdb_id, base_folder):
    """Return a HETATM residue name that is not on the removal list.

    Reads ``<base_folder>/<pdb_id>/<pdb_id>.pdb`` and the removal list from
    ``~/Fraser_Lab/phenix_pipeline/ligands_to_remove.csv``. Every HETATM
    residue name that is absent from the list's first column is printed. The
    last such name visited is returned; since names are iterated from a set,
    which one that is when several qualify is not defined.

    :param pdb_id: identifier used for both the sub-folder and the file name.
    :param base_folder: directory containing one sub-folder per ``pdb_id``.
    :return: a qualifying residue name, or ``''`` when none qualifies.
    """
    # Read PDB file into PandasPDB df
    ppdb = PandasPdb()
    ppdb.read_pdb("{}/{}/{}.pdb".format(base_folder, pdb_id, pdb_id))

    # Subset df to hetatms
    hetatm_df = ppdb.df['HETATM']

    # Read in ligands
    lig_to_remove_df = pd.read_csv(
        "~/Fraser_Lab/phenix_pipeline/ligands_to_remove.csv")
    lig_to_remove_df.columns = ["name", "unknown"]

    # Build the exclusion set once instead of once per residue name.
    names_to_remove = set(lig_to_remove_df['name'])

    lig_name = ''
    for res_name in set(hetatm_df['residue_name']):
        if res_name not in names_to_remove:
            print("###################################")
            lig_name = res_name
            print(lig_name)

    return lig_name
Beispiel #13
0
def pdb_atoms(filename):
    """Load the ATOM table of a PDB file with per-atom radii and coordinates.

    :return: tuple ``(df, radii, coords, n)`` where ``radii`` holds
        ``get_vdwr`` of each item from ``get_elem(df)``, ``coords`` holds the
        ``stat_items`` columns as an array and ``n`` is its row count.
    """
    reader = PandasPdb()
    reader.read_pdb(filename)
    atom_table = reader.df['ATOM']

    elements = get_elem(atom_table)
    radii = np.array([get_vdwr(element) for element in elements])

    coords = atom_table.filter(items=stat_items).to_numpy()
    return atom_table, radii, coords, coords.shape[0]
Beispiel #14
0
def load_custom_pdb(filepath):
    """Read ``filepath`` and return its ATOM table indexed by ``line_idx``."""
    reader = PandasPdb()
    reader.read_pdb(filepath)
    atom_table = reader.df["ATOM"]
    return atom_table.set_index(["line_idx"])
    def show_info(self, selected):
        """Show the first 1000 characters of the selected PDB file.

        The file is read from ``self.folder`` and the preview text is pushed
        to ``self.mol_info`` via ``set_text``.
        """
        reader = PandasPdb()
        reader.read_pdb(self.folder + '/' + selected)

        preview = reader.pdb_text[:1000]
        self.mol_info.set_text(
            '\nRaw PDB file contents:\n\n%s\n...' % preview)
Beispiel #16
0
def test_read_pdb_from_list():
    """read_pdb_from_list keeps the text, derives the code, leaves path ''."""
    fixtures = ((three_eiy, '3eiy'), (four_eiy, '4eiy'))
    for raw_text, expected_code in fixtures:
        lines = raw_text.splitlines(True)  # keep line endings
        reader = PandasPdb()
        reader.read_pdb_from_list(lines)
        assert reader.pdb_text == raw_text
        assert reader.code == expected_code
        assert reader.pdb_path == ''
Beispiel #17
0
    def persist(self):
        """
        Write the stored results below ``<working_dir>/<extractor.name>/``.

        The four importance arrays are always saved as .npy files; the other
        result attributes are saved only when they are not None. If
        ``pdb_file`` is set, PDB files carrying the importance values are
        written through ``_save_to_pdb`` (one overall, plus one per cluster
        when per-cluster importances exist).
        :return: itself
        """
        directory = self.working_dir + "/{}/".format(self.extractor.name)
        if not os.path.exists(directory):
            os.makedirs(directory)

        def dump(stem, values):
            np.save(directory + stem, values)

        # Always present.
        dump("importance_per_residue", self.importance_per_residue)
        dump("std_importance_per_residue", self.std_importance_per_residue)
        dump("feature_importance", self.feature_importances)
        dump("std_feature_importance", self.std_feature_importances)

        # Per-cluster importances are only written as a complete pair.
        if not (self.importance_per_residue_and_cluster is None
                or self.std_importance_per_residue_and_cluster is None):
            dump("importance_per_residue_and_cluster",
                 self.importance_per_residue_and_cluster)
            dump("std_importance_per_residue_and_cluster",
                 self.std_importance_per_residue_and_cluster)

        # Optional results: the file stem equals the attribute name.
        for attribute in ('separation_score',
                          "predefined_relevant_residues",
                          'accuracy',
                          'accuracy_per_cluster',
                          'test_set_errors',
                          'feature_to_resids'):
            values = getattr(self, attribute)
            if values is not None:
                dump(attribute, values)

        if self.pdb_file is None:
            return self

        pdb = PandasPdb()
        pdb.read_pdb(self.pdb_file)
        self._save_to_pdb(
            pdb, directory + "importance.pdb",
            self._map_to_correct_residues(self.importance_per_residue))

        if self.importance_per_residue_and_cluster is not None:
            per_cluster = self.importance_per_residue_and_cluster.T
            for cluster_idx, importance in enumerate(per_cluster):
                if self.extractor.label_names is None:
                    cluster_name = "cluster_{}".format(cluster_idx)
                else:
                    cluster_name = self.extractor.label_names[cluster_idx]
                self._save_to_pdb(
                    pdb,
                    directory + "{}_importance.pdb".format(cluster_name),
                    self._map_to_correct_residues(importance))

        return self
Beispiel #18
0
 def get_seq(struc):
     """Return the one-letter sequence(s) of the structure file ``struc``.

     One string is built per chain, in order of first appearance. A single
     chain yields that string directly; otherwise a list of strings is
     returned.
     """
     one_letter = PandasPdb().read_pdb(struc).amino3to1()
     chain_ids = one_letter['chain_id']

     seqs = []
     for chain in chain_ids.unique():
         residues = one_letter.loc[chain_ids == chain, 'residue_name']
         seqs.append(''.join(residues.to_list()))

     if len(seqs) == 1:
         return seqs[0]
     return seqs
Beispiel #19
0
def test_equal():
    """Distances below 3 from a fixed point in the 1t48 fixture."""
    fixture_path = os.path.join(os.path.dirname(__file__), 'data',
                                '1t48_995.pdb')
    structure = PandasPdb()
    structure.read_pdb(fixture_path)

    reference_point = (70.785, 15.477, 23.359)
    dist = structure.distance(xyz=reference_point, record='ATOM')
    nearby = dist[dist < 3]

    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597, 1.252510],
                       index=[12, 13, 14, 15, 16])
    # NOTE(review): this compares only the two boolean ``.all()`` reductions,
    # not the distances themselves; ``expect`` contains 0.0, so the right-hand
    # side is False. Consider an element-wise comparison.
    assert nearby.all() == expect.all()
Beispiel #20
0
def test_use_external_df():
    """distance() accepts an external DataFrame (ATOM table minus last row)."""
    fixture_path = os.path.join(os.path.dirname(__file__), 'data',
                                '1t48_995.pdb')
    structure = PandasPdb()
    structure.read_pdb(fixture_path)

    all_but_last = structure.df['ATOM'].iloc[:-1, :].copy()
    reference_point = (70.785, 15.477, 23.359)
    dist = structure.distance(df=all_but_last, xyz=reference_point)
    nearby = dist[dist < 3]

    expect = pd.Series([2.533259, 1.520502, 0.000000, 1.257597],
                       index=[12, 13, 14, 15])
    # NOTE(review): this compares only the two boolean ``.all()`` reductions,
    # not the distances themselves; ``expect`` contains 0.0, so the right-hand
    # side is False. Consider an element-wise comparison.
    assert nearby.all() == expect.all()
Beispiel #21
0
def expanded_bottleneck(src_file, trg_file, factor):
    """Scale the in-plane projection of a structure and write it as a PDB.

    Takes the decomposition returned by ``proj_stats(src_file)``, multiplies
    the ``proj_xy`` component by ``factor``, re-attaches the unscaled third
    component, maps the result back through ``coords_vh``, adds ``mean`` and
    writes the new coordinates into a copy of the source ATOM table.

    :param src_file: input passed to ``proj_stats``.
    :param trg_file: path of the PDB file to write.
    :param factor: multiplier applied to ``proj_xy``.
    """
    (src_df, src_radii, n, mean, coords, coords_u, coords_s, coords_vh,
     proj_xy, src_plot_df) = proj_stats(src_file)

    expanded_proj = factor * proj_xy
    third_component = coords_s[2] * coords_u[:, 2]
    # ``np.concatenate(a, b)`` treated the second array as the ``axis``
    # argument and raised TypeError. Stack it as an extra column instead.
    # assumes proj_xy has shape (n, 2) so the result is (n, 3) - TODO confirm
    fat_ep = np.column_stack((expanded_proj, third_component))

    coords = np.matmul(fat_ep, coords_vh)
    coords += mean

    trg_df = src_df.copy()
    # Assign positionally so the rows cannot be misaligned if ``src_df`` does
    # not carry a default 0..n-1 index.
    trg_df[stat_items] = coords

    trg_pdb = PandasPdb()
    trg_pdb.df['ATOM'] = trg_df
    trg_pdb.to_pdb(path=trg_file, records=['ATOM'], gz=False,
                   append_newline=True)
Beispiel #22
0
def test_fetch_pdb():
    """Test fetch_pdb"""
    ppdb = PandasPdb()
    # Pre-bind so that an HTTPError skips the checks instead of raising
    # NameError on the ``if txt`` line below.
    txt = None
    try:
        txt = ppdb._fetch_pdb('3eiy')
    except HTTPError:
        pass

    if txt:  # skip if PDB down
        # This comparison was a bare expression whose result was discarded.
        assert txt[:100] == three_eiy[:100]
        ppdb.fetch_pdb('3eiy')
        assert ppdb.pdb_text == txt
Beispiel #23
0
def get_neighbors(path, device):
    """Label every surface node with the residue type of its nearest atom.

    Reads the PDB at ``path`` and the matching ``.ply`` structure, looked up
    first under ``./structures/train/`` and then ``./structures/test/``. For
    each node in ``structure.pos`` the closest ATOM record is found, and that
    atom's residue name is encoded as an integer via ``res_encoder``. The
    encoded residues are stored on ``structure.residues``.

    :param path: path of the PDB file; its base name without extension
        selects the ``.ply`` file.
    :param device: torch device used for the distance computation.
    :return: tuple ``(train, structure)`` where ``train`` is True when the
        structure was found in the train directory.
    """
    res_encoder = {'LYS': 1, 'GLU': 2, 'ASP': 3, 'SER': 4, 'PHE': 5,
                   'CYS': 6, 'VAL': 7, 'ILE': 8, 'MET': 9, 'HIS': 10,
                   'GLY': 11, 'LEU': 12, 'TYR': 13, 'THR': 14, 'PRO': 15,
                   'ARG': 16, 'TRP': 17, 'ALA': 18, 'GLN': 19, 'ASN': 20,
                   'SEC': 21, 'UNK': 21, 'ASX': 21, 'GLX': 21, 'XLE': 21,
                   'PYL': 21}

    ppdb = PandasPdb()
    ppdb.read_pdb(path=path)

    # ``[-1]`` also handles a bare file name without any '/'.
    mol_name = path.rsplit('/', 1)[-1].split('.')[0]
    train = True
    try:
        structure = read_ply('./structures/train/{}.ply'.format(mol_name))
    except FileNotFoundError:
        structure = read_ply('./structures/test/{}.ply'.format(mol_name))
        train = False
    nodes = structure.pos.to(device).float()
    n_nodes = nodes.shape[0]

    atom_df = ppdb.df['ATOM']
    pos = ['x_coord', 'y_coord', 'z_coord']
    atoms = torch.tensor(atom_df[pos].values).to(device).float()

    # Broadcast to (n_atoms, n_nodes, 3) and take, per node, the index of the
    # atom at minimum Euclidean distance.
    atoms = atoms.view(-1, 1, 3).expand(-1, n_nodes, 3)
    closest_atom = (atoms - nodes).norm(dim=2).argmin(dim=0)

    idx_translation = torch.LongTensor(
        atom_df.residue_name.replace(res_encoder)).to(device)

    # Direct gather of each node's residue code. This replaces a dense
    # (n_nodes x n_atoms) one-hot product whose non-zero values were the same
    # codes in node order (all codes in ``res_encoder`` are non-zero).
    amino_acids = idx_translation[closest_atom].to(cpu)
    structure.residues = amino_acids

    return train, structure
Beispiel #24
0
def map_dataframe(file, dx, dy, dz, remove_non_ca=0):
    """Rewrite a PDB file with coordinates looked up per atom number.

    Each ATOM row's x/y/z coordinate is replaced by the value that ``dx``,
    ``dy`` and ``dz`` map its ``atom_number`` to. With ``remove_non_ca == 0``
    the result is saved next to ``file`` with the suffix ``_averaged.pdb``;
    with ``remove_non_ca == 1`` all rows whose atom name is not ``CA`` are
    dropped first and the suffix is ``_averaged_CA.pdb``. Any other value
    calls ``exit(1)``.

    :return: tuple ``(new_file_name, ppdb)``.
    """
    ppdb = PandasPdb()
    ppdb.read_pdb(file)
    atom_table = ppdb.df['ATOM']

    serials = atom_table['atom_number']
    atom_table['x_coord'] = serials.map(dx)
    atom_table['y_coord'] = atom_table['atom_number'].map(dy)
    atom_table['z_coord'] = atom_table['atom_number'].map(dz)

    def write_with_suffix(suffix):
        newname = os.path.splitext(file)[0] + suffix
        ppdb.to_pdb(newname)
        return newname, ppdb

    if remove_non_ca == 0:
        return write_with_suffix("_averaged.pdb")
    if remove_non_ca == 1:
        not_ca = atom_table[atom_table['atom_name'] != "CA"].index
        atom_table.drop(not_ca, inplace=True)
        return write_with_suffix("_averaged_CA.pdb")
    exit(1)


#f1 = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File1.pdb"
#f1w = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File1_wrong.pdb"
#f2 = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File2.pdb"
#f3 = "/Users/stefanocucuzza/Desktop/Stefano/CHILL/Test_files/Test_average_structures/File3.pdb"
#
#dx, dy, dz = get_dictionaries([f1, f2])
#ax = average_dict_values(dx)
#ay = average_dict_values(dy)
#az = average_dict_values(dz)
#map_dataframe(f1, ax, ay, az, remove_non_ca=1)
Beispiel #25
0
def get_dictionaries(list):
    """Collect x, y and z coordinates per atom number across several files.

    Every ATOM row of every file in ``list`` is passed to ``set_key`` three
    times, once per axis, with the row's atom number as key and the
    corresponding coordinate as value.

    :return: tuple ``(dictio_x, dictio_y, dictio_z)``.
    """
    reader = PandasPdb()
    per_axis = {'x_coord': {}, 'y_coord': {}, 'z_coord': {}}

    for pdb_path in list:
        reader.read_pdb(pdb_path)
        for _, atom in reader.df['ATOM'].iterrows():
            for column, target in per_axis.items():
                set_key(target, atom['atom_number'], atom[column])

    return per_axis['x_coord'], per_axis['y_coord'], per_axis['z_coord']
Beispiel #26
0
def test_fetch_pdb():
    """Test fetch_pdb"""
    ppdb = PandasPdb()
    # Pre-bind so that an HTTPError skips the checks instead of raising
    # NameError on the ``if txt`` line below.
    txt = None
    try:
        url, txt = ppdb._fetch_pdb('3eiy')
    except HTTPError:
        pass

    if txt:  # skip if PDB down
        # This comparison was a bare expression whose result was discarded.
        assert txt[:100] == three_eiy[:100]
        ppdb.fetch_pdb('3eiy')
        assert ppdb.pdb_text == txt
        assert ppdb.pdb_path == 'http://www.rcsb.org/pdb/files/3eiy.pdb'
Beispiel #27
0
def save_pdb_df_to_pdb(df: pd.DataFrame, path: str, gz: bool = False):
    """Saves pdb dataframe to a PDB file.

    The dataframe is stored as the ``ATOM`` record of a fresh ``PandasPdb``
    object, which is then written out.

    :param df: Dataframe to save as PDB
    :type df: pd.DataFrame
    :param path: Path to save PDB file to.
    :type path: str
    :param gz: Whether to gzip the file. Defaults to ``False``.
    :type gz: bool
    """
    ppd = PandasPdb()
    ppd.df["ATOM"] = df
    # Forward the caller's ``gz`` choice; it used to be hard-coded to False.
    ppd.to_pdb(path=path, records=None, gz=gz, append_newline=True)
    log.info(f"Successfully saved PDB dataframe to {path}")
 def __call__(self, pdbid):
    """Write the grid points lying in a shell around the protein.

    Reads ``<args.blindfolder>/<pdbid>.pdb``, builds a unit-spaced grid over
    the bounding box of all ATOM records padded by 5 on each side, and keeps
    the grid points that are within ``halo[1]`` but not within ``halo[0]`` of
    any non-nucleotide atom. Those points are written to
    ``<pdbid>_Grid.ptf`` and passed to ``XYZ`` for ``<pdbid>_Grid.xyz``.
    """
    print(pdbid)

    # A. Pdb reading
    # Reading in the pdb for the current conformation
    reader = PandasPdb()
    structure = reader.read_pdb("%s/%s.pdb" %(args.blindfolder, str(pdbid)))
    atom_table = structure.df['ATOM']

    nucleotide_names = ["A","T","C","G","U","DA","DT","DC","DG","DU"]
    proteindf = atom_table[~atom_table["residue_name"].isin(nucleotide_names)]
    proteinpoint = np.array([
        proteindf["x_coord"].tolist(),
        proteindf["y_coord"].tolist(),
        proteindf["z_coord"].tolist(),
    ]).T
    proteintree = spatial.cKDTree(proteinpoint)

    # B. Grid Creation
    # Define Dimension of Grid box
    all_x = atom_table["x_coord"].tolist()
    all_y = atom_table["y_coord"].tolist()
    all_z = atom_table["z_coord"].tolist()
    maxx, minx = max(all_x) + 5, min(all_x) - 5
    maxy, miny = max(all_y) + 5, min(all_y) - 5
    maxz, minz = max(all_z) + 5, min(all_z) - 5

    # Surface Grid Points
    grid = np.mgrid[minx:maxx, miny:maxy, minz:maxz]
    points = np.matrix(grid.reshape(3, -1).T)
    tree = spatial.cKDTree(points)

    def grid_indices_within(radius):
        # Indices of grid points within ``radius`` of any protein atom.
        hits = list(tree.query_ball_point(proteinpoint, radius))
        return set(itertools.chain.from_iterable(hits))

    inner = grid_indices_within(halo[0])
    outer = grid_indices_within(halo[1])
    # Surface points within the midline and finalise tree for Surface accordingly
    midlineindex = sorted(outer - inner)

    print (pdbid,len(points),len(midlineindex))

    print (points[1].tolist()[0][0])

    with open("%s/%s_Grid.ptf"%(args.blindfolder, pdbid),'w+') as f:
        for idx in midlineindex:
            row = points[idx].tolist()[0]
            f.write('%s\t%.3f\t%.3f\t%.3f\t#\t%s000:X@XX:grid\n' %(pdbid, row[0], row[1], row[2], str("X")))

    XYZ(points[np.array(midlineindex)].tolist(),"Ge","%s/%s_Grid.xyz" %(args.blindfolder,pdbid))
 def prepare_docking_grid_and_dock(self):
     """Compute a docking box around the protein and run ``vina``.

     The box centre is the midpoint of the ATOM coordinates along each axis.
     The box size is the coordinate extent plus 3. Both are rounded to two
     decimals. ``vina`` is invoked through the shell with
     ``./protein_pdbqts/<protein>`` as receptor and
     ``./ligand_pdbqts/<ligand>`` as ligand. Its output and log files are
     then moved to ``./results/``; if moving fails the error is printed and
     any leftover files are deleted.
     """
     receptor = './protein_pdbqts/' + self.protein
     df = PandasPdb().read_pdb(receptor).df[
         'ATOM']  # opens protein to calculate grid

     def center_and_size(column):
         lo = df[column].min()
         hi = df[column].max()
         return round((hi + lo) / 2, 2), round(abs(hi - lo) + 3, 2)

     cent_x, size_x = center_and_size('x_coord')
     cent_y, size_y = center_and_size('y_coord')
     cent_z, size_z = center_and_size('z_coord')

     # The two messages previously printed each other's values.
     print("Center point of docking grid for {} is as follows: "
           "x: {}, y: {}, z: {}".format(self.protein, cent_x, cent_y,
                                        cent_z))
     print("Sizes of docking grid are as follows: "
           "x: {}, y: {}, z: {}".format(size_x, size_y, size_z))

     out_file = self.ligand + '.out'
     log_file = self.ligand + '_docking_log'
     os.system(
         'vina --receptor {} --ligand {} --center_x {} --center_y {} --center_z {} --size_x {} --size_y {} --size_z {} --log {} --out {}'
         .format(receptor,
                 './ligand_pdbqts/' + self.ligand, cent_x, cent_y, cent_z,
                 size_x, size_y, size_z, log_file,
                 out_file))
     try:  # cleaning
         shutil.move(out_file, './results/')
         shutil.move(log_file, './results/')
     except Exception as e:
         print(e)
         # One of the files may already have been moved (or never created),
         # so only delete what is still there.
         for leftover in (out_file, log_file):
             if os.path.exists(leftover):
                 os.remove(leftover)
Beispiel #30
0
def get_residue_dictionary(pdb_file, sites, residues):
    """Append the residues listed in the SITE lines of a PDB to ``residues``.

    Only the first 1000 lines of the file are scanned. A line is used when it
    contains ``"SITE"`` and does not contain ``"REMARK"``. After whitespace
    splitting, token 2 is looked up in ``sites`` and the tokens from position
    4 onward are consumed in groups of three (name, chain, number).

    :param pdb_file: path of the PDB file to read.
    :param sites: mapping from the third token of a SITE line to the key used
        inside the ``residues`` sub-dictionaries.
    :param residues: dict with the entries ``"Residue Name"``,
        ``"Residue Chain"`` and ``"Residue Number"``, each mapping a site key
        to a list; the lists are extended in place.
    :return: the same ``residues`` object.
    """
    from biopandas.pdb import PandasPdb
    ppdb = PandasPdb().read_pdb(pdb_file)
    head_lines = ppdb.pdb_text.split('\n')[:1000]
    # NOTE(review): substring match, so any non-REMARK line that merely
    # mentions "SITE" is also parsed - confirm whether a prefix test is wanted.
    site_lines = [
        line for line in head_lines
        if "SITE" in line and "REMARK" not in line
    ]

    for line in site_lines:
        # Tokens longer than four characters are split into their first
        # character and the remainder (presumably a chain ID fused with a
        # four-digit residue number - TODO confirm). The original guarded
        # this with a flag that was computed from whole lines and therefore
        # always set; splitting unconditionally gives the same tokens.
        tokens = []
        for token in line.split():
            if len(token) > 4:
                tokens.append(token[0])
                tokens.append(token[1:])
            else:
                tokens.append(token)

        for name, chain, number in zip(tokens[4::3], tokens[5::3],
                                       tokens[6::3]):
            site_key = sites[tokens[2]]
            residues["Residue Name"][site_key].append(name)
            residues["Residue Chain"][site_key].append(chain)
            residues["Residue Number"][site_key].append(number)

    return residues