コード例 #1
0
ファイル: test_amino3to1.py プロジェクト: xxffliu/biopandas
def test_sameindex():
    """Check ``amino3to1`` when every ATOM row carries the same index label.

    The ATOM frame's index is overwritten with zeros before translating, and
    the result is compared against the expected one-letter residues, all of
    which are expected on chain ``'A'``.
    """
    pdb_path = os.path.join(os.path.dirname(__file__), 'data', '1t48_995.pdb')
    structure = PandasPdb()
    structure.read_pdb(pdb_path)
    print(structure)

    # Replace the ATOM index with an all-zero integer index of equal length.
    n_atoms = structure.df['ATOM'].shape[0]
    structure.df['ATOM'].index = np.zeros(n_atoms, dtype=int)

    # One character per expected residue; list() expands it to single letters.
    expected_residues = list(
        'MEMEKEFEQIDKSG'
        'SWAAIYQDIRHEAS'
        'DFPCRVAKLPKNKN'
        'RNRYRDVSPFDHSR'
        'IKLHQEDNDYINAS'
        'LIKMEEAQRSYILT'
        'QGPLPNTCGHFWEM'
        'VWEQKSRGVVMLNR'
        'VMEKGSLK'
    )

    translated = structure.amino3to1()
    expected_chains = ['A'] * translated.shape[0]
    observed_chains = list(translated['chain_id'].values)
    observed_residues = list(translated['residue_name'].values)

    assert expected_chains == observed_chains
    assert expected_residues == observed_residues
コード例 #2
0
def test_pdb_with_insertion_codes():
    """Check rows 50-59 of the one-letter translation of ``2d7t.pdb``.

    The test name indicates the file contains insertion codes; the assertion
    itself only pins the ten residue letters in the row slice ``50:60``.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    pdb_path = os.path.join(data_dir, '2d7t.pdb')

    structure = PandasPdb().read_pdb(pdb_path)
    translated = structure.amino3to1()

    window = translated[50:60]
    letters = "".join(window['residue_name'].values)
    assert letters == 'INPKSGDTNY'
コード例 #3
0
def run_test(prot_dir, protein, data):
    """Run the permutation test for one protein and write the result as CSV.

    The first entry of ``SWISS-MODEL_Repository/<prot_dir>/swissmodel/`` is
    read as a PDB file. Its ATOM table and the mutation rows selected from
    ``data`` are written to disk, combined through ``tests.make_dataframe``
    and, when both positive and negative scores are present, passed to
    ``tests.run_permutation``. The outcome is written to
    ``parallelized/<protein>-pval.csv``. That file holds only the header row
    when the permutation step was skipped.

    Args:
        prot_dir: Sub-directory name inside ``SWISS-MODEL_Repository``; also
            matched against the ``uniprot_repo`` column of ``data``.
        protein: Identifier used only to name the output CSV.
        data: DataFrame providing the columns ``uniprot_repo``, ``gene``,
            ``uniprot``, ``mutation``, ``effect_size``, ``p-value`` and
            ``transition``.

    Returns:
        None. Nothing is done when the model directory does not exist. Any
        exception raised while processing is printed and suppressed.
    """
    result_columns = [
        'gene_name', 'uniprot_ID', 'permutation risk', 'permutation prot'
    ]
    file_repo = 'SWISS-MODEL_Repository/' + prot_dir + '/swissmodel/'
    if not os.path.isdir(file_repo):
        return

    try:
        print(file_repo)
        # Uses whichever entry os.listdir happens to return first.
        pdb_file = file_repo + str(os.listdir(file_repo)[0])

        ppdb = PandasPdb().read_pdb(pdb_file)
        df = pd.DataFrame(ppdb.df['ATOM'])
        sequence = ppdb.amino3to1()

        protein_spec_df = data[data['uniprot_repo'] == prot_dir]
        gene_name = protein_spec_df['gene'].values[0]
        uniprot_ID = protein_spec_df['uniprot'].values[0]
        protein_spec_df = protein_spec_df[[
            'mutation', 'effect_size', 'p-value', 'transition'
        ]]

        df_write = "protein_structs/" + gene_name + '.csv'
        df.to_csv(df_write, header=None, index=None, sep='\t')

        # The mutation table is written out and read back, so downstream
        # code receives it exactly as a header-less TSV would parse.
        write_to_dir = 'protein_mutation_locs_txts/' + gene_name + '.T2D.txt'
        protein_spec_df.to_csv(write_to_dir,
                               header=None,
                               index=None,
                               sep='\t')
        protein_df = pd.read_csv(write_to_dir, header=None, sep="\t")
        muts_df = tests.make_dataframe(df, protein_df, sequence)

        print(muts_df)

        # Rows are collected in a list and turned into a frame once.
        # DataFrame.append returned a new frame (and no longer exists in
        # pandas 2.x), so the previous in-loop call silently lost the row.
        result_rows = []

        has_positive = not muts_df[muts_df['score'] > 0].empty
        has_negative = not muts_df[muts_df['score'] < 0].empty
        if has_positive and has_negative:
            risk = tests.get_dist_vec(muts_df, True)
            prot = tests.get_dist_vec(muts_df, False)

            perm = tests.run_permutation(muts_df, df, np.mean(risk),
                                         np.mean(prot), 1000)
            print(perm)

            result_rows.append({
                'gene_name': gene_name,
                'uniprot_ID': uniprot_ID,
                'permutation risk': perm[0],
                'permutation prot': perm[1],
            })

        results_df = pd.DataFrame(result_rows, columns=result_columns)
        out_csv = 'parallelized/' + str(protein) + '-pval.csv'
        results_df.to_csv(out_csv)

    except Exception as e:
        # Deliberate best-effort: a failure for one protein is reported
        # and must not abort the caller.
        print(e)
コード例 #4
0
def test_multichain():
    """Check ``amino3to1`` on a two-chain structure (``5mtn_multichain.pdb``).

    Expects 88 residues labelled chain ``'A'`` followed by 94 labelled chain
    ``'B'``, with the per-chain one-letter sequences given below.
    """
    pdb_path = os.path.join(os.path.dirname(__file__), 'data',
                            '5mtn_multichain.pdb')
    structure = PandasPdb()
    structure.read_pdb(pdb_path)

    # One character per expected residue; list() expands to single letters.
    expected_a = list(
        'SLEPEPWFFKNL'
        'SRKDAERQLLAP'
        'GNTHGSFLIRES'
        'ESTAGSFSLSVR'
        'DFDQGEVVKHYK'
        'IRNLDNGGFYIS'
        'PRITFPGLHELV'
        'RHYT'
    )
    expected_b = list(
        'SVSSVPTKLEVV'
        'AATPTSLLISWD'
        'APAVTVVYYLIT'
        'YGETGSPWPGGQ'
        'AFEVPGSKSTAT'
        'ISGLKPGVDYTI'
        'TVYAHRSSYGYS'
        'ENPISINYRT'
    )

    translated = structure.amino3to1()

    expected_chains = ['A'] * 88 + ['B'] * 94
    observed_chains = list(translated['chain_id'].values)

    on_chain_a = translated['chain_id'] == 'A'
    observed_a = list(translated.loc[on_chain_a, 'residue_name'].values)
    on_chain_b = translated['chain_id'] == 'B'
    observed_b = list(translated.loc[on_chain_b, 'residue_name'].values)

    assert expected_chains == observed_chains
    assert expected_a == observed_a
    assert expected_b == observed_b
コード例 #5
0
ファイル: vdsl.py プロジェクト: UoMMIB/vina-diesel
 def get_seq(struc):
     """Return the one-letter sequence(s) of the PDB file ``struc``.

     The translation table has the columns ``chain_id`` and
     ``residue_name``; residues are joined per chain, in order of first
     appearance of each chain id.

     Returns:
         A single string when exactly one chain is present, otherwise a
         list with one string per chain.
     """
     table = PandasPdb().read_pdb(struc).amino3to1()
     chain_seqs = []
     for chain in table['chain_id'].unique():
         residues = table.loc[table['chain_id'] == chain, 'residue_name']
         chain_seqs.append(''.join(residues.to_list()))
     if len(chain_seqs) == 1:
         return chain_seqs[0]
     return chain_seqs
コード例 #6
0
def main(argv):
    """Run the distance tests for one structure given on the command line.

    Args:
        argv: Argument vector. ``argv[1]`` is the PDB file to read,
            ``argv[2]`` a header-less tab-separated table and ``argv[3]``
            the run count forwarded to ``run_permutation``.
    """
    structure = PandasPdb().read_pdb(argv[1])
    atoms = pd.DataFrame(structure.df['ATOM'])
    one_letter = structure.amino3to1()

    table = pd.read_csv(argv[2], sep="\t", header=None)
    # NOTE(review): forwarded exactly as received (a str when argv comes
    # from sys.argv) -- confirm run_permutation converts it itself.
    n_runs = argv[3]

    scored = make_dataframe(atoms, table, one_letter)

    dists_true = get_dist_vec(scored, True)
    dists_false = get_dist_vec(scored, False)

    # NOTE(review): both results below are computed but never used or
    # reported within this function.
    mw_result = mannwhitneyu(dists_true, dists_false)

    perm_result = run_permutation(scored, np.mean(dists_true),
                                  np.mean(dists_false), n_runs)
コード例 #7
0
def test_defaults():
    """Check the default ``amino3to1`` output for ``1t48_995.pdb``.

    The ``.values`` of the translation, converted to a list, must equal the
    expected one-letter residues.
    """
    pdb_path = os.path.join(os.path.dirname(__file__), 'data', '1t48_995.pdb')
    structure = PandasPdb()
    structure.read_pdb(pdb_path)

    # One character per expected residue; list() expands to single letters.
    expected = list(
        'MEMEKEFEQIDKSG'
        'SWAAIYQDIRHEAS'
        'DFPCRVAKLPKNKN'
        'RNRYRDVSPFDHSR'
        'IKLHQEDNDYINAS'
        'LIKMEEAQRSYILT'
        'QGPLPNTCGHFWEM'
        'VWEQKSRGVVMLNR'
        'VMEKGSLK'
    )
    observed = list(structure.amino3to1().values)
    assert expected == observed