Python DihedralFeaturizer.describe_features 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: msmbuilder.featurizer

클래스/타입: DihedralFeaturizer

메소드/함수: describe_features

hotexamples.com에서의 예제들: 10

Python DihedralFeaturizer.describe_features - 10개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 msmbuilder.featurizer.DihedralFeaturizer.describe_features의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

DihedralFeaturizer(30)

transform(13)

describe_features(7)

fit_transform(5)

partial_transform(4)

예제 #1

파일 보기

파일: featurize_project.py 프로젝트: HWaymentSteele/kinase_msm

def featurize_file(job_tuple):
    """Featurize a single trajectory file and dump the result to disk.

    Parameters
    ----------
    job_tuple : tuple
        ``(yaml_file, protein, feat, traj_file, stride)``.  ``yaml_file`` is
        passed to ``load_yaml_file`` and the result must provide the
        ``"base_dir"`` and ``"feature_dir"`` keys.  When ``feat`` is ``None``
        a phi/psi/chi1 ``DihedralFeaturizer`` is used.

    Returns
    -------
    None
        Features are written to ``<base_dir>/<protein>/<feature_dir>/`` as
        ``<trajectory name>.jl``.  If ``feature_descriptor.h5`` does not exist
        there yet and the featurizer has ``describe_features``, a descriptor
        DataFrame built from the first frame is written as well.

    Notes
    -----
    A trajectory that fails to load is deleted from disk after a warning.
    """
    # NOTE(review): ``stride`` is unpacked but never used below -- the
    # trajectory is loaded at full resolution.  Confirm this is intended.
    yaml_file, protein, feat, traj_file, stride = job_tuple
    yaml_file = load_yaml_file(yaml_file)

    if feat is None:
        feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])

    _check_output_folder_exists(yaml_file, protein)

    output_folder = os.path.join(yaml_file["base_dir"],
                                 protein,
                                 yaml_file["feature_dir"])

    traj_name = os.path.splitext(os.path.basename(traj_file))[0]
    output_fname = os.path.join(output_folder, traj_name + ".jl")
    feat_descriptor = os.path.join(output_folder, "feature_descriptor.h5")

    try:
        trj = mdt.load(traj_file)
    except Exception:
        # Deliberately not a bare ``except``: a KeyboardInterrupt or
        # SystemExit during loading must not delete a valid trajectory.
        warnings.warn("Removing %s because of misformed trajectory" % traj_file)
        os.remove(traj_file)
        return

    features = feat.partial_transform(trj)
    verbosedump(features, output_fname)

    # Only the first job to get here writes the shared descriptor file.
    if not os.path.isfile(feat_descriptor) and hasattr(feat, "describe_features"):
        dih_df = pd.DataFrame(feat.describe_features(trj[0]))
        verbosedump(dih_df, feat_descriptor)

    return

예제 #2

파일 보기

파일: test_feature_descriptor.py 프로젝트: rgejman/msmbuilder

def test_DihedralFeaturizer_describe_features_nosincos():
    """Check raw-angle dihedral features against their descriptor rows.

    A random trajectory is featurized with ``sincos=False``; for 25 randomly
    chosen feature columns the dihedral is recomputed from the atom indices
    listed in the descriptor and compared element-wise with the column.
    """
    feat = DihedralFeaturizer(sincos=False)
    traj = trajectories[np.random.randint(len(trajectories))]
    features = feat.transform([traj])
    df = pd.DataFrame(feat.describe_features(traj))

    for _ in range(25):
        column = np.random.choice(len(df))
        descriptor = df.iloc[column]

        expected = md.compute_dihedrals(traj, [descriptor.atominds])
        if feat.sincos:
            # The descriptor names the numpy function applied to the angle.
            transform = getattr(np, '%s' % descriptor.otherinfo)
            expected = transform(expected)

        assert (features[0][:, column] == expected.flatten()).all()

예제 #3

파일 보기

파일: test_feature_descriptor.py 프로젝트: msmbuilder/msmbuilder

def test_DihedralFeaturizer_describe_features_nosincos():
    """Check raw-angle dihedral features against their descriptor rows.

    A random trajectory is featurized with ``sincos=False``; for 25 randomly
    chosen feature columns the dihedral is recomputed from the atom indices
    listed in the descriptor and compared element-wise with the column.
    """
    feat = DihedralFeaturizer(sincos=False)
    traj = trajectories[np.random.randint(len(trajectories))]
    features = feat.transform([traj])
    df = pd.DataFrame(feat.describe_features(traj))

    for _ in range(25):
        column = np.random.choice(len(df))
        descriptor = df.iloc[column]

        expected = md.compute_dihedrals(traj, [descriptor.atominds])
        if feat.sincos:
            # The descriptor names the numpy function applied to the angle.
            transform = getattr(np, '%s' % descriptor.otherinfo)
            expected = transform(expected)

        assert (features[0][:, column] == expected.flatten()).all()

예제 #4

파일 보기

def Get_dihedral_features_villin():
    """Featurize the villin trajectories with phi/psi dihedrals.

    Changes the working directory to the villin trajectory folder, removes
    any existing ``./diheds`` output directory, featurizes every ``*.xtc``
    file there (stride 5) into ``diheds/`` and prints the ``resids`` entry of
    each feature descriptor.

    Returns
    -------
    The dataset returned by ``fit_transform_with``.
    """
    import os
    import shutil
    import mdtraj as md

    os.chdir('/homes/anuginueni/traj_villin')
    # Start from a clean output directory.
    if os.path.isdir('./diheds'):
        shutil.rmtree('./diheds')

    from msmbuilder.dataset import dataset
    # One reference trajectory, used only to describe the features.
    reference_traj = md.load(
        "/homes/anuginueni/traj_villin/trajectory-331.xtc",
        top='/homes/anuginueni/traj_villin/filtered.pdb',
        stride=5,
    )
    trajs = dataset(
        "/homes/anuginueni/traj_villin/*.xtc",
        topology='/homes/anuginueni/traj_villin/filtered.pdb',
        stride=5,
    )

    from msmbuilder.featurizer import DihedralFeaturizer
    featurizer = DihedralFeaturizer(types=['phi', 'psi'])
    diheds = trajs.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')

    descriptors = featurizer.describe_features(reference_traj)
    resid_lists = [entry['resids'] for entry in descriptors]
    print(str(resid_lists))
    return diheds

예제 #5

파일 보기

파일: test_feature_descriptor.py 프로젝트: rgejman/msmbuilder

def test_FeatureSelector_describe_features():
    """Check that FeatureSelector descriptors match its member featurizers.

    The combined descriptor table must have as many rows as the contact and
    dihedral tables together, and 40 randomly drawn rows must agree, column
    by column, with the member tables concatenated in ``feat_list`` order.
    """
    traj = trajectories[np.random.randint(len(trajectories))]

    contact_feat = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    contact_feat.transform([traj])
    contact_df = pd.DataFrame(contact_feat.describe_features(traj))

    dihedral_feat = DihedralFeaturizer()
    dihedral_feat.transform([traj])
    dihedral_df = pd.DataFrame(dihedral_feat.describe_features(traj))

    frames_by_name = {"ca": contact_df, "dih": dihedral_df}

    selector = FeatureSelector([('ca', contact_feat), ('dih', dihedral_feat)])
    selector.transform([traj])
    combined_df = pd.DataFrame(selector.describe_features(traj))
    assert len(combined_df) == len(contact_df) + len(dihedral_df)

    expected_df = pd.concat(
        [frames_by_name[name] for name in selector.feat_list]
    )
    # Spot-check 40 randomly drawn row positions.
    for row in np.random.choice(range(len(combined_df)), 40):
        for column in combined_df.columns:
            assert eq(combined_df.iloc[row][column],
                      expected_df.iloc[row][column])

예제 #6

파일 보기

파일: test_feature_descriptor.py 프로젝트: msmbuilder/msmbuilder

def test_FeatureSelector_describe_features():
    """Check that FeatureSelector descriptors match its member featurizers.

    The combined descriptor table must have as many rows as the contact and
    dihedral tables together, and 40 randomly drawn rows must agree, column
    by column, with the member tables concatenated in ``feat_list`` order.
    """
    traj = trajectories[np.random.randint(len(trajectories))]

    contact_feat = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    contact_feat.transform([traj])
    contact_df = pd.DataFrame(contact_feat.describe_features(traj))

    dihedral_feat = DihedralFeaturizer()
    dihedral_feat.transform([traj])
    dihedral_df = pd.DataFrame(dihedral_feat.describe_features(traj))

    frames_by_name = {"ca": contact_df, "dih": dihedral_df}

    selector = FeatureSelector([('ca', contact_feat), ('dih', dihedral_feat)])
    selector.transform([traj])
    combined_df = pd.DataFrame(selector.describe_features(traj))
    assert len(combined_df) == len(contact_df) + len(dihedral_df)

    expected_df = pd.concat(
        [frames_by_name[name] for name in selector.feat_list]
    )
    # Spot-check 40 randomly drawn row positions.
    for row in np.random.choice(range(len(combined_df)), 40):
        for column in combined_df.columns:
            assert eq(combined_df.iloc[row][column],
                      expected_df.iloc[row][column])

예제 #7

파일 보기

파일: featurize.py 프로젝트: msultan/mass_repartitioned_fyn

def featurize_traj(job_tuple):
    """Compute dihedral features for one trajectory and dump them to disk.

    Parameters
    ----------
    job_tuple : tuple
        ``(mutant, mutant_dir, project, proj_folder, proj_top_folder,
        traj_file, stride, save_common, allowed_residue_ind)``.  ``mutant``
        and ``proj_folder`` are unpacked but not used here.

    Returns
    -------
    None
        phi/psi/chi1 features are written under
        ``<mutant_dir>/features/dihedral_features/``.  When ``save_common`` is
        true, the subset of features whose residues all lie in
        ``allowed_residue_ind`` is also written under
        ``<mutant_dir>/features/common_basis/dihedral_features/`` together
        with ``saved_dihed_feat.h5`` describing the selection.

    Notes
    -----
    A trajectory that cannot be loaded is reported and skipped.
    """
    # Separate out the job tuple into the required things.
    (mutant, mutant_dir, project, proj_folder, proj_top_folder, traj_file,
     stride, save_common, allowed_residue_ind) = job_tuple

    # The topology file is named after the part of the trajectory file name
    # that precedes the first underscore.
    top_path = os.path.join(
        proj_top_folder,
        "%s.pdb" % os.path.basename(traj_file).split("_")[0])
    top_trj = mdtraj.load(top_path)

    # Set up featurizer objects.
    dihedral_feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])

    # Load the trajectory.  ``except Exception`` (not a bare ``except``) so
    # that KeyboardInterrupt / SystemExit still propagate.
    try:
        trj = mdtraj.load(traj_file, stride=stride)
    except Exception:
        print("Cant featurize %s" % traj_file)
        return

    # Set up file names.
    traj_name = os.path.splitext(os.path.basename(traj_file))[0]
    print(traj_name)
    file_suffix = str(project) + "_" + traj_name + ".h5"
    dihedral_output_file = (
        os.path.join(mutant_dir, "features/dihedral_features/") + file_suffix)
    combined_output_file = (
        os.path.join(mutant_dir, "features/combined_features/") + file_suffix)

    # ``do_again`` forces re-featurization even when output already exists.
    do_again = True
    already_done = False
    if os.path.isfile(combined_output_file):
        f = verboseload(combined_output_file)
        # Work counts as done only when the stored feature matrix has one
        # row per loaded frame (the original compared with ``!=``, which
        # inverted the check).
        if f.shape[0] == trj.n_frames:
            already_done = True

    if not already_done or do_again:
        dihedral_features = dihedral_feat.partial_transform(trj)

        # Now we can dump.
        verbosedump(dihedral_features, dihedral_output_file)

        if save_common:
            dih_df = pandas.DataFrame(dihedral_feat.describe_features(top_trj))

            # Keep a feature only if every residue it involves is allowed.
            # NOTE(review): the descriptor column is read as "resid"; confirm
            # this matches the key emitted by the installed msmbuilder.
            dih_f_ind = numpy.array(
                [set(i).issubset(allowed_residue_ind) for i in dih_df["resid"]])

            subset_dihedral_features = dihedral_features[:, dih_f_ind]

            common_dir = os.path.join(
                mutant_dir, "features/common_basis/dihedral_features/")
            dihedral_output_file = common_dir + file_suffix

            # Now we can dump.
            verbosedump(subset_dihedral_features, dihedral_output_file)
            # Save the featurizer information.
            verbosedump([dih_df, allowed_residue_ind, dih_f_ind],
                        common_dir + "saved_dihed_feat.h5")
    else:
        print("skipping featurization for %s since its already done" % traj_name)
    return

예제 #8

파일 보기

파일: train.py 프로젝트: msultan/tica_metadynamics

# Training script: featurize trajectories, fit tICA, then set up clustering
# and an MSM.  `md`, `flist`, `dump`, `load`, `pd`, `tICA`, `KMeans` and
# `MarkovStateModel` are expected to be defined/imported before this point.
top = md.load("../top.pdb")

# Load every trajectory in `flist` against the shared topology.
trj_list = [md.load(i, top=top) for i in flist]
print("Found %d trajs" % len(trj_list))

# Raw dihedral angles (no sin/cos transform); featurizer and features are
# both saved under the "raw_" prefix.
f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")

feat = f.transform(trj_list)

dump(feat, "raw_features.pkl")

# NOTE(review): `f` is rebound to a featurizer loaded from
# "./featurizer.pkl" and immediately dumped back to "featurizer.pkl";
# everything below uses this loaded featurizer, not the raw one above.
f = load("./featurizer.pkl")
dump(f, "featurizer.pkl")
# The feature descriptor is built from the first trajectory only.
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)

dump(feat, "features.pkl")

t = tICA(lag_time=100, n_components=2, kinetic_mapping=False)

tica_feat = t.fit_transform(feat)

dump(t, "tica_mdl.pkl")
dump(tica_feat, "tica_features.pkl")

# presumably 50 clusters and an MSM lag time of 100 -- confirm against the
# KMeans / MarkovStateModel signatures.
kmeans_mdl = KMeans(50)
ass = kmeans_mdl.fit_predict(tica_feat)
msm_mdl = MarkovStateModel(100)

예제 #9

파일 보기

파일: feature_descriptor.py 프로젝트: sbhakat/Classifier-CV-sampling

#msmbuilder imports
from msmbuilder.dataset import dataset
from msmbuilder.featurizer import ContactFeaturizer
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import ContinuousTimeMSM
from msmbuilder.utils import verbosedump, verboseload
from msmbuilder.cluster import KCenters
from msmbuilder.utils import load, dump

#other imports
import os, glob, shutil
import numpy as np
import mdtraj as md
import pandas as pd
import pickle
#prettier plots

# Atom indices 1119 through 1276 (np.arange excludes the stop value); only
# these atoms are requested when loading the structure.
a = np.arange(1119, 1277)
top = md.load("../prot.pdb", atom_indices=a)

# Swap this for whatever featurizer you have. The code for now supports
# contacts, dihedral, and angles.
feat = DihedralFeaturizer(types=['chi1', 'chi2'])

# Build the table that maps every feature to its atom indices and save it.
df1 = pd.DataFrame(feat.describe_features(top))
dump(df1, "feature_descriptor.pkl")

예제 #10

파일 보기

# Script: chi1/chi2 dihedral featurization followed by scaling and tICA.
# `ds`, `dump`, `pd` and `tICA` are expected to be defined/imported before
# this point.
featurizer = DihedralFeaturizer(types=['chi1', 'chi2'])
#dump(featurizer,"raw_featurizer.pkl")

#from msmbuilder.utils import load,dump
# NOTE(review): `f` (sincos=False) is only dumped as "raw_featurizer.pkl";
# the features below are produced by `featurizer`, which uses the default
# sincos setting.  Confirm the dumped featurizer is the intended one.
f = DihedralFeaturizer(types=['chi1', 'chi2'], sincos=False)
dump(f, "raw_featurizer.pkl")

#featurizer = DihedralFeaturizer(types=['chi1', 'chi2'], resids= 73,74,75,76,77,78,79,80,81,82,83)
diheds = featurizer.fit_transform(ds)
dump(diheds, "features.pkl")

#print(ds[0].shape)
print(diheds[0].shape)

# Build the table that maps every feature to its atom indices and save it.
# NOTE(review): `describe_features` is given the whole `ds` here rather than
# a single trajectory -- confirm that it accepts this input.
df1 = pd.DataFrame(featurizer.describe_features(ds))
dump(df1, "feature_descriptor.pkl")

# Robust scaling
from msmbuilder.preprocessing import RobustScaler
scaler = RobustScaler()
scaled_diheds = scaler.fit_transform(diheds)

print(diheds[0].shape)
print(scaled_diheds[0].shape)

# Reducing dimension
# NOTE(review): tICA is fit on the unscaled `diheds`; `scaled_diheds` is
# never used after the shape print above.  Confirm which input is intended.
tica_model = tICA(lag_time=1, n_components=10)
# fit and transform can be done in separate steps:
tica_model.fit(diheds)
tica_trajs = tica_model.transform(diheds)