def test_all_large_auto():
    """Run GaussDCA on the bundled large alignment and sanity-check the result.

    Checks that both score matrices are N x N, that their diagonals sum to
    zero, and that the reported sequence count matches the fixture.
    """
    alignment_path = os.path.join(base_path, 'data/large.a3m')
    result = gaussdca.run(alignment_path)

    # Expected side length of the score matrices for this fixture.
    N = 465
    score_keys = ('gdca', 'gdca_corr')

    # Verify shape
    for key in score_keys:
        assert result[key].shape == (N, N)

    # Verify diagonal
    for key in score_keys:
        assert result[key].diagonal().sum() == 0.

    # Verify sequence reweighting
    assert result['seq'] == 35555
def _generate_features(fname, verbose=0): feat_lst = [ 'gdca', 'cross_h', 'nmi_corr', 'mi_corr', 'seq', 'part_entr', 'self_info' ] if not os.path.isfile(fname): raise IOError("Alignment file does not exist.") if verbose > 1: print('Extracting column statistics') self_info, part_entr, seq = process_a3m(fname) seq_dict = {'seq': seq, 'part_entr': part_entr, 'self_info': self_info} original_length = seq.shape[-2] if verbose > 1: print('Computing mutual information') a3m_ali = load_a3m(fname) mi_dict = compute_mi_scores(a3m_ali) if verbose > 1: print('Running GaussDCA') gdca_dict = gaussdca.run(fname) feat_dict = {} for feat in feat_lst: if feat == 'gdca': x_i = gdca_dict['gdca_corr'] x_i = x_i[..., None] elif feat in ['cross_h', 'nmi_corr', 'mi_corr']: x_i = mi_dict[feat] x_i = x_i[..., None] elif feat in ['seq', 'part_entr', 'self_info']: x_i = seq_dict[feat] else: raise ValueError('Unkown feature {}'.format(feat)) L = x_i.shape[0] x_i = _pad(x_i) feat_dict[feat] = x_i[None, ...] mask = np.ones((L, L)) mask = mask[..., None] # reshape from (L,L) to (L,L,1) mask = _pad(mask) feat_dict['mask'] = mask[None, ...] # reshape from (L,L,1) to (1,L,L,1) return feat_dict, original_length, gdca_dict['eff_seq']
def test_all_small_auto():
    """Run GaussDCA on the bundled small alignment and sanity-check the result.

    Checks matrix shapes, zero diagonals, exact symmetry of both score
    matrices, the sequence count, and the effective sequence count (the
    latter only to within a tolerance).
    """
    alignment_path = os.path.join(base_path, 'data/small.a3m')
    result = gaussdca.run(alignment_path)

    # Expected side length of the score matrices for this fixture.
    N = 53
    score_keys = ('gdca', 'gdca_corr')

    # Verify shape
    for key in score_keys:
        assert result[key].shape == (N, N)

    # Verify diagonal
    for key in score_keys:
        assert result[key].diagonal().sum() == 0.

    # Check symmetry: a matrix minus its transpose must be exactly zero.
    for key in score_keys:
        asymmetry = result[key] - result[key].T
        assert asymmetry.max() == 0
        assert asymmetry.min() == 0

    # Verify sequence reweighting
    assert result['seq'] == 13279
    # This number is a bit fuzzy from run to run, hence the tolerance.
    assert abs(result['eff_seq'] - 2392.921) < 0.1
# One-letter amino-acid codes, in the order the composition section is written.
_AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY"

# Five Atchley factors per amino acid, keyed by upper-case one-letter code.
_ATCHLEY_FACTORS = {
    "A": [-0.591, -1.302, -0.733, 1.570, -0.146],
    "C": [-1.343, 0.465, -0.862, -1.020, -0.255],
    "D": [1.050, 0.302, -3.656, -0.259, -3.242],
    "E": [1.357, -1.453, 1.477, 0.113, -0.837],
    "F": [-1.006, -0.590, 1.891, -0.397, 0.412],
    "G": [-0.384, 1.652, 1.330, 1.045, 2.064],
    "H": [0.336, -0.417, -1.673, -1.474, -0.078],
    "I": [-1.239, -0.547, 2.131, 0.393, 0.816],
    "K": [1.831, -0.561, 0.533, -0.277, 1.648],
    "L": [-1.019, -0.987, -1.505, 1.266, -0.912],
    "M": [-0.663, -1.524, 2.219, -1.005, 1.212],
    "N": [0.945, 0.828, 1.299, -0.169, 0.933],
    "P": [0.189, 2.081, -1.628, 0.421, -1.392],
    "Q": [0.931, -0.179, -3.005, -0.503, -1.853],
    "R": [1.538, -0.055, 1.502, 0.440, 2.897],
    "S": [-0.228, 1.399, -4.760, 0.670, -2.647],
    "T": [-0.032, 0.326, 2.213, 0.908, 1.313],
    "V": [-1.337, -0.279, -0.544, 1.242, -1.262],
    "W": [-0.595, 0.009, 0.672, -2.128, -0.184],
    "Y": [0.260, 0.830, 3.097, -0.838, 1.512],
}

# (header, lower, upper): a pair (i, j) is flagged when lower <= |i-j| < upper.
# upper=None means the bin is open-ended.
_SEPARATION_BINS = (
    ("# Sequence separation 5-", 0, 5),
    ("# Sequence separation =5", 5, 6),
    ("# Sequence separation =6", 6, 7),
    ("# Sequence separation =7", 7, 8),
    ("# Sequence separation =8", 8, 9),
    ("# Sequence separation =9", 9, 10),
    ("# Sequence separation =10", 10, 11),
    ("# Sequence separation =11", 11, 12),
    ("# Sequence separation =12", 12, 13),
    ("# Sequence separation =13", 13, 14),
    ("# Sequence separation between 14 and 18", 14, 18),
    ("# Sequence separation between 18 and 23", 18, 23),
    ("# Sequence separation between 23 and 28", 23, 28),
    ("# Sequence separation between 28 and 38", 28, 38),
    ("# Sequence separation between 38 and 48", 38, 48),
    ("# Sequence separation 48+", 48, None),
)

# Residue-residue contact preference scores (upper-triangular storage).
_PREFERENCE_SCORE = [
    [.78,1.52,1.37,.82,.23,.50,1.74,1.55,1.09,.87,.32,.77,1.10,.35,.73,.56,.70,.56,.49,.60],
    [0,1.79,1.93,1.09,.46,.63,2.52,1.82,1.60,1.84,.23,.81,1.56,.52,1.13,.79,.99,.82,1.00,1.01],
    [0,0,1.80,1.10,.45,.76,2.56,1.78,1.30,1.43,.43,.83,1.38,.74,1.07,.81,.85,.99,.72,1.18],
    [0,0,0,.62,.27,.38,1.36,1.01,.88,.78,.22,.61,1.04,.27,.51,.49,.39,.60,.40,.53],
    [0,0,0,0,.43,.11,.61,.59,.33,.59,.06,.18,.47,.20,.30,.16,.21,.17,.18,.23],
    [0,0,0,0,0,.28,.72,.75,.41,.74,.11,.30,.53,.22,.40,.30,.21,.31,.27,.27],
    [0,0,0,0,0,0,2.28,2.45,2.03,2.15,.47,1.06,1.95,.83,1.47,1.03,1.52,1.63,1.08,1.10],
    [0,0,0,0,0,0,0,1.92,2.31,1.98,.43,1.15,1.88,.84,1.16,1.17,1.65,1.40,1.29,1.47],
    [0,0,0,0,0,0,0,0,1.23,1.82,.42,.42,.74,1.62,.51,1.15,.63,1.71,.92,1.01],
    [0,0,0,0,0,0,0,0,0,1.47,.32,.78,1.53,.42,1.38,.84,1.76,1.27,.95,1.04],
    [0,0,0,0,0,0,0,0,0,0,.07,.21,.76,.17,.11,.08,.18,.21,.21,.43],
    [0,0,0,0,0,0,0,0,0,0,0,.55,.91,.43,.66,.26,.41,.60,.52,.56],
    [0,0,0,0,0,0,0,0,0,0,0,0,.97,.51,1.18,.89,.94,1.29,.90,1.02],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,.28,.30,.31,.69,.34,.22,.39],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,.56,.42,.46,.79,.87,1.03],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,.36,.67,.66,.40,.54],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,.55,1.22,.74,1.01],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,.93,.59,.74],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,.36,.31],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,.83],
]

# "lu" contact potential (upper-triangular storage).
_LU_CONTACT_POTENTIAL = [
    [0.4,0.4,0.7,-1.0,-0.1,0.6,-0.4,0.6,-0.4,1.3,0.7,-0.4,1.1,1.3,0.6,0.1,-0.0,-0.8,-0.8,-1.0],
    [0,-0.4,0.4,-1.2,-0.7,0.4,-0.9,0.5,-0.9,1.2,0.5,-1.0,1.3,1.1,0.5,-0.0,-0.1,-1.2,-1.0,-0.9],
    [0,0,0.1,-1.2,-0.2,0.5,-0.6,0.6,-0.2,0.9,0.6,-0.5,1.1,0.8,0.6,0.1,-0.4,-0.9,-0.7,-0.8],
    [0,0,0,-4.4,-1.9,-0.8,-2.2,-1.0,-2.2,0.0,-0.3,-2.0,-0.2,0.2,-0.8,-1.6,-1.9,-2.4,-2.3,-2.6],
    [0,0,0,0,-2.0,-0.5,-1.9,-0.3,-1.7,0.4,-0.2,-1.9,0.3,0.4,-0.3,-0.8,-0.9,-2.1,-1.8,-2.0],
    [0,0,0,0,0,-0.1,-0.8,0.6,-0.5,1.0,0.5,-0.7,0.8,1.0,0.5,0.1,-0.3,-0.9,-0.7,-1.1],
    [0,0,0,0,0,0,-2.7,-0.6,-2.0,0.3,-0.3,-2.3,-0.0,-0.0,-0.6,-1.1,-1.3,-2.4,-2.0,-2.2],
    [0,0,0,0,0,0,0,0.1,-0.8,1.3,0.8,-0.8,1.1,1.3,0.4,0.1,0.1,-0.8,-1.0,-1.0],
    [0,0,0,0,0,0,0,0,-2.9,0.3,-0.4,-2.2,-0.0,-0.0,-0.7,-0.7,-1.3,-2.4,-2.1,-2.6],
    [0,0,0,0,0,0,0,0,0,1.1,1.0,0.2,0.7,1.9,0.9,-0.2,0.3,0.1,-0.4,-0.2],
    [0,0,0,0,0,0,0,0,0,0,0.3,-0.3,1.0,1.1,0.6,0.1,0.3,-0.7,-0.5,-0.6],
    [0,0,0,0,0,0,0,0,0,0,0,-2.7,0.0,-0.0,-0.6,-1.0,-1.1,-2.3,-2.1,-2.4],
    [0,0,0,0,0,0,0,0,0,0,0,0,1.6,0.8,1.0,0.8,0.7,-0.2,-0.2,-0.2],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,1.2,1.1,0.0,0.2,-0.1,-0.4,0.0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.2,0.1,-0.0,-0.9,-0.6,-1.2],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.6,-0.4,-1.2,-1.3,-1.4],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.3,-1.4,-1.6,-1.9],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-3.0,-2.3,-2.5],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.3,-2.4],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-3.3],
]

# "levitt" contact potential (upper-triangular storage).
_LEVITT_CONTACT_POTENTIAL = [
    [.1,.7,.1,.1,0,.5,.4,.6,.1,0,.4,-0.1,.4,.2,-0.1,-0.1,-0.4,-0.7,0,-0.3],
    [0,.5,-0.3,-0.4,-0.4,.6,.3,.6,.3,0,1.0,.2,.5,0,-0.5,.3,-0.7,-0.8,0,-0.8],
    [0,0,1.1,-1.2,-1.2,0,.4,0,0,-0.4,0.1,-0.5,0,-0.3,-1.0,-0.5,-1.2,-1.6,-0.5,-1.5],
    [0,0,0,-1.4,-1.4,-0.1,0,-0.1,-0.1,-0.6,0.1,-0.6,0,-0.3,-1.3,-0.8,-1.4,-1.7,-0.7,-1.6],
    [0,0,0,0,-1.5,-0.1,0,-0.2,-0.1,-0.4,0,-0.7,-0.1,-0.6,-1.4,-0.8,-1.4,-1.8,-0.8,-1.7],
    [0,0,0,0,0,.1,.1,.1,-0.1,-0.3,.6,-0.2,.2,0,-0.5,0,-1.0,-1.3,-0.4,-0.7],
    [0,0,0,0,0,0,0,0,-0.6,-0.3,-1.0,-1.4,-0.3,-0.3,0.1,0,-1.0,-0.6,-1.1,-0.3],
    [0,0,0,0,0,0,0,0.1,-0.6,-0.4,-1.1,-1.5,-0.2,-0.3,-0.3,0.1,-1.0,-0.8,-1.0,-0.5],
    [0,0,0,0,0,0,0,0,-0.7,-0.7,-0.3,-0.8,-0.1,-0.4,-0.3,0,-0.8,-0.8,-0.8,-0.6],
    [0,0,0,0,0,0,0,0,0,-0.5,-0.4,-0.9,0,-0.5,-0.6,-0.2,-1.1,-1.0,-0.5,-0.8],
    [0,0,0,0,0,0,0,0,0,0,.7,.1,.1,0,-0.1,.5,-1.0,-0.8,0,-0.4],
    [0,0,0,0,0,0,0,0,0,0,0,-0.9,-0.4,-0.6,-0.5,0,-1.4,-1.3,-1.0,-0.9],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,-0.2,-0.1,-0.1,-0.6,-0.6,-0.6,-0.4],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,-0.5,-0.6,-0.3,-0.8,-0.9,-0.7,-0.7],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.5,-0.8,-1.5,-2.0,-0.9,-1.9],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.7,-0.8,-1.3,-0.6,-1.2],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.6,-1.8,-1.5,-1.7],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.2,-1.5,-2.0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.6,-1.2],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.0],
]

# "braun" contact potential (lower-triangular storage, unlike the others).
_BRAUN_CONTACT_POTENTIAL = [
    [-0.29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [-0.14,-0.18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [-0.10,-0.15,-0.48,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [-0.04,-0.24,-0.29,-0.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0.27,-0.25,-0.31,-0.45,-0.48,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [-0.09,-0.16,-0.31,-0.28,-0.05,-0.50,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [-0.21,-0.18,0.00,-0.10,-0.34,-0.27,-0.11,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [-0.34,-0.01,0.18,-0.18,-0.28,0.16,-0.30,-0.53,0,0,0,0,0,0,0,0,0,0,0,0],
    [0.25,-0.02,-0.02,-0.32,0.21,-0.36,0.01,-0.73,-0.75,0,0,0,0,0,0,0,0,0,0,0],
    [-0.42,0.08,0.08,0.36,-0.16,-0.28,0.69,-0.74,0.27,-1.77,0,0,0,0,0,0,0,0,0,0],
    [0.06,0.28,0.76,0.30,0.99,0.65,-0.02,0.70,-0.78,0.31,-0.78,0,0,0,0,0,0,0,0,0],
    [0.04,0.38,0.18,0.30,0.57,0.15,-0.03,0.44,0.00,0.12,0.21,-0.68,0,0,0,0,0,0,0,0],
    [0.28,0.06,0.19,0.57,0.34,0.25,0.23,0.74,0.43,0.28,0.04,-0.23,-0.58,0,0,0,0,0,0,0],
    [0.49,-0.04,0.48,0.25,1.45,0.12,-0.14,0.46,-0.52,0.07,0.59,-0.21,-0.06,-0.45,0,0,0,0,0,0],
    [0.54,0.35,0.41,0.35,0.44,-0.04,-0.06,-0.09,0.07,0.39,0.73,0.19,-0.31,0.20,-0.17,0,0,0,0,0],
    [-0.09,0.44,0.37,0.10,0.24,0.25,0.33,-0.34,1.07,-0.45,-0.21,-0.13,-0.22,-0.56,0.28,-0.15,0,0,0,0],
    [0.56,0.28,0.53,0.37,-0.00,0.75,-0.00,0.02,0.44,0.68,0.26,-0.05,-0.26,-0.27,0.05,0.57,0.21,0,0,0],
    [0.40,0.59,0.43,0.37,0.05,0.31,0.03,-0.20,0.53,0.92,0.34,0.24,-0.31,-0.00,0.56,-0.11,0.58,-0.03,0,0],
    [-0.26,0.24,0.51,0.80,0.26,0.33,0.61,0.74,0.21,0.53,0.87,-0.03,0.32,-0.43,-0.03,-0.61,-0.43,-0.79,0.11,0],
    [0.21,0.53,0.37,0.51,0.53,0.38,0.25,1.37,0.44,0.17,0.41,0.10,0.27,0.76,-0.20,-0.14,-1.12,-0.85,0.86,0.5],
]

# One entry per residue-pair potential section:
# (header, alphabet giving the table's row/column order, (i, j) indices used
#  when either residue is not in the alphabet, True if the table is stored
#  lower-triangular, table, function rescaling a raw table value).
_PAIR_POTENTIALS = (
    ("# pref score", "IVLFCMAGTSWYPHEQDNKR", (10, 10), False,
     _PREFERENCE_SCORE, lambda v: (v - 0.06) / 2.5),
    ("# scld lu con pot", "GASCVTIPMDNLKEQRHFYW", (0, 16), False,
     _LU_CONTACT_POTENTIAL, lambda v: (v + 4.4) / 6.3),
    ("# levitt con pot", "GAVLIPDENQKRSTMCYWHF", (0, 4), False,
     _LEVITT_CONTACT_POTENTIAL, lambda v: (v + 2.7) / 3.8),
    ("# braun con pot", "GAVLIFYWMCPSTNQHKRDE", (11, 0), True,
     _BRAUN_CONTACT_POTENTIAL, lambda v: (v + 1.77) / 3.22),
)


def _call_best_effort(argv):
    """Run ``argv`` without a shell; return its exit status, or None if it could not start."""
    import subprocess  # local import: the file's import block is not visible from here

    try:
        return subprocess.call(argv)
    except OSError:
        return None


def _split_fields(line):
    """Split a line on single spaces and drop the empty fields."""
    return [field for field in line.strip().split(" ") if field]


def _format_row(values):
    """Render one output row: each value rounded to 6 places plus a space, then a newline."""
    return "".join(str(round(value, 6)) + ' ' for value in values) + "\n"


def _in_separation_bin(distance, lower, upper):
    """Return True when ``lower <= distance < upper`` (``upper`` None means unbounded)."""
    return distance >= lower and (upper is None or distance < upper)


def _pair_potential_rows(residues, alphabet, fallback, lower_triangular, table, normalise):
    """Yield one list of rescaled pair-potential values per residue."""
    # Resolve each residue to its table index once instead of once per pair.
    indices = [alphabet.find(residue) for residue in residues]
    for first in indices:
        row = []
        for second in indices:
            if first < 0 or second < 0:
                i, j = fallback
            else:
                i, j = first, second
            low, high = (i, j) if i < j else (j, i)
            raw = table[high][low] if lower_triangular else table[low][high]
            row.append(normalise(raw))
        yield row


def _read_pairstats(pairstats_file_name, size):
    """Read columns 3-5 of the pairstats file into three symmetric ``size`` x ``size`` arrays."""
    matrices = [np.zeros((size, size)) for _ in range(3)]
    with open(pairstats_file_name, "r") as f:
        for line in f:
            items = _split_fields(line)
            if not items:
                continue
            # Residue indices in the file are 1-based.
            i = int(items[0]) - 1
            j = int(items[1]) - 1
            for column, matrix in enumerate(matrices, 2):
                d = float(items[column])
                matrix[i, j] = d
                matrix[j, i] = d
    return matrices


def gen_feat(fasta_file_name, aln_file_name):
    """Write the text feature file for one target and return its path.

    The sequence is taken from line 2 of ``fasta_file_name``. The bundled
    ``lib/alnstats`` executable is run on ``aln_file_name`` to produce column
    and pair statistics, and ``gaussdca.run`` is called on the same
    alignment. All features are written, section by section, to
    ``<tmpdir>/<target>.feat`` in a fresh temporary directory. Each section
    starts with a ``# ...`` header line; values are rounded to 6 decimals.

    Sections, in order: log sequence length, log alignment count, log
    effective alignment count, amino-acid composition, Atchley factors,
    relative sequence separation, sequence-separation bins, four
    residue-pair potentials, per-column entropy, three pairstats matrices,
    and the GaussDCA ``gdca_corr`` matrix.

    Args:
        fasta_file_name: Path to the FASTA file; ``<target>`` is its base
            name up to the first dot.
        aln_file_name: Path to the alignment file.

    Returns:
        Path of the feature file that was written.

    Raises:
        IOError: If ``aln_file_name`` is not an existing file.
        ValueError: If line 2 of the FASTA file holds no sequence, or if the
            statistics files cannot be parsed (e.g. because alnstats failed).
    """
    if not os.path.isfile(aln_file_name):
        raise IOError("Alignment file does not exist.")

    # linecache keeps file contents across calls; revalidate so a FASTA file
    # rewritten since a previous call is re-read.
    linecache.checkcache(fasta_file_name)
    seq = linecache.getline(fasta_file_name, 2).strip().lower()
    if not seq:
        raise ValueError(
            "No sequence found on line 2 of FASTA file: " + fasta_file_name)
    seq_len = len(seq)
    residues = [residue.upper() for residue in seq]

    tmpdir = tempfile.mkdtemp() + "/"
    target = fasta_file_name.split("/")[-1].split(".")[0]
    base_path = os.path.dirname(os.path.abspath(__file__))
    alnstats = base_path + "/lib/alnstats"
    colstats_file_name = tmpdir + target + ".colstats"
    pairstats_file_name = tmpdir + target + ".pairstats"
    feat_file_name = tmpdir + target + ".feat"

    # Argument lists instead of shell strings, so paths containing spaces or
    # shell metacharacters are passed through verbatim. Exit statuses are
    # ignored as before; a failed alnstats run surfaces when its output is
    # parsed below.
    _call_best_effort(["chmod", "+x", alnstats])
    _call_best_effort(["cp", aln_file_name, tmpdir + "target.aln"])
    _call_best_effort(
        [alnstats, aln_file_name, colstats_file_name, pairstats_file_name])

    # The context manager closes the file even if a later step raises.
    with open(feat_file_name, "w") as feat_file:
        write = feat_file.write

        # Global alignment statistics.
        write("# Sequence Length (log)" + "\n")
        write(str(round(log(seq_len), 6)) + "\n")
        write("# alignment-count (log)" + "\n")
        alignment_count = int(linecache.getline(colstats_file_name, 2).strip())
        write(str(round(log(alignment_count), 6)) + "\n")
        write("# effective-alignment-count (log)" + "\n")
        effective_count = float(linecache.getline(colstats_file_name, 3).strip())
        write(str(round(log(effective_count), 6)) + "\n")

        # Amino-acid composition: one fraction per line.
        write("# AA composition" + "\n")
        for amino_acid in _AMINO_ACIDS:
            fraction = float(seq.count(amino_acid.lower())) / seq_len
            write(str(round(fraction, 6)) + "\n")

        # Atchley factors: one row per factor, zero for unknown residues.
        write("# Atchley factors" + "\n")
        for factor_index in range(5):
            factors = np.array(
                [_ATCHLEY_FACTORS[residue][factor_index]
                 if residue in _ATCHLEY_FACTORS else 0
                 for residue in residues],
                dtype=float)
            write(_format_row(factors))

        # Relative sequence separation |i-j| / L.
        write("# Relative sequence separation" + "\n")
        for i in range(seq_len):
            write(_format_row(
                abs(i - j) / float(seq_len) for j in range(seq_len)))

        # Binary indicator matrices for the sequence-separation bins.
        for header, lower, upper in _SEPARATION_BINS:
            write(header + "\n")
            for i in range(seq_len):
                flags = ("1 " if _in_separation_bin(abs(i - j), lower, upper)
                         else "0 "
                         for j in range(seq_len))
                write("".join(flags) + "\n")

        # Residue-pair potentials, each rescaled by its own offset and range.
        for (header, alphabet, fallback, lower_triangular,
             table, normalise) in _PAIR_POTENTIALS:
            write(header + "\n")
            for row in _pair_potential_rows(
                    residues, alphabet, fallback, lower_triangular,
                    table, normalise):
                write(_format_row(row))

        # Last field of colstats lines 5, 6, ... (one line per residue).
        write("# Shannon entropy sum" + "\n")
        write(_format_row(
            float(_split_fields(
                linecache.getline(colstats_file_name, i + 5))[-1])
            for i in range(seq_len)))

        # The three pairstats matrices, read from the file in a single pass.
        pstat_matrices = _read_pairstats(pairstats_file_name, seq_len)
        pstat_headers = ("# pstat_pots", "# pstat_mimt", "# pstat_mip")
        for header, matrix in zip(pstat_headers, pstat_matrices):
            write(header + "\n")
            for i in range(seq_len):
                write(_format_row(matrix[i, j] for j in range(seq_len)))

        # GaussDCA scores. The header spelling is kept as-is because
        # downstream readers may match on it.
        write("# guassdca" + "\n")
        gdca_dict = gaussdca.run(aln_file_name)
        x_i = gdca_dict['gdca_corr']
        gdca_size = len(x_i)
        for i in range(gdca_size):
            write(_format_row(x_i[i][j] for j in range(gdca_size)))

    return feat_file_name
# Visual check: run GaussDCA on the large example alignment and display the
# 'gdca_corr' score matrix as an image with a colour bar.
import gaussdca
import pylab as plt

results = gaussdca.run('../../gaussdca/test/data/large.a3m')

score_matrix = results['gdca_corr']
plt.imshow(score_matrix)
plt.colorbar()
plt.show()
# Visual check: run GaussDCA on the small example alignment and display the
# 'gdca_corr' score matrix as an image with a colour bar.
import gaussdca
import pylab as plt

results = gaussdca.run('../../gaussdca/test/data/small.a3m')

score_matrix = results['gdca_corr']
plt.imshow(score_matrix)
plt.colorbar()
plt.show()