def getfromDB(previous_smotif, current_ss, direction, database_cutoff):
    """
    Look up the Smotif library entries that join current_ss to the assembly.

    :param previous_smotif: iterable of log entries; the payload (last item)
        of the last entry whose first item is 'smotif_def' is used, e.g.
        ['smotif_def', [['helix', 6, 7, 5, 145, 150],
                        ['helix', 23, 5, 1, 156, 178],
                        ['strand', 5, 7, 8, 133, 137]]]
    :param current_ss: definition of the SSE being added
    :param direction: 'left' pairs current_ss with the first recorded SSE;
        any other value pairs it with the last recorded SSE
    :param database_cutoff: passed through to sm.readSmotifDatabase
    :return: tuple (result of sm.readSmotifDatabase, smotif_def)
    """
    sse_defs = []
    for record in previous_smotif:
        if record[0] == 'smotif_def':
            sse_defs = record[-1]

    extend_left = direction == 'left'

    # With no 'smotif_def' record, sse_defs stays empty and this indexing
    # raises IndexError.
    neighbour_ss = sse_defs[0] if extend_left else sse_defs[-1]

    if extend_left:
        # double check this implementation
        smotif_def = sm.getSmotif(current_ss, neighbour_ss)
    else:
        smotif_def = sm.getSmotif(neighbour_ss, current_ss)

    smotif_entries = sm.readSmotifDatabase(smotif_def, database_cutoff)
    return smotif_entries, smotif_def
def getfromDB(previous_smotif, current_ss, direction, database_cutoff):
    """
    Look up the Smotif library entries that join current_ss to a Smotif pair.

    :param previous_smotif: iterable of log entries; the payload (last item)
        of the last entry whose first item is 'smotif_def' is used
    :param current_ss: definition of the SSE being added
    :param direction: 'left' pairs current_ss with element 0 of the recorded
        definition; any other value pairs it with element 1
    :param database_cutoff: passed through to sm.readSmotifDatabase
    :return: tuple (result of sm.readSmotifDatabase, smotif_def)
    :raises ValueError: if previous_smotif contains no 'smotif_def' entry
    """
    psmotif = None
    for entry in previous_smotif:
        if 'smotif_def' == entry[0]:
            psmotif = entry[-1]

    # Without this check a missing entry left psmotif unbound and the
    # function failed below with an UnboundLocalError that hid the cause.
    if psmotif is None:
        raise ValueError("previous_smotif has no 'smotif_def' entry")

    if direction == 'left':
        # double check this implementation
        smotif_def = sm.getSmotif(current_ss, psmotif[0])
    else:
        smotif_def = sm.getSmotif(psmotif[1], current_ss)

    return sm.readSmotifDatabase(smotif_def, database_cutoff), smotif_def
def getfromDB(pair, sse_ordered, database_cutoff):
    """
    Read the Smotif library entries for the two SSEs selected by pair.

    :param pair: indexable; pair[0] and pair[1] are positions in sse_ordered
    :param sse_ordered: indexable collection of SSE definitions
    :param database_cutoff: passed through to readSmotifDatabase
    :return: tuple (result of readSmotifDatabase, sse_ordered, smotif_def)
    """
    # Imported at call time, as in the original.
    from utility.smotif_util import getSmotif, readSmotifDatabase

    smotif_def = getSmotif(sse_ordered[pair[0]], sse_ordered[pair[1]])
    smotif_entries = readSmotifDatabase(smotif_def, database_cutoff)
    return smotif_entries, sse_ordered, smotif_def
def getfromDB(previous_smotif, current_ss, direction, database_cutoff, stage,
              alt_smotif_def):
    """
    Look up the Smotif library entries that join current_ss to the assembly,
    choosing the neighbouring SSE through alt_smotif_def.

    :param previous_smotif: nested log of the previous assembly step. For
        stage 2 the SSE indices are read from previous_smotif[0][2] and the
        SSE definitions from previous_smotif[1][-1]; for any other stage
        copies of previous_smotif[1][1] (definitions) and
        previous_smotif[1][2] (indices) are used
    :param current_ss: definition of the SSE being added
    :param direction: 'left' selects alt_smotif_def[1] as the neighbour;
        any other value selects alt_smotif_def[0]
    :param database_cutoff: passed through to sm.readSmotifDatabase
    :param stage: assembly stage; only the value 2 is treated specially
    :param alt_smotif_def: indexable holding the two SSE indices of the pair
    :return: tuple (result of sm.readSmotifDatabase, smotif_def)
    """
    extend_left = direction == 'left'

    if stage == 2:
        sse_indices = previous_smotif[0][2]
        sse_defs = previous_smotif[1][-1]
    else:
        sse_defs = (previous_smotif[1][1])[:]
        sse_indices = (previous_smotif[1][2])[:]

    # Find where the neighbouring SSE index sits in the index list and take
    # the definition at that same position.
    neighbour_index = alt_smotif_def[1] if extend_left else alt_smotif_def[0]
    neighbour_ss = sse_defs[sse_indices.index(neighbour_index)]

    if extend_left:
        # double check this implementation
        smotif_def = sm.getSmotif(current_ss, neighbour_ss)
    else:
        smotif_def = sm.getSmotif(neighbour_ss, current_ss)

    smotif_entries = sm.readSmotifDatabase(smotif_def, database_cutoff)
    return smotif_entries, smotif_def
def SmotifSearch(index_array):
    """
    Main ()

    Run every Smotif library entry for one SSE pair through the available
    experimental-data filters and pickle the entries that pass.

    Hits are written to '0_<index_array[0]>_<index_array[1]>.pickle' in the
    current directory; nothing is written when there are no hits.

    :param index_array: passed to getSSdef to obtain the two SSE definitions;
        its first two items also name the output file
    :return: always True, including when the Smotif library is empty
    """
    s1_def, s2_def = getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)

    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys()  # e.g. ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    smotif_data = sm.readSmotifDatabase(smotif_def, exp_data['database_cutoff'])
    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return True

    # The natives list does not change between entries: look it up once.
    exclude_natives = 'natives' in exp_data_types
    natives = exp_data['natives'] if exclude_natives else []

    dump_log = []

    # ************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies
    # various filters. This is the place to add new filters as you desire.
    # For starters, look at Sequence filter.
    # ************************************************************************
    for smotif in smotif_data:

        # Bound for every entry: it used to be assigned only when 'natives'
        # was present, yet it is printed for every hit further down, which
        # raised NameError whenever exp_data had no 'natives' key.
        tpdbid = smotif[0][0]

        # ************************************************
        # Excluding the natives
        # ************************************************
        if exclude_natives and tpdbid[0:4] in natives:
            # Stop further execution, but, iterate.
            continue

        # ************************************************
        # Applying different filters to Smotifs
        # Prepare temp log array to save data at the end
        # ************************************************
        pcs_tensor_fits, rdc_tensor_fits, noe_fmeasure = [], [], []
        tlog = [
            ['smotif', smotif],
            ['smotif_def', [s1_def, s2_def]],
            ['cathcodes', [smotif[0]]],
        ]

        # ************************************************
        # Sequence filter
        # Aligns the smotif seq to target seq and calculates
        # sequence identity and the alignment score
        # ************************************************
        smotif_seq, seq_identity, blosum62_score = \
            Sfilter.SequenceSimilarity(s1_def, s2_def, smotif, exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity, blosum62_score])

        # ************************************************
        # Pseudocontact Shift filter
        # uses experimental PCS data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************
        if 'pcs_data' in exp_data_types:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def, smotif,
                                                 exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # ************************************************
        # Ambiguous NOE score filter
        # uses experimental ambiguous noe data to filter Smotifs
        # scoring based on f-measure?
        # ************************************************
        if 'noe_data' in exp_data_types:
            noe_fmeasure = Nfilter.s1NOEfit(s1_def, s2_def, smotif, exp_data)
            tlog.append(['NOE_filter', noe_fmeasure])

        # ************************************************
        # Residual dipolar coupling filter
        # uses experimental RDC data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************
        if 'rdc_data' in exp_data_types:
            # The RDC fit is only attempted once the NOE score exceeds 0.5;
            # the truthiness test skips the untouched [] placeholder.
            if noe_fmeasure and noe_fmeasure > 0.5:
                rdc_tensor_fits = Rfilter.RDCAxRhFit(s1_def, s2_def, smotif,
                                                     exp_data)
                tlog.append(['RDC_filter', rdc_tensor_fits])

        # Keep the entry only when a PCS or RDC fit produced something.
        if pcs_tensor_fits or rdc_tensor_fits:
            # Single-argument form: emits the same text whether print is the
            # Python 2 statement or the Python 3 function.
            print("%s %s %s" % (tpdbid, noe_fmeasure, rdc_tensor_fits))
            dump_log.append(tlog)

    # Save all of the hits in pickled arrays
    if dump_log:
        print("num of hits %s" % len(dump_log))
        io.dumpPickle(
            '0_' + str(index_array[0]) + "_" + str(index_array[1]) + ".pickle",
            dump_log)

    return True
def S1SmotifSearch(task):
    """
    Main ()

    Run every Smotif library entry for one SSE pair through the available
    experimental-data filters, optionally rank the survivors, and write them
    to '0_<index_array[0]>_<index_array[1]>.gzip' in the current directory.

    :param task: indexable; task[0] is the index array passed to
        mutil.getSSdef, task[1] is the stage number (used as stage - 1 to
        index the per-stage settings in exp_data)
    :return: the list of hit logs, or False when the Smotif library is empty
        or no entry passes the filters
    """
    index_array = task[0]
    stage = task[1]

    s1_def, s2_def, sse_route = mutil.getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)

    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys()  # e.g. ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    smotif_data = sm.readSmotifDatabase(smotif_def, exp_data['database_cutoff'])
    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return False

    # exp_data['natives'] used to be read unconditionally on every iteration,
    # ahead of the "'natives' in exp_data_types" test, so a missing key raised
    # KeyError and the test could never take effect. Both optional lists are
    # now read once, and only when present.
    exclude_natives = 'natives' in exp_data_types
    natives = exp_data['natives'] if exclude_natives else []
    homologs_only = 'homologs' in exp_data_types
    homologs = exp_data['homologs'] if homologs_only else []

    dump_log = []

    # ************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies
    # various filters. This is the place to add new filters as you desire.
    # For starters, look at Sequence filter.
    # ************************************************************************
    for smotif in smotif_data:

        # ************************************************
        # Excluding the natives
        # ************************************************
        tpdbid = smotif[0][0]
        pdbid = tpdbid[0:4]

        if exclude_natives and pdbid in natives:
            # Stop further execution, but, iterate.
            continue

        # Smotif assembly only from the specified pdb files
        if homologs_only and pdbid not in homologs:
            # Stop further execution, but, iterate.
            continue

        # ************************************************
        # Applying different filters to Smotifs
        # Prepare temp log array to save data at the end
        # ************************************************
        pcs_tensor_fits = []
        noe_probability = 0.0
        tlog = [
            ['smotif', smotif, sse_route],
            ['smotif_def', [s1_def, s2_def]],
            ['qcp_rmsd'],
            ['cathcodes', [smotif[0]], [sse_route]],
        ]

        # ************************************************
        # Sequence filter
        # Aligns the smotif seq to target seq and calculates
        # sequence identity and the alignment score
        # ************************************************
        smotif_seq, seq_identity = Sfilter.getS1SeqIdentity(
            s1_def, s2_def, smotif, exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity])

        # ************************************************
        # Unambiguous NOE score filter
        # uses experimental ambiguous noe data to filter Smotifs
        # scoring based on f-measure?
        # ************************************************
        if 'ilva_noes' in exp_data_types:
            (noe_probability, no_of_noes, noe_energy, noe_data,
             cluster_protons, cluster_sidechains) = noepdf.s1ILVApdf(
                 s1_def, s2_def, smotif, exp_data, stage)

            # Entries below the per-stage probability threshold are dropped.
            if noe_probability < exp_data['expected_noe_prob'][stage - 1]:
                continue
            tlog.append([
                'NOE_filter', noe_probability, no_of_noes, noe_energy,
                noe_data, cluster_protons, cluster_sidechains
            ])

        # ************************************************
        # Residual dipolar coupling filter
        # uses experimental RDC data to filter Smotifs
        # scoring based on normalised chisqr.
        # ************************************************
        if 'rdc_data' in exp_data_types:
            rdc_tensor_fits, log_likelihood, rdc_energy = Rfilter.RDCAxRhFit(
                s1_def, s2_def, smotif, exp_data)
            # An empty/falsy fit result drops the entry.
            if not rdc_tensor_fits:
                continue
            tlog.append([
                'RDC_filter', rdc_tensor_fits, log_likelihood, rdc_energy
            ])

        # ************************************************
        # Pseudocontact Shift filter
        # uses experimental PCS data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************
        if 'pcs_data' in exp_data_types:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def, smotif,
                                                 exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # ************************************************
        # Calc RMSD of the reference structure.
        # Used to identify the lowest possible RMSD
        # structure for the target, from the Smotif library.
        # ************************************************
        if 'reference_ca' in exp_data_types:
            ref_rmsd = ref.calcRefRMSD(exp_data['reference_ca'], s1_def,
                                       s2_def, smotif, rmsd_cutoff=100.0)
            tlog.append(['Ref_RMSD', ref_rmsd, seq_identity])

        # Keep the entry only when the PCS fit or the NOE score is truthy.
        if pcs_tensor_fits or noe_probability:
            dump_log.append(tlog)

    if not dump_log:
        return False

    # Save all of the hits in pickled arrays
    if 'rank_top_hits' in exp_data_types:
        num_hits = exp_data['rank_top_hits'][stage - 1]
        dump_log = rank.rank_assembly(dump_log, num_hits)
        # Single-argument form: emits the same text whether print is the
        # Python 2 statement or the Python 3 function.
        print("Reducing the amount of data to: %s %s" % (num_hits,
                                                         len(dump_log)))

    print("num of hits %s" % len(dump_log))
    io.dumpGzipPickle(
        '0_' + str(index_array[0]) + "_" + str(index_array[1]) + ".gzip",
        dump_log)
    return dump_log
def loopConstraintAlt(coo_arrays, sse_order, direction):
    """
    Check whether the gap between the two outermost SSEs on one side of the
    assembly is compatible with tabulated loop statistics.

    The CA-CA distance between the facing ends of the two SSEs is divided by
    the loop length and must lie within three tabulated standard deviations
    of the tabulated value for that Smotif type and loop length.

    :param coo_arrays: coordinate arrays, one per SSE, in the same order as
        sse_order; each is passed to getCAcoo
    :param sse_order: SSE definitions; items [-2] and [-1] of a definition
        are subtracted across the pair to obtain the loop length
    :param direction: 'right' tests the last two SSEs; any other value tests
        the first two
    :return: True when the normalised distance is inside the allowed window,
        False otherwise, or when the loop length is 0 or greater than 30
    """
    # Lookup tables indexed by loop length (0..30).
    # NOTE(review): values are presumably a per-residue end-to-end distance
    # and its standard deviation - confirm against the data they came from.
    # The helix-strand and strand-helix tables are identical in the
    # original, so one list serves both types.
    helix_strand_dist = [
        0, 3.809, 3.137, 2.818, 2.482, 2.154, 1.928, 1.749, 1.67, 1.531,
        1.428, 1.377, 1.282, 1.261, 1.203, 1.135, 1.045, 1.004, 1.02, 0.977,
        0.928, 0.865, 0.834, 0.811, 0.756, 0.761, 0.749, 0.777, 0.74, 0.655,
        0.648
    ]
    helix_helix_dist = [
        0, 3.81, 3.036, 2.836, 2.511, 2.275, 2.178, 2.026, 1.876, 1.835,
        1.669, 1.658, 1.666, 1.625, 1.53, 1.445, 1.374, 1.292, 1.212, 1.164,
        1.133, 1.049, 1.043, 1.074, 0.977, 0.965, 0.938, 0.868, 0.824, 0.805,
        0.788
    ]
    strand_strand_dist = [
        0, 3.81, 3.19, 1.846, 1.607, 1.274, 1.14, 1.139, 1.198, 1.177, 1.115,
        1.029, 1.048, 0.935, 0.91, 0.908, 0.85, 0.83, 0.852, 0.849, 0.761,
        0.722, 0.742, 0.684, 0.677, 0.611, 0.587, 0.596, 0.565, 0.576, 0.532
    ]
    helix_helix_std = [
        0, 0.027, 0.284, 0.397, 0.441, 0.483, 0.499, 0.504, 0.537, 0.534,
        0.538, 0.545, 0.507, 0.494, 0.468, 0.447, 0.428, 0.439, 0.415, 0.432,
        0.392, 0.382, 0.38, 0.401, 0.381, 0.38, 0.317, 0.328, 0.304, 0.318,
        0.273
    ]
    strand_strand_std = [
        0, 0.027, 0.313, 0.293, 0.469, 0.419, 0.474, 0.49, 0.505, 0.447,
        0.501, 0.475, 0.479, 0.417, 0.451, 0.416, 0.373, 0.395, 0.47, 0.418,
        0.36, 0.349, 0.359, 0.312, 0.302, 0.281, 0.279, 0.264, 0.259, 0.346,
        0.257
    ]
    helix_strand_std = [
        0, 0.067, 0.278, 0.361, 0.418, 0.45, 0.448, 0.455, 0.436, 0.452,
        0.438, 0.416, 0.407, 0.402, 0.411, 0.405, 0.381, 0.378, 0.373, 0.36,
        0.372, 0.338, 0.322, 0.308, 0.285, 0.289, 0.296, 0.298, 0.294, 0.286,
        0.208
    ]
    stats_by_type = (
        ('hh', helix_helix_dist, helix_helix_std),
        ('hs', helix_strand_dist, helix_strand_std),
        ('sh', helix_strand_dist, helix_strand_std),
        ('ss', strand_strand_dist, strand_strand_std),
    )

    # Positions of the SSE being tested and its neighbour, and which CA of
    # each one faces the gap between them.
    extend_right = direction == 'right'
    if extend_right:
        current_pos, previous_pos = -1, -2
        current_atom, previous_atom = 0, -1
    else:
        current_pos, previous_pos = 0, 1
        current_atom, previous_atom = -1, 0

    current_sse = sse_order[current_pos]
    previous_sse = sse_order[previous_pos]

    # The pair is always handed to getSmotif in sse_order order.
    if extend_right:
        first_sse, second_sse = previous_sse, current_sse
    else:
        first_sse, second_sse = current_sse, previous_sse
    smotif_def = getSmotif(first_sse, second_sse)
    loop_length = second_sse[-2] - first_sse[-1]

    current_xyz = getCAcoo(coo_arrays[current_pos])
    previous_xyz = getCAcoo(coo_arrays[previous_pos])
    current_ca = [current_xyz[axis][current_atom] for axis in (0, 1, 2)]
    previous_ca = [previous_xyz[axis][previous_atom] for axis in (0, 1, 2)]

    dist = get_dist(current_ca, previous_ca)

    # The tables stop at 30, and a zero length cannot be used as a divisor.
    if loop_length > 30.0 or loop_length == 0.0:
        return False

    normalised_dist = round(dist / float(loop_length), 2)

    # An unrecognised Smotif type leaves both statistics at 0.
    stat_dist = 0
    stat_std = 0
    for smotif_type, dist_table, std_table in stats_by_type:
        if smotif_def[0] == smotif_type:
            stat_dist = dist_table[loop_length]
            stat_std = std_table[loop_length]

    tolerance = 3.0 * stat_std
    lower_bound = stat_dist - tolerance
    upper_bound = stat_dist + tolerance
    return bool(lower_bound <= normalised_dist <= upper_bound)