Ejemplo n.º 1
0
def getfromDB(previous_smotif, current_ss, direction, database_cutoff):
    """
    Read the Smotif library for ``current_ss`` paired with the outermost SSE
    of the previously searched Smotif.

    :param previous_smotif: iterable of tagged entries; the last item of the
        last entry whose first item is ``'smotif_def'`` is used, e.g.
        ``['smotif_def', [['helix', 6, 7, 5, 145, 150],
        ['helix', 23, 5, 1, 156, 178], ['strand', 5, 7, 8, 133, 137]]]``
    :param current_ss: definition of the SSE being paired
    :param direction: ``'left'`` pairs ``current_ss`` with the first searched
        SSE; any other value pairs the last searched SSE with ``current_ss``
    :param database_cutoff: forwarded unchanged to ``sm.readSmotifDatabase``
    :return: tuple ``(result of sm.readSmotifDatabase, smotif_def)``
    """
    # Collect every 'smotif_def' payload; the last one wins, and an empty
    # list is used when there is none (indexing it below raises IndexError).
    tagged = [
        entry[-1] for entry in previous_smotif if entry[0] == 'smotif_def'
    ]
    searched_smotifs = tagged[-1] if tagged else []

    # The argument order handed to sm.getSmotif depends on the direction.
    if direction == 'left':
        ss_pair = (current_ss, searched_smotifs[0])
    else:
        ss_pair = (searched_smotifs[-1], current_ss)

    smotif_def = sm.getSmotif(*ss_pair)
    smotif_data = sm.readSmotifDatabase(smotif_def, database_cutoff)
    return smotif_data, smotif_def
Ejemplo n.º 2
0
def getfromDB(previous_smotif, current_ss, direction, database_cutoff):
    """
    Read the Smotif library for ``current_ss`` paired with one SSE of the
    previous Smotif.

    :param database_cutoff: forwarded unchanged to ``sm.readSmotifDatabase``
    :param previous_smotif: iterable of tagged entries; the last item of the
        last entry whose first item is ``'smotif_def'`` is used
    :param current_ss: definition of the SSE being paired
    :param direction: ``'left'`` pairs ``current_ss`` with element 0 of the
        previous Smotif definition; any other value pairs element 1 with
        ``current_ss``
    :return: tuple ``(result of sm.readSmotifDatabase, smotif_def)``
    :raises ValueError: if ``previous_smotif`` holds no ``'smotif_def'`` entry
    """
    payloads = [
        entry[-1] for entry in previous_smotif if entry[0] == 'smotif_def'
    ]
    if not payloads:
        # Fail with a clear message instead of an unbound local further down.
        raise ValueError("previous_smotif has no 'smotif_def' entry")
    psmotif = payloads[-1]  # the last matching entry wins

    # The argument order handed to sm.getSmotif depends on the direction.
    if direction == 'left':
        ss_pair = (current_ss, psmotif[0])
    else:
        ss_pair = (psmotif[1], current_ss)

    smotif_def = sm.getSmotif(*ss_pair)
    return sm.readSmotifDatabase(smotif_def, database_cutoff), smotif_def
Ejemplo n.º 3
0
def getfromDB(pair, sse_ordered, database_cutoff):
    """
    Read the Smotif library for the two SSEs selected by ``pair``.

    :param pair: indexable whose items 0 and 1 are indices into
        ``sse_ordered``
    :param sse_ordered: indexable collection of SSE definitions
    :param database_cutoff: forwarded unchanged to ``readSmotifDatabase``
    :return: tuple ``(result of readSmotifDatabase, sse_ordered, smotif_def)``
    """
    from utility.smotif_util import getSmotif, readSmotifDatabase

    smotif_def = getSmotif(sse_ordered[pair[0]], sse_ordered[pair[1]])
    smotif_data = readSmotifDatabase(smotif_def, database_cutoff)
    return smotif_data, sse_ordered, smotif_def
Ejemplo n.º 4
0
def getfromDB(previous_smotif, current_ss, direction, database_cutoff, stage,
              alt_smotif_def):
    """
    Read the Smotif library for ``current_ss`` paired with the SSE of the
    previous assembly that ``alt_smotif_def`` points at.

    :param previous_smotif: nested record of the previous assembly. For
        ``stage == 2`` the SSE index list is read from ``[0][2]`` and the SSE
        definitions from ``[1][-1]``; for any other stage copies of
        ``[1][1]`` (definitions) and ``[1][2]`` (index list) are used.
    :param current_ss: definition of the SSE being paired
    :param direction: ``'left'`` selects ``alt_smotif_def[1]`` and puts
        ``current_ss`` first; any other value selects ``alt_smotif_def[0]``
        and puts ``current_ss`` second
    :param database_cutoff: forwarded unchanged to ``sm.readSmotifDatabase``
    :param stage: assembly stage; only ``2`` is treated specially
    :param alt_smotif_def: indexable whose items 0 and 1 are SSE identifiers
        that are looked up in the SSE index list
    :return: tuple ``(result of sm.readSmotifDatabase, smotif_def)``
    """
    going_left = direction == 'left'

    if stage == 2:
        sse_indices = previous_smotif[0][2]
        sse_defs = previous_smotif[1][-1]
    else:
        sse_defs = (previous_smotif[1][1])[:]
        sse_indices = (previous_smotif[1][2])[:]

    # Locate the already-placed SSE by its position in the index list.
    anchor_sse = alt_smotif_def[1] if going_left else alt_smotif_def[0]
    previous_ss = sse_defs[sse_indices.index(anchor_sse)]

    # The argument order handed to sm.getSmotif depends on the direction.
    if going_left:
        ss_pair = (current_ss, previous_ss)
    else:
        ss_pair = (previous_ss, current_ss)

    smotif_def = sm.getSmotif(*ss_pair)
    smotif_data = sm.readSmotifDatabase(smotif_def, database_cutoff)
    return smotif_data, smotif_def
Ejemplo n.º 5
0
def SmotifSearch(index_array):
    """
    Score every library Smotif for one SSE pair and pickle the hits.

    The SSE pair is resolved with ``getSSdef(index_array)`` and the matching
    library is read with ``sm.readSmotifDatabase``. Each entry goes through
    the sequence filter plus whichever of the PCS / NOE / RDC filters have
    data in ``exp_data.pickle``. Entries that yield a PCS or RDC fit are
    collected and written to ``0_<index_array[0]>_<index_array[1]>.pickle``.

    :param index_array: identifies the SSE pair; items 0 and 1 also name the
        output pickle
    :return: always ``True``, including when the Smotif library is
        missing or empty
    """
    s1_def, s2_def = getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)

    # Keys noted by the original author:
    # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']
    exp_data = io.readPickle("exp_data.pickle")

    smotif_data = sm.readSmotifDatabase(smotif_def,
                                        exp_data['database_cutoff'])
    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return True

    # Loop-invariant lookups, done once. An absent 'natives' key means
    # nothing is excluded.
    natives = exp_data.get('natives', ())
    has_pcs = 'pcs_data' in exp_data
    has_noe = 'noe_data' in exp_data
    has_rdc = 'rdc_data' in exp_data

    dump_log = []
    # ************************************************************************
    # Main
    # The loop below iterates over all of the Smotifs and applies the filters.
    # This is the place to add new filters; the sequence filter is the
    # simplest example to start from.
    # ************************************************************************
    for smotif in smotif_data:
        # Always resolve the identifier: it is printed for every hit below,
        # whether or not a 'natives' list was supplied.
        tpdbid = smotif[0][0]

        # Excluding the natives: skip entries whose first four identifier
        # characters are listed.
        if tpdbid[0:4] in natives:
            continue

        # Temporary log for this entry; kept only if a filter reports a fit.
        pcs_tensor_fits, rdc_tensor_fits, noe_fmeasure = [], [], []
        tlog = [
            ['smotif', smotif],
            ['smotif_def', [s1_def, s2_def]],
            ['cathcodes', [smotif[0]]],
        ]

        # Sequence filter: records the values returned by
        # Sfilter.SequenceSimilarity for this entry.
        smotif_seq, seq_identity, blosum62_score = \
            Sfilter.SequenceSimilarity(s1_def, s2_def, smotif, exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity, blosum62_score])

        # Pseudocontact shift filter (only when PCS data is present).
        if has_pcs:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def, smotif,
                                                 exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # NOE filter (only when NOE data is present).
        if has_noe:
            noe_fmeasure = Nfilter.s1NOEfit(s1_def, s2_def, smotif, exp_data)
            tlog.append(['NOE_filter', noe_fmeasure])

        # Residual dipolar coupling filter: only attempted when the NOE
        # score exists and exceeds 0.5.
        if has_rdc and noe_fmeasure and noe_fmeasure > 0.5:
            rdc_tensor_fits = Rfilter.RDCAxRhFit(s1_def, s2_def, smotif,
                                                 exp_data)
            tlog.append(['RDC_filter', rdc_tensor_fits])

        # Keep the entry only if a PCS or RDC fit was produced.
        if pcs_tensor_fits or rdc_tensor_fits:
            # Single-argument print: same output on Python 2 and Python 3.
            print("%s %s %s" % (tpdbid, noe_fmeasure, rdc_tensor_fits))
            dump_log.append(tlog)

    # Save all of the hits in a pickled array.
    if dump_log:
        print("num of hits %s" % len(dump_log))
        io.dumpPickle(
            "0_%s_%s.pickle" % (index_array[0], index_array[1]), dump_log)

    return True
Ejemplo n.º 6
0
def S1SmotifSearch(task):
    """
    Score every library Smotif for one SSE pair, optionally rank the hits and
    write them to a gzip pickle.

    The SSE pair is resolved with ``mutil.getSSdef`` and the matching library
    is read with ``sm.readSmotifDatabase``. Each entry goes through the
    sequence filter plus whichever of the NOE / RDC / PCS / reference-RMSD
    steps have data in ``exp_data.pickle``. Hits are written to
    ``0_<index_array[0]>_<index_array[1]>.gzip``.

    :param task: indexable; item 0 is ``index_array`` (identifies the SSE
        pair and names the output file), item 1 is ``stage`` (``stage - 1``
        indexes the per-stage settings in ``exp_data``)
    :return: the list of hit logs, or ``False`` when the Smotif library is
        missing or empty, or when no entry passed the filters
    """
    index_array, stage = task[0], task[1]
    s1_def, s2_def, sse_route = mutil.getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)

    # Keys noted by the original author:
    # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']
    exp_data = io.readPickle("exp_data.pickle")

    smotif_data = sm.readSmotifDatabase(smotif_def,
                                        exp_data['database_cutoff'])
    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return False

    # Loop-invariant lookups, done once. 'natives' is optional: when the key
    # is absent nothing is excluded, instead of raising KeyError.
    natives = exp_data.get('natives', ())
    restrict_to_homologs = 'homologs' in exp_data
    homologs = exp_data['homologs'] if restrict_to_homologs else ()
    has_ilva_noes = 'ilva_noes' in exp_data
    has_rdc = 'rdc_data' in exp_data
    has_pcs = 'pcs_data' in exp_data
    has_reference = 'reference_ca' in exp_data

    dump_log = []

    # ************************************************************************
    # Main
    # The loop below iterates over all of the Smotifs and applies the filters.
    # This is the place to add new filters; the sequence filter is the
    # simplest example to start from.
    # ************************************************************************
    for smotif in smotif_data:
        pdbid = smotif[0][0][0:4]

        # Excluding the natives.
        if pdbid in natives:
            continue

        # Smotif assembly only from the specified pdb files.
        if restrict_to_homologs and pdbid not in homologs:
            continue

        # Temporary log for this entry; kept only if it survives the filters.
        pcs_tensor_fits = []
        noe_probability = 0.0
        tlog = [
            ['smotif', smotif, sse_route],
            ['smotif_def', [s1_def, s2_def]],
            ['qcp_rmsd'],
            ['cathcodes', [smotif[0]], [sse_route]],
        ]

        # Sequence filter: records the values returned by
        # Sfilter.getS1SeqIdentity for this entry.
        smotif_seq, seq_identity = Sfilter.getS1SeqIdentity(
            s1_def, s2_def, smotif, exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity])

        # ILVA NOE filter: the entry is dropped when its NOE probability is
        # below the stage-specific threshold.
        if has_ilva_noes:
            (noe_probability, no_of_noes, noe_energy, noe_data,
             cluster_protons, cluster_sidechains) = noepdf.s1ILVApdf(
                 s1_def, s2_def, smotif, exp_data, stage)

            if noe_probability < exp_data['expected_noe_prob'][stage - 1]:
                continue
            tlog.append([
                'NOE_filter', noe_probability, no_of_noes, noe_energy,
                noe_data, cluster_protons, cluster_sidechains
            ])

        # Residual dipolar coupling filter: the entry is dropped when no
        # tensor fit is returned.
        if has_rdc:
            rdc_tensor_fits, log_likelihood, rdc_energy = Rfilter.RDCAxRhFit(
                s1_def, s2_def, smotif, exp_data)
            if not rdc_tensor_fits:
                continue
            tlog.append(
                ['RDC_filter', rdc_tensor_fits, log_likelihood, rdc_energy])

        # Pseudocontact shift filter (only when PCS data is present).
        if has_pcs:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def, smotif,
                                                 exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # RMSD against the reference structure, logged for bookkeeping only;
        # it does not decide whether the entry is kept.
        if has_reference:
            ref_rmsd = ref.calcRefRMSD(exp_data['reference_ca'],
                                       s1_def,
                                       s2_def,
                                       smotif,
                                       rmsd_cutoff=100.0)
            tlog.append(['Ref_RMSD', ref_rmsd, seq_identity])

        # Keep the entry only if a PCS fit or a non-zero NOE probability
        # was produced.
        if pcs_tensor_fits or noe_probability:
            dump_log.append(tlog)

    if not dump_log:
        return False

    # Optionally reduce the hits to the per-stage top N before saving.
    if 'rank_top_hits' in exp_data:
        num_hits = exp_data['rank_top_hits'][stage - 1]
        dump_log = rank.rank_assembly(dump_log, num_hits)
        # Single-argument print: same output on Python 2 and Python 3.
        print("Reducing the amount of data to: %s %s" %
              (num_hits, len(dump_log)))
    print("num of hits %s" % len(dump_log))
    io.dumpGzipPickle(
        "0_%s_%s.gzip" % (index_array[0], index_array[1]), dump_log)
    return dump_log
Ejemplo n.º 7
0
# Reference tables for loopConstraintAlt, indexed by loop length (0..30).
# The *_DIST tables are compared against (terminal CA-CA distance / loop
# length); the *_STD tables give the matching tolerance.
# NOTE(review): presumably means and standard deviations derived from a
# structure survey -- the origin of the numbers is not shown here.
_LOOP_DIST_HS_SH = (
    0, 3.809, 3.137, 2.818, 2.482, 2.154, 1.928, 1.749, 1.67, 1.531, 1.428,
    1.377, 1.282, 1.261, 1.203, 1.135, 1.045, 1.004, 1.02, 0.977, 0.928,
    0.865, 0.834, 0.811, 0.756, 0.761, 0.749, 0.777, 0.74, 0.655, 0.648,
)
_LOOP_DIST_HH = (
    0, 3.81, 3.036, 2.836, 2.511, 2.275, 2.178, 2.026, 1.876, 1.835, 1.669,
    1.658, 1.666, 1.625, 1.53, 1.445, 1.374, 1.292, 1.212, 1.164, 1.133,
    1.049, 1.043, 1.074, 0.977, 0.965, 0.938, 0.868, 0.824, 0.805, 0.788,
)
_LOOP_DIST_SS = (
    0, 3.81, 3.19, 1.846, 1.607, 1.274, 1.14, 1.139, 1.198, 1.177, 1.115,
    1.029, 1.048, 0.935, 0.91, 0.908, 0.85, 0.83, 0.852, 0.849, 0.761,
    0.722, 0.742, 0.684, 0.677, 0.611, 0.587, 0.596, 0.565, 0.576, 0.532,
)
_LOOP_STD_HH = (
    0, 0.027, 0.284, 0.397, 0.441, 0.483, 0.499, 0.504, 0.537, 0.534,
    0.538, 0.545, 0.507, 0.494, 0.468, 0.447, 0.428, 0.439, 0.415, 0.432,
    0.392, 0.382, 0.38, 0.401, 0.381, 0.38, 0.317, 0.328, 0.304, 0.318,
    0.273,
)
_LOOP_STD_SS = (
    0, 0.027, 0.313, 0.293, 0.469, 0.419, 0.474, 0.49, 0.505, 0.447, 0.501,
    0.475, 0.479, 0.417, 0.451, 0.416, 0.373, 0.395, 0.47, 0.418, 0.36,
    0.349, 0.359, 0.312, 0.302, 0.281, 0.279, 0.264, 0.259, 0.346, 0.257,
)
_LOOP_STD_HS_SH = (
    0, 0.067, 0.278, 0.361, 0.418, 0.45, 0.448, 0.455, 0.436, 0.452, 0.438,
    0.416, 0.407, 0.402, 0.411, 0.405, 0.381, 0.378, 0.373, 0.36, 0.372,
    0.338, 0.322, 0.308, 0.285, 0.289, 0.296, 0.298, 0.294, 0.286, 0.208,
)
# Smotif type -> (distance table, std table). 'hs' and 'sh' share tables.
_LOOP_STATS = {
    'hh': (_LOOP_DIST_HH, _LOOP_STD_HH),
    'hs': (_LOOP_DIST_HS_SH, _LOOP_STD_HS_SH),
    'sh': (_LOOP_DIST_HS_SH, _LOOP_STD_HS_SH),
    'ss': (_LOOP_DIST_SS, _LOOP_STD_SS),
}
_MAX_LOOP_LENGTH = 30  # last valid index of the tables above


def loopConstraintAlt(coo_arrays, sse_order, direction):
    """
    Check that the loop between the newly added SSE and its neighbour has a
    plausible end-to-end distance for its length.

    The distance between the facing terminal CA atoms is divided by the loop
    length and must lie within three tabulated standard deviations of the
    tabulated value for the Smotif type (``smotif_def[0]``: ``'hh'``,
    ``'hs'``, ``'sh'`` or ``'ss'``) and loop length.

    :param coo_arrays: per-SSE coordinate arrays in the same order as
        ``sse_order``; each is passed to ``getCAcoo``
    :param sse_order: SSE definitions; the last two items of each definition
        are used as start and end positions
    :param direction: ``'right'`` checks the last two SSEs (new SSE last);
        any other value checks the first two (new SSE first)
    :return: ``True`` when the normalised distance is inside the allowed
        window; ``False`` otherwise, and also when the loop length is not in
        1..30
    """
    going_right = direction == 'right'

    if going_right:
        psse, csse = sse_order[-2], sse_order[-1]
        loop_length = csse[-2] - psse[-1]
    else:
        csse, psse = sse_order[0], sse_order[1]
        loop_length = psse[-2] - csse[-1]

    # Reject before doing any coordinate work. Lengths above 30 have no table
    # entry, zero cannot be normalised, and a negative length would silently
    # index the tables from the end.
    if not 0 < loop_length <= _MAX_LOOP_LENGTH:
        return False

    if going_right:
        smotif_def = getSmotif(psse, csse)
        c_coo = getCAcoo(coo_arrays[-1])
        p_coo = getCAcoo(coo_arrays[-2])
        # First CA of the new SSE faces the last CA of the previous one.
        c_end, p_end = 0, -1
    else:
        smotif_def = getSmotif(csse, psse)
        c_coo = getCAcoo(coo_arrays[0])
        p_coo = getCAcoo(coo_arrays[1])
        # Last CA of the new SSE faces the first CA of the previous one.
        c_end, p_end = -1, 0

    c_CA = [c_coo[0][c_end], c_coo[1][c_end], c_coo[2][c_end]]
    p_CA = [p_coo[0][p_end], p_coo[1][p_end], p_coo[2][p_end]]

    dist = get_dist(c_CA, p_CA)
    Ndist = round(dist / float(loop_length), 2)

    # Unknown Smotif types keep the original zero-width window around 0.
    stats = _LOOP_STATS.get(smotif_def[0])
    if stats is None:
        stat_dist, stat_std = 0, 0
    else:
        stat_dist = stats[0][loop_length]
        stat_std = stats[1][loop_length]

    tolerance = 3.0 * stat_std
    return stat_dist - tolerance <= Ndist <= stat_dist + tolerance