Example #1
0
def getRunSeqAlt(num_hits, stage, seq_index):
    """
    # Change to accommodate alternate smotif definitions
    generate run seq, a seq list of pairs of
    indexes of profiles for job scheduling
    """

    import warnings
    warnings.warn("this module (getRunSeqAlt) is deprecated",
                  DeprecationWarning,
                  stacklevel=2)

    map_route = []
    ss_profiles = io.readPickle("ss_profiles.pickle")
    if os.path.isfile("contacts_route.pickle"):
        map_route = io.readPickle("contacts_route.pickle")
    elif os.path.isfile("pcs_route.pickle"):
        map_route = io.readPickle("pcs_route.pickle")
    elif os.path.isfile("rdc_route.pickle"):
        map_route = io.readPickle("rdc_route.pickle")

    alt_smotif_defs = map_route[seq_index]

    try:
        next_index, next_smotif = getNextSmotif(map_route)
        print next_index, next_smotif
    except TypeError:
        return [999], 999

    direction = next_smotif[-1]
    if direction == 'left':
        next_ss_list = ss_profiles[next_smotif[0]]
    else:
        next_ss_list = ss_profiles[next_smotif[1]]

    # get and make a list of top 10(n) of the previous run
    # top_hits = makeTopPickle(next_index - 1, num_hits, stage)  # send the previous Smotif index

    top_hits = makeTopPickle2(next_index - 1, num_hits,
                              stage)  # send the previous Smotif index

    # delete two stages down pickled files
    # check_pickle = str(next_index - 2) + str("_*_*.pickle")
    check_pickle = str(next_index - 2) + str("_*_*.gzip")
    file_list = glob.glob(check_pickle)

    if len(file_list) > 10:
        remove = "rm " + check_pickle
        os.system(remove)

    if top_hits:
        run_seq = []
        for i in range(len(top_hits)):
            for j in range(len(next_ss_list)):
                run_seq.append([i, j])
        return run_seq, next_index
Example #2
0
def readSmotifDatabase(smotif, *database_cutoff):
    """
    Read the pickled Smotif library file that matches a Smotif definition.

    The file is named "<smotif[0]>_<smotif[1]>_<smotif[2]>.db".  When a
    database cutoff is given, the file is looked up in
    "<root>/databases/database_cutoff_<cutoff>/", where <root> is two
    directory levels above the directory containing this module.  Without
    a cutoff the legacy hard-coded database locations are used.

    :param smotif: Smotif definition; its first three items build the file name
    :param database_cutoff: optional; only the first value is used and it
        may be a string or anything str() can format
    :return: whatever io.readPickle returns for the selected file
    """
    # TODO option to parse in database path
    import os

    if database_cutoff:
        # e.g. module in /home/user/projects/boss-evo/zinr/utility
        #      -> root  /home/user/projects/boss-evo
        module_dir = os.path.dirname(os.path.realpath(__file__))
        root_dir = os.path.dirname(os.path.dirname(module_dir))
        smotif_db_path = os.path.join(
            root_dir, 'databases',
            'database_cutoff_' + str(database_cutoff[0]))
    else:
        # Backwards compatible with old code: try the cluster location
        # first and fall back to the workstation location.
        smotif_db_path = "/short/xc4/kbp502/zinr/main/smotif_cen_db/"
        if not os.path.exists(smotif_db_path):
            smotif_db_path = "/home/kalabharath/zinr/main/smotif_cen_db/"

    file_name = smotif[0] + "_" + str(smotif[1]) + "_" + str(smotif[2]) + ".db"
    return io.readPickle(os.path.join(smotif_db_path, file_name))
Example #3
0
def getSxILVARotamers(res_type, bbc, spin):
    """
    Load the pickled side-chain rotamers of a residue type and hand them,
    together with the given arguments, to bbRMSD.bbrmsd.

    :param res_type: residue name; selects "<res_type>_sc.pickle" in the
        sidechainRotamers directory next to this module
    :param bbc: forwarded unchanged to bbRMSD.bbrmsd
    :param spin: forwarded unchanged to bbRMSD.bbrmsd
    :return: the (spin_coors, cluster_coors) pair produced by bbRMSD.bbrmsd
    """
    import glob, os
    module_dir = os.path.dirname(os.path.realpath(__file__))
    rotamer_file = "".join(
        [module_dir, '/sidechainRotamers/', res_type, "_sc.pickle"])
    rotamer_library = io.readPickle(rotamer_file)
    # Fixed RMSD cutoff passed to bbRMSD.bbrmsd.
    max_rmsd = 0.1
    spin_xyz, cluster_xyz = bbRMSD.bbrmsd(bbc, rotamer_library, max_rmsd,
                                          spin, res_type)
    return spin_xyz, cluster_xyz
Example #4
0
def getILVARotamers(res_type, bbc, spin):
    """
    Load the pickled side-chain rotamers of a residue type, pre-process the
    supplied coordinates with processBBC and hand both to bbRMSD.bbrmsd.

    :param res_type: residue name; selects "<res_type>_sc.pickle" in the
        sidechainRotamers directory next to this module
    :param bbc: passed through processBBC before the bbRMSD.bbrmsd call
    :param spin: forwarded unchanged to bbRMSD.bbrmsd
    :return: the (spin_coors, cluster_coors) pair produced by bbRMSD.bbrmsd
    """

    module_dir = os.path.dirname(os.path.realpath(__file__))
    rotamer_file = "".join(
        [module_dir, '/sidechainRotamers/', res_type, "_sc.pickle"])
    rotamer_library = io.readPickle(rotamer_file)
    # Fixed RMSD cutoff passed to bbRMSD.bbrmsd.
    max_rmsd = 0.1
    processed_bbc = processBBC(bbc)
    spin_xyz, cluster_xyz = bbRMSD.bbrmsd(processed_bbc, rotamer_library,
                                          max_rmsd, spin, res_type)

    return spin_xyz, cluster_xyz
Example #5
0
        seq_correction = int(data['bmrb_sequence_correction'])
        noe_data, total_noe_count = nu.parseBMRBblockMR(
            data['noe_bmrb_mr'], data_dict['aa_seq'], seq_correction)
        data_dict['noe_data'] = [noe_data, total_noe_count]
else:
    pass

if 'noe_input_files' in datatypes:
    # noe_data, total_noe_count = nu.getNOEData(data['noe_input_files'], ss_seq)
    noe_data, total_noe_count = nu.parseNOEData(data['noe_input_files'])
    data_dict['noe_data'] = [noe_data, total_noe_count]
else:
    pass

if 'ilva_noes' in datatypes:
    ilva_noes = io.readPickle(data['ilva_noes'])
    data_dict['ilva_noes'] = ilva_noes

if 'pcs_broker' in datatypes:
    print data['pcs_broker']
    pcs_broker = data['pcs_broker']
    pcsdata = io.getPcsTagInfo(ss_seq, pcs_broker)
    data_dict['pcs_data'] = pcsdata
else:
    pass

if 'axrh_cutoff' in datatypes:
    axrh_cutoff = data['axrh_cutoff']
    axrh_cutoff = axrh_cutoff.split()
    axrh_cutoff = [float(i) for i in axrh_cutoff]
    data_dict['axrh_cutoff'] = axrh_cutoff
Example #6
0
                   % ('ATOM', i + 1, atom, res, 'A', res_no, " ", x, y, z, 1.0, 30.0, ' ', ' \n')
        outfile.write(pdb_line)
        # print pdb_line
    print 'TER'
    outfile.close()
    return True


# Collect every Smotif library file of the final database and report how
# many were found.
smotif_db = glob.glob("/home/kalabharath/zinr/final_smotif_database/*.db")
print len(smotif_db)

# Walk over all library files; each one is a pickled list of Smotif entries.
for i in range(0, len(smotif_db)):
    # for i in range(0, 1):

    temp_smotif = []
    smotif = io.readPickle(smotif_db[i])
    print smotif_db[i]
    print len(smotif)
    # The directory prefix used in the glob above is 45 characters long, so
    # this prints the bare file name.
    print smotif_db[i][45:]
    for j in range(0, len(smotif)):
        # for j in range(0, 1):
        # print smotif[j][0]
        # print smotif[j][0][0] #['1pp9P00', '172', '203', '222', '245']
        # print smotif[j][0][1] #172- [203, 'THR', 'H', 49.366, 87.846, 102.07]]
        # print smotif[j][0][2] #222- [245, 'PHE', 'H', 40.016, 57.964, 78.166]]

        # Items 1 and 2 of the entry header are passed to rms.getcoo one at
        # a time (per the sample output above they look like the two SSEs
        # of the Smotif -- TODO confirm).
        ss1 = rms.getcoo(
            smotif[j][0]
            [1])  # x,y,z, atom_type, res_no, res = [], [], [], [], [], []
        ss2 = rms.getcoo(
            smotif[j][0][2])  # return [x,y,z, atom_type, res_no, res]
Example #7
0
def SmotifSearch(index_array):
    """
    Main()
    :param index_array:
    :return:
    """

    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys(
    )  # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    psmotif = uts2.getPreviousSmotif(index_array[0])

    current_ss, direction = uts2.getSS2(index_array[1])
    csmotif_data, smotif_def = getfromDB(psmotif, current_ss, direction,
                                         exp_data['database_cutoff'])

    if not csmotif_data:
        # If the smotif library doesn't exist
        # Terminate further execution
        return True
    """
    always narrow down to previous sse and current sse and operate on them individually
    """
    sse_ordered = orderSSE(psmotif, current_ss, direction)
    dump_log = []
    no_clashes = False

    # ************************************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies various filters
    # This is the place to add new filters as you desire. For starters, look at Sequence filter.
    # ************************************************************************************************

    for i in range(0, len(csmotif_data)):

        # ************************************************
        # Applying different filters for the Smotif assembly
        # ************************************************

        # Exclude natives if needed
        if 'natives' in exp_data_types:
            natives = exp_data['natives']
            tpdbid = csmotif_data[i][0][0]
            pdbid = tpdbid[0:4]
            if pdbid in natives:
                #if pdbid not in ['2z2i']:
                # Stop further execution and
                continue

        # ************************************************
        # RMSD filter using QCP method
        # quickly filters non-overlapping smotifs
        # ************************************************

        rmsd, transformed_coos = qcp.rmsdQCP(psmotif[0], csmotif_data[i],
                                             direction)

        if rmsd <= exp_data['rmsd_cutoff'][1]:
            # Loop constraint restricts the overlapping smotifs is not drifted far away.
            loop_constraint = llc.loopConstraint(transformed_coos, sse_ordered,
                                                 direction, smotif_def)
            if loop_constraint:
                # Check whether the SSEs with in the assembled smotifs are clashing to one another
                no_clashes = qcp.clahses(transformed_coos,
                                         exp_data['clash_distance'])
            else:
                no_clashes = False

        if rmsd <= exp_data['rmsd_cutoff'][1] and no_clashes:
            # Prepare temp log array to save data at the end
            tlog, noe_fmeasure, pcs_tensor_fits, rdc_tensor_fits = [], [], [], []

            tlog.append(['smotif', csmotif_data[i]])
            tlog.append(['smotif_def', sse_ordered])

            tlog.append(['qcp_rmsd', transformed_coos, sse_ordered, rmsd])
            cathcodes = sm.orderCATH(psmotif, csmotif_data[i][0], direction)
            tlog.append(['cathcodes', cathcodes])

            # ************************************************
            # Sequence filter
            # Aligns the smotif seq to target seq and calculates
            # sequence identity and the alignment score
            # ************************************************

            csse_seq, seq_identity, blosum62_score = Sfilter.S2SequenceSimilarity(
                current_ss, csmotif_data[i], direction, exp_data)

            # concat current to previous seq
            concat_seq = sm.orderSeq(psmotif, csse_seq, direction)

            tlog.append([
                'seq_filter', concat_seq, csse_seq, seq_identity,
                blosum62_score
            ])

            # ************************************************
            # Pseudocontact Shift filter
            # uses experimental PCS data to filter Smotifs
            # scoring based on normalised chisqr
            # ************************************************

            if 'pcs_data' in exp_data_types:
                pcs_tensor_fits = Pfilter.PCSAxRhFit2(transformed_coos,
                                                      sse_ordered,
                                                      exp_data,
                                                      stage=2)
                tlog.append(['PCS_filter', pcs_tensor_fits])

            # ************************************************
            # Ambiguous NOE score filter
            # uses experimental ambiguous noe data to filter Smotifs
            # scoring based on f-measure?
            # ************************************************

            if 'noe_data' in exp_data_types:
                noe_fmeasure = Nfilter.s2NOEfit(transformed_coos, sse_ordered,
                                                exp_data)
                tlog.append(['NOE_filter', noe_fmeasure])

            # ************************************************
            # Residual dipolar coupling filter
            # uses experimental RDC data to filter Smotifs
            # scoring based on normalised chisqr
            # ************************************************

            if 'rdc_data' in exp_data_types:
                if noe_fmeasure and noe_fmeasure > 0.5:
                    rdc_tensor_fits = Rfilter.RDCAxRhFit2(transformed_coos,
                                                          sse_ordered,
                                                          exp_data,
                                                          stage=2)
                    tlog.append(['RDC_filter', rdc_tensor_fits])

            if pcs_tensor_fits or rdc_tensor_fits:
                #dump data to the disk
                print tpdbid, noe_fmeasure, rdc_tensor_fits
                # print csmotif_data[i][0], 'blosum62 score', blosum62_score, "seq_id", seq_identity, "rmsd=", rmsd, cathcodes
                dump_log.append(tlog)

    # prevent dumping empty arrays with no data
    if len(dump_log) > 0:
        print "num of hits", len(dump_log),
        io.dumpPickle(
            "tx_" + str(index_array[0]) + "_" + str(index_array[1]) +
            ".pickle", dump_log)

    return True
Example #8
0
                # print pdb_line
    # print i, sse_seq[-1]-sse_seq[-2]
    # print 'TER'
    outfile.close()
    return True


import utility.stage2_util as util

# The Smotif index whose top hits are inspected comes from the first
# command-line argument.
seq = int(sys.argv[1])
num_hits = 50
stage = 4

# util.makeTopPickle(seq, num_hits, stage)

# Load the previously written "<seq>_tophits.pickle" file.
top_result = io.readPickle(str(seq) + "_tophits.pickle")

for p in range(0, len(top_result)):
    # for p in range(0,5):
    # The trailing comma keeps the following output on the same line.
    print 'model_', p,
    top_struct = top_result[p]

    import copy

    # Work on a shallow copy of the hit.
    top_struct = copy.copy(top_struct)

    # print len(top_struct)

    # Item 1 of the second log entry (presumably the 'smotif_def' entry, so
    # this would be the SSE definition -- TODO confirm against the writer).
    sse_sequence = top_struct[1][1]

    # print sse_sequence
Example #9
0
def SmotifSearch(index_array):
    """
    Main ()
    :param index_array:
    :return:
    """

    # print index_array
    s1_def, s2_def = getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)
    # print s1_def, s2_def

    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys(
    )  # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    smotif_data = sm.readSmotifDatabase(smotif_def,
                                        exp_data['database_cutoff'])

    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return True

    dump_log = []
    # ************************************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies various filters
    # This is the place to add new filters as you desire. For starters, look at Sequence filter.
    # ************************************************************************************************

    for i in range(0, len(smotif_data)):
        # loop over for all of the entries in the smotif_db file

        # ************************************************
        # Excluding the natives
        # ************************************************

        if 'natives' in exp_data_types:
            natives = exp_data['natives']
            tpdbid = smotif_data[i][0][0]
            pdbid = tpdbid[0:4]
            if pdbid in natives:
                #if pdbid not in ['2z2i']:
                # Stop further execution, but, iterate.
                continue

        # ************************************************
        # Applying different filters to Smotifs
        # Prepare temp log array to save data at the end
        # ************************************************

        tlog, pcs_tensor_fits, rdc_tensor_fits, noe_fmeasure = [], [], [], []
        tlog.append(['smotif', smotif_data[i]])
        tlog.append(['smotif_def', [s1_def, s2_def]])
        tlog.append(['cathcodes', [smotif_data[i][0]]])

        # ************************************************
        # Sequence filter
        # Aligns the smotif seq to target seq and calculates
        # sequence identity and the alignment score
        # ************************************************

        smotif_seq, seq_identity, blosum62_score = \
            Sfilter.SequenceSimilarity(s1_def, s2_def, smotif_data[i], exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity, blosum62_score])

        # ************************************************
        # Pseudocontact Shift filter
        # uses experimental PCS data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************

        if 'pcs_data' in exp_data_types:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def,
                                                 smotif_data[i], exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # ************************************************
        # Ambiguous NOE score filter
        # uses experimental ambiguous noe data to filter Smotifs
        # scoring based on f-measure?
        # ************************************************

        if 'noe_data' in exp_data_types:
            noe_fmeasure = Nfilter.s1NOEfit(s1_def, s2_def, smotif_data[i],
                                            exp_data)
            tlog.append(['NOE_filter', noe_fmeasure])

        # ************************************************
        # Residual dipolar coupling filter
        # uses experimental RDC data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************

        if 'rdc_data' in exp_data_types:

            if noe_fmeasure and noe_fmeasure > 0.5:
                rdc_tensor_fits = Rfilter.RDCAxRhFit(s1_def, s2_def,
                                                     smotif_data[i], exp_data)
                tlog.append(['RDC_filter', rdc_tensor_fits])

        # Dump the data to the disk
        if pcs_tensor_fits or rdc_tensor_fits:
            # print smotif_data[i][0][0], "seq_id", seq_identity, "i=", i, "/", len(smotif_data)
            print tpdbid, noe_fmeasure, rdc_tensor_fits
            dump_log.append(tlog)

    # Save all of the hits in pickled arrays
    if dump_log:
        print "num of hits", len(dump_log)
        io.dumpPickle(
            '0_' + str(index_array[0]) + "_" + str(index_array[1]) + ".pickle",
            dump_log)

    return True
Example #10
0
def S1SmotifSearch(task):
    """
    Main ()
    :param task:
    :return:
    """

    index_array = task[0]
    stage = task[1]
    s1_def, s2_def, sse_route = mutil.getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)
    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys(
    )  # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    smotif_data = sm.readSmotifDatabase(smotif_def,
                                        exp_data['database_cutoff'])

    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return False

    dump_log = []

    # ************************************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies various filters
    # This is the place to add new filters as you desire. For starters, look at Sequence filter.
    # ************************************************************************************************

    for i in range(0, len(smotif_data)):

        # ************************************************
        # Excluding the natives
        # ************************************************

        natives = exp_data['natives']
        tpdbid = smotif_data[i][0][0]
        pdbid = tpdbid[0:4]

        if 'natives' in exp_data_types:
            if pdbid in natives:
                continue
                # Stop further execution, but, iterate.
            else:
                pass

        if 'homologs' in exp_data_types:  # Smotif assembly only from the specified pdb files
            homologs = exp_data['homologs']
            if pdbid not in homologs:
                # Stop further execution, but, iterate.
                continue
            else:
                pass

        # ************************************************
        # Applying different filters to Smotifs
        # Prepare temp log array to save data at the end
        # ************************************************

        tlog, pcs_tensor_fits, rdc_tensor_fits, = [], [], []
        ref_rmsd, noe_probability = 0.0, 0.0

        tlog.append(['smotif', smotif_data[i], sse_route])
        tlog.append(['smotif_def', [s1_def, s2_def]])
        tlog.append(['qcp_rmsd'])
        tlog.append(['cathcodes', [smotif_data[i][0]], [sse_route]])

        # ************************************************
        # Sequence filter
        # Aligns the smotif seq to target seq and calculates
        # sequence identity and the alignment score
        # ************************************************

        smotif_seq, seq_identity = Sfilter.getS1SeqIdentity(
            s1_def, s2_def, smotif_data[i], exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity])

        # ************************************************
        # Unambiguous NOE score filter
        # uses experimental ambiguous noe data to filter Smotifs
        # scoring based on f-measure?
        # ************************************************

        if 'ilva_noes' in exp_data_types:

            noe_probability, no_of_noes, noe_energy, noe_data, cluster_protons, cluster_sidechains = noepdf.s1ILVApdf(
                s1_def, s2_def, smotif_data[i], exp_data, stage)

            if noe_probability >= exp_data['expected_noe_prob'][stage - 1]:
                tlog.append([
                    'NOE_filter', noe_probability, no_of_noes, noe_energy,
                    noe_data, cluster_protons, cluster_sidechains
                ])
            else:
                continue

        # ************************************************
        # Residual dipolar coupling filter
        # uses experimental RDC data to filter Smotifs
        # scoring based on normalised chisqr.
        # ************************************************

        if 'rdc_data' in exp_data_types:
            rdc_tensor_fits, log_likelihood, rdc_energy = Rfilter.RDCAxRhFit(
                s1_def, s2_def, smotif_data[i], exp_data)
            if rdc_tensor_fits:
                tlog.append([
                    'RDC_filter', rdc_tensor_fits, log_likelihood, rdc_energy
                ])
            else:
                continue

        # ************************************************
        # Pseudocontact Shift filter
        # uses experimental PCS data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************

        if 'pcs_data' in exp_data_types:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def,
                                                 smotif_data[i], exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # ************************************************
        # Calc RMSD of the reference structure.
        # Used to identify the lowest possible RMSD
        # structure for the target, from the Smotif library.
        # ************************************************

        if 'reference_ca' in exp_data_types:
            ref_rmsd = ref.calcRefRMSD(exp_data['reference_ca'],
                                       s1_def,
                                       s2_def,
                                       smotif_data[i],
                                       rmsd_cutoff=100.0)
            tlog.append(['Ref_RMSD', ref_rmsd, seq_identity])

        # Dump the data to the disk
        if pcs_tensor_fits or noe_probability:
            dump_log.append(tlog)

    # Save all of the hits in pickled arrays
    if dump_log:
        if 'rank_top_hits' in exp_data_types:
            rank_top_hits = exp_data['rank_top_hits']
            num_hits = rank_top_hits[stage - 1]
            dump_log = rank.rank_assembly(dump_log, num_hits)
            print "Reducing the amount of data to:", rank_top_hits[
                stage - 1], len(dump_log)
        print "num of hits", len(dump_log)
        io.dumpGzipPickle(
            '0_' + str(index_array[0]) + "_" + str(index_array[1]) + ".gzip",
            dump_log)
        return dump_log
    else:
        return False
Example #11
0
def sXSmotifSearch(task):
    """
    Main()
    :param task:
    :return:
    """

    # task = [[0, 0, [2, 4, 'left']], 3, 2]
    index_array = [task[0][0], task[0][1]]
    alt_smotif_def = task[0][2]
    #print "Alt_smotif_def", alt_smotif_def
    #print index_array
    stage = task[1]
    file_index = task[2]
    #print file_index, stage
    print task
    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys(
    )  # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']
    psmotif, pre_smotif_assembly, dump_log = [], [], []

    if stage == 2:
        psmotif = uts2.getPreviousSmotif(index_array[0], file_index)
        current_ss, direction, current_ss_in_que = uts2.getSS2(
            index_array[1], alt_smotif_def)
        csmotif_data, smotif_def = mutil.getfromDB(psmotif, current_ss,
                                                   direction,
                                                   exp_data['database_cutoff'],
                                                   stage, alt_smotif_def)
        sse_ordered, sse_index_ordered, previous_sse_route, previous_sse_index = mutil.orderSSE(
            psmotif, current_ss, direction, stage, current_ss_in_que)
        sorted_noe_data, cluster_protons, cluster_sidechains = mutil.fetchNOEdata(
            psmotif)
        print "Here S2x", smotif_def, alt_smotif_def, previous_sse_route, previous_sse_index
    else:
        pre_smotif_assembly = uts2.getPreviousSmotif(index_array[0],
                                                     file_index)
        current_ss, direction, current_ss_in_que = uts2.getSS2(
            index_array[1], alt_smotif_def)
        csmotif_data, smotif_def = mutil.getfromDB(pre_smotif_assembly,
                                                   current_ss, direction,
                                                   exp_data['database_cutoff'],
                                                   stage, alt_smotif_def)
        sse_ordered, sse_index_ordered, previous_sse_route, previous_sse_index = mutil.orderSSE(
            pre_smotif_assembly, current_ss, direction, stage,
            current_ss_in_que)
        sorted_noe_data, cluster_protons, cluster_sidechains = mutil.fetchNOEdata(
            pre_smotif_assembly)
        print "Here S3x", smotif_def, alt_smotif_def, previous_sse_route, previous_sse_index

    print current_ss, direction
    if 'rmsd_cutoff' in exp_data_types:
        rmsd_cutoff = exp_data['rmsd_cutoff'][stage - 1]
    else:
        rmsd_cutoff = sm.getRMSDcutoff(smotif_def)

    if not csmotif_data:
        # If the smotif library doesn't exist.
        # Terminate further execution.
        return False

    # ************************************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies various filters
    # This is the place to add new filters as you desire. For starters, look at Sequence filter.
    # ************************************************************************************************

    for i in range(0, len(csmotif_data)):

        # ************************************************
        # Applying different filters for the Smotif assembly
        # ************************************************

        # Exclude natives if needed
        ref_rmsd, noe_probability = 0.0, 0.0
        no_clashes = False

        tpdbid = csmotif_data[i][0][0]
        pdbid = tpdbid[0:4]

        if 'natives' in exp_data_types:
            natives = exp_data['natives']
            if pdbid in natives:
                continue
            # Stop further execution, but, go to next.
            else:
                pass

        if 'homologs' in exp_data_types:  # Smotif assembly only from the specified pdb files
            homologs = exp_data['homologs']
            if pdbid not in homologs:
                # Stop further execution, but, go to next.
                continue
            else:
                pass

        # ************************************************
        # RMSD filter using QCP method
        # quickly filters non-overlapping smotifs
        # ************************************************

        if stage == 2:
            rmsd, transformed_coos = qcp.rmsdQCP(psmotif[0], csmotif_data[i],
                                                 direction, rmsd_cutoff,
                                                 previous_sse_index)

        else:
            rmsd, transformed_coos = qcp.rmsdQCP3(pre_smotif_assembly,
                                                  csmotif_data[i], direction,
                                                  rmsd_cutoff,
                                                  previous_sse_index)

        if rmsd <= rmsd_cutoff:
            # Loop constraint restricts the overlapping smotifs is not drifted far away.
            loop_constraint = llc.loopConstraint(transformed_coos, sse_ordered,
                                                 direction, smotif_def)

            if loop_constraint:

                # Check whether the SSEs with in the assembled smotifs are clashing to one another
                no_clashes = qcp.kClashes(transformed_coos, sse_ordered,
                                          current_ss)
            else:
                no_clashes = False

        else:
            continue

        if no_clashes:
            # Prepare temporary arrays to log the data.
            tlog, total_percent, pcs_tensor_fits, rdc_tensor_fits = [], [], [], []
            tlog.append(['smotif', tpdbid])
            tlog.append(['smotif_def', sse_ordered, sse_index_ordered])
            tlog.append(['qcp_rmsd', transformed_coos, sse_ordered, rmsd])

            if stage == 2:
                cathcodes, cathcodes_order = sm.orderCATH(
                    psmotif, csmotif_data[i][0], direction, alt_smotif_def)
            else:
                cathcodes, cathcodes_order = sm.orderCATH(
                    pre_smotif_assembly, csmotif_data[i][0], direction,
                    alt_smotif_def)
            tlog.append(['cathcodes', cathcodes, cathcodes_order])

            # ************************************************
            # Sequence filter
            # Aligns the smotif seq to target seq and calculates
            # sequence identity and the alignment score
            # ************************************************

            # concat current to previous seq
            concat_seq = 'SeqAnchor'
            seq_identity = 30.0
            tlog.append([
                'seq_filter', concat_seq, seq_identity,
                exp_data['cluster_rmsd_cutoff']
            ])

            # ************************************************
            # NOE score filter
            # uses experimental noe data to filter Smotifs
            # scoring based on log-likelihood?
            # ************************************************

            if 'ilva_noes' in exp_data_types:
                noe_probability, no_of_noes, noe_energy, noe_data, new_cluster_protons, new_cluster_sidechains = noepdf.sX2ILVApdf(
                    transformed_coos, sse_ordered, sorted_noe_data,
                    cluster_protons, cluster_sidechains, exp_data, stage)

                if noe_probability >= exp_data['expected_noe_prob'][stage - 1]:
                    tlog.append([
                        'NOE_filter', noe_probability, no_of_noes, noe_energy,
                        noe_data, new_cluster_protons, new_cluster_sidechains
                    ])
                else:
                    continue

            # ************************************************
            # Residual dipolar coupling filter
            # uses experimental RDC data to filter Smotifs
            # scoring based on normalised chisqr
            # ************************************************

            if 'rdc_data' in exp_data_types:
                rdc_tensor_fits, log_likelihood, rdc_energy = Rfilter.RDCAxRhFit2(
                    transformed_coos, sse_ordered, exp_data, stage)
                if rdc_tensor_fits:
                    tlog.append([
                        'RDC_filter', rdc_tensor_fits, log_likelihood,
                        rdc_energy
                    ])
                else:
                    # Do not execute any further
                    continue

            # ************************************************
            # Pseudocontact Shift filter
            # uses experimental PCS data to filter Smotifs
            # scoring based on normalised chisqr
            # ************************************************

            if 'pcs_data' in exp_data_types:
                pcs_tensor_fits = Pfilter.PCSAxRhFit2(transformed_coos,
                                                      sse_ordered, exp_data,
                                                      stage)
                tlog.append(['PCS_filter', pcs_tensor_fits])

            # ************************************************
            # Calc RMSD of the reference structure.
            # Used to identify the lowest possible RMSD
            # structure for the target, from the Smotif library.
            # ************************************************

            if 'reference_ca' in exp_data_types:
                ref_rmsd = ref.calcRefRMSD2(exp_data['reference_ca'],
                                            sse_ordered, transformed_coos)
                tlog.append(['Ref_RMSD', ref_rmsd, seq_identity])
                tlog.append([
                    'Refine_Smotifs',
                    "Place holder to delete this log permanantly"
                ])
                tlog.append(['Alt_smotif', current_ss_in_que])

            if pcs_tensor_fits or noe_probability:
                # dump data to the disk
                dump_log.append(tlog)

    # Dumping hits as a pickle array.
    if dump_log:
        if 'rank_top_hits' in exp_data_types:
            rank_top_hits = exp_data['rank_top_hits']
            num_hits = rank_top_hits[stage - 1]
            dump_log = rank.rank_assembly_with_clustering_noemax(
                dump_log, num_hits)
            print "Reducing the amount of data to:", rank_top_hits[
                stage - 1], len(dump_log)
        print "num of hits", len(dump_log)
        # io.dumpGzipPickle("tx_" + str(index_array[0]) + "_" + str(index_array[1]) + ".gzip", dump_log)
        return dump_log
    else:
        return False
Example #12
0
    if os.path.isfile(t_file):
        top_result = io.readGzipPickle(t_file)
    else:
        print "Somethis is terrribly wrong !"
        exit()

# Report selected log entries of every top hit.
for p in range(0, len(top_result)):
    # The trailing comma keeps the following output on the same line.
    print 'model_', p,

    top_struct = top_result[p]

    # Work on a shallow copy of the hit.
    top_struct = copy.copy(top_struct)

    # Each entry is a list whose first item names the kind of log record.
    for entry in top_struct:
        if entry[0] == 'cathcodes':
            print entry
            pass
        if entry[0] == 'Ref_RMSD':
            # everything except the last field
            print entry[:-1]
        if entry[0] == 'RDC_filter':
            pass
            print entry
        if entry[0] == 'NOE_filter':
            # only the first four fields
            print entry[0:4]
    # Last item of the first log entry.
    ss_list = top_struct[0][-1]
    # NOTE(review): the experimental data are re-read from disk on every
    # iteration although the file name does not depend on p.
    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys(
    )  # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    aa_seq = exp_data['aa_seq']