Example No. 1
# Note: the 'io' and 'ss' helpers used below come from the project's utility
# modules; their imports are not part of this snippet.
import utility.referenceUtil as ru

# Parse the input data text file
data = io.readInputDataFiles('input_data.txt')
datatypes = data.keys()

data_dict = {}
print datatypes

if 'fasta_file' in datatypes:
    # Read in the amino acid sequence and the Secondary structure assignment
    handle, aa_seq = io.readFasta(data['fasta_file'])
    ss_seq = ru.matchSeq2SS(aa_seq, data['ss_file'])
    # Generate fuzzy +/-2 SSE combinations
    ss_def, ss_combi = ss.genSSCombinations(ss_seq)
    io.dumpPickle("ss_profiles.pickle", ss_combi)
    data_dict['ss_seq'] = ss_seq
    data_dict['aa_seq'] = aa_seq
    print ss_def
    print ss_seq
else:
    pass

if 'native_pdbs' in datatypes:
    # Read the native pdbs that you can exclude from the smotif search
    native_pdbs = data['native_pdbs'].lower().split()
    data_dict['natives'] = native_pdbs
    print native_pdbs
else:
    pass
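
The ss.genSSCombinations call in this example generates "fuzzy" variants of the secondary-structure elements by letting each SSE boundary slide by up to two residues. A minimal sketch of that idea, using a hypothetical helper and an assumed (ss_type, start, end) representation for each SSE:

from itertools import product

def fuzzy_sse_combinations(sse_list, fuzz=2):
    # sse_list: [(ss_type, start, end), ...] with integer residue indices.
    # For each SSE, enumerate all start/end shifts in [-fuzz, +fuzz], then
    # take the Cartesian product over SSEs to get every fuzzy combination.
    variants_per_sse = []
    for ss_type, start, end in sse_list:
        variants = []
        for d_start, d_end in product(range(-fuzz, fuzz + 1), repeat=2):
            new_start, new_end = start + d_start, end + d_end
            if 0 <= new_start < new_end:  # discard inverted or negative bounds
                variants.append((ss_type, new_start, new_end))
        variants_per_sse.append(variants)
    return [list(combo) for combo in product(*variants_per_sse)]

# e.g. a helix spanning residues 5-20 and a strand spanning 30-36
print(fuzzy_sse_combinations([('H', 5, 20), ('E', 30, 36)], fuzz=1)[:3])
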
Example No. 2
data = io.readInputDataFiles('input_data.txt')

datatypes = data.keys()
handle, aa_seq = io.readFasta(data['fasta_file'])

ss_seq = matchSeq2SS(aa_seq, data['ss_file'])
print ss_seq

# ss_seq = io.readPsiPred(psipred_file)

ss_def, ss_combi = ss.genSSCombinations(ss_seq)
# ss_def, ss_combi = ss.genSSCombinations2(ss_seq)

print ss_combi
io.dumpPickle("ss_profiles.pickle", ss_combi)

# Read in contacts at a given confidence level
if 'contacts_file' in datatypes:
    contacts, contacts_seq = io.readContacts(data['contacts_file'], probability=0.7)

native_pdbs = data['native_pdbs'].lower().split()

print native_pdbs

axrh_cutoff = data['axrh_cutoff'].split()
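
io.readContacts is called above with a probability cutoff of 0.7; its implementation is not part of this snippet. A rough sketch of the idea, assuming a plain text format with one 'res_i res_j probability' triple per line (the real file format may differ):

def read_contacts(path, probability=0.7):
    # Keep only residue pairs whose predicted contact probability
    # reaches the requested confidence level.
    contacts = []
    with open(path) as handle:
        for line in handle:
            fields = line.split()
            if len(fields) < 3:
                continue
            res_i, res_j, prob = int(fields[0]), int(fields[1]), float(fields[2])
            if prob >= probability:
                contacts.append((res_i, res_j, prob))
    return contacts
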
Example No. 3
def SmotifSearch(index_array):
    """
    Main()
    :param index_array:
    :return:
    """

    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys()  # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    psmotif = uts2.getPreviousSmotif(index_array[0])

    current_ss, direction = uts2.getSS2(index_array[1])
    csmotif_data, smotif_def = getfromDB(psmotif, current_ss, direction,
                                         exp_data['database_cutoff'])

    if not csmotif_data:
        # If the smotif library doesn't exist
        # Terminate further execution
        return True
    """
    always narrow down to previous sse and current sse and operate on them individually
    """
    sse_ordered = orderSSE(psmotif, current_ss, direction)
    dump_log = []
    no_clashes = False

    # ************************************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies various filters
    # This is the place to add new filters as you desire. For starters, look at Sequence filter.
    # ************************************************************************************************

    for i in range(0, len(csmotif_data)):

        # ************************************************
        # Applying different filters for the Smotif assembly
        # ************************************************

        # tpdbid/pdbid are logged later, so extract them before any filtering
        tpdbid = csmotif_data[i][0][0]
        pdbid = tpdbid[0:4]

        # Exclude natives if needed
        if 'natives' in exp_data_types:
            natives = exp_data['natives']
            if pdbid in natives:
                # if pdbid not in ['2z2i']:
                # Skip this native Smotif and move on to the next one
                continue

        # ************************************************
        # RMSD filter using QCP method
        # quickly filters non-overlapping smotifs
        # ************************************************

        rmsd, transformed_coos = qcp.rmsdQCP(psmotif[0], csmotif_data[i],
                                             direction)

        if rmsd <= exp_data['rmsd_cutoff'][1]:
            # The loop constraint checks that the overlapping Smotifs have not drifted too far apart.
            loop_constraint = llc.loopConstraint(transformed_coos, sse_ordered,
                                                 direction, smotif_def)
            if loop_constraint:
                # Check whether the SSEs within the assembled Smotif clash with one another
                no_clashes = qcp.clahses(transformed_coos,
                                         exp_data['clash_distance'])
            else:
                no_clashes = False

        if rmsd <= exp_data['rmsd_cutoff'][1] and no_clashes:
            # Prepare temp log array to save data at the end
            tlog, noe_fmeasure, pcs_tensor_fits, rdc_tensor_fits = [], [], [], []

            tlog.append(['smotif', csmotif_data[i]])
            tlog.append(['smotif_def', sse_ordered])

            tlog.append(['qcp_rmsd', transformed_coos, sse_ordered, rmsd])
            cathcodes = sm.orderCATH(psmotif, csmotif_data[i][0], direction)
            tlog.append(['cathcodes', cathcodes])

            # ************************************************
            # Sequence filter
            # Aligns the smotif seq to target seq and calculates
            # sequence identity and the alignment score
            # ************************************************

            csse_seq, seq_identity, blosum62_score = Sfilter.S2SequenceSimilarity(
                current_ss, csmotif_data[i], direction, exp_data)

            # concat current to previous seq
            concat_seq = sm.orderSeq(psmotif, csse_seq, direction)

            tlog.append([
                'seq_filter', concat_seq, csse_seq, seq_identity,
                blosum62_score
            ])

            # ************************************************
            # Pseudocontact Shift filter
            # uses experimental PCS data to filter Smotifs
            # scoring based on normalised chisqr
            # ************************************************

            if 'pcs_data' in exp_data_types:
                pcs_tensor_fits = Pfilter.PCSAxRhFit2(transformed_coos,
                                                      sse_ordered,
                                                      exp_data,
                                                      stage=2)
                tlog.append(['PCS_filter', pcs_tensor_fits])

            # ************************************************
            # Ambiguous NOE score filter
            # uses experimental ambiguous noe data to filter Smotifs
            # scoring based on f-measure?
            # ************************************************

            if 'noe_data' in exp_data_types:
                noe_fmeasure = Nfilter.s2NOEfit(transformed_coos, sse_ordered,
                                                exp_data)
                tlog.append(['NOE_filter', noe_fmeasure])

            # ************************************************
            # Residual dipolar coupling filter
            # uses experimental RDC data to filter Smotifs
            # scoring based on normalised chisqr
            # ************************************************

            if 'rdc_data' in exp_data_types:
                if noe_fmeasure and noe_fmeasure > 0.5:
                    rdc_tensor_fits = Rfilter.RDCAxRhFit2(transformed_coos,
                                                          sse_ordered,
                                                          exp_data,
                                                          stage=2)
                    tlog.append(['RDC_filter', rdc_tensor_fits])

            if pcs_tensor_fits or rdc_tensor_fits:
                #dump data to the disk
                print tpdbid, noe_fmeasure, rdc_tensor_fits
                # print csmotif_data[i][0], 'blosum62 score', blosum62_score, "seq_id", seq_identity, "rmsd=", rmsd, cathcodes
                dump_log.append(tlog)

    # prevent dumping empty arrays with no data
    if len(dump_log) > 0:
        print "num of hits", len(dump_log),
        io.dumpPickle(
            "tx_" + str(index_array[0]) + "_" + str(index_array[1]) +
            ".pickle", dump_log)

    return True
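
qcp.clahses, used in the RMSD branch above, decides whether the assembled SSEs collide; its internals are not shown here. A minimal sketch of such a steric check, assuming plain lists of (x, y, z) coordinates rather than whatever layout the qcp module actually uses:

def no_steric_clashes(coords_a, coords_b, clash_distance=3.5):
    # Return True when no atom pair between the two coordinate sets is
    # closer than clash_distance (Angstroms). Squared distances avoid
    # taking a square root in the inner loop.
    cutoff_sq = clash_distance * clash_distance
    for xa, ya, za in coords_a:
        for xb, yb, zb in coords_b:
            dx, dy, dz = xa - xb, ya - yb, za - zb
            if dx * dx + dy * dy + dz * dz < cutoff_sq:
                return False
    return True
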
Example No. 4
    print smotif_db[i][45:]
    for j in range(0, len(smotif)):
        # for j in range(0, 1):
        # print smotif[j][0]
        # print smotif[j][0][0] #['1pp9P00', '172', '203', '222', '245']
        # print smotif[j][0][1] #172- [203, 'THR', 'H', 49.366, 87.846, 102.07]]
        # print smotif[j][0][2] #222- [245, 'PHE', 'H', 40.016, 57.964, 78.166]]

        # getcoo returns [x, y, z, atom_type, res_no, res] for each SSE
        ss1 = rms.getcoo(smotif[j][0][1])
        ss2 = rms.getcoo(smotif[j][0][2])

        # dumpPDBCoo2(ss1, 5)
        # dumpPDBCoo2(ss2, 6)

        # concatenate the coordinate arrays of both SSEs to find their common center of mass
        ss1_plus_ss2 = conc_sss(ss1, ss2)

        cen_ss1, cm_ss1 = rms.centerCoo(ss1_plus_ss2)
        # dumpPDBCoo2(cen_ss1, 7)

        # Translate the original SSEs to the new center of mass
        tr_ss1 = translateSSE(cm_ss1, smotif[j][0][1])
        tr_ss2 = translateSSE(cm_ss1, smotif[j][0][2])
        smotif_def = smotif[j][0][0]
        # add the modified Smotifs to temp array
        temp_smotif.append([smotif_def, tr_ss1, tr_ss2])
    io.dumpPickle('./smotif_cen_db/' + smotif_db[i][45:], temp_smotif)
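
rms.centerCoo and translateSSE move each Smotif to a common center of mass; their code is not included in this example. The underlying operation is just subtracting the centroid from every coordinate, sketched below with an assumed list-of-(x, y, z) layout:

def centroid(coords):
    # Unweighted center of mass of a list of (x, y, z) tuples.
    n = float(len(coords))
    return (sum(c[0] for c in coords) / n,
            sum(c[1] for c in coords) / n,
            sum(c[2] for c in coords) / n)

def translate_to_origin(coords):
    # Shift the coordinates so that their centroid sits at the origin.
    cx, cy, cz = centroid(coords)
    return [(x - cx, y - cy, z - cz) for (x, y, z) in coords]
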
Example No. 5
def SmotifSearch(index_array):
    """
    Main ()
    :param index_array:
    :return:
    """

    # print index_array
    s1_def, s2_def = getSSdef(index_array)
    smotif_def = sm.getSmotif(s1_def, s2_def)
    # print s1_def, s2_def

    exp_data = io.readPickle("exp_data.pickle")
    exp_data_types = exp_data.keys()  # ['ss_seq', 'pcs_data', 'aa_seq', 'contacts']

    smotif_data = sm.readSmotifDatabase(smotif_def,
                                        exp_data['database_cutoff'])

    if not smotif_data:
        # If the smotif library doesn't exist, terminate further execution.
        return True

    dump_log = []
    # ************************************************************************************************
    # Main
    # The 'for' loop below iterates over all of the Smotifs and applies various filters
    # This is the place to add new filters as you desire. For starters, look at Sequence filter.
    # ************************************************************************************************

    for i in range(0, len(smotif_data)):
        # loop over for all of the entries in the smotif_db file

        # ************************************************
        # Excluding the natives
        # ************************************************

        # tpdbid/pdbid are logged later, so extract them before any filtering
        tpdbid = smotif_data[i][0][0]
        pdbid = tpdbid[0:4]

        if 'natives' in exp_data_types:
            natives = exp_data['natives']
            if pdbid in natives:
                # if pdbid not in ['2z2i']:
                # Skip this native Smotif, but keep iterating
                continue

        # ************************************************
        # Applying different filters to Smotifs
        # Prepare temp log array to save data at the end
        # ************************************************

        tlog, pcs_tensor_fits, rdc_tensor_fits, noe_fmeasure = [], [], [], []
        tlog.append(['smotif', smotif_data[i]])
        tlog.append(['smotif_def', [s1_def, s2_def]])
        tlog.append(['cathcodes', [smotif_data[i][0]]])

        # ************************************************
        # Sequence filter
        # Aligns the smotif seq to target seq and calculates
        # sequence identity and the alignment score
        # ************************************************

        smotif_seq, seq_identity, blosum62_score = \
            Sfilter.SequenceSimilarity(s1_def, s2_def, smotif_data[i], exp_data)
        tlog.append(['seq_filter', smotif_seq, seq_identity, blosum62_score])

        # ************************************************
        # Pseudocontact Shift filter
        # uses experimental PCS data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************

        if 'pcs_data' in exp_data_types:
            pcs_tensor_fits = Pfilter.PCSAxRhFit(s1_def, s2_def,
                                                 smotif_data[i], exp_data)
            tlog.append(['PCS_filter', pcs_tensor_fits])

        # ************************************************
        # Ambiguous NOE score filter
        # uses experimental ambiguous noe data to filter Smotifs
        # scoring based on f-measure?
        # ************************************************

        if 'noe_data' in exp_data_types:
            noe_fmeasure = Nfilter.s1NOEfit(s1_def, s2_def, smotif_data[i],
                                            exp_data)
            tlog.append(['NOE_filter', noe_fmeasure])

        # ************************************************
        # Residual dipolar coupling filter
        # uses experimental RDC data to filter Smotifs
        # scoring based on normalised chisqr
        # ************************************************

        if 'rdc_data' in exp_data_types:

            if noe_fmeasure and noe_fmeasure > 0.5:
                rdc_tensor_fits = Rfilter.RDCAxRhFit(s1_def, s2_def,
                                                     smotif_data[i], exp_data)
                tlog.append(['RDC_filter', rdc_tensor_fits])

        # Dump the data to the disk
        if pcs_tensor_fits or rdc_tensor_fits:
            # print smotif_data[i][0][0], "seq_id", seq_identity, "i=", i, "/", len(smotif_data)
            print tpdbid, noe_fmeasure, rdc_tensor_fits
            dump_log.append(tlog)

    # Save all of the hits in pickled arrays
    if dump_log:
        print "num of hits", len(dump_log)
        io.dumpPickle(
            '0_' + str(index_array[0]) + "_" + str(index_array[1]) + ".pickle",
            dump_log)

    return True
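
The NOE filter above scores each Smotif with an f-measure and only runs the RDC fit when that score exceeds 0.5. Nfilter.s1NOEfit itself is not shown; a sketch of how an f-measure over contacts could be computed, assuming both the model contacts and the ambiguous NOE restraints are given as sets of residue-index pairs:

def contact_fmeasure(model_contacts, noe_contacts):
    # Precision: fraction of model contacts supported by an NOE restraint.
    # Recall: fraction of NOE restraints reproduced by the model.
    if not model_contacts or not noe_contacts:
        return 0.0
    true_pos = len(model_contacts & noe_contacts)
    if true_pos == 0:
        return 0.0
    precision = true_pos / float(len(model_contacts))
    recall = true_pos / float(len(noe_contacts))
    return 2.0 * precision * recall / (precision + recall)

# e.g. contact_fmeasure({(3, 40), (5, 62)}, {(3, 40), (10, 55)}) -> 0.5
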