Ejemplo n.º 1
0
def predict_for_deepphos(train_file_name,
                         sites,
                         predictFrame='general',
                         hierarchy=None,
                         kinase=None):
    '''
    Score candidate phosphorylation sites with a pre-trained model and
    write the scores to a tab-separated text file.

    :param train_file_name: path of the file to predict on; per the original
                            notes it must be a .csv file whose columns are
                            proteinName, position, sites, shortseq
    :param sites: residues to predict: ('S', 'T') or 'Y'. For the general
                  model the list/tuple form and the string 'S,T' are both
                  accepted.
    :param predictFrame: 'general' or 'kinase'
    :param hierarchy: required when predictFrame is 'kinase': one of
                      group, family, subfamily, kinase; selects the model file
    :param kinase: required when predictFrame is 'kinase': kinase name
    :return: None. Writes '<prefix>prediction_phosphorylation.txt' in the
             current directory with the columns id, position, score (no
             header), where prefix is 'general_<sites>' or
             'kinase_<hierarchy>_<kinase>'.
    :raises ValueError: if predictFrame is unknown, if no general model
                        exists for ``sites``, or if hierarchy/kinase are
                        missing in kinase mode
    '''
    # Resolve the weight file and output prefix before any expensive data
    # loading so that bad arguments fail fast with a clear message.
    if predictFrame == 'general':
        if isinstance(sites, str):
            site_label = sites
        elif isinstance(sites, (tuple, list)):
            site_label = ','.join(str(s) for s in sites)
        else:
            site_label = None
        # Only these two general models are referenced by this function.
        if site_label not in ('S,T', 'Y'):
            raise ValueError(
                "no general model for sites {!r}; expected ('S', 'T') or 'Y'"
                .format(sites))
        outputfile = 'general_{:s}'.format(site_label)
        model_weight = './models/model_general_{:s}.h5'.format(site_label)
    elif predictFrame == 'kinase':
        if hierarchy is None or kinase is None:
            raise ValueError(
                "predictFrame='kinase' requires both hierarchy and kinase")
        outputfile = 'kinase_{:s}_{:s}'.format(hierarchy, kinase)
        model_weight = './models/model_{:s}_{:s}.h5'.format(hierarchy, kinase)
    else:
        raise ValueError(
            "predictFrame must be 'general' or 'kinase', got {!r}".format(
                predictFrame))

    # The network takes three inputs built with different window sizes.
    win1 = 51
    win2 = 33
    win3 = 15
    from methods.dataprocess_predict import getMatrixInput
    X_test1, y_test, ids, position = getMatrixInput(train_file_name, sites,
                                                    win1)
    X_test2, _, _, _ = getMatrixInput(train_file_name, sites, win2)
    X_test3, _, _, _ = getMatrixInput(train_file_name, sites, win3)

    from methods.model_n import model_net
    # nb_epoch=0: presumably only builds the architecture without training;
    # the real weights are loaded from disk just below -- TODO confirm.
    model = model_net(X_test1, X_test2, X_test3, y_test, nb_epoch=0)

    model.load_weights(model_weight)
    predictions_t = model.predict([X_test1, X_test2, X_test3])
    # Column 1 of the prediction matrix is the value reported as the score.
    results_ST = np.column_stack((ids, position, predictions_t[:, 1]))

    result = pd.DataFrame(results_ST)
    result.to_csv(outputfile + "prediction_phosphorylation.txt",
                  index=False,
                  header=None,
                  sep='\t',
                  quoting=csv.QUOTE_NONNUMERIC)
Ejemplo n.º 2
0
def train_for_deepphos(train_file_name,
                       site,
                       predictFrame='general',
                       background_weight=None):
    '''
    Train a model on a labelled file and save its weights to
    '<modelname>.h5' in the current directory.

    :param train_file_name: path of the training file; per the original
                            notes it must be a .csv file whose columns are
                            label, proteinName, position, sites,
                            shortsequence
    :param site: residues to train on: ('S', 'T') or 'Y'
    :param predictFrame: 'general' or 'kinase'; any other value falls back
                         to the model name 'general_<site>'
    :param background_weight: weights of an existing model used to pretrain
                              the new one (usually for kinase training);
                              passed to model_net as ``weights``
    :return: None. The weights file is named 'general_model_<site>.h5' or
             'kinase_model_<site>.h5', where a list/tuple site is joined
             with ',' (e.g. 'S,T').
    :raises ValueError: if ``site`` is missing, empty, or not a string /
                        list / tuple
    '''
    # Build a printable site label up front: '{:s}'.format(('S', 'T'))
    # raises TypeError, so list/tuple sites must be joined explicitly.
    if isinstance(site, str):
        site_label = site
    elif isinstance(site, (tuple, list)):
        site_label = ','.join(str(s) for s in site)
    else:
        site_label = ''
    if not site_label:
        raise ValueError(
            "site must be a non-empty string or list/tuple of residues, "
            "got {!r}".format(site))

    if predictFrame == 'general':
        modelname = "general_model_{:s}".format(site_label)
    elif predictFrame == 'kinase':
        modelname = "kinase_model_{:s}".format(site_label)
    else:
        # Fallback name kept from the original for unrecognised frames.
        modelname = "general_{:s}".format(site_label)

    # The network takes three inputs built with different window sizes.
    win1 = 51
    win2 = 33
    win3 = 15
    from methods.dataprocess_train import getMatrixLabel
    X_train1, y_train = getMatrixLabel(train_file_name, site, win1)
    X_train2, _ = getMatrixLabel(train_file_name, site, win2)
    X_train3, _ = getMatrixLabel(train_file_name, site, win3)

    from methods.model_n import model_net

    model = model_net(X_train1,
                      X_train2,
                      X_train3,
                      y_train,
                      weights=background_weight)
    model.save_weights(modelname + '.h5', overwrite=True)
Ejemplo n.º 3
0
def predict_for_deepphos_from_json(input, organism):
    '''
    Score the sites described by a JSON input against every bundled
    kinase-specific model and return all results as one JSON string.

    :param input: prediction input, passed unchanged to
                  getMatrixInputFromJson (its exact schema is defined by
                  that helper and is not visible here)
    :param organism: organism selector, passed unchanged to
                     getMatrixInputFromJson
    :return: a JSON string encoding an object with one entry per model key
             (e.g. 'family_CDK'). Each value is itself a JSON-encoded string
             holding a list of records with the keys target, name,
             full_name, position and score. If a model fails to load or
             predict, its value is a single record with empty strings.
    '''
    print("----------------------------")
    print("in predict_for_deepphos")
    # The network takes three inputs built with different window sizes.
    win1 = 51
    win2 = 33
    win3 = 15

    from methods.dataprocess_predict import getMatrixInputFromJson

    print('running X_test1 ----------------------------')
    [X_test1, y_test, ids, position, full_names,
     names] = getMatrixInputFromJson(input, organism, win1)
    print('running X_test2 ----------------------------')
    [X_test2, _, _, _, _, _] = getMatrixInputFromJson(input, organism, win2)
    print('running X_test3 ----------------------------')
    [X_test3, _, _, _, _, _] = getMatrixInputFromJson(input, organism, win3)

    print('----------------------------')
    result_json = {}

    from methods.model_n import model_net
    # nb_epoch=0: presumably only builds the architecture without training;
    # weights are swapped in per model inside the loop -- TODO confirm.
    model = model_net(X_test1, X_test2, X_test3, y_test, nb_epoch=0)

    for kin in [
            'family_CDK', 'family_CK2', 'family_MAPK', 'family_PKC',
            'family_Src', 'group_AGC', 'group_Atypical', 'group_CAMK',
            'group_CMGC', 'group_TK', 'kinase_CDC2', 'kinase_CK2a1',
            'kinase_PKACa', 'kinase_PKCa', 'kinase_SRC', 'subfamily_CDC2',
            'subfamily_CDK2', 'subfamily_ERK1', 'subfamily_PKCa'
    ]:
        print(kin)

        model_weight = './models/model_{:s}.h5'.format(kin)
        print("-----------------------------------------------------")
        print(model_weight)

        # Best effort per model: one failing model must not abort the
        # remaining ones, so the error is reported and a placeholder stored.
        try:
            model.load_weights(model_weight)
            predictions_t = model.predict([X_test1, X_test2, X_test3])

            # Column 1 of the prediction matrix is reported as the score.
            results_ST = np.column_stack(
                (ids, names, full_names, position, predictions_t[:, 1]))

            result = pd.DataFrame(
                results_ST,
                columns=['target', 'name', 'full_name', 'position', 'score'])
            records_json = result.to_json(orient='records')
            print(records_json)
            result_json[kin] = records_json
        except Exception as err:
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate; the cause is printed instead of hidden.
            print("there was an error")
            print("{}: {!r}".format(model_weight, err))
            # Placeholder carries the same keys as a successful record.
            result_json[kin] = json.dumps([{
                'target': "",
                'name': "",
                'full_name': "",
                'position': "",
                'score': ""
            }])

    print(result_json)
    return json.dumps(result_json)