def _resolve_prediction_model(sites, predictFrame, hierarchy, kinase):
    '''
    Map the prediction settings to an output-file prefix and a weight file.

    :param sites: ('S', 'T') as a tuple/list, or the string 'Y'
    :param predictFrame: 'general' or 'kinase'
    :param hierarchy: hierarchy level of the kinase model (kinase mode only)
    :param kinase: kinase name (kinase mode only)
    :return: tuple ``(outputfile, model_weight)``
    :raises ValueError: if the combination does not identify a model
    '''
    if predictFrame == 'general':
        general_weights = {
            ('S', 'T'): './models/model_general_S,T.h5',
            'Y': './models/model_general_Y.h5',
        }
        # Lists are normalised to tuples so ['S', 'T'] matches ('S', 'T').
        site_key = tuple(sites) if isinstance(sites, (list, tuple)) else sites
        try:
            model_weight = general_weights.get(site_key)
        except TypeError:
            # Unhashable input cannot name a supported site.
            model_weight = None
        if model_weight is None:
            raise ValueError(
                "unsupported sites {!r} for the general model; "
                "expected ('S', 'T') or 'Y'".format(sites))
        # '{:s}' cannot format a tuple, so build a plain string label first.
        if isinstance(site_key, tuple):
            site_label = '_'.join(site_key)
        else:
            site_label = site_key
        return 'general_{:s}'.format(site_label), model_weight

    if predictFrame == 'kinase':
        if hierarchy is None or kinase is None:
            raise ValueError(
                "predictFrame='kinase' requires both hierarchy and kinase")
        outputfile = 'kinase_{:s}_{:s}'.format(hierarchy, kinase)
        model_weight = './models/model_{:s}_{:s}.h5'.format(hierarchy, kinase)
        return outputfile, model_weight

    raise ValueError(
        "predictFrame must be 'general' or 'kinase', got {!r}".format(
            predictFrame))


def predict_for_deepphos(train_file_name, sites, predictFrame='general',
                         hierarchy=None, kinase=None):
    '''
    Score the candidate sites of a prediction file with a stored model and
    write the scores to a tab-separated text file.

    :param train_file_name: path of the prediction file; per the original
                            notes it must be a .csv file whose columns are
                            proteinName, position, sites, shortseq
    :param sites: the sites to predict: ('S', 'T') or 'Y'
    :param predictFrame: 'general' or 'kinase'
    :param hierarchy: required when predictFrame is 'kinase': one of group,
                      family, subfamily, kinase; selects the model file
    :param kinase: kinase name, required when predictFrame is 'kinase'
    :return: None; the result is written to
             ``<outputfile>prediction_phosphorylation.txt`` in the current
             directory (no separator between prefix and suffix)
    :raises ValueError: if sites / predictFrame / hierarchy / kinase do not
                        identify a model
    '''
    # Validate first so bad arguments fail before any data is loaded.
    outputfile, model_weight = _resolve_prediction_model(
        sites, predictFrame, hierarchy, kinase)

    # The network takes three inputs, one per window size.
    win1 = 51
    win2 = 33
    win3 = 15

    from methods.dataprocess_predict import getMatrixInput
    [X_test1, y_test, ids, position] = getMatrixInput(train_file_name, sites, win1)
    [X_test2, _, _, _] = getMatrixInput(train_file_name, sites, win2)
    [X_test3, _, _, _] = getMatrixInput(train_file_name, sites, win3)

    from methods.model_n import model_net
    # nb_epoch=0: presumably builds the network without training it, since
    # the weights are loaded from disk right after -- TODO confirm in model_net.
    model = model_net(X_test1, X_test2, X_test3, y_test, nb_epoch=0)
    model.load_weights(model_weight)

    predictions_t = model.predict([X_test1, X_test2, X_test3])
    # Column 1 of the prediction matrix is reported as the score.
    results_ST = np.column_stack((ids, position, predictions_t[:, 1]))
    result = pd.DataFrame(results_ST)
    result.to_csv(outputfile + "prediction_phosphorylation.txt", index=False,
                  header=None, sep='\t', quoting=csv.QUOTE_NONNUMERIC)
def train_for_deepphos(train_file_name, site, predictFrame='general',
                       background_weight=None):
    '''
    Train a model on a labelled file and save its weights to
    ``<modelname>.h5`` in the current directory.

    :param train_file_name: path of the training file; per the original notes
                            it must be a .csv file whose columns are label,
                            proteinName, position, sites, shortsequence
    :param site: the sites to train on: ('S', 'T') or 'Y'
    :param predictFrame: 'general' or 'kinase'; selects the saved file name
                         ('general_model_<site>' or 'kinase_model_<site>';
                         any other value yields 'general_<site>')
    :param background_weight: weights passed on to ``model_net`` to start
                              from, usually used for kinase training
    :return: None
    :raises ValueError: if ``site`` is empty
    '''
    # Reject an empty site before any data is loaded.
    if not site:
        raise ValueError(
            "site must name the residue(s) to train on, "
            "e.g. ('S', 'T') or 'Y'")

    # The network takes three inputs, one per window size.
    win1 = 51
    win2 = 33
    win3 = 15

    from methods.dataprocess_train import getMatrixLabel
    X_train1, y_train = getMatrixLabel(train_file_name, site, win1)
    X_train2, _ = getMatrixLabel(train_file_name, site, win2)
    X_train3, _ = getMatrixLabel(train_file_name, site, win3)

    # '{:s}' cannot format a tuple such as ('S', 'T'); use a string label.
    site_label = site if isinstance(site, str) else '_'.join(site)
    if predictFrame == 'kinase':
        modelname = "kinase_model_{:s}".format(site_label)
    elif predictFrame == 'general':
        modelname = "general_model_{:s}".format(site_label)
    else:
        modelname = "general_{:s}".format(site_label)

    from methods.model_n import model_net
    model = model_net(X_train1, X_train2, X_train3, y_train,
                      weights=background_weight)
    model.save_weights(modelname + '.h5', overwrite=True)
# Kinase-specific weight sets evaluated by predict_for_deepphos_from_json;
# each name maps to ./models/model_<name>.h5.
_DEEPPHOS_KINASE_MODELS = (
    'family_CDK', 'family_CK2', 'family_MAPK', 'family_PKC', 'family_Src',
    'group_AGC', 'group_Atypical', 'group_CAMK', 'group_CMGC', 'group_TK',
    'kinase_CDC2', 'kinase_CK2a1', 'kinase_PKACa', 'kinase_PKCa',
    'kinase_SRC', 'subfamily_CDC2', 'subfamily_CDK2', 'subfamily_ERK1',
    'subfamily_PKCa',
)


def predict_for_deepphos_from_json(input, organism):
    '''
    Score the sites described by ``input`` with every kinase-specific model
    and return all results as one JSON document.

    Only the kinase models listed in ``_DEEPPHOS_KINASE_MODELS`` are run; the
    general S/T and Y models are not evaluated here.

    :param input: request data forwarded unchanged to
                  ``getMatrixInputFromJson`` (its format is defined there)
    :param organism: forwarded unchanged to ``getMatrixInputFromJson``
    :return: JSON string of an object keyed by model name. Each value is
             itself a JSON-encoded string holding a list of records with the
             keys target, name, full_name, position and score. If a model
             fails, its value is a single record with empty fields.
    '''
    print("----------------------------")
    print("in predict_for_deepphos")

    # The network takes three inputs, one per window size.
    win1 = 51
    win2 = 33
    win3 = 15

    from methods.dataprocess_predict import getMatrixInputFromJson
    print('running X_test1 ----------------------------')
    [X_test1, y_test, ids, position, full_names, names] = getMatrixInputFromJson(input, organism, win1)
    print('running X_test2 ----------------------------')
    [X_test2, _, _, _, _, _] = getMatrixInputFromJson(input, organism, win2)
    print('running X_test3 ----------------------------')
    [X_test3, _, _, _, _, _] = getMatrixInputFromJson(input, organism, win3)
    print('----------------------------')

    from methods.model_n import model_net
    # nb_epoch=0: presumably builds the network without training it, since
    # weights are loaded per model below -- TODO confirm in model_net.
    model = model_net(X_test1, X_test2, X_test3, y_test, nb_epoch=0)

    result_json = {}
    for kin in _DEEPPHOS_KINASE_MODELS:
        print(kin)
        model_weight = './models/model_{:s}.h5'.format(kin)
        print("-----------------------------------------------------")
        print(model_weight)
        try:
            model.load_weights(model_weight)
            predictions_t = model.predict([X_test1, X_test2, X_test3])
            # Column 1 of the prediction matrix is reported as the score.
            results_ST = np.column_stack(
                (ids, names, full_names, position, predictions_t[:, 1]))
            result = pd.DataFrame(
                results_ST,
                columns=['target', 'name', 'full_name', 'position', 'score'])
            records_json = result.to_json(orient='records')
        except Exception as exc:
            # Best effort: one failing model must not abort the others, but
            # report what went wrong instead of hiding it. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
            print("there was an error")
            print(repr(exc))
            # Placeholder record with the same keys as a successful record.
            records_json = json.dumps([{
                'target': "",
                'name': "",
                'full_name': "",
                'position': "",
                'score': ""
            }])
        else:
            print(records_json)
        result_json[kin] = records_json

    print(result_json)
    return json.dumps(result_json)