def main():
    parser = argparse.ArgumentParser()
    # parser.add_argument('-input',  dest='inputfile', type=str, help='Protein sequences to be predicted in fasta format.', required=True)
    parser.add_argument('-output', dest='outputfile', type=str, help='prefix of the prediction results.', required=True)
    parser.add_argument('-window', dest='window', type=int, help='specify the window size', required=True)
    parser.add_argument('-model-prefix', dest='modelprefix', type=str,
                        help='prefix of the custom model used for prediction. If you do not have one, please run train_models.py to train a model.',
                        required=False, default=None)
    parser.add_argument('-residue-types', dest='residues', type=str,
                        help='Residue types to be predicted. For multiple residues, separate each with \',\'',
                        required=False, default="C,H,E,D")
    parser.add_argument('-codingMode',  dest='codingMode', type=int, help='Set the input sequence encoding mode.', required=False, default=0)
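    # Illustrative invocation (the script name and values are placeholders, not taken from the project):
    #   python evaluate_folds.py -output ./results/ -window 16 -residue-types C,H,E,D -codingMode 0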

    args = parser.parse_args()

    # inputfile=args.inputfile;
    outputfile = args.outputfile
    residues = args.residues.split(",")
    modelprefix = args.modelprefix
    window = args.window
    codemode = args.codingMode
    print(outputfile, residues, modelprefix, window)
    # outputfile = r'/home/ucexbw/ZinCaps/ActiveSitePrediction/data/output/'
    # fp = open(outputfile+"eval_by_AUC_precision_scores_polynomial_decay_increase_decrease_1_0.5_1",'w')
    # fp = open(outputfile+"eval_by_AUC_precision_scores_polynomial_decay_1_0.5_1",'w')
    # fp = open(outputfile+"eval_by_AUC_precision_scores_10fold",'w')
    fp = open(outputfile + "eval_by_AUC_precision_scores_10fold_constantweight1_0.5_25.txt", 'w')

    model_arch = Capsnet_main(np.zeros([3, 2 * window + 1, 6]), [], nb_epoch=1, compiletimes=0, lr=0.001,
                              batch_size=500, lam_recon=0, routings=3, modeltype='nogradientstop', nb_classes=2,
                              predict=True)
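    # The call above builds the Capsnet architecture once, in predict mode, from a dummy input of
    # shape (3, 2*window+1, 6); fold-specific weights are loaded into these models later in the loop.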
    # model_arch=Capsnet_main(np.zeros([3,2*16+1,21]),[],nb_epoch=1,compiletimes=0,lr=0.001,batch_size=500,lam_recon=0,routings=3,modeltype='nogradientstop',nb_classes=2,predict=True)

    roc_average_weight = np.zeros(5)
    roc_average_predict = np.zeros(5)
    roc_average_last_predict = np.zeros(5)

    accuracy_average_last_predict = np.zeros(5)
    sensitivity_average_last_predict = np.zeros(5)
    specificity_average_last_predict = np.zeros(5)
    f1_score_average_last_predict = np.zeros(5)
    mcc_average_last_predict = np.zeros(5)

    pr_average_weight = np.zeros(5)
    pr_average_predict = np.zeros(5)
    pr_average_last_predict = np.zeros(5)
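    # The arrays above hold one value per fold; the loop below evaluates each of the 5 pre-split folds
    # using the fold-specific input file, checkpoint weights, and model prefix defined inside it.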

    for time in range(5):
        fp.write("############################" + str(time) + "\n")
        inputfile = '/scratch/ucexbw/ZinCaps25/ActiveSitePrediction/lib/K-Fold/annotated_sequence.fasta_training_annotated_' + str(
            time) + '.fasta'
        # if os.path.exists(outputfile+"eval_by_AUC_precision_scores"):
        #   os.rm(outputfile+"eval_by_AUC_precision_scores")

        checkpointweights = '/scratch/ucexbw/ZinCaps25/ActiveSitePrediction/data/weights/Zinc_' + str(time) + '_weights'
        modelprefix = '/scratch/ucexbw/ZinCaps25/ActiveSitePrediction/data/models/Zinc_' + str(time) + '_model'
        eval_type = 'average_last_predict'  # 'all' evaluates with every method; other options:
        # average_weight
        # average_predict
        # average_last_predict

        if modelprefix is None:
            # print ("Please specify the prefix for an existing custom model by "
            #        "-model-prefix!\n\
            # It indicates two files [-model-prefix]_HDF5model and [-model-prefix]_parameters.\n \
            # If you don't have such files, please run train_models.py to get the "
            #        "custom model first!\n")
            exit()
        else:  # custom prediction
            model = modelprefix + str("_HDF5model")
            parameter = modelprefix + str("_parameters")
            try:
                f = open(parameter, 'r')
            except IOError:
                print('cannot open ' + parameter + " ! check if the model exists. "
                                                   "please run train_general.py or train_kinase.py to get the custom model first!\n")
            else:
                f = open(parameter, 'r')
                parameters = f.read()
                f.close()

            nclass = int(parameters.split("\t")[0])
            window = int(parameters.split("\t")[1])
            residues = parameters.split("\t")[2]
            residues = residues.split(",")
            codemode = int(parameters.split("\t")[4])
            modeltype = str(parameters.split("\t")[5])
            nb_classes = int(parameters.split("\t")[6])
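            # The _parameters file appears to be a single tab-separated record:
            # nclass, window, residue list, <unused>, coding mode, model type, nb_classes.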

        testfrag, ids, poses, focuses = extractFragforPredict(inputfile, window, '-', focus=residues)

        testX, testY = convertRawToXY(testfrag.as_matrix(), codingMode=codemode)
        if len(testX.shape) > 3:
            testX.shape = (testX.shape[0], testX.shape[2], testX.shape[3])

        predict_average_weight = np.zeros((testX.shape[0], 2))
        predict_average_predict = np.zeros((testX.shape[0], 2))
        predict_average_last_predict = np.zeros((testX.shape[0], 2))
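        # One accumulator per ensembling strategy; predictions are summed over the bt sub-models below
        # and averaged afterwards:
        #   average_weight       - uses predict_by_avg_members on each sub-model's loaded snapshots
        #   average_predict      - uses predict_by_snapshot on each sub-model's loaded snapshots
        #   average_last_predict - uses the final saved per-class weights (loaded further below)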

        for bt in range(nclass):  # one pass per bootstrap sub-model
            # load all involving mode weights
            # sess = tf.Session()
            inputweights = checkpointweights + "_nclass" + str(bt) + "_iteration"
            model_members = load_model_weights(inputweights, model_arch)
            if eval_type == "all" or eval_type == "average_weight":
                predict_temp = predict_by_avg_members(model_members, model_arch, testX)
                predict_average_weight += predict_temp
                auc_score, pr_score, accuracy, sensitivity, specificity, f1_score, mcc = evaluate(predict_temp, testY)
                roc_average_last_predict[time] = auc_score
                pr_average_last_predict[time] = pr_score
                accuracy_average_last_predict[time] = accuracy
                sensitivity_average_last_predict[time] = sensitivity
                specificity_average_last_predict[time] = specificity
                f1_score_average_last_predict[time] = f1_score
                mcc_average_last_predict[time] = mcc
                # fp.write(
                #     "average_weight_results_bt" + str(bt) + "\t" + str(auc_score) + "\t" + str(pr_score) + "\t" + str(
                #         accuracy) + "\t" + str(sensitivity) + "\t" + str(specificity) + "\t" + str(
                #         f1_score) + "\t" + str(mcc) + "\n")

            if eval_type == "all" or eval_type == "average_predict":
                predict_temp = predict_by_snapshot(model_members, model_arch, testX)
                predict_average_predict += predict_temp
                auc_score, pr_score, accuracy, sensitivity, specificity, f1_score, mcc = evaluate(predict_temp, testY)
                roc_average_last_predict[time] = auc_score
                pr_average_last_predict[time] = pr_score
                accuracy_average_last_predict[time] = accuracy
                sensitivity_average_last_predict[time] = sensitivity
                specificity_average_last_predict[time] = specificity
                f1_score_average_last_predict[time] = f1_score
                mcc_average_last_predict[time] = mcc
                print("average_predict results:")
                # fp.write(
                #     "average_predict_results_bt" + str(bt) + "\t" + str(auc_score) + "\t" + str(pr_score) + "\t" + str(
                #         accuracy) + "\t" + str(sensitivity) + "\t" + str(specificity) + "\t" + str(
                #         f1_score) + "\t" + str(mcc) + "\n")

            del model_members
            # sess.close()

        if eval_type == "all" or eval_type == "average_weight1":
            predict_average_weight = predict_average_weight / float(nclass)
            auc_score, pr_score, accuracy, sensitivity, specificity, f1_score, mcc = evaluate(predict_average_weight,
                                                                                             testY)
            print("average_weight1")
            roc_average_last_predict[time] = auc_score
            pr_average_last_predict[time] = pr_score
            accuracy_average_last_predict[time] = accuracy
            sensitivity_average_last_predict[time] = sensitivity
            specificity_average_last_predict[time] = specificity
            f1_score_average_last_predict[time] = f1_score
            mcc_average_last_predict[time] = mcc
            # fp.write(
            #     "average_weight_results\t" + str(auc_score) + "\t" + str(pr_score) + "\t" + str(accuracy) + "\t" + str(
            #         sensitivity) + "\t" + str(specificity) + "\t" + str(f1_score) + "\t" + str(mcc) + "\n")
            # roc_average_weight[time] = auc_score
            # pr_average_weight[time] = pr_score
            # write_output(outputfile + "average_weight_results_fold"+str(time)+".txt",predict_average_weight,ids,poses,focuses)

        if eval_type == "all" or eval_type == "average_predict":
            predict_average_predict = predict_average_predict / float(nclass)
            auc_score, pr_score, accuracy, sensitivity, specificity, f1_score, mcc = evaluate(predict_average_predict,
                                                                                              testY)
            roc_average_last_predict[time] = auc_score
            pr_average_last_predict[time] = pr_score
            accuracy_average_last_predict[time] = accuracy
            sensitivity_average_last_predict[time] = sensitivity
            specificity_average_last_predict[time] = specificity
            f1_score_average_last_predict[time] = f1_score
            mcc_average_last_predict[time] = mcc
            # fp.write("average_predict_results:\t" + str(auc_score) + "\t" + str(pr_score) + "\t" + str(
            #     accuracy) + "\t" + str(sensitivity) + "\t" + str(specificity) + "\t" + str(f1_score) + "\t" + str(
            #     mcc) + "\n")
            # roc_average_predict[time] = auc_score
            # pr_average_predict[time] = pr_score
            # write_output(outputfile + "average_predict_results_fold"+str(time)+".txt",predict_average_predict,ids,poses,focuses)

        if eval_type == "all" or eval_type == "average_last_predict":
            nclass_ini = 1
            for bt in range(nclass):
                model_arch[0].load_weights(model + "_class" + str(bt))
                predict_temp = model_arch[1].predict(testX)[0]
                predict_average_last_predict += predict_temp
                auc_score, pr_score, accuracy, sensitivity, specificity, f1_score, mcc = evaluate(predict_temp, testY)
                # fp.write("average_last_predict_results_bt" + str(bt) + "\t" + str(auc_score) + "\t" + str(
                #     pr_score) + "\t" + str(accuracy) + "\t" + str(sensitivity) + "\t" + str(specificity) + "\t" + str(
                #     f1_score) + "\t" + str(mcc) + "\n")

            predict_average_last_predict = predict_average_last_predict / (nclass * nclass_ini)
            auc_score, pr_score, accuracy, sensitivity, specificity, f1_score, mcc = evaluate(
                predict_average_last_predict, testY)
            # fp.write("average_last_predict_results\t" + str(auc_score) + "\t" + str(pr_score) + "\t" + str(
            #     accuracy) + "\t" + str(sensitivity) + "\t" + str(specificity) + "\t" + str(f1_score) + "\t" + str(
            #     mcc) + "\n")
            roc_average_last_predict[time] = auc_score
            pr_average_last_predict[time] = pr_score
            accuracy_average_last_predict[time] = accuracy
            sensitivity_average_last_predict[time] = sensitivity
            specificity_average_last_predict[time] = specificity
            f1_score_average_last_predict[time] = f1_score
            mcc_average_last_predict[time] = mcc
            # write_output(outputfile + "average_last_predict_results_fold"+str(time)+".txt",predict_average_last_predict,ids,poses,focuses)
            print("Successfully predicted from custom models !\n")

    fp.write("!!!!!!!!!!!!!!!!!!!!!!!!!\n")
    # fp.write("average_weight_results\t" + ",".join([str(x) for x in roc_average_weight]) + "\t" + ",".join(
    #     [str(x) for x in pr_average_weight]) + "\t" + str(np.mean(roc_average_weight)) + "," + str(
    #     np.std(roc_average_weight)) + "\t" + str(np.mean(pr_average_weight)) + "," + str(
    #     np.std(pr_average_weight)) + "\n")
    # fp.write("average_predict_results\t" + ",".join([str(x) for x in roc_average_predict]) + "\t" + ",".join(
    #     [str(x) for x in pr_average_predict]) + "\t" + str(np.mean(roc_average_predict)) + "," + str(
    #     np.std(roc_average_predict)) + "\t" + str(np.mean(pr_average_predict)) + "," + str(
    #     np.std(pr_average_predict)) + "\n")
    # fp.write("average_last_predict_results\t" + ",".join([str(x) for x in roc_average_last_predict]) + "\t" + ",".join(
    #     [str(x) for x in pr_average_last_predict]) + "\t" + str(np.mean(roc_average_last_predict)) + "," + str(
    #     np.std(roc_average_last_predict)) + "\t" + str(np.mean(pr_average_last_predict)) + "," + str(
    #     np.std(pr_average_last_predict)) + "\n")
    #

    print("roc: \n")
    print(roc_average_last_predict)
    fp.write("average_last_predict_results: \t" + "\t" + str(np.mean(roc_average_last_predict)) + ","  + "\t" + str(np.mean(pr_average_last_predict)) + "," +str(np.mean(accuracy_average_last_predict))+","  + "\t" +
             str(np.mean(sensitivity_average_last_predict)) +","  + "\t" +str(np.mean(specificity_average_last_predict)) +","  + "\t"  +str(np.mean(f1_score_average_last_predict)) +","  + "\t"  +str(np.mean(mcc_average_last_predict)) 
         + "\n")
    fp.close()
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-input',
        dest='inputfile',
        type=str,
        help='Protein sequences to be predicted in fasta format.',
        required=True)
    parser.add_argument('-output',
                        dest='outputfile',
                        type=str,
                        help='prefix of the prediction results.',
                        required=True)
    parser.add_argument(
        '-model-prefix',
        dest='modelprefix',
        type=str,
        help='prefix of the custom model used for prediction. If you do not have one, '
        'please run train_general.py to train a custom general PTM model or run '
        'train_kinase.py to train a custom kinase-specific PTM model.',
        required=False,
        default=None)
    parser.add_argument(
        '-residue-types',
        dest='residues',
        type=str,
        help='Residue types to be predicted, only used when -predict-type is \'general\'. '
        'For multiple residues, separate each with \',\'',
        required=False,
        default="S,T,Y")

    args = parser.parse_args()

    inputfile = args.inputfile
    outputfile = args.outputfile
    residues = args.residues.split(",")
    modelprefix = args.modelprefix

    if modelprefix is None:
        print "Please specify the prefix for an existing custom model by -model-prefix!\n\
       It indicates two files [-model-prefix]_HDF5model and [-model-prefix]_parameters.\n \
       If you don't have such files, please run train_models.py to get the custom model first!\n"

        exit()
    else:  #custom prediction
        model = modelprefix + str("_HDF5model")
        parameter = modelprefix + str("_parameters")
        try:
            f = open(parameter, 'r')
        except IOError:
            print('cannot open ' + parameter + " ! check if the model exists. "
                  "please run train_general.py or train_kinase.py to get the custom model first!\n")
        else:
            f = open(parameter, 'r')
            parameters = f.read()
            f.close()
        from DProcess import convertRawToXY
        from EXtractfragment_sort import extractFragforPredict
        from capsulenet import Capsnet_main
        nclass = int(parameters.split("\t")[0])
        window = int(parameters.split("\t")[1])
        residues = parameters.split("\t")[2]
        residues = residues.split(",")
        codemode = int(parameters.split("\t")[4])
        modeltype = str(parameters.split("\t")[5])
        nb_classes = int(parameters.split("\t")[6])
        #print "nclass="+str(nclass)+"codemode="+str(codemode)+"\n";
        testfrag, ids, poses, focuses = extractFragforPredict(inputfile,
                                                              window,
                                                              '-',
                                                              focus=residues)

        testX, testY = convertRawToXY(testfrag.as_matrix(),
                                      codingMode=codemode)
        predictproba = np.zeros((testX.shape[0], 2))
        models = Capsnet_main(testX,
                              testY,
                              nb_epoch=1,
                              compiletimes=0,
                              lr=0.001,
                              batch_size=500,
                              lam_recon=0,
                              routings=3,
                              modeltype=modeltype,
                              nb_classes=nb_classes,
                              predict=True)  # only to get config

        nclass_ini = 1
        for bt in range(nclass):
            models[0].load_weights(model + "_class" + str(bt))
            predictproba += models[1].predict(testX)[0]

        predictproba = predictproba / (nclass * nclass_ini)
        poses = poses + 1
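        # Output columns: sequence id, 1-based position, focus residue, positive-class probability.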
        results = np.column_stack((ids, poses, focuses, predictproba[:, 1]))
        result = pd.DataFrame(results)
        result.to_csv(outputfile + ".txt",
                      index=False,
                      header=None,
                      sep='\t',
                      quoting=csv.QUOTE_NONNUMERIC)
        print "Successfully predicted from custom models !\n"
def bootStrapping_allneg_continue_keras2(trainfile,
                                         valfile=None,
                                         srate=0.8,
                                         nb_epoch1=3,
                                         nb_epoch2=30,
                                         earlystop=None,
                                         maxneg=None,
                                         model=0,
                                         codingMode=0,
                                         lam_recon=0,
                                         inputweights=None,
                                         outputweights=None,
                                         nb_classes=2):
    trainX = trainfile
    train_pos = trainX[np.where(trainX[:, 0] != 0)]
    train_neg = trainX[np.where(trainX[:, 0] == 0)]
    train_pos = pd.DataFrame(train_pos)
    train_neg = pd.DataFrame(train_neg)
    train_pos_s = train_pos.sample(train_pos.shape[0])
    #shuffle train pos
    train_neg_s = train_neg.sample(train_neg.shape[0])
    #shuffle train neg
    slength = int(train_pos.shape[0] * srate)
    nclass = int(train_neg.shape[0] / slength)
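    # Bootstrapping scheme: each of the nclass sub-models is trained on a positive sample of size
    # slength paired with a distinct negative chunk of the same size, giving balanced training sets.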
    if (valfile is not None):  # use all data in valfile as val
        valX = valfile
        val_pos = valX[np.where(valX[:, 0] != 0)]
        val_neg = valX[np.where(valX[:, 0] == 0)]
        val_pos = pd.DataFrame(val_pos)
        val_neg = pd.DataFrame(val_neg)
        val_all = pd.concat([val_pos, val_neg])
        valX1, valY1 = convertRawToXY(val_all.as_matrix(),
                                      codingMode=codingMode)
    else:  # select 10% of the training data as validation
        a = int(train_pos.shape[0] * 0.9)
        b = train_neg.shape[0] - int(train_pos.shape[0] * 0.1)
        print("train pos=" + str(train_pos.shape[0]) + "\n")
        print("train neg=" + str(train_neg.shape[0]) + "\n")
        print(" a=" + str(a) + " b=" + str(b) + "\n")
        train_pos_s = train_pos[0:a]
        train_neg_s = train_neg[0:b]
        print("train pos s=" + str(train_pos_s.shape[0]) + "\n")
        print("train neg s=" + str(train_neg_s.shape[0]) + "\n")

        val_pos = train_pos[(a + 1):]
        print("val_pos=" + str(val_pos.shape[0]) + "\n")
        val_neg = train_neg[b + 1:]
        print("val_neg=" + str(val_neg.shape[0]) + "\n")

        val_all = pd.concat([val_pos, val_neg])
        valX1, valY1 = convertRawToXY(val_all.as_matrix(),
                                      codingMode=codingMode)
        slength = int(train_pos_s.shape[0] * srate)
        #update slength
        nclass = int(train_neg_s.shape[0] / slength)

    if (maxneg is not None):
        nclass = min(maxneg, nclass)
        #cannot do more than maxneg times

    #modelweights=None;
    for I in range(nb_epoch1):
        train_neg_s = train_neg_s.sample(train_neg_s.shape[0])
        #shuffle neg sample
        train_pos_ss = train_pos_s.sample(slength)
        for t in range(nclass):
            train_neg_ss = train_neg_s[(slength * t):(slength * t + slength)]
            train_all = pd.concat([train_pos_ss, train_neg_ss])
            trainX1, trainY1 = convertRawToXY(train_all.as_matrix(),
                                              codingMode=codingMode)
            if t == 0:
                models, eval_model, manipulate_model, weight_c_model, fitHistory = Capsnet_main(
                    trainX=trainX1,
                    trainY=trainY1,
                    valX=valX1,
                    valY=valY1,
                    nb_classes=nb_classes,
                    nb_epoch=nb_epoch2,
                    earlystop=earlystop,
                    weights=inputweights,
                    compiletimes=t,
                    lr=0.001,
                    batch_size=500,
                    lam_recon=lam_recon,
                    routings=3,
                    class_weight=None,
                    modeltype=model)
            else:
                models, eval_model, manipulate_model, weight_c_model, fitHistory = Capsnet_main(
                    trainX=trainX1,
                    trainY=trainY1,
                    valX=valX1,
                    valY=valY1,
                    nb_classes=nb_classes,
                    nb_epoch=nb_epoch2,
                    earlystop=earlystop,
                    weights=inputweights,
                    compiletimes=t,
                    compilemodels=(models, eval_model, manipulate_model,
                                   weight_c_model),
                    lr=0.001,
                    batch_size=500,
                    lam_recon=lam_recon,
                    routings=3,
                    class_weight=None,
                    modeltype=model)

            print "modelweights assigned for " + str(I) + " and " + str(
                t) + "\n"
            if (outputweights is not None):
                models.save_weights(outputweights, overwrite=True)

    return models, eval_model, manipulate_model, weight_c_model, fitHistory
def bootStrapping_allneg_continue_keras2(trainfile,valfile=None,srate=0.8,
                                         nb_epoch1=3,nb_epoch2=30,earlystop=None,
                                         maxneg=None,model=0,codingMode=0,lam_recon=0,
                                         inputweights=None,outputweights=None,nb_classes=2,
                                         hw_res=None,hc_res=None,hc_res2=None): #inputfile:fragments (n*34);srate:selection rate for positive data;nclass:number of class models
  train_pos={} #0 S/T positive;1Y positive
  train_neg={} #0 S/T negative;1Y negative
  train_pos_s={}
  train_neg_s={}
  train_pos_ss={}
  train_neg_ss={}
  slength={}
  nclass={}
  trainX = trainfile
  for i in range(len(trainX)):
      trainX[i,0]=int(trainX[i,0])


  for i in range(2):
      train_pos[i]=trainX[np.where(trainX[:,0]==i)] # labels: S/T pos 0, Y pos 1, S/T neg 2, Y neg 3
      train_neg[i]=trainX[np.where(trainX[:,0]==i+2)]
      train_pos[i]=pd.DataFrame(train_pos[i])
      train_neg[i]=pd.DataFrame(train_neg[i])
      train_pos_s[i]=train_pos[i].sample(train_pos[i].shape[0]) #shuffle train pos
      train_neg_s[i]=train_neg[i].sample(train_neg[i].shape[0]) #shuffle train neg
      slength[i]=int(train_pos[i].shape[0]*srate)
      nclass[i]=int(train_neg[i].shape[0]/slength[i])

  if(valfile is not None): # use all data as val
     valX = valfile
     for i in range(len(valX)):
         valX[i,0]=int(valX[i,0])

     val_all=pd.DataFrame()
     for i in range(2):
         val_pos=valX[np.where(valX[:,0]==i)]
         val_neg=valX[np.where(valX[:,0]==i+2)]
         val_pos=pd.DataFrame(val_pos)
         val_neg=pd.DataFrame(val_neg)
         val_all=pd.concat([val_all,val_pos,val_neg])

     valX1,valY1 = convertRawToXY(val_all.as_matrix(),codingMode=codingMode) #(355340,1,33,21) after extract same size as positive (48050,1,33,21)
  else: # select 10% of the training data as validation
        val_all=pd.DataFrame()
        nclass={}
        for i in range(2):
            a=int(train_pos[i].shape[0]*0.9)
            b=train_neg[i].shape[0]-int(train_pos[i].shape[0]*0.1)
            print("train pos="+str(train_pos[i].shape[0])+"\n")
            print("train neg="+str(train_neg[i].shape[0])+"\n")
            print(" a="+str(a)+" b="+str(b)+"\n")
            train_pos_s[i]=train_pos[i][0:a]
            train_neg_s[i]=train_neg[i][0:b]
            print("train pos s="+str(train_pos_s[i].shape[0])+"\n")
            print("train neg s="+str(train_neg_s[i].shape[0])+"\n")

            val_pos=train_pos[i][(a+1):]
            print("val_pos="+str(val_pos.shape[0])+"\n")
            val_neg=train_neg[i][b+1:]
            print("val_neg="+str(val_neg.shape[0])+"\n")
            val_all=pd.concat([val_all,val_pos,val_neg])

            slength[i]=int(train_pos_s[i].shape[0]*srate) # 10% moved to val, so update slength
            nclass[i]=int(train_neg_s[i].shape[0]/slength[i])

        valX1,valY1 = convertRawToXY(val_all.as_matrix(),codingMode=codingMode)

  if(maxneg is not None):
       nclass_n=min(max([nclass[0],nclass[1]]),maxneg) # cannot do more than maxneg times
  else:
       nclass_n=max([nclass[0],nclass[1]]) # nclass_n was otherwise undefined when maxneg is None

  #modelweights=None;
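  # Outer loop: nb_epoch1 rounds of re-shuffling/resampling; inner loop: one balanced training set
  # per negative chunk, combining both residue types (S/T and Y). Models are compiled only at t==0
  # and reused, with continued training, on later iterations.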
  for I in range(nb_epoch1):
    for i in range(2):
        train_neg_s[i]=train_neg_s[i].sample(train_neg_s[i].shape[0]) #shuffle neg sample
        train_pos_ss[i]=train_pos_s[i].sample(slength[i])

    for t in range(nclass_n):
        train_all=pd.DataFrame()
        for i in range(2):
            train_neg_ss[i]=train_neg_s[i][(slength[i]*(t%nclass[i])):(slength[i]*(t%nclass[i])+slength[i])] # cycle negative chunks: chunk index t mod nclass[i]
            train_all=pd.concat([train_all,train_pos_ss[i],train_neg_ss[i]])

        classweights=None
        if(hc_res2 is not None): # reweight the classes listed in hc_res2 (e.g. [0,2] for T) by #positives / #class-members
             classweights = { k:1.0 for k in range(nb_classes)} # classes: S/T pos 0, Y pos 1, S/T neg 2, Y neg 3
             classweights[hc_res2[0]]=float(sum(train_all.as_matrix()[:,0]<=1))/sum(train_all.as_matrix()[:,0]==hc_res2[0])
             classweights[hc_res2[1]]=float(sum(train_all.as_matrix()[:,0]<=1))/sum(train_all.as_matrix()[:,0]==hc_res2[1])

        trainX1,trainY1 = convertRawToXY(train_all.as_matrix(),codingMode=codingMode) #(355340,1,33,21) after extract same size as positive (48050,1,33,21)
        if t==0:
            models,eval_model,manipulate_model,weight_c_model,fitHistory=Capsnet_main(trainX=trainX1,trainY=trainY1,valX=valX1,valY=valY1,nb_classes=nb_classes,nb_epoch=nb_epoch2,earlystop=earlystop,weights=inputweights,compiletimes=t,lr=0.001,batch_size=1000,lam_recon=lam_recon,routings=3,class_weight=classweights,modeltype=model)
        else:
            models,eval_model,manipulate_model,weight_c_model,fitHistory=Capsnet_main(trainX=trainX1,trainY=trainY1,valX=valX1,valY=valY1,nb_classes=nb_classes,nb_epoch=nb_epoch2,earlystop=earlystop,weights=inputweights,compiletimes=t,compilemodels=(models,eval_model,manipulate_model,weight_c_model),lr=0.001,batch_size=1000,lam_recon=lam_recon,routings=3,class_weight=classweights,modeltype=model)
        #modelweights=models.get_weights()

        print "modelweights assigned for "+str(I)+" and "+str(t)+"\n";
        if(outputweights is not None):
            models.save_weights(outputweights+ '_iteration'+str(t),overwrite=True)
        #print "learning rate="+str(models.optimizer.lr.get_value())+"\n";


  return models,eval_model,manipulate_model,weight_c_model,fitHistory