def wnngip_cv_eval(method, dataset, output_dir, cv_data, X, D, T, cvs, para):
    max_metric, metric_opt, optArg = 0, [], []
    for x in np.arange(0.1, 1.1, 0.1):
        for y in np.arange(0.0, 1.1, 0.1):
            tic = time.clock()
            model = WNNGIP(T=x, sigma=1, alpha=y)
            cmd = "Dataset:"+dataset+" CVS: "+str(cvs)+"\n"+str(model)
            print cmd
            aupr_vec, auc_vec, ndcg_vec, ndcg_inv_vec, results = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            ndcg_avg, ndcg_conf = mean_confidence_interval(ndcg_vec)
            ndcg_inv_avg, ndcg_inv_conf = mean_confidence_interval(ndcg_inv_vec)
            with open(os.path.join(output_dir,"optPar", "proc_"+dataset+"_"+str(cvs)+"_"+method+".txt"), "a") as procFile:
                procFile.write(str(model)+": ")
                procFile.write("auc:%.6f, aupr: %.6f,ndcg: %.6f,ndcg_inv: %.6f, Time:%.6f\n" % (auc_avg, aupr_avg, ndcg_avg, ndcg_inv_avg, time.clock()-tic))

            print "auc:%.6f, aupr: %.6f,ndcg: %.6f,ndcg_inv: %.6f, Time:%.6f\n" % (auc_avg, aupr_avg, ndcg_avg, ndcg_inv_avg, time.clock()-tic)
            metric = ndcg_inv_avg + ndcg_avg
            if metric > max_metric:
                max_metric = metric
                metric_opt = [cmd, auc_avg, aupr_avg, ndcg_avg, ndcg_inv_avg]
                optArg = {"x": x, "y": y}
                #each time a better solution is found, the params are stored
                with open(os.path.join(output_dir,"optPar", "res_"+dataset+"_"+str(cvs)+"_"+method+".txt"), "w") as resFile:
                    resFile.write(str(optArg)+"\n"+str(metric_opt))
    
    cmd = "Optimal parameter setting:\n%s\n" % metric_opt[0]
    cmd += "auc: %.6f, aupr: %.6f, ndcg:%.6f, ndcg_inv:%.6f\n" % (metric_opt[1], metric_opt[2], metric_opt[3], metric_opt[4])
    print cmd
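
This and the later examples summarise the per-fold metric vectors with mean_confidence_interval. A minimal sketch of such a helper is shown below, assuming it returns the mean plus the half-width of a 95% Student-t interval (which is how the *_avg and *_conf values are printed); the exact helper used by these snippets may differ.

import numpy as np
import scipy.stats as st

def mean_confidence_interval(data, confidence=0.95):
    # Assumed sketch: mean of the per-fold metric and the half-width of its
    # confidence interval (Student's t with n-1 degrees of freedom).
    a = np.asarray(data, dtype=float)
    n = len(a)
    mean, se = np.mean(a), st.sem(a)
    h = se * st.t.ppf((1 + confidence) / 2.0, n - 1)
    return mean, h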
Example 2
def wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs, para):
    max_auc, auc_opt = 0, []
    for x in np.arange(0.1, 1.0, 0.1, dtype='float'):
        for y in np.arange(0.0, 1.1, 0.1, dtype='float'):
            for z in np.arange(0.1, 1.1, 0.1, dtype='float'):
                for k in np.arange(0.1, 1.1, 0.1, dtype='float'):
                    tic = time.clock()
                    model = WNNGIP(T=x, sigma=z, alpha=y, gamma=k)
                    cmd = "Dataset:" + dataset + " CVS: " + str(
                        cvs) + "\n" + str(model)
                    print(cmd)
                    aupr_vec, auc_vec, acc_vec, sen_vec, spec_vec = train(
                        model, cv_data, X, D, T)
                    aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
                    auc_avg, auc_conf = mean_confidence_interval(auc_vec)
                    acc_avg, acc_st = mean_confidence_interval(acc_vec)
                    sen_avg, sen_st = mean_confidence_interval(sen_vec)
                    spec_avg, spec_st = mean_confidence_interval(spec_vec)
                    # print("AUPR: %s, AUC:%s, ACC:%s, SEN:%s, Spec:%s, Time:%s" % (aupr_avg, auc_avg, acc_avg, sen_avg, spec_avg, time.clock() - tic))
                    if auc_avg > max_auc:
                        max_auc = auc_avg
                        auc_opt = [
                            cmd, auc_avg, aupr_avg, acc_avg, sen_avg, spec_avg
                        ]
    cmd = "Optimal parameter setting:\n%s\n" % auc_opt[0]
    cmd += "auc: %.6f, aupr: %.6f, acc:%.6f, sen:%.6f, spec:%.6f\n" % (
        auc_opt[1], auc_opt[2], auc_opt[3], auc_opt[4], auc_opt[5])
    print(cmd)
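
The four nested loops above enumerate the full Cartesian grid over (T, alpha, sigma, gamma). The same search can be written more compactly with itertools.product; the sketch below only restates the loop structure and is not part of the original code.

from itertools import product
import numpy as np

t_grid = np.arange(0.1, 1.0, 0.1)   # T
a_grid = np.arange(0.0, 1.1, 0.1)   # alpha
s_grid = np.arange(0.1, 1.1, 0.1)   # sigma
g_grid = np.arange(0.1, 1.1, 0.1)   # gamma

for x, y, z, k in product(t_grid, a_grid, s_grid, g_grid):
    model = WNNGIP(T=x, sigma=z, alpha=y, gamma=k)
    # ... evaluate and track max_auc exactly as in the loop body above ...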
Example 3
def wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs, para, logger):
    start = time.time()
    max_auc, auc_opt = 0, []
    for x in np.arange(0.1, 1.1, 0.1):
        for y in np.arange(0.0, 1.1, 0.1):
            tic = time.time()
            model = WNNGIP(T=x, sigma=1, alpha=y)
            cmd = "Dataset:" + dataset + " CVS: " + str(cvs) + "\n" + str(
                model)
            logger.info(cmd)
            aupr_vec, auc_vec = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            logger.info(
                "auc:%.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f\n"
                % (auc_avg, aupr_avg, auc_conf, aupr_conf, time.time() - tic))
            if auc_avg > max_auc:
                max_auc = auc_avg
                auc_opt = [cmd, auc_avg, aupr_avg, auc_conf, aupr_conf]
    end = time.time()
    cmd = "Optimal parameter setting:\n%s\n" % auc_opt[0]
    cmd += "auc: %.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, time:%.6f\n" % (
        auc_opt[1], auc_opt[2], auc_opt[3], auc_opt[4], end - start)

    logger.info('')
    logger.info(cmd)
Example 4
def get_model(method, para, par, dataset):
    if method == 'wnngip':
        model = WNNGIP(T=par[0], sigma=1, alpha=par[1])
    elif method == 'gip':
        model = GIP(T=par[0], sigma=1, alpha=par[1])
    elif method == 'nngip':
        model = NNGIP(T=par[0], sigma=1, alpha=par[1], NN=par[2])
    elif method == 'nnwnngip':
        model = NNWNNGIP(T=par[0], sigma=1, alpha=par[1], NN=par[2])
    elif method == 'nrlmf':
        model = NRLMF(cfix=para['c'],
                      K1=para['K1'],
                      K2=para['K2'],
                      num_factors=par[0],
                      lambda_d=2**(par[1]),
                      lambda_t=2**(par[1]),
                      alpha=2**(par[2]),
                      beta=2**(par[3]),
                      theta=2**(par[4]),
                      max_iter=100)
    elif method == 'nnkronsvm':
        model = NNKronSVM(C=par[0],
                          NbNeg=para['NbNeg'],
                          NegNei=par[2],
                          PosNei=par[1],
                          dataset=dataset,
                          n_proc=1)
    elif method == 'nnkronsvmgip':
        model = NNKronSVMGIP(C=par[0],
                             NbNeg=para['NbNeg'],
                             NegNei=par[2],
                             PosNei=par[1],
                             dataset=dataset,
                             n_proc=1)
    elif method == 'nnkronwnnsvmgip':
        model = NNKronWNNSVMGIP(C=par[0],
                                t=par[3],
                                NbNeg=para['NbNeg'],
                                NegNei=par[2],
                                PosNei=par[1],
                                dataset=dataset,
                                n_proc=1)
    elif method == 'nnkronwnnsvm':
        model = NNKronWNNSVM(C=par[0],
                             t=par[3],
                             NbNeg=para['NbNeg'],
                             NegNei=par[2],
                             PosNei=par[1],
                             dataset=dataset,
                             n_proc=1)
    return model
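
get_model maps a parameter tuple par (plus the fixed settings in para) onto a concrete model instance, so a grid search can stay method-agnostic. A hypothetical usage sketch; the grid values and dataset name below are illustrative and not from the original code.

# Hypothetical grid of (T, alpha) pairs for the 'wnngip' branch.
param_grid = [(t, a) for t in (0.2, 0.5, 0.8) for a in (0.4, 0.8)]
for par in param_grid:
    model = get_model('wnngip', para={}, par=par, dataset='nr')
    # ... fit and score the candidate as in the cv_eval functions above ...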
Example 5
def wnngip_cv_eval(method, dataset, output_dir, cv_data, X, D, T, cvs, para):
    max_metric, metric_opt, optArg = 0, [], []
    for x in np.arange(0.1, 1.1, 0.1):
        for y in np.arange(0.0, 1.1, 0.1):
            tic = time.clock()
            model = WNNGIP(T=x, sigma=1, alpha=y, hyperParamLearn=True)

            aupr_vec, auc_vec, ndcg_vec, ndcg_inv_vec, results = train(
                model, cv_data, X, D, T, hyperParamLearn=True)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)

            metric = aupr_avg + auc_avg
            if metric > max_metric:
                max_metric = metric
                metric_opt = [auc_avg, aupr_avg]
                optArg = {"x": x, "y": y}

    cmd = "Optimal parameter setting:\n"
    cmd += "x:%.6f, y:%.6f, auc: %.6f, aupr: %.6f\n" % (
        optArg["x"], optArg["y"], metric_opt[0], metric_opt[1])
    print cmd
    return optArg
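
Unlike the earlier variants, this wnngip_cv_eval returns the best (x, y) pair instead of only printing it, so a caller can refit the model with the tuned values. A hypothetical follow-up:

# Hypothetical: refit WNNGIP with the tuned hyperparameters.
optArg = wnngip_cv_eval(method, dataset, output_dir, cv_data, X, D, T, cvs, para)
final_model = WNNGIP(T=optArg["x"], sigma=1, alpha=optArg["y"])
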
def thear(method, dataset, data_dir, output_dir, cvs, sp_arg, model_settings,
          predict_num, seeds, seedsOptPar, args):
    intMat, drugMat, targetMat = load_data_from_file(
        dataset, os.path.join(data_dir, 'datasets'))
    drug_names, target_names = get_drugs_targets_names(
        dataset, os.path.join(data_dir, 'datasets'))

    invert = 0
    if (method == 'inv_brdti'):
        invert = 1
    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        if cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        if cvs == 3:  # CV setting CVS3
            X, D, T, cv, invert = intMat.T, targetMat, drugMat, 0, 1
        if cvs == 4:
            X, D, T, cv = intMat, drugMat, targetMat, 2
        cv_data = cross_validation(X, seeds, cv, invert, num=10)

    if invert:
        X, D, T = intMat, drugMat, targetMat

        #cv_data_optimize_params = cross_validation(X, seedsOptPar, cv, invert, num=5)

    if sp_arg == 0 and predict_num == 0:
        if (method == "vbmklmf"):
            cv_eval.vbmklmf_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                    args)
        if (method == "ensambledti"):
            cv_eval.vbmklmf_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                    args)
        if method == 'netcbp':
            cv_eval.netcbp_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        # if method == 'ndaf':
        #     cv_eval.ndaf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'grmf':
            cv_eval.grmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'pudt':
            cv_eval.pudt_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'daspfind':
            cv_eval.daspfind_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                     args)
        if method == 'dnilmf':
            cv_eval.dnilmf_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        if method == 'dthybrid':
            cv_eval.dthybrid_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                     args)
        if method == 'kronrlsmkl':
            cv_eval.kronrismkl_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                       args)
        if (method == 'brdti'):
            cv_eval.brdti_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if (method == 'ddr'):
            cv_eval.ddr_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if (method == 'inv_brdti'):
            cv_eval.brdti_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'nrlmf':
            cv_eval.nrlmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'netlaprls':
            cv_eval.netlaprls_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                      args)
        if method == 'blmnii':
            cv_eval.blmnii_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        if method == 'wnngip':
            cv_eval.wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        if method == 'kbmf':
            cv_eval.kbmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'cmf':
            cv_eval.cmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)

    if sp_arg == 1 or predict_num > 0:
        tic = time.clock()
        if (method == "netcbp"):
            model = NetCBP()
        # if (method=="ndaf"):
        #     model = NDAF()
        if (method == "grmf"):
            model = GRMF(cv=cvs)
        if (method == "pudt"):
            model = PUDT(dataset=dataset)
        if (method == "vbmklmf"):
            model = VBMKLMF(dataset=dataset, cvs=cvs)
        if (method == 'dnilmf'):
            model = DNILMF(dataset=dataset)
        if (method == 'kronrlsmkl'):
            model = KronRLsMKL(dataset=dataset)
        if (method == 'dthybrid'):
            model = DTHYBRID(dataset=dataset)
        if (method == 'daspfind'):
            model = DASPFIND(alpha=args['alpha'])
        if (method == 'brdti') | (method == 'inv_brdti'):
            #model = BRDTI(D=args['D'],learning_rate= args['learning_rate'],max_iters=args['max_iters'],simple_predict=args['simple_predict'],bias_regularization=args['bias_regularization'],global_regularization=args['global_regularization'],cbSim=args['cbSim'],cb_alignment_regularization_user=args['cb_alignment_regularization_user'],cb_alignment_regularization_item=args['cb_alignment_regularization_item'])
            model = BRDTI(args)
        if method == 'nrlmf':
            model = NRLMF(cfix=args['c'],
                          K1=args['K1'],
                          K2=args['K2'],
                          num_factors=args['r'],
                          lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'],
                          alpha=args['alpha'],
                          beta=args['beta'],
                          theta=args['theta'],
                          max_iter=args['max_iter'])
        if method == 'ddr':
            model = DDR(dataset=dataset, cv=cvs)
        if method == 'netlaprls':
            model = NetLapRLS(gamma_d=args['gamma_d'],
                              gamma_t=args['gamma_t'],
                              beta_d=args['beta_t'],
                              beta_t=args['beta_t'])
        if method == 'blmnii':
            model = BLMNII(alpha=args['alpha'],
                           gamma=args['gamma'],
                           sigma=args['sigma'],
                           avg=args['avg'])
        if method == 'wnngip':
            model = WNNGIP(T=args['T'],
                           sigma=args['sigma'],
                           alpha=args['alpha'])
        if method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        if method == 'cmf':
            model = CMF(K=args['K'],
                        lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'],
                        lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        if (method == 'ensambledti'):
            model = EnsambleDTI(args=args, dataset=dataset)
        cmd = str(model)
        if predict_num == 0:
            print("Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd)
            aupr_vec, auc_vec = train(model, cv_data, X, D, T, cvs, dataset)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            print(
                "auc:%.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f"
                % (auc_avg, aupr_avg, auc_conf, aupr_conf, time.clock() - tic))
            write_metric_vector_to_file(
                auc_vec,
                os.path.join(
                    output_dir,
                    method + "_auc_cvs" + str(cvs) + "_" + dataset + ".txt"))
            write_metric_vector_to_file(
                aupr_vec,
                os.path.join(
                    output_dir,
                    method + "_aupr_cvs" + str(cvs) + "_" + dataset + ".txt"))
        elif predict_num > 0:
            print("Dataset:" + dataset + "\n" + cmd)
            seed = 7771 if method == 'cmf' else 22
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            x, y = np.where(intMat == 0)
            scores = model.predict_scores(zip(x, y), 5)
            ii = np.argsort(scores)[::-1]
            predict_pairs = [(drug_names[x[i]], target_names[y[i]], scores[i])
                             for i in ii[:predict_num]]
            new_dti_file = os.path.join(
                output_dir, "_".join([method, dataset, "new_dti.txt"]))
            novel_prediction_analysis(predict_pairs, new_dti_file,
                                      os.path.join(data_dir, 'biodb'))
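
thear dispatches both the cv_eval call and the model construction through long if-chains keyed on method. A minimal sketch of the same dispatch written as a lookup table; this is only a restructuring idea, and the entries shown are a subset of the methods handled above.

# Sketch: table-driven dispatch instead of repeated if-statements.
CV_EVALS = {
    'nrlmf': cv_eval.nrlmf_cv_eval,
    'netlaprls': cv_eval.netlaprls_cv_eval,
    'blmnii': cv_eval.blmnii_cv_eval,
    'wnngip': cv_eval.wnngip_cv_eval,
    'kbmf': cv_eval.kbmf_cv_eval,
    'cmf': cv_eval.cmf_cv_eval,
}
if method in CV_EVALS:
    CV_EVALS[method](method, dataset, cv_data, X, D, T, cvs, args)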
Example 7
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "m:d:f:c:s:o:n:p", ["method=", "dataset=", "data-dir=", "cvs=", "specify-arg=", "method-options=", "predict-num=", "output-dir=", ])
    except getopt.GetoptError:
        sys.exit()

    data_dir = 'data'
    output_dir = 'output'
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0

    seeds = [7771, 8367, 22, 1812, 4659]#
    seedsOptPar = [156]
    # seeds = np.random.choice(10000, 5, replace=False)
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        if opt == "--dataset":
            dataset = arg
        if opt == "--data-dir":
            data_dir = arg
        if opt == "--output-dir":
            output_dir = arg
        if opt == "--cvs":
            cvs = int(arg)
        if opt == "--specify-arg":
            sp_arg = int(arg)
        if opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        if opt == "--predict-num":
            predict_num = int(arg)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    
    if not os.path.isdir(os.path.join(output_dir,"optPar")):
        os.makedirs(os.path.join(output_dir,"optPar"))    
        
    if method == 'aladin':
        args = {'k': 3, 'seedList': [1], 'featureSetSize': -1, 'model': "ECKNN", 'avg': True, 'hpLearning': 0, 'useKNN': 0}
    if method == 'netlaprls':
        args = {'gamma_d': 10, 'gamma_t': 10, 'beta_d': 1e-5, 'beta_t': 1e-5}
    if method == 'blmnii':
        args = {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False}
    if method == 'wnngip':
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8}
      
    #print(model_settings)    
    for key, val in model_settings:
        args[key] = float(eval(val))

    intMat, drugMat, targetMat = load_data_from_file(dataset, os.path.join(data_dir, 'datasets'))
    drug_names, target_names = get_drugs_targets_names(dataset, os.path.join(data_dir, 'datasets'))
    global global_dataset
    global_dataset = dataset
    invert = 0    

    if predict_num == 0:
        if cvs == 1:  # CV setting 1 (predicting DTIs)
            X, D, T, cv = intMat, drugMat, targetMat, 1             
                
        if cvs == 2:  # CV setting 2 (novel drugs)
            X, D, T, cv = intMat, drugMat, targetMat, 0
                
        if cvs == 3:  # CV setting 3 (novel targets)
            X, D, T, cv = intMat.T, targetMat, drugMat, 0 
        

            
        cv_data = cross_validation(X, seeds, cv, invert, num=5)

    if sp_arg == 1 or predict_num > 0:
        tic = time.clock()     
        if method == 'aladin':
            model = ALADIN(k=args['k'], seedList=args['seedList'], featureSetSize=args['featureSetSize'], model=args['model'], avg=args['avg'], hpLearning=args['hpLearning'], useKNN=args['useKNN'])        
        if method == 'netlaprls':
            model = NetLapRLS(gamma_d=args['gamma_d'], gamma_t=args['gamma_t'], beta_d=args['beta_t'], beta_t=args['beta_t'])
        if method == 'blmnii':
            model = BLMNII(alpha=args['alpha'], gamma=args['gamma'], sigma=args['sigma'], avg=args['avg'])
        if method == 'wnngip':
            model = WNNGIP(T=args['T'], sigma=args['sigma'], alpha=args['alpha'])        
        cmd = str(model)
        
        #predict hidden part of the current datasets
        if predict_num == 0:
            print "Dataset:"+dataset+" CVS:"+str(cvs)+"\n"+cmd
            name_extension = ""
            if method == 'aladin':            
              if args['hpLearning'] > 0:
                name_extension = "_ens"
              elif args['k'] == 3:
                name_extension = "_k3"
              elif args['k'] == 5:
                name_extension = "_k5"  
              elif args['model'] == 5:
                name_extension = "_k5"    
              
              if args["useKNN"] == 1:
                name_extension = name_extension + "KNN"

            if dataset == "kinase":
                X[X == 0] = -1      
            aupr_vec, auc_vec, ndcg_vec, ndcg_inv_vec, results = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            ndcg_avg, ndcg_conf = mean_confidence_interval(ndcg_vec)
            ndcg_inv_avg, ndcg_inv_conf = mean_confidence_interval(ndcg_inv_vec)
            
            resfile = os.path.join('output','rawResults', method+name_extension+"_res_"+str(cvs)+"_"+dataset+".csv")
            outd = open(resfile, "w")
            outd.write(('drug;target;true;predict\n'))
            
            for r in results:
                outd.write('%s;%s;%s;%s\n' % (r[0],r[1],r[2],r[3]) )
            
            print "auc:%.6f, aupr: %.6f, ndcg: %.6f, ndcg_inv: %.6f, auc_conf:%.6f, aupr_conf:%.6f, ndcg_conf:%.6f, ndcg_inv_conf:%.6f, Time:%.6f" % (auc_avg, aupr_avg, ndcg_avg, ndcg_inv_avg, auc_conf, aupr_conf, ndcg_conf, ndcg_inv_conf, time.clock()-tic)
            write_metric_vector_to_file(auc_vec, os.path.join(output_dir, method+name_extension+"_auc_cvs"+str(cvs)+"_"+dataset+".txt"))
            write_metric_vector_to_file(aupr_vec, os.path.join(output_dir, method+name_extension+"_aupr_cvs"+str(cvs)+"_"+dataset+".txt"))            
            #write_metric_vector_to_file(ndcg_vec, os.path.join(output_dir, method+"_k357_ndcg_cvs"+str(cvs)+"_"+dataset+".txt"))
            #write_metric_vector_to_file(ndcg_inv_vec, os.path.join(output_dir, method+"_k357_ndcg_inv_cvs"+str(cvs)+"_"+dataset+".txt"))
        
        #predict novel DTIs    
        elif predict_num > 0:
            print "Dataset:"+dataset+"\n"+cmd
            seed = 376
            
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            npa = newDTIPrediction()
            x, y = np.where(intMat == 0)
            scores = model.predict_scores(zip(x, y), 1)
            sz = np.array(zip(x,y,scores))
            
            sz_croped = sz[sz[:,2].argsort()[::-1],:]
            sz_croped = sz_croped[xrange(0,predict_num),:]
            
            npa.verify_novel_interactions(method, dataset, sz_croped, predict_num, drug_names, target_names)
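
Several of these snippets time the runs with time.clock(), which was removed in Python 3.8. A small sketch of the same timing pattern with time.perf_counter(), for running the examples on a current interpreter:

import time

tic = time.perf_counter()
# ... train / evaluate as above ...
elapsed = time.perf_counter() - tic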
Example 8
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "m:d:f:c:s:o:n:p", ["method=", "dataset=", "data-dir=", "cvs=", "specify-arg=", "method-options=", "predict-num=", "output-dir=", ])
    except getopt.GetoptError:
        sys.exit()

    data_dir = 'data'
    output_dir = 'output'
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0

    seeds = [7771, 8367, 22, 1812, 4659]
    seedsOptPar = [156]
    # seeds = np.random.choice(10000, 5, replace=False)
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        if opt == "--dataset":
            dataset = arg
        if opt == "--data-dir":
            data_dir = arg
        if opt == "--output-dir":
            output_dir = arg
        if opt == "--cvs":
            cvs = int(arg)
        if opt == "--specify-arg":
            sp_arg = int(arg)
        if opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        if opt == "--predict-num":
            predict_num = int(arg)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    
    if not os.path.isdir(os.path.join(output_dir,"optPar")):
        os.makedirs(os.path.join(output_dir,"optPar"))    
        
    # default parameters for each methods
    if (method == 'brdti') | (method == 'inv_brdti') :
        args = {
            'D':100,
            'learning_rate':0.1,
            'max_iters' : 100,   
            'simple_predict' :False, 
            'bias_regularization':1,                 
            'global_regularization':10**(-2),  
            "cbSim": "knn",
            'cb_alignment_regularization_user' :1,                 
            'cb_alignment_regularization_item' :1}

    if method == 'netlaprls':
        args = {'gamma_d': 10, 'gamma_t': 10, 'beta_d': 1e-5, 'beta_t': 1e-5}
    if method == 'blmnii':
        args = {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False}
    if method == 'wnngip':
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8}
    if method == 'cmf':
        args = {'K': 100, 'lambda_l': 0.5, 'lambda_d': 0.125, 'lambda_t': 0.125, 'max_iter': 100}
     
    #print(model_settings)    
    for key, val in model_settings:
        args[key] = float(eval(val))

    intMat, drugMat, targetMat = load_data_from_file(dataset, os.path.join(data_dir, 'datasets'))
    drug_names, target_names = get_drugs_targets_names(dataset, os.path.join(data_dir, 'datasets'))
    
    invert = 0    
    if (method == 'inv_brdti')  : 
        invert = 1
        
    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1             
                
        if cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
                
        if cvs == 3:  # CV setting CVS3
            X, D, T, cv = intMat.T, targetMat, drugMat, 0 
        

            
        cv_data = cross_validation(X, seeds, cv, invert)
        cv_data_optimize_params = cross_validation(X, seedsOptPar, cv, invert, num=5)

        
    if sp_arg == 0 and predict_num == 0:
        if (method == 'brdti'):
            cv_eval.brdti_cv_eval(method, dataset,output_dir, cv_data_optimize_params, X, D, T, cvs, args)                             
        if (method == 'inv_brdti'):
            cv_eval.brdti_cv_eval(method, dataset,output_dir, cv_data_optimize_params, X.T, T, D, cvs, args) 
        
        if method == 'netlaprls':
            cv_eval.netlaprls_cv_eval(method, dataset,output_dir, cv_data_optimize_params, X, D, T, cvs, args)
        if method == 'blmnii':
            cv_eval.blmnii_cv_eval(method, dataset,output_dir, cv_data_optimize_params, X, D, T, cvs, args)
        if method == 'wnngip':
            cv_eval.wnngip_cv_eval(method, dataset,output_dir, cv_data_optimize_params, X, D, T, cvs, args)        
        if method == 'cmf':
            cv_eval.cmf_cv_eval(method, dataset,output_dir, cv_data_optimize_params, X, D, T, cvs, args)
    

    if sp_arg == 1 or predict_num > 0:
        tic = time.clock()
        if (method == 'brdti')|(method == 'inv_brdti'):
            model = BRDTI(args)       
        if method == 'netlaprls':
            model = NetLapRLS(gamma_d=args['gamma_d'], gamma_t=args['gamma_t'], beta_d=args['beta_t'], beta_t=args['beta_t'])
        if method == 'blmnii':
            model = BLMNII(alpha=args['alpha'], gamma=args['gamma'], sigma=args['sigma'], avg=args['avg'])
        if method == 'wnngip':
            model = WNNGIP(T=args['T'], sigma=args['sigma'], alpha=args['alpha'])        
        if method == 'cmf':
            model = CMF(K=args['K'], lambda_l=args['lambda_l'], lambda_d=args['lambda_d'], lambda_t=args['lambda_t'], max_iter=args['max_iter'])
        cmd = str(model)
        
        #predict hidden part of the current datasets
        if predict_num == 0:
            print "Dataset:"+dataset+" CVS:"+str(cvs)+"\n"+cmd
            if (method == 'inv_brdti') : 
                aupr_vec, auc_vec, ndcg_inv_vec, ndcg_vec, results = train(model, cv_data, X.T, T, D)
            else:
                aupr_vec, auc_vec, ndcg_vec, ndcg_inv_vec, results = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            ndcg_avg, ndcg_conf = mean_confidence_interval(ndcg_vec)
            ndcg_inv_avg, ndcg_inv_conf = mean_confidence_interval(ndcg_inv_vec)
            
            resfile = os.path.join('output','rawResults', method+"_res_"+str(cvs)+"_"+dataset+".csv")
            outd = open(resfile, "w")
            outd.write(('drug;target;true;predict\n'))
            
            for r in results:
                outd.write('%s;%s;%s;%s\n' % (r[0],r[1],r[2],r[3]) )
            
            print "auc:%.6f, aupr: %.6f, ndcg: %.6f, ndcg_inv: %.6f, auc_conf:%.6f, aupr_conf:%.6f, ndcg_conf:%.6f, ndcg_inv_conf:%.6f, Time:%.6f" % (auc_avg, aupr_avg, ndcg_avg, ndcg_inv_avg, auc_conf, aupr_conf, ndcg_conf, ndcg_inv_conf, time.clock()-tic)
            write_metric_vector_to_file(auc_vec, os.path.join(output_dir, method+"_auc_cvs"+str(cvs)+"_"+dataset+".txt"))
            write_metric_vector_to_file(aupr_vec, os.path.join(output_dir, method+"_aupr_cvs"+str(cvs)+"_"+dataset+".txt"))            
            write_metric_vector_to_file(ndcg_vec, os.path.join(output_dir, method+"_ndcg_cvs"+str(cvs)+"_"+dataset+".txt"))
            write_metric_vector_to_file(ndcg_inv_vec, os.path.join(output_dir, method+"_ndcg_inv_cvs"+str(cvs)+"_"+dataset+".txt"))
        
        #predict novel DTIs    
        elif predict_num > 0:
            print "Dataset:"+dataset+"\n"+cmd
            seed = 376
            if invert: #predicting drugs for targets
                model.fix_model(intMat.T, intMat.T, targetMat, drugMat, seed)
                npa = newDTIPrediction()
                x, y = np.where(intMat == 0)
                scores = model.predict_scores(zip(y, x), 1)
                sz = np.array(zip(x,y,scores))    
                
            else: #predicting targets for drugs
                model.fix_model(intMat, intMat, drugMat, targetMat, seed)
                npa = newDTIPrediction()
                x, y = np.where(intMat == 0)
                scores = model.predict_scores(zip(x, y), 1)
                sz = np.array(zip(x,y,scores))
                
            ndcg_d, ndcg_t, recall_d, recall_t = npa.verify_novel_interactions(method, dataset, sz, predict_num, drug_names, target_names)
            
            st_file= os.path.join('output/newDTI', "_".join([dataset,str(predict_num), "stats.csv"]))
            out = open(st_file, "a")
            out.write(('%s;%f;%f;%f;%f\n' % (method,ndcg_d, ndcg_t, recall_d, recall_t)))
Example 9
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "m:d:f:c:s:o:n:p", [
            "method=",
            "dataset=",
            "data-dir=",
            "cvs=",
            "specify-arg=",
            "method-options=",
            "predict-num=",
            "output-dir=",
        ])
    except getopt.GetoptError:
        sys.exit()

    data_dir = os.path.join(os.path.pardir, 'data')
    output_dir = os.path.join(os.path.pardir, 'output')
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0

    seeds = [7771, 8367, 22, 1812, 4659]
    # seeds = np.random.choice(10000, 5, replace=False)
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        if opt == "--dataset":
            dataset = arg
        if opt == "--data-dir":
            data_dir = arg
        if opt == "--output-dir":
            output_dir = arg
        if opt == "--cvs":
            cvs = int(arg)
        if opt == "--specify-arg":
            sp_arg = int(arg)
        if opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        if opt == "--predict-num":
            predict_num = int(arg)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # default parameters for each methods
    if method == 'nrlmf':
        args = {
            'c': 5,
            'K1': 5,
            'K2': 5,
            'r': 50,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'alpha': 0.25,
            'beta': 0.125,
            'theta': 0.5,
            'max_iter': 100
        }
    if method == 'netlaprls':
        args = {'gamma_d': 10, 'gamma_t': 10, 'beta_d': 1e-5, 'beta_t': 1e-5}
    if method == 'blmnii':
        args = {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False}
    if method == 'wnngip':
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8}
    if method == 'kbmf':
        args = {'R': 50}
    if method == 'cmf':
        args = {
            'K': 50,
            'lambda_l': 0.5,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'max_iter': 30
        }

    for key, val in model_settings:
        args[key] = val

    intMat, drugMat, targetMat = load_data_from_file(
        dataset, os.path.join(data_dir, 'datasets'))
    drug_names, target_names = get_drugs_targets_names(
        dataset, os.path.join(data_dir, 'datasets'))

    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        if cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        if cvs == 3:  # CV setting CVS3
            X, D, T, cv = intMat.T, targetMat, drugMat, 0
        cv_data = cross_validation(X, seeds, cv)

    if sp_arg == 0 and predict_num == 0:
        if method == 'nrlmf':
            cv_eval.nrlmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'netlaprls':
            cv_eval.netlaprls_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                      args)
        if method == 'blmnii':
            cv_eval.blmnii_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        if method == 'wnngip':
            cv_eval.wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        if method == 'kbmf':
            cv_eval.kbmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'cmf':
            cv_eval.cmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)

    if sp_arg == 1 or predict_num > 0:
        tic = time.clock()
        if method == 'nrlmf':
            model = NRLMF(cfix=args['c'],
                          K1=args['K1'],
                          K2=args['K2'],
                          num_factors=args['r'],
                          lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'],
                          alpha=args['alpha'],
                          beta=args['beta'],
                          theta=args['theta'],
                          max_iter=args['max_iter'])
        if method == 'netlaprls':
            model = NetLapRLS(gamma_d=args['gamma_d'],
                              gamma_t=args['gamma_t'],
                              beta_d=args['beta_t'],
                              beta_t=args['beta_t'])
        if method == 'blmnii':
            model = BLMNII(alpha=args['alpha'],
                           gamma=args['gamma'],
                           sigma=args['sigma'],
                           avg=args['avg'])
        if method == 'wnngip':
            model = WNNGIP(T=args['T'],
                           sigma=args['sigma'],
                           alpha=args['alpha'])
        if method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        if method == 'cmf':
            model = CMF(K=args['K'],
                        lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'],
                        lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        cmd = str(model)
        if predict_num == 0:
            print "Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd
            aupr_vec, auc_vec = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            print "auc:%.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f" % (
                auc_avg, aupr_avg, auc_conf, aupr_conf, time.clock() - tic)
            write_metric_vector_to_file(
                auc_vec,
                os.path.join(
                    output_dir,
                    method + "_auc_cvs" + str(cvs) + "_" + dataset + ".txt"))
            write_metric_vector_to_file(
                aupr_vec,
                os.path.join(
                    output_dir,
                    method + "_aupr_cvs" + str(cvs) + "_" + dataset + ".txt"))
        elif predict_num > 0:
            print "Dataset:" + dataset + "\n" + cmd
            seed = 7771 if method == 'cmf' else 22
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            x, y = np.where(intMat == 0)
            scores = model.predict_scores(zip(x, y), 5)
            ii = np.argsort(scores)[::-1]
            predict_pairs = [(drug_names[x[i]], target_names[y[i]], scores[i])
                             for i in ii[:predict_num]]
            new_dti_file = os.path.join(
                output_dir, "_".join([method, dataset, "new_dti.txt"]))
            novel_prediction_analysis(predict_pairs, new_dti_file,
                                      os.path.join(data_dir, 'biodb'))
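
The per-fold AUC and AUPR vectors are persisted with write_metric_vector_to_file. An assumed minimal sketch of such a helper (one value per line, so the files can be re-read or aggregated later); the actual implementation may format differently.

def write_metric_vector_to_file(vec, file_name):
    # Assumed sketch: write one metric value per line.
    with open(file_name, "w") as f:
        for v in vec:
            f.write("%.6f\n" % v)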
Example 10
def main(argv):

    try:
        opts, args = getopt.getopt(argv, "m:d:f:c:e:s:o:n:p:g:q:r:l:w", [
            "method=", "dataset=", "data-dir=", "cvs=", "external=",
            "specify-arg=", "method-opt=", "predict-num=", "scoring=", "gpmi=",
            "params=", "output-dir=", "log=", "workdir="
        ])
    except getopt.GetoptError:
        sys.exit()


#    data_dir = os.path.join(os.path.pardir, 'data')
#    output_dir = os.path.join(os.path.pardir, 'output')
    method = "nrlmf"
    dataset = "nr"
    data_dir = '.'
    output_dir = '.'
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0
    external = 0
    scoring = 'auc'
    gpmi = None
    params = None
    workdir = "./"
    logfile = 'job.log'

    seeds = [7771, 8367, 22, 1812, 4659]
    # seeds = np.random.choice(10000, 5, replace=False)
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        if opt == "--dataset":
            dataset = arg
        if opt == "--data-dir":
            data_dir = arg
        if opt == "--output-dir":
            output_dir = arg
        if opt == "--cvs":
            cvs = int(arg)
        if opt == "--external":
            external = int(arg)
        if opt == "--specify-arg":
            sp_arg = int(arg)
        if opt == "--method-opt":
            model_settings = [s.split('=') for s in str(arg).split()]
        if opt == "--predict-num":
            predict_num = int(arg)
        if opt == "--scoring":
            scoring = str(arg)
        if opt == "--gpmi":
            gpmi = dict()
            for s in str(arg).split():
                key, val = s.split('=')
                gpmi[key] = float(val)
        if opt == "--params":
            params = read_params(str(arg))
        if opt == "--log":
            logfile = str(arg)
        if opt == "--workdir":
            workdir = str(arg)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # set logger
    logger = logging.getLogger("logger")
    logger.setLevel(logging.INFO)
    filename = logfile
    fh = logging.FileHandler(workdir + "/" + filename)
    fh.name = filename
    logger.addHandler(fh)

    # default parameters for each methods
    if method == 'nrlmf':
        args = {
            'c': 5,
            'K1': 5,
            'K2': 5,
            'r': 50,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'alpha': 0.25,
            'beta': 0.125,
            'theta': 0.5,
            'max_iter': 100
        }
    if method == 'nrlmfb':
        args = {
            'c': 5,
            'K1': 5,
            'K2': 5,
            'r': 50,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'alpha': 0.25,
            'beta': 0.125,
            'theta': 0.5,
            'max_iter': 100
        }
    if method == 'netlaprls':
        args = {'gamma_d': 10, 'gamma_t': 10, 'beta_d': 1e-5, 'beta_t': 1e-5}
    if method == 'blmnii':
        args = {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False}
    if method == 'wnngip':
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8}
    if method == 'kbmf':
        args = {'R': 50}
    if method == 'cmf':
        args = {
            'K': 50,
            'lambda_l': 0.5,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'max_iter': 30
        }

    for key, val in model_settings:
        args[key] = float(val)

    intMat, drugMat, targetMat = load_data_from_file(
        dataset, os.path.join(data_dir, 'dataset'))
    drug_names, target_names = get_drugs_targets_names(
        dataset, os.path.join(data_dir, 'dataset'))

    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        if cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        if cvs == 3:  # CV setting CVS3
            X, D, T, cv = intMat.T, targetMat, drugMat, 0
        cv_data = cross_validation(X, seeds, cv)
        if cvs == 1: ev_data = external_validation(X, seeds, cv)

    if sp_arg == 0 and predict_num == 0 and external == 0:
        if method == 'nrlmf':
            cv_eval.nrlmf_cv_eval(method,
                                  dataset,
                                  cv_data,
                                  X,
                                  D,
                                  T,
                                  cvs,
                                  args,
                                  logger,
                                  scoring=scoring,
                                  gpmi=gpmi,
                                  params=params)
        if method == 'nrlmfb':
            cv_eval.nrlmfb_cv_eval(method,
                                   dataset,
                                   cv_data,
                                   X,
                                   D,
                                   T,
                                   cvs,
                                   args,
                                   logger,
                                   scoring=scoring,
                                   gpmi=gpmi,
                                   params=params)
        if method == 'netlaprls':
            cv_eval.netlaprls_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                      args)
        if method == 'blmnii':
            cv_eval.blmnii_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        if method == 'wnngip':
            cv_eval.wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args, logger)
        if method == 'kbmf':
            cv_eval.kbmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'cmf':
            cv_eval.cmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)

    if sp_arg == 0 and predict_num == 0 and external == 1:
        if method == 'nrlmf':
            ev_eval.nrlmf_ev_eval(method,
                                  ev_data,
                                  X,
                                  D,
                                  T,
                                  logger,
                                  scoring=scoring,
                                  gpmi=gpmi,
                                  params=params)
        if method == 'nrlmfb':
            ev_eval.nrlmfb_ev_eval(method,
                                   ev_data,
                                   X,
                                   D,
                                   T,
                                   logger,
                                   scoring=scoring,
                                   gpmi=gpmi,
                                   params=params)

    if sp_arg == 1 or predict_num > 0:
        if method == 'nrlmf':
            model = NRLMF(cfix=args['c'],
                          K1=args['K1'],
                          K2=args['K2'],
                          num_factors=args['r'],
                          lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'],
                          alpha=args['alpha'],
                          beta=args['beta'],
                          theta=args['theta'],
                          max_iter=args['max_iter'])
        if method == 'nrlmfb':
            model = NRLMFb(cfix=args['c'],
                           K1=args['K1'],
                           K2=args['K2'],
                           num_factors=args['r'],
                           lambda_d=args['lambda_d'],
                           lambda_t=args['lambda_t'],
                           alpha=args['alpha'],
                           beta=args['beta'],
                           theta=args['theta'],
                           max_iter=args['max_iter'],
                           eta1=args['eta1'],
                           eta2=args['eta2'])
        if method == 'netlaprls':
            model = NetLapRLS(gamma_d=args['gamma_d'],
                              gamma_t=args['gamma_t'],
                              beta_d=args['beta_t'],
                              beta_t=args['beta_t'])
        if method == 'blmnii':
            model = BLMNII(alpha=args['alpha'],
                           gamma=args['gamma'],
                           sigma=args['sigma'],
                           avg=args['avg'])
        if method == 'wnngip':
            model = WNNGIP(T=args['T'],
                           sigma=args['sigma'],
                           alpha=args['alpha'])
        if method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        if method == 'cmf':
            model = CMF(K=args['K'],
                        lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'],
                        lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        cmd = str(model)
        if predict_num == 0:

            tic = time.time()
            print("Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd)
            aupr_vec, auc_vec = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            print(
                "auc:%.6f, aupr:%.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f"
                % (auc_avg, aupr_avg, auc_conf, aupr_conf, time.time() - tic))
            #            write_metric_vector_to_file(auc_vec, os.path.join(output_dir, method+"_auc_cvs"+str(cvs)+"_"+dataset+".txt"))
            #            write_metric_vector_to_file(aupr_vec, os.path.join(output_dir, method+"_aupr_cvs"+str(cvs)+"_"+dataset+".txt"))
            logger.info(
                cmd + ', ' +
                "auc:%.6f, aupr:%.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f"
                % (auc_avg, aupr_avg, auc_conf, aupr_conf, time.time() - tic))

        elif predict_num > 0:
            print("Dataset:" + dataset + "\n" + cmd)
            seed = 7771 if method == 'cmf' else 22
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            x, y = np.where(intMat == 0)
            scores = model.predict_scores(zip(x, y), 5)
            ii = np.argsort(scores)[::-1]
            predict_pairs = [(drug_names[x[i]], target_names[y[i]], scores[i])
                             for i in ii[:predict_num]]
            new_dti_file = os.path.join(
                output_dir, "_".join([method, dataset, "new_dti.txt"]))
            novel_prediction_analysis(predict_pairs, new_dti_file,
                                      os.path.join(data_dir, 'biodb'))
Example 11
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "m:d:f:c:s:o:n:p:C", [
            "method=", "dataset=", "data-dir=", "cvs=", "specify-arg=",
            "method-options=", "predict-num=", "output-dir=", "cv_type=",
            "i_param=", "i_test="
        ])
    except getopt.GetoptError:
        sys.exit()

    # data_dir = os.path.join(os.path.pardir, 'data')
    # output_dir = os.path.join(os.path.pardir, 'output')
    data_dir = 'data'
    output_dir = 'results'
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0

    # seeds = [7771, 8367, 22, 1812, 4659]
    seeds = [7771, 8367, 22, 1812]
    # seeds = np.random.choice(10000, 5, replace=False)
    for opt, arg in opts:
        print(opt, arg)
        if opt == "--method":
            method = arg
            if '"' in method:
                method = method.replace('"', '')
        if opt == "--dataset":
            dataset = arg
            if '"' in dataset:
                dataset = dataset.replace('"', '')
        if opt == "--data-dir":
            data_dir = arg
        if opt == "--output-dir":
            output_dir = arg
        if opt == "--cvs":
            cvs = int(arg)
        if opt == "--specify-arg":
            sp_arg = int(arg)
        if opt == "--i_param":
            i_param = int(arg)
        if opt == "--i_test":
            i_test = int(arg)
        if opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        if opt == "--predict-num":
            predict_num = int(arg)
        if opt == "--cv_type":
            cv_type = arg
            if '"' in cv_type:
                cv_type = cv_type.replace('"', '')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # default parameters for each methods
    if method == 'nrlmf':
        args = {
            'c': 5,
            'K1': 5,
            'K2': 5,
            'r': 50,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'alpha': 0.25,
            'beta': 0.125,
            'theta': 0.5,
            'max_iter': 100
        }
    if method == 'netlaprls':
        args = {'gamma_d': 10, 'gamma_t': 10, 'beta_d': 1e-5, 'beta_t': 1e-5}
    if method == 'blmnii':
        args = {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False}
    if method in ['wnngip', 'gip']:
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8}
    if method in ['nnwnngip', 'nngip']:
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8, 'NN': 2}
    if method == 'kbmf':
        args = {'R': 50}
    if method == 'cmf':
        args = {
            'K': 50,
            'lambda_l': 0.5,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'max_iter': 30
        }
    if method in ['nnkronsvm', 'nnkronsvmgip']:
        args = {'C': 1., 'NbNeg': 10, 'PosNei': 10, 'NegNei': 2, 'n_proc': 1}
    if method in ['nnkronwnnsvmgip', 'nnkronwnnsvm']:
        args = {
            'C': 1.,
            't': 0.1,
            'NbNeg': 10,
            'PosNei': 10,
            'NegNei': 2,
            'n_proc': 1
        }

    for key, val in model_settings:
        args[key] = val

    if sp_arg == 2 and predict_num == 0:
        print(2, 'bis')
        m = get_name_method(method)
        data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
            str(i_param) + '_' + str(i_test)
        if os.path.isfile('results/' + data_file + '.data'):
            print('found', 'results/' + data_file + '.data')
            exit(1)

    intMat, drugMat, targetMat, limit = load_data_from_file(
        dataset, os.path.join(data_dir, ''))
    drug_names, target_names = get_drugs_targets_names(
        dataset, os.path.join(data_dir, ''))

    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        if cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        if cvs == 3:  # CV setting CVS3
            X, D, T, cv = intMat.T, targetMat, drugMat, 0
        cv_data = cross_validation(X,
                                   D,
                                   T,
                                   seeds,
                                   cv,
                                   limit,
                                   10,
                                   cv_type=cv_type)

    print(intMat.shape)
    if sp_arg == 0 and predict_num == 0:
        print(0)
        cv_eval.cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'nrlmf':
        #     cv_eval.nrlmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'netlaprls':
        #     cv_eval.netlaprls_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'blmnii':
        #     cv_eval.blmnii_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'wnngip':
        #     cv_eval.wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'gip':
        #     cv_eval.gip_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'nngip':
        #     cv_eval.nngip_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'nnwnngip':
        #     cv_eval.nnwnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'kbmf':
        #     cv_eval.kbmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'cmf':
        #     cv_eval.cmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'nnkronsvm':
        #     cv_eval.nnkronsvm_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'nnkronsvmgip':
        #     cv_eval.nnkronsvmgip_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'nnkronwnnsvm':
        #     cv_eval.nnkronwnnsvm_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)
        # if method == 'nnkronwnnsvmgip':
        #     cv_eval.nnkronwnnsvmgip_cv_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type)

    if sp_arg == 2 and predict_num == 0:
        print(2)
        cv_eval.eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
                     i_param, i_test)

        # if method == 'nrlmf':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.nrlmf_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                        i_param, i_test)
        # if method == 'wnngip':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.wnngip_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                         i_param, i_test)
        # if method == 'gip':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.gip_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                      i_param, i_test)
        # if method == 'nngip':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.nngip_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                        i_param, i_test)
        # if method == 'nnwnngip':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.nnwnngip_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                           i_param, i_test)
        # if method == 'nnkronsvm':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.nnkronsvm_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                            i_param, i_test)
        # if method == 'nnkronsvmgip':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.nnkronsvmgip_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                               i_param, i_test)
        # if method == 'nnkronwnnsvm':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.nnkronwnnsvm_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                               i_param, i_test)
        # if method == 'nnkronwnnsvmgip':
        #   m = get_name_method(method)
        #   data_file = dataset + "_" + str(cvs) + "_" + cv_type + '_Model:' + m + '_' + \
        #       str(i_param) + '_' + str(i_test)
        #   if not os.path.isfile('results/' + data_file + '.data'):
        #     cv_eval.nnkronwnnsvmgip_eval(method, dataset, cv_data, X, D, T, cvs, args, cv_type,
        #                                  i_param, i_test)

    if sp_arg == 1:
        tic, toc = time.clock(), time.time()
        if method == 'nrlmf':
            model = NRLMF(cfix=args['c'],
                          K1=args['K1'],
                          K2=args['K2'],
                          num_factors=args['r'],
                          lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'],
                          alpha=args['alpha'],
                          beta=args['beta'],
                          theta=args['theta'],
                          max_iter=args['max_iter'])
        if method == 'netlaprls':
            model = NetLapRLS(gamma_d=args['gamma_d'],
                              gamma_t=args['gamma_t'],
                              beta_d=args['beta_t'],
                              beta_t=args['beta_t'])
        if method == 'blmnii':
            model = BLMNII(alpha=args['alpha'],
                           gamma=args['gamma'],
                           sigma=args['sigma'],
                           avg=args['avg'])
        if method == 'wnngip':
            model = WNNGIP(T=args['T'],
                           sigma=args['sigma'],
                           alpha=args['alpha'])
        if method == 'gip':
            model = GIP(T=args['T'], sigma=args['sigma'], alpha=args['alpha'])
        if method == 'nngip':
            model = NNGIP(T=args['T'],
                          sigma=args['sigma'],
                          alpha=args['alpha'],
                          NN=args['NN'])
        if method == 'nnwnngip':
            model = NNWNNGIP(T=args['T'],
                             sigma=args['sigma'],
                             alpha=args['alpha'],
                             NN=args['NN'])
        if method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        if method == 'cmf':
            model = CMF(K=args['K'],
                        lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'],
                        lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        elif method == 'nnkronsvm':
            model = NNKronSVM(C=args['C'],
                              NbNeg=args['NbNeg'],
                              NegNei=args['NegNei'],
                              PosNei=args['PosNei'],
                              dataset=dataset,
                              n_proc=args['n_proc'])
        elif method == 'nnkronsvmgip':
            model = NNKronSVMGIP(C=args['C'],
                                 NbNeg=args['NbNeg'],
                                 NegNei=args['NegNei'],
                                 PosNei=args['PosNei'],
                                 dataset=dataset,
                                 n_proc=args['n_proc'])
        elif method == 'nnkronwnnsvmgip':
            model = NNKronWNNSVMGIP(C=args['C'],
                                    t=args['t'],
                                    NbNeg=args['NbNeg'],
                                    NegNei=args['NegNei'],
                                    PosNei=args['PosNei'],
                                    dataset=dataset,
                                    n_proc=args['n_proc'])
        elif method == 'nnkronwnnsvm':
            model = NNKronWNNSVM(C=args['C'],
                                 t=args['t'],
                                 NbNeg=args['NbNeg'],
                                 NegNei=args['NegNei'],
                                 PosNei=args['PosNei'],
                                 dataset=dataset,
                                 n_proc=args['n_proc'])
        cmd = str(model)
        print("Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd)
        aupr_vec, auc_vec = train(model, method, dataset, cv_data, X, D, T,
                                  cv_type)
        aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
        auc_avg, auc_conf = mean_confidence_interval(auc_vec)
        data_file = dataset + "_" + str(
            cvs) + "_" + cv_type + '_DefaultParam_Model:' + method
        tic, toc = time.clock() - tic, time.time() - toc
        print(np.mean(aupr_vec), np.std(aupr_vec), np.mean(auc_vec),
              np.std(auc_vec), tic, toc)
        pickle.dump((aupr_vec, auc_vec, tic, toc),
                    open('results/' + data_file + '.data', 'wb'))
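
This last example pickles the raw (aupr_vec, auc_vec, tic, toc) tuple per configuration. A hypothetical snippet for loading one of those result files back:

import pickle
import numpy as np

# Hypothetical: reload a pickled result tuple written by the block above.
# data_file is built the same way as in the example.
with open('results/' + data_file + '.data', 'rb') as f:
    aupr_vec, auc_vec, tic, toc = pickle.load(f)
print(np.mean(aupr_vec), np.mean(auc_vec))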