예제 #1
0
def main(argv):
    """Run a drug-target interaction experiment described by ``argv``.

    Long options (each takes a value): ``--method``, ``--dataset``,
    ``--data-dir``, ``--output-dir``, ``--cvs`` (1, 2 or 3),
    ``--specify-arg``, ``--method-options`` (whitespace-separated
    ``key=value`` overrides of the method defaults; values are kept as
    strings) and ``--predict-num``.  The parser also accepts the short
    options in its spec, but only the long forms are acted upon.

    Modes:
      * ``predict-num == 0`` and ``specify-arg == 0``: call the matching
        ``cv_eval.<method>_cv_eval`` routine on the cross-validation folds.
      * ``predict-num == 0`` and ``specify-arg == 1`` (the defaults): train
        the model on the folds, print AUC/AUPR summaries and pass both
        metric vectors to ``write_metric_vector_to_file``.
      * ``predict-num > 0``: fit the model on the whole interaction matrix
        and score every drug-target pair.

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: Status 2 when the options cannot be parsed, when
            ``--dataset`` is missing, or when ``--method`` is missing or
            not one of the supported methods.
    """
    try:
        opts, _ = getopt.getopt(argv, "m:d:f:c:s:o:n:p", [
            "method=", "dataset=", "data-dir=", "cvs=", "specify-arg=",
            "method-options=", "predict-num=", "output-dir=",
        ])
    except getopt.GetoptError as err:
        # Report the problem and fail loudly instead of exiting with status 0.
        sys.stderr.write("%s\n" % err)
        sys.exit(2)

    data_dir = os.path.join(os.path.pardir, 'data')
    output_dir = os.path.join(os.path.pardir, 'output')
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0
    method, dataset = None, None

    seeds = [7771, 8367, 22, 1812, 4659]
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        elif opt == "--dataset":
            dataset = arg
        elif opt == "--data-dir":
            data_dir = arg
        elif opt == "--output-dir":
            output_dir = arg
        elif opt == "--cvs":
            cvs = int(arg)
        elif opt == "--specify-arg":
            sp_arg = int(arg)
        elif opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        elif opt == "--predict-num":
            predict_num = int(arg)

    # Default hyper-parameters for each supported method.
    default_args = {
        'nrlmf': {'c': 5, 'K1': 5, 'K2': 5, 'r': 50, 'lambda_d': 0.125,
                  'lambda_t': 0.125, 'alpha': 0.25, 'beta': 0.125,
                  'theta': 0.5, 'max_iter': 100},
        'netlaprls': {'gamma_d': 10, 'gamma_t': 10,
                      'beta_d': 1e-5, 'beta_t': 1e-5},
        'blmnii': {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False},
        'wnngip': {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8},
        'kbmf': {'R': 50},
        'cmf': {'K': 50, 'lambda_l': 0.5, 'lambda_d': 0.125,
                'lambda_t': 0.125, 'max_iter': 30},
    }

    # Validate up front; previously these cases crashed later with NameError.
    if method not in default_args:
        sys.stderr.write("unknown or missing --method: %r (expected one of: %s)\n"
                         % (method, ", ".join(sorted(default_args))))
        sys.exit(2)
    if dataset is None:
        sys.stderr.write("missing required option --dataset\n")
        sys.exit(2)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    args = dict(default_args[method])
    for key, val in model_settings:
        args[key] = val

    # ZINC test data.  load_data_from_file_demo / get_drug_target_names_demo
    # are the demo equivalents of these two loaders.
    intMat, testMat, drugMat, targetMat = load_data_from_file_csv(dataset, os.path.join(data_dir))
    drug_names, target_names = get_drug_target_names_zinc()

    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        elif cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        elif cvs == 3:  # CV setting CVS3 (roles of drugs and targets swapped)
            X, D, T, cv = intMat.T, targetMat, drugMat, 0
        cv_data = cross_validation(X, seeds, cv)

    if sp_arg == 0 and predict_num == 0:
        # Every supported method has a cv_eval routine named <method>_cv_eval.
        evaluate = getattr(cv_eval, method + "_cv_eval")
        evaluate(method, dataset, cv_data, X, D, T, cvs, args)

    if sp_arg == 1 or predict_num > 0:
        # time.clock() was removed in Python 3.8; fall back to it only where
        # perf_counter does not exist (Python 2).
        timer = getattr(time, "perf_counter", None) or time.clock
        tic = timer()
        if method == 'nrlmf':
            model = NRLMF(cfix=args['c'], K1=args['K1'], K2=args['K2'],
                          num_factors=args['r'], lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'], alpha=args['alpha'],
                          beta=args['beta'], theta=args['theta'],
                          max_iter=args['max_iter'])
        elif method == 'netlaprls':
            # beta_d used to be fed from args['beta_t'], ignoring overrides.
            model = NetLapRLS(gamma_d=args['gamma_d'], gamma_t=args['gamma_t'],
                              beta_d=args['beta_d'], beta_t=args['beta_t'])
        elif method == 'blmnii':
            model = BLMNII(alpha=args['alpha'], gamma=args['gamma'],
                           sigma=args['sigma'], avg=args['avg'])
        elif method == 'wnngip':
            model = WNNGIP(T=args['T'], sigma=args['sigma'], alpha=args['alpha'])
        elif method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        else:  # 'cmf' -- the only remaining validated method
            model = CMF(K=args['K'], lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'], lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        cmd = str(model)
        if predict_num == 0:
            print("Dataset:"+dataset+" CVS:"+str(cvs)+"\n"+cmd)
            aupr_vec, auc_vec = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            print("auc:%.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f"
                  % (auc_avg, aupr_avg, auc_conf, aupr_conf, timer()-tic))
            write_metric_vector_to_file(auc_vec, os.path.join(output_dir, method+"_auc_cvs"+str(cvs)+"_"+dataset+".txt"))
            write_metric_vector_to_file(aupr_vec, os.path.join(output_dir, method+"_aupr_cvs"+str(cvs)+"_"+dataset+".txt"))
        elif predict_num > 0:
            print("Dataset:"+dataset+"\n"+cmd)
            seed = 7771 if method == 'cmf' else 22
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            # ">= 0" selects every pair, including the training pairs.
            x, y = np.where(intMat >= 0)
            # list(...) keeps the pair list concrete on Python 3 as well.
            # NOTE(review): the scores are not used further in the visible code.
            scores = model.predict_scores_NaNtest(list(zip(x, y)), 5)
def thear(method, dataset, data_dir, output_dir, cvs, sp_arg, model_settings,
          predict_num, seeds, seedsOptPar, args):
    """Load ``dataset`` and run one experiment for ``method``.

    Modes:
      * ``predict_num == 0`` and ``sp_arg == 0``: call the ``cv_eval``
        routine registered for ``method`` (unknown methods are ignored,
        as before).
      * ``predict_num == 0`` and ``sp_arg == 1``: train the model on the
        cross-validation folds, print AUC/AUPR summaries and pass both
        metric vectors to ``write_metric_vector_to_file``.
      * ``predict_num > 0``: fit the model on the full interaction matrix,
        rank the pairs with ``intMat == 0`` by predicted score and hand the
        top ``predict_num`` to ``novel_prediction_analysis``.

    Args:
        method: Name of the prediction method (e.g. ``'nrlmf'``).
        dataset: Dataset name, forwarded to the loaders and used in file
            names.
        data_dir: Directory containing the ``datasets`` and ``biodb``
            sub-directories.
        output_dir: Directory for metric and prediction files.
        cvs: Cross-validation setting (1-4).
        sp_arg: 0 to run the ``cv_eval`` routine, 1 to train with ``args``.
        model_settings: Not used by this function.
        predict_num: Number of novel pairs to report; 0 selects
            cross-validation.
        seeds: Seeds forwarded to ``cross_validation``.
        seedsOptPar: Not used by this function.
        args: Hyper-parameters for the model.

    Raises:
        ValueError: If a model has to be built for an unknown ``method``.
    """
    intMat, drugMat, targetMat = load_data_from_file(
        dataset, os.path.join(data_dir, 'datasets'))
    drug_names, target_names = get_drugs_targets_names(
        dataset, os.path.join(data_dir, 'datasets'))

    invert = 1 if method == 'inv_brdti' else 0
    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        elif cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        elif cvs == 3:  # CV setting CVS3
            X, D, T, cv, invert = intMat.T, targetMat, drugMat, 0, 1
        elif cvs == 4:
            X, D, T, cv = intMat, drugMat, targetMat, 2
        cv_data = cross_validation(X, seeds, cv, invert, num=10)

    if invert:
        # Folds were built with the invert flag; the models themselves work
        # on the untransposed matrices.
        X, D, T = intMat, drugMat, targetMat

    if sp_arg == 0 and predict_num == 0:
        # method -> name of its cv_eval routine.  'brdti' used to be listed
        # twice, which ran its evaluation twice.
        cv_eval_names = {
            'vbmklmf': 'vbmklmf_cv_eval',
            'ensambledti': 'vbmklmf_cv_eval',
            'netcbp': 'netcbp_cv_eval',
            'grmf': 'grmf_cv_eval',
            'pudt': 'pudt_cv_eval',
            'daspfind': 'daspfind_cv_eval',
            'dnilmf': 'dnilmf_cv_eval',
            'dthybrid': 'dthybrid_cv_eval',
            # NOTE(review): spelled "kronrismkl" in cv_eval -- confirm.
            'kronrlsmkl': 'kronrismkl_cv_eval',
            'brdti': 'brdti_cv_eval',
            'inv_brdti': 'brdti_cv_eval',
            'ddr': 'ddr_cv_eval',
            'nrlmf': 'nrlmf_cv_eval',
            'netlaprls': 'netlaprls_cv_eval',
            'blmnii': 'blmnii_cv_eval',
            'wnngip': 'wnngip_cv_eval',
            'kbmf': 'kbmf_cv_eval',
            'cmf': 'cmf_cv_eval',
        }
        routine_name = cv_eval_names.get(method)
        if routine_name is not None:
            getattr(cv_eval, routine_name)(method, dataset, cv_data, X, D, T,
                                           cvs, args)

    if sp_arg == 1 or predict_num > 0:
        # time.clock() was removed in Python 3.8; fall back to it only where
        # perf_counter does not exist (Python 2).
        timer = getattr(time, "perf_counter", None) or time.clock
        tic = timer()
        if method == "netcbp":
            model = NetCBP()
        elif method == "grmf":
            model = GRMF(cv=cvs)
        elif method == "pudt":
            model = PUDT(dataset=dataset)
        elif method == "vbmklmf":
            model = VBMKLMF(dataset=dataset, cvs=cvs)
        elif method == 'dnilmf':
            model = DNILMF(dataset=dataset)
        elif method == 'kronrlsmkl':
            model = KronRLsMKL(dataset=dataset)
        elif method == 'dthybrid':
            model = DTHYBRID(dataset=dataset)
        elif method == 'daspfind':
            model = DASPFIND(alpha=args['alpha'])
        elif method in ('brdti', 'inv_brdti'):
            model = BRDTI(args)
        elif method == 'nrlmf':
            model = NRLMF(cfix=args['c'],
                          K1=args['K1'],
                          K2=args['K2'],
                          num_factors=args['r'],
                          lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'],
                          alpha=args['alpha'],
                          beta=args['beta'],
                          theta=args['theta'],
                          max_iter=args['max_iter'])
        elif method == 'ddr':
            model = DDR(dataset=dataset, cv=cvs)
        elif method == 'netlaprls':
            # beta_d used to be fed from args['beta_t'], ignoring overrides.
            model = NetLapRLS(gamma_d=args['gamma_d'],
                              gamma_t=args['gamma_t'],
                              beta_d=args['beta_d'],
                              beta_t=args['beta_t'])
        elif method == 'blmnii':
            model = BLMNII(alpha=args['alpha'],
                           gamma=args['gamma'],
                           sigma=args['sigma'],
                           avg=args['avg'])
        elif method == 'wnngip':
            model = WNNGIP(T=args['T'],
                           sigma=args['sigma'],
                           alpha=args['alpha'])
        elif method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        elif method == 'cmf':
            model = CMF(K=args['K'],
                        lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'],
                        lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        elif method == 'ensambledti':
            model = EnsambleDTI(args=args, dataset=dataset)
        else:
            # Previously this fell through to an unbound ``model`` below.
            raise ValueError("Unknown method: %s" % method)
        cmd = str(model)
        if predict_num == 0:
            print("Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd)
            aupr_vec, auc_vec = train(model, cv_data, X, D, T, cvs, dataset)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            print(
                "auc:%.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f"
                % (auc_avg, aupr_avg, auc_conf, aupr_conf, timer() - tic))
            write_metric_vector_to_file(
                auc_vec,
                os.path.join(
                    output_dir,
                    method + "_auc_cvs" + str(cvs) + "_" + dataset + ".txt"))
            write_metric_vector_to_file(
                aupr_vec,
                os.path.join(
                    output_dir,
                    method + "_aupr_cvs" + str(cvs) + "_" + dataset + ".txt"))
        elif predict_num > 0:
            print("Dataset:" + dataset + "\n" + cmd)
            seed = 7771 if method == 'cmf' else 22
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            # Candidate pairs are those with no recorded interaction.
            x, y = np.where(intMat == 0)
            # list(...) keeps the pair list concrete on Python 3 as well.
            scores = model.predict_scores(list(zip(x, y)), 5)
            ii = np.argsort(scores)[::-1]  # indices by descending score
            predict_pairs = [(drug_names[x[i]], target_names[y[i]], scores[i])
                             for i in ii[:predict_num]]
            new_dti_file = os.path.join(
                output_dir, "_".join([method, dataset, "new_dti.txt"]))
            novel_prediction_analysis(predict_pairs, new_dti_file,
                                      os.path.join(data_dir, 'biodb'))
예제 #3
0
 def learn_hyperparameters(self, intMat, drugMat, targetMat, seed=500):
     """Set ``self.alpha`` from a BLMNII cross-validation parameter search.

     Builds folds with ``cross_validation`` from the single ``seed``
     (``num=5``), passes them to ``cv_eval.blmnii_cv_eval`` together with
     the three matrices, and stores the ``"alpha"`` entry of the returned
     mapping on the instance.

     Args:
         intMat: Interaction matrix used to build the folds and to evaluate.
         drugMat: Drug matrix forwarded to the evaluation routine.
         targetMat: Target matrix forwarded to the evaluation routine.
         seed: Seed for the fold split (default 500).
     """
     tuning_folds = cross_validation(intMat, [seed], 1, 0, num=5)
     eval_arguments = ("blmnii", "", "", tuning_folds, intMat, drugMat,
                       targetMat, 1, "")
     best_params = cv_eval.blmnii_cv_eval(*eval_arguments)
     self.alpha = best_params["alpha"]
예제 #4
0
def main(argv):
    """Run a BRDTI-style drug-target interaction experiment from ``argv``.

    Long options (each takes a value): ``--method``, ``--dataset``,
    ``--data-dir``, ``--output-dir``, ``--cvs`` (1, 2 or 3),
    ``--specify-arg``, ``--method-options`` (whitespace-separated
    ``key=value`` overrides, each value evaluated and converted to float)
    and ``--predict-num``.  The parser also accepts the short options in
    its spec, but only the long forms are acted upon.

    Modes:
      * ``predict-num == 0`` and ``specify-arg == 0``: run the ``cv_eval``
        routine of the method on the parameter-optimisation folds.
      * ``predict-num == 0`` and ``specify-arg == 1`` (the defaults): train
        on the folds, write the raw per-pair results as CSV, print the
        AUC/AUPR/NDCG summaries and pass the metric vectors to
        ``write_metric_vector_to_file``.
      * ``predict-num > 0``: fit on the whole interaction matrix, score the
        pairs with ``intMat == 0`` and append the statistics returned by
        ``verify_novel_interactions`` to a CSV file.

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: Status 2 when the options cannot be parsed, when
            ``--dataset`` is missing, or when ``--method`` is missing or
            not one of the supported methods.
    """
    try:
        # The short-option spec has to be a string literal.
        opts, _ = getopt.getopt(argv, "m:d:f:c:s:o:n:p", [
            "method=", "dataset=", "data-dir=", "cvs=", "specify-arg=",
            "method-options=", "predict-num=", "output-dir=",
        ])
    except getopt.GetoptError as err:
        # Report the problem and fail loudly instead of exiting with status 0.
        sys.stderr.write("%s\n" % err)
        sys.exit(2)

    data_dir = 'data'
    output_dir = 'output'
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0
    method, dataset = None, None

    seeds = [7771, 8367, 22, 1812, 4659]
    seedsOptPar = [156]
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        elif opt == "--dataset":
            dataset = arg
        elif opt == "--data-dir":
            data_dir = arg
        elif opt == "--output-dir":
            output_dir = arg
        elif opt == "--cvs":
            cvs = int(arg)
        elif opt == "--specify-arg":
            sp_arg = int(arg)
        elif opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        elif opt == "--predict-num":
            predict_num = int(arg)

    # Default hyper-parameters for each supported method.
    brdti_defaults = {
        'D': 100,
        'learning_rate': 0.1,
        'max_iters': 100,
        'simple_predict': False,
        'bias_regularization': 1,
        'global_regularization': 10**(-2),
        "cbSim": "knn",
        'cb_alignment_regularization_user': 1,
        'cb_alignment_regularization_item': 1,
    }
    default_args = {
        'brdti': brdti_defaults,
        'inv_brdti': brdti_defaults,
        'netlaprls': {'gamma_d': 10, 'gamma_t': 10,
                      'beta_d': 1e-5, 'beta_t': 1e-5},
        'blmnii': {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False},
        'wnngip': {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8},
        'cmf': {'K': 100, 'lambda_l': 0.5, 'lambda_d': 0.125,
                'lambda_t': 0.125, 'max_iter': 100},
    }

    # Validate up front; previously these cases crashed later with NameError.
    if method not in default_args:
        sys.stderr.write("unknown or missing --method: %r (expected one of: %s)\n"
                         % (method, ", ".join(sorted(default_args))))
        sys.exit(2)
    if dataset is None:
        sys.stderr.write("missing required option --dataset\n")
        sys.exit(2)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    optpar_dir = os.path.join(output_dir, "optPar")
    if not os.path.isdir(optpar_dir):
        os.makedirs(optpar_dir)

    args = dict(default_args[method])
    for key, val in model_settings:
        # SECURITY NOTE(review): eval() executes whatever expression is given
        # on the command line.  Kept as-is (presumably so values such as
        # "10**(-2)" work); never feed this untrusted input.
        args[key] = float(eval(val))

    intMat, drugMat, targetMat = load_data_from_file(dataset, os.path.join(data_dir, 'datasets'))
    drug_names, target_names = get_drugs_targets_names(dataset, os.path.join(data_dir, 'datasets'))

    invert = 1 if method == 'inv_brdti' else 0

    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        elif cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        elif cvs == 3:  # CV setting CVS3
            X, D, T, cv = intMat.T, targetMat, drugMat, 0

        cv_data = cross_validation(X, seeds, cv, invert)
        cv_data_optimize_params = cross_validation(X, seedsOptPar, cv, invert, num=5)

    if sp_arg == 0 and predict_num == 0:
        if method == 'inv_brdti':
            # The inverted variant evaluates BRDTI on the transposed problem.
            cv_eval.brdti_cv_eval(method, dataset, output_dir, cv_data_optimize_params, X.T, T, D, cvs, args)
        else:
            # Every other supported method has a routine named <method>_cv_eval.
            evaluate = getattr(cv_eval, method + "_cv_eval")
            evaluate(method, dataset, output_dir, cv_data_optimize_params, X, D, T, cvs, args)

    if sp_arg == 1 or predict_num > 0:
        # time.clock() was removed in Python 3.8; fall back to it only where
        # perf_counter does not exist (Python 2).
        timer = getattr(time, "perf_counter", None) or time.clock
        tic = timer()
        if method in ('brdti', 'inv_brdti'):
            model = BRDTI(args)
        elif method == 'netlaprls':
            # beta_d used to be fed from args['beta_t'], ignoring overrides.
            model = NetLapRLS(gamma_d=args['gamma_d'], gamma_t=args['gamma_t'],
                              beta_d=args['beta_d'], beta_t=args['beta_t'])
        elif method == 'blmnii':
            model = BLMNII(alpha=args['alpha'], gamma=args['gamma'],
                           sigma=args['sigma'], avg=args['avg'])
        elif method == 'wnngip':
            model = WNNGIP(T=args['T'], sigma=args['sigma'], alpha=args['alpha'])
        else:  # 'cmf' -- the only remaining validated method
            model = CMF(K=args['K'], lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'], lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        cmd = str(model)

        # Predict the hidden part of the current dataset.
        if predict_num == 0:
            print("Dataset:"+dataset+" CVS:"+str(cvs)+"\n"+cmd)
            if method == 'inv_brdti':
                # Transposed problem: the two NDCG vectors come back swapped.
                aupr_vec, auc_vec, ndcg_inv_vec, ndcg_vec, results = train(model, cv_data, X.T, T, D)
            else:
                aupr_vec, auc_vec, ndcg_vec, ndcg_inv_vec, results = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            ndcg_avg, ndcg_conf = mean_confidence_interval(ndcg_vec)
            ndcg_inv_avg, ndcg_inv_conf = mean_confidence_interval(ndcg_inv_vec)

            # NOTE(review): this path is rooted at 'output', not output_dir,
            # and the 'rawResults' directory is not created here -- confirm.
            resfile = os.path.join('output', 'rawResults', method+"_res_"+str(cvs)+"_"+dataset+".csv")
            # The context manager closes (and flushes) the file; it used to
            # be left open.
            with open(resfile, "w") as outd:
                outd.write('drug;target;true;predict\n')
                for r in results:
                    outd.write('%s;%s;%s;%s\n' % (r[0], r[1], r[2], r[3]))

            print("auc:%.6f, aupr: %.6f, ndcg: %.6f, ndcg_inv: %.6f, auc_conf:%.6f, aupr_conf:%.6f, ndcg_conf:%.6f, ndcg_inv_conf:%.6f, Time:%.6f"
                  % (auc_avg, aupr_avg, ndcg_avg, ndcg_inv_avg, auc_conf, aupr_conf, ndcg_conf, ndcg_inv_conf, timer()-tic))
            write_metric_vector_to_file(auc_vec, os.path.join(output_dir, method+"_auc_cvs"+str(cvs)+"_"+dataset+".txt"))
            write_metric_vector_to_file(aupr_vec, os.path.join(output_dir, method+"_aupr_cvs"+str(cvs)+"_"+dataset+".txt"))
            write_metric_vector_to_file(ndcg_vec, os.path.join(output_dir, method+"_ndcg_cvs"+str(cvs)+"_"+dataset+".txt"))
            write_metric_vector_to_file(ndcg_inv_vec, os.path.join(output_dir, method+"_ndcg_inv_cvs"+str(cvs)+"_"+dataset+".txt"))

        # Predict novel DTIs.
        elif predict_num > 0:
            print("Dataset:"+dataset+"\n"+cmd)
            seed = 376
            if invert:  # predicting drugs for targets
                model.fix_model(intMat.T, intMat.T, targetMat, drugMat, seed)
            else:  # predicting targets for drugs
                model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            npa = newDTIPrediction()
            x, y = np.where(intMat == 0)
            # The inverted model is indexed (target, drug); results are
            # always reported as (drug, target, score).
            pairs = list(zip(y, x)) if invert else list(zip(x, y))
            scores = model.predict_scores(pairs, 1)
            sz = np.array(list(zip(x, y, scores)))

            ndcg_d, ndcg_t, recall_d, recall_t = npa.verify_novel_interactions(method, dataset, sz, predict_num, drug_names, target_names)

            # NOTE(review): rooted at 'output/newDTI', not output_dir -- confirm.
            st_file = os.path.join('output/newDTI', "_".join([dataset, str(predict_num), "stats.csv"]))
            # Append one statistics row and close the file afterwards.
            with open(st_file, "a") as out:
                out.write('%s;%f;%f;%f;%f\n' % (method, ndcg_d, ndcg_t, recall_d, recall_t))
예제 #5
0
def main(argv):
    """Run a drug-target interaction experiment described by ``argv``.

    Long options (each takes a value): ``--method``, ``--dataset``,
    ``--data-dir``, ``--output-dir``, ``--cvs`` (1, 2 or 3),
    ``--specify-arg``, ``--method-options`` (whitespace-separated
    ``key=value`` overrides of the method defaults; values are kept as
    strings) and ``--predict-num``.  The parser also accepts the short
    options in its spec, but only the long forms are acted upon.

    Modes:
      * ``predict-num == 0`` and ``specify-arg == 0``: call the matching
        ``cv_eval.<method>_cv_eval`` routine on the cross-validation folds.
      * ``predict-num == 0`` and ``specify-arg == 1`` (the defaults): train
        the model on the folds, print AUC/AUPR summaries and pass both
        metric vectors to ``write_metric_vector_to_file``.
      * ``predict-num > 0``: fit the model on the full interaction matrix,
        rank the pairs with ``intMat == 0`` by predicted score and hand the
        top ``predict-num`` to ``novel_prediction_analysis``.

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: Status 2 when the options cannot be parsed, when
            ``--dataset`` is missing, or when ``--method`` is missing or
            not one of the supported methods.
    """
    try:
        opts, _ = getopt.getopt(argv, "m:d:f:c:s:o:n:p", [
            "method=",
            "dataset=",
            "data-dir=",
            "cvs=",
            "specify-arg=",
            "method-options=",
            "predict-num=",
            "output-dir=",
        ])
    except getopt.GetoptError as err:
        # Report the problem and fail loudly instead of exiting with status 0.
        sys.stderr.write("%s\n" % err)
        sys.exit(2)

    data_dir = os.path.join(os.path.pardir, 'data')
    output_dir = os.path.join(os.path.pardir, 'output')
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0
    method, dataset = None, None

    seeds = [7771, 8367, 22, 1812, 4659]
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        elif opt == "--dataset":
            dataset = arg
        elif opt == "--data-dir":
            data_dir = arg
        elif opt == "--output-dir":
            output_dir = arg
        elif opt == "--cvs":
            cvs = int(arg)
        elif opt == "--specify-arg":
            sp_arg = int(arg)
        elif opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        elif opt == "--predict-num":
            predict_num = int(arg)

    # Default hyper-parameters for each supported method.
    default_args = {
        'nrlmf': {
            'c': 5,
            'K1': 5,
            'K2': 5,
            'r': 50,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'alpha': 0.25,
            'beta': 0.125,
            'theta': 0.5,
            'max_iter': 100,
        },
        'netlaprls': {'gamma_d': 10, 'gamma_t': 10,
                      'beta_d': 1e-5, 'beta_t': 1e-5},
        'blmnii': {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False},
        'wnngip': {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8},
        'kbmf': {'R': 50},
        'cmf': {
            'K': 50,
            'lambda_l': 0.5,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'max_iter': 30,
        },
    }

    # Validate up front; previously these cases crashed later with NameError.
    if method not in default_args:
        sys.stderr.write("unknown or missing --method: %r (expected one of: %s)\n"
                         % (method, ", ".join(sorted(default_args))))
        sys.exit(2)
    if dataset is None:
        sys.stderr.write("missing required option --dataset\n")
        sys.exit(2)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    args = dict(default_args[method])
    for key, val in model_settings:
        args[key] = val

    intMat, drugMat, targetMat = load_data_from_file(
        dataset, os.path.join(data_dir, 'datasets'))
    drug_names, target_names = get_drugs_targets_names(
        dataset, os.path.join(data_dir, 'datasets'))

    if predict_num == 0:
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        elif cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        elif cvs == 3:  # CV setting CVS3 (roles of drugs and targets swapped)
            X, D, T, cv = intMat.T, targetMat, drugMat, 0
        cv_data = cross_validation(X, seeds, cv)

    if sp_arg == 0 and predict_num == 0:
        # Every supported method has a cv_eval routine named <method>_cv_eval.
        evaluate = getattr(cv_eval, method + "_cv_eval")
        evaluate(method, dataset, cv_data, X, D, T, cvs, args)

    if sp_arg == 1 or predict_num > 0:
        # time.clock() was removed in Python 3.8; fall back to it only where
        # perf_counter does not exist (Python 2).
        timer = getattr(time, "perf_counter", None) or time.clock
        tic = timer()
        if method == 'nrlmf':
            model = NRLMF(cfix=args['c'],
                          K1=args['K1'],
                          K2=args['K2'],
                          num_factors=args['r'],
                          lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'],
                          alpha=args['alpha'],
                          beta=args['beta'],
                          theta=args['theta'],
                          max_iter=args['max_iter'])
        elif method == 'netlaprls':
            # beta_d used to be fed from args['beta_t'], ignoring overrides.
            model = NetLapRLS(gamma_d=args['gamma_d'],
                              gamma_t=args['gamma_t'],
                              beta_d=args['beta_d'],
                              beta_t=args['beta_t'])
        elif method == 'blmnii':
            model = BLMNII(alpha=args['alpha'],
                           gamma=args['gamma'],
                           sigma=args['sigma'],
                           avg=args['avg'])
        elif method == 'wnngip':
            model = WNNGIP(T=args['T'],
                           sigma=args['sigma'],
                           alpha=args['alpha'])
        elif method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        else:  # 'cmf' -- the only remaining validated method
            model = CMF(K=args['K'],
                        lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'],
                        lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        cmd = str(model)
        if predict_num == 0:
            print("Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd)
            aupr_vec, auc_vec = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            print("auc:%.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f" % (
                auc_avg, aupr_avg, auc_conf, aupr_conf, timer() - tic))
            write_metric_vector_to_file(
                auc_vec,
                os.path.join(
                    output_dir,
                    method + "_auc_cvs" + str(cvs) + "_" + dataset + ".txt"))
            write_metric_vector_to_file(
                aupr_vec,
                os.path.join(
                    output_dir,
                    method + "_aupr_cvs" + str(cvs) + "_" + dataset + ".txt"))
        elif predict_num > 0:
            print("Dataset:" + dataset + "\n" + cmd)
            seed = 7771 if method == 'cmf' else 22
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            # Candidate pairs are those with no recorded interaction.
            x, y = np.where(intMat == 0)
            # list(...) keeps the pair list concrete on Python 3 as well.
            scores = model.predict_scores(list(zip(x, y)), 5)
            ii = np.argsort(scores)[::-1]  # indices by descending score
            predict_pairs = [(drug_names[x[i]], target_names[y[i]], scores[i])
                             for i in ii[:predict_num]]
            new_dti_file = os.path.join(
                output_dir, "_".join([method, dataset, "new_dti.txt"]))
            novel_prediction_analysis(predict_pairs, new_dti_file,
                                      os.path.join(data_dir, 'biodb'))
예제 #6
0
파일: PyDTI.py 프로젝트: akiyamalab/NRLMFb
def main(argv):
    """Run one experiment selected by command-line options.

    Three modes are chosen by the parsed options:

    * ``--specify-arg 0`` (with ``--predict-num 0``): hand over to the
      per-method routine in ``cv_eval`` (``--external 0``) or ``ev_eval``
      (``--external 1``; only 'nrlmf' and 'nrlmfb' are dispatched there).
    * ``--specify-arg 1`` (the default): build a single model from the
      default parameters plus any ``--method-opt`` overrides, call
      ``train`` on the cross-validation folds, and print/log AUC and AUPR.
    * ``--predict-num N`` with ``N > 0``: fit the model on the whole
      interaction matrix, score every zero entry, and pass the ``N``
      best-scoring pairs to ``novel_prediction_analysis``.

    Args:
        argv: Option strings in ``getopt`` form (typically ``sys.argv[1:]``).
            Recognised long options: ``--method``, ``--dataset``,
            ``--data-dir``, ``--output-dir``, ``--cvs``, ``--external``,
            ``--specify-arg``, ``--method-opt`` (space-separated
            ``key=value`` pairs), ``--predict-num``, ``--scoring``,
            ``--gpmi`` (space-separated ``key=float`` pairs), ``--params``,
            ``--log`` and ``--workdir``.

    Raises:
        SystemExit: With status 2 when ``getopt`` rejects the options.
    """
    try:
        opts, args = getopt.getopt(argv, "m:d:f:c:e:s:o:n:p:g:q:r:l:w", [
            "method=", "dataset=", "data-dir=", "cvs=", "external=",
            "specify-arg=", "method-opt=", "predict-num=", "scoring=", "gpmi=",
            "params=", "output-dir=", "log=", "workdir="
        ])
    except getopt.GetoptError as err:
        # Report the problem and fail with a non-zero status instead of
        # silently exiting as if the run had succeeded.
        sys.stderr.write("error: %s\n" % err)
        sys.exit(2)

    # Values used when the corresponding option is not given.
    method = "nrlmf"
    dataset = "nr"
    data_dir = '.'
    output_dir = '.'
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0
    external = 0
    scoring = 'auc'
    gpmi = None
    params = None
    workdir = "./"
    logfile = 'job.log'

    # Fixed seeds handed to cross_validation / external_validation.
    seeds = [7771, 8367, 22, 1812, 4659]

    # NOTE(review): only long option names are matched here, so the short
    # forms accepted by getopt above are parsed but have no effect.
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        elif opt == "--dataset":
            dataset = arg
        elif opt == "--data-dir":
            data_dir = arg
        elif opt == "--output-dir":
            output_dir = arg
        elif opt == "--cvs":
            cvs = int(arg)
        elif opt == "--external":
            external = int(arg)
        elif opt == "--specify-arg":
            sp_arg = int(arg)
        elif opt == "--method-opt":
            model_settings = [s.split('=') for s in str(arg).split()]
        elif opt == "--predict-num":
            predict_num = int(arg)
        elif opt == "--scoring":
            scoring = str(arg)
        elif opt == "--gpmi":
            gpmi = dict()
            for s in str(arg).split():
                key, val = s.split('=')
                gpmi[key] = float(val)
        elif opt == "--params":
            params = read_params(str(arg))
        elif opt == "--log":
            logfile = str(arg)
        elif opt == "--workdir":
            workdir = str(arg)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # File logger that receives the result summary of the single-model run.
    logger = logging.getLogger("logger")
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(os.path.join(workdir, logfile))
    fh.name = logfile
    logger.addHandler(fh)

    # Default parameters for each method.  'nrlmf' and 'nrlmfb' share the
    # same defaults.  For an unrecognised method, ``args`` stays the
    # positional-argument list returned by getopt.
    if method in ('nrlmf', 'nrlmfb'):
        args = {
            'c': 5,
            'K1': 5,
            'K2': 5,
            'r': 50,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'alpha': 0.25,
            'beta': 0.125,
            'theta': 0.5,
            'max_iter': 100
        }
    if method == 'netlaprls':
        args = {'gamma_d': 10, 'gamma_t': 10, 'beta_d': 1e-5, 'beta_t': 1e-5}
    if method == 'blmnii':
        args = {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False}
    if method == 'wnngip':
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8}
    if method == 'kbmf':
        args = {'R': 50}
    if method == 'cmf':
        args = {
            'K': 50,
            'lambda_l': 0.5,
            'lambda_d': 0.125,
            'lambda_t': 0.125,
            'max_iter': 30
        }

    # Overrides from --method-opt; every value is stored as a float, including
    # integer-like settings such as K1 or max_iter.
    for key, val in model_settings:
        args[key] = float(val)

    intMat, drugMat, targetMat = load_data_from_file(
        dataset, os.path.join(data_dir, 'dataset'))
    drug_names, target_names = get_drugs_targets_names(
        dataset, os.path.join(data_dir, 'dataset'))

    if predict_num == 0:
        # NOTE(review): a --cvs value other than 1, 2 or 3 leaves X, D, T and
        # cv unbound, and the cross_validation call below fails.
        if cvs == 1:  # CV setting CVS1
            X, D, T, cv = intMat, drugMat, targetMat, 1
        if cvs == 2:  # CV setting CVS2
            X, D, T, cv = intMat, drugMat, targetMat, 0
        if cvs == 3:  # CV setting CVS3 (transposed matrix, roles swapped)
            X, D, T, cv = intMat.T, targetMat, drugMat, 0
        cv_data = cross_validation(X, seeds, cv)
        # NOTE(review): ev_data exists only for cvs == 1, so --external 1
        # combined with another --cvs value fails further down.
        if cvs == 1:
            ev_data = external_validation(X, seeds, cv)

    if sp_arg == 0 and predict_num == 0 and external == 0:
        if method == 'nrlmf':
            cv_eval.nrlmf_cv_eval(method,
                                  dataset,
                                  cv_data,
                                  X,
                                  D,
                                  T,
                                  cvs,
                                  args,
                                  logger,
                                  scoring=scoring,
                                  gpmi=gpmi,
                                  params=params)
        if method == 'nrlmfb':
            cv_eval.nrlmfb_cv_eval(method,
                                   dataset,
                                   cv_data,
                                   X,
                                   D,
                                   T,
                                   cvs,
                                   args,
                                   logger,
                                   scoring=scoring,
                                   gpmi=gpmi,
                                   params=params)
        if method == 'netlaprls':
            cv_eval.netlaprls_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                      args)
        if method == 'blmnii':
            cv_eval.blmnii_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args)
        if method == 'wnngip':
            cv_eval.wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs,
                                   args, logger)
        if method == 'kbmf':
            cv_eval.kbmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
        if method == 'cmf':
            cv_eval.cmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)

    if sp_arg == 0 and predict_num == 0 and external == 1:
        if method == 'nrlmf':
            ev_eval.nrlmf_ev_eval(method,
                                  ev_data,
                                  X,
                                  D,
                                  T,
                                  logger,
                                  scoring=scoring,
                                  gpmi=gpmi,
                                  params=params)
        if method == 'nrlmfb':
            ev_eval.nrlmfb_ev_eval(method,
                                   ev_data,
                                   X,
                                   D,
                                   T,
                                   logger,
                                   scoring=scoring,
                                   gpmi=gpmi,
                                   params=params)

    if sp_arg == 1 or predict_num > 0:
        if method == 'nrlmf':
            model = NRLMF(cfix=args['c'],
                          K1=args['K1'],
                          K2=args['K2'],
                          num_factors=args['r'],
                          lambda_d=args['lambda_d'],
                          lambda_t=args['lambda_t'],
                          alpha=args['alpha'],
                          beta=args['beta'],
                          theta=args['theta'],
                          max_iter=args['max_iter'])
        if method == 'nrlmfb':
            # NOTE(review): 'eta1' and 'eta2' have no defaults above; they
            # must be supplied through --method-opt or this raises KeyError.
            model = NRLMFb(cfix=args['c'],
                           K1=args['K1'],
                           K2=args['K2'],
                           num_factors=args['r'],
                           lambda_d=args['lambda_d'],
                           lambda_t=args['lambda_t'],
                           alpha=args['alpha'],
                           beta=args['beta'],
                           theta=args['theta'],
                           max_iter=args['max_iter'],
                           eta1=args['eta1'],
                           eta2=args['eta2'])
        if method == 'netlaprls':
            # beta_d must come from its own setting; it used to be filled
            # from 'beta_t', which ignored a user-supplied beta_d.
            model = NetLapRLS(gamma_d=args['gamma_d'],
                              gamma_t=args['gamma_t'],
                              beta_d=args['beta_d'],
                              beta_t=args['beta_t'])
        if method == 'blmnii':
            model = BLMNII(alpha=args['alpha'],
                           gamma=args['gamma'],
                           sigma=args['sigma'],
                           avg=args['avg'])
        if method == 'wnngip':
            model = WNNGIP(T=args['T'],
                           sigma=args['sigma'],
                           alpha=args['alpha'])
        if method == 'kbmf':
            model = KBMF(num_factors=args['R'])
        if method == 'cmf':
            model = CMF(K=args['K'],
                        lambda_l=args['lambda_l'],
                        lambda_d=args['lambda_d'],
                        lambda_t=args['lambda_t'],
                        max_iter=args['max_iter'])
        cmd = str(model)
        if predict_num == 0:
            tic = time.time()
            print("Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd)
            aupr_vec, auc_vec = train(model, cv_data, X, D, T)
            aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
            auc_avg, auc_conf = mean_confidence_interval(auc_vec)
            # Format once so the console and the log report the same timing.
            summary = (
                "auc:%.6f, aupr:%.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f"
                % (auc_avg, aupr_avg, auc_conf, aupr_conf, time.time() - tic))
            print(summary)
            # Per-fold metric vectors are not written to output_dir in this
            # variant; the summary goes to the log file instead.
            logger.info(cmd + ', ' + summary)

        elif predict_num > 0:
            print("Dataset:" + dataset + "\n" + cmd)
            seed = 7771 if method == 'cmf' else 22
            model.fix_model(intMat, intMat, drugMat, targetMat, seed)
            # Candidate pairs are the zero entries of the interaction matrix.
            x, y = np.where(intMat == 0)
            # Materialise the pairs: zip() is a one-shot iterator on Python 3.
            scores = model.predict_scores(list(zip(x, y)), 5)
            ii = np.argsort(scores)[::-1]  # indices by descending score
            predict_pairs = [(drug_names[x[i]], target_names[y[i]], scores[i])
                             for i in ii[:predict_num]]
            new_dti_file = os.path.join(
                output_dir, "_".join([method, dataset, "new_dti.txt"]))
            novel_prediction_analysis(predict_pairs, new_dti_file,
                                      os.path.join(data_dir, 'biodb'))
예제 #7
0
def main(argv):
    """Run the 10-fold experiment variant and print a TPR-by-rank summary.

    For each ``testnum`` in 1..10 the matrices are loaded with
    ``load_data_from_file_csv_10cv`` and one of three modes runs:

    * ``--specify-arg 0`` (with ``--predict-num 0``): hand over to the
      per-method routine in ``cv_eval``.
    * ``--specify-arg 1`` (the default) with ``--predict-num 0``: build one
      model, call ``train`` and write the AUC/AUPR vectors to ``output_dir``.
    * ``--predict-num N`` with ``N > 0``: fit on ``intMat``, score every
      pair, and collect the values returned by ``get_rcrs`` for the positive
      entries of ``testMat``; the value of ``N`` itself is not used.

    After the loop the TPR at cut rank 35 per fold and the TPR for cut ranks
    1..350 over all folds are printed.

    Args:
        argv: Option strings in ``getopt`` form (typically ``sys.argv[1:]``).
            Recognised long options: ``--method`` and ``--dataset`` (both
            required), ``--data-dir``, ``--output-dir``, ``--cvs``,
            ``--specify-arg``, ``--method-options`` (space-separated
            ``key=value`` pairs) and ``--predict-num``.

    Raises:
        SystemExit: With status 2 when the options are invalid or when
            ``--method`` / ``--dataset`` is missing.
    """
    try:
        opts, args = getopt.getopt(argv, "m:d:f:c:s:o:n:p", ["method=", "dataset=", "data-dir=", "cvs=", "specify-arg=", "method-options=", "predict-num=", "output-dir=", ])
    except getopt.GetoptError as err:
        # Report the problem and fail with a non-zero status instead of
        # silently exiting as if the run had succeeded.
        sys.stderr.write("error: %s\n" % err)
        sys.exit(2)

    data_dir = os.path.join(os.path.pardir, 'data')
    output_dir = os.path.join(os.path.pardir, 'output')
    cvs, sp_arg, model_settings, predict_num = 1, 1, [], 0
    method = dataset = None  # no defaults; both are checked after parsing

    # Fixed seeds handed to cross_validation.
    seeds = [7771, 8367, 22, 1812, 4659]
    # Only long option names are matched; the short forms accepted by getopt
    # above are parsed but have no effect.
    for opt, arg in opts:
        if opt == "--method":
            method = arg
        elif opt == "--dataset":
            dataset = arg
        elif opt == "--data-dir":
            data_dir = arg
        elif opt == "--output-dir":
            output_dir = arg
        elif opt == "--cvs":
            cvs = int(arg)
        elif opt == "--specify-arg":
            sp_arg = int(arg)
        elif opt == "--method-options":
            model_settings = [s.split('=') for s in str(arg).split()]
        elif opt == "--predict-num":
            predict_num = int(arg)

    # Both names are used unconditionally below; fail early with a clear
    # message instead of an UnboundLocalError after the folds have run.
    if method is None or dataset is None:
        sys.stderr.write("error: --method and --dataset are required\n")
        sys.exit(2)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Default parameters for each method.  For an unrecognised method,
    # ``args`` stays the positional-argument list returned by getopt.
    if method == 'nrlmf':
        args = {'c': 5, 'K1': 5, 'K2': 5, 'r': 50, 'lambda_d': 0.125, 'lambda_t': 0.125, 'alpha': 0.25, 'beta': 0.125, 'theta': 0.5, 'max_iter': 100}
    if method == 'netlaprls':
        args = {'gamma_d': 10, 'gamma_t': 10, 'beta_d': 1e-5, 'beta_t': 1e-5}
    if method == 'blmnii':
        args = {'alpha': 0.7, 'gamma': 1.0, 'sigma': 1.0, 'avg': False}
    if method == 'wnngip':
        args = {'T': 0.8, 'sigma': 1.0, 'alpha': 0.8}
    if method == 'kbmf':
        args = {'R': 50}
    if method == 'cmf':
        args = {'K': 50, 'lambda_l': 0.5, 'lambda_d': 0.125, 'lambda_t': 0.125, 'max_iter': 30}

    # Overrides are stored as strings and cast when the model is built.
    for key, val in model_settings:
        args[key] = val

    # ZINC data set.  The names are loaded but only the commented-out ranking
    # code further down would use them.
    drug_names, target_names = get_drug_target_names_zinc()
    tprs = []   # TPR at cut rank 35, one entry per fold
    RCRS = []   # get_rcrs results concatenated over all folds
    for testnum in range(1, 11):  # 10-fold CV
        intMat, testMat, drugMat, targetMat = load_data_from_file_csv_10cv(testnum, os.path.join(data_dir))

        # Alternative loaders kept for reference:
        #   drug_names, target_names = get_drugs_targets_names(dataset, os.path.join(data_dir))
        #   intMat, testMat, drugMat, targetMat = load_data_from_file_demo(dataset, os.path.join(data_dir))
        #   drug_names, target_names = get_drug_target_names_demo()

        if predict_num == 0:
            if cvs == 1:  # CV setting CVS1
                X, D, T, cv = intMat, drugMat, targetMat, 1
            if cvs == 2:  # CV setting CVS2
                X, D, T, cv = intMat, drugMat, targetMat, 0
            if cvs == 3:  # CV setting CVS3 (transposed matrix, roles swapped)
                X, D, T, cv = intMat.T, targetMat, drugMat, 0
            cv_data = cross_validation(X, seeds, cv)

        if sp_arg == 0 and predict_num == 0:
            if method == 'nrlmf':
                cv_eval.nrlmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
            if method == 'netlaprls':
                cv_eval.netlaprls_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
            if method == 'blmnii':
                cv_eval.blmnii_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
            if method == 'wnngip':
                cv_eval.wnngip_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
            if method == 'kbmf':
                cv_eval.kbmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)
            if method == 'cmf':
                cv_eval.cmf_cv_eval(method, dataset, cv_data, X, D, T, cvs, args)

        if sp_arg == 1 or predict_num > 0:
            # NOTE(review): time.clock() was removed in Python 3.8; this
            # timing code only runs on Python 2 / <= 3.7 as written.
            tic = time.clock()
            if method == 'nrlmf':
                model = NRLMF(cfix=int(args['c']), K1=int(args['K1']), K2=int(args['K2']), num_factors=int(args['r']), lambda_d=float(args['lambda_d']), lambda_t=float(args['lambda_t']), alpha=float(args['alpha']), beta=float(args['beta']), theta=float(args['theta']), max_iter=int(args['max_iter']))
            if method == 'netlaprls':
                # beta_d must come from its own setting; it used to be filled
                # from 'beta_t', which ignored a user-supplied beta_d.
                model = NetLapRLS(gamma_d=float(args['gamma_d']), gamma_t=float(args['gamma_t']), beta_d=float(args['beta_d']), beta_t=float(args['beta_t']))
            if method == 'blmnii':
                # NOTE(review): unlike the other methods these values are not
                # cast, so --method-options overrides arrive here as strings.
                model = BLMNII(alpha=args['alpha'], gamma=args['gamma'], sigma=args['sigma'], avg=args['avg'])
            if method == 'wnngip':
                model = WNNGIP(T=float(args['T']), sigma=float(args['sigma']), alpha=float(args['alpha']))
            if method == 'kbmf':
                model = KBMF(num_factors=int(args['R']))
            if method == 'cmf':
                model = CMF(K=int(args['K']), lambda_l=float(args['lambda_l']), lambda_d=float(args['lambda_d']), lambda_t=float(args['lambda_t']), max_iter=int(args['max_iter']))
            cmd = str(model)
            if predict_num == 0:
                print("Dataset:" + dataset + " CVS:" + str(cvs) + "\n" + cmd)
                aupr_vec, auc_vec = train(model, cv_data, X, D, T)
                aupr_avg, aupr_conf = mean_confidence_interval(aupr_vec)
                auc_avg, auc_conf = mean_confidence_interval(auc_vec)
                print("auc:%.6f, aupr: %.6f, auc_conf:%.6f, aupr_conf:%.6f, Time:%.6f" % (auc_avg, aupr_avg, auc_conf, aupr_conf, time.clock() - tic))
                write_metric_vector_to_file(auc_vec, os.path.join(output_dir, method + "_auc_cvs" + str(cvs) + "_" + dataset + ".txt"))
                write_metric_vector_to_file(aupr_vec, os.path.join(output_dir, method + "_aupr_cvs" + str(cvs) + "_" + dataset + ".txt"))
            elif predict_num > 0:
                seed = 7771 if method == 'cmf' else 22
                model.fix_model(intMat, intMat, drugMat, targetMat, seed)
                # Score every pair, including the training pairs
                # (the stock variant used ``intMat == 0`` instead).
                x, y = np.where(intMat >= 0)
                # Materialise the pairs: zip() is a one-shot iterator on
                # Python 3 and a list on Python 2.
                scores = model.predict_scores(list(zip(x, y)), 5)
                sarr = np.array(scores)
                r, c = np.where(testMat > 0)  # held-out positive pairs
                rcrs = get_rcrs(sarr, list(zip(r, c)))
                RCRS = RCRS + rcrs  # accumulate across folds
                tpr_top35 = TPR_by_cutRank(rcrs, 35)
                tprs.append(tpr_top35)

    # NOTE(review): only the 'nrlmf' defaults define 'r', so this summary
    # line raises KeyError for the other methods unless 'r' was passed via
    # --method-options.
    print("Dataset: " + dataset + " Rank: " + str(args['r']) + " Iter: " + str(args['max_iter']))
    print("Avg. TPR35: " + str(np.average(tprs)))
    print("S.E.M. TPR35: " + str(np.std(tprs) / math.sqrt(len(tprs))))
    print("TPR35 values:")
    print(tprs)
    print("Rank\tTPR")
    for rank in range(1, 351):
        tpr = TPR_by_cutRank(RCRS, rank)
        print("%s\t%s" % (str(rank), str(tpr)))