def score(model, val_data, train_data, params):
    """Embed the validation pairs and score source against target sentence embeddings."""
    model.eval()
    src_sen_emb = []
    tgt_sen_emb = []
    for data in val_data:
        src, tgt, _ = model(data)
        src_sen_emb.append(src.cpu().detach().numpy())
        tgt_sen_emb.append(tgt.cpu().detach().numpy())
    src = np.concatenate(src_sen_emb, 0)
    tgt = np.concatenate(tgt_sen_emb, 0)
    # Disabled experiments: principal-component removal via remove_pc(), and an
    # orthogonal (Procrustes) mapping fitted on train_data via scipy.linalg.svd
    # before scoring; see the sketch below this function.
    get_score(torch.from_numpy(src), torch.from_numpy(tgt), params.score_file)
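# --- Sketch of the disabled post-processing experiments referenced above. ---
# remove_pc() is assumed to project out the top principal component(s) of the
# embeddings; the Procrustes block fit an orthogonal map W = U V^T from the
# SVD of M = tgt^T . src on 300-dim embeddings. Both are reconstructions from
# the commented-out call sites, not the project's confirmed implementations.
import numpy as np
import scipy.linalg
import torch

def remove_pc(X, npc=1):
    # Remove the projection onto the top `npc` principal directions.
    _, _, vt = np.linalg.svd(X, full_matrices=False)
    pc = vt[:npc]                      # (npc, dim)
    return X - X.dot(pc.T).dot(pc)

def fit_orthogonal_mapping(src_emb, tgt_emb, dim=300):
    # Orthogonal Procrustes: W = U V^T where U S V^T = svd(tgt^T src).
    M = np.dot(tgt_emb.transpose(), src_emb)
    u, s, v_t = scipy.linalg.svd(M, full_matrices=True)
    mapping = torch.nn.Linear(dim, dim, bias=False)
    mapping.weight.data.copy_(torch.from_numpy(u.dot(v_t)).float())
    return mapping  # apply as mapping(torch.from_numpy(src))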
def get_sgd_model_score(training_features, training_response, test_features,
                        test_response):
    # Fit an L1-regularized logistic model by SGD at a fixed regularization
    # strength. Note: in SGDClassifier, alpha *is* the regularization strength
    # (larger = stronger); n_iter and loss='log' follow the older scikit-learn API.
    penalty_func = 'l1'
    C = 1
    model = SGDClassifier(loss='log', penalty=penalty_func, fit_intercept=True,
                          n_iter=500, alpha=C)
    # model = LogisticRegression(penalty=penalty_func, dual=False, tol=0.01,
    #                            C=C, fit_intercept=True)
    model.fit(training_features, training_response)
    test_pred = model.predict_proba(test_features)
    return get_score(test_response, test_pred[:, 1])
def get_lg_model_score(training_features, training_response, test_features,
                       test_response):
    # Fit an L2-regularized logistic regression at a fixed regularization
    # strength (C is the *inverse* regularization strength here).
    penalty_func = 'l2'
    C = 1
    model = LogisticRegression(penalty=penalty_func, dual=False, tol=0.00001,
                               C=C, fit_intercept=True)
    model.fit(training_features, training_response)
    test_pred = model.predict_proba(test_features)
    return get_score(test_response, test_pred[:, 1])
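# Sketch: get_score() is defined elsewhere in the project. Since both scorers
# above feed it true labels plus predicted probabilities for the positive
# class, ROC AUC is one plausible implementation (an assumption, not the
# confirmed metric).
from sklearn.metrics import roc_auc_score

def get_score(y_true, y_prob):
    return roc_auc_score(y_true, y_prob)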
def fit_and_validate(self):
    '''
    Fit the VAR model and validate it on a held-out horizon.
    :return:
    '''
    from m_VAR import m_VAR_model
    from util import re_log1p
    self.data_prepro()
    l2 = int(self.re_cl.get())
    steps_valid = 10
    Y_valid, Y_future, Y_exog, Y_exog_future = self.prepare_Y(0, -steps_valid)
    model = m_VAR_model(Y_valid, exog=Y_exog, maxlags=self.maxlags, l2=l2)
    model.fit()
    self.model = model
    y_pred = predict_fix(
        model.forecast(Y_valid, steps_valid, exog_future=Y_exog_future))
    score_mae, score_mr = get_score(y_pred=y_pred, y_true=Y_future)
    if self.v2.get():
        self.verbose_list.insert(
            0, 'Lag order: %d, regularization coefficient: %d, '
            'injection wells included, mae: %.6f, mer: %.6f' %
            (int(self.Lag_cl.get()), l2, score_mae, score_mr))
    else:
        self.verbose_list.insert(
            0, 'Lag order: %d, regularization coefficient: %d, '
            'mae: %.6f, mer: %.6f' %
            (int(self.Lag_cl.get()), l2, score_mae, score_mr))
    if self.v3.get():
        point_forecast, forc_lower, forc_upper = model.forecast_interval(
            np.array(Y_valid), steps_valid, exog_future=Y_exog_future)
        forc_lower = re_log1p(forc_lower)
        forc_upper = re_log1p(forc_upper)
        self.validate_window = m_forecast_plot_window(
            Y_valid, Y_future, y_pred, fore_cov=(forc_lower, forc_upper))
    else:
        self.validate_window = m_forecast_plot_window(Y_valid, Y_future, y_pred)
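# Sketch of two helpers used above, reconstructed from their call sites
# (assumptions, not the project's code). re_log1p() is assumed to invert a
# log1p transform applied during preprocessing, and get_score() to return the
# (mean absolute error, mean error ratio) pair that fit_and_validate unpacks.
import numpy as np

def re_log1p(x):
    return np.expm1(x)  # inverse of np.log1p

def get_score(y_pred, y_true):
    err = np.abs(np.asarray(y_pred) - np.asarray(y_true))
    mae = err.mean()
    # Guard against division by zero for near-zero true values.
    mer = (err / np.maximum(np.abs(np.asarray(y_true)), 1e-8)).mean()
    return mae, mer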
def _plot_pr(config, cases_df, controls_df, case_total, control_total, alpha,
             fig_path):
    fig, ax = plt.subplots()
    # ax.set_ylim(0, 0.5)
    ax.set_yticks(np.arange(0, 0.51, 0.1))
    for name, info in config.items():
        case_scores = util.get_score(cases_df, name)
        control_scores = util.get_score(controls_df, name)
        print(name, case_scores.shape, control_scores.shape)
        estimated_tpr, estimated_num, pvalues = _calc_precision_vec(
            case_scores, control_scores, case_total, control_total,
            info['thres_vec'], alpha)
        print(estimated_tpr)
        print(estimated_num)
        ax.scatter(estimated_num, estimated_tpr, color=info['color'])
        # Rename methods for display.
        if name == 'cnn_prob':
            name = 'MVP'
        if 'DNN' in name:
            name = 'FCNN'
        c = info['color']
        marker = '.'
        # Marker size encodes each point's p-value.
        for num_, tpr_, pvalue_ in zip(estimated_num, estimated_tpr, pvalues):
            markersize = marker_size_pvalue(pvalue_)
            ax.plot(num_, tpr_, markersize=markersize, marker=marker, color=c)
        ax.plot(estimated_num, estimated_tpr, linestyle='-',
                color=info['color'], label=name.split('_')[0])
        for thres_, num_, tpr_ in zip(info['thres_vec'], estimated_num,
                                      estimated_tpr):
            ax.annotate("{}".format(thres_), xy=(num_, tpr_),
                        ha='left', va='bottom')
        if name == 'MVP_rank':
            # Baseline: all missense variants (threshold -1.0 keeps everything).
            estimated_tpr, estimated_num, pvalues = _calc_precision_vec(
                case_scores, control_scores, case_total, control_total,
                [-1.0], alpha)
            c = 'purple'
            ax.plot(estimated_num[0], estimated_tpr[0], color=c, marker='.',
                    markersize=marker_size_pvalue(pvalues[0]))
            ax.annotate("All Mis", xy=(estimated_num[0], estimated_tpr[0]),
                        ha='left', va='bottom')
    ax.set_ylabel('Estimated Positive Predictive Value', weight='normal')
    ax.set_xlabel('Estimated number of risk variants', weight='normal')
    lgnd = ax.legend(loc='upper right')
    # Second legend: reference marker sizes for a range of p-values.
    ls = []
    for p in [10**-8, 10**-6, 10**-4, 10**-2]:
        l, = ax.plot([], [], 'o', marker='.', markersize=-np.log(p),
                     color='black')
        ls.append(l)
    labels = ["10E-8", "10E-6", "10E-4", "10E-2"]
    leg = ax.legend(ls, labels, numpoints=1, ncol=4, frameon=False,
                    loc='lower center', handlelength=2, borderpad=0,
                    handletextpad=1, title='p value')
    leg.get_title().set_fontsize('8')  # legend title fontsize
    plt.gca().add_artist(lgnd)  # keep the method legend alongside this one
    plt.savefig(fig_path, format='pdf', bbox_inches='tight')
    plt.close(fig)
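# Sketch: marker_size_pvalue() is defined elsewhere. The p-value legend above
# draws its reference markers with markersize=-np.log(p), so a matching
# implementation (an inference from that legend, not the confirmed helper) is:
import numpy as np

def marker_size_pvalue(pvalue, floor=1e-300):
    # Smaller p-values map to larger markers; the floor avoids log(0).
    return -np.log(max(pvalue, floor))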
def _get_scores(asd, chd, control, n):
    return (util.get_score(asd, n),
            util.get_score(chd, n),
            util.get_score(control, n))
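# Usage sketch (hypothetical DataFrame names; 'cnn_prob' is one of the score
# columns used by _plot_pr above):
#   asd_scores, chd_scores, control_scores = _get_scores(
#       asd_df, chd_df, control_df, 'cnn_prob')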
training_features, training_response, validate_features, validate_response, \
    test_features, test_response = get_bank_data()

# Grid of regularization strengths to try (an earlier, finer grid is kept
# commented for reference).
# cvec = [0.0001, 0.001, 0.01, 0.1, 1]
cvec = [0.01, 0.1, 1, 100, 1000]
score_best = -100
bestC = 10
penalty_func = 'l2'
for C in cvec:
    # Note: SGDClassifier's alpha is the regularization strength itself
    # (larger = stronger), unlike LogisticRegression's inverse C.
    model = SGDClassifier(loss='log', penalty=penalty_func, fit_intercept=True,
                          n_iter=1000, alpha=C)
    model.fit(training_features, training_response)
    validate_pred = model.predict(validate_features)
    score = get_score(validate_response, validate_pred)
    print C, score
    np.set_printoptions(precision=3)
    # print model.coef_, (model.coef_[0]**2).sum()
    if score_best < score:
        bestC = C
        score_best = score

print "==============================================", bestC
# Refit at the best regularization strength and rescore on the validate set.
model = SGDClassifier(loss='log', penalty=penalty_func, fit_intercept=True,
                      n_iter=1000, alpha=bestC)
model.fit(training_features, training_response)
validate_pred = model.predict(validate_features)
print bestC, get_score(validate_response, validate_pred)
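# Alternative sketch: the manual loop above expressed with scikit-learn's
# grid-search utilities. PredefinedSplit keeps the explicit validate split
# rather than cross-validating on the training data. This assumes a newer
# scikit-learn (sklearn.model_selection, max_iter, loss='log_loss'); the
# original snippet targets the older API, and the default accuracy scoring
# here may differ from whatever get_score() computes.
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV, PredefinedSplit

X = np.concatenate([training_features, validate_features], 0)
y = np.concatenate([training_response, validate_response], 0)
# -1 marks rows used only for training; 0 marks the single validation fold.
fold = np.concatenate([-np.ones(len(training_features)),
                       np.zeros(len(validate_features))])
grid = GridSearchCV(
    SGDClassifier(loss='log_loss', penalty='l2', fit_intercept=True,
                  max_iter=1000),
    param_grid={'alpha': [0.01, 0.1, 1, 100, 1000]},
    cv=PredefinedSplit(fold))
grid.fit(X, y)
print(grid.best_params_['alpha'])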