def get_score_median_95(y, y_train_temp):
    """Score each user against the median and 95th-percentile URL ratios.

    A per-URL ratio ``float(row[1]) / float(row[2])`` is built from the
    training rows (the code names it a CTR; presumably clicks over
    impressions -- confirm against the caller). For every user in the
    enclosing-scope mapping ``users_posting_urls`` the function counts how
    many of the user's training URLs exceed the median ratio and how many
    exceed the 95th-percentile ratio, and passes each count to
    ``utils.calc_score``.

    Args:
        y: Unused by this function; kept so existing callers keep working.
        y_train_temp: Iterable of indexable rows; ``row[0]`` is the URL id,
            ``row[1]`` and ``row[2]`` the numerator and denominator of the
            ratio. A zero denominator raises ``ZeroDivisionError``.

    Returns:
        A two-element list ``[user_score_median, user_score_95]``; both are
        dicts keyed by user.
    """
    # Only training URLs receive a ratio.
    ctr_by_url = {}
    for row in y_train_temp:
        ratio = float(row[1]) / float(row[2])
        ctr_by_url[int(float(row[0]))] = ratio

    median_ctr = utils.get_percentile_url(ctr_by_url, 50)
    p95_ctr = utils.get_percentile_url(ctr_by_url, 95)

    # Scores are computed per user.
    user_score_median = {}
    user_score_95 = {}
    for user in users_posting_urls:
        # URLs missing from the training set have no ratio and are ignored.
        ctrs = [
            ctr_by_url[int(u)]
            for u in users_posting_urls[user]
            if int(u) in ctr_by_url
        ]
        n_in_training = len(ctrs)

        n_above_median = sum(1 for ctr in ctrs if ctr > median_ctr)
        user_score_median[user] = utils.calc_score(
            0.5, n_above_median, n_in_training
        )

        n_above_p95 = sum(1 for ctr in ctrs if ctr > p95_ctr)
        user_score_95[user] = utils.calc_score(
            0.05, n_above_p95, n_in_training
        )

    return [user_score_median, user_score_95]
def gen_individual(accept, reject):
    """Create a random individual whose score is not the -1000 sentinel.

    A fresh root ``node.Node`` is built and scored with
    ``utils.calc_score`` repeatedly until the score differs from ``-1000``.

    Args:
        accept: Forwarded unchanged to ``utils.calc_score``.
        reject: Forwarded unchanged to ``utils.calc_score``.

    Returns:
        A two-element list ``[individual, score]``.
    """
    while True:
        candidate = node.Node(depth=0, root=True)
        candidate_score = utils.calc_score(candidate, accept, reject)
        # Keep drawing new candidates while the scorer returns -1000.
        if candidate_score != -1000:
            return [candidate, candidate_score]
def spam_score(preds_file, vlt_file, threshold=0.5):
    """Binarise predictions at ``threshold`` and score them against labels.

    Labels are loaded with ``read_vlt`` and raw predictions with
    ``readpred_vw`` (using the enclosing-scope ``loss_func``). Element
    ``[1]`` of each raw prediction is mapped to 1 when it is at least
    ``threshold`` and to 0 otherwise. The positive ratios of both sequences
    are logged before scoring.

    Args:
        preds_file: Passed to ``readpred_vw``.
        vlt_file: Passed to ``read_vlt``.
        threshold: Cut-off applied to element ``[1]`` of each prediction.

    Returns:
        Whatever ``calc_score(vlts, preds)`` returns.

    Raises:
        ZeroDivisionError: If either the predictions or the labels are empty.
    """
    vlts = read_vlt(vlt_file)
    raw_preds = readpred_vw(preds_file, loss_func)

    preds = []
    for entry in raw_preds:
        preds.append(1 if entry[1] >= threshold else 0)

    pred_pos_ratio = float(sum(preds)) / len(preds)
    vlt_count = len(vlts)
    vlt_pos_ratio = sum(vlts) / float(vlt_count)
    message = 'threshold: %f, pos ratio of pred: %f, vlts #: %d, pos ratio of vlts: %f' % (
        threshold, pred_pos_ratio, vlt_count, vlt_pos_ratio)
    logging.info(message)

    return calc_score(vlts, preds)
def validate_score(self):
    """Evaluate the network on the test loader and record the result.

    The network is switched to eval mode, every batch from
    ``self.test_loader`` is run through it, and the collected predictions
    are scored with ``utils.calc_score``. That call yields two values, named
    ``curr_map`` and ``curr_mrr`` here (presumably MAP and MRR -- confirm in
    ``utils``). The summarised score is stored via
    ``self.summary.add_score``.

    Returns:
        ``True`` when ``curr_map`` is at least ``self.last_map`` (which is
        then updated to ``curr_map``), otherwise ``False``.
    """
    self.net.eval()

    predictions = []
    for batch in self.test_loader:
        in_q, in_a, _ = batch
        in_q, in_a = in_q.detach(), in_a.detach()
        if config.use_cuda:
            in_q, in_a = in_q.cuda(), in_a.cuda()
        logit = self.net(in_q, in_a).flatten()
        predictions.extend(self.net.predict(logit).tolist())

    curr_map, curr_mrr = utils.calc_score(predictions)
    score = utils.summarize_score(curr_map, curr_mrr)
    self.summary.add_score(self.epoch, self.step, score)

    # Ties count as "not worse", so an equal score also returns True.
    if curr_map >= self.last_map:
        self.last_map = curr_map
        return True
    return False
# Do this only if the target_node wasn't the first node # infected in the cascade. # If it was the first node (else branch), then we cannot deduce # anything about the incoming edges. log_sum = 0 for j in range(len(c)): t_j = c[j][0] alpha_ji = Ai[c[j][1]] if t_j < t_i: # TODO # expr += ... # log_sum += ... pass expr += CVX.log(log_sum) prob = CVX.Problem(CVX.Maximize(expr), constraints) res = prob.solve(verbose=True) probs.append(prob) results.append(res) if prob.status in [CVX.OPTIMAL, CVX.OPTIMAL_INACCURATE]: A[:, target_node] = np.asarray(Ai.value).squeeze() else: A[:, target_node] = -1 A_soln = np.loadtxt('solution.csv', delimiter=',') print(U.calc_score(A, A_soln))
# Разделяем выборку на train и test в пропорции 1/9 X_train, X_test, y_train, y_test = train_test_split(X, Y_binarized, test_size=0.1) # Получаем инстанс decision tree классификатора clf = DecisionTreeClassifier() # Обучаем алгоритм по train выборке clf_fit = clf.fit(X_train, y_train) # Предсказываем значения train выборки y_predicted_train = clf.predict(X_train) # Считаем score по y фактическому и y предсказанному для train выборки score = utils.calc_score(y_train, y_predicted_train) print(score) # Предсказываем значения test выборки y_predicted_test = clf.predict(X_test) # Считаем score по y фактическому и y предсказанному для test выборки score = utils.calc_score(y_test, y_predicted_test) print(score) # Визуализируем дерево решений, которое сформировал классификатор, и сохраняем в pdf-файл utils.decision_tree_save(clf, iris, "decision_tree.pdf") # Предсказываем target-значения по выборке, опираясь на обученный ранее алгоритм (делаем выбор по дереву решений) y_score = clf.predict(X_test)
def evaluate_mica(orig_labels, pred_labels):
    """Print precision, recall and F1 for every class in ``eval_list``.

    ``eval_list`` is read from the enclosing scope. For each class id,
    ``calc_score`` is expected to return three values, which are printed
    with three decimal places after the class id.

    Args:
        orig_labels: Reference labels, forwarded to ``calc_score``.
        pred_labels: Predicted labels, forwarded to ``calc_score``.

    Returns:
        None. Results are written to standard output.
    """
    for class_id in eval_list:
        scores = calc_score(orig_labels, pred_labels, class_id)
        precision, recall, f1 = scores
        metrics = ": prec = {:.3f} rec = {:.3f} f1 = {:.3f}".format(
            precision, recall, f1)
        # class_id is concatenated directly, so it must already be a string.
        print(class_id + metrics)
population = gen_population(1000, accept, reject) bestest = gen_individual(accept, reject) for i in range(100): best = ['fjkghdfkghdfkghsdkfhgksdfjhgksjdfhgkjsdhfgkjhsdfkghsdfkjghksdfhgkj', -9999] print("\nGeneration:", i) new_pop = [] for _ in range(800): while True: try: m1 = utils.tournament_selection(population, 50) m2 = utils.tournament_selection(population, 50) child = utils.crossover(m1[0], m2[0]) child = [child, utils.calc_score(child, accept, reject)] new_pop.append(child) break except: pass for _ in range(100): new_pop.append(gen_individual(accept, reject)) for _ in range(100): while True: try: a = random.choice(population) b = utils.mutate(a[0]) b = [b, utils.calc_score(b, accept, reject)] new_pop.append(b)
if c[0][0] != infection_time: # Do this only if the target_node wasn't the first node # infected in the cascade. # If it was the first node (else branch), then we cannot deduce # anything about the incoming edges. log_sum = 0 for j in range(len(c)): t_j = c[j][0] alpha_ji = Ai[c[j][1]] if t_j < t_i: # TODO # expr += ... # log_sum += ... pass expr += CVX.log(log_sum) prob = CVX.Problem(CVX.Maximize(expr), constraints) res = prob.solve(verbose=True) probs.append(prob) results.append(res) if prob.status in [CVX.OPTIMAL, CVX.OPTIMAL_INACCURATE]: A[:, target_node] = np.asarray(Ai.value).squeeze() else: A[:, target_node] = -1 A_soln = np.loadtxt('solution.csv', delimiter=',') print(U.calc_score(A, A_soln))