def update_answers(user_label, problem_label):
    """Recompute the answers for one (known user, known problem) bucket.

    Every row of ``test/test.csv`` is classified by whether its user id and
    problem id appear in the training data.  Rows whose classification equals
    ``(user_label, problem_label)`` are handed to the solver registered for
    that bucket, and the returned answers overwrite the matching entries of
    the ``solved_status`` column stored in ``ANSWER_FILE``.

    Args:
        user_label: 1 to select rows whose user is in the training set,
            0 to select rows whose user is not.
        problem_label: 1 to select rows whose problem is in the training
            set, 0 to select rows whose problem is not.
    """
    train_problems = data_files.get_train_problems()
    train_users = data_files.get_train_users()
    submissions = data_files.get_submissions()
    test_problems = data_files.get_test_problems()
    test_users = data_files.get_test_users()
    tests_df = pandas.read_csv('test/test.csv')

    known_problems = set(train_problems.keys())
    known_users = set(train_users.keys())

    # Collect the test rows that belong to the requested bucket.
    # Row layout as used here: [0] answer index, [1] user id, [2] problem id.
    wanted = (user_label, problem_label)
    queries = []
    for row in tests_df.values:
        row_index, user_id, problem_id = row[0], row[1], row[2]
        bucket = (int(user_id in known_users),
                  int(problem_id in known_problems))
        if bucket == wanted:
            queries.append({
                'index': row_index,
                'user_id': user_id,
                'problem_id': problem_id,
            })

    # Solver lookup table indexed as solvers[user_label][problem_label];
    # only the (1, 1) bucket has a dedicated solver.
    solvers = (
        (u0p0.solution, u0p0.solution),
        (u0p0.solution, u1p1.solution),
    )
    solve = solvers[user_label][problem_label]
    answers = solve(queries, train_problems, train_users,
                    test_problems, test_users, submissions)

    # Patch the previously stored answers in place and write them back.
    result = pandas.read_csv(ANSWER_FILE).solved_status.tolist()
    for position, answer in enumerate(answers):
        result[queries[position]['index']] = answer
    utils.save_answer(result, ANSWER_FILE)
def run():
    """Compare perceptron accuracy on raw versus standardised features.

    Column 0 of ``train_data`` / ``test_data`` is used as the target and the
    remaining columns as features.  A perceptron is trained (through
    ``train_perceptron``) once on the raw features and once on features
    transformed by ``StandardScaler``; the accuracy gain of the scaled model,
    rounded to three decimals, is written to ``answer.txt``.
    """
    train_values = train_data.values
    test_values = test_data.values
    X_train, y_train = train_values[:, 1:], train_values[:, 0]
    X_test, y_test = test_values[:, 1:], test_values[:, 0]

    # Baseline: unscaled features.
    baseline_model = train_perceptron(X_train, y_train)
    default_ac = accuracy_score(y_test, baseline_model.predict(X_test))
    print('Default accuracy:', default_ac)

    # Same model on standardised features; the scaler is fitted on the
    # training split only and then applied to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train, y_train)
    X_test_scaled = scaler.transform(X_test)
    scaled_model = train_perceptron(X_train_scaled, y_train)
    scaled_ac = accuracy_score(y_test, scaled_model.predict(X_test_scaled))
    print('Scaled accuracy:', scaled_ac)

    diff = scaled_ac - default_ac
    print('Difference between default and scaled is:', diff)
    save_answer(os.path.join(BASE_DIR, 'answer.txt'), round(diff, 3))
def calc_max_roc_auc_score(y_true):
    """Report which score column in ``scores`` has the highest ROC AUC.

    Computes ``roc_auc_score`` of ``y_true`` against every column named in
    ``SCORE_HEADERS``, prints the name of the best column and saves it to
    ``3.txt``.

    Args:
        y_true: ground-truth labels passed to ``roc_auc_score``.
    """
    # Keyed by AUC value: when two columns produce exactly the same AUC the
    # later column in SCORE_HEADERS replaces the earlier one.
    header_by_auc = {
        roc_auc_score(y_true, scores[header]): header
        for header in SCORE_HEADERS
    }
    best_header = header_by_auc[max(header_by_auc)]

    print('Max AUC-ROC on table:', best_header)
    save_answer(os.path.join(BASE_DIR, '3.txt'), best_header)
def calc_max_precision_on_recall_lt70(y_true):
    """Find the score column with the best precision at recall >= 0.7.

    For every column named in ``SCORE_HEADERS`` the precision-recall curve is
    computed and the highest precision among points whose recall is at least
    0.7 is kept.  The column with the overall highest such precision is
    printed and saved to ``4.txt``.

    Note that, despite the ``lt70`` in the name, the filter applied is
    ``recall >= 0.7``.

    Args:
        y_true: ground-truth labels passed to ``precision_recall_curve``.
    """
    # Keyed by precision value: on an exact tie the later column in
    # SCORE_HEADERS replaces the earlier one.
    header_by_precision = {}
    for header in SCORE_HEADERS:
        precision, recall, _thresholds = precision_recall_curve(
            y_true, scores[header])
        best_for_header = max(
            p for p, r in zip(precision, recall) if r >= 0.7)
        header_by_precision[best_for_header] = header

    max_p = max(header_by_precision)
    best_header = header_by_precision[max_p]
    print('Max P is:', max_p, 'Metrics is:', best_header)
    save_answer(os.path.join(BASE_DIR, '4.txt'), best_header)
def end(update, context):
    """Handle the "finish" callback of the questionnaire conversation.

    Walks over every poll registered for this user.  A poll that has an
    answer, or that is not required, is closed if still open and its answer
    is saved; a required poll without an answer marks the questionnaire as
    unfinished and the user is notified through the callback query.

    When nothing required is missing, the inline keyboard is removed, the
    regular (non-poll) answers are saved, a thank-you message is sent and
    ``ConversationHandler.END`` is returned.  Otherwise the function falls
    through and returns ``None``.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source line; confirm that the answer-saving step belongs inside the
    "poll still open" branch and that the return belongs inside the
    "finished" branch.

    Args:
        update: incoming update carrying the callback query.
        context: callback context; reads ``user_data`` ("polls",
            "regular_answers", "bot_name") and ``bot_data`` (per-poll state).
    """
    query = update.callback_query
    finished = True

    # Close all polls of this conversation:
    for poll_id in context.user_data["polls"]:
        poll = context.bot_data[poll_id]
        required = poll["required"]
        answered = "has_answer" in poll and poll["has_answer"]

        if not (answered or not required):
            # A required poll is still unanswered.
            finished = False
            query.answer("Campos obrigatórios não preenchidos!")
            continue

        if poll["open"]:
            poll["open"] = False
            context.bot.stop_poll(poll["chat_id"], poll["message_id"])
            if "has_answer" in poll:
                print(poll["answer_string"])
                user_id = query.message.chat.id
                utils.save_answer(
                    context.user_data["bot_name"],
                    user_id,
                    poll["question_id"],
                    poll["answer_string"],
                )

    if finished:
        query.answer()
        query.edit_message_reply_markup(None)

        # Persist answers to the regular (non-poll) questions; each stored
        # value holds the question id at [0] and the answer text at [1].
        for values in context.user_data["regular_answers"].values():
            question_id = values[0]
            answer_string = values[1]
            print(question_id, answer_string)
            user_id = query.message.chat.id
            utils.save_answer(
                context.user_data["bot_name"],
                user_id,
                question_id,
                answer_string,
            )

        context.bot.send_message(
            update.effective_user.id,
            "Agradeço por ter disponibilizado seu tempo em responder o questionário!"
        )
        return ConversationHandler.END
def run():
    """Fit a ridge regression on text + categorical features and save predictions.

    After ``preprocess_data()`` the TF-IDF weights and the one-hot encoded
    tags are stacked side by side (categorical block first) to form the
    design matrices.  A ``Ridge`` model is fitted against
    ``train['SalaryNormalized']`` and the test predictions, rounded to two
    decimals and space-separated, are written to ``answer.txt``.
    """
    preprocess_data()
    train_weights, test_weights = find_tfidf_weights()
    train_cat, test_cat = get_one_hot_tags()

    # Column order matters for reproducibility: categorical, then TF-IDF.
    train_features = hstack([train_cat, train_weights])
    test_features = hstack([test_cat, test_weights])

    model = Ridge(random_state=241, alpha=1)
    model.fit(train_features, train['SalaryNormalized'])
    predictions = model.predict(test_features)
    print('Predicted salary are:', predictions)

    formatted = ' '.join(str(round(value, 2)) for value in predictions)
    save_answer(os.path.join(BASE_DIR, 'answer.txt'), formatted)
def calc_four_metrics(y_true, y_pred):
    """Print accuracy, precision, recall and F1 and save them to ``2.txt``.

    The four values are written on one line, space-separated and rounded to
    two decimals, in the order accuracy, precision, recall, F1.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
    """
    # (label printed to stdout, metric function), in output order.
    metrics = (
        ('Accuracy score:', accuracy_score),
        ('Precision score:', precision_score),
        ('Recall score:', recall_score),
        ('F1 score:', f1_score),
    )

    metric_scores = []
    for label, metric in metrics:
        value = metric(y_true, y_pred)
        print(label, value)
        metric_scores.append(value)

    save_answer(os.path.join(BASE_DIR, '2.txt'),
                ' '.join(str(round(value, 2)) for value in metric_scores))
def run():
    """Find the ten words with the largest absolute weight in a linear SVM.

    The newsgroup texts are vectorised with TF-IDF, a linear-kernel ``SVC``
    is fitted, and the ten features whose coefficient in the first row of
    ``coef_`` has the largest absolute value are looked up in the
    vocabulary.  The words are sorted alphabetically, printed and written
    comma-separated to ``answer.txt``.
    """
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(newsgroups.data)
    y = newsgroups.target

    clf = SVC(kernel='linear', random_state=241)
    clf.fit(X, y)

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back to the old name on older installations.
    if hasattr(vectorizer, 'get_feature_names_out'):
        names = vectorizer.get_feature_names_out()
    else:
        names = vectorizer.get_feature_names()

    # coef_ is sparse because the model was fitted on a sparse matrix.
    weights = np.abs(clf.coef_.toarray()[0])

    # Take the feature indices straight from the sort order.  Looking the
    # words up again by weight value would return the same column twice
    # whenever two features share an identical absolute weight.
    top_10_indices = np.argsort(weights)[::-1][:10]
    words = sorted(str(names[index]) for index in top_10_indices)

    print('Most weight words are:', words)
    save_answer(os.path.join(BASE_DIR, 'answer.txt'), ','.join(words))
def run():
    """Run a 10-component PCA on ``train`` and save three derived answers.

    1. ``1.txt``: how many leading components are needed for the cumulative
       explained-variance ratio to reach 0.9.
    2. ``2.txt``: Pearson correlation (rounded to two decimals) between the
       first principal component of the training data and ``test['^DJI']``.
    3. ``3.txt``: name of the column with the largest weight in the first
       component.

    The first column of ``train`` is excluded from the PCA input.
    """
    features = train.values[:, 1:]
    pca = PCA(n_components=10)
    pca.fit(features)

    # Count components until 90% of the variance is covered.
    cumulative_ratio = 0.0
    components_needed = 0
    for components_needed, ratio in enumerate(
            pca.explained_variance_ratio_, start=1):
        cumulative_ratio += ratio
        if cumulative_ratio >= 0.9:
            break
    print('Components enough fot 90% dispersion:', components_needed)
    save_answer(os.path.join(BASE_DIR, '1.txt'), str(components_needed))

    # Correlate the first principal component with the index column.
    first_component_values = pca.transform(features)[:, 0]
    corr_coef = np.corrcoef(first_component_values, test['^DJI'])[0, 1]
    print('Pirson correlation coef:', corr_coef)
    save_answer(os.path.join(BASE_DIR, '2.txt'), str(round(corr_coef, 2)))

    # Feature with the largest weight in the first component; +1 skips the
    # leading column of `train` that was left out of the PCA input.
    heaviest_feature = int(np.argmax(pca.components_[0]))
    company_max_weight = train.columns[heaviest_feature + 1]
    print('Company with max weight on first component:', company_max_weight)
    save_answer(os.path.join(BASE_DIR, '3.txt'), company_max_weight)
def run():
    """Find the smallest forest size whose cross-validated R2 exceeds 0.52.

    The ``Sex`` column of ``data`` is recoded in place ('F' -> -1, 'I' -> 0,
    anything else -> 1), every column but the last is used as features and
    ``Rings`` as the target.  Forest sizes 1..50 are evaluated through
    ``get_r2_score_for_rfr`` with a shuffled 5-fold split; the search stops
    at the first size whose score is above 0.52.

    Three candidate answers (found size - 1, found size, found size + 1) are
    written to ``1.txt``, ``2.txt`` and ``3.txt``.
    """
    sex_codes = {'F': -1, 'I': 0}
    data['Sex'] = data['Sex'].apply(lambda sex: sex_codes.get(sex, 1))

    feature_columns = data.columns[:-1]
    X = data[feature_columns]
    y = data['Rings']

    k_fold = KFold(random_state=1, n_splits=5, shuffle=True)

    first_passing_size = None
    for n_estimators in range(1, 51):
        score = get_r2_score_for_rfr(X, y, cv=k_fold,
                                     n_estimators=n_estimators)
        print('Max score for n_estimators =', n_estimators, 'is:', score)
        if score > 0.52:
            first_passing_size = n_estimators
            break

    # Because of KFold library updates and floating-point differences the
    # expected answer may differ by +-1, so all three neighbours are saved.
    # The original author recorded one of them as the correct answer on
    # macOS, scikit-learn==0.20.3, Python 3.6.5.
    # NOTE(review): if no size passes the threshold, first_passing_size is
    # still None here and the arithmetic below raises TypeError.
    save_answer(os.path.join(BASE_DIR, '1.txt'), str(first_passing_size - 1))
    save_answer(os.path.join(BASE_DIR, '2.txt'), str(first_passing_size))
    save_answer(os.path.join(BASE_DIR, '3.txt'), str(first_passing_size + 1))
"""
Problem:
    Given a set of (x, y) points in the plane, sampled in the vicinity of a
    circle, estimate the circle's radius and center point.

How to get started:
    Run this script. It will read the input points and plot them.
    Then, add your code below (see comments).
"""
from utils import read_pnts, plot_points, save_answer

# Load the sample points and show them.
pnts = read_pnts('./training_data_full.npy')
plot_points(pnts)

#
# Here you should complete an algorithm to estimate the circle radius and
# center. The values below are placeholders to be replaced by the result of
# that algorithm.
#
estimated_radius, estimated_center = 1.0, (2.0, 3.0)

#
# In the end, save the estimated circle radius and center to a file.
#
save_answer('./estimated_circle_coordinates_full.json', estimated_radius, estimated_center)
def include_answer(class_name, answer):
    """Store ``answer`` under ``class_name`` in the persisted answers.

    Loads the current answers with ``load_answer()``, sets (or replaces) the
    entry for ``class_name`` and writes the result back with
    ``save_answer()``.

    Args:
        class_name: key under which the answer is stored.
        answer: value to store.
    """
    stored_answers = load_answer()
    stored_answers[class_name] = answer
    save_answer(stored_answers)
def calc_confusion_matrix(y_true, y_pred):
    """Print the confusion matrix as [[TP, FP], [FN, TN]] and save it to ``1.txt``.

    The four counts returned by ``confusion_matrix`` are unpacked in the
    order tn, fp, fn, tp and rearranged before printing.  They are saved on
    one line, space-separated, in the order TP FP FN TN.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
    """
    true_neg, false_pos, false_neg, true_pos = confusion_matrix(
        y_true, y_pred).ravel()
    matrix = np.array([
        [true_pos, false_pos],
        [false_neg, true_neg],
    ])
    print('Confusion matrix: ', matrix)

    flattened = ' '.join(map(str, matrix.flatten()))
    save_answer(os.path.join(BASE_DIR, '1.txt'), flattened)