def run_experi(X_matrix_bal, X_matrix_imb, feat_name):
    """Cross-validate logistic regression on the balanced and imbalanced sets.

    The rows of each feature matrix are cut into ``n_folds`` contiguous
    windows. Each window in turn is held out as the test set while the
    remaining rows form the training set, and ``logit.run_logit`` is called
    once per dataset per fold (balanced first, then imbalanced).

    Relies on module-level names defined elsewhere in this file: ``np``,
    ``logit``, ``labels_bal`` and ``labels_imb``.

    Args:
        X_matrix_bal: Feature matrix whose rows line up with ``labels_bal``.
            It is indexed as ``X[a:b, :]``, so it must be at least 2-D.
        X_matrix_imb: Feature matrix whose rows line up with ``labels_imb``.
        feat_name: Label for the feature set; only used in the progress
            message.

    Returns:
        An 8-tuple holding the per-fold mean of the first four values
        returned by ``logit.run_logit``, for the balanced data followed by
        the imbalanced data. Presumably these are accuracy, precision,
        recall and F1 (going by the original accumulator names) -- confirm
        against ``logit.run_logit``.
    """
    n_folds = 5
    n_metrics = 4  # only positions 0..3 of run_logit's result are used

    # NOTE: the fold size intentionally keeps the original ``/`` arithmetic.
    # Under Python 2 integer division the last ``len % n_folds`` rows never
    # fall into a test window (they always stay in the training set); under
    # true division the fractional boundaries are truncated by int() below.
    fold_size_bal = len(labels_bal) / n_folds
    fold_size_imb = len(labels_imb) / n_folds

    start_bal, start_imb = 0, 0
    totals_bal = [0] * n_metrics
    totals_imb = [0] * n_metrics

    for iteration in range(n_folds):
        # Single-argument print() produces the same text on Python 2 and 3.
        print("running %s fold %s" % (feat_name, iteration + 1))

        stop_bal = start_bal + fold_size_bal
        stop_imb = start_imb + fold_size_imb

        # (features, labels, test-window start, test-window end, totals)
        fold_inputs = (
            (X_matrix_bal, labels_bal, int(start_bal), int(stop_bal), totals_bal),
            (X_matrix_imb, labels_imb, int(start_imb), int(stop_imb), totals_imb),
        )
        for X, y, lo, hi, totals in fold_inputs:
            # Training data is everything outside the held-out window.
            y_train = np.hstack((y[:lo], y[hi:]))
            y_test = y[lo:hi]
            X_train = np.vstack((X[:lo, :], X[hi:, :]))
            X_test = X[lo:hi]

            scores = logit.run_logit(X_train, X_test, y_train, y_test)
            for k in range(n_metrics):
                totals[k] += scores[k]

        # The next test window starts where this one ended.
        start_bal, start_imb = stop_bal, stop_imb

    # Average over the actual number of folds rather than a literal 5.
    return tuple(total / n_folds for total in totals_bal + totals_imb)
def run_ahst(combination):
    """Cross-validate logistic regression on historical-salient-term features.

    For every fold a per-fold feature matrix is loaded from ``.npy`` files in
    the current working directory (``X_ahst<i>_{bal,imb}.npy`` for author
    terms, ``X_rhst<i>_{bal,imb}.npy`` for audience/response terms, where
    ``<i>`` is the zero-based fold index) and optionally prefixed with one of
    the module-level ``X_array*`` matrices. The rows are then split into a
    contiguous held-out test window and a training remainder, and
    ``logit.run_logit`` is called once per dataset (balanced first, then
    imbalanced).

    Relies on module-level names defined elsewhere in this file: ``np``,
    ``logit``, ``labels_bal``, ``labels_imb`` and the ``X_array9_*``,
    ``X_array11_*`` and ``X_array15_*`` matrices.

    Args:
        combination: Selects the feature columns used in each fold:
            ``"tauth"`` -> ``X_array9`` + author terms;
            ``"r"``     -> audience terms only;
            ``"taud"``  -> ``X_array11`` + audience terms;
            ``"all"``   -> ``X_array15`` + audience terms + author terms;
            any other value -> author terms only.

    Returns:
        An 8-tuple holding the per-fold mean of the first four values
        returned by ``logit.run_logit``, for the balanced data followed by
        the imbalanced data. Presumably these are accuracy, precision,
        recall and F1 (going by the original accumulator names) -- confirm
        against ``logit.run_logit``.
    """
    n_folds = 5
    n_metrics = 4  # only positions 0..3 of run_logit's result are used

    # NOTE: the fold size intentionally keeps the original ``/`` arithmetic.
    # Under Python 2 integer division the last ``len % n_folds`` rows never
    # fall into a test window (they always stay in the training set); under
    # true division the fractional boundaries are truncated by int() below.
    fold_size_bal = len(labels_bal) / n_folds
    fold_size_imb = len(labels_imb) / n_folds

    start_bal, start_imb = 0, 0
    totals_bal = [0] * n_metrics
    totals_imb = [0] * n_metrics

    for iteration in range(n_folds):
        # Single-argument print() produces the same text on Python 2 and 3.
        print("running %s hst %s" % (combination, iteration + 1))

        # Author historical salient terms.
        ahst_bal_path = "X_ahst%s_bal.npy" % iteration
        ahst_imb_path = "X_ahst%s_imb.npy" % iteration
        # Audience (response) historical salient terms.
        rhst_bal_path = "X_rhst%s_bal.npy" % iteration
        rhst_imb_path = "X_rhst%s_imb.npy" % iteration

        # Load only the files the chosen combination actually needs; the
        # resulting column layout is the same as before.
        if combination == "r":
            X_bal = np.load(rhst_bal_path)
            X_imb = np.load(rhst_imb_path)
        elif combination == "taud":
            X_bal = np.hstack((X_array11_bal, np.load(rhst_bal_path)))
            X_imb = np.hstack((X_array11_imb, np.load(rhst_imb_path)))
        elif combination == "all":
            X_bal = np.hstack(
                (X_array15_bal, np.load(rhst_bal_path), np.load(ahst_bal_path))
            )
            X_imb = np.hstack(
                (X_array15_imb, np.load(rhst_imb_path), np.load(ahst_imb_path))
            )
        elif combination == "tauth":
            X_bal = np.hstack((X_array9_bal, np.load(ahst_bal_path)))
            X_imb = np.hstack((X_array9_imb, np.load(ahst_imb_path)))
        else:
            X_bal = np.load(ahst_bal_path)
            X_imb = np.load(ahst_imb_path)

        stop_bal = start_bal + fold_size_bal
        stop_imb = start_imb + fold_size_imb

        # (features, labels, test-window start, test-window end, totals)
        fold_inputs = (
            (X_bal, labels_bal, int(start_bal), int(stop_bal), totals_bal),
            (X_imb, labels_imb, int(start_imb), int(stop_imb), totals_imb),
        )
        for X, y, lo, hi, totals in fold_inputs:
            # Training data is everything outside the held-out window.
            y_train = np.hstack((y[:lo], y[hi:]))
            y_test = y[lo:hi]
            X_train = np.vstack((X[:lo, :], X[hi:, :]))
            X_test = X[lo:hi]

            scores = logit.run_logit(X_train, X_test, y_train, y_test)
            for k in range(n_metrics):
                totals[k] += scores[k]

        # The next test window starts where this one ended.
        start_bal, start_imb = stop_bal, stop_imb

    # Average over the actual number of folds rather than a literal 5.
    return tuple(total / n_folds for total in totals_bal + totals_imb)