def get_results(results, instance_of_datasets, classifier_name, y_true, y_pred, file_dump):
    tmp_ = {
        "y_pred": y_pred,
        "y_true": y_true,
        "accuracy": accuracy_score(y_true, y_pred),
        "precision_micro": precision_score(y_true, y_pred, average="micro"),
        "precision_macro": precision_score(y_true, y_pred, average="macro"),
        "recall_micro": recall_score(y_true, y_pred, average="micro"),
        "recall_macro": recall_score(y_true, y_pred, average="macro"),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
    }
    cPickle.dump(tmp_, gzip.open("%s/single_%s_%s_%s.zcp" % (dir_results, file_dump, instance_of_datasets, classifier_name), "wb+"))
    results[instance_of_datasets][classifier_name] = tmp_
    scores = results[instance_of_datasets][classifier_name]
    print(classifier_name,
          "accuracy", scores["accuracy"],
          "f1 score_micro", scores["f1_micro"],
          "precision_micro", scores["precision_micro"],
          "recall_micro", scores["recall_micro"],
          "f1 score_macro", scores["f1_macro"],
          "precision_macro", scores["precision_macro"],
          "recall_macro", scores["recall_macro"])
    cPickle.dump(results, gzip.open(dir_results + "/" + file_dump, "wb+"))
    return results
def fit(self, x, y, validation_data=None, validation_split=0.1, epochs=5):
    self.checkpoint = tf.train.Checkpoint(model=self)
    self.checkpoint_manager = tf.train.CheckpointManager(self.checkpoint, self.save_path, max_to_keep=1)
    x = self.__fit_and_transform_on_x(x)
    y = self.fit_and_transform_on_y(y)
    if validation_data is None:
        # train_test_split returns (train_x, val_x, train_y, val_y)
        train_x, val_x, train_y, val_y = train_test_split(
            x, y, test_size=validation_split, random_state=2020)
    else:
        train_x, train_y = x, y
        val_x = self.transform_on_x(validation_data[0])
        val_y = self.transform_on_y(validation_data[1])
    max_score = None
    best_epoch = None
    for epoch in range(epochs):
        total_loss = 0
        for i in range(len(train_x) // self.batch_size):
            start_idx = self.batch_size * i
            end_idx = min(len(train_x), (i + 1) * self.batch_size)
            batch_loss = self.__train_one_step(train_x[start_idx:end_idx], train_y[start_idx:end_idx])
            total_loss += batch_loss
            print('Epoch {} Batch {} Batch Loss: {}'.format(epoch, i, batch_loss))
        val_y_proba = self.__forward(val_x).numpy()
        val_y_pred = self.change_proba_to_digits(val_y_proba)
        curr_macro_score = f1_score(val_y, val_y_pred, average='macro')
        curr_micro_score = f1_score(val_y, val_y_pred, average='micro')
        print('Epoch {} Loss: {} Macro F1: {} Micro F1: {}'.format(
            epoch, total_loss / (len(x) // self.batch_size), curr_macro_score, curr_micro_score))
        if self.earlystop_metric == 'macro_f1':
            curr_score = curr_macro_score
        elif self.earlystop_metric == 'micro_f1':
            curr_score = curr_micro_score
        else:
            raise Exception('This metric is not supported now.')
        if max_score is None or curr_score > max_score:
            self.checkpoint_manager.save()
            max_score = curr_score
            best_epoch = epoch
        elif (epoch - best_epoch) >= self.patience:
            print('Early stopped at epoch {}, since {} does not improve from {}'.format(
                epoch, self.earlystop_metric, max_score))
            break
def evaluate(model, eval_iter):
    model.eval()
    y_pred = []
    y_true = []
    for batch in eval_iter.iter_epoch():
        batch, (inputs, label) = batch
        logits = model(inputs)
        probs = model.get_probs(logits)
        y_pred.extend(probs.argmax(-1))
        y_true.extend(list(label))
    y_pred = numpy.array(y_pred, dtype=numpy.int32)
    y_true = numpy.array(y_true, dtype=numpy.int32)
    micro_f1 = f1_score(y_true, y_pred, average='micro')
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return (micro_f1 + macro_f1) / 2
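# Numeric sketch for the selection score returned by evaluate() (hypothetical labels):
# the score is the plain average of micro- and macro-F1, so rare classes still
# influence model selection.
#   y_true = [0, 0, 0, 1], y_pred = [0, 0, 0, 0]
#   micro_f1 = 0.75, macro_f1 ~= 0.43, returned score ~= 0.59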
def test(self, input_xy, target_names=None, **kwargs):
    xs, trues = zip(*input_xy)
    xs = np.stack(xs).reshape(-1, reduce(operator.mul, xs[0].shape))
    trues = np.stack(trues)
    results = self.predict(xs)
    print(classification_report(trues, results, target_names=target_names))
    return f1_score(trues, results, average="micro")
def get_training_results(self, database_file, dataset, cross_validation=10):
    # Connect DB
    self.apk_db.connect_db(database_file)
    results = []
    # K-Fold Cross Validation
    kf = KFold(n_splits=cross_validation, shuffle=True)
    for train, test in kf.split(dataset):
        # Get training and testing dataset
        training_dataset = [dataset[i] for i in train]
        testing_dataset = [dataset[i] for i in test]
        # Fit model
        self.fit(training_dataset)
        # Predict labels for testing samples
        testing_labels, predicted_labels = self.i_predict(testing_dataset)
        # Get score
        result = {}
        result['accuracy'] = accuracy_score(testing_labels, predicted_labels, normalize=True)
        result['f-score'] = f1_score(testing_labels, predicted_labels)
        results.append(result)
    # Disconnect DB
    self.apk_db.disconnect_db()
    return results
def Predict(self, inp, labels, classifier, folds, name, paramdesc):
    X = inp
    y = labels
    X, y = X[y != 2], y[y != 2]
    n_samples, n_features = X.shape

    ###########################################################################
    # Classification and ROC analysis

    # Run classifier with cross-validation and plot ROC curves
    cv = StratifiedKFold(y, n_folds=folds)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    _precision = 0.0
    _recall = 0.0
    _accuracy = 0.0
    _f1 = 0.0
    for i, (train, test) in enumerate(cv):
        probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
        pred_ = classifier.predict(X[test])
        _precision += precision_score(y[test], pred_)
        _recall += recall_score(y[test], pred_)
        _accuracy += accuracy_score(y[test], pred_)
        _f1 += f1_score(y[test], pred_)
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

    _precision /= folds
    _recall /= folds
    _accuracy /= folds
    _f1 /= folds
    plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic - {0}'.format(name))
    plt.legend(loc="lower right")
    plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
    plt.close()
    result = self.OutputResult(name, paramdesc, len(inp), floor(labels.size / folds),
                               _precision, _recall, _accuracy, _f1)
    Announce(result)
def tuneMultimodelKnnIgr(featureSizes, kValues):
    X_raw, y_raw = common.loadTrainingDataSet()
    scoreMap = dict()
    for featureSize in featureSizes:
        for kValue in kValues:
            scoreMap[(featureSize, kValue)] = []
    kf = KFold(n_splits=5, random_state=42, shuffle=True)
    foldNumber = 0
    for train_index, test_index in kf.split(X_raw):
        X_train, X_test = X_raw[train_index], X_raw[test_index]
        y_train, y_test = y_raw[train_index], y_raw[test_index]
        reducer = InformationGainReducer()
        reducer.fit(X_train, y_train)
        for featureSize in featureSizes:
            reducer.resize(featureSize)
            X_train_reduced = reducer.transform(X_train).toarray()
            X_test_reduced = reducer.transform(X_test).toarray()
            for kValue in kValues:
                modelList = []
                for modelNum in range(11):
                    rus_rs = 555 + (modelNum * featureSize)
                    rus = RandomUnderSampler(random_state=rus_rs)
                    X_model, y_model = rus.fit_resample(X_train_reduced, y_train)
                    clf = KNeighborsClassifier(n_neighbors=kValue, metric='manhattan')
                    clf.fit(X_model, y_model)
                    modelList.append(clf)
                    print(".", end="")
                output = common.predictCombinedSimple(X_test_reduced, modelList)
                combinedModelScore = f1_score(y_test, output)
                scoreMap[(featureSize, kValue)].append(combinedModelScore)
                print()
                print("Done with kValue = " + str(kValue) + " for fold #" + str(foldNumber) + " for feature size = " + str(featureSize) + ". F1 = " + str(combinedModelScore))
            print("Done with fold #" + str(foldNumber) + " for feature size = " + str(featureSize))
        foldNumber += 1
    for featureSize in featureSizes:
        for kValue in kValues:
            meanF1Score = mean(scoreMap[(featureSize, kValue)])
            print("F1 Score for KNN with IGR, K = " + str(kValue) + " and FR size = " + str(featureSize) + " is: " + str(meanF1Score))
def get_score(a, b_max):
    a_max = np.argmax(a, axis=-1)
    acc = accuracy_score(a_max, b_max)
    p = precision_score(a_max, b_max, average='macro')
    r = recall_score(a_max, b_max, average='macro')
    f1 = f1_score(a_max, b_max, average='macro')
    return acc, p, r, f1
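# Usage sketch for get_score (hypothetical data, assuming numpy and the sklearn
# metric imports used above are in scope): the first argument is a probability or
# one-hot matrix that gets argmaxed, the second is already-argmaxed integer labels.
a = np.array([[0.9, 0.1, 0.0],
              [0.2, 0.7, 0.1],
              [0.1, 0.2, 0.7],
              [0.8, 0.1, 0.1]])
b_max = np.array([0, 1, 2, 1])
acc, p, r, f1 = get_score(a, b_max)  # accuracy plus macro-averaged precision/recall/F1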
def prediction_score(train_X, train_y, test_X, test_y, metric, model):
    # if the train labels are always the same values
    values_train = set(train_y)
    if len(values_train) == 1:
        # predict always that value
        only_value_train = list(values_train)[0]
        test_pred = np.ones_like(test_y) * only_value_train
    # if the train labels have different values
    else:
        # create the model
        if model == "random_forest_classifier":
            m = RandomForestClassifier(n_estimators=10)
        elif model == "logistic_regression":
            m = LogisticRegression()
        else:
            raise Exception("Invalid model name.")
        # fit and predict
        m.fit(train_X, train_y)
        test_pred = m.predict(test_X)
    # calculate the score
    if metric == "f1":
        return f1_score(test_y, test_pred)
    elif metric == "accuracy":
        return accuracy_score(test_y, test_pred)
    else:
        raise Exception("Invalid metric name.")
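# Usage sketch for prediction_score (hypothetical arrays, assuming numpy and the
# sklearn imports used above are in scope): with a single-class train_y the
# constant-prediction branch is taken; otherwise the named model is fit and
# scored with the requested metric.
train_X = np.array([[0.0], [1.0], [2.0], [3.0]])
train_y = np.array([0, 0, 1, 1])
test_X = np.array([[0.5], [2.5]])
test_y = np.array([0, 1])
score = prediction_score(train_X, train_y, test_X, test_y, metric="f1", model="logistic_regression")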
def run():
    paras = create_dataset()
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))
    skf = StratifiedKFold(Y, n_folds=10)
    f = open('results/correct.txt', 'w')
    f2 = open('results/wrong.txt', 'w')
    accs = []
    precs = []
    recs = []
    f1s = []
    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        cv = CountVectorizer()
        X_train_counts = cv.fit_transform(X_train)
        tf_transformer = TfidfTransformer(use_idf=True).fit(X_train_counts)
        X_train_tfidf = tf_transformer.transform(X_train_counts)
        clf = DummyClassifier(strategy="most_frequent").fit(X_train_counts, y_train)
        X_test_counts = cv.transform(X_test)
        X_test_tfidf = tf_transformer.transform(X_test_counts)
        y_pred = clf.predict(X_test_counts)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)
        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1
        for para, (y_t, y_p) in zip(X_test, zip(y_test, y_pred)):
            if y_t == y_p:
                f.write('%s\n' % para)
            else:
                f2.write('%s\n' % para)
    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)
def metrics(y_true, y_predict):
    logger.info("Computing classification metrics...")
    F_value = f1_score(y_true, y_predict, average="weighted")
    Recall_value = recall_score(y_true, y_predict, average="weighted")
    Precision_value = precision_score(y_true, y_predict, average="weighted")
    return F_value, Recall_value, Precision_value
def by_class_evaluation(attack_test_y, target_y, p, attack_test_x, labels=None):
    if labels is None:
        labels = np.unique(target_y)

    precisions = [precision_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                  for c in np.unique(target_y)]
    accuracies = [accuracy_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                  for c in np.unique(target_y)]
    f1_scores = [f1_score(attack_test_y[target_y == c], p[target_y == c]) * 100
                 for c in np.unique(target_y)]
    recalls = [recall_score(attack_test_y[target_y == c], p[target_y == c]) * 100
               for c in np.unique(target_y)]

    c_train_accs = [accuracy_score(
        target_y[np.logical_and(target_y == c, attack_test_y == 1)],
        np.argmax(attack_test_x[np.logical_and(target_y == c, attack_test_y == 1)], axis=1)) * 100
        for c in np.unique(target_y)]
    c_test_accs = [accuracy_score(
        target_y[np.logical_and(target_y == c, attack_test_y == 0)],
        np.argmax(attack_test_x[np.logical_and(target_y == c, attack_test_y == 0)], axis=1)) * 100
        for c in np.unique(target_y)]

    x = PrettyTable()
    x.float_format = '.2'
    x.add_column("Class", labels)
    x.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
    x.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
    x.add_column("Attack Precision", np.round(precisions, 2))
    x.add_column("Attack Accuracy", np.round(accuracies, 2))
    x.add_column("Attack Recall", np.round(recalls, 2))
    x.add_column("Attack F-1 Score", np.round(f1_scores, 2))
    x.add_column("Percentage of Data",
                 np.round(np.array([len(target_y[target_y == c]) / len(target_y) * 100
                                    for c in np.unique(target_y)]), 2))
    print(x.get_string(title='Per Class Evaluation'))
def test(self, input_xy, target_names=None, **kwargs):
    input_x = []
    trues = []
    for x, y in input_xy:
        input_x.append(x)
        trues.append(y.argmax())
    results = self.predict(input_x, prob=False)
    trues = np.array(trues)
    print(classification_report(trues, results, target_names=target_names))
    return f1_score(trues, results, average="micro")
def _print_classificationMetrics(_classifier, _predict):
    metrics = [
        ['SVM Classification', 'Obtained data'],
        ['Intercept', _classifier.intercept_],
        ['Accuracy Score', accuracy_score(ones_like(_predict), _predict)],
        ['F1 Score', f1_score(ones_like(_predict), _predict)],
        ['Hamming Loss', hamming_loss(ones_like(_predict), _predict)],
    ]
    print('\nData Mining - SVM Classification - <VORT>', '\n')
    print(_classifier, '\n')
    print(look(metrics))
def eval_step():
    sess.run(mdl.running_vars_initializer)
    dev_feed_dict = {
        mdl.sent: dstream.sent,
        mdl.label: dstream.label,
        mdl.ent1_dist: dstream.ent1_dist,
        mdl.ent2_dist: dstream.ent2_dist,
        mdl.dropout_keep_proba: 1.0,
        mdl.batch_size: dstream.label.shape[0]
    }
    dstep, dloss, preds = sess.run([global_step, mdl.loss, mdl.preds], dev_feed_dict)
    sess.run(mdl.accuracy_op, dev_feed_dict)
    dacc_ = sess.run(mdl.accuracy)
    l = dstream.label
    p = preds
    class_int_labels = list(range(len(le.classes_)))
    target_names = le.classes_
    sess.run(mdl.accuracy_op, dev_feed_dict)
    dsummary = sess.run(dev_summary_op, dev_feed_dict)
    dev_summary_writer.add_summary(dsummary, dstep)
    eval_score = (f1_score(l, p, average='micro'),
                  f1_score(l, p, average='macro'),
                  dacc_)
    print("EVAL step {}, loss {:g}, f1_micro {:g} f1_macro {:g} accuracy {:g}".format(
        tstep, dloss, eval_score[0], eval_score[1], eval_score[2]), flush=True)
    official_score = eval_score[1]
    print("Classification Report: \n%s" % classification_report(
        l, p, labels=class_int_labels, target_names=target_names), flush=True)
    return official_score
def train_model(self, positive_X, negative_X):
    features = np.concatenate((positive_X, negative_X), axis=0)
    # 1-D label vector: ones for positives, zeros for negatives
    # (avoids sklearn's column-vector warning)
    labels = np.concatenate((
        np.ones(len(positive_X)),
        np.zeros(len(negative_X)),
    ))
    x_train, x_test, y_train, y_test = train_test_split(features, labels, train_size=0.7)
    self._classifier.fit(x_train, y_train)
    preds = self._classifier.predict(x_test)
    print('F1: {}'.format(f1_score(y_test, preds)))
def train_and_eval(output, ngram_range=(1, 1), max_features=None, max_df=1.0, C=1.0):
    """Train and evaluate a newsgroup classifier.

    :param output: path the fitted pipeline is persisted to
    :param ngram_range: ngram range
    :param max_features: the maximum number of features
    :param max_df: max document frequency ratio
    :param C: inverse of regularization strength for LogisticRegression
    :return: metrics
    """
    # Load train and test data.
    train_data = fetch_20newsgroups(subset='train')
    test_data = fetch_20newsgroups(subset='test')

    # Define the pipeline.
    pipeline = Pipeline([('tfidf', TfidfVectorizer()),
                         ('clf', LogisticRegression(multi_class='auto'))])

    # Set pipeline parameters.
    params = {
        'tfidf__ngram_range': ngram_range,
        'tfidf__max_features': max_features,
        'tfidf__max_df': max_df,
        'clf__C': C,
    }
    pipeline.set_params(**params)
    print(pipeline.get_params().keys())

    # Train the model.
    pipeline.fit(train_data.data, train_data.target)

    # Predict test data.
    start_time = time()
    predictions = pipeline.predict(test_data.data)
    inference_time = time() - start_time
    avg_inference_time = 1.0 * inference_time / len(test_data.target)
    print("Avg. inference time: {}".format(avg_inference_time))

    # Calculate the metrics.
    accuracy = accuracy_score(test_data.target, predictions)
    recall = recall_score(test_data.target, predictions, average='weighted')
    f1 = f1_score(test_data.target, predictions, average='weighted')
    metrics = {
        'accuracy': accuracy,
        'recall': recall,
        'f1': f1,
    }

    # Persist the model.
    joblib.dump(pipeline, output)
    return metrics
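# Usage sketch (the output path below is illustrative, not from the original code):
# the keyword arguments map directly onto the pipeline parameters set above.
# metrics = train_and_eval('newsgroups_pipeline.joblib', ngram_range=(1, 2), max_df=0.9, C=10.0)
# print(metrics['f1'])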
def __evaluate(self, modelFactory, x, y):
    """
    Perform the cross validation
    :param modelFactory: a factory that builds a model
    :param x: the evaluation data
    :param y: the evaluation classes
    """
    # Create the KFold splitter
    kf = KFold(self.folds, shuffle=True, random_state=None)
    print("=============================" + str(self.folds) +
          "-fold Cross-Validation training and testing ============================= \n")
    i = 1
    # If the number of classes is not given, use the classes that we have
    if not self.numClasses:
        self.numClasses = len(set(y))
    # A list of results used to see how well the model does across the folds
    tableResults = []
    # Loop through the fold splits of the data
    for trainIndex, testIndex in kf.split(x):
        # print(type(trainIndex))
        # Build a model adapter using a factory
        model = modelFactory.create()
        # A quick sanity-check print
        print(" ============== Fold ", i, "============")
        trainDocs, testDocs = x[trainIndex], x[testIndex]
        trainCats, testCats = y[trainIndex], y[testIndex]
        # If we want the categories represented as a binary array, this is where we would do it
        # TODO: categorical class representation when evaluating the classes returned by the model
        # Use the adapter to fit our model
        model.fit(trainDocs, trainCats, epochs=self.epochs, batch_size=len(trainIndex))
        # Predict
        pred = model.predict(testDocs, testCats)
        print(pred)
        # Get the scores
        accuracy = accuracy_score(testCats, pred)
        recall = recall_score(testCats, pred, average='weighted')
        precision = precision_score(testCats, pred, average='weighted')
        f1 = f1_score(testCats, pred, average='weighted')
        # Append to the result table
        tableResults.append({
            'result': 'result',
            'accuracy': accuracy,
            'recall': recall,
            'precision': precision,
            'f1': f1
        })
        i += 1
    self.tableResults = tableResults
def evalOne(enabledColumns):
    features = [all_features[i] for i in range(0, len(all_features)) if enabledColumns[i]]
    Y = []
    P = []
    for group in range(0, 5):
        # print("Test group " + str(group + 1))
        trainStationList = []
        testStationList = []
        for i in range(0, 5):
            if i == group:
                testStationList.extend(groups[i])
            else:
                trainStationList.extend(groups[i])
        trainStations = set(float(station) for station in trainStationList)
        # reorder train stations
        # print("\ttrainStationList:" + str(trainStationList))
        trainStationList = [s for s in all_stations if float(s) in trainStations]
        # print("\ttrainStationList:" + str(trainStationList))
        testStations = set(float(station) for station in testStationList)
        # print("\ttestStationList:" + str(testStationList))
        trainX, testX, trainY, testY, trainLocation, testLocation = splitDataForXValidationWithLocation(
            trainStations, testStations, "location", data, features, "target")
        train_lower = [float(trainStationList[i]) for i in range(0, len(trainStationList))
                       if i < (len(trainStationList) / 2.0)]
        # train_upper = [float(trainStationList[i]) for i in range(0, len(trainStationList)) if i >= (len(trainStationList) / 2.0)]
        test_lower = [float(testStationList[i]) for i in range(0, len(testStationList))
                      if i < (len(testStationList) / 2.0)]
        # test_upper = [float(testStationList[i]) for i in range(0, len(testStationList)) if i >= (len(testStationList) / 2.0)]
        trainY = []
        for l in trainLocation:
            if l in train_lower:
                trainY.append(0)
            else:
                trainY.append(1)
        testY = []
        for l in testLocation:
            if l in test_lower:
                testY.append(0)
            else:
                testY.append(1)
        model = RandomForestClassifier(random_state=42, n_estimators=20, max_depth=9, n_jobs=-1)
        model.fit(trainX, trainY)
        predY = model.predict(testX)
        Y.extend(testY)
        P.extend(predY)
    f1 = f1_score(Y, P)
    accuracy = accuracy_score(Y, P)
    return f1, accuracy
def main(args):
    args, model_state, vocabulary, le = torch.load(args.checkpoint)  # restore model params
    model = Classifier(args, vocabulary, args.num_class)
    model.load_state_dict(model_state)
    iterator = get_devtest_iterator(args.fact, vocabulary, le, args.label)
    model.eval()
    y_pred = predict(model, iterator)
    for y in le.inverse_transform(y_pred):
        sys.stdout.write(f'{y}\n')
    if args.label:
        y_pred = numpy.array(y_pred, dtype=numpy.int32)
        y_true = numpy.fromfile(args.label, dtype=numpy.int32, sep='\n')
        micro_f1 = f1_score(y_true, y_pred, average='micro')
        macro_f1 = f1_score(y_true, y_pred, average='macro')
        sys.stderr.write(f'\n'
                         f'|micro_f1:{micro_f1}, '
                         f'macro_f1:{macro_f1}, '
                         f'avg:{(micro_f1+macro_f1)/2}\n')
def tuneNaiveBayesIgrFeatureSize(featureSizeList, modelCountList):
    X_raw, y = common.loadTrainingDataSet()
    reducer = InformationGainReducer()
    reducer.fit(X_raw, y)
    for featureSize in featureSizeList:
        reducer.resize(featureSize)
        X = reducer.transform(X_raw).toarray()
        #print("Counter(y) = " + str(Counter(y)))
        for modelCount in modelCountList:
            kf = KFold(n_splits=5, random_state=42, shuffle=True)
            splitIndex = 0
            f1ScoreList = []
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                modelList = []
                for modelNum in range(modelCount):
                    rs = 42 + modelNum
                    rus = RandomUnderSampler(random_state=rs)
                    X_model, y_model = rus.fit_resample(X_train, y_train)
                    nbClassifier = NaiveBayesClassifier()
                    nbClassifier.fit(X_model, y_model)
                    #X_test_2 = reducer.transform(X_test).toarray()
                    #output = nbClassifier.predict(X_test_2)
                    #modelScore = f1_score(y_test, output)
                    #print("Split Index = " + str(splitIndex) + ", Model Num = " + str(modelNum) + ", F1 = " + str(modelScore))
                    modelList.append(nbClassifier)
                    #print(".", end='')
                #print()
                combinedModelOutput = common.predictCombinedSimple(X_test, modelList)
                combinedModelScore = f1_score(y_test, combinedModelOutput)
                f1ScoreList.append(combinedModelScore)
                #print("Combined Model Score for split #" + str(splitIndex) + " = " + str(combinedModelScore))
                splitIndex += 1
            print("F1 Score for FR size = " + str(featureSize) + " and model count = " + str(modelCount) + " is: " + str(mean(f1ScoreList)))
def tuneMultimodelIGR(featureSizes):
    X_raw, y_raw = common.loadTrainingDataSet()
    scoreMap = dict()
    for featureSize in featureSizes:
        scoreMap[featureSize] = []
    kf = KFold(n_splits=5, random_state=42, shuffle=True)
    foldNumber = 0
    for train_index, test_index in kf.split(X_raw):
        X_train, X_test = X_raw[train_index], X_raw[test_index]
        y_train, y_test = y_raw[train_index], y_raw[test_index]
        reducer = InformationGainReducer()
        reducer.fit(X_train, y_train)
        for featureSize in featureSizes:
            reducer.resize(featureSize)
            X_train_reduced = reducer.transform(X_train).toarray()
            modelList = []
            for modelNum in range(11):
                rus_rs = 555 + modelNum
                rus = RandomUnderSampler(random_state=rus_rs)
                X_model, y_model = rus.fit_resample(X_train_reduced, y_train)
                nbClassifier = NaiveBayesClassifier()
                nbClassifier.fit(X_model, y_model)
                modelList.append(nbClassifier)
                print(".", end="")
            X_test_reduced = reducer.transform(X_test).toarray()
            output = common.predictCombinedSimple(X_test_reduced, modelList)
            combinedModelScore = f1_score(y_test, output)
            scoreMap[featureSize].append(combinedModelScore)
            print()
            print("Done with fold #" + str(foldNumber) + " for feature size = " + str(featureSize) + ". F1 = " + str(combinedModelScore))
        foldNumber += 1
    for featureSize in featureSizes:
        meanF1Score = mean(scoreMap[featureSize])
        print("F1 Score for Naive Bayes with IGR and FR size = " + str(featureSize) + " is: " + str(meanF1Score))
def plot_report(y_test, test_predict, title):
    precision = precision_score(y_test, test_predict, average=None)
    recall = recall_score(y_test, test_predict, average=None)
    f1 = f1_score(y_test, test_predict, average=None)
    plt.tight_layout(pad=0)
    plt.plot(precision, color="red")
    plt.plot(recall, color="green")
    plt.plot(f1, color="blue")
    plt.margins(x=0)
    plt.gcf().subplots_adjust(bottom=0.5)
    plt.title(title)
    plt.legend(["precision", "recall", "f1-score"])
    plt.xticks(np.arange(0, 12, step=1), MY_LABELS, rotation=60, fontsize=6)
    plt.show()
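# Sketch of the per-class vectors fed to plot_report (hypothetical labels): with
# average=None each metric has one entry per class, so MY_LABELS must match the
# number of classes on the x-axis.
#   f1_score([0, 1, 1, 2], [0, 1, 0, 2], average=None)  ->  approx. [0.67, 0.67, 1.0]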
def refine_with_unexpectedness(data_set, classes_dict, preY, Ytrue, unexpected_rules):
    print('Refine with unexpected rules...')
    y_pred = np.copy(preY)
    for i in range(data_set.size()):
        x = data_set.get_transaction(i)
        for r in unexpected_rules:
            if r.satisfy_rule(x, is_lhs=True):
                label = r.right_items[0]
                y_pred[i] = classes_dict[label]
    print(f1_score(Ytrue, y_pred, average=None))
    if data_set.number_of_classes() <= 2:
        fpr, tpr, _ = roc_curve(Ytrue, y_pred.flatten())
        print(auc(fpr, tpr))
def sklearn_ensemble_learning_experiment():
    from sklearn import datasets
    from sklearn.metrics import f1_score
    from sklearn.ensemble import RandomForestClassifier
    # load the iris dataset
    dataset = datasets.load_iris()
    # fit a model to the data
    model = RandomForestClassifier()
    model.fit(dataset.data, dataset.target)
    # make predictions
    expected = dataset.target
    predicted = model.predict(dataset.data)
    # summarize the fit of the model
    print("Score: " + str(f1_score(expected, predicted, average="macro")))
def sklearn_simple_decision_tree_experiment(min_samples_leaf=1):
    from sklearn import datasets
    from sklearn.metrics import f1_score
    from sklearn.tree import DecisionTreeClassifier
    # load the iris dataset
    dataset = datasets.load_iris()
    # fit a model to the data
    model = DecisionTreeClassifier(min_samples_leaf=min_samples_leaf)
    model.fit(dataset.data, dataset.target)
    # make predictions
    expected = dataset.target
    predicted = model.predict(dataset.data)
    # summarize the fit of the model
    print("Score: " + str(f1_score(expected, predicted, average="macro")))
def tuneMultimodelSvm(featureSizes):
    X_raw, y_raw = common.loadTrainingDataSet()
    scoreMap = dict()
    for featureSize in featureSizes:
        scoreMap[featureSize] = []
    kf = KFold(n_splits=5, random_state=42, shuffle=True)
    foldNumber = 0
    for train_index, test_index in kf.split(X_raw):
        X_train, X_test = X_raw[train_index], X_raw[test_index]
        y_train, y_test = y_raw[train_index], y_raw[test_index]
        for featureSize in featureSizes:
            reducer = TruncatedSVD(n_components=featureSize)
            X_train_reduced = reducer.fit_transform(X_train)
            modelList = []
            for modelNum in range(11):
                rus_rs = 555 + (modelNum * featureSize)
                rus = RandomUnderSampler(random_state=rus_rs)
                X_model, y_model = rus.fit_resample(X_train_reduced, y_train)
                clf = SVC(gamma='scale')
                clf.fit(X_model, y_model)
                modelList.append(clf)
                print(".", end="")
            X_test_reduced = reducer.transform(X_test)
            output = common.predictCombinedSimple(X_test_reduced, modelList)
            combinedModelScore = f1_score(y_test, output)
            scoreMap[featureSize].append(combinedModelScore)
            print()
            print("Done with fold #" + str(foldNumber) + " for feature size = " + str(featureSize) + ". F1 = " + str(combinedModelScore))
        foldNumber += 1
    for featureSize in featureSizes:
        meanF1Score = mean(scoreMap[featureSize])
        print("F1 Score for SVM with Truncated SVD and FR size = " + str(featureSize) + " is: " + str(meanF1Score))
def getAvgF1Score(X, y):
    splitCount = 5
    kf = KFold(n_splits=splitCount, random_state=42, shuffle=True)
    avgSum = 0.0
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        nbClassifier = NaiveBayesClassifier()
        nbClassifier.fit(X_train, y_train)
        output = nbClassifier.predict(X_test)
        avgSum += f1_score(y_test, output)
    return avgSum / splitCount
def inverse_f1_score(y, yhat):
    """The F_1 score is a classification metric that balances precision and
    recall, i.e. it penalizes both false positives and false negatives. For the
    F_1 score higher is better, so we take the inverse."""
    # convert real values to boolean with a zero threshold
    yhat = (yhat > 0)
    f = f1_score(y, yhat)
    # this can give a runtime warning of zero division because if we
    # predict the same value for all samples (trivial individuals will
    # do so), the F-score is undefined (see the sklearn implementation),
    # but it's only a warning and we can ignore it.
    with np.errstate(divide='raise'):
        try:
            return 1.0 / f
        except (ZeroDivisionError, FloatingPointError):
            return 10000.0
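# Usage sketch for inverse_f1_score (toy values, assuming numpy and f1_score are in
# scope): real-valued outputs are thresholded at zero before scoring, and a perfect
# split gives fitness 1.0 while a degenerate F-score of 0 falls back to the penalty value.
y = np.array([1, 0, 1, 1])
yhat = np.array([0.3, -0.2, 0.8, 0.1])   # thresholded to [1, 0, 1, 1]
fitness = inverse_f1_score(y, yhat)      # == 1.0 here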
def tuneNaiveBayesMultiModel(featureSize, modelCount):
    X, y = common.loadTrainingDataSet()
    #print("Counter(y) = " + str(Counter(y)))
    kf = KFold(n_splits=5, random_state=42, shuffle=True)
    splitIndex = 0
    f1ScoreList = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        modelTransformerList = []
        for modelNum in range(modelCount):
            rs = 42 + modelNum
            rus = RandomUnderSampler(random_state=rs)
            X_model_full, y_model = rus.fit_resample(X_train, y_train)
            reducer = SelectKBest(chi2, k=featureSize)
            X_model = reducer.fit_transform(X_model_full, y_model).toarray()
            nbClassifier = NaiveBayesClassifier()
            nbClassifier.fit(X_model, y_model)
            #X_test_2 = reducer.transform(X_test).toarray()
            #output = nbClassifier.predict(X_test_2)
            #modelScore = f1_score(y_test, output)
            #print("Split Index = " + str(splitIndex) + ", Model Num = " + str(modelNum) + ", F1 = " + str(modelScore))
            modelTransformerList.append((nbClassifier, reducer))
        combinedModelOutput = common.predictCombined(X_test, modelTransformerList)
        combinedModelScore = f1_score(y_test, combinedModelOutput)
        f1ScoreList.append(combinedModelScore)
        #print("Combined Model Score = " + str(combinedModelScore))
        splitIndex += 1
    print("F1 Score for FR size = " + str(featureSize) + " is: " + str(mean(f1ScoreList)))
def myaccuracy(raw_file, result_file):
    df = pd.read_csv(result_file, sep='\t', header=None, names=['pred_0', 'pred_1'])
    test_df = pd.read_csv(raw_file, sep='\t', header=None, names=['idx', 'question', 'relation', 'label'])
    df["pred"] = df.apply(lambda row: func(row["pred_1"], row["pred_0"]), axis=1)
    f1 = f1_score(y_true=test_df.label, y_pred=df.pred)
    acc = accuracy_score(y_true=test_df.label, y_pred=df.pred)
    p = precision_score(y_true=test_df.label, y_pred=df.pred)
    r = recall_score(y_true=test_df.label, y_pred=df.pred)
    # print("accuracy: ", acc)
    # print("precision: ", p)
    # print("recall: ", r)
    # print("f1: ", f1)
    # df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])
    df["idx"] = test_df.idx
    df["group_sort"] = df["pred_1"].groupby(df["idx"]).rank(ascending=0, method="dense")
    df["candidate"] = test_df.relation
    # test_df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])
    df.drop_duplicates(subset=['idx', 'group_sort'], keep='first', inplace=True)
    true_relation = test_df.loc[test_df["label"] == 1]
    pred_relation = df.loc[(df["group_sort"] == 1.0)]
    # print(pred_relation.tail())
    # print(true_relation.tail())
    new_df = pd.merge(true_relation, pred_relation, how="inner")
    new_df["correct"] = new_df.apply(lambda row: row["relation"] == row["candidate"], axis=1)
    c = new_df.loc[new_df["correct"] == True]
    correct = c.idx.count()
    total = new_df.idx.count()
    print("my_accuracy: {}, {}/{}".format(correct / total, correct, total))
X_train, y_train, lengths_train = load_conll(open("../resources/train.data", "r"), features)
clf = StructuredPerceptron(decode="viterbi", lr_exponent=.05, max_iter=30)
print("Fitting model " + str(clf))
clf.fit(X_train, y_train, lengths_train)

print("\nPredictions on dev set")
# read the dev set
X_dev, y_dev, lengths_dev = load_conll(open("../resources/dev.data", "r"), features)
y_pred = clf.predict(X_dev, lengths_dev)

print("Whole seq accuracy ", whole_sequence_accuracy(y_dev, y_pred, lengths_dev))
print("Element-wise accuracy ", accuracy_score(y_dev, y_pred))
print("Mean F1-score macro ", f1_score(y_dev, y_pred, average="macro"))

print("\nPredictions on test set")
# read the test set
X_test, _, lengths_test = load_conll(open("../resources/test.data", "r"), features)
y_pred = clf.predict(X_test, lengths_test)
print(pd.Series(y_pred).value_counts())

print("Saving predicted as a submission")
with open("submission.csv", "w") as wf:
    wf.write("id,tag\n")
    for id, tag in enumerate(list(y_pred)):
        wf.write(str(id + 1) + "," + tag + "\n")
# Fragment: the first three statements continue a preceding loop over the `train`
# indices (the loop header is not part of this excerpt).
y_train.append(features[i][6])
tmp = [features[i][0], features[i][1], features[i][2],
       features[i][3], features[i][4], features[i][5]]
x_train.append(tmp)

y_test = []
x_test = []
for i in test:
    y_test.append(features[i][6])
    tmp = [features[i][0], features[i][1], features[i][2],
           features[i][3], features[i][4], features[i][5]]
    x_test.append(tmp)

lr.fit(x_train, y_train)
lrPredTest = lr.predict(x_test)
lrPrecisionTest = precision_score(y_test, lrPredTest)
lrRecallTest = recall_score(y_test, lrPredTest)
lrF1Test = f1_score(y_test, lrPredTest)
lrAvgPrecision += lrPrecisionTest
lrAvgRecall += lrRecallTest
lrAvgF1 += lrF1Test

print "log reg completed in ", time.time() - start, " s"
print "lr:\n Precision {}\n Recall {}\n F1 {}\n".format(lrAvgPrecision / 5, lrAvgRecall / 5, lrAvgF1 / 5)

start = time.time()

"""RANDOM FOREST"""
rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=5)
rfAvgPrecision = 0.0
rfAvgRecall = 0.0
rfAvgF1 = 0.0
def plot(self, plt=None, generate_testpoints=True, generate_background=True,
         tune_background_model=False, background_resolution=100, scatter_size_scale=1.0, legend=True):
    """Plots the dataset and the identified decision boundary in 2D.
    (If you wish to create custom plots, get the data using generate_plot() and plot it manually.)

    Parameters
    ----------
    plt : matplotlib.pyplot or axis object (default=matplotlib.pyplot)
        Object to be plotted on

    generate_testpoints : boolean, optional (default=True)
        Whether to generate demo points around the estimated decision boundary
        as a sanity check

    generate_background : boolean, optional (default=True)
        Whether to generate a faint background plot (using prediction probabilities
        of a support vector machine fitted on generated demo points) to aid visualization

    tune_background_model : boolean, optional (default=False)
        Whether to tune the parameters of the support vector machine generating the background

    background_resolution : int, optional (default=100)
        Desired resolution (height and width) of the background to be generated

    scatter_size_scale : float, optional (default=1.0)
        Scaling factor for scatter plot marker size

    legend : boolean, optional (default=True)
        Whether to display a legend

    Returns
    -------
    plt : The matplotlib.pyplot or axis object which has been passed in, after
        plotting the data and decision boundary on it. (plt.show() is NOT called
        and will be required)
    """
    if plt is None:
        plt = mplt

    if len(self.X_testpoints) == 0:
        self.generate_plot(generate_testpoints=generate_testpoints,
                           generate_background=generate_background,
                           tune_background_model=tune_background_model,
                           background_resolution=background_resolution)

    if generate_background and generate_testpoints:
        try:
            plt.imshow(np.flipud(self.background),
                       extent=[self.X2d_xmin, self.X2d_xmax, self.X2d_ymin, self.X2d_ymax],
                       cmap="GnBu", alpha=0.33)
        except Exception:
            print("Failed to render image background")

    # decision boundary
    plt.scatter(self.decision_boundary_points_2d[:, 0], self.decision_boundary_points_2d[:, 1],
                600 * scatter_size_scale, c='c', marker='p')

    # generated demo points
    if generate_testpoints:
        plt.scatter(self.X_testpoints_2d[:, 0], self.X_testpoints_2d[:, 1],
                    20 * scatter_size_scale,
                    c=['g' if i else 'b' for i in self.y_testpoints], alpha=0.6)

    # training data
    plt.scatter(self.X2d[self.train_idx, 0], self.X2d[self.train_idx, 1],
                150 * scatter_size_scale,
                facecolor=['g' if i else 'b' for i in self.y[self.train_idx]],
                edgecolor=['g' if self.y_pred[self.train_idx[i]] == self.y[self.train_idx[i]] == 1
                           else ('b' if self.y_pred[self.train_idx[i]] == self.y[self.train_idx[i]] == 0 else 'r')
                           for i in range(len(self.train_idx))],
                linewidths=5 * scatter_size_scale)

    # testing data
    plt.scatter(self.X2d[self.test_idx, 0], self.X2d[self.test_idx, 1],
                150 * scatter_size_scale,
                facecolor=['g' if i else 'b' for i in self.y[self.test_idx]],
                edgecolor=['g' if self.y_pred[self.test_idx[i]] == self.y[self.test_idx[i]] == 1
                           else ('b' if self.y_pred[self.test_idx[i]] == self.y[self.test_idx[i]] == 0 else 'r')
                           for i in range(len(self.test_idx))],
                linewidths=5 * scatter_size_scale, marker='s')

    # label data points with their indices
    for i in range(len(self.X2d)):
        plt.text(self.X2d[i, 0] + (self.X2d_xmax - self.X2d_xmin) * 0.5e-2,
                 self.X2d[i, 1] + (self.X2d_ymax - self.X2d_ymin) * 0.5e-2,
                 str(i), size=8)

    if legend:
        plt.legend(["Estimated decision boundary keypoints",
                    "Generated demo data around decision boundary",
                    "Actual data (training set)",
                    "Actual data (demo set)"],
                   loc="lower right", prop={'size': 9})

    # decision boundary keypoints, in case not visible in background
    plt.scatter(self.decision_boundary_points_2d[:, 0], self.decision_boundary_points_2d[:, 1],
                600 * scatter_size_scale, c='c', marker='p', alpha=0.1)
    plt.scatter(self.decision_boundary_points_2d[:, 0], self.decision_boundary_points_2d[:, 1],
                30 * scatter_size_scale, c='c', marker='p', edgecolor='c', alpha=0.8)

    # minimum spanning tree through decision boundary keypoints
    D = pdist(self.decision_boundary_points_2d)
    edges = minimum_spanning_tree(squareform(D))
    for e in edges:
        plt.plot([self.decision_boundary_points_2d[e[0], 0], self.decision_boundary_points_2d[e[1], 0]],
                 [self.decision_boundary_points_2d[e[0], 1], self.decision_boundary_points_2d[e[1], 1]],
                 '--c', linewidth=4 * scatter_size_scale)
        plt.plot([self.decision_boundary_points_2d[e[0], 0], self.decision_boundary_points_2d[e[1], 0]],
                 [self.decision_boundary_points_2d[e[0], 1], self.decision_boundary_points_2d[e[1], 1]],
                 '--k', linewidth=1)

    if len(self.test_idx) == 0:
        print("No demo performance calculated, as no testing data was specified")
    else:
        freq = itemfreq(self.y[self.test_idx]).astype(float)
        imbalance = np.round(np.max((freq[0, 1], freq[1, 1])) / len(self.test_idx), 3)
        acc_score = np.round(accuracy_score(self.y[self.test_idx], self.y_pred[self.test_idx]), 3)
        f1 = np.round(f1_score(self.y[self.test_idx], self.y_pred[self.test_idx]), 3)
        plt.title("Test accuracy: " + str(acc_score) + ", F1 score: " + str(f1) +
                  ". Imbalance (max chance accuracy): " + str(imbalance))

    if self.verbose:
        print("Plot successfully generated! Don't forget to call the show() method to display it")

    return plt
def run():
    paras, sents = create_dataset()
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))
    print len(X[0])
    sents = np.array(sents)
    skf = StratifiedKFold(Y, n_folds=10)
    f = open('results/correct.txt', 'w')
    f2 = open('results/wrong.txt', 'w')
    accs = []
    precs = []
    recs = []
    f1s = []
    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        sent_train = sents[train_index]
        sent_test = sents[test_index]

        # cv = CountVectorizer(stop_words="english", ngram_range=(1,1), min_df = 5)
        # sent_train_counts = cv.fit_transform(sent_train)
        #
        # tf_transformer = TfidfTransformer(use_idf=True).fit(sent_train_counts)
        # sent_train_counts = tf_transformer.transform(sent_train_counts)
        #
        # sent_train_counts = sent_train_counts.toarray()
        #
        # print sent_train_counts.shape
        # print X_train.shape
        #
        # new_train = []
        # for i,j in zip(X_train, sent_train_counts):
        #     new_train.append(np.append(i,j))

        #fs = SelectKBest(chi2, k=24)
        #X_train = fs.fit_transform(X_train, y_train)

        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        print clf.coef_

        # sent_test_counts = cv.transform(sent_test)
        # sent_test_counts = tf_transformer.transform(sent_test_counts)
        #
        # sent_test_counts = sent_test_counts.toarray()
        #
        # new_test = []
        # for i,j in zip(X_test, sent_test_counts):
        #     new_test.append(np.append(i,j))

        #X_test = fs.transform(X_test)

        y_pred = clf.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)
        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1

        for (index, test), (y_t, y_p) in zip(zip(test_index, X_test), zip(y_test, y_pred)):
            if y_t == y_p:
                # if paras[index]['prev_para']:
                #     f.write('%s\n' % paras[index]['prev_para']['sents'])
                f.write('%s\n' % sents[index])
                f.write('%s\n' % (y_t))
            else:
                # if paras[index]['prev_para']:
                #     f2.write('%s\n' % paras[index]['prev_para']['sents'])
                f2.write('%s\n' % sents[index])
                f2.write('%s\n' % (y_t))

    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)