def precision_recall():
    """Plot a precision-recall curve for each make/type attribute classifier.

    For every attribute in ``['bmw', 'ford', 'sedan', 'SUV']`` the stored
    attribute classifier is loaded, scored on the images through a
    ``BayesNet``, and compared against the ground truth derived from the
    classes that carry the attribute.  For each attribute this prints the
    score statistics, a classification report at a 0.65 score threshold and
    the area under the precision-recall curve, and draws the curve into one
    panel of a 2x2 figure that is shown once all attributes are processed.
    """
    from sklearn.metrics import auc
    from sklearn.metrics import classification_report
    from sklearn.metrics import precision_recall_curve
    from mpltools import style

    style.use('ggplot')

    car_makes = ['bmw', 'ford']
    car_types = ['sedan', 'SUV']
    args = car_makes + car_types
    config = get_config(args)
    dataset, config = fgu.get_all_metadata(config)

    # Panels are numbered from 1, matching matplotlib's subplot indexing.
    for panel, attrib_name in enumerate(args, 1):
        clf_path = '../../../attribute_classifiers/{}.dat'.format(attrib_name)
        attrib_clf = AttributeClassifier.load(clf_path)
        bnet = BayesNet(config,
                        dataset['train_annos'],
                        dataset['class_meta'],
                        [attrib_clf],
                        desc=str(args))
        res = bnet.create_attrib_res_on_images()

        # Ground truth: an image is positive when its class has the attribute.
        attrib_selector = AttributeSelector(config, dataset['class_meta'])
        pos_classes = attrib_selector.class_ids_for_attribute(attrib_name)
        true_labels = np.array(res.class_index.isin(pos_classes))

        # The result columns are looked up by the lower-cased attribute name.
        score_column = res[attrib_name.lower()]
        scores = np.array(score_column)

        print("--------------{}-------------".format(attrib_name))
        print(score_column.describe())
        report = classification_report(
            true_labels,
            scores > 0.65,
            target_names=['not-{}'.format(attrib_name), attrib_name])
        print(report)

        precision, recall, thresholds = precision_recall_curve(true_labels,
                                                               scores)
        score = auc(recall, precision)
        print("Area Under Curve: %0.2f" % score)

        plt.subplot(2, 2, panel)
        plt.plot(recall, precision, label='Precision-Recall curve')
        plt.title('Precision-Recall: {}'.format(attrib_name))
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.legend(['area = {}'.format(score)])

    plt.draw()
    plt.show()
def classify_using_attributes():
    """Classify cars from BoW features augmented with attribute scores.

    Loads the stored attribute classifiers for a fixed list of makes, types
    and origins, restricts the train/test annotations to the classes chosen
    by ``select_small_set_for_bayes_net``, and appends each image's attribute
    scores (produced through a ``BayesNet``) to its BoW feature vector.  A
    ``LinearSVC`` is fitted on the combined training features and evaluated
    on the combined test features.

    Prints a classification report, the accuracy, the accuracy at N for
    N = 1..10, and the accuracies of three ``DummyClassifier`` baselines
    (most_frequent, stratified, uniform) for comparison.
    """
    from sklearn import svm
    from sklearn.metrics import classification_report

    makes = ['bmw', 'ford']
    types = ['sedan', 'suv']
    args = makes + types + ['germany', 'usa']

    config = get_config()
    (dataset, config) = fgu.get_all_metadata(config)
    config.attribute.names = args

    # Attribute score columns are addressed by lower-cased attribute name.
    attrib_names = [name.lower() for name in args]
    attrib_classifiers = [
        AttributeClassifier.load(
            '../../../attribute_classifiers/{}.dat'.format(name))
        for name in args
    ]

    # Keep only the classes selected for these makes/types, and the
    # annotations and attribute metadata that belong to them.
    classes = select_small_set_for_bayes_net(dataset, makes, types)
    attrib_meta = dataset['attrib_meta'].loc[classes.index]
    train_annos = dataset['train_annos']
    test_annos = dataset['test_annos']
    train_annos = train_annos[np.array(
        train_annos.class_index.isin(classes.class_index))]
    test_annos = test_annos[np.array(
        test_annos.class_index.isin(classes.class_index))]

    ftr = Bow.load_bow(train_annos, config)
    fte = Bow.load_bow(test_annos, config)

    bnet = BayesNet(config, train_annos, classes,
                    attrib_classifiers, attrib_meta, desc=str(args))
    # The second element of each returned pair is not used here.
    attrib_res_train, _ = bnet.create_attrib_res_on_images(train_annos, ftr)
    attrib_res_test, _ = bnet.create_attrib_res_on_images(test_annos, fte)

    # Combine the BoW features with the attribute scores.
    features_train = np.concatenate(
        [ftr, attrib_res_train[attrib_names]], axis=1)
    features_test = np.concatenate(
        [fte, attrib_res_test[attrib_names]], axis=1)

    clf = svm.LinearSVC()
    labels_train = np.array(attrib_res_train.class_index)
    clf.fit(features_train, labels_train)

    # features_test must remain the concatenated matrix built above so that
    # its columns match what clf was fitted on; it is deliberately not
    # replaced by the attribute-only columns.
    y_pred = clf.predict(features_test)
    labels_test = np.array(attrib_res_test.class_index)

    print(classification_report(labels_test, y_pred,
                                labels=classes.index,
                                target_names=[c for c in classes.class_name]))
    print("Accuracy: {}".format(accuracy_score(labels_test, y_pred)))
    print("Mean Accuracy: {}".format(clf.score(features_test, labels_test)))

    print('')
    print('Accuracy at N:')
    scorer = AccuracyAtN(clf.decision_function(features_test),
                         labels_test,
                         class_names=np.unique(labels_train))
    for ii in range(1, 11):
        print('Accuracy at {}: {}'.format(ii, scorer.get_accuracy_at(ii)))

    # Baselines: one dummy classifier per strategy, each reported under the
    # name of the strategy it was actually built with.
    dummy_strategies = ['most_frequent', 'stratified', 'uniform']
    dummies = [
        (strategy,
         DummyClassifier(strategy=strategy).fit(features_train, labels_train))
        for strategy in dummy_strategies
    ]

    print('')
    print('Dummy Classifiers:')
    print('-----------------')
    for strategy, dummy in dummies:
        print("Accuracy - {}: {}".format(
            strategy,
            accuracy_score(labels_test, dummy.predict(features_test))))
    for strategy, dummy in dummies:
        print("Mean Accuracy - {}: {}".format(
            strategy, dummy.score(features_test, labels_test)))