def precision_recall():
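  """Plot a precision-recall curve for each attribute classifier.

  For each attribute (two makes, two body types), loads the pre-trained
  classifier, scores images through a one-attribute BayesNet, prints a
  classification report and PR-AUC, and draws the curve in a 2x2 grid.
  """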
#   from sklearn.metrics import roc_auc_score
#   from sklearn.metrics import roc_curve
  from sklearn.metrics import precision_recall_curve
  from sklearn.metrics import auc
  from sklearn.metrics import classification_report
  from mpltools import style
  style.use('ggplot')

  makes = ['bmw', 'ford']
  types = ['sedan', 'SUV']
  args = makes + types
  config = get_config(args)
  (dataset, config) = fgu.get_all_metadata(config)

  for ii, attrib_name in enumerate(args):
    # Load the pre-trained classifier for this attribute.
    attrib_clf = AttributeClassifier.load('../../../attribute_classifiers/{}.dat'.format(attrib_name))
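    # Build a Bayes net around this one classifier and score the images.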
    bnet = BayesNet(config, dataset['train_annos'],
                    dataset['class_meta'], [attrib_clf], desc=str(args))

    res = bnet.create_attrib_res_on_images()

    attrib_selector = AttributeSelector(config, dataset['class_meta'])
  #   attrib_meta = attrib_selector.create_attrib_meta([attrib_clf.name])
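    # Classes annotated with this attribute define the positive images.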
    pos_classes = attrib_selector.class_ids_for_attribute(attrib_name)
    true_labels = np.array(res.class_index.isin(pos_classes))

    print("--------------{}-------------".format(attrib_name))
    print(res[str.lower(attrib_name)].describe())

    # Classification report with scores binarized at an (arbitrary) 0.65 threshold.
    print(classification_report(true_labels,
                                np.array(res[str.lower(attrib_name)]) > 0.65,
                                target_names=['not-{}'.format(attrib_name),
                                              attrib_name]))

    # Precision-recall curve and the area under it.
    precision, recall, thresholds = precision_recall_curve(
        true_labels, np.array(res[str.lower(attrib_name)]))
    score = auc(recall, precision)
    print("Area Under Curve: %0.2f" % score)
#     score = roc_auc_score(true_labels, np.array(res[str.lower(attrib_name)]))
#     fpr, tpr, thresholds = roc_curve(true_labels, np.array(res[str.lower(attrib_name)]))
    plt.subplot(2,2,ii+1)
#     plt.plot(fpr, tpr)
    plt.plot(recall, precision, label='Precision-Recall curve')
    plt.title('Precision-Recall: {}'.format(attrib_name))
#     plt.xlabel('False Positive Rate')
#     plt.ylabel('True Positive Rate')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend(['area = {:0.2f}'.format(score)])

  plt.draw()
  plt.show()


def classify_using_attributes():
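  """Classify car images using BoW features combined with attribute scores.

  Trains a linear SVM on concatenated bag-of-words features and attribute
  classifier outputs, then reports classification metrics, accuracy-at-N,
  and dummy-classifier baselines.
  """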
  from sklearn.ensemble import RandomForestClassifier
  from sklearn import svm
  from sklearn.metrics import classification_report
  from sklearn.metrics import accuracy_score
  from sklearn.dummy import DummyClassifier

  makes = ['bmw', 'ford']
  types = ['sedan', 'suv']
  args = makes + types + ['germany', 'usa']
  
#   args = get_args_from_file('sorted_attrib_list.txt')
  config = get_config()
  (dataset, config) = fgu.get_all_metadata(config)
  config.attribute.names = args

  attrib_names = [str.lower(a) for a in args]
  attrib_classifiers = []
  for attrib_name in args:
    attrib_classifiers.append(AttributeClassifier.load('../../../attribute_classifiers/{}.dat'.format(attrib_name)))

  classes = dataset['class_meta']
  train_annos = dataset['train_annos']
  test_annos = dataset['test_annos']
  attrib_meta = dataset['attrib_meta']
  
  # Restrict classes, attribute metadata, and annotations to the selected subset.
  classes = select_small_set_for_bayes_net(dataset, makes, types)

  attrib_meta = attrib_meta.loc[classes.index]
  train_annos = train_annos[np.array(
                             train_annos.class_index.isin(classes.class_index))]
  test_annos = test_annos[np.array(
                              test_annos.class_index.isin(classes.class_index))]

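  # Bag-of-words features for the train and test images.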
  ftr = Bow.load_bow(train_annos, config)
  fte = Bow.load_bow(test_annos, config)
  
  bnet = BayesNet(config, train_annos,
                  classes, attrib_classifiers, attrib_meta, desc=str(args))

  attrib_res_train, _ = bnet.create_attrib_res_on_images(train_annos, ftr)
  attrib_res_test, _ = bnet.create_attrib_res_on_images(test_annos, fte)

  # Combine BoW features and attribute scores into one feature matrix.
  features_train = np.concatenate([ftr, attrib_res_train[attrib_names]], axis=1)
  features_test = np.concatenate([fte, attrib_res_test[attrib_names]], axis=1)

  # Define the classifier to train on the combined features.
#   clf = RandomForestClassifier(n_estimators=50, n_jobs=-2)
#   clf = svm.SVC(kernel='rbf')
  clf = svm.LinearSVC()

  labels_train = np.array(attrib_res_train.class_index)
#   features_train = np.array(attrib_res_train[attrib_names])
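  # Train on the combined feature matrix.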
  clf.fit(features_train, labels_train)

#   features_test = np.array(attrib_res_test[attrib_names])
  y_pred = clf.predict(features_test)
  labels_test = np.array(attrib_res_test.class_index)

  print(classification_report(labels_test, y_pred,
                              labels=classes.index,
                              target_names=[c for c in classes.class_name]))
  
  print("Accuracy: {}".format(accuracy_score(labels_test, y_pred)))
  print("Mean Accuracy: {}".format(clf.score(features_test, labels_test)))
  
  
  print ''
  print 'Accuracy at N:'
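  # AccuracyAtN is a project helper; it appears to report top-N accuracy
  # from the SVM decision scores.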
  scorer = AccuracyAtN(clf.decision_function(features_test), 
                       labels_test, class_names=np.unique(labels_train))
  for ii in range(1, 11):
    print('Accuracy at {}: {}'.format(ii, scorer.get_accuracy_at(ii)))

  # Baseline comparison: dummy classifiers that ignore the input features.
  dummy_1 = DummyClassifier(strategy='most_frequent').fit(features_train, labels_train)
  dummy_2 = DummyClassifier(strategy='stratified').fit(features_train, labels_train)
  dummy_3 = DummyClassifier(strategy='uniform').fit(features_train, labels_train)
  
  print('')
  print('Dummy Classifiers:')
  print('-----------------')
  print("Accuracy - most_frequent: {}".format(accuracy_score(labels_test, dummy_1.predict(features_test))))
  print("Accuracy - stratified: {}".format(accuracy_score(labels_test, dummy_2.predict(features_test))))
  print("Accuracy - uniform: {}".format(accuracy_score(labels_test, dummy_3.predict(features_test))))
  
  print("Mean Accuracy - most_frequent: {}".format(dummy_1.score(features_test, labels_test)))
  print("Mean Accuracy - stratified: {}".format(dummy_2.score(features_test, labels_test)))
  print("Mean Accuracy - uniform: {}".format(dummy_3.score(features_test, labels_test)))