예제 #1
0
def dtc_predict_actual(data):
  """Train a decision tree on one part of ``data`` and predict the other part.

  The data is divided with ``splitdata_train_test`` using a training
  fraction of 0.7; features and targets for both parts come from
  ``generate_features_targets``.

  Returns:
    A ``(predictions, targets)`` tuple for the testing part.
  """
  train_part, test_part = splitdata_train_test(data, 0.7)

  # features and targets for each partition
  x_train, y_train = generate_features_targets(train_part)
  x_test, y_test = generate_features_targets(test_part)

  # build the classifier and fit it on the training partition only
  classifier = DecisionTreeClassifier()
  classifier.fit(x_train, y_train)

  # predicted classes for the held-out samples, paired with their targets
  return classifier.predict(x_test), y_test
def rf_predict_actual(data, n_estimators):
  """Return cross-validated random forest predictions and the actual classes.

  Args:
    data: dataset passed to ``generate_features_targets``.
    n_estimators: number of trees given to ``RandomForestClassifier``.

  Returns:
    A ``(predicted, targets)`` tuple: the predictions obtained from
    ``cross_val_predict`` with ``cv=10`` and the targets produced by
    ``generate_features_targets``.
  """
  # generate the features and targets
  features, targets = generate_features_targets(data)

  # instantiate a random forest classifier
  rfc = RandomForestClassifier(n_estimators=n_estimators)

  # get predictions using 10-fold cross validation with cross_val_predict
  predicted = cross_val_predict(rfc, features, targets, cv=10)

  # return the predictions and their actual classes
  return predicted, targets


# The entry-point guard has to live at module level: indented under the
# function above it would be parsed as (unreachable) function body.
if __name__ == "__main__":
  data = np.load('galaxy_catalogue.npy')

  features, targets = generate_features_targets(data)

  # Print the shape of each array to check the arrays are the correct dimensions.
  print("Features shape:", features.shape)
  print("Targets shape:", targets.shape)

  # fraction of data which should be in the training set
  fraction_training = 0.7

  # split the data using your function
  training, testing = splitdata_train_test(data, fraction_training)

  # print the key values
  print('Number data galaxies:', len(data))
  print('Train fraction:', fraction_training)
  print('Number of galaxies in training set:', len(training))
  print('Number of galaxies in testing set:', len(testing))

  predicted_class, actual_class = dtc_predict_actual(data)

  # Print some of the initial results; slicing instead of indexing keeps this
  # from raising IndexError when fewer than 10 predictions are available.
  print("Some initial results...\n   predicted,  actual")
  first_pairs = zip(predicted_class[:10], actual_class[:10])
  for i, (predicted_label, actual_label) in enumerate(first_pairs):
    print("{}. {}, {}".format(i, predicted_label, actual_label))

  # get the predicted and actual classes
  number_estimators = 50              # Number of trees
  predicted, actual = rf_predict_actual(data, number_estimators)

  # calculate the model score using your function
  accuracy = calculate_accuracy(predicted, actual)
  print("Accuracy score:", accuracy)
예제 #3
0
def rf_predict_actual(data, n_estimators):
    """Cross-validate a random forest on ``data``.

    Features and targets are taken from ``generate_features_targets``; the
    forest is built with ``n_estimators`` trees and evaluated through
    ``cross_val_predict`` with ``cv=10``.

    Returns:
        A tuple of the cross-validated predictions and ``data['class']``.
    """
    features, targets = generate_features_targets(data)

    # forest with the requested number of trees
    forest = RandomForestClassifier(n_estimators=n_estimators)

    # out-of-fold predictions from 10-fold cross validation
    cv_predictions = cross_val_predict(forest, features, targets, cv=10)

    # the actual classes are read straight from the 'class' field of data
    actual_classes = data['class']
    return cv_predictions, actual_classes
def dtc_predict_actual(data):
    """Fit a decision tree on a training split of ``data`` and test it.

    ``splitdata_train_test`` is called with a training fraction of 0.7, and
    ``generate_features_targets`` supplies features and targets for each
    split.

    Returns:
        A ``(predictions, test_targets)`` tuple for the testing split.
    """
    train_split, test_split = splitdata_train_test(data, 0.7)

    # features and targets of the training split, then of the testing split
    fit_features, fit_targets = generate_features_targets(train_split)
    eval_features, eval_targets = generate_features_targets(test_split)

    # the classifier only ever sees the training split while fitting
    tree = DecisionTreeClassifier()
    tree.fit(fit_features, fit_targets)

    # predictions for the testing split alongside its targets
    test_predictions = tree.predict(eval_features)
    return test_predictions, eval_targets
예제 #5
0
def rf_predict_actual(data, n_estimators):
    """Return random forest predictions for ``data`` and the actual targets.

    The forest uses ``n_estimators`` trees and is evaluated through
    ``cross_val_predict`` with ``cv=10`` on the features and targets
    returned by ``generate_features_targets``.

    Returns:
        A ``(predicted, targets)`` tuple.
    """
    inputs, labels = generate_features_targets(data)

    # random forest with the requested number of trees
    forest = RandomForestClassifier(n_estimators=n_estimators)

    # 10-fold cross-validated predictions paired with the actual classes
    return cross_val_predict(forest, inputs, labels, cv=10), labels
예제 #6
0
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.tree import DecisionTreeClassifier

from support_functions import plot_confusion_matrix, generate_features_targets


# Implement the following function
def calculate_accuracy(predicted, actual):
  """Return the fraction of predictions that equal the actual classes.

  Args:
    predicted: array-like of predicted class labels.
    actual: array-like of true class labels, same shape as ``predicted``.

  Returns:
    The number of matching elements divided by ``len(predicted)``, as a float.

  Raises:
    ValueError: if ``predicted`` and ``actual`` have different shapes.
    ZeroDivisionError: if ``predicted`` is empty.
  """
  # Coerce to arrays so lists/tuples are compared element-wise; on plain
  # lists ``predicted == actual`` is a single bool and the score is wrong.
  predicted = np.asarray(predicted)
  actual = np.asarray(actual)

  # Refuse mismatched inputs instead of silently broadcasting them.
  if predicted.shape != actual.shape:
    raise ValueError(
        "predicted and actual must have the same shape, got {} and {}".format(
            predicted.shape, actual.shape))

  # Count the matches directly rather than materialising the matching items.
  matches = int(np.count_nonzero(predicted == actual))
  return matches / len(predicted)


if __name__ == "__main__":
  # Script entry point: cross-validate a decision tree on the galaxy
  # catalogue, print its accuracy score and compute its confusion matrix.
  data = np.load('galaxy_catalogue.npy')

  # derive the features and targets from the loaded data
  features, targets = generate_features_targets(data)

  # train the model to get predicted and actual classes
  # (cross_val_predict is called with cv=10; `targets` are the actual classes)
  dtc = DecisionTreeClassifier()
  predicted = cross_val_predict(dtc, features, targets, cv=10)

  # calculate the model score using your function
  model_score = calculate_accuracy(predicted, targets)

  print("Our accuracy score:", model_score)

  # calculate the model's confusion matrix using sklearn's confusion_matrix function
  # NOTE(review): list(set(...)) has no guaranteed order, so the row/column
  # order of the matrix may differ between runs -- consider sorting the labels.
  class_labels = list(set(targets))
  model_cm = confusion_matrix(y_true=targets, y_pred=predicted, labels=class_labels)

  # Plot the confusion matrix using the provided functions.