def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct):
    '''
    get_graph_accuracy_partial - Train an ID3 tree on a random portion of
    train_set and return its accuracy on validate_set.

    The number of sampled examples is len(train_set) * pct truncated to an
    int. The label of the root node returned by ID3 is printed before the
    accuracy is computed.

    NOTE(review): `depth` is not a parameter here; it is looked up from an
    outer (presumably module-level) scope - confirm it is defined there.
    '''
    sample_size = int(len(train_set) * pct)
    subset = random.sample(train_set, sample_size)
    root = ID3(subset, attribute_metadata, numerical_splits_count, depth)
    print(root.label)
    return validation_accuracy(root, validate_set)
def get_graph(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, iterations, lower, upper, increment):
    '''
    get_graph - Scatter-plot validation accuracy against the percentage of
    the training data used (a learning curve).

    For every percentage in range(lower, upper, increment) the result of
    get_graph_data is recorded, but only when that result is truthy. The
    point for `upper` comes from a tree trained on the complete train_set.
    The points and axis labels are handed to `plt`; this function neither
    shows nor saves the figure, and it returns nothing.
    '''
    accuracy_by_pct = {}
    for percent in range(lower, upper, increment):
        result = get_graph_data(train_set, attribute_metadata, validate_set,
                                numerical_splits_count, iterations, percent, depth)
        if not result:
            # Falsy results (e.g. None or 0) are left off the plot.
            continue
        accuracy_by_pct[percent] = result

    # The right-most point always uses the full training set.
    full_tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    accuracy_by_pct[upper] = validation_accuracy(full_tree, validate_set)

    points = list(accuracy_by_pct.items())
    x = [point[0] for point in points]
    y = [point[1] for point in points]

    plt.scatter(x, y)
    plt.xlabel('Percentage of Data Used (%)')
    plt.ylabel('Validation Accuracy')
    plt.title('Learning Curve')
    plt.grid(True)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, depth and a fraction `pct`,
    return the validation accuracy of a tree trained on a random portion
    of the training set.

    pct == 1 trains on train_set itself. Otherwise int(pct * len(train_set))
    distinct examples are drawn at random from the whole training set.
    Returns 0 when there is nothing to train on. random.sample raises
    ValueError if pct asks for more examples than train_set holds.
    '''
    if pct != 1:
        subset_size = int(pct * len(train_set))
        if subset_size <= 0:
            return 0
        # Sample across the WHOLE training set. The previous hand-rolled loop
        # drew indices only from [0, subset_size - 1], so it always trained on
        # a reshuffle of the first rows, and it was quadratic in subset_size.
        data_subset = random.sample(train_set, subset_size)
    else:
        data_subset = train_set

    if not data_subset:
        return 0

    pct_tree = ID3(data_subset, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(pct_tree, validate_set, attribute_metadata)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, a fraction `pct` and a depth,
    return the validation accuracy of a tree trained on a random portion
    of the training set.

    The portion holds floor(pct * len(train_set)) + 1 examples, capped at
    len(train_set). An empty train_set therefore yields an empty subset,
    which is passed to ID3 as-is.
    '''
    population = len(train_set)
    # The "+ 1" keeps small fractions from producing an empty subset, but at
    # pct == 1 it used to ask for one example more than exists, which makes
    # random.sample raise ValueError. Cap the request at the population size.
    num_training_samples = min(population, int(math.floor(pct * population)) + 1)
    data_subset = random.sample(train_set, num_training_samples)
    tree = ID3(data_subset, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3
    tree trained on a random portion of the training set.

    `pct` is divided by 100 here, so it is treated as a percentage: the
    portion holds int(len(train_set) * pct / 100) examples. The size of the
    sampled portion is printed before the tree is built.
    '''
    how_many = int(float(len(train_set) * pct) / 100)
    chosen = random.sample(train_set, how_many)
    print(len(chosen))
    return validation_accuracy(
        ID3(chosen, attribute_metadata, numerical_splits_count, depth),
        validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Train a tree on a random portion of the
    training set and report its validation accuracy before and after
    reduced-error pruning.

    Returns a pair (accuracy_before_pruning, accuracy_after_pruning). The
    pair is (0.0, 0.0) when pct is 0.0 or when round(len(train_set) * pct)
    comes out as zero. Sampling works on a deep copy of train_set and ID3
    receives a deep copy of numerical_splits_count; pruning is given the
    original, full train_set.
    '''
    nothing_to_train = (0.0, 0.0)
    if pct == 0.0:
        return nothing_to_train

    train_copy = deepcopy(train_set)
    subset_size = int(round(len(train_copy) * pct))
    if subset_size == 0:
        return nothing_to_train

    subset = sample(train_copy, subset_size)
    splits_copy = deepcopy(numerical_splits_count)
    unpruned = ID3(subset, attribute_metadata, splits_copy, depth)
    accuracy_before = validation_accuracy(unpruned, validate_set)

    # The tree used for the second measurement is whatever the pruning
    # routine returns.
    pruned = reduced_error_pruning(unpruned, train_set, validate_set)
    accuracy_after = validation_accuracy(pruned, validate_set)
    return accuracy_before, accuracy_after
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Given an already PARTIAL training set,
    build an ID3 tree from it and return that tree's validation accuracy.

    `pct` is accepted for signature compatibility but is not used: no
    sub-sampling happens here. After the tree is built, the current
    numerical_splits_count is printed.
    '''
    fitted_tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    message = "splits counts after one iter: " + str(numerical_splits_count)
    print(message)
    return validation_accuracy(fitted_tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Build an ID3 tree from a random share of
    the training set and return its accuracy on the validation set.

    `pct` is on a 0-100 scale as far as this function is concerned: the
    share contains int(len(train_set) * pct / 100) examples. The number of
    examples actually drawn is printed.
    '''
    total = len(train_set)
    share = float(total * pct) / 100
    picked = random.sample(train_set, int(share))
    print(len(picked))
    partial_tree = ID3(picked, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(partial_tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth=float('inf')):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, and a fraction `pct`, return the
    validation accuracy of a tree trained on a random portion of the
    training set.

    The portion holds int(pct * len(train_set)) distinct examples.

    `depth` is forwarded to ID3. It defaults to float('inf'), the value that
    used to be hard-coded, so existing five-argument calls behave as before.
    '''
    # Sample positions rather than rows, then gather the rows they point at.
    subset_size = int(pct * len(train_set))
    random_indices = random.sample(range(len(train_set)), subset_size)
    subset = [train_set[i] for i in random_indices]

    decision_tree = ID3(subset, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(decision_tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct, prune):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of a tree
    trained on a random portion of the training set, optionally pruned.

    The portion holds int(pct * len(train_set)) examples. When `prune` is
    truthy, reduced_error_pruning is called on the tree with the full
    train_set and the validation set; its return value is ignored, and the
    same tree object is then scored.
    '''
    how_many = int(pct * len(train_set))
    chosen = sample(train_set, how_many)
    tree = ID3(chosen, attribute_metadata, numerical_splits_count, depth)

    if prune:
        reduced_error_pruning(tree, train_set, validate_set)

    return validation_accuracy(tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth=20):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, and a fraction `pct`, return the
    validation accuracy of a tree trained on a random portion of the
    training set.

    The portion holds int(len(train_set) * pct) examples.

    `depth` is forwarded to ID3. It defaults to 20, the value that used to
    be hard-coded, so existing five-argument calls behave as before.
    '''
    subset_size = int(len(train_set) * pct)
    new_data_set = random.sample(train_set, subset_size)
    tree = ID3(new_data_set, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of a tree
    trained on the portion of the training set selected by curve_data.

    curve_data(train_set, pct) supplies the examples to train on. When it
    yields an empty list the result is 0; otherwise an ID3 tree is built
    from those examples and scored on validate_set.
    '''
    portion = curve_data(train_set, pct)
    if portion == []:
        # Nothing to train on.
        return 0

    portion_tree = ID3(portion, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(portion_tree, validate_set, attribute_metadata)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, depth and `pct`, return the
    validation accuracy of a tree trained on a random portion of the
    training set.

    `pct` is used directly as the end of a slice, so it selects the first
    `pct` examples of a shuffled copy of train_set.
    NOTE(review): that makes `pct` an example count rather than a fraction -
    confirm against the caller.

    ID3 receives a shallow copy of numerical_splits_count.
    '''
    # Shuffle a copy: shuffling train_set itself reordered the caller's list
    # as a side effect of merely measuring accuracy.
    shuffled = list(train_set)
    shuffle(shuffled)
    sub_data = shuffled[0:pct]

    sub_numerical_splits_count = copy.copy(numerical_splits_count)
    sub_tree = ID3(sub_data, attribute_metadata, sub_numerical_splits_count, depth)
    return validation_accuracy(sub_tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, a percentage `pct` and a depth,
    return the validation accuracy of a tree trained on a random portion of
    the training set.

    `pct` is divided by 100 here, so it is treated as a percentage: the
    portion is the first int(pct / 100 * len(train_set)) examples of a
    shuffled copy of train_set.
    '''
    # Shuffle a copy: shuffling train_set itself reordered the caller's list
    # as a side effect of merely measuring accuracy.
    shuffled = list(train_set)
    shuffle(shuffled)

    frac = int(float(pct) / 100 * len(shuffled))
    curr_set = shuffled[:frac]
    root = ID3(curr_set, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(root, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth, iterations):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, a fraction `pct`, a depth and a
    number of iterations, return the mean validation accuracy of trees
    trained on random portions of the training set.

    Each of the `iterations` portions holds int(len(train_set) * pct)
    examples. Returns 0 when that size is zero or when `iterations` is not
    positive.

    NOTE(review): the same numerical_splits_count object is handed to every
    ID3 call; if ID3 modifies it, later iterations see the modified value -
    confirm whether a per-iteration copy is wanted.
    '''
    subset_size = int(len(train_set) * pct)
    # Guard against dividing by zero below when no iterations are requested.
    if subset_size == 0 or iterations <= 0:
        return 0

    # Draw every subset first, then train, keeping the original order of
    # random draws.
    subsets = [random.sample(train_set, subset_size) for _ in range(iterations)]

    total = 0
    for subset in subsets:
        tree = ID3(subset, attribute_metadata, numerical_splits_count, depth)
        total += validation_accuracy(tree, validate_set)

    # Force true division so the mean is not floored under Python 2 if the
    # accumulated accuracies happen to be integers.
    return total / float(iterations)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, pct, depth=3):
    '''
    get_graph_accuracy_partial - Given a training set, attribute metadata,
    validation set, numerical splits count, and a fraction `pct`, return the
    validation accuracy of a tree trained on a random portion of the
    training set.

    The portion holds int(len(train_set) * pct) examples.

    `depth` is forwarded to ID3. It defaults to 3, the value that used to be
    hard-coded, so existing five-argument calls behave as before.
    '''
    size = int(len(train_set) * pct)
    data_set = random.sample(train_set, size)
    tree = ID3(data_set, attribute_metadata, numerical_splits_count, depth)
    return validation_accuracy(tree, validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Return the validation accuracy of an ID3
    tree trained on a random portion of the training set.

    The portion holds floor(pct * len(train_set)) examples, drawn with
    random.sample and then shuffled once more. If that size is zero, no
    tree is built and 0 is returned.
    '''
    how_many = int(math.floor(pct * len(train_set)))
    if how_many == 0:
        # Nothing to train on, so report zero accuracy.
        return 0

    chosen = random.sample(train_set, how_many)
    random.shuffle(chosen)

    return validation_accuracy(
        ID3(chosen, attribute_metadata, numerical_splits_count, depth),
        validate_set)
def get_graph_accuracy_partial(train_set, attribute_metadata, validate_set, numerical_splits_count, depth, pct):
    '''
    get_graph_accuracy_partial - Train an ID3 tree on a randomly chosen
    portion of the training set and return its validation accuracy.

    floor(pct * len(train_set)) examples are sampled without replacement
    and shuffled again before training. A portion of size zero
    short-circuits to an accuracy of 0.
    '''
    portion_size = int(math.floor(pct * len(train_set)))
    if portion_size != 0:
        portion = random.sample(train_set, portion_size)
        random.shuffle(portion)
        partial_tree = ID3(portion, attribute_metadata, numerical_splits_count, depth)
        return validation_accuracy(partial_tree, validate_set)
    return 0