def classify_and_plot_grid(cities, k=1):
    """Classify every integer grid point of the map and plot the predictions.

    Each (longitude, latitude) pair with longitude in -130..-60 and latitude
    in 20..55 (both ends included) is passed to ``knn.knn_classify`` together
    with ``k`` and ``cities``. The returned label selects the series the point
    is stored in; one scatter series per language is then drawn.
    """
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    # Sweep the grid; the range stops are one past the last value visited.
    for longitude in range(-130, -59):
        for latitude in range(20, 56):
            predicted = knn.knn_classify(k, cities, (longitude, latitude))
            xs, ys = plots[predicted]
            xs.append(longitude)
            ys.append(latitude)

    # One scatter series per language.
    for language in plots:
        xs, ys = plots[language]
        plt.scatter(xs, ys, color=colors[language], marker=markers[language],
                    label=language, zorder=10)

    plot_state_borders(plt)  # assume we have a function that does this
    plt.legend(loc=0)  # let matplotlib choose the location
    plt.axis([-130, -60, 20, 55])  # set the axes
    plt.title("Favorite Programming Languages")
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Classify the integer grid points of the map and plot them as cities.

    Every point with longitude in range(-130, -60) and latitude in
    range(20, 55) is classified with ``knn_classify(k, cities, point)``.
    The resulting ``([longitude, latitude], label)`` pairs are handed to
    ``plot_cities`` for drawing.

    The longitude sweep starts at -130 (it previously started at -120,
    leaving the westernmost ten columns of the documented range unclassified).
    """
    predictions = [
        ([longitude, latitude], knn_classify(k, cities, [longitude, latitude]))
        for longitude in range(-130, -60)
        for latitude in range(20, 55)
    ]
    # plot_cities() builds the per-language scatter series itself.
    plot_cities(predictions)
def classify_and_plot_grid(cities, k=1):
    """Predict a language for each integer grid point and scatter-plot it.

    Longitudes come from range(-130, -60) and latitudes from range(20, 55).
    Each point is labelled by ``knn_classify(k, cities, (lon, lat))`` and
    appended to the series of the returned language.
    """
    colors = {"Java": "r", "Python": "b", "R": "g"}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            lon_series, lat_series = plots[knn_classify(k, cities, (lon, lat))]
            lon_series.append(lon)
            lat_series.append(lat)

    # Create a scatter series for each language.
    for language, series in plots.items():
        plt.scatter(series[0], series[1], color=colors[language],
                    marker=markers[language], label=language, zorder=10)
    plt.show()
def digit_classifier():
    """Train on ``trainingDigits`` and report the kNN error on ``testDigits``.

    Every file in both directories is expected to be named
    ``<digit>_<counter>.<ext>``; the part before the underscore is the true
    label. Each file is converted with ``img2vector`` and the test vectors
    are classified with ``knn_classify(..., 3)``. One line is printed per
    test file, followed by the total error count and the error rate.

    An empty ``testDigits`` directory now reports an error rate of 0.0
    instead of raising ZeroDivisionError.
    """
    # Training phase: one 1024-element row per training file.
    train_file_list = os.listdir('trainingDigits')
    train_vector = zeros((len(train_file_list), 1024))
    digit_labels = []
    for row, file_full_name in enumerate(train_file_list):
        # Full file name including extension -> bare name without extension.
        file_name = file_full_name.split('.')[0]
        # The file name format is "<digit>_<counter>"; keep the digit part.
        digit_labels.append(int(file_name.split('_')[0]))
        train_vector[row, :] = img2vector('trainingDigits/%s' % file_full_name)

    # Test phase.
    test_file_list = os.listdir('testDigits')
    test_file_num = len(test_file_list)
    error_count = 0.0
    for file_full_name in test_file_list:
        file_name = file_full_name.split('.')[0]
        digit_name = int(file_name.split('_')[0])
        test_vector = img2vector('testDigits/%s' % file_full_name)
        classify_result = knn_classify(test_vector, train_vector, digit_labels, 3)
        print('Classify result = %s, real answer = %s' % (classify_result, digit_name))
        if classify_result != digit_name:
            error_count += 1

    # Guard against an empty test directory.
    error_rate = error_count / float(test_file_num) if test_file_num else 0.0
    print('Total error = %d, error rate = %f' % (error_count, error_rate))
def classify_and_plot_grid(cities, k=1):
    """Plot the language predicted by kNN at every integer grid point.

    The grid spans longitudes range(-130, -60) and latitudes range(20, 55).
    Each point is passed as a ``[longitude, latitude]`` list to
    ``knn_classify(k, cities, point)``, and the returned label decides which
    series receives the point.
    """
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            label = knn_classify(k, cities, [lon, lat])
            xs, ys = plots[label]
            xs.append(lon)
            ys.append(lat)

    # Create a scatter series for each language.
    for language in plots:
        xs, ys = plots[language]
        plt.scatter(xs, ys, color=colors[language],
                    marker=markers[language], label=language)

    plt.legend()
    plt.title("Favorite Programming Languages for k=" + str(k))
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Re-classify every known city with kNN and plot the predicted labels.

    For each entry of ``cities`` the location ``city[0]`` is classified with
    ``knn_classify(k, cities, location)``. The full ``cities`` list, which
    includes the city itself, is used as reference data. The location is
    stored under the predicted language and one scatter series per language
    is drawn on top of the state borders.
    """
    colors = {"Java": "r", "Python": "b", "R": "g"}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for entry in cities:
        location = entry[0]
        xs, ys = plots[knn_classify(k, cities, location)]
        xs.append(location[0])
        ys.append(location[1])

    # Create a scatter series for each language.
    for language in plots:
        xs, ys = plots[language]
        plt.scatter(xs, ys, color=colors[language], marker=markers[language],
                    label=language, zorder=10)

    plot_state_borders(plt)
    plt.legend(loc=0)  # let matplotlib choose the location
    plt.axis([-130, -60, 20, 55])  # set the axes
    plt.title("Predicted Programming Languages for k value of %d" % (k))
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Classify each integer grid point with kNN and plot the result.

    Longitudes come from range(-130, -60) and latitudes from range(20, 55).
    Each ``[longitude, latitude]`` point is labelled by
    ``knn_classify(k, cities, point)``. The series are drawn with
    ``zorder=0`` and the state borders are drawn afterwards in black.

    The dangling, unterminated triple-quote that followed ``plt.show()`` has
    been removed; it left the file's string delimiters unbalanced.
    """
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    # We want each language to have a different marker and color.
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for longitude in range(-130, -60):
        for latitude in range(20, 55):
            lang = knn_classify(k, cities, [longitude, latitude])
            plots[lang][0].append(longitude)
            plots[lang][1].append(latitude)

    for lang, (xs, ys) in plots.items():
        plt.scatter(xs, ys, color=colors[lang], marker=markers[lang],
                    label=lang, zorder=0)

    plot_state_borders(plt, color='black')  # assume we have a function that does this
    plt.legend(loc=0)  # let matplotlib choose the location
    plt.axis([-130, -60, 20, 55])  # set the axes
    plt.title(str(k) + "-Nearest Neighbor Programming Languages")
    plt.show()
def predict_with_knn(self, knn_k_value, d, wfunc):
    """Return the label predicted for document ``d`` by kNN.

    Args:
        knn_k_value: the ``k`` forwarded to ``knn.knn_classify``. It was
            previously ignored in favour of a hard-coded 5.
        d: the document to classify; its ``terms`` attribute is read.
        wfunc: weight function selector passed through to ``self.weight``
            (e.g. tfbdc).

    Returns:
        Whatever ``knn.knn_classify`` returns for the weighted vectors.
    """
    logging.info('Weighting docvec')
    # Remove terms that occur in the test document but not in the training
    # corpus.
    whitelst = [t for t in d.terms if t in self.terms()]
    dw = [self.weight(t, d, wfunc) for t in whitelst]  # weighted copy

    # Weighted vectors of the training data:
    #   0. strip unused terms; only the terms kept for ``d`` are respected
    #   1. weight them with ``wfunc``
    twv = [[self.weight(t, doc, wfunc) for t in whitelst] for doc in self.DOCVECS]
    # ``doc`` rather than ``d``, so the parameter is never shadowed or rebound.
    labels = [doc.label for doc in self.DOCVECS]

    logging.info('Using KNN to classify')
    return knn.knn_classify(knn_k_value, dw, twv, labels)
def classify_and_plot_grid(cities, k=1):
    """Label every integer grid point with kNN and draw one series per language.

    The grid covers longitudes -130..-60 and latitudes 20..55, both ends
    included. Points are classified through
    ``knn.knn_classify(k, cities, point)``.
    """
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    # Enumerate the grid in the same order as a longitude-major nested loop.
    grid_points = [(lon, lat)
                   for lon in range(-130, -59)
                   for lat in range(20, 56)]
    for point in grid_points:
        lon_series, lat_series = plots[knn.knn_classify(k, cities, point)]
        lon_series.append(point[0])
        lat_series.append(point[1])

    for language, series in plots.items():
        plt.scatter(series[0], series[1], color=colors[language],
                    marker=markers[language], label=language, zorder=10)

    plot_state_borders(plt)
    plt.legend(loc=0)
    plt.axis([-130, -60, 20, 55])
    plt.title("Favorite Programming Languages")
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Re-classify each known city with kNN and plot the predicted labels.

    For every entry of ``cities`` the location ``city[0]`` is classified via
    ``knn_classify(k, cities, location)`` and stored under the predicted
    language. One scatter series per language is then drawn and titled with
    the number of neighbours.

    Compared with the earlier version, the coordinates are accumulated
    directly in ``plots`` as flat lists (they used to be wrapped in an extra
    list), and the two unused colormap objects are gone.
    """
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for city in cities:
        location = city[0]
        predicted = knn_classify(k, cities, location)
        # As before, any label other than Java/Python is drawn as R.
        xs, ys = plots[predicted if predicted in plots else "R"]
        xs.append(location[0])
        ys.append(location[1])

    # Create a scatter series for each language.
    for language, (xs, ys) in plots.items():
        plt.scatter(xs, ys, color=colors[language], marker=markers[language],
                    label=language, zorder=10)

    plt.title('for {} neighbours'.format(k))
    plt.show()
def __real_sample_initialization(self, training_data, target_labels, k, p):
    """Seed the neuron weights with real training samples.

    Rows of ``training_data`` are visited in order. A row is adopted as a
    neuron's initial weight vector when its class still needs neurons
    (per ``self.neurons_per_class``) and ``knn.knn_classify`` returns the
    row's own target label. If the quota of any class cannot be filled this
    way, a message is printed and the random initialisation is used instead.
    ``p`` is accepted but not used here.
    """
    remaining_per_class = list(self.neurons_per_class)
    remaining_total = sum(remaining_per_class)
    n_classes = self.class_count
    picked_weights = []
    picked_labels = []

    for row in range(training_data.shape[0]):
        for cls in range(n_classes):
            # Locate the position of this row's label in the class list.
            if target_labels[row] != self.class_labels[cls]:
                continue
            # Only consider the sample while its class still needs neurons.
            if remaining_per_class[cls] > 0:
                # Majority label among the k nearest neighbours of the sample.
                knn_label = knn.knn_classify(training_data[row], training_data,
                                             target_labels, self.class_labels, k)
                # Adopt the sample only if kNN agrees with its true class.
                if knn_label == target_labels[row]:
                    picked_weights.append(training_data[row])
                    picked_labels.append(target_labels[row])
                    remaining_total -= 1
                    remaining_per_class[cls] -= 1
            # The label has been located; presumably class labels are unique,
            # so no later class can match. NOTE(review): confirm.
            break

    # Check whether every neuron received a weight vector.
    if remaining_total == 0:
        self.neuron_weights = np.array(picked_weights)
        self.neuron_labels = np.array(picked_labels)
    else:
        print("Using real samples as initial weights was not possible. Random initial weights will be used...\n")
        self.__random_weight_initialization(training_data.shape[1])
def classify_and_plot_grid(cities, k=1):
    """Leave-one-out classify every city with kNN and plot the predictions.

    Each city is classified by ``knn_classify`` against all entries of
    ``cities`` that are not equal to it. Its location is then recorded under
    the predicted language. One scatter series per language is drawn on top
    of the state borders.
    """
    from knn import knn_classify

    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    # First collect (location, predicted language) for every city ...
    predictions = []
    for city in cities:
        others = [item for item in cities if item != city]
        predictions.append((city[0], knn_classify(k, others, city[0])))

    # ... then sort the coordinates into the per-language series.
    for location, language in predictions:
        longitude, latitude = location
        xs, ys = plots[language]
        xs.append(longitude)
        ys.append(latitude)

    # Create a scatter series for each language.
    for language in plots:
        xs, ys = plots[language]
        plt.scatter(xs, ys, color=colors[language], marker=markers[language],
                    label=language, zorder=10)

    plot_state_borders(plt)  # assume we have a function that does this
    plt.legend(loc=0)  # let matplotlib choose the location
    plt.axis([-130, -60, 20, 55])  # set the axes
    plt.title("Favorite Programming Languages, " + str(k) + " neighbor[s]")
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Scatter-plot the kNN-predicted language of every integer grid point.

    Grid: longitudes from range(-130, -60), latitudes from range(20, 55).
    Each point is classified with ``knn_classify(k, cities, (lon, lat))``.
    """
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for longitude in range(-130, -60):
        for latitude in range(20, 55):
            predicted = knn_classify(k, cities, (longitude, latitude))
            xs, ys = plots[predicted]
            xs.append(longitude)
            ys.append(latitude)

    for language in plots:
        xs, ys = plots[language]
        plt.scatter(xs, ys, color=colors[language], marker=markers[language],
                    label=language, zorder=10)
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Classify the integer grid points with kNN and plot them per language.

    Longitudes are taken from range(-130, -60) and latitudes from
    range(20, 55). ``knn_classify`` is imported from ``knn`` inside the
    function and called as ``knn_classify(k, cities, (lon, lat))``.
    """
    from knn import knn_classify

    colors = {"Java": "r", "Python": "b", "R": "g"}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            xs, ys = plots[knn_classify(k, cities, (lon, lat))]
            xs.append(lon)
            ys.append(lat)

    # Create a scatter series for each language.
    for language, series in plots.items():
        plt.scatter(series[0], series[1], color=colors[language],
                    marker=markers[language], label=language, zorder=10)

    plot_state_borders(plt)
    plt.axis([-130, -60, 20, 55])
    plt.title("Predicted Preferred Language by {} - NN".format(k))
    plt.legend(loc=1)
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Plot the language kNN predicts at each integer grid point.

    The grid uses longitudes range(-130, -60) and latitudes range(20, 55).
    Each ``[lon, lat]`` point is labelled by
    ``knn_classify(k, cities, point)``.
    """
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            label = knn_classify(k, cities, [lon, lat])
            xs, ys = plots[label]
            xs.append(lon)
            ys.append(lat)

    # One scatter series per language.
    for language in plots:
        xs, ys = plots[language]
        plt.scatter(xs, ys, color=colors[language], marker=markers[language],
                    label=language, zorder=0)

    plt.legend(loc=0)
    plt.axis([-130, -60, 20, 55])
    plt.title(str(k) + "K - value")
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Classify each integer grid point with kNN and scatter-plot the labels.

    Longitudes range over range(-130, -60) and latitudes over range(20, 55).
    Each ``[longitude, latitude]`` list is classified with
    ``knn_classify(k, cities, point)``.
    """
    colors = {"Java": "r", "Python": "b", "R": "g"}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for longitude in range(-130, -60):
        for latitude in range(20, 55):
            point = [longitude, latitude]
            predicted = knn_classify(k, cities, point)
            lon_series, lat_series = plots[predicted]
            lon_series.append(longitude)
            lat_series.append(latitude)

    # Distinct names here keep the grid loop variables from being shadowed.
    for language in plots:
        lon_series, lat_series = plots[language]
        plt.scatter(lon_series, lat_series, color=colors[language],
                    marker=markers[language], label=language, zorder=10)

    plt.legend(loc=0)
    plt.axis([-130, -60, 20, 55])
    plt.title(f"Favorite Programming Languages with k: {k}")
    plt.show()
def classify_and_plot_grid(cities, k=1):
    """Classify the integer grid points of the map and plot them as cities.

    Every point with longitude in range(-130, -60) and latitude in
    range(20, 55) is classified with ``knn_classify(k, cities, point)``.
    The ``[point, prediction]`` pairs are then passed to ``plot_cities``.

    The longitude stop is -60. It was previously written as 60 (missing
    minus sign), which swept the grid all the way to longitude +59.
    """
    from knn import knn_classify

    mycities = []
    for longitude in range(-130, -60):
        for latitude in range(20, 55):
            lat_long = (longitude, latitude)
            prediction = knn_classify(k, cities, lat_long)
            mycities.append([lat_long, prediction])

    # plot_cities() builds the per-language scatter series itself.
    plot_cities(mycities)
def classify_and_plot_grid(cities, k=1):
    """Re-classify each known city with kNN and plot the predicted labels.

    The location ``city[0]`` of every entry in ``cities`` is classified with
    ``knn_classify(k, cities, location)`` and stored under the predicted
    language. One scatter series per language is drawn and the plot is titled
    with the number of neighbours.

    Coordinates are now collected directly in ``plots`` as flat lists (they
    were previously wrapped in an extra list), and the two colormap objects
    that were created but never used have been dropped.
    """
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for city in cities:
        location = city[0]
        val = knn_classify(k, cities, location)
        # Labels other than Java/Python keep falling into the R series.
        series_x, series_y = plots[val if val in plots else "R"]
        series_x.append(location[0])
        series_y.append(location[1])

    for language, (x, y) in plots.items():
        plt.scatter(x, y, color=colors[language], marker=markers[language],
                    label=language, zorder=10)

    plt.title('for {} neighbours'.format(k))
    plt.show()
# Bayesian classification with parzen estimate h = 0.3 px_given_1_parzen = parzen.parzen_estimate(x_samples_1, x_test, h) px_given_2_parzen = parzen.parzen_estimate(x_samples_2, x_test, h) label_test_parzen = by.bayesian_classify(px_given_1_parzen, px_given_2_parzen) myplt.plot_with_labels( x_test, label_real, label_test_parzen, 'Clasificacion bayesiana con estimacion Parzen, h=' + str(h)) err_parzen = err.get_error(label_real, label_test_parzen) print('Error with parzen is: ' + str(err_parzen)) # Bayesian classification with knn estimate for k in k_list: px_given_1_knn = knn.knn_estimate(k, x_samples_1, x_test) px_given_2_knn = knn.knn_estimate(k, x_samples_2, x_test) label_test_knn = by.bayesian_classify(px_given_1_knn, px_given_2_knn) myplt.plot_with_labels( x_test, label_real, label_test_knn, 'Clasificacion bayesiana con estimacion KNN, k=' + str(k)) err_knn = err.get_error(label_real, label_test_knn) print('Error with knn is: ' + str(err_knn) + " k=" + str(k)) print('ITEM e) knn classifier') k_list = [1, 11, 51] for k in k_list: label_test_knnclas = knn.knn_classify(k, x_samples_1, x_samples_2, x_test) err_knnclas = err.get_error(label_real, label_test_knnclas) myplt.plot_with_labels(x_test, label_real, label_test_knnclas, 'Clasificacion de KNN, k=' + str(k)) print('Error with knn classify is: ' + str(err_knnclas) + " k=" + str(k))
from knn import knn_classify

# Create dataset: (longitude, latitude, favorite language) per city.
cities = [
    (-86.75, 33.5666666666667, 'Python'),
    (-88.25, 30.6833333333333, 'Python'),
    (-112.016666666667, 33.4333333333333, 'Java'),
    (-110.933333333333, 32.1166666666667, 'Java'),
    (-92.2333333333333, 34.7333333333333, 'R'),
    (-121.95, 37.7, 'R'),
    (-118.15, 33.8166666666667, 'Python'),
    (-118.233333333333, 34.05, 'Java'),
    (-122.316666666667, 37.8166666666667, 'R'),
    (-117.6, 34.05, 'Python'),
    (-116.533333333333, 33.8166666666667, 'Python'),
    (-121.5, 38.5166666666667, 'R'),
    (-117.166666666667, 32.7333333333333, 'R'),
    (-122.383333333333, 37.6166666666667, 'R'),
    (-121.933333333333, 37.3666666666667, 'R'),
    (-122.016666666667, 36.9833333333333, 'Python'),
    (-104.716666666667, 38.8166666666667, 'Python'),
    (-104.866666666667, 39.75, 'Python'),
    (-72.65, 41.7333333333333, 'R'),
    (-75.6, 39.6666666666667, 'Python'),
    (-77.0333333333333, 38.85, 'Python'),
    (-80.2666666666667, 25.8, 'Java'),
    (-81.3833333333333, 28.55, 'Java'),
    (-82.5333333333333, 27.9666666666667, 'Java'),
    (-84.4333333333333, 33.65, 'Python'),
    (-116.216666666667, 43.5666666666667, 'Python'),
    (-87.75, 41.7833333333333, 'Java'),
    (-86.2833333333333, 39.7333333333333, 'Java'),
    (-93.65, 41.5333333333333, 'Java'),
    (-97.4166666666667, 37.65, 'Java'),
    (-85.7333333333333, 38.1833333333333, 'Python'),
    (-90.25, 29.9833333333333, 'Java'),
    (-70.3166666666667, 43.65, 'R'),
    (-76.6666666666667, 39.1833333333333, 'R'),
    (-71.0333333333333, 42.3666666666667, 'R'),
    (-72.5333333333333, 42.2, 'R'),
    (-83.0166666666667, 42.4166666666667, 'Python'),
    (-84.6, 42.7833333333333, 'Python'),
    (-93.2166666666667, 44.8833333333333, 'Python'),
    (-90.0833333333333, 32.3166666666667, 'Java'),
    (-94.5833333333333, 39.1166666666667, 'Java'),
    (-90.3833333333333, 38.75, 'Python'),
    (-108.533333333333, 45.8, 'Python'),
    (-95.9, 41.3, 'Python'),
    (-115.166666666667, 36.0833333333333, 'Java'),
    (-71.4333333333333, 42.9333333333333, 'R'),
    (-74.1666666666667, 40.7, 'R'),
    (-106.616666666667, 35.05, 'Python'),
    (-78.7333333333333, 42.9333333333333, 'R'),
    (-73.9666666666667, 40.7833333333333, 'R'),
    (-80.9333333333333, 35.2166666666667, 'Python'),
    (-78.7833333333333, 35.8666666666667, 'Python'),
    (-100.75, 46.7666666666667, 'Java'),
    (-84.5166666666667, 39.15, 'Java'),
    (-81.85, 41.4, 'Java'),
    (-82.8833333333333, 40, 'Java'),
    (-97.6, 35.4, 'Python'),
    (-122.666666666667, 45.5333333333333, 'Python'),
    (-75.25, 39.8833333333333, 'Python'),
    (-80.2166666666667, 40.5, 'Python'),
    (-71.4333333333333, 41.7333333333333, 'R'),
    (-81.1166666666667, 33.95, 'R'),
    (-96.7333333333333, 43.5666666666667, 'Python'),
    (-90, 35.05, 'R'),
    (-86.6833333333333, 36.1166666666667, 'R'),
    (-97.7, 30.3, 'Python'),
    (-96.85, 32.85, 'Java'),
    (-95.35, 29.9666666666667, 'Java'),
    (-98.4666666666667, 29.5333333333333, 'Java'),
    (-111.966666666667, 40.7666666666667, 'Python'),
    (-73.15, 44.4666666666667, 'R'),
    (-77.3333333333333, 37.5, 'Python'),
    (-122.3, 47.5333333333333, 'Python'),
    (-89.3333333333333, 43.1333333333333, 'R'),
    (-104.816666666667, 41.15, 'Java'),
]

# Reshape each record into ([longitude, latitude], language) with a list
# comprehension, the layout passed to knn_classify() below.
cities = [([longitude, latitude], language)
          for longitude, latitude, language in cities]

point_to_classify = [1, 1]
# print() with a single argument behaves the same on Python 2 and Python 3.
print(knn_classify(3, cities, point_to_classify))