Ejemplo n.º 1
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict a language for every integer grid point and plot the outcome.

    cities -- labelled training data, passed unchanged to knn.knn_classify
    k      -- number of neighbours, passed unchanged to knn.knn_classify

    The grid covers longitude -130..-60 and latitude 20..55 inclusive, in
    steps of one degree. The predictions are drawn as one scatter series
    per language, then the state borders are drawn and the figure is shown.
    """
    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    # language -> (longitudes, latitudes) of the grid points predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -59):
        for lat in range(20, 56):
            xs, ys = series[knn.knn_classify(k, cities, (lon, lat))]
            xs.append(lon)
            ys.append(lat)

    # One scatter series per language, each with its own marker and colour.
    for name in series:
        xs, ys = series[name]
        plt.scatter(xs, ys, color=color_for[name], marker=marker_for[name],
                    label=name, zorder=10)

    plot_state_borders(plt)

    plt.legend(loc=0)             # loc=0 lets matplotlib pick the position
    plt.axis([-130, -60, 20, 55])
    plt.title("Favorite Programming Languages")
    plt.show()
Ejemplo n.º 2
0
def classify_and_plot_grid(cities, k=1):
    """
    Classify every integer grid point and plot the predictions.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree. Each prediction is stored as ([longitude, latitude], label) and
    the whole list is handed to plot_cities() for drawing.
    """
    predictions = []
    # Start at -130: the western edge of the documented longitude range
    # (-130, -60). The previous start of -120 silently dropped ten columns.
    for lon in range(-130, -60):
        for lat in range(20, 55):
            label = knn_classify(k, cities, [lon, lat])
            predictions.append(([lon, lat], label))

    plot_cities(predictions)
Ejemplo n.º 3
0
def classify_and_plot_grid(cities, k=1):
    """
    Classify every integer grid point and draw one scatter series per language.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree.
    """
    # language -> (longitudes, latitudes) of the grid points predicted for it
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            language = knn_classify(k, cities, (lon, lat))
            plots[language][0].append(lon)
            plots[language][1].append(lat)

    for language, (x, y) in plots.items():
        plt.scatter(x,
                    y,
                    color=colors[language],
                    marker=markers[language],
                    label=language,
                    zorder=10)

    # The series are labelled, so draw the legend that makes the labels visible.
    plt.legend(loc=0)
    plt.show()
Ejemplo n.º 4
0
def digit_classifier():
    """
    Train on the files in 'trainingDigits' and report the kNN error on 'testDigits'.

    File names are expected to look like "<digit>_<count>.txt"; the part
    before the underscore is parsed as the true label. Every test file is
    classified with knn_classify(..., 3) and one line is printed per file,
    followed by the total error count and the error rate.
    """
    # training phase
    train_file_list = os.listdir('trainingDigits')
    # One row per training file; 1024 columns
    # (presumably the length of the vector img2vector returns -- confirm).
    train_vector = zeros((len(train_file_list), 1024))
    digit_labels = []
    for row, file_full_name in enumerate(train_file_list):
        # Strip the extension, then keep the digit in front of the underscore.
        file_name = file_full_name.split('.')[0]
        digit_labels.append(int(file_name.split('_')[0]))
        train_vector[row, :] = img2vector('trainingDigits/%s' % file_full_name)

    # test phase
    test_file_list = os.listdir('testDigits')
    error_count = 0
    for file_full_name in test_file_list:
        file_name = file_full_name.split('.')[0]
        digit_name = int(file_name.split('_')[0])
        test_vector = img2vector('testDigits/%s' % file_full_name)
        classify_result = knn_classify(test_vector, train_vector, digit_labels, 3)
        print('Classify result = %s, real answer = %s' % (classify_result, digit_name))
        if classify_result != digit_name:
            error_count += 1

    test_file_num = len(test_file_list)
    # An empty test directory used to raise ZeroDivisionError here.
    error_rate = error_count / float(test_file_num) if test_file_num else 0.0
    print('Total error = %d, error rate = %f' % (error_count, error_rate))
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict a language for every integer grid point and plot the outcome.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree. The figure title carries the value of k.
    """
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    # language -> (longitudes, latitudes) of the grid points predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            xs, ys = series[knn_classify(k, cities, [lon, lat])]
            xs.append(lon)
            ys.append(lat)

    # One scatter series per language.
    for name in series:
        xs, ys = series[name]
        plt.scatter(xs, ys, color=color_for[name], marker=marker_for[name],
                    label=name)

    plt.legend()
    plt.title("Favorite Programming Languages for k=" + str(k))
    plt.show()
Ejemplo n.º 6
0
def classify_and_plot_grid(cities, k=1):
    """
    Re-classify each entry of cities with kNN and plot the predicted labels.

    cities -- sequence whose items carry a (longitude, latitude) pair at
              index 0; also used as the training data for knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify
    """
    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    # language -> (longitudes, latitudes) of the cities predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    # NOTE(review): each city is classified against a training set that still
    # contains the city itself, and no grid is built despite the function
    # name -- confirm that this is intended.
    for entry in cities:
        location = entry[0]
        xs, ys = series[knn_classify(k, cities, location)]
        xs.append(location[0])
        ys.append(location[1])

    # One scatter series per language.
    for name in series:
        xs, ys = series[name]
        plt.scatter(xs, ys, color=color_for[name], marker=marker_for[name],
                    label=name, zorder=10)

    plot_state_borders(plt)
    plt.legend(loc=0)              # loc=0 lets matplotlib pick the position
    plt.axis([-130, -60, 20, 55])
    plt.title("Predicted Programming Languages for k value of %d" % (k))
    plt.show()
Ejemplo n.º 7
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict a language for every integer grid point and plot the outcome.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree.
    """
    # Each language gets its own marker and colour.
    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    # language -> (longitudes, latitudes) of the grid points predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            xs, ys = series[knn_classify(k, cities, [lon, lat])]
            xs.append(lon)
            ys.append(lat)

    for name in series:
        xs, ys = series[name]
        plt.scatter(xs, ys, color=color_for[name], marker=marker_for[name],
                    label=name, zorder=0)

    plot_state_borders(plt, color='black')

    plt.legend(loc=0)              # loc=0 lets matplotlib pick the position
    plt.axis([-130, -60, 20, 55])
    plt.title(str(k) + "-Nearest Neighbor Programming Languages")
    plt.show()


    """
Ejemplo n.º 8
0
    def predict_with_knn(self, knn_k_value, d, wfunc):
        """
        Return the label predicted for document d by kNN.

        knn_k_value -- number of neighbours handed to knn.knn_classify
        d           -- document to classify; its ``terms`` are read
        wfunc       -- weight function forwarded to self.weight (tfbdc, ...)
        """
        logging.info('Weighting docvec')

        # Keep only the terms of d that are also returned by self.terms(),
        # i.e. drop terms seen in the test corpus but not in the train corpus.
        # The collection is fetched once instead of once per term.
        known_terms = self.terms()
        whitelst = [t for t in d.terms if t in known_terms]

        # Weighted copy of d, restricted to the whitelisted terms.
        dw = [self.weight(t, d, wfunc) for t in whitelst]

        # Weighted training vectors over the same whitelisted terms, so every
        # vector has the same components as dw.
        twv = [[self.weight(t, doc, wfunc) for t in whitelst]
               for doc in self.DOCVECS]

        # A separate loop name keeps the parameter d from being shadowed
        # (comprehension variables leak on Python 2).
        labels = [doc.label for doc in self.DOCVECS]

        logging.info('Using KNN to classify')
        # Honour the caller's k; it used to be ignored in favour of a literal 5.
        return knn.knn_classify(knn_k_value, dw, twv, labels)
Ejemplo n.º 9
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict a language for every integer grid point and plot the outcome.

    cities -- labelled training data, passed unchanged to knn.knn_classify
    k      -- number of neighbours, passed unchanged to knn.knn_classify

    The grid covers longitude -130..-60 and latitude 20..55 inclusive, in
    steps of one degree.
    """
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    # language -> (longitudes, latitudes) of the grid points predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -59):
        for lat in range(20, 56):
            predicted = knn.knn_classify(k, cities, (lon, lat))
            lons, lats = series[predicted]
            lons.append(lon)
            lats.append(lat)

    # One scatter series per language.
    for name, coords in series.items():
        plt.scatter(coords[0], coords[1], color=color_for[name],
                    marker=marker_for[name], label=name, zorder=10)

    plot_state_borders(plt)

    plt.legend(loc=0)
    plt.axis([-130, -60, 20, 55])
    plt.title("Favorite Programming Languages")
    plt.show()
Ejemplo n.º 10
0
def classify_and_plot_grid(cities, k=1):
    """
    Re-classify each entry of cities with kNN and plot the predicted labels.

    cities -- sequence whose items carry a (longitude, latitude) pair at
              index 0; also used as the training data for knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    A prediction other than "Java" or "Python" is drawn in the "R" series.
    """
    # language -> (longitudes, latitudes); filled directly instead of through
    # six parallel lists that were then wrapped in an extra list level.
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    # NOTE(review): the training set still contains the city being
    # classified, and no grid is built despite the function name -- confirm
    # that this is intended.
    for city in cities:
        location = city[0]
        predicted = knn_classify(k, cities, location)
        # Anything that is not Java or Python falls into the "R" bucket.
        bucket = predicted if predicted in ("Java", "Python") else "R"
        plots[bucket][0].append(location[0])
        plots[bucket][1].append(location[1])

    # One scatter series per language.
    for language, (x, y) in plots.items():
        plt.scatter(x,
                    y,
                    color=colors[language],
                    marker=markers[language],
                    label=language,
                    zorder=10)

    # The series are labelled, so draw the legend that makes the labels visible.
    plt.legend(loc=0)
    plt.title('for {} neighbours'.format(k))
    plt.show()
Ejemplo n.º 11
0
    def __real_sample_initialization(self, training_data, target_labels, k, p):
        """
        Seed the neuron weights with real training samples.

        Rows of training_data are visited in order. A row is taken as the
        initial weight vector of a neuron of its class when that class still
        needs neurons (per self.neurons_per_class) and knn.knn_classify
        returns the row's own label for it. If any neuron is still unfilled
        after all rows have been visited, the collected samples are discarded
        and random initialization is used instead.

        The parameter p is not used in this method.
        """
        remaining_per_class = list(self.neurons_per_class)
        still_needed = sum(remaining_per_class)
        n_classes = self.class_count
        chosen_weights = []
        chosen_labels = []

        for row in range(training_data.shape[0]):
            for cls in range(n_classes):
                # Locate the class index that matches this sample's label.
                if target_labels[row] == self.class_labels[cls]:
                    # Accept the sample only when the class still needs
                    # neurons and kNN agrees with the sample's true label.
                    if remaining_per_class[cls] > 0 and knn.knn_classify(
                            training_data[row], training_data, target_labels,
                            self.class_labels, k) == target_labels[row]:
                        chosen_weights.append(training_data[row])
                        chosen_labels.append(target_labels[row])
                        still_needed -= 1
                        remaining_per_class[cls] -= 1
                    # The label has been matched; skip the remaining classes.
                    break

        if still_needed == 0:
            self.neuron_weights = np.array(chosen_weights)
            self.neuron_labels = np.array(chosen_labels)
        else:
            # Not every neuron received a real sample.
            print("Using real samples as initial weights was not possible. Random initial weights will be used...\n")
            self.__random_weight_initialization(training_data.shape[1])
Ejemplo n.º 12
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict each city's language from the other cities and plot the result.

    cities -- sequence of items whose index 0 is a (longitude, latitude) pair
    k      -- number of neighbours, passed unchanged to knn_classify

    Every city is classified against the list of items that compare unequal
    to it, so it does not appear in its own training set.
    """
    from knn import knn_classify

    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    # language -> (longitudes, latitudes) of the cities predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    # Pass 1: (location, predicted language) for every city.
    predictions = [
        (city[0],
         knn_classify(k, [other for other in cities if other != city], city[0]))
        for city in cities
    ]

    # Pass 2: distribute the coordinates over the per-language series.
    for (lon, lat), predicted in predictions:
        xs, ys = series[predicted]
        xs.append(lon)
        ys.append(lat)

    for name in series:
        xs, ys = series[name]
        plt.scatter(xs, ys, color=color_for[name], marker=marker_for[name],
                    label=name, zorder=10)

    plot_state_borders(plt)

    plt.legend(loc=0)              # loc=0 lets matplotlib pick the position
    plt.axis([-130, -60, 20, 55])
    plt.title("Favorite Programming Languages, " + str(k) + " neighbor[s]")
    plt.show()
Ejemplo n.º 13
0
def classify_and_plot_grid(cities, k=1):
    """
    Classify every integer grid point and draw one scatter series per language.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree.
    """
    # language -> (longitudes, latitudes) of the grid points predicted for it
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            language = knn_classify(k, cities, (lon, lat))
            plots[language][0].append(lon)
            plots[language][1].append(lat)

    for language, (x, y) in plots.items():
        plt.scatter(x, y, color=colors[language], marker=markers[language],
                    label=language, zorder=10)

    # The series are labelled, so draw the legend that makes the labels visible.
    plt.legend(loc=0)
    plt.show()
Ejemplo n.º 14
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict a language for every integer grid point and plot the outcome.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree.
    """
    from knn import knn_classify

    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    # language -> (longitudes, latitudes) of the grid points predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            xs, ys = series[knn_classify(k, cities, (lon, lat))]
            xs.append(lon)
            ys.append(lat)

    # One scatter series per language.
    for name in series:
        xs, ys = series[name]
        plt.scatter(xs, ys, color=color_for[name], marker=marker_for[name],
                    label=name, zorder=10)

    plot_state_borders(plt)

    plt.axis([-130, -60, 20, 55])
    plt.title("Predicted Preferred Language by {} - NN".format(k))
    plt.legend(loc=1)
    plt.show()
Ejemplo n.º 15
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict a language for every integer grid point and plot the outcome.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree.
    """
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    # language -> (longitudes, latitudes) of the grid points predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            lons, lats = series[knn_classify(k, cities, [lon, lat])]
            lons.append(lon)
            lats.append(lat)

    # One scatter series per language.
    for name, coords in series.items():
        plt.scatter(coords[0], coords[1], color=color_for[name],
                    marker=marker_for[name], label=name, zorder=0)

    plt.legend(loc=0)
    plt.axis([-130, -60, 20, 55])
    plt.title(str(k) + "K - value")
    plt.show()
Ejemplo n.º 16
0
def classify_and_plot_grid(cities, k=1):
    """
    Predict a language for every integer grid point and plot the outcome.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree.
    """
    marker_for = {"Java": "o", "Python": "s", "R": "^"}
    color_for = {"Java": "r", "Python": "b", "R": "g"}
    # language -> (longitudes, latitudes) of the grid points predicted for it
    series = {"Java": ([], []), "Python": ([], []), "R": ([], [])}

    for lon in range(-130, -60):
        for lat in range(20, 55):
            predicted = knn_classify(k, cities, [lon, lat])
            lons, lats = series[predicted]
            lons.append(lon)
            lats.append(lat)

    # One scatter series per language.
    for name in series:
        lons, lats = series[name]
        plt.scatter(lons, lats, color=color_for[name], marker=marker_for[name],
                    label=name, zorder=10)

    plt.legend(loc=0)
    plt.axis([-130, -60, 20, 55])
    plt.title(f"Favorite Programming Languages with k: {k}")
    plt.show()
Ejemplo n.º 17
0
def classify_and_plot_grid(cities, k=1):
    """
    Classify every integer grid point and plot the predictions.

    cities -- labelled training data, passed unchanged to knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    The grid covers longitude -130..-61 and latitude 20..54 in steps of one
    degree. Each prediction is stored as [(longitude, latitude), label] and
    the whole list is handed to plot_cities() for drawing.
    """
    from knn import knn_classify

    grid_predictions = []
    # The eastern bound is -60. It used to read 60 (missing sign), which
    # classified 120 extra columns of points east of the documented range.
    for longitude in range(-130, -60):
        for latitude in range(20, 55):
            point = (longitude, latitude)
            grid_predictions.append([point, knn_classify(k, cities, point)])

    plot_cities(grid_predictions)
Ejemplo n.º 18
0
def classify_and_plot_grid(cities, k=1):
    """
    Re-classify each entry of cities with kNN and plot the predicted labels.

    cities -- sequence whose items carry a (longitude, latitude) pair at
              index 0; also used as the training data for knn_classify
    k      -- number of neighbours, passed unchanged to knn_classify

    A prediction other than "Java" or "Python" is drawn in the "R" series.
    """
    # language -> (longitudes, latitudes); filled directly instead of through
    # six parallel lists that were then wrapped in an extra list level.
    plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])}
    markers = {"Java": "o", "Python": "s", "R": "^"}
    colors = {"Java": "r", "Python": "b", "R": "g"}

    # NOTE(review): the training set still contains the city being
    # classified, and no grid is built despite the function name -- confirm
    # that this is intended.
    for city in cities:
        location = city[0]
        predicted = knn_classify(k, cities, location)
        # Anything that is not Java or Python falls into the "R" bucket.
        bucket = predicted if predicted in ("Java", "Python") else "R"
        plots[bucket][0].append(location[0])
        plots[bucket][1].append(location[1])

    # One scatter series per language.
    for language, (x, y) in plots.items():
        plt.scatter(x, y, color=colors[language], marker=markers[language],
                    label=language, zorder=10)

    # The series are labelled, so draw the legend that makes the labels visible.
    plt.legend(loc=0)
    plt.title('for {} neighbours'.format(k))
    plt.show()
Ejemplo n.º 19
0
# Bayesian classification with parzen estimate
# h is the third argument of parzen.parzen_estimate
# (presumably the Parzen window width -- confirm against parzen.py).
h = 0.3
# One estimate per class sample set, both evaluated on x_test.
px_given_1_parzen = parzen.parzen_estimate(x_samples_1, x_test, h)
px_given_2_parzen = parzen.parzen_estimate(x_samples_2, x_test, h)
label_test_parzen = by.bayesian_classify(px_given_1_parzen, px_given_2_parzen)
myplt.plot_with_labels(
    x_test, label_real, label_test_parzen,
    'Clasificacion bayesiana con estimacion Parzen, h=' + str(h))
err_parzen = err.get_error(label_real, label_test_parzen)
print('Error with parzen is: ' + str(err_parzen))

# Bayesian classification with knn estimate
# NOTE(review): k_list is read here but only assigned further down in this
# fragment, so it is presumably also defined earlier in the file -- confirm.
for k in k_list:
    px_given_1_knn = knn.knn_estimate(k, x_samples_1, x_test)
    px_given_2_knn = knn.knn_estimate(k, x_samples_2, x_test)
    label_test_knn = by.bayesian_classify(px_given_1_knn, px_given_2_knn)
    myplt.plot_with_labels(
        x_test, label_real, label_test_knn,
        'Clasificacion bayesiana con estimacion KNN, k=' + str(k))
    err_knn = err.get_error(label_real, label_test_knn)
    print('Error with knn is: ' + str(err_knn) + " k=" + str(k))

# Item e): classify x_test directly with knn.knn_classify, once per k.
print('ITEM e) knn classifier')
# k_list is rebound here to the values used for the direct kNN classifier.
k_list = [1, 11, 51]
for k in k_list:
    label_test_knnclas = knn.knn_classify(k, x_samples_1, x_samples_2, x_test)
    err_knnclas = err.get_error(label_real, label_test_knnclas)
    myplt.plot_with_labels(x_test, label_real, label_test_knnclas,
                           'Clasificacion de KNN, k=' + str(k))
    print('Error with knn classify is: ' + str(err_knnclas) + " k=" + str(k))
Ejemplo n.º 20
0
from knn import knn_classify



# Create dataset: one (longitude, latitude, language) tuple per record.
cities = [(-86.75,33.5666666666667,'Python'),(-88.25,30.6833333333333,'Python'),(-112.016666666667,33.4333333333333,'Java'),(-110.933333333333,32.1166666666667,'Java'),(-92.2333333333333,34.7333333333333,'R'),(-121.95,37.7,'R'),(-118.15,33.8166666666667,'Python'),(-118.233333333333,34.05,'Java'),(-122.316666666667,37.8166666666667,'R'),(-117.6,34.05,'Python'),(-116.533333333333,33.8166666666667,'Python'),(-121.5,38.5166666666667,'R'),(-117.166666666667,32.7333333333333,'R'),(-122.383333333333,37.6166666666667,'R'),(-121.933333333333,37.3666666666667,'R'),(-122.016666666667,36.9833333333333,'Python'),(-104.716666666667,38.8166666666667,'Python'),(-104.866666666667,39.75,'Python'),(-72.65,41.7333333333333,'R'),(-75.6,39.6666666666667,'Python'),(-77.0333333333333,38.85,'Python'),(-80.2666666666667,25.8,'Java'),(-81.3833333333333,28.55,'Java'),(-82.5333333333333,27.9666666666667,'Java'),(-84.4333333333333,33.65,'Python'),(-116.216666666667,43.5666666666667,'Python'),(-87.75,41.7833333333333,'Java'),(-86.2833333333333,39.7333333333333,'Java'),(-93.65,41.5333333333333,'Java'),(-97.4166666666667,37.65,'Java'),(-85.7333333333333,38.1833333333333,'Python'),(-90.25,29.9833333333333,'Java'),(-70.3166666666667,43.65,'R'),(-76.6666666666667,39.1833333333333,'R'),(-71.0333333333333,42.3666666666667,'R'),(-72.5333333333333,42.2,'R'),(-83.0166666666667,42.4166666666667,'Python'),(-84.6,42.7833333333333,'Python'),(-93.2166666666667,44.8833333333333,'Python'),(-90.0833333333333,32.3166666666667,'Java'),(-94.5833333333333,39.1166666666667,'Java'),(-90.3833333333333,38.75,'Python'),(-108.533333333333,45.8,'Python'),(-95.9,41.3,'Python'),(-115.166666666667,36.0833333333333,'Java'),(-71.4333333333333,42.9333333333333,'R'),(-74.1666666666667,40.7,'R'),(-106.616666666667,35.05,'Python'),(-78.7333333333333,42.9333333333333,'R'),(-73.9666666666667,40.7833333333333,'R'),(-80.9333333333333,35.2166666666667,'Python'),(-78.7833333333333,35.8666666666667,'Python'),(-100.75,46.7666666666667,'Java'
),(-84.5166666666667,39.15,'Java'),(-81.85,41.4,'Java'),(-82.8833333333333,40,'Java'),(-97.6,35.4,'Python'),(-122.666666666667,45.5333333333333,'Python'),(-75.25,39.8833333333333,'Python'),(-80.2166666666667,40.5,'Python'),(-71.4333333333333,41.7333333333333,'R'),(-81.1166666666667,33.95,'R'),(-96.7333333333333,43.5666666666667,'Python'),(-90,35.05,'R'),(-86.6833333333333,36.1166666666667,'R'),(-97.7,30.3,'Python'),(-96.85,32.85,'Java'),(-95.35,29.9666666666667,'Java'),(-98.4666666666667,29.5333333333333,'Java'),(-111.966666666667,40.7666666666667,'Python'),(-73.15,44.4666666666667,'R'),(-77.3333333333333,37.5,'Python'),(-122.3,47.5333333333333,'Python'),(-89.3333333333333,43.1333333333333,'R'),(-104.816666666667,41.15,'Java')]

# Reshape every (longitude, latitude, language) record into a
# ([longitude, latitude], language) pair; the result is a list of pairs.
cities = [([longitude, latitude], language) for longitude, latitude, language in cities]

point_to_classify = [1, 1]

# print(...) with a single argument behaves the same on Python 2 and is
# required on Python 3, where the bare print statement is a SyntaxError.
print(knn_classify(3, cities, point_to_classify))