Example #1
def test_naive_bayes_classifier_predict():
    train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    nb = MyNaiveBayesClassifier()
    nb.fit(train, y)

    pred = nb.predict([[1, 5]])

    assert pred == ["yes"]
    # RQ5 (fake) iPhone purchases dataset
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    mypy = MyPyTable(iphone_col_names, iphone_table)
    y2 = myutils.get_mypycol(mypy, "buys_iphone")
    nb2 = MyNaiveBayesClassifier()
    nb2.fit(iphone_table, y2)
    pred2 = nb2.predict([[1, 2, "fair"]])

    assert pred2 == ["yes"]

    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    mypy2 = MyPyTable(train_col_names, train_table)
    y3 = myutils.get_mypycol(mypy2, "class")
    nb3 = MyNaiveBayesClassifier()
    nb3.fit(train_table, y3)
    pred3 = nb3.predict([["weekday", "winter", "high", "heavy"]])

    assert pred3 == ["very late"]
Example #2
def test_random_forest_fit():
    interview_header = ["level", "lang", "tweets", "phd", "interviewed_well"]
    interview_table = [["Senior", "Java", "no", "no", "False"],
                       ["Senior", "Java", "no", "yes", "False"],
                       ["Mid", "Python", "no", "no", "True"],
                       ["Junior", "Python", "no", "no", "True"],
                       ["Junior", "R", "yes", "no", "True"],
                       ["Junior", "R", "yes", "yes", "False"],
                       ["Mid", "R", "yes", "yes", "True"],
                       ["Senior", "Python", "no", "no", "False"],
                       ["Senior", "R", "yes", "no", "True"],
                       ["Junior", "Python", "yes", "no", "True"],
                       ["Senior", "Python", "yes", "yes", "True"],
                       ["Mid", "Python", "no", "yes", "True"],
                       ["Mid", "Java", "yes", "no", "True"],
                       ["Junior", "Python", "no", "yes", "False"]]
    myutils.prepend_attribute_label(interview_table, interview_header)

    interview_pytable = MyPyTable(column_names=interview_header,
                                  data=interview_table)
    y_col = interview_pytable.get_column("interviewed_well", False)
    x_cols = interview_pytable.drop_col("interviewed_well")

    many_trees = MyRandomForestClassifier()
    X_sample, y_sample = myutils.compute_bootstrapped_sample(x_cols, y_col)
    X_train, X_test, y_train, y_test = myutils.train_test_split(
        X_sample, y_sample, .33)
    many_trees.fit(X_train, y_train, X_test, y_test)
    y_predicted = many_trees.predict(X_test)

    numCorrectPredictions = 0
    numWrongPredictions = 0
    for predicted, actual in zip(y_predicted, y_test):
        if predicted == actual:
            numCorrectPredictions += 1
        else:
            numWrongPredictions += 1

    accuracy = np.round((numCorrectPredictions) /
                        (numCorrectPredictions + numWrongPredictions), 3)
    error_rate = np.round(
        (numWrongPredictions) / (numCorrectPredictions + numWrongPredictions),
        3)

    print("-----------------------------------------------------------")
    print("Accuracy and Error Rate")
    print("-----------------------------------------------------------")
    print()
    print("Random Forest: accuracy = {}, error rate = {}".format(
        accuracy, error_rate))
    print()
    print(
        "Because of the random aspect of this classifier, this will not always pass the tests"
    )
    print()
    print("Predicted table: " + str(y_predicted))
    print("Testing set:     " + str(y_test))
    for i in range(len(y_test)):
        assert y_predicted[i] == y_test[i]
Example #3
def test_My_Random_Forest_Classifier_predict():
    # Object Declarations
    # Tests with N = 3, M = 2, F = 2 and seed = 1
    rand_forest_test = MyRandomForestClassifier(3, 2, 2, 1)
    table = MyPyTable()

    # Variable Assignment and Declaration
    table.data = interview_table
    table.column_names = interview_header

    y_train, X_train = [], []
    for inst in interview_table:
        y_train.append(inst[-1])
        X_train.append(inst[:-1])

    # Sets X_test
    X_test = [["Junior", "Java", "yes", "no"],
              ["Junior", "Java", "yes", "yes"]]

    # Tests on the Interview Dataset
    rand_forest_test.header = interview_header[:-1]
    rand_forest_test.fit(X_train, y_train)
    y_predicted = rand_forest_test.predict(X_test)

    print("y_predicted:", y_predicted)

    # Trace Test

    assert y_predicted == ['True', 'False']
Example #4
def confusionCategorical(yTrue, yTest, header, categories):
    """Builds a confusion matrix as a MyPyTable.

    Rows are actual classes (yTrue); columns are predicted classes (yTest).
    header is expected to be [label, *categories, "Total", "Recognition (%)"].
    """
    table = MyPyTable()
    table.column_names = header
    table.data = []

    for val in categories:
        newRow = [val]
        for i in range(len(header) - 1):
            newRow.append(0)
        table.data.append(newRow)

    for i in range(len(yTrue)):
        rowIndex = categories.index(yTrue[i])
        colIndex = header.index(yTest[i])
        table.data[rowIndex][colIndex] += 1

    for row in table.data:
        total = 0
        for i in range(1, len(categories) + 1):
            total += row[i]
        row[len(categories) + 1] = total

    for i in range(len(table.data)):
        if table.data[i][len(categories) + 1] != 0:
            recognition = table.data[i][i +
                                        1] / table.data[i][len(categories) + 1]
            table.data[i][len(header) - 1] = round(100 * recognition, 2)
    return table
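A minimal usage sketch for confusionCategorical (all data hypothetical); it assumes the MyPyTable import used above, and a header laid out as a label column, one count column per category, a total column, and a recognition-percentage column:

y_actual = ["yes", "yes", "no", "yes", "no"]
y_predicted = ["yes", "no", "no", "yes", "no"]
header = ["Class", "yes", "no", "Total", "Recognition (%)"]
matrix = confusionCategorical(y_actual, y_predicted, header, ["yes", "no"])
for row in matrix.data:
    print(row)
# expected rows: ['yes', 2, 1, 3, 66.67] and ['no', 0, 2, 2, 100.0]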
Example #5
def table_setUp(file_name):
    """Loads a MyPyTable from a file in the input_data directory."""

    file_path = os.path.join("input_data", file_name)

    # Inputs data from file into the table 
    table = MyPyTable().load_from_file(file_path)

    return table
Example #6
def bagging(X, Y, N, M, F):
    # 1. split your dataset into a test set and a "remainder set"
    x_remainder, x_test, y_r, y_test = myevaluation.train_test_split(X, Y)
    # 2. using the remainder set, sample N bootstrap samples and use each one to build a classifier
    #    for each N sample:
    #        ~63% of the remainder set will be sampled into training set
    #        ~37% will be leftover for this tree's validation set
    forest = []
    # accuracies = [[0] for i in range(N)]
    accuracies = {}
    for i in range(N):
        x_train, y_train = compute_bootstrapped_sample(
            x_remainder, y_r)  #get the bootstrap sample
        tree = my_class.MyDecisionTreeClassifier()
        tree.fit(x_train, y_train, True, F)  #build classifier
        # get remainder of x_train and use as validation set
        x_v = []
        y_v = []
        for j in range(len(x_remainder)):
            if x_remainder[j] not in x_train:
                x_v.append(x_remainder[j])
                y_v.append(y_r[j])
        pred = tree.predict(x_v)
        accuracy = get_accuracy(y_v, pred)
        accuracies[str(i)] = accuracy  # {i: accuracy, }
        forest.append(tree)

    # 3. measure the performance of the tree on the validation set and select
    #    the best M of N trees based on the performance metrics
    best_trees_dict = best_M(M, accuracies)
    best_trees = []
    for key in best_trees_dict:
        best_trees.append(forest[int(key)])
    # 4. using majority voting, make predictions from the M learners for each
    #    instance in the test set
    all_predictions = []  # [[predictions1],[predictions2]...]
    for tree in best_trees:
        pred = tree.predict(x_test)
        all_predictions.append(pred)  #think about this like flipping a table
    #get the majority for every single row
    pred_header = build_header(
        all_predictions)  #turn all predictions into a mypy
    pred_mypy = MyPyTable(pred_header, all_predictions)
    voted_predictions = []
    # loop through every x_test: build a column of predictions, then pick the
    # prediction by majority rule
    for i in range(len(all_predictions[0])):
        pred_col = pred_mypy.get_column(i)
        vals, counts = get_freq_str(pred_col)
        j = counts.index(max(counts))
        y_predict = vals[j]
        voted_predictions.append(y_predict)

    forest_accuracy = get_accuracy(y_test, voted_predictions)
    return best_trees, voted_predictions, forest_accuracy
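A hedged usage sketch for bagging(), reusing the interview data from the test examples above; it assumes the helpers bagging() depends on (myevaluation, my_class, best_M, build_header, get_freq_str, get_accuracy) are importable:

X = [row[:-1] for row in interview_table]  # attribute columns
Y = [row[-1] for row in interview_table]   # "interviewed_well" labels
best_trees, voted_predictions, forest_accuracy = bagging(X, Y, N=20, M=7, F=2)
print("kept", len(best_trees), "of 20 trees; forest accuracy:", forest_accuracy)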
Example #7
def test_random_forest_classifier_fit():
    mp_table = MyPyTable(interview_header, interview_table)
    # Formulate X_train and y_train
    y_train = mp_table.get_column('interviewed_well')
    X_train_col_names = ["level", "lang", "tweets", "phd"]
    X_train = mp_table.get_rows(X_train_col_names)

    myRF = MyRandomForestClassifier(N=4, M=2, F=4)
    myRF.fit(X_train, y_train)

    assert len(myRF.M_attr_sets) == myRF.M
Example #8
def combine_two_columns(column_names, col1, col2):
    """Creates a MyPyTable from two columns and their column names

    Args:
        column_names(list): List of string column names
        col1(list): List of values from first column
        col2(list): List of values from second column

    Returns:
        table(MyPyTable): Returned MyPyTable with two columns"""
    data = []
    for i in range(len(col1)):
        data.append([col1[i], col2[i]])

    table = MyPyTable(column_names, data)
    return table
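A quick check of combine_two_columns with hypothetical values:

table = combine_two_columns(["name", "score"], ["a", "b"], [90, 85])
print(table.column_names)  # ['name', 'score']
print(table.data)          # [['a', 90], ['b', 85]]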
Example #9
def test_My_Random_Forest_Classifier_fit():
    # Object Declarations
    # Tests with N = 3, M = 2, F = 2 and seed = 0
    rand_forest_test = MyRandomForestClassifier(3, 2, 2, 0)
    table = MyPyTable()

    # Variable Assignment and Declaration
    table.data = interview_table
    table.column_names = interview_header

    X_train = interview_table
    y_train = table.get_column("interviewed_well")

    # Tests on the Interview Dataset
    rand_forest_test.header = interview_header
    rand_forest_test.fit(X_train, y_train)

    trees = rand_forest_test.trees
Example #10
def random_forest_predict(X_test, trees):
    # 4. using majority voting, make predictions from the M learners for each
    #    instance in the test set
    all_predictions = [] # [[predictions1],[predictions2]...]
    for tree in trees:
        pred = tree.predict(X_test)
        all_predictions.append(pred) #think about this like flipping a table
    #get the majority for every single row
    pred_header = build_header(all_predictions) #turn all predictions into a mypy
    pred_mypy = MyPyTable(pred_header, all_predictions)
    voted_predictions = []
    # loop through every x_test: build a column of predictions, then pick the
    # prediction by majority rule
    for i in range(len(all_predictions[0])):
        pred_col = pred_mypy.get_column(i)
        vals, counts = get_freq_str(pred_col)
        j = counts.index(max(counts)) 
        y_predict = vals[j]
        voted_predictions.append(y_predict)

    # forest_accuracy = get_accuracy(y_test, voted_predictions)
    return voted_predictions
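A hedged usage sketch: trees as produced by the fit test above, with assumed X_test values:

X_test = [["Junior", "Java", "yes", "no"],
          ["Junior", "Java", "yes", "yes"]]
y_predicted = random_forest_predict(X_test, trees)
print(y_predicted)  # one majority-vote label per test instance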
Example #11
def test_random_forest_classifier_predict():
    X_test = [["Mid", "Python", "no", "no", "True"],
              ["Mid", "R", "yes", "yes", "True"],
              ["Mid", "Python", "no", "yes", "True"]]

    y_test = ["True", "True", "True"]

    mp_table = MyPyTable(interview_header, interview_table)
    # Formulate X_train and y_train
    y_train = mp_table.get_column('interviewed_well')
    X_train_col_names = ["level", "lang", "tweets", "phd"]
    X_train = mp_table.get_rows(X_train_col_names)

    myRF = MyRandomForestClassifier(N=4, M=2, F=4)
    myRF.fit(X_train, y_train)
    predictions = myRF.predict(X_test)

    for i in range(0, len(predictions)):
        assert predictions[i] == y_test[i]
Example #12
def get_freq_str(col):

    header = ["y"]
    col_mypy = MyPyTable(header, col)

    dups = col_mypy.ordered_col(header)
    values = []
    counts = []

    for value in dups:
        if value not in values:
            # first time we have seen this value
            values.append(str(value))
            counts.append(1)
        else:
            # we have seen this value before
            counts[-1] += 1  # ok because the list is sorted

    return values, counts
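Assuming MyPyTable.ordered_col() returns the column's values sorted (so duplicates sit adjacent, which is what makes counts[-1] += 1 safe), a hypothetical call looks like:

vals, counts = get_freq_str(["no", "yes", "no", "yes", "yes"])
print(vals, counts)  # e.g. ['no', 'yes'] [2, 3]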
Example #13
def compute_entropy(instances, available_attributes, index):
    mypy = MyPyTable(available_attributes, instances)
    classes = mypy.get_column(-1)
    attributes = mypy.get_column(index)
    temp = set(attributes)
    __, tables = group_by(attributes, classes)
    totals = []
    sub_entropies = []
    # get the class counts here
    for jj, element in enumerate(temp):
        totals.append(attributes.count(element))
        # parallel array of counts of each att for each class
        arr = []
        for table in tables:
            arr.append(table.count(element))
        su = 0
        for kk in arr:
            if kk <= 0:
                pass
            else:
                su -= kk / totals[jj] * math.log2(kk / totals[jj])
        su *= totals[jj] / len(attributes)
        sub_entropies.append(su)
    return sum(sub_entropies)
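For reference, the quantity compute_entropy() returns is the weighted entropy E_new = sum over attribute values v of (|S_v|/|S|) * (-sum over classes c of p_c * log2(p_c)); a standalone hand check on toy data (all values hypothetical):

import math

attributes = ["a", "a", "b", "b"]      # toy attribute column
classes = ["yes", "no", "yes", "yes"]  # parallel class column

e_new = 0.0
for value in set(attributes):
    subset = [c for a, c in zip(attributes, classes) if a == value]
    for label in set(subset):
        p = subset.count(label) / len(subset)
        e_new -= (len(subset) / len(attributes)) * p * math.log2(p)
print(round(e_new, 4))  # 0.5 = (2/4) * 1.0 for "a" + (2/4) * 0.0 for "b"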
Example #14
# excerpt begins mid-loop; the list init and loop header are assumed from
# context (the iterable's real name is not shown in the original):
track_data_objs = []
for track in playlist_tracks:
    track_data = []
    popularity = track["track"]["popularity"]
    if popularity == 0:
        continue #skip any track with 0 popularity, because I'm unsure if this is a default value 
    else:
        #name = track["track"]["name"]
        #track_data.append(name) # will be ignored but could but each track_data_obj should be identifiable
        features_dict = sp.audio_features(track["track"]["id"])  # returns a features dictionary
        skip_keys = {"type", "id", "uri", "track_href", "analysis_url",
                     "time_signature", "mode", "key", "loudness"}
        for key in features_dict[0]:  # loop through and add only the attributes we want
            if key not in skip_keys:
                val = features_dict[0][key]
                if key != "tempo" and key != "duration_ms":
                    val = myutils.percent_to_rating(val)
                track_data.append(val)
                # if first == True:
                #     header.append(key)
        # first = False
        pop_class = myutils.pop_rating(popularity)
        track_data.append(pop_class) # popularity will be the y_train
        track_data_objs.append(track_data)
# header.append("popularity")
# now we can turn this into an xtrain and ytrain or keep it stitched together 
# when dealing with the data we can delete the first col, which is the name identifier

print(len(track_data_objs))

header = ['danceability', 'energy', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'popularity']

tracks_mypy = MyPyTable(header, track_data_objs)
tracks_mypy.save_to_file("tracks_data.txt")
Example #15
from mysklearn.myclassifiers import MyNaiveBayesClassifier
import os
from mysklearn.mypytable import MyPyTable
import mysklearn.myevaluation as myevaluation
import mysklearn.myutils as myutils
import pickle

fname = os.path.join("input_data", "collisions.csv")
collisions_data = MyPyTable().load_from_file(fname)

weather = collisions_data.get_column('WEATHER')
road_condition = collisions_data.get_column('ROADCOND')
light_condition = collisions_data.get_column('LIGHTCOND')
junction_type = collisions_data.get_column('JUNCTIONTYPE')
severity = collisions_data.get_column('SEVERITYDESC')

X_train = [[
    weather[i], road_condition[i], light_condition[i], junction_type[i],
    severity[i]
] for i in range(len(weather))]
y_train = collisions_data.get_column('COLLISIONTYPE')

# filter out unlabeled instances (deleting while iterating skips elements)
keep = [i for i, val in enumerate(y_train) if val != 'Unknown']
X_train = [X_train[i] for i in keep]
y_train = [y_train[i] for i in keep]

strattrain_folds, strattest_folds = myevaluation.stratified_kfold_cross_validation(
    X_train, y_train, 10)
strat_xtrain, strat_ytrain, strat_xtest, strat_ytest = myutils.get_from_folds(
    X_train, y_train, strattrain_folds, strattest_folds)
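A hedged continuation (not in the original snippet): fit the Naive Bayes classifier on the stratified training split and score the held-out split, using only the names already defined above:

nb = MyNaiveBayesClassifier()
nb.fit(strat_xtrain, strat_ytrain)
predictions = nb.predict(strat_xtest)
correct = sum(1 for p, actual in zip(predictions, strat_ytest) if p == actual)
print("accuracy:", round(correct / len(strat_ytest), 3))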
Example #16
from mysklearn.myclassifiers import MyKNeighborsClassifier
import os
from mysklearn.mypytable import MyPyTable
import mysklearn.myevaluation as myeval
import mysklearn.myutils as myutils
import pickle

# Importing the data and table and cols
movies_fname = os.path.join("input_data", "movies.csv")
# movie_data = MyPyTable().load_from_file_no_encode(movies_fname)
movies_table = MyPyTable().load_from_file(movies_fname, encode='cp1252')

# Getting profit
gross_profit = [
    movies_table.get_column('gross')[i] - movies_table.get_column('budget')[i]
    for i in range(len(movies_table.data))
]
profitted = [0 if gross < 0 else 1 for gross in gross_profit]
movies_table.add_column(profitted, 'profitted')

# fit the KNN algorithm to the movies data
kn_class = MyKNeighborsClassifier()
feature_cols = [
    'budget', 'votes', 'genre', 'rating', 'score', 'star', 'director', 'writer'
]
features = movies_table.get_key_columns(feature_cols)
outcomes = profitted
kn_class.fit(features, outcomes)

packaged_object = kn_class
Example #17
import pickle  # standard python library
from mysklearn.mypytable import MyPyTable
from mysklearn.myclassifiers import MyDecisionTreeClassifier, MyNaiveBayesClassifier
import mysklearn.myevaluation as myevaluation
import mysklearn.myutils as myutils
import os

# "pickle" an object (AKA object serialization)
# save a Python object to a binary file

# "unpickle" an object (AKA object de-serialization)
# load a Python object from a binary file (back into memory)

# Get data from csv file
table = MyPyTable().load_from_file(
    os.path.join("input_files", "winequality-red.csv"))
y_col = table.get_column("quality", False)
x_cols = table.drop_col("quality")

# Use Naive Bayes to classify
testcase = MyNaiveBayesClassifier()

#Returns x INDEXES
X_train, X_test = myevaluation.stratified_kfold_cross_validation(x_cols,
                                                                 y_col,
                                                                 n_splits=10)
X_train, X_test, y_train, y_test = myutils.getInstances(
    X_train, X_test, x_cols, y_col)

for i, fold in enumerate(X_train):
    train, test = myutils.normalize_values(X_train[i], X_test[i])
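The loop above normalizes each fold but discards the result; a hedged sketch of the per-fold evaluation it presumably feeds (fit/predict signatures assumed from the tests above):

fold_accuracies = []
for i, fold in enumerate(X_train):
    train, test = myutils.normalize_values(X_train[i], X_test[i])
    testcase.fit(train, y_train[i])
    predictions = testcase.predict(test)
    correct = sum(1 for p, actual in zip(predictions, y_test[i]) if p == actual)
    fold_accuracies.append(correct / len(y_test[i]))
print("mean accuracy:", sum(fold_accuracies) / len(fold_accuracies))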
Example #18
def load_data(filename):
    data_path = os.path.join("input_data", filename)
    table = MyPyTable().load_from_file(data_path)
    
    return table
Example #19
def test_naive_bayes_classifier_fit():
    train = [[1, 5], [2, 6], [1, 5], [1, 5], [1, 6], [2, 6], [1, 5], [1, 6]]
    y = ["yes", "yes", "no", "no", "yes", "no", "yes", "yes"]

    nb = MyNaiveBayesClassifier()
    nb.fit(train, y)
    assert nb.priors == [["yes", 5 / 8], ["no", 3 / 8]]
    assert nb.posteriors == [
        [0, ['yes', ['1', 0.8], ['2', 0.2]], ['no', ['1', 2 / 3], ['2', 1 / 3]]],
        [1, ['yes', ['5', 0.4], ['6', 0.6]], ['no', ['5', 2 / 3], ['6', 1 / 3]]]
    ]
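The expected priors and posteriors can be verified by hand; a small standalone check in pure Python (no course code assumed):

from collections import Counter

label_counts = Counter(y)  # Counter({'yes': 5, 'no': 3})
priors = {label: count / len(y) for label, count in label_counts.items()}
print(priors)  # {'yes': 0.625, 'no': 0.375}

# P(attribute 0 == 1 | yes): 4 of the 5 "yes" instances have a 1 in column 0
yes_col0 = [row[0] for row, label in zip(train, y) if label == "yes"]
print(yes_col0.count(1) / len(yes_col0))  # 0.8, matching ['1', 0.8] above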

    # RQ5 (fake) iPhone purchases dataset
    iphone_col_names = [
        "standing", "job_status", "credit_rating", "buys_iphone"
    ]
    iphone_table = [[1, 3, "fair", "no"], [1, 3, "excellent", "no"],
                    [2, 3, "fair", "yes"], [2, 2, "fair", "yes"],
                    [2, 1, "fair", "yes"], [2, 1, "excellent", "no"],
                    [2, 1, "excellent", "yes"], [1, 2, "fair", "no"],
                    [1, 1, "fair", "yes"], [2, 2, "fair", "yes"],
                    [1, 2, "excellent", "yes"], [2, 2, "excellent", "yes"],
                    [2, 3, "fair", "yes"], [2, 2, "excellent", "no"],
                    [2, 3, "fair", "yes"]]
    mypy = MyPyTable(iphone_col_names, iphone_table)
    y2 = myutils.get_mypycol(mypy, "buys_iphone")
    nb2 = MyNaiveBayesClassifier()
    nb2.fit(iphone_table, y2)
    assert nb2.priors == [["no", 1 / 3], ["yes", 2 / 3]]
    nb2_posts = [
        [0, ['no', ['1', 3 / 15], ['2', 2 / 15]],
            ['yes', ['1', 2 / 15], ['2', 8 / 15]]],
        [1, ['no', ['3', 2 / 15], ['2', 2 / 15], ['1', 2 / 3]],
            ['yes', ['3', 3 / 15], ['2', 4 / 15], ['1', 3 / 15]]],
        [2, ['no', ['fair', 2 / 15], ['excellent', 3 / 15]],
            ['yes', ['fair', 7 / 15], ['excellent', 3 / 15]]],
        [3, ['no', ['no', 1 / 3], ['yes', 0.0]],
            ['yes', ['no', 0.0], ['yes', 2 / 3]]]
    ]
    # assert nb2.posteriors == nb2_posts
    # Bramer 3.2 train dataset
    train_col_names = ["day", "season", "wind", "rain", "class"]
    train_table = [["weekday", "spring", "none", "none", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "none", "slight", "on time"],
                   ["weekday", "winter", "high", "heavy", "late"],
                   ["saturday", "summer", "normal", "none", "on time"],
                   ["weekday", "autumn", "normal", "none", "very late"],
                   ["holiday", "summer", "high", "slight", "on time"],
                   ["sunday", "summer", "normal", "none", "on time"],
                   ["weekday", "winter", "high", "heavy", "very late"],
                   ["weekday", "summer", "none", "slight", "on time"],
                   ["saturday", "spring", "high", "heavy", "cancelled"],
                   ["weekday", "summer", "high", "slight", "on time"],
                   ["saturday", "winter", "normal", "none", "late"],
                   ["weekday", "summer", "high", "none", "on time"],
                   ["weekday", "winter", "normal", "heavy", "very late"],
                   ["saturday", "autumn", "high", "slight", "on time"],
                   ["weekday", "autumn", "none", "heavy", "on time"],
                   ["holiday", "spring", "normal", "slight", "on time"],
                   ["weekday", "spring", "normal", "none", "on time"],
                   ["weekday", "spring", "normal", "slight", "on time"]]
    mypy2 = MyPyTable(train_col_names, train_table)
    y3 = myutils.get_mypycol(mypy2, "class")
    nb3 = MyNaiveBayesClassifier()
    nb3.fit(train_table, y3)
Example #20
def load_data(filename):
    mypytable = MyPyTable()
    mypytable.load_from_file(filename)
    return mypytable
Example #21
# heavily based on app from class
import pickle
from mysklearn.myclassifiers import MyRandomForestClassifier
from mysklearn.mypytable import MyPyTable
import os

fname = os.path.join("input_data", "tracks_data_backup.txt")
tracks = MyPyTable().load_from_file(fname)

Danceability = tracks.get_column('danceability')
Energy = tracks.get_column('energy')
Acousticness = tracks.get_column('acousticness')
Valence = tracks.get_column('valence')

y_train = Acousticness
x_train = [[Danceability[i], Energy[i], Valence[i]]
           for i in range(len(y_train))]

rf = MyRandomForestClassifier()
rf.fit(x_train, y_train, 30, 4, 2)
# serialize to file (pickle)
with open("trees.p", "wb") as outfile:
    pickle.dump(rf.trees, outfile)

# deserialize to object (unpickle)
with open("trees.p", "rb") as infile:
    trees2 = pickle.load(infile)
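Hedged usage of the unpickled trees, assuming a majority-vote helper like random_forest_predict from Example #10 is importable; the test instance is hypothetical:

X_test = [[Danceability[0], Energy[0], Valence[0]]]
print(random_forest_predict(X_test, trees2))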
Example #22
from mysklearn.mypytable import MyPyTable

# Object Declaration
table = MyPyTable()

# Trims the Dataset (Gets Data Based on City)
city = "Sydney"
table.load_from_file("weatherAUS.csv")
table.column_names[0] = 'Location'  # make sure the first header cell is exactly 'Location' for group_by
names, tables = table.group_by("Location")

city_index = names.index(city)

print("\n")
for i in range(10):
    print(tables[city_index][i])

table.data = tables[city_index]

table.save_to_file(city + "_weather.csv")