Ejemplo n.º 1
0
# import sys
# sys.path.append('C:\\Users\\Moritz Eck\\code\\fs18\\intro_ml\\shared')
import reader
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, RepeatedKFold, train_test_split

# Location of the training CSV.
FILE_PATH_TRAIN = "train1a.csv"
# NOTE(review): TEST_SIZE is not read in the visible code; presumably the
# hold-out fraction handed to train_test_split -- confirm.
TEST_SIZE = 0.2

# Candidate alpha values (presumably tried with Ridge further down -- confirm).
alphas = [0.1, 1.0, 10.0, 100.0, 1000.0]

# Load the training samples through the project's reader module.
data = reader.read_csv(FILE_PATH_TRAIN, show_info=False)

# Discard the 'Id' column.
data = data.drop(columns='Id')

# Feature matrix: columns x1..x10, copied out of the frame and converted
# with `.values`.
X = pd.DataFrame(
    data, columns=['x%d' % i for i in range(1, 11)], copy=True).values

# Target vector: the 'y' column, converted with `.values`.
Y = pd.DataFrame(data, columns=['y'], copy=True)['y'].values
Ejemplo n.º 2
0
#!/usr/bin/python

import sys
from zip import unzip
from network import dl_file
from reader import read_csv, read_last_line
from pprint import pprint

# variables
# `url` is the archive to fetch; `data_path` and `file_name` are handed to
# every helper below, so all three steps refer to the same location.
url = "http://www.bclc.com/documents/DownloadableNumbers/CSV/649.zip"
data_path = "data/"
file_name = "649"

# download zip file and save
dl_file(url, data_path, file_name)

# unzip to csv
unzip(data_path, file_name)

# read csv
# (alternative call kept for reference: read_last_line(data_path, file_name))
# NOTE(review): `reader` holds whatever read_csv returns; its type is not
# visible from here.
reader = read_csv(data_path, file_name)
Ejemplo n.º 3
0
import reader
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, train_test_split

# file path constants
FILE_PATH_TRAIN = "train1b.csv"

# Feature transformation functions, in order: float conversion, square,
# exponential, cosine and the constant 1.0.
functions = [
    lambda x: float(x),
    lambda x: math.pow(x, 2),
    lambda x: math.exp(x),
    lambda x: math.cos(x),
    lambda x: float(1),
]

# Load the input data through the project's reader module.
data = reader.read_csv(FILE_PATH_TRAIN, False)

# Discard the 'Id' column.
data = data.drop(columns='Id')

# Split the data set into targets and features.
# Targets: the 'y' column, converted with `.values`.
Y = pd.DataFrame(data, columns=['y'], copy=True)['y'].values

# Features: an independent copy of columns x1..x5.
X = pd.DataFrame(data, columns=['x%d' % i for i in range(1, 6)], copy=True)

# Frames for the transformed features, labelled x6..x10 and x11..x15.
quad = pd.DataFrame(data, columns=['x%d' % i for i in range(6, 11)])
exp = pd.DataFrame(data, columns=['x%d' % i for i in range(11, 16)])
Ejemplo n.º 4
0
from id3 import ID3
from reader import read_csv
from c45 import C45
from c45_numeric_handler import process_numeric
from Rule import Rule

if __name__ == "__main__":
    # Script entry point: load the iris CSV and fit an ID3 and a C45 model
    # on the same attributes and targets.
    data = read_csv('Bagian B/datasets/iris.csv')
    # print(data)
    # Row 0 is used as the label row; the last column is excluded.
    label = data[0, 0:-1].tolist()
    # Attribute values: every row after row 0, excluding the last column.
    x = data[1:, 0:-1]
    # Last column of every row after row 0, flattened to one dimension.
    target = data[1:, -1:].flatten()
    # print(label)
    # print(x)
    # print(target)

    # ID3
    print("=====ID 3=====")
    id3 = ID3()
    # The labels are attached as an attribute before fitting.
    id3.label = label
    id3.fit(x, target)
    # print(id3.tree)

    # C45
    print("=====C45=====")
    c45 = C45()
    # Same protocol as for ID3: set the labels, then fit.
    c45.label = label
    # print(x)
    # print(target)
    c45.fit(x, target)
    # print(c45.tree)
Ejemplo n.º 5
0
from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd
import math
import sys

# personal csv reader module
import reader

# Input locations.
FILE_PATH_TRAIN = "./input/train.csv"
FILE_PATH_TEST = "./input/test.csv"
# NOTE(review): TEST_SIZE is not read in the visible code; presumably a
# hold-out fraction -- confirm.
TEST_SIZE = 0.225

# Load the test file first, then the training file.
test_data = reader.read_csv(FILE_PATH_TEST, show_info=False)
training_data = reader.read_csv(FILE_PATH_TRAIN, show_info=False)

# Names of the sixteen feature columns: x1 .. x16.
data_columns = ['x{}'.format(i) for i in range(1, 17)]

# test data
# Feature columns of the test set, converted with `.values`.
x_values_test = test_data[data_columns].values

# training data
# extracting the x-values
Ejemplo n.º 6
0
        for i in range(len(ruleset)):
            subs = powerset(ruleset[i]['rules'])
            max_sub_acc = 0
            max_sub = None
            for sub in subs:
                ruleset[i]['rules'] = sub
                temp_acc = c45.predict_from_rule_set(x_test, y_test, ruleset,
                                                     label, default_value)
                if max_sub_acc < temp_acc:
                    max_sub_acc = temp_acc
                    max_sub = sub
            ruleset[i]['rules'] = max_sub


if __name__ == "__main__":
    # Script entry point: fit a C45 tree on the play-tennis data, convert it
    # to a rule list and prune that list against the test split.
    data = read_csv('play_tennis.csv')
    # Row 0 is used as the label row; the first and last columns are excluded.
    label = data[0, 1:-1].tolist()
    # Copy of the labels; not read again in the visible code.
    training_label = label.copy()
    # Attribute values: every row after row 0, without first and last column.
    x = data[1:, 1:-1]
    # Last column of every row after row 0, flattened to one dimension.
    target = data[1:, -1:].flatten()

    # NOTE: the result is unpacked test-first (x_test, y_test, x_train,
    # y_train), unlike scikit-learn's helper of the same name.
    x_test, y_test, x_train, y_train = C45.train_test_split(x, target)
    c45 = C45()
    # The return value of fit() is stored as the model's tree.
    c45.tree = c45.fit(x_train, label, y_train)
    # Computed from the full target column, not only from the training split.
    default_value = mode(target)
    print(default_value)
    print(c45.tree)
    ruleset = c45.tree.to_rule_list()
    print(x_test)
    print(ruleset)
    c45.prune(x_test, y_test, ruleset, label, default_value)
Ejemplo n.º 7
0
# filename prefixes
FILE_PREFIX_XGB = "../output/xgb/task2_xgb_nativ_["
FILE_PREFIX_MLP = "../output/mlp_lbgfs/task2_mlp_lbgfs_["

# filename suffix
FILE_SUFFIX = "].csv"

# index of the last file to read; files are numbered 1..INDEX_LAST_FILE
INDEX_LAST_FILE = 6

# read every existing MLP output file, in index order
files = [
    reader.read_csv(FILE_PREFIX_MLP + str(i) + FILE_SUFFIX, False)
    for i in range(1, INDEX_LAST_FILE + 1)
]

rows = []

# For each of the first 3000 rows, collect the value found in the second
# column of every file.
for i in range(0, 3000):
    # `.iloc[i, 1]` addresses the cell purely by position. The previous
    # `.iloc[i][1]` built a row Series and indexed it with an integer through
    # `[]`, a positional fallback that pandas has deprecated; it also upcast
    # the whole row to one dtype first.
    row = [frame.iloc[i, 1] for frame in files]
    rows.append(row)

average_values = []

for i in range(0, 3000):
Ejemplo n.º 8
0
import reader as r
import sys as s


def plot_it(x, size):
    """Show a scatter plot of every element of ``x`` against its successor.

    The y-values are ``x`` shifted left by one position, with the final
    element appended once more.

    Args:
        x: Values to plot. Must be non-empty and support ``x[1:] + [...]``
            (for example a ``list``).
        size: Unused.

    Returns:
        True, once ``plt.show()`` has returned.
    """
    tail = x[1:]
    last = [x[-1]]
    successors = tail + last
    plt.scatter(x, successors, marker='p')
    plt.show()
    return True


def histogram(x, y, xlabel, ylabel):
    """Draw ``y`` over ``x`` as a red line plus bars of width 0.5 and show it.

    Args:
        x: Positions handed to ``plt.plot`` and ``plt.bar``.
        y: Values handed to ``plt.plot`` and ``plt.bar``.
        xlabel: Text for the x-axis label.
        ylabel: Text for the y-axis label.

    Returns:
        True, once ``plt.show()`` has returned.
    """
    # Label the axes first; both calls act on the same current axes that the
    # drawing calls below use.
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Line first, then bars, so the bars are drawn after the line.
    plt.plot(x, y, color='red')
    plt.bar(x, y, width=0.5)
    plt.show()
    return True


if __name__ == '__main__':
    # Usage: <script> <file_name> <integer>
    #
    # The file name is mandatory. Validate it explicitly and stop: the former
    # `assert` inside a bare `except` only printed the message and then
    # crashed with IndexError on the next line (and the assert disappears
    # entirely under `python -O`).
    if len(s.argv) < 2:
        print("Please insert file name")
        print("May be specified is reside in your system")
        s.exit(1)
    x = r.read_csv(s.argv[1])
    plot_it(x, len(x))
    # The frequency histogram additionally needs the second argument, which
    # is parsed as an int and passed on to frequency_calc.
    if len(s.argv) < 3:
        print("Please pass an integer as the second argument")
        s.exit(1)
    x, y = frequency_calc(x, int(s.argv[2]))
    histogram(x, y, "x_axis", "y_axis")
Ejemplo n.º 9
0
def tessplot_csv(fname, ax=None, time_format='BJD_TDB'):
    """Read a CSV file and pass its contents to ``tessplot``.

    Args:
        fname: Handed unchanged to ``read_csv``, whose result is unpacked
            into exactly three values (time, y, yerr).
        ax: Forwarded to ``tessplot`` as ``ax``.
        time_format: Forwarded to ``tessplot`` as ``time_format``.

    Returns:
        Whatever ``tessplot`` returns.
    """
    t, values, errors = read_csv(fname)
    plot_options = dict(ax=ax, time_format=time_format)
    return tessplot(t, values, errors, **plot_options)
Ejemplo n.º 10
0
def brokenplot_csv(fname, dt=10, ax=None, time_format='BJD_TDB'):
    """Read a CSV file and pass its contents to ``brokenplot``.

    Args:
        fname: Handed unchanged to ``read_csv``, whose result is unpacked
            into exactly three values (time, y, yerr).
        dt: Forwarded to ``brokenplot`` as ``dt``.
        ax: Forwarded to ``brokenplot`` as ``ax``.
        time_format: Forwarded to ``brokenplot`` as ``time_format``.

    Returns:
        Whatever ``brokenplot`` returns.
    """
    t, values, errors = read_csv(fname)
    plot_options = dict(dt=dt, ax=ax, time_format=time_format)
    return brokenplot(t, values, errors, **plot_options)
Ejemplo n.º 11
0
#from stock import Stock
import reader


class Stock(object):
    """Stock holding compared and ordered by ``(name, shares, price)``.

    Comparisons are duck-typed: any object exposing ``name``, ``shares`` and
    ``price`` can be compared with a ``Stock``. For every other object the
    comparison methods return ``NotImplemented``, so Python applies its
    default handling (``==`` is False, ``!=`` is True, ordering raises
    ``TypeError``) instead of failing with ``AttributeError``.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable on Python 3.
    """
    ...

    @staticmethod
    def _key_of(other):
        """Return ``other``'s comparison tuple, or None if it has none."""
        try:
            return (other.name, other.shares, other.price)
        except AttributeError:
            return None

    def __eq__(self, other):
        """Return True when both comparison tuples are equal."""
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return (self.name, self.shares, self.price) == other_key

    def __lt__(self, other):
        """Return True when this comparison tuple sorts before ``other``'s."""
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return (self.name, self.shares, self.price) < other_key

    def __le__(self, other):
        """Return True when this comparison tuple sorts before or equals ``other``'s."""
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return (self.name, self.shares, self.price) <= other_key




#s = MyStock('GOOG', 100, 490.1)
#print (s)

# Pass the Stock class defined in this file. The name `stock` is never bound
# here (its import is commented out), so `stock.Stock` raised NameError.
portfolio = reader.read_csv(Stock, 'C:\\Users\\kathiria\\Desktop\\Advanced Python\\pythonmaster\\pythonmaster\\Data\\portfolio.csv')
print("*"*50)
print(portfolio)
print("*"*50)
# In-place sort; presumably `portfolio` is a list of Stock objects ordered
# through Stock.__lt__ -- confirm against reader.read_csv.
portfolio.sort()
Ejemplo n.º 12
0
from sklearn.metrics import accuracy_score

import numpy as np
import pandas as pd

import reader

# file paths
# Files whose 'y' column is loaded below as the comparison data.
XGB_CMP_FILEPATH = "../output/xgb/average/task2_xgb_nativ_av[1-6].csv"
MLP_CMP_FILEPATH = "../output/mlp_lbgfs/average/task2_mlp_lbgfs_av[1-6].csv"

# read comparison files
# Only the 'y' column of each file is kept, converted with `.values`.
xgb_cmp_file = reader.read_csv(XGB_CMP_FILEPATH, False)['y'].values
mlp_cmp_file = reader.read_csv(MLP_CMP_FILEPATH, False)['y'].values

# filename prefixes
# A full file name is built as prefix + index + suffix, e.g. "...nativ_[3].csv".
FILE_PREFIX_XGB = "../output/xgb/task2_xgb_nativ_["
FILE_PREFIX_MLP = "../output/mlp_lbgfs/task2_mlp_lbgfs_["

# filename suffix
FILE_SUFFIX = "].csv"

# per-file 'y' columns, one entry per file read
xgb_files = []
mlp_files = []

# read all existing xgb files
# NOTE(review): this reads indices 1..8, while the comparison file names
# above mention [1-6] -- confirm the intended range.
for i in range(1, 9):
    xgb_files.append(
        reader.read_csv(FILE_PREFIX_XGB + str(i) + FILE_SUFFIX,
                        False)['y'].values)
Ejemplo n.º 13
0
import calulation as c
from sys import version_info
import csv
import reader as r

if __name__ == '__main__':
    # Pick the line-reading builtin once. On Python 2 `input` evaluates what
    # the user types, so `raw_input` is used for every prompt there.
    if version_info[0] >= 3:
        read_line = input
        file_name = read_line("Enter data file name : ")
    else:
        read_line = raw_input  # noqa: F821 -- exists on Python 2 only
        file_name = read_line("Enter data file name :")
    interval = int(read_line("Enter interval : "))
    try:
        ls = r.read_csv(file_name)
    except IOError as e:
        # Without data there is nothing to test. Stop here instead of
        # falling through to a NameError on the unbound `ls`.
        print(str(e))
        raise SystemExit(1)
    res = c.uniform_chi_square_test(ls, len(ls), interval)
    r.print_tab(res)