# import sys
# sys.path.append('C:\\Users\\Moritz Eck\\code\\fs18\\intro_ml\\shared')

import pandas as pd

# personal csv reader module
import reader

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, RepeatedKFold, train_test_split

# FILE PATH: TRAINING FILE
FILE_PATH_TRAIN = "train1a.csv"
TEST_SIZE = 0.2

# alpha parameter values
alphas = [0.1, 1.0, 10.0, 100.0, 1000.0]

# training data
data = reader.read_csv(FILE_PATH_TRAIN, show_info=False)

# drop the first column
data = data.drop(columns='Id')

# x-values
X = pd.DataFrame(
    data,
    columns=['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10'],
    copy=True)
X = X.values

# y-values
Y = pd.DataFrame(data, columns=['y'], copy=True)
Y = Y['y'].values
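# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original script above): one common way
# to combine the Ridge model, the KFold splitter, and the alpha grid that the
# script imports and defines, picking the alpha with the lowest cross-validated
# RMSE. The names kf, rmse_per_alpha, and best_alpha are assumptions added for
# illustration only.
# ---------------------------------------------------------------------------
import numpy as np

kf = KFold(n_splits=10, shuffle=True, random_state=42)

rmse_per_alpha = []
for alpha in alphas:
    fold_rmse = []
    for train_idx, test_idx in kf.split(X):
        model = Ridge(alpha=alpha)
        model.fit(X[train_idx], Y[train_idx])
        predictions = model.predict(X[test_idx])
        fold_rmse.append(mean_squared_error(Y[test_idx], predictions) ** 0.5)
    rmse_per_alpha.append(np.mean(fold_rmse))

best_alpha = alphas[int(np.argmin(rmse_per_alpha))]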
#!/usr/bin/python

import sys

from zip import unzip
from network import dl_file
from reader import read_csv, read_last_line
from pprint import pprint

# variables
url = "http://www.bclc.com/documents/DownloadableNumbers/CSV/649.zip"
data_path = "data/"
file_name = "649"

# download zip file and save
dl_file(url, data_path, file_name)

# unzip to csv
unzip(data_path, file_name)

# read csv
# read_last_line(data_path, file_name)
reader = read_csv(data_path, file_name)
import math

import pandas as pd

# personal csv reader module
import reader

from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, train_test_split

# file path constants
FILE_PATH_TRAIN = "train1b.csv"

# feature transformation functions
functions = [
    lambda x: float(x),
    lambda x: math.pow(x, 2),
    lambda x: math.exp(x),
    lambda x: math.cos(x),
    lambda x: float(1)
]

# read input data
data = reader.read_csv(FILE_PATH_TRAIN, False)

# drop the first column
data = data.drop(columns='Id')

# splitting the data set into x & y values
# y-values
Y = pd.DataFrame(data, columns=['y'], copy=True)
Y = Y['y'].values

# x-values
X = pd.DataFrame(data, columns=['x1', 'x2', 'x3', 'x4', 'x5'], copy=True)

# new data frames for feature transformations
quad = pd.DataFrame(data, columns=['x6', 'x7', 'x8', 'x9', 'x10'])
exp = pd.DataFrame(data, columns=['x11', 'x12', 'x13', 'x14', 'x15'])
from id3 import ID3
from reader import read_csv
from c45 import C45
from c45_numeric_handler import process_numeric
from Rule import Rule

if __name__ == "__main__":
    data = read_csv('Bagian B/datasets/iris.csv')
    # print(data)

    # first row holds the attribute labels; remaining rows hold the samples
    label = data[0, 0:-1].tolist()
    x = data[1:, 0:-1]
    target = data[1:, -1:].flatten()
    # print(label)
    # print(x)
    # print(target)

    # ID3
    print("=====ID 3=====")
    id3 = ID3()
    id3.label = label
    id3.fit(x, target)
    # print(id3.tree)

    # C45
    print("=====C45=====")
    c45 = C45()
    c45.label = label
    # print(x)
    # print(target)
    c45.fit(x, target)
    # print(c45.tree)
from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd
import math
import sys

# personal csv reader module
import reader

FILE_PATH_TRAIN = "./input/train.csv"
FILE_PATH_TEST = "./input/test.csv"
TEST_SIZE = 0.225

# read training file
test_data = reader.read_csv(FILE_PATH_TEST, show_info=False)
training_data = reader.read_csv(FILE_PATH_TRAIN, show_info=False)

# splitting the training data set into x and y components
data_columns = [
    'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x12',
    'x13', 'x14', 'x15', 'x16'
]

# test data
# extracting the x-values
x_values_test = test_data[data_columns]
x_values_test = x_values_test.values

# training data
# extracting the x-values (mirrors the test-data handling above)
x_values_train = training_data[data_columns]
x_values_train = x_values_train.values
for i in range(len(ruleset)):
    # try every subset of this rule's conditions and keep the subset that
    # scores best on the validation split
    subs = powerset(ruleset[i]['rules'])
    max_sub_acc = 0
    max_sub = None

    for sub in subs:
        ruleset[i]['rules'] = sub
        temp_acc = c45.predict_from_rule_set(x_test, y_test, ruleset, label, default_value)

        if max_sub_acc < temp_acc:
            max_sub_acc = temp_acc
            max_sub = sub

    ruleset[i]['rules'] = max_sub


if __name__ == "__main__":
    data = read_csv('play_tennis.csv')

    label = data[0, 1:-1].tolist()
    training_label = label.copy()
    x = data[1:, 1:-1]
    target = data[1:, -1:].flatten()

    x_test, y_test, x_train, y_train = C45.train_test_split(x, target)

    c45 = C45()
    c45.tree = c45.fit(x_train, label, y_train)
    default_value = mode(target)
    print(default_value)
    print(c45.tree)

    ruleset = c45.tree.to_rule_list()
    print(x_test)
    print(ruleset)

    c45.prune(x_test, y_test, ruleset, label, default_value)
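# ---------------------------------------------------------------------------
# Assumed helper (not shown in the snippet above, which relies on a powerset()
# function): the standard itertools "powerset" recipe, yielding every subset of
# an iterable. Whether the original code used exactly this definition is an
# assumption.
# ---------------------------------------------------------------------------
from itertools import chain, combinations


def powerset(iterable):
    # all subsets of `iterable`, from the empty tuple up to the full set
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))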
# personal csv reader module
import reader

# filename prefixes
FILE_PREFIX_XGB = "../output/xgb/task2_xgb_nativ_["
FILE_PREFIX_MLP = "../output/mlp_lbgfs/task2_mlp_lbgfs_["

# filename suffix
FILE_SUFFIX = "].csv"

# last file index e.g. [5]
INDEX_LAST_FILE = 6

# read training file
files = []

# read all existing files
for i in range(1, INDEX_LAST_FILE + 1):
    files.append(reader.read_csv(FILE_PREFIX_MLP + str(i) + FILE_SUFFIX, False))

rows = []

# create a list for all values of the same row
for i in range(0, 3000):
    row = []

    for file in files:
        row.append(file.iloc[i][1])

    rows.append(row)

average_values = []

# average the per-file predictions of each row
for i in range(0, 3000):
    average_values.append(sum(rows[i]) / len(rows[i]))
import matplotlib.pyplot as plt

import reader as r
import sys as s


def plot_it(x, size):
    # plot each value against its successor (the last value is paired with itself)
    y = x[1:] + [x[-1]]
    plt.scatter(x, y, marker='p')
    plt.show()
    return True


def histogram(x, y, xlabel, ylabel):
    plt.plot(x, y, color='red')
    plt.bar(x, y, width=0.5)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()
    return True


if __name__ == '__main__':
    try:
        # a file name and an interval count are both required
        assert (len(s.argv) >= 3)
    except AssertionError:
        print("Please provide a file name and an interval")
        print("Make sure the specified file resides on your system")
        s.exit(1)

    x = r.read_csv(s.argv[1])
    plot_it(x, len(x))

    x, y = frequency_calc(x, int(s.argv[2]))
    histogram(x, y, "x_axis", "y_axis")
def tessplot_csv(fname, ax=None, time_format='BJD_TDB'):
    """Read (time, y, yerr) from a CSV file and plot it with tessplot()."""
    time, y, yerr = read_csv(fname)
    return tessplot(time, y, yerr, ax=ax, time_format=time_format)
def brokenplot_csv(fname, dt=10, ax=None, time_format='BJD_TDB'):
    """Read (time, y, yerr) from a CSV file and plot it with brokenplot()."""
    time, y, yerr = read_csv(fname)
    return brokenplot(time, y, yerr, dt=dt, ax=ax, time_format=time_format)
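# ---------------------------------------------------------------------------
# Illustrative usage (not from the original module): both wrappers expect a CSV
# that read_csv() parses into (time, y, yerr) arrays. The file name
# 'lightcurve.csv' is a placeholder.
# ---------------------------------------------------------------------------
# ax = tessplot_csv('lightcurve.csv')
# ax = brokenplot_csv('lightcurve.csv', dt=5, time_format='BJD_TDB')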
# from stock import Stock
import reader


class Stock(object):
    ...

    def __eq__(self, other):
        return ((self.name, self.shares, self.price) ==
                (other.name, other.shares, other.price))

    def __lt__(self, other):
        return ((self.name, self.shares, self.price) <
                (other.name, other.shares, other.price))

    def __le__(self, other):
        return ((self.name, self.shares, self.price) <=
                (other.name, other.shares, other.price))


# s = MyStock('GOOG', 100, 490.1)
# print (s)

portfolio = reader.read_csv(
    Stock,
    'C:\\Users\\kathiria\\Desktop\\Advanced Python\\pythonmaster\\pythonmaster\\Data\\portfolio.csv')

print("*" * 50)
print(portfolio)
print("*" * 50)

portfolio.sort()
from sklearn.metrics import accuracy_score

import numpy as np
import pandas as pd
import reader

# file paths
XGB_CMP_FILEPATH = "../output/xgb/average/task2_xgb_nativ_av[1-6].csv"
MLP_CMP_FILEPATH = "../output/mlp_lbgfs/average/task2_mlp_lbgfs_av[1-6].csv"

# read comparison files
xgb_cmp_file = reader.read_csv(XGB_CMP_FILEPATH, False)['y'].values
mlp_cmp_file = reader.read_csv(MLP_CMP_FILEPATH, False)['y'].values

# filename prefixes
FILE_PREFIX_XGB = "../output/xgb/task2_xgb_nativ_["
FILE_PREFIX_MLP = "../output/mlp_lbgfs/task2_mlp_lbgfs_["

# filename suffix
FILE_SUFFIX = "].csv"

# read training file
xgb_files = []
mlp_files = []

# read all existing xgb files
for i in range(1, 9):
    xgb_files.append(
        reader.read_csv(FILE_PREFIX_XGB + str(i) + FILE_SUFFIX, False)['y'].values)
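# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original script above): comparing each
# individual model's predictions against the averaged-ensemble predictions read
# into xgb_cmp_file, using the accuracy_score import at the top. The loop body
# is an assumption about how the comparison files are meant to be used; the
# score is the fraction of rows where the two prediction vectors match exactly.
# ---------------------------------------------------------------------------
for idx, predictions in enumerate(xgb_files, start=1):
    agreement = accuracy_score(xgb_cmp_file, predictions)
    print("xgb[{}] vs. xgb average: {:.4f}".format(idx, agreement))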
import calulation as c
from sys import version_info
import csv
import reader as r

if __name__ == '__main__':
    if version_info[0] >= 3:
        file_name = input("Enter data file name : ")
    else:
        file_name = raw_input("Enter data file name :")

    interval = int(input("Enter interval : "))
    # print("File name is :{} and interval is {}".format(file_name, interval))

    try:
        ls = r.read_csv(file_name)
    except IOError as e:
        print(str(e))
        raise SystemExit(1)

    # run a chi-square test for uniformity over the requested number of intervals
    res = c.uniform_chi_square_test(ls, len(ls), interval)
    r.print_tab(res)