def test_load_CSV_into_numpy(self):
    """Loading a tiny CSV should produce the exact 2x2 numpy array."""
    loader = FileLoader()
    loaded = loader.load_file("test_load_csv_into_numpy.csv")
    self.assertTrue(np.array_equal(loaded, np.array([[1, 2], [3, 4]])))
def test_load_MergedCSV_into_numpy(self):
    """The merged HIV dataset should load with shape (37, 397)."""
    loader = FileLoader()
    loaded = loader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv")
    self.assertTrue(loaded.shape == (37, 397))
def __init__(self, path, part_size, stream_id):
    """Load the file at *path*, chunk it into *part_size* pieces, and tag the stream.

    Stores the raw data length and the chunk list for later streaming.
    """
    self.__part_size = part_size
    processor = DataProcessor()
    loader = FileLoader()
    raw = loader.load_file(path)
    self.__size = len(raw)
    self.__chunks = processor.split_data(raw, part_size)
    debug('FileStreamer.init(): len(self.__chunks) == %d' % len(self.__chunks))
    self.__stream_id = stream_id
def test_split_merge_csv_4_25_8(self):
    """split_data(0.11, 0.22) on the 37-sample set gives test/valid/train of 4/25/8 rows."""
    loader = FileLoader()
    manager = DataManager()
    manager.set_data(loader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv"))
    manager.split_data(test_split=0.11, train_split=0.22)
    expected = np.array([(4, 397), (25, 397), (8, 397)])
    shapes = np.array([manager.datum[SplitTypes.Test].shape,
                       manager.datum[SplitTypes.Valid].shape,
                       manager.datum[SplitTypes.Train].shape])
    self.assertTrue(np.array_equal(shapes, expected))
def test_split_merge_csv_7_7_23(self):
    """split_data(0.19, 0.62) on the 37-sample set gives test/valid/train of 7/7/23 rows."""
    loader = FileLoader()
    manager = DataManager()
    manager.set_data(loader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv"))
    manager.split_data(test_split=0.19, train_split=0.62)
    expected = np.array([(7, 397), (7, 397), (23, 397)])
    shapes = np.array([manager.datum[SplitTypes.Test].shape,
                       manager.datum[SplitTypes.Valid].shape,
                       manager.datum[SplitTypes.Train].shape])
    self.assertTrue(np.array_equal(shapes, expected))
def test_split_into_target_and_input(self):
    """After splitting, inputs keep 396 feature columns and targets are 1-D per split."""
    loader = FileLoader()
    manager = DataManager()
    manager.set_data(loader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv"))
    manager.split_data(test_split=0.11, train_split=0.22)
    # Built exactly like the checked values: three 2-tuples then three 1-tuples.
    expected = np.array([(4, 396), (25, 396), (8, 396), (4,), (25,), (8,)])
    shapes = np.array([manager.inputs[SplitTypes.Test].shape,
                       manager.inputs[SplitTypes.Valid].shape,
                       manager.inputs[SplitTypes.Train].shape,
                       manager.targets[SplitTypes.Test].shape,
                       manager.targets[SplitTypes.Valid].shape,
                       manager.targets[SplitTypes.Train].shape])
    self.assertTrue(np.array_equal(shapes, expected))
class CoreApplication(object):
    """IDS core: validates the configured auth log, then runs the analysis loop on a daemon thread."""

    def __init__(self):
        self.iterator = True  # loop flag read by the analysis thread
        self.logger = logging.getLogger('SecurityMetricIDS')
        self.__is_file_valid(config.logname)
        self.__log_loader = FileLoader()

    def start_core(self):
        """Launch the analysis loop on a daemon thread so it dies with the process."""
        core_app = threading.Thread(target=self.__start_core_thread, args=())
        core_app.daemon = True
        core_app.start()

    def __is_file_valid(self, filename):
        """Exit the process if *filename* does not exist; log success otherwise."""
        if not os.path.exists(filename):
            try:
                raise Exception("""Process terminated at {}. Selected path of logging file is not valid. Please specify correct authentication log path.""".format(datetime.datetime.now()))
            except Exception as err:
                self.logger.error(err)
                sys.exit(1)
        else:
            self.logger.info("File validation successful, file {} is available".format(filename))

    def __start_core_thread(self):
        """Repeatedly read the log and compute metrics, hot-reloading config when it changes."""
        self.logger.info("Starting core of the application")
        self.iterator = True
        metrics_computer = MetricsComputer()
        last_modified_config = os.stat('config.py').st_mtime
        reload(config)  # NOTE(review): bare reload() is Python 2; importlib.reload on Py3 -- confirm target version
        while self.iterator:
            self.logger.info("New analyse iteration started.")
            self.__log_loader.read_file()
            metrics_computer.compute_metrics()
            time.sleep(config.analyse_time * 60)
            if not last_modified_config == os.stat('config.py').st_mtime:
                self.logger.info("Config file was changed. Reloading config.")
                last_modified_config = os.stat('config.py').st_mtime
                reload(config)
        self.logger.info("Reading log data finished.")
        return
def test_experiment(self):
    """Run the 80 normalizer/selector/model combos from test.csv on the merged dataset,
    writing each combo plus its fitness and train/valid/test r^2 to the output files."""
    output_filename_header = FileLoader.create_output_file()
    time.sleep(1)  # presumably so the two output files get distinct timestamped names -- TODO confirm
    loaded_algorithm_combinations = FileLoader.read_csv_file("../Datasets/test.csv")
    loaded_data = FileLoader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv")
    print(loaded_algorithm_combinations[0])
    output_filename = FileLoader.create_output_file()
    for i in range(80):
        combo = loaded_algorithm_combinations[i]
        normalizer = self.getnormalizer(combo[0])
        feature_eliminator = self.getfeature_eliminator(combo[1])
        the_model = self.get_model(combo[2])
        # BUG FIX: a Python 2 `print` statement was mixed into otherwise
        # print()-style code (a SyntaxError on Python 3); normalized to print().
        print("taking ", type(normalizer).__name__, "and feature selector ",
              type(feature_eliminator).__name__, "model", type(the_model).__name__)
        FileLoader.write_model_in_file(
            output_filename_header,
            type(normalizer).__name__,
            type(feature_eliminator).__name__,
            type(the_model).__name__,
            "", "", "", "", "",
        )
        the_data_manager = DataManager(feature_eliminator, normalizer=normalizer)
        the_data_manager.set_data(loaded_data)
        the_data_manager.split_data(test_split=0.15, train_split=0.70)
        exp = Experiment(the_data_manager, the_model)
        exp.run_experiment()
        FileLoader.write_model_in_file(
            output_filename,
            type(normalizer).__name__,
            type(feature_eliminator).__name__,
            type(the_model).__name__,
            "",
            exp.fitness_matrix[0],
            exp.get_r2(SplitTypes.Train),
            exp.get_r2(SplitTypes.Valid),
            exp.get_r2(SplitTypes.Test),
        )
def test_experiment_not_transformed_test(self):
    """A FakePredictionModel should yield r^2 == 0 on the test split."""
    loaded_data = FileLoader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv")
    data_manager = DataManager()
    data_manager.set_data(loaded_data)
    data_manager.split_data(test_split=0.19, train_split=0.62)
    exp = Experiment(data_manager, FakePredictionModel())
    exp.run_experiment()
    # FIX: assertEquals is a deprecated alias removed in Python 3.12; use assertEqual.
    self.assertEqual(0, exp.get_r2(SplitTypes.Test))
def test_experiment_svm_svr_37dataset_r2_train(self):
    """SVR on the 0.19/0.62 split should reproduce the known train r^2."""
    loaded_data = FileLoader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv")
    the_data_manager = DataManager()
    the_data_manager.set_data(loaded_data)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)
    exp = Experiment(the_data_manager, svm.SVR())
    exp.run_experiment()
    # FIX: exact float equality is brittle across platforms/sklearn versions;
    # compare almost-equal, as the sum-of-squares test in this suite already does.
    self.assertAlmostEqual(exp.get_r2(SplitTypes.Train), 0.93994377385638073)
def create(self, fname, varnames, plottype, opts=None):
    """Build, render, and register a visualizer for *varnames* read from *fname*.

    Returns the global id of the rendered view. The visualizer is cached in
    self._active under a key derived from plottype, fname and the variable names.
    """
    # BUG FIX: `opts={}` was a mutable default argument shared across calls.
    if opts is None:
        opts = {}
    reader = FileLoader.get_cached_reader(fname)
    var = map(reader.read, varnames)  # NOTE(review): lazy iterator on Py3 -- confirm loadVariable accepts it
    vis = visualizers[plottype]()
    vis.loadVariable(var, opts)
    vis.render(opts)
    key = plottype + '_' + fname + '_' + '_'.join(varnames)
    self._active[key] = vis
    view = vis.getView()
    view.Render()
    return self.getGlobalId(view)
def test_experiment_svr_37dataset_r2_test(self):
    """SVR on the 0.19/0.62 split should reproduce the known test r^2."""
    loaded_data = FileLoader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv")
    the_data_manager = DataManager()
    the_data_manager.set_data(loaded_data)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)
    exp = Experiment(the_data_manager, svm.SVR())
    exp.run_experiment()
    # FIX: exact float equality is brittle across platforms/sklearn versions;
    # compare almost-equal, consistent with the sum-of-squares test in this suite.
    self.assertAlmostEqual(exp.get_r2(SplitTypes.Test), -0.33005242525900247)
def test_experiment_sum_of_squares_real37_test(self):
    """Known sum-of-squares for SVR on the test split of the 37-sample dataset."""
    manager = DataManager()
    manager.set_data(FileLoader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv"))
    manager.split_data(test_split=0.19, train_split=0.62)
    exp = Experiment(manager, svm.SVR())
    exp.run_experiment()
    self.assertAlmostEqual(6.708898437500002, exp.get_sum_of_squares(SplitTypes.Test))
def setUp(self):
    """Load the shared house-project fixture before every test."""
    unittest.TestCase.setUp(self)
    self.project = FileLoader.read_from_json('houseproject.json')
def test_read_from_json(self):
    """read_from_json should build a Project holding 22 tasks and 6 engineers."""
    self.project = FileLoader.read_from_json(self.json_file)
    self.assertTrue(type(self.project) == Project)  # must be exactly a Project
    self.assertEqual(len(self.project.task_list), 22)  # fixture defines 22 tasks
    self.assertEqual(len(self.project.engr_list), 6)   # ...and 6 engineers
from FileLoader import FileLoader

# Smoke test: load the athlete dataset, show the first and last ten rows.
loader = FileLoader()
frame = loader.load("../athlete_events.csv")
loader.display(frame, 10)
loader.display(frame, -10)
import pandas as pd
from FileLoader import FileLoader


def youngestFellah(df, year):
    """Print the minimum female ('f') and male ('m') athlete ages for *year*."""
    of_year = df.loc[df['Year'] == year]
    ages = {}
    ages['f'] = of_year['Age'].loc[df['Sex'] == 'F'].min()
    ages['m'] = of_year['Age'].loc[df['Sex'] == 'M'].min()
    print(ages)


if __name__ == "__main__":
    loader = FileLoader()
    frame = loader.load("./athlete_events.csv")
    youngestFellah(frame, 2004)
import pandas as pd
from FileLoader import FileLoader


def youngestFellah(data, year):
    """Return {'f': ..., 'm': ...} minimum ages for *year*; print an error on bad input."""
    result = {'f': None, 'm': None}
    if not (isinstance(data, pd.DataFrame) and isinstance(year, int)):
        print("ERROR: year is not an int or data is not good")
        return None
    of_year = data[data["Year"] == year]
    result['f'] = of_year[of_year["Sex"] == 'F'].Age.min()
    result['m'] = of_year[of_year["Sex"] == 'M'].Age.min()
    return result


fl = FileLoader()
df = fl.load("athlete_events.csv")
dic = youngestFellah(df, 1992)
print(dic)
import pandas
from FileLoader import FileLoader


def youngestFellah(df, year):
    """Return the ages of the youngest male ('M') and female ('F') athletes of *year*."""
    result = {}  # renamed from `dict`, which shadowed the builtin
    youngest_male = df.loc[(df['Year'] == year) & (df['Sex'] == "M")].sort_values(by='Age').head(1)
    youngest_female = df.loc[(df['Year'] == year) & (df['Sex'] == "F")].sort_values(by='Age').head(1)
    # assumes positional column 3 is Age -- TODO confirm against the CSV layout
    result['M'] = youngest_male.iloc[0, 3]
    result['F'] = youngest_female.iloc[0, 3]
    return result


loader = FileLoader()
df = loader.load("athlete_events.csv", ',')
year_1992 = youngestFellah(df, 1992)
print(year_1992)
from FileLoader import FileLoader


def howManyMedals(df, name):
    """Return {year: {'G': n, 'S': n, 'B': n}} for every Games athlete *name* entered.

    Years where the athlete competed but won nothing still appear with zero counts.
    """
    medals = {"Gold": "G", "Silver": "S", "Bronze": "B"}
    how_many_medals = {}
    athlete_records = df.loc[df["Name"] == name]
    for _, row in athlete_records.iterrows():
        # FIX: the original initialized the year bucket identically in both the
        # medal and no-medal branches; hoist the shared init with setdefault.
        year_counts = how_many_medals.setdefault(row["Year"], {"G": 0, "S": 0, "B": 0})
        if row["Medal"] in medals:
            year_counts[medals[row["Medal"]]] += 1
    return how_many_medals


loader = FileLoader()
data = loader.load("athlete_events.csv")
print(howManyMedals(data, "Kjetil Andr Aamodt"))
from FileLoader import FileLoader
from itertools import combinations

fl = FileLoader()
transactions = fl.loadTransactions()


def validateInput():
    """Prompt until the user enters an integer in [0, 14] and return it."""
    while True:
        try:
            userInput = int(input("Please enter a number: "))
        except ValueError:
            print("Please enter a number.\n")
            continue
        if 0 <= userInput < 15:
            return userInput
        print("Please enter a number between 0 and 14.")


def calculateSubset(size, position, transactionNumber):
    """Return *size* consecutive items of the given transaction starting at *position*."""
    transaction = transactions[transactionNumber].getTransaction()
    return [transaction[position + offset:position + offset + 1][0]
            for offset in range(size)]
def main():
    """Print the proportion of female tennis players at the 2004 Games."""
    loader = FileLoader()
    frame = loader.load('./day04/athlete_events.csv')
    print(proportionBySport(frame, 2004, 'Tennis', 'F'))
#!/usr/bin/env python3
from FileLoader import FileLoader

# Smoke test: load the dataset and show the first twelve rows.
fl = FileLoader()
frame = fl.load('../resources/athlete_events.csv')
fl.display(frame, 12)
from FileLoader import FileLoader

loader = FileLoader()
# FIX (idiom): call load() as a bound method instead of the awkward
# unbound form FileLoader.load(loader, ...).
df = loader.load("athlete_events.csv")
loader.display(df, -2)
from FileLoader import FileLoader


def __proportionBySport__(dataFrame, year, sport, gender):
    """Return the percentage of *gender* athletes of *year* who competed in *sport*.

    Athletes are de-duplicated by name so each person counts once.
    """
    cohort = dataFrame[dataFrame['Year'] == year]
    cohort = cohort[cohort['Sex'] == gender]
    total = len(cohort.drop_duplicates('Name').index)
    unique_sport = cohort[cohort['Sport'] == sport].drop_duplicates('Name')
    in_sport = len(unique_sport.index)
    print(unique_sport.head(20))
    print(total, in_sport)
    return in_sport * 100 / total


if __name__ == '__main__':
    ld = FileLoader()
    dt = ld.load('../ex00/athlete_events.csv')
    print(__proportionBySport__(dt, 2004, 'Tennis', 'F'))
from FileLoader import FileLoader

# Show the last five then the first five rows of the dataset.
loader = FileLoader()
frame = loader.load("../resources/athlete_events.csv")
loader.display(frame, -5)
loader.display(frame, 5)
def __init__(self):
    """Validate the configured auth log and prepare the loader used by the core loop."""
    self.iterator = True  # loop flag consumed by the analysis thread
    self.logger = logging.getLogger('SecurityMetricIDS')
    self.__is_file_valid(config.logname)
    self.__log_loader = FileLoader()
from FileLoader import FileLoader
from HowManyMedalsByCountry import howManyMedalsByCountry


class MyPlotLib():
    """Thin wrappers around pandas/seaborn plotting for the athlete dataset."""

    def histogram(self, df, features):
        """Histogram of *features* columns."""
        df.hist(features)
        plt.show()

    def density(self, df, features):
        """Kernel-density plot of *features* columns."""
        df[features].plot.kde()
        plt.show()

    def pair_plot(self, data, features):
        """Seaborn pair plot of *features* columns."""
        seaborn.pairplot(data[features])
        plt.show()

    def box_plot(self, data, features):
        # BUG FIX: the original referenced `df`, silently resolving to the
        # module-level global instead of the method's `data` parameter.
        data.boxplot(features)
        plt.show()


f = FileLoader()
df = f.load('../athlete_events.csv')
mpl = MyPlotLib()
# h = mpl.histogram(df, ['Year', 'ID', 'Height'])
# i = mpl.density(df, ['Weight', 'Height'])
# j = mpl.pair_plot(df, ['Weight', 'Height'])
# k = mpl.box_plot(df, ['Weight', 'Height'])
from FileLoader import FileLoader

loader = FileLoader()  # renamed: `file` shadowed a (Py2) builtin
frame = loader.load('../resources/athlete_events.csv')
print('5 first rows\n')
loader.display(frame, 5)
print('\n\n5 last rows')
loader.display(frame, -5)
# Original 42-school header retained in spirit (author: lboukrou, 2020).
from HowManyMedals import HowManyMedals
from FileLoader import FileLoader
import pandas as pd

# Dump the (NaN-free) competition rows for one athlete as a dict.
loader = FileLoader()
df = loader.load('../resources/athlete_events.csv')
name = 'Kjetil Andr Aamodt'
data_years = df[df.Name == name]
print(data_years)
data_years = data_years.dropna()
dic = {}
dic = data_years.to_dict()
print(dic)
gold, silver, bronze = 0, 0, 0
def main():
    """Print the medal history for team 'London'.

    NOTE(review): 'London' looks like a city, not a country -- confirm intent.
    """
    loader = FileLoader()
    data = loader.load("../resources/athlete_events.csv")
    std = howManyMedalsByCountry(data, 'London')
    print(std)
from FileLoader import FileLoader
from DataManager import DataManager
from src.Population import Population

# Seed the first GA population from the drug dataset, then print each row's bit sum.
file_path = "../Dataset/00-91-Drugs-All-In-One-File.csv"
loaded_data = FileLoader.load_file(file_path)
data_manager = DataManager(normalizer=None)
data_manager.create_first_population(loaded_data)
data_manager.split_data_into_train_valid_test_sets(test_split=0.15, train_split=0.70)

population = Population()
population.create_first_population()
for row_index in range(1, 50):
    print("row", row_index, population.population_matrix[row_index].sum())
#!/usr/bin/env python
import importlib.util
import sys

sys.path.append('../ex00')
from FileLoader import FileLoader


def proportionBySport(df, year, sport, sex):
    """Fraction of distinct (Name, ID) athletes of *sex* in *year* who did *sport*."""
    cohort_mask = (df.Year == year) & (df.Sex == sex)
    everyone = df[cohort_mask].drop_duplicates(subset=['Name', 'ID']).shape[0]
    in_sport = df[cohort_mask & (df.Sport == sport)]\
        .drop_duplicates(subset=['Name', 'ID']).shape[0]
    return in_sport / everyone


if __name__ == '__main__':
    ld = FileLoader()
    df = ld.load('../resources/athlete_events.csv')
    ld.display(df, 3)
    print(proportionBySport(df, 2004, 'Tennis', 'F'))
from FileLoader import FileLoader

loader = FileLoader()
dataset_path = r"C:\Users\Gabriel\Desktop\Mes documents - Google Drive\DATA\19\day04\athlete_events.csv"
data = loader.load(dataset_path)

# Import kept after the load, matching the original execution order.
from SpatioTemporalData import SpatioTemporalData

sp = SpatioTemporalData(data)
sp.where(1896)
sp.where(2016)
sp.when('Athina')
sp.when('Paris')
from FileLoader import FileLoader


class SpatioTemporalData:
    """Map Olympic host cities to years and years to host cities."""

    def __init__(self, df):
        self.df = df

    def when(self, location):
        """Return the distinct years (first-appearance order) held in *location*."""
        # PERF FIX: the original walked the frame row-by-row with .loc inside a
        # Python loop; a vectorized filter returns the same ordered, unique list.
        return self.df.loc[self.df["City"] == location, "Year"].drop_duplicates().tolist()

    def where(self, year):
        """Return the first host city recorded for *year*, or None if absent."""
        matches = self.df.loc[self.df["Year"] == year, "City"]
        if len(matches) == 0:
            return None
        return matches.iloc[0]


fl = FileLoader()
std = SpatioTemporalData(fl.load("../athlete_events.csv"))
print(std.where(1896))
print(std.where(2016))
print(std.when("Athina"))
print(std.when("Paris"))
import numpy as np
import pandas as pd
import sys

sys.path.append('../ex00')
from FileLoader import FileLoader


def howManyMedalsByCountry(df, country):
    """Return {year: {'G': g, 'S': s, 'B': b}} medal counts for Team *country*.

    Duplicate (Year, Medal, Team, Event) rows -- team events -- count once.
    """
    data = df.loc[df.Team == country, ['Year', 'Medal', 'Team', 'Event']].copy()
    # BUG FIX: the original ran replace(..., inplace=True) directly on the
    # caller's DataFrame, mutating it as a side effect; normalize our copy instead.
    data['Medal'].replace('', np.nan, inplace=True)
    data.dropna(subset=['Medal'], inplace=True)
    data = data.drop_duplicates()
    res = {}
    for _, row in data.iterrows():
        counts = res.setdefault(row['Year'], {'G': 0, 'S': 0, 'B': 0})
        if row['Medal'] == 'Gold':
            counts['G'] += 1
        elif row['Medal'] == 'Silver':
            counts['S'] += 1
        elif row['Medal'] == 'Bronze':
            counts['B'] += 1
    return res


if __name__ == '__main__':
    fl = FileLoader()
    data = fl.load('../resources/athlete_events.csv')
    print(howManyMedalsByCountry(data, 'France'))
import pandas as pd  # BUG FIX: `pd` was used below but never imported here

from FileLoader import FileLoader


def howManyMedalsByCountry(dataFrame, name):
    """Return {year: {'Gold': g, 'Silver': s, 'Bronze': b}} for country *name*."""
    country_rows = dataFrame[dataFrame['Country'] == name]
    medal_rows = country_rows[pd.notnull(country_rows['Medal'])]
    years = medal_rows.loc[:, 'Year'].drop_duplicates()
    tally = {}
    for year in years:
        tally[year] = {}
        of_year = medal_rows[medal_rows['Year'] == year]
        print(of_year)
        tally[year]['Gold'] = len(of_year[of_year['Medal'] == 'Gold'])
        tally[year]['Silver'] = len(of_year[of_year['Medal'] == 'Silver'])
        tally[year]['Bronze'] = len(of_year[of_year['Medal'] == 'Bronze'])
    return tally


if __name__ == '__main__':
    fl = FileLoader()
    df = fl.load('../ex00/athlete_events.csv')
    howManyMedalsByCountry(df, 'France')
import matplotlib.pyplot as plt
import seaborn as sns
from math import sqrt

from FileLoader import FileLoader


class MyPlotLib:
    """Static plotting helpers for the athlete dataset."""

    @staticmethod
    def histogram(data, features):
        """Histogram of the *features* columns of *data*."""
        # BUG FIX: the original referenced the global `df` instead of the
        # `data` parameter, so the de-duplicated frame passed in was ignored.
        data[features].hist()
        plt.show()

    @staticmethod
    def density(data, features):
        """Density plot of the *features* columns of *data*."""
        # BUG FIX: same undefined-parameter issue as histogram.
        sns.distplot(data[features])
        plt.show()

    @staticmethod
    def pair_plot(data, features):
        pass

    @staticmethod
    def box_plot(data, features):
        pass


if __name__ == "__main__":
    df = FileLoader.load("../data/athlete_events.csv")
    MyPlotLib.histogram(df.drop_duplicates(['ID']), ['Height', 'Weight'])
    MyPlotLib.density(df.drop_duplicates(['ID']), ['Height', 'Weight'])
sns.boxplot(x=categorical_var, y=numerical_var, palette="Set2") plt.show() @staticmethod def density(categorical_var, numerical_var): """displays the density of the numerical variable. Each subpopulation should be represented by a separate curve on the graph.""" #print(categorical_var) cat_list = categorical_var.astype('category') for cat in cat_list: sns.kdeplot(numerical_var[categorical_var == cat], label=cat) #, categorical_var) plt.show() @staticmethod def compare_histograms(categorical_var, numerical_var): """plots the numerical variable in a s""" pass if __name__ == "__main__": loader = FileLoader() df = loader.load("../assets/athlete_events.csv").dropna() h = df[df.Sex == "M"] f = df[df.Sex == "F"] k = Komparator() k.compare_box_plots(df.Sex, df.Weight) k.density(df.Sex, df.Weight) k.compare_histograms(df.Sex.head(), df.Weight.head())
import pandas as pd
from matplotlib import pyplot as plt

from FileLoader import FileLoader


class Komparator:
    """Compare a numerical variable across the categories of a categorical one."""

    def __init__(self, df):
        self.data = df

    def compare_box_plots(self, categorical_var, numerical_var):
        # BUG FIX: 'colunm' was a typo for 'column'.
        # NOTE(review): DataFrame.plot.box has no `column` kwarg -- DataFrame.boxplot
        # does; confirm which pandas API was intended.
        self.data[categorical_var].plot.box(column=numerical_var)
        plt.show()

    def density(self, categorical_var, numerical_var):
        pass

    def compare_histograms(self, categorical_var, numerical_var):
        # BUG FIX: the original definition was missing `self`, breaking
        # instance calls like k.compare_histograms(...).
        pass


# to end
f = FileLoader()
r = f.load("athlete_events.csv")
k = Komparator(r)
k.compare_box_plots(["Team", "Year"], [1, 2, 3])
from FileLoader import FileLoader
import pandas as pd


def proportionBySport(df: pd.DataFrame, year: int, sport: str, gender: str) -> float:
    """Fraction of *gender* athletes in *year* (unique by name) who competed in *sport*."""
    cohort_mask = (df['Sex'] == gender) & (df['Year'] == year)
    cohort = df[cohort_mask].drop_duplicates(subset='Name', keep='first')
    players = df[cohort_mask & (df['Sport'] == sport)]\
        .drop_duplicates(subset='Name', keep='first')
    return players['Sport'].count() / cohort['Sport'].count()


loader = FileLoader()
data = loader.load("../resources/athlete_events.csv")
print(proportionBySport(data, 2004, 'Tennis', 'F'))
from FileLoader import FileLoader
from YoungestFellah import YoungestFellah as yf

# Print the youngest athletes of the 2004 Games.
loader = FileLoader()
frame = loader.load("../athlete_events.csv")
print(yf.youngestFellah(frame, 2004))
from FileLoader import FileLoader
import pandas as pd


class SpatioTemporalData:
    """Map Olympic years to host cities and host cities to years."""

    def __init__(self, data):
        self.df = data

    def when(self, location):
        """Return the distinct years (row order) the Games were held in *location*."""
        seen = []
        for _, row in self.df[self.df.City == location].iterrows():
            if row['Year'] not in seen:
                seen.append(row['Year'])
        return seen

    def where(self, year):
        """Return the first host city recorded for *year* (IndexError if none)."""
        matches = self.df[self.df.Year == int(year)]
        return matches['City'].iloc[0]


loader = FileLoader()
data = loader.load('../athlete_events.csv')
sp = SpatioTemporalData(data)
print(sp.when('Atlanta'))
print(sp.where('2016'))
Args: y_true: a scalar or a numpy ndarray for the correct labels y_pred: a scalar or a numpy ndarray for the predicted labels Returns: The accuracy score as a float. None on any error. Raises: This function should not raise any Exception. """ n = 0 for pred, true in zip(y_pred, y_true): if pred == true: n += 1 if normalize == False: return n if y_pred.shape[0] > 0: return n / y_pred.shape[0] return None if __name__ == "__main__": if len(sys.argv) == 3: loader = FileLoader() data1 = loader.load(str(sys.argv[1])) data2 = loader.load(str(sys.argv[2])) y_true = np.array(data1['Hogwarts House']) y_pred = np.array(data2['Hogwarts House']) print("score : ", accuracy_score_(y_true, y_pred)) else: print("Usage : python accuracy_score.py path.csv path.csv")
from FileLoader import FileLoader
from YoungestFellah import youngestFellah

# Print the youngest athletes of the 2004 Games.
loader = FileLoader()
frame = loader.load("../resources/athlete_events.csv")
print(youngestFellah(frame, 2004))
import pandas as pd
import matplotlib.pyplot as plt


class MyPlotLib:
    """Static plotting helpers over a pandas DataFrame."""

    @staticmethod
    def histogram(data: pd.DataFrame, features: list):
        """Plot histograms for the *features* columns of *data*."""
        # GENERALIZED: the original ignored `features` and hard-coded
        # ['Height', 'Weight']; the existing call site passes that same list,
        # so behavior there is unchanged.
        data.hist(column=features)
        plt.show()

    @staticmethod
    def density(data: pd.DataFrame, features: list):
        ...

    @staticmethod
    def pait_plot(data: pd.DataFrame, features: list):
        # NOTE(review): name looks like a typo for pair_plot; kept to
        # preserve the public interface.
        ...

    @staticmethod
    def box_plot(data: pd.DataFrame, features: list):
        ...


if __name__ == '__main__':
    from FileLoader import FileLoader

    data_csv = FileLoader.load('../resources/athlete_events.csv')
    MyPlotLib.histogram(data_csv, ['Height', 'Weight'])
from FileLoader import FileLoader
from DataManager import DataManager
from src.Population import Population

# Build the GA's first population from the drug dataset and report each row's bit total.
loaded_data = FileLoader.load_file("../Dataset/00-91-Drugs-All-In-One-File.csv")
manager = DataManager(normalizer=None)
manager.create_first_population(loaded_data)
manager.split_data_into_train_valid_test_sets(test_split=0.15, train_split=0.70)

population = Population()
population.create_first_population()
for row in range(1, 50):
    print("row", row, population.population_matrix[row].sum())
linear_model.BayesianRidge() # ,svm.SVR() # ,RandomForestRegressor() # , LinearRegression() ] normalizers = [ None # , StandardScaler() # ,NumpyNormalizer(), ScikitNormalizer() # , MinMaxScaler() ,Binarizer() # ,Imputer(), KernelCenterer() # ,Normalizer() ] output_filename = FileLoader.create_output_file() for normalizer in normalizers: for feature_eliminator in feature_eliminators: # for k_value in range(5, 20): # for k_value in range(13, 14): for the_model in the_models: print( "taking care of ", type(normalizer).__name__, "and feature selector ", type(feature_eliminator).__name__, "model", type(the_model).__name__, )