"""Load the breast-cancer CSV, drop junk columns, and split into train/test sets."""
from eli5.permutation_importance import get_score_importances
from MachineLearningModels.ridge import Ridge
from MachineLearningModels.lasso import Lasso
from MachineLearningModels.pls import PLS
from sklearn.metrics import mean_squared_error, r2_score
from MachineLearningModels.gradientboost import GradientBoost
from MachineLearningModels.linearregression import LinearRegression
from DataPreprocessor.datacleaner import Cleaner
from DataPreprocessor.dataspliter import Spliter
from Evaluation.evaluation import Evaluation
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
import pandas as pd
import numpy as np
# BUG FIX: CsvReader is used below but was never imported in this script,
# which raises NameError at runtime. The project reader lives in Utils.csvread.
from Utils.csvread import CsvReader

csvreader = CsvReader()
data = csvreader.read('data.csv')

# 'Unnamed: 32' is an artifact column (trailing comma in the source CSV); drop it
# and key the frame by the patient 'id' column.
data = data.drop(columns='Unnamed: 32')
data.set_index('id', inplace=True)

# Separate the target ('diagnosis') from the feature columns.
labels = data[['diagnosis']].copy()
features = data.drop(columns='diagnosis')
feature_list = list(features)

# Sequential 90/10 split (no shuffling): first 90% of rows train, remainder test.
train_features, test_features = np.split(features, [int(.9 * len(features))])
train_labels, test_labels = np.split(labels, [int(.9 * len(labels))])

print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
# Script: train data for an adder circuit — loads input/output samples and
# defines a small bias-free linear Keras network to model the adder.
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
from Utils.csvread import CsvReader
from MachineLearningModels.randomforest import RandomForest
from DataPreprocessor.datacleaner import Cleaner
from DataPreprocessor.dataspliter import Spliter
from Evaluation.evaluation import Evaluation
from pandas import DataFrame

# Read the adder samples; 'output' is the label, 'data1'/'data2' the operands.
csvreader = CsvReader()
data = csvreader.read('memocode_adder_inputs.csv')
labels = data[['output']].copy()
features = data[['data1', 'data2']].copy()

# Hand-crafted hold-out pairs used as the test set.
# NOTE(review): the expected outputs (26897, 27837) do not equal data1 + data2
# for these rows — presumably the circuit is not an exact integer adder; confirm
# against the dataset's semantics.
Data_f = {'data1': [10000, 30000], 'data2': [20000, -5000]}
test_features = DataFrame(Data_f, columns=['data1', 'data2'])
Data_l = {'output': [26897, 27837]}
test_labels = DataFrame(Data_l, columns=['output'])

# Two dense layers, both linear activation and no bias terms, so the whole
# network is a pure linear map from the 2 inputs to 1 output.
model = Sequential()
model.add(Dense(5, input_dim=2, activation='linear', use_bias=False))
model.add(Dense(1, activation='linear', use_bias=False))
model.summary()
"""CLI utility: clean a training-data CSV with a chosen strategy and write it out."""
import os
import warnings
import argparse

from Utils.csvread import CsvReader
import numpy as np
from DataPreprocessor.datacleaner import Cleaner


def init_arg_parser():
    """Build and return the command-line parser for the score-table generator."""
    arg_parser = argparse.ArgumentParser(description="Automatically generates score table.")
    arg_parser.add_argument('-train', '--trainingdata', help="Get the training data", required=True)
    arg_parser.add_argument('-s', '--strategy', help="Data clean strategy", required=True)
    arg_parser.add_argument('-o', '--output', help="Output path", required=True)
    return arg_parser


if __name__ == "__main__":
    # Parse CLI options, run the cleaner over the training CSV, and save the result.
    cli_args = init_arg_parser().parse_args()
    raw_frame = CsvReader().read(cli_args.trainingdata)
    cleaned = Cleaner(raw_frame).clean(cli_args.strategy, 'df')
    cleaned.to_csv(cli_args.output)
indexarray[i] = '' plt.xticks(df.index, indexarray, rotation='vertical') plt.legend(handles=[p, a]) plt.savefig(figpath) df = pd.DataFrame(dict.items(), columns=['key', 'value']) df = df.sort_values(by=['key']) df = df.set_index('key') df = df.T return df if __name__ == "__main__": parser = init_arg_parser() args = parser.parse_args() csvreader = CsvReader() alldata = csvreader.read(args.trainingdata) feature_headers = read_list(args.input) label_headers = read_list(args.output) if args.type == 'c': type = 'classifier' else: type = 'regressor' alldata = alldata.fillna(0) data = alldata #data = data.sample(frac=1).reset_index(drop=True) if args.expandingwindow: vr = (len(data) - 52) / len(data)