def wine_net(seed, r, nn_type, options=None):
    """Train and test networks on the wine-quality data and log the results.

    Args:
        seed: Passed to ``np.random.seed`` and used to name the per-seed
            attempt directory.
        r: Stored under ``'perc_sample'`` in the options handed to
            ``util.prep_data``.
        nn_type: ``'cat'`` selects the categorised CSV with ``'Reviews'`` as
            the target (and ``'quality'`` excluded); any other value selects
            the combined CSV with ``'quality'`` as the target. Also used,
            suffixed with ``'-'``, as the last argument to ``write_results``.
        options: Optional settings. ``'class_types'`` (fallback ``['net']``)
            and ``'layers'`` (fallback ``[(5, 6)]``) are looked up through
            ``util.myget``.

    Returns:
        dict: ``'<class_type>_train'`` / ``'<class_type>_test'`` mapped to the
        values returned by ``train_and_test``, mirroring
        ``breast_cancer_net``.
    """
    # Compare by value: `is` checks object identity, which only holds for
    # equal strings when the interpreter happens to intern them.
    if nn_type == 'cat':
        filename = '../resources/winequality/winequality-combined-cat.csv'
        y_key = 'Reviews'
        split_options = {'exclude-keys': ['quality']}
    else:
        filename = '../resources/winequality/winequality-combined.csv'
        y_key = 'quality'
        split_options = {'exclude-keys': []}
    split_options['perc_sample'] = r

    export_dir = 'nn-exports/wine/'
    seed_dir = 'attempts/' + export_dir + str(seed) + '/'

    np.random.seed(seed)
    options = util.default(options, {})
    class_types = util.myget(options, 'class_types', ['net'])
    layers = util.myget(options, 'layers', [(5, 6)])

    data = util.get_data(filename)
    split_data = util.prep_data(data, y_key, split_options)

    net_results = {}
    for class_type in class_types:
        for layer in layers:
            # Re-seed so every configuration starts from the same RNG state.
            np.random.seed(seed)
            start_time = datetime.datetime.now()
            train_results, test_results = train_and_test(
                seed_dir, split_data, class_type, layer)
            # Keys depend on the class type only, so with several layer
            # settings the last one overwrites the earlier entries.
            net_results[class_type + '_train'] = train_results
            net_results[class_type + '_test'] = test_results
            total_time = datetime.datetime.now() - start_time
            write_results(seed, class_type, export_dir,
                          train_results['error-perc'],
                          test_results['error-perc'],
                          total_time, len(split_data[0]), layer,
                          nn_type + '-')
    return net_results
def breast_cancer_net(seed, r, options=None):
    """Train and test networks on the breast-cancer data and log the results.

    Args:
        seed: Passed to ``np.random.seed`` and used to name the per-seed
            attempt directory.
        r: Stored under ``'perc_sample'`` in the options handed to
            ``util.prep_data``.
        options: Optional settings. ``'class_types'`` (fallback ``['net']``)
            and ``'layers'`` (fallback ``[(5, 6)]``) are looked up through
            ``util.myget``.

    Returns:
        dict: ``'<class_type>_train'`` / ``'<class_type>_test'`` mapped to the
        values returned by ``train_and_test``.
    """
    export_dir = 'nn-exports/breast_cancer/'
    seed_dir = 'attempts/' + export_dir + str(seed) + '/'

    np.random.seed(seed)
    options = util.default(options, {})
    class_types = util.myget(options, 'class_types', ['net'])
    layers = util.myget(options, 'layers', [(5, 6)])

    raw = util.get_data('../resources/breast-cancer/wdbc.data.csv')
    encoded = util.split_feature(raw, 'diagnosis')
    prep_options = {
        'exclude-keys': ['id', 'diagnosis_B', 'diagnosis'],
        'perc_sample': r,
    }
    split_data = util.prep_data(encoded, 'diagnosis_M', prep_options)

    net_results = {}
    for class_type in class_types:
        for layer in layers:
            # Re-seed so every configuration starts from the same RNG state.
            np.random.seed(seed)
            started = datetime.datetime.now()
            train_results, test_results = train_and_test(
                seed_dir, split_data, class_type, layer)
            # Keys depend on the class type only, so with several layer
            # settings the last one overwrites the earlier entries.
            net_results[class_type + '_train'] = train_results
            net_results[class_type + '_test'] = test_results
            elapsed = datetime.datetime.now() - started
            write_results(seed, class_type, export_dir,
                          train_results['error-perc'],
                          test_results['error-perc'],
                          elapsed, len(split_data[0]), layer)
    return net_results
def main():
    """Print duplicate-row counts for the combined wine-quality CSV.

    Two counts are printed: duplicates judged on every column except
    ``'quality'``, then duplicates judged on all columns.

    Returns:
        The ``.duplicated()`` result computed without the ``'quality'``
        column.
    """
    # Alternative input for experiments:
    # '../resources/winequality/dummy-data.csv' with 'stuff3' as the
    # ignored column.
    csv_path = '../resources/winequality/winequality-combined.csv'
    ignored_column = 'quality'

    frame = util.get_data(csv_path)
    keep_columns = frame.columns != ignored_column
    duplicated = frame.loc[:, keep_columns].duplicated()

    print(duplicated.value_counts())
    print(frame.duplicated().value_counts())

    # The sorted frame is not used afterwards; the call is retained so the
    # function executes exactly the same steps as before.
    frame = frame.sort_values(by=list(frame.columns))
    return duplicated
def wine_report_prep(level):
    """Load the wine data set for a report and describe how to split it.

    Args:
        level: ``'cat'`` selects the categorised CSV with ``'Reviews'`` as
            the target (and ``'quality'`` excluded); any other value selects
            the combined CSV with ``'quality'`` as the target.

    Returns:
        tuple: ``(data, y_key, split_options)`` where ``data`` is whatever
        ``util.get_data`` returns for the chosen file, ``y_key`` is the
        target column name and ``split_options`` is a dict holding
        ``'exclude-keys'``.
    """
    # Compare by value: `is` checks object identity, which only holds for
    # equal strings when the interpreter happens to intern them.
    if level == 'cat':
        filename = '../resources/winequality/winequality-combined-cat.csv'
        y_key = 'Reviews'
        split_options = {'exclude-keys': ['quality']}
    else:
        filename = '../resources/winequality/winequality-combined.csv'
        y_key = 'quality'
        split_options = {'exclude-keys': []}

    data = util.get_data(filename)
    return data, y_key, split_options
def main2():
    """Add a ``'Reviews'`` category column and write the categorised CSV.

    Each ``'quality'`` score is mapped to a label: 1-3 -> ``'1'``,
    4-7 -> ``'2'``, 8-10 -> ``'3'``. The result is written to
    ``winequality-combined-cat.csv``.
    """
    source_path = '../resources/winequality/winequality-combined.csv'
    target_path = '../resources/winequality/winequality-combined-cat.csv'
    # (lowest score, highest score, label), both bounds inclusive.
    bands = ((1, 3, '1'), (4, 7, '2'), (8, 10, '3'))

    frame = util.get_data(source_path)

    labels = []
    for score in frame['quality']:
        # A score that fits no band adds no label, leaving the list shorter
        # than the column it is assigned to below.
        for lowest, highest, label in bands:
            if score >= lowest and score <= highest:
                labels.append(label)
                break

    frame['Reviews'] = labels
    # NOTE(review): to_csv is called with its defaults - confirm that the
    # reader of this file expects whatever index column that produces.
    frame.to_csv(target_path)
def breast_cancer_prep():
    """Load the breast-cancer CSV and split its ``'diagnosis'`` feature.

    Returns:
        The value returned by ``util.split_feature`` for the loaded data.
    """
    return util.split_feature(
        util.get_data('../resources/breast-cancer/wdbc.data.csv'),
        'diagnosis',
    )