def format_data():
    """Collapse the raw population tables into one age-grouped table.

    Reads the ``'population_raw'`` entry from the box at ``DATA_PATH`` and
    walks its tables 1..49.  The first row of every table is skipped
    (presumably a header row -- confirm against the fetched data).  For each
    remaining row, ``int(row[0])`` is collected under the key ``row[1]``:
    table 1 starts the list for that key, tables 2..49 append to it, so a
    key that is absent from table 1 raises ``KeyError``.

    The result is a list of rows, beginning with a header row, stored in the
    box under ``'population'`` (``force=True``).  Each data row holds the
    key, the first collected value and six sums over pairs of column slices
    (presumably the male and female columns of each age group -- verify
    against the variable definitions).
    """
    box = stor.Box(DATA_PATH)
    raw = box.get('population_raw')

    per_state = {}
    for table_no in range(1, 50):
        rows = iter(raw[table_no])
        next(rows, None)  # drop the first row of the table
        if table_no == 1:
            # The first table creates the per-key list.
            for row in rows:
                per_state[row[1]] = [int(row[0])]
        else:
            # Later tables extend the list created by table 1.
            for row in rows:
                per_state[row[1]].append(int(row[0]))

    # One pair of slices per output column, in header order.
    age_group_slices = (
        (slice(2, 3), slice(26, 27)),     # '<=4'
        (slice(3, 4), slice(27, 28)),     # '5-9'
        (slice(4, 6), slice(28, 30)),     # '10-17'
        (slice(6, 9), slice(30, 33)),     # '18-21'
        (slice(9, 15), slice(33, 39)),    # '22-49'
        (slice(15, 25), slice(39, 49)),   # '>=50'
    )

    table = [
        ['State', 'Total', '<=4', '5-9', '10-17', '18-21', '22-49', '>=50'],
    ]
    for state, counts in per_state.items():
        record = [state, counts[0]]
        record.extend(
            sum(counts[first] + counts[second])
            for first, second in age_group_slices
        )
        table.append(record)

    box.put('population', table, force=True)
def main():
    """Tune, train and evaluate a model on the prepared digit data.

    Loads the train / cv / test splits from the box at
    ``'./data_handwritten_digits/'``, picks a sample count in the range
    ``(2950, 3000)`` and a regulating rate in the range ``(0, 1)`` via
    ``nnp``, trains on the truncated training set, stores the result under
    ``'mlps'`` and prints the chosen parameters and the test precision.
    """
    # Load the prepared splits.
    box = stor.Box('./data_handwritten_digits/')
    train_X = box.get('X_train')
    train_y = box.get('y_train')
    cv_X = box.get('X_cv')
    cv_y = box.get('y_cv')
    test_X = box.get('X_test')
    test_y = box.get('y_test')

    # Tune: first the number of training samples, then the regulating rate
    # on the training set truncated to that many samples.
    best_num_sample = nnp.opt_num_sample(
        train_X, train_y, cv_X, cv_y, (2950, 3000))
    train_X = train_X[:best_num_sample]
    train_y = train_y[:best_num_sample]
    best_regulating_rate = nnp.opt_regulating_rate(
        train_X, train_y, cv_X, cv_y, (0, 1))

    # Train and persist the result.
    mlps = nnp.train(train_X, train_y, best_regulating_rate)
    box.update('mlps', mlps)

    # Predict on the held-out test set and score the predictions.
    test_y_hypo = nnp.predict(mlps, test_X)
    precision = nnp.judge(y=test_y, y_hypo=test_y_hypo)

    print('num_sample_opt: {}'.format(best_num_sample))
    print('regulating_rate_opt: {}'.format(best_regulating_rate))
    print('precision: {}'.format(precision))
def fetch_data():
    """Download the 49 raw population tables and store them in the box.

    For every ``i`` in 1..49 a variable key is built (``'P012000'`` followed
    by a one-digit ``i``, ``'P01200'`` followed by a two-digit ``i``) and
    passed to ``api_of`` to obtain the address to request.  The response is
    read with a 10 second timeout, decoded as UTF-8 and parsed as JSON.  The
    parsed tables are keyed by the integer ``i`` and saved under
    ``'population_raw'`` in the box at ``DATA_PATH`` (``force=True``).
    """
    raw_tables = {}
    for index in range(1, 50):
        # Both prefixes produce a key with the same total width.
        if index <= 9:
            key = 'P012000{}'.format(index)
        else:
            key = 'P01200{}'.format(index)
        url = api_of(key)

        print(index)  # progress output, one line per request
        with request.urlopen(url, timeout=10) as response:
            payload = response.read().decode('utf-8')
            raw_tables[index] = json.loads(payload)

    storage = stor.Box(DATA_PATH)
    storage.put('population_raw', raw_tables, force=True)
def prepare_data():
    """Build the train / cv / test splits from the CSV files (run once).

    Loads ``X_total.csv`` as floats and ``y_total.csv`` as ints, passes both
    through ``nnp.regulate``, rewrites every label equal to 10 as 0,
    shuffles features and labels together, and splits them into the first
    3000 samples (train), the next 1000 (cv) and the remainder (test).  All
    eight arrays are stored in the box at ``'./data_handwritten_digits/'``.
    """
    X_path = './data_handwritten_digits/X_total.csv'
    y_path = './data_handwritten_digits/y_total.csv'

    X_total = datah.Data(X_path).to_nparray(float)
    y_total = datah.Data(y_path).to_nparray(int)
    X_total = nnp.regulate(X_total)
    y_total = nnp.regulate(y_total)

    # Relabel 10 as 0, one entry at a time.
    for row in range(y_total.shape[0]):
        if y_total[row] == 10:
            y_total[row] = 0

    X_total, y_total = shuffle(X_total, y_total)

    named_arrays = (
        ('X_total', X_total),
        ('y_total', y_total),
        ('X_train', X_total[:3000]),
        ('y_train', y_total[:3000]),
        ('X_cv', X_total[3000:4000]),
        ('y_cv', y_total[3000:4000]),
        ('X_test', X_total[4000:]),
        ('y_test', y_total[4000:]),
    )

    # Save data.  Each put() is issued on the value returned by the previous
    # put(), exactly like a chained call.
    target = stor.Box('./data_handwritten_digits/')
    for name, array in named_arrays:
        target = target.put(name, array)
def main():
    """Tune, train and evaluate a model on the prepared debug data.

    Loads the train / cv / test splits from the ``'data_debug'`` box, picks
    a sample count in the range ``(2950, 3000)`` and a regulating rate in
    the range ``(0, 1)``, trains on the truncated training set, stores the
    result under ``'mlps'`` and prints the precision on the test set.
    """
    # NOTE: the splits are expected to exist in the box already.  The
    # disabled code this comment replaces built them from
    # './data_handwritten_digits/X_total.csv' and '.../y_total.csv':
    # load as float / int, regulate both, map label 10 -> 0 (in the mat
    # file 10 stands for 0), shuffle, then split into [:3000] (train),
    # [3000:4000] (cv) and [4000:] (test).
    box = stor.Box('data_debug')
    train_X = box.get('X_train')
    train_y = box.get('y_train')
    cv_X = box.get('X_cv')
    cv_y = box.get('y_cv')
    test_X = box.get('X_test')
    test_y = box.get('y_test')

    # Tune: first the number of training samples, then the regulating rate
    # on the training set truncated to that many samples.
    best_num_sample = opt_num_sample(
        train_X, train_y, cv_X, cv_y, (2950, 3000))
    train_X = train_X[:best_num_sample]
    train_y = train_y[:best_num_sample]
    best_regulating_rate = opt_regulating_rate(
        train_X, train_y, cv_X, cv_y, (0, 1))

    # Train and persist the result.  A previously stored result could be
    # loaded with box.get('mlps') instead of training again.
    mlps = train(train_X, train_y, best_regulating_rate)
    box.put_or_replace('mlps', mlps)

    # Predict on the held-out test set and score the predictions.
    test_y_hypo = predict(mlps, test_X)
    precision = judge(y=test_y, y_hypo=test_y_hypo)
    print(precision)
"""Parse data to csv files.

Loads the ``'population'`` table from the box at ``DATA_PATH`` and writes it
row by row to ``population.csv`` in the same directory.
"""
import csv

import stor

DATA_PATH = '../data/'

box = stor.Box(DATA_PATH)
# Loaded as before even though nothing below uses it, so the script still
# touches the same box entries.
population_raw = box.get('population_raw')
population = box.get('population')

# The context manager closes (and flushes) the file even if writing fails.
# newline='' is what the csv module asks for, so the writer's own line
# terminators are not translated a second time on platforms whose line
# separator is not '\n'.
with open(DATA_PATH + 'population.csv', 'w+', newline='') as f:
    csv_file = csv.writer(f)
    csv_file.writerows(population)