def format_data():
    """Collapse the raw per-table counts into one age-band row per key.

    Reads 'population_raw' from the box at DATA_PATH and walks tables
    1..49.  The first entry of every table is skipped (presumably a header
    row -- confirm against the fetch step).  Each remaining entry
    contributes ``int(entry[0])`` to the list kept under ``entry[1]``:
    table 1 creates the list, tables 2..49 append to it, so a key that is
    missing from table 1 raises KeyError.

    The output is a header row followed by one row per key, stored under
    'population' with force=True.
    """
    store = stor.Box(DATA_PATH)
    raw_tables = store.get('population_raw')

    counts_by_state = {}
    for table_no in range(1, 50):
        entries = iter(raw_tables[table_no])
        next(entries, None)  # drop the first entry of the table
        for entry in entries:
            if table_no == 1:
                counts_by_state[entry[1]] = [int(entry[0])]
            else:
                counts_by_state[entry[1]].append(int(entry[0]))

    # Each band sums two slices of a key's count list.
    # NOTE(review): the second slice always sits 24 positions after the
    # first -- presumably two parallel halves of the table; confirm.
    age_bands = (
        ((2, 3), (26, 27)),     # <=4
        ((3, 4), (27, 28)),     # 5-9
        ((4, 6), (28, 30)),     # 10-17
        ((6, 9), (30, 33)),     # 18-21
        ((9, 15), (33, 39)),    # 22-49
        ((15, 25), (39, 49)),   # >=50
    )

    table = [
        ['State', 'Total', '<=4', '5-9', '10-17', '18-21', '22-49', '>=50'],
    ]
    for state, counts in counts_by_state.items():
        row = [state, counts[0]]
        for (first_lo, first_hi), (second_lo, second_hi) in age_bands:
            row.append(
                sum(counts[first_lo:first_hi] + counts[second_lo:second_hi]))
        table.append(row)
    store.put('population', table, force=True)
def main():
    """Tune, train and evaluate on the prepared handwritten-digit splits.

    Loads the train / cross-validation / test splits from the box in
    './data_handwritten_digits/', lets the nnp helpers pick a sample count
    (searched over (2950, 3000)) and a regulating rate (searched over
    (0, 1)), trains on the truncated training set, hands the trained
    result to ``box.update`` under 'mlps', and prints the chosen values
    along with the precision nnp.judge reports for the test split.
    """
    # get prepared data
    store = stor.Box('./data_handwritten_digits/')
    X_train = store.get('X_train')
    y_train = store.get('y_train')
    X_cv = store.get('X_cv')
    y_cv = store.get('y_cv')
    X_test = store.get('X_test')
    y_test = store.get('y_test')

    # tune: sample count first, then the regulating rate on the truncated set
    sample_count = nnp.opt_num_sample(
        X_train, y_train, X_cv, y_cv, (2950, 3000))
    X_train = X_train[:sample_count]
    y_train = y_train[:sample_count]
    reg_rate = nnp.opt_regulating_rate(X_train, y_train, X_cv, y_cv, (0, 1))

    # train and persist
    trained = nnp.train(X_train, y_train, reg_rate)
    store.update('mlps', trained)

    # predict on the held-out split and score it
    predicted = nnp.predict(trained, X_test)
    score = nnp.judge(y=y_test, y_hypo=predicted)

    print('num_sample_opt: {}'.format(sample_count))
    print('regulating_rate_opt: {}'.format(reg_rate))
    print('precision: {}'.format(score))
def fetch_data():
    """Download tables 1..49 and store the parsed responses.

    For every index a variable key is built ('P012000<i>' for one-digit
    indices, 'P01200<i>' otherwise), converted to a URL by ``api_of``,
    fetched with a 10 second timeout and parsed as UTF-8 encoded JSON.
    The index is printed before each request.  The resulting dict
    (index -> parsed JSON) is written to the box at DATA_PATH under
    'population_raw' with force=True.
    """
    responses = {}
    for index in range(1, 50):
        if index <= 9:
            variable = 'P012000{}'.format(index)
        else:
            variable = 'P01200{}'.format(index)
        url = api_of(variable)
        print(index)
        with request.urlopen(url, timeout=10) as response:
            responses[index] = json.loads(response.read().decode('utf-8'))

    store = stor.Box(DATA_PATH)
    store.put('population_raw', responses, force=True)
def prepare_data():
    """Build and store the train / cv / test splits (meant to be run once).

    Loads X as float and y as int from the CSV files, passes both through
    ``nnp.regulate``, rewrites every label equal to 10 to 0, shuffles the
    pair, and splits it: the first 3000 samples for training, the next
    1000 for cross-validation, and the rest for testing.  The full arrays
    and all three splits are put into the box in
    './data_handwritten_digits/'.
    """
    features = datah.Data(
        './data_handwritten_digits/X_total.csv').to_nparray(float)
    labels = datah.Data(
        './data_handwritten_digits/y_total.csv').to_nparray(int)
    features = nnp.regulate(features)
    labels = nnp.regulate(labels)

    # label 10 -> 0; every other label is left untouched
    for row in range(labels.shape[0]):
        if labels[row] != 10:
            continue
        labels[row] = 0

    features, labels = shuffle(features, labels)

    train_X, train_y = features[:3000], labels[:3000]
    cv_X, cv_y = features[3000:4000], labels[3000:4000]
    test_X, test_y = features[4000:], labels[4000:]

    # save data; each put() is called on the value the previous one returned
    sink = stor.Box('./data_handwritten_digits/')
    entries = (
        ('X_total', features), ('y_total', labels),
        ('X_train', train_X), ('y_train', train_y),
        ('X_cv', cv_X), ('y_cv', cv_y),
        ('X_test', test_X), ('y_test', test_y),
    )
    for name, value in entries:
        sink = sink.put(name, value)
# Example no. 5
# 0
def main():
    """Tune, train and evaluate using the splits stored in 'data_debug'.

    The train / cross-validation / test splits are read from the box
    instead of being rebuilt.  The disabled CSV pipeline that used to sit
    here loaded X_total / y_total, regulated them, rewrote label 10 to 0,
    shuffled, and split at 3000 / 4000.

    After tuning the sample count and the regulating rate, the model is
    trained, stored under 'mlps' via ``put_or_replace``, and the precision
    on the test split is printed.  To reuse an already trained model,
    load it with ``box.get('mlps')`` instead of training.
    """
    # prepared data
    store = stor.Box('data_debug')
    X_train = store.get('X_train')
    y_train = store.get('y_train')
    X_cv = store.get('X_cv')
    y_cv = store.get('y_cv')
    X_test = store.get('X_test')
    y_test = store.get('y_test')

    # tune: sample count first, then the regulating rate on the truncated set
    sample_count = opt_num_sample(X_train, y_train, X_cv, y_cv, (2950, 3000))
    X_train = X_train[:sample_count]
    y_train = y_train[:sample_count]
    reg_rate = opt_regulating_rate(X_train, y_train, X_cv, y_cv, (0, 1))

    # train and persist
    trained = train(X_train, y_train, reg_rate)
    store.put_or_replace('mlps', trained)

    # predict on the held-out split and score it
    predicted = predict(trained, X_test)
    print(judge(y=y_test, y_hypo=predicted))
'''\
    write the stored population table to a csv file
'''

import csv
import stor

DATA_PATH = '../data/'

# Load both stored tables from the box at DATA_PATH.
box = stor.Box(DATA_PATH)
# Fetched alongside the formatted table; not used when writing the CSV below.
population_raw = box.get('population_raw')
population = box.get('population')

# Write every row of the formatted table to population.csv.
# newline='' leaves line endings to the csv module, as its documentation
# requires; without it rows can gain an extra '\r' on platforms that
# translate '\n'.  The with-block closes the file even if a row fails to
# serialize (the handle was previously never closed).
with open(DATA_PATH + 'population.csv', 'w', newline='') as f:
    csv_file = csv.writer(f)
    csv_file.writerows(population)