예제 #1
0
def load_yearpred(folder="../data/"):
    """Load ``YearPredictionMSD.txt`` and split it into train/validation/test.

    The comma-delimited file is read from ``folder``. In every split, column 0
    of a row is returned as the target and the remaining columns as features.

    Row ranges used for the splits:
        * ``[0, 410000)``      -> training (passed through ``munge.shuffle_data``)
        * ``[410000, 463715)`` -> validation (file order kept)
        * ``[463715, end)``    -> test (file order kept)

    Args:
        folder: Directory that contains ``YearPredictionMSD.txt``.

    Returns:
        Tuple ``(train_X, train_y, validation_X, validation_y, test_X, test_y)``.
    """
    path = os.path.join(folder, 'YearPredictionMSD.txt')
    table = np.genfromtxt(path, delimiter=',')

    # Row indices at which the training and validation ranges end.
    train_end, validation_end = 410000, 463715

    def split_xy(rows):
        # Features are every column but the first; the first column is the target.
        return rows[:, 1:], rows[:, 0]

    test_X, test_y = split_xy(table[validation_end:])
    validation_X, validation_y = split_xy(table[train_end:validation_end])

    # Only the training rows are shuffled.
    train_X, train_y = split_xy(munge.shuffle_data(table[:train_end, :]))

    return train_X, train_y, validation_X, validation_y, test_X, test_y
예제 #2
0
def load_seismic_station(siteid, phaseid=0, folder="../data/"):
    """Load the rows of ``tt_data.csv`` for one site/phase and split them 80/10/10.

    Rows are kept when their first column equals ``siteid`` and their second
    column equals ``phaseid`` (both compared as integers). The selected rows
    are converted to floats, shuffled with ``munge.shuffle_data`` and then cut
    into training (first ~80%), validation (next ~10%) and test (remainder)
    splits. Columns 2-7 are returned as features and column 9 as the target.

    Args:
        siteid: Site identifier to select; anything accepted by ``int()``.
        phaseid: Phase identifier to select; anything accepted by ``int()``.
        folder: Directory that contains ``tt_data.csv``.

    Returns:
        Tuple ``(X, y, validation_X, validation_y, test_X, test_y)``.

    Raises:
        ValueError: If no row matches the requested site and phase (also
            raised by ``int()``/``float()`` on non-numeric cells).
    """
    site = int(siteid)
    phase = int(phaseid)
    path = os.path.join(folder, 'tt_data.csv')

    # Text mode: csv.reader needs str rows on Python 3 (binary mode only works
    # on Python 2). The ``with`` block guarantees the file is closed.
    with open(path, 'r') as csv_file:
        rows = [
            [float(col) for col in row]
            for row in csv.reader(csv_file, delimiter=',')
            # Both conditions must be inside the comprehension so that the
            # phase filter is applied to every row.
            if int(row[0]) == site and int(row[1]) == phase
        ]

    if not rows:
        raise ValueError(
            "no rows in %s for siteid=%d, phaseid=%d" % (path, site, phase))

    data = munge.shuffle_data(np.array(rows))

    # Slice bounds must be integers; int() truncates the fractional sizes.
    n_rows = data.shape[0]
    train_n = int(.8 * n_rows)
    v_cutoff = int(.8 * n_rows + .1 * n_rows)

    # cols: [siteid, phaseid, evlon, evlat, evdepth, sitelon, sitelat, siteheight, tt, ttres]
    feature_cols = [2, 3, 4, 5, 6, 7]
    target_col = 9

    X = data[0:train_n, feature_cols]
    y = data[0:train_n, target_col]

    validation_X = data[train_n:v_cutoff, feature_cols]
    validation_y = data[train_n:v_cutoff, target_col]
    test_X = data[v_cutoff:, feature_cols]
    test_y = data[v_cutoff:, target_col]

    return X, y, validation_X, validation_y, test_X, test_y