def load_yearpred(folder="../data/"):
    """Load 'YearPredictionMSD.txt' from *folder* and split it three ways.

    The file is parsed as comma-separated numbers.  For every subset,
    column 0 is returned as the target vector and the remaining columns
    as the feature matrix.  The split is purely positional:

    * rows [0, 410000)      -> training set, handed to
      ``munge.shuffle_data`` before being separated into X / y
    * rows [410000, 463715) -> validation set (not shuffled here)
    * rows [463715, end)    -> test set (not shuffled here)

    Returns:
        The tuple ``(train_X, train_y, validation_X, validation_y,
        test_X, test_y)``.
    """
    # Row indices at which the training and validation ranges end.
    train_end = 410000
    validation_end = 463715

    table = np.genfromtxt(os.path.join(folder, 'YearPredictionMSD.txt'), delimiter=',')

    # Held-out rows are sliced first, in file order.
    held_out = table[validation_end:]
    test_X, test_y = held_out[:, 1:], held_out[:, 0]

    validation = table[train_end:validation_end]
    validation_X, validation_y = validation[:, 1:], validation[:, 0]

    # Only the training rows go through the shuffle helper.
    shuffled = munge.shuffle_data(table[0:train_end, :])
    train_X, train_y = shuffled[:, 1:], shuffled[:, 0]

    return train_X, train_y, validation_X, validation_y, test_X, test_y
def load_seismic_station(siteid, phaseid=0, folder="../data/"):
    """Load the 'tt_data.csv' rows for one site/phase and split them 80/10/10.

    Only rows whose first column equals ``int(siteid)`` AND whose second
    column equals ``int(phaseid)`` are kept.  Every field of a kept row is
    converted to ``float``.  The kept rows are passed through
    ``munge.shuffle_data`` and then cut positionally into training (first
    80%), validation (next 10%) and test (remainder) subsets.

    Args:
        siteid: Site identifier to select; compared after ``int()``.
        phaseid: Phase identifier to select; compared after ``int()``.
        folder: Directory that contains 'tt_data.csv'.

    Returns:
        The tuple ``(X, y, validation_X, validation_y, test_X, test_y)``
        where the X arrays hold columns 2-7 and the y arrays hold column 9.

    Raises:
        ValueError: If no row of the file matches ``siteid``/``phaseid``
            (or a field cannot be parsed as a number).
    """
    # cols: [siteid, phaseid, evlon, evlat, evdepth, sitelon, sitelat, siteheight, tt, ttres]
    feature_cols = [2, 3, 4, 5, 6, 7]
    target_col = 9

    # Convert the selectors once instead of once per row.
    wanted_site = int(siteid)
    wanted_phase = int(phaseid)

    path = os.path.join(folder, 'tt_data.csv')
    # Text mode is what Python 3's csv module needs; every kept field goes
    # through float(), so newline translation cannot alter the data.  The
    # context manager closes the handle that was previously leaked.
    with open(path, 'r') as handle:
        # Both conditions must live INSIDE the comprehension so that each
        # row is tested against the site and the phase.  Blank lines (which
        # csv yields as empty lists) are skipped.
        rows = [
            [float(col) for col in row]
            for row in csv.reader(handle, delimiter=',')
            if row and int(row[0]) == wanted_site and int(row[1]) == wanted_phase
        ]

    if not rows:
        # An empty selection would otherwise surface as an obscure
        # indexing error further down.
        raise ValueError(
            "no rows in %s for siteid=%s, phaseid=%s" % (path, siteid, phaseid)
        )

    data = munge.shuffle_data(np.array(rows))

    # Slice bounds must be integers; truncate the fractional row counts.
    n_rows = data.shape[0]
    train_n = int(.8 * n_rows)
    v_cutoff = int(.8 * n_rows + .1 * n_rows)

    X = data[0:train_n, feature_cols]
    y = data[0:train_n, target_col]

    validation_X = data[train_n:v_cutoff, feature_cols]
    validation_y = data[train_n:v_cutoff, target_col]

    test_X = data[v_cutoff:, feature_cols]
    test_y = data[v_cutoff:, target_col]

    return X, y, validation_X, validation_y, test_X, test_y