def divide_save(filename, savename, flag_pca=False, portion=0.8,
                permu_flag=True, usage_ratio=1, n_labels=12, n_seq=20):
    """Split a dataset into train/test parts and write each part to a CSV file.

    The data is loaded with ``csv_read``, optionally transformed with PCA,
    reshaped with ``csv_reformat`` and then cut into a leading training slice
    and a following test slice.  Features and labels of each slice are joined
    column-wise and saved with ``np.savetxt``.

    Args:
        filename: Passed to ``csv_read`` and ``csv_reformat`` (presumably the
            input CSV path -- confirm against those helpers).
        savename: Middle part of the output file names; the files are
            ``path + 'train_' + savename + '.csv'`` and
            ``path + 'test_' + savename + '.csv'``.
        flag_pca: When true, replace the features with
            ``PCA(n_components=8, whiten=True).fit_transform(features)``.
        portion: Fraction of the used rows that goes into the training file.
        permu_flag: Forwarded to ``csv_reformat``.
        usage_ratio: Fraction of all rows that is used at all; rows beyond
            ``ceil(n_samples * usage_ratio)`` are dropped.
        n_labels: Forwarded to ``csv_reformat``.
        n_seq: Forwarded to ``csv_reformat``.

    Returns:
        None.  The results are written to disk.
    """
    X_, y_ = csv_read(filename)

    if flag_pca:
        pca = PCA(n_components=8, whiten=True)
        X_ = pca.fit_transform(X_)

    X, y = csv_reformat(X_, y_, filename, permu_flag=permu_flag,
                        n_labels=n_labels, n_seq=n_seq)
    # Single pre-formatted argument: prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print('%s %s' % (X.shape, y.shape))

    # The two-name unpack also enforces that X is 2-D.
    n_samples, _ = X.shape
    n_use_samples = int(ceil(n_samples * usage_ratio))
    train_part = int(ceil(n_use_samples * portion))

    X_train = X[:train_part]
    X_test = X[train_part:n_use_samples]
    y_train = y[:train_part]
    y_test = y[train_part:n_use_samples]

    # Column-wise join: y must be 2-D with one row per sample.
    train = np.concatenate((X_train, y_train), axis=1)
    test = np.concatenate((X_test, y_test), axis=1)

    # ``path`` is a module-level name defined outside this function; plain
    # string concatenation means it has to carry its own trailing separator.
    train_name = path + 'train_' + savename + '.csv'
    test_name = path + 'test_' + savename + '.csv'

    print('Saving divided data..')
    np.savetxt(train_name, train, delimiter=',')
    np.savetxt(test_name, test, delimiter=',')
def divide_data(portion=0.8, permu_flag=True, usage_ratio=1, n_seq=20):
    """Return a train/test split of the data produced by ``csv_reformat``.

    The feature matrix is cut row-wise into a leading training slice and a
    following test slice, and each slice is reshaped to three dimensions
    ``(-1, n_features // n_seq, n_seq)``.  Labels are sliced the same way but
    not reshaped.

    Args:
        portion: Fraction of the used rows that goes into the training set.
        permu_flag: Forwarded to ``csv_reformat``.
        usage_ratio: Fraction of all rows that is used at all; rows beyond
            ``ceil(n_samples * usage_ratio)`` are dropped.
        n_seq: Forwarded to ``csv_reformat`` and used as the size of the last
            axis of the reshaped features.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.

    Raises:
        ValueError: From ``np.reshape`` if a slice cannot be laid out as
            ``(-1, n_features // n_seq, n_seq)``.
    """
    # Forward the caller's choice; it used to be hard-coded to True, which
    # silently ignored the ``permu_flag`` argument.
    X, y = csv_reformat(permu_flag=permu_flag, n_seq=n_seq)

    # The two-name unpack also enforces that X is 2-D.
    n_samples, _ = X.shape
    n_use_samples = int(ceil(n_samples * usage_ratio))
    train_part = int(ceil(n_use_samples * portion))

    X_train = X[:train_part, :]
    X_test = X[train_part:n_use_samples, :]
    y_train = y[:train_part]
    y_test = y[train_part:n_use_samples]

    # Floor division keeps the dimension an integer on Python 3 as well;
    # ``/`` would produce a float there and make ``np.reshape`` fail.
    X_train = np.reshape(X_train, (-1, X_train.shape[1] // n_seq, n_seq))
    X_test = np.reshape(X_test, (-1, X_test.shape[1] // n_seq, n_seq))

    return X_train, y_train, X_test, y_test