def run_dang(X_S, x_T, n_samples, window_shape, step, text_length):
    """Generate a neighborhood of a text instance and time the generation.

    Each element of ``X_S`` and the instance ``x_T`` is wrapped in a
    ``TextRecord`` built with the given ``window_shape``, ``step`` and
    ``text_length``. The timer starts after ``X_S`` has been wrapped, so
    only wrapping ``x_T``, calling ``dang_neighborhood_generation`` and
    unwrapping the result are measured.

    Args:
        X_S: Iterable of support instances.
        x_T: Instance to generate neighbors for.
        n_samples: Forwarded to the generator as ``n_samples``.
        window_shape: Forwarded to ``TextRecord``.
        step: Forwarded to ``TextRecord``.
        text_length: Forwarded to ``TextRecord``.

    Returns:
        A tuple ``(Z, run_train)`` where ``Z`` is the list of ``.data``
        attributes of the generated records and ``run_train`` is the
        elapsed time in seconds.
    """
    neighgen_operators = [
        'cxOnePoint',
        'cxTwoPoint',
        'cxUniform',
        'sxSuppress',
    ]

    X_S_dt = [
        TextRecord(x, window_shape=window_shape, step=step,
                   text_length=text_length)
        for x in X_S
    ]

    # perf_counter() is monotonic and high-resolution, so the measured
    # duration cannot be skewed by system clock adjustments.
    ts = time.perf_counter()
    x_dt = TextRecord(x_T, window_shape=window_shape, step=step,
                      text_length=text_length)
    Z_dt = dang_neighborhood_generation(x_dt, X_S_dt,
                                        n_samples=n_samples,
                                        indpb=0.5,
                                        neighgen_op=neighgen_operators,
                                        base=None)
    Z = [z.data for z in Z_dt]
    run_train = time.perf_counter() - ts

    return Z, run_train
def main():
    """Plot generated neighbors of the first 'italypower' test instance.

    Loads the dataset through ``get_dataset``, keeps at most ``train_size``
    randomly chosen training instances, wraps the data in the record class
    matching ``D['data_type']`` and calls ``dang_neighborhood_generation``
    with 1000 samples. It then prints ``len(Z)`` and the means of the
    training data and of ``Z``, and plots the query instance (thick line)
    together with ten of the generated records.

    ``path_dataset`` and ``neighgen_operators`` are not defined here; they
    are resolved from the enclosing scope.

    Raises:
        ValueError: If ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    dataset = 'italypower'
    train_size = 1000

    D = get_dataset(dataset, path_dataset)
    X_train = D['X_train']
    y_train = D['y_train']
    X_test = D['X_test']
    y_test = D['y_test']

    data_type = D['data_type']
    if data_type == 'txt':
        X_train, X_test = D['X_train_txt'], D['X_test_txt']

    # Random subsample (without replacement) when the training set is large.
    if len(X_train) > train_size:
        keep = np.random.choice(len(X_train), size=train_size, replace=False)
        X_train = X_train[keep]

    query_pos = 0

    # Choose how a raw instance is turned into a record for this data type.
    if data_type == 'tab':
        make_record = TabularRecord
    elif data_type == 'ts':
        window_shape = D['window_sizes'][0]
        step = D['window_steps'][0]

        def make_record(raw):
            return TimeSeriesRecord(raw, window_shape=window_shape, step=step)
    elif data_type == 'img':
        window_shape = D['window_sizes'][2]
        step = D['window_steps'][2][0]

        def make_record(raw):
            return ImageRecord(raw, window_shape=window_shape, step=step)
    elif data_type == 'txt':
        window_shape = 3
        step = 1

        def make_record(raw):
            return TextRecord(raw, window_shape=window_shape, step=step,
                              text_length=100)
    else:
        raise ValueError('Unknown data type %s' % data_type)

    X_train_dt = [make_record(raw) for raw in X_train]
    x_dt = make_record(X_test[query_pos])

    Z = dang_neighborhood_generation(x_dt, X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)
    # Alternative generators that were left switched off at this point:
    # Z = rand_neighborhood_generation(x_dt, X_train_dt, n_samples=1000, indpb=0.5)
    # Z = supp_neighborhood_generation(x_dt, 0.0, n_samples=1000, indpb=0.5)
    # Z = norm_neighborhood_generation(x_dt, X_train_dt, n_samples=1000, indpb=0.5)

    print(len(Z), np.mean(X_train), np.mean(Z))

    # Query instance drawn thick, then every second record among the first 20.
    plt.plot(x_dt.data.tolist(), lw=5)
    for pos in range(0, 20, 2):
        plt.plot(Z[pos].data.tolist())
    plt.show()
def run_dang(X_S, x_T, n_samples):
    """Generate a neighborhood of a tabular instance and time the generation.

    Each element of ``X_S`` and the instance ``x_T`` is wrapped in a
    ``TabularRecord``. The timer starts after ``X_S`` has been wrapped, so
    only wrapping ``x_T``, calling ``dang_neighborhood_generation`` and
    unwrapping the result are measured.

    Args:
        X_S: Iterable of support instances.
        x_T: Instance to generate neighbors for.
        n_samples: Forwarded to the generator as ``n_samples``.

    Returns:
        A tuple ``(Z, run_train)`` where ``Z`` is the list of ``.data``
        attributes of the generated records and ``run_train`` is the
        elapsed time in seconds.
    """
    neighgen_operators = [
        'cxOnePoint',
        'cxTwoPoint',
        'cxUniform',
        'cxBlend',
        'cxUniformBlend',
        'sxSuppress',
    ]

    X_S_dt = [TabularRecord(x) for x in X_S]

    # perf_counter() is monotonic and high-resolution, so the measured
    # duration cannot be skewed by system clock adjustments.
    ts = time.perf_counter()
    x_dt = TabularRecord(x_T)
    Z_dt = dang_neighborhood_generation(x_dt, X_S_dt,
                                        n_samples=n_samples,
                                        indpb=0.5,
                                        neighgen_op=neighgen_operators,
                                        base=None)
    Z = [z.data for z in Z_dt]
    run_train = time.perf_counter() - ts

    return Z, run_train
def main():
    """Print the first 'wdbc' test instance and ten generated neighbors.

    Loads the dataset through ``get_dataset``, keeps at most ``train_size``
    randomly chosen training instances, wraps the data in the record class
    matching ``D['data_type']`` and calls ``dang_neighborhood_generation``
    with 1000 samples.

    ``path_dataset``, ``normalize`` and ``neighgen_operators`` are not
    defined here; they are resolved from the enclosing scope.

    Raises:
        ValueError: If ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    dataset = 'wdbc'
    train_size = 1000

    D = get_dataset(dataset, path_dataset, normalize)
    X_train, X_test = D['X_train'], D['X_test']

    data_type = D['data_type']
    if data_type == 'txt':
        X_train, X_test = D['X_train_txt'], D['X_test_txt']

    if len(X_train) > train_size:
        sample_idx = np.random.choice(len(X_train), size=train_size,
                                      replace=False)
        if isinstance(X_train, (list, tuple)):
            # Plain sequences cannot be indexed with an index array, so
            # pick the sampled elements one by one.
            X_train = [X_train[i] for i in sample_idx]
        else:
            X_train = X_train[sample_idx]

    # Fall back to (length,) for containers without a ``shape`` attribute.
    print(dataset, getattr(X_train, 'shape', (len(X_train),)), data_type)

    query_idx = 0
    if data_type == 'tab':
        X_train_dt = [TabularRecord(x) for x in X_train]
        x_dt = TabularRecord(X_test[query_idx])
    elif data_type == 'ts':
        window_shape = D['window_sizes'][0]
        step = D['window_steps'][0]
        X_train_dt = [
            TimeSeriesRecord(x, window_shape=window_shape, step=step)
            for x in X_train
        ]
        x_dt = TimeSeriesRecord(X_test[query_idx],
                                window_shape=window_shape,
                                step=step)
    elif data_type == 'img':
        window_shape = D['window_sizes'][2]
        step = D['window_steps'][2][0]
        X_train_dt = [
            ImageRecord(x, window_shape=window_shape, step=step)
            for x in X_train
        ]
        x_dt = ImageRecord(X_test[query_idx],
                           window_shape=window_shape,
                           step=step)
    elif data_type == 'txt':
        window_shape = 3
        step = 1
        X_train_dt = [
            TextRecord(x, window_shape=window_shape, step=step,
                       text_length=100)
            for x in X_train
        ]
        x_dt = TextRecord(X_test[query_idx],
                          window_shape=window_shape,
                          step=step,
                          text_length=100)
    else:
        raise ValueError('Unknown data type %s' % data_type)

    Z = dang_neighborhood_generation(x_dt, X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)

    print(x_dt.data)
    print('----')
    for i in range(10):
        print(Z[i])
        # NOTE(review): separator assumed to follow every sample; confirm
        # it was not meant to be printed once after the loop.
        print('----')
def main():
    """Print the first '20newsgroups' test item and ten generated neighbors.

    Loads the dataset through ``get_dataset``, wraps the data in the record
    class matching ``D['data_type']`` (image and text training data are
    limited to the first 1000 items) and calls
    ``dang_neighborhood_generation`` with 1000 samples. The non-empty terms
    of the query record and of the first ten generated records are printed
    joined by single spaces.

    ``path_dataset``, ``normalize`` and ``neighgen_operators`` are not
    defined here; they are resolved from the enclosing scope.

    Raises:
        ValueError: If ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    # Other dataset names previously selected here: 'wdbc', 'italypower',
    # 'mnist'.
    dataset = '20newsgroups'

    D = get_dataset(dataset, path_dataset, normalize)
    X_train = D['X_train']
    y_train = D['y_train']
    X_test = D['X_test']
    y_test = D['y_test']
    n_classes = D['n_classes']
    data_type = D['data_type']
    print(X_train.shape, data_type)

    if data_type == 'txt':
        X_train, X_test = D['X_train_txt'], D['X_test_txt']

    query_pos = 0

    # Pick the record factory and the training pool for this data type.
    if data_type == 'tab':
        make_record = TabularRecord
        train_pool = X_train
    elif data_type == 'ts':
        window_shape = D['window_sizes'][0]
        step = D['window_steps'][0]
        print(data_type, window_shape, step)

        def make_record(raw):
            return TimeSeriesRecord(raw, window_shape=window_shape, step=step)

        train_pool = X_train
    elif data_type == 'img':
        print(data_type)
        # Hard-coded in place of D['window_sizes'][2] and
        # D['window_steps'][2][0].
        window_shape = (14, 14)
        step = 7

        def make_record(raw):
            return ImageRecord(raw, window_shape=window_shape, step=step)

        train_pool = X_train[:1000]
    elif data_type == 'txt':
        print(data_type)
        window_shape = 3
        step = 1

        def make_record(raw):
            return TextRecord(raw, window_shape=window_shape, step=step,
                              text_length=100)

        train_pool = X_train[:1000]
    else:
        raise ValueError('Unknown data type %s' % data_type)

    X_train_dt = [make_record(raw) for raw in train_pool]
    x_dt = make_record(X_test[query_pos])

    Z = dang_neighborhood_generation(x_dt, X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)
    print(len(Z))

    def _joined_terms(record):
        # Keep only terms with non-zero length, separated by single spaces.
        return ' '.join(filter(len, record.data))

    print(_joined_terms(x_dt))
    print('----')
    for pos in range(10):
        print(_joined_terms(Z[pos]))
        # NOTE(review): separator assumed to follow every sample; confirm
        # it was not meant to be printed once after the loop.
        print('----')
def main():
    """Print a '20newsgroups' test item and ten generated text neighbors.

    Loads the 'alt.atheism' and 'talk.religion.misc' categories through
    ``get_dataset``, keeps at most ``train_size`` randomly chosen training
    items (in their original order), wraps the data in the record class
    matching ``D['data_type']`` and calls ``dang_neighborhood_generation``
    with 1000 samples. The non-empty terms of the query record and of the
    first ten generated records are printed joined by single spaces.

    ``path_dataset`` and ``neighgen_operators`` are not defined here; they
    are resolved from the enclosing scope.

    Raises:
        ValueError: If ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    dataset = '20newsgroups'
    train_size = 1000
    text_length = 1000

    D = get_dataset(dataset, path_dataset,
                    categories=['alt.atheism', 'talk.religion.misc'])
    X_train, X_test = D['X_train'], D['X_test']

    data_type = D['data_type']
    if data_type == 'txt':
        X_train, X_test = D['X_train_txt'], D['X_test_txt']

    if len(X_train) > train_size:
        sample_idx = np.random.choice(len(X_train), size=train_size,
                                      replace=False)
        # Build the membership set once: testing ``i in <ndarray>`` for
        # every element scans the whole index array each time.
        keep = set(sample_idx.tolist())
        X_train = [x for i, x in enumerate(X_train) if i in keep]

    query_idx = 0
    if data_type == 'tab':
        X_train_dt = [TabularRecord(x) for x in X_train]
        x_dt = TabularRecord(X_test[query_idx])
    elif data_type == 'ts':
        window_shape = D['window_sizes'][0]
        step = D['window_steps'][0]
        X_train_dt = [
            TimeSeriesRecord(x, window_shape=window_shape, step=step)
            for x in X_train
        ]
        x_dt = TimeSeriesRecord(X_test[query_idx],
                                window_shape=window_shape,
                                step=step)
    elif data_type == 'img':
        window_shape = D['window_sizes'][2]
        step = D['window_steps'][2][0]
        X_train_dt = [
            ImageRecord(x, window_shape=window_shape, step=step)
            for x in X_train
        ]
        x_dt = ImageRecord(X_test[query_idx],
                           window_shape=window_shape,
                           step=step)
    elif data_type == 'txt':
        window_shape = 3
        step = 3
        X_train_dt = [
            TextRecord(x, window_shape=window_shape, step=step,
                       text_length=text_length)
            for x in X_train
        ]
        x_dt = TextRecord(X_test[query_idx],
                          window_shape=window_shape,
                          step=step,
                          text_length=text_length)
    else:
        raise ValueError('Unknown data type %s' % data_type)

    Z = dang_neighborhood_generation(x_dt, X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)
    # Alternative generators that were left switched off at this point:
    # Z = rand_neighborhood_generation(x_dt, X_train_dt, n_samples=1000, indpb=0.5)
    # Z = supp_neighborhood_generation(x_dt, ' ', n_samples=1000, indpb=0.5)
    # Z = mode_neighborhood_generation(x_dt, X_train_dt, n_samples=1000, indpb=0.5)

    print(len(Z))
    print(' '.join([term for term in x_dt.data if len(term) > 0]))
    print('----O------')
    for i in range(10):
        print(' '.join([term for term in Z[i].data if len(term) > 0]))
        # NOTE(review): separator assumed to follow every sample; confirm
        # it was not meant to be printed once after the loop.
        print('----')