Beispiel #1
0
def run_dang(X_S, x_T, n_samples, window_shape, step, text_length):
    """Build text records and time one ``dang_neighborhood_generation`` call.

    Every element of ``X_S`` and the single instance ``x_T`` are wrapped in a
    ``TextRecord`` created with the same ``window_shape``, ``step`` and
    ``text_length``. The records for ``X_S`` are created before the clock
    starts, so the measured time covers only wrapping ``x_T``, the generation
    call and the extraction of the ``.data`` payloads.

    Returns:
        A tuple ``(Z, run_train)``: ``Z`` is the list of ``.data`` attributes
        of the generated records and ``run_train`` is the elapsed wall-clock
        time in seconds.
    """
    record_options = {
        'window_shape': window_shape,
        'step': step,
        'text_length': text_length,
    }
    operators = ['cxOnePoint', 'cxTwoPoint', 'cxUniform', 'sxSuppress']

    # Wrapping the reference set is deliberately left outside the timed span.
    reference_records = []
    for sample in X_S:
        reference_records.append(TextRecord(sample, **record_options))

    started_at = time.time()
    target_record = TextRecord(x_T, **record_options)
    generated = dang_neighborhood_generation(target_record,
                                             reference_records,
                                             n_samples=n_samples,
                                             indpb=0.5,
                                             neighgen_op=operators,
                                             base=None)
    Z = []
    for record in generated:
        Z.append(record.data)
    run_train = time.time() - started_at

    return Z, run_train
Beispiel #2
0
def main():
    """Generate a neighborhood for one 'italypower' test instance and plot it.

    Loads the dataset through ``get_dataset``, keeps at most ``train_size``
    randomly chosen training samples, wraps the data in the record class that
    matches ``D['data_type']`` and calls ``dang_neighborhood_generation`` for
    the first test instance. It then prints the number of generated items
    together with the means of the training data and of ``Z``, and plots the
    instance (thick line) with the generated items at even positions 0..18.

    ``path_dataset`` and ``neighgen_operators`` are not defined here and must
    exist in the enclosing module scope.

    Raises:
        ValueError: if ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    dataset = 'italypower'
    train_size = 1000

    D = get_dataset(dataset, path_dataset)
    X_train, y_train, X_test, y_test = (
        D[key] for key in ('X_train', 'y_train', 'X_test', 'y_test'))

    data_type = D['data_type']
    if data_type == 'txt':
        # Text datasets expose the raw texts under separate keys.
        X_train = D['X_train_txt']
        X_test = D['X_test_txt']

    n_train = len(X_train)
    if n_train > train_size:
        # NOTE(review): indexing with an index array presumably requires
        # X_train to be a NumPy array; confirm this holds for text data.
        keep = np.random.choice(n_train, size=train_size, replace=False)
        X_train = X_train[keep]

    # Choose the record class and its constructor options for this data type.
    if data_type == 'tab':
        record_options = {}
        record_cls = TabularRecord
    elif data_type == 'ts':
        record_options = {
            'window_shape': D['window_sizes'][0],
            'step': D['window_steps'][0],
        }
        record_cls = TimeSeriesRecord
    elif data_type == 'img':
        record_options = {
            'window_shape': D['window_sizes'][2],
            'step': D['window_steps'][2][0],
        }
        record_cls = ImageRecord
    elif data_type == 'txt':
        record_options = {'window_shape': 3, 'step': 1, 'text_length': 100}
        record_cls = TextRecord
    else:
        raise ValueError('Unknown data type %s' % data_type)

    X_train_dt = [record_cls(sample, **record_options) for sample in X_train]
    x_dt = record_cls(X_test[0], **record_options)

    # The original also kept commented-out calls to
    # rand_neighborhood_generation, supp_neighborhood_generation and
    # norm_neighborhood_generation as alternatives to the call below.
    Z = dang_neighborhood_generation(x_dt,
                                     X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)

    print(len(Z), np.mean(X_train), np.mean(Z))

    plt.plot(x_dt.data.tolist(), lw=5)
    for position in range(0, 20, 2):
        plt.plot(Z[position].data.tolist())
    plt.show()
Beispiel #3
0
def run_dang(X_S, x_T, n_samples):
    """Build tabular records and time one ``dang_neighborhood_generation`` call.

    Every element of ``X_S`` and the single instance ``x_T`` are wrapped in a
    ``TabularRecord``. The records for ``X_S`` are created before the clock
    starts, so the measured time covers only wrapping ``x_T``, the generation
    call and the extraction of the ``.data`` payloads.

    Returns:
        A tuple ``(Z, run_train)``: ``Z`` is the list of ``.data`` attributes
        of the generated records and ``run_train`` is the elapsed wall-clock
        time in seconds.
    """
    operators = [
        'cxOnePoint',
        'cxTwoPoint',
        'cxUniform',
        'cxBlend',
        'cxUniformBlend',
        'sxSuppress',
    ]

    # Wrapping the reference set is deliberately left outside the timed span.
    reference_records = []
    for sample in X_S:
        reference_records.append(TabularRecord(sample))

    started_at = time.time()
    target_record = TabularRecord(x_T)
    generated = dang_neighborhood_generation(target_record,
                                             reference_records,
                                             n_samples=n_samples,
                                             indpb=0.5,
                                             neighgen_op=operators,
                                             base=None)
    Z = []
    for record in generated:
        Z.append(record.data)
    run_train = time.time() - started_at

    return Z, run_train
Beispiel #4
0
def main():
    """Generate a neighborhood for one 'wdbc' test instance and print it.

    Loads the dataset through ``get_dataset``, keeps at most ``train_size``
    randomly chosen training samples, wraps the data in the record class that
    matches ``D['data_type']`` and calls ``dang_neighborhood_generation`` for
    the first test instance. The instance's ``.data`` and up to ten generated
    items are printed, separated by ``----`` lines.

    ``path_dataset``, ``normalize`` and ``neighgen_operators`` are not defined
    here and must exist in the enclosing module scope.

    Raises:
        ValueError: if ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    dataset = 'wdbc'
    train_size = 1000

    D = get_dataset(dataset, path_dataset, normalize)
    X_train, X_test = D['X_train'], D['X_test']

    data_type = D['data_type']
    if data_type == 'txt':
        # Text datasets expose the raw texts under separate keys.
        X_train, X_test = D['X_train_txt'], D['X_test_txt']

    if len(X_train) > train_size:
        idx = np.random.choice(len(X_train), size=train_size, replace=False)
        if isinstance(X_train, (list, tuple)):
            # Plain sequences cannot be indexed with an index array, so the
            # selected items are picked one by one.
            X_train = [X_train[i] for i in idx]
        else:
            X_train = X_train[idx]
    # Plain sequences have no ``.shape``; report their length instead.
    print(dataset, getattr(X_train, 'shape', (len(X_train),)), data_type)

    # Choose the record class and its constructor options for this data type.
    if data_type == 'tab':
        record_options = {}
        record_cls = TabularRecord
    elif data_type == 'ts':
        record_options = {
            'window_shape': D['window_sizes'][0],
            'step': D['window_steps'][0],
        }
        record_cls = TimeSeriesRecord
    elif data_type == 'img':
        record_options = {
            'window_shape': D['window_sizes'][2],
            'step': D['window_steps'][2][0],
        }
        record_cls = ImageRecord
    elif data_type == 'txt':
        record_options = {'window_shape': 3, 'step': 1, 'text_length': 100}
        record_cls = TextRecord
    else:
        raise ValueError('Unknown data type %s' % data_type)

    X_train_dt = [record_cls(x, **record_options) for x in X_train]
    x_dt = record_cls(X_test[0], **record_options)

    Z = dang_neighborhood_generation(x_dt,
                                     X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)

    print(x_dt.data)
    print('----')
    # Slicing avoids an IndexError if fewer than ten items were generated.
    for z in Z[:10]:
        print(z)
        print('----')
Beispiel #5
0
def main():
    """Generate a neighborhood for one '20newsgroups' test instance and print it.

    Loads the dataset through ``get_dataset``, wraps the data in the record
    class that matches ``D['data_type']`` (the 'img' and 'txt' branches use
    only the first 1000 training samples) and calls
    ``dang_neighborhood_generation`` for the first test instance. The
    non-empty terms of the instance and of the first ten generated records are
    printed as space-joined lines separated by ``----``.

    ``path_dataset``, ``normalize`` and ``neighgen_operators`` are not defined
    here and must exist in the enclosing module scope.

    Raises:
        ValueError: if ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    # The original kept 'wdbc', 'italypower' and 'mnist' as commented-out
    # alternatives to this dataset name.
    dataset = '20newsgroups'

    D = get_dataset(dataset, path_dataset, normalize)
    X_train, y_train, X_test, y_test = (
        D[key] for key in ('X_train', 'y_train', 'X_test', 'y_test'))

    n_classes = D['n_classes']
    data_type = D['data_type']
    print(X_train.shape, data_type)
    if data_type == 'txt':
        # Text datasets expose the raw texts under separate keys.
        X_train = D['X_train_txt']
        X_test = D['X_test_txt']

    # Choose the record class, its options and the training samples to wrap.
    if data_type == 'tab':
        record_options = {}
        train_pool = X_train
        record_cls = TabularRecord
    elif data_type == 'ts':
        window_shape = D['window_sizes'][0]
        step = D['window_steps'][0]
        print(data_type, window_shape, step)
        record_options = {'window_shape': window_shape, 'step': step}
        train_pool = X_train
        record_cls = TimeSeriesRecord
    elif data_type == 'img':
        print(data_type)
        # Fixed values; the original noted D['window_sizes'][2] and
        # D['window_steps'][2][0] next to them as comments.
        record_options = {'window_shape': (14, 14), 'step': 7}
        train_pool = X_train[:1000]
        record_cls = ImageRecord
    elif data_type == 'txt':
        print(data_type)
        record_options = {'window_shape': 3, 'step': 1, 'text_length': 100}
        train_pool = X_train[:1000]
        record_cls = TextRecord
    else:
        raise ValueError('Unknown data type %s' % data_type)

    X_train_dt = [record_cls(sample, **record_options) for sample in train_pool]
    x_dt = record_cls(X_test[0], **record_options)

    Z = dang_neighborhood_generation(x_dt,
                                     X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)
    print(len(Z))

    def joined_terms(record):
        # Space-join the record's terms, skipping empty ones.
        return ' '.join(term for term in record.data if len(term) > 0)

    print(joined_terms(x_dt))
    print('----')
    for position in range(10):
        print(joined_terms(Z[position]))
        print('----')
Beispiel #6
0
def main():
    """Generate a neighborhood for one two-category '20newsgroups' test text.

    Loads the dataset through ``get_dataset`` restricted to the categories
    'alt.atheism' and 'talk.religion.misc', keeps at most ``train_size``
    randomly chosen training samples (in their original order), wraps the data
    in the record class that matches ``D['data_type']`` and calls
    ``dang_neighborhood_generation`` for the first test instance. The
    non-empty terms of the instance and of up to ten generated records are
    printed as space-joined lines.

    ``path_dataset`` and ``neighgen_operators`` are not defined here and must
    exist in the enclosing module scope.

    Raises:
        ValueError: if ``D['data_type']`` is not 'tab', 'ts', 'img' or 'txt'.
    """
    dataset = '20newsgroups'
    train_size = 1000
    text_length = 1000

    D = get_dataset(dataset,
                    path_dataset,
                    categories=['alt.atheism', 'talk.religion.misc'])
    X_train, X_test = D['X_train'], D['X_test']

    data_type = D['data_type']
    if data_type == 'txt':
        # Text datasets expose the raw texts under separate keys.
        X_train, X_test = D['X_train_txt'], D['X_test_txt']

    if len(X_train) > train_size:
        idx = np.random.choice(len(X_train), size=train_size, replace=False)
        # A set gives O(1) membership tests; checking ``i in idx`` against the
        # index array would rescan it for every training sample.
        chosen = set(idx.tolist())
        X_train = [x for i, x in enumerate(X_train) if i in chosen]

    # Choose the record class and its constructor options for this data type.
    if data_type == 'tab':
        record_options = {}
        record_cls = TabularRecord
    elif data_type == 'ts':
        record_options = {
            'window_shape': D['window_sizes'][0],
            'step': D['window_steps'][0],
        }
        record_cls = TimeSeriesRecord
    elif data_type == 'img':
        record_options = {
            'window_shape': D['window_sizes'][2],
            'step': D['window_steps'][2][0],
        }
        record_cls = ImageRecord
    elif data_type == 'txt':
        record_options = {
            'window_shape': 3,
            'step': 3,
            'text_length': text_length,
        }
        record_cls = TextRecord
    else:
        raise ValueError('Unknown data type %s' % data_type)

    X_train_dt = [record_cls(x, **record_options) for x in X_train]
    x_dt = record_cls(X_test[0], **record_options)

    # The original also kept commented-out calls to
    # rand_neighborhood_generation, supp_neighborhood_generation and
    # mode_neighborhood_generation as alternatives to the call below.
    Z = dang_neighborhood_generation(x_dt,
                                     X_train_dt,
                                     n_samples=1000,
                                     indpb=0.5,
                                     neighgen_op=neighgen_operators,
                                     base=None)

    print(len(Z))

    print(' '.join(term for term in x_dt.data if len(term) > 0))
    print('----O------')
    # Slicing avoids an IndexError if fewer than ten items were generated.
    for z in Z[:10]:
        print(' '.join(term for term in z.data if len(term) > 0))
        print('----')