Python get_all_oversamplers 예제들, smote_variants.get_all_oversamplers Python 예제들

예제 #1

0

파일 보기

파일: study.py 프로젝트: zhanggaofeng1120/smote_variants

def oversampler_summary_table_thin():
    """
    Prints a compact LaTeX table listing the oversamplers and their citations.

    All classes returned by ``sv.get_all_oversamplers()`` except
    ``sv.NoSMOTE`` are sorted by the ``year`` field of the bibtex entry
    extracted from their docstrings, numbered from 1, split into two halves
    that are placed side by side, and rendered with ``DataFrame.to_latex``.
    The post-processed LaTeX source is written to standard output; nothing
    is returned.
    """
    oversamplers = sv.get_all_oversamplers()
    # NoSMOTE is excluded from the table; guard the removal so that a list
    # without it does not raise ValueError
    if sv.NoSMOTE in oversamplers:
        oversamplers.remove(sv.NoSMOTE)

    oversampling_bibtex = {o.__name__: extract_bibtex_entry(o.__doc__) for o in oversamplers}
    oversampling_years = {o.__name__: oversampling_bibtex[o.__name__]['year'] for o in oversamplers}

    oversamplers = sorted(oversamplers, key=lambda x: oversampling_years[x.__name__])

    # 'citep(' and '))' are placeholders which are turned into a LaTeX
    # \citep{...} command after to_latex has escaped the table content
    cat_summary = [
        {'method': o.__name__.replace('_', '-') + ' citep(' + oversampling_bibtex[o.__name__]['key'] + '))'}
        for o in oversamplers
    ]

    pd.set_option('display.max_colwidth', 100)
    # naming the column explicitly keeps the frame well-formed even if the
    # list of oversamplers is empty
    cat_summary = pd.DataFrame(cat_summary, columns=['method'])
    cat_summary.index = np.arange(1, len(cat_summary) + 1)

    # the first half receives the extra row when the number of rows is odd
    half = (len(cat_summary) + 1) // 2
    cat_summary_first = cat_summary.iloc[:half].reset_index()
    cat_summary_second = cat_summary.iloc[half:].reset_index()

    # the running numbers of the second half are turned into strings
    # before the two halves are concatenated side by side
    cat_summary_second['index'] = cat_summary_second['index'].astype(str)
    results = pd.concat([cat_summary_first, cat_summary_second], axis=1)

    res = results.to_latex(index=False)
    res = res.replace('index', '')
    res = res.replace('\\toprule', '')
    res = res.replace('citep(', '\\citep{')
    res = res.replace('))', '}')
    # undo the underscore escaping applied by to_latex
    res = res.replace('\\_', '_')
    res = res.replace('NaN', '')

    print(res)

예제 #2

0

파일 보기

def create_documentation_page_os():
    """
    Generates the reStructuredText documentation page of the oversamplers.

    For every class returned by ``sv.get_all_oversamplers()`` a section is
    emitted that contains an ``autoclass`` directive, a short usage example,
    two image references and the docstring of the class. The page is written
    to ``oversamplers.rst`` in the current working directory.

    Returns:
        str: the generated reStructuredText source
    """
    oversamplers = sv.get_all_oversamplers()

    # the fragments are collected in a list and joined once at the end
    parts = ["Oversamplers\n", "*" * len("Oversamplers") + "\n\n"]

    for o in oversamplers:
        name = o.__name__
        parts.append(name + "\n" + '-' * len(name) + "\n")
        parts.append("\n\n")
        parts.append("API\n")
        parts.append("^" * len("API") + "\n\n")
        parts.append(('.. autoclass:: smote_variants::%s' % name) + "\n")
        parts.append('    :members:' + "\n")
        parts.append("\n")
        parts.append('    .. automethod:: __init__')
        parts.append("\n\n")
        parts.append("Example\n")
        parts.append("^" * len("Example"))
        parts.append("\n\n")
        parts.append("    >>> oversampler= smote_variants.%s()\n" % name)
        parts.append("    >>> X_samp, y_samp= oversampler.sample(X, y)\n")
        parts.append("\n\n")
        parts.append(".. image:: figures/base.png" + "\n")
        parts.append((".. image:: figures/%s.png" % name) + "\n\n")
        # __doc__ is None for a class without a docstring; emit nothing then.
        # One level of indentation is stripped from the docstring lines.
        parts.append((o.__doc__ or "").replace("\n    ", "\n"))
        parts.append("\n\n")

    docs = "".join(parts)

    # the context manager closes the file even if the write fails
    with open("oversamplers.rst", "w") as file:
        file.write(docs)

    return docs

예제 #3

0

파일 보기

def generate_multiclass_figures():
    """
    Plots the wine dataset before and after multiclass oversampling.

    Only oversamplers that do not carry the ``cat_changes_majority`` category
    and whose default parameters contain ``proportion`` are used. The first
    two features are shown in a scatter plot for the unsampled data
    (``figures/multiclass-base.png``) and for the output of each selected
    oversampler (``figures/multiclass-<name>``). Sampling is carried out on
    standardized features which are transformed back before plotting.
    """
    selected = []
    for candidate in sv.get_all_oversamplers():
        if sv.OverSampling.cat_changes_majority in candidate.categories:
            continue
        # the class is instantiated only to inspect its default parameters
        if 'proportion' not in candidate().get_params():
            continue
        selected.append(candidate)

    import sklearn.datasets as datasets

    wine = datasets.load_wine()
    X, y = wine['data'], wine['target']

    import matplotlib.pyplot as plt

    import sklearn.preprocessing as preprocessing

    scaler = preprocessing.StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # (class label, color, marker, legend label) of the three scatter series
    series = (
        (0, 'r', 'o', 'class 0'),
        (1, 'b', 'P', 'class 1'),
        (2, 'green', 'x', 'class 2'),
    )

    def plot_and_save(X, y, filename, oversampler_name):
        # draws the first two features of each class, then saves and shows
        plt.figure(figsize=(4, 3))
        for value, color, marker, label in series:
            members = X[y == value]
            plt.scatter(members[:, 0],
                        members[:, 1],
                        c=color,
                        marker=marker,
                        label=label)
        plt.xlabel('feature 0')
        plt.ylabel('feature 1')
        plt.title(", ".join(["wine dataset", oversampler_name]))
        plt.savefig(filename)
        plt.show()

    plot_and_save(X, y, 'figures/multiclass-base.png', "No Oversampling")

    for oversampler in selected:
        name = oversampler.__name__
        print(name)
        multiclass = sv.MulticlassOversampling(oversampler())
        X_samp, y_samp = multiclass.sample(X_scaled, y)
        plot_and_save(scaler.inverse_transform(X_samp), y_samp,
                      "figures/multiclass-%s" % name, name)

예제 #4

0

파일 보기

def test_1_min_1_maj():
    """
    Checks every oversampler on a dataset with one sample per class.

    An instance of each class returned by ``sv.get_all_oversamplers()`` is
    passed to the ``validation`` helper together with the data, and the
    returned feature matrix is expected to be non-empty.
    """
    y = np.array([0, 1])
    X = np.array([[1.0, 1.1], [1.55, 1.55]])

    for sampler_class in sv.get_all_oversamplers():
        message = "testing %s" % str(sampler_class)
        logging.info(message)
        X_samp, _ = validation(sampler_class(), X, y)
        assert len(X_samp) > 0

예제 #5

0

파일 보기

파일: tests.py 프로젝트: zhanggaofeng1120/smote_variants

    def test_some_min_some_maj(self):
        """
        Checks every oversampler on a small dataset with five samples of
        class 0 and two samples of class 1.

        An instance of each class returned by ``sv.get_all_oversamplers()``
        is passed to the ``validation`` helper together with the data, and
        the returned feature matrix is expected to be non-empty.
        """
        class_0 = [[1.0, 1.1], [1.1, 1.2], [1.05, 1.1], [1.08, 1.05],
                   [1.1, 1.08]]
        class_1 = [[1.5, 1.6], [1.55, 1.55]]

        X = np.array(class_0 + class_1)
        y = np.array([0, 0, 0, 0, 0, 1, 1])

        for sampler_class in sv.get_all_oversamplers():
            message = "testing %s" % str(sampler_class)
            logging.info(message)
            X_samp, _ = validation(sampler_class(), X, y)
            self.assertTrue(len(X_samp) > 0)

예제 #6

0

파일 보기

def test_same_num():
    """
    Checks every oversampler on a dataset with four samples in each class.

    An instance of each class returned by ``sv.get_all_oversamplers()`` is
    passed to the ``validation`` helper together with the data, and the
    returned feature matrix is expected to be non-empty.
    """
    class_0 = [[1.0, 1.1], [1.1, 1.2], [1.05, 1.1], [1.1, 1.08]]
    class_1 = [[1.5, 1.6], [1.55, 1.55], [1.5, 1.62], [1.55, 1.51]]

    X = np.array(class_0 + class_1)
    y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

    for sampler_class in sv.get_all_oversamplers():
        message = "testing %s" % str(sampler_class)
        logging.info(message)
        X_samp, _ = validation(sampler_class(), X, y)
        assert len(X_samp) > 0

예제 #7

0

파일 보기

def test_parameters():
    """
    Checks that constructor parameters are reported back by ``get_params``.

    For every oversampler with at least one parameter combination, a randomly
    chosen combination is passed to the constructor and each of its entries
    is compared to the corresponding entry of ``get_params()``.
    """
    for sampler_class in sv.get_all_oversamplers():
        logging.info("testing %s" % str(sampler_class))

        combinations = sampler_class.parameter_combinations()
        if len(combinations) == 0:
            # nothing to check for this oversampler
            continue

        chosen = np.random.choice(combinations)
        reported = sampler_class(**chosen).get_params()

        for key in chosen:
            assert reported[key] == chosen[key]

예제 #8

0

파일 보기

def generate_figures():
    """
    Calls ``ballpark_sample`` for every oversampler and every noise filter.

    Each call receives a default-constructed instance, ``figures/base.png``
    as the base image file and ``figures/<class name>.png`` as the sampled
    image file.
    """
    base_image = 'figures/base.png'

    for oversampler_class in sv.get_all_oversamplers():
        oversampler = oversampler_class()
        sampled_image = 'figures/%s.png' % oversampler_class.__name__
        ballpark_sample(oversampler,
                        img_file_base=base_image,
                        img_file_sampled=sampled_image)

    for filter_class in sv.get_all_noisefilters():
        noise_filter = filter_class()
        sampled_image = 'figures/%s.png' % filter_class.__name__
        ballpark_sample(noise_filter,
                        img_file_base=base_image,
                        img_file_sampled=sampled_image)

예제 #9

0

파일 보기

파일: study.py 프로젝트: zhanggaofeng1120/smote_variants

def oversampling_bib_lookup():
    """
    Builds a bibtex lookup table for the oversampling techniques from the
    bibtex entries embedded in the docstrings of the oversampler classes.

    ``sv.NoSMOTE`` is left out of the table when it is present.

    Returns:
        dict: the bibtex entries keyed by the class names of the oversamplers
    """
    techniques = sv.get_all_oversamplers()
    if sv.NoSMOTE in techniques:
        techniques.remove(sv.NoSMOTE)

    lookup = {}
    for technique in techniques:
        lookup[technique.__name__] = extract_bibtex_entry(technique.__doc__)

    return lookup

예제 #10

0

파일 보기

파일: tests.py 프로젝트: zhanggaofeng1120/smote_variants

    def test_normal(self):
        """
        Runs all oversamplers and noise filters on the module level dataset.

        The minority samples (``data_min``) are labelled 1 and the majority
        samples (``data_maj``) are labelled 0. Every default-constructed
        oversampler, a list of oversamplers with non-default settings and
        every default-constructed noise filter must return a non-empty
        feature matrix.
        """
        minority_labels = np.repeat(1, len(data_min))
        majority_labels = np.repeat(0, len(data_maj))

        X = np.vstack([data_min, data_maj])
        y = np.hstack([minority_labels, majority_labels])

        for sampler_class in sv.get_all_oversamplers():
            logging.info("testing %s" % str(sampler_class))
            X_samp, _ = sampler_class().sample(X, y)
            self.assertTrue(len(X_samp) > 0)

        # oversamplers with non-default settings, as (class, keyword arguments)
        extra_specs = [
            (sv.polynom_fit_SMOTE, {'topology': 'star'}),
            (sv.polynom_fit_SMOTE, {'topology': 'bus'}),
            (sv.polynom_fit_SMOTE, {'topology': 'mesh'}),
            (sv.polynom_fit_SMOTE, {'topology': 'poly_2'}),
            (sv.Stefanowski, {'strategy': 'weak_amp'}),
            (sv.Stefanowski, {'strategy': 'weak_amp_relabel'}),
            (sv.Stefanowski, {'strategy': 'strong_amp'}),
            (sv.G_SMOTE, {'method': 'non-linear_2.0'}),
            (sv.SMOTE_PSOBAT, {'method': 'pso'}),
            (sv.AHC, {'strategy': 'maj'}),
            (sv.AHC, {'strategy': 'minmaj'}),
            (sv.SOI_CJ, {'method': 'jittering'}),
            (sv.ADG, {'kernel': 'rbf_1'}),
            (sv.SMOTE_IPF, {'voting': 'consensus'}),
            (sv.ASMOBD, {'smoothing': 'sigmoid'}),
        ]
        # all instances are created before the first one is used
        extra_samplers = [cls(**kwargs) for cls, kwargs in extra_specs]

        for sampler in extra_samplers:
            logging.info("testing %s" % str(sampler.__class__.__name__))
            X_samp, _ = sampler.sample(X, y)
            self.assertTrue(len(X_samp) > 0)

        for filter_class in sv.get_all_noisefilters():
            logging.info("testing %s" % str(filter_class))
            X_nf, _ = filter_class().remove_noise(X, y)
            self.assertTrue(len(X_nf) > 0)

예제 #11

0

파일 보기

파일: tests.py 프로젝트: zhanggaofeng1120/smote_variants

    def test_reproducibility(self):
        """
        Checks that sampling is reproducible and leaves the input untouched.

        The minority samples (``data_min``) are labelled 1 and the majority
        samples (``data_maj``) are labelled 0. Three groups are tested:

        * every oversampler: two instances created with ``random_state=5``
          and a third one rebuilt from ``get_params()`` must give identical
          feature matrices;
        * a list of oversamplers with non-default settings: an instance and
          its copy rebuilt from ``get_params()`` must give identical feature
          matrices;
        * every noise filter: a default instance and its copy rebuilt from
          ``get_params()`` must give identical feature matrices.

        In each case the input feature matrix ``X`` is compared to a copy
        taken before the calls.
        """
        X = np.vstack([data_min, data_maj])
        y = np.hstack(
            [np.repeat(1, len(data_min)),
             np.repeat(0, len(data_maj))])

        samplers = sv.get_all_oversamplers()

        for s in samplers:
            logging.info("testing %s" % str(s))

            X_orig = X.copy()

            X_samp_a, _ = s(random_state=5).sample(X, y)
            sampler = s(random_state=5)
            X_samp_b, _ = sampler.sample(X, y)
            X_samp_c, _ = s(**sampler.get_params()).sample(X, y)

            self.assertTrue(np.array_equal(X_samp_a, X_samp_b))
            self.assertTrue(np.array_equal(X_samp_a, X_samp_c))
            self.assertTrue(np.array_equal(X_orig, X))

        samplers_plus = [
            sv.polynom_fit_SMOTE(topology='star', random_state=5),
            sv.polynom_fit_SMOTE(topology='bus', random_state=5),
            sv.polynom_fit_SMOTE(topology='mesh', random_state=5),
            sv.polynom_fit_SMOTE(topology='poly_2', random_state=5),
            sv.Stefanowski(strategy='weak_amp', random_state=5),
            sv.Stefanowski(strategy='weak_amp_relabel', random_state=5),
            sv.Stefanowski(strategy='strong_amp', random_state=5),
            sv.G_SMOTE(method='non-linear_2.0', random_state=5),
            sv.SMOTE_PSOBAT(method='pso', random_state=5),
            sv.AHC(strategy='maj', random_state=5),
            sv.AHC(strategy='minmaj', random_state=5),
            sv.SOI_CJ(method='jittering', random_state=5),
            sv.ADG(kernel='rbf_1', random_state=5),
            sv.SMOTE_IPF(voting='consensus', random_state=5),
            sv.ASMOBD(smoothing='sigmoid', random_state=5)
        ]

        for s in samplers_plus:
            logging.info("testing %s" % str(s.__class__.__name__))

            X_orig = X.copy()
            X_samp_a, _ = s.sample(X, y)
            # rebuild an equivalent sampler from the reported parameters
            sc = s.__class__(**s.get_params())

            X_samp_b, _ = sc.sample(X, y)

            self.assertTrue(np.array_equal(X_samp_a, X_samp_b))
            self.assertTrue(np.array_equal(X_orig, X))

        noise_filters = sv.get_all_noisefilters()

        for n in noise_filters:
            logging.info("testing %s" % str(n))
            X_orig = X.copy()

            # distinct names for the two instances, so that the list being
            # iterated is not rebound inside the loop
            nf_a = n()
            X_nf_a, _ = nf_a.remove_noise(X, y)
            nf_b = n(**nf_a.get_params())
            X_nf_b, _ = nf_b.remove_noise(X, y)

            self.assertTrue(np.array_equal(X_nf_a, X_nf_b))
            self.assertTrue(np.array_equal(X_orig, X))

예제 #12

0

파일 보기

def test_normal():
    """
    Runs all oversamplers and noise filters on a fixed two-dimensional
    dataset of 20 minority (label 1) and 60 majority (label 0) samples.

    Every default-constructed oversampler, a list of oversamplers with
    non-default settings and every default-constructed noise filter must
    return a non-empty feature matrix.
    """
    # minority class samples
    data_min = np.array([[5.7996138, -0.25574582], [3.0637093, 2.11750874],
                         [4.91444087, -0.72380123], [1.06414164, 0.08694243],
                         [2.59071708, 0.75283568], [3.44834937, 1.46118085],
                         [2.8036378, 0.69553702], [3.57901791, 0.71870743],
                         [3.81529064, 0.62580927], [3.05005506, 0.33290343],
                         [1.83674689, 1.06998465], [2.08574889, -0.32686821],
                         [3.49417022, -0.92155623], [2.33920982, -1.59057568],
                         [1.95332431, -0.84533309], [3.35453368, -1.10178101],
                         [4.20791149, -1.41874985], [2.25371221, -1.45181929],
                         [2.87401694, -0.74746037], [1.84435381, 0.15715329]])

    # majority class samples
    data_maj = np.array([[-1.40972752, 0.07111486], [-1.1873495, -0.20838002],
                         [0.51978825, 2.1631319], [-0.61995016, -0.45111475],
                         [2.6093289, -0.40993063], [-0.06624482, -0.45882838],
                         [-0.28836659, -0.59493865], [0.345051, 0.05188811],
                         [1.75694985, 0.16685025], [0.52901288, -0.62341735],
                         [0.09694047, -0.15811278], [-0.37490451, -0.46290818],
                         [-0.32855088,
                          -0.20893795], [-0.98508364, -0.32003935],
                         [0.07579831, 1.36455355], [-1.44496689, -0.44792395],
                         [1.17083343, -0.15804265], [1.73361443, -0.06018163],
                         [-0.05139342, 0.44876765], [0.33731075, -0.06547923],
                         [-0.02803696, 0.5802353], [0.20885408, 0.39232885],
                         [0.22819482, 2.47835768], [1.48216063, 0.81341279],
                         [-0.6240829, -0.90154291], [0.54349668, 1.4313319],
                         [-0.65925018, 0.78058634], [-1.65006105, -0.88327625],
                         [-1.49996313, -0.99378106], [0.31628974, -0.41951526],
                         [0.64402186, 1.10456105], [-0.17725369, -0.67939216],
                         [0.12000555, -1.18672234], [2.09793313, 1.82636262],
                         [-0.11711376, 0.49655609], [1.40513236, 0.74970305],
                         [2.40025472, -0.5971392], [-1.04860983, 2.05691699],
                         [0.74057019, -1.48622202], [1.32230881, -2.36226588],
                         [-1.00093975,
                          -0.44426212], [-2.25927766, -0.55860504],
                         [-1.12592836, -0.13399132], [0.14500925, -0.89070934],
                         [0.90572513, 1.23923502], [-1.25416346, -1.49100593],
                         [0.51229813, 1.54563048], [-1.36854287, 0.0151081],
                         [0.08169257, -0.69722099], [-0.73737846, 0.42595479],
                         [0.02465411, -0.36742946], [-1.14532211, -1.23217124],
                         [0.98038343, 0.59259824], [-0.20721222, 0.68062552],
                         [-2.21596433, -1.96045872], [-1.20519292, -1.8900018],
                         [0.47189299, -0.4737293], [1.18196143, 0.85320018],
                         [0.03255894, -0.77687178], [0.32485141, -0.34609381]])

    minority_labels = np.repeat(1, len(data_min))
    majority_labels = np.repeat(0, len(data_maj))

    X = np.vstack([data_min, data_maj])
    y = np.hstack([minority_labels, majority_labels])

    for sampler_class in sv.get_all_oversamplers():
        logging.info("testing %s" % str(sampler_class))
        X_samp, _ = sampler_class().sample(X, y)
        assert len(X_samp) > 0

    # oversamplers with non-default settings, as (class, keyword arguments)
    extra_specs = [
        (sv.polynom_fit_SMOTE, {'topology': 'star'}),
        (sv.polynom_fit_SMOTE, {'topology': 'bus'}),
        (sv.polynom_fit_SMOTE, {'topology': 'mesh'}),
        (sv.polynom_fit_SMOTE, {'topology': 'poly_2'}),
        (sv.Stefanowski, {'strategy': 'weak_amp'}),
        (sv.Stefanowski, {'strategy': 'weak_amp_relabel'}),
        (sv.Stefanowski, {'strategy': 'strong_amp'}),
        (sv.G_SMOTE, {'method': 'non-linear_2.0'}),
        (sv.SMOTE_PSOBAT, {'method': 'pso'}),
        (sv.AHC, {'strategy': 'maj'}),
        (sv.AHC, {'strategy': 'minmaj'}),
        (sv.SOI_CJ, {'method': 'jittering'}),
        (sv.ADG, {'kernel': 'rbf_1'}),
        (sv.SMOTE_IPF, {'voting': 'consensus'}),
        (sv.ASMOBD, {'smoothing': 'sigmoid'}),
    ]
    # all instances are created before the first one is used
    extra_samplers = [cls(**kwargs) for cls, kwargs in extra_specs]

    for sampler in extra_samplers:
        logging.info("testing %s" % str(sampler.__class__.__name__))
        X_samp, _ = sampler.sample(X, y)
        assert len(X_samp) > 0

    for filter_class in sv.get_all_noisefilters():
        logging.info("testing %s" % str(filter_class))
        X_nf, _ = filter_class().remove_noise(X, y)
        assert len(X_nf) > 0

예제 #13

0

파일 보기

def test_queries():
    """
    Checks the query functions of the package: the listings of oversamplers,
    noise filters and multiclass oversamplers must be non-empty, and asking
    for the 5 quickest (multiclass) oversamplers must return 5 items.
    """
    n_requested = 5

    all_oversamplers = sv.get_all_oversamplers()
    assert len(all_oversamplers) > 0

    all_noise_filters = sv.get_all_noisefilters()
    assert len(all_noise_filters) > 0

    quickest = sv.get_n_quickest_oversamplers(n_requested)
    assert len(quickest) == 5

    all_multiclass = sv.get_all_oversamplers_multiclass()
    assert len(all_multiclass) > 0

    quickest_multiclass = sv.get_n_quickest_oversamplers_multiclass(n_requested)
    assert len(quickest_multiclass) == 5

예제 #14

0

파일 보기

def create_gallery_page():
    """
    Generates the reStructuredText gallery page and writes it to
    ``gallery.rst`` in the current working directory.

    The page consists of an introduction, a legend of the category
    abbreviations, and three image sections: one image per oversampler, one
    per noise filter, and one per oversampler usable in the multiclass
    setting (those without the ``cat_changes_majority`` category whose
    default parameters contain ``proportion``).

    Returns:
        str: the generated reStructuredText source
    """
    oversamplers = sv.get_all_oversamplers()
    noise_filters = sv.get_all_noisefilters()

    def heading(title, underline):
        # section title followed by an underline that is one character
        # longer than the title
        return title + "\n" + underline * (len(title) + 1) + "\n\n"

    def image_rows(names, per_row=4):
        # One image directive per line with a blank line after every
        # ``per_row`` images. A blank line is always placed after the last
        # image as well, because the text following the directives (a
        # section title) must be separated from them by a blank line.
        lines = []
        for count, name in enumerate(names, start=1):
            lines.append(".. image:: figures/%s.png\n" % name)
            if count % per_row == 0:
                lines.append("\n")
        if lines and lines[-1] != "\n":
            lines.append("\n")
        return "".join(lines)

    # Adjacent string literals are used for the long paragraphs, so that the
    # indentation of the source code does not leak into the generated text.
    parts = [
        heading("Gallery", "*"),
        "In this page, we demonstrate the output of various oversampling "
        "and noise removal techniques, using default parameters.\n\n",
        "For binary oversampling and noise removal, an artificial database "
        "was used, available in the ``utils`` directory of the github "
        "repository.\n\n",
        #docs= docs + "For binary oversampling and noise removal, the figures can be reproduced by the ``ballpark_sample`` function using \
        #                a built-in or user definied dataset:\n\n"
        #docs= docs + ".. autofunction:: smote_variants.ballpark_sample\n\n"
        "For multiclass oversampling we have used the 'wine' dataset from "
        "``sklearn.datasets``, which has 3 classes and many features, out "
        "of which the first two coordinates have been used for "
        "visualization.\n\n",
        heading("Oversampling sample results", "="),
        "In the captions of the images some abbreviations "
        "referring to the operating principles are placed. Namely:\n\n",
        "    * NR: noise removal is involved\n",
        "    * DR: dimension reduction is applied\n",
        "    * Clas: some supervised classifier is used\n",
        "    * SCmp: sampling is carried out componentwise (attributewise)\n",
        "    * SCpy: sampling is carried out by copying instances\n",
        "    * SO: ordinary sampling (just like in SMOTE)\n",
        "    * M: memetic optimization is used\n",
        "    * DE: density estimation is used\n",
        "    * DB: density based - the sampling is based on a density of "
        "importance assigned to the instances\n",
        "    * Ex: the sampling is extensive - samples are added "
        "successively, not optimizing the holistic distribution of a given "
        "number of samples\n",
        "    * CM: changes majority - even majority samples can change\n",
        "    * Clus: uses some clustering technique\n",
        "    * BL: identifies and samples the neighborhoods of borderline "
        "samples\n",
        "    * A: developed for a specific application\n",
        "\n",
        ".. figure:: figures/base.png" + "\n\n\n",
        image_rows(o.__name__ for o in oversamplers),
        heading("Noise removal sample results", "="),
        ".. figure:: figures/base.png" + "\n\n\n",
        image_rows(n.__name__ for n in noise_filters),
        heading("Multiclass sample results", "="),
        ".. figure:: figures/multiclass-base.png" + "\n\n\n",
    ]

    # each class is instantiated only to inspect its default parameters
    multiclass_oversamplers = [
        o for o in oversamplers
        if sv.OverSampling.cat_changes_majority not in o.categories
        and 'proportion' in o().get_params()
    ]
    parts.append(
        image_rows("multiclass-%s" % o.__name__
                   for o in multiclass_oversamplers))

    docs = "".join(parts)

    # the context manager closes the file even if the write fails
    with open("gallery.rst", "w") as file:
        file.write(docs)

    return docs

예제 #15

0

파일 보기

파일: tests.py 프로젝트: zhanggaofeng1120/smote_variants

 def test_queries(self):
     """
     Checks the query functions of the package: the listings of
     oversamplers, noise filters and multiclass oversamplers must be
     non-empty, and asking for the 5 quickest (multiclass) oversamplers
     must return 5 items.
     """
     n_requested = 5

     all_oversamplers = sv.get_all_oversamplers()
     self.assertTrue(len(all_oversamplers) > 0)

     all_noise_filters = sv.get_all_noisefilters()
     self.assertTrue(len(all_noise_filters) > 0)

     quickest = sv.get_n_quickest_oversamplers(n_requested)
     self.assertTrue(len(quickest) == 5)

     all_multiclass = sv.get_all_oversamplers_multiclass()
     self.assertTrue(len(all_multiclass) > 0)

     quickest_multiclass = sv.get_n_quickest_oversamplers_multiclass(n_requested)
     self.assertTrue(len(quickest_multiclass) == 5)

예제 #16

0

파일 보기

        MLPClassifierWrapper(activation=x[0], hidden_layer_fraction=x[1]))

# k-nearest neighbor classifiers: every combination of neighborhood size,
# weighting scheme and Minkowski exponent
nn_classifiers = [
    KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
    for n_neighbors, weights, p in itertools.product(
        [3, 5, 7], ['uniform', 'distance'], [1, 2, 3])
]

# decision trees: every combination of split criterion and depth limit
dt_classifiers = [
    DecisionTreeClassifier(criterion=criterion, max_depth=max_depth)
    for criterion, max_depth in itertools.product(
        ['gini', 'entropy'], [None, 3, 5])
]

# the full list of classifiers, in the order SVM, MLP, kNN, decision tree
classifiers = [
    *sv_classifiers,
    *mlp_classifiers,
    *nn_classifiers,
    *dt_classifiers,
]

datasets = imbd.get_data_loaders('study')

# evaluate all oversamplers with all classifiers on the datasets
evaluation_settings = dict(
    samplers=sv.get_all_oversamplers(),
    classifiers=classifiers,
    cache_path=folding_path,
    n_jobs=n_jobs,
    remove_sampling_cache=True,
    max_n_sampler_parameters=max_sampler_parameter_combinations,
)
results = sv.evaluate_oversamplers(datasets, **evaluation_settings)

print(results)

예제 #17

0

파일 보기

파일: study.py 프로젝트: zhanggaofeng1120/smote_variants

def oversampler_summary_table():
    """
    Prints the oversampler summary tables in LaTeX format.

    Two outputs are written to standard output for all classes returned by
    ``sv.get_all_oversamplers()`` except ``sv.NoSMOTE``:

    1. one LaTeX table row per oversampler with a ``$\\times$`` mark in the
       column of every category the oversampler belongs to;
    2. a table rendered by ``DataFrame.to_latex`` in which the oversamplers
       are sorted by the ``year`` field of their bibtex entries, numbered
       from 1, and split into two halves placed side by side, with the
       category abbreviations replaced by rotated column headers.

    Nothing is returned.
    """
    oversamplers = sv.get_all_oversamplers()
    # NoSMOTE is excluded from the tables; guard the removal so that a list
    # without it does not raise ValueError
    if sv.NoSMOTE in oversamplers:
        oversamplers.remove(sv.NoSMOTE)

    all_categories = [sv.OverSampling.cat_noise_removal,
                      sv.OverSampling.cat_dim_reduction,
                      sv.OverSampling.cat_uses_classifier,
                      sv.OverSampling.cat_sample_componentwise,
                      sv.OverSampling.cat_sample_ordinary,
                      sv.OverSampling.cat_sample_copy,
                      sv.OverSampling.cat_memetic,
                      sv.OverSampling.cat_density_estimation,
                      sv.OverSampling.cat_density_based,
                      sv.OverSampling.cat_extensive,
                      sv.OverSampling.cat_changes_majority,
                      sv.OverSampling.cat_uses_clustering,
                      sv.OverSampling.cat_borderline,
                      sv.OverSampling.cat_application]

    # first output: one '&'-separated row per oversampler, closed by '\\'
    for o in oversamplers:
        cells = ["$\\times$ " if category in o.categories else " "
                 for category in all_categories]
        sys.stdout.write(o.__name__ + " & " + "& ".join(cells))
        print("\\\\")

    oversampling_bibtex = {o.__name__: extract_bibtex_entry(o.__doc__) for o in oversamplers}
    oversampling_years = {o.__name__: oversampling_bibtex[o.__name__]['year'] for o in oversamplers}

    oversamplers = sorted(oversamplers, key=lambda x: oversampling_years[x.__name__])

    # 'cite(' and '))' are placeholders which are turned into a LaTeX
    # \cite{...} command after to_latex has escaped the table content
    cat_summary = []
    for o in oversamplers:
        row = {'method': o.__name__.replace('_', '-') + ' (' + oversampling_years[o.__name__] + ')' + 'cite(' + oversampling_bibtex[o.__name__]['key'] + '))'}
        for a in all_categories:
            row[a] = str(a in o.categories)
        cat_summary.append(row)

    pd.set_option('display.max_colwidth', 100)
    # naming the columns explicitly fixes their order and keeps the frame
    # well-formed even if the list of oversamplers is empty
    cat_summary = pd.DataFrame(cat_summary, columns=['method'] + all_categories)
    cat_summary.index = np.arange(1, len(cat_summary) + 1)

    # the first half receives the extra row when the number of rows is odd
    half = (len(cat_summary) + 1) // 2
    cat_summary_first = cat_summary.iloc[:half].reset_index()
    cat_summary_second = cat_summary.iloc[half:].reset_index()

    # the running numbers of the second half are turned into strings
    # before the two halves are concatenated side by side
    cat_summary_second['index'] = cat_summary_second['index'].astype(str)
    results = pd.concat([cat_summary_first, cat_summary_second], axis=1)

    res = results.to_latex(index=False)
    res = res.replace('True', '$\\times$').replace('False', '')

    prefix = '\\begin{turn}{90}'
    postfix = '\\end{turn}'
    # column header abbreviations and the rotated labels replacing them;
    # the replacements are applied in this order
    header_labels = [
        (' NR ', 'noise removal'),
        (' DR ', 'dimension reduction'),
        (' Clas ', 'uses classifier'),
        (' SCmp ', 'componentwise sampling'),
        (' SCpy ', 'sampling by cloning'),
        (' SO ', 'ordinary sampling'),
        (' M ', 'memetic'),
        (' DE ', 'density estimation'),
        (' DB ', 'density based'),
        (' Ex ', 'extensive'),
        (' CM ', 'changes majority'),
        (' Clus ', 'uses clustering'),
        (' BL ', 'borderline'),
        (' A ', 'application'),
    ]
    for abbreviation, label in header_labels:
        res = res.replace(abbreviation, prefix + label + postfix)

    res = res.replace('index', '')
    res = res.replace('\\toprule', '')
    res = res.replace('cite(', '\\cite{')
    res = res.replace('))', '}')
    # undo the underscore escaping applied by to_latex
    res = res.replace('\\_', '_')
    res = res.replace('NaN', '')

    print(res)