import pandas as pd
from unittest2 import TestCase  # or `from unittest import ...` if on Python 3.4+
import category_encoders.tests.helpers as th
import numpy as np
import warnings
import category_encoders as encoders

# Module-level fixtures shared by the tests in this file.
# The `np_*` names are arrays, the others are built with th.create_dataset /
# pd.DataFrame; names ending in `_t` are the data passed to `transform`.

# 100-row array built by the test helpers.
np_X = th.create_array(n_rows=100)
# 50-row array built with extras=True.
# NOTE(review): presumably `extras` adds category values that are absent from
# the 100-row data — confirm against category_encoders.tests.helpers.
np_X_t = th.create_array(n_rows=50, extras=True)
# Boolean labels, one per row: True where a standard-normal draw exceeds 0.5.
# No seed is set here, so the labels differ from run to run.
np_y = np.random.randn(np_X.shape[0]) > 0.5
np_y_t = np.random.randn(np_X_t.shape[0]) > 0.5
# Same two data sets (100 rows, and 50 rows with extras) from create_dataset.
X = th.create_dataset(n_rows=100)
X_t = th.create_dataset(n_rows=50, extras=True)
# The boolean label arrays wrapped as single-column DataFrames.
y = pd.DataFrame(np_y)
y_t = pd.DataFrame(np_y_t)


class TestOrdinalEncoder(TestCase):
    """Tests for ``encoders.OrdinalEncoder``."""

    def test_ordinal(self):
        """Fit on ``X``, transform ``X_t`` and check the result.

        Verifies the encoded ``extra`` column of the transformed data and
        that fitting produced a non-empty ``mapping``, then builds a second
        encoder from that mapping.
        """
        enc = encoders.OrdinalEncoder(verbose=1, return_df=True)
        enc.fit(X)
        out = enc.transform(X_t)

        # Build the set of encoded values once and reuse it for both checks.
        extra_values = set(out['extra'].values)
        self.assertEqual(len(extra_values), 4)
        # NOTE(review): -1 is presumably the code given to values not seen
        # during fit — confirm against the encoder's handling of unknowns.
        self.assertIn(-1, extra_values)

        # Dedicated assertions report the offending value on failure, unlike
        # assertFalse(... is None) / assertTrue(len(...) > 0).
        self.assertIsNotNone(enc.mapping)
        self.assertGreater(len(enc.mapping), 0)

        # Construct a new encoder from the mapping learned above. Within this
        # block the new encoder is only constructed, not fitted or applied.
        enc = encoders.OrdinalEncoder(verbose=1,
                                      mapping=enc.mapping,
                                      return_df=True)
# Benchmark configuration and shared state.

# Interval, in seconds, between two CPU-utilization samples; a smaller value
# gives a more accurate measurement.
cpu_sampling_rate = 0.2

# Number of times each encoding is repeated while benchmarking; a larger value
# gives more accurate results but a longer benchmarking time.
benchmark_repeat = 3

# Number of rows in the generated data set.
data_lines = 10000

# Column names of one benchmarking result row.
result_cols = ['encoder', 'used_processes', 'X_shape', 'min_time(s)', 'average_time(s)', 'max_cpu_utilization(%)', 'average_cpu_utilization(%)']
# NOTE(review): presumably accumulates one result row per benchmark run —
# confirm against the code that appends to it.
results = []
# Manager-backed queue, declared `global` by get_cpu_utilization().
# NOTE(review): `multiprocessing` is not imported in the visible import block
# at the top of this file — confirm it is imported before this line runs.
cpu_utilization = multiprocessing.Manager().Queue()

# Data set used for benchmarking.
# NOTE(review): these assignments rebind np_X, np_y, X and X_t, which are also
# assigned earlier in this file with 100/50 rows; np_X_t, np_y_t, y and y_t
# are not rebound here and keep their earlier values.
np_X = th.create_array(n_rows=data_lines)
# Boolean labels, one per row: True where a standard-normal draw exceeds 0.5.
np_y = np.random.randn(np_X.shape[0]) > 0.5
X = th.create_dataset(n_rows=data_lines)
# Half as many rows as X, built with extras=True.
X_t = th.create_dataset(n_rows=int(data_lines / 2), extras=True)

# Column labels; note that one label (321) is an int rather than a str.
# NOTE(review): presumably the columns handed to the encoders — confirm.
cols = ['unique_str', 'underscore', 'extra', 'none', 'invariant', 321, 'categorical', 'na_categorical']


def get_cpu_utilization():
    """
    new process for recording cpu utilization
    record cpu utilization every [cpu_sampling_rate] second & calculate its mean value
    the value is the cpu utilization during every encoding
    """
    global cpu_utilization
    psutil.cpu_percent(None)