def test_data_health_check(self):
     """Compare data_health_check() on DATA_SET with the expected report."""
     # Expected report: an empty 'cases' list and features 'J' and 'K'.
     _expected_report: dict = dict(cases=[], features=['J', 'K'])
     _explorer = DataExplorer(df=DATA_SET, plot=False)
     _actual_report = _explorer.data_health_check()
     self.assertDictEqual(d1=_expected_report, d2=_actual_report)
 def test_data_distribution(self):
     """Spot-check two entries of data_distribution() on DATA_SET.

     The 'Hamburg' entry of feature 'F' is compared exactly, while the
     'mean' entry of feature 'C' is compared approximately because it is
     a computed float.
     """
     _data_distribution: dict = DataExplorer(
         df=DATA_SET, plot=False).data_distribution()
     _hamburg = _data_distribution['F'].get('Hamburg')
     _mean_c = _data_distribution['C'].get('mean')
     self.assertEqual(first=4, second=_hamburg)
     # Guard first so a missing 'mean' entry is reported as a clean failure
     # instead of a TypeError raised inside assertAlmostEqual.
     self.assertIsNotNone(obj=_mean_c)
     # Exact equality against 0.49000000000000005 depends on the precise
     # floating-point summation order; compare to 7 decimal places instead.
     self.assertAlmostEqual(first=0.49, second=_mean_c)
 def test_get_feature_types(self):
     """Compare get_feature_types() on DATA_SET with the expected grouping."""
     _expected_types: dict = dict(
         continuous=['C', 'G', 'H'],
         categorical=['A', 'B', 'F', 'I', 'J', 'K'],
         ordinal=[],
         date=['D'],
         text=['E'],
     )
     _explorer = DataExplorer(df=DATA_SET, plot=False)
     _actual_types = _explorer.get_feature_types()
     self.assertDictEqual(d1=_expected_types, d2=_actual_types)
 def test_data_typing(self):
     """Compare data_typing() on DATA_SET with the expected type mapping."""
     # Every listed feature maps to 'int' except 'D', which maps to 'datetime'.
     _expected_typing: dict = dict(B='int',
                                   D='datetime',
                                   F='int',
                                   I='int',
                                   J='int',
                                   K='int')
     _explorer = DataExplorer(df=DATA_SET, plot=False)
     _actual_typing = _explorer.data_typing()
     self.assertDictEqual(d1=_expected_typing, d2=_actual_typing)
Ejemplo n.º 5
0
import pandas as pd
import unittest

from easyexplore.data_explorer import DataExplorer
from easyexplore.text_miner import TextMiner

# Module-level fixtures: these statements execute when the module is imported.
# The CSV path has no directory component, so it is resolved against the
# current working directory.
DATA_SET: pd.DataFrame = pd.read_csv(
    filepath_or_buffer='amazon_musical_instruments_reviews.csv')
# Feature-type mapping reported by DataExplorer for the loaded data.
ID_TEXT: dict = DataExplorer(df=DATA_SET).get_feature_types()
# NOTE(review): .get() yields None when 'id_text' is missing from the mapping;
# confirm that get_feature_types() actually emits that key.
TEXT_MINER: TextMiner = TextMiner(df=DATA_SET,
                                  features=ID_TEXT.get('id_text'),
                                  lang='en',
                                  auto_interpret_natural_language=True)


class TextMinerTest(unittest.TestCase):
    """
    Unit test for class TextMiner
    """
    def test_clustering(self):
        """Placeholder: no clustering behaviour is asserted yet."""
        # TODO: add assertions; until then this test always passes.
        pass

    def test_detect_lang(self):
        """Check that detect_lang() introduces at least one '_lang' key.

        The number of keys of TEXT_MINER.df matching the substring '_lang'
        must be zero before detect_lang(sampling=True) is called and
        greater than zero afterwards.
        """
        def _count_lang_features() -> int:
            # Number of df keys that get_str_match reports for '_lang'.
            return len(
                TEXT_MINER.get_str_match(cases=list(TEXT_MINER.df.keys()),
                                         substring='_lang'))

        _lang_before: int = _count_lang_features()
        TEXT_MINER.detect_lang(sampling=True)
        # Two separate assertions so a failure shows which condition broke
        # and the offending count, instead of a bare "False is not true".
        self.assertEqual(first=0,
                         second=_lang_before,
                         msg="'_lang' keys already present before detect_lang")
        self.assertGreater(_count_lang_features(),
                           0,
                           msg="detect_lang did not add any '_lang' key")
 def test_text_analyzer(self):
     """Check that text_analyzer(lang='en') on TEXT_DATA has shape[1] > 0."""
     _explorer = DataExplorer(df=TEXT_DATA)
     _analysis = _explorer.text_analyzer(lang='en')
     _second_dim = _analysis.shape[1]
     self.assertTrue(expr=_second_dim > 0)
 def test_break_down(self):
     """Spot-check one mean value in the result of break_down() on DATA_SET.

     Reads the 'mean' entry found under
     ['continuous']['J']['de']['C'] and compares it approximately with 0.49.
     """
     _break_down = DataExplorer(df=DATA_SET, plot=False).break_down()
     _mean_c = _break_down['continuous']['J']['de']['C'].get('mean')
     # Guard first so a missing 'mean' entry is reported as a clean failure
     # instead of a TypeError raised inside assertAlmostEqual.
     self.assertIsNotNone(obj=_mean_c)
     # Exact equality against 0.49000000000000005 depends on the precise
     # floating-point summation order; compare to 7 decimal places instead.
     self.assertAlmostEqual(first=0.49, second=_mean_c)