def __init__(self, file_name: str, name: str):
    """Build the normalized, cleaned and tokenized forms of a company name.

    The raw name is passed through ``Utils.replace_redundant_ws`` and
    lower-cased, then cleaned with ``cleanco(...).clean_name()``; the
    cleaned name is finally split on whitespace into a set of tokens.

    Args:
        file_name: File name associated with this company name; stored
            unchanged.
        name: Raw company name.

    Raises:
        AssertionError: If the cleaned name contains no tokens.
    """
    self.__file_name = file_name
    self.__name = Utils.replace_redundant_ws(name).lower()
    self.__cleaned_name = cleanco(self.__name).clean_name()
    # ``str.split(" ")`` never returns an empty list ("".split(" ") is
    # [""]), so the emptiness check below could never trigger and an empty
    # string could end up as a token.  A bare ``split()`` discards empty
    # strings, so a blank cleaned name produces an empty token set.
    self.__tokens = set(self.__cleaned_name.split())
    # NOTE(review): ``self.tokens`` is presumably a property exposing the
    # token set assigned above; it is defined outside this block.
    if not self.tokens:
        raise AssertionError("length of list is zero")
def create_mapper_from_dataframe(dataframe: DataFrame):
    """Create a ``CompanyMapper`` from the rows of a data frame.

    The column labels of ``dataframe`` are normalized with
    ``Utils.normalize_string``; this assignment happens on the object that
    was passed in, so the caller's frame sees the renamed columns too.
    Every row then contributes one entry that maps a
    ``CompanyNameWithFileName`` (built from the first two columns listed in
    ``CompanyMapper.COLUMN_NAMES``) to the value of the third column.

    Raises:
        AssertionError: If any column in ``CompanyMapper.COLUMN_NAMES`` is
            absent after normalization.
    """
    required = CompanyMapper.COLUMN_NAMES
    dataframe.columns = Index(map(Utils.normalize_string, dataframe.columns))

    missing = set(required) - set(dataframe.columns)
    if missing:
        raise AssertionError("necessary columns do not exist")

    # Keep only the required columns, in the order COLUMN_NAMES lists them.
    dataframe = dataframe[required]

    name_to_group: Dict[CompanyNameWithFileName, int] = {}
    for _, record in dataframe.iterrows():
        first_value = record[required[0]]
        # Only the second column's value is normalized before use.
        second_value = Utils.normalize_string(record[required[1]])
        key = CompanyNameWithFileName(first_value, second_value)
        name_to_group[key] = record[required[2]]
    return CompanyMapper(name_to_group)
def _standartize_columns_names(dataframe: pd.DataFrame):
    """Replace the frame's column labels with normalized, lower-case ones.

    Each label goes through ``Utils.replace_redundant_ws`` and is then
    lower-cased.  The new index is assigned to ``dataframe.columns``
    directly, so the frame is modified in place and nothing is returned.
    """
    normalized_labels = []
    for label in dataframe.columns:
        normalized_labels.append(Utils.replace_redundant_ws(label).lower())
    dataframe.columns = pd.Index(data=normalized_labels)
def __init__(self, key_name: str):
    """Store the key name both as given and in normalized form.

    Args:
        key_name: Key name; kept verbatim, and also kept after being
            passed through ``Utils.replace_redundant_ws`` and lower-cased.
    """
    normalized = Utils.replace_redundant_ws(key_name).lower()
    self.__original_key_name = key_name
    self.__key_name = normalized
def test_contract(self):
    """A non-string argument must raise ``ContractNotRespected``."""
    rejected = False
    try:
        Utils.replace_redundant_ws(1)
    except ContractNotRespected:
        rejected = True
    # Reaching this point without the exception means the call was accepted.
    assert rejected, "contract doesn't work"
def test_newline_replace(self):
    """Newlines, carriage returns and tabs collapse to single spaces.

    Leading whitespace is expected to be removed entirely.
    """
    raw = "\n\n abc\r\n\r\n\tabc"
    result = Utils.replace_redundant_ws(raw)
    assert result == "abc abc"
def test_ws_remove_2(self):
    """Runs of tabs and spaces between words become one space each.

    The leading tab is expected to be dropped.
    """
    raw = "\tabc \t abc\t\tabc"
    result = Utils.replace_redundant_ws(raw)
    assert "abc abc abc" == result