def __init__(self, errors_dict=None):
    """Initialise the collection and its two match tables.

    Args:
        errors_dict: Optional mapping supplied by the caller. When given,
            a fresh list is stored in it under the 'denormals' key and
            that entry is also kept on this instance as ``_errors_dict``.
            When omitted, ``_errors_dict`` is None.
    """
    DoubleStringPatternSplitCollection.__init__(self)
    self._match, self._match_jp = {}, {}

    if errors_dict is not None:
        # Reset the caller's 'denormals' slot, then keep a reference to
        # whatever the mapping now holds there.
        errors_dict['denormals'] = []
        self._errors_dict = errors_dict['denormals']
    else:
        self._errors_dict = None
def test_double_strings_line_split(self):
    """split_line yields both quoted fields, and None for an empty line."""
    collection = DoubleStringPatternSplitCollection()

    fields = collection.split_line('"Key1","Val1"')
    self.assertIsNotNone(fields)

    expected_fields = ("Key1", "Val1")
    self.assertEqual(2, len(fields))
    for position, expected in enumerate(expected_fields):
        self.assertEqual(expected, fields[position])

    # An empty input line produces no split result at all.
    fields = collection.split_line('')
    self.assertIsNone(fields)
def test_double_strings_line_split(self):
    """Check split_line on a quoted key/value line and on an empty line."""
    collection = DoubleStringPatternSplitCollection()

    parts = collection.split_line('"Key1","Val1"')
    self.assertIsNotNone(parts)
    self.assertEqual(2, len(parts))

    # Compare each returned field against its expected text, in order.
    for index, wanted in enumerate(("Key1", "Val1")):
        self.assertEqual(wanted, parts[index])

    # Nothing to split: the result is None rather than an empty list.
    parts = collection.split_line('')
    self.assertIsNone(parts)
def test_double_strings_from_file(self):
    """Load test_files/doubles_pattern.txt and check the three expected keys.

    The fixture path is built with os.path.join so that it stays relative
    to this test module even when os.path.dirname(__file__) is an empty
    string; concatenating with os.sep would then yield a path anchored at
    the filesystem root.
    """
    doubles = DoubleStringPatternSplitCollection()
    self.assertIsNotNone(doubles)

    pattern_file = os.path.join(
        os.path.dirname(__file__), "test_files", "doubles_pattern.txt")
    count = doubles.load_from_filename(pattern_file)
    self.assertEqual(count, 3)

    # NOTE: only key presence is verified here; the checks on value()
    # (e.g. value("key1") == "(^key1|key1|key1$)") are disabled.
    self.assertTrue(doubles.has_key("key1"))
    self.assertTrue(doubles.has_key("key2"))
    self.assertTrue(doubles.has_key("key3"))
def _load_file_contents(self, lookup_collection, filename):
    """Read a lookup file line by line and feed each pair to the collection.

    Blank lines and lines whose first non-blank character is '#' are
    skipped. Every other line is split with RE_OF_SPLIT_PATTERN:

    * For a collection that is exactly a DoubleStringPatternSplitCollection
      the pair goes through ``self.process_key_value`` first and malformed
      lines are ignored.
    * For any other collection type the raw pair is added together with
      the filename and 1-based line number, and malformed lines are
      reported through ``set_error_info``.

    Any exception raised while reading is logged, not propagated.
    """
    YLogger.debug(self, "Loading lookup [%s]", filename)
    try:
        # Exact type comparison on purpose: subclasses take the other path.
        is_plain_collection = (
            type(lookup_collection) is DoubleStringPatternSplitCollection)

        with open(filename, 'r', encoding='utf8') as my_file:
            for line_no, raw_line in enumerate(my_file, start=1):
                text = raw_line.strip()
                if not text or text[0] == '#':
                    continue

                fields = DoubleStringPatternSplitCollection.split_line_by_pattern(
                    text, DoubleStringPatternSplitCollection.RE_OF_SPLIT_PATTERN)

                if fields and len(fields) > 1:
                    if is_plain_collection:
                        index, pattern = self.process_key_value(
                            fields[0], fields[1])
                        lookup_collection.add_to_lookup(index, pattern)
                    else:
                        lookup_collection.add_to_lookup(
                            fields[0], fields[1], filename, line_no)
                elif not is_plain_collection:
                    error_info = "illegal format [%s]" % text
                    lookup_collection.set_error_info(
                        filename, line_no, error_info)
    except Exception as excep:
        YLogger.exception(self, "Failed to load lookup [%s]", excep, filename)
def load(self, lookup_collection):
    """Copy every document of this store's Mongo collection into the lookup.

    Each document's 'key' and 'value' fields are passed through
    DoubleStringPatternSplitCollection.process_key_value and the result is
    added to ``lookup_collection``. Nothing is added when find() returns
    None.
    """
    YLogger.debug(self, "Loading lookup from Mongo [%s]", self.collection_name())

    documents = self.collection().find()
    if documents is None:
        return

    for document in documents:
        key, value = DoubleStringPatternSplitCollection.process_key_value(
            document['key'], document['value'])
        lookup_collection.add_to_lookup(key, value)
def test_double_strings_from_file(self):
    """Load test_files/doubles_pattern.txt and check keys and stored values.

    The fixture path is built with os.path.join so that it stays relative
    to this test module even when os.path.dirname(__file__) is an empty
    string; appending "/test_files/..." to an empty directory name would
    yield a path anchored at the filesystem root.
    """
    doubles = DoubleStringPatternSplitCollection()
    self.assertIsNotNone(doubles)

    pattern_file = os.path.join(
        os.path.dirname(__file__), "test_files", "doubles_pattern.txt")
    count = doubles.load_from_filename(pattern_file)
    self.assertEqual(count, 3)

    # Each key must be present and map to its expected pattern text.
    self.assertTrue(doubles.has_key("key1"))
    self.assertEqual(doubles.value("key1"), "(^key1|key1|key1$)")
    self.assertTrue(doubles.has_key("key2"))
    self.assertEqual(doubles.value("key2"), "(^key2|key2|key2$)")
    self.assertTrue(doubles.has_key("key3"))
    self.assertEqual(doubles.value("key3"), "(^key3|key3|key3$)")
def load(self, collector, name=None):
    """Copy every document of this store's Mongo collection into ``collector``.

    Each document's 'key' and 'value' fields are passed through
    DoubleStringPatternSplitCollection.process_key_value before being
    added. ``name`` is accepted but not used by this method.

    Returns:
        True once all documents have been processed.
    """
    YLogger.debug(self, "Loading lookup from Mongo [%s]", self.collection_name())

    documents = self.collection().find()
    for document in documents:
        key, value = DoubleStringPatternSplitCollection.process_key_value(
            document['key'], document['value'])
        collector.add_to_lookup(key, value)

    return True
def _load_file_contents(self, lookup_collection, filename):
    """Read a lookup file line by line and feed each pair to the collection.

    Every line is split with RE_OF_SPLIT_PATTERN; lines that do not yield
    at least two fields are silently skipped. For a collection that is
    exactly a DoubleStringPatternSplitCollection the pair goes through
    ``self.process_key_value`` first; any other collection type receives
    the raw pair. Exceptions raised while reading are logged, not
    propagated.
    """
    YLogger.debug(self, "Loading lookup [%s]", filename)
    try:
        # Exact type comparison on purpose: subclasses take the other path.
        is_plain_collection = (
            type(lookup_collection) is DoubleStringPatternSplitCollection)

        with open(filename, 'r', encoding='utf8') as my_file:
            for raw_line in my_file:
                if not raw_line:
                    continue

                fields = DoubleStringPatternSplitCollection.split_line_by_pattern(
                    raw_line, DoubleStringPatternSplitCollection.RE_OF_SPLIT_PATTERN)
                if not fields or len(fields) <= 1:
                    continue

                if is_plain_collection:
                    index, pattern = self.process_key_value(fields[0], fields[1])
                    lookup_collection.add_to_lookup(index, pattern)
                else:
                    lookup_collection.add_to_lookup(fields[0], fields[1])
    except Exception as excep:
        YLogger.exception(self, "Failed to load lookup [%s]", excep, filename)
def test_double_strings_from_text(self):
    """Load three pairs from inline text, check the keys, then empty it."""
    doubles = DoubleStringPatternSplitCollection()
    self.assertIsNotNone(doubles)

    # One quoted pair per line; the third value itself contains a comma.
    loaded = doubles.load_from_text("""
        "key1","val1"
        "key2","val2"
        "key3","val3,val4"
    """)
    self.assertEqual(loaded, 3)

    for present_key in ("key1", "key2", "key3"):
        self.assertTrue(doubles.has_key(present_key))
    self.assertFalse(doubles.has_key("key4"))

    # After empty() no pairs may remain.
    doubles.empty()
    self.assertEqual(0, len(doubles.pairs))
def add_to_lookup(self, org_key, org_value):
    """Register one key/value pair in the table that matches the key's form.

    * Empty or whitespace-only keys are rejected with an error log.
      Previously they raised IndexError when the first character or first
      word of the key was read.
    * Keys for which JapaneseLanguage.is_CJKword is True are stored in
      ``_pairs_jp`` and indexed in ``_match_jp`` by their first character.
    * Other keys that do not start with a space are compiled into a
      "(^start|middle|end$)" regular expression and appended to
      ``_replace`` as ``[key, compiled_pattern, value]``.
    * Other keys that start with a space are stored in ``_pairs`` and
      indexed in ``_match`` by their first word.

    Duplicate keys in ``_pairs_jp`` / ``_pairs`` are logged and ignored.

    Args:
        org_key: Key text as read from the source; a leading space is
            significant (see above).
        org_value: Value text; surrounding whitespace is stripped.
    """
    value = org_value.strip()
    key = org_key.strip()

    # Guard first: every branch below reads key[0] or the first word.
    if not key:
        YLogger.error(self, "empty key is invalid (value = %s)", value)
        return

    if JapaneseLanguage.is_CJKword(org_key) is True:
        if key in self._pairs_jp:
            YLogger.error(self, "%s = %s already exists in jp_collection",
                          key, value)
            return
        matchs = self._match_jp
        splits = key
        check_key = key[0]
        self._pairs_jp[key] = value

    elif org_key[0] != ' ':
        # No leading space on the raw key: register a regex replacement.
        pattern_text = DoubleStringPatternSplitCollection.normalise_pattern(key)
        pattern = "(^%s|%s|%s$)" % (
            pattern_text.lstrip(), pattern_text, pattern_text.rstrip())
        self._replace.append([key, re.compile(pattern), value])
        return

    else:
        if key in self._pairs:
            YLogger.error(self, "%s = %s already exists in en_collection",
                          key, value)
            return
        matchs = self._match
        splits = key.split()
        check_key = splits[0]
        self._pairs[key] = value

    # Index the entry under its first character (CJK) or first word.
    matchs.setdefault(check_key, []).append(splits)
def test_double_strings_from_text(self):
    """Load three pairs from inline text and check key presence/absence."""
    doubles = DoubleStringPatternSplitCollection()
    self.assertIsNotNone(doubles)

    # One quoted pair per line; the third value itself contains a comma.
    loaded = doubles.load_from_text("""
        "key1","val1"
        "key2","val2"
        "key3","val3,val4"
    """)
    self.assertEqual(loaded, 3)

    # NOTE: only key presence is verified here; the checks on value()
    # (e.g. value("key1") == "(^key1|key1|key1$)") are disabled.
    for present_key in ("key1", "key2", "key3"):
        self.assertTrue(doubles.has_key(present_key))

    self.assertFalse(doubles.has_key("key4"))
    # Looking up an unknown key yields None.
    self.assertIsNone(doubles.value("keyX"))
def test_double_strings_from_text(self):
    """Load three pairs from inline text; verify known and unknown keys."""
    collection = DoubleStringPatternSplitCollection()
    self.assertIsNotNone(collection)

    # One quoted pair per line; the third value itself contains a comma.
    pair_count = collection.load_from_text("""
        "key1","val1"
        "key2","val2"
        "key3","val3,val4"
    """)
    self.assertEqual(pair_count, 3)

    # NOTE: only key presence is verified here; the checks on value()
    # (e.g. value("key1") == "(^key1|key1|key1$)") are disabled.
    for known_key in ("key1", "key2", "key3"):
        self.assertTrue(collection.has_key(known_key))

    self.assertFalse(collection.has_key("key4"))
    # Looking up an unknown key yields None.
    self.assertIsNone(collection.value("keyX"))
def process_key_value(self, key, value, id=None):
    """Forward to DoubleStringPatternSplitCollection.process_key_value.

    The three arguments are passed through unchanged and the delegate's
    result is returned as-is.
    """
    delegate = DoubleStringPatternSplitCollection.process_key_value
    return delegate(key, value, id)
def __init__(self):
    """Initialise via DoubleStringPatternSplitCollection; adds no own state."""
    # Explicit base-class call (not super()): kept exactly as written.
    DoubleStringPatternSplitCollection.__init__(self)
def __init__(self, process_splits_success=True):
    """Initialise the base collection and remember ``process_splits_success``.

    Args:
        process_splits_success: Flag stored on the instance as
            ``_process_splits_success`` (defaults to True).
            NOTE(review): presumably read by an overridden split-processing
            hook elsewhere in the class - confirm against the rest of the file.
    """
    DoubleStringPatternSplitCollection.__init__(self)
    # Stored after the base initialiser has run.
    self._process_splits_success = process_splits_success
def add_to_lookup(self, org_key, org_value, filename=None, line=0):
    """Register one key/value pair, reporting problems via set_error_info.

    * Empty or whitespace-only keys are reported as "key is empty".
    * Keys for which JapaneseLanguage.is_CJKword is True are stored in
      ``_pairs_jp`` and indexed in ``_match_jp`` by their first character.
    * Other keys that do not start with a space are compiled into a
      "(^start|middle|end$)" regular expression and appended to
      ``_replace`` (with the key remembered in ``_replace_key``).
    * Other keys that start with a space are stored in ``_pairs`` and
      indexed in ``_match`` by their first word.

    A duplicate key in any of the three tables is logged, reported through
    ``set_error_info`` and otherwise ignored.

    Args:
        org_key: Key text as read from the source; a leading space is
            significant (see above).
        org_value: Value text; surrounding whitespace is stripped.
        filename: Source file name passed on to ``set_error_info``.
        line: Source line number passed on to ``set_error_info``.
    """
    value = org_value.strip()
    stripped_key = org_key.strip()

    if stripped_key == '':
        self.set_error_info(filename, line, "key is empty")
        return

    if JapaneseLanguage.is_CJKword(org_key) is True:
        if stripped_key in self._pairs_jp:
            YLogger.error(self, "%s = %s already exists in jp_collection",
                          stripped_key, value)
            message = "duplicate key='%s' (value='%s' is invalid)" % (
                stripped_key, value)
            self.set_error_info(filename, line, message)
            return
        table = self._match_jp
        entry = stripped_key
        first_token = stripped_key[0]
        self._pairs_jp[stripped_key] = value

    elif org_key[0] != ' ':
        # No leading space on the raw key: register a regex replacement.
        if stripped_key in self._replace_key:
            YLogger.error(
                self, "%s = %s already exists in replace_collection",
                stripped_key, value)
            message = "duplicate replace_chars='%s' (value='%s' is invalid)" % (
                stripped_key, value)
            self.set_error_info(filename, line, message)
            return
        pattern_text = DoubleStringPatternSplitCollection.normalise_pattern(
            stripped_key)
        regex_source = "(^%s|%s|%s$)" % (
            pattern_text.lstrip(), pattern_text, pattern_text.rstrip())
        self._replace.append([stripped_key, re.compile(regex_source), value])
        self._replace_key.append(stripped_key)
        return

    else:
        if stripped_key in self._pairs:
            YLogger.error(self, "%s = %s already exists in en_collection",
                          stripped_key, value)
            message = "duplicate key='%s' (value='%s' is invalid)" % (
                stripped_key, value)
            self.set_error_info(filename, line, message)
            return
        table = self._match
        entry = stripped_key.split()
        first_token = entry[0]
        self._pairs[stripped_key] = value

    # Index the entry under its first character (CJK) or first word.
    table.setdefault(first_token, []).append(entry)
def load(self, collector, name=None):
    """Feed every record returned by ``self._get_all()`` into ``collector``.

    Each record's ``key`` and ``value`` attributes are passed through
    DoubleStringPatternSplitCollection.process_key_value before being
    added. ``name`` is accepted but not used by this method.
    """
    for record in self._get_all():
        key, value = DoubleStringPatternSplitCollection.process_key_value(
            record.key, record.value)
        collector.add_to_lookup(key, value)
def split_into_fields(self, line):
    """Split ``line`` using the collection's RE_OF_SPLIT_PATTERN.

    Returns whatever DoubleStringPatternSplitCollection.split_line_by_pattern
    produces for the line.
    """
    splitter = DoubleStringPatternSplitCollection
    return splitter.split_line_by_pattern(line, splitter.RE_OF_SPLIT_PATTERN)
def __init__(self):
    """Initialise the base collection and start with empty match tables."""
    DoubleStringPatternSplitCollection.__init__(self)
    # Two separate dicts: ``_match`` and ``_match_jp``.
    self._match, self._match_jp = {}, {}