def test_schema_compare_distinct(self):
    """Check that compare_properties reports 'KO' for differing schemas.

    A source mapping carrying an extra 'fake_prop' property is compared
    against an ESMapping and an IndexPattern, first while the targets
    lack that property (expected under 'missing') and then after the
    property has been added to them through add_property (expected
    under 'distinct').
    """
    expected_status = 'KO'

    # Two mappings built from the same JSON; the second one is the
    # slightly modified object used as the source of every comparison.
    target_mapping = ESMapping.from_json(index_name='git',
                                         mapping_json=self.__mapping_json)
    source_mapping = ESMapping.from_json(index_name='git',
                                         mapping_json=self.__mapping_json)

    # Add a fake property to the schema used to COMPARE FROM
    source_props = source_mapping.get_properties()
    source_props['fake_prop'] = 'text'

    # ESMapping vs ESMapping: the target does not know the property yet
    outcome = source_mapping.compare_properties(target_mapping)
    self.assertEqual(outcome['status'], expected_status)
    self.assertEqual(outcome['missing'], ['fake_prop'])

    # ESMapping vs IndexPattern: same expectation
    target_pattern = IndexPattern.from_json(self.__index_pattern_json)
    outcome = source_mapping.compare_properties(target_pattern)
    self.assertEqual(outcome['status'], expected_status)
    self.assertEqual(outcome['missing'], ['fake_prop'])

    # Add the fake property to both target schemas. The assertions
    # below expect it to be reported as 'distinct' rather than 'missing'.
    # NOTE(review): presumably add_property stores the value in a form
    # that differs from the raw 'text' set on the source -- confirm.
    target_mapping.add_property('fake_prop', 'text')
    target_pattern.add_property('fake_prop', 'text')

    # ESMapping vs ESMapping, then ESMapping vs IndexPattern
    for target in (target_mapping, target_pattern):
        outcome = source_mapping.compare_properties(target)
        self.assertEqual(outcome['status'], expected_status)
        self.assertEqual(outcome['missing'], [])
        self.assertEqual(outcome['distinct'], ['fake_prop'])
def test_schema_compare_last_item(self):
    """Check that a panel index pattern matches a mapping from fixtures.

    Both schemas are loaded from JSON fixture files in the test data
    directory; the comparison covers the case where the last item is
    different but the fields are the same, and must yield status 'OK'.
    """
    expected_status = 'OK'

    # Mapping for git enrich loaded from mordred
    mapping_file_path = os.path.join(self.__data_dir,
                                     'git-mapping-utc_commit.json')
    with open(mapping_file_path) as mapping_file:
        mapping_json = json.load(mapping_file)

    # JSON panel for git
    panel_file_path = os.path.join(self.__data_dir, 'git-utc_commit.json')
    with open(panel_file_path) as panel_file:
        panel_json = json.load(panel_file)

    # Compare the panel's 'git' index pattern against the loaded mapping
    es_mapping = ESMapping.from_json(index_name='git_test',
                                     mapping_json=mapping_json)
    panel = Panel.from_json(panel_json)
    git_pattern = panel.get_index_pattern('git')
    result = git_pattern.compare_properties(es_mapping)

    actual_status = result['status']
    if actual_status != expected_status:
        # Dump the whole comparison to make the failure easier to debug
        print(result)

    self.assertEqual(actual_status, expected_status)
def test_es_mapping_from_json(self):
    """Check that ESMapping.from_json yields the reference properties."""
    parsed_mapping = ESMapping.from_json(index_name='git',
                                         mapping_json=self.__mapping_json)
    parsed_props = parsed_mapping.get_properties()

    self.assertDictEqual(parsed_props, self.__ref_mapping_props)
def test_schema_compare_equal(self):
    """Check that compare_properties reports 'OK' for matching schemas.

    Covers ESMapping vs itself, ESMapping vs IndexPattern and
    IndexPattern vs ESMapping, and finally the case where the target
    schema holds one property more than the source.
    """
    expected_status = 'OK'

    base_mapping = ESMapping.from_json(index_name='git',
                                       mapping_json=self.__mapping_json)

    # ESMapping vs ESMapping (the very same object)
    outcome = base_mapping.compare_properties(base_mapping)
    self.assertEqual(outcome['status'], expected_status)
    mapping_keys = list(base_mapping.get_properties().keys())
    self.assertEqual(outcome['correct'], mapping_keys)

    # ESMapping vs IndexPattern
    pattern = IndexPattern.from_json(self.__index_pattern_json)
    outcome = base_mapping.compare_properties(pattern)
    self.assertEqual(outcome['status'], expected_status)
    mapping_keys = list(base_mapping.get_properties().keys())
    self.assertEqual(outcome['correct'], mapping_keys)

    # IndexPattern vs ESMapping
    outcome = pattern.compare_properties(base_mapping)
    self.assertEqual(outcome['status'], expected_status)
    pattern_keys = list(pattern.get_properties().keys())
    self.assertEqual(outcome['correct'], pattern_keys)

    # The second schema may have more properties than the first one
    # (the one used to COMPARE FROM), so add a new property to a
    # second mapping instance and use it as the target.
    extended_mapping = ESMapping.from_json(index_name='git',
                                           mapping_json=self.__mapping_json)
    extended_props = extended_mapping.get_properties()
    extended_props['fake_prop'] = 0

    # Mapping vs Mapping, then Index pattern vs Mapping
    for source in (base_mapping, pattern):
        outcome = source.compare_properties(extended_mapping)
        self.assertEqual(outcome['status'], expected_status)
def test_es_mapping_from_csv(self):
    """Check that ESMapping.from_csv yields the reference properties."""
    csv_location = os.path.join(self.__data_dir, 'git.csv')
    parsed_mapping = ESMapping.from_csv(index_name='git',
                                        csv_file=csv_location)

    self.assertDictEqual(parsed_mapping.get_properties(),
                         self.__ref_mapping_props)
def test_es_mapping_from_csv(self):
    """Build an ESMapping from the 'git.csv' fixture and verify its properties."""
    fixture_path = os.path.join(self.__data_dir, 'git.csv')
    mapping_from_csv = ESMapping.from_csv(index_name='git', csv_file=fixture_path)
    loaded_props = mapping_from_csv.get_properties()

    # The properties read from the CSV must equal the reference ones
    self.assertDictEqual(loaded_props, self.__ref_mapping_props)
def cmp_mapping_csv(es_host, csv_path, reverse=False):
    """Compare the mapping of an ES index to a CSV schema definition.

    The schema (and index) name is taken from the CSV file name: the
    text after the last '/' and before the last '.csv'. The mapping of
    that index is retrieved from the given host and compared with the
    schema read from the CSV file.

    Returns a dictionary with a single key, the schema name, holding
    the outcome of the comparison:
    {status, correct, missing, distinct, message}
    being:
        status 'OK' if all properties in source schema exist in target
            schema with same values. 'KO' in other case.
        correct: list of properties that match.
        missing: list of properties missing from target schema.
        distinct: list of properties in both schemas but having
            different values.
        message: a string with additional information.

    Raises ValueError if csv_path does not contain '.csv'.

    Keyword arguments:
    es_host  -- Elastic Search host to retrieve mappings
    csv_path -- CSV schema definition file path
    reverse  -- use CSV as source schema.
    """
    # Use file name as index name. rfind gives -1 when there is no '/',
    # so the name then starts at the beginning of the path.
    name_start = csv_path.rfind('/') + 1
    name_end = csv_path.rindex('.csv')
    schema_name = csv_path[name_start:name_end]

    csv_mapping = ESMapping.from_csv(index_name=schema_name,
                                     csv_file=csv_path)

    es_client = Elasticsearch(es_host, timeout=30)
    raw_mapping = es_client.indices.get_mapping(index=schema_name)
    es_mapping = ESMapping.from_json(index_name=schema_name,
                                     mapping_json=raw_mapping)

    # The source schema is the one whose properties must all be found
    # in the target schema.
    if reverse:
        source, target = csv_mapping, es_mapping
    else:
        source, target = es_mapping, csv_mapping

    return {schema_name: source.compare_properties(target)}
def cmp_panel_csv(panel_path, csv_path_list, reverse=False):
    """Compare the index patterns of a panel to CSV schema definitions.

    Every CSV file is loaded as a mapping named after the file (the
    text after the last '/' and before the last '.csv'). Each index
    pattern found in the panel is then compared with the mapping
    registered under the pattern's schema name.

    Returns a dictionary where each 1st level key is an index pattern
    schema name. Each of these keys holds the comparison outcome:
    {status, correct, missing, distinct, message}
    being:
        status 'OK' if all properties in source schema exist in target
            schema with same values. 'KO' in other case.
        correct: list of properties that match.
        missing: list of properties missing from target schema.
        distinct: list of properties in both schemas but having
            different values.
        message: a string with additional information.

    Raises ValueError if a CSV path does not contain '.csv' and
    KeyError if no CSV was given for an index pattern of the panel.

    Keyword arguments:
    panel_path      -- JSON panel file path
    csv_path_list   -- CSV schema definition file paths
    reverse         -- use CSV as source schema.
    """
    with open(panel_path) as panel_file:
        panel = Panel.from_json(json.load(panel_file))

    csv_mappings = {}
    for csv_path in csv_path_list:
        # Use file name as index name. rfind gives -1 when there is no
        # '/', so the name then starts at the beginning of the path.
        name_start = csv_path.rfind('/') + 1
        name_end = csv_path.rindex('.csv')
        schema_name = csv_path[name_start:name_end]

        csv_mappings[schema_name] = ESMapping.from_csv(index_name=schema_name,
                                                       csv_file=csv_path)

    result = {}
    for index_pattern in panel.get_index_patterns().values():
        pattern_name = index_pattern.schema_name
        csv_mapping = csv_mappings[pattern_name]

        # The source schema is the one whose properties must all be
        # found in the target schema.
        if reverse:
            source, target = csv_mapping, index_pattern
        else:
            source, target = index_pattern, csv_mapping

        result[pattern_name] = source.compare_properties(target)

    return result
def cmp_panel_csv(panel_path, csv_path_list, reverse=False):
    """Check panel index patterns against mappings defined in CSV files.

    Each CSV file provides one mapping, named after the file itself
    (directories and everything from the last '.csv' on are dropped).
    Each index pattern of the panel is compared with the mapping that
    has the same name as the pattern's schema name.

    Returns a dictionary keyed by index pattern schema name. Every
    value is the comparison outcome:
    {status, correct, missing, distinct, message}
    being:
        status 'OK' if all properties in source schema exist in target
            schema with same values. 'KO' in other case.
        correct: list of properties that match.
        missing: list of properties missing from target schema.
        distinct: list of properties in both schemas but having
            different values.
        message: a string with additional information.

    Raises ValueError if a CSV path does not contain '.csv' and
    KeyError if an index pattern has no CSV mapping of the same name.

    Keyword arguments:
    panel_path      -- JSON panel file path
    csv_path_list   -- CSV schema definition file paths
    reverse         -- use CSV as source schema.
    """
    with open(panel_path) as handle:
        loaded_panel_json = json.load(handle)
    panel = Panel.from_json(loaded_panel_json)

    mappings_by_name = {}
    for csv_path in csv_path_list:
        # Use file name as index name: skip any leading directories
        first_char = csv_path.rindex('/') + 1 if '/' in csv_path else 0
        schema_name = csv_path[first_char:csv_path.rindex('.csv')]
        mappings_by_name[schema_name] = ESMapping.from_csv(
            index_name=schema_name, csv_file=csv_path)

    comparisons = {}
    for pattern in panel.get_index_patterns().values():
        key = pattern.schema_name
        mapping = mappings_by_name[key]
        # With reverse set, the CSV mapping is the schema compared FROM
        comparisons[key] = (mapping.compare_properties(pattern)
                            if reverse
                            else pattern.compare_properties(mapping))

    return comparisons
def cmp_panel_mapping(panel_path, es_host, reverse=False):
    """Compare the index patterns of a panel to mappings from an ES host.

    For each index pattern found in the panel, the mapping of the
    index named after the pattern's schema name is retrieved from the
    given host and compared with the pattern.

    Returns a dictionary where each 1st level key is an index pattern
    schema name. Each of these keys holds the comparison outcome:
    {status, correct, missing, distinct, message}
    being:
        status 'OK' if all properties in source schema exist in target
            schema with same values. 'KO' in other case.
        correct: list of properties that match.
        missing: list of properties missing from target schema.
        distinct: list of properties in both schemas but having
            different values.
        message: a string with additional information.

    Keyword arguments:
    panel_path  -- JSON panel file path
    es_host     -- Elastic Search host to retrieve mappings
    reverse     -- use mapping as source schema.
    """
    es_client = Elasticsearch(es_host, timeout=30)

    with open(panel_path) as panel_file:
        panel = Panel.from_json(json.load(panel_file))

    result = {}
    for index_pattern in panel.get_index_patterns().values():
        pattern_name = index_pattern.schema_name

        raw_mapping = es_client.indices.get_mapping(index=pattern_name)
        es_mapping = ESMapping.from_json(index_name=pattern_name,
                                         mapping_json=raw_mapping)

        # The source schema is the one whose properties must all be
        # found in the target schema.
        if reverse:
            source, target = es_mapping, index_pattern
        else:
            source, target = index_pattern, es_mapping

        result[pattern_name] = source.compare_properties(target)

    return result
def cmp_panel_mapping(panel_path, es_host, reverse=False):
    """Check panel index patterns against the mappings stored in ES.

    The panel is read from a JSON file. For every index pattern in it,
    the mapping of the index with the pattern's schema name is fetched
    from the given host and both schemas are compared.

    Returns a dictionary keyed by index pattern schema name. Every
    value is the comparison outcome:
    {status, correct, missing, distinct, message}
    being:
        status 'OK' if all properties in source schema exist in target
            schema with same values. 'KO' in other case.
        correct: list of properties that match.
        missing: list of properties missing from target schema.
        distinct: list of properties in both schemas but having
            different values.
        message: a string with additional information.

    Keyword arguments:
    panel_path  -- JSON panel file path
    es_host     -- Elastic Search host to retrieve mappings
    reverse     -- use mapping as source schema.
    """
    client = Elasticsearch(es_host, timeout=30)

    with open(panel_path) as handle:
        loaded_panel_json = json.load(handle)
    panel = Panel.from_json(loaded_panel_json)

    comparisons = {}
    patterns = panel.get_index_patterns()
    for pattern in patterns.values():
        index_name = pattern.schema_name
        mapping = ESMapping.from_json(
            index_name=index_name,
            mapping_json=client.indices.get_mapping(index=index_name))

        # With reverse set, the ES mapping is the schema compared FROM
        comparisons[index_name] = (mapping.compare_properties(pattern)
                                   if reverse
                                   else pattern.compare_properties(mapping))

    return comparisons
def test_es_mapping_type_long(self):
    """Check that the 'long' type is converted to 'number'"""
    schema_type = ESMapping.get_schema_type('long')
    self.assertEqual(schema_type, 'number')
def test_es_mapping_type_integer(self):
    """Check that the 'integer' type is converted to 'number'"""
    schema_type = ESMapping.get_schema_type('integer')
    self.assertEqual(schema_type, 'number')
def test_es_mapping_type_double(self):
    """Check that the 'double' type is converted to 'number'"""
    schema_type = ESMapping.get_schema_type('double')
    self.assertEqual(schema_type, 'number')
def test_es_mapping_type_float(self):
    """Check that the 'float' type is converted to 'number'"""
    schema_type = ESMapping.get_schema_type('float')
    self.assertEqual(schema_type, 'number')
def test_es_mapping_type_boolean(self):
    """Check that the 'boolean' type is kept as 'boolean'"""
    schema_type = ESMapping.get_schema_type('boolean')
    self.assertEqual(schema_type, 'boolean')
def test_es_mapping_type_date(self):
    """Check that the 'date' type is kept as 'date'"""
    schema_type = ESMapping.get_schema_type('date')
    self.assertEqual(schema_type, 'date')
def test_es_mapping_type_text(self):
    """Check that the 'text' type is kept as 'text'"""
    schema_type = ESMapping.get_schema_type('text')
    self.assertEqual(schema_type, 'text')
def test_es_mapping_type_geo_point(self):
    """Check that the 'geo_point' type is kept as 'geo_point'"""
    schema_type = ESMapping.get_schema_type('geo_point')
    self.assertEqual(schema_type, 'geo_point')
def test_es_mapping_type_keyword(self):
    """Check that the 'keyword' type is kept as 'keyword'"""
    schema_type = ESMapping.get_schema_type('keyword')
    self.assertEqual(schema_type, 'keyword')