def test_query_with_where_clauses_defined_by_user_and_generic(self): options = { 'url': 'bolt://fdw-neo4j', 'user': '******', 'password': '******', 'database': 'testdb', 'cypher': 'MATCH (p:Person)-[:ACTED_IN]->(m:Movie) /*WHERE{"actor":"p.name"}*/ WITH p.name AS name, m.title AS title RETURN name AS actor, title AS movie' } columns = ['actor', 'movie'] nfdw = neo4jfdw.Neo4jForeignDataWrapper(options, columns) quals = [ Qual('movie', '=', 'The Matrix'), Qual('actor', '=', 'Keanu Reeves') ] query = nfdw.make_cypher(quals, columns, None) self.assertEqual( 'MATCH (p:Person)-[:ACTED_IN]->(m:Movie) WHERE p.name=$`actor` WITH p.name AS name, m.title AS title WITH name AS actor, title AS movie WHERE movie=$`movie` RETURN actor, movie', query)
def test_like_operator(self): """ tests ~~ operator. """ options = { 'filename': os.path.join(TEST_FILES_DIR, 'rowid_int_col1_str_header_and_100_rows_gzipped.msg') } columns = ['rowid', 'col1'] wrapper = PartitionMsgpackForeignDataWrapper(options, columns) quals = [Qual('col1', '~~', '1')] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 1) self.assertEqual(ret[0], [1, '1']) quals = [Qual('col1', '~~', '%1')] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 10) self.assertEqual(ret[0], [1, '1']) self.assertEqual(ret[1], [11, '11']) self.assertEqual(ret[-1], [91, '91']) quals = [Qual('col1', '~~', '1%')] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 11) self.assertEqual(ret[0], [1, '1']) self.assertEqual(ret[1], [10, '10']) self.assertEqual(ret[-1], [19, '19']) quals = [Qual('col1', '~~', '_')] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 10) self.assertEqual(ret[0], [0, '0']) self.assertEqual(ret[1], [1, '1']) self.assertEqual(ret[-1], [9, '9'])
def generate_condition(self, qual, cypher_variable=None): """ Build a neo4j condition from a qual """ condition = '' if cypher_variable is not None: query_param_name = cypher_variable else: query_param_name = qual.field_name # quals is a list with ANY if qual.list_any_or_all == ANY: values = [ '( %s )' % self.generate_condition(Qual(qual.field_name, qual.operator[0], value), '`' + unicode(query_param_name) + '`' + '[' + unicode(array_index) + ']') for array_index, value in enumerate(qual.value) ] condition = ' ( ' + ' OR '.join(values) + ' ) ' # quals is a list with ALL elif qual.list_any_or_all == ALL: conditions = [ self.generate_condition(Qual(qual.field_name, qual.operator[0], value), '`' + unicode(query_param_name) + '`' + '[' + unicode(array_index) + ']') for array_index, value in enumerate(qual.value) ] # quals is just a string else: if qual.operator in ('~~', '!~~', '~~*', '!~~*'): # Convert to cypher regex regex = qual.value.replace('%', '.*') # For the negation, we prefix with NOT if qual.operator in ('!~~', '!~~*'): condition += ' NOT ' # Adding the variable name condition += qual.field_name + " =~ '" # If it's a ILIKE, we prefix regex with (?i) if qual.operator in ('~~*', '!~~*'): condition += '(?i)' # We add the regex condition += regex + "' " else: if query_param_name.startswith('`'): condition = qual.field_name + qual.operator + "$" + unicode(query_param_name) else: condition = qual.field_name + qual.operator + "$`" + unicode(query_param_name) + '`' log_to_postgres('Condition is : ' + unicode(condition), DEBUG) return condition
def setUp(self): self._columns = { 'f__o_o': ColumnDefinition('f__o_o', type_name='text'), 'bar': ColumnDefinition('bar', type_name='int'), 'baz': ColumnDefinition('baz', type_name='text[]'), 'quux': ColumnDefinition('quux', type_name='int') } self._fdw = ESForeignDataWrapper( { 'doc_type': 'foo_doc', 'index': 'our_index' }, self._columns) self._quals = [Qual('f__o_o', '=', 'value'), Qual('bar', '>', 5)]
def compute_columns_stat(self): result = list() session = self.driver.session() try: for column_name in self.columns: quals = [Qual(column_name, '=', 'WHATEVER')] query = 'EXPLAIN ' + self.make_cypher(quals, self.columns, None) rs = session.run(query, {}) explain_summary = rs.summary().plan[2] stats = explain_summary['EstimatedRows'] log_to_postgres( 'Explain query for column ' + unicode(column_name) + ' is : ' + unicode(query), DEBUG) log_to_postgres( 'Stat for column ' + unicode(column_name) + ' is : ' + unicode(explain_summary['EstimatedRows']), DEBUG) result.append(((column_name, ), int(stats))) except CypherError: raise RuntimeError("Bad cypher query : " + query) finally: session.close() log_to_postgres('Columns stats are :' + unicode(result), DEBUG) return result
def test_geom_predicate_missing(self): """ fdw.WCS.execute missing geom predicate """ options = {'url' : '', 'layer' : '', 'crs' : 'EPSG:4326', 'band' : '2'} columns = ['raster', 'geom'] qual = Qual('xyz', '=', 'xyz') fdw = WCS(options, columns) self.assertRaises(MissingQueryPredicateError, fdw._get_predicates, [qual])
def test_geom_predicate_B(self): """ fdw.WCS._get_predicates A = geom """ options = {'url' : '', 'layer' : '', 'crs' : 'EPSG:4326', 'band' : '2'} columns = ['raster', 'geom'] qual = Qual('xyz', '=', 'geom') fdw = WCS(options, columns) value = fdw._get_predicates([qual]) self.assertEquals(value, 'xyz')
def test_no_connect(self): """ fdw.WCS._get_raster bad connection """ options = {'url' : 'http://non-existant-url.net', 'layer' : '', 'crs' : 'EPSG:4326', 'band' : '2'} columns = ['raster', 'geom'] qual = Qual('010100000000000000000000000000000000000000', '=', 'geom') fdw = WCS(options, columns) result = fdw.execute([qual], columns) self.assertListEqual(result, [])
def test_make_match_lists(self): quals = [ Qual('foo', '=', 'bar'), Qual('quux', '<>', 'baz'), Qual('num', '<@', '[1, 10]'), Qual('a', ('=', True), ['x', 'y', 'z']), Qual('b', ('~~', True), ['d', 'e', 'f']), Qual('c', ('!~~', True), ['a%b', 'c_d', '_e%f%']), Qual('d', ('~~', False), ['g%', 'h_%', 'i%']), Qual('e', ('<>', False), [1, 2]) ] ml, mnl = self._fdw._make_match_lists(quals) self.assertEqual(len(ml), 7) self.assertIn({'terms': {'a': ['x', 'y', 'z']}}, ml) self.assertIn( { 'or': [{ 'regexp': { 'b': 'd' } }, { 'regexp': { 'b': 'e' } }, { 'regexp': { 'b': 'f' } }] }, ml) self.assertIn({'prefix': {'d': 'g'}}, ml) self.assertIn({'regexp': {'d': 'h..*'}}, ml) self.assertIn({'prefix': {'d': 'i'}}, ml) self.assertEqual(len(mnl), 3) self.assertIn( { 'and': [{ 'regexp': { 'c': 'a.*b' } }, { 'regexp': { 'c': 'c.d' } }, { 'regexp': { 'c': '.e.*f.*' } }] }, mnl) self.assertIn({'terms': {'e': [1, 2]}}, mnl)
def test_equals_operator(self): """ tests = operator. """ options = { 'filename': os.path.join(TEST_FILES_DIR, 'rowid_int_col1_str_header_and_100_rows_gzipped.msg') } columns = ['rowid', 'col1'] wrapper = PartitionMsgpackForeignDataWrapper(options, columns) quals = [Qual('col1', '=', '3')] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 1) self.assertEqual(ret[0], [3, '3'])
def test_greater_than_or_equal_operator(self): """ tests >= operator. """ options = { 'filename': os.path.join(TEST_FILES_DIR, 'rowid_int_col1_int_header_and_100_rows_gzipped.msg') } columns = ['rowid', 'col1'] wrapper = PartitionMsgpackForeignDataWrapper(options, columns) quals = [Qual('col1', '>=', 98)] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 2) self.assertEqual(ret[0], [98, 98]) self.assertEqual(ret[1], [99, 99])
def test_greater_than_operator(self): options = { 'filename': os.path.join(TEST_FILES_DIR, 'rowid_int_col1_int_header_and_100_rows_gzipped.msg') } columns = ['rowid', 'col1'] wrapper = PartitionMsgpackForeignDataWrapper(options, columns) quals = [Qual('col1', '>', 10)] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 89) self.assertEqual(ret[0], [11, 11]) self.assertEqual(ret[1], [12, 12]) self.assertEqual(ret[2], [13, 13])
def test_logs_unknown_qual(self, fake_log): fake_log\ .expects_call()\ .with_args(arg.contains('Unknown operator'), WARNING, hint=arg.contains('Implement')) options = { 'filename': os.path.join(TEST_FILES_DIR, 'rowid_int_col1_str_header_and_100_rows_gzipped.msg') } columns = ['rowid', 'col1'] wrapper = PartitionMsgpackForeignDataWrapper(options, columns) quals = [Qual('col1', '?', '3')] list(wrapper.execute(quals, columns))
def test_less_than_operator(self): """ tests < operator. """ options = { 'filename': os.path.join(TEST_FILES_DIR, 'rowid_int_col1_int_header_and_100_rows_gzipped.msg') } columns = ['rowid', 'col1'] wrapper = PartitionMsgpackForeignDataWrapper(options, columns) quals = [Qual('col1', '<', 3)] ret = list(wrapper.execute(quals, columns)) self.assertEqual(len(ret), 3) self.assertEqual(ret[0], [0, 0]) self.assertEqual(ret[1], [1, 1]) self.assertEqual(ret[2], [2, 2])
def test_movie_by_title(self): options = { 'url': 'bolt://fdw-neo4j', 'user': '******', 'password': '******', 'cypher': 'MATCH (n:Movie) RETURN n.title as movie' } columns = ['movie'] nfdw = neo4jfdw.Neo4jForeignDataWrapper(options, columns) quals = [Qual('movie', '=', 'The Matrix')] query = nfdw.make_cypher(quals, columns, None) self.assertEqual( 'MATCH (n:Movie) WITH n.title as movie WHERE movie=$`movie` RETURN movie', query)
def test_execute_column_name_translation(self, scan_mock, _elasticsearch_mock): columns = { 'object__nested_field': ColumnDefinition('object__nested_field', type_name='text') } fdw = ESForeignDataWrapper( { 'doc_type': 'foo_doc', 'index': 'our_index', 'column_name_translation': 'true' }, columns) quals = [Qual('object__nested_field', '=', 'value')] scan_mock.return_value = [{ 'fields': { 'object.nested-field': ['value'] } }] rows = list(fdw.execute(quals, ['object__nested_field'])) expected_query = { 'fields': ['object.nested-field'], 'query': { 'filtered': { 'filter': { 'bool': { 'must': [{ 'term': { 'object.nested-field': 'value' } }] } } } } } scan_mock.assert_called_once_with(fdw.esclient, query=expected_query, index='our_index', doc_type='foo_doc', size=fdw._SCROLL_SIZE, scroll=fdw._SCROLL_LENGTH) expected_rows = [{'object__nested_field': 'value'}] self.assertEqual(rows, expected_rows)
def test_create_cond_multi_col(self): quals = [Qual('name', '=', 'alice'), Qual('age', '<', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(name = 'alice') AND (age < 42)") quals = [Qual('name', '=', None), Qual('age', ('=', True), [42, 99])] self.assertEqual(self.td_fdw.create_cond(quals), "(name IS NULL) AND (age = 42 OR age = 99)") quals = [ Qual('name', ('~~', False), ['%alice%', '%bob%']), Qual('age', '<>', None) ] self.assertEqual( self.td_fdw.create_cond(quals), "(name LIKE '%alice%' AND name LIKE '%bob%') AND (age IS NOT NULL)" ) quals = [ Qual('name', ('!~~', False), ['%alice%', '%bob%']), Qual('age', '<>', None) ] self.assertEqual( self.td_fdw.create_cond(quals), "(name NOT LIKE '%alice%' AND name NOT LIKE '%bob%') AND (age IS NOT NULL)" )
def compute_columns_stat(self): result = list() # Execute & retrieve neo4j data if self.driver.supports_multi_db(): session = self.driver.session(database=self.database) else: session = self.driver.session() try: for column_name in self.columns: quals = [Qual(column_name, '=', 'WHATEVER')] query = 'EXPLAIN ' + self.make_cypher(quals, self.columns, None) rs = session.run(query, {}) explain_summary = rs.consume().plan['args'] stats = explain_summary['EstimatedRows'] log_to_postgres( 'Explain query for column ' + str(column_name) + ' is : ' + str(query), DEBUG) log_to_postgres( 'Stat for column ' + str(column_name) + ' is : ' + str(explain_summary['EstimatedRows']), DEBUG) result.append(((column_name, ), int(stats))) except CypherSyntaxError: raise RuntimeError("Bad cypher query : " + query) except CypherTypeError: raise RuntimeError("Bad cypher type in query : " + query) finally: session.close() log_to_postgres('Columns stats are :' + str(result), DEBUG) return result
def test_create_cond_string(self): quals = [Qual('name', '=', 'alice')] self.assertEqual(self.td_fdw.create_cond(quals), "(name = 'alice')") quals = [Qual('name', '!=', 'alice')] self.assertEqual(self.td_fdw.create_cond(quals), "(name != 'alice')") quals = [Qual('name', '<>', 'alice')] self.assertEqual(self.td_fdw.create_cond(quals), "(name <> 'alice')") quals = [Qual('name', '>', 'alice')] self.assertEqual(self.td_fdw.create_cond(quals), "(name > 'alice')") quals = [Qual('name', '>=', 'alice')] self.assertEqual(self.td_fdw.create_cond(quals), "(name >= 'alice')") quals = [Qual('name', '<', 'alice')] self.assertEqual(self.td_fdw.create_cond(quals), "(name < 'alice')") quals = [Qual('name', '<=', 'alice')] self.assertEqual(self.td_fdw.create_cond(quals), "(name <= 'alice')") quals = [Qual('name', '~~', '%alice%')] self.assertEqual(self.td_fdw.create_cond(quals), "(name LIKE '%alice%')") quals = [Qual('name', '!~~', '%alice%')] self.assertEqual(self.td_fdw.create_cond(quals), "(name NOT LIKE '%alice%')") quals = [Qual('name', ('=', True), ['alice', 'bob'])] self.assertEqual(self.td_fdw.create_cond(quals), "(name = 'alice' OR name = 'bob')") quals = [Qual('name', ('<', False), ['alice', 'bob'])] self.assertEqual(self.td_fdw.create_cond(quals), "(name < 'alice' AND name < 'bob')") quals = [Qual('name', '=', None)] self.assertEqual(self.td_fdw.create_cond(quals), "(name IS NULL)") quals = [Qual('name', '<>', None)] self.assertEqual(self.td_fdw.create_cond(quals), "(name IS NOT NULL)") quals = [Qual('name', '=', "al'ice")] self.assertEqual(self.td_fdw.create_cond(quals), "(name = 'al''ice')")
def make_cypher(self, quals, columns, sortkeys): """ Override cypher query to add search criteria """ query = self.cypher log_to_postgres('Init cypher query is : ' + unicode(query), DEBUG) log_to_postgres('Quals are : ' + unicode(quals), DEBUG) needUpdateProjection = True if (quals is not None and len(quals) > 0): # Step 1 : we check if there is some `where` annotation in the query where_match = re.findall('/\*WHERE([^}]*})\*/', self.cypher) if (where_match is not None): for group in where_match: log_to_postgres( 'Find a custom WHERE clause : ' + unicode(group), DEBUG) group_where_condition = [] # parse the JSON and check if field are in the where clause # if so we replace it and remove the clause from the where where_config = json.loads(group) for field in where_config: fieldQuals = filter( lambda qual: qual.field_name == field, quals) if (len(fieldQuals) > 0): for qual in fieldQuals: log_to_postgres( 'Find a field for this custom WHERE clause : ' + unicode(qual), DEBUG) # Generate the condition customQual = Qual( where_config[qual.field_name], qual.operator, qual.value) group_where_condition.append( self.generate_condition( customQual, qual.field_name)) # Remove the qual from the initial qual list quals = filter( lambda qual: not qual.field_name == field, quals) # replace the captured group by the condition if (len(group_where_condition) > 0): query = query.replace( '/*WHERE' + group + '*/', ' WHERE ' + ' AND '.join(group_where_condition)) else: query = query.replace('/*WHERE' + group + '*/', '') log_to_postgres( 'Current cypher query is : ' + unicode(query), DEBUG) # Step 2 : if there is still some where clause, we replace the return by a with/where/return if (len(quals) > 0): log_to_postgres('Generic where clause', DEBUG) where_clauses = self.generate_where_conditions(quals) pattern = re.compile('(.*)RETURN(.*)', re.IGNORECASE | re.MULTILINE | re.DOTALL) match = pattern.match(query) query = match.group(1) + "WITH" + match.group( 2) + " WHERE " + ' AND '.join( where_clauses) + " RETURN " + ', '.join(columns) log_to_postgres( 'Current cypher query after generic where is : ' + unicode(query), DEBUG) needUpdateProjection = False # Step 3 : We construct the projection for the return # we only modify the projection if it's needed (ie. we don't return all the tables columns) if (needUpdateProjection and len(columns) < len(self.columns)): return_pattern = re.compile( '(.*)RETURN(.*)', re.IGNORECASE | re.MULTILINE | re.DOTALL) return_match = return_pattern.match(query) query = return_match.group(1) + "WITH" + return_match.group( 2) + " RETURN " + ', '.join(columns) # Step 4 : We add the order clause at the end of the query if sortkeys is not None: orders = [] for sortkey in sortkeys: if sortkey.is_reversed: orders.append(sortkey.attname + ' DESC') else: orders.append(sortkey.attname) query = query + ' ORDER BY ' + ', '.join(orders) log_to_postgres( 'Current cypher query after sort is : ' + unicode(query), DEBUG) return query
def test_create_cond_int(self): quals = [Qual('age', '=', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(age = 42)") quals = [Qual('age', '!=', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(age != 42)") quals = [Qual('age', '<>', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(age <> 42)") quals = [Qual('age', '>', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(age > 42)") quals = [Qual('age', '>=', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(age >= 42)") quals = [Qual('age', '<', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(age < 42)") quals = [Qual('age', '<=', 42)] self.assertEqual(self.td_fdw.create_cond(quals), "(age <= 42)") quals = [Qual('age', ('=', True), [42, 99])] self.assertEqual(self.td_fdw.create_cond(quals), "(age = 42 OR age = 99)") quals = [Qual('age', ('<', False), [42, 99])] self.assertEqual(self.td_fdw.create_cond(quals), "(age < 42 AND age < 99)") quals = [Qual('age', '=', None)] self.assertEqual(self.td_fdw.create_cond(quals), "(age IS NULL)") quals = [Qual('age', '<>', None)] self.assertEqual(self.td_fdw.create_cond(quals), "(age IS NOT NULL)")