Beispiel #1
0
 def random_edges(self, limit=20):
     """
     Run RANDOM_QUERY and return the edges it selects.

     `limit` is bound to the query's 'limit' parameter. Each returned row is
     unpacked as (uri, data); only `data` is kept, after being passed
     through transform_for_linked_data.
     """
     # The database connection is opened lazily, on first use.
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     cur = self.connection.cursor()
     cur.execute(RANDOM_QUERY, {'limit': limit})
     edges = []
     for _uri, edge in cur.fetchall():
         edges.append(transform_for_linked_data(edge))
     return edges
Beispiel #2
0
    def random_edges(self, limit=20):
        """
        Return a list of randomly-sampled edges, at most `limit` long.

        The sampling rate depends on the database name: the test database
        is sampled far more densely than any other database. Each row is
        unpacked as (uri, data, weight) and only `data` is returned, after
        going through transform_for_linked_data.
        """
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        if self.dbname != 'conceptnet-test':
            # The real database is large, so sampling 0.01% of its edges
            # is enough.
            sampling_query = """
                SELECT uri, data, weight FROM edges
                TABLESAMPLE SYSTEM(0.01)
                ORDER BY random() LIMIT %(limit)s
            """
        else:
            # The test database holds very little data; sample 10% of the
            # edges there so that the query still finds matches.
            sampling_query = """
                SELECT uri, data, weight FROM edges
                TABLESAMPLE SYSTEM(10)
                ORDER BY random() LIMIT %(limit)s
            """

        cur = self.connection.cursor()
        cur.execute(sampling_query, {'limit': limit})
        edges = []
        for _uri, edge, _weight in cur.fetchall():
            edges.append(transform_for_linked_data(edge))
        return edges
Beispiel #3
0
 def lookup_assertion(self, uri):
     """
     Look up the edges whose `uri` column equals the given URI.

     Returns a list with the `data` column of every matching row, each
     passed through transform_for_linked_data. The list is empty when no
     row matches.
     """
     # Open the database connection on first use.
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     cur = self.connection.cursor()
     cur.execute("SELECT data FROM edges WHERE uri=%(uri)s", {'uri': uri})
     matches = []
     for (edge,) in cur.fetchall():
         matches.append(transform_for_linked_data(edge))
     return matches
Beispiel #4
0
    def lookup_grouped_by_feature(self, uri, limit=20):
        """
        Run NODE_TO_FEATURE_QUERY for `uri` and group the resulting edges.

        Returns a dict whose keys are the first two columns of each row (as
        a tuple) and whose values are lists of edges, each annotated with an
        'other' node and passed through transform_for_linked_data.
        """
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        def feature_key(row):
            # A row's feature is identified by its first two columns.
            return tuple(row[:2])

        def with_other_node(row):
            _direction, _, edge = row

            # Heuristic for picking the 'other' node, i.e. the one that (in
            # most cases) did not match the queried URI. When both nodes
            # start with the URI, the longer one wins: it is either a more
            # specific sense or a different, longer word.
            short_node, long_node = sorted([edge['start'], edge['end']], key=len)
            edge['other'] = long_node if short_node.startswith(uri) else short_node
            return edge

        cur = self.connection.cursor()
        cur.execute(NODE_TO_FEATURE_QUERY, {'node': uri, 'limit': limit})
        grouped = {}
        # groupby only merges adjacent rows, so rows must arrive from the
        # query already ordered by feature.
        for key, group in itertools.groupby(cur.fetchall(), feature_key):
            edges = []
            for row in group:
                edges.append(transform_for_linked_data(with_other_node(row)))
            grouped[key] = edges
        return grouped
Beispiel #5
0
 def lookup_assertion(self, uri):
     """
     Look up the edges whose `uri` column equals the given URI.

     Returns the `data` column of every matching row, each passed through
     transform_for_linked_data, as a list.
     """
     # The connection is created lazily the first time it is needed.
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     cur = self.connection.cursor()
     # NOTE(review): this uses the ':uri' named-placeholder style; confirm
     # it matches the paramstyle of the driver behind get_db_connection.
     cur.execute("SELECT data FROM edges WHERE uri=:uri", {'uri': uri})
     rows = cur.fetchall()
     return [transform_for_linked_data(edge) for (edge,) in rows]
Beispiel #6
0
    def query(self, criteria, limit=20, offset=0):
        """
        The most general way to query based on a set of criteria.

        `criteria` is a mapping of search criteria. When it contains the key
        'node', the two-way query (GIN_QUERY_2WAY) is run with a forward and
        a backward JSON query value; otherwise the one-way query
        (GIN_QUERY_1WAY) is run with a single JSON query value.

        `limit` and `offset` are bound to the query parameters of the same
        names.

        Returns a list containing the `data` column of each result row,
        passed through transform_for_linked_data.
        """
        # Open the connection lazily. Without this guard, calling query()
        # before any other method fails on `None.cursor()`.
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        cursor = self.connection.cursor()
        if 'node' in criteria:
            query_forward = gin_jsonb_value(criteria, node_forward=True)
            query_backward = gin_jsonb_value(criteria, node_forward=False)
            cursor.execute(
                GIN_QUERY_2WAY,
                {
                    'query_forward': jsonify(query_forward),
                    'query_backward': jsonify(query_backward),
                    'limit': limit,
                    'offset': offset,
                },
            )
        else:
            query = gin_jsonb_value(criteria)
            cursor.execute(
                GIN_QUERY_1WAY,
                {
                    'query': jsonify(query),
                    'limit': limit,
                    'offset': offset,
                },
            )

        # Rows are (uri, data, weight); only the edge data is returned.
        return [
            transform_for_linked_data(data)
            for _uri, data, _weight in cursor.fetchall()
        ]
Beispiel #7
0
 def random_edges(self, limit=20):
     """
     Execute RANDOM_QUERY with `limit` bound to its 'limit' parameter.

     Rows come back as (uri, data) pairs. The result is the list of `data`
     values, each converted by transform_for_linked_data.
     """
     # Connect to the database only when a query is first made.
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     db_cursor = self.connection.cursor()
     db_cursor.execute(RANDOM_QUERY, {'limit': limit})
     rows = db_cursor.fetchall()
     return [transform_for_linked_data(row_data) for _, row_data in rows]
Beispiel #8
0
    def lookup_grouped_by_feature(self, uri, limit=20):
        """
        Query the edges around `uri` and group them by feature.

        The feature of a row is the tuple of its first two columns. The
        return value maps each feature to a list of edges; every edge gets
        an 'other' entry and is passed through transform_for_linked_data.
        """
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        def row_feature(row):
            return tuple(row[:2])

        def annotate(row):
            _direction, _, edge = row

            # Rough way of deciding which node is the 'other' one, meaning
            # the node that (usually) is not the queried URI. If both nodes
            # begin with the URI, prefer the longer one, since it is either
            # a more specific sense or a different, longer word.
            by_length = sorted([edge['start'], edge['end']], key=len)
            short_node, long_node = by_length
            if not short_node.startswith(uri):
                edge['other'] = short_node
            else:
                edge['other'] = long_node
            return edge

        db_cursor = self.connection.cursor()
        db_cursor.execute(NODE_TO_FEATURE_QUERY, {'node': uri, 'limit': limit})
        rows = db_cursor.fetchall()
        # Consecutive rows sharing a feature form one group.
        return {
            feature: [transform_for_linked_data(annotate(row)) for row in group]
            for feature, group in itertools.groupby(rows, row_feature)
        }
Beispiel #9
0
    def query(self, criteria, limit=20, offset=0):
        """
        Run a list query built from `criteria` and return the matching edges.

        The caller's `criteria` mapping is copied, not modified. For each of
        'node', 'other', 'start' and 'end' whose value is in
        TOO_BIG_PREFIXES, a 'filter_<name>' criterion holding that value
        followed by '%' is added. The SQL comes from make_list_query, and
        its parameters are the criteria values passed through
        remove_control_chars, plus `limit` and `offset`.
        """
        # Work on a copy so the extra filter_* keys never leak to the caller.
        criteria = criteria.copy()
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        for name in ['node', 'other', 'start', 'end']:
            if name not in criteria:
                continue
            if criteria[name] in TOO_BIG_PREFIXES:
                criteria['filter_' + name] = criteria[name] + '%'

        sql = make_list_query(criteria)
        bindings = {}
        for key, value in criteria.items():
            bindings[key] = remove_control_chars(value)
        bindings.update(limit=limit, offset=offset)

        cur = self.connection.cursor()
        cur.execute(sql, bindings)
        edges = []
        for _uri, edge in cur.fetchall():
            edges.append(transform_for_linked_data(edge))
        return edges
Beispiel #10
0
    def query(self, criteria, limit=20, offset=0):
        """
        Query edges matching `criteria`; the most general lookup method.

        If `criteria` has a 'node' key, GIN_QUERY_2WAY is run with forward
        and backward query values; otherwise GIN_QUERY_1WAY is run with a
        single query value. `limit` and `offset` are passed through as query
        parameters. Returns the `data` of each (uri, data, weight) row,
        passed through transform_for_linked_data.
        """
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        cur = self.connection.cursor()
        if 'node' not in criteria:
            one_way = gin_jsonb_value(criteria)
            sql = GIN_QUERY_1WAY
            bindings = {'query': jsonify(one_way), 'limit': limit, 'offset': offset}
        else:
            # A 'node' criterion may match either end of an edge, so a
            # query value is built for each direction.
            forward = gin_jsonb_value(criteria, node_forward=True)
            backward = gin_jsonb_value(criteria, node_forward=False)
            sql = GIN_QUERY_2WAY
            bindings = {
                'query_forward': jsonify(forward),
                'query_backward': jsonify(backward),
                'limit': limit,
                'offset': offset,
            }
        cur.execute(sql, bindings)

        edges = []
        for _uri, edge, _weight in cur.fetchall():
            edges.append(transform_for_linked_data(edge))
        return edges
Beispiel #11
0
 def sample_dataset(self, uri, limit=50, offset=0):
     """
     Run DATASET_QUERY for the dataset named by `uri`.

     The dataset URI is JSON-encoded before being bound to the 'dataset'
     parameter; `limit` and `offset` are bound as given. Returns the `data`
     of each (uri, data) row, passed through transform_for_linked_data.
     """
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     cur = self.connection.cursor()
     bindings = {'dataset': json.dumps(uri), 'limit': limit, 'offset': offset}
     cur.execute(DATASET_QUERY, bindings)
     edges = []
     for _edge_uri, edge in cur.fetchall():
         edges.append(transform_for_linked_data(edge))
     return edges
Beispiel #12
0
 def sample_dataset(self, uri, limit=50, offset=0):
     """
     Fetch edges for one dataset using DATASET_QUERY.

     `uri` is serialized with json.dumps and bound as 'dataset', alongside
     `limit` and `offset`. Every result row is a (uri, data) pair, and the
     returned list holds each `data` after transform_for_linked_data.
     """
     # Lazily establish the database connection.
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     db_cursor = self.connection.cursor()
     encoded_dataset = json.dumps(uri)
     db_cursor.execute(
         DATASET_QUERY,
         {'dataset': encoded_dataset, 'limit': limit, 'offset': offset},
     )
     rows = db_cursor.fetchall()
     return [transform_for_linked_data(row_data) for _, row_data in rows]
Beispiel #13
0
 def query(self, criteria, limit=20, offset=0):
     """
     Run the list query that make_list_query builds for `criteria`.

     The query parameters are the criteria themselves plus 'limit' and
     'offset'. Returns the `data` of each (uri, data) row, passed through
     transform_for_linked_data.
     """
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     # Copy the criteria so that adding limit/offset leaves the caller's
     # mapping untouched.
     bindings = dict(criteria)
     bindings.update(limit=limit, offset=offset)
     sql = make_list_query(criteria)
     cur = self.connection.cursor()
     cur.execute(sql, bindings)
     edges = []
     for _uri, edge in cur.fetchall():
         edges.append(transform_for_linked_data(edge))
     return edges
Beispiel #14
0
 def query(self, criteria, limit=20, offset=0):
     """
     Find edges matching `criteria`.

     make_list_query turns the criteria into SQL. That SQL is executed with
     a copy of the criteria, extended with 'limit' and 'offset', as its
     parameters. The result is a list of each row's `data`, converted by
     transform_for_linked_data.
     """
     # Connect on first use.
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     # 'limit' and 'offset' override any same-named keys in criteria.
     bindings = dict(criteria, limit=limit, offset=offset)
     sql = make_list_query(criteria)
     db_cursor = self.connection.cursor()
     db_cursor.execute(sql, bindings)
     rows = db_cursor.fetchall()
     return [transform_for_linked_data(row_data) for _, row_data in rows]
Beispiel #15
0
 def random_edges(self, limit=20):
     """
     Return a collection of distinct edges chosen at random.

     RANDOM_QUERY is executed with `limit` as its 'limit' parameter. Rows
     are (uri, data, weight) triples, of which only `data` is returned,
     passed through transform_for_linked_data.
     """
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     cur = self.connection.cursor()
     cur.execute(RANDOM_QUERY, {'limit': limit})
     edges = []
     for _uri, edge, _weight in cur.fetchall():
         edges.append(transform_for_linked_data(edge))
     return edges
Beispiel #16
0
 def random_edges(self, limit=20):
     """
     Pick distinct edges at random and return them as a list.

     Executes RANDOM_QUERY with the 'limit' parameter set to `limit`, then
     keeps the `data` column of each (uri, data, weight) row, converted by
     transform_for_linked_data.
     """
     # The connection is only opened once a query is actually needed.
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     db_cursor = self.connection.cursor()
     db_cursor.execute(RANDOM_QUERY, {'limit': limit})
     rows = db_cursor.fetchall()
     return [transform_for_linked_data(row_data) for _, row_data, _ in rows]
Beispiel #17
0
 def lookup_assertion(self, uri):
     """
     Fetch a single assertion by its URI, which starts with /a/.

     Returns a list of the matching rows' `data`, each passed through
     transform_for_linked_data.
     """
     # Strip control characters such as \x00 from the URI up front. The
     # postgres driver would drop \x00 regardless, but doing it here keeps
     # that case from being reported as a server error.
     uri = remove_control_chars(uri)
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     cur = self.connection.cursor()
     cur.execute("SELECT data FROM edges WHERE uri=%(uri)s", {'uri': uri})
     matches = []
     for (edge,) in cur.fetchall():
         matches.append(transform_for_linked_data(edge))
     return matches
Beispiel #18
0
 def lookup_assertion(self, uri):
     """
     Retrieve the assertion with the given URI (a URI starting with /a/).

     The result is a list: the `data` column of every row whose `uri`
     equals the sanitized URI, converted by transform_for_linked_data.
     """
     # Control characters such as \x00 are removed before querying. The
     # postgres driver would remove \x00 by itself, but sanitizing first
     # avoids reporting a server error when that happens.
     uri = remove_control_chars(uri)
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     db_cursor = self.connection.cursor()
     db_cursor.execute("SELECT data FROM edges WHERE uri=%(uri)s", {'uri': uri})
     rows = db_cursor.fetchall()
     return [transform_for_linked_data(row_data) for (row_data,) in rows]
Beispiel #19
0
 def sample_dataset(self, uri, limit=50, offset=0):
     """
     Return a subsample of the edges belonging to a particular dataset.

     `uri` is stripped of control characters and JSON-encoded before being
     bound to the 'dataset' parameter of DATASET_QUERY. `limit` and
     `offset` are bound as given. Each (uri, data) row contributes its
     `data`, passed through transform_for_linked_data.
     """
     uri = remove_control_chars(uri)
     if self.connection is None:
         self.connection = get_db_connection(self.dbname)
     cur = self.connection.cursor()
     bindings = {
         'dataset': json.dumps(uri),
         'limit': limit,
         'offset': offset
     }
     cur.execute(DATASET_QUERY, bindings)
     edges = []
     for _edge_uri, edge in cur.fetchall():
         edges.append(transform_for_linked_data(edge))
     return edges
Beispiel #20
0
    def lookup_grouped_by_feature(self, uri, limit=20):
        """
        Query used by the browseable interface: edges around a node, grouped
        by the 'feature' of that node they describe.

        A feature is defined by the relation, the queried node, and the
        direction (incoming or outgoing).

        Returns a dict keyed by the tuple of each row's first two columns.
        Each value is a list of edges that have been given an 'other' node
        and passed through transform_for_linked_data.
        """
        uri = remove_control_chars(uri)
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        def feature_key(row):
            return tuple(row[:2])

        def with_other_node(row):
            _direction, _, edge = row

            # Heuristic for picking the 'other' node, i.e. the one that (in
            # most cases) did not match the queried URI. When both nodes
            # start with the URI, the longer one wins: it is either a more
            # specific sense or a different, longer word.
            short_node, long_node = sorted([edge['start'], edge['end']], key=len)
            edge['other'] = long_node if short_node.startswith(uri) else short_node
            return edge

        cur = self.connection.cursor()
        cur.execute(NODE_TO_FEATURE_QUERY, {'node': uri, 'limit': limit})
        grouped = {}
        # groupby only merges adjacent rows, so rows must arrive from the
        # query already ordered by feature.
        for key, group in itertools.groupby(cur.fetchall(), feature_key):
            edges = []
            for row in group:
                edges.append(transform_for_linked_data(with_other_node(row)))
            grouped[key] = edges
        return grouped
Beispiel #21
0
    def lookup_grouped_by_feature(self, uri, limit=20):
        """
        Group the edges around a node by 'feature', for the browseable
        interface.

        A feature is the combination of the relation, the queried node, and
        the direction (incoming or outgoing).

        The return value maps each feature (the tuple of a row's first two
        columns) to a list of edges. Every edge gets an 'other' entry and is
        converted by transform_for_linked_data.
        """
        uri = remove_control_chars(uri)
        if self.connection is None:
            self.connection = get_db_connection(self.dbname)

        def row_feature(row):
            return tuple(row[:2])

        def annotate(row):
            _direction, _, edge = row

            # Rough way of deciding which node is the 'other' one, meaning
            # the node that (usually) is not the queried URI. If both nodes
            # begin with the URI, prefer the longer one, since it is either
            # a more specific sense or a different, longer word.
            by_length = sorted([edge['start'], edge['end']], key=len)
            short_node, long_node = by_length
            if not short_node.startswith(uri):
                edge['other'] = short_node
            else:
                edge['other'] = long_node
            return edge

        db_cursor = self.connection.cursor()
        db_cursor.execute(NODE_TO_FEATURE_QUERY, {'node': uri, 'limit': limit})
        rows = db_cursor.fetchall()
        # Consecutive rows sharing a feature form one group.
        return {
            feature: [transform_for_linked_data(annotate(row)) for row in group]
            for feature, group in itertools.groupby(rows, row_feature)
        }