Example #1
0
    def _missing_from_list(
        self, table: str, data: Iterable[Dict], hash_keys: List[str], cur=None
    ):
        """Read from table the data with hash_keys that are missing.

        Args:
            table: Table name (e.g content_mimetype, content_language,
              etc...)
            data: Dict of data to read from
            hash_keys: List of keys to read in the data dict.

        Yields:
            The data which is missing from the db.

        """
        cur = self._cursor(cur)
        keys = ", ".join(hash_keys)
        equality = " AND ".join(("t.%s = c.%s" % (key, key)) for key in hash_keys)
        yield from execute_values_generator(
            cur,
            """
            select %s from (values %%s) as t(%s)
            where not exists (
                select 1 from %s c
                where %s
            )
            """
            % (keys, keys, table, equality),
            (tuple(m[k] for k in hash_keys) for m in data),
        )
Example #2
0
    def extid_get_from_extid_list(self,
                                  extid_type: str,
                                  ids: List[bytes],
                                  version: Optional[int] = None,
                                  cur=None):
        cur = self._cursor(cur)
        query_keys = ", ".join(
            self.mangle_query_key(k, "extid") for k in self.extid_cols)
        filter_query = ""
        if version is not None:
            filter_query = cur.mogrify(f"WHERE extid_version={version}",
                                       (version, )).decode()

        sql = f"""
            SELECT {query_keys}
            FROM (VALUES %s) as t(sortkey, extid, extid_type)
            LEFT JOIN extid USING (extid, extid_type)
            {filter_query}
            ORDER BY sortkey
            """

        yield from execute_values_generator(
            cur,
            sql,
            (((sortkey, extid, extid_type)
              for sortkey, extid in enumerate(ids))),
        )
Example #3
0
 def raw_extrinsic_metadata_get_by_ids(self, ids: List[Sha1Git], cur=None):
     cur = self._cursor(cur)
     yield from execute_values_generator(
         cur,
         self._raw_extrinsic_metadata_select_query +
         "INNER JOIN (VALUES %s) AS t(id) ON t.id = raw_extrinsic_metadata.id",
         [(id_, ) for id_ in ids],
     )
Example #4
0
    def origin_get_by_url(self, origins, cur=None):
        """Retrieve origin `(type, url)` from urls if found."""
        cur = self._cursor(cur)

        query = """SELECT %s FROM (VALUES %%s) as t(url)
                   LEFT JOIN origin ON t.url = origin.url
                """ % ",".join("origin." + col for col in self.origin_cols)

        yield from execute_values_generator(cur, query,
                                            ((url, ) for url in origins))
Example #5
0
    def origin_get_by_sha1(self, sha1s, cur=None):
        """Retrieve origin urls from sha1s if found."""
        cur = self._cursor(cur)

        query = """SELECT %s FROM (VALUES %%s) as t(sha1)
                   LEFT JOIN origin ON t.sha1 = digest(origin.url, 'sha1')
                """ % ",".join("origin." + col for col in self.origin_cols)

        yield from execute_values_generator(cur, query,
                                            ((sha1, ) for sha1 in sha1s))
Example #6
0
    def origin_id_get_by_url(self, origins, cur=None):
        """Retrieve origin `(type, url)` from urls if found."""
        cur = self._cursor(cur)

        query = """SELECT id FROM (VALUES %s) as t(url)
                   LEFT JOIN origin ON t.url = origin.url
                """

        for row in execute_values_generator(cur, query,
                                            ((url, ) for url in origins)):
            yield row[0]
Example #7
0
 def directory_missing_from_list(self, directories, cur=None):
     cur = self._cursor(cur)
     yield from execute_values_generator(
         cur,
         """
         SELECT id FROM (VALUES %s) as t(id)
         WHERE NOT EXISTS (
             SELECT 1 FROM directory d WHERE d.id = t.id
         )
         """,
         ((id, ) for id in directories),
     )
Example #8
0
 def snapshot_missing_from_list(self, snapshots, cur=None):
     cur = self._cursor(cur)
     yield from execute_values_generator(
         cur,
         """
         SELECT id FROM (VALUES %s) as t(id)
         WHERE NOT EXISTS (
             SELECT 1 FROM snapshot d WHERE d.id = t.id
         )
             """,
         ((id, ) for id in snapshots),
     )
Example #9
0
    def content_missing_per_sha1_git(self, contents, cur=None):
        cur = self._cursor(cur)

        yield from execute_values_generator(
            cur,
            """
        SELECT t.sha1_git FROM (VALUES %s) AS t(sha1_git)
        WHERE NOT EXISTS (
            SELECT 1 FROM content c WHERE c.sha1_git = t.sha1_git
        )""",
            ((sha1, ) for sha1 in contents),
        )
Example #10
0
 def release_missing_from_list(self, releases, cur=None):
     cur = self._cursor(cur)
     yield from execute_values_generator(
         cur,
         """
         SELECT id FROM (VALUES %s) as t(id)
         WHERE NOT EXISTS (
             SELECT 1 FROM release r WHERE r.id = t.id
         )
         """,
         ((id, ) for id in releases),
     )
Example #11
0
 def directory_get_raw_manifest(
         self,
         directory_ids: List[Sha1Git],
         cur=None) -> Iterable[Tuple[Sha1Git, bytes]]:
     cur = self._cursor(cur)
     yield from execute_values_generator(
         cur,
         """
         SELECT t.id, raw_manifest FROM (VALUES %s) as t(id)
         INNER JOIN directory ON (t.id=directory.id)
         """,
         ((id_, ) for id_ in directory_ids),
     )
Example #12
0
 def content_get_metadata_from_hashes(self,
                                      hashes: List[bytes],
                                      algo: str,
                                      cur=None):
     cur = self._cursor(cur)
     assert algo in DEFAULT_ALGORITHMS
     query = f"""
         select {", ".join(self.content_get_metadata_keys)}
         from (values %s) as t (hash)
         inner join content on (content.{algo}=hash)
     """
     yield from execute_values_generator(
         cur,
         query,
         ((hash_, ) for hash_ in hashes),
     )
Example #13
0
 def content_ctags_get_from_list(self, ids, cur=None):
     cur = self._cursor(cur)
     keys = map(self._convert_key, self.content_ctags_cols)
     yield from execute_values_generator(
         cur,
         """
         select %s
         from (values %%s) as t(id)
         inner join content_ctags c
             on c.id=t.id
         inner join indexer_configuration i
             on c.indexer_configuration_id=i.id
         order by line
         """
         % ", ".join(keys),
         ((_id,) for _id in ids),
     )
Example #14
0
    def content_fossology_license_get_from_list(self, ids, cur=None):
        """Retrieve licenses per id.

        """
        cur = self._cursor(cur)
        keys = map(self._convert_key, self.content_fossology_license_cols)
        yield from execute_values_generator(
            cur,
            """
            select %s
            from (values %%s) as t(id)
            inner join content_fossology_license c on t.id=c.id
            inner join indexer_configuration i
                on i.id=c.indexer_configuration_id
            """
            % ", ".join(keys),
            ((_id,) for _id in ids),
        )
Example #15
0
    def release_get_from_list(self,
                              releases,
                              ignore_displayname=False,
                              cur=None):
        cur = self._cursor(cur)
        query_keys = ", ".join(
            self.mangle_query_key(k, "release", ignore_displayname)
            for k in self.release_get_cols)

        yield from execute_values_generator(
            cur,
            """
            SELECT %s FROM (VALUES %%s) as t(sortkey, id)
            LEFT JOIN release ON t.id = release.id
            LEFT JOIN person author ON release.author = author.id
            ORDER BY sortkey
            """ % query_keys,
            ((sortkey, id) for sortkey, id in enumerate(releases)),
        )
Example #16
0
 def _get_from_list(self, table, ids, cols, cur=None, id_col="id"):
     """Fetches entries from the `table` such that their `id` field
     (or whatever is given to `id_col`) is in `ids`.
     Returns the columns `cols`.
     The `cur` parameter is used to connect to the database.
     """
     cur = self._cursor(cur)
     keys = map(self._convert_key, cols)
     query = """
         select {keys}
         from (values %s) as t(id)
         inner join {table} c
             on c.{id_col}=t.id
         inner join indexer_configuration i
             on c.indexer_configuration_id=i.id;
         """.format(
         keys=", ".join(keys), id_col=id_col, table=table
     )
     yield from execute_values_generator(cur, query, ((_id,) for _id in ids))
Example #17
0
    def object_find_by_sha1_git(self, ids, cur=None):
        cur = self._cursor(cur)

        yield from execute_values_generator(
            cur,
            """
            WITH t (sha1_git) AS (VALUES %s),
            known_objects as ((
                select
                  id as sha1_git,
                  'release'::object_type as type,
                  object_id
                from release r
                where exists (select 1 from t where t.sha1_git = r.id)
            ) union all (
                select
                  id as sha1_git,
                  'revision'::object_type as type,
                  object_id
                from revision r
                where exists (select 1 from t where t.sha1_git = r.id)
            ) union all (
                select
                  id as sha1_git,
                  'directory'::object_type as type,
                  object_id
                from directory d
                where exists (select 1 from t where t.sha1_git = d.id)
            ) union all (
                select
                  sha1_git as sha1_git,
                  'content'::object_type as type,
                  object_id
                from content c
                where exists (select 1 from t where t.sha1_git = c.sha1_git)
            ))
            select t.sha1_git as sha1_git, k.type
            from t
            left join known_objects k on t.sha1_git = k.sha1_git
            """,
            ((id, ) for id in ids),
        )
Example #18
0
    def content_missing_from_list(self, contents, cur=None):
        cur = self._cursor(cur)

        keys = ", ".join(self.content_hash_keys)
        equality = " AND ".join(
            ("t.%s = c.%s" % (key, key)) for key in self.content_hash_keys)

        yield from execute_values_generator(
            cur,
            """
            SELECT %s
            FROM (VALUES %%s) as t(%s)
            WHERE NOT EXISTS (
                SELECT 1 FROM content c
                WHERE %s
            )
            """ % (keys, keys, equality),
            (tuple(c[key] for key in self.content_hash_keys)
             for c in contents),
        )
Example #19
0
    def extid_get_from_swhid_list(
        self,
        target_type: str,
        ids: List[bytes],
        extid_version: Optional[int] = None,
        extid_type: Optional[str] = None,
        cur=None,
    ):
        cur = self._cursor(cur)
        target_type = ObjectType(
            target_type).name.lower()  # aka "rev" -> "revision", ...
        query_keys = ", ".join(
            self.mangle_query_key(k, "extid") for k in self.extid_cols)
        filter_query = ""
        if extid_version is not None and extid_type is not None:
            filter_query = cur.mogrify(
                "WHERE extid_version=%s AND extid_type=%s",
                (
                    extid_version,
                    extid_type,
                ),
            ).decode()

        sql = f"""
            SELECT {query_keys}
            FROM (VALUES %s) as t(sortkey, target, target_type)
            LEFT JOIN extid USING (target, target_type)
            {filter_query}
            ORDER BY sortkey
            """

        yield from execute_values_generator(
            cur,
            sql,
            (((sortkey, target, target_type)
              for sortkey, target in enumerate(ids))),
            template=b"(%s,%s,%s::object_type)",
        )