Example #1
 def __init__(self,
              dsn: str,
              querymgr: QueryManager,
              readonly: bool = True,
              autocommit: bool = False,
              application_name: Optional[str] = None) -> None:
     self._db = psycopg2.connect(dsn, application_name=application_name)
     self._db.set_session(readonly=readonly, autocommit=autocommit)
     querymgr.inject_queries(self, self._db)
Example #2
 def __init__(self,
              dsn: str,
              querymgr: QueryManager,
              readonly: bool = True,
              autocommit: bool = False,
              application_name: str | None = None) -> None:
     self._db = psycopg2.connect(dsn, application_name=application_name)
     self._db.set_session(
         readonly=readonly, autocommit=autocommit
     )  # type: ignore  # broken typing stubs for psycopg2
     querymgr.inject_queries(self, self._db)
Example #3
def ProcessDatabase(options, logger, repoproc, repositories_updated):
    logger.Log('connecting to database')

    db_logger = logger.GetIndented()

    querymgr = QueryManager(options.sql_dir)
    database = Database(options.dsn,
                        querymgr,
                        readonly=False,
                        application_name='repology-update')
    if options.initdb:
        db_logger.Log('(re)initializing database schema')
        database.create_schema()

        db_logger.Log('committing changes')
        database.commit()

    if options.database:
        db_logger.Log('clearing the database')
        database.update_start()

        package_queue = []
        num_pushed = 0
        start_time = timer()

        def PackageProcessor(packageset):
            nonlocal package_queue, num_pushed, start_time
            FillPackagesetVersions(packageset)
            package_queue.extend(packageset)

            if len(package_queue) >= 10000:
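                # flush the accumulated batch to the database and report throughput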
                database.add_packages(package_queue)
                num_pushed += len(package_queue)
                package_queue = []
                db_logger.Log(
                    '  pushed {} packages, {:.2f} packages/second'.format(
                        num_pushed, num_pushed / (timer() - start_time)))

        db_logger.Log('pushing packages to database')
        repoproc.StreamDeserializeMulti(processor=PackageProcessor,
                                        reponames=options.reponames)

        # process what's left in the queue
        database.add_packages(package_queue)

        if options.fetch and options.update and options.parse:
            db_logger.Log('recording repo updates')
            database.mark_repositories_updated(repositories_updated)
        else:
            db_logger.Log(
                'not recording repo updates, need --fetch --update --parse')

        db_logger.Log('updating views')
        database.update_finish()

        db_logger.Log('committing changes')
        database.commit()

    logger.Log('database processing complete')
Example #4
def main() -> int:
    options = parse_arguments()

    querymgr = QueryManager(options.sql_dir)
    database = Database(options.dsn,
                        querymgr,
                        readonly=True,
                        application_name='repology-benchmark')

    reference: Dict[str, float] = {}
    if options.load:
        try:
            with open(options.load, 'rb') as reffile:
                reference = pickle.load(reffile)
        except Exception:
            # the reference file is missing or unreadable; continue without comparison data
            pass

    results = []
    for num, (method, name, kwargs) in enumerate(queries):
        if not check_keywords(name, options.keywords):
            continue
        print('===> {}/{}: "{}"\n'.format(num + 1, len(queries), name),
              file=sys.stderr,
              end='')
        results.append(
            (name, run_single_query(database, method, kwargs, options)))
        sys.stderr.flush()

    for name, delta in results:
        change = ''
        if name in reference:
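            # compare against the saved reference timing, e.g. delta=0.30s vs reference=0.20s
            # is reported as 100*0.30/0.20 - 100 = 50.0% slower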
            if max(delta, reference[name]) / min(delta, reference[name]) < (
                    1 + options.epsilon):
                change = ' no change'
            elif delta > reference[name]:
                change = ' \033[0;91m{:.1f}% slower\033[0m'.format(
                    100.0 * delta / reference[name] - 100.0)
            else:
                change = ' \033[0;92m{:.1f}% faster\033[0m'.format(
                    100.0 * reference[name] / delta - 100.0)

            change += ' (was {:.2f}ms)'.format(reference[name] * 1000.0)

        print('{:>50s} {:.2f}ms{}'.format(name, delta * 1000.0, change),
              file=sys.stderr)

    if options.save:
        reference = {name: delta for name, delta in results}
        with open(options.save, 'wb') as reffile:
            pickle.dump(reference, reffile)

    return 0
Example #5
 def get_query_manager(self) -> QueryManager:
     return QueryManager(self.options.sql_dir)
Example #6
def main() -> int:
    options = parse_arguments()

    querymgr = QueryManager(options.sql_dir)
    database = Database(options.dsn, querymgr, readonly=True, application_name='repology-gensitemap')

    urls: List[str] = []
    if options.main:
        urls = ['/', '/news', '/statistics', '/about', '/api/v1', '/repositories/']

        urls.extend(('/maintainer/' + name for name in database.get_all_maintainer_names()))
        urls.extend(('/repository/' + name for name in database.get_all_repository_names()))
    elif options.metapackages:
        links_per_metapackage = 3

        print('Guessing threshold for important metapackages', file=sys.stderr)

        num_repos = 1
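        # raise the minimum-spread threshold until the projected url count fits into options.max_urls,
        # giving up (and truncating the metapackage list) once the threshold exceeds 20 repositories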
        while True:
            num_metapackages = database.get_all_metapackage_names_by_min_spread_count(num_repos)

            num_urls_total = len(urls) + num_metapackages * links_per_metapackage

            print('Threshold = {}, {} metapackages, {} total urls'.format(num_repos, num_metapackages, num_urls_total), file=sys.stderr)

            if num_urls_total <= options.max_urls:
                print('  Looks good', file=sys.stderr)
                break

            if num_repos > 20:
                print('  Giving up, will truncate metapackage list', file=sys.stderr)
                break

            num_repos += 1

        # get most important packages
        for name in database.get_all_metapackage_names_by_min_spread(num_repos, (options.max_urls - len(urls)) // links_per_metapackage):
            urls.append('/project/' + name + '/versions')
            urls.append('/project/' + name + '/packages')
            urls.append('/project/' + name + '/information')

        # fill the remaining space with less important packages
        for name in database.get_all_metapackage_names_by_spread(num_repos - 1, (options.max_urls - len(urls)) // links_per_metapackage):
            urls.append('/project/' + name + '/versions')
            urls.append('/project/' + name + '/packages')
            urls.append('/project/' + name + '/information')
    else:
        print('Please specify output mode', file=sys.stderr)

    shuffle(urls)

    # write XML
    print('Writing XML', file=sys.stderr)

    print('<?xml version="1.0" encoding="UTF-8"?>')
    print('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">')
    for url in urls:
        print('<url><loc>' + html.escape(options.www_home + url) + '</loc><changefreq>daily</changefreq></url>')
    print('</urlset>')

    return 0
Example #7
# repology is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with repology.  If not, see <http://www.gnu.org/licenses/>.

import flask

from repology.config import config
from repology.database import Database
from repology.querymgr import QueryManager

__all__ = [
    'get_db',
]

_querymgr = QueryManager(config['SQL_DIR'])


def get_db():
    # XXX: this is not really a persistent DB connection!
    if not hasattr(flask.g, 'database'):
        flask.g.database = Database(config['DSN'],
                                    _querymgr,
                                    readonly=False,
                                    autocommit=True,
                                    application_name='repology-app')
    return flask.g.database
Example #8
def Main():
    options = ParseArguments()

    logger = FileLogger(options.logfile) if options.logfile else StderrLogger()
    querymgr = QueryManager(options.sql_dir)
    database = Database(options.dsn, querymgr, readonly=True, autocommit=True, application_name='repology-linkchecker/reader')

    readqueue = multiprocessing.Queue(10)
    writequeue = multiprocessing.Queue(10)
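    # pipeline: master -> readqueue -> LinkProcessingWorker(s) -> writequeue -> LinkUpdatingWorker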

    writer = multiprocessing.Process(target=LinkUpdatingWorker, args=(writequeue, options, querymgr, logger))
    writer.start()

    processpool = [multiprocessing.Process(target=LinkProcessingWorker, args=(readqueue, writequeue, i, options, logger)) for i in range(options.jobs)]
    for process in processpool:
        process.start()

    # the base logger has already been passed to the workers, so it's safe to add a prefix here
    logger = logger.GetPrefixed('master: ')

    prev_url = None
    while True:
        # Get pack of links
        logger.Log('Requesting pack of urls')
        urls = database.get_links_for_check(
            after=prev_url,
            prefix=options.prefix,  # no limit by default
            limit=options.packsize,
            recheck_age=datetime.timedelta(seconds=options.age * 60 * 60 * 24),
            unchecked_only=options.unchecked,
            checked_only=options.checked,
            failed_only=options.failed,
            succeeded_only=options.succeeded
        )
        if not urls:
            logger.Log('  No more urls to process')
            break

        # Get another pack of urls for the last hostname to ensure
        # that all urls for one hostname end up in the same large pack
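        # (e.g. for 'https://example.org/foo/bar' the hostname prefix is 'https://example.org/')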
        match = re.match('([a-z]+://[^/]+/)', urls[-1])
        if match:
            urls += database.get_links_for_check(
                after=urls[-1],
                prefix=match.group(1),
                recheck_age=datetime.timedelta(seconds=options.age * 60 * 60 * 24),
                unchecked_only=options.unchecked,
                checked_only=options.checked,
                failed_only=options.failed,
                succeeded_only=options.succeeded
            )

        # Process
        if options.maxpacksize and len(urls) > options.maxpacksize:
            logger.Log('Skipping {} urls ({}..{}), exceeds max pack size'.format(len(urls), urls[0], urls[-1]))
        else:
            readqueue.put(urls)
            logger.Log('Enqueued {} urls ({}..{})'.format(len(urls), urls[0], urls[-1]))

        prev_url = urls[-1]

    logger.Log('Waiting for child processes to exit')

    # close workers
    for process in processpool:
        readqueue.put(None)
    for process in processpool:
        process.join()

    # close writer
    writequeue.put(None)
    writer.join()

    logger.Log('Done')

    return 0
Example #9
 def __init__(self, dsn, queriesdir, readonly=True, autocommit=False):
     self.db = psycopg2.connect(dsn)
     self.db.set_session(readonly=readonly, autocommit=autocommit)
     self.querymgr = QueryManager(queriesdir, self.db)
Example #10
class Database:
    def __init__(self, dsn, queriesdir, readonly=True, autocommit=False):
        self.db = psycopg2.connect(dsn)
        self.db.set_session(readonly=readonly, autocommit=autocommit)
        self.querymgr = QueryManager(queriesdir, self.db)

    def Request(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)

    def RequestSingleValue(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)

            row = cursor.fetchone()

            if row is None:
                return None

            return row[0]

    def RequestSingleAsDict(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)

            row = cursor.fetchone()

            if row is None:
                return None

            names = [desc.name for desc in cursor.description]

            return dict(zip(names, row))

    def RequestManyAsSingleColumnArray(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)

            return [row[0] for row in cursor.fetchall()]

    def RequestManyAsDictOfDicts(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)

            names = [desc.name for desc in cursor.description]

            return {
                row[0]: dict(zip(names[1:], row[1:]))
                for row in cursor.fetchall()
            }

    def RequestManyAsDicts(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)

            names = [desc.name for desc in cursor.description]

            return [dict(zip(names, row)) for row in cursor.fetchall()]

    def RequestManyAsPackages(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)

            names = [desc.name for desc in cursor.description]

            return [
                Package(**dict(zip(names, row))) for row in cursor.fetchall()
            ]

    def CreateSchema(self):
        self.querymgr.create_schema()

    def Clear(self):
        self.querymgr.clear()

    def AddPackages(self, packages):
        with self.db.cursor() as cursor:
            cursor.executemany(
                """
                INSERT INTO packages(
                    repo,
                    family,
                    subrepo,

                    name,
                    effname,

                    version,
                    origversion,
                    versionclass,

                    maintainers,
                    category,
                    comment,
                    homepage,
                    licenses,
                    downloads,

                    flags,
                    shadow,
                    verfixed,

                    flavors,

                    extrafields
                ) VALUES (
                    %s,
                    %s,
                    %s,

                    %s,
                    %s,

                    %s,
                    %s,
                    %s,

                    %s,
                    %s,
                    %s,
                    %s,
                    %s,
                    %s,

                    %s,
                    %s,
                    %s,

                    %s,

                    %s
                )
                """, [(
                    package.repo,
                    package.family,
                    package.subrepo,
                    package.name,
                    package.effname,
                    package.version,
                    package.origversion,
                    package.versionclass,
                    package.maintainers,
                    package.category,
                    package.comment,
                    package.homepage,
                    package.licenses,
                    package.downloads,
                    package.flags,
                    package.shadow,
                    package.verfixed,
                    package.flavors,
                    json.dumps(package.extrafields),
                ) for package in packages])

    def MarkRepositoriesUpdated(self, reponames):
        with self.db.cursor() as cursor:
            cursor.executemany(
                """
                INSERT
                    INTO repositories (
                        name,
                        last_update
                    ) VALUES (
                        %s,
                        now()
                    )
                    ON CONFLICT (name)
                    DO UPDATE SET
                        last_update = now()
                """, [[name] for name in reponames])

    def UpdateViews(self):
        self.querymgr.update_views()

    def Commit(self):
        self.db.commit()

    def GetMetapackage(self, names):
        return self.RequestManyAsPackages(
            """
            SELECT
                repo,
                family,
                subrepo,

                name,
                effname,

                version,
                origversion,
                versionclass,

                maintainers,
                category,
                comment,
                homepage,
                licenses,
                downloads,

                flags,
                shadow,
                verfixed,

                flavors,

                extrafields
            FROM packages
            WHERE effname {}
            """.format('= ANY (%s)' if isinstance(names, list) else '= %s'),
            names)

    def GetMetapackages(self, request, limit=500):
        request.Limit(limit)

        query, args = request.GetQuery()

        return self.RequestManyAsPackages(
            """
            SELECT
                repo,
                family,
                subrepo,

                name,
                effname,

                version,
                origversion,
                versionclass,

                maintainers,
                category,
                comment,
                homepage,
                licenses,
                downloads,

                flags,
                shadow,
                verfixed,

                flavors,

                extrafields
            FROM packages
            WHERE effname IN (
                {}
            )
            """.format(query), *args)

    def GetRelatedMetapackages(self, name, limit=500):
        return self.RequestManyAsPackages(
            """
            SELECT
                repo,
                family,
                subrepo,

                name,
                effname,

                version,
                origversion,
                versionclass,

                maintainers,
                category,
                comment,
                homepage,
                licenses,
                downloads,

                flags,
                shadow,
                verfixed,

                flavors,

                extrafields
            FROM packages
            WHERE effname IN (
                WITH RECURSIVE r AS (
                        SELECT
                            effname,
                            url
                        FROM url_relations
                        WHERE effname=%s
                    UNION
                        SELECT
                            url_relations.effname,
                            url_relations.url
                        FROM url_relations
                        JOIN r ON
                            url_relations.effname = r.effname OR url_relations.url = r.url
                )
                SELECT DISTINCT
                    effname
                FROM r
                ORDER BY effname
                LIMIT %s
            )
            """, name, limit)

    def GetPackagesCount(self):
        return self.RequestSingleValue(
            'SELECT num_packages FROM statistics LIMIT 1')

    def GetMetapackagesCount(self):
        return self.RequestSingleValue(
            'SELECT num_metapackages FROM statistics LIMIT 1')

    def GetMaintainersCount(self):
        return self.RequestSingleValue(
            'SELECT num_maintainers FROM statistics LIMIT 1')

    def GetMaintainersRange(self):
        # could use min/max here, but these are slower on pgsql 9.6
        return (
            self.RequestSingleValue(
                'SELECT maintainer FROM maintainers ORDER BY maintainer LIMIT 1'
            ),
            self.RequestSingleValue(
                'SELECT maintainer FROM maintainers ORDER BY maintainer DESC LIMIT 1'
            ))

    def GetMaintainers(self,
                       bound=None,
                       reverse=False,
                       search=None,
                       limit=500):
        where = []
        tail = ''

        args = []

        order = 'maintainer'

        if bound:
            if reverse:
                where.append('maintainer <= %s')
                order = 'maintainer DESC'
                args.append(bound)
            else:
                where.append('maintainer >= %s')
                args.append(bound)

        if search:
            where.append('maintainer LIKE %s')
            args.append('%' + search + '%')

        if limit:
            tail = 'LIMIT %s'
            args.append(limit)

        return self.RequestManyAsDicts(
            """
            SELECT
                *
            FROM
            (
                SELECT
                    maintainer,
                    num_packages,
                    num_metapackages,
                    num_metapackages_outdated
                FROM maintainers
                {}
                ORDER BY {}
                {}
            ) AS tmp
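            -- the inner select applies bound/search/limit (possibly in reverse order);
            -- re-sort the resulting page ascending by maintainer here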
            ORDER BY maintainer
            """.format('WHERE ' + ' AND '.join(where) if where else '', order,
                       tail), *args)

    def GetMaintainerInformation(self, maintainer):
        return self.RequestSingleAsDict(
            """
            SELECT
                num_packages,
                num_packages_newest,
                num_packages_outdated,
                num_packages_ignored,
                num_packages_unique,
                num_packages_devel,
                num_packages_legacy,
                num_packages_incorrect,
                num_packages_untrusted,
                num_packages_noscheme,
                num_packages_rolling,
                num_metapackages,
                num_metapackages_outdated,
                repository_package_counts,
                repository_metapackage_counts,
                category_metapackage_counts
            FROM maintainers
            WHERE maintainer = %s
            """, maintainer)

    def GetMaintainerMetapackages(self, maintainer, limit=1000):
        return self.RequestManyAsSingleColumnArray(
            """
            SELECT
                effname
            FROM maintainer_metapackages
            WHERE maintainer = %s
            ORDER BY effname
            LIMIT %s
            """, maintainer, limit)

    def GetMaintainerSimilarMaintainers(self, maintainer, limit=100):
        # this obscure request needs some clarification
        #
        # what we calculate as score here is actually Jaccard index
        # (see wikipedia) for two sets (of metapackages maintained by
        # two maintainers)
        #
        # let M = set of metapackages for maintainer passed to this function
        # let C = set of metapackages for other maintainer we test for similarity
        #
        # score = |M⋂C| / |M⋃C| = |M⋂C| / (|M| + |C| - |M⋂C|)
        #
        # - num_metapackages_common is |M⋂C|
        # - num_metapackages is |C|
        # - sub-select just gets |M|
        # - the divisor thus is |M⋃C| = |M| + |C| - |M⋂C|
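        # e.g. |M| = 100, |C| = 80, |M⋂C| = 40  =>  score = 100.0 * 40 / (80 - 40 + 100) ≈ 28.6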
        return self.RequestManyAsDicts(
            """
            SELECT
                maintainer,
                num_metapackages_common AS count,
                100.0 * num_metapackages_common / (
                    num_metapackages - num_metapackages_common + (
                        SELECT num_metapackages
                        FROM maintainers
                        WHERE maintainer=%s
                    )
                ) AS match
            FROM
                (
                    SELECT
                        maintainer,
                        count(*) AS num_metapackages_common
                    FROM
                        maintainer_metapackages
                    WHERE
                        maintainer != %s AND
                        effname IN (
                            SELECT
                                effname
                            FROM maintainer_metapackages
                            WHERE maintainer=%s
                        )
                    GROUP BY maintainer
                ) AS intersecting_counts
                INNER JOIN maintainers USING(maintainer)
            ORDER BY match DESC
            LIMIT %s
            """, maintainer, maintainer, maintainer, limit)

    def GetRepositories(self):
        return self.RequestManyAsDicts("""
            SELECT
                name,
                num_packages,
                num_packages_newest,
                num_packages_outdated,
                num_packages_ignored,
                num_packages_unique,
                num_packages_devel,
                num_packages_legacy,
                num_packages_incorrect,
                num_packages_untrusted,
                num_packages_noscheme,
                num_packages_rolling,
                num_metapackages,
                num_metapackages_unique,
                num_metapackages_newest,
                num_metapackages_outdated,
                num_metapackages_comparable,
                last_update at time zone 'UTC' AS last_update_utc,
                now() - last_update AS since_last_update,
                num_problems,
                num_maintainers
            FROM repositories
        """)

    def GetRepository(self, repo):
        return self.RequestSingleAsDict(
            """
            SELECT
                num_packages,
                num_packages_newest,
                num_packages_outdated,
                num_packages_ignored,
                num_packages_unique,
                num_packages_devel,
                num_packages_legacy,
                num_packages_incorrect,
                num_packages_untrusted,
                num_packages_noscheme,
                num_packages_rolling,
                num_metapackages,
                num_metapackages_unique,
                num_metapackages_newest,
                num_metapackages_outdated,
                num_metapackages_comparable,
                last_update at time zone 'UTC' AS last_update_utc,
                now() - last_update AS since_last_update,
                num_problems,
                num_maintainers
            FROM repositories
            WHERE name = %s
            """,
            repo,
        )

    def GetRepositoriesHistoryAgo(self, seconds=60 * 60 * 24):
        return self.RequestSingleAsDict(
            """
            SELECT
                ts AS timestamp,
                now() - ts AS timedelta,
                snapshot
            FROM repositories_history
            WHERE ts IN (
                SELECT
                    ts
                FROM repositories_history
                WHERE ts < now() - INTERVAL %s
                ORDER BY ts DESC
                LIMIT 1
            )
            """,
            datetime.timedelta(seconds=seconds),
        )

    def GetRepositoriesHistoryPeriod(self, seconds=60 * 60 * 24, repo=None):
        repopath = ''
        repoargs = ()

        if repo:
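            # narrow the json snapshot to this repository's sub-object via the #> path operator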
            repopath = '#>%s'
            repoargs = ('{' + repo + '}', )

        return self.RequestManyAsDicts(
            """
            SELECT
                ts AS timestamp,
                now() - ts AS timedelta,
                snapshot{} AS snapshot
            FROM repositories_history
            WHERE ts >= now() - INTERVAL %s
            ORDER BY ts
            """.format(repopath), *repoargs,
            datetime.timedelta(seconds=seconds))

    def GetStatisticsHistoryPeriod(self, seconds=60 * 60 * 24):
        return self.RequestManyAsDicts(
            """
            SELECT
                ts AS timestamp,
                now() - ts AS timedelta,
                snapshot
            FROM statistics_history
            WHERE ts >= now() - INTERVAL %s
            ORDER BY ts
            """, datetime.timedelta(seconds=seconds))

    def Query(self, query, *args):
        with self.db.cursor() as cursor:
            cursor.execute(query, args)
            return cursor.fetchall()

    def SnapshotHistory(self):
        self.querymgr.snapshot_history()

    def ExtractLinks(self):
        self.querymgr.extract_links()

    def GetLinksForCheck(self,
                         after=None,
                         prefix=None,
                         recheck_age=None,
                         limit=None,
                         unchecked_only=False,
                         checked_only=False,
                         failed_only=False,
                         succeeded_only=False):
        conditions = []
        args = []

        # reduce the noise while the linkchecker code doesn't support other url schemes
        conditions.append('(url LIKE %s OR url LIKE %s)')
        args.append('http://%')
        args.append('https://%')

        if after is not None:
            conditions.append('url > %s')
            args.append(after)

        if prefix is not None:
            conditions.append('url LIKE %s')
            args.append(prefix + '%')

        if recheck_age is not None:
            conditions.append(
                '(last_checked IS NULL OR last_checked <= now() - INTERVAL %s)'
            )
            args.append(datetime.timedelta(seconds=recheck_age))

        if unchecked_only:
            conditions.append('last_checked IS NULL')

        if checked_only:
            conditions.append('last_checked IS NOT NULL')

        if failed_only:
            conditions.append('status != 200')

        if succeeded_only:
            conditions.append('status = 200')

        conditions_expr = ''
        limit_expr = ''

        if conditions:
            conditions_expr = 'WHERE ' + ' AND '.join(conditions)

        if limit:
            limit_expr = 'LIMIT %s'
            args.append(limit)

        return self.RequestManyAsSingleColumnArray(
            """
            SELECT
                url
            FROM links
            {}
            ORDER BY url
            {}
            """.format(conditions_expr, limit_expr), *args)

    linkcheck_status_timeout = -1
    linkcheck_status_too_many_redirects = -2
    linkcheck_status_unknown_error = -3
    linkcheck_status_cannot_connect = -4
    linkcheck_status_invalid_url = -5
    linkcheck_status_dns_error = -6

    def UpdateLinkStatus(self,
                         url,
                         status,
                         redirect=None,
                         size=None,
                         location=None):
        success = status == 200

        self.Request(
            """
            UPDATE links
            SET
                last_checked = now(),
                last_success = CASE WHEN %s THEN now() ELSE last_success END,
                last_failure = CASE WHEN %s THEN now() ELSE last_failure END,
                status = %s,
                redirect = %s,
                size = %s,
                location = %s
            WHERE url = %s
            """, success, not success, status, redirect, size, location, url)

    def GetMetapackageLinkStatuses(self, name):
        return self.RequestManyAsDictOfDicts(
            """
            SELECT
                url,
                last_checked,
                last_success,
                last_failure,
                status,
                redirect,
                size,
                location
            FROM links
            WHERE url in (
                -- this additional wrapping seems to nudge the query planner
                -- into an index scan on links instead of a seq scan, which
                -- makes the query 100x faster; XXX: recheck with postgres 10
                -- or report this?
                SELECT DISTINCT url from (
                    SELECT
                        unnest(downloads) as url
                    FROM packages
                    WHERE effname = %s
                    UNION
                    SELECT
                        homepage as url
                    FROM packages
                    WHERE homepage IS NOT NULL and effname = %s
                ) AS tmp
            )
            """, name, name)

    def GetProblemsCount(self, repo=None, effname=None, maintainer=None):
        where_expr = ''
        args = []

        conditions = []

        if repo:
            conditions.append('repo = %s')
            args.append(repo)
        if effname:
            conditions.append('effname = %s')
            args.append(effname)
        if maintainer:
            conditions.append('maintainer = %s')
            args.append(maintainer)

        if conditions:
            where_expr = 'WHERE ' + ' AND '.join(conditions)

        return self.RequestSingleValue(
            """
            SELECT count(*)
            FROM problems
            {}
            """.format(where_expr), *args)

    def GetProblems(self,
                    repo=None,
                    effname=None,
                    maintainer=None,
                    limit=None):
        # XXX: eliminate duplication with GetProblemsCount()
        where_expr = ''
        limit_expr = ''
        args = []

        conditions = []

        if repo:
            conditions.append('repo = %s')
            args.append(repo)
        if effname:
            conditions.append('effname = %s')
            args.append(effname)
        if maintainer:
            conditions.append('maintainer = %s')
            args.append(maintainer)

        if conditions:
            where_expr = 'WHERE ' + ' AND '.join(conditions)
        if limit:
            limit_expr = 'LIMIT %s'
            args.append(limit)

        return self.RequestManyAsDicts(
            """
            SELECT
                repo,
                name,
                effname,
                maintainer,
                problem
            FROM problems
            {}
            ORDER by repo, effname, maintainer
            {}
            """.format(where_expr, limit_expr), *args)

    def AddReport(self, effname, need_verignore, need_split, need_merge,
                  comment):
        self.Request(
            """
            INSERT
            INTO reports (
                created,
                effname,
                need_verignore,
                need_split,
                need_merge,
                comment
            ) VALUES (
                now(),
                %s,
                %s,
                %s,
                %s,
                %s
            )
            """, effname, need_verignore, need_split, need_merge, comment)

    def GetReportsCount(self, effname):
        return self.RequestSingleValue(
            'SELECT count(*) FROM reports WHERE effname = %s', effname)

    def GetReports(self, effname):
        return self.RequestManyAsDicts(
            """
            SELECT
                id,
                now() - created AS created_ago,
                effname,
                need_verignore,
                need_split,
                need_merge,
                comment,
                reply,
                accepted
            FROM reports
            WHERE effname = %s
            ORDER BY created DESC
            """, effname)