Exemple #1
0
    def get_site(self, name=None, project=None, lang=None):
        """Look up a site in the sitesdb and return the object built for it.

        Lookup rules, in order:

        * *project* and *lang* must be given together; passing only one of
          them raises :py:exc:`TypeError`.
        * With no arguments at all, the site named by
          ``config.wiki["defaultSite"]`` is returned. If that key is missing,
          :py:exc:`~earwigbot.exceptions.SiteNotFoundError` is raised.
        * With only *project* and *lang*, the site name is resolved through
          the sitesdb from that pair and then loaded. If several sites share
          the same pair, pass an explicit *name* instead.
        * With *name* (the site's ``wikiid`` in the API, like *enwiki*), that
          site is loaded directly. If it cannot be found and *project* and
          *lang* were also supplied, they are tried as a fallback; if the
          fallback resolves nothing, the original
          :py:exc:`~earwigbot.exceptions.SiteNotFoundError` is re-raised.
        """
        # The two keyword arguments are only meaningful as a pair:
        if bool(project) != bool(lang):
            message = "Keyword arguments 'lang' and 'project' must be specified together."
            raise TypeError(message)

        # Nothing was requested explicitly, so fall back to the default site:
        if not (name or project or lang):
            try:
                default_name = self.config.wiki["defaultSite"]
            except KeyError:
                message = "Default site is not specified in config."
                raise SiteNotFoundError(message)
            return self._get_site_object(default_name)

        # Only project and lang were given (the pairing check above ensures
        # they are both set once we know that name is empty):
        if not name:
            resolved = self._get_site_name_from_sitesdb(project, lang)
            if not resolved:
                template = "Site '{0}:{1}' not found in the sitesdb."
                raise SiteNotFoundError(template.format(project, lang))
            return self._get_site_object(resolved)

        # A name was given; project/lang are consulted only if it is unknown:
        try:
            return self._get_site_object(name)
        except SiteNotFoundError:
            if not (project and lang):
                raise
            fallback = self._get_site_name_from_sitesdb(project, lang)
            if not fallback:
                raise
            return self._get_site_object(fallback)
Exemple #2
0
    def add_site(self, project=None, lang=None, base_url=None,
                 script_path="/w", sql=None):
        """Add a site to the sitesdb so it can be retrieved with get_site().

        If only a project and a lang are given, we'll guess the *base_url* as
        ``"//{lang}.{project}.org"`` (which is protocol-relative; the
        *useHTTPS* config value, defaulting to ``True``, is passed to the
        Site to pick the scheme). If this is wrong, provide the correct
        *base_url* as an argument (in which case project and lang are
        ignored). Most wikis use ``"/w"`` as the script path (meaning the API
        is located at ``"{base_url}{script_path}/api.php"`` ->
        ``"//{lang}.{project}.org/w/api.php"``), so this is the default. If
        your wiki is different, provide the script_path as an argument. *sql*
        is handed to the Site unchanged; provide a dict of connection kwargs
        there if you want to make queries with
        :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

        Login credentials, user agent, ``assert``, ``maxlag`` and ``waitTime``
        (default ``2``) are read from ``config.wiki``. In the user agent,
        ``$1`` is replaced with our version and ``$2`` with the Python
        version.

        Returns the site object loaded back from the sitesdb under the new
        site's name, i.e. the same lookup :py:meth:`get_site` performs for a
        *name*. Raises :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if
        not enough information has been provided to identify the site (e.g.
        a *project* but not a *lang*, with no *base_url*).
        """
        if not base_url:
            if not project or not lang:
                e = "Without a base_url, both a project and a lang must be given."
                raise SiteNotFoundError(e)
            base_url = "//{0}.{1}.org".format(lang, project)
        cookiejar = self._get_cookiejar()

        config = self.config
        login = (config.wiki.get("username"), config.wiki.get("password"))
        user_agent = config.wiki.get("userAgent")
        use_https = config.wiki.get("useHTTPS", True)
        assert_edit = config.wiki.get("assert")
        maxlag = config.wiki.get("maxlag")
        wait_between_queries = config.wiki.get("waitTime", 2)

        if user_agent:
            user_agent = user_agent.replace("$1", __version__)
            user_agent = user_agent.replace("$2", python_version())

        # Create a Site object to log in and load the other attributes:
        site = Site(base_url=base_url, script_path=script_path, sql=sql,
                    login=login, cookiejar=cookiejar, user_agent=user_agent,
                    use_https=use_https, assert_edit=assert_edit,
                    maxlag=maxlag, wait_between_queries=wait_between_queries)

        # Store first, log second: the message must not claim success if the
        # write to the sitesdb raises.
        self._add_site_to_sitesdb(site)
        self._logger.info("Added site '{0}'".format(site.name))
        return self._get_site_object(site.name)
Exemple #3
0
    def _load_site_from_sitesdb(self, name):
        """Return all information stored in the sitesdb relating to given site.

        The information will be returned as a tuple, containing the site's
        name, project, language, base URL, article path, script path, SQL
        connection data (a dict built from the ``sql_data`` key/value rows),
        and namespaces (a dict mapping each namespace ID to a list of its
        names, primary name first), in that order.

        If the site is not found in the database,
        :py:exc:`SiteNotFoundError` will be raised. If the lookup itself
        fails with an ``OperationalError``, ``_create_sitesdb()`` is called
        before the exception is raised.
        """
        query1 = "SELECT * FROM sites WHERE site_name = ?"
        query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
        query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
        error = "Site '{0}' not found in the sitesdb.".format(name)

        # Using the connection as a context manager would only scope a
        # transaction, not close the handle (assuming `sqlite` is the stdlib
        # sqlite3 module), so close it explicitly. Only SELECTs are issued
        # here, so there is nothing to commit.
        conn = sqlite.connect(self._sitesdb)
        try:
            try:
                site_data = conn.execute(query1, (name,)).fetchone()
            except sqlite.OperationalError:
                self._create_sitesdb()
                raise SiteNotFoundError(error)
            if not site_data:
                raise SiteNotFoundError(error)
            sql_data = conn.execute(query2, (name,)).fetchall()
            ns_data = conn.execute(query3, (name,)).fetchall()
        finally:
            conn.close()

        name, project, lang, base_url, article_path, script_path = site_data
        sql = dict(sql_data)
        namespaces = {}
        for ns_id, ns_name, ns_is_primary_name in ns_data:
            names = namespaces.setdefault(ns_id, [])
            if ns_is_primary_name:  # "Primary" name goes first in list
                names.insert(0, ns_name)
            else:  # Ordering of the aliases doesn't matter
                names.append(ns_name)

        return (name, project, lang, base_url, article_path, script_path, sql,
                namespaces)