def get_site(self, name=None, project=None, lang=None):
    """Look up a site in the sitesdb and return it as a Site instance.

    There are three ways to call this:

    - With no arguments, the default site named by
      ``config.wiki["defaultSite"]`` is returned.
    - With *name*, the site with that name is returned. The name is the
      site's ``wikiid`` in the API, e.g. *enwiki*.
    - With *project* and *lang*, the site matching both values is returned.
      Should several sites share the same project and language (unlikely),
      this is not a reliable way to pick one; pass an explicit *name*
      instead.

    If all three arguments are given, *name* is tried first; *project* and
    *lang* are only consulted when the lookup by *name* fails. If that
    fallback finds nothing either, the error from the *name* lookup is the
    one that propagates.

    As documented for this API, the site is logged into automatically using
    ``config.wiki["username"]`` and ``config.wiki["password"]`` when both
    are defined, and an empty sitesdb is created if none exists; both happen
    in the helpers this method delegates to.

    Raises :py:exc:`TypeError` if only one of *project* and *lang* is given,
    and :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if no default site
    is configured or the requested site is not in the sitesdb.
    """
    # project and lang only make sense as a pair; reject a lone one:
    if bool(project) != bool(lang):
        raise TypeError(
            "Keyword arguments 'lang' and 'project' must be specified together."
        )

    # Nothing requested explicitly, so fall back on the configured default:
    if not (name or project or lang):
        try:
            default_name = self.config.wiki["defaultSite"]
        except KeyError:
            raise SiteNotFoundError("Default site is not specified in config.")
        return self._get_site_object(default_name)

    # An explicit name wins; project/lang are merely a fallback for it:
    if name:
        try:
            return self._get_site_object(name)
        except SiteNotFoundError:
            if project and lang:
                fallback = self._get_site_name_from_sitesdb(project, lang)
                if fallback:
                    return self._get_site_object(fallback)
            # No usable fallback: surface the failure of the name lookup.
            raise

    # Only project and lang were supplied:
    matched = self._get_site_name_from_sitesdb(project, lang)
    if not matched:
        message = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
        raise SiteNotFoundError(message)
    return self._get_site_object(matched)
def add_site(self, project=None, lang=None, base_url=None, script_path="/w",
             sql=None):
    """Add a site to the sitesdb so it can be retrieved with get_site().

    If only a project and a lang are given, we'll guess the *base_url* as
    ``"//{lang}.{project}.org"`` (which is protocol-relative, becoming
    ``"https"`` if *useHTTPS* is ``True`` in config otherwise ``"http"``;
    *useHTTPS* defaults to ``True`` when absent from config). If this is
    wrong, provide the correct *base_url* as an argument (in which case
    project and lang are ignored).

    Most wikis use ``"/w"`` as the script path (meaning the API is located
    at ``"{base_url}{script_path}/api.php"`` ->
    ``"//{lang}.{project}.org/w/api.php"``), so this is the default. If your
    wiki is different, provide the script_path as an argument.

    SQL connection settings are guessed automatically using config's
    template value. If this is wrong or not specified, provide a dict of
    kwargs as *sql* and Site will pass it to
    :py:func:`oursql.connect(**sql) <oursql.connect>`, allowing you to make
    queries with
    :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.

    Login credentials, user agent, HTTPS preference, edit assertion, maxlag
    and the delay between queries are all read from ``config.wiki``. In the
    user agent, ``$1`` is replaced with our version and ``$2`` with the
    running Python version.

    Returns the newly added site, looked up by its name in the same way
    :py:meth:`get_site` would return it. Re-adding a site that already
    exists is intended as a way to refresh old site info.

    Raises :py:exc:`~earwigbot.exceptions.SiteNotFoundError` if not enough
    information has been provided to identify the site (e.g. a *project*
    but not a *lang*, with no *base_url*).
    """
    if not base_url:
        if not project or not lang:
            e = "Without a base_url, both a project and a lang must be given."
            raise SiteNotFoundError(e)
        base_url = "//{0}.{1}.org".format(lang, project)
    cookiejar = self._get_cookiejar()

    config = self.config
    login = (config.wiki.get("username"), config.wiki.get("password"))
    user_agent = config.wiki.get("userAgent")
    use_https = config.wiki.get("useHTTPS", True)
    assert_edit = config.wiki.get("assert")
    maxlag = config.wiki.get("maxlag")
    wait_between_queries = config.wiki.get("waitTime", 2)

    if user_agent:
        user_agent = user_agent.replace("$1", __version__)
        user_agent = user_agent.replace("$2", python_version())

    # Create a Site object to log in and load the other attributes:
    site = Site(base_url=base_url, script_path=script_path, sql=sql,
                login=login, cookiejar=cookiejar, user_agent=user_agent,
                use_https=use_https, assert_edit=assert_edit,
                maxlag=maxlag, wait_between_queries=wait_between_queries)

    # Write first, log second: if the sitesdb write fails, we must not have
    # already announced that the site was added.
    self._add_site_to_sitesdb(site)
    self._logger.info("Added site '{0}'".format(site.name))
    return self._get_site_object(site.name)
def _load_site_from_sitesdb(self, name):
    """Return all information stored in the sitesdb relating to given site.

    The information will be returned as a tuple, containing the site's name,
    project, language, base URL, article path, script path, SQL connection
    data, and namespaces, in that order. The SQL connection data is a dict
    built from the site's key/value rows, and the namespaces are a dict
    mapping each namespace ID to a list of its names, with a name flagged as
    primary placed at the front of the list.

    If the site is not found in the database, SiteNotFoundError will be
    raised. An empty database will be created before the exception is raised
    if none exists (detected by the first query failing with
    ``OperationalError``).
    """
    query1 = "SELECT * FROM sites WHERE site_name = ?"
    query2 = "SELECT sql_data_key, sql_data_value FROM sql_data WHERE sql_site = ?"
    query3 = "SELECT ns_id, ns_name, ns_is_primary_name FROM namespaces WHERE ns_site = ?"
    error = "Site '{0}' not found in the sitesdb.".format(name)

    # A connection used as a context manager only commits or rolls back; it
    # is never closed that way. These queries are read-only, so there is
    # nothing to commit, but the handle must be released explicitly.
    conn = sqlite.connect(self._sitesdb)
    try:
        try:
            site_data = conn.execute(query1, (name,)).fetchone()
        except sqlite.OperationalError:
            # The query itself failed, so set up an empty sitesdb for next
            # time; the requested site cannot be in it either way.
            self._create_sitesdb()
            raise SiteNotFoundError(error)
        if not site_data:
            raise SiteNotFoundError(error)
        sql_data = conn.execute(query2, (name,)).fetchall()
        ns_data = conn.execute(query3, (name,)).fetchall()
    finally:
        conn.close()

    name, project, lang, base_url, article_path, script_path = site_data
    sql = dict(sql_data)

    namespaces = {}
    for ns_id, ns_name, ns_is_primary_name in ns_data:
        names = namespaces.setdefault(ns_id, [])
        if ns_is_primary_name:  # "Primary" name goes first in list
            names.insert(0, ns_name)
        else:  # Ordering of the aliases doesn't matter
            names.append(ns_name)

    return (name, project, lang, base_url, article_path, script_path, sql,
            namespaces)