def main():
    """Print all namespaces in given language to console."""
    length = len(sys.argv)
    if length == 1:
        language = Language('en')
    else:
        language = Language(sys.argv[1])

    site = Site(language.code, 'geogebra')
    print('== Namespaces in {} =='.format(language))
    for number, names in site.namespaces().items():
        names_print = ', '.join(names)
        print('Number: {0}\t Names: {1}'.format(number, names_print))
Example No. 2
def create_seasons(series_id,
                   number_of_seasons,
                   quickstatements=False,
                   dry=False):
    series_title = ItemPage(Site().data_repository(), series_id)
    series_title.get(force=True)
    series_label = series_title.labels['en']
    for i in range(1, number_of_seasons + 1):
        label = f"{series_label}, season {i}"
        descr = f"season {i} of {series_label}"
        if quickstatements:
            create_season_quickstatements(series_id, label, descr, i)
        else:
            create_season(series_id, label, descr, i, dry)
Example No. 3
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for movie_id, title in movies_with_missing_labels_with_title():
        print(
            f"{dry_str}Setting label='{title}' for {movie_id} ( https://www.wikidata.org/wiki/{movie_id} )"
        )
        if not dry:
            movie_item = ItemPage(repo, movie_id)
            movie_item.get()
            movie_item.editLabels({"en": title})
Example No. 4
    def __repr__(self):
        """Return representation string."""
        kwargs = self.sse_kwargs.copy()
        if self._site != Site():
            kwargs['site'] = self._site
        if self._streams:
            kwargs['streams'] = self._streams
            kwargs.pop('url')
        if self._since:
            kwargs['since'] = self._since
        if kwargs['timeout'] == config.socket_timeout:
            kwargs.pop('timeout')
        return '{0}({1})'.format(self.__class__.__name__, ', '.join(
            '%s=%r' % x for x in kwargs.items()))
Example No. 5
    def test_all(self):
        site1 = Site('en')
        site2 = Site('de')
        pn = ['page1', 'page2', 'page3']
        sites = [site1, site2]
        pages = [Page(s, '%s-%s' % (p, s.dbName())) for p in pn for s in sites]

        m = PerWikiMapper(2)
        m.add('Foo.jpg', pages[0])
        for index in [1, 2, 3]:
            m.add('Bar.jpg', pages[index])
        m.add('Baz.jpg', pages[1])
        m.add('Quux.jpg', pages[1])

        file_list = []
        for page, files in m.files_per_page():
            file_list.append(page.title() + '>' + '|'.join(files))

        expected = [
            'Page1-enwiki>Foo.jpg', 'Page2-enwiki>Bar.jpg',
            'Page1-dewiki>Bar.jpg|Baz.jpg|Quux.jpg', 'Page2-dewiki>Bar.jpg'
        ]
        self.assertEqual(sorted(file_list), sorted(expected))
Example No. 6
    def __init__(self, **kwargs):
        """Initializer.

        @keyword site: a project site object. Used when no url is given
        @type site: APISite
        @keyword since: a timestamp for older events; there will likely be
            between 7 and 31 days of history available, but this is not
            guaranteed. It may be given as a pywikibot.Timestamp, an ISO 8601
            string or a MediaWiki timestamp string.
        @type since: pywikibot.Timestamp or str
        @keyword streams: event stream types. Mandatory when no url is given.
            Multiple streams may be given as a string with comma-separated
            stream types or as an iterable of strings. Refer to
            https://stream.wikimedia.org/?doc for the available Wikimedia
            stream types.
        @type streams: str or iterable
        @keyword timeout: a timeout value indicating how long to wait before
            giving up when sending data
        @type timeout: int, float or a tuple of two values of int or float
        @keyword url: a URL to retrieve events from. Will be set to a default
            URL built from _site.family settings, stream types and timestamp
        @type url: str
        @param kwargs: keyword arguments passed to SSEClient and requests lib
        @raises ImportError: sseclient is not installed
        @raises NotImplementedError: no stream types specified
        """
        if isinstance(EventSource, Exception):
            raise ImportError('sseclient is required for EventStreams;\n'
                              'install it with "pip install sseclient"\n')
        self.filter = {'all': [], 'any': [], 'none': []}
        self._total = None
        self._site = kwargs.pop('site', Site())

        self._streams = kwargs.pop('streams', None)
        if self._streams and not isinstance(self._streams, StringTypes):
            self._streams = ','.join(self._streams)

        self._since = kwargs.pop('since', None)
        if self._since:
            # assume this is a mw timestamp, convert it to a Timestamp object
            if isinstance(self._since, StringTypes) \
               and '-' not in self._since:
                self._since = Timestamp.fromtimestampformat(self._since)
            if isinstance(self._since, Timestamp):
                self._since = self._since.isoformat()

        self._url = kwargs.get('url') or self.url
        kwargs.setdefault('url', self._url)
        kwargs.setdefault('timeout', config.socket_timeout)
        self.sse_kwargs = kwargs
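
A minimal usage sketch for the constructor documented above, assuming the standard pywikibot EventStreams interface; the stream name and filter values are illustrative only:

from pywikibot.comms.eventstreams import EventStreams

# Sketch: follow edits on English Wikipedia from the 'recentchange' stream.
stream = EventStreams(streams='recentchange')
stream.register_filter(server_name='en.wikipedia.org', type='edit')
for event in stream:
    print(event['title'], event['user'])
    break  # stop after the first matching event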
Example No. 7
def main() -> None:
    """Execute the bot."""
    logging.basicConfig(level=logging.WARNING)
    if len(sys.argv) != 2:
        print(f'Usage: {sys.argv[0]} "Title of List Page"')
        return
    listTitle = sys.argv[1]

    # Initialize pywikibot.
    assert Site().code == 'en'
    initLimits(
        editsLimits={'default': 2000},
        brfaNumber=6,
        onlySimulateEdits=False,
        botTrial=False
    )

    listPage = pywikibot.Page(Site(), listTitle)
    if not listPage.exists():
        raise Exception(f'Page [[{listTitle}]] does not exist.')
    print(f'List: [[{listTitle}]]')
    # for rTitle, anchor in parseList(listPage.text):
    #     fixRedirectAnchor(rTitle, anchor, listTitle)
    exceptions = [
        'List of Hindawi academic journals',
        'Hindawi academic journal',
        'List of MDPI academic journals',
        'List of MDPI journals',
        'List of Dove Medical Press academic journals',
        'List of Dove Press academic journals',
        'List of Medknow Publications academic journals',
        'List of Nature Research journals']
    for rPage in getRedirectsToPage(listTitle, namespaces=0, content=True):
        rTitle = rPage.title()
        if rTitle not in exceptions:
            fixRedirectAnchor(rTitle, getPredictedAnchor(rTitle), listTitle)
Example No. 8
def _clone(src: str, dest: str, props: Iterable[wp.WikidataProperty]):
    """Copy all specified properties from the src ID to the dest ID"""
    repoutil = RepoUtils(Site().data_repository())
    if not src.startswith("Q"):
        raise ValueError(
            f"Expected item ID of the format 'Q####', found {src}")
    if not dest.startswith("Q"):
        raise ValueError(
            f"Expected item ID of the format 'Q####', found {dest}")

    src_item = ItemPage(repoutil.repo, src)
    dest_item = ItemPage(repoutil.repo, dest)

    success, failures = repoutil.copy(src_item, dest_item, props)
    print(f"Success: {success}, Failures: {failures}")
Example No. 9
def main() -> None:
    """Run the bot."""
    logging.basicConfig(level=logging.WARNING)
    # Initialize pywikibot.
    assert Site().code == 'en'
    utils.initLimits(
        editsLimits={'default': 3000},
        brfaNumber=6,
        onlySimulateEdits=False,
        botTrial=False
    )

    redirects = utils.getCategoryAsSet('Redirects from ISO 4 abbreviations',
                                       recurse=False)
    redirects = set(r for r in redirects if '.' in r)
    for i, rTitle in enumerate(redirects):
        print(f'Doing {i}/{len(redirects)}: {rTitle}', flush=True)
        variants = getVariantRedirects(rTitle)
        if len(variants) <= 2:
            print('Skip: no variants')
            continue
        print(f'Variants: {len(variants) - 2}')
        rPage = pywikibot.Page(Site(), rTitle)
        if not rPage.isRedirectPage():
            print('Skip: not a redirect')
            continue
        if ':' in rTitle[:5]:
            print('Skip: colon in title.')
            continue
        targetArticle = rPage.getRedirectTarget().title()
        if 'Category:' in targetArticle:
            print('Skip: redirect to a category')
            continue
        for variant in variants:
            if variant != rTitle and variant != rTitle.replace('.', ''):
                makeVariantRedirect(variant, targetArticle)
Example No. 10
def main() -> None:
    """Execute the bot."""
    logging.basicConfig(level=logging.WARNING)
    if len(sys.argv) != 2:
        print(f'Usage: {sys.argv[0]} filename.txt')
        return
    filename = sys.argv[1]

    # Initialize pywikibot.
    assert Site().code == 'en'
    initLimits(
        editsLimits={'create': 600, 'talk': 600, 'fix': 600, 'hatnote': 0},
        brfaNumber=6,
        onlySimulateEdits=False,
        botTrial=False
    )

    state.loadOrInitState(STATE_FILE_NAME)

    configEnd: Optional[int] = None
    configLines: List[str] = []
    with open(filename) as f:
        numLines = sum(1 for line in f if line.rstrip())
    with open(filename) as f:
        for i, line in enumerate(f):
            line = line.strip()
            if not line:
                continue

            if configEnd is None:
                print(f'Config line {i}/{numLines} \t [{filename}]')
                if line == '---':
                    configEnd = i
                    config = Config(configLines)
                else:
                    configLines.append(line)
            else:
                print(f'Title line {i - configEnd}/{numLines - configEnd} \t '
                      f'[{filename}]')
                if config.lang:
                    parts = list(map(lambda x: x.strip(), line.split(';')))
                    assert len(parts) == 2
                    doOmicsRedirects(parts[1], config, parts[0])
                else:
                    doOmicsRedirects(line, config)
                if config.publisher:
                    doOmicsHatnotes(line, config.publisher)
            sys.stdout.flush()
    state.saveState(STATE_FILE_NAME)
Example No. 11
def extract_coach_tenures(name):
    """
    Extract a coaches tenures from Wikipedia.
    
    Arguments:
    - name (name of coach)
    
    Returns:
    - list(dict)
    """
    logging.info('Looking for coach %s' % name)
    page_name = get_page_name_from_coach_name_wiki(name)

    # If we can't find a wikipedia page, return immediately
    if not page_name:
        return []
    else:
        logging.debug('Looking up %s as http://en.wikipedia.org/wiki/%s' %
                      (name, page_name))

    # Extract page content from wikipedia and narrow it down to the templates
    p = Page(Site('en', 'wikipedia'), page_name)
    if p.isRedirectPage():
        p = p.getRedirectTarget()
    content = p.get()

    parsed = mwparserfromhell.parse(content)
    templates = parsed.filter_templates()

    # Extract teams and years from the template
    teams, years = None, None
    for template in templates:
        for param in template.params:
            if "coach_teams" in param.name:
                teams = parse_coach_teams_and_positions_from_wiki(param)
            if "coach_years" in param.name:
                years = parse_coach_years_from_wiki(param)

    # If we were not able to extract information from the page, log & return empty
    if not teams or not years:
        logging.warning(
            'ISSUE DETECTED: %s is valid page but no information extracted' %
            name)
        return []

    # Merge each (team, year) pair into a single tenure record.
    tenures = [{**team_info, **year_info, 'name': name}
               for team_info, year_info in zip(teams, years)]
    return tenures
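
A short usage sketch for the function above, assuming the helper functions it references are available; the coach name is only an example:

tenures = extract_coach_tenures("Nick Saban")  # example name
for tenure in tenures:
    print(tenure)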
Example No. 12
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for movie_id, movie_label in movies_with_missing_titles():
        print(
            f"{dry_str}Setting title='{movie_label}' for {movie_id} ( https://www.wikidata.org/wiki/{movie_id} )"
        )
        if not dry:
            movie_item = ItemPage(repo, movie_id)
            movie_item.get()
            claim = Claim(repo, wp.TITLE.pid)
            claim.setTarget(WbMonolingualText(movie_label, "en"))
            movie_item.addClaim(claim)
Example No. 13
class MaccabiPediaCragoDumper:
    def __init__(self):
        self.output_path = ""
        self.maccabipedia = Site()
        self.games = dict()
        self.games_events = dict()

    def dump_games_tables(self):
        request = self.maccabipedia._simple_request(
            action="cargoquery",
            tables="Games_Catalog",
            fields="Date, Hour, MatchDay, Season, Competition, Leg, Opponent, "
                   "HomeAway, Stadium, ResultMaccabi, ResultOpponent, "
                   "CoachMaccabi, CoachOpponent, Refs, Crowd",
            limit=5000,
            offset=0)
        self.games = request.submit()
Example No. 14
def makeAmpersandRedirects(pageTitle: str,
                           foreign: Set[str],
                           targetPageTitle: Optional[str] = None,
                           andToAmpersand: bool = True,
                           ampersandToAnd: bool = True) -> bool:
    """If pageTitle contains 'and'/'&', try creating redirect from '&'/'and'.

    `foreign` is a set of foreign-language titles to avoid.
    Return whether any edits made.
    """
    if len(pageTitle) > 95:
        print('Skipping (length): ', pageTitle)
        return False
    if not targetPageTitle:
        targetPageTitle = pageTitle
    rTitle = ''
    if ' and ' in pageTitle and andToAmpersand:
        rTitle = pageTitle.replace(' and ', ' & ')
        rTitle = rTitle.replace(', & ', ' & ')
    if ' & ' in pageTitle and ampersandToAnd:
        rTitle = pageTitle.replace(' & ', ' and ')
        # Exclude possibly-foreign titles based on categories and
        # on language detection.
        if pageTitle in foreign:
            print('Skipping (lang category): ', pageTitle)
            return False
        if not EnglishWordList.check(pageTitle):
            isReliable, _, details = \
                pycld2.detect(pageTitle, isPlainText=True)
            if not isReliable or details[0][0] != 'ENGLISH':
                print('Skipping (lang detect): ', pageTitle)
                print(isReliable, str(details))
                return False
    if not rTitle:
        return False
    # Try creating a redirect from rTitle to pageTitle.
    rPage = pywikibot.Page(Site(), rTitle)
    # Skip if the page already exists.
    if rPage.exists():
        print('Skipping (already exists): ', rTitle)
        return False
    # Create the redirect.
    print(f'Creating redirect from [[{rTitle}]] to [[{targetPageTitle}]]')
    rNewContent = (f'#REDIRECT [[{targetPageTitle}]]\n'
                   f'{{{{R from modification}}}}\n')
    summary = 'Redirect between ampersand/and variant.'
    return trySaving(rPage, rNewContent, summary, overwrite=False)
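
A hedged example call for the function above; the title is illustrative, and the empty foreign set disables the language-category check:

# Sketch: would try to create 'Law and Order' as a redirect to 'Law & Order'
# (via trySaving), provided the redirect page does not already exist.
made_edit = makeAmpersandRedirects('Law & Order', foreign=set())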
Example No. 15
    @classmethod
    def setUpClass(cls):
        """
        Set up the test class.

        Prefetch the Site object for each of the sites the test
        class has declared are needed.
        """
        super(TestCase, cls).setUpClass()

        if not hasattr(cls, 'sites'):
            return

        # This stores the site under the site name.
        if not cls.sites:
            cls.sites = {}

        # If the test is not cached, create new Site objects for this class
        if not hasattr(cls, 'cached') or not cls.cached:
            orig_sites = pywikibot._sites
            pywikibot._sites = {}

        interface = None  # defaults to 'APISite'
        if hasattr(cls, 'dry') and cls.dry:
            # Delay load to avoid cyclic import
            from tests.utils import DrySite
            interface = DrySite

        for data in cls.sites.values():
            if 'site' not in data and 'code' in data and 'family' in data:
                data['site'] = Site(data['code'],
                                    data['family'],
                                    interface=interface)
            if 'hostname' not in data and 'site' in data:
                try:
                    data['hostname'] = data['site'].hostname()
                except KeyError:
                    # The family has defined this as obsolete
                    # without a mapping to a hostname.
                    pass

        if not hasattr(cls, 'cached') or not cls.cached:
            pywikibot._sites = orig_sites

        if len(cls.sites) == 1:
            key = next(iter(cls.sites.keys()))
            if 'site' in cls.sites[key]:
                cls.site = cls.sites[key]['site']
Example No. 16
    def getCategories(self, article):
        baseDir = "articleCategoriesCache/"
        if not os.path.exists(baseDir):
            os.makedirs(baseDir)
        fname = baseDir + article
        if os.path.isfile(fname):
            # Try the cache first; fall back to the platform encoding if the
            # file is not valid UTF-8.
            try:
                with codecs.open(fname, encoding='utf-8') as f:
                    lines = [line.strip() for line in f.readlines()]
            except UnicodeDecodeError:
                with codecs.open(fname) as f:
                    lines = [line.strip() for line in f.readlines()]
            lines = self.filterCategories(lines)
            if lines:
                return lines

        site = Site("en")
        page = Page(site, article)
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        # Download the visible (non-hidden) categories and cache them.
        cats = sorted(
            cat.title() for cat in page.categories()
            if not cat.isHiddenCategory()
        )
        cats = self.filterCategories(cats)
        text = "".join(cat + "\n" for cat in cats)
        with codecs.open(fname, "a+", encoding='utf-8') as f:
            f.write(text)
        return cats
Example No. 17
def makeVariantRedirect(vTitle: str, targetArticle: str) -> bool:
    """Try creating a redirect from vTitle to targetArticle."""
    rPage = pywikibot.Page(Site(), vTitle)
    # Skip if the page already exists.
    if rPage.exists():
        print('Skipping variant (already exists): ', vTitle)
        return False
    # Create the redirect.
    print(f'Creating redirect from [[{vTitle}]] to [[{targetArticle}]]')

    # Check number of results in Google search: only possible for <100 request.
    # sleepTime = 15
    # headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    #            'AppleWebKit/537.36 (KHTML, like Gecko) '
    #            'Chrome/60.0.3112.113 Safari/537.36'}
    # url = 'https://www.google.com/search?'
    # url += urllib.parse.urlencode({'q': '"' + vTitle + '"'})
    # while True:
    #     try:
    #         sleep(sleepTime)
    #         req = urllib.request.Request(url, headers=headers)
    #         with urllib.request.urlopen(req) as response:
    #             html = str(response.read())
    #             if 'No results found' in html:
    #                 print('No Results')
    #                 return False
    #             regex = r'([0-9]+),?\s*([0-9]+),?\s*([0-9]*)\s*results'
    #             m = re.search(regex, html)
    #             if not m:
    #                 print('no Results')
    #                 return False
    #             res = m.group(1) + m.group(2) + m.group(3)
    #             print('Results=', res)
    #             if int(res) < 5:
    #                 return False
    #             break
    #     except urllib.error.URLError as err:
    #         print('Exception: ', sys.exc_info()[0], '\n', err.reason)
    #         sleepTime *= 2
    #         print('sleep=', sleepTime, flush=True)

    rNewContent = '#REDIRECT [[' + targetArticle + ']]\n'
    rNewContent += '{{R from abbreviation}}\n'
    summary = 'Redirect from variant abbreviation.'
    return utils.trySaving(rPage, rNewContent, summary, overwrite=False)
Example No. 18
def items_with_missing_labels_with_title():
    """Find items with missing labels, but with a title

      Missing labels are identified by checking if the label is equal to
      the QID

      Returns an iterable of (item, item QID, title)
  """
    query = f"""
  SELECT DISTINCT ?item ?itemId ?title WHERE {{
    ?item wdt:{wp.INSTANCE_OF.pid} ?itemType;
      wdt:{wp.TITLE.pid} ?title.
    VALUES ?itemType {{
      wd:{wp.TELEVISION_SERIES.ljust(10, " ")} # television series
      wd:{wp.TELEVISION_SERIES_EPISODE.ljust(10, " ")} # television series episode
      wd:{wp.BOOK.ljust(10, " ")} # book
      wd:{wp.FILM.ljust(10, " ")} # film
      wd:{wp.SILENT_FILM.ljust(10, " ")} # silent film
      wd:{wp.LITERARY_WORK.ljust(10, " ")} # literary work
      wd:{wp.WRITTEN_WORK.ljust(10, " ")} # written work
      wd:{wp.PERIODICAL.ljust(10, " ")} # periodical
    }}
    # Skip "http://www.wikidata.org/entity/" (31 characters)
    BIND(SUBSTR(STR(?item), 32 ) AS ?itemId)

    # Only look for titles that are in English, since we add the English label
    FILTER((LANG(?title)) = "en")

    # The label will be the same as the QID if the label is missing
    FILTER(REGEX(?itemLabel, ?itemId))

    SERVICE wikibase:label {{
      bd:serviceParam wikibase:language "en".
      ?item rdfs:label ?itemLabel.
    }}
  }}
  """
    print(query)
    results = SparqlQuery(repo=Site().data_repository()).select(query)
    for result in results:
        item_link = result["item"]
        item_id = result["itemId"]
        title = result["title"]
        yield item_link, item_id, title
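
A small consumer sketch for the generator above:

for item_link, item_id, title in items_with_missing_labels_with_title():
    print(f"{item_id}: {title}")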
Example No. 19
def get_names_current_coaches_and_coordinators():
    """
    Gets the names of the current D1 coaches and coordinators.
    
    Params:
    - None
    
    Returns:
    - list[str(), str()]
    """

    text = Page(Site('en', 'wikipedia'),
                'List_of_current_NCAA_Division_I_FBS_football_coaches').get()
    parsed = mwparserfromhell.parse(text)
    nameps = [t for t in parsed.filter_templates()
              if t.name.matches('sortname')]
    results = []
    for n in nameps:
        results.append(' '.join(str(param.value) for param in n.params[:2]))
    return results
Example No. 20
def create_episodes(series_id,
                    season_id,
                    titles_file,
                    quickstatements=False,
                    dry=False):
    Site().login()
    try:
        commands.create_episodes(series_id, season_id, titles_file,
                                 quickstatements, dry)
    except commands.errors.SuspiciousTitlesError as e:
        click.confirm(
            f"An error occurred when reading the CSV file:\n{e.message}\nDo you want to continue?",
            abort=True)
        commands.create_episodes(series_id,
                                 season_id,
                                 titles_file,
                                 quickstatements,
                                 dry,
                                 confirm_titles=True)
Example No. 21
    def test_attributes_after_run(self):
        """Test FamilyFileGenerator attributes after run()."""
        gen = self.generator_instance
        gen.run()

        with self.subTest(test='Test whether default is loaded'):
            self.assertIn(self.site.lang, gen.wikis)

        # Subtest fails on musicbrainz (T130381) and wsbeta (T243669)
        if self.site.family.name not in ('wsbeta', 'musicbrainz'):
            with self.subTest(test='Test element counts'):
                if self.site.lang not in gen.prefixes:
                    gen.prefixes += [self.site.lang]
                self.assertCountEqual(gen.prefixes, gen.wikis)

        # test creating Site from url
        # only test Sites for downloaded wikis (T241413)
        for language in filter(lambda x: x['prefix'] in gen.wikis, gen.langs):
            lang = language['prefix']
            url = language['url']
            wiki = gen.wikis[lang]
            lang_parse = urlparse(url)
            wiki_parse = urlparse(wiki.server)

            with self.subTest(url=url):
                if lang_parse.netloc != wiki_parse.netloc:
                    # skip redirected url (T241413)
                    self.skipTest('{} is redirected to {}'.format(
                        lang_parse.netloc, wiki_parse.netloc))

                site = Site(url=url)

                try:  # T194138 to be solved
                    self.assertEqual(
                        site.lang, lang, 'url has lang "{lang}" '
                        'but Site {site} has lang "{site.lang}"'.format(
                            site=site, lang=lang))
                except AssertionError:
                    self.skipTest(
                        'KNOWN BUG: url has lang "{lang}" '
                        'but Site {site} has lang "{site.lang}"'.format(
                            site=site, lang=lang))
Example No. 22
def episodes(season_id):
    """Find episodes for a given season (specified by QID)

        Returns an iterable of (season ordinal, episode QID, episode title)
    """
    query = f"""
    SELECT ?seasonOrdinal ?episode ?episodeTitle WHERE {{
      ?episode wdt:{wp.INSTANCE_OF.pid} wd:{wp.TELEVISION_SERIES_EPISODE};
               wdt:{wp.SEASON.pid} wd:{season_id};
               wdt:{wp.TITLE.pid} ?episodeTitle;
               (p:{wp.SEASON.pid}/pq:{wp.SERIES_ORDINAL.pid}) ?seasonOrdinal .
    }}
    ORDER BY (?seasonOrdinal)
    """
    results = SparqlQuery(repo=Site().data_repository()).select(query)
    for result in results:
        ordinal = int(result["seasonOrdinal"])
        episode_id = result["episode"].split("/")[-1]
        title = result["episodeTitle"]
        yield ordinal, episode_id, title
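
A consumer sketch for the generator above; the season Q-ID is a placeholder:

for ordinal, episode_id, title in episodes("Q1000000"):  # placeholder season QID
    print(f"Episode {ordinal}: {title} ({episode_id})")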
Example No. 23
def process(day):
    """
    one day bot processing
     
    arguments:
    day -- python date format
    
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(
            day=format_date(day)))
    start = to_date(day)
    end = to_date(day + ONE_DAY)
    result = "\n== {} ==\n".format(format_date(day))
    for i, page in enumerate(creation_log(start, end), 1):
        if params.verbose:
            print(i, page["timestamp"])

        dl = deletelog(page["title"])
        if dl:
            r = (
                "* {{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} recréé par {{{{u|{user}}}}} \n"
                .format(title=wiki_param(page["title"]),
                        pas=wiki_param("Discussion:" + page["title"] +
                                       "/Suppression"),
                        user=wiki_param(page["user"]),
                        date=format_date(from_date(dl["timestamp"]))))
            if params.verbose:
                print(r)
            result += r

    page = Page(Site(), params.prefix + "/" + format_date(day, skip_day=True))

    try:
        result = page.get() + result
    except NoPage:
        pass
    page.put(
        result,
        comment="Journal des recréations ({day})".format(day=format_date(day)))
Example No. 24
def getCategoryAsSet(name: str, recurse: bool = True, namespaces: int = 0) \
        -> Set[str]:
    """Get all titles of pages in given category as a set().

    ``name`` should not include 'Category:'.
    Be careful with `recurse`, you may accidentally get really deep into
    millions of pages.
    """
    print('Getting category:', name, flush=True)
    result = set()
    count = 0
    if not name.startswith('Category:'):
        name = 'Category:' + name
    cat = pywikibot.Category(Site(), name)
    for page in cat.articles(recurse=recurse,
                             namespaces=namespaces,
                             total=_listLimit,
                             content=False):
        result.add(page.title())
        count = count + 1
    print('Got', str(count), 'pages.', flush=True)
    return result
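
A usage sketch for the function above; the category name is an example, and recursion is disabled to keep the crawl small:

titles = getCategoryAsSet('English-language journals', recurse=False)
print(len(titles), 'pages found')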
Example No. 25
def create_seasons(series_id,
                   number_of_seasons,
                   quickstatements=False,
                   dry=False):
    """Creates multiple season items on WikiData

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    number_of_seasons: int
        The number of season to create for this series
    quickstatements: bool
        if True, simply print out a list of quickstatements.
        if False, then create the items on WikiData directly
    dry: bool
        Whether or not this function should run in dry-run mode.
        In dry-run mode, no real changes are made to WikiData, they are only
        logged to stdout.

    Returns
    -------
    season_ids: List[str]
        The Wiki IDs of the seasons that were created
    """
    series_title = ItemPage(Site().data_repository(), series_id)
    series_title.get(force=True)
    series_label = series_title.labels['en']
    season_ids = []
    for i in tqdm(range(1, number_of_seasons + 1)):
        label = f"{series_label}, season {i}"
        descr = f"season {i} of {series_label}"
        if quickstatements:
            create_season_quickstatements(series_id, label, descr, i)
        else:
            season_id = create_season(series_id, label, descr, i, dry)
            season_ids.append(season_id)

    return season_ids
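
A hedged example call; the series Q-ID is a placeholder, and dry=True keeps Wikidata unchanged (the series item is still read):

season_ids = create_seasons("Q1079", 3, dry=True)  # placeholder series QID
print(season_ids)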
Example No. 26
def copy_delayed(
    src_item: ItemPage, dest_item: ItemPage, props: Iterable[wp.WikidataProperty]
) -> Iterable[api.Fix]:
    repo = Site().data_repository()

    src_item.get()
    dest_item.get()

    claims = []

    for prop in props:
        src_claims = src_item.claims.get(prop.pid, [])

        if len(src_claims) > 1:
            print(
                f"Cannot copy {prop} from {format(src_item)} to {format(dest_item)}. Only scalar properties can be copied"
            )
            continue

        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            continue

        targets = [claim.getTarget() for claim in src_claims]

        for target in targets:
            target.get()

            target_str = printable_target_value(target)

            print(
                f"Creating claim to copy {prop}={target_str} from {format(src_item)} to {format(dest_item)}"
            )

            new_claim = Claim(repo, prop.pid)
            new_claim.setTarget(target)
            summary = f"Setting {prop.pid} ({prop.name})"
            claims.append(api.ClaimFix(new_claim, summary, dest_item))
    return claims
Example No. 27
    def __init__(self, endpoint=None, entity_url=None, repo=None):
        """
        Create endpoint.

        @param endpoint: SPARQL endpoint URL
        @type endpoint: string
        @param entity_url: URL prefix for any entities returned in a query.
        @type entity_url: string
        @param repo: The Wikibase site which we want to run queries on. If
                     provided this overrides any value in endpoint and entity_url.
                     Defaults to Wikidata.
        @type repo: pywikibot.site.DataSite
        """
        # default to Wikidata
        if not repo and not endpoint:
            repo = Site('wikidata', 'wikidata')

        if repo:
            try:
                self.endpoint = repo.sparql_endpoint
                self.entity_url = repo.concept_base_uri
            except NotImplementedError:
                raise NotImplementedError(
                    'Wiki version must be 1.28-wmf.23 or newer to '
                    'automatically extract the sparql endpoint. '
                    'Please provide the endpoint and entity_url '
                    'parameters instead of a repo.')
            if not self.endpoint:
                raise Error('The site {0} does not provide a sparql endpoint.'
                            .format(repo))
        else:
            if not entity_url:
                raise Error('If initialised with an endpoint the entity_url '
                            'must be provided.')
            self.endpoint = endpoint
            self.entity_url = entity_url

        self.last_response = None
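
A minimal query sketch using the default Wikidata endpoint, assuming pywikibot's SparqlQuery.select interface; Q42 is only an example item:

from pywikibot.data.sparql import SparqlQuery

# Sketch: fetch the English label of an example item (Q42).
sq = SparqlQuery()
rows = sq.select(
    'SELECT ?label WHERE { wd:Q42 rdfs:label ?label . '
    'FILTER(LANG(?label) = "en") }')
print(rows)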
Example No. 28
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for item_link, item_id, title in items_with_missing_labels_with_title():
        print(
            f"{dry_str} ( {str(item_link).ljust(40, ' ')} ) Fixing {str(item_id).ljust(9, ' ')}: {title}"
        )
        if dry:
            continue

        # Labels have a character limit, so skip titles that would trigger an error.
        if len(title) >= 250:
            continue

        item = ItemPage(repo, item_id)
        item.get()
        try:
            item.editLabels({"en": title})
        except (APIError, OtherPageSaveError) as e:
            print(f"An error occurred while adding label for {item_id}: {e}")
Example No. 29
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    seen = set()
    for board_game_id, bgg_id in board_games_with_missing_labels():
        if board_game_id in seen:
            continue
        seen.add(board_game_id)
        board_game_name = utils.bgg_title(bgg_id)
        if board_game_name is None:
            print(f"Unable to fetch name for {board_game_id}.")
            continue
        wiki_url = f"https://www.wikidata.org/wiki/{board_game_id}"
        print(
            f"{dry_str}Setting label='{board_game_name}' for {board_game_id} ( {wiki_url} )"
        )
        if not dry:
            bg_item = ItemPage(repo, board_game_id)
            bg_item.get()
            bg_item.editLabels({"en": board_game_name})
Example No. 30
def main() -> None:
    """Run the bot."""
    logging.basicConfig(level=logging.WARNING)
    # Initialize pywikibot.
    assert Site().code == 'en'
    utils.initLimits(editsLimits={'default': 4000},
                     brfaNumber=6,
                     onlySimulateEdits=False,
                     botTrial=False)

    EnglishWordList.init()

    journals: Set[str] = getCategoryAsSet('Academic journals by language')
    magazines: Set[str] = getCategoryAsSet('Magazines by language')

    # Let 'foreign' be the set of page titles in a language-category
    # other than English, or in the multilingual category.
    foreign: Set[str] = set()
    foreign = foreign | journals
    foreign = foreign | magazines
    foreign = foreign - getCategoryAsSet('English-language journals')
    foreign = foreign - getCategoryAsSet('English-language magazines')
    foreign = foreign | getCategoryAsSet('Multilingual journals')
    foreign = foreign | getCategoryAsSet('Multilingual magazines')

    for page in chain(journals, magazines,
                      getPagesWithTemplate('Infobox journal'),
                      getPagesWithTemplate('Infobox Journal'),
                      getPagesWithTemplate('Infobox magazine'),
                      getPagesWithTemplate('Infobox Magazine')):
        pageTitle = page if isinstance(page, str) else page.title()
        try:
            makeAmpersandRedirects(pageTitle, foreign)
            for rPage in getRedirectsToPage(pageTitle, namespaces=0):
                makeAmpersandRedirects(rPage.title(), foreign, pageTitle)
        except pywikibot.exceptions.TitleblacklistError:
            print('Skipping (title blacklist error): ', pageTitle)
Example No. 31
def check_tv_show(tvshow_id=None,
                  child_type="episode",
                  autofix=False,
                  accumulate=False,
                  always=False,
                  filter=""):
    """Check constraints for season/episodes of this TV show

    TVSHOW_ID is the ID of the television series, in the format Q######.
    """
    if child_type == "episode":
        instance_types = [wp.TELEVISION_SERIES_EPISODE]
    elif child_type == "season":
        instance_types = [wp.TELEVISION_SERIES_SEASON]
    elif child_type == "series":
        instance_types = [wp.TELEVISION_SERIES]
    elif child_type == "all":
        instance_types = [
            wp.TELEVISION_SERIES, wp.TELEVISION_SERIES_SEASON,
            wp.TELEVISION_SERIES_EPISODE
        ]

    for instance_of_type in instance_types:
        key_val_pairs = {
            wp.PART_OF_THE_SERIES.pid: tvshow_id,
            wp.INSTANCE_OF.pid: instance_of_type
        }
        query = generate_sparql_query(key_val_pairs)
        gen = WikidataSPARQLPageGenerator(query)
        if instance_of_type == wp.TELEVISION_SERIES:
            gen = [ItemPage(Site().data_repository(), tvshow_id)]
        bot = getbot(gen,
                     autofix=autofix,
                     accumulate=accumulate,
                     always=always,
                     property_filter=filter)
        bot.run()
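
A hedged example call for the checker above; the TV show Q-ID is a placeholder:

# Sketch: check episode constraints for one show without applying fixes.
check_tv_show(tvshow_id="Q1000000", child_type="episode", autofix=False)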