def testInternetArchiveNewest(self):
    archivedversion = weblib.getInternetArchiveURL('https://google.com')
    parsed = urlparse(archivedversion)
    self.assertIn(parsed.scheme, [u'http', u'https'])
    self.assertEqual(parsed.netloc, u'web.archive.org')
    self.assertTrue(parsed.path.strip('/').endswith('www.google.com'),
                    parsed.path)
def setLinkDead(self, url, error, page, weblink_dead_days):
    """Add the fact that the link was found dead to the .dat file."""
    with self.semaphore:
        now = time.time()
        if url in self.historyDict:
            timeSinceFirstFound = now - self.historyDict[url][0][1]
            timeSinceLastFound = now - self.historyDict[url][-1][1]
            # if the last time we found this dead link is less than an hour
            # ago, we won't save it in the history this time.
            if timeSinceLastFound > 60 * 60:
                self.historyDict[url].append((page.title(), now, error))
            # if the first time we found this link longer than x day ago
            # (default is a week), it should probably be fixed or removed.
            # We'll list it in a file so that it can be removed manually.
            if timeSinceFirstFound > 60 * 60 * 24 * weblink_dead_days:
                # search for archived page
                try:
                    archiveURL = get_archive_url(url)
                except Exception as e:
                    pywikibot.warning(
                        'get_closest_memento_url({0}) failed: {1}'.format(
                            url, e))
                    archiveURL = None
                if archiveURL is None:
                    archiveURL = weblib.getInternetArchiveURL(url)
                if archiveURL is None:
                    archiveURL = weblib.getWebCitationURL(url)
                self.log(url, error, page, archiveURL)
        else:
            self.historyDict[url] = [(page.title(), now, error)]
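# Note (illustration, not part of the snippets in this file): the variant above
# uses `self.semaphore` as a context manager, while the variants below call
# acquire()/release() explicitly. The `with` form releases the lock even if an
# exception is raised inside the block. Minimal standalone sketch, assuming a
# plain threading.Semaphore guarding a shared history dict:
import threading

semaphore = threading.Semaphore()
history = {}


def guarded_append(url, entry):
    # Same locking pattern as setLinkDead: hold the semaphore while
    # mutating the shared history mapping.
    with semaphore:
        history.setdefault(url, []).append(entry)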
def setLinkDead(self, url, error, page, day):
    """Adds the fact that the link was found dead to the .dat file."""
    self.semaphore.acquire()
    now = time.time()
    if url in self.historyDict:
        timeSinceFirstFound = now - self.historyDict[url][0][1]
        timeSinceLastFound = now - self.historyDict[url][-1][1]
        # if the last time we found this dead link is less than an hour
        # ago, we won't save it in the history this time.
        if timeSinceLastFound > 60 * 60:
            self.historyDict[url].append((page.title(), now, error))
        # if the first time we found this link longer than x day ago
        # (default is a week), it should probably be fixed or removed.
        # We'll list it in a file so that it can be removed manually.
        if timeSinceFirstFound > 60 * 60 * 24 * day:
            # search for archived page
            archiveURL = weblib.getInternetArchiveURL(url)
            if archiveURL is None:
                archiveURL = weblib.getWebCitationURL(url)
            self.log(url, error, page, archiveURL)
    else:
        self.historyDict[url] = [(page.title(), now, error)]
    self.semaphore.release()
def testInternetArchiveOlder(self):
    archivedversion = weblib.getInternetArchiveURL("https://google.com",
                                                   "200606")
    parsed = urlparse(archivedversion)
    self.assertIn(parsed.scheme, ["http", "https"])
    self.assertEqual(parsed.netloc, "web.archive.org")
    self.assertTrue(parsed.path.strip("/").endswith("www.google.com"),
                    parsed.path)
    self.assertIn("200606", parsed.path)
def setLinkDead(self, url, error, page, day):
    """Add the fact that the link was found dead to the .dat file."""
    self.semaphore.acquire()
    now = time.time()
    if url in self.historyDict:
        timeSinceFirstFound = now - self.historyDict[url][0][1]
        timeSinceLastFound = now - self.historyDict[url][-1][1]
        # if the last time we found this dead link is less than an hour
        # ago, we won't save it in the history this time.
        if timeSinceLastFound > 60 * 60:
            self.historyDict[url].append((page.title(), now, error))
        # if the first time we found this link longer than x day ago
        # (default is a week), it should probably be fixed or removed.
        # We'll list it in a file so that it can be removed manually.
        if timeSinceFirstFound > 60 * 60 * 24 * day:
            # search for archived page
            try:
                archiveURL = get_archive_url(url)
            except Exception as e:
                pywikibot.warning(
                    'get_closest_memento_url({0}) failed: {1}'.format(
                        url, e))
                archiveURL = None
            if archiveURL is None:
                archiveURL = weblib.getInternetArchiveURL(url)
            if archiveURL is None:
                archiveURL = weblib.getWebCitationURL(url)
            self.log(url, error, page, archiveURL)
    else:
        self.historyDict[url] = [(page.title(), now, error)]
    self.semaphore.release()
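# Illustrative sketch (not taken verbatim from the bot): the archive lookup in
# the setLinkDead variants above first tries the Memento-based get_archive_url
# and then falls back to the deprecated weblib helpers. Factored out on its
# own it would look roughly like this; `get_archive_url`, `weblib` and
# `pywikibot` are assumed to be the same names imported by the snippets above.
def find_archived_copy(url):
    """Return an archived copy of url, or None if no archive is known."""
    try:
        archive_url = get_archive_url(url)
    except Exception as e:
        pywikibot.warning(
            'get_closest_memento_url({0}) failed: {1}'.format(url, e))
        archive_url = None
    if archive_url is None:
        archive_url = weblib.getInternetArchiveURL(url)
    if archive_url is None:
        archive_url = weblib.getWebCitationURL(url)
    return archive_url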
def testInternetArchiveOlder(self):
    archivedversion = weblib.getInternetArchiveURL('https://google.com',
                                                   '200606')
    parsed = urlparse(archivedversion)
    self.assertIn(parsed.scheme, [u'http', u'https'])
    self.assertEqual(parsed.netloc, u'web.archive.org')
    self.assertTrue(parsed.path.strip('/').endswith('www.google.com'),
                    parsed.path)
    self.assertIn('200606', parsed.path)
def testInternetArchiveNewest(self):
    with PatchedHttp(weblib, False) as p:
        p.after_fetch = self._test_response
        archivedversion = weblib.getInternetArchiveURL('https://google.com')
    parsed = urlparse(archivedversion)
    self.assertIn(parsed.scheme, [u'http', u'https'])
    self.assertEqual(parsed.netloc, u'web.archive.org')
    self.assertTrue(parsed.path.strip('/').endswith('www.google.com'),
                    parsed.path)
def _get_archive_url(self, url, date_string=None):
    with PatchedHttp(weblib, False) as p:
        p.after_fetch = self._test_response
        try:
            archivedversion = weblib.getInternetArchiveURL(url, date_string)
        except RequestsConnectionError as e:
            self.skipTest(e)
    self.assertOneDeprecation()
    return archivedversion
def _get_archive_url(self, url, date_string=None):
    with PatchedHttp(weblib, False) as p:
        p.after_fetch = self._test_response
        archivedversion = weblib.getInternetArchiveURL(url, date_string)
    self.assertOneDeprecation()
    return archivedversion
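# Illustrative sketch: with the _get_archive_url helper above, the "newest"
# and "older" tests can be expressed in terms of it instead of calling weblib
# directly. The assertions mirror the test snippets earlier in this file; the
# exact method names are an assumption.
def testInternetArchiveNewest(self):
    archivedversion = self._get_archive_url('https://google.com')
    parsed = urlparse(archivedversion)
    self.assertIn(parsed.scheme, [u'http', u'https'])
    self.assertEqual(parsed.netloc, u'web.archive.org')
    self.assertTrue(parsed.path.strip('/').endswith('www.google.com'),
                    parsed.path)


def testInternetArchiveOlder(self):
    archivedversion = self._get_archive_url('https://google.com', '200606')
    parsed = urlparse(archivedversion)
    self.assertIn(parsed.scheme, [u'http', u'https'])
    self.assertEqual(parsed.netloc, u'web.archive.org')
    self.assertTrue(parsed.path.strip('/').endswith('www.google.com'),
                    parsed.path)
    self.assertIn('200606', parsed.path)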