def setLinkDead(self, url, error, page, day):
    """Add the fact that the link was found dead to the .dat file.

    BUG FIX: the semaphore was acquired and released manually, so any
    exception raised in between (``page.title()``, the weblib archive
    lookups, or ``self.log``) left it permanently held and deadlocked
    every other thread waiting on it.  A ``with`` block releases it on
    all exit paths.

    @param url: the URL that was found dead
    @param error: the error seen when checking the URL
    @param page: the page the dead link was found on
    @param day: number of days a link must stay dead before it is
        logged for manual removal
    """
    with self.semaphore:
        now = time.time()
        if url in self.historyDict:
            timeSinceFirstFound = now - self.historyDict[url][0][1]
            timeSinceLastFound = now - self.historyDict[url][-1][1]
            # if the last time we found this dead link is less than an hour
            # ago, we won't save it in the history this time.
            if timeSinceLastFound > 60 * 60:
                self.historyDict[url].append((page.title(), now, error))
            # if the first time we found this link longer than x day ago
            # (default is a week), it should probably be fixed or removed.
            # We'll list it in a file so that it can be removed manually.
            if timeSinceFirstFound > 60 * 60 * 24 * day:
                # search for archived page
                archiveURL = weblib.getInternetArchiveURL(url)
                if archiveURL is None:
                    archiveURL = weblib.getWebCitationURL(url)
                self.log(url, error, page, archiveURL)
        else:
            self.historyDict[url] = [(page.title(), now, error)]
def setLinkDead(self, url, error, page, weblink_dead_days):
    """Add the fact that the link was found dead to the .dat file."""
    with self.semaphore:
        now = time.time()
        if url not in self.historyDict:
            # First sighting of this dead link: start a fresh history.
            self.historyDict[url] = [(page.title(), now, error)]
            return
        entries = self.historyDict[url]
        age_of_first_sighting = now - entries[0][1]
        age_of_last_sighting = now - entries[-1][1]
        # Only record another sighting if the previous one is more than
        # an hour old, to keep the history from growing on every run.
        if age_of_last_sighting > 60 * 60:
            entries.append((page.title(), now, error))
        # Dead for longer than the configured number of days (default a
        # week): list it, with an archived copy if one can be found, so
        # it can be fixed or removed manually.
        if age_of_first_sighting > 60 * 60 * 24 * weblink_dead_days:
            try:
                archiveURL = get_archive_url(url)
            except Exception as e:
                pywikibot.warning(
                    'get_closest_memento_url({0}) failed: {1}'.format(
                        url, e))
                archiveURL = None
            if archiveURL is None:
                archiveURL = weblib.getInternetArchiveURL(url)
            if archiveURL is None:
                archiveURL = weblib.getWebCitationURL(url)
            self.log(url, error, page, archiveURL)
def setLinkDead(self, url, error, page, day):
    """Add the fact that the link was found dead to the .dat file.

    BUG FIX: the semaphore was acquired and released manually, so an
    exception anywhere in the body (the archive lookups, ``page.title()``
    or ``self.log``) skipped the release and deadlocked every other
    thread waiting on it.  A ``with`` block guarantees the release on
    all paths.

    @param url: the URL that was found dead
    @param error: the error seen when checking the URL
    @param page: the page the dead link was found on
    @param day: number of days a link must stay dead before it is
        logged for manual removal
    """
    with self.semaphore:
        now = time.time()
        if url in self.historyDict:
            timeSinceFirstFound = now - self.historyDict[url][0][1]
            timeSinceLastFound = now - self.historyDict[url][-1][1]
            # if the last time we found this dead link is less than an hour
            # ago, we won't save it in the history this time.
            if timeSinceLastFound > 60 * 60:
                self.historyDict[url].append((page.title(), now, error))
            # if the first time we found this link longer than x day ago
            # (default is a week), it should probably be fixed or removed.
            # We'll list it in a file so that it can be removed manually.
            if timeSinceFirstFound > 60 * 60 * 24 * day:
                # search for archived page
                try:
                    archiveURL = get_archive_url(url)
                except Exception as e:
                    pywikibot.warning(
                        'get_closest_memento_url({0}) failed: {1}'.format(
                            url, e))
                    archiveURL = None
                if archiveURL is None:
                    archiveURL = weblib.getInternetArchiveURL(url)
                if archiveURL is None:
                    archiveURL = weblib.getWebCitationURL(url)
                self.log(url, error, page, archiveURL)
        else:
            self.historyDict[url] = [(page.title(), now, error)]
def testWebCiteOlder(self):
    """Test WebCite for https://google.com as of January 2013."""
    archivedversion = weblib.getWebCitationURL('https://google.com', '20130101')
    self.assertEqual(archivedversion, 'http://www.webcitation.org/6DHSeh2L0')
def testWebCiteOlder(self):
    """Test WebCite for https://google.com as of January 2013."""
    expected = "http://www.webcitation.org/6DHSeh2L0"
    self.assertEqual(
        weblib.getWebCitationURL("https://google.com", "20130101"),
        expected)
def _get_archive_url(self, url, date_string=None):
    """Call weblib.getWebCitationURL and assert one deprecation was issued."""
    result = weblib.getWebCitationURL(url, date_string)
    self.assertOneDeprecation()
    return result
def testWebCiteOlder(self):
    """Test WebCite for https://google.com as of January 2013."""
    self.assertEqual(
        weblib.getWebCitationURL('https://google.com', '20130101'),
        'http://www.webcitation.org/6DHSeh2L0')
def testWebCiteOlder(self):
    """Test WebCite for http://google.com as of January 2013."""
    archivedversion = weblib.getWebCitationURL('http://google.com', '20130101')
    self.assertEqual(archivedversion, 'http://www.webcitation.org/6DHSeh2L0')