def main():
    global DASHBOARD
    covid_data = load_previous()
    previous_case_data = deepcopy(covid_data.get_case_data())
    current_case_data, date = check_for_updates()
    ci = any(x.lower() == "--ci" for x in sys.argv)
    force = any(x.lower() == "--force" for x in sys.argv)
    # Only post under the following conditions:
    # 1. There is new data from RPI
    #    - AND -
    # 2. There are new positive tests OR new weekly/total numbers reported
    # This avoids the pointless updates where all RPI does is reset the daily/weekly numbers
    if force or (
        current_case_data != previous_case_data
        and (
            current_case_data[0] != 0
            or any(
                current_case_data[x] != previous_case_data[x]
                for x in range(2, len(current_case_data))
            )
        )
    ):
        dashboard_url = DASHBOARD
        try:
            # We don't want to abuse the Wayback Machine in actions
            if not ci:
                dashboard_url = savepagenow.capture(DASHBOARD, accept_cache=True)
            else:
                print("Skipping page archive as we are running in CI mode")
        except Exception:
            print("Page archive failed")
            traceback.print_exc()
        old_rolling = covid_data.get_rolling()
        covid_data.update(current_case_data)
        post_discord(
            covid_data.get_rolling(),
            old_rolling,
            current_case_data,
            previous_case_data,
            date,
            dashboard_url,
            create_graph(covid_data),
        )
        save(covid_data)
        print(
            f"Done. Old: {previous_case_data} New: {current_case_data}\n"
            f" Rolling: {covid_data.get_rolling()}"
        )
def archive(url, user_agent, timeout):
    """Capture url in the Wayback Machine, then fetch the snapshot.

    Returns (page_text, snapshot_url, ok); empty strings and False on failure.
    """
    try:
        archive_url = savepagenow.capture(url)
    except savepagenow.api.WaybackRuntimeError as e:
        logger_e.info("{} - {}".format(url, e))
        return "", "", False
    try:
        r = requests.get(archive_url, headers={"User-Agent": user_agent}, timeout=timeout)
    except requests.exceptions.RequestException as e:
        logger_e.info("{} - {}".format(url, e))
        return "", "", False
    except Exception:
        logger_e.info("{} - {}".format(url, "Unknown error"))
        return "", "", False
    return r.text, r.url, r.ok
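A minimal usage sketch for the helper above, assuming it sits in the same module where logger_e is configured; the logger setup, USER_AGENT string, and TIMEOUT value shown here are illustrative stand-ins, not taken from the original project.

import logging

# Stand-in for the project's own error logger (logger_e) that archive() writes to.
logging.basicConfig(level=logging.INFO)
logger_e = logging.getLogger("archive_errors")

USER_AGENT = "example-archiver/0.1 (admin@example.com)"  # illustrative value
TIMEOUT = 30  # seconds, illustrative value

text, snapshot_url, ok = archive("https://example.com/", USER_AGENT, TIMEOUT)
if ok:
    print("Archived snapshot:", snapshot_url)
else:
    print("Archiving failed; details were logged")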
def arquivar_tweets():
    print("Arquivando tweets...")  # "Archiving tweets..."
    lista_ids = database.recupera_ids_sem_arquivo2()
    for par in lista_ids:
        url = "https://twitter.com/" + str(par[1]) + "/status/" + str(par[0])
        print(url)
        try:
            # Primary archiver: archive.today
            url_arquivo = archiveis.capture(url)
            database.adiciona_arquivo(par[0], url_arquivo)
        except Exception as E:
            print(E)
            print("Problema no arquivador principal")  # "Problem with the primary archiver"
            try:
                # Fallback archiver: the Wayback Machine
                url_arquivo = savepagenow.capture(url)
                database.adiciona_arquivo(par[0], url_arquivo)
                time.sleep(20)
            except Exception as E2:
                print(E2)
                print("Problema no arquivador reserva.")  # "Problem with the backup archiver."
def archived_url(SourceURL):
    """Get a real-time archived url of the source url."""
    archive_url = None
    status = "Wait"
    iters = 0
    while status == "Wait":
        iters += 1
        try:
            archive_url = savepagenow.capture(
                SourceURL,
                user_agent="User:YouTubeReviewBot on wikimedia commons")
            status = "Done"
        except Exception as e:
            out(
                e,
                color="red",
            )
        if iters > 5:
            status = "Stop"
    return archive_url
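A brief usage sketch for archived_url(); the URL is illustrative, and because the helper gives up after several failed attempts and returns None, callers should handle that case.

# Illustrative call; archived_url() returns None if every capture attempt failed.
snapshot = archived_url("https://example.com/")
if snapshot is None:
    print("Could not archive the page after several attempts")
else:
    print("Archived at", snapshot)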
def test_robots_error(self):
    with self.assertRaises(savepagenow.BlockedByRobots):
        savepagenow.capture("http://www.columbiamissourian.com/")
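The test above relies on savepagenow raising BlockedByRobots when a site's robots.txt forbids archiving. Outside of tests, that exception can be handled alongside the WaybackRuntimeError seen in the other snippets; a minimal sketch, with the helper name try_capture invented for illustration:

import savepagenow


def try_capture(url):
    """Return a Wayback Machine snapshot URL, or None if the page cannot be archived."""
    try:
        return savepagenow.capture(url, accept_cache=True)
    except savepagenow.BlockedByRobots:
        # The target site's robots.txt blocks the Wayback Machine.
        return None
    except savepagenow.api.WaybackRuntimeError as e:
        # Any other failure reported while saving the page.
        print("Capture failed:", e)
        return None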
#!/usr/bin/env python3
# Usage: python archiveplease.py file_containing_links

import savepagenow
import sys
import time

linkfile = sys.argv[1]
links = []

with open(linkfile) as infile:
    for line in infile:
        if line != "\n" and line[0] != "#":
            line = line.strip('\n')
            links.append(line)

# print(links)  # as check

for link in links:
    # print(link)
    archiveurl = savepagenow.capture(link)
    print(archiveurl)
    time.sleep(120)
    try:
        assert btns[0]["onclick"] == btns[1]["onclick"]
    except IndexError:
        return None
    assert btns[0]["onclick"].startswith("self.location='")
    return urljoin("https://www.fanfiction.net/", btns[0]["onclick"][15:][:-1])


url = sys.argv[1]
errors = []
while url:
    try:
        save.capture(url)
    except save.api.WaybackRuntimeError:
        print(f"Error: {url}")
        time.sleep(60)
        print("slept")
        try:
            save.capture(url)
        except save.api.WaybackRuntimeError:
            errors.append(url)
            sys.exit()
        else:
            print(f"Saved: {url}")
    else:
        print(f"Saved: {url}")
print("Arquivando tweets...") while True: lista_ids = database.recupera_ids_sem_arquivo() for par in lista_ids: url = "https://twitter.com/" + str(par[1]) + "/status/" + str( par[0]) print(url) try: url_arquivo = archiveis.capture(url) database.adiciona_arquivo(par[0], url_arquivo) except Exception as E: print(E) print("Problema no arquivador principal") try: url_arquivo = savepagenow.capture(url) database.adiciona_arquivo(par[0], url_arquivo) time.sleep(20) except Exception as E2: print(E2) print("Problema no arquivador reserva.") def arquivar_tweets(): print("Arquivando tweets...") lista_ids = database.recupera_ids_sem_arquivo2() for par in lista_ids: url = "https://twitter.com/" + str(par[1]) + "/status/" + str(par[0]) print(url) try: url_arquivo = archiveis.capture(url)
import savepagenow

# Archive every URL listed in urls.txt and append the snapshot URLs to archived_urls.txt.
with open('urls.txt', 'r') as url_file, open('archived_urls.txt', 'a') as out_file:
    for url in url_file:
        url = url.strip()
        if not url:
            continue
        print(url)
        archived_url = savepagenow.capture(url)
        print(archived_url)
        out_file.write(archived_url + '\n')
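Several of the snippets above sleep between captures and catch WaybackRuntimeError rather than letting one failure stop the run; a more defensive variant of this loop could do the same. The 30-second delay and accept_cache=True here are illustrative choices, not part of the original script.

import time

import savepagenow

with open('urls.txt', 'r') as url_file, open('archived_urls.txt', 'a') as out_file:
    for url in url_file:
        url = url.strip()
        if not url:
            continue
        try:
            # accept_cache=True reuses a recent snapshot instead of forcing a fresh capture.
            archived_url = savepagenow.capture(url, accept_cache=True)
        except savepagenow.api.WaybackRuntimeError as e:
            print(f"Failed to archive {url}: {e}")
            continue
        out_file.write(archived_url + '\n')
        time.sleep(30)  # arbitrary throttle between Save Page Now requests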