Example #1
def main():
    global DASHBOARD
    covid_data = load_previous()
    previous_case_data = deepcopy(covid_data.get_case_data())
    current_case_data, date = check_for_updates()

    ci = any(x.lower() == "--ci" for x in sys.argv)
    force = any(x.lower() == "--force" for x in sys.argv)

    # Only post under the following conditions:
    # 1. There is new data from RPI
    #           - AND -
    # 2. there are new positive tests OR new weekly/total numbers reported
    # This avoids the pointless updates where all RPI does is reset the daily/weekly numbers
    if force or (
        current_case_data != previous_case_data
        and (
            current_case_data[0] != 0
            or any(
                current_case_data[x] != previous_case_data[x]
                for x in range(2, len(current_case_data))
            )
        )
    ):
        dashboard_url = DASHBOARD
        try:
            # We don't want to abuse the Wayback Machine in actions
            if not ci:
                dashboard_url = savepagenow.capture(DASHBOARD, accept_cache=True)
            else:
                print("Skipping page archive as we are running in CI mode")
        except Exception:
            print("Page archive failed")
            traceback.print_exc()

        old_rolling = covid_data.get_rolling()
        covid_data.update(current_case_data)

        post_discord(
            covid_data.get_rolling(),
            old_rolling,
            current_case_data,
            previous_case_data,
            date,
            dashboard_url,
            create_graph(covid_data),
        )

        save(covid_data)
    print(
        f"Done. Old: {previous_case_data} New: {current_case_data}\n Rolling: {covid_data.get_rolling()}"
    )
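
Example #1's archive step boils down to a reusable pattern: try to capture, and fall back to the live URL when running in CI or when the Wayback Machine refuses. A minimal sketch of that pattern (the helper name is ours, not from the original; accept_cache=True returns a recent cached snapshot instead of raising, as in the example):

import savepagenow

def archive_or_original(url, ci=False):
    """Return a Wayback snapshot URL, or the original URL if archiving is skipped or fails."""
    if ci:
        # Don't hit the Wayback Machine from CI runs, mirroring the example above.
        return url
    try:
        return savepagenow.capture(url, accept_cache=True)
    except Exception:
        # Any archiving failure falls back to linking the live page.
        return url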
Example #2
def archive(url, user_agent, timeout):
    try:
        archive_url = savepagenow.capture(url)
    except savepagenow.api.WaybackRuntimeError as e:
        logger_e.info("{} - {}".format(url, e))
        return "", "", False
    try:
        r = requests.get(archive_url,
                         headers={"User-Agent": user_agent},
                         timeout=timeout)
    except requests.exceptions.RequestException as e:
        logger_e.info("{} - {}".format(url, e))
        return "", "", False
    except Exception:
        logger_e.info("{} - {}".format(url, "Unknown error"))
        return "", "", False
    return r.text, r.url, r.ok
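
A possible call site for the archive() helper above; the user agent string and 30-second timeout are illustrative values, not from the original:

text, final_url, ok = archive("https://example.com/", "my-archiver/1.0", 30)
if ok:
    print("Snapshot fetched from", final_url)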
Example #3
def arquivar_tweets():
    print("Archiving tweets...")
    lista_ids = database.recupera_ids_sem_arquivo2()
    for par in lista_ids:
        url = "https://twitter.com/" + str(par[1]) + "/status/" + str(par[0])
        print(url)
        try:
            url_arquivo = archiveis.capture(url)
            database.adiciona_arquivo(par[0], url_arquivo)
        except Exception as e:
            print(e)
            print("Problem with the primary archiver")
            try:
                url_arquivo = savepagenow.capture(url)
                database.adiciona_arquivo(par[0], url_arquivo)
                time.sleep(20)
            except Exception as e2:
                print(e2)
                print("Problem with the backup archiver.")
Example #4
def archived_url(SourceURL):
    """Get a real-time archived url of the source url."""
    archive_url = None
    status = "Wait"
    iters = 0
    while status == "Wait":
        iters += 1
        try:
            archive_url = savepagenow.capture(
                SourceURL,
                user_agent="User:YouTubeReviewBot on wikimedia commons")
            status = "Done"
        except Exception as e:
            out(e, color="red")
        if iters > 5:
            status = "Stop"
    return archive_url
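
The loop above retries failed captures immediately, which can hammer the Save Page Now endpoint while it is struggling. A hedged variant that waits between attempts (the attempt count and doubling sleep are illustrative choices, not part of the original):

import time
import savepagenow

def archived_url_with_backoff(source_url, attempts=5):
    """Try to capture source_url a few times, sleeping longer after each failure."""
    for attempt in range(attempts):
        try:
            return savepagenow.capture(
                source_url,
                user_agent="User:YouTubeReviewBot on wikimedia commons")
        except Exception as e:
            print(e)
            time.sleep(2 ** attempt)  # 1, 2, 4, 8, 16 seconds
    return None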
Example #5
    def test_robots_error(self):
        with self.assertRaises(savepagenow.BlockedByRobots):
            savepagenow.capture("http://www.columbiamissourian.com/")
Example #6
#!/usr/bin/env python3
# Usage: python archiveplease.py file_containing_links

import savepagenow
import sys
import time

linkfile = sys.argv[1]
links = []

with open(linkfile) as infile:
    for line in infile:
        if line != "\n" and line[0] != "#":
            line = line.strip('\n')
            links.append(line)

#print(links) # as check

for link in links:
    #print(link)
    archiveurl = savepagenow.capture(link)
    print(archiveurl)
    time.sleep(120)
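
As written, a single failed capture raises and stops the whole run. A hedged variant of the loop that logs the failure and keeps going, reusing the savepagenow.api.WaybackRuntimeError class that Example #2 catches:

for link in links:
    try:
        archiveurl = savepagenow.capture(link)
        print(archiveurl)
    except savepagenow.api.WaybackRuntimeError as e:
        print("Failed to archive {}: {}".format(link, e))
    time.sleep(120)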
Example #7
    try:
        assert btns[0]["onclick"] == btns[1]["onclick"]
    except IndexError:
        return None
    assert btns[0]["onclick"].startswith("self.location='")

    return urljoin("https://www.fanfiction.net/", btns[0]["onclick"][15:][:-1])


url = sys.argv[1]

errors = []

while url:
    try:
        save.capture(url)
    except save.api.WaybackRuntimeError:
        print(f"Error: {url}")

        time.sleep(60)
        print("slept")
        try:
            save.capture(url)
        except save.api.WaybackRuntimeError:
            errors.append(url)
            sys.exit()
        else:
            print(f"Saved: {url}")
    else:
        print(f"Saved: {url}")
Example #8
    print("Arquivando tweets...")
    while True:
        lista_ids = database.recupera_ids_sem_arquivo()
        for par in lista_ids:
            url = "https://twitter.com/" + str(par[1]) + "/status/" + str(
                par[0])
            print(url)
            try:
                url_arquivo = archiveis.capture(url)
                database.adiciona_arquivo(par[0], url_arquivo)
            except Exception as e:
                print(e)
                print("Problem with the primary archiver")
                try:
                    url_arquivo = savepagenow.capture(url)
                    database.adiciona_arquivo(par[0], url_arquivo)
                    time.sleep(20)
                except Exception as e2:
                    print(e2)
                    print("Problem with the backup archiver.")


def arquivar_tweets():
    print("Arquivando tweets...")
    lista_ids = database.recupera_ids_sem_arquivo2()
    for par in lista_ids:
        url = "https://twitter.com/" + str(par[1]) + "/status/" + str(par[0])
        print(url)
        try:
            url_arquivo = archiveis.capture(url)
Example #9
import savepagenow

# Archive every URL listed in urls.txt and append the snapshot URLs, one per line.
with open('urls.txt', 'r') as url_file, open('archived_urls.txt', 'a') as out_file:
    for url in url_file:
        url = url.strip()  # drop the trailing newline before capturing
        if not url:
            continue
        print(url)
        archived_url = savepagenow.capture(url)
        print(archived_url)
        out_file.write(archived_url + '\n')
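
If the same list is run repeatedly, savepagenow's capture_or_cache() helper avoids re-archiving pages the Wayback Machine has saved recently; a brief sketch (example.com is a placeholder URL):

import savepagenow

archive_url, fresh = savepagenow.capture_or_cache("https://example.com/")
print(archive_url, "(new capture)" if fresh else "(recent cached snapshot)")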