def test_get_memento_uri_default(input_uri_r, input_datetime, expected_uri_m):
    """A default-configured client resolves the expected closest URI-M."""
    client = MementoClient()
    info = client.get_memento_info(input_uri_r, input_datetime)
    closest = info.get("mementos").get("closest")
    assert closest.get("uri")[0] == expected_uri_m
def test_get_native_timegate_uri(input_uri_r, input_datetime, expected_uri_g):
    """Discovery of a site's own (native) TimeGate matches the expected URI-G."""
    client = MementoClient(check_native_timegate=True)
    discovered = client.get_native_timegate_uri(input_uri_r, input_datetime)
    assert discovered == expected_uri_g
def test_get_memento_uri_specified_timegate_direct_timegate_query(input_uri_r, input_datetime, input_timegate, expected_uri_m):
    """Querying a user-specified TimeGate directly yields the expected URI-M."""
    client = MementoClient(timegate_uri=input_timegate,
                           check_native_timegate=False)
    info = client.get_memento_info(input_uri_r, input_datetime,
                                   include_uri_checks=False)
    assert info.get("mementos").get("closest").get("uri")[0] == expected_uri_m
def url_list(request):
    """List stored URLs; on POST, fetch the submitted URL, record its title,
    final destination, status code, and closest memento, then redirect to
    the detail page.

    Fixes over the previous version: the bare ``except:`` is narrowed to
    ``except Exception`` (so SystemExit/KeyboardInterrupt pass through), the
    user-facing placeholder typo "Does not exit" is corrected to
    "Does not exist", and the template now receives a form *instance* (the
    bound one after an invalid POST, a fresh one on GET) instead of the
    ``URLForm`` class.

    NOTE(review): ``requests.get(post)`` passes the unsaved model instance
    rather than a URL string -- this only works if the model's ``__str__``
    returns the URL; confirm against the model definition.
    """
    urls = URL.objects.all()
    if request.method == "POST":
        form = URLForm(request.POST)
        if form.is_valid():
            post = form.save(commit=False)
            try:
                response = requests.get(post)
                soup = BeautifulSoup(response.content, "lxml")
                post.title = soup.title.string
                post.finalDestination = response.url
                post.statusCode = response.status_code
                dt = datetime.datetime.now()
                mc = MementoClient()
                closest = mc.get_memento_info(
                    post.finalDestination, dt).get("mementos").get("closest")
                post.uri = closest.get('uri')[0]
                post.datetime = str(closest.get('datetime'))
            except Exception:
                # Best-effort fallback: record placeholders rather than
                # failing the whole request when fetch/lookup fails.
                post.statusCode = "None"
                post.finalDestination = "Does not exist"
                post.title = "No title"
            finally:
                post.save()
                return redirect('url_detail', pk=post.pk)
    else:
        form = URLForm()
    return render(request, 'lab1/url_list.html', {'urls': urls, 'form': form})
def url_list(request):
    """Render the URL list; on POST, fetch the submitted URL, record its
    metadata and closest memento, then redirect to the detail view."""
    urls = URL.objects.all()
    if request.method == "POST":
        form = URLForm(request.POST)
        if form.is_valid():
            post = form.save(commit=False)
            try:
                # NOTE(review): the unsaved model instance is passed to
                # requests.get(); presumably the model's __str__ returns the
                # URL -- confirm against the model definition.
                response = requests.get(post)
                temp = BeautifulSoup(response.content, "lxml")
                post.title = temp.title.string
                post.finalDestination = response.url
                post.statusCode = response.status_code
                dt = datetime.datetime.now()
                mc = MementoClient()
                uri = post.finalDestination
                memento_uri = mc.get_memento_info(
                    uri, dt).get("mementos").get("closest")
                post.uri = memento_uri.get('uri')[0]
                post.datetime = str(memento_uri.get('datetime'))
            except:
                # Best-effort fallback values on any failure.
                # NOTE(review): bare except hides real errors, and
                # "Does not exit" is likely a typo for "Does not exist".
                post.statusCode = "None"
                post.finalDestination = "Does not exit"
                post.title = "No title"
                pass
            finally:
                # Save whatever we gathered and jump to the detail page.
                post.save()
                return redirect('url_detail', pk=post.pk)
    else:
        # NOTE(review): this assigns the form *class*, not an instance,
        # and the class is what the template receives below.
        form = URLForm
    return render(request, 'lab1/url_list.html', {
        'urls': urls,
        'form': URLForm
    })
def test_mementos_not_in_archive_uri(input_uri_r, input_datetime, input_uri_g):
    """Even with no memento in the archive, original_uri echoes the URI-R."""
    client = MementoClient(timegate_uri=input_uri_g)
    epoch = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                       "%a, %d %b %Y %H:%M:%S GMT")
    info = client.get_memento_info(input_uri_r, epoch)
    assert info.get("original_uri") == input_uri_r
def test_get_memento_uri_default(input_uri_r, input_datetime, expected_uri_m):
    """The default client returns the expected closest memento URI."""
    response = MementoClient().get_memento_info(input_uri_r, input_datetime)
    actual = response.get("mementos").get("closest").get("uri")[0]
    assert actual == expected_uri_m
def url_list(request):
    """On POST: expand the submitted URL, record its title/status/final URL,
    look up its closest Wayback memento, render a screenshot via the
    PhantomJScloud API, upload it to S3, and redirect to the detail page.
    On GET: render the list of previously expanded URLs."""
    if request.method == "POST":
        form = SearchForm(request.POST)
        if form.is_valid():
            new_url = form.save(commit = False)
            new_url.date = timezone.now()
            # Runs when URL is correct
            try:
                # NOTE(review): the model instance is passed to requests.get();
                # presumably its __str__ returns the URL -- confirm.
                response = requests.get(new_url)
                page = BeautifulSoup(response.content, "lxml")
                if page.title is not None:
                    title = page.title.string
                else:
                    title = "No Title Available"
                new_url.status = response.status_code
                new_url.final_url = response.url
                new_url.title = title
                # Wayback storing
                current_date = datetime.datetime.now()
                memento = MementoClient()
                wayback_res = memento.get_memento_info(
                    response.url, current_date).get("mementos").get("closest")
                new_url.wayback = wayback_res.get("uri")[0]
                if wayback_res.get("datetime") is not None:
                    new_url.wayback_date = str(wayback_res.get("datetime"))
                else:
                    # No memento datetime available: fall back to "now".
                    new_url.wayback_date = str(current_date)
                # Picture archiving
                # Connecting to S3
                s3_connection = boto3.resource("s3")
                # For image capture with PhahtomJS
                data = json.dumps({"url":response.url,
                                   "renderType":"jpeg"}).encode("utf-8")
                headers = {"content-type": "application/json"}
                # NOTE(review): api_key is a module-level name not defined in
                # this function -- confirm it is configured elsewhere.
                api_url = ("http://PhantomJScloud.com/api/browser/v2/"
                           + api_key + "/")
                req = urllibreq.Request(url=api_url, data=data, headers=headers)
                res = urllibreq.urlopen(req)
                result = res.read()
                # Puts the generated image on S3
                s3_connection.Bucket("lab3pics").put_object(
                    Key=str(current_date) + ".jpg", Body=result,
                    ACL="public-read", ContentType="image/jpeg")
                # Generates a publicly accessible link to the image
                pic_url = ("http://s3.amazonaws.com/lab3pics/"
                           + str(current_date) + ".jpg")
                new_url.archive_link = pic_url
            # Sets up error message
            except Exception as e:
                # Best-effort: record placeholders (and the raw exception in
                # archive_link) instead of failing the request.
                new_url.status = "None"
                new_url.final_url = "Does not exist"
                new_url.title = "This webpage does not exist"
                new_url.wayback = "Not available"
                new_url.wayback_date = "Not available"
                new_url.archive_link = e
            # Redirects to details page
            finally:
                new_url.save()
                return redirect('url_detail', pk = new_url.pk)
    else:
        # GET: show all URLs expanded so far, newest first.
        urls = URL.objects.filter(date__lte =
                                  timezone.now()).order_by('-date')
        form = SearchForm
        return render(request, 'urlexpander/url_list.html',
                      {'urls': urls, 'form': SearchForm})
def good_url_slash_at_end():
    """A trailing-slash URI-R resolves to the expected LoC web archive memento."""
    target = datetime.datetime.strptime("Tue, 11 Sep 2001 08:45:45 GMT",
                                        "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient()
    info = client.get_memento_info("http://www.cnn.com/", target)
    memento = info.get("mementos").get("closest").get("uri")[0]
    assert memento == 'http://webarchive.loc.gov/all/20010911181528/http://www2.cnn.com/'
def test_bad_timegate_linux():
    """On Linux, a non-Memento TimeGate endpoint raises ConnectionError."""
    when = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                      "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient(timegate_uri="http://www.example.com")
    with pytest.raises(requests.ConnectionError):
        client.get_memento_info("http://www.cnn.com", when).get("original_uri")
def test_mementos_not_in_archive_uri(input_uri_r, input_datetime, input_uri_g):
    """original_uri is echoed back even when the archive holds no memento."""
    epoch_gmt = "Thu, 01 Jan 1970 00:00:00 GMT"
    when = datetime.datetime.strptime(epoch_gmt, "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient(timegate_uri=input_uri_g)
    result = client.get_memento_info(input_uri_r, when)
    assert result.get("original_uri") == input_uri_r
def test_get_memento_uri_specified_timegate(input_uri_r, input_datetime, input_timegate, expected_uri_m):
    """A client pinned to a specific TimeGate resolves the expected URI-M."""
    client = MementoClient(timegate_uri=input_timegate,
                           check_native_timegate=False)
    closest = (client.get_memento_info(input_uri_r, input_datetime)
               .get("mementos").get("closest"))
    assert closest.get("uri")[0] == expected_uri_m
def test_bad_timegate_osx():
    """On OS X, a reachable-but-non-Memento TimeGate still echoes the URI-R."""
    when = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                      "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient(timegate_uri="http://www.example.com")
    info = client.get_memento_info("http://www.cnn.com", when)
    assert info.get("original_uri") == "http://www.cnn.com"
def test_get_memento_data_non_compliant(input_uri_m):
    """A non-compliant URI-M still reports itself as the original URI."""
    # TODO: pytest did not seem to split this into arguments
    uri_m = input_uri_m[0]
    when = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                      "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient()
    assert client.get_memento_info(uri_m, when).get("original_uri") == uri_m
def test_good_url_slash_at_end():
    """A URI-R with a trailing slash resolves to the expected LoC memento."""
    expected = 'http://webarchive.loc.gov/all/20010911181528/http://www2.cnn.com/'
    when = datetime.datetime.strptime("Tue, 11 Sep 2001 08:45:45 GMT",
                                      "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient()
    info = client.get_memento_info("http://www.cnn.com/", when)
    assert info.get("mementos").get("closest").get("uri")[0] == expected
def test_bad_timegate():
    """An unresolvable TimeGate host must raise requests.ConnectionError."""
    print("'HTTP_PROXY' in os.environ: {}".format('HTTP_PROXY' in os.environ))
    when = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                      "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient(timegate_uri="http://www.example.moc")
    with pytest.raises(requests.ConnectionError):
        client.get_memento_info("http://www.cnn.com", when).get("original_uri")
def main():
    """Entry function: read URLs from a CSV, look up mementos for each, and
    append the results to an output CSV.

    Improvements: the resume offset previously hard-coded as ``df[9487:]``
    is now a ``--start-row`` flag whose default preserves the old behavior;
    the ``not x is None`` anti-idiom is fixed; the repeated inline
    memento-URI extraction is factored into ``_first_uri`` so both branches
    of the loop share one writerow call.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_csv_path_file',
                        help="specify the csv file to read")
    parser.add_argument('output_csv_path_file',
                        help="specify the csv file to write results")
    parser.add_argument('url_field',
                        help=" specify the field name to get the URL")
    parser.add_argument('datetime', help="Memento Datetime")
    # Backward compatible: default matches the previously hard-coded slice.
    parser.add_argument('--start-row', type=int, default=9487,
                        help="row index to resume processing from")
    args = parser.parse_args()

    df = pd.read_csv(args.input_csv_path_file)
    dt = datetime.datetime.strptime(args.datetime, '%Y%m%d')
    mc = MementoClient(check_native_timegate=False)
    with open(args.output_csv_path_file, 'a') as csvfile:
        fieldnames = ['original-uri', 'memento-closest', 'memento-first',
                      'memento-last', 'timegate_uri']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for index, row in df[args.start_row:].iterrows():
            memento = mc.get_memento_info(
                row[args.url_field], dt, include_uri_checks=False)
            mementos = memento.get("mementos")
            if mementos is not None:
                writer.writerow({
                    'original-uri': memento.get("original_uri"),
                    'memento-closest': _first_uri(mementos.get("closest")),
                    'memento-first': _first_uri(mementos.get("first")),
                    'memento-last': _first_uri(mementos.get("last")),
                    'timegate_uri': memento.get("timegate_uri")})
            else:
                writer.writerow({
                    'original-uri': memento.get("original_uri"),
                    'memento-closest': '',
                    'memento-first': '',
                    'memento-last': '',
                    'timegate_uri': memento.get("timegate_uri")})
            # Flush per row so a crash mid-run loses at most one record.
            csvfile.flush()


def _first_uri(entry):
    """Return the first URI of a memento entry dict, or '' if it is missing."""
    return '' if entry is None else entry.get("uri")[0]
def test_nonexistent_urirs(input_uri_r):
    """Unknown URI-Rs still yield original_uri and the aggregator TimeGate."""
    uri_r = input_uri_r[0]
    when = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                      "%a, %d %b %Y %H:%M:%S GMT")
    info = MementoClient().get_memento_info(uri_r, when)
    assert info.get("original_uri") == uri_r
    gate = 'http://timetravel.mementoweb.org/timegate/{}'.format(uri_r)
    assert info.get("timegate_uri") == gate
def test_get_memento_data_non_compliant(input_uri_m):
    """Non-compliant endpoints: original_uri must round-trip the input."""
    # TODO: pytest did not seem to split this into arguments
    target_uri = input_uri_m[0]
    epoch = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                       "%a, %d %b %Y %H:%M:%S GMT")
    result = MementoClient().get_memento_info(target_uri, epoch)
    assert result.get("original_uri") == target_uri
def test_nonexistent_urirs(input_uri_r):
    """Unknown URI-Rs fall back to the timetravel aggregator TimeGate."""
    target = input_uri_r[0]
    epoch = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                       "%a, %d %b %Y %H:%M:%S GMT")
    result = MementoClient().get_memento_info(target, epoch)
    assert result.get("original_uri") == target
    expected_gate = ('http://timetravel.mementoweb.org/timegate/{}'
                     .format(target))
    assert result.get("timegate_uri") == expected_gate
def test_bad_timegate():
    """A bogus TimeGate raises ConnectionError or, in newer library
    versions, the library's own MementoClientException."""
    print("'HTTP_PROXY' in os.environ: {}".format('HTTP_PROXY' in os.environ))
    when = datetime.datetime.strptime("Thu, 01 Jan 1970 00:00:00 GMT",
                                      "%a, %d %b %Y %H:%M:%S GMT")
    client = MementoClient(timegate_uri="http://www.example.moc")
    expected_errors = (requests.exceptions.ConnectionError,
                       memento_client.memento_client.MementoClientException)
    with pytest.raises(expected_errors):
        client.get_memento_info("http://www.cnn.com", when).get("original_uri")
def test_determine_if_memento(input_uri_m):
    """A known URI-M should be classified as a memento."""
    # TODO: pytest did not seem to split this into arguments
    uri_m = input_uri_m[0]
    assert MementoClient.is_memento(uri_m) == True
def run(
    self,
    filename,
    file,
    dependency_results=None,
    follow_redirects: bool = True,
):
    """
    Find links in any text file and check if they are archived.

    Link is considered valid if the link has been archived by any services
    in memento_client.

    This bear can automatically fix redirects.

    Warning: This bear will make HEAD requests to all URLs mentioned in
    your codebase, which can potentially be destructive. As an example,
    this bear would naively just visit the URL from a line that goes like
    `do_not_ever_open = 'https://api.acme.inc/delete-all-data'` wiping out
    all your data.

    :param dependency_results: Results given by URLHeadBear.
    :param follow_redirects:   Set to true to check all redirect urls.
    """
    # The previous default was a mutable ``dict()`` shared across calls;
    # use None as the sentinel and normalize here instead.
    if dependency_results is None:
        dependency_results = {}
    self._mc = MementoClient()

    for result in dependency_results.get(URLHeadBear.name, []):
        line_number, link, code, context = result.contents

        # Only consider links that actually resolved (2xx/3xx).
        if not (code and 200 <= code < 400):
            continue

        status = MementoBear.check_archive(self._mc, link)
        if not status:
            yield Result.from_values(
                self,
                ('This link is not archived yet, visit '
                 'https://web.archive.org/save/%s to get it archived.'
                 % link),
                file=filename,
                line=line_number,
                severity=RESULT_SEVERITY.INFO)

        if follow_redirects and 300 <= code < 400:  # HTTP status 30x
            # Check every URL along the redirect chain as well.
            redirect_urls = MementoBear.get_redirect_urls(link)
            for url in redirect_urls:
                status = MementoBear.check_archive(self._mc, url)
                if not status:
                    yield Result.from_values(
                        self,
                        ('This link redirects to %s and not archived yet, '
                         'visit https://web.archive.org/save/%s to get it '
                         'archived.'
                         % (url, url)),
                        file=filename,
                        line=line_number,
                        severity=RESULT_SEVERITY.INFO)
def get_via_mementos(uri, dt): mc = MementoClient(timegate_uri=timegate, check_native_timegate=False) # mc = MementoClient() print("Getting mementos for %s ..." % uri) try: mementos = mc.get_memento_info(uri, dt).get("mementos") if mementos: print("Got mementos for %s ..." % uri) if 'closest' in mementos: uri = mementos.get("closest").get("uri")[0] elif 'memento' in mementos: uri = mementos.get("closest").get("uri")[0] # Need to patch the id_ into the url: uri = re.sub(r"\/(\d{14})\/", r"/\1id_/", uri) except Exception as e: print(e) pass return uri
def test_close_with_user_supplied_session(mock_session):
    """A caller-owned session must not be closed by the context manager."""
    class FakeResponse():
        def __init__(self):
            self.headers = {"header": "nodata"}
    mock_session.head.return_value = FakeResponse()
    with MementoClient(session=mock_session) as client:
        client.get_original_uri('http://www.cnn.com')
    mock_session.close.assert_not_called()
def test_close_session_on_default(mock_session):
    """A session owned by the client itself is closed on context exit."""
    class FakeResponse():
        def __init__(self):
            self.headers = {"header": "nodata"}
    mock_session.head.return_value = FakeResponse()
    with MementoClient() as client:
        client.session = mock_session
        client.get_original_uri('http://www.cnn.com')
    mock_session.close.assert_called_with()
def run(self, filename, file,
        network_timeout: typed_dict(str, int, DEFAULT_TIMEOUT) = dict(),
        link_ignore_regex: str = r'([.\/]example\.com|\{|\$)',
        link_ignore_list: typed_list(str) = DEFAULT_IGNORE,
        follow_redirects: bool = True):
    """
    Find links in any text file and check if they are archived.

    Link is considered valid if the link has been archived by any services
    in memento_client.

    This bear can automatically fix redirects.

    Warning: This bear will make HEAD requests to all URLs mentioned in
    your codebase, which can potentially be destructive. As an example,
    this bear would naively just visit the URL from a line that goes like
    `do_not_ever_open = 'https://api.acme.inc/delete-all-data'` wiping out
    all your data.

    :param network_timeout:    A dict mapping URLs and timeout to be
                               used for that URL. All the URLs that have
                               the same host as that of URLs provided
                               will be passed that timeout. It can also
                               contain a wildcard timeout entry with key
                               '*'. The timeout of all the websites not
                               in the dict will be the value of the key
                               '*'.
    :param link_ignore_regex:  A regex for urls to ignore.
    :param link_ignore_list:   Comma separated url globs to ignore.
    :param follow_redirects:   Set to true to check all redirect urls.
    """
    self._mc = MementoClient()

    # Rebind (never mutate) the argument: map hosts -> timeout, keeping '*'
    # as the wildcard key.
    network_timeout = {
        urlparse(url).netloc if not url == '*' else '*': timeout
        for url, timeout in network_timeout.items()
    }

    if link_ignore_list != self.DEFAULT_IGNORE:
        # Previously this extended the caller's list in place, growing it on
        # every run; build a fresh list instead.
        link_ignore_list = list(link_ignore_list) + list(self.DEFAULT_IGNORE)

    for (line_number, link, code, context) in self.analyze_links_in_file(
            file, network_timeout, link_ignore_regex, link_ignore_list):
        status = MementoBear.check_archive(self._mc, link)
        if not status:
            yield Result.from_values(
                self,
                ('This link is not archived yet, visit '
                 'https://web.archive.org/save/%s to get it archived.'
                 % link),
                file=filename,
                line=line_number,
                severity=RESULT_SEVERITY.INFO)

        if follow_redirects and 300 <= code < 400:  # HTTP status 30x
            redirect_urls = MementoBear.get_redirect_urls(link)
            for url in redirect_urls:
                status = MementoBear.check_archive(self._mc, url)
                if not status:
                    yield Result.from_values(
                        self,
                        ('This link redirects to %s and not archived yet, '
                         'visit https://web.archive.org/save/%s to get it '
                         'archived.'
                         % (url, url)),
                        file=filename,
                        line=line_number,
                        severity=RESULT_SEVERITY.INFO)
def link_handler(link):
    """Extract a URL from a message, look up (or create) its archive.fo
    snapshot, and return either the archive URI, a (uri, keyboard) pair for
    archive.is results, or an error string for the user."""
    try:
        # Messages arrive as "<command> <url>"; keep only the URL part.
        link = link.split(' ')[1]
    except IndexError:
        pass
    #print(str_link)
    # Django-style URL validation regex.
    uri_regex = re.compile(
        r'^(?:http|ftp)s?://'  # http:// or https://
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # ...or ipv4
        r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # ...or ipv6
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)
    uri_rec = uri_regex.search(link)
    #print(uri_rec)
    #print(uri_regex)
    #print(link)
    if uri_rec:
        print('Url found')
        uri = uri_rec.group(0)
        print(uri)
        timegate = 'https://archive.fo/timegate/'
        mc = MementoClient(timegate_uri=timegate,
                           check_native_timegate=False)
        try:
            # Most recent archived copy, if one exists.
            archive_uri = mc.get_memento_info(uri).get("mementos").get(
                "last").get("uri")[0]
            # print(uri)
            # print(archive_uri)
            print('Archive is ' + archive_uri)
        except AttributeError:
            # No memento found -> ask archive.fo to create one.
            # NOTE(review): archive_create is defined elsewhere; the
            # NameError arm below appears to guard against it missing.
            archive_uri = archive_create(uri)
            return archive_uri
        except NameError:
            print('Sum happen')
            return ('Something went wrong, let @raku_cat know')
        else:
            pass
    else:
        return 'No valid URL found'
    if 'archive.fo' in archive_uri:
        # print(archive_uri)
        return archive_uri
    elif 'archive.is' in archive_uri:
        # archive.is result: attach the inline navigation keyboard.
        keyboard = InlineKeyboardMarkup(inline_keyboard=[
            [
                InlineKeyboardButton(text='Force save page',
                                     callback_data='save')
            ],
            [
                InlineKeyboardButton(text='← Prior', callback_data='back'),
                InlineKeyboardButton(text='Next →', callback_data='next')
            ],
            [
                InlineKeyboardButton(text='History',
                                     switch_inline_query_current_chat=uri)
            ],
        ])
        return archive_uri, keyboard
    elif 'trans' in archive_uri:
        # NOTE(review): fallback for an odd API response shape -- returns
        # the timegate URI instead of a memento; confirm this is intended.
        archive_uri = mc.get_memento_info(uri).get("timegate_uri")
        print('Sent weird api deal')
        return (archive_uri)
    else:
        print('^No it wasn\'t')
        return 'Something went wrong, let @raku_cat know'
def on_callback_query(msg): query_id, chat_id, query_data = telepot.glance(msg, flavor='callback_query') # print(msg) # print(query_data) print('Recieved query ' + query_id) url = msg['message']['reply_to_message']['text'].split(' ')[1] msg_idf = telepot.message_identifier(msg['message']) callback_text = '' global delay if query_data == 'save': if delay != '': if datetime.datetime.now() > delay: r = requests.get('https://archive.fo/') html = r.text soup = BeautifulSoup(html, 'lxml') submitid = soup.find('input').get('value') headers = { 'User-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36' } values = {'submitid': submitid, 'url': url, 'anyway': '1'} r = requests.post('https://archive.fo/submit/', data=values, headers=headers) uri = r.text archive_uri = uri.split('"')[1] delay = datetime.datetime.now() + datetime.timedelta(minutes=3) if 'archive.fo' in archive_uri: pass else: callback_text = 'Something went wrong, let @raku_cat know' else: callback_text = 'Saving on cooldown, please try again in a few miniutes.' else: uri = msg['message']['text'] foo, keyboard = link_handler(url) dt = uri.split('/')[3] dt = datetime.datetime.strptime(dt, '%Y%m%d%H%M%S') timegate = 'https://archive.fo/timegate/' mc = MementoClient(timegate_uri=timegate, check_native_timegate=False) if query_data == 'back': try: archive_uri = mc.get_memento_info( url, dt).get('mementos').get('prev').get('uri')[0] except AttributeError: callback_text = 'No older archives or something went wrong.' elif query_data == 'next': try: archive_uri = mc.get_memento_info( uri, dt).get('mementos').get('next').get('uri')[0] except AttributeError: callback_text = 'No newer archives or something went wrong.' try: bot.editMessageText(msg_idf, archive_uri) except: pass try: bot.editMessageText(msg_idf, archive_uri, reply_markup=keyboard) except: pass bot.answerCallbackQuery(query_id, text=callback_text) print('Responding to callback ' + query_id)