def archive_is(url):
    """ Function for pushing to archive.is """
    print("[*] Pushing to archive.is...")
    archiveis_result = archiveis.capture(url).replace("http://", "https://")
    print(archiveis_result)

def capture(url):
    """ Capture a URL in archive.is """
    # Easiest way to do it for now, archive.is API sucks
    # FIXME replace this lib
    return archiveis.capture(url)

def save_in_archive(url):
    """ Saves a URL in archive.is

    :param url: URL to save
    :return: URL in archive.is
    """
    return archiveis.capture(url)

def handle_msg_all(self, msg):
    if (msg['msg_type_id'] == 1 or msg['msg_type_id'] == 4) and msg['content']['type'] == 7:
        self.send_msg_by_uid(
            archiveis.capture(msg['content']['data']['url']),
            msg['user']['id'])
        # "Please copy the link above and open it outside the (fire)wall"
        self.send_msg_by_uid(u'请复制上述链接在墙外打开', msg['user']['id'])

def generate_text(quote, submission):
    text = quote[0].decode('string-escape')  # in case quote has \n in it
    text += '\n\n'
    text += '[Here\'s]('
    text += archiveis.capture(submission.url)
    text += ') an archived version of '
    urls = None
    if submission.is_self and submission.selftext:
        urls = get_urls(submission.selftext)
        archive_urls = map(archiveis.capture, urls)
    if submission.is_self:
        text += 'this thread'
        if urls:
            text += '[,' if quote[1] else ','
        else:
            text += '[.' if quote[1] else '.'
    else:
        text += 'the linked post'
        text += '[.' if quote[1] else '.'
    if quote[1]:
        text += ']('
        text += quote[1]
        text += ')'
    if urls:
        text += ' and the links:'
        for link in zip(urls, archive_urls):
            text += '\n\n'
            text += '[' + link[0] + '](' + link[1] + ')'
    return text

async def archive(self, ctx, url):
    # "%s archived %s"
    await bot_log(_("%s가 %s를(을) 아카이브 했습니다.\n") % (ctx.message.author, url))
    try:
        if not "http" in url:
            url = "http://" + url
        archive_url = archiveis.capture(url, self.proxyString)
        # "Archiving... please wait a moment!"
        await self.bot.send_message(ctx.message.channel, _("아카이브 중입니다...\n조금만 기다려 주세요!"))
        self.driver.get(url)
        wait = WebDriverWait(self.driver, 2)
        wait.until(EC.presence_of_element_located((By.XPATH, 'html')))
        self.driver.maximize_window()
        self.driver.find_element_by_tag_name('html').screenshot('screenshot.png')
        await self.bot.send_file(ctx.message.channel, 'screenshot.png')
        await self.bot.send_message(ctx.message.channel, archive_url)
        # "Archive URL: %s"
        await self.bot.log(_("아카이브 주소:%s\n") % (url))
        os.remove('screenshot.png')
    except:
        try:
            self.driver.close()
        except:
            pass
        # "An error occurred!"
        await self.bot.send_message(ctx.message.channel, _("오류가 발생했어요!"))
        raise

def archive_url(original_url, username, running_locally):
    ''' Return error_message if failed; otherwise None. '''
    error_message = None
    print('adjust_url(' + original_url + ')')
    url = url_util.adjust_url(original_url)
    if url is not None:
        # Pop from pending
        dynamodb.pop_account_archive_request_by(list_name=dynamodb.ACCOUNT_TABLE_ARCHIVE_PENDING_REQUEST_LIST,
                                                username=username,
                                                original_url=original_url)
        # Record the current datetime
        utc_datetime = datetime.datetime.utcnow()
        utc_datetime_str = str(utc_datetime)
        # Save it on archive website
        initial_archive_md_url = None
        if running_locally:
            try:
                print('archiveis.capture(' + url + ')')
                initial_archive_md_url = archiveis.capture(url)
            except Exception as e:
                print('Unexpected exception: ' + str(e))
        # Screenshot the url webpage
        print('take_url_webpage_snapshot(' + url + ')')
        url_webpage_png, _url_inner_html = webpage_snapshot.take_url_webpage_snapshot(url=url,
                                                                                      running_locally=running_locally)
        # Create new archive entry on DynamoDB
        dynamodb.create_new_archive(url=url,
                                    datetime=utc_datetime_str,
                                    username=username,
                                    archive_md_url=initial_archive_md_url)
        # Store the screenshot on S3
        archive_id, _, _ = dynamodb.get_archive_info(url=url, datetime=utc_datetime_str)
        url_webpage_png_s3_key = s3.WEBPAGE_SCREENSHOT_DIR + archive_id + '.png'
        s3.upload_file_bytes_object(key=url_webpage_png_s3_key, file_bytes=url_webpage_png)
        # Store the text of the webpage on S3
        # url_webpage_text = clean_text(extract_text(url_inner_html)).encode()
        # url_weboage_text_s3_key = s3.WEBPAGE_TEXT_DIR + archive_id + '.txt'
        # s3.upload_file_bytes_object(key=url_weboage_text_s3_key, file_bytes=url_webpage_text)
        # Early-exit for success
        return None
    else:
        error_message = 'Invalid URL: ' + original_url
    # All success must early-exit
    assert error_message
    # Pop from pending
    dynamodb.pop_account_archive_request_by(list_name=dynamodb.ACCOUNT_TABLE_ARCHIVE_PENDING_REQUEST_LIST,
                                            username=username,
                                            original_url=original_url)
    # Add into failed
    dynamodb.push_account_archive_request(list_name=dynamodb.ACCOUNT_TABLE_ARCHIVE_FAILED_REQUEST_LIST,
                                          username=username,
                                          original_url=original_url)
    return error_message

def archive_target(self, target):
    """ Returns log of what was archived """
    archive_url = archiveis.capture(target)
    message = f"target {target} has been archived"
    # This logs to the docker logs
    self.logger.info(message)
    return archive_url

def archive(self, url, num):
    ar = archiveis.capture(url)
    if (num + 1) % 5 == 0:
        print("pausing to let the archiver catch up [ 20 seconds ]")
        time.sleep(20)
    print("[ %d ] " % num + ar)
    return str(ar)

def get_all_tweets(screen_name):
    if (consumer_key == ""):
        print "You need to set up the script first. Edit it and add your keys."
        return

    # Twitter only allows access to a user's most recent 3240 tweets with this method

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initialize a list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # save most recent tweets
    alltweets.extend(new_tweets)

    # save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        print "getting tweets before {0}".format(oldest)

        # all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest,
                                       include_entities=True, tweet_mode='extended')

        # save most recent tweets
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        print "...{0} tweets downloaded so far".format(len(alltweets))

    for tweet in alltweets:
        tweetID = tweet.id_str
        tweetURL = "https://twitter.com/{0}/status/{1}".format(screen_name, tweetID)
        print "Archiving {0}...".format(tweetURL)
        archive_url = archiveis.capture(tweetURL)
        archiveorg_url = savepagenow.capture_or_cache(tweetURL)
        print "Tweet archived! archive.is: {0} ||| archive.org: {1}".format(archive_url, archiveorg_url[0])

    print "All tweets successfully archived."

def arquivar_tweets():
    lista_ids = database.recupera_ids_sem_arquivo()
    for par in lista_ids:
        url = "https://twitter.com/" + str(par[1]) + "/status/" + str(par[0])
        try:
            url_arquivo = archiveis.capture(url)
            print(url_arquivo)
            database.adiciona_arquivo(par[0], url_arquivo)
        except Exception as E:
            traceback.print_exc()
            print("Problema no arquivador principal")
            exit()

def archive(self, url, num):
    base = "https://archive.today/download/"
    ar = archiveis.capture(url)
    if (num + 1) % 5 == 0:
        print("pausing to let the archiver catch up [ 20 seconds ]")
        time.sleep(20)
    print("[ %d ] " % num + ar)
    return str(ar)

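The two archive() helpers above throttle themselves with a fixed pause every fifth capture. A minimal alternative sketch, using only archiveis and the standard library, retries a failed capture with an increasing delay instead of sleeping on a fixed schedule; the retry_capture name and its parameters are illustrative and not taken from any of the projects quoted here.

import time

import archiveis


def retry_capture(url, attempts=3, base_delay=5):
    """Try archiveis.capture() up to `attempts` times, sleeping longer after each failure."""
    last_error = None
    for attempt in range(attempts):
        try:
            return archiveis.capture(url)
        except Exception as error:  # archive.is can time out or rate-limit
            last_error = error
            time.sleep(base_delay * (attempt + 1))
    raise last_error
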
async def replace_url(self, link, msg):
    await msg.delete()
    if patterns.youtube.search(msg.content):
        new_url = patterns.domain.sub(r"\1hooktube.com\3", link.group(0))
    else:
        new_url = archive.capture(link.group(0))
        new_url = new_url.replace("http", "https")
    logger.report(new_url)
    new_content = patterns.url.sub(new_url, msg.content)
    await msg.channel.send(msg.author.nick + ": " + new_content)
    return

def job_from_facebook(idx, store_urls, db):
    driver = get_selenium_driver()
    news_collection = db[Constants.NEWS_COLLECTION]
    urls = store_urls[idx]
    t = open("./fb_result.txt", 'a')
    for id_url in urls:
        id = id_url[0]
        url = id_url[1]
        try:
            # url = url[0]
            if "archive" not in url:
                archive_url = archiveis.capture(url)
                time.sleep(30)
            else:
                archive_url = url
            print(archive_url)
            # if "wip/" in archive_url:
        except Exception as e:
            print(str(e))
            t.write(url + "\n")
            t.flush()
            print("Factcheck ERROR in {}".format(url))
            continue
        if "wip/" in archive_url:
            archive_url = archive_url.replace("wip/", "")
        if "wip/" not in archive_url:
            try:
                return_dic = {}
                return_dic['ref_source'] = get_news_source_article(archive_url, driver)
                return_dic['news_id'] = id
                return_dic['ref_source_url'] = return_dic['ref_source']['url']
                print(id)
                news_collection.find_one_and_update(
                    {"news_id": return_dic['news_id']},
                    {"$set": return_dic},
                    upsert=True)
            except:
                print("Problem in {}".format(archive_url))
                continue
        else:
            news_collection.find_one_and_update(
                {"news_id": id},
                {"$set": {
                    'archive_wip': archive_url.replace("/wip", ""),
                    'news_id': id
                }},
                upsert=True)
    t.close()

def is_memento(clip_id):
    """ Archive a clip with archive.is. """
    clip = Clip.objects.get(id=clip_id)
    logger.debug("Archiving {} with archive.is".format(clip.url))
    try:
        is_url = archiveis.capture(clip.url)
        is_memento = Memento.objects.create(url=is_url, archive="archive.is")
        logger.debug("Created {}".format(is_memento))
        clip.mementos.add(is_memento)
    except Exception as e:
        logger.debug("archive.is failed")
        logger.debug(e)

def echo_all(message):
    reply = archiveis.capture(message.text)
    try:
        bot.reply_to(message, reply)
    except Exception as e:
        bot.reply_to(message, 'oooops, please send the url again.')
    html = requests.get(message.text)
    Title = BeautifulSoup(html.text, "html.parser").title.text.encode('utf-8').strip()
    with open('archive.csv', 'a') as f1:
        f1.write(time.ctime() + ',' + message.text + ',' + reply + ',')
        f1.write(Title)
        f1.write('\n')

def handle_submission(self, submission):
    logging.debug('[submissions] Processing new submission %s', submission.id)
    if submission.selftext == '':
        urls = re.findall(self.regex, submission.url)
    else:
        urls = re.findall(self.regex, submission.selftext)
    if urls:
        logging.info('[submissions] New submission %s with bild.de URLs found', submission.id)
        archive_urls = []
        bildplus = 0
        for url in urls:
            parsed_url = urlparse(url)
            if parsed_url.path.startswith('/bild-plus/'):
                logging.info('[submissions] Skipping %s because it is probably a BILD+ link', url)
                bildplus += 1
                continue
            logging.info('[submissions] Capturing %s', url)
            archive_url = archiveis.capture(url)
            if archive_url:
                archive_urls.append(archive_url)
                logging.info('[submissions] Captured: %s', archive_url)
            else:
                logging.warning('[submissions] Got an empty archive.is URL back. Something is wrong')
        if len(urls) != len(archive_urls) + bildplus:
            logging.warning('[submissions] Found %d bild.de URLs, but got only %d archive.is links',
                            len(urls), len(archive_urls))
        if archive_urls:
            links = "\n- ".join(archive_urls)
            body = ("> Diese Zeitung ist ein Organ der Niedertracht. Es ist falsch, sie zu lesen.\n"
                    "> Jemand, der zu dieser Zeitung beiträgt, ist gesellschaftlich absolut inakzeptabel.\n"
                    "> Es wäre verfehlt, zu einem ihrer Redakteure freundlich oder auch nur höflich zu sein.\n"
                    "> Man muß so unfreundlich zu ihnen sein, wie es das Gesetz gerade noch zuläßt.\n"
                    "> Es sind schlechte Menschen, die Falsches tun.\n\n"
                    "[Max Goldt](https://de.wikipedia.org/wiki/Max_Goldt), deutscher Schriftsteller\n\n"
                    "Du kannst diesen Artikel auf archive.is lesen, wenn du nicht auf bild.de gehen willst:\n\n- "
                    + links +
                    "\n\n"
                    "----\n\n"
                    "^^[Info](https://www.reddit.com/r/MaxGoldtBot) | "
                    "[Autor](https://www.reddit.com/u/pille1842) | "
                    "[GitHub](https://github.com/pille1842/MaxGoldtBot) | "
                    "[Warum die Bild schlecht ist]"
                    "(http://www.bildblog.de/62600/warum-wir-gegen-die-bild-zeitung-kaempfen/)")
            submission.reply(body)
            logging.info('[submissions] Replied to %s with %d links', submission.id, len(archive_urls))
        else:
            logging.warning('[submissions] No reply to %s: %d bild.de links found, none archived',
                            submission.id, len(urls))
    else:
        logging.debug('[submissions] No relevant URLs found in %s', submission.id)

def echo_all(message):
    reply = archiveis.capture(message.text)
    bot.reply_to(message, reply)
    html = requests.get(message.text)
    Title = BeautifulSoup(html.text, "html.parser").title.text.encode('utf-8').strip()
    with open('archive.csv', 'a') as f1:
        f1.write(time.ctime() + ',' + message.text + ',' + reply + ',')
        f1.write(Title)
        f1.write('\n')
    with open('archive.txt', 'a') as f2:
        f2.write(time.ctime() + '\n' + message.text + '\n' + reply + '\n')
        f2.write(Title)
        f2.write('\n' + '\n')

def on_data(self, data):
    # convert from JSON to a dictionary
    tweet = json.loads(data)

    # grab the tweet's screen name, ID, etc
    tweet_id = tweet.get('id_str')
    screen_name = tweet.get('user', {}).get('screen_name')
    tweet_text = tweet.get('text')

    # grab the reply tweet information
    reply_tweet_id = tweet.get('in_reply_to_status_id_str')
    reply_tweet_screen_name = tweet.get('in_reply_to_screen_name')

    if reply_tweet_id is not None:
        # make the URL of the tweet to archive
        tweet_to_archive = "https://twitter.com/%s/status/%s" % (reply_tweet_screen_name, reply_tweet_id)

        # print confirmation of finding tweet
        print "[*] Given tweet to archive: %s" % tweet_to_archive

        # archive the tweet
        internet_archive_url = internet_archive(tweet_to_archive)

        # push to archive.is
        print "[*] Pushing to archive.is..."
        archiveis_result = archiveis.capture(tweet_to_archive).replace("http://", "https://")

        print "[!] Archived %s" % tweet_to_archive
        print internet_archive_url
        print archiveis_result

        # sleep, so the bot doesn't immediately reply and potentially trigger bot alerts
        time.sleep(10)

        # content of tweet to send to requester
        message = "Sure thing, here are the archive links: %s, %s" % (internet_archive_url, archiveis_result)

        # post a reply to the tweet
        api.update_status(message, in_reply_to_status_id=tweet_id, auto_populate_reply_metadata=True)
        print "[!] Posted a reply"

        # sleep to avoid rate limiting
        time.sleep(300)

    return True

def main():
    output_file = open(args.log, 'w')
    for request in range(args.start, args.end + 1):
        if FLAG:
            connection_attempt = 1
            # todo: figure out non-naive way to do this. archive human readable urls instead?
            linkToArchive = "https://www.righttoknow.org.au/request/" + str(request)

            # print link being currently archived
            print "\n[*] Given FOI request URL to archive: %s" % linkToArchive

            # archive the URL
            internet_archive_url = internet_archive(linkToArchive, connection_attempt, args.retries, FLAG)

            # push to archive.is
            print "[+] Uploading to archive.is..."
            archiveis_result = archiveis.capture(linkToArchive).replace("http", "https")

            print "[+] FOI Request Archived %s" % linkToArchive
            print "[+] Wayback Machine: %s" % str(internet_archive_url)
            print "[+] archive.is: %s \n" % str(archiveis_result)

            # save links to file
            if args.log:
                output_file.write(str(internet_archive_url))
        else:
            continue

        # sleep to avoid bot triggers
        time.sleep(0.3)

    output_file.close()

    # kill Tor process on completion
    tor_process.kill()

    print "[*] %d FOI requests archived on the Wayback Machine" % (args.end - args.start + 1)
    print "[*] Links saved to file: %s\n" % args.log
    print "[*] Killed Tor process"
    print "[*] Exiting..."
    return True

def save_with_archiveis(url):
    """saves the page to archive.is """
    print("Saving url: {} with archive.is...".format(url))
    try:
        archiveis_location = archiveis.capture(url)
        print("archive.is saved on: {}".format(archiveis_location))
        archiveis_download = 'https://archive.today/download/{}.zip'.format(
            re.sub('^.+/', '', archiveis_location))
        msg = '{} downloadable at {}'.format(archiveis_location, archiveis_download)
    except Exception as e:
        print(" sorry, something went wrong :(\n {}".format(e))
        print("Impossible to save the URL to archive.is")
        print("ERROR: {}".format(str(e)))
        msg = 'FAILED'
    return msg

def arquivar_tweets():
    print("Arquivando tweets...")
    lista_ids = database.recupera_ids_sem_arquivo2()
    for par in lista_ids:
        url = "https://twitter.com/" + str(par[1]) + "/status/" + str(par[0])
        print(url)
        try:
            url_arquivo = archiveis.capture(url)
            database.adiciona_arquivo(par[0], url_arquivo)
        except Exception as E:
            print(E)
            print("Problema no arquivador principal")
            try:
                url_arquivo = savepagenow.capture(url)
                database.adiciona_arquivo(par[0], url_arquivo)
                time.sleep(20)
            except Exception as E2:
                print(E2)
                print("Problema no arquivador reserva.")

def test_capture(self):
    archive_url_1 = archiveis.capture("http://www.example.com/")
    self.assertTrue(archive_url_1.startswith("http://archive.vn/"))

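The test above hits archive.is for real, so it can fail for network or rate-limiting reasons alone. A hedged sketch of an offline variant: patch archiveis.capture and exercise a thin wrapper such as save_in_archive from earlier in this collection. The module name `archiver` is an assumption; adjust the patch target to wherever your code imports archiveis.

from unittest import TestCase, mock

# Hypothetical module holding the save_in_archive wrapper shown earlier.
import archiver


class SaveInArchiveTest(TestCase):
    @mock.patch("archiver.archiveis.capture", return_value="http://archive.vn/abc12")
    def test_save_in_archive_offline(self, mock_capture):
        # No network traffic: the patched capture returns a canned memento URL.
        result = archiver.save_in_archive("http://www.example.com/")
        mock_capture.assert_called_once_with("http://www.example.com/")
        self.assertEqual(result, "http://archive.vn/abc12")
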
                site=site,
                update=update,
                archive='archive.org',
                url=ia_memento,
            )
        else:
            logger.info("Internet Archive returned a cached memento")
    except Exception:
        logger.info("Adding Internet Archive memento failed")

    # Archive.is mementos were turned on
    if site.has_archiveis_mementos:
        logger.info("Adding archive.is memento for %s" % site.url)
        try:
            is_memento = archiveis.capture(
                site.url,
                user_agent="pastpages.org ([email protected])"
            )
            is_created = Memento.objects.filter(url=is_memento).count() == 0
            if is_created:
                memento = Memento.objects.create(
                    site=site,
                    update=update,
                    archive='archive.is',
                    url=is_memento,
                )
            else:
                logger.info("archive.is returned a cached memento")
        except Exception:
            logger.info("Adding archive.is memento failed")

    # webcitation mementos were turned on

print "[*] Pushing to Perma.cc..." perma_json = {} perma_json['url'] = '%s' % input # remember to put your Perma.cc API key in here response = requests.post("https://api.perma.cc/v1/archives/?api_key=YOUR_PERMA_API_KEY_HERE", data=perma_json) if response.status_code == 201: result = json.loads(response.content) page_id = result['guid'] perma_url = "https://perma.cc/%s" % page_id return perma_url else: print "[*] Connection error" # push to The Internet Archive internet_archive_result = internet_archive(input) print internet_archive_result # push to archive.is print "[*] Pushing to archive.is..." archiveis_result = archiveis.capture(input) print archiveis_result # push to perma.cc perma_result = perma(input) print perma_result
def get_foo(url):
    print(url)
    archive_url = archiveis.capture(url)
    foo = archive_url.split('/')[-1]
    return foo

import archiveis
import savepagenow
import time

if __name__ == '__main__':
    gevent.monkey.patch_all()
    print("Arquivando tweets...")
    while True:
        lista_ids = database.recupera_ids_sem_arquivo()
        for par in lista_ids:
            url = "https://twitter.com/" + str(par[1]) + "/status/" + str(par[0])
            print(url)
            try:
                url_arquivo = archiveis.capture(url)
                database.adiciona_arquivo(par[0], url_arquivo)
            except Exception as E:
                print(E)
                print("Problema no arquivador principal")
                try:
                    url_arquivo = savepagenow.capture(url)
                    database.adiciona_arquivo(par[0], url_arquivo)
                    time.sleep(20)
                except Exception as E2:
                    print(E2)
                    print("Problema no arquivador reserva.")


def arquivar_tweets():
    print("Arquivando tweets...")

def archive_is(url):
    return archiveis.capture(url)

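Several projects reduce the integration to a one-line wrapper like archive_is() above, while others guard against exceptions and empty return values (see the bild.de bot and the clip archiver earlier). A minimal defensive sketch combining both ideas; the safe_capture name is illustrative and not taken from any of the quoted projects.

import logging

import archiveis

logger = logging.getLogger(__name__)


def safe_capture(url):
    """Like archive_is() above, but swallow failures and treat an empty result as a miss."""
    try:
        memento_url = archiveis.capture(url)
    except Exception as error:  # network errors, rate limiting, etc.
        logger.warning("archive.is capture of %s failed: %s", url, error)
        return None
    return memento_url or None
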
    perma_json = {}
    perma_json['url'] = '%s' % input
    # remember to put your Perma.cc API key in here
    response = requests.post(
        "https://api.perma.cc/v1/archives/?api_key=YOUR_PERMA_API_KEY_HERE",
        data=perma_json)
    if response.status_code == 201:
        result = json.loads(response.content)
        page_id = result['guid']
        perma_url = "https://perma.cc/%s" % page_id
        return perma_url
    else:
        print "[*] Connection error"

# push to The Internet Archive
internet_archive_result = internet_archive(input)
print internet_archive_result

# push to archive.is
print "[*] Pushing to archive.is..."
archiveis_result = archiveis.capture(input).replace("http://", "https://")
print archiveis_result

# push to perma.cc
perma_result = perma(input)
print perma_result

def start(self):
    return archiveis.capture(self._url)

def bot_login(identifiers):
    login = praw.Reddit(**identifiers)
    return login


with open("identifiers.csv") as id_csv:
    reader = csv.reader(id_csv)
    imported_id = {row[0]: row[1] for row in reader}

reddit = bot_login(imported_id)
subreddit = reddit.subreddit("badmathematics")
submission_stream = subreddit.stream.submissions(skip_existing=True, pause_after=0)

while True:
    submission = next(submission_stream)
    if submission and not submission.is_self:
        url = submission.url
        if url.startswith("https://www.reddit.com"):
            url = url[0:8] + 'old' + url[11:]
        archive_url = archiveis.capture(url)
        comment_text = f"[Here's]({archive_url}) an archived version of this thread. \n" \
                       "[^^Source](https://github.com/kitegi/discount-gv)"
        submission.reply(comment_text)
        print("Reply sent")
    else:
        sleep(50)
    sleep(10)