print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="friendfeed", project_html=""" <img class="project-logo" alt="Project logo" src="http://archiveteam.org/images/8/83/Friendfeed_logo.png" height="50px" title=""/> <h2>friendfeed.com <span class="links"><a href="http://friendfeed.com/">Website</a> · <a href="http://tracker.archiveteam.org/friendfeed/">Leaderboard</a></span></h2> <p>Grabbing all accounts from friendfeed.com.</p> """) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="friendfeed"), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ "item_dir": ItemValue("item_dir"), "item_value": ItemValue("item_value"), "item_type": ItemValue("item_type"), }),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project( title = 'tumblr-static', project_html = ''' <img class="project-logo" alt="logo" src="https://archiveteam.org/images/b/ba/Tumblr_on_white.png" height="50px"/> <h2>Tumblr <span class="links"><a href="https://www.tumblr.com/">Website</a> · <a href="https://tracker.archiveteam.org/tumblr/">Leaderboard</a></span></h2> ''' ) pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='tumblr-static'), WgetDownload( WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="reddit", project_html=""" <img class="project-logo" alt="Project logo" src="http://archiveteam.org/images/thumb/b/b5/Reddit_logo.png/320px-Reddit_logo.png" height="50px" title=""/> <h2>www.reddit.com <span class="links"><a href="https://www.reddit.com/">Website</a> · <a href="http://tracker.archiveteam.org/reddit/">Leaderboard</a></span></h2> <p>Grabbing reddit.</p> """) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="reddit"), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 8], env={ "item_dir": ItemValue("item_dir"), "item_value": ItemValue("item_value"), "item_type": ItemValue("item_type"), }),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='StoryFire', project_html=''' <img class="project-logo" alt="logo" src="https://wiki.archiveteam.org/images/e/e9/Storyfire-icon.png" height="50px"/> <h2>storyfire.com <span class="links"><a href="https://storyfire.com/">Website</a> · <a href="http://tracker.archiveteam.org/storyfire/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker( 'http://{}/{}/multi={}/'.format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE), downloader, VERSION), PrepareDirectories(warc_prefix='storyfire'), WgetDownload(WgetArgs(), max_tries=1, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'warc_file_base': ItemValue('warc_file_base'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='reddit', project_html=''' <img class="project-logo" alt="Project logo" src="https://www.archiveteam.org/images/b/b5/Reddit_logo.png" height="50px" title=""/> <h2>reddit.com <span class="links"><a href="https://reddit.com/">Website</a> · <a href="http://tracker.archiveteam.org/reddit/">Leaderboard</a></span></h2> <p>Archiving everything from reddit.</p> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker( 'http://{}/{}/multi={}/'.format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE), downloader, VERSION), PrepareDirectories(warc_prefix='reddit'), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_names': ItemValue('item_name_newline'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='500px', project_html=''' <img class="project-logo" alt="logo" src="https://www.archiveteam.org/images/8/83/500px_logo.png" height="50px"/> <h2>500px.com <span class="links"><a href="https://500px.com/">Website</a> · <a href="http://tracker.archiveteam.org/500px/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='500px'), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_value': ItemValue('item_value'), 'item_type': ItemValue('item_type'), 'warc_file_base': ItemValue('warc_file_base'),
print('') print('*** Wpull will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wpull_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="newsgrabber", project_html=""" <img class="project-logo" alt="Project logo" src="http://archiveteam.org/images/thumb/f/f3/Archive_team.png/235px-Archive_team.png" height="50px" title=""/> <h2>archiveteam.org <span class="links"><a href="http://archiveteam.org/">Website</a> · <a href="http://tracker.archiveteam.org/newsgrabber/">Leaderboard</a></span></h2> <p>Archiving all the news!</p> """) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="newsgrabber"), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8]), LimitConcurrent( NumberConfigValue( min=1, max=20, default="1", name="shared:dedupe_threads", title="Deduplicate threads",
print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project( title='bintray', project_html=''' <img class="project-logo" alt="logo" src="https://wiki.archiveteam.org/images/Archiveteamsmall.png?959ea" height="50px"/> <h2>Bintray <span class="links"><a href="https://bintray.com/">Website</a> · <a href="http://tracker.archiveteam.org/bintray/">Leaderboard</a></span></h2> ''', ) pipeline = Pipeline( CheckIP(), GetItemFromTracker( 'http://{}/{}/multi={}/'.format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE), downloader, VERSION), PrepareDirectories(warc_prefix='bintray'), WgetDownload(WgetArgs(), max_tries=1, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='googleplus', project_html=''' <img class="project-logo" alt="Project logo" src="https://www.archiveteam.org/images/9/95/Google%2B_logo.png" height="50px" title=""/> <h2>plus.google.com <span class="links"><a href="https://plus.google.com/">Website</a> · <a href="http://tracker.archiveteam.org/googleplus/">Leaderboard</a></span></h2> <p>Archiving everything from Google+.</p> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='googleplus'), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_value': ItemValue('item_value'), 'item_type': ItemValue('item_type'),
print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="Verizon", project_html=""" <img class="project-logo" alt="Project logo" src="http://archiveteam.org/images/thumb/b/bc/Verizon_Logo.png/320px-Verizon_Logo.png" height="50px" title=""/> <h2>mysite.verizon.net <span class="links"><a href="http://mysite.verizon.net/">Website</a> · <a href="http://tracker.archiveteam.org/verizon/">Leaderboard</a></span></h2> <h2>members.bellatlantic.net <span class="links"><a href="htp://members.bellatlantic.net/">Website</a> · <a href="http://tracker.archiveteam.org/verizon/">Leaderboard</a></span></h2> <p>Archiving websites from mysite.verizon.net and members.bellatlantic.net.</p> """, utc_deadline=datetime.datetime(2014, 9, 30, 23, 59, 0)) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="verizon"), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 7, 8], env={ "item_dir": ItemValue("item_dir"), "item_value": ItemValue("item_value"), "item_type": ItemValue("item_type"),
# CONTROL CONNECTION
# ------------------------------------------------------------------------------

# NOTE: this rebinds the name ``control``: after this statement it refers to
# the Control instance, not to the object (presumably the imported module)
# that provided ``Control``.
control = control.Control(REDIS_URL, LOG_CHANNEL, PIPELINE_CHANNEL)

# ------------------------------------------------------------------------------
# SEESAW EXTENSIONS
# ------------------------------------------------------------------------------

extensions.install_stdout_extension(control)

# ------------------------------------------------------------------------------
# PIPELINE
# ------------------------------------------------------------------------------

project = Project(title="ArchiveBot request handler")

# FIXME: Same hack as above; seesaw executes pipeline.py with the pipeline dir
# as the cwd.  __file__ can't be used because seesaw exec()s the file contents
# rather than importing the file.
REPO_DIRECTORY = os.path.dirname(os.path.realpath('.'))


def pipeline_version():
    """Return a version string for the checkout in ``REPO_DIRECTORY``.

    Runs ``git show`` there and returns its output decoded as UTF-8 and
    stripped of surrounding whitespace.  The requested format is the commit
    date as ``YYYYMMDD``, a dot, and the abbreviated commit hash -- something
    like ``20190820.5cd1e38``.

    Raises:
        subprocess.CalledProcessError: if ``git`` exits with a non-zero status.
        OSError: if the ``git`` executable cannot be started.
    """
    output = subprocess.check_output(
        [
            'git', 'show', '-s',
            '--format=format:%cd.%h',
            '--date=format:%Y%m%d',
        ],
        cwd=REPO_DIRECTORY,
    )
    return output.decode('utf-8').strip()
return realize(wget_args, item) downloader = globals()['downloader'] # quiet the code checker ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project( title="Canvas", project_html=""" <img class="project-logo" alt="" src="http://archiveteam.org/images/0/0d/Canvas-beta-logo-medium.png" height="50" title="" /> <h2>Canv.as <span class="links"> <a href="http://canv.as/">Website</a> · <a href="http://%s/%s/">Leaderboard</a></span></h2> <p><b>Canv.as</b> is closed.</p> """ % (TRACKER_HOST, TRACKER_ID) , utc_deadline=datetime.datetime(2014, 03, 03, 00, 00, 1) ) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="canvas"), WgetDownload( WgetArgs(), max_tries=5,
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="miiverse", project_html=""" <img class="project-logo" alt="logo" src="http://www.archiveteam.org/images/8/87/Miiverselogo.png" /> <h2>miiverse.net <span class="links"><a href="https://miiverse.net/">Website</a> · <a href="http://tracker.archiveteam.org/miiverse/">Leaderboard</a></span></h2> """, utc_deadline=datetime.datetime(2017, 11, 7, 23, 59, 0)) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="miiverse"), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ "item_dir": ItemValue("item_dir"), "item_value": ItemValue("item_value"), "item_type": ItemValue("item_type"), "warc_file_base": ItemValue("warc_file_base"),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='yourshot', project_html=''' <img class="project-logo" alt="logo" src="https://www.archiveteam.org/images/7/7a/Yourshot-logo.png" height="50px"/> <h2>yourshot.nationalgeographic.com <span class="links"><a href="https://yourshot.nationalgeographic.com/">Website</a> · <a href="http://tracker.archiveteam.org/yourshot/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='yourshot'), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_value': ItemValue('item_value'), 'item_type': ItemValue('item_type'), 'warc_file_base': ItemValue('warc_file_base'),
return d ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="Foodily Discovery", project_html=""" <img class="project-logo" alt="Project logo" src="http://t.nerds.io/c4d70e9d01baf0931f130f60839d9844.png" height="50px" title=""/> <h2>Foodily Phase 1. <span class="links"> <a href="http://www.foodily.com/">Website</a> · <a href="http://tracker.archiveteam.org/foodilydisco/">Leaderboard</a> <a href="http://archiveteam.org/index.php?title=Foodily">Wiki</a> · </span> </h2> <p>Foodily gets aquired. This is phase 1: content discovery.</p> """, utc_deadline=datetime.datetime(2015, 05, 30, 23, 59, 0)) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="foodilydisco"), ExternalProcess('Scraper', CustomProcessArgs(), max_tries=2,
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='YouTube Playlist Notes', project_html=''' <img class="project-logo" alt="logo" src="https://archiveteam.org/images/4/4d/YouTube_logo_2017.png" height="50px"/> <h2>youtube.com <span class="links"><a href="https://youtube.com/">Website</a> · <a href="http://tracker.archiveteam.org/youtube-playlistnotes/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='youtube-playlistnotes'), WgetDownload(WgetArgs(), max_tries=1, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_type': ItemValue('item_type'), 'item_value': ItemValue('item_value'),
import datetime import functools from seesaw.externalprocess import ExternalProcess from seesaw.pipeline import Pipeline from seesaw.project import Project from seesaw.task import Task, LimitConcurrent from tornado.ioloop import IOLoop project = Project(title='Software Update', project_html=''' <h2>Software Update</h2> <p>Select this project, when required, to automatically download and install software to update components of the Warrior. </p> <p>Components: Python3.5 </p> ''') class WarningTask(Task): def __init__(self): Task.__init__(self, 'WarningTask') def enqueue(self, item): self.start_item(item) item.may_be_canceled = True item.log_output( 'Software will be automatically downloaded and installed to update components of the Warrior.' ) item.log_output('Update will continue in 10 seconds...')
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='GitHub', project_html=''' <img class="project-logo" alt="logo" src="https://www.archiveteam.org/images/2/21/Github-icon.png" height="50px"/> <h2>github.com <span class="links"><a href="https://github.com/">Website</a> · <a href="http://tracker.archiveteam.org/github/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='github'), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_value': ItemValue('item_value'), 'item_type': ItemValue('item_type'), 'item_config': ItemValue('item_config'),
wget_args.extend(['--bind-address', globals()['bind_address']]) print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project( title = 'niconico', project_html = ''' <img class="project-logo" alt="logo" src="https://wiki.archiveteam.org/images/0/02/Niconico_Official_Logo.png" height="50px"/> <h2>Niconico <span class="links"><a href="http://www.nicovideo.jp/">Website</a> · <a href="http://tracker.archiveteam.org/niconico/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://{}/{}/multi={}/' .format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE), downloader, VERSION), PrepareDirectories(warc_prefix='niconico'), WgetDownload( WgetArgs(), max_tries=1, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='Google Sites', project_html=''' <img class="project-logo" alt="logo" src="https://archiveteam.org/images/9/9b/Google-Sites-Icon-2016.png" height="50px"/> <h2>sites.google.com <span class="links"><a href="https://sites.google.com/">Website</a> · <a href="http://tracker.archiveteam.org/google-sites/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='google-sites'), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_value': ItemValue('item_value'), 'item_type': ItemValue('item_type'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='tinypic', project_html=''' <img class="project-logo" alt="logo" src="https://www.archiveteam.org/images/7/74/Tinypic-logo.jpg" height="50px"/> <h2>tinypic.com <span class="links"><a href="http://www.tinypic.com/">Website</a> · <a href="http://tracker.archiveteam.org/tinypic/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='tinypic'), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_value': ItemValue('item_value'), 'item_type': ItemValue('item_type'), 'warc_file_base': ItemValue('warc_file_base'),
print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="rapidshare", project_html=""" <img class="project-logo" alt="Project logo" src="http://rapidshare.com/files/251393042" height="50px" title=""/> <h2>www.rapidshare.com <span class="links"><a href="https://www.rapidshare.com/">Website</a> · <a href="http://tracker.archiveteam.org/rapidshare/">Leaderboard</a></span></h2> <p>Grabbing files from RapidShare.</p> """, utc_deadline=datetime.datetime(2015, 3, 31, 23, 59, 0)) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="rapidshare"), WgetDownload(WgetArgs(), max_tries=1, accept_on_exit_code=[0, 4, 8], env={ "item_dir": ItemValue("item_dir"), "item_value": ItemValue("item_value"), "item_type": ItemValue("item_type"),
import datetime import functools from seesaw.externalprocess import ExternalProcess from seesaw.pipeline import Pipeline from seesaw.project import Project from seesaw.task import Task, LimitConcurrent from tornado.ioloop import IOLoop project = Project(title='Warrior Extras Installer', project_html=''' <img class="project-logo" alt="" src="https://tracker.archiveteam.org/~chfoo/image/200px-Applications-system.svg.png" height="50" /> <h2>Warrior Extras Installer <span class="links"> <a href="https://github.com/ArchiveTeam/warrior-extras-installer">source code</a> <span> </h2> <p>Select this project, when required, to install extra software components required by other projects.</p> ''') class WarningTask(Task): def __init__(self): Task.__init__(self, 'WarningTask') def enqueue(self, item): self.start_item(item) item.may_be_canceled = True item.log_output(
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="Halo 2", project_html=""" <img class="project-logo" alt="logo" src="https://www.archiveteam.org/images/f/f0/Halo_2_Logo.png" height="50px" /> <h2>halo.bungie.net <span class="links"><a href="http://halo.bungie.net/">Website</a> · <a href="http://tracker.archiveteam.org/halo2/">Leaderboard</a></span></h2> """) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="halo2"), WgetDownload(WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={}), PrepareStatsForTracker( defaults={ "downloader": downloader, "version": VERSION
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='halo', project_html=''' <img class="project-logo" alt="logo" src="https://wiki.archiveteam.org/images/8/80/Bungie_Logo_white_background.png" height="50px"/> <h2>halo.bungie.net <span class="links"><a href="https://halo.bungie.net/">Website</a> · <a href="http://tracker.archiveteam.org/halo-new/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker( 'http://{}/{}/multi={}/'.format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE), downloader, VERSION), PrepareDirectories(warc_prefix='halo'), WgetDownload(WgetArgs(), max_tries=1, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'warc_file_base': ItemValue('warc_file_base'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='yourshot-static', project_html=''' <img class="project-logo" alt="Project logo" src="https://www.archiveteam.org/images/2/22/Radio24syv.png" height="50px" title=""/> <h2>Radio24syv · <class="links"><a href="https://www.24syv.dk/">Website</a> · <a href="http://%s/%s/">Leaderboard</a></span></h2> <p>Archiving audio from radio24syv archive</p> ''' % (TRACKER_HOST, TRACKER_ID)) pipeline = Pipeline( CheckIP(), CheckBan(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='yourshot-static'), WgetDownload( WgetArgs(), max_tries=0, # 2, #changed accept_on_exit_code=[0], # [0, 4, 8], #changed env={ 'item_dir': ItemValue('item_dir'),
print('') print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project( title='Telegram', project_html=''' <img class="project-logo" alt="Project logo" src="https://wiki.archiveteam.org/images/thumb/7/7d/Telegram-icon.png/600px-Telegram-icon.png" height="50px" title=""/> <h2>telegram.org <span class="links"><a href="https://telegram.org/">Website</a> · <a href="http://tracker.archiveteam.org/telegram/">Leaderboard</a></span></h2> <p>Archiving public Telegram channels.</p> ''' ) pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://{}/{}/multi={}/' .format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE), downloader, VERSION), PrepareDirectories(warc_prefix=TRACKER_ID), WgetDownload( WgetArgs(), max_tries=2, accept_on_exit_code=[0, 4, 8], env={
print('*** Wget will bind address at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title="Justin.tv", project_html=""" <img class="project-logo" alt="Project logo" src="http://archiveteam.org/images/thumb/9/97/Justintv_logo.png/320px-Justintv_logo.png" height="50px" /> <h2>Justin.tv <span class="links"><a href="http://justin.tv/">Justin.tv</a> · <a href="http://tracker.archiveteam.org/justintv/">Leaderboard</a></span></h2> <p>Justin.tv is deleting all archives sometime in the next week. We DPoS.</p> """, utc_deadline=datetime.datetime(2014, 6, 8, 0, 0, 0)) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix="justintv"), Bouncer(), WgetDownload(WgetArgs(), max_tries=5, accept_on_exit_code=[0, 8], env={"item_dir": ItemValue("item_dir")}), PrepareStatsForTracker( defaults={
'pipeline_hash': PIPELINE_SHA1, 'python_version': sys.version, } return d ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project( title="sourceforgersync", project_html=""" <img class="project-logo" alt="Project logo" src="" height="50px" title=""/> <h2>sourceforge.net <span class="links"><a href="http://sourceforge.net/">Website</a> · <a href="http://tracker.archiveteam.org/sourceforge/">Leaderboard</a></span></h2> <p>Saving all project from SourceForge. rsyncing all of the source code repositories.</p> """ ) pipeline = Pipeline( CheckIP(), GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), ExternalProcess("Size Test",[RSYNC_TEST,"-t",getRsyncURL("foo"),"-m",MAX_RSYNC]), LimitConcurrent(1,ExternalProcess("rsync", ["rsync", "-av", getRsyncURL("foo"), cleanItem("%(data_dir)s/%(item_name)s")])), ExternalProcess("tar", ["tar", "-czf", cleanItem("%(data_dir)s/%(item_name)s.tar.gz"), "-C", ItemInterpolation("%(data_dir)s/"), "--owner=1999", "--group=2015", "--no-same-permissions", cleanItem("%(item_name)s")]), LimitConcurrent(NumberConfigValue(min=1, max=4, default="1", name="shared:rsync_threads", title="Rsync threads", description="The maximum number of concurrent uploads."), UploadWithTracker( "http://%s/%s" % (TRACKER_HOST, TRACKER_ID),
print('') print('*** Wget will bind addresss at {0} ***'.format( globals()['bind_address'])) print('') return realize(wget_args, item) ########################################################################### # Initialize the project. # # This will be shown in the warrior management panel. The logo should not # be too big. The deadline is optional. project = Project(title='mediafire', project_html=''' <img class="project-logo" alt="logo" src="https://archiveteam.org/images/thumb/8/8b/Mediafire-icon.png/320px-Mediafire-icon.png" height="50px"/> <h2>mediafire.com <span class="links"><a href="https://mediafire.com/">Website</a> · <a href="http://tracker.archiveteam.org/mediafire/">Leaderboard</a></span></h2> ''') pipeline = Pipeline( CheckIP(), GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader, VERSION), PrepareDirectories(warc_prefix='mediafire'), WgetDownload(WgetArgs(), max_tries=1, accept_on_exit_code=[0, 4, 8], env={ 'item_dir': ItemValue('item_dir'), 'item_type': ItemValue('item_type'), 'item_value': ItemValue('item_value'), 'warc_file_base': ItemValue('warc_file_base'),