예제 #1
0
 PrepareStatsForTracker(
     defaults={
         'downloader': downloader,
         'version': VERSION
     },
     file_groups={
         'data':
         [ItemInterpolation('%(item_dir)s/%(warc_file_base)s.warc.zst')]
     },
     id_function=stats_id_function,
 ), MoveFiles(),
 LimitConcurrent(
     NumberConfigValue(
         min=1,
         max=20,
         default='2',
         name='shared:rsync_threads',
         title='Rsync threads',
         description='The maximum number of concurrent uploads.'),
     UploadWithTracker(
         'http://%s/%s' % (TRACKER_HOST, TRACKER_ID),
         downloader=downloader,
         version=VERSION,
         files=[
             ItemInterpolation(
                 '%(data_dir)s/%(warc_file_base)s.%(dict_project)s.%(dict_id)s.warc.zst'
             ),
             ItemInterpolation('%(data_dir)s/%(warc_file_base)s_data.txt')
         ],
         rsync_target_source_path=ItemInterpolation('%(data_dir)s/'),
         rsync_extra_args=[
예제 #2
0
 PrepareStatsForTracker(
     defaults={
         "downloader": downloader,
         "version": VERSION
     },
     file_groups={
         "data":
         [ItemInterpolation("%(item_dir)s/%(warc_file_base)s.warc.gz")]
     },
     id_function=stats_id_function,
 ), MoveFiles(),
 LimitConcurrent(
     NumberConfigValue(
         min=1,
         max=4,
         default="1",
         name="shared:rsync_threads",
         title="Rsync threads",
         description="The maximum number of concurrent uploads."),
     UploadWithTracker(
         "http://%s/%s" % (TRACKER_HOST, TRACKER_ID),
         downloader=downloader,
         version=VERSION,
         files=[
             ItemInterpolation("%(data_dir)s/%(warc_file_base)s.warc.gz")
         ],
         rsync_target_source_path=ItemInterpolation("%(data_dir)s/"),
         rsync_extra_args=[
             "--recursive",
             "--partial",
             "--partial-dir",
예제 #3
0
    def __init__(self, projects_dir, data_dir, warrior_hq_url,
                 real_shutdown=False, keep_data=False):
        """Initialise warrior state, configuration values and timers.

        Args:
            projects_dir: Directory that must be writable; the
                ``ConfigManager`` is pointed at ``config.json`` inside it.
            data_dir: Directory that must be writable.
            warrior_hq_url: Stored on the instance as ``warrior_hq_url``.
            real_shutdown: Stored on the instance as ``real_shutdown``.
            keep_data: Stored on the instance and forwarded to the
                ``Runner``.

        Raises:
            Exception: If ``os.access`` reports that ``projects_dir`` or
                ``data_dir`` is not writable.
        """
        # Fail early when either working directory is not writable.
        if not os.access(projects_dir, os.W_OK):
            raise Exception(
                "Couldn't write to projects directory: %s" % projects_dir)
        if not os.access(data_dir, os.W_OK):
            raise Exception("Couldn't write to data directory: %s" % data_dir)

        self.projects_dir = projects_dir
        self.data_dir = data_dir
        self.warrior_hq_url = warrior_hq_url
        self.real_shutdown = real_shutdown
        self.keep_data = keep_data

        # Copy of the process environment with both askpass helpers
        # overridden, which disables the password prompts.
        self.gitenv = dict(os.environ,
                           GIT_ASKPASS='******',
                           SSH_ASKPASS='******')

        # --- Configuration values -------------------------------------
        self.warrior_id = StringConfigValue(
            name="warrior_id",
            title="Warrior ID",
            description="The unique number of your warrior instance.",
            editable=False,
        )
        self.selected_project_config_value = StringConfigValue(
            name="selected_project",
            title="Selected project",
            description=("The project (to be continued when the warrior "
                         "restarts)."),
            default="none",
            editable=False,
        )
        self.downloader = StringConfigValue(
            name="downloader",
            title="Your nickname",
            description=("We use your nickname to show your results on our "
                         "tracker. Letters and numbers only."),
            regex="^[-_a-zA-Z0-9]{3,30}$",
            advanced=False,
        )
        self.concurrent_items = NumberConfigValue(
            name="concurrent_items",
            title="Concurrent items",
            description=("How many items should the warrior download at a "
                         "time? (Max: 6)"),
            min=1,
            max=6,
            default=2,
        )
        self.http_username = StringConfigValue(
            name="http_username",
            title="HTTP username",
            description=("Enter a username to protect the web interface, "
                         "or leave empty."),
            default="",
        )
        self.http_password = StringConfigValue(
            name="http_password",
            title="HTTP password",
            description=("Enter a password to protect the web interface, "
                         "or leave empty."),
            default="",
        )

        # Register every config value with the manager, in a fixed order.
        self.config_manager = ConfigManager(
            os.path.join(projects_dir, "config.json"))
        for config_value in (
                self.warrior_id,
                self.selected_project_config_value,
                self.downloader,
                self.concurrent_items,
                self.http_username,
                self.http_password,
        ):
            self.config_manager.add(config_value)

        # --- Monitoring and item runner -------------------------------
        self.bandwidth_monitor = BandwidthMonitor("eth0")
        self.bandwidth_monitor.update()

        self.runner = Runner(concurrent_items=self.concurrent_items,
                             keep_data=self.keep_data)
        self.runner.on_finish += self.handle_runner_finish

        # --- Project bookkeeping --------------------------------------
        self.current_project_name = None
        self.current_project = None

        self.selected_project = None

        self.projects = {}
        self.installed_projects = set()
        self.failed_projects = set()

        # --- Events exposed by this object ----------------------------
        self.on_projects_loaded = Event()
        self.on_project_installing = Event()
        self.on_project_installed = Event()
        self.on_project_installation_failed = Event()
        self.on_project_refresh = Event()
        self.on_project_selected = Event()
        self.on_status = Event()
        self.on_broadcast_message_received = Event()

        self.http_client = AsyncHTTPClient()

        self.installing = False
        self.shut_down_flag = False
        self.reboot_flag = False

        # --- Periodic refresh timers (created here, not started) ------
        loop = ioloop.IOLoop.instance()

        def _collect_result(future):
            # Calling result() on the finished future re-raises any
            # exception stored on it instead of dropping it silently.
            return future.result()

        def refresh_hq():
            loop.add_future(self.update_warrior_hq(), _collect_result)

        def refresh_project():
            loop.add_future(self.update_project(), _collect_result)

        hq_interval_ms = 10 * 60 * 1000
        project_interval_ms = 30 * 60 * 1000
        self.hq_updater = ioloop.PeriodicCallback(refresh_hq, hq_interval_ms)
        self.project_updater = ioloop.PeriodicCallback(refresh_project,
                                                       project_interval_ms)
        self.forced_reboot_timeout = None

        self.lat_lng = None
        self.find_lat_lng()

        self.install_output = None
        self.broadcast_message = None
        self.contacting_hq_failed = False
예제 #4
0
 ExternalProcess(
     'Begin',
     [sys.executable, 'helper.py', 'begin'],
     env={
         'user_agent': user_agent,
         'bind_address': globals().get('bind_address', ''),
         'disco_tracker': DISCO_TRACKER_URL,
         "item_dir": ItemValue("item_dir"),
     },
     accept_on_exit_code=[0],
 ),
 LimitConcurrent(
     NumberConfigValue(
         min=1,
         max=6,
         default=globals().get("num_procs", "1"),
         name="shared:fagrab:num_procs",
         title="Number of Processes",
         description="The maximum number of concurrent download processes."
     ),
     WgetDownload(WgetArgs(),
                  max_tries=1,
                  accept_on_exit_code=[0, 4, 7, 8],
                  env={
                      "item_dir": ItemValue("item_dir"),
                      "downloader": downloader,
                      "item_name": ItemValue("item_name"),
                  }),
 ),
 ExternalProcess(
     'End',
     [sys.executable, 'helper.py', 'end'],
예제 #5
0
		"--waitretry", "5"
		],
		max_tries=5,
		accept_on_exit_code=[ 0 ],
	),"""

pipeline = Pipeline(
    GetItemFromTracker("http://%s/%s" % (TRACKER_HOST, TRACKER_ID), downloader,
                       VERSION),
    PrepareDirectories(file_prefix="isohunt"),
    LimitConcurrent(
        NumberConfigValue(
            min=1,
            max=10,
            default="10",
            name="isohunt:download_threads",
            title="Isohunt downloading threads",
            description=
            "How many threads downloading Isohunt torrents and pages can run at once, to avoid throttling."
        ),
        WgetDownloadTorrentRange(
            [
                WGET_LUA,
                "-U",
                USER_AGENT,
                "--no-check-certificate",
                "-e",
                "robots=off",
                "--rotate-dns",
                "--timeout",
                "60",
예제 #6
0
# be too big. The deadline is optional.
project = Project(
    # Project name.
    title='freeml',
    # HTML snippet: the project logo plus a heading that links to the
    # website and to the tracker leaderboard.
    project_html='''
    <img class="project-logo" alt="logo" src="https://www.archiveteam.org/images/6/65/Freeml_logo-20190809.PNG" height="50px"/>
    <h2>freeml.com <span class="links"><a href="http://www.freeml.com/">Website</a> &middot; <a href="http://tracker.archiveteam.org/freeml/">Leaderboard</a></span></h2>
    ''',
)

pipeline = Pipeline(
    CheckIP(),
    GetItemFromTracker('http://%s/%s' % (TRACKER_HOST, TRACKER_ID), downloader,
                       VERSION), PrepareDirectories(warc_prefix='freeml'),
    LimitConcurrent(
        NumberConfigValue(
            min=1,
            max=1,
            default='1',
            name='shared:wget_download',
            title='wget-lua threads',
            description='The maximum number of concurrent downloads.'),
        WgetDownload(WgetArgs(),
                     max_tries=2,
                     accept_on_exit_code=[0, 4, 8],
                     env={
                         'item_dir': ItemValue('item_dir'),
                         'item_value': ItemValue('item_value'),
                         'item_type': ItemValue('item_type'),
                         'warc_file_base': ItemValue('warc_file_base')
                     }),
    ),
    PrepareStatsForTracker(
        defaults={