Example #1
0
            [ItemInterpolation('%(item_dir)s/%(warc_file_base)s.warc.zst')]
        },
        id_function=stats_id_function,
    ), MoveFiles(),
    # Throttle the upload step. The concurrency limit is a configurable
    # number registered as 'shared:rsync_threads' (allowed 1-20, default '2').
    LimitConcurrent(
        NumberConfigValue(
            min=1,
            max=20,
            default='2',
            name='shared:rsync_threads',
            title='Rsync threads',
            description='The maximum number of concurrent uploads.'),
        # Upload the item's files via the tracker at
        # http://<TRACKER_HOST>/<TRACKER_ID>.
        UploadWithTracker(
            'http://%s/%s' % (TRACKER_HOST, TRACKER_ID),
            downloader=downloader,
            version=VERSION,
            # Files to upload: the .warc.zst (its name embeds dict_project
            # and dict_id) and the companion _data.txt, both in data_dir.
            files=[
                ItemInterpolation(
                    '%(data_dir)s/%(warc_file_base)s.%(dict_project)s.%(dict_id)s.warc.zst'
                ),
                ItemInterpolation('%(data_dir)s/%(warc_file_base)s_data.txt')
            ],
            # Source path handed to rsync; the trailing slash is significant
            # to rsync (contents of data_dir rather than the directory itself).
            rsync_target_source_path=ItemInterpolation('%(data_dir)s/'),
            # Extra rsync flags:
            #   --partial / --partial-dir .rsync-tmp  keep interrupted
            #       transfers in .rsync-tmp so they can be resumed
            #   --min-size 1  skip files smaller than 1 byte (empty files)
            #   --no-compress / --compress-level 0  disable rsync compression
            #       (presumably because .warc.zst is already compressed)
            rsync_extra_args=[
                '--recursive', '--partial', '--partial-dir', '.rsync-tmp',
                '--min-size', '1', '--no-compress', '--compress-level', '0'
            ]),
    ),
    # Final task: report the item to the same tracker URL as done, passing
    # along the item's 'stats' value.
    SendDoneToTracker(tracker_url='http://%s/%s' % (TRACKER_HOST, TRACKER_ID),
                      stats=ItemValue('stats')))
Example #2
0
		# This upload task asks the tracker for an upload target.
		# The target can be HTTP or rsync and can be changed in the tracker
		# admin panel.
		UploadWithTracker(
			TRACKER_URL,
			downloader=downloader,
			version=VERSION,
			# List the files that should be uploaded.
			# This may include directory names.
			# Note: HTTP uploads will only upload the first file on this list.
			files=[
				ItemInterpolation("%(data_dir)s/%(warc_file_base)s.warc.gz")
			],
			# The relative path for the rsync command
			# (this defines if the files are uploaded to a subdirectory on the server).
			rsync_target_source_path=ItemInterpolation("%(data_dir)s/"),
			# Extra rsync parameters: recurse into directories, and keep
			# partially transferred files in .rsync-tmp so that an interrupted
			# upload can be resumed.
			rsync_extra_args=[
				"--recursive",
				"--partial",
				"--partial-dir", ".rsync-tmp"
			]
		),
	),

	# if the item passed every task, notify the tracker and report the statistics
	SendDoneToTracker(
		tracker_url=TRACKER_URL,
		stats=ItemValue("stats")
	)
)
Example #3
0
            "data":
            [ItemInterpolation("%(item_dir)s/%(warc_file_base)s.warc.gz")]
        },
        id_function=stats_id_function,
    ), MoveFiles(),
    # Throttle the upload step. The concurrency limit is a configurable
    # number registered as "shared:rsync_threads" (allowed 1-4, default "1").
    LimitConcurrent(
        NumberConfigValue(
            min=1,
            max=4,
            default="1",
            name="shared:rsync_threads",
            title="Rsync threads",
            description="The maximum number of concurrent uploads."),
        # Upload the item's files via the tracker at
        # http://<TRACKER_HOST>/<TRACKER_ID>.
        UploadWithTracker(
            "http://%s/%s" % (TRACKER_HOST, TRACKER_ID),
            downloader=downloader,
            version=VERSION,
            # Files to upload: the item's .warc.gz in data_dir.
            files=[
                ItemInterpolation("%(data_dir)s/%(warc_file_base)s.warc.gz")
            ],
            # Source path handed to rsync; the trailing slash is significant
            # to rsync (contents of data_dir rather than the directory itself).
            rsync_target_source_path=ItemInterpolation("%(data_dir)s/"),
            # Extra rsync flags: recurse into directories, and keep partially
            # transferred files in .rsync-tmp so interrupted uploads can resume.
            rsync_extra_args=[
                "--recursive",
                "--partial",
                "--partial-dir",
                ".rsync-tmp",
            ]),
    ),
    # Final task: report the item to the same tracker URL as done, passing
    # along the item's "stats" value.
    SendDoneToTracker(tracker_url="http://%s/%s" % (TRACKER_HOST, TRACKER_ID),
                      stats=ItemValue("stats")))
Example #4
0
            "downloader": downloader,
            "version": VERSION
        },
        file_groups={
            "data":
            [ItemInterpolation("%(item_dir)s/%(warc_file_base)s.warc.gz")]
        }), MoveFiles(),
    # Throttle the upload step. The concurrency limit is a configurable
    # number registered as "shared:rsync_threads" (allowed 1-4, default "1").
    LimitConcurrent(
        NumberConfigValue(
            min=1,
            max=4,
            default="1",
            name="shared:rsync_threads",
            title="Rsync threads",
            description="The maximum number of concurrent uploads."),
        # Upload the item's files via the tracker; the tracker host is
        # hard-coded here and only the project id (TRACKER_ID) varies.
        UploadWithTracker(
            "http://tracker.archiveteam.org/%s" % TRACKER_ID,
            downloader=downloader,
            version=VERSION,
            # Files to upload: the item's .warc.gz in data_dir.
            files=[
                ItemInterpolation("%(data_dir)s/%(warc_file_base)s.warc.gz")
            ],
            # Source path handed to rsync; the trailing slash is significant
            # to rsync (contents of data_dir rather than the directory itself).
            rsync_target_source_path=ItemInterpolation("%(data_dir)s/"),
            # Extra rsync flags: recurse into directories, and keep partially
            # transferred files in .rsync-tmp so interrupted uploads can resume.
            rsync_extra_args=[
                "--recursive", "--partial", "--partial-dir", ".rsync-tmp"
            ]),
    ),
    # Final task: report the item to the same tracker URL as done, passing
    # along the item's "stats" value.
    SendDoneToTracker(tracker_url="http://tracker.archiveteam.org/%s" %
                      TRACKER_ID,
                      stats=ItemValue("stats")))