# Arguments handed to the download task below. Everything comes from
# module-level constants except the finished-WARC directory, which is read
# from the environment (raises KeyError when FINISHED_WARCS_DIR is unset).
wpull_args = WpullArgs(
    default_user_agent=DEFAULT_USER_AGENT,
    wpull_exe=WPULL_EXE,
    youtube_dl_exe=YOUTUBE_DL,
    phantomjs_exe=PHANTOMJS,
    finished_warcs_dir=os.environ["FINISHED_WARCS_DIR"],
    warc_max_size=WARC_MAX_SIZE,
)

# Task objects are constructed and passed to Pipeline in the order listed.
pipeline = Pipeline(
    CheckIP(),
    # AO_ONLY / LARGE come from `env` via .get(), so either may be None when
    # the corresponding key is absent.
    GetItemFromQueue(
        control,
        pipeline_id,
        downloader,
        ao_only=env.get('AO_ONLY'),
        large=env.get('LARGE'),
    ),
    StartHeartbeat(control),
    SetFetchDepth(),
    PreparePaths(),
    WriteInfo(),
    DownloadUrlFile(control),
    # NOTE(review): AcceptAny() presumably means no exit code is treated as
    # a failure -- confirm against its definition.
    WgetDownload(
        wpull_args,
        accept_on_exit_code=AcceptAny(),
        # Environment for the download task: two per-item interpolated
        # values, the Redis URL, and the PATH of this process (KeyError if
        # PATH is unset).
        env={
            'ITEM_IDENT': ItemInterpolation('%(ident)s'),
            'LOG_KEY': ItemInterpolation('%(log_key)s'),
            'REDIS_URL': REDIS_URL,
            'PATH': os.environ['PATH'],
        },
    ),
    RelabelIfAborted(control),
    # WriteInfo appears a second time here, after the download step.
    WriteInfo(),
    MoveFiles(),
    # NOTE(review): the leading 2 is presumably the concurrency cap for the
    # wrapped upload task -- confirm against LimitConcurrent's signature.
    LimitConcurrent(
        2,
        RsyncUpload(
            target=RSYNC_URL,
            target_source_path=ItemInterpolation("%(data_dir)s"),
            files=ItemValue("all_target_files"),
            extra_args=['--partial', '--partial-dir', '.rsync-tmp'],
        ),
    ),
    StopHeartbeat(),
    MarkItemAsDone(control, EXPIRE_TIME),
)
wpull_args = WpullArgs( default_user_agent=DEFAULT_USER_AGENT, wpull_exe=WPULL_EXE, youtube_dl_exe=YOUTUBE_DL, phantomjs_exe=PHANTOMJS, finished_warcs_dir=os.environ["FINISHED_WARCS_DIR"] ) pipeline = Pipeline( CheckIP(), GetItemFromQueue(control, pipeline_id, downloader, ao_only=env.get('AO_ONLY')), StartHeartbeat(control), SetFetchDepth(), PreparePaths(), WriteInfo(), DownloadUrlFile(control), WgetDownload( wpull_args, accept_on_exit_code=AcceptAny(), env={ 'ITEM_IDENT': ItemInterpolation('%(ident)s'), 'LOG_KEY': ItemInterpolation('%(log_key)s'), 'REDIS_URL': REDIS_URL, 'PATH': os.environ['PATH'] } ), RelabelIfAborted(control), WriteInfo(), MoveFiles(), LimitConcurrent(2, RsyncUpload(