예제 #1
0
# Arguments object for the wpull download step; it is passed as the first
# argument to WgetDownload in the pipeline definition.
# NOTE: os.environ[...] raises KeyError if FINISHED_WARCS_DIR is not set.
wpull_args = WpullArgs(
    default_user_agent=DEFAULT_USER_AGENT,
    wpull_exe=WPULL_EXE,
    youtube_dl_exe=YOUTUBE_DL,
    phantomjs_exe=PHANTOMJS,
    finished_warcs_dir=os.environ["FINISHED_WARCS_DIR"],
    warc_max_size=WARC_MAX_SIZE,
)

# Task sequence handed to Pipeline, written one task per line in the same
# order the arguments are passed.
pipeline = Pipeline(
    CheckIP(),
    GetItemFromQueue(
        control,
        pipeline_id,
        downloader,
        ao_only=env.get('AO_ONLY'),
        large=env.get('LARGE'),
    ),
    StartHeartbeat(control),
    SetFetchDepth(),
    PreparePaths(),
    WriteInfo(),
    DownloadUrlFile(control),
    WgetDownload(
        wpull_args,
        accept_on_exit_code=AcceptAny(),
        env={
            'ITEM_IDENT': ItemInterpolation('%(ident)s'),
            'LOG_KEY': ItemInterpolation('%(log_key)s'),
            'REDIS_URL': REDIS_URL,
            # PATH is copied from this process's own environment.
            'PATH': os.environ['PATH'],
        },
    ),
    RelabelIfAborted(control),
    WriteInfo(),  # second WriteInfo task, placed after the download step
    MoveFiles(),
    LimitConcurrent(
        2,
        RsyncUpload(
            target=RSYNC_URL,
            target_source_path=ItemInterpolation("%(data_dir)s"),
            files=ItemValue("all_target_files"),
            extra_args=['--partial', '--partial-dir', '.rsync-tmp'],
        ),
    ),
    StopHeartbeat(),
    MarkItemAsDone(control, EXPIRE_TIME),
)
예제 #2
0
# Arguments object for the wpull download step; it is passed as the first
# argument to WgetDownload in the pipeline definition.
# NOTE: os.environ[...] raises KeyError if FINISHED_WARCS_DIR is not set.
wpull_args = WpullArgs(default_user_agent=DEFAULT_USER_AGENT,
                       wpull_exe=WPULL_EXE,
                       youtube_dl_exe=YOUTUBE_DL,
                       phantomjs_exe=PHANTOMJS,
                       finished_warcs_dir=os.environ["FINISHED_WARCS_DIR"])

pipeline = Pipeline(
    CheckIP(),
    GetItemFromQueue(control, pipeline_id, downloader, ao_only=env.get('AO_ONLY')),
    StartHeartbeat(control),
    SetFetchDepth(),
    PreparePaths(),
    WriteInfo(),
    DownloadUrlFile(control),
    WgetDownload(
        wpull_args,
        accept_on_exit_code=AcceptAny(),
        env={
            'ITEM_IDENT': ItemInterpolation('%(ident)s'),
            'LOG_KEY': ItemInterpolation('%(log_key)s'),
            'REDIS_URL': REDIS_URL,
            'PATH': os.environ['PATH']
        }
    ),
    RelabelIfAborted(control),
    WriteInfo(),
    MoveFiles(),
    LimitConcurrent(2,
        RsyncUpload(