コード例 #1
0
ファイル: engine.py プロジェクト: lostrhythm/crawler_test_git
    def __init__(self):
        """Create the shared 'Core' logger and the four core components.

        The same logger object is handed to the Scheduler, Downloader,
        Uploader and Monitor, in that construction order.
        """
        # Core modules share the same logger.
        shared_logger = get_logger('Core', True)
        self.logger = shared_logger

        self.Scheduler = Scheduler(shared_logger)
        self.Downloader = Downloader(shared_logger)
        self.Uploader = Uploader(shared_logger)
        self.Monitor = Monitor(shared_logger)
コード例 #2
0
def test_redirect_is_enabled(mock_requests):
    """Check that fetch_url calls the mocked request with allow_redirects=True."""
    fetcher = Downloader()
    mock_requests.return_value = create_ok_return_value()

    fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)

    # Only the redirect flag is pinned; every other keyword may be anything.
    expected_kwargs = dict(url=ANY,
                           headers=ANY,
                           timeout=ANY,
                           allow_redirects=True)
    mock_requests.assert_called_once_with(**expected_kwargs)
コード例 #3
0
def test_timeout_is_passed(mock_requests):
    """Check that a caller-supplied timeout_secs reaches the mocked request."""
    fetcher = Downloader()
    mock_requests.return_value = create_ok_return_value()

    custom_timeout = 3600
    fetcher.fetch_url(FAKE_COOKIE, FAKE_URL, timeout_secs=custom_timeout)

    # Only the timeout is pinned; every other keyword may be anything.
    mock_requests.assert_called_once_with(url=ANY,
                                          headers=ANY,
                                          allow_redirects=ANY,
                                          timeout=custom_timeout)
コード例 #4
0
def download_from_url(url, item):
    """Download ``url`` for ``item`` and report the outcome.

    URLs ending in ".m3u8" or starting with "rtmp" are rejected up front: a
    failed attempt is recorded for ``item.server`` and only an error status is
    returned.

    Side effects: ``item.downloadFilename`` is rewritten with every '/'
    replaced by '-', the target folder is created when missing, server
    statistics are saved, and on success ``move_to_libray`` is called with a
    clone of the item.

    Args:
        url: Address to download.
        item: Object providing ``server``, ``downloadFilename``,
            ``downloadSize`` and ``clone()``.

    Returns:
        dict: ``{"downloadStatus": ...}`` alone for rejected URLs; otherwise
        the keys ``downloadUrl``, ``downloadStatus``, ``downloadSize``,
        ``downloadProgress``, ``downloadCompleted`` and ``downloadFilename``.
    """
    logger.info("Intentando descargar: %s" % (url))
    if url.lower().endswith(".m3u8") or url.lower().startswith("rtmp"):
        save_server_statistics(item.server, 0, False)
        return {"downloadStatus": STATUS_CODES.error}

    # Work out the download folder and the file name
    item.downloadFilename = item.downloadFilename.replace('/', '-')
    target = filetools.join(DOWNLOAD_PATH, item.downloadFilename)
    download_path = filetools.dirname(target)
    file_name = filetools.basename(target)

    # Create the folder if it does not exist
    if not filetools.exists(download_path):
        filetools.mkdir(download_path)

    # Launch the download
    d = Downloader(url, download_path, file_name,
                   max_connections=1 + int(config.get_setting("max_connections", "downloads")),
                   block_size=2 ** (17 + int(config.get_setting("block_size", "downloads"))),
                   part_size=2 ** (20 + int(config.get_setting("part_size", "downloads"))),
                   max_buffer=2 * int(config.get_setting("max_buffer", "downloads")))
    d.start_dialog(config.get_localized_string(60332))

    # The dialog has returned. Default to an error so that a state other than
    # the three handled below can no longer leave ``status`` unbound.
    status = STATUS_CODES.error

    # The download failed
    if d.state == d.states.error:
        logger.info("Error al intentar descargar %s" % (url))

    # The download was stopped
    elif d.state == d.states.stopped:
        logger.info("Descarga detenida")
        status = STATUS_CODES.canceled

    # The download finished
    elif d.state == d.states.completed:
        logger.info("Descargado correctamente")
        status = STATUS_CODES.completed

        # A size mismatch against the expected size counts as an error
        if item.downloadSize and item.downloadSize != d.size[0]:
            status = STATUS_CODES.error

    save_server_statistics(item.server, d.speed[0], d.state != d.states.error)

    # Re-attach the item's relative folder to the file name the downloader used
    rel_dir = os.path.dirname(item.downloadFilename)
    rel_file = filetools.join(rel_dir, d.filename)

    if status == STATUS_CODES.completed:
        move_to_libray(item.clone(downloadFilename=rel_file))

    return {"downloadUrl": d.download_url, "downloadStatus": status, "downloadSize": d.size[0],
            "downloadProgress": d.progress, "downloadCompleted": d.downloaded[0], "downloadFilename": rel_file}
コード例 #5
0
def test_response_is_returned(mock_requests):
    """Check that fetch_url hands back what the mocked request returned."""
    fetcher = Downloader()
    mock_requests.return_value = create_ok_return_value()

    response = fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)

    assert response == create_ok_return_value()

    # Exactly one request is expected; its arguments are not constrained.
    unconstrained = dict(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_called_once_with(**unconstrained)
コード例 #6
0
ファイル: spider.py プロジェクト: nskyzone/Porcupine
 def __init__(self, start_monitor=True):
     """Initialise counters, the event loop, the filter and the worker objects.

     ``start_monitor`` is stored on the instance as-is. Assignment order is
     kept because the Scheduler, Downloader, Save and Monitor objects all
     receive this instance.
     """
     self.init()

     constant = core.constant
     counter_keys = (constant.TOTAL_TASK, constant.TOTAL_REQUEST,
                     constant.TOTAL_RESPONSE)
     # Every counter starts at zero.
     self.number_dict = dict.fromkeys(counter_keys, 0)
     self.color = constant.COLOR
     self.close = False
     self.loop = asyncio.get_event_loop()

     filter_context = core.bloomFilter.bloomFilterContext
     self.filter = filter_context.get_filter(settings.PROJECT_NAME)

     self.scheduler = Scheduler(self)
     self.downloader = Downloader(self, settings.DOWNLOADER_WORKER)
     self.save = Save(self, settings.SAVE_WORKER)
     self.monitor = Monitor(self)
     self.start_monitor = start_monitor
コード例 #7
0
ファイル: downloads.py プロジェクト: gacj22/WizardGacj22
def download_url(url, item, path=None, filename=None, resume=False):
    """Download ``url`` through a dialog-driven Downloader and report the result.

    URLs ending in ".m3u8" or starting with "rtmp", and items whose server is
    'torrent', are rejected with ``{"status": 3}``.

    Args:
        url: Address to download.
        item: Object providing ``server`` and ``servername``.
        path: Optional sub-path joined onto the configured 'download_path'
            setting; the configured path itself is used when omitted.
        filename: Target file name, passed through ``filetools.validate_path``.
        resume: Forwarded to the Downloader.

    Returns:
        dict: ``{"status": 3}`` for rejected inputs; otherwise
        ``download_size``, ``download_progress``, ``download_filename`` and
        ``download_path``, plus ``download_status`` (3 error, 2 stopped,
        1 completed) when the downloader ended in one of those three states.
    """
    logger.trace()

    # NOTE(review): this early return uses the key "status" while the normal
    # result uses 'download_status' - confirm which one callers read.
    if url.lower().endswith(".m3u8") or url.lower().startswith(
            "rtmp") or item.server == 'torrent':
        logger.debug('Servidor o tipo de medio no soportado')
        return {"status": 3}

    download_path = settings.get_setting('download_path', __file__)

    if path:
        path = filetools.join(download_path, path)
    else:
        path = download_path

    if not filetools.isdir(path):
        filetools.makedirs(path)

    d = Downloader(
        url=url,
        path=filetools.validate_path(path),
        filename=filetools.validate_path(filename),
        resume=resume,
        max_connections=1 + settings.get_setting("max_connections", __file__),
        block_size=2**(17 + settings.get_setting("block_size", __file__)),
        part_size=2**(20 + settings.get_setting("part_size", __file__)),
        max_buffer=2 * settings.get_setting("max_buffer", __file__))

    # '%' binds tighter than 'or', so the fallback has to be parenthesised;
    # without the parentheses item.server was never used.
    d.start_dialog("Descargas [%s]" % (item.servername or item.server))

    result = {
        'download_size': d.size[0],
        'download_progress': d.progress,
        'download_filename': d.filename,
        'download_path': path
    }

    if d.state == d.states.error:
        logger.debug("Error al intentar descargar %s" % url)
        result['download_status'] = 3

    elif d.state == d.states.stopped:
        logger.debug("Descarga detenida")
        result['download_status'] = 2

    elif d.state == d.states.completed:
        logger.debug("Descargado correctamente")
        result['download_status'] = 1

    return result
コード例 #8
0
def test_status_code_different_from_200_causes_exception(mock_requests):
    """Check that a not-found response makes fetch_url raise RuntimeError."""
    fetcher = Downloader()
    mock_requests.return_value = create_not_found_return_value()

    try:
        fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)
    except RuntimeError:
        raised = True
    else:
        raised = False

    # The request must have been made exactly once before the failure.
    unconstrained = dict(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_called_once_with(**unconstrained)
    assert raised
コード例 #9
0
def test_empty_returned_text_causes_exception(mock_requests):
    """Check that an OK response without text makes fetch_url raise RuntimeError."""
    fetcher = Downloader()
    mock_requests.return_value = create_ok_return_value_without_text()

    try:
        fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)
    except RuntimeError:
        raised = True
    else:
        raised = False

    # The request must have been made exactly once before the failure.
    unconstrained = dict(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_called_once_with(**unconstrained)
    assert raised
コード例 #10
0
 def __init__(self):
     """Load the configured components and create the engine's collaborators.

     Spiders, pipelines and both middleware lists are built through
     ``_auto_import_cls`` from the corresponding settings names. Each list
     is now built exactly once: the second, redundant assignment of
     ``spider_mids`` was removed.
     """
     self.spiders = self._auto_import_cls(SPIDERS, True)
     self.pool = Pool()
     self.pipelines = self._auto_import_cls(PIPELINES)
     self.spider_mids = self._auto_import_cls(SPIDER_MIDDLEWARES)
     self.downloader_mids = self._auto_import_cls(DOWNLOADER_MIDDLEWARES)
     self.scheduler = Scheduler(ROLE, QUEUE_TYPE)
     self.downloader = Downloader()
     self.is_running = True
     self.total_response = 0
     self.executor = BaseThreadPoolExecutor(max_workers=ASYNC_COUNT)
コード例 #11
0
def test_retries_when_service_unavailable_then_ok(mock_requests):
    """Check that a service-unavailable reply is retried and the OK reply returned."""
    fetcher = Downloader()

    # First attempt: service unavailable; second attempt: OK.
    replies = [create_service_unavailable_return_value()]
    replies.append(create_ok_return_value())
    mock_requests.side_effect = replies

    response = fetcher.fetch_url(FAKE_COOKIE, FAKE_URL, retries=3)

    assert response == create_ok_return_value()

    any_request = call(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_has_calls([any_request, any_request])
コード例 #12
0
def test_exceptions_when_internal_server_error(mock_requests):
    """Check that an internal-server-error response makes fetch_url raise RuntimeError."""
    fetcher = Downloader()
    mock_requests.return_value = create_internal_server_error_return_value()

    try:
        fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)
    except RuntimeError:
        raised = True
    else:
        raised = False

    # Exactly one request is expected.
    unconstrained = dict(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_called_once_with(**unconstrained)
    assert raised
コード例 #13
0
def test_timeout_is_propagated_when_retries_are_disabled(mock_requests):
    """Check that a Timeout from the request escapes fetch_url when no retries are asked for."""
    fetcher = Downloader()
    mock_requests.side_effect = requests.exceptions.Timeout()

    try:
        fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)
    except requests.exceptions.Timeout:
        timed_out = True
    else:
        timed_out = False

    # Exactly one attempt is expected.
    unconstrained = dict(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_called_once_with(**unconstrained)
    assert timed_out
コード例 #14
0
def test_exceptions_from_get_are_propagated(mock_requests):
    """Check that a RuntimeError raised by the mocked request escapes fetch_url."""
    fetcher = Downloader()
    mock_requests.side_effect = RuntimeError('Boom')

    try:
        fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)
    except RuntimeError:
        raised = True
    else:
        raised = False

    # Exactly one attempt is expected.
    unconstrained = dict(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_called_once_with(**unconstrained)
    assert raised
コード例 #15
0
def test_cookie_is_passed_in_headers(mock_requests):
    """Check the exact header set, including the cookie, sent by fetch_url."""
    fetcher = Downloader()
    mock_requests.return_value = create_ok_return_value()

    fetcher.fetch_url(FAKE_COOKIE, FAKE_URL)

    user_agent = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36')
    expected_headers = dict([
        ('accept-language', 'en-GB,en-US;q=0.9,en;q=0.8'),
        ('accept', '*/*'),
        ('accept-encoding', 'gzip, deflate'),
        ('user-agent', user_agent),
        ('cookie', FAKE_COOKIE),
    ])

    # Headers are compared exactly; the remaining keywords are unconstrained.
    mock_requests.assert_called_once_with(url=ANY,
                                          allow_redirects=ANY,
                                          timeout=ANY,
                                          headers=expected_headers)
コード例 #16
0
def test_timeout_is_propagated_after_last_retry_failed(mock_requests):
    """Check that a Timeout escapes fetch_url once both of two attempts time out."""
    fetcher = Downloader()

    # Two separate Timeout instances, one per attempt.
    mock_requests.side_effect = [requests.exceptions.Timeout()
                                 for _ in range(2)]

    try:
        fetcher.fetch_url(FAKE_COOKIE, FAKE_URL, retries=2)
    except requests.exceptions.Timeout:
        timed_out = True
    else:
        timed_out = False

    any_request = call(url=ANY, headers=ANY, allow_redirects=ANY, timeout=ANY)
    mock_requests.assert_has_calls([any_request, any_request])
    assert timed_out
コード例 #17
0
ファイル: core.py プロジェクト: gallyamb/FTP_client
    def start_file_downloading(self, source_path: str, path_to_save: str,
                               filename: str):
        """Start downloading ``filename``, or queue the request for later.

        The request is put on ``self.download_queue`` as
        ``('down', (source_path, path_to_save, filename))`` when
        ``self.can_continue`` is false or a transfer is already registered in
        ``self.dtp``. Otherwise a Downloader is created, stored in
        ``self.dtp``, wired to the ``passive_mode`` and ``complete`` signals,
        and the transfer is requested through ``self.pi``.
        """
        ready = self.can_continue and self.dtp is None
        if not ready:
            request = ('down', (source_path, path_to_save, filename))
            self.download_queue.put(request)
            return

        logger.debug('file %s downloading started. destination: %s' %
                     (filename, path_to_save))

        self.pi.change_dir(source_path)

        transfer = Downloader(path_to_save, filename)
        self.dtp = transfer

        # The data transfer starts when passive_mode is emitted (queued connection).
        self.pi.passive_mode.connect(
            transfer.data_transfer_process.start_transfer,
            QtCore.Qt.QueuedConnection)

        # On completion: refresh the local model, clear self.dtp, then
        # schedule the downloader object for deletion - in that order.
        completion_slots = (self.update_local_model,
                            self.set_dtp_to_none,
                            transfer.deleteLater)
        for slot in completion_slots:
            transfer.complete.connect(slot)

        self.pi.initiate_passive_mode()
        self.pi.download_file(filename)
コード例 #18
0
        UpdateStrategies_thread.start()

    def start_threads(self):
        """Entry point: call start_next_tasks_thread, then start_update_strategies_thread."""
        # The initial strategies are loaded when the Scheduler is instantiated.
        launchers = (self.start_next_tasks_thread,
                     self.start_update_strategies_thread)
        for launch in launchers:
            launch()


if __name__ == '__main__':
    # Start server_service first.
    #
    # Manual run: start the threads of the Scheduler, Monitor, Downloader and
    # Uploader in that order, then report the size of the upload-results
    # queue after every sleep(10). The imports stay where they are so each
    # module is loaded only after the previous component has started.

    SchedulerIns = Scheduler()
    SchedulerIns.start_threads()

    from core.monitor import Monitor
    MonitorIns = Monitor()
    MonitorIns.start_threads()

    from core.downloader import Downloader
    DownloaderIns = Downloader()
    DownloaderIns.start_threads()

    from core.uploader import Uploader
    UploaderIns = Uploader()
    UploaderIns.start_threads()

    while True:
        sleep(10)
        # Parenthesised single-argument print behaves the same on Python 2
        # and parses on Python 3, unlike the bare print statement.
        print(GlobalQueues.UploadResults_queue.qsize())
コード例 #19
0
from core.downloader import Downloader
import argparse

if __name__ == "__main__":
    # Command line: a single optional --prod switch.
    parser = argparse.ArgumentParser()
    parser.add_argument("--prod", action="store_true")
    args = parser.parse_args()

    project_dir = "/Users/alex/ml_class/project/"
    config_path = project_dir + "scripts/config.json"

    print("Configuring")
    # The Downloader receives False with --prod and True without it.
    # NOTE(review): presumably a test/sandbox flag - confirm against Downloader.
    downloader = Downloader(not args.prod)
    downloader.configure(config_path)
    downloader.make_client()

    print("Fetching data")
    downloader.fetch()

    print("Saving to csv files")
    downloader.to_csv(project_dir, 'data')
コード例 #20
0
 def setUp(self):
     """Attach a Downloader built with ``None`` and a ComicBus site to the test instance."""
     downloader = Downloader(None)
     self.downloader = downloader

     site = ComicBus()
     self.site = site
コード例 #21
0
def do_download(mediaurl,
                download_path,
                file_name,
                headers=None,
                silent=False,
                resume=True):
    """Download ``mediaurl`` into ``download_path`` and report the outcome.

    Args:
        mediaurl: Address to download.
        download_path: Destination folder; created when it does not exist.
        file_name: Desired file name; cleaned with ``config.text_clean``.
        headers: Headers forwarded to the Downloader. Defaults to a fresh
            empty list on every call.
        silent: When true the download is started without a dialog and
            polled every 100 ms until it reaches a final state or an abort
            is requested; otherwise ``start_dialog`` is used.
        resume: Forwarded to the Downloader.

    Returns:
        dict: ``downloadStatus``, ``downloadSize``, ``downloadCompleted``,
        ``downloadProgress``, ``downloadUrl`` and ``downloadFilename``.
    """
    # A ``[]`` default would be one list object shared by every call.
    if headers is None:
        headers = []

    # Create the destination folder if it does not exist
    if not filetools.exists(download_path):
        filetools.mkdir(download_path)

    # Clean the characters to get a valid file name
    file_name = config.text_clean(file_name)

    # Launch the download
    d = Downloader(mediaurl,
                   download_path,
                   file_name,
                   headers=headers,
                   resume=resume,
                   max_connections=1 +
                   int(config.get_setting("max_connections")),
                   block_size=2**(17 + int(config.get_setting("block_size"))),
                   part_size=2**(20 + int(config.get_setting("part_size"))),
                   max_buffer=2 * int(config.get_setting("max_buffer")))

    if silent:
        d.start()
        # Poll until the download ends or an abort is requested
        import xbmc
        # One monitor for the whole wait instead of a new one per iteration
        monitor = xbmc.Monitor()
        while not monitor.abortRequested() and d.state not in [
                d.states.error, d.states.stopped, d.states.completed
        ]:
            xbmc.sleep(100)
    else:
        d.start_dialog()

    # Download stopped, check the state: {"stopped": 0, "connecting": 1, "downloading": 2, "completed": 3, "error": 4, "saving": 5})
    if d.state == d.states.error:
        logger.info('Error en la descarga %s' % mediaurl)
        status = STATUS_CODES.error

    elif d.state == d.states.stopped:
        logger.info("Descarga detenida")
        status = STATUS_CODES.canceled

    elif d.state == d.states.completed:
        logger.info("Descargada finalizada")
        status = STATUS_CODES.completed

    else:
        # Reached, for example, when the silent wait ends on an abort request
        logger.error("Estado de descarga no previsto! %d" % d.state)
        status = STATUS_CODES.stopped

    params = {
        'downloadStatus': status,  # 3:error / 1:canceled / 2:completed
        'downloadSize': d.size[0],  # total bytes of the download
        'downloadCompleted': d.downloaded[0],  # bytes downloaded
        'downloadProgress': d.progress,  # percentage downloaded (float)
        'downloadUrl': d.download_url,  # source url
        'downloadFilename': d.filename  # file name (without path)
    }

    return params
コード例 #22
0
 def setUp(self):
     """Attach a Downloader built with ``None`` and a ManHuaBei site to the test instance."""
     downloader = Downloader(None)
     self.downloader = downloader

     site = ManHuaBei()
     self.site = site
コード例 #23
0
def create_production_fetcher(config):
    """Return a FacebookFetcher built from a new Downloader, a new FacebookSoupParser and ``config``."""
    return FacebookFetcher(Downloader(), FacebookSoupParser(), config)
コード例 #24
0
def download_from_url(url, item):
    """Download ``url`` for ``item`` behind a progress dialog and report the outcome.

    URLs ending in ".m3u8" or starting with "rtmp" are rejected up front: a
    failed attempt is recorded for ``item.server`` and only an error status is
    returned.

    Side effects: the target folder is created when missing, server
    statistics are saved, and on success ``move_to_libray`` is called with a
    clone of the item.

    Args:
        url: Address to download.
        item: Object providing ``server``, ``downloadFilename``,
            ``downloadSize`` and ``clone()``.

    Returns:
        dict: ``{"downloadStatus": ...}`` alone for rejected URLs; otherwise
        the keys ``downloadUrl``, ``downloadStatus``, ``downloadSize``,
        ``downloadProgress``, ``downloadCompleted`` and ``downloadFilename``.
    """
    logger.info("Intentando descargar: %s" % (url))
    if url.lower().endswith(".m3u8") or url.lower().startswith("rtmp"):
        save_server_statistics(item.server, 0, False)
        return {"downloadStatus": STATUS_CODES.error}

    # Work out the download folder and the file name
    target = filetools.join(DOWNLOAD_PATH, item.downloadFilename)
    download_path = filetools.dirname(target)
    file_name = filetools.basename(target)

    # Create the folder if it does not exist
    if not filetools.exists(download_path):
        filetools.mkdir(download_path)

    # Show the progress dialog
    progreso = platformtools.dialog_progress("Descargas",
                                             "Iniciando descarga...")

    # Launch the download
    d = Downloader(url, download_path, file_name)
    d.start()

    # Watch the download until it ends or the dialog is cancelled
    while d.state == d.states.downloading and not progreso.iscanceled():
        time.sleep(0.1)
        line1 = "%s" % (filetools.decode(d.filename))
        line2 = "%.2f%% - %.2f %s de %.2f %s a %.2f %s/s (%d/%d)" % (
            d.progress, d.downloaded[1], d.downloaded[2], d.size[1], d.size[2],
            d.speed[1], d.speed[2], d.connections[0], d.connections[1])
        line3 = "Tiempo restante: %s" % (d.remaining_time)
        progreso.update(int(d.progress), line1, line2, line3)

    # Monitoring is over. Work out the final status:
    # The download failed
    if d.state == d.states.error:
        logger.info("Error al intentar descargar %s" % (url))
        d.stop()
        progreso.close()
        status = STATUS_CODES.error

    # Still downloading (cancel was clicked)
    elif d.state == d.states.downloading:
        logger.info("Descarga detenida")
        d.stop()
        progreso.close()
        status = STATUS_CODES.canceled

    # The download finished
    elif d.state == d.states.completed:
        logger.info("Descargado correctamente")
        progreso.close()
        status = STATUS_CODES.completed

        # A size mismatch against the expected size counts as an error
        if item.downloadSize and item.downloadSize != d.size[0]:
            status = STATUS_CODES.error

    # Any other state used to leave the dialog open and ``status`` unbound;
    # treat it as a failed download.
    else:
        d.stop()
        progreso.close()
        status = STATUS_CODES.error

    save_server_statistics(item.server, d.speed[0], d.state != d.states.error)

    # A user cancel overrides whatever status was computed above
    if progreso.iscanceled():
        status = STATUS_CODES.canceled

    # Re-attach the item's relative folder to the file name the downloader used
    rel_dir = os.path.dirname(item.downloadFilename)
    rel_file = filetools.join(rel_dir, d.filename)

    if status == STATUS_CODES.completed:
        move_to_libray(item.clone(downloadFilename=rel_file))

    return {
        "downloadUrl": d.download_url,
        "downloadStatus": status,
        "downloadSize": d.size[0],
        "downloadProgress": d.progress,
        "downloadCompleted": d.downloaded[0],
        "downloadFilename": rel_file
    }
コード例 #25
0
def download_from_url(url, item):
    """Download ``url`` for ``item`` behind a progress dialog and report the outcome.

    The numeric statuses set here are 3 (error), 1 (stopped while still
    downloading) and 2 (completed). On success ``move_to_libray`` is called
    with a clone of the item.

    Args:
        url: Address to download.
        item: Object providing ``downloadFilename``, ``downloadSize`` and
            ``clone()``.

    Returns:
        dict: ``downloadUrl``, ``downloadStatus``, ``downloadSize``,
        ``downloadProgress``, ``downloadCompleted`` and ``downloadFilename``.
    """
    logger.info(
        "pelisalacarta.channels.descargas download_from_url - Intentando descargar: %s"
        % (url))

    # Work out the download folder and the file name
    target = filetools.join(config.get_setting("downloadpath"),
                            item.downloadFilename)
    download_path = os.path.dirname(target)
    file_name = os.path.basename(target)

    # Create the folder if it does not exist
    if not filetools.exists(download_path):
        filetools.mkdir(download_path)

    # Show the progress dialog
    progreso = platformtools.dialog_progress("Descargas",
                                             "Iniciando descarga...")

    # Launch the download
    d = Downloader(url, filetools.encode(download_path),
                   filetools.encode(file_name))
    d.start()

    # Watch the download until it ends or the dialog is cancelled
    while d.state == d.states.downloading and not progreso.iscanceled():
        time.sleep(0.1)
        line1 = "%s" % (filetools.decode(d.filename))
        line2 = "%.2f%% - %.2f %s de %.2f %s a %.2f %s/s (%d/%d)" % (
            d.progress, d.downloaded[1], d.downloaded[2], d.size[1], d.size[2],
            d.speed[1], d.speed[2], d.connections[0], d.connections[1])
        line3 = "Tiempo restante: %s" % (d.remaining_time)
        progreso.update(int(d.progress), line1, line2, line3)

    # Monitoring is over. Work out the final status:
    # The download failed
    if d.state == d.states.error:
        logger.info(
            "pelisalacarta.channels.descargas download_video - Error al intentar descargar %s"
            % (url))
        d.stop()
        progreso.close()
        status = 3

    # Still downloading (cancel was clicked)
    elif d.state == d.states.downloading:
        logger.info(
            "pelisalacarta.channels.descargas download_video - Descarga detenida"
        )
        d.stop()
        progreso.close()
        status = 1

    # The download finished
    elif d.state == d.states.completed:
        logger.info(
            "pelisalacarta.channels.descargas download_video - Descargado correctamente"
        )
        progreso.close()
        status = 2

        # A size mismatch against the expected size counts as an error
        if item.downloadSize and item.downloadSize != d.size[0]:
            status = 3

    # Any other state used to leave the dialog open and ``status`` unbound;
    # treat it as a failed download.
    else:
        d.stop()
        progreso.close()
        status = 3

    # Re-attach the item's relative folder to the file name the downloader used
    rel_dir = os.path.dirname(item.downloadFilename)
    rel_file = filetools.join(rel_dir, filetools.decode(d.filename))

    if status == 2:
        move_to_libray(item.clone(downloadFilename=rel_file))

    return {
        "downloadUrl": d.download_url,
        "downloadStatus": status,
        "downloadSize": d.size[0],
        "downloadProgress": d.progress,
        "downloadCompleted": d.downloaded[0],
        "downloadFilename": rel_file
    }
コード例 #26
0
 def setUp(self):
     """Attach a Downloader built with ``None`` to the test instance."""
     downloader = Downloader(None)
     self.downloader = downloader
コード例 #27
0
async def on_start_up():
    fastAPI_logger.info("on_start_up")
    d = Downloader(SingletonAiohttp.get_session())