Ejemplo n.º 1
0
    def get_art(self, art):
        """Download all images that belong to one art entry.

        The image URL template and the number of images come from
        ``self.get_artist_info``.  A folder named after the template is
        created below ``self.base_dir`` and becomes the current working
        directory; every image is then fetched with
        ``self.download_and_save``.

        The loop is abandoned once more than five downloads have returned
        status ``1``.

        :param art: entry description; ``art['url']`` and ``art['title']``
            are read
        :return: None
        """
        url_tpl, total = self.get_artist_info(art['url'])

        folder = gen_fd_from_image_url(url_tpl)
        target_dir = os.path.join(self.base_dir, folder)
        helper.mkdir_p(target_dir, True)
        os.chdir(target_dir)

        # the template is filled with 1-based image numbers
        jobs = []
        for offset in range(total):
            jobs.append({
                'title': art['title'],
                'img_url': url_tpl.format(offset + 1),
            })

        failures = 0
        progress = tqdm(jobs, ascii=True, desc='%10s' % folder, leave=False)
        for job in progress:
            status = self.download_and_save(job)

            if status == 1:
                # counted as a failed download
                failures += 1
                pause = 0.5
            elif status == 3:
                pause = 1.5
            else:
                # if the local file already exists there is no need to wait
                pause = 0.0001
            time.sleep(pause)

            if failures > 5:
                log.warn('skip({}) => ({})'.format(folder, url_tpl))
                break
Ejemplo n.º 2
0
    def download_image(self, params):
        """
            Download one image and store it below ``self.base_dir``.

            Nothing is fetched when ``helper.is_file_ok`` already accepts the
            target file, and nothing is written when ``self.crawl`` returns
            an empty result.

        :param params: download description; the keys ``img_url`` and
            ``title`` are read
        :type params: dict
        :return: None
        :rtype: None
        """
        img_url = params.get('img_url')
        title = params.get('title')

        # last URL segment without its extension, zero-padded to two characters
        basename = img_url.rsplit('/', 1)[-1]
        stem = basename.split('.')[0].zfill(2)

        folder = gen_fd_from_image_url(img_url)
        target = '{}/{}/{}-{}.jpg'.format(self.base_dir, folder, title, stem)

        if helper.is_file_ok(target):
            return

        if not os.path.exists(target):
            helper.mkdir_p(target)
        else:
            # the file is present but was not accepted above: fetch it again
            log.debug('RETRY: ({})'.format('{}/{}.jpg'.format(folder, stem)))

        content = self.crawl(img_url)
        if content:
            with open(target, 'wb') as out:
                out.write(content)
Ejemplo n.º 3
0
    def spawn(self, url, force_spawn=False):
        """Prepare the per-site cache for ``url`` and obtain its cookies.

        - ``cache['site_dir']`` becomes ``<base>/<self.domain>``; the
          sub-directories ``raw`` and ``media`` are registered as
          ``cache['site_raw']`` / ``cache['site_media']`` and created
        - when the cached ``<site_raw>/homepage`` file is ok and
          ``force_spawn`` is false, cookies are loaded from
          ``<site_dir>/cookie.txt`` and no request is made
        - otherwise ``url`` is requested, the body is stored as ``homepage``,
          every entry of ``self.headers`` is replaced by the headers of the
          request that was sent, and the session cookies are dumped to
          ``cookie.txt``

        :param url: page to request; also used to derive the site home URL
        :type url:
        :param force_spawn: request the page even if a cached homepage exists
        :type force_spawn:
        :return: ``False`` when no home URL can be derived or the response
            status is not 200, ``True`` otherwise
        :rtype: bool
        """
        # NOTE(review): the returned domain is not used; ``self.domain`` is
        # read below instead -- presumably set by get_domain_home_from_url,
        # confirm.
        home, _domain = self.get_domain_home_from_url(url)
        if not home:
            return False

        self.cache['site_dir'] = os.path.join(self.cache['base'], self.domain)
        for part in ('raw', 'media'):
            key = 'site_' + part
            self.cache[key] = os.path.join(self.cache['site_dir'], part)
            helper.mkdir_p(self.cache[key], True)

        cookie_file = os.path.join(self.cache['site_dir'], 'cookie.txt')
        helper.mkdir_p(cookie_file)

        homepage = os.path.join(self.cache['site_raw'], 'homepage')
        if force_spawn or not helper.is_file_ok(homepage):
            zlog.debug('{} not exist!'.format(homepage))
        else:
            # cached homepage is usable: restore the cookies and stop here
            self.sess.cookies = self.load_cookies(cookie_file)
            return True

        res = self.sess.get(url, headers=self.__header__)
        if res.status_code != 200:
            return False
        if res:
            helper.write_file(res.content, homepage)

        # every header set ends up pointing at the headers that were sent
        for key in self.headers:
            self.headers[key] = res.request.headers

        self.dump_cookies(cookies=self.sess.cookies, save_to=cookie_file)

        return True
Ejemplo n.º 4
0
        def down(lec, which_type):
            """Download the ``mp3`` or ``doc`` resource of one lecture.

            The file is saved as
            ``<base_pth>/<which_type>/<lec['name']>.<which_type>``.  Other
            values of ``which_type`` are rejected with a warning, and a file
            that ``helper.is_file_ok`` already accepts is skipped.

            :param lec: lecture description; ``lec['name']`` and
                ``lec[which_type]`` (the download URL) are read
            :param which_type: ``'mp3'`` or ``'doc'``
            :return: None
            """
            if which_type not in ('mp3', 'doc'):
                log.warn('Not Supported of ({})'.format(which_type))
                return

            target = os.path.join(
                self.catalog_info['base_pth'],
                which_type,
                '{}.{}'.format(lec['name'], which_type),
            )
            helper.mkdir_p(target)
            if helper.is_file_ok(target):
                print()
                log.debug('SKIP {}'.format(target))
                return

            # wait 2s by default; an mp3 download waits abc.randint(3, 6) seconds
            delay = abc.randint(3, 6) if which_type == 'mp3' else 2

            source = lec[which_type]
            log.debug('[WAIT] {}s for ({}:{})...'.format(delay, target, source))
            time.sleep(delay)
            wget.download(source, out=target)
Ejemplo n.º 5
0
    def download_by_index(self, index):
        """Download every image of the page identified by ``index``.

        The page data comes from ``self.get_page_by_index``; its keys
        ``img_src``, ``time``, ``name`` and ``total`` are read.  The image
        URLs are rebuilt from ``img_src`` by replacing its trailing number
        with ``01`` .. ``total``; the images are downloaded into
        ``<base_dir>/<time>/<folder>``, which also becomes the current
        working directory.

        After the download loop the module-level ``cache_index`` is
        incremented, ``index`` is appended to the module-level ``finished``
        list and that list is written as JSON to ``/tmp/mz.done``.
        NOTE(review): this bookkeeping also runs when the loop was abandoned
        after more than five failures -- confirm that is intended.

        A ``TypeError`` raised anywhere while processing is logged and
        swallowed.

        :param index: page index; compared against string literals below
        :return: None
        """
        global cache_index
        global finished

        # indexes whose image URLs follow a different naming pattern
        # (one more trailing character is dropped and a '1' is appended
        # after the image number)
        _e_list1 = ['33201', '34949', '35850', '45364', '47526']

        if not index:
            log.info('no index found...')
            return

        dat = self.get_page_by_index(index)
        if not dat:
            log.error('fail@none: {}'.format(index))
            return

        try:
            # extension, folder name, auto-increment name, fixed part of the image url
            _ext = dat['img_src'].split('.')[-1]
            # characters to cut from the end of img_src: the extension, the
            # dot and the trailing number (presumably two digits -- confirm)
            _name_off = 3 + len(_ext)
            if index in _e_list1:
                _name_off = 4 + len(_ext)
            img = dat['img_src'][:-_name_off]

            if index == '54856':
                # special case: fixed folder name and no file-name prefix
                _name_pre = ''
                fd_img = 'a'
            elif index in _e_list1:
                fd_img, _ = get_fd_name(dat['img_src'])
                _name_pre = ''
            else:
                # prefix is the last path segment of the URL stem, the
                # folder is its last character
                _name_pre = img.split('/')[-1]
                fd_img = img[-1]
            _path_local = os.path.join(dat['time'], fd_img)

            fd = os.path.join(self.base_dir, _path_local)
            helper.mkdir_p(fd, True)
            os.chdir(fd)

            if index in _e_list1:
                _img_fmt = '{}{}1.{}'
            else:
                _img_fmt = '{}{}.{}'

            # one download job per image, numbered 01 .. total
            params = [
                {
                    'img_url': _img_fmt.format(img, str(x + 1).zfill(2), _ext),
                    'title': '{}-{}{}.{}'.format(
                        dat['name'].decode(),
                        _name_pre,
                        str(x + 1).zfill(2),
                        _ext,
                    )
                }
                for x in range(dat['total'])
            ]

            _fail_count = 0
            for para in tqdm(params, ascii=True, desc='%8s ✈ %10s' % (index, _path_local)):
                rs = self.download_and_save(para)
                if rs == self.save_status['fail']:
                    _fail_count += 1
                    time.sleep(0.5)
                elif rs == self.save_status['ok']:
                    time.sleep(1.5)
                elif rs == self.save_status['skip']:
                    # if the local file already exists, do not wait
                    time.sleep(0.0001)

                # give up on this page after more than five failed images
                if _fail_count > 5:
                    log.warn('fail@5 img of this, skip({}) => ({})'.format(index, _path_local))
                    break

            # record the index as processed and persist the list
            cache_index += 1
            finished.append(index)
            helper.write_file(json.dumps(finished), '/tmp/mz.done')
            log.warn('Done:({}/{})'.format(cache_index, index))
        except TypeError as _:
            log.error('fail@type: {}'.format(index))
Ejemplo n.º 6
0
 def _spawn(self):
     """Set up the client headers and the music directory.

     Stores ``SonimeiHeaders.post`` under the ``'post'`` key and the client
     domain under the ``'Host'`` key of ``self.ac.headers``, then calls
     ``helper.mkdir_p`` on ``self.music_save_dir`` with ``is_dir=True``.
     """
     headers = self.ac.headers
     headers['post'] = SonimeiHeaders.post
     headers['Host'] = self.ac.domain
     helper.mkdir_p(self.music_save_dir, is_dir=True)