Ejemplos de Doraemon.readNewImageIds en Python

Lenguaje de programación: Python

Namespace/Package Name: middlewares.doraemonMiddleware

Clase / Tipo: Doraemon

Método / Función: readNewImageIds

Ejemplos en hotexamples.com: 2

Python Doraemon.readNewImageIds - 2 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de middlewares.doraemonMiddleware.Doraemon.readNewImageIds extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

createFilePath(8)

Doraemon(5)

isEmpty(4)

isExceedRestartInterval(4)

storeFinished(3)

readNewImageIds(2)

storeMongodb(2)

getAllHasSet(2)

getDateFromString(2)

hashSet(2)

storeTxt(1)

storeHtml(1)

readNewUrls(1)

isDuplicated(1)

isFileExists(1)

compressImage(1)

getFileSize(1)

downloadImage(1)

delKey(1)

delHashSet(1)

tar(1)

Ejemplo n.º 1

Mostrar archivo

class WeixinSalticidae():
    def __init__(self):
        self.settings = Settings()
        self.getSettings()
        self.file = FileIOMiddleware()
        self.doraemon = Doraemon()
        self.doraemon.createFilePath(self.work_path_prd1)
        self.doraemon.createFilePath(self.settings.LOG_PATH)
        self.doraemon.createFilePath(self.finished_img_path)

    def getSettings(self):
        settings_name = self.settings.CreateSettings('weixin')
        self.source = settings_name['SOURCE_NAME']
        self.work_path_prd1 = settings_name['WORK_PATH_PRD1']
        self.finished_img_path = settings_name['FINISHED_IMG_PATH']
        self.finished_origin_html_path = settings_name[
            'FINISHED_ORIGIN_HTML_PATH']
        self.finished_processed_html_path = settings_name[
            'FINISHED_PROCESSED_HTML_PATH']
        self.finished_content_path = settings_name['FINISHED_CONTENT_PATH']
        self.mongo = settings_name['MONGO']
        self.name = settings_name['NAME']
        self.max_pool_size = settings_name['MAX_POOL_SIZE']
        self.url_deepinews_10002_article = self.settings.URL_DEEPINEWS_10002_ARTICLE
        self.url_deepinews_10002_image = self.settings.URL_DEEPINEWS_10002_IMAGE
        self.log_path = self.settings.LOG_PATH
        self.today = self.settings.TODAY
        self.restart_path = settings_name['RESTART_PATH']
        self.restart_interval = settings_name['RESTART_INTERVAL']
        self.regx_img = re.compile('<img(.*?)/>')
        self.regx_date = re.compile(
            '<em id="publish_time" class="rich_media_meta rich_media_meta_text">(.*?)</em>'
        )
        self.regx_img_type = re.compile('data-type="(.*?)"')
        self.regx_img_data_src = re.compile('data-src="(.*?)"')
        self.regx_img_src = re.compile('src="(.*?)"')
        self.regx_img_class = re.compile('class="(.*?)"')

    def getPostFixOfImage(self, image_type):
        if image_type == 'jpeg':
            return 'jpg'
        if image_type == 'png':
            return 'png'
        if image_type == 'gif':
            return 'gif'
        else:
            print 'Other type: {0}'.format(image_type)

    def start_requests(self):
        self.file.logger(self.log_path,
                         'Start dowload images for: {0} '.format(self.name))
        print 'Start dowload images for: {0} '.format(self.name)
        new_ids = self.doraemon.readNewImageIds(
            self.doraemon.bf_finished_image_id, self.finished_content_path)
        if len(new_ids) == 0:
            self.file.logger(self.log_path,
                             'No new image id for {0}'.format(self.name))
            print 'No new image id for {0}'.format(self.name)
            return
        self.doraemon.createFilePath(self.finished_processed_html_path)
        self.doraemon.createFilePath(self.finished_img_path)
        for id in new_ids:
            print 'Start to remove pictures in: {0}'.format(id)
            html_file = self.file.readFromHtml("{0}/{1}.html".format(
                self.finished_origin_html_path, id))
            img_list = re.findall(self.regx_img, html_file)
            date_list = re.findall(self.regx_date, html_file)
            new_html = ''
            number = 0
            for old_time in date_list:
                new_date = self.doraemon.getDateFromString(old_time)
                old_time_content = '<em id="publish_time" class="rich_media_meta rich_media_meta_text">{0}</em>'.format(
                    old_time)
                new_time_content = '<em id="publish_time" class="rich_media_meta rich_media_meta_text">{0}</em>'.format(
                    new_date)
                new_html = html_file.replace(old_time_content,
                                             new_time_content)
                html_file = new_html
            for img in img_list:
                old_img = img
                image_id = "{0}_{1}".format(id, number)
                image_data_src = ''.join(
                    re.findall(self.regx_img_data_src, img)).strip()
                image_src = re.findall(self.regx_img_src, img)
                image_type = ''.join(re.findall(self.regx_img_type,
                                                img)).strip()
                image_post_fix = self.getPostFixOfImage(image_type)
                if (self.doraemon.isEmpty(image_data_src) is True) or \
                   (self.doraemon.isEmpty(image_src) is True) or \
                   (self.doraemon.isEmpty(image_type) is True):
                    continue
                origin_image_path = "{0}/{1}.{2}".format(
                    self.finished_img_path, image_id, image_post_fix)
                print 'Start to download image: {0}'.format(image_data_src)
                self.doraemon.downloadImage(image_data_src, origin_image_path)
                image_size = self.doraemon.getFileSize(origin_image_path)
                if image_size > 60:
                    print 'Start to compress image: {0}'.format(image_data_src)
                    self.doraemon.compressImage(origin_image_path,
                                                origin_image_path, 2)
                    print 'Finished to compress image: {0}'.format(
                        image_data_src)
                print 'Finished to download image: {0}'.format(image_data_src)
                print 'Start to replace image url: {0}'.format(image_id)
                new_imgurl = "{0}{1}.{2}".format(
                    self.url_deepinews_10002_image, image_id, image_post_fix)
                # new_imgurl = '/home/dev/Data/rsyncData/prd4/weixin/img/{0}.{1}'.format(image_id, image_post_fix)
                src_list = re.findall(self.regx_img_src, img)
                img_class_list = re.findall(self.regx_img_class, img)
                for img_class in img_class_list:
                    new_img = img.replace(img_class, 'rich_pages')
                    img = new_img
                for src in src_list:
                    new_img = img.replace(src, new_imgurl)
                    img = new_img
                new_html = html_file.replace(old_img, img)
                html_file = new_html
                print 'Finished to replace image url: {0}'.format(image_id)
                number += 1
            self.doraemon.storeHtml(id, new_html,
                                    self.finished_processed_html_path)
            self.doraemon.storeFinished(self.doraemon.bf_finished_image_id, id)

Ejemplo n.º 2

Mostrar archivo

Archivo: transfer_files_2_production.py Proyecto: hulu7/news

class TransferToProduction():
    def __init__(self):
        self.settings = Settings()
        self.getSettings()
        self.file = FileIOMiddleware()
        self.request = RequestsMiddleware()
        self.doraemon = Doraemon()
        self.doraemon.createFilePath(self.work_path_prd2)
        self.doraemon.createFilePath(self.settings.LOG_PATH)
        self.doraemon.createFilePath(self.temp_folder_html)
        self.doraemon.createFilePath(self.temp_folder_img)

    def getSettings(self):
        settings_name = self.settings.CreateSettings('weixin')
        self.source = settings_name['SOURCE_NAME']
        self.work_path_prd2 = settings_name['WORK_PATH_PRD2']
        self.mongo = settings_name['MONGO_URLS']
        self.name = settings_name['NAME']
        self.finished_content_path = settings_name['FINISHED_CONTENT_PATH']
        self.finished_img_path = settings_name['FINISHED_IMG_PATH']
        self.finished_processed_html_path = settings_name[
            'FINISHED_PROCESSED_HTML_PATH']
        self.temp_folder_html = self.settings.TEMP_FOLDER_HTML
        self.temp_folder_img = self.settings.TEMP_FOLDER_IMG
        self.log_path = self.settings.LOG_PATH
        self.today = self.settings.TODAY

    def start_transfer(self):
        print 'Start {0} transfer'.format(self.name)
        new_ids = self.doraemon.readNewImageIds(
            self.doraemon.bf_finished_temp_weixin, self.finished_content_path)
        for id in new_ids:
            self.file.logger(self.log_path,
                             'Start transfer image: {0}'.format(id))
            regx_img_file = re.compile(id)
            for f in os.listdir(self.finished_img_path):
                isValidImage = regx_img_file.match(f)
                if isValidImage is None:
                    print 'Invalid image for not match: {0}'.format(f)
                    continue
                from_img_path = "{0}/{1}".format(self.finished_img_path, f)
                to_img_path = "{0}/{1}".format(self.temp_folder_img, f)
                is_from_path_exists = os.path.exists(from_img_path)
                if is_from_path_exists is False:
                    self.file.logger(self.log_path,
                                     'img of {0} not exits.'.format(f))
                    continue
                copyfile(from_img_path, to_img_path)
                print 'Finished to transfer image {0}'.format(f)
            self.file.logger(self.log_path,
                             'Start transfer html: {0}'.format(id))
            from_path = "{0}/{1}.html".format(
                self.finished_processed_html_path, id)
            to_path = "{0}/{1}.html".format(self.temp_folder_html, id)
            is_from_path_exists = os.path.exists(from_path)
            if is_from_path_exists is False:
                self.file.logger(self.log_path,
                                 'html of {0} not exits.'.format(id))
                continue
            copyfile(from_path, to_path)
            print 'Finished to transfer html {0}'.format(id)
            self.doraemon.storeFinished(self.doraemon.bf_finished_temp_weixin,
                                        id)
            print 'Finished to transfer {0}'.format(id)