Example #1
0
    def insert(data):
        """Insert a published post row into ``wp_posts`` and seed its view count.

        Reads ``title``, ``content``, ``type``, ``url`` and ``send_time`` from
        ``data``. Title and content are escaped before being placed in the
        statement; the title is also used as ``post_name``.

        Returns the value handed back by ``SqlService.api`` for the post
        insert (it is used below as the post id of the meta row), or ``False``
        when that value is ``None``.
        """
        post_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        content = MySQLdb.escape_string(data['content'])
        # The title is free text just like the content: escape it so a quote
        # in the title cannot break out of the SQL string literal.
        title = MySQLdb.escape_string(data['title'])

        # NOTE(review): type/url/send_time are still interpolated unescaped;
        # their types are not visible here, so they are left untouched.
        insert_sql = (
            "insert into wp_posts(post_author, post_date, post_date_gmt, "
            " post_excerpt, to_ping, pinged, post_content_filtered, "
            " post_title, post_content, post_status,comment_status, ping_status, "
            " post_name, post_type, from_type, from_url, from_ctime)"
            " values('1', '{post_date}', '{post_date_gmt}',"
            "  '', '', '', '', "
            " '{title}', '{content}', 'publish', 'open', 'open', "
            " '{post_name}', 'post', '{from_type}', '{from_url}', '{from_ctime}')"
        ).format(post_date=post_date, post_date_gmt=post_date,
                 title=title, content=content,
                 post_name=title, from_type=data['type'], from_url=data['url'],
                 from_ctime=data['send_time'])

        res = SqlService.api(insert_sql, 'execute')
        if res is None:
            return False

        # Seed the view counter with a small random number.
        views_count = random.randint(1, 80)
        insert_meta_sql = "insert into wp_postmeta(post_id, meta_key, meta_value) value (%s, 'post_views_count', '%s')" % (
            res, views_count)
        service_logger.log(insert_meta_sql)
        SqlService.api(insert_meta_sql, 'execute')

        return res
Example #2
0
    def insert_meta(post_id, attachment_id):
        """Insert a ``_thumbnail_id`` row into ``wp_postmeta`` for ``post_id``.

        The statement is logged before it is executed. Returns the value from
        ``SqlService.api``, or ``False`` when that value is ``None``.
        """
        template = "insert into wp_postmeta(post_id, meta_key, meta_value) value (%s, '_thumbnail_id', '%s')"
        statement = template % (post_id, attachment_id)
        service_logger.log(statement)

        outcome = SqlService.api(statement, 'execute')
        return False if outcome is None else outcome
Example #3
0
    def handle(self):
        """Log the URL and run ``self._handle()``, logging any exception.

        Exceptions raised by ``_handle`` are not propagated: the formatted
        traceback and the URL are sent to ``service_logger.error`` instead.
        """
        service_logger.log(self.url)

        try:
            self._handle()
        except Exception:
            # Task boundary: record the full traceback rather than crash.
            # The bound exception object was never used, and the old
            # ``except Exception, err`` spelling is Python-2-only syntax.
            service_logger.error("task-exception", {
                "msg": traceback.format_exc(),
                "url": self.url
            })
Example #4
0
    def _others(self):
        """Extract movie detail fields from ``self.html``.

        Each field is located with a regular expression of the form
        ``◎<label>(.*?)<br />``; the first match wins. The captured value has
        ``&nbsp;`` entities and all whitespace removed. For ``score`` only the
        part before the first ``/`` is kept.

        Returns a dict that always contains the keys pre-filled below (empty
        string when a field is not found), plus ``category`` when it is found.
        A failure while processing one field is logged and does not affect the
        other fields.
        """
        others = {
            "name": "",
            "name_cn": "",
            "year": "",
            "country": "",
            "language": "",
            "font": "",
            "release_date": "",
            "score": "",
            "file_size": "",
            "movie_duration": "",
            "director": "",
            "actors": "",
        }

        fields = {
            'name_cn': '◎译  名(.*?)<br />',
            'name': '◎片  名(.*?)<br />',
            'year': '◎年  代(.*?)<br />',
            'country': '◎(产  地|国  家)(.*?)<br />',
            'category': '◎类  别(.*?)<br />',
            'language': '◎语  言(.*?)<br />',
            'font': '◎字  幕(.*?)<br />',
            'release_date': '◎上映日期(.*?)<br />',
            'score': '◎(IMDB评分|豆瓣评分)(.*?)<br />',
            'file_size': '◎文件大小(.*?)<br />',
            'movie_duration': '◎片  长(.*?)<br />',
            'director': '◎导  演(.*?)<br />',
            'actors': '◎主  演(.*?)<br />',
        }

        for key, regex in fields.items():
            try:
                matches = re.findall(regex, self.html, re.S)
                if not matches:
                    continue

                first = matches[0]
                # Patterns with an alternation group yield (label, value)
                # tuples; the value is the second group.
                value = first[1] if isinstance(first, tuple) else first

                # Score looks like "8.5/10 ..."; keep only the numerator.
                if key == 'score' and '/' in value:
                    value = value.split('/')[0]

                value = value.replace('&nbsp;', '')
                # Strip every whitespace character. The previous
                # ``value.replace("\s", '')`` only removed a literal
                # backslash followed by "s", not whitespace.
                value = re.sub(r'\s', '', value)
                others[key] = value

            except Exception as e:
                # Best effort per field: log and move on to the next one.
                service_logger.log(key + ':' + regex)
                service_logger.log('except:' + repr(e))

        return others
Example #5
0
    def insert(object_id, term_taxonomy_id):
        """Insert a row into ``wp_term_relationships``.

        The statement is logged, then executed through ``SqlService.api``.
        Returns ``True`` unless ``SqlService.api`` returned ``None``.
        """
        template = "insert into wp_term_relationships(object_id, term_taxonomy_id) values('{object_id}', '{term_taxonomy_id}')"
        statement = template.format(object_id=object_id,
                                    term_taxonomy_id=term_taxonomy_id)
        service_logger.log(statement)

        return SqlService.api(statement, 'execute') is not None
Example #6
0
    def insert(term_id, taxonomy='post_tag'):
        """Insert a row into ``wp_term_taxonomy`` with an empty description.

        ``taxonomy`` defaults to ``'post_tag'``; the original comment lists
        ``post_tag`` and ``category`` as the expected values.

        Returns the value from ``SqlService.api``, or ``False`` when that
        value is ``None``.
        """
        template = "insert into wp_term_taxonomy(term_id, taxonomy, description) values('{term_id}', '{taxonomy}', '')"
        statement = template.format(term_id=term_id, taxonomy=taxonomy)
        service_logger.log(statement)

        outcome = SqlService.api(statement, 'execute')
        return False if outcome is None else outcome
Example #7
0
    def insert(name):
        """Insert a term into ``wp_terms`` with a URL-quoted slug.

        The slug is the percent-encoded UTF-8 form of ``name``. The name is
        escaped before being placed in the statement so that a quote in it
        cannot terminate the SQL string literal.

        Returns the value from ``SqlService.api``, or ``False`` when that
        value is ``None``.
        """
        # Local import so this method does not rely on a module-level import.
        import MySQLdb

        encoded_name = name.encode('utf8')
        slug = urllib.quote(encoded_name)
        # Escape the UTF-8 bytes, then decode again so the value formatted
        # into the statement has the same text type as before.
        safe_name = MySQLdb.escape_string(encoded_name).decode('utf8')

        insert_sql = "insert into wp_terms(name, slug) values('{name}', '{slug}')" \
                     .format(name=safe_name, slug=slug)

        service_logger.log(insert_sql)

        res = SqlService.api(insert_sql, 'execute')
        if res is not None:
            return res

        return False
Example #8
0
    def insert_video(data):
        """Insert a published video post into ``wp_posts`` and seed its view count.

        Reads ``title``, ``content``, ``type``, ``url`` and ``send_time`` from
        ``data`` and the movie details from ``data['others']``. An empty
        ``score`` or ``year`` is stored as ``0``. All free-text values are
        escaped before being placed in the statement.

        Returns the value handed back by ``SqlService.api`` for the post
        insert (it is used below as the post id of the meta row), or ``False``
        when that value is ``None``.
        """
        others = data['others']
        post_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        content = MySQLdb.escape_string(data['content'])
        # The title doubles as post_name; escape it once and use it for both.
        title = MySQLdb.escape_string(data['title'])
        movies_name = MySQLdb.escape_string(others['name'])
        alias_name = MySQLdb.escape_string(others['name_cn'])

        score = others['score']
        score = 0 if score == '' else float(score)

        # A missing year arrives as '' and int('') raises ValueError; treat it
        # the same way as a missing score.
        year = others['year']
        year = 0 if year == '' else int(year)

        # NOTE(review): type/url/send_time are still interpolated unescaped;
        # their types are not visible here, so they are left untouched.
        insert_sql = (
            "insert into wp_posts(post_author, post_date, post_date_gmt, "
            " post_excerpt, to_ping, pinged, post_content_filtered, "
            " post_title, post_content, post_status,comment_status, ping_status, "
            " post_name, post_type, from_type, from_url, from_ctime,"
            " year, director, movie_duration, file_size, show_font,"
            " score, movies_name, alias_name, language, country, actors)"
            " values('1', '{post_date}', '{post_date_gmt}',"
            " '', '', '', '', "
            " '{title}', '{content}', 'publish', 'open', 'open', "
            " '{post_name}', 'post', '{from_type}', '{from_url}', '{from_ctime}', "
            " '{year}', '{director}', '{movie_duration}', '{file_size}', '{show_font}', "
            " '{score}', '{movies_name}', '{alias_name}', '{language}', '{country}', '{actors}')"
        ).format(post_date=post_date, post_date_gmt=post_date,
                 title=title, content=content,
                 post_name=title, from_type=data['type'], from_url=data['url'],
                 from_ctime=data['send_time'],
                 year=year,
                 director=MySQLdb.escape_string(others['director']),
                 movie_duration=MySQLdb.escape_string(others['movie_duration']),
                 file_size=MySQLdb.escape_string(others['file_size']),
                 show_font=MySQLdb.escape_string(others['font']),
                 score=score, movies_name=movies_name, alias_name=alias_name,
                 language=MySQLdb.escape_string(others['language']),
                 country=MySQLdb.escape_string(others['country']),
                 actors=MySQLdb.escape_string(others['actors']))

        res = SqlService.api(insert_sql, 'execute')
        if res is None:
            return False

        # Seed the view counter with a small random number.
        views_count = random.randint(1, 80)
        insert_meta_sql = "insert into wp_postmeta(post_id, meta_key, meta_value) value (%s, 'post_views_count', '%s')" % (
            res, views_count)
        service_logger.log(insert_meta_sql)
        SqlService.api(insert_meta_sql, 'execute')

        return res
Example #9
0
    def insert(data):
        """Insert a row into ``wp_article`` built from ``data``.

        Reads ``type``, ``parent``, ``category``, ``title``, ``content``,
        ``tags``, ``image``, ``send_time`` and ``url`` from ``data``;
        ``create_time`` is the current Unix timestamp. Title and content are
        escaped before being placed in the statement.

        Returns ``True`` unless ``SqlService.api`` returned ``None``.
        """
        # Local import so this method does not rely on a module-level import.
        import MySQLdb

        create_ts = int(time.time())
        # Title and content are free text; escape them so a quote cannot
        # terminate the SQL string literal.
        title = MySQLdb.escape_string(data['title'])
        content = MySQLdb.escape_string(data['content'])

        # NOTE(review): the remaining fields are still interpolated unescaped;
        # their types are not visible here, so they are left untouched.
        insert_sql = (
            "insert into wp_article(type, parent, category, "
            " title, content, tags, "
            " image, send_time, url, create_time)"
            " values('{type}', '{parent}', '{category}', '{title}', '{content}', '{tags}', "
            " '{image}', '{send_time}', '{url}', {create_time})"
        ).format(type=data['type'], parent=data['parent'], category=data['category'],
                 title=title, content=content, tags=data['tags'],
                 image=data['image'], send_time=data['send_time'], url=data['url'],
                 create_time=create_ts)

        # Log the statement before executing it.
        service_logger.log(insert_sql)

        res = SqlService.api(insert_sql, 'execute')
        return res is not None
Example #10
0
    def get_douban_image(name, w=480, h=320):
        """Look up an image for ``name`` via a Baidu search and upload it.

        Scans the ``.c-container`` elements of the result page (at most the
        first nine) and takes the ``src`` of the ``img`` in the first one
        whose text contains ``name``. When an image URL is found it is passed
        to ``ImportService.upload_image`` with ``iscut=False`` and the given
        ``w``/``h``.

        Returns the result of ``upload_image``, or ``''`` when no image URL
        was found.
        """
        image = ''
        # NOTE(review): ``name`` is appended to the query string as-is;
        # whether ``get_url_html`` URL-encodes it is not visible here.
        url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=' + name
        html = get_url_html(url)

        doc = pq(html)
        for index, container in enumerate(doc('.c-container').items(), 1):
            result = pq(container)
            if name in result.text():
                # attr() yields None when there is no <img>/src; fall back to
                # '' so the "not found" path below is taken instead of
                # failing on string concatenation with None.
                image = result('img').attr('src') or ''
                break
            if index > 8:
                break

        if image != '':
            service_logger.log('百度搜索图片:' + image)
            image = ImportService.upload_image(image, iscut=False, w=w, h=h)

        return image
Example #11
0
    def insert_image(image, post_id, from_type, from_url):
        """Register ``image`` as an attachment of ``post_id`` and set it as thumbnail.

        Inserts an ``attachment`` row into ``wp_posts`` whose ``guid`` is
        ``Config.BASE_URL + 'wp-content/uploads/' + image``, then three
        ``wp_postmeta`` rows: ``_wp_attached_file`` and
        ``_wp_attachment_metadata`` on the attachment, and ``_thumbnail_id``
        on ``post_id``. The image dimensions are read from
        ``Config.IMAGE_PATH + '/' + image``.

        Returns the value handed back by ``SqlService.api`` for the
        attachment insert (used as the attachment id), or ``False`` when that
        value is ``None``.
        """
        import mimetypes

        post_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        post_name = 'origin_' + time.strftime("%Y%m%d%H%M%S", time.localtime()) \
            + str(random.randint(10000, 99999))
        guid = Config.BASE_URL + 'wp-content/uploads/' + image
        # Building the type from the raw extension gives "image/jpg" for .jpg
        # files, which is not a registered MIME type. Prefer the standard
        # mapping and fall back to the old behaviour for unknown extensions.
        mime_type = mimetypes.guess_type(guid)[0] or ('image/' + guid.split('.')[-1])

        insert_sql = (
            "insert into wp_posts(post_author, post_date, post_date_gmt, "
            " post_excerpt, to_ping, pinged, post_content_filtered, "
            " post_title, post_content, post_status,comment_status, ping_status, "
            " post_name, post_type, from_type, from_url, "
            " post_parent, guid, post_mime_type)"
            " values('1', '{post_date}', '{post_date_gmt}',"
            "  '', '', '', '', "
            " '{title}', '{content}', 'inherit', 'open', 'closed', "
            " '{post_name}', 'attachment', '{from_type}', '{from_url}', "
            " '{post_parent}', '{guid}', '{mime_type}')"
        ).format(post_date=post_date, post_date_gmt=post_date,
                 title=post_name, content='',
                 post_name=post_name, from_type=from_type, from_url=from_url,
                 post_parent=post_id, guid=guid, mime_type=mime_type)

        res = SqlService.api(insert_sql, 'execute')
        if res is None:
            return False

        # Relative path of the uploaded file.
        insert_meta1_sql = "insert into wp_postmeta(post_id, meta_key, meta_value) value (%s, '_wp_attached_file', '%s')" % (
            res, image)
        service_logger.log(insert_meta1_sql)
        SqlService.api(insert_meta1_sql, 'execute')

        # Attachment metadata, stored PHP-serialized.
        img = Image.open(Config.IMAGE_PATH + '/' + image)
        imo = {
            "width": img.size[0],
            "height": img.size[1],
            "file": image,
            "sizes": []
        }
        insert_meta2_sql = "insert into wp_postmeta(post_id, meta_key, meta_value) value (%s, '_wp_attachment_metadata', '%s')" % (
            res, phpserialize.dumps(imo))
        service_logger.log(insert_meta2_sql)
        SqlService.api(insert_meta2_sql, 'execute')

        # Point the parent post's thumbnail at the new attachment.
        insert_meta3_sql = "insert into wp_postmeta(post_id, meta_key, meta_value) value (%s, '_thumbnail_id', '%s')" % (
            post_id, res)
        service_logger.log(insert_meta3_sql)
        SqlService.api(insert_meta3_sql, 'execute')

        return res
Example #12
0
    def __init__(self, url=None):
        """Store ``url``, log a start banner and immediately run ``handle()``."""
        self.url = url
        service_logger.log('#########开始抓取网页########')

        # Construction kicks off the work straight away.
        self.handle()
Example #13
0
    def insert_handle(data, type='article'):
        """Import one scraped item: image, post, category and tags.

        Steps, in order:
          1. Normalise ``data['parent']`` (several tech labels collapse to
             '技术'; '其他' becomes '其它').
          2. Obtain an image: upload ``data['image']`` when given, otherwise
             (or when a video upload fails) search by
             ``data['others']['name']``.
          3. Insert the post (``insert_video`` for ``type == 'video'``,
             ``insert`` otherwise) and attach the image.
          4. Look up or create the category and each comma-separated tag,
             link them to the post and update their counts.

        Returns ``False`` when a video has no image or the post insert fails;
        ``True`` otherwise.
        """
        cates = [
            '技术', 'it', 'IT', 'php', 'python', 'nginx', 'java', 'jquery', 'js',
            '前端'
        ]
        if data['parent'] in cates:
            data['parent'] = '技术'

        if data['parent'] == '其他':
            data['parent'] = '其它'

        # Target image size depends on the item type.
        if type == 'video':
            width, height = 480, 320
        else:
            width, height = 300, 200

        # Download the image.
        image = ''
        if data['image'] != '':
            image = ImportService.upload_image(data['image'],
                                               iscut=True,
                                               w=width,
                                               h=height)
            if type == 'video' and image == '':
                service_logger.log('图片下载失败:' + data['image'])
                # Fall back to a search by movie name.
                image = ImportService.get_douban_image(data['others']['name'],
                                                       w=width,
                                                       h=height)
        else:
            # No image supplied: search by movie name.
            image = ImportService.get_douban_image(data['others']['name'],
                                                   w=width,
                                                   h=height)

        if type == 'video' and image == '':
            service_logger.log('video图片无法下载:' + data['image'])
            return False

        # Insert the post row.
        if type == 'video':
            ID = PostsModel.insert_video(data)
        else:
            ID = PostsModel.insert(data)
        if ID is False:
            service_logger.log('插入失败:' + data['url'])
            return False

        # Attach the image to the post.
        if image != '':
            PostsModel.insert_image(image, ID, data['type'], data['url'])

        # Look up the category, creating it when missing.
        cate = TermsModel.get(data['parent'], 'category')
        if cate is False:
            cate = {}
            term_id = TermsModel.insert(data['parent'])
            if term_id:
                cate['term_id'] = term_id
                taxonomy_id = TermTaxonomyModel.insert(term_id, 'category')
                # Only record the taxonomy id when the insert succeeded, so a
                # failed insert (False) is never linked to the post.
                if taxonomy_id is not False:
                    cate['term_taxonomy_id'] = taxonomy_id

        # Link the post to the category.
        if 'term_taxonomy_id' in cate:
            TermRelationshipsModel.insert(ID, cate['term_taxonomy_id'])
            # Refresh the usage count.
            TermTaxonomyModel.update_count(cate['term_taxonomy_id'])

        # Look up each tag, creating it when missing.
        if data['tags'] != '':
            for tag in data['tags'].split(','):
                tag = tag.strip()
                if not tag:
                    # Skip empty entries produced by stray commas.
                    continue

                resu = TermsModel.get(tag, 'post_tag')
                if resu is False:
                    resu = {}
                    term_id = TermsModel.insert(tag)
                    # Same failure checks as the category path above.
                    if term_id:
                        resu['term_id'] = term_id
                        taxonomy_id = TermTaxonomyModel.insert(term_id, 'post_tag')
                        if taxonomy_id is not False:
                            resu['term_taxonomy_id'] = taxonomy_id

                # Link the post to the tag.
                if 'term_taxonomy_id' in resu:
                    TermRelationshipsModel.insert(ID, resu['term_taxonomy_id'])
                    # Refresh the usage count.
                    TermTaxonomyModel.update_count(resu['term_taxonomy_id'])

        return True