예제 #1
0
 def query(self, sql, params=None):
     """Run a read query and return every row it produces.

     Args:
         sql: SQL statement handed to ``cursor.execute``.
         params: Optional values passed to ``cursor.execute`` alongside
             ``sql`` so callers can bind parameters instead of building
             the SQL string themselves. Defaults to ``None``, which is
             exactly what this method always passed before, so existing
             ``query(sql)`` callers behave the same.

     Returns:
         The result of ``cursor.fetchall()``, or ``None`` if executing or
         fetching raised (the error is logged, not re-raised).
     """
     cursor = self.get_cursor()
     try:
         cursor.execute(sql, params)
         return cursor.fetchall()
     except Exception as e:
         # Best-effort contract: callers receive None rather than an exception.
         log.error("mysql query error: %s", e)
         return None
     finally:
         # Runs on both the success and the failure path.
         cursor.close()
예제 #2
0
 def executemany(self, sql, params=None):
     """Execute ``sql`` once per parameter set and commit the batch.

     Args:
         sql: SQL statement handed to ``cursor.executemany``.
         params: Sequence of parameter sets passed to
             ``cursor.executemany``.

     Returns:
         ``cursor.rowcount`` read after a successful commit, or ``0`` if
         executing or committing raised (the error is logged, not
         re-raised).
     """
     cursor = self.get_cursor()
     try:
         cursor.executemany(sql, params)
         self.conn.commit()
         return cursor.rowcount
     except Exception as e:
         log.error("mysql executemany error: %s", e)
         # Discard whatever part of the batch was applied; otherwise it
         # could be committed by a later, unrelated commit on this
         # connection (assumes autocommit is off, as the explicit commit
         # above suggests).
         try:
             self.conn.rollback()
         except Exception as rollback_error:
             # Keep the best-effort contract: a failing rollback (e.g. a
             # dropped connection) must not turn into an exception here.
             log.error("mysql executemany rollback error: %s", rollback_error)
         return 0
     finally:
         # Runs on both the success and the failure path.
         cursor.close()
예제 #3
0
    def _craw(self, url, param=None, *args):
        """Fetch a page of Juejin articles and store the new, well-liked ones.

        POSTs ``param`` (JSON-encoded) to ``url``. For each article in the
        response whose ``likeCount`` is above the threshold and for which
        ``third_post_db.find_by_pt_id`` returns ``None``, a ``ThirdPost`` is
        built; the collected posts are then passed to ``self.batch_insert``.

        Args:
            url: Endpoint to POST to.
            param: Request payload, serialized with ``json.dumps``.
            *args: ``args[0]`` is the like-count threshold; an article is
                kept only when its ``likeCount`` is strictly greater.
                It is read once a 200 response arrives, so omitting it
                raises ``IndexError`` at that point.

        Returns:
            None. Failures (non-200 status, response without data) are
            logged and end the call early.
        """
        res = requests.post(url, json.dumps(param), headers=header)
        if res.status_code != 200:
            # Previously dropped silently; leave a trace for operators.
            log.error("[%s] request to %s failed: HTTP %s",
                      self.third_name, url, res.status_code)
            return

        like_total = args[0]  # minimum number of likes (exclusive)
        # juejin response
        body_json = res.json()
        # NOTE(review): raw payload dump kept from the original; consider
        # removing it or routing it through the logger.
        print(body_json)
        data = body_json.get('data')
        if data is None:
            # %s formatting instead of "+" so a non-string ``errors`` value
            # (or a missing key) is logged rather than raising here.
            log.error("爬取掘金失败%s", body_json.get('errors'))
            return
        article_list = data['articleFeed']['items']['edges']

        res_list = []
        for edge in article_list:
            arti = edge['node']

            # Cheap in-memory check first, so articles below the threshold
            # never cost a database lookup.
            if arti['likeCount'] <= like_total:
                continue
            # Skip articles the lookup already knows about.
            if third_post_db.find_by_pt_id(arti['id'], self.third_id) is not None:
                continue

            # Build the record.
            post = ThirdPost(self.third_id, self.third_name, 0)
            post.tags = ",".join(t['title'] for t in arti['tags'])
            # Fields: article id, title, author, content, like count,
            # comment count, redirect url, creation time.
            post.post_id = arti['id']
            post.title = arti['title']
            post.author = arti['user']['username']
            post.content = arti['content']
            post.like_num = arti['likeCount']
            post.comment_num = arti['commentsCount']
            post.redirect_url = arti['originalUrl']
            post.creatime = arrow.get(
                arti['createdAt']).format('YYYY-MM-DD HH:mm:ss')

            res_list.append(post)
        log.info("[%s]爬取-> %s  %d条记录", self.third_name, url, len(res_list))
        self.batch_insert(res_list)