Beispiel #1
0
class CriticsPipeline:
    """Scrapy item pipeline that writes scraped critic profiles to MySQL.

    For every item it inserts one row into ``critics`` and sets the
    originating ``reviews`` row (``item['id']``) to ``state=4``.
    """

    def open_spider(self, spider):
        """Create the ``Mysql_server`` helper when the spider starts."""
        self.mysql_server = Mysql_server()

    def process_item(self, item, spider):
        """Persist one critic item and mark its review row as processed.

        Args:
            item: Mapping with the keys ``id``, ``critic_id``, ``critic_url``,
                ``age``, ``sex``, ``product_review``, ``store_reviews`` and
                ``product_bad_review``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines keep receiving it.

        Raises:
            Exception: Whatever the database driver raises; the pending
                transaction is rolled back before the error propagates.
        """
        cursor = self.mysql_server.get_cursor()
        connection = self.mysql_server.conn
        params = (
            item['critic_id'],
            item['critic_url'],
            item['age'],
            item['sex'],
            item['product_review'],
            item['store_reviews'],
            item['product_bad_review'],
        )
        # id is sent as 0 -- presumably an AUTO_INCREMENT column that then
        # generates the real key (TODO confirm against the schema).
        sql = """insert into critics (id, critic_id, critic_url, age, sex, product_review, store_reviews, 
                    product_bad_review) values(0, %s, %s, %s, %s, %s, %s, %s)"""
        update_sql = "update reviews set state=4 where id=%s"
        try:
            # Insert and state change are committed together so a failure
            # cannot leave a stored critic whose review is never marked done.
            cursor.execute(sql, params)
            cursor.execute(update_sql, (item['id'], ))
            connection.commit()
        except Exception:
            connection.rollback()
            raise
        print('{}写入成功'.format(item['critic_url']))
        # Scrapy requires process_item to return the item (or raise DropItem).
        return item

    def close_spider(self, spider):
        """Release the database helper when the spider finishes."""
        self.mysql_server.close()
Beispiel #2
0
 def __init__(self, table, fromLang='auto', toLang='zh'):
     """Store the translation settings and set up database and API access.

     Args:
         table: Name of the table this object works on.
         fromLang: Source language code handed to the translation API.
         toLang: Target language code handed to the translation API.
     """
     environment = os.environ
     self.table = table
     self.toLang = toLang
     self.fromLang = fromLang
     # Keep both the helper and a cursor obtained from it.
     database = Mysql_server()
     self.mysql = database
     self.cursor = database.get_cursor()
     # Credentials are read from the environment; each is None when unset.
     self.appid = environment.get('baidufanyi_appid')  # your appid
     self.secretKey = environment.get('baidufanyi_secretKey')  # your secret key
Beispiel #3
0
 def start_requests(self):
     """Claim up to 100 pending reviews and yield a request per critic URL.

     Rows of ``reviews`` with ``state=2`` are selected. Rows that have a
     ``critic_url`` are set to ``state=3`` and requested; rows without one
     are set straight to ``state=4`` because there is nothing to fetch.
     All state changes are committed once and the database helper is
     closed before any request is yielded.

     Yields:
         scrapy.Request: One request per review that has a critic URL, with
         ``id`` and ``critic_url`` in ``meta`` and ``self.parse`` as callback.
     """
     mysql_server = Mysql_server()
     try:
         cursor = mysql_server.get_cursor()
         cursor.execute(
             "select id, critic_url from reviews where state=2 limit 100")
         task_list = cursor.fetchall()
         update_sql = "update reviews set state=%s where id=%s"
         for task in task_list:
             # Always pass parameters as a tuple; a bare scalar is not
             # accepted by every DB-API driver.
             new_state = 3 if task[1] else 4
             cursor.execute(update_sql, (new_state, task[0]))
         mysql_server.conn.commit()
     finally:
         # The helper is only needed for the claim step; close it so the
         # connection is not held for the whole crawl.
         mysql_server.close()
     for task in task_list:
         if not task[1]:
             continue
         meta = {'id': task[0], 'critic_url': task[1]}
         yield scrapy.Request(url=task[1],
                              callback=self.parse,
                              meta=meta,
                              dont_filter=True)
Beispiel #4
0
class ReviewsPipeline:
    """Scrapy item pipeline that writes scraped reviews to the ``reviews`` table."""

    def open_spider(self, spider):
        """Create the ``Mysql_server`` helper when the spider starts."""
        self.mysql_server = Mysql_server()

    def process_item(self, item, spider):
        """Insert one review row and commit it.

        Args:
            item: Mapping with the keys ``product_url``, ``review_url``,
                ``review_title``, ``review_body``, ``review_time``,
                ``review_raiting`` and ``critic_url``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines keep receiving it.
        """
        cursor = self.mysql_server.get_cursor()
        params = (
            item['product_url'],
            item['review_url'],
            item['review_title'],
            item['review_body'],
            item['review_time'],
            item['review_raiting'],
            item['critic_url'],
        )
        # id is sent as 0 -- presumably an AUTO_INCREMENT column that then
        # generates the real key (TODO confirm against the schema).
        sql = """insert into reviews (id, product_url, review_url, review_title, review_body, review_time, 
                    review_raiting, critic_url) values(0, %s, %s, %s, %s, %s, %s, %s)"""
        cursor.execute(sql, params)
        self.mysql_server.conn.commit()
        print('{}写入成功'.format(item['review_title']))
        # Scrapy requires process_item to return the item (or raise DropItem).
        return item

    def close_spider(self, spider):
        """Release the database helper when the spider finishes."""
        self.mysql_server.close()
Beispiel #5
0
 def open_spider(self, spider):
     """Create the ``Mysql_server`` helper and keep it on the instance."""
     server = Mysql_server()
     self.mysql_server = server
Beispiel #6
0
class TranslateScript(object):
    """Translate review titles and bodies of one table via the Baidu API.

    Rows move through ``state`` values: ``0`` rows are selected, set to ``1``
    when claimed by :meth:`get_data`, and set to ``2`` by :meth:`update_data`
    once the translated text has been written.
    """

    def __init__(self, table, fromLang='auto', toLang='zh'):
        """Store settings, open database access and read API credentials.

        Args:
            table: Name of the table to work on. It is formatted directly
                into the SQL text, so it must come from trusted code.
            fromLang: Source language code handed to the API.
            toLang: Target language code handed to the API.
        """
        env_dist = os.environ
        self.table = table
        self.toLang = toLang
        self.fromLang = fromLang
        self.mysql = Mysql_server()
        self.cursor = self.mysql.get_cursor()
        # Credentials come from the environment; each is None when unset.
        self.appid = env_dist.get('baidufanyi_appid')  # your appid
        self.secretKey = env_dist.get('baidufanyi_secretKey')  # your secret key

    @staticmethod
    def _extract_translation(result):
        """Return the translated text from a decoded API response, or None.

        The response carries one ``trans_result`` entry per input paragraph,
        so all ``dst`` values are joined with newlines instead of keeping
        only the first one.
        """
        translations = result.get('trans_result')
        if not translations:
            return None
        return '\n'.join(entry.get('dst') or '' for entry in translations)

    def translate(self, q='/'):
        """Translate ``q`` and return the translated text.

        Args:
            q: Text to translate.

        Returns:
            The translated string, or ``None`` when the request fails or the
            response holds no translation (the reason is printed).
        """
        httpClient = None
        toLang = self.toLang
        fromLang = self.fromLang
        myurl = '/api/trans/vip/translate'
        salt = random.randint(32768, 65536)
        # The signature is the MD5 of appid + raw query + salt + secret key.
        sign = self.appid + str(q) + str(salt) + self.secretKey
        sign = hashlib.md5(sign.encode()).hexdigest()
        myurl = myurl + '?appid=' + self.appid + '&q=' + urllib.parse.quote(
            q) + '&from=' + fromLang + '&to=' + toLang + '&salt=' + str(
            salt) + '&sign=' + sign
        try:
            httpClient = http.client.HTTPConnection('api.fanyi.baidu.com')
            httpClient.request('GET', myurl)
            # response is an HTTPResponse object
            response = httpClient.getresponse()
            result_all = response.read().decode("utf-8")
            result = json.loads(result_all)
            translated = self._extract_translation(result)
            if translated is None:
                # Show the API's own error payload rather than a TypeError.
                print(result)
            return translated
        except Exception as e:
            # Best effort: a failed translation must not abort the batch.
            print(e)
            return None
        finally:
            if httpClient:
                httpClient.close()

    def get_data(self):
        """Fetch up to 10 untranslated rows and mark them as claimed.

        Returns:
            The fetched rows as ``(id, review_title, review_body)`` sequences.
        """
        select_sql = "select id, review_title, review_body from {} where state=0 limit 10".format(self.table)
        self.cursor.execute(select_sql)
        data = self.cursor.fetchall()
        update_sql = "update {} set state=1 where id=%s".format(self.table)
        for record in data:
            # Parameters must be a real tuple: (x) is just x, (x,) is a tuple.
            self.cursor.execute(update_sql, (record[0],))
        self.mysql.conn.commit()
        return data

    def update_data(self, item):
        """Write the translated title and body of one row and set state=2.

        Args:
            item: Mapping with the keys ``id``, ``translate_review_title``
                and ``translate_review_body``.
        """
        params = (item['translate_review_title'], item['translate_review_body'], item['id'])
        update_sql = "update {} set translate_review_title=%s, translate_review_body=%s, state=2 where id=%s".format(
            self.table)
        self.cursor.execute(update_sql, params)
        self.mysql.conn.commit()
        print('{}更新成功'.format(item['id']))

    def close_link(self):
        """Close the underlying database connection."""
        self.mysql.conn.close()

    def start(self):
        """Translate one batch of rows, then close the database connection.

        The connection is closed even when processing a record raises.
        """
        try:
            data = self.get_data()
            for record in data:
                translate_review_title = ''
                translate_review_body = ''
                if record[2]:
                    translate_review_body = self.translate(q=record[2]) or ''
                    # Pause between API calls.
                    time.sleep(2)
                if record[1]:
                    translate_review_title = self.translate(q=record[1]) or ''
                    time.sleep(1)
                item = {
                    'id': record[0],
                    'translate_review_title': translate_review_title,
                    'translate_review_body': translate_review_body,
                }
                self.update_data(item)
        finally:
            self.close_link()