def login():
    """Render the login form on GET; authenticate and route the user on POST.

    Side effects on success: stores the user row in the session and stamps
    tb_user.login_time. Redirects by user level (1 -> /index, 2 -> review
    list); all failure paths flash a message and redirect back to login.
    """
    if request.method == 'GET':
        return render_template("login.html")
    if request.method == 'POST':
        # Session-lifetime tuning kept disabled (defaults to ~1 month):
        # session.permanent = True
        # app.permanent_session_lifetime = timedelta(minutes=1)
        #
        # Bug fix: form.get("account") returns None when the field is absent,
        # and None.lower() raised AttributeError (HTTP 500). Treat a missing
        # field as an empty account, which falls into the "not registered" path.
        account = (request.form.get("account") or "").lower()
        mp = MysqlPool()
        user = mp.fetch_one("select * from tb_user where account=%s", account)
        if user:
            # Passwords are stored as MD5 digests; compare digest-to-digest.
            if pyUtils.getMd5(request.form.get("pwd", "")) == user['password']:
                if user['status'] == 3:
                    flash("帐号已停用,请联系管理员")
                elif user['status'] == 2:
                    flash("帐号待审核,请联系管理员")
                else:
                    session['user'] = user
                    mp = MysqlPool()
                    sql = "update tb_user set login_time=now() where id=%s"
                    mp.update(sql, user['id'])
                    if user['level'] == 1:
                        return redirect('/index')
                    elif user['level'] == 2:
                        return redirect('/review/reviewList')
            else:
                flash("密码错误")
        else:
            flash("帐号未注册")
        # Fallback for every non-success path (and unknown user levels).
        return redirect(url_for("login"))
def deleteComment():
    """Delete one tb_comment row; the id comes from the JSON request body."""
    payload = json.loads(request.get_data().decode("utf-8"))
    pool = MysqlPool()
    pool.update("delete from tb_comment where id=%s", payload.get('id'))
    return jsonify({"code": "0000"})
def updateTaskDiscount():
    """Set the discount of a review task from the JSON request body."""
    payload = json.loads(request.get_data().decode("utf-8"))
    pool = MysqlPool()
    pool.update(
        "update tb_review_task set discount=%s where id=%s",
        [payload.get('discount'), payload.get('id')],
    )
    return jsonify({"code": "0000", "msg": "折扣修改成功!"})
def updateAsinStatus():
    """Set the listing (shelf) status of a task ASIN from the JSON body."""
    payload = json.loads(request.get_data().decode("utf-8"))
    pool = MysqlPool()
    pool.update(
        "update tb_task_asin set status=%s where id=%s",
        [payload.get('status'), payload.get('id')],
    )
    return jsonify({"code": "0000", "msg": "上架状态修改成功!"})
def updateUser():
    """Update a user's status from the JSON request body.

    Bug fix: json_data was initialised to a list, so an empty request body
    made json_data.get(...) raise AttributeError (lists have no .get).
    Initialise it as a dict so the missing-body case degrades to NULL params.
    """
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "update tb_user set status=%s where id=%s"
    param = [json_data.get('status'), json_data.get('id')]
    mp.update(sql, param)
    res_json = {"code": "0000", "msg": "修改成功"}
    return jsonify(res_json)
def updateTrackStatus():
    """Toggle a tracked product's status flag (1 -> 0, anything else -> 1)."""
    payload = json.loads(request.get_data().decode("utf-8"))
    # Invert the current value sent by the client.
    new_status = 0 if payload.get('status') == 1 else 1
    pool = MysqlPool()
    pool.update(
        "update tb_amz_track_pro set status=%s where id=%s",
        [new_status, payload.get('id')],
    )
    return jsonify({"code": "0000", "msg": "状态修改成功!"})
def addUser():
    """Insert a new tb_user row (password MD5-hashed, status fixed to 1).

    Bug fix: json_data was initialised to a list, so an empty request body
    made json_data.get(...) raise AttributeError; use a dict instead.
    Duplicate accounts are reported via the unique-key IntegrityError.
    """
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "insert into tb_user(account, password, nickname, level, status, reg_time) values (%s,%s,%s,%s,1,now())"
    param = [
        json_data.get('account'),
        pyUtils.getMd5(json_data.get("password")),
        json_data.get('nickname'),
        json_data.get('level')
    ]
    msg = "用户添加成功"
    try:
        mp.update(sql, param)
    except pymysql.err.IntegrityError:
        # Account column is unique; a duplicate insert lands here.
        msg = "添加失败,用户名已存在"
    except Exception as e:
        msg = "添加失败,%s" % e
    res_json = {"code": "0000", "msg": msg}
    return jsonify(res_json)
def updatePassword():
    """Change the logged-in user's password after verifying the old one.

    Bug fix: json_data was initialised to a list, so an empty request body
    made json_data.get(...) raise AttributeError; use a dict instead.
    Returns code 0000 on success, 9999 when the old password doesn't match.
    """
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    # Verify the old password by looking up (md5(old_psw), current user id).
    find_sql = "select * from tb_user where password=%s and id=%s"
    find_param = [
        pyUtils.getMd5(json_data.get('old_psw')),
        session.get('user')['id']
    ]
    find_req = mp.fetch_all(find_sql, find_param)
    if len(find_req) == 1:
        sql = "update tb_user set password=%s where id=%s"
        param = [
            pyUtils.getMd5(json_data.get('new_psw')),
            session.get('user')['id']
        ]
        mp.update(sql, param)
        res_json = {"code": "0000", "msg": "修改成功"}
    else:
        res_json = {"code": "9999", "msg": "修改失败,原密码错误"}
    return jsonify(res_json)
def getBusinessName(seller_list, process_name):
    """Worker loop: resolve Amazon seller 'Business Name' values via Selenium.

    For each seller_id in seller_list, open the seller page through a fresh
    rotating HTTP proxy and write the scraped business name into
    tb_seller_id.company. `process_name` is a 0-based worker index used only
    for log messages. Mutates seller_list in place (pops processed entries).

    NOTE(review): formatting was reconstructed from a collapsed source line;
    nesting of the retry logic is a best-effort reading — verify against VCS.
    """
    # Randomize the Chrome major version in the UA string to look less bot-like.
    ua = UserAgent(verify_ssl=False).chrome
    ua = re.sub("Chrome/\d{2}", "Chrome/" + str(random.randint(49, 85)), ua)
    options = webdriver.ChromeOptions()
    options.add_argument("user-agent=" + ua)
    # Proxy-vendor API that returns one fresh IP:port per call (JSON).
    url = "http://ip.ipjldl.com/index.php/api/entry?method=proxyServer.tiqu_api_url&packid=0&fa=0&dt=0&groupid=0&fetch_key=&qty=1&time=1&port=1&format=json&ss=5&css=&dt=0&pro=&city=&usertype=6"
    options.add_argument("--start-maximized")
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument('blink-settings=imagesEnabled=false')
    options.add_experimental_option('useAutomationExtension', False)
    options.add_experimental_option('excludeSwitches', ['enable-logging', 'enable-automation'])
    driver = None
    # Last URL that errored; a second failure on the same URL skips the seller.
    error_url = ""
    while True:
        asin_url = ""
        try:
            # Fetch one proxy from the vendor and route Chrome through it.
            ip_data = urllib.request.urlopen(url).read()
            print(ip_data)
            json_list = list(json.loads(ip_data)['data'])
            ip = "%s:%s" % (json_list[0]['IP'], json_list[0]['Port'])
            options.add_argument(('--proxy-server=http://' + ip))
            driver = webdriver.Chrome(options=options)
            # Warm-up request (also validates the proxy is alive).
            driver.get("https://www.baidu.com")
            WebDriverWait(driver, 15).until(
                EC.visibility_of_element_located((By.ID, 'su')))
            # Pre-baked amazon.com session cookies (skip locale/captcha walls).
            cookies = [{
                'domain': 'www.amazon.com',
                'expiry': 1633407103,
                'httpOnly': False,
                'name': 'csm-hit',
                'path': '/',
                'secure': False,
                'value': 'tb:s-Z135Q1Y24PZMTTNF8DDZ|1603167100870&t:1603167103192&adb:adblk_no'
            }, {
                'domain': '.amazon.com',
                'expiry': 2082787201,
                'httpOnly': False,
                'name': 'lc-main',
                'path': '/',
                'secure': False,
                'value': 'en_US'
            }, {
                'domain': '.amazon.com',
                'expiry': 1634703091,
                'httpOnly': False,
                'name': 'session-token',
                'path': '/',
                'secure': True,
                'value': 'fxzmNhMySgaV1gVga7nbDig972AmQGFxhFgyEZISkgU6//KEtZqCk54TxZV/ttWlmA+5gxnaUgZzFBKseUNhVdQgTHbVI7sDvNIFguqFFGDHATp9swCwfYcd3ViRzafe3d9YkzdIfga0G4kRm5SyB8MRExx3AnOc6jNxeMYPpYxuhaZX8Pe3viZFX6OK551eUxMz5vMEzje8b4ugkSCVV5OKFaJsgqL/iFHyHqnntlRSPPiPwK1eZ2gUicC09p3Q'
            }, {
                'domain': '.amazon.com',
                'expiry': 1634703109,
                'httpOnly': False,
                'name': 'session-id-time',
                'path': '/',
                'secure': False,
                'value': '2082787201l'
            }, {
                'domain': '.amazon.com',
                'httpOnly': False,
                'name': 'skin',
                'path': '/',
                'secure': False,
                'value': 'noskin'
            }, {
                'domain': '.amazon.com',
                'expiry': 1634703109,
                'httpOnly': False,
                'name': 'ubid-main',
                'path': '/',
                'secure': True,
                'value': '130-0463586-1564060'
            }, {
                'domain': '.amazon.com',
                'expiry': 1634703086,
                'httpOnly': False,
                'name': 'i18n-prefs',
                'path': '/',
                'secure': False,
                'value': 'USD'
            }, {
                'domain': '.amazon.com',
                'expiry': 1634703109,
                'httpOnly': False,
                'name': 'session-id',
                'path': '/',
                'secure': True,
                'value': '147-0153722-0121323'
            }]
            for cookie in cookies:
                driver.add_cookie(cookie_dict=cookie)
            # NOTE(review): `> 1` leaves the final list element unprocessed —
            # looks like a sentinel convention; confirm with the caller.
            while len(seller_list) > 1:
                print("---第%s个线程剩余seller_id数量%s---" % (process_name + 1, len(seller_list)))
                sleep(1)
                asin_url = "https://www.amazon.com/sp?seller=" + str(
                    seller_list[0]['seller_id'])
                driver.get(asin_url)
                try:
                    WebDriverWait(driver, 10).until(
                        EC.visibility_of_element_located(
                            (By.XPATH, '//span[text()="Business Name:"]')))
                except:
                    # No business-name label: treat as a dead seller page.
                    WebDriverWait(driver, 5).until(EC.title_contains('Page Not Found'))
                    all_log.logger.error("%s页面未找到" % seller_list[0]['seller_id'])
                    seller_list.pop(0)
                    continue
                sleep(0.5)
                # The label's parent node holds "Business Name: <value>".
                business_name = driver.find_element_by_xpath(
                    '//span[text()="Business Name:"]/..').text.replace(
                        "Business Name:", "")
                if business_name:
                    update_sql = "update tb_seller_id set company=%s where seller_id=%s"
                    update_param = [business_name, seller_list[0]['seller_id']]
                    try:
                        update_mp = MysqlPool()
                        update_mp.update(update_sql, update_param)
                    except Exception as e:
                        all_log.logger.error("***%s入库报错%s***" % (seller_list[0]['seller_id'], e))
                seller_list.pop(0)
        except:
            # Any failure: log, maybe drop the offending seller, rotate proxy.
            all_log.logger.error("***第%s个线程%s报错***" % (process_name + 1, asin_url))
            if error_url == asin_url:
                # Second consecutive failure on the same URL -> skip it.
                seller_list.pop(0)
            else:
                error_url = asin_url
            if driver:
                driver.quit()
            continue
        # Inner while exhausted the list without raising -> done.
        break
    all_log.logger.info("---第%s个线程运行结束---" % (process_name + 1))
def getProData():
    """Crawl Amazon search results for up to 2000 pending keywords.

    For each keyword row in amz123_keyword_left9 (status NULL/0), search
    Amazon, filter result tiles (review count <= 70, price <= 40, not
    sponsored), open each surviving product in a new tab, scrape detail
    fields (brand, ranks, images, dates, ...), insert into tb_amz_pro_1129,
    and finally mark the keyword row status=1.

    NOTE(review): formatting was reconstructed from a collapsed source line;
    several nesting choices (esp. the `sponsored` except-branch and the rank
    retry loop) are a best-effort reading — verify against version control.
    """
    mp = MysqlPool()
    data_sql = "select * from amz123_keyword_left9 where status is null or status=0 order by id limit 2000"
    data_list = mp.fetch_all(data_sql, None)
    for data in data_list:
        # Kill any leftover Chrome processes from the previous iteration.
        os.system("taskkill /f /im chrome.exe /t")
        # Pick one of five pre-built proxy-auth Chrome extensions at random.
        proxy = "C:\\py_file\\proxyauth\\%s" % os.listdir(
            "C:\\py_file\\proxyauth")[random.randint(0, 4)]
        # proxy = 1
        all_log.logger.info("---ip=%s,keyword=%s开始采集---" % (proxy, data['keyword']))
        ua = UserAgent().chrome
        options = webdriver.ChromeOptions()
        options.add_extension(proxy)
        options.add_argument("user-agent=" + ua)
        # options.add_argument("--start-maximized")
        # options.add_argument("--headless")
        options.add_argument('blink-settings=imagesEnabled=false')
        options.add_argument("--disable-gpu")
        options.add_argument("log-level=3")
        options.add_experimental_option('useAutomationExtension', False)
        options.add_experimental_option(
            'excludeSwitches', ['enable-logging', 'enable-automation'])
        driver = webdriver.Chrome(options=options)
        driver.set_window_size(600, 600)
        # Pre-baked amazon.com session cookies (locale/session identity).
        cookies = [{
            'domain': 'www.amazon.com',
            'expiry': 1632329890,
            'httpOnly': False,
            'name': 'csm-hit',
            'path': '/',
            'secure': False,
            'value': 'tb:s-TW8A7SAQXE5512HEHN3F|1602089889292&t:1602089890223&adb:adblk_no'
        }, {
            'domain': '.amazon.com',
            'expiry': 2082787202,
            'httpOnly': False,
            'name': 'lc-main',
            'path': '/',
            'secure': False,
            'value': 'en_US'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625853,
            'httpOnly': False,
            'name': 'session-token',
            'path': '/',
            'secure': True,
            'value': '3QBwaC0p4MPUmPmkTggA/5KFuQV86y0YLrdo7ONa0Jj32bh7dV8URjqYgcRBuBz3ADk9Svq0h89qS1OuCpZy+uA1IYfO1TNpiYJaP6z6zHy2O/AO4FlwdTphm7+S2ahm1LBYNUTY+xDrwGQmgF8u6Dqx7nXqXJNSOkBCdVrQZ6a30LnhBpQgwinDvWxMFeKNsbK8LnDO+tARUPQiRm0va3zvb4gqiUAPSBe8RxIeunmQvASbwAR4Yc1WHotY6utU'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625894,
            'httpOnly': False,
            'name': 'ubid-main',
            'path': '/',
            'secure': True,
            'value': '134-4542133-6572654'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625894,
            'httpOnly': False,
            'name': 'session-id-time',
            'path': '/',
            'secure': False,
            'value': '2082787201l'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625846,
            'httpOnly': False,
            'name': 'i18n-prefs',
            'path': '/',
            'secure': False,
            'value': 'USD'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625894,
            'httpOnly': False,
            'name': 'session-id',
            'path': '/',
            'secure': True,
            'value': '132-8928912-9834042'
        }]
        # Warm-up request: verifies the proxy works before hitting Amazon.
        driver.get("https://www.baidu.com")
        try:
            WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.ID, 'su')))
        except:
            error_log.logger.error("---%s打开百度失败---" % proxy)
            continue
        for cookie in cookies:
            driver.add_cookie(cookie_dict=cookie)
        sleep(0.5)
        driver.get("https://www.amazon.com/s?k=" + data['keyword'] + "&ref=nb_sb_noss")
        try:
            WebDriverWait(driver, 15).until(
                EC.visibility_of_element_located(
                    (By.XPATH, '//div[contains(@class,"s-main-slot")]')))
        except:
            # Results grid missing: either a captcha page or a load failure.
            try:
                WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located(
                        (By.XPATH, '//h4[contains(text(),"characters you see")]')))
                error_log.logger.error("***ip=%s,keyword=%s,出现验证码,结束当前采集***" % (proxy, data['keyword']))
                driver.quit()
                continue
            except:
                pass
            # One more chance for the grid to appear before giving up.
            try:
                WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located(
                        (By.XPATH, '//div[contains(@class,"s-main-slot")]')))
            except:
                error_log.logger.error("***ip=%s,keyword=%s,页面采集错误,结束当前采集***" % (proxy, data['keyword']))
                driver.quit()
                continue
        divs = driver.find_elements_by_xpath(
            '//div[contains(@class,"s-main-slot")]/div')
        try:
            success_num = 0
            update_sql = "update amz123_keyword_left9 set status=1 where id=%s"
            for div in divs:
                asin = div.get_attribute('data-asin')
                # Real product tiles carry an ASIN starting with "B".
                if asin and str(asin).startswith("B"):
                    # Sponsored flag: the spacing-micro row marks an ad tile.
                    # NOTE(review): reconstructed nesting — the fallback to "0"
                    # is read as the except branch; confirm against VCS.
                    try:
                        div.find_element_by_xpath(
                            './/div[@class="a-row a-spacing-micro"]')
                        sponsored = "1"
                    except:
                        pass
                        sponsored = "0"
                    try:
                        price = div.find_element_by_xpath(
                            './/span[@data-a-color="base"]/span'
                        ).get_attribute("innerText").replace("$", "")
                    except:
                        price = None
                    try:
                        img1 = div.find_element_by_xpath(
                            './/img').get_attribute('src')
                    except:
                        img1 = None
                    try:
                        title = div.find_element_by_xpath(
                            './/h2/a/span').get_attribute("innerText")
                    except:
                        title = None
                    # FBA flag: "by Amazon" shipping text present in the tile.
                    try:
                        div.find_element_by_xpath(
                            './/span[contains(text(),"by Amazon")]')
                        fba = "1"
                    except:
                        fba = "0"
                    try:
                        star = div.find_element_by_xpath(
                            './/div[@class="a-row a-size-small"]/span'
                        ).get_attribute('aria-label').replace(
                            " out of 5 stars", "")
                    except:
                        star = None
                    try:
                        review = div.find_element_by_xpath(
                            './/div[@class="a-row a-size-small"]/span[2]'
                        ).get_attribute('aria-label').replace(",", "")
                    except:
                        review = "0"
                    # Tile-level filters: too many reviews, too expensive, ads.
                    try:
                        if int(review) > 70:
                            all_log.logger.info("---%s评价数为%s,跳过---" % (asin, review))
                            continue
                        if float(price) > 40:
                            all_log.logger.info("---%s价格为%s,跳过---" % (asin, price))
                            continue
                        if sponsored == "1":
                            all_log.logger.info("---%s为广告,跳过---" % asin)
                            continue
                    except:
                        all_log.logger.info("---%s过滤报错,跳过---" % asin)
                        continue
                    # Open the product detail page in a second tab.
                    pro_url = div.find_element_by_xpath(
                        './/h2/a').get_attribute("href")
                    js = 'window.open("' + pro_url + '")'
                    driver.execute_script(js)
                    driver.switch_to.window(driver.window_handles[1])
                    try:
                        WebDriverWait(driver, 15).until(
                            EC.visibility_of_element_located(
                                (By.ID, 'bylineInfo_feature_div')))
                        try:
                            brand = driver.find_element_by_xpath(
                                '//a[@id="bylineInfo"]').text.replace(
                                    'Brand: ', '').replace('Visit the ', '').replace('Store', '').strip()
                        except:
                            brand = None
                        try:
                            store = filter_str(
                                driver.find_element_by_id(
                                    'sellerProfileTriggerId').text)
                        except:
                            store = None
                        try:
                            qa = driver.find_element_by_xpath(
                                '//*[@id="askATFLink"]/span').get_attribute(
                                    'innerText').replace(
                                        " answered questions", "")
                        except:
                            qa = "0"
                        try:
                            seller_id = driver.find_element_by_id(
                                'merchantID').get_attribute("value")
                        except:
                            seller_id = None
                        try:
                            seller_num = driver.find_element_by_xpath(
                                '//div[@id="olp-upd-new-freeshipping-threshold"]//a/span'
                            ).text
                            seller_num = re.findall("\((.*)\)", seller_num)[0]
                        except:
                            seller_num = 0
                        # Best-seller rank scraping. Two page layouts exist:
                        # rank_type 0 uses getRank(); rank_type 1 reads the
                        # detail-bullets list. Retry up to 3 times.
                        br_error_num = 0
                        rank_type = 0
                        big_rank_txt = ""
                        big_rank = 0
                        mid_rank_txt = ""
                        mid_rank = 0
                        small_rank_txt = ""
                        small_rank = 0
                        while big_rank_txt == "":
                            if rank_type == 1:
                                try:
                                    big_rank_txt = driver.find_element_by_xpath(
                                        '//div[@id="detailBullets_feature_div"]/following-sibling::ul'
                                    ).get_attribute('innerText')
                                    if big_rank_txt == "":
                                        br_error_num += 1
                                except:
                                    br_error_num += 1
                                    sleep(1)
                                    big_rank_txt = ""
                            else:
                                try:
                                    big_rank_txt = getRank(driver, 1)
                                except:
                                    # getRank failed: try the alternate layout.
                                    try:
                                        WebDriverWait(driver, 5).until(
                                            EC.visibility_of_element_located((
                                                By.ID,
                                                'detailBulletsWrapper_feature_div'
                                            )))
                                        rank_type = 1
                                        big_rank_txt = driver.find_element_by_xpath(
                                            '//div[@id="detailBullets_feature_div"]/following-sibling::ul'
                                        ).get_attribute('innerText')
                                    except:
                                        br_error_num += 1
                                        sleep(1)
                                        big_rank_txt = ""
                            if br_error_num == 3:
                                all_log.logger.error("%s未采集到大类排名%s次" % (asin, br_error_num))
                                big_rank_txt = ""
                                break
                        if big_rank_txt != "":
                            if rank_type == 0:
                                # Digits-only extraction of the big-category rank.
                                big_rank_txt = re.sub("\(.*", "", big_rank_txt).strip()
                                big_rank_list = re.findall("\d", big_rank_txt)
                                big_rank = ""
                                for br in big_rank_list:
                                    big_rank += br
                            else:
                                # Bullet layout: "#" separates big/mid/small ranks.
                                for br_i, br in enumerate(
                                        big_rank_txt.split("#")):
                                    rank_txt = "#" + br.strip()
                                    if br_i == 1:
                                        big_rank_txt = re.sub(
                                            "\(.*", "", rank_txt).strip()
                                        big_rank_list = re.findall(
                                            "\d", big_rank_txt)
                                        big_rank = ""
                                        for br_1 in big_rank_list:
                                            big_rank += br_1
                                    elif br_i == 2:
                                        mid_rank_txt = rank_txt
                                        mid_rank_list = re.findall(
                                            "\d", mid_rank_txt)
                                        mid_rank = ""
                                        for mr in mid_rank_list:
                                            mid_rank += mr
                                    elif br_i == 3:
                                        small_rank_txt = rank_txt
                                        small_rank_list = re.findall(
                                            "\d", small_rank_txt)
                                        small_rank = ""
                                        for sr in small_rank_list:
                                            small_rank += sr
                        else:
                            big_rank = 0
                        if rank_type == 0:
                            # Layout 0: mid/small ranks come from getRank(2/3).
                            try:
                                mid_rank_txt = getRank(driver, 2)
                            except:
                                mid_rank_txt = ""
                            if mid_rank_txt != "":
                                mid_rank_txt = re.sub("\(.*", "", mid_rank_txt).strip()
                                mid_rank_list = re.findall("\d", mid_rank_txt)
                                mid_rank = ""
                                for mr in mid_rank_list:
                                    mid_rank += mr
                            else:
                                mid_rank = 0
                            try:
                                small_rank_txt = getRank(driver, 3)
                            except:
                                small_rank_txt = ""
                            if small_rank_txt != "":
                                small_rank_txt = re.sub(
                                    "\(.*", "", small_rank_txt).strip()
                                small_rank_list = re.findall(
                                    "\d", small_rank_txt)
                                small_rank = ""
                                for sr in small_rank_list:
                                    small_rank += sr
                            else:
                                small_rank = 0
                        try:
                            put_date = driver.find_element_by_xpath(
                                '//th[contains(text(),"Date First Available")]/following-sibling::td[1]'
                            ).get_attribute('innerText')
                            if put_date:
                                put_date = datetime.strptime(
                                    put_date, '%B %d, %Y').strftime("%Y-%m-%d")
                        except:
                            put_date = None
                        # Skip products ranked worse than 15000 (or unranked).
                        if big_rank == '' or int(
                                big_rank) == 0 or int(big_rank) > 15000:
                            all_log.logger.info("---%s大类排名为%s,跳过---" % (asin, big_rank))
                            driver.close()
                            driver.switch_to.window(driver.window_handles[0])
                            continue
                        # Scrape 2nd/3rd gallery images by clicking thumbnails;
                        # bounded retries to avoid spinning forever.
                        img2 = ''
                        img3 = ''
                        img2_num = 0
                        img2_click_num = 0
                        img3_num = 0
                        img3_click_num = 0
                        while img2 == '' and img2_click_num < 40 and img2_num < 5:
                            sleep(0.5)
                            try:
                                driver.find_element_by_xpath(
                                    '//div[@id="altImages"]/ul//li[@class="a-spacing-small template"]/following-sibling::li[2]'
                                ).click()
                            except:
                                img2_click_num += 1
                            try:
                                WebDriverWait(driver, 5).until(
                                    EC.visibility_of_element_located(
                                        (By.XPATH,
                                         '//li[contains(@class,"itemNo1")]')))
                                img2 = driver.find_element_by_xpath(
                                    '//li[contains(@class,"itemNo1")]//img'
                                ).get_attribute("src")
                            except:
                                img2_num += 1
                        while img3 == '' and img3_click_num < 40 and img3_num < 5:
                            sleep(0.5)
                            try:
                                driver.find_element_by_xpath(
                                    '//div[@id="altImages"]/ul//li[@class="a-spacing-small template"]/following-sibling::li[3]'
                                ).click()
                            except:
                                img3_click_num += 1
                            try:
                                WebDriverWait(driver, 5).until(
                                    EC.visibility_of_element_located(
                                        (By.XPATH,
                                         '//li[contains(@class,"itemNo2")]')))
                                img3 = driver.find_element_by_xpath(
                                    '//li[contains(@class,"itemNo2")]//img'
                                ).get_attribute("src")
                            except:
                                img3_num += 1
                        sql = "insert into tb_amz_pro_1129(keyword,asin,img1,img2,img3,sponsored,price,title,fba,star,review,brand,store,qa,seller_id,seller_num," \
                              "big_rank_txt,big_rank,mid_rank_txt,mid_rank,small_rank_txt,small_rank,put_date,add_date) " \
                              "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,now())"
                        sql_param = [
                            data['keyword'], asin, img1, img2, img3,
                            sponsored, price, title, fba, star, review,
                            brand, store, qa, seller_id, seller_num,
                            big_rank_txt, big_rank, mid_rank_txt, mid_rank,
                            small_rank_txt, small_rank, put_date
                        ]
                        try:
                            mp.insert(sql, sql_param)
                            all_log.logger.info("-----%s(%s)入库成功-----" % (asin, data['keyword']))
                            success_num += 1
                        except IntegrityError:
                            # Duplicate row counts as success (already stored).
                            all_log.logger.info("-----%s(%s)已存在-----" % (asin, data['keyword']))
                            success_num += 1
                        except Exception as e:
                            error_log.logger.error("-----%s(%s)入库失败%s-----" % (asin, data['keyword'], e))
                    except:
                        traceback.print_exc()
                        error_log.logger.error("-----%s---%s采集出错-----" % (data['keyword'], proxy))
                    # Close the detail tab and return to the results tab.
                    driver.close()
                    driver.switch_to.window(driver.window_handles[0])
            # All tiles processed: mark the keyword row as done.
            mp.update(update_sql, (data['id'], ))
        except:
            traceback.print_exc()
            error_log.logger.error("-----%s---%s出错-----" % (data['keyword'], proxy))
        finally:
            all_log.logger.info("---end---ip=%s,keyword=%s---" % (proxy, data['keyword']))
            driver.quit()
def getTask():
    """Run every 'working' tb_post task with accounts from fb-user.ini.

    For each working task, iterate configured accounts, skip ones that
    already commented on the task or posted in the task's group within 24h,
    pick a random comment from tb_comment, and call doComment(). On success,
    increment done_num (and done_share) or mark the task 'finish'; one
    successful account per task per run (break after success).

    NOTE(review): formatting was reconstructed from a collapsed source line;
    the nesting of the success/break branch is a best-effort reading —
    verify against version control.
    """
    mp = MysqlPool()
    sql = "select * from tb_post where status = 'working'"
    task_list = mp.fetch_all(sql, None)
    if task_list:
        # Pool of candidate comment texts shared by all tasks.
        content_sql = "select content from tb_comment"
        content_list = mp.fetch_all(content_sql, None)
        num_list = []
        for task in task_list:
            num_list.append(task['id'])
        all_log.logger.info("-----本次执行的任务列表:%s-----" % num_list)
        for task in task_list:
            # Re-read the ini each task to pick up cookie/UA updates.
            config = configparser.RawConfigParser()
            config.read("fb-user.ini", encoding="utf-8")
            # Iterating a ConfigParser yields section names; index 0 is the
            # DEFAULT section, hence the i > 0 guard below.
            for i, account in enumerate(config):
                if i > 1:
                    all_log.logger.info("#####等待5秒后,下一个帐号开始执行#####")
                    sleep(5)
                if i > 0:
                    all_log.logger.info("*****帐号%s开始执行*****%s" % (account, task['id']))
                    # task['accounts'] accumulates "name|" entries of accounts
                    # that already commented on this post.
                    if account in task['accounts']:
                        all_log.logger.info("*****该账号已执行过此任务,跳过*****")
                        continue
                    acc = {}
                    pwd = config[account]['pwd']
                    # User-agent: from the ini if present, else a random one.
                    try:
                        ua = config[account]['user-agent']
                        if not ua:
                            ua = UserAgent().chrome
                    except:
                        ua = UserAgent().chrome
                    # Cookies: reuse stored ones, otherwise log in fresh.
                    try:
                        cookies = config[account]['cookies']
                        if len(cookies) == 0:
                            cookies = login(account, pwd, ua)
                    except KeyError:
                        cookies = login(account, pwd, ua)
                    if cookies is None:
                        # Login failed: try the next account.
                        continue
                    acc['account'] = account
                    acc['cookies'] = cookies
                    acc['user-agent'] = ua
                    # Per-group rate limit: skip if this account posted in the
                    # task's group within the last 24 hours.
                    try:
                        last_time_str = config[account][task['group_id']]
                        last_time = datetime.strptime(last_time_str, "%Y-%m-%d %H:%M:%S")
                        compare_time = last_time + timedelta(days=1)
                        if datetime.now() < compare_time:
                            all_log.logger.info(
                                "---帐号%s于%s已在%s发布评论---" % (account, last_time_str, task['group_id']))
                            continue
                    except KeyError:
                        pass
                    except ValueError:
                        pass
                    # Pick one random comment text; strip literal "\r\n".
                    rd_num = random.randint(0, len(content_list) - 1)
                    content = content_list[rd_num]['content'].replace(
                        "\\r\\n", "").strip()
                    is_done, is_share, is_find = doComment(acc, task, content)
                    # Post not found: abandon this task entirely.
                    if not is_find:
                        break
                    if is_done:
                        done_num = int(task['done_num']) + 1
                        if int(task['nums']) > done_num:
                            # Target not reached yet: bump counters in place.
                            update_sql = "update tb_post set done_num=done_num+1,accounts=concat(accounts,%s),content=concat(content,%s)"
                            if is_share:
                                update_sql += ",done_share=done_share+1"
                            update_sql += " where id=%s"
                            update_param = [
                                account + "|", content + "|", task['id']
                            ]
                            mp.update(update_sql, update_param)
                        else:
                            # Target reached: mark the task finished.
                            update_sql = "update tb_post set done_num=%s,accounts=concat(accounts,%s),status='finish',finish_time=%s,content=concat(content,%s)"
                            if is_share:
                                update_sql += ",done_share=done_share+1"
                            update_sql += " where id=%s"
                            update_param = [
                                task['nums'], account,
                                datetime.now(), content, task['id']
                            ]
                            mp.update(update_sql, update_param)
                        all_log.logger.info("*****更新数据库成功*****")
                        all_log.logger.info("*****帐号%s执行完成*****%s" % (acc['account'], task['id']))
                        # One successful account per task per run.
                        break
                    else:
                        all_log.logger.info("*****帐号%s未完成*****%s" % (acc['account'], task['id']))
        all_log.logger.info("-----任务列表%s执行结束-----" % num_list)
    else:
        all_log.logger.info("-----无可执行任务-----")