def getFbDataByUser():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "select * from tb_post where user_id=%s "
    param = [session.get('user')['id']]
    if json_data.get('status'):
        sql += "and status=%s "
        param.append(json_data.get('status'))
    if json_data.get('keyword'):
        keyword = '%' + str(json_data.get('keyword')) + '%'
        sql += "and keyword like %s "
        param.append(keyword)
    if json_data.get('group_id'):
        group_id = '%' + str(json_data.get('group_id')) + '%'
        sql += "and group_id like %s "
        param.append(group_id)
    sql += "order by id desc"
    post_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": post_list}
    return jsonify(res_json)
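
All of these handlers go through a small MysqlPool helper (imported from db.py in Example #18) whose implementation is not shown in this listing. The sketch below is only a guess at its interface, built on pymysql and DBUtils; connection settings are placeholders and the real class may differ.

import pymysql
from dbutils.pooled_db import PooledDB

class MysqlPool:
    # Assumed stand-in: fetch_all() returns dict rows, insert() returns
    # lastrowid, update() executes a statement. All credentials are fake.
    _pool = PooledDB(creator=pymysql, host="127.0.0.1", user="root",
                     password="***", database="app", autocommit=True,
                     cursorclass=pymysql.cursors.DictCursor)

    def fetch_all(self, sql, param):
        with self._pool.connection() as conn, conn.cursor() as cur:
            cur.execute(sql, param)
            return cur.fetchall()

    def insert(self, sql, param):
        with self._pool.connection() as conn, conn.cursor() as cur:
            cur.execute(sql, param)
            return cur.lastrowid

    def update(self, sql, param):
        with self._pool.connection() as conn, conn.cursor() as cur:
            return cur.execute(sql, param)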
Example #2
def getTrackData():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "SELECT t.*,DATE_FORMAT(t.add_time,'%%Y-%%m-%%d %%H') update_time, tt.asin, tt.keyword, tt.page_size, tt.status FROM tb_amz_track_data t, tb_amz_track_pro tt " \
          "WHERE t.add_time IN ( SELECT MAX(t1.add_time) FROM tb_amz_track_data t1 GROUP BY t1.pro_id ) " \
          "AND t.pro_id = tt.id AND tt.user_id = %s "
    param = [session.get('user')['id']]
    if json_data.get('status'):
        sql += "and tt.status=%s "
        param.append(json_data.get('status'))
    if json_data.get('keyword'):
        keyword = '%' + str(json_data.get('keyword')) + '%'
        sql += "and tt.keyword like %s "
        param.append(keyword)
    if json_data.get('asin'):
        sql += "and tt.asin = %s "
        param.append(json_data.get('asin'))
    sql += "order by t.id desc"
    post_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": post_list}
    return jsonify(res_json)
Example #3
def getDataByProId():
    data = request.get_data()
    json_data = json.loads(data.decode("utf-8"))
    sql = "select *,DATE_FORMAT(t.add_time,'%%m月%%d日%%H时') update_time from tb_amz_track_data t where t.pro_id = %s order by t.id asc"
    param = [json_data.get('pro_id')]
    mp = MysqlPool()
    pro_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": pro_list}
    return jsonify(res_json)
def reviewList():
    mp = MysqlPool()
    user = session.get('user')
    user_sql = "select * from tb_user"
    user_list = None
    if user['level'] == 1:
        user_list = mp.fetch_all(user_sql, None)
    return render_template("review/review-list.html",
                           user=session.get('user'),
                           active="reviewList",
                           user_list=user_list)
def getOrderData():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "SELECT t.*,DATE_FORMAT(t.order_time,'%%Y-%%m-%%d') order_time_str,tb.profile " \
          "from tb_task_order t,tb_buyer tb where t.task_id = %s and t.buyer_id = tb.id"
    param = [json_data.get('task_id')]
    sql += " order by t.order_time desc"
    order_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": order_list}
    return jsonify(res_json)
def reviewForm():
    mp = MysqlPool()
    if request.method == 'GET':
        user = session.get('user')
        user_sql = "select * from tb_user where status=1"
        user_list = None
        if user['level'] == 1:
            user_list = mp.fetch_all(user_sql, None)
        return render_template("review/review-form.html",
                               user=user,
                               user_list=user_list,
                               active="reviewForm")
    if request.method == 'POST':
        data = request.get_data()
        json_data = json.loads(data.decode("utf-8"))
        sql = "insert into tb_review_task(user_id,asin,brand,country,img,keyword,kw_page,store," \
              "price,days_order,total_order,is_vp,note,add_time,name) values(%s,%s,%s,'us',%s,%s," \
              "%s,%s,%s,%s,%s,1,%s,now(),%s)"
        # Admins may submit tasks for another user; otherwise default to self.
        user_id = json_data.get("user_id") or session.get('user')['id']
        param = [
            user_id,
            json_data.get("asin"),
            json_data.get("brand"),
            json_data.get("img"),
            json_data.get("keyword"),
            json_data.get("kw_page"),
            json_data.get("store"),
            json_data.get("price"),
            json_data.get("days_order"),
            json_data.get("total_order"),
            json_data.get("note"),
            json_data.get("name")
        ]
        try:
            task_id = mp.insert(sql, param)
            asin_sql = "insert into tb_task_asin(asin,task_id,status,is_put) values(%s,%s,%s,%s)"
            for asin in str(json_data.get("asin")).split("|"):
                asin_param = [asin, task_id, 1, 0]
                mp.insert(asin_sql, asin_param)
            res_json = {"code": "0000", "message": "已成功提交刷单任务"}
        except Exception as e:
            res_json = {"code": "9999", "message": "提交失败%s" % e}
        return jsonify(res_json)
def getUserData():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "select t.id,t.account,t.level,t.nickname,t.status,DATE_FORMAT(t.reg_time,'%%Y-%%m-%%d %%H:%%i:%%s') reg_time," \
          "DATE_FORMAT(t.login_time,'%%Y-%%m-%%d %%H:%%i:%%s') login_time from tb_user t where 1=1 "
    param = []
    if json_data.get('level'):
        sql += "and level=%s "
        param.append(json_data.get('level'))
    user_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": user_list}
    return jsonify(res_json)
def getCpData():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    param = []
    mp = MysqlPool()
    sql = "select * from tb_comment "
    if json_data.get('content'):
        content = '%' + str(json_data.get('content')) + '%'
        sql += "where content like %s "
        param.append(content)
    sql += "order by id desc"
    cp_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": cp_list}
    return jsonify(res_json)
Example #9
def startTrack():
    url = "http://ip.ipjldl.com/index.php/api/entry?method=proxyServer.tiqu_api_url&packid=0&fa=0" \
          "&dt=0&groupid=0&fetch_key=&qty=1&time=1&port=1&format=json&ss=5&css=&dt=0&pro=&city=&usertype=6"
    i = 1
    find_mp = MysqlPool()
    find_sql = "select * from tb_amz_track_pro where status=1"
    product_list = find_mp.fetch_all(find_sql, None)
    success_list = []
    track_list = [x for x in product_list if x not in success_list]
    all_log.logger.info("####### Amazon keyword/ASIN tracking started #######")
    while len(track_list) > 0 and i < 10:
        # Fetch one fresh proxy per round, then retry whatever is left.
        ip_data = urllib.request.urlopen(url).read()
        json_list = list(json.loads(ip_data)['data'])
        json_data = json_list[0]
        ip = "%s:%s" % (json_data['IP'], json_data['Port'])
        success_list += getProData(ip, track_list)
        print("success_list=", len(success_list))
        track_list = [x for x in product_list if x not in success_list]
        all_log.logger.info("########## Tracking round %s finished ##########" % i)
        i += 1
    all_log.logger.info("####### Amazon keyword/ASIN tracking finished #######")
def getReviewData():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "SELECT t.*,DATE_FORMAT(t.add_time,'%%Y-%%m-%%d') add_time_str,u.nickname," \
          "(select count(0) from tb_task_order t1 where t1.task_id=t.id) num," \
          "(select count(0) from tb_task_order t1 where t1.task_id=t.id and t1.status=1) done_num," \
          "REPLACE(t.asin,'|',' ') as asin_str" \
          " from tb_review_task t,tb_user u where t.user_id = u.id "
    param = []
    # Non-admin users only see their own tasks.
    if session.get('user')['level'] != 1:
        sql += "and t.user_id = %s "
        param.append(session.get('user')['id'])
    if json_data.get('keyword'):
        keyword = '%' + str(json_data.get('keyword')) + '%'
        sql += "and t.keyword like %s "
        param.append(keyword)
    if json_data.get('asin'):
        asin = '%' + str(json_data.get('asin')) + '%'
        sql += "and t.asin like %s "
        param.append(asin)
    if json_data.get('user_id'):
        sql += "and t.user_id = %s "
        param.append(json_data.get('user_id'))
    sql += "order by t.status desc,t.id desc"
    review_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": review_list}
    return jsonify(res_json)
def getGroundingData():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "select t.*,t1.price,t1.status as task_status,u.nickname,t1.img," \
          "DATE_FORMAT(t.put_time,'%%Y-%%m-%%d %%H:%%i') put_time_str" \
          " from tb_task_asin t,tb_review_task t1,tb_user u where t1.id=t.task_id and t1.user_id=u.id "
    param = []
    if json_data.get('order_status'):
        sql += "and t.status = %s "
        param.append(json_data.get('order_status'))
    if json_data.get('asin'):
        sql += "and t.asin = %s "
        param.append(json_data.get('asin'))
    if json_data.get('task_id'):
        sql += "and t.task_id = %s "
        param.append(json_data.get('task_id'))
    if json_data.get('task_status'):
        sql += "and t1.status = %s "
        param.append(json_data.get('task_status'))
    sql += "order by t1.status desc,t1.user_id desc,t.id desc"
    asin_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": asin_list}
    return jsonify(res_json)
def updatePassword():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    find_sql = "select * from tb_user where password=%s and id=%s"
    find_param = [
        pyUtils.getMd5(json_data.get('old_psw')),
        session.get('user')['id']
    ]
    find_req = mp.fetch_all(find_sql, find_param)
    if len(find_req) == 1:
        sql = "update tb_user set password=%s where id=%s"
        param = [
            pyUtils.getMd5(json_data.get('new_psw')),
            session.get('user')['id']
        ]
        mp.update(sql, param)
        res_json = {"code": "0000", "msg": "修改成功"}
    else:
        res_json = {"code": "9999", "msg": "修改失败,原密码错误"}
    return jsonify(res_json)
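
A hypothetical client call for the handler above; the /updatePassword route and host are assumptions, while the old_psw/new_psw field names come straight from the code:

import requests

resp = requests.post("http://localhost:5000/updatePassword",
                     json={"old_psw": "old-secret", "new_psw": "new-secret"})
print(resp.json())  # {"code": "0000", "msg": "Password updated"} on success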
Example #13
def getGroupData():
    data = request.get_data()
    json_data = {}
    if data:
        json_data = json.loads(data.decode("utf-8"))
    mp = MysqlPool()
    sql = "select * from tb_group where 1=1 "
    param = []
    if json_data.get('type'):
        sql += "and type=%s "
        param.append(json_data.get('type'))
    if json_data.get('name'):
        name = '%' + str(json_data.get('name')) + '%'
        sql += "and name like %s "
        param.append(name)
    # bigNum/smallNum bound the group's member count from above/below.
    if json_data.get('bigNum'):
        sql += "and nums < %s "
        param.append(json_data.get('bigNum'))
    if json_data.get('smallNum'):
        sql += "and nums > %s "
        param.append(json_data.get('smallNum'))
    sql += "order by nums desc"
    group_list = mp.fetch_all(sql, param)
    res_json = {"code": "0000", "list": group_list}
    return jsonify(res_json)
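
Each list endpoint above repeats the same pattern: start from a base query, append an "and col=%s" fragment for every JSON field that is present, and collect the value into param. A hypothetical helper (not in the original code) that captures the pattern:

def apply_filters(json_data, rules, sql, param):
    # rules are (json_key, sql_fragment, use_like) triples, e.g. for
    # getGroupData: ("name", "and name like %s ", True).
    for key, fragment, use_like in rules:
        value = json_data.get(key)
        if value:
            sql += fragment
            param.append(('%' + str(value) + '%') if use_like else value)
    return sql, param

The %s placeholders keep every value parameterized even though the clause list itself is assembled dynamically.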
Example #14
def getTask():
    mp = MysqlPool()
    sql = "select * from tb_post where status = 'working'"
    task_list = mp.fetch_all(sql, None)
    if task_list:
        content_sql = "select content from tb_comment"
        content_list = mp.fetch_all(content_sql, None)
        num_list = []
        for task in task_list:
            num_list.append(task['id'])
        all_log.logger.info("-----本次执行的任务列表:%s-----" % num_list)
        for task in task_list:
            config = configparser.RawConfigParser()
            config.read("fb-user.ini", encoding="utf-8")
            # Iterating a ConfigParser yields the DEFAULT section first, so
            # the i > 0 check below skips it.
            for i, account in enumerate(config):
                if i > 1:
                    all_log.logger.info("##### Waiting 5 seconds before the next account starts #####")
                    sleep(5)
                if i > 0:
                    all_log.logger.info("***** Account %s starting *****%s" %
                                        (account, task['id']))
                    if account in task['accounts']:
                        all_log.logger.info("***** Account already ran this task, skipping *****")
                        continue
                    acc = {}
                    pwd = config[account]['pwd']
                    try:
                        ua = config[account]['user-agent']
                        if not ua:
                            ua = UserAgent().chrome
                    except:
                        ua = UserAgent().chrome
                    try:
                        cookies = config[account]['cookies']
                        if len(cookies) == 0:
                            cookies = login(account, pwd, ua)
                    except KeyError:
                        cookies = login(account, pwd, ua)
                    if cookies is None:
                        continue
                    acc['account'] = account
                    acc['cookies'] = cookies
                    acc['user-agent'] = ua
                    try:
                        last_time_str = config[account][task['group_id']]
                        last_time = datetime.strptime(last_time_str,
                                                      "%Y-%m-%d %H:%M:%S")
                        compare_time = last_time + timedelta(days=1)
                        if datetime.now() < compare_time:
                            all_log.logger.info(
                                "--- Account %s (at %s) already commented in %s ---" %
                                (account, last_time_str, task['group_id']))
                            continue
                    except KeyError:
                        pass
                    except ValueError:
                        pass
                    rd_num = random.randint(0, len(content_list) - 1)
                    content = content_list[rd_num]['content'].replace(
                        "\\r\\n", "").strip()
                    is_done, is_share, is_find = doComment(acc, task, content)
                    # Post not found; give up on this task.
                    if not is_find:
                        break
                    if is_done:
                        done_num = int(task['done_num']) + 1
                        if int(task['nums']) > done_num:
                            update_sql = "update tb_post set done_num=done_num+1,accounts=concat(accounts,%s),content=concat(content,%s)"
                            if is_share:
                                update_sql += ",done_share=done_share+1"
                            update_sql += " where id=%s"
                            update_param = [
                                account + "|", content + "|", task['id']
                            ]
                            mp.update(update_sql, update_param)
                        else:
                            update_sql = "update tb_post set done_num=%s,accounts=concat(accounts,%s),status='finish',finish_time=%s,content=concat(content,%s)"
                            if is_share:
                                update_sql += ",done_share=done_share+1"
                            update_sql += " where id=%s"
                            update_param = [
                                task['nums'], account,
                                datetime.now(), content, task['id']
                            ]
                            mp.update(update_sql, update_param)
                        all_log.logger.info("*****更新数据库成功*****")
                        all_log.logger.info("*****帐号%s执行完成*****%s" %
                                            (acc['account'], task['id']))
                        break
                    else:
                        all_log.logger.info("*****帐号%s未完成*****%s" %
                                            (acc['account'], task['id']))
        all_log.logger.info("-----任务列表%s执行结束-----" % num_list)
    else:
        all_log.logger.info("-----无可执行任务-----")
Example #15
def collectData():

    ua = UserAgent().chrome
    options = webdriver.ChromeOptions()
    options.add_argument("user-agent=" + ua)
    options.add_argument("--start-maximized")
    options.add_argument("--headless")
    options.add_argument('blink-settings=imagesEnabled=false')
    options.add_argument("--disable-gpu")
    options.add_experimental_option('useAutomationExtension', False)
    options.add_experimental_option('excludeSwitches',
                                    ['enable-logging', 'enable-automation'])
    driver = webdriver.Chrome(options=options)
    driver.get("https://www.baidu.com")
    try:
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, 'su')))
    except:
        all_log.logger.error("--- Failed to open baidu.com ---")
    # Selenium only accepts cookies for the domain currently loaded, so open
    # the target site before injecting its session cookies.
    driver.get("https://www.asinseed.com")
    cookies = [{
        'domain': 'www.asinseed.com',
        'httpOnly': True,
        'name': 'JSESSIONID',
        'path': '/',
        'secure': False,
        'value': 'B0141BDB986A2D91ADCE21BCD1ACA3D2'
    }, {
        'domain': 'www.asinseed.com',
        'expiry': 1609251926,
        'httpOnly': False,
        'name': 'asinseed-login-user',
        'path': '/',
        'secure': False,
        'value': '4291529061IrZXNTSoIlHhPKyHGfg/7TMbw6xY7YpCjminsqgfQO1ekWtRZ9/kAs/qVnCI5AMe'
    }, {
        'domain': '.asinseed.com',
        'expiry': 1638195927,
        'httpOnly': False,
        'name': 'ecookie',
        'path': '/',
        'secure': False,
        'value': 'dWcWHqqTU5LL9saj_CN'
    }, {
        'domain': 'www.asinseed.com',
        'expiry': 1606660198,
        'httpOnly': False,
        'name': 'crisp-client%2Fsocket%2Fb43aa37b-4c35-4551-a9d4-ad983960d40c',
        'path': '/',
        'sameSite': 'Lax',
        'secure': False,
        'value': '0'
    }, {
        'domain': '.asinseed.com',
        'expiry': 1669731927,
        'httpOnly': False,
        'name': '_ga',
        'path': '/',
        'secure': False,
        'value': 'GA1.2.1615561945.1606659387'
    }, {
        'domain': '.asinseed.com',
        'expiry': 1622427931,
        'httpOnly': False,
        'name': 'crisp-client%2Fsession%2Fb43aa37b-4c35-4551-a9d4-ad983960d40c',
        'path': '/',
        'sameSite': 'Lax',
        'secure': False,
        'value': 'session_f9e04788-6bf4-48fa-8a09-883989976e41'
    }, {
        'domain': '.asinseed.com',
        'expiry': 1606659960,
        'httpOnly': False,
        'name': '_gat_gtag_UA_125163434_1',
        'path': '/',
        'secure': False,
        'value': '1'
    }, {
        'domain': '.asinseed.com',
        'expiry': 1606746327,
        'httpOnly': False,
        'name': '_gid',
        'path': '/',
        'secure': False,
        'value': 'GA1.2.1043797262.1606659387'
    }, {
        'domain': '.asinseed.com',
        'expiry': 1922019384,
        'httpOnly': False,
        'name': 'w_guest',
        'path': '/',
        'secure': False,
        'value': 'NpicHiupaa1M_201129-223501'
    }]
    for cookie in cookies:
        driver.add_cookie(cookie_dict=cookie)
    sleep(0.5)
    mp = MysqlPool()
    trend_sql = "select t.* from selected_products t where t.trend_data is null or t.trend_data=''"
    trend_data_list = mp.fetch_all(trend_sql, None)
    for trend_data in trend_data_list:
        driver.get("https://www.asinseed.com/en/US?q=%s" %
                   trend_data['keyword'])
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.XPATH, '//div[@class="morris-table-inline"]')))
        trs = driver.find_elements_by_xpath(
            '//div[@class="morris-table-inline"]/../..')
        searches = ''
        for tr in trs:
            if trend_data['keyword'] == tr.find_element_by_xpath(
                    './td[2]').text:
                searches = eval(
                    tr.find_element_by_xpath('./td[3]/div').get_attribute(
                        "data-y"))
        if searches == '':
            searches = eval(
                driver.find_element_by_xpath(
                    '//div[@class="morris-table-inline"]').get_attribute(
                        "data-y"))
        update_sql = "update selected_products set trend_data=%s where id=%s"
        update_param = [str(searches), trend_data['id']]
        mp.insert(update_sql, update_param)
        all_log.logger.info("---%s趋势采集成功---" % trend_data['asin'])
        sleep(1)

    asin_sql = "select t.* from selected_products t where t.id not in (select t2.main_id from asin_searches t2 where t2.main_id=t.id)"
    asin_data_list = mp.fetch_all(asin_sql, None)
    for asin_data in asin_data_list:
        driver.get("https://www.asinseed.com/en/US?q=%s" % asin_data['asin'])
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.XPATH, '//td[@class="text-right"]')))
        trs = driver.find_elements_by_xpath('//td[@class="text-right"]/..')
        insert_sql = "insert into asin_searches(main_id,asin,keyword,searches,add_time) values"
        update_param = []
        for tr in trs:
            keyword = tr.find_element_by_xpath('./td').text
            searches = tr.find_element_by_xpath('./td[2]').text.replace(
                ",", "")
            if searches is None or searches == "":
                searches = 0
            insert_sql += "(%s,%s,%s,%s,now()),"
            update_param.append(asin_data['id'])
            update_param.append(asin_data['asin'])
            update_param.append(keyword)
            update_param.append(searches)
        if insert_sql.endswith(","):
            insert_sql = insert_sql[:-1]
        mp.insert(insert_sql, update_param)
        all_log.logger.info("---%s关联关键词成功---" % asin_data['asin'])
        sleep(1)


def bisector_list(tabulation, num):
    # Reconstructed head: the source is truncated here. Splits tabulation
    # into num sublists, spreading any remainder over the first sublists.
    new_list = []
    if len(tabulation) >= num:
        merchant = len(tabulation) // num
        remainder = len(tabulation) % num
        for i in range(1, num + 1):
            new_list.append(tabulation[(i - 1) * merchant:i * merchant])
        if remainder:
            for j in tabulation[-remainder:]:
                new_list[tabulation[-remainder:].index(j)].append(j)
        return new_list
    else:
        '''If the list is shorter than the number of parts'''
        for i in range(1, len(tabulation) + 1):
            tabulation_subset = [tabulation[i - 1]]
            new_list.append(tabulation_subset)
        return new_list
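
Under the reconstruction above, the split behaves like this:

# bisector_list([1, 2, 3, 4, 5], 3) -> [[1, 4], [2, 5], [3]]
# bisector_list([1, 2], 3)          -> [[1], [2]]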


if __name__ == "__main__":
    # getBusinessName()

    mp = MysqlPool()
    pool_num = 3
    sql = "select seller_id from tb_seller_id order by id limit 600,9000"
    all_seller_list = mp.fetch_all(sql, None)
    # getBusinessName(all_seller_list,0)
    lists = bisector_list(all_seller_list, pool_num)
    process_list = []
    for p_num, p_list in enumerate(lists):
        sleep(0.5)
        process = Process(target=getBusinessName, args=(
            p_list,
            p_num,
        ))
        process.start()
        process_list.append(process)
    for p in process_list:
        p.join()
    all_log.logger.info("运行结束")
Example #17
def getProData():
    mp = MysqlPool()
    data_sql = "select * from amz123_keyword_left9 where status is null or status=0 order by id limit 2000"
    data_list = mp.fetch_all(data_sql, None)
    for data in data_list:
        os.system("taskkill /f /im chrome.exe /t")
        proxy = "C:\\py_file\\proxyauth\\%s" % os.listdir(
            "C:\\py_file\\proxyauth")[random.randint(0, 4)]
        # proxy = 1
        all_log.logger.info("---ip=%s,keyword=%s开始采集---" %
                            (proxy, data['keyword']))
        ua = UserAgent().chrome
        options = webdriver.ChromeOptions()
        options.add_extension(proxy)
        options.add_argument("user-agent=" + ua)
        # options.add_argument("--start-maximized")
        # options.add_argument("--headless")
        options.add_argument('blink-settings=imagesEnabled=false')
        options.add_argument("--disable-gpu")
        options.add_argument("log-level=3")
        options.add_experimental_option('useAutomationExtension', False)
        options.add_experimental_option(
            'excludeSwitches', ['enable-logging', 'enable-automation'])
        driver = webdriver.Chrome(options=options)
        driver.set_window_size(600, 600)
        cookies = [{
            'domain': 'www.amazon.com',
            'expiry': 1632329890,
            'httpOnly': False,
            'name': 'csm-hit',
            'path': '/',
            'secure': False,
            'value': 'tb:s-TW8A7SAQXE5512HEHN3F|1602089889292&t:1602089890223&adb:adblk_no'
        }, {
            'domain': '.amazon.com',
            'expiry': 2082787202,
            'httpOnly': False,
            'name': 'lc-main',
            'path': '/',
            'secure': False,
            'value': 'en_US'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625853,
            'httpOnly': False,
            'name': 'session-token',
            'path': '/',
            'secure': True,
            'value': '3QBwaC0p4MPUmPmkTggA/5KFuQV86y0YLrdo7ONa0Jj32bh7dV8URjqYgcRBuBz3ADk9Svq0h89qS1OuCpZy+uA1IYfO1TNpiYJaP6z6zHy2O/AO4FlwdTphm7+S2ahm1LBYNUTY+xDrwGQmgF8u6Dqx7nXqXJNSOkBCdVrQZ6a30LnhBpQgwinDvWxMFeKNsbK8LnDO+tARUPQiRm0va3zvb4gqiUAPSBe8RxIeunmQvASbwAR4Yc1WHotY6utU'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625894,
            'httpOnly': False,
            'name': 'ubid-main',
            'path': '/',
            'secure': True,
            'value': '134-4542133-6572654'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625894,
            'httpOnly': False,
            'name': 'session-id-time',
            'path': '/',
            'secure': False,
            'value': '2082787201l'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625846,
            'httpOnly': False,
            'name': 'i18n-prefs',
            'path': '/',
            'secure': False,
            'value': 'USD'
        }, {
            'domain': '.amazon.com',
            'expiry': 1633625894,
            'httpOnly': False,
            'name': 'session-id',
            'path': '/',
            'secure': True,
            'value': '132-8928912-9834042'
        }]
        driver.get("https://www.baidu.com")
        try:
            WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.ID, 'su')))
        except:
            error_log.logger.error("---%s打开百度失败---" % proxy)
            continue
        for cookie in cookies:
            driver.add_cookie(cookie_dict=cookie)
        sleep(0.5)
        driver.get("https://www.amazon.com/s?k=" + data['keyword'] +
                   "&ref=nb_sb_noss")
        try:
            WebDriverWait(driver, 15).until(
                EC.visibility_of_element_located(
                    (By.XPATH, '//div[contains(@class,"s-main-slot")]')))
        except:
            try:
                WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located(
                        (By.XPATH,
                         '//h4[contains(text(),"characters you see")]')))
                error_log.logger.error("***ip=%s,keyword=%s,出现验证码,结束当前采集***" %
                                       (proxy, data['keyword']))
                driver.quit()
                continue
            except:
                pass
            try:
                WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located(
                        (By.XPATH, '//div[contains(@class,"s-main-slot")]')))
            except:
                error_log.logger.error("***ip=%s,keyword=%s,页面采集错误,结束当前采集***" %
                                       (proxy, data['keyword']))
                driver.quit()
                continue
        divs = driver.find_elements_by_xpath(
            '//div[contains(@class,"s-main-slot")]/div')
        try:
            success_num = 0
            update_sql = "update amz123_keyword_left9 set status=1 where id=%s"
            for div in divs:
                asin = div.get_attribute('data-asin')
                if asin and str(asin).startswith("B"):
                    try:
                        # The "a-spacing-micro" row only exists on sponsored results.
                        div.find_element_by_xpath(
                            './/div[@class="a-row a-spacing-micro"]')
                        sponsored = "1"
                    except:
                        sponsored = "0"
                    try:
                        price = div.find_element_by_xpath(
                            './/span[@data-a-color="base"]/span'
                        ).get_attribute("innerText").replace("$", "")
                    except:
                        price = None
                    try:
                        img1 = div.find_element_by_xpath(
                            './/img').get_attribute('src')
                    except:
                        img1 = None
                    try:
                        title = div.find_element_by_xpath(
                            './/h2/a/span').get_attribute("innerText")
                    except:
                        title = None
                    try:
                        div.find_element_by_xpath(
                            './/span[contains(text(),"by Amazon")]')
                        fba = "1"
                    except:
                        fba = "0"
                    try:
                        star = div.find_element_by_xpath(
                            './/div[@class="a-row a-size-small"]/span'
                        ).get_attribute('aria-label').replace(
                            " out of 5 stars", "")
                    except:
                        star = None
                    try:
                        review = div.find_element_by_xpath(
                            './/div[@class="a-row a-size-small"]/span[2]'
                        ).get_attribute('aria-label').replace(",", "")
                    except:
                        review = "0"
                    try:
                        if int(review) > 70:
                            all_log.logger.info("--- %s has %s reviews, skipping ---" %
                                                (asin, review))
                            continue
                        if float(price) > 40:
                            all_log.logger.info("--- %s is priced at %s, skipping ---" %
                                                (asin, price))
                            continue
                        if sponsored == "1":
                            all_log.logger.info("--- %s is sponsored, skipping ---" % asin)
                            continue
                    except:
                        all_log.logger.info("--- %s failed filtering, skipping ---" % asin)
                        continue
                    pro_url = div.find_element_by_xpath(
                        './/h2/a').get_attribute("href")
                    js = 'window.open("' + pro_url + '")'
                    driver.execute_script(js)
                    driver.switch_to.window(driver.window_handles[1])
                    try:
                        WebDriverWait(driver, 15).until(
                            EC.visibility_of_element_located(
                                (By.ID, 'bylineInfo_feature_div')))
                        try:
                            brand = driver.find_element_by_xpath(
                                '//a[@id="bylineInfo"]').text.replace(
                                    'Brand: ',
                                    '').replace('Visit the ',
                                                '').replace('Store',
                                                            '').strip()
                        except:
                            brand = None
                        try:
                            store = filter_str(
                                driver.find_element_by_id(
                                    'sellerProfileTriggerId').text)
                        except:
                            store = None
                        try:
                            qa = driver.find_element_by_xpath(
                                '//*[@id="askATFLink"]/span').get_attribute(
                                    'innerText').replace(
                                        " answered questions", "")
                        except:
                            qa = "0"
                        try:
                            seller_id = driver.find_element_by_id(
                                'merchantID').get_attribute("value")
                        except:
                            seller_id = None
                        try:
                            seller_num = driver.find_element_by_xpath(
                                '//div[@id="olp-upd-new-freeshipping-threshold"]//a/span'
                            ).text
                            seller_num = re.findall(r"\((.*)\)", seller_num)[0]
                        except:
                            seller_num = 0
                        br_error_num = 0
                        rank_type = 0
                        big_rank_txt = ""
                        big_rank = 0
                        mid_rank_txt = ""
                        mid_rank = 0
                        small_rank_txt = ""
                        small_rank = 0
                        while big_rank_txt == "":
                            if rank_type == 1:
                                try:
                                    big_rank_txt = driver.find_element_by_xpath(
                                        '//div[@id="detailBullets_feature_div"]/following-sibling::ul'
                                    ).get_attribute('innerText')
                                    if big_rank_txt == "":
                                        br_error_num += 1
                                except:
                                    br_error_num += 1
                                    sleep(1)
                                    big_rank_txt = ""
                            else:
                                try:
                                    big_rank_txt = getRank(driver, 1)
                                except:
                                    try:
                                        WebDriverWait(driver, 5).until(
                                            EC.visibility_of_element_located((
                                                By.ID,
                                                'detailBulletsWrapper_feature_div'
                                            )))
                                        rank_type = 1
                                        big_rank_txt = driver.find_element_by_xpath(
                                            '//div[@id="detailBullets_feature_div"]/following-sibling::ul'
                                        ).get_attribute('innerText')
                                    except:
                                        br_error_num += 1
                                        sleep(1)
                                        big_rank_txt = ""
                            if br_error_num == 3:
                                all_log.logger.error("%s: main-category rank not captured after %s tries" %
                                                     (asin, br_error_num))
                                big_rank_txt = ""
                                break
                        if big_rank_txt != "":
                            if rank_type == 0:
                                big_rank_txt = re.sub("\(.*", "",
                                                      big_rank_txt).strip()
                                big_rank_list = re.findall("\d", big_rank_txt)
                                big_rank = ""
                                for br in big_rank_list:
                                    big_rank += br
                            else:
                                for br_i, br in enumerate(
                                        big_rank_txt.split("#")):
                                    rank_txt = "#" + br.strip()
                                    if br_i == 1:
                                        big_rank_txt = re.sub(
                                            "\(.*", "", rank_txt).strip()
                                        big_rank_list = re.findall(
                                            "\d", big_rank_txt)
                                        big_rank = ""
                                        for br_1 in big_rank_list:
                                            big_rank += br_1
                                    elif br_i == 2:
                                        mid_rank_txt = rank_txt
                                        mid_rank_list = re.findall(
                                            "\d", mid_rank_txt)
                                        mid_rank = ""
                                        for mr in mid_rank_list:
                                            mid_rank += mr
                                    elif br_i == 3:
                                        small_rank_txt = rank_txt
                                        small_rank_list = re.findall(
                                            "\d", small_rank_txt)
                                        small_rank = ""
                                        for sr in small_rank_list:
                                            small_rank += sr
                        else:
                            big_rank = 0
                        if rank_type == 0:
                            try:
                                mid_rank_txt = getRank(driver, 2)
                            except:
                                mid_rank_txt = ""
                            if mid_rank_txt != "":
                                mid_rank_txt = re.sub(r"\(.*", "",
                                                      mid_rank_txt).strip()
                                mid_rank = "".join(re.findall(r"\d", mid_rank_txt))
                            else:
                                mid_rank = 0
                            try:
                                small_rank_txt = getRank(driver, 3)
                            except:
                                small_rank_txt = ""
                            if small_rank_txt != "":
                                small_rank_txt = re.sub(r"\(.*", "",
                                                        small_rank_txt).strip()
                                small_rank = "".join(re.findall(r"\d", small_rank_txt))
                            else:
                                small_rank = 0
                        try:
                            put_date = driver.find_element_by_xpath(
                                '//th[contains(text(),"Date First Available")]/following-sibling::td[1]'
                            ).get_attribute('innerText')
                            if put_date:
                                put_date = datetime.strptime(
                                    put_date, '%B %d, %Y').strftime("%Y-%m-%d")
                        except:
                            put_date = None
                        if big_rank == '' or int(
                                big_rank) == 0 or int(big_rank) > 15000:
                            all_log.logger.info("---%s大类排名为%s,跳过---" %
                                                (asin, big_rank))
                            driver.close()
                            driver.switch_to.window(driver.window_handles[0])
                            continue
                        img2 = ''
                        img3 = ''
                        img2_num = 0
                        img2_click_num = 0
                        img3_num = 0
                        img3_click_num = 0
                        while img2 == '' and img2_click_num < 40 and img2_num < 5:
                            sleep(0.5)
                            try:
                                driver.find_element_by_xpath(
                                    '//div[@id="altImages"]/ul//li[@class="a-spacing-small template"]/following-sibling::li[2]'
                                ).click()
                            except:
                                img2_click_num += 1
                            try:
                                WebDriverWait(driver, 5).until(
                                    EC.visibility_of_element_located(
                                        (By.XPATH,
                                         '//li[contains(@class,"itemNo1")]')))
                                img2 = driver.find_element_by_xpath(
                                    '//li[contains(@class,"itemNo1")]//img'
                                ).get_attribute("src")
                            except:
                                img2_num += 1
                        while img3 == '' and img3_click_num < 40 and img3_num < 5:
                            sleep(0.5)
                            try:
                                driver.find_element_by_xpath(
                                    '//div[@id="altImages"]/ul//li[@class="a-spacing-small template"]/following-sibling::li[3]'
                                ).click()
                            except:
                                img3_click_num += 1
                            try:
                                WebDriverWait(driver, 5).until(
                                    EC.visibility_of_element_located(
                                        (By.XPATH,
                                         '//li[contains(@class,"itemNo2")]')))
                                img3 = driver.find_element_by_xpath(
                                    '//li[contains(@class,"itemNo2")]//img'
                                ).get_attribute("src")
                            except:
                                img3_num += 1
                        sql = "insert into tb_amz_pro_1129(keyword,asin,img1,img2,img3,sponsored,price,title,fba,star,review,brand,store,qa,seller_id,seller_num," \
                              "big_rank_txt,big_rank,mid_rank_txt,mid_rank,small_rank_txt,small_rank,put_date,add_date) " \
                              "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,now())"
                        sql_param = [
                            data['keyword'], asin, img1, img2, img3, sponsored,
                            price, title, fba, star, review, brand, store, qa,
                            seller_id, seller_num, big_rank_txt, big_rank,
                            mid_rank_txt, mid_rank, small_rank_txt, small_rank,
                            put_date
                        ]
                        try:
                            mp.insert(sql, sql_param)
                            all_log.logger.info("-----%s(%s)入库成功-----" %
                                                (asin, data['keyword']))
                            success_num += 1
                        except IntegrityError:
                            all_log.logger.info("-----%s(%s)已存在-----" %
                                                (asin, data['keyword']))
                            success_num += 1
                        except Exception as e:
                            error_log.logger.error("-----%s(%s)入库失败%s-----" %
                                                   (asin, data['keyword'], e))
                    except:
                        traceback.print_exc()
                        error_log.logger.error("-----%s---%s采集出错-----" %
                                               (data['keyword'], proxy))
                    driver.close()
                    driver.switch_to.window(driver.window_handles[0])
            mp.update(update_sql, (data['id'], ))
        except:
            traceback.print_exc()
            error_log.logger.error("-----%s---%s出错-----" %
                                   (data['keyword'], proxy))
        finally:
            all_log.logger.info("---end---ip=%s,keyword=%s---" %
                                (proxy, data['keyword']))
            driver.quit()
Example #18
# -*- coding: utf-8 -*-
# @Time : 2020/10/27 23:41
# @Author : Cj
# @File : test2.py
# @Software : PyCharm

from time import sleep
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from db import MysqlPool

if __name__ == "__main__":
    mp = MysqlPool()
    sql = "select * from tb_review_task"
    s_list = mp.fetch_all(sql, None)
    for s in s_list:
        asins = str(s['asin']).split("|")
        for asin in asins:
            in_sql = "insert into tb_task_asin(task_id,asin,status) values(%s,%s,1)"
            param = [s['id'], asin]
            mp.insert(in_sql, param)
Example #19
def bisector_list(tabulation, num):
    # Reconstructed head: the source is truncated here. Splits tabulation
    # into num sublists, spreading any remainder over the first sublists.
    new_list = []
    if len(tabulation) >= num:
        merchant = len(tabulation) // num
        remainder = len(tabulation) % num
        for i in range(1, num + 1):
            new_list.append(tabulation[(i - 1) * merchant:i * merchant])
        if remainder:
            for j in tabulation[-remainder:]:
                new_list[tabulation[-remainder:].index(j)].append(j)
        return new_list
    else:
        '''If the list is shorter than the number of parts'''
        for i in range(1, len(tabulation) + 1):
            tabulation_subset = [tabulation[i - 1]]
            new_list.append(tabulation_subset)
        return new_list


if __name__ == "__main__":
    mp = MysqlPool()
    pool_num = 3
    sql = "select asin from tb_amz_pro limit 14000,12000"
    all_asin_list = mp.fetch_all(sql, None)
    # getSellerId(all_seller_list)
    lists = bisector_list(all_asin_list, pool_num)
    process_list = []
    for p_num, p_list in enumerate(lists):
        sleep(0.5)
        process = Process(target=getSellerId, args=(
            p_list,
            p_num,
        ))
        process.start()
        process_list.append(process)
    for p in process_list:
        p.join()
    error_log.logger.error("运行结束")