import pika
import yaml
import requests
from lib.proxy_iterator import Proxies
from lib.log import LogHandler
from lxml import etree
import re
import json
from pymongo import MongoClient
# Module-level logger and proxy helper used by the rest of this module.
log = LogHandler(__name__)
p = Proxies()

# Load the crawler configuration.
# - yaml.load() without an explicit Loader is deprecated since PyYAML 5.1 and
#   raises TypeError on PyYAML 6.x; safe_load() parses plain configuration
#   data (mappings, lists, scalars) without constructing arbitrary objects.
# - The context manager closes the file handle, which the bare open() leaked.
with open('config_dianping.yaml') as config_file:
    setting = yaml.safe_load(config_file)

# MongoDB client, database and target collection, all taken from the
# 'mongo' section of the configuration.
m = MongoClient(host=setting['mongo']['host'],
                port=setting['mongo']['port'],
                username=setting['mongo']['user_name'],
                password=setting['mongo']['password'])
db = m[setting['mongo']['db_name']]
dianping_all_type_collection = db[setting['mongo']['shop_detail_collection']]

# Blocking RabbitMQ connection configured from the 'rabbit' section; the
# 'rpc_queue' queue is declared on its channel.
connection = pika.BlockingConnection(
    pika.ConnectionParameters(host=setting['rabbit']['host'],
                              port=setting['rabbit']['port']))
channel = connection.channel()
channel.queue_declare(queue='rpc_queue')


class ConvertIdRpcServer(object):
    def __init__(self, proxies):
        self.proxies = proxies
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
 def __init__(self):
     """Create the instance and store a new ``Proxies`` object on ``self.proxy``."""
     self.proxy = Proxies()
from xiaozijia_core.y666yun import GetPhone
import requests
import re
from lib.mongo import Mongo
import datetime
from lib.proxy_iterator import Proxies
# Only the value returned by get_one(proxies_number=1) is kept; the Proxies
# helper itself is not needed afterwards.
proxies = Proxies().get_one(proxies_number=1)


class Register(object):
    def __init__(self):
        """Set up the state needed for a registration attempt.

        Creates a ``requests`` session, builds a ``GetPhone`` helper and reads
        its ``phone`` attribute, prepares the request headers, and looks up
        the ``xiaozijia_user`` collection through the project ``Mongo``
        wrapper. ``code`` and ``result`` start out as empty strings.
        """
        self.password = '******'
        self.s = requests.session()

        # Keep the helper itself as well as the phone number it exposes.
        phone_service = GetPhone('小资家')
        self.g = phone_service
        self.phone = phone_service.phone

        request_headers = {}
        request_headers['Connection'] = 'keep-alive'
        request_headers['Host'] = 'www.xiaozijia.cn:8002'
        request_headers['User-Agent'] = (
            'xiao zi jiaiOS/1.2.1 (iPhone; iOS 11.4.1; Scale/2.00)')
        self.headers = request_headers

        self.code = ''

        mongo_client = Mongo('114.80.150.196',
                             27777,
                             user_name='goojia',
                             password='******')
        self.m = mongo_client
        self.coll = mongo_client.connect['friends']['xiaozijia_user']

        self.result = ''

    def sent_phone(self):
Exemple #4
0
import requests
from lxml import etree
from lib.proxy_iterator import Proxies
from pymongo import MongoClient
import re
import threading
from lib.log import LogHandler
from retry import retry
# Logger registered under the name 'lianjia'.
log = LogHandler('lianjia')
# ``p`` ends up holding the value returned by get_one(proxies_number=7).
p = Proxies().get_one(proxies_number=7)

# MongoDB client and the collection this module works with.
m = MongoClient(
    host='114.80.150.196',
    port=27777,
    username='******',
    password='******',
)
collection = m['hilder_gv']['sichuan']
# Place names used by this crawler (Sichuan, going by the variable name).
sichuan_city_list = [
    '成都',
    '绵阳',
    '宜宾',
    '自贡',
    '攀枝花',
    '广元',
    '乐山',
    '南充',
    '泸州',
    '资阳',
    '内江',
    '达州',
    '巴中',
    '遂宁',
    '眉山',
    '德阳',
    '广安',
    '雅安',
    '阿坝州',
    '甘孜州',
    '凉山州',
]


class Lianjia:
    """Holder for the fixed HTTP request headers stored on ``self.headers``."""

    def __init__(self):
        """Build the ``headers`` dict from a captured cookie and a browser User-Agent."""
        # Both values are fixed strings; they are named separately only to
        # keep the dict literal readable.
        cookie = 'lianjia_uuid=44a258db-4e00-4541-997c-57f4f3c117c1; _smt_uid=5c077f11.54f9c61d; gr_user_id=34c329d5-abde-48c8-8e92-164aeb1967c4; UM_distinctid=1677d485e781e8-08ba54e7ba4e7e-35607402-1fa400-1677d485e7994; _jzqc=1; _ga=GA1.2.130576672.1543995159; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1543995154,1544173828,1544173833; _jzqy=1.1544173829.1544173833.2.jzqsr=baidu|jzqct=%E9%93%BE%E5%AE%B6%E5%9C%B0%E4%BA%A7.jzqsr=baidu; _jzqx=1.1544430132.1544608309.5.jzqsr=bj%2Elianjia%2Ecom|jzqct=/.jzqsr=bj%2Elianjia%2Ecom|jzqct=/chengjiao/fengtai/; _gid=GA1.2.2020321299.1545103818; lianjia_ssid=b653ca99-45ef-4791-adbc-8cc15e705d04; _jzqa=1.4552267029258056000.1543995157.1545189315.1545206059.32; _jzqckmp=1; Qs_lvt_200116=1544798856%2C1545206539; Qs_pv_200116=235986746040596130%2C4405708339866472400%2C1972589321055627500%2C3526812790752574500%2C3163296021085384000; gr_session_id_a1a50f141657a94e=1aed3e59-04fb-4f93-90bc-5637149eeea8; gr_session_id_a1a50f141657a94e_1aed3e59-04fb-4f93-90bc-5637149eeea8=true; select_city=310000; all-lj=dafad6dd721afb903f2a315ab2f72633; TY_SESSION_ID=3a1d7567-ccca-4314-a3df-f1551037dceb; CNZZDATA1253492439=1920645834-1545204530-https%253A%252F%252Fbj.lianjia.com%252F%7C1545204530; CNZZDATA1254525948=828434328-1545203924-https%253A%252F%252Fbj.lianjia.com%252F%7C1545203924; CNZZDATA1255633284=1412891771-1545206158-https%253A%252F%252Fbj.lianjia.com%252F%7C1545206158; CNZZDATA1255604082=774544540-1545204688-https%253A%252F%252Fbj.lianjia.com%252F%7C1545204688; _qzjc=1; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1545207895; _qzja=1.386660674.1545207885583.1545207885583.1545207885583.1545207891886.1545207895471.0.0.0.5.1; _qzjb=1.1545207885583.5.0.0.0; _qzjto=5.1.0; _jzqb=1.134.10.1545206059.1'
        user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
        self.headers = {'Cookie': cookie, 'User-Agent': user_agent}
import requests
from lib.mongo import Mongo
from lib.log import LogHandler
import time
import datetime
from lib.proxy_iterator import Proxies
# Project-local Proxies helper; ``P`` holds what get_one(proxies_number=3)
# returns (note the distinct lower-case ``p`` and upper-case ``P`` names).
p = Proxies()
P = p.get_one(proxies_number=3)
# Project-local Mongo wrapper. Every handle below is obtained by indexing
# ``m.connect`` with 'friends' and then with the collection name.
m = Mongo('114.80.150.196', 27777, user_name='goojia', password='******')
collection = m.connect['friends']['zhizi_list']
detail_collection = m.connect['friends']['zhizi_detail']
deal_price_collection = m.connect['friends']['zhizi_deal_price_new']
listing_price_collection = m.connect['friends']['zhizi_listing_price']
new_house_collection = m.connect['friends']['zhizi_new_house']
new_house_sales_license_collection = m.connect['friends'][
    'zhizi_new_house_sales_license']

# Module-level logger named after this module.
log = LogHandler(__name__)


def time_convert(data_):
    """Convert a millisecond epoch timestamp to a ``YYYY-MM-DD`` date string.

    Args:
        data_: Milliseconds since the epoch, given either as a number or as a
            numeric string such as ``'1532448000000'``.

    Returns:
        The date in the machine's local time zone, formatted ``%Y-%m-%d``.

    Raises:
        ValueError: If ``data_`` is a string that is not numeric.
        TypeError: If ``data_`` cannot be converted to a float (e.g. ``None``).
    """
    # float() lets numeric strings through; dividing a str by 1000.0 directly
    # raised TypeError. Numeric inputs yield exactly the same value as before.
    seconds = float(data_) / 1000.0
    return time.strftime("%Y-%m-%d", time.localtime(seconds))


def price_convert(price_):
    """Convert a price expressed in units of 10,000 yuan (万元) to yuan.

    Args:
        price_: The price in 万元, as an int, a float or a numeric string.

    Returns:
        The price in yuan as an ``int``, rounded to the nearest yuan.

    Raises:
        ValueError: If ``price_`` is a string that is not numeric.
        TypeError: If ``price_`` cannot be converted to a number (e.g. ``None``).
    """
    # Exact integer arithmetic for plain ints (unchanged behaviour).
    if isinstance(price_, int):
        return price_ * 10000
    # Scale BEFORE dropping the fraction: the old ``int(price_) * 10000``
    # truncated 12.5 to 12 (giving 120000) and rejected strings like '12.5'.
    return int(round(float(price_) * 10000))


headers = {
# _*_ coding:utf-8 _*_
# from company.baidumap_consumer import BaiduMapConsumer
from company.baidumap_producer import baiduproducer

from lib.proxy_iterator import Proxies
from multiprocessing import Process
from company.baidumap_consumer_update import BaiduMapConsumer

if __name__ == '__main__':
    # Launch six consumer processes, one for each proxy index 1..6.
    # The producer (baiduproducer) is not started from here.
    p = Proxies()
    for proxy_number in range(1, 7):
        consumer = BaiduMapConsumer(proxies=p.get_one(proxy_number))
        worker = Process(target=consumer.start_consume)
        worker.start()

    # proxy = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
    #     "host": "http-dyn.abuyun.com",
    #     "port": "9020",
    #     "user": "******",
    #     "pass": "******",
    # }
    # proxies = {"https": proxy,
    #            "http": proxy}
    # Process(target=BaiduMapConsumer(proxies=proxies).start_consume).start()
Exemple #7
0
#     auth = Auth(access_key, secret_key)
#     # 初始化BucketManager
#     bucket_manager = BucketManager(auth)
#     ret, info = bucket_manager.fetch(url,bucket,filename)
#     if info.status_code == 200:
#
#         file_url = bucket_domain + "/" + filename
#         print(file_url)
#         return file_url
#     else:
#         print("{}抓取失败".format(url))
"""
    图片爬取
"""

# Project-local Proxies helper instance (its import is outside this excerpt).
proxy = Proxies()

# Bucket name; presumably the Qiniu bucket images are fetched into — TODO confirm.
bucket = 'fangjia-img'
# Logger registered under the name "qiniu".
log = LogHandler("qiniu")


@retry(delay=2)
def qiniufetch(url, file_name):
    headers = {
        "user_agent":
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"
    }
    if 'http' in url:
        """
            使用代理池
        """
Exemple #8
0
猎聘运行是3,4
"""
# from company.liepin_category import get_city,get_category
from company.liepin_producer_list import LiepinProduceList
from lib.proxy_iterator import Proxies
from multiprocessing import Process
from company.liepin_consumer_single import LiepinConsumeSingle
from company.liepin_producer_detail import LiepinProducerDetail
from company.liepin_consumer_gevent import LiepinConsumeGevent
if __name__ == '__main__':
    #1.分别将城市代码及分类代码存入到mysql数据库中
    # get_city()
    # get_category()

    #2.生产者,将分页也就是列表页链接放入到队列中
    p = Proxies()
    Process(target=LiepinProduceList(proxies=next(p)).start_crawler).start()

    #3.生产者,消费2中队列的url,解析出来公司的url,将公司详情页放入到队列中
    Process(target=LiepinProducerDetail(proxies=next(p)).start_consume).start()

    #4.消费3中队列中的URL,发请求\解析\入库
    p = Proxies()
    for x in range(1, 7):
        Process(target=LiepinConsumeSingle(proxies=p.get_one(
            proxies_number=x)).start_consume).start()

    proxy = "http://%(user)s:%(pass)s@%(host)s:%(port)s" % {
        "host": "http-dyn.abuyun.com",
        "port": "9020",
        "user": "******",
Exemple #9
0
        else:
            if text['status'] == '1':
                poi_list = text['data']['poi_list']
                for poi in poi_list:
                    address = poi['address']
                    if map_street in address:
                        dict_text = dict(poi)
                        poi_info.append(dict_text)
                    else:
                        break
                # 注意此处是更新
                if len(poi_info) != 31:
                    mongo_collection.update_one(
                        {
                            'city_code': data['city_code'],
                            'region': data['region'],
                            'street_number': data['street_number']
                        }, {'$set': {
                            'poi_info': poi_info
                        }})
            else:
                print(res.json())
                log.error('请求失败,status不为1,url = {}'.format(res.url))
            return True


if __name__ == '__main__':
    # Script entry point: build AddStreet with the value returned by
    # get_one(proxies_number=1) and run add_streets().
    p = Proxies()
    single_proxy = p.get_one(proxies_number=1)
    street = AddStreet(proxies=single_proxy)
    street.add_streets()