Esempio n. 1
0
def originalCode():
    """Run the earlier crawl sequence against the ``core`` module.

    Takes no arguments: ``city`` and ``regionlist`` are read from module
    scope, so they must be defined before this function is called.
    The ``core`` calls are issued in a fixed order and nothing is returned.
    """
    # Region-driven steps: both calls receive the same (city, regionlist) pair.
    core.GetHouseByRegionlist(city, regionlist)
    core.GetRentByRegionlist(city, regionlist)

    # Initial step: scrape the community list and insert it into the
    # database; per the original note this may only need to run the first time.
    core.GetCommunityByRegionlist(city, regionlist)

    # Read the community list back from the database and hand it to the
    # community-driven step.
    communities = get_communitylist(city)
    core.GetSellByCommunitylist(city, communities)
Esempio n. 2
0
def get_community_worker(queue, city):
    """Drain ``queue``, calling ``core.GetCommunityByRegionlist`` per item.

    Args:
        queue: Object exposing ``get_nowait()`` that raises ``queue.Empty``
            once no items remain (``queue.Queue`` and
            ``multiprocessing.Queue`` both do).
        city: Passed through unchanged as the first argument of
            ``core.GetCommunityByRegionlist``.

    Returns:
        None, as soon as the queue reports that it is empty.

    Raises:
        Any exception other than ``queue.Empty`` raised by ``get_nowait()``
        or by the per-item processing now propagates instead of being
        silently treated as "queue empty".
    """
    # Imported locally because the ``queue`` parameter shadows the stdlib
    # module of the same name inside this function.
    from queue import Empty

    while True:
        # Keep the try body to the single call that signals exhaustion.
        try:
            reg = queue.get_nowait()
        except Empty:
            return
        print(reg)
        # Lazy %-formatting: separates the two values with spaces and does
        # not fail when ``reg`` is not a str.
        logging.info("Processing %s %s", reg, city)
        core.GetCommunityByRegionlist(city, [reg])
Esempio n. 3
0
def testUpsertDB():
    """Manually exercise an insert-with-conflict-clause on ``model.Community``.

    Builds one hard-coded sample row and executes
    ``insert(...).on_conflict(...)`` with ``id`` as the conflict target,
    an explicit ``preserve`` column list and an empty ``update`` mapping.
    Takes no arguments and returns nothing; the only effect is the
    database statement it executes.
    """
    info_dict = {
        "id": 121,
        "title": u'京基御景华城',
        "link": 'https://sz.lianjia.com/xiaoqu/12221',
        'district': u'福田区',
        'bizcircle': u'赤尾1',
        'tagList': u'近地铁7号线赤尾站',
        'onsale': '1',
        'year': '',
    }

    # Columns handed to ``preserve``; presumably these take the newly
    # inserted values when a row with the same id already exists -- confirm
    # against the peewee version in use.
    preserved_columns = [
        model.Community.title,
        model.Community.link,
        model.Community.district,
        model.Community.bizcircle,
        model.Community.tagList,
        model.Community.onsale,
        model.Community.onrent,
        model.Community.year,
        model.Community.housetype,
        model.Community.cost,
        model.Community.service,
        model.Community.company,
        model.Community.building_num,
        model.Community.house_num,
        model.Community.price,
        model.Community.city,
    ]

    query = model.Community.insert(info_dict).on_conflict(
        conflict_target=[model.Community.id],
        preserve=preserved_columns,
        update={},
    )
    query.execute()


if __name__ == "__main__":
    # Script entry point. The names assigned here (regionlist, city,
    # communitylist) are module globals; originalCode() reads city and
    # regionlist from module scope, so they must keep these names.
    # originalCode()
    regionlist = settings.REGIONLIST  # only pinyin region names are supported
    city = settings.CITY
    # Initialise the database before any core.* call runs.
    model.database_init()
    # Region-driven step; presumably this populates the community table that
    # get_communitylist() reads next -- confirm against core.
    core.GetCommunityByRegionlist(city, regionlist)
    communitylist = get_communitylist(city)
    # print communitylist
    # Community-driven steps, all fed the same (city, communitylist) pair.
    core.GetHouseByCommunitylist(city, communitylist)
    core.GetSellByCommunitylist(city, communitylist)
    core.GetRentByCommunitylist(city, communitylist)
    # testUpsertDB()
Esempio n. 4
0
import core
import model
import settings


def get_communitylist():
    """Return the titles of all communities whose ``onsale`` is above zero.

    Queries ``model.Community`` and collects the ``title`` attribute of each
    matching row, in the order the query yields them.
    """
    on_sale_rows = model.Community.select().where(model.Community.onsale > 0)
    return [row.title for row in on_sale_rows]


if __name__ == "__main__":
    # Script entry point: initialise the database, run the region-driven
    # community step, then feed the stored community titles to the three
    # community-driven core.* calls.
    regionlist = settings.REGIONLIST  # only pinyin region names are supported
    model.database_init()
    # Initial step: scrape the community list and insert it into the
    # database; per the original note this may only need to run the first time.
    core.GetCommunityByRegionlist(
        regionlist
    )
    # Read the community list back from the database (titles of rows with
    # onsale > 0, see get_communitylist above).
    communitylist = get_communitylist()
    core.GetHouseByCommunitylist(communitylist)
    core.GetRentByCommunitylist(communitylist)
    core.GetSellByCommunitylist(communitylist)
Esempio n. 5
0
    # NOTE(review): the enclosing definition of this fragment is not visible
    # here. The bare triple-quoted strings below are no-op string statements
    # used by the author as notes; an English rendering is given in the
    # comment above each of them.

    # model is the database model.
    model.database_init()
    # (EN) "core is the core crawler module."
    """
    core是核心爬虫模块。
    """
    # (EN) "Crawl on-sale listings by administrative district; returns all
    # on-sale listings for the districts in regionlist. Because of Lianjia's
    # limits only the first 100 pages can be crawled; GetHouseByCommunitylist
    # can be used instead."
    """
    根据行政区来爬虫在售房源信息, 返回regionlist里面所有在售房源信息。
    由于链家限制,仅支持爬前100页数据,可使用GetHouseByCommunitylist。
    """
    # core.GetHouseByRegionlist(city, regionlist)
    # (EN) "Get for-rent listings by administrative district."
    """
    获取行政区在租房源信息
    """
    # core.GetRentByRegionlist(city, regionlist)  # get for-rent listing info
    # (EN) "Get the communities inside each administrative district; running
    # this once is enough."
    """
    获取行政区内小区信息,可以只运行一次即可。
    """
    # Initial step: scrape the community list and insert it into the
    # database; may only need to run the first time.
    core.GetCommunityByRegionlist(city, regionlist)  # get community info from the district list
    # (EN) "Crawl sold-listing info by community; returns all sold listings
    # for the communities in communitylist. Some data cannot be shown because
    # it is only displayed in the Lianjia app."
    """
    根据小区来爬虫成交房源信息,返回communitylist里面所有成交房源信息。
    部分数据无法显示因为这些数据仅在链家app显示
    """

    communitylist = get_communitylist(city)
    print(communitylist)

    # NOTE(review): this name shadows the builtin ``list``; it is only
    # referenced by the commented-out calls below.
    list = ['东荟城', '金色梦想']
    # core.GetHouseByCommunitylist(city, list)
    # core.GetSellByCommunitylist(city, list)    # sold-listing info
Esempio n. 6
0
if __name__ == "__main__":
    # Script entry point. ``mysql_status``, ``args`` and ``get_communitylist``
    # are defined outside this excerpt.
    # Abort early unless mysql_status() returns 0.
    ret = mysql_status()
    if ret != 0:
        print('mysql start failed.')
        sys.exit()

    regionlist = settings.REGIONLIST  # only pinyin region names are supported
    city = settings.CITY
    if args.initDatabase:
        model.database_init()  # create_tables: only needs to be executed once

    # Initial step: scrape the community list and insert it into the
    # database; may only need to run the first time.

    if args.updateCommunity:
        core.GetCommunityByRegionlist(city, regionlist)  # fetch the community list and write it to table community

    communitylist = get_communitylist(city)  # read the community list from the database

    # for community in communitylist:
    #     logging.info("%s", community)

    # Debug mode: every debug call below is commented out, so this branch
    # currently only exits the process.
    if args.isDebug:
        # dump_db('ershoufang')
        # core.get_sell_percommunity(city, communitylist[0])
        # core.get_house_percommunity(city, communitylist[0])
        # core.get_community_perregion(city, 'chaoyang')
        # core.get_house_perregion(city, 'chaoyang')
        # core.get_rent_percommunity(city, 'chaoyang')
        sys.exit()
Esempio n. 7
0
import core
import model
import settings

def get_communitylist():
    """Return the ``title`` of every row yielded by ``model.Community.select()``."""
    return [entry.title for entry in model.Community.select()]

if __name__=="__main__":
    # Script entry point: initialise the database, run the region-driven
    # community step, then pass the stored community titles to the three
    # community-driven core.* calls.
    regionlist = settings.REGIONLIST # only pinyin region names are supported
    model.database_init() # only needs to run the first time
    # The ByRegionlist calls cannot get all data because Lianjia only displays 100 pages
    # core.GetHouseByRegionlist(regionlist)
    # core.GetRentByRegionlist(regionlist)
    
    # Initial step: scrape the community list and insert it into the database;
    # may only need to run the first time
    core.GetCommunityByRegionlist(regionlist) 
    
    # Read the community list (titles) back from the database
    communitylist = get_communitylist()
    
    # Historical sales
    core.GetSellByCommunitylist(communitylist)
    
    # Currently on sale
    core.GetHouseByCommunitylist(communitylist)

    # Rentals
    core.GetRentByCommunitylist(communitylist)