Exemple #1
0
def init_db():
    """Create the MongoDB manager and register it in ``global_obj``.

    Reads the ``"db"`` section of the config held in ``global_obj``, builds a
    ``mongo.CMongodbManager`` from its ``addr``/``port``/``user``/``password``
    entries (first argument ``"jiucai"`` -- presumably the database name),
    stores the manager under the ``"dbobj"`` key and finally calls
    ``init_db_index()``.
    """
    db_conf = global_obj.get("config")["db"]
    manager = mongo.CMongodbManager(
        "jiucai",
        db_conf["addr"],
        db_conf["port"],
        db_conf["user"],
        db_conf["password"],
    )
    global_obj.set("dbobj", manager)
    # The index helper reads "dbobj" back from global_obj, so it must run
    # after the manager has been registered.
    init_db_index()
Exemple #2
0
def init_task():
    """Create the task timer and register it in ``global_obj``.

    The optional ``"abort"`` entry of the global config is handed to
    ``CTaskTimer``; ``None`` is passed when the entry is absent. The timer is
    stored under the ``"task_timer"`` key.
    """
    settings = global_obj.get("config")
    abort_path = settings["abort"] if "abort" in settings else None
    global_obj.set("task_timer", CTaskTimer(abort_path))
Exemple #3
0
def save_excel(data_list, collect=True):
    """Write the crawled house data to a workbook via ``excel_tool``.

    One sheet is produced per entry of ``data_list``, named
    ``"<name>_<id>"``. Each row holds one house; the "小区" column is filled
    with the entry's ``name`` and every other column is looked up in the
    house dict, defaulting to an empty string.

    Args:
        data_list: iterable of dicts providing ``"name"``, ``"id"`` and a
            ``"house_data"`` mapping whose values are house dicts.
        collect: when true, an extra "汇总" sheet containing every row is
            inserted as the first sheet.
    """
    import excel_tool

    columns = [
        "小区", "id", "价格", "均价", "建筑面积", "套内面积", "配备电梯", "挂牌时间", "房屋户型", "所在楼层",
        "户型结构", "建筑类型", "房屋朝向", "建筑结构", "装修情况", "梯户比例", "交易权属", "上次交易", "房屋用途",
        "房屋年限", "产权所属", "抵押信息", "房本备件"
    ]
    beike_conf = global_obj.get("config")["beike"]
    # Output path is DATA_PATH followed by the configured name ("结果" by
    # default).
    out_path = DATA_PATH + beike_conf.get("output", "结果")

    sheets = []
    summary_rows = []
    for community in data_list:
        rows = [
            [
                community["name"] if column == "小区" else house.get(column, "")
                for column in columns
            ]
            for house in community["house_data"].values()
        ]
        if collect:
            # The summary sheet shares the very same row lists.
            summary_rows.extend(rows)
        sheet_name = "%s_%s" % (community["name"], community["id"])
        sheets.append(excel_tool.CSheetObject(sheet_name, columns, rows))

    if collect:
        sheets.insert(0, excel_tool.CSheetObject("汇总", columns, summary_rows))
    excel_tool.save_excel(out_path, sheets)
Exemple #4
0
def init_mail():
    """Create the mailbox object and register it in ``global_obj``.

    Uses the ``"mail"`` section of the global config: ``user``, ``password``
    and ``host`` construct the ``mail.CMailBox``; ``user`` is also set as the
    sender and every entry of ``to`` is added as a receiver. The mailbox is
    stored under the ``"mail"`` key.
    """
    mail_conf = global_obj.get("config")["mail"]
    account = mail_conf["user"]
    mailbox = mail.CMailBox(account, mail_conf["password"], mail_conf["host"])
    mailbox.SetSender(account)
    for recipient in mail_conf["to"]:
        mailbox.SetReceive(recipient)
    global_obj.set("mail", mailbox)
Exemple #5
0
def main_task(config_file):
    """Initialise every subsystem, schedule the spider and run the timer.

    Args:
        config_file: passed unchanged to ``main.init_base``.

    The call to ``RunForever()`` on the task timer is the last statement;
    judging by its name it is not expected to return.
    """
    # Initialisation order matters: later steps read objects that earlier
    # steps place in global_obj (config, "dbobj", "mail", "task_timer").
    main.init_base(config_file)
    beike_db.init_db()
    init_mail()
    init_task()
    spider_beike.init()
    log.Sys("初始化完成")

    add_task()
    global_obj.get("task_timer").RunForever()
Exemple #6
0
def add_task():
    """Register the Beike spider job on the global task timer.

    The job is a ``CTask`` named ``"spider_beike"`` with
    ``run_type=CTask.TForever`` and a ``CTimeTrigger.TDay`` trigger built
    from the time string ``"21:00:00"`` (presumably: every day at 21:00).
    """

    def start_task(tobj):
        # The argument supplied by the scheduler is not needed here.
        spider_beike.beike_task()

    timer = global_obj.get("task_timer")
    daily_trigger = CTimeTrigger(CTimeTrigger.TDay, "21:00:00")
    timer.AddTask(
        CTask("spider_beike", daily_trigger, start_task, run_type=CTask.TForever)
    )
Exemple #7
0
def start_community():
    """Crawl communities using multiple threads.

    Phase 1 builds one ``(city, community, filter)`` job per configured
    community (or per result of ``get_all_community`` when the config entry
    has an ``"all"`` key) and runs ``get_community_info`` for each job via
    ``thread_tool.start_thread``. Phase 2 runs ``get_house_info`` for every
    house URL collected in phase 1, filling the shared ``house_data`` dicts.

    Returns:
        list: the community dicts returned by ``get_community_info``, with
        their ``"house_url_list"`` key removed.
    """
    global g_count

    beike_conf = global_obj.get("config")["beike"]
    community_jobs = []
    for entry in beike_conf["spider_list"]:
        city = entry["city"]
        names = get_all_community(city) if "all" in entry else entry["community"]
        keyword = entry["filter"] if "filter" in entry else None
        community_jobs.extend((city, name, keyword) for name in names)

    house_jobs = []
    data_list = []

    def _get_community_info(threadobj, cityName, cName, filterWord):
        found = get_community_info(cityName, cName, filterWord)
        data_list.extend(found.values())
        for info in found.values():
            for url in info["house_url_list"]:
                # Each house job carries the community's house_data mapping.
                house_jobs.append((url, info["house_data"]))
            # The URL list is only needed to build jobs; drop it from the
            # dict that is returned to the caller.
            del info["house_url_list"]

    # Third argument is presumably the worker-thread count -- confirm
    # against thread_tool.start_thread.
    thread_tool.start_thread(_get_community_info, community_jobs, 5)
    log.Info("爬取小区信息完毕", len(community_jobs), len(house_jobs))
    # Reset the module-level counter before the house phase.
    g_count = 0

    def _get_house_info(tobj, url, house_data):
        get_house_info(url, house_data)

    log.Info("开始爬取所有信息", len(house_jobs))
    thread_tool.start_thread(_get_house_info, house_jobs, 10)
    log.Info("爬取所有信息完成")
    return data_list
Exemple #8
0
def send_diff_mail(diff_list):
    """Build an HTML report of community changes and send it by mail.

    Args:
        diff_list: sequence of dicts, one per changed community, each with
            ``"name"``, ``"new"`` (added houses), ``"del"`` (removed houses)
            and ``"diff"`` (sequence of ``(new_house, old_house)`` pairs).
            When empty, nothing is sent and "no beike diff" is logged.

    The mail is sent through the mailbox stored in ``global_obj`` under
    ``"mail"``; success is logged.
    """
    if not diff_list:
        log.Info("no beike diff")
        return
    htmobj = html.CHtml("房奴调研:")

    def set_dff_house(new, old):
        # Use the union of both key sets (new's keys first, then keys that
        # exist only in old) so a field that disappeared from the new record
        # still shows up as a "NULL" -> value change. A real list is built so
        # the header can be iterated repeatedly and indexed by AddTable.
        head_list = list(new)
        head_list.extend(key for key in old if key not in new)

        new_row = []
        old_row = []
        for key in head_list:
            new_text = str(new.get(key, "NULL"))
            old_text = str(old.get(key, "NULL"))
            # Changed cells are highlighted in the "new" row only.
            if new_text != old_text:
                new_row.append(htmobj.Font(new_text, "red"))
            else:
                new_row.append(new_text)
            old_row.append(old_text)
        htmobj.AddTable([new_row, old_row], head_list)

    for data in diff_list:
        htmobj.AddLine("=" * 30)
        # NOTE(review): the angle brackets around the name may be parsed as
        # an HTML tag by mail clients unless AddLine escapes its input --
        # confirm against html.CHtml.
        htmobj.AddLine("小区<%s>信息发生变化" % (data["name"]))
        if len(data["new"]) > 0:
            htmobj.AddLine("新增房源:")
            htmobj.AddDict2Table(data["new"])
        if len(data["del"]) > 0:
            htmobj.AddLine("有房源被删除:")
            htmobj.AddDict2Table(data["del"])
        if len(data["diff"]) > 0:
            htmobj.AddLine("房源信息发生变化:")
            for pair in data["diff"]:
                htmobj.AddLine("-" * 30)
                set_dff_house(pair[0], pair[1])
                htmobj.AddLine("+" * 30)
        htmobj.AddLine("*" * 30)

    html_text = htmobj.GetHtml()
    mailobj = global_obj.get("mail")
    message = mailobj.HtmlMailMessage()
    if message.SendMessage("房奴调研", html_text):
        log.Info("send beike mail done")
Exemple #9
0
def log_obj():
    """Return the object registered in ``global_obj`` under ``"logger"``."""
    logger = global_obj.get("logger")
    return logger
Exemple #10
0
def save_xiaoqu(cid, data):
    """Insert or replace the stored document of community ``cid``.

    Args:
        cid: community id; the document whose ``"id"`` field equals it is
            replaced, or a new document is inserted when none matches.
        data: the full replacement document.
    """
    collection = global_obj.get("dbobj").Collection(colname)
    # Collection.update() was deprecated in PyMongo 3.0 and removed in 4.0.
    # replace_one(..., upsert=True) is the supported equivalent for a
    # whole-document upsert.
    # NOTE(review): assumes dbobj.Collection() returns a PyMongo Collection
    # (PyMongo >= 3.0) -- confirm against mongo.CMongodbManager.
    collection.replace_one({"id": cid}, data, upsert=True)
Exemple #11
0
def load_xiaoqu(cid):
    """Fetch the stored document of community ``cid``.

    Args:
        cid: value matched against the document's ``"id"`` field.

    Returns:
        Whatever ``find_one`` yields for the query, with the ``"_id"`` field
        excluded by the projection (presumably ``None`` when nothing
        matches -- confirm against the collection implementation).
    """
    collection = global_obj.get("dbobj").Collection(colname)
    return collection.find_one({"id": cid}, {"_id": 0})
Exemple #12
0
def init_db_index():
    """Ask the registered DB manager to create an index on ``"id"``.

    Calls ``CreateIndex(colname, "id")`` on the object stored in
    ``global_obj`` under ``"dbobj"``.
    """
    global_obj.get("dbobj").CreateIndex(colname, "id")