예제 #1
0
 def parse(self, response):
     """Yield one detail request per distinct author id in ``zhihu_paris``.

     ``response`` is not read here; the ids come from the ``author.id``
     values returned by ``MongoDBUtils("zhihu_paris").distinctID``.
     Each request targets ``self.start_url`` formatted with the id and
     is routed to ``self.parse_detail``.
     """
     author_ids = MongoDBUtils("zhihu_paris").distinctID("author.id")
     for author_id in author_ids:
         # "0" is skipped -- presumably a placeholder/anonymous author id;
         # confirm against the stored data.
         if author_id != "0":
             yield Request(
                 self.start_url.format(author_id),
                 callback=self.parse_detail,
                 dont_filter=True,
             )
예제 #2
0
class Location:
    """Count the regions of the authors found in one Mongo collection.

    Relies on names defined outside this block: ``MongoDBUtils``,
    ``city_province``, ``province_list`` and ``Counter``.
    """

    def __init__(self, collectionName):
        """Bind the analysis to the collection named ``collectionName``."""
        self.mongo = MongoDBUtils(collectionName)

    def analysis(self):
        """Tally author regions and return them as name/value records.

        Every distinct ``author.id`` of the bound collection (except
        ``"0"``) is looked up by ``_id`` in the ``zhihu_user`` collection.
        Each entry of the user's ``location`` list is mapped to one or
        more region labels (see ``_regions_for``) and the labels are
        counted. The counter is also printed, as before.

        Users without a stored profile, without a ``location`` list, or
        with an empty one contribute nothing.

        Returns:
            list[dict]: one ``{"name": label, "value": count}`` per label,
            in the order the labels were first seen.
        """
        author_ids = self.mongo.distinctID("author.id")
        user_store = MongoDBUtils("zhihu_user")

        labels = []
        for uid in author_ids:
            # "0" is skipped -- presumably a placeholder/anonymous author
            # id; confirm against the stored data.
            if uid == "0":
                continue
            try:
                user = user_store.searchByDoc({"_id": uid})[0]
            except IndexError:
                # Nothing stored for this id. Assumes searchByDoc returns
                # an indexable result that raises IndexError when empty
                # (list / pymongo cursor) -- TODO confirm.
                continue
            if not user:
                continue
            # A missing "location" key is treated like an empty list
            # instead of failing on len(None).
            for entry in user.get("location") or []:
                labels.extend(self._regions_for(entry.get("name")))

        label_counts = Counter(labels)
        print(label_counts)
        return [
            {"name": label, "value": count}
            for label, count in label_counts.items()
        ]

    @staticmethod
    def _regions_for(name):
        """Map one location name to the list of labels it counts under.

        Lookup order: exact key in ``city_province``; the name with
        ``"市"`` appended; every entry of ``province_list`` contained in
        the name; otherwise the single fallback label ``"其他"``.
        A missing or empty name yields no labels.
        """
        if not name:
            return []
        if name in city_province:
            return [city_province[name]]
        with_suffix = name + "市"
        if with_suffix in city_province:
            return [city_province[with_suffix]]
        matched = [region for region in province_list if region in name]
        # The fallback applies only when nothing matched; previously
        # "其他" was appended even after a province had been found.
        return matched or ["其他"]