Ejemplo n.º 1
0
def process_fangtianxia_parcel_raw_data(city_name):
    """Convert a city's Fangtianxia parcel raw file into its ready file.

    The raw table is loaded from the RAW_DATA path, passed through
    ``process_raw_data_to_ready`` and the result is written to the
    READY_DATA path as tab-separated UTF-8 text.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.
    """
    source = CrawlerSourceName.FANGTIANXIA.value
    label = CrawlerDataLabel.PARCEL.value
    source_path = get_raw_data_file_path(
        city_name, CrawlerDataType.RAW_DATA.value, source, label)
    target_path = get_raw_data_file_path(
        city_name, CrawlerDataType.READY_DATA.value, source, label)

    # error_bad_lines=False: malformed rows are dropped instead of raising.
    # NOTE(review): this keyword was deprecated in pandas 1.3 and removed in
    # 2.0 (replacement: on_bad_lines='skip'); revisit when upgrading pandas.
    frame = pd.read_table(source_path, error_bad_lines=False)
    process_raw_data_to_ready(frame).to_csv(
        path_or_buf=target_path, sep='\t', encoding='utf-8')
def process_lianjia_new_community_raw_data(city_name):
    """Convert a city's Lianjia new-community raw file into its ready file.

    Loads the raw table, runs ``process_new_community_raw_data_to_ready`` on
    it and writes the result to the READY_DATA path as tab-separated UTF-8.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.
    """
    # Source and label are identical for the input and the output path.
    shared_args = (CrawlerSourceName.LIANJIA.value,
                   CrawlerDataLabel.NEW_COMMUNITY.value)
    raw_path = get_raw_data_file_path(
        city_name, CrawlerDataType.RAW_DATA.value, *shared_args)
    ready_path = get_raw_data_file_path(
        city_name, CrawlerDataType.READY_DATA.value, *shared_args)

    # Malformed rows are skipped (error_bad_lines=False).
    # NOTE(review): keyword removed in pandas 2.0; on_bad_lines='skip' is the
    # replacement once the project's pandas version allows it.
    raw_frame = pd.read_table(raw_path, error_bad_lines=False)
    ready_frame = process_new_community_raw_data_to_ready(raw_frame)
    ready_frame.to_csv(path_or_buf=ready_path, sep='\t', encoding='utf-8')
Ejemplo n.º 3
0
def process_baidu_poi_raw_data(city_name):
    """Convert a city's Baidu POI raw file into its ready file.

    Reads the RAW_DATA file, applies ``process_raw_data_to_ready`` and saves
    the outcome at the READY_DATA path as tab-separated UTF-8 text.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.
    """
    source_name = CrawlerSourceName.BAIDU.value
    data_label = CrawlerDataLabel.BAIDU_POI.value
    # Input path first, output path second; only the data type differs.
    input_path, output_path = [
        get_raw_data_file_path(city_name, stage.value, source_name, data_label)
        for stage in (CrawlerDataType.RAW_DATA, CrawlerDataType.READY_DATA)
    ]

    # error_bad_lines=False drops malformed rows instead of raising.
    # NOTE(review): removed in pandas 2.0 (use on_bad_lines='skip' there).
    poi_raw = pd.read_table(input_path, error_bad_lines=False)
    poi_ready = process_raw_data_to_ready(poi_raw)
    poi_ready.to_csv(path_or_buf=output_path, sep='\t', encoding='utf-8')
def process_lianjia_second_hand_community_raw_data(city_name):
    """Convert a city's Lianjia second-hand community raw file to ready data.

    The raw table is transformed by
    ``process_second_hand_community_raw_data_to_ready`` and written to the
    READY_DATA path as tab-separated UTF-8 text.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.
    """
    def _path_for(data_type):
        # Same source/label for both files; only the data type varies.
        return get_raw_data_file_path(
            city_name,
            data_type.value,
            CrawlerSourceName.LIANJIA.value,
            CrawlerDataLabel.SECOND_HAND_COMMUNITY.value,
        )

    raw_path = _path_for(CrawlerDataType.RAW_DATA)
    ready_path = _path_for(CrawlerDataType.READY_DATA)

    # Malformed rows are skipped rather than aborting the load.
    # NOTE(review): error_bad_lines no longer exists in pandas >= 2.0;
    # on_bad_lines='skip' is its replacement.
    community_raw = pd.read_table(raw_path, error_bad_lines=False)
    community_ready = process_second_hand_community_raw_data_to_ready(
        community_raw)
    community_ready.to_csv(
        path_or_buf=ready_path, sep='\t', encoding='utf-8')
def process_fangtianxia_parcel_raw_data(city_name):
    """Turn the Fangtianxia parcel raw table of ``city_name`` into ready data.

    Steps: resolve the RAW_DATA and READY_DATA file paths, load the raw
    table, run ``process_raw_data_to_ready`` and store the result as
    tab-separated UTF-8 text.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.
    """
    origin = CrawlerSourceName.FANGTIANXIA.value
    kind = CrawlerDataLabel.PARCEL.value

    load_args = [city_name, CrawlerDataType.RAW_DATA.value, origin, kind]
    load_path = get_raw_data_file_path(*load_args)
    store_args = [city_name, CrawlerDataType.READY_DATA.value, origin, kind]
    store_path = get_raw_data_file_path(*store_args)

    # Rows pandas cannot parse are dropped (error_bad_lines=False).
    # NOTE(review): deprecated in pandas 1.3, removed in 2.0 in favour of
    # on_bad_lines='skip'.
    parcels_raw = pd.read_table(load_path, error_bad_lines=False)
    parcels_ready = process_raw_data_to_ready(parcels_raw)
    parcels_ready.to_csv(
        path_or_buf=store_path,
        sep='\t',
        encoding='utf-8',
    )
Ejemplo n.º 6
0
def process_anjuke_new_community_raw_data(city_name):
    """Load a city's Anjuke new-community raw file and return the ready data.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.

    Returns:
        Whatever ``process_new_community_raw_data_to_ready`` produces for the
        loaded raw table; nothing is written to disk here.
    """
    raw_path = get_raw_data_file_path(
        city_name,
        CrawlerDataType.RAW_DATA.value,
        CrawlerSourceName.ANJUKE.value,
        CrawlerDataLabel.NEW_COMMUNITY.value,
    )
    # Malformed rows are skipped instead of raising.
    # NOTE(review): error_bad_lines was removed in pandas 2.0
    # (replacement: on_bad_lines='skip').
    raw_frame = pd.read_table(raw_path, error_bad_lines=False)
    return process_new_community_raw_data_to_ready(raw_frame)
Ejemplo n.º 7
0
 def write_second_community_raw_data_in_rect_to_file(self, raw_data_list):
     """Pass Anjuke second-hand community raw records to ``write_to_file``.

     The target is the RAW_DATA path of ``self.city_name`` for the Anjuke
     source and the second-hand community label.

     Args:
         raw_data_list: Records forwarded unchanged to ``self.write_to_file``.
     """
     target_path = get_raw_data_file_path(
         self.city_name,
         CrawlerDataType.RAW_DATA.value,
         CrawlerSourceName.ANJUKE.value,
         CrawlerDataLabel.SECOND_HAND_COMMUNITY.value,
     )
     columns = ANJUKE_SECOND_COMMUNITY_RAW_DATA_HEADER_LIST
     self.write_to_file(columns, target_path, raw_data_list)
Ejemplo n.º 8
0
 def write_second_community_raw_data_in_rect_to_file(self, raw_data_list):
     """Pass Lianjia second-hand community raw records to ``write_to_file``.

     The header list depends on whether ``self.city_name`` is listed in
     ``LIANJIA_SPECIFIC_SECOND_HAND_COMMUNITY_CITY_NAME_LIST``.

     Args:
         raw_data_list: Records forwarded unchanged to ``self.write_to_file``.
     """
     destination = get_raw_data_file_path(
         self.city_name,
         CrawlerDataType.RAW_DATA.value,
         CrawlerSourceName.LIANJIA.value,
         CrawlerDataLabel.SECOND_HAND_COMMUNITY.value,
     )
     if self.city_name in LIANJIA_SPECIFIC_SECOND_HAND_COMMUNITY_CITY_NAME_LIST:
         # NOTE(review): a READY_DATA header constant is used while writing
         # raw data for these cities -- confirm this is intended.
         columns = LIANJIA_SPECIFIC_SECOND_COMMUNITY_READY_DATA_HEADER_LIST
     else:
         columns = LIANJIA_SECOND_COMMUNITY_RAW_DATA_HEADER_LIST
     self.write_to_file(columns, destination, raw_data_list)
Ejemplo n.º 9
0
    def crawl_lianjia_new_community_raw_data(self):
        """Crawl Lianjia new-community records for this city and save them.

        Cities listed in ``LIANJIA_SPECIFIC_NEW_COMMUNITY_CITY_NAME_LIST`` use
        the "specific" fetcher, whose result is consumed as one flat
        collection. All other cities use the generic fetcher, whose result is
        a mapping of record collections that is flattened here. The records
        are saved through ``save_raw_data_in_tsv_file`` and the running total
        is logged.
        """
        collected = []
        city_url = self.get_city_url_for_lianjia()

        if self.city_name in LIANJIA_SPECIFIC_NEW_COMMUNITY_CITY_NAME_LIST:
            communities = (
                self.get_specific_lianjia_new_community_data_with_url(
                    city_url))
            # This branch overwrites the counter ...
            self.new_community_data_num = len(communities)
            collected.extend(communities)
        else:
            grouped = self.get_lianjia_new_community_data_with_url(city_url)
            for group in grouped.values():
                # ... while this branch adds to whatever value it already
                # holds. NOTE(review): confirm the asymmetry is intended.
                self.new_community_data_num += len(group)
                collected.extend(group)

        output_path = get_raw_data_file_path(
            self.city_name,
            CrawlerDataType.RAW_DATA.value,
            CrawlerSourceName.LIANJIA.value,
            CrawlerDataLabel.NEW_COMMUNITY.value,
        )
        save_raw_data_in_tsv_file(output_path, collected)

        summary = 'city : {} ][gross lianjia new community data : {}'.format(
            self.city_name, self.new_community_data_num)
        self.logger.info(summary)
Ejemplo n.º 10
0
 def write_second_community_raw_data_in_rect_to_file(self, raw_data_list):
     """Send Lianjia second-hand community raw records to ``write_to_file``.

     The records go to the RAW_DATA path of ``self.city_name``; the header
     list is chosen by the city's membership in
     ``LIANJIA_SPECIFIC_SECOND_HAND_COMMUNITY_CITY_NAME_LIST``.

     Args:
         raw_data_list: Records forwarded unchanged to ``self.write_to_file``.
     """
     path_args = (
         self.city_name,
         CrawlerDataType.RAW_DATA.value,
         CrawlerSourceName.LIANJIA.value,
         CrawlerDataLabel.SECOND_HAND_COMMUNITY.value,
     )
     out_path = get_raw_data_file_path(*path_args)

     uses_specific_layout = (
         self.city_name
         in LIANJIA_SPECIFIC_SECOND_HAND_COMMUNITY_CITY_NAME_LIST)
     if not uses_specific_layout:
         headers = LIANJIA_SECOND_COMMUNITY_RAW_DATA_HEADER_LIST
     else:
         # NOTE(review): READY_DATA header constant in a raw-data writer;
         # verify this is the intended header for the specific cities.
         headers = LIANJIA_SPECIFIC_SECOND_COMMUNITY_READY_DATA_HEADER_LIST
     self.write_to_file(headers, out_path, raw_data_list)
Ejemplo n.º 11
0
def format_anjuke_second_hand_community_raw_data(city_name):
    """Load a city's Anjuke second-hand community raw file and format it.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.

    Returns:
        The result of ``format_second_community_raw_data`` applied to the
        loaded raw table.
    """
    path_parts = (
        city_name,
        CrawlerDataType.RAW_DATA.value,
        CrawlerSourceName.ANJUKE.value,
        CrawlerDataLabel.SECOND_HAND_COMMUNITY.value,
    )
    source_path = get_raw_data_file_path(*path_parts)
    # Malformed rows are dropped instead of raising.
    # NOTE(review): error_bad_lines is gone in pandas 2.0; its replacement is
    # on_bad_lines='skip'.
    loaded = pd.read_table(source_path, error_bad_lines=False)
    return format_second_community_raw_data(loaded)
Ejemplo n.º 12
0
def process_anjuke_new_community_raw_data(city_name):
    """Return ready data built from a city's Anjuke new-community raw file.

    Args:
        city_name: City identifier forwarded to ``get_raw_data_file_path``.

    Returns:
        The output of ``process_new_community_raw_data_to_ready`` for the raw
        table read from the RAW_DATA path.
    """
    data_type = CrawlerDataType.RAW_DATA.value
    source = CrawlerSourceName.ANJUKE.value
    label = CrawlerDataLabel.NEW_COMMUNITY.value
    input_path = get_raw_data_file_path(city_name, data_type, source, label)

    # error_bad_lines=False: unparsable rows are skipped.
    # NOTE(review): deprecated in pandas 1.3 and removed in 2.0
    # (on_bad_lines='skip' replaces it).
    table = pd.read_table(input_path, error_bad_lines=False)
    processed = process_new_community_raw_data_to_ready(table)
    return processed
Ejemplo n.º 13
0
    def crawl_lianjia_new_community_raw_data(self):
        """Fetch this city's Lianjia new-community records and save them.

        The generic fetcher returns a mapping whose values are record
        collections; these are flattened into one list. Cities listed in
        ``LIANJIA_SPECIFIC_NEW_COMMUNITY_CITY_NAME_LIST`` use the "specific"
        fetcher, whose result is already a flat collection. The records are
        then saved via ``save_raw_data_in_tsv_file`` and the count is logged.
        """
        records = []
        lianjia_url = self.get_city_url_for_lianjia()
        is_specific_city = (
            self.city_name in LIANJIA_SPECIFIC_NEW_COMMUNITY_CITY_NAME_LIST)

        if not is_specific_city:
            by_group = self.get_lianjia_new_community_data_with_url(
                lianjia_url)
            for batch in by_group.values():
                # Accumulates onto the existing counter value.
                # NOTE(review): the other branch assigns instead of adding;
                # confirm the counter starts at zero.
                self.new_community_data_num += len(batch)
                records += batch
        else:
            flat = self.get_specific_lianjia_new_community_data_with_url(
                lianjia_url)
            self.new_community_data_num = len(flat)
            records += flat

        tsv_path = get_raw_data_file_path(
            self.city_name,
            CrawlerDataType.RAW_DATA.value,
            CrawlerSourceName.LIANJIA.value,
            CrawlerDataLabel.NEW_COMMUNITY.value,
        )
        save_raw_data_in_tsv_file(tsv_path, records)

        message_template = 'city : {} ][gross lianjia new community data : {}'
        self.logger.info(message_template.format(
            self.city_name, self.new_community_data_num))
Ejemplo n.º 14
0
 def write_parcel_raw_data_in_rect_to_file(self, raw_data_list):
     """Pass Fangtianxia parcel raw records to ``self.write_to_file``.

     The destination is the RAW_DATA path of ``self.city_name`` for the
     Fangtianxia source and the parcel label.

     Args:
         raw_data_list: Records forwarded unchanged to ``self.write_to_file``.
     """
     parcel_path = get_raw_data_file_path(
         self.city_name,
         CrawlerDataType.RAW_DATA.value,
         CrawlerSourceName.FANGTIANXIA.value,
         CrawlerDataLabel.PARCEL.value,
     )
     header = FANGTIANXIA_PARCEL_RAW_DATA_HEADER_LIST
     self.write_to_file(header, parcel_path, raw_data_list)
Ejemplo n.º 15
0
 def write_poi_raw_data_in_rect_to_file(self, raw_data_list):
     """Pass Baidu POI raw records to ``self.write_to_file``.

     The destination is the RAW_DATA path of ``self.city_name`` for the
     Baidu source and the Baidu POI label.

     Args:
         raw_data_list: Records forwarded unchanged to ``self.write_to_file``.
     """
     path_args = (
         self.city_name,
         CrawlerDataType.RAW_DATA.value,
         CrawlerSourceName.BAIDU.value,
         CrawlerDataLabel.BAIDU_POI.value,
     )
     poi_path = get_raw_data_file_path(*path_args)
     self.write_to_file(
         BAIDU_POI_RAW_DATA_HEADER_LIST, poi_path, raw_data_list)
Ejemplo n.º 16
0
 def write_second_community_raw_data_in_rect_to_file(self, raw_data_list):
     """Send Anjuke second-hand community raw records to ``write_to_file``.

     Resolves the RAW_DATA path for ``self.city_name`` (Anjuke source,
     second-hand community label) and passes it on with the Anjuke header
     list and the records.

     Args:
         raw_data_list: Records forwarded unchanged to ``self.write_to_file``.
     """
     data_type = CrawlerDataType.RAW_DATA.value
     source = CrawlerSourceName.ANJUKE.value
     label = CrawlerDataLabel.SECOND_HAND_COMMUNITY.value
     out_path = get_raw_data_file_path(
         self.city_name, data_type, source, label)
     self.write_to_file(
         ANJUKE_SECOND_COMMUNITY_RAW_DATA_HEADER_LIST,
         out_path,
         raw_data_list,
     )