Exemple #1
0
    def find_matching_data(self,
                           data=None,
                           file_name=None,
                           save_data=True,
                           max_iterations=9999):
        """Run the matching procedures for every qualifying item in ``data``.

        Every visited item is checked with
        ``self.cond_set(item=..., source="bj", type="lease")``. Items that pass
        are handed to ``self.matching_procedure`` once per target source
        ("rm", "oc", "zhand") with its default match method, and then once
        more per target source with ``match_method="address"``.

        Args:
            data: Mapping of item id -> item record; each record is read at
                ``["01.main_data"]["name"]``. When ``None`` the mapping is
                loaded from ``self.combined_data_file_name``.
            file_name: Where to save the data afterwards; defaults to
                ``self.combined_data_file_name``.
            save_data: When true, write ``data`` with ``save_json_file``.
            max_iterations: Upper bound on the number of items visited. Items
                that fail the requirements test count towards the bound.

        Returns:
            The ``data`` mapping that was processed, so the result is still
            available when ``save_data`` is false.
        """
        if data is None:
            data = open_json_file(self.combined_data_file_name)

        target_sources = ("rm", "oc", "zhand")

        for visited, item_id in enumerate(data):
            # Checked before the work so that max_iterations=0 visits nothing.
            if visited >= max_iterations:
                break

            record = data[item_id]
            print("***\nFinding match for: {}, id: {}".format(
                record["01.main_data"]["name"], item_id))

            # cond_set's return type is not visible here, so the strict
            # comparison is kept: only a value equal to True counts as a pass.
            if self.cond_set(item=record, source="bj",
                             type="lease") == True:  # noqa: E712
                print("requirements test passed")

                # First pass: matching_procedure's default match method.
                for target in target_sources:
                    self.matching_procedure(operand=record,
                                            data=data,
                                            target_source=target)

                # Second pass: match on address.
                for target in target_sources:
                    self.matching_procedure(operand=record,
                                            data=data,
                                            target_source=target,
                                            match_method="address")
            else:
                print("requirements not met")

        if save_data:
            if file_name is None:
                file_name = self.combined_data_file_name
            save_json_file(file_name=file_name, content=data)

        return data
Exemple #2
0
    def merge_data(self, input_file_name=None, outut_file_name=None, save_data=True, max_iterations=9999):
        """Load a JSON file, run ``self.merging_function`` on it and return the result.

        Args:
            input_file_name: JSON file to load with ``open_json_file``. Required.
            outut_file_name: Where to save the merged data; defaults to
                ``self.merged_data_output_file``.
            save_data: When true, write the merged data with ``save_json_file``.
            max_iterations: Forwarded unchanged to ``self.merging_function``.

        Returns:
            Whatever ``self.merging_function`` returned.

        Raises:
            ValueError: If ``input_file_name`` is ``None``. ``ValueError`` is a
                subclass of ``Exception``, so existing ``except Exception``
                handlers keep working.
        """
        if input_file_name is None:
            raise ValueError("No input file")

        if outut_file_name is None:
            outut_file_name = self.merged_data_output_file

        data = open_json_file(input_file_name)
        output = self.merging_function(data, max_iterations=max_iterations)

        if save_data:
            save_json_file(file_name=outut_file_name, content=output)

        return output
Exemple #3
0
    def combine_data(self, file_name=None, save_data=True):
        """Merge every ``datasets/`` file whose name contains "st2" into one dict.

        Files are read in sorted name order. ``listdir`` returns entries in
        arbitrary order, so without sorting the winner for a key present in
        more than one file would vary between runs; with sorting, the file
        that sorts last wins.

        Args:
            file_name: Where to save the combined data; defaults to
                ``self.combined_data_file_name``.
            save_data: When true, write the combined dict with
                ``save_json_file``.

        Returns:
            The combined dict of all entries from the matching files.
        """
        if file_name is None:
            file_name = self.combined_data_file_name

        output = {}

        for dataset_file in sorted(listdir("datasets")):
            if "st2" not in dataset_file:
                continue
            print(dataset_file)
            output.update(open_json_file("datasets/{}".format(dataset_file)))

        if save_data:
            save_json_file(file_name=file_name, content=output)

        return output
Exemple #4
0
    def parse_by_links(self,
                       urls,
                       output_file_name,
                       max_iterations=9999,
                       save_data=True):
        """Fetch and parse each URL, collecting the raw results in a list.

        Each URL is first passed through ``self.bug_fixer`` (with
        ``set=self.name_of_set``); the returned URL is fetched with
        ``fetch_soup`` and the soup is handed to ``self.fetch_raw_data``.

        Args:
            urls: Iterable of URLs to parse.
            output_file_name: Where to save the collected results.
            max_iterations: Upper bound on the number of URLs parsed.
            save_data: When true, write the results with ``save_json_file``.

        Returns:
            The list of ``self.fetch_raw_data`` results, in URL order, so the
            parsed data is not lost when ``save_data`` is false.
        """
        output = []

        for url in urls:
            # Checked before the work so that max_iterations <= 0 parses nothing.
            if len(output) >= max_iterations:
                break
            url = self.bug_fixer(url=url, set=self.name_of_set)
            output.append(self.fetch_raw_data(fetch_soup(url), url=url))

        if save_data:
            save_json_file(file_name=output_file_name, content=output)

        return output
Exemple #5
0
    def bug_fixing(self,
                   input_file_name=None,
                   outut_file_name=None,
                   save_data=True,
                   max_iterations=9999):
        """Load a JSON file and run the three data-fixing passes over it.

        The passes run in this order, each receiving the previous pass's
        result: ``self.fixing_completion_date_bug``,
        ``self.fixing_parking_ratio_bug``, ``self.fixing_address_bug``.

        Args:
            input_file_name: JSON file to load with ``open_json_file``. Required.
            outut_file_name: Where to save the fixed data; defaults to
                ``self.bug_fixed_data_output_file``.
            save_data: When true, write the fixed data with ``save_json_file``.
            max_iterations: Forwarded unchanged to every fixing pass.

        Returns:
            The data returned by the last fixing pass.

        Raises:
            ValueError: If ``input_file_name`` is ``None``. ``ValueError`` is a
                subclass of ``Exception``, so existing ``except Exception``
                handlers keep working.
        """
        if input_file_name is None:
            raise ValueError("No input file")

        if outut_file_name is None:
            outut_file_name = self.bug_fixed_data_output_file

        data = open_json_file(input_file_name)

        fixing_passes = (
            self.fixing_completion_date_bug,
            self.fixing_parking_ratio_bug,
            self.fixing_address_bug,
        )
        for fixing_pass in fixing_passes:
            data = fixing_pass(data, max_iterations=max_iterations)

        if save_data:
            save_json_file(file_name=outut_file_name, content=data)

        return data
Exemple #6
0
    def restruct_data(self,
                      raw_data,
                      set,
                      file_name=None,
                      max_iterations=9999,
                      save_data=True):
        """Convert raw scraped entries into the structured item layout.

        For every non-``None`` entry a deep copy of ``self.item_pattern`` is
        filled field by field: the value of ``item[section][field]`` comes
        from ``self.s_<NN>_<field>(entry, set)``, where ``<NN>`` is the
        two-digit prefix of the section key (for example
        ``item["03.offer_details"]["min_lease"]`` is set from
        ``self.s_03_min_lease``). Items are keyed by
        ``self.set_id(set=set, n=...)``, with ``n`` counting from 1.

        Args:
            raw_data: Iterable of raw entries; ``None`` entries are skipped
                and do not count towards ``max_iterations``.
            set: Name of the source set, forwarded to every extractor. For
                ``"rm"`` the labels listed in ``entry[-2]`` additionally force
                the matching fit-out fields to ``False``.
            file_name: Where to save the result; defaults to
                ``self.restruct_data_output_file``.
            max_iterations: Upper bound on the number of items produced.
            save_data: When true, write the result with ``save_json_file``.

        Returns:
            Dict mapping item id -> structured item.

        Raises:
            KeyError: For set ``"rm"``, if ``entry[-2]`` contains a label that
                is not a known fit-out label.
        """
        # Section key -> fields filled from self.s_<NN>_<field>(entry, set).
        # The order below is the order in which the extractors are called.
        extracted_fields = (
            ("01.main_data", ("name", "type", "source")),
            ("02.location_details", ("city", "district", "address")),
            ("03.offer_details", (
                "av_office",
                "av_office_vol",
                "rent_office",
                "rent_retail",
                "rent_warehouse",
                "service_charge",
                "cost_parking_surface",
                "cost_parking_underground",
                "min_space_to_let",
                "min_lease",
                "add_on_factor",
            )),
            ("04.building_details", (
                "building_status",
                "building_class",
                "total_net_space",
                "total_gross_space",
                "completion_date",
                "ground_floors",
                "underground_floors",
                "floor_plate",
                "no_surface_parking",
                "no_underground_parking",
                "parking_ratio",
                "building_certification",
            )),
            ("05.fitout_standard", (
                "sprinklers",
                "access_control",
                "computer_cabling",
                "switchboard",
                "smoke_detectors",
                "suspended_ceiling",
                "openable_windows",
                "partition_walls",
                "backup_power_supply",
                "telephone_cabling",
                "power_cabling",
                "air_conditioning",
                "raised_floor",
                "carpeting",
                "fibre_optic_connections",
                "BMS",
            )),
            ("09.metadata", (
                "rm_id",
                "rm_url",
                "rm_pic_url",
                "bj_id",
                "bj_url",
                "bj_pic_url",
                "oc_id",
                "oc_url",
                "oc_pic_url",
                "add_info",
            )),
        )

        # Fit-out label as it appears in an "rm" entry -> item field name.
        translate_dict = {
            "sprinklers": "sprinklers",
            "access control": "access_control",
            "computer cabling": "computer_cabling",
            "switchboard": "switchboard",
            "smoke/heat detectors": "smoke_detectors",
            "suspended ceiling": "suspended_ceiling",
            "openable windows": "openable_windows",
            "partition walls": "partition_walls",
            "backup power supply": "backup_power_supply",
            "telephone cabling": "telephone_cabling",
            "power cabling": "power_cabling",
            "air-conditioning": "air_conditioning",
            "raised floor": "raised_floor",
            "carpeting": "carpeting",
            "fibre optic connection": "fibre_optic_connections",
            "BMS": "BMS"
        }

        output = {}
        produced = 0

        for e in raw_data:
            if e is None:
                continue
            # Checked before the work so that at most max_iterations items
            # are produced (the previous counter stopped one item early).
            if produced >= max_iterations:
                break
            produced += 1

            item = deepcopy(self.item_pattern)
            item_id = self.set_id(set=set, n=produced)

            for section, fields in extracted_fields:
                prefix = section.split(".", 1)[0]
                for field in fields:
                    extractor = getattr(self,
                                        "s_{}_{}".format(prefix, field))
                    item[section][field] = extractor(e, set)

                if section == "01.main_data":
                    item[section]["id"] = item_id
                elif section == "05.fitout_standard" and set == "rm":
                    # Labels listed in e[-2] override the extracted values.
                    for fitout_label in e[-2]:
                        item[section][translate_dict[fitout_label]] = False

            output[item_id] = item

        if save_data:
            if file_name is None:
                file_name = self.restruct_data_output_file
            save_json_file(file_name=file_name, content=output)

        return output
Exemple #7
0
    def data_to_json(self, function, input_file_name, output_file_name):
        """Read a CSV file, transform it with ``function`` and save the result as JSON.

        Args:
            function: Callable applied to the value returned by
                ``self.open_csv``; its return value is what gets saved.
            input_file_name: File passed to ``self.open_csv``.
            output_file_name: File the transformed data is written to with
                ``save_json_file``.
        """
        converted = function(self.open_csv(file_name=input_file_name))
        save_json_file(file_name=output_file_name, content=converted)