Example #1
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(
            original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # Date and Time are stored in separate columns, so build a combined field.
        DataFrameFunction.generate_data_time_field(data_frame)
        data_frame.set_index('date_time', inplace=True)

        # Calculate the total supply capacity.
        data_frame['total_supply_capacity'] = (
            DataFrameFunction.get_total_supply_capacity(data_frame))

        # Convert to float so the downstream calculations work.
        # Strip thousands separators and treat the '−' placeholder as 0 first.
        data_frame['solar_output_control'] = data_frame[
            'solar_output_control'].astype(str).str.replace(
                ',', '').str.replace('−', '0').astype(float)
        data_frame['wind_output_control'] = data_frame[
            'wind_output_control'].astype(str).str.replace(
                ',', '').str.replace('−', '0').astype(float)

        processed_pkl_path = FileFunction.get_processed_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
Example #2
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(
            original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # Date and Time are stored in separate columns, so build a combined field.
        DataFrameFunction.generate_data_time_field(data_frame)
        data_frame.set_index('date_time', inplace=True)

        # Some values contain thousands separators like "2,500", so strip the commas.
        data_frame['demand'] = data_frame['demand'].astype(str).str.replace(
            ',', '').astype(int)
        data_frame['total_supply_capacity'] = data_frame[
            'total_supply_capacity'].astype(str).str.replace(',',
                                                             '').astype(int)

        # Convert to float so the downstream calculations work.
        data_frame['thermal'] = data_frame['thermal'].astype(str).str.replace(
            ',', '').astype(float)

        # Convert from 万kW to MWh to match the other utilities.
        DataFrameFunction.to_mwh(data_frame)

        processed_pkl_path = FileFunction.get_processed_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
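Note: DataFrameFunction.to_mwh is a shared project helper whose implementation is not shown in these excerpts. The sketch below is only an assumption of what the unit conversion might look like, for hourly rows reported in 万kW (10,000 kW) and a made-up column list.

import pandas


def to_mwh_sketch(data_frame, columns=('demand', 'thermal')):
    """Hypothetical sketch only: convert hourly values in 万kW to MWh, in place."""
    for column in columns:
        if column in data_frame.columns:
            # 1 万kW = 10,000 kW = 10 MW; over a one-hour interval that equals 10 MWh.
            data_frame[column] = data_frame[column].astype(float) * 10
    return data_frame


# Toy usage: 450 万kW over one hour corresponds to 4,500 MWh.
print(to_mwh_sketch(pandas.DataFrame({'demand': [450]})))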
Example #3
    def check_download_page(cls, root_path, companyname, omit_list):
        json = FileFunction.get_param_json(root_path, companyname)
        html = RequestFunction.get_html(json['download_page_url'])
        FileFunction.create_current_html(root_path, companyname, html)
        check_result, message = cls.check_diff_current_and_prev_html(
            root_path, companyname, omit_list)

        return check_result, message
Example #4
    def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
        original_pkl_path = FileFunction.get_original_pkl_path(root_path, cls.COMPANY_NAME, pkl_file_name)

        # Re-download and re-parse only when a refresh is forced or no cached pickle exists.
        if reflesh or not os.path.exists(original_pkl_path):
            decoded_data = FileFunction.get_decoded_data(url)
            data_frame = cls.__parse(decoded_data)
            FileFunction.create_pkl_file(original_pkl_path, data_frame)

        return original_pkl_path
Example #5
    def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
        original_pkl_path = FileFunction.get_original_pkl_path(root_path, cls.COMPANY_NAME, pkl_file_name)

        if reflesh or not os.path.exists(original_pkl_path):
            decoded_data = FileFunction.get_decoded_data(url)
            # Some rows contain stray half-width spaces (e.g. "2016/10/1  1:00"),
            # so clean them up before parsing.
            tohokuepco_csv = cls.__get_tohokuepco_csv(decoded_data)
            data_frame = cls.__parse(tohokuepco_csv)
            FileFunction.create_pkl_file(original_pkl_path, data_frame)

        return original_pkl_path
Example #6
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(
            original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # Date and Time are stored in separate columns, so build a combined field.
        DataFrameFunction.generate_data_time_field(data_frame)
        processed_pkl_path = FileFunction.get_processed_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
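Note: DataFrameFunction.generate_data_time_field is a shared helper that is not shown here. A minimal sketch of combining separate date and time columns into one field, with the source column names and format as assumptions, might look like this.

import pandas


def generate_date_time_sketch(data_frame):
    """Hypothetical sketch only: build a combined 'date_time' column from 'date' and 'time'."""
    data_frame['date_time'] = pandas.to_datetime(
        data_frame['date'].astype(str) + ' ' + data_frame['time'].astype(str))
    return data_frame


# Toy usage with assumed column names and values.
print(generate_date_time_sketch(
    pandas.DataFrame({'date': ['2016/10/1'], 'time': ['1:00']})))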
Example #7
    def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
        original_pkl_path = FileFunction.get_original_pkl_path(root_path, cls.COMPANY_NAME, pkl_file_name)

        if reflesh or not os.path.exists(original_pkl_path):
            decoded_data = FileFunction.get_decoded_data(url)
            # The okiden data sometimes contains blank lines, so clean it up before reading the CSV.
            okiden_csv = cls.__get_okiden_csv(decoded_data)
            data_frame = cls.__parse(okiden_csv)
            data_frame.reset_index(inplace=True)
            FileFunction.create_pkl_file(original_pkl_path, data_frame)

        return original_pkl_path
Example #8
    def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
        original_pkl_path = FileFunction.get_original_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)

        if reflesh or not os.path.exists(original_pkl_path):
            decoded_data = FileFunction.get_decoded_data(url)
            # Remove the stray trailing "," that sometimes appears at the end of lines.
            # The number of rows to skip can also differ, so handle that as well.
            rikuden_csv = cls.__get_rikuden_csv(decoded_data)
            data_frame = cls.__parse(rikuden_csv)
            data_frame.reset_index(inplace=True)
            FileFunction.create_pkl_file(original_pkl_path, data_frame)

        return original_pkl_path
Example #9
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # tohokuepco stores a single datetime column, so split it into date and time to match Tepco.
        DataFrameFunction.create_date_and_time_from_datetime(data_frame)

        # Convert to a datetime type.
        data_frame['date_time'] = pandas.to_datetime(data_frame['date_time'], format='%Y/%m/%d %H:%M')

        # Calculate the total supply capacity.
        data_frame['total_supply_capacity'] = DataFrameFunction.get_total_supply_capacity(data_frame)

        processed_pkl_path = FileFunction.get_processed_pkl_path(root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
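Note: DataFrameFunction.create_date_and_time_from_datetime is likewise not shown in these excerpts. A sketch of the splitting step it presumably performs, with the output column names as assumptions, could be:

import pandas


def split_date_time_sketch(data_frame):
    """Hypothetical sketch only: split 'date_time' strings into 'date' and 'time' columns."""
    parsed = pandas.to_datetime(data_frame['date_time'], format='%Y/%m/%d %H:%M')
    data_frame['date'] = parsed.dt.strftime('%Y/%m/%d')
    data_frame['time'] = parsed.dt.strftime('%H:%M')
    return data_frame


# Toy usage mirroring the tohokuepco-style timestamps mentioned above.
print(split_date_time_sketch(pandas.DataFrame({'date_time': ['2016/10/1 1:00']})))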
Example #10
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # Date and Time are stored in separate columns, so build a combined field.
        DataFrameFunction.generate_data_time_field(data_frame)
        data_frame.set_index('date_time', inplace=True)

        # Okinawa has none of these supply types, so fill the columns with 0.
        data_frame['nuclear'] = 0
        data_frame['geothermal'] = 0
        data_frame['pumping'] = 0
        data_frame['interconnection'] = 0

        processed_pkl_path = FileFunction.get_processed_pkl_path(root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
Example #11
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(
            original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # Date and Time are stored in separate columns, so build a combined field.
        DataFrameFunction.generate_data_time_field(data_frame)
        data_frame.set_index('date_time', inplace=True)

        # Calculate the total supply capacity.
        data_frame['total_supply_capacity'] = (
            DataFrameFunction.get_total_supply_capacity(data_frame))

        processed_pkl_path = FileFunction.get_processed_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
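Note: DataFrameFunction.get_total_supply_capacity is not shown in these excerpts. A minimal sketch, assuming it simply sums the per-source supply columns row by row (the column list below is made up), could be:

# Assumed column names; the real helper may use a different set.
SUPPLY_COLUMNS = ['nuclear', 'thermal', 'hydro', 'geothermal', 'solar', 'wind',
                  'pumping', 'interconnection']


def get_total_supply_capacity_sketch(data_frame):
    """Hypothetical sketch only: sum the individual supply columns row by row."""
    columns = [column for column in SUPPLY_COLUMNS if column in data_frame.columns]
    return data_frame[columns].sum(axis=1)


# Toy usage: data_frame['total_supply_capacity'] = get_total_supply_capacity_sketch(data_frame)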
Example #12
    def correct_data(cls, root_path, reflesh):
        check_result, message = cls.check_download_page(root_path)

        if check_result is False:
            return message, None

        json = FileFunction.get_param_json(root_path, cls.COMPANY_NAME)
        merged_pkl_path = KyudenService.correct_data(json['url'], root_path,
                                                     reflesh)
        count = QueryService.count(root_path, cls.COMPANY_NAME)

        return count, merged_pkl_path
Example #13
    def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
        original_pkl_path = FileFunction.get_original_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)

        if reflesh or not os.path.exists(original_pkl_path):
            data_frame_from_xls = pandas.read_excel(
                url,
                header=None,
                index_col=None,
                skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8])
            # The last row is not needed, so drop it.
            data_frame_from_xls.drop(data_frame_from_xls.tail(1).index,
                                     inplace=True)
            # Reshape the Excel dump into a format that is easier to parse.
            yonden_csv = cls.__create_yonden_csv_from_xls(
                data_frame_from_xls.to_csv(index=False))
            data_frame = cls.__parse(yonden_csv)
            print(data_frame)
            FileFunction.create_pkl_file(original_pkl_path, data_frame)

        return original_pkl_path
Example #14
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(
            original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # Date and Time are stored in separate columns, so build a combined field.
        DataFrameFunction.generate_data_time_field(data_frame)
        data_frame.set_index('date_time', inplace=True)

        # Convert to float so the downstream calculations work.
        # Geothermal is reported as a hyphen, so treat it as 0.
        data_frame['geothermal'] = data_frame['geothermal'].astype(
            str).str.replace('-', '0').astype(float)

        # 他の電力に合わせて万kwからMWhに揃える。
        DataFrameFunction.to_mwh(data_frame)

        processed_pkl_path = FileFunction.get_processed_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
Example #15
    def check_diff_current_and_prev_html(cls, root_path, company, omit_list):
        current_html_path = FileFunction.get_current_html_path(
            root_path, company)
        with open(current_html_path) as current_html:
            current_html_lines = current_html.readlines()

        prev_html_path = FileFunction.get_prev_html_path(root_path, company)
        with open(prev_html_path) as prev_html:
            prev_html_lines = prev_html.readlines()

        diff_lines = []
        for diff_line in difflib.unified_diff(current_html_lines,
                                              prev_html_lines,
                                              fromfile=current_html_path,
                                              tofile=prev_html_path):

            # Remove ASCII and full-width spaces before filtering.
            diff_line = diff_line.replace(" ", "").replace("　", "")
            if len(diff_line) == 0:
                continue

            omit_result = False
            for omit_word in omit_list:
                if omit_word in diff_line:
                    omit_result = True
                    break

            if omit_result is False:
                diff_lines.append(diff_line)

        if len(diff_lines) > 0:
            for diff_line in diff_lines:
                print(company)
                print(diff_line)
            return False, 'There is diff between current and prev.'

        return True, 'Current is the same as prev.'
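Note: the diff-filtering idea above can be tried in isolation with toy data. The standalone sketch below (file contents and omit words are made up) shows how difflib.unified_diff output is filtered against an omit list.

import difflib

current_lines = ['<p>updated 2024/04/01</p>\n', '<a href="juyo.csv">CSV</a>\n']
prev_lines = ['<p>updated 2024/03/01</p>\n', '<a href="juyo.csv">CSV</a>\n']
omit_list = ['updated']

diff_lines = [
    line for line in difflib.unified_diff(current_lines, prev_lines)
    if line.strip() and not any(word in line for word in omit_list)
]
# The changed 'updated ...' lines are dropped; only diff headers and unchanged context remain.
print(diff_lines)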
Example #16
    def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
        data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
        data_frame['company'] = cls.COMPANY_NAME

        # Kyuden stores a single datetime column, so split it into date and time to match Tepco.
        DataFrameFunction.create_date_and_time_from_datetime(data_frame)

        # Some values contain thousands separators like "2,500", so strip the commas.
        data_frame['demand'] = data_frame['demand'].astype(str).str.replace(',', '').astype(float)

        # Convert to float so the downstream calculations work.
        data_frame['nuclear'] = data_frame['nuclear'].astype(str).str.replace(',', '').astype(float)
        data_frame['thermal'] = data_frame['thermal'].astype(str).str.replace(',', '').astype(float)
        # 'None'/'nan' placeholders are treated as 0 before the float conversion.
        for column in ['solar_output_control', 'pumping', 'interconnection']:
            data_frame[column] = (data_frame[column].astype(str)
                                  .str.replace('None', '0')
                                  .str.replace('nan', '0')
                                  .str.replace(',', '')
                                  .astype(float))

        # Convert to a datetime type.
        data_frame['date_time'] = pandas.to_datetime(data_frame['date_time'], format='%Y/%m/%d %H:%M')

        # Calculate the total supply capacity.
        data_frame['total_supply_capacity'] = DataFrameFunction.get_total_supply_capacity(data_frame)

        processed_pkl_path = FileFunction.get_processed_pkl_path(root_path, cls.COMPANY_NAME, pkl_file_name)
        FileFunction.create_pkl_file(processed_pkl_path, data_frame)

        return processed_pkl_path
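Note: the repeated string cleanup above could also be expressed with pandas.to_numeric. The helper below is an alternative sketch, not the project's actual approach.

import pandas


def clean_numeric_sketch(series):
    """Alternative sketch: strip thousands separators, coerce any remaining
    non-numeric values ('None', 'nan', '-', ...) to NaN, then fill with 0."""
    cleaned = series.astype(str).str.replace(',', '', regex=False)
    return pandas.to_numeric(cleaned, errors='coerce').fillna(0.0)


# Toy usage with the kinds of values seen in the raw CSVs.
print(clean_numeric_sketch(pandas.Series(['2,500', 'None', 'nan', '12'])))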
Example #17
    def correct_data(cls, urls, root_path, reflesh):
        processed_pkl_paths = []

        for url in urls:
            try:
                pkl_file_name = FileFunction.get_pkl_file_name(url)
                original_pkl_path = cls.__correct_ex_data(root_path, pkl_file_name, url, reflesh)
                processed_pkl_path = cls.__process_ex_data(original_pkl_path, root_path, pkl_file_name)
                processed_pkl_paths.append(processed_pkl_path)
            except Exception as e:
                print(f'{pkl_file_name} => {e}')
                raise e

        merged_pkl_path = DataFrameFunction.merge_ex_data(processed_pkl_paths, root_path, cls.COMPANY_NAME)
        return merged_pkl_path
Example #18
    def correct_data(cls, urls, root_path, reflesh):
        processed_pkl_paths = []
        number = cls.KEY_NUMBER

        for url in urls:
            try:
                target_url = f'{cls.ROOT_URL}{number}/{url}'
                pkl_file_name = FileFunction.get_pkl_file_name(target_url)
                original_pkl_path = cls.__correct_ex_data(root_path, pkl_file_name, target_url, reflesh)
                processed_pkl_path = cls.__process_ex_data(original_pkl_path, root_path, pkl_file_name)
                processed_pkl_paths.append(processed_pkl_path)
                number = number + 1
            except Exception as e:
                print(f'{pkl_file_name} => {e}')
                raise e

        merged_pkl_path = DataFrameFunction.merge_ex_data(processed_pkl_paths, root_path, cls.COMPANY_NAME)
        return merged_pkl_path
Example #19
    def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
        original_pkl_path = FileFunction.get_original_pkl_path(
            root_path, cls.COMPANY_NAME, pkl_file_name)

        if reflesh or not os.path.exists(original_pkl_path):
            if '.xls' in url:
                data_frame_from_xls = pandas.read_excel(url,
                                                        header=None,
                                                        index_col=None,
                                                        skiprows=[0, 1, 2, 3])
                hepco_csv = cls.__create_hepco_csv_from_xls(
                    data_frame_from_xls.to_csv(index=False))
                data_frame = cls.__parse_csv_from_xls(hepco_csv)
                FileFunction.create_pkl_file(original_pkl_path, data_frame)
            else:
                decoded_data = FileFunction.get_decoded_data(url)
                # hepco formats its date/time fields differently from the others,
                # so the data needs cleanup before the CSV is read.
                hepco_csv = cls.__get_hepco_csv(decoded_data)
                data_frame = cls.__parse(hepco_csv)
                FileFunction.create_pkl_file(original_pkl_path, data_frame)

        return original_pkl_path