def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the path of the original pickle file, building it if needed.

    The pickle is (re)built when ``reflesh`` is truthy or when no file
    exists yet at the resolved path; otherwise the existing file is reused.

    Args:
        root_path: Passed to ``FileFunction.get_original_pkl_path`` to
            resolve the pickle location.
        pkl_file_name: Name of the pickle file, passed to the same helper.
        url: Source handed to ``FileFunction.get_decoded_data``.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path resolved by ``FileFunction.get_original_pkl_path``.
    """
    pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    # Reuse the existing pickle unless a rebuild was explicitly requested.
    if not reflesh and os.path.exists(pkl_path):
        return pkl_path

    payload = FileFunction.get_decoded_data(url)
    frame = cls.__parse(payload)
    FileFunction.create_pkl_file(pkl_path, frame)
    return pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the path of the original pickle file, building it if needed.

    The pickle is (re)built when ``reflesh`` is truthy or when no file
    exists yet at the resolved path; otherwise the existing file is reused.

    Args:
        root_path: Passed to ``FileFunction.get_original_pkl_path`` to
            resolve the pickle location.
        pkl_file_name: Name of the pickle file, passed to the same helper.
        url: Source handed to ``FileFunction.get_decoded_data``.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path resolved by ``FileFunction.get_original_pkl_path``.
    """
    pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    # Reuse the existing pickle unless a rebuild was explicitly requested.
    if not reflesh and os.path.exists(pkl_path):
        return pkl_path

    payload = FileFunction.get_decoded_data(url)
    # Some records contain extra half-width characters (e.g. the timestamp
    # "2016/10/1 1:00"), so the data is cleaned up before it is parsed.
    frame = cls.__parse(cls.__get_tohokuepco_csv(payload))
    FileFunction.create_pkl_file(pkl_path, frame)
    return pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the path of the original pickle file, building it if needed.

    The pickle is (re)built when ``reflesh`` is truthy or when no file
    exists yet at the resolved path; otherwise the existing file is reused.

    Args:
        root_path: Passed to ``FileFunction.get_original_pkl_path`` to
            resolve the pickle location.
        pkl_file_name: Name of the pickle file, passed to the same helper.
        url: Source handed to ``FileFunction.get_decoded_data``.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path resolved by ``FileFunction.get_original_pkl_path``.
    """
    original_pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    if reflesh or not os.path.exists(original_pkl_path):
        decoded_data = FileFunction.get_decoded_data(url)
        # okiden data can contain blank lines and the like, so it has to be
        # corrected before it is read as CSV.
        okiden_csv = cls.__get_okiden_csv(decoded_data)
        data_frame = cls.__parse(okiden_csv)
        # NOTE(review): the frame is stored with whatever index __parse
        # produced. Assuming it is a pandas DataFrame, a bare
        # `data_frame.reset_index()` returns a new object and leaves
        # `data_frame` untouched, so no such call is made here. If a reset
        # index is actually wanted in the pickle, assign the result
        # explicitly and confirm the consumers of the file first.
        FileFunction.create_pkl_file(original_pkl_path, data_frame)

    return original_pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the path of the original pickle file, building it if needed.

    The pickle is (re)built when ``reflesh`` is truthy or when no file
    exists yet at the resolved path; otherwise the existing file is reused.

    Args:
        root_path: Passed to ``FileFunction.get_original_pkl_path`` to
            resolve the pickle location.
        pkl_file_name: Name of the pickle file, passed to the same helper.
        url: Source handed to ``FileFunction.get_decoded_data``.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path resolved by ``FileFunction.get_original_pkl_path``.
    """
    original_pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    if reflesh or not os.path.exists(original_pkl_path):
        decoded_data = FileFunction.get_decoded_data(url)
        # Lines sometimes end with an extra ",", so it is removed.
        # The rows that must be skipped can also differ, so that is handled
        # here as well.
        rikuden_csv = cls.__get_rikuden_csv(decoded_data)
        data_frame = cls.__parse(rikuden_csv)
        # NOTE(review): the frame is stored with whatever index __parse
        # produced. Assuming it is a pandas DataFrame, a bare
        # `data_frame.reset_index()` returns a new object and leaves
        # `data_frame` untouched, so no such call is made here. If a reset
        # index is actually wanted in the pickle, assign the result
        # explicitly and confirm the consumers of the file first.
        FileFunction.create_pkl_file(original_pkl_path, data_frame)

    return original_pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the path of the original pickle file, building it if needed.

    The pickle is (re)built when ``reflesh`` is truthy or when no file
    exists yet at the resolved path; otherwise the existing file is reused.
    A ``url`` containing ``'.xls'`` is read with ``pandas.read_excel``;
    any other ``url`` goes through ``FileFunction.get_decoded_data``.

    Args:
        root_path: Passed to ``FileFunction.get_original_pkl_path`` to
            resolve the pickle location.
        pkl_file_name: Name of the pickle file, passed to the same helper.
        url: Location of the source data (Excel or other).
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path resolved by ``FileFunction.get_original_pkl_path``.
    """
    pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    # Reuse the existing pickle unless a rebuild was explicitly requested.
    if not reflesh and os.path.exists(pkl_path):
        return pkl_path

    if '.xls' in url:
        # Excel source: drop the first four rows, round-trip through CSV
        # and hand the result to the xls-specific helpers.
        sheet = pandas.read_excel(
            url, header=None, index_col=None, skiprows=[0, 1, 2, 3])
        xls_as_csv = sheet.to_csv(index=False)
        frame = cls.__parse_csv_from_xls(
            cls.__create_hepco_csv_from_xls(xls_as_csv))
    else:
        payload = FileFunction.get_decoded_data(url)
        # hepco formats its date/time fields differently from the others,
        # so the data has to be corrected before it is read as CSV.
        frame = cls.__parse(cls.__get_hepco_csv(payload))

    FileFunction.create_pkl_file(pkl_path, frame)
    return pkl_path