def find_row(self, p_date: str = "", p_id: int = -1):
    """Look up the job row for a project/date and the rows it references.

    Args:
        p_date: Date text passed to ``StrTool.get_the_date``; the result is
            stored on the instance and formatted as ``%Y-%m-%d`` for the
            job lookup.
        p_id: Project id. Only a positive value replaces the project id
            already stored on the instance.

    Side effects:
        Prints each row that is looked up, and sets the readiness flag to
        ``True`` only when the job, data and location helpers all report
        a row (``False`` otherwise).
    """
    if p_id > 0:
        self.__m_project_id = p_id
    self.__m_p_date = StrTool.get_the_date(p_date)

    project_key = str(self.__m_project_id)
    date_key = self.__m_p_date.strftime("%Y-%m-%d")
    self.__job_info.find_row(project_key, date_key)  # e.g. "2", "2099-12-30"
    print(self.__job_info.get_row())

    if self.__job_info.has_row():
        # Both ids are read from the job row before either dependent lookup.
        dependent_lookups = (
            (self.__job_info.get_data_str("data_id"), self.__data_info),
            (self.__job_info.get_data_str("location_id"), self.__location_info),
        )
        for row_id, info in dependent_lookups:
            # NOTE(review): ``len(...) >= 0`` holds for every sized value, so
            # the lookup always runs, even for an empty id. Kept as-is; a
            # ``> 0`` check may have been intended - confirm before changing,
            # since skipping the lookup could leave an earlier row in place.
            if len(row_id) >= 0:
                info.find_row(row_id)
                print(info.get_row())

    # Ready only when all three lookups produced a row.
    self.__is_ready = bool(
        self.__job_info.has_row()
        and self.__data_info.has_row()
        and self.__location_info.has_row()
    )
# Driver script: query the HDFS directory, then run the per-day pipeline
# (sftp -> unzip -> convert to HDFS -> hive -> cleanup) for `days` consecutive
# dates ending at the requested date.
client = Client(the_conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"

posflow_hdfs_dir = "/data/posflow/allinpay_utf8_zc"
# NOTE(review): a, b and c are not used in the visible code; presumably
# leftover listing checks - confirm before removing, the calls hit HDFS.
a = MyHdfsFile.get_child(client, posflow_hdfs_dir)
b = MyHdfsFile.get_child_file(client, posflow_hdfs_dir)
c = MyHdfsFile.get_child_dir(client, posflow_hdfs_dir)

# test
# MyHdfsFile.delete(client, "/data/posflow/allinpay_utf8_zc", "*agt_cpy*")
# test

# Test mode uses the configured test date and a fixed 9-day window;
# otherwise both values are read through StrTool.get_param_*.
if the_conf.is_test():
    day_str, days = the_conf.test_date(), 9
else:
    day_str, days = StrTool.get_param_str(1, ""), StrTool.get_param_int(2, 1)

day_str = StrTool.get_the_date_str(day_str)
date1 = StrTool.get_the_date(day_str)

# Oldest date first: delta counts down from days - 1 to 0, so the final
# iteration processes date1 itself.
for delta in range(days - 1, -1, -1):
    day_str2 = (date1 - datetime.timedelta(days=delta)).strftime("%Y%m%d")
    run_sftp_file(the_conf, day_str2)
    run_unzip_file(the_conf, day_str2)
    run_conv_file_local_to_hdfs(the_conf, day_str2)
    run_hive(the_conf, the_date=day_str2)
    run_remove_files(the_conf, day_str2, 0)

print("ok")
# client = Client(the_conf.hdfs_ip())  # "http://10.2.201.197:50070"

# Run parameters: project id, start date and number of days, all read
# through StrTool.get_param_* (presumably (position, default) - confirm
# against StrTool).
# NOTE(review): test mode used to assign 3 / "20180901" / 9 first, but those
# values were overwritten by these same reads before being used, so test and
# non-test mode resolved identically. The duplicate branches are collapsed
# here; m_is_test is still forwarded to ConfigData below.
m_project_id = StrTool.get_param_int(1, 3)
start_date_str = StrTool.get_the_date_str(StrTool.get_param_str(2, ""))
m_days = StrTool.get_param_int(3, 1)

start_date = StrTool.get_the_date(start_date_str)
the_conf = ConfigData(
    m_project_id,
    StrTool.get_the_date_str_by_date(start_date, 0, 10),
    p_is_test=m_is_test,
)

for i in range(0, m_days):
    delta = m_days - i - 1  # no extra day: 20190108 processes the 20190108 folder
    # delta = days - i - 1 + 1  # one extra day, because 20190108 processes the 20190107 folder
    # The Shouyinbao (收银宝) files do not use the extra one-day delta.
    # 1. 20190110 191    2019-1-10 2018-7-4
    # 2. 1 20180703 191  2018-7-3 2017-12-25 (evening of 2019-1-24)
    # Factoring transaction flow:
    # 2. Previously covered up to 20180702: 191 days first, then one more
    #    day (20180702) was backfilled by hand.
    # 3. main3.py 7 20180702 70, processes 20180702-20180423 (noon of 2019-1-25)
    # 4. main3.py 8 20180702 70, processes 20180702-20180423 (noon of 2019-1-25)
    # Changed path: remote_path_ftp_7="/ftpdata/thblposloan/posflow2/"