def run(self):
    """Parse the task's input files with ``XLSParser``.

    Reads the source config (``self.sourcefile`` under
    ``WEB_SOURCES_CONFIG_DIR``) and the job config (``self.jobfile`` under
    ``JOBS_CONFIG_DIR``), asks ``HandlersFactory`` for the handler named by
    ``XLSParser.handler_name``, and runs the parser over the ``path`` of
    every item returned by ``self.input()``. The value returned by
    ``parse()`` is discarded.
    """
    source_config = Utils.read_file(
        os.path.join(WEB_SOURCES_CONFIG_DIR, str(self.sourcefile)))
    job_config = Utils.read_file(
        os.path.join(JOBS_CONFIG_DIR, str(self.jobfile)))

    # The handler is selected from both configs.
    handler_name = XLSParser.handler_name(source_config, job_config)
    parse_handler = HandlersFactory.get_handler(handler_name)

    input_paths = [target.path for target in self.input()]
    parser = XLSParser(
        source_config,
        job_config,
        input_paths,
        WEB_DATA_PATH,
        parse_handler,
    )
    parser.parse()
def output(self):
    """Return the ``luigi.LocalTarget`` this task writes to.

    The target path is computed by ``XLSParser.path`` from the source
    config (``self.sourcefile`` under ``WEB_SOURCES_CONFIG_DIR``), the job
    config (``self.jobfile`` under ``JOBS_CONFIG_DIR``) and
    ``WEB_DATA_PATH``.
    """
    source_config = Utils.read_file(
        os.path.join(WEB_SOURCES_CONFIG_DIR, str(self.sourcefile)))
    job_config = Utils.read_file(
        os.path.join(JOBS_CONFIG_DIR, str(self.jobfile)))

    target_path = XLSParser.path(source_config, job_config, WEB_DATA_PATH)
    return luigi.LocalTarget(target_path)
def test_kurk_parse_to_csv(self):
    """Download the ``web_statgov_kurk`` source and parse it with ``to_csv``.

    Passes when the file at the path reported by ``XLSParser.path`` exists
    after parsing and ``parse()`` returns a value greater than zero
    (presumably the number of parsed rows -- confirm against ``XLSParser``).
    """
    src_json = Utils.read_file(
        os.path.join(WEB_SOURCES_CONFIG_DIR, 'web_statgov_kurk.json'))
    job_json = Utils.read_file(
        os.path.join(JOBS_CONFIG_DIR, 'to_csv.json'))

    # Step 1: fetch the source file.
    download_handler_name = Downloader.handler_name(src_json)
    download_handler = HandlersFactory.get_handler(download_handler_name)
    downloader = Downloader(src_json, download_handler)
    downloaded_file = downloader.download()

    # Step 2: parse what was downloaded.
    parse_handler_name = XLSParser.handler_name(src_json, job_json)
    parse_handler = HandlersFactory.get_handler(parse_handler_name)
    parser = XLSParser(
        src_json,
        job_json,
        downloaded_file,
        self.data_path,
        parse_handler,
    )

    # The expected output path is resolved before parsing, as in the
    # original call order.
    csvfile = parser.path(src_json, job_json, self.data_path)
    rows_cnt = parser.parse()

    self.assertTrue(os.path.exists(csvfile))
    self.assertGreater(rows_cnt, 0)