def run(self):
    """Parse the configured web source using the handler its configs select.

    The source config is read from ``WEB_SOURCES_CONFIG_DIR`` and the job
    config from ``JOBS_CONFIG_DIR``; the file names come from
    ``self.sourcefile`` and ``self.jobfile``. Returns ``None``.
    """
    source_conf = Utils.read_file(
        os.path.join(WEB_SOURCES_CONFIG_DIR, str(self.sourcefile)))
    job_conf = Utils.read_file(
        os.path.join(JOBS_CONFIG_DIR, str(self.jobfile)))

    # The handler is chosen from the pair of configs, not from either alone.
    handler_name = Parser.handler_name(source_conf, job_conf)
    handler = HandlersFactory.get_handler(handler_name)

    Parser(source_conf, job_conf, WEB_DATA_PATH, handler).parse()
class Shorp:
    """Collect source text, then parse it and optionally interpret it.

    Text is accumulated in ``content`` through ``add``/``addAll``. The parser
    and interpreter are created on first use unless a caller has already
    assigned ``parser`` / ``interpreter``.
    """

    def __init__(self):
        self.content = ''
        self.parser = None
        self.interpreter = None

    def addAll(self, all):
        """Add every item of ``all`` via ``add`` and return ``self``.

        The parameter name shadows the builtin ``all``; it is kept because
        callers may pass it by keyword.
        """
        for item in all:
            self.add(item)
        return self

    def add(self, source):
        """Append ``source`` to ``content``.

        If ``source`` is the path of an existing file, the file's text is
        appended; otherwise ``source`` itself is appended as literal text.
        """
        if os.path.isfile(source):
            # Context manager guarantees the handle is closed even when
            # read() raises (the manual open/close pair leaked it).
            with open(source, 'r') as fh:
                self.content += fh.read()
        else:
            self.content += source

    def parse(self):
        """Parse the accumulated content and return the parser's result."""
        if self.parser is None:
            self.parser = Parser()
        return self.parser.parse(self.content)

    def run(self):
        """Parse the accumulated content and interpret the resulting root."""
        if self.interpreter is None:
            self.interpreter = SimpleInterpreter()
        root = self.parse()
        return self.interpreter.interpret(root)
def output(self):
    """Return a ``luigi.LocalTarget`` for the path derived from the configs.

    The source and job configs are read from ``WEB_SOURCES_CONFIG_DIR`` and
    ``JOBS_CONFIG_DIR`` (file names from ``self.sourcefile`` and
    ``self.jobfile``); ``Parser.path`` turns them, together with
    ``WEB_DATA_PATH``, into the target path.
    """
    source_conf = Utils.read_file(
        os.path.join(WEB_SOURCES_CONFIG_DIR, str(self.sourcefile)))
    job_conf = Utils.read_file(
        os.path.join(JOBS_CONFIG_DIR, str(self.jobfile)))

    target_path = Parser.path(source_conf, job_conf, WEB_DATA_PATH)
    return luigi.LocalTarget(target_path)
def test_address_parse_to_csv(self):
    """Parsing the addresses source with the ``to_csv`` job creates a file.

    The expected output path is computed before parsing, then the test
    asserts that the path exists once ``parse()`` has run.
    """
    source_conf_path = os.path.join(
        WEB_SOURCES_CONFIG_DIR, 'web_datagov_addresses.json')
    job_conf_path = os.path.join(JOBS_CONFIG_DIR, 'to_csv.json')
    source_conf = Utils.read_file(source_conf_path)
    job_conf = Utils.read_file(job_conf_path)

    handler_name = Parser.handler_name(source_conf, job_conf)
    handler = HandlersFactory.get_handler(handler_name)
    parser = Parser(source_conf, job_conf, self.data_path, handler)

    expected_csv = parser.path(source_conf, job_conf, self.data_path)
    parser.parse()

    self.assertTrue(os.path.exists(expected_csv))
def parse(self):
    """Parse ``self.content`` and return the parser's result.

    A ``Parser`` is created and stored on ``self.parser`` the first time
    this is called with no parser assigned.
    """
    if self.parser is None:
        self.parser = Parser()
    active_parser = self.parser
    text = self.content
    return active_parser.parse(text)