def run(self):
     """Parse the configured source with the configured job.

     Reads the source configuration named by ``self.sourcefile`` (under
     ``WEB_SOURCES_CONFIG_DIR``) and the job configuration named by
     ``self.jobfile`` (under ``JOBS_CONFIG_DIR``), obtains the handler that
     ``Parser.handler_name`` selects for that pair, then builds a ``Parser``
     with ``WEB_DATA_PATH`` and that handler and calls its ``parse()``.
     """
     source_path = os.path.join(WEB_SOURCES_CONFIG_DIR, str(self.sourcefile))
     source_config = Utils.read_file(source_path)

     job_path = os.path.join(JOBS_CONFIG_DIR, str(self.jobfile))
     job_config = Utils.read_file(job_path)

     # The handler is chosen from the combination of source and job config.
     handler_key = Parser.handler_name(source_config, job_config)
     selected_handler = HandlersFactory.get_handler(handler_key)

     Parser(source_config, job_config, WEB_DATA_PATH, selected_handler).parse()
Example #2
0
class Shorp:
    """Accumulate source text, then parse and interpret it.

    Text is collected in ``content`` via :meth:`add` / :meth:`addAll`.
    ``parser`` and ``interpreter`` may be assigned by the caller; when left
    as ``None`` they are created on first use (``Parser`` and
    ``SimpleInterpreter`` respectively).
    """

    def __init__(self):
        # Concatenation of everything added so far.
        self.content = ''
        # Created lazily by parse() / run() unless set by the caller.
        self.parser = None
        self.interpreter = None

    def addAll(self, all):
        """Add every item of ``all`` in order via :meth:`add`.

        The parameter name shadows the builtin ``all``; it is kept so that
        existing keyword callers continue to work.

        Returns:
            This instance, so calls can be chained.
        """
        for item in all:
            self.add(item)
        return self

    def add(self, source):
        """Append ``source`` to the accumulated content.

        If ``source`` is the path of an existing regular file, the file's
        contents are appended; otherwise ``source`` itself is appended.
        """
        if os.path.isfile(source):
            # The context manager closes the handle even if read() raises.
            with open(source, 'r') as fh:
                self.content = self.content + fh.read()
        else:
            self.content = self.content + source

    def parse(self):
        """Parse the accumulated content and return the parser's result."""
        if self.parser is None:
            self.parser = Parser()
        return self.parser.parse(self.content)

    def run(self):
        """Parse the accumulated content and interpret the parse result.

        Returns:
            Whatever the interpreter's ``interpret`` returns for the parse
            result.
        """
        if self.interpreter is None:
            self.interpreter = SimpleInterpreter()
        root = self.parse()
        return self.interpreter.interpret(root)
        
 def output(self):
     """Return the local target for this source/job pair.

     Reads the same two configuration files as the parsing step
     (``self.sourcefile`` under ``WEB_SOURCES_CONFIG_DIR`` and
     ``self.jobfile`` under ``JOBS_CONFIG_DIR``) and wraps the path that
     ``Parser.path`` computes for them under ``WEB_DATA_PATH`` in a
     ``luigi.LocalTarget``.
     """
     source_path = os.path.join(WEB_SOURCES_CONFIG_DIR, str(self.sourcefile))
     source_config = Utils.read_file(source_path)

     job_path = os.path.join(JOBS_CONFIG_DIR, str(self.jobfile))
     job_config = Utils.read_file(job_path)

     target_path = Parser.path(source_config, job_config, WEB_DATA_PATH)
     return luigi.LocalTarget(target_path)
Example #4
0
 def test_address_parse_to_csv(self):
     """Parsing the address source with the to_csv job creates its file.

     Runs a ``Parser`` over ``web_datagov_addresses.json`` with the
     ``to_csv.json`` job and asserts that the path reported by
     ``service.path`` exists after ``parse()``.
     """
     source_config = Utils.read_file(
         os.path.join(WEB_SOURCES_CONFIG_DIR, 'web_datagov_addresses.json'))
     job_config = Utils.read_file(
         os.path.join(JOBS_CONFIG_DIR, 'to_csv.json'))

     handler_key = Parser.handler_name(source_config, job_config)
     handler = HandlersFactory.get_handler(handler_key)
     service = Parser(source_config, job_config, self.data_path, handler)

     # Resolve the expected output location before running the parse.
     expected_output = service.path(source_config, job_config, self.data_path)
     service.parse()

     self.assertTrue(os.path.exists(expected_output))
Example #5
0
 def parse(self):
     """Parse ``self.content`` and return the parser's result.

     A ``Parser`` is created and stored on ``self.parser`` the first time
     this is called with no parser set.
     """
     if self.parser is None:
         self.parser = Parser()
     active_parser = self.parser
     return active_parser.parse(self.content)