def __init__(self, configdict, section, produces):
    """
    Initialise the base Input, build the list of input files and require
    that it contains at least one entry.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    :param produces: passed unchanged to ``Input.__init__``
    :raises Exception: when the resulting file list is empty
    """
    Input.__init__(self, configdict, section, produces=produces)

    # Build the list of input files from the path, filename pattern and
    # depth-search attributes that are read from this instance.
    self.file_list = Util.make_file_list(
        self.file_path,
        None,
        self.filename_pattern,
        self.depth_search)

    message = "file_list=%s" % str(self.file_list)
    log.info(message)

    # Without any input file there is nothing to do: fail immediately.
    if len(self.file_list) == 0:
        raise Exception('File list is empty!!')

    # Starts out empty.
    self.file_list_done = []
def __init__(self, configdict, section, produces):
    """
    Initialise the base Input, build the list of input files, require that
    it is not empty and reset the per-file bookkeeping attributes.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    :param produces: passed unchanged to ``Input.__init__``
    :raises Exception: when the resulting file list is empty
    """
    Input.__init__(self, configdict, section, produces=produces)

    # Build the list of input files from the path, filename pattern and
    # depth-search attributes that are read from this instance.
    self.file_list = Util.make_file_list(
        self.file_path,
        None,
        self.filename_pattern,
        self.depth_search)

    message = "file_list=%s" % str(self.file_list)
    log.info(message)

    # Without any input file there is nothing to do: fail immediately.
    if len(self.file_list) == 0:
        raise Exception('File list is empty!!')

    # No current file yet, and the "done" list starts out empty.
    self.cur_file_path = None
    self.file_list_done = []
def __init__(self, configdict, section):
    """
    Initialise the base Input as a producer of ``FORMAT.etree_doc`` and
    reset all parsing state held on the instance.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    """
    Input.__init__(self, configdict, section, produces=FORMAT.etree_doc)

    self.cur_feature_blob = None
    self.rowcount = 0

    # Case-insensitive match of an xlink:href attribute whose value is a
    # local "#..." reference: double-quoted, single-quoted or unquoted.
    # http://www.mkyong.com/regular-expressions/how-to-extract-html-links-with-regular-expression/
    # The inline (?i) flag has to be the very first token of the pattern:
    # a global flag placed later is deprecated since Python 3.6 and is
    # rejected with re.error from Python 3.11 on. Moving it changes neither
    # what is matched nor the group numbering.
    self.regex_xlink_href = re.compile(
        "(?i)\\s*xlink:href\\s*=\\s*(\"#([^\"]*\")|'#[^']*'|(#[^'\">\\s]+))")

    self.db = None
    self.xlink_db = None
    self.buffer = None
    self.feature_count = 0

    # Reusable XML parser
    self.xml_parser = etree.XMLParser(remove_blank_text=True)
def __init__(self, configdict, section, produces):
    """
    Initialise the base Input, read the file-related settings from
    ``self.cfg`` and build the list of input files.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    :param produces: passed unchanged to ``Input.__init__``
    """
    Input.__init__(self, configdict, section, produces=produces)

    settings = self.cfg

    # Location of the input: a directory, a file, or several of them
    # separated by commas.
    self.file_path = settings.get('file_path')

    # Pattern for the file names, in the syntax of Python's glob.glob.
    self.filename_pattern = settings.get('filename_pattern', '*.[gxGX][mM][lL]')

    # Whether to recurse into directories; off unless configured.
    self.depth_search = settings.get_bool('depth_search', False)

    # Build the list of files to be used as input.
    self.file_list = Util.make_file_list(
        self.file_path,
        None,
        self.filename_pattern,
        self.depth_search)

    message = "file_list=%s" % str(self.file_list)
    log.info(message)
def __init__(self, configdict, section, produces=FORMAT.any):
    """
    Initialise the base Input, read the URL settings from ``self.cfg`` and,
    when parameters are configured, encode them as a query string.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    :param produces: passed unchanged to ``Input.__init__``
                     (defaults to ``FORMAT.any``)
    """
    Input.__init__(self, configdict, section, produces)

    # url and optional parameters
    self.url = self.cfg.get('url')
    self.parameters = self.cfg.get('parameters')

    # http://docs.python.org/2/howto/urllib2.html
    self.query_string = None
    if self.parameters:
        # The configured value is parsed as a Python literal, see
        # http://stackoverflow.com/questions/988228/converting-a-string-to-dictionary
        import ast

        # urlencode is urllib.parse.urlencode on Python 3 and
        # urllib.urlencode on Python 2; support both so this constructor
        # does not fail with AttributeError on Python 3.
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode

        self.parameters = ast.literal_eval(self.parameters)
        self.query_string = urlencode(self.parameters)

    log.info("url=%s" % self.url)
def __init__(self, configdict, section):
    """
    Initialise the base Input as a producer of both ``FORMAT.ogr_feature``
    and ``FORMAT.ogr_feature_array``.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    """
    output_formats = [FORMAT.ogr_feature, FORMAT.ogr_feature_array]
    Input.__init__(self, configdict, section, produces=output_formats)
def __init__(self, configdict, section):
    """
    Initialise the base Input as a producer of ``FORMAT.xml_line_stream``.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    """
    output_format = FORMAT.xml_line_stream
    Input.__init__(self, configdict, section, produces=output_format)
def __init__(self, configdict, section, produces):
    """
    Delegate construction entirely to the base Input.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    :param produces: passed unchanged to ``Input.__init__``
    """
    Input.__init__(self, configdict, section, produces)
def __init__(self, configdict, section, produces=FORMAT.any):
    """
    Initialise the base Input and log the ``url`` and ``parameters``
    attributes read from this instance.

    :param configdict: passed unchanged to ``Input.__init__``
    :param section: passed unchanged to ``Input.__init__``
    :param produces: passed unchanged to ``Input.__init__``
                     (defaults to ``FORMAT.any``)
    """
    Input.__init__(self, configdict, section, produces)

    logged_values = (self.url, self.parameters)
    message = "url=%s parameters=%s" % logged_values
    log.info(message)