def __init__(self, params=None):
    """
    Initialize RRSDatabase, lookup rules, handle_rules, queue and object
    topology map.

    @param params: optional mapping of configuration overrides. Recognized
                   keys are 'update_rule', 'logfile', 'lookup_level',
                   'module', 'schema' and 'logs'. Keys that are missing or
                   whose value is None keep the defaults set below. Passing
                   None (the default) keeps every default.
    """
    # param initialization (defaults)
    self.lookup_level = LOOKUP_PRECISE
    self.update_rule = RRSDB_MISSING
    self.logfile = 'xmlimport'
    self.logs = SELE_LOG
    self.module = 'unknown_module'
    self.schema = 'data'

    # The signature advertises params=None, so a missing mapping must be
    # treated as "no overrides" rather than failing on the `in` test below.
    if params is None:
        params = {}
    for arg in ('update_rule', 'logfile', 'lookup_level', 'module',
                'schema', 'logs'):
        if arg in params and params[arg] is not None:
            # Assign the same way the defaults above are assigned.
            setattr(self, arg, params[arg])

    # working space
    self._queue = IMWaitingQueue()
    self._mime = MIMEHandler()
    self._rrsdb = RRSDatabase(self.logfile, self.schema, self.logs)
    # Shortcuts to internals of the database wrapper.
    self._db = self._rrsdb._db
    self._table_to_class_map = self._rrsdb._table_to_class_map
    self._lookup_rules = _LookupRules()

    # set logging; the log file name is derived from the configured logfile
    self.manager = RRSLogManager()
    logfilename = "%s.importer.log" % self.logfile
    self.logger = self.manager.new_logger("xml_import_manager", logfilename)
    self.logger.info("RRSImportManager initialized.")
def __init__(self):
    """
    Set up the page crawler with browser-like request headers, create the
    MIME handler and define the lists of wanted and unwanted MIME types.
    """
    self.crawler = GetHTMLPage()
    self.crawler.set_headers((
        ('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.19) Gecko/2010040116 Ubuntu/9.04 (jaunty) Firefox/3.0.19'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
    ))
    self.mime_handler = MIMEHandler()

    # define wanted/unwanted file types
    self.wanted_mimes = [
        'application/pdf',
        'application/msword',
        # A comma was missing after 'text/rtf', so Python joined it with the
        # next literal into the single entry 'text/rtfapplication/postscript'.
        'text/rtf',
        'application/postscript',
        # NOTE(review): 'octet/stream' is kept as in the original;
        # presumably 'application/octet-stream' was intended - confirm.
        'octet/stream',
        'application/vnd.oasis.opendocument.text',
    ]
    self.unwanted_mimes = [
        'application/zip',
        'application/x-tar',
        'application/x-gtar',
    ]