Exemple #1
0
    def __init__(self, params=None):
        """
        Initialize configuration, the working objects (waiting queue, MIME
        handler, RRSDatabase, lookup rules) and the logger.

        :param params: optional mapping of configuration overrides. The
            recognized keys are 'update_rule', 'logfile', 'lookup_level',
            'module', 'schema' and 'logs'. A key that is absent, or whose
            value is None, leaves the corresponding default untouched.
            None (the default) means "no overrides".
        """
        # Defaults; each may be replaced by an entry in ``params`` below.
        self.lookup_level = LOOKUP_PRECISE
        self.update_rule = RRSDB_MISSING
        self.logfile = 'xmlimport'
        self.logs = SELE_LOG
        self.module = 'unknown_module'
        self.schema = 'data'

        # The signature advertises params=None, but a membership test on
        # None raises TypeError -- treat None as an empty set of overrides.
        if params is None:
            params = {}
        for arg in ('update_rule', 'logfile', 'lookup_level', 'module',
                    'schema', 'logs'):
            if arg in params and params[arg] is not None:
                setattr(self, arg, params[arg])

        # working space
        self._queue = IMWaitingQueue()
        self._mime = MIMEHandler()
        self._rrsdb = RRSDatabase(self.logfile, self.schema, self.logs)
        # Keep direct references to the database object's internals.
        self._db = self._rrsdb._db
        self._table_to_class_map = self._rrsdb._table_to_class_map
        self._lookup_rules = _LookupRules()

        # set logging; the log file name is derived from self.logfile
        self.manager = RRSLogManager()
        logfilename = "%s.importer.log" % self.logfile
        self.logger = self.manager.new_logger("xml_import_manager", logfilename)
        self.logger.info("RRSImportManager initialized.")
    def __init__(self):
        """
        Create the page crawler and MIME handler and define which MIME
        types are wanted and which are unwanted.
        """
        self.crawler = GetHTMLPage()
        # Configure the crawler with a browser-style User-Agent and Accept
        # header pair.
        self.crawler.set_headers((
            ('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.19) Gecko/2010040116 Ubuntu/9.04 (jaunty) Firefox/3.0.19'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ))
        self.mime_handler = MIMEHandler()

        # define wanted/unwanted file types
        # NOTE: every entry must be followed by a comma. Adjacent string
        # literals are concatenated implicitly, which previously merged
        # 'text/rtf' and 'application/postscript' into one bogus entry.
        # NOTE(review): 'octet/stream' is kept verbatim; the registered type
        # is 'application/octet-stream' -- confirm which form is expected.
        self.wanted_mimes = [
            'application/pdf',
            'application/msword',
            'text/rtf',
            'application/postscript',
            'octet/stream',
            'application/vnd.oasis.opendocument.text',
        ]
        self.unwanted_mimes = [
            'application/zip',
            'application/x-tar',
            'application/x-gtar',
        ]