def __init__(self, configurationsDictionary):
    """Set up the handler, read the table's column names and start the
    select cache thread.

    The constructor opens a short-lived MySQL connection only to discover
    the column names of the configured table, then launches
    ``_selectCacheThread`` as a daemon thread and blocks on
    ``selectWaitCondition`` until that thread signals it.

    *configurationsDictionary* is forwarded unchanged to
    ``BasePersistenceHandler.__init__``. After that call the settings are
    read from ``self.config`` (keys used here: ``echo``, ``table``,
    ``connargs``, ``primarykeycolumn``, ``resourceidcolumn``,
    ``statuscolumn``).
    """
    BasePersistenceHandler.__init__(self, configurationsDictionary)
    self.echo = common.EchoHandler(self.config["echo"])
    self.local = threading.local()
    self.selectCacheThreadExceptionEvent = threading.Event()
    self.selectNoResourcesEvent = threading.Event()
    self.selectWaitCondition = threading.Condition()

    # Get column names.
    # The table name comes from the configuration; SQL identifiers cannot
    # be bound as query parameters, hence the string concatenation.
    query = "SELECT * FROM " + self.config["table"] + " LIMIT 0"
    connection = mysql.connector.connect(**self.config["connargs"])
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query)
            # LIMIT 0 yields no rows; fetching just consumes the (empty)
            # result set before the column names are read.
            cursor.fetchall()
            self.colNames = cursor.column_names
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query fails, so a bad
        # table name or a dropped link does not leak it.
        connection.close()

    self.excludedColNames = (
        self.config["primarykeycolumn"],
        self.config["resourceidcolumn"],
        self.config["statuscolumn"],
    )
    self.infoColNames = [
        name for name in self.colNames
        if name not in self.excludedColNames
    ]

    # Start select cache thread
    self.resourcesQueue = Queue.Queue()
    t = threading.Thread(target=self._selectCacheThread)
    t.daemon = True
    # Acquire the condition BEFORE starting the thread. A notifier must
    # own the condition's lock, so the thread cannot notify until wait()
    # has released it. Starting the thread first would leave a window in
    # which the notification is sent before we wait, and this constructor
    # would then block forever.
    # NOTE(review): assumes _selectCacheThread notifies selectWaitCondition
    # once it is ready -- confirm against that method.
    with self.selectWaitCondition:
        t.start()
        self.selectWaitCondition.wait()
def __init__(self, configurationsDictionary):
    """Load the resources file into memory and schedule the dump timer.

    *configurationsDictionary* is handed to
    ``MemoryPersistenceHandler.__init__``. The file named by
    ``self.config["filename"]`` is then parsed with ``self.fileHandler``
    and every resource found is registered in ``self.resources``,
    ``self.statusRecords`` and, when ``uniqueresourceid`` is enabled,
    ``self.IDsHash``.

    Raises ``KeyError`` when unique IDs are required and the file
    contains the same ID twice.
    """
    MemoryPersistenceHandler.__init__(self, configurationsDictionary)
    self.echo = common.EchoHandler(self.config["echo"])
    self.saveLock = threading.Lock()
    self.dumpExceptionEvent = threading.Event()
    self._setFileHandler()

    sourceFilename = self.config["filename"]
    with open(sourceFilename, "r") as inputFile:
        loadedResources = self.fileHandler.load(inputFile, self.fileColumns)
        # Kept inside the ``with`` so the file stays open while iterating,
        # in case the loader reads lazily.
        for resource in loadedResources:
            # Index this resource will occupy once appended below.
            position = len(self.resources)
            self.statusRecords[resource["status"]].append(position)
            if (self.config["uniqueresourceid"]):
                resourceID = resource["id"]
                if (resourceID in self.IDsHash):
                    raise KeyError(
                        "Duplicated ID found in '%s': %s."
                        % (sourceFilename, resourceID))
                self.IDsHash[resourceID] = position
            # Resources without extra information get an explicit None.
            if ("info" not in resource):
                resource["info"] = None
            self.resources.append(resource)

    # Schedule the first dump; the timer is stored on the instance before
    # it is started.
    saveDelay = self.config["savetimedelta"]
    self.timer = threading.Timer(saveDelay, self._dumpTimerThread)
    self.timer.daemon = True
    self.timer.start()
def __init__(self, configurationsDictionary):
    """Constructor.

    When a crawler object is created, the whole crawler section of the
    XML configuration file is passed in as *configurationsDictionary*.
    It is handed to ``_extractConfig`` and the echo handler is then built
    from the ``echo`` entry of ``self.config``.

    """
    self._extractConfig(configurationsDictionary)
    echoSettings = self.config["echo"]
    self.echo = common.EchoHandler(echoSettings)
def __init__(self, configurationsDictionary):
    """Build the server: echo handler, persistence handler, filters and
    finally the underlying TCP server.

    *configurationsDictionary* is stored as ``self.config``. Its
    ``server`` section supplies the echo, persistence and filtering
    settings, and ``global.connection`` supplies the address and port the
    server binds to.
    """
    self.config = configurationsDictionary
    serverSettings = self.config["server"]

    # Configure echoing
    echoSettings = serverSettings["echo"]
    hostName = socket.gethostname()
    connectionSettings = self.config["global"]["connection"]
    logFilename = "server@%s[%s].log" % (hostName, connectionSettings["port"])
    self.echo = common.EchoHandler(echoSettings, logFilename)

    # Get persistence handler instance
    self.echo.out("Initializing persistence handler...")
    persistenceSettings = serverSettings["persistence"]
    PersistenceHandlerClass = getattr(persistence, persistenceSettings["class"])
    self.persist = PersistenceHandlerClass(persistenceSettings)

    # Get filters instances
    self.echo.out("Initializing filters...")
    self.parallelFilters = []
    self.sequentialFilters = []
    filtersSettings = serverSettings["filtering"]["filter"]
    # Resolve every filter class first, so an unknown class name fails
    # before any filter is instantiated.
    self.FiltersClasses = [
        getattr(filters, settings["class"]) for settings in filtersSettings
    ]
    for FilterClass, settings in zip(self.FiltersClasses, filtersSettings):
        if (settings["parallel"]):
            destination = self.parallelFilters
        else:
            destination = self.sequentialFilters
        destination.append(FilterClass(settings))

    # Call SocketServer constructor
    # Avoid "Address already in use" error when restarting server right
    # after a shutdown
    self.allow_reuse_address = True
    bindAddress = (connectionSettings["address"], connectionSettings["port"])
    SocketServer.TCPServer.__init__(self, bindAddress, ServerHandler)
config["global"]["echo"]["mandatory"][ "loggingfilemode"] = args.loggingFileMode # Connect to server processID = os.getpid() server = common.NetworkHandler() server.connect(config["global"]["connection"]["address"], config["global"]["connection"]["port"]) server.send({"command": "CONNECT", "type": "client", "processid": processID}) message = server.recv() if (message["command"] == "REFUSED"): sys.exit("ERROR: %s" % message["reason"]) else: clientID = message["clientid"] # Configure echoing echo = common.EchoHandler( config["client"]["echo"], "client%s@%s[%s].log" % (clientID, socket.gethostname(), config["global"]["connection"]["port"])) # Get an instance of the crawler CrawlerClass = getattr(crawler, config["client"]["crawler"]["class"]) collector = CrawlerClass(config["client"]["crawler"]) # Execute collection echo.out("Connected to server with ID %s." % clientID) server.send({"command": "GET_ID"}) while (True): try: message = server.recv() if (not message): echo.out("Connection to server has been abruptly closed.", "ERROR")