Beispiel #1
0
    def __init__(self, configurationsDictionary):
        """Initialize the handler, read the table's column names and start the select cache thread.

        The constructor opens a short-lived MySQL connection to discover the
        column names of the configured table, then launches
        ``_selectCacheThread`` as a daemon thread and blocks until
        ``selectWaitCondition`` is notified.

        :param configurationsDictionary: configuration forwarded to
            ``BasePersistenceHandler.__init__``. After that call
            ``self.config`` is read for the keys ``"echo"``, ``"table"``,
            ``"connargs"``, ``"primarykeycolumn"``, ``"resourceidcolumn"``
            and ``"statuscolumn"``.
        """
        BasePersistenceHandler.__init__(self, configurationsDictionary)
        self.echo = common.EchoHandler(self.config["echo"])
        self.local = threading.local()
        self.selectCacheThreadExceptionEvent = threading.Event()
        self.selectNoResourcesEvent = threading.Event()
        self.selectWaitCondition = threading.Condition()

        # Get column names. LIMIT 0 returns no rows but still exposes the
        # result set metadata. The table name comes from the configuration
        # and cannot be bound as a query parameter (it is an identifier).
        query = "SELECT * FROM " + self.config["table"] + " LIMIT 0"
        connection = mysql.connector.connect(**self.config["connargs"])
        try:
            cursor = connection.cursor()
            try:
                cursor.execute(query)
                cursor.fetchall()
                self.colNames = cursor.column_names
            finally:
                # Release the cursor even when the query fails
                cursor.close()
        finally:
            # Never leave the temporary connection open on error
            connection.close()
        self.excludedColNames = (self.config["primarykeycolumn"],
                                 self.config["resourceidcolumn"],
                                 self.config["statuscolumn"])
        self.infoColNames = [
            name for name in self.colNames
            if (name not in self.excludedColNames)
        ]

        # Start select cache thread
        self.resourcesQueue = Queue.Queue()
        t = threading.Thread(target=self._selectCacheThread)
        t.daemon = True
        # Hold the condition while starting the thread: a notifier must
        # acquire the same condition, so it cannot notify before wait() has
        # released the lock. Starting the thread first would allow the
        # notification to be lost and this constructor to block forever.
        # NOTE(review): assumes _selectCacheThread notifies
        # selectWaitCondition once it is ready -- confirm in that method.
        with self.selectWaitCondition:
            t.start()
            self.selectWaitCondition.wait()
Beispiel #2
0
    def __init__(self, configurationsDictionary):
        """Load the resources stored in the configured file and start the save timer.

        Every resource returned by the file handler is appended to
        ``self.resources``; its position in that list is recorded in
        ``self.statusRecords`` under the resource status and, when the
        ``"uniqueresourceid"`` option is set, in ``self.IDsHash`` under the
        resource ID.

        :param configurationsDictionary: configuration forwarded to
            ``MemoryPersistenceHandler.__init__``.
        :raises KeyError: if ``"uniqueresourceid"`` is set and the file
            contains the same ID more than once.
        """
        MemoryPersistenceHandler.__init__(self, configurationsDictionary)
        self.echo = common.EchoHandler(self.config["echo"])
        self.saveLock = threading.Lock()
        self.dumpExceptionEvent = threading.Event()

        self._setFileHandler()
        with open(self.config["filename"], "r") as inputFile:
            # Iterate while the file is still open, in case the loader
            # reads from it lazily
            for loaded in self.fileHandler.load(inputFile, self.fileColumns):
                # Index the resource is about to occupy in self.resources
                position = len(self.resources)
                self.statusRecords[loaded["status"]].append(position)
                if self.config["uniqueresourceid"]:
                    resourceID = loaded["id"]
                    if resourceID in self.IDsHash:
                        raise KeyError(
                            "Duplicated ID found in '%s': %s." %
                            (self.config["filename"], resourceID))
                    self.IDsHash[resourceID] = position
                if "info" not in loaded:
                    loaded["info"] = None
                self.resources.append(loaded)

        # Schedule _dumpTimerThread to run after the configured delay
        saveTimer = threading.Timer(self.config["savetimedelta"],
                                    self._dumpTimerThread)
        saveTimer.daemon = True
        self.timer = saveTimer
        self.timer.start()
Beispiel #3
0
 def __init__(self, configurationsDictionary):
     """Constructor.

     The parameter *configurationsDictionary* carries everything found in the crawler section of the XML configuration file. It is handed to ``_extractConfig`` (which presumably fills ``self.config``), and the ``"echo"`` entry of ``self.config`` is then used to create the echo handler.

     """
     self._extractConfig(configurationsDictionary)
     echoSettings = self.config["echo"]
     self.echo = common.EchoHandler(echoSettings)
Beispiel #4
0
 def __init__(self, configurationsDictionary):
     """Configure echoing, the persistence handler and the filters, then start the TCP server.

     *configurationsDictionary* is stored as ``self.config``. Its ``"server"`` section supplies the echo, persistence and filtering options, and its ``"global"`` section supplies the connection address and port.

     """
     self.config = configurationsDictionary

     # Configure echoing (the log file is named after the host and port)
     echoOptions = self.config["server"]["echo"]
     logFileName = "server@%s[%s].log" % (socket.gethostname(), self.config["global"]["connection"]["port"])
     self.echo = common.EchoHandler(echoOptions, logFileName)

     # Get persistence handler instance
     self.echo.out("Initializing persistence handler...")
     persistenceOptions = self.config["server"]["persistence"]
     PersistenceHandlerClass = getattr(persistence, persistenceOptions["class"])
     self.persist = PersistenceHandlerClass(persistenceOptions)

     # Get filters instances, split into parallel and sequential ones
     self.echo.out("Initializing filters...")
     self.parallelFilters = []
     self.sequentialFilters = []
     filtersOptions = self.config["server"]["filtering"]["filter"]
     # Resolve every class first, so an unknown class name fails before any filter is built
     self.FiltersClasses = [getattr(filters, options["class"]) for options in filtersOptions]
     for FilterClass, options in zip(self.FiltersClasses, filtersOptions):
         if options["parallel"]:
             destination = self.parallelFilters
         else:
             destination = self.sequentialFilters
         destination.append(FilterClass(options))

     # Call SocketServer constructor
     self.allow_reuse_address = True # Avoid "Address already in use" error when restarting server right after a shutdown
     serverAddress = (self.config["global"]["connection"]["address"], self.config["global"]["connection"]["port"])
     SocketServer.TCPServer.__init__(self, serverAddress, ServerHandler)
Beispiel #5
0
    config["global"]["echo"]["mandatory"][
        "loggingfilemode"] = args.loggingFileMode

# Connect to the server and register this process as a client
processID = os.getpid()
server = common.NetworkHandler()
server.connect(
    config["global"]["connection"]["address"],
    config["global"]["connection"]["port"],
)
server.send({
    "command": "CONNECT",
    "type": "client",
    "processid": processID,
})
message = server.recv()
# A refused connection ends the program with the reason given by the server;
# otherwise the reply carries the ID assigned to this client
if message["command"] == "REFUSED":
    sys.exit("ERROR: %s" % message["reason"])
clientID = message["clientid"]

# Configure echoing (the log file is named after client ID, host and port)
echo = common.EchoHandler(
    config["client"]["echo"],
    "client%s@%s[%s].log" % (
        clientID,
        socket.gethostname(),
        config["global"]["connection"]["port"],
    ),
)

# Get an instance of the crawler class named in the configuration
CrawlerClass = getattr(crawler, config["client"]["crawler"]["class"])
collector = CrawlerClass(config["client"]["crawler"])

# Execute collection: announce the connection and request the first ID
echo.out("Connected to server with ID %s." % clientID)
server.send({"command": "GET_ID"})
while (True):
    try:
        message = server.recv()

        if (not message):
            echo.out("Connection to server has been abruptly closed.", "ERROR")