Ejemplo n.º 1
0
 def __init__(self, cId, socket, address):
     """Store the connection details of a working node and log its arrival.

     cId is kept as ``self.id``; socket and address are stored unchanged.
     address is indexed at [0] and [1] to build the "host:port" label that
     logger.formatBrackets turns into ``self.formattedAddr``.
     """
     self.id, self.socket, self.address = cId, socket, address
     self.isActive = True

     hostAndPort = ":".join([str(address[0]), str(address[1])])
     self.formattedAddr = logger.formatBrackets(str(hostAndPort))
     logger.log(logging.INFO, "Working node connected " + self.formattedAddr)
Ejemplo n.º 2
0
def main():
    """Client entry point: load settings, run a working node, then disconnect.

    Reads the file named 'config': hostAddr/hostPort from the 'client'
    section and logPath/verbose from the 'common' section. The function
    polls ``node.isActive`` every half second and disconnects the node once
    the flag is cleared.
    """
    # config
    settings = ConfigParser.RawConfigParser(allow_no_value=True)
    settings.read('config')
    host = settings.get('client', 'hostAddr')
    port = settings.getint('client', 'hostPort')
    logPath = settings.get('common', 'logPath')
    # Only the exact strings "True" and "true" switch verbose mode on.
    verbose = settings.get('common', 'verbose') in ("True", "true")

    # setup
    logName = "client-" + str(datetime.datetime.now())
    logger.init(logPath, logName)
    logger.debugFlag = verbose

    node = WorkingNode()
    node.connect(host, port)
    node.readConfig()
    node.run()

    # Keep the main thread alive for as long as the node reports activity.
    while node.isActive:
        time.sleep(0.5)

    node.disconnect()
    logger.log(logging.INFO, "Exiting. ByeBye")
Ejemplo n.º 3
0
    def connectionHandler(self, socket, address):
        """Register a newly connected working node and serve it until it goes inactive.

        A fresh uuid4 identifies the SSClient wrapper, which is stored in
        ``self.clientDict`` for the duration of the connection. The client
        is always disconnected and removed from the dictionary on exit.
        """
        global serverRunning

        clientID = uuid.uuid4()
        client = SSClient(clientID, socket, address)
        self.clientDict[clientID] = client

        # temp testing, could take a parameter from config
        # self.run() is invoked only while the module-level flag is still False.
        if serverRunning == False and len(self.clientDict) > 0:
            self.run()
            serverRunning = True

        try:
            client.sendConfig(self.configurationPayload)
            client.run()
            # Park this handler until the client clears its own flag.
            while client.isActive:
                time.sleep(0.3)
        except EOFError:
            # Deliberately ignored; cleanup happens in the finally clause.
            pass
        except:
            client.isActive = False
            logger.log(logging.ERROR, "\n" + traceback.format_exc())
        finally:
            client.disconnect()
            del self.clientDict[clientID]
Ejemplo n.º 4
0
    def readSocket(self, timeOut=None):
        """Return the next unpickled message from the node, or None when the link is down.

        Messages are separated by a fixed "12345ZEEK6789" marker string.
        Bytes received after the first marker are kept in ``self.data`` for
        the next call. timeOut is passed to ``socket.settimeout``.
        """
        # NOTE(review): pickle.loads runs on bytes read from the network;
        # this is unsafe if the peer is not trusted.
        frameEnd = "\n\n12345ZEEK6789\n"
        self.socket.settimeout(timeOut)

        # A complete message may already be buffered from an earlier read.
        pending = self.data
        head, marker, rest = pending.partition(frameEnd)
        if marker:
            self.data = rest
            return pickle.loads(head)

        while self.isActive:
            chunk = self.socket.recv(buffSize)
            pending = pending + chunk

            if not chunk:
                # An empty read is treated as a lost connection.
                logger.log(
                    logging.INFO, logger.RED + self.formattedAddr +
                    "Lost connection" + logger.NOCOLOR)
                self.isActive = False

            head, marker, rest = pending.partition(frameEnd)
            if marker:
                self.data = rest
                break

        if not self.isActive:
            return

        logger.log(
            logging.DEBUG,
            self.formattedAddr + "Receiving " + str(len(head)) + " bytes")
        return pickle.loads(head)
Ejemplo n.º 5
0
    def readSocket(self, timeOut=None):
        """Return the next unpickled message from the server, or None when the link is down.

        Messages are separated by a fixed "12345ZEEK6789" marker string;
        whatever follows the first marker stays in ``self.data`` for the
        next call. timeOut is passed to ``self.s.settimeout``.
        """
        # NOTE(review): pickle.loads runs on bytes read from the network;
        # this is unsafe if the peer is not trusted.
        marker = "\n\n12345ZEEK6789\n"
        self.s.settimeout(timeOut)
        received = self.data

        # Serve a message that is already fully buffered.
        if marker in received:
            message, self.data = received.split(marker, 1)
            return pickle.loads(message)

        while self.isActive:
            piece = self.s.recv(buffSize)
            received += piece

            if not piece:
                # An empty read is treated as a lost connection.
                logger.log(logging.INFO, "\nLost connection to server " + self.masterNodeFormattedAddr)
                self.isActive = False

            if marker in received:
                message, self.data = received.split(marker, 1)
                break

        if not self.isActive:
            return

        logger.log(logging.DEBUG, "Receiving " + str(len(message)) + " bytes from server")

        return pickle.loads(message)
Ejemplo n.º 6
0
    def connectionHandler(self, socket, address):
        """Wrap an accepted connection in an SSClient and keep it alive until it stops.

        The client is stored in ``self.clientDict`` under a new uuid4 key,
        receives ``self.configurationPayload``, and is removed again (after
        being disconnected) when this handler ends.
        """
        global serverRunning

        nodeKey = uuid.uuid4()
        node = SSClient(nodeKey, socket, address)
        self.clientDict[nodeKey] = node

        # temp testing, could take a parameter from config
        # The first handler to see the flag still False starts the server routines.
        if serverRunning == False and len(self.clientDict) > 0:
            self.run()
            serverRunning = True

        try:
            node.sendConfig(self.configurationPayload)
            node.run()
            # Wait here until the node clears its active flag.
            while node.isActive:
                time.sleep(0.3)
        except EOFError:
            # Ignored on purpose; the finally clause does the cleanup.
            pass
        except:
            node.isActive = False
            report = "\n" + traceback.format_exc()
            logger.log(logging.ERROR, report)
        finally:
            node.disconnect()
            del self.clientDict[nodeKey]
Ejemplo n.º 7
0
def main():
    """Server entry point: install the SIGINT handler, load settings and start listening.

    Reads the file named 'config': listeningAddr/listeningPort from the
    'server' section and logPath/verbose from the 'common' section. The
    listen loop runs in its own thread while this function polls
    ``server.isActive`` every half second.
    """
    signal.signal(signal.SIGINT, handler)
    logger.printAsciiLogo()

    settings = ConfigParser.RawConfigParser(allow_no_value=True)
    settings.read('config')
    host = settings.get('server', 'listeningAddr')
    port = settings.getint('server', 'listeningPort')
    logPath = settings.get('common', 'logPath')
    # Only the exact strings "True" and "true" switch verbose mode on.
    verbose = settings.get('common', 'verbose') in ("True", "true")

    # logging
    logName = "server-" + str(datetime.datetime.now())
    logger.init(logPath, logName)
    logger.debugFlag = verbose

    # server
    server = Server(host, port)
    server.setup()
    thread.start_new_thread(server.listen, ())  # testing

    # Keep the main thread alive while the server reports activity.
    while server.isActive:
        time.sleep(0.5)

    logger.log(logging.INFO, "Exiting. ByeBye")
0
 def writeSocket(self, obj):
     """Pickle obj, append the module-level delimiter and send it on the socket.

     Any failure in serialization, logging or sending is re-raised as a
     plain Exception with a fixed message.
     """
     try:
         frame = pickle.dumps(obj) + delimiter
         logger.log(logging.DEBUG, self.formattedAddr + "Sending " + str(len(frame)) + " bytes")
         self.socket.sendall(frame)
     except:
         raise Exception("Unable to write to socket (client disconnected)")
Ejemplo n.º 9
0
    def connectionHandler(self, socket, address):
        """Create an SSClient for the accepted connection and serve it until it stops.

        The client is stored in ``self.clientDict`` under a new uuid4 key
        and is disconnected and removed again when this handler ends.
        """
        nodeKey = uuid.uuid4()
        node = SSClient(nodeKey, socket, address)
        self.clientDict[nodeKey] = node

        # temp testing
        if self.clientDict:
            self.run()

        # List every node currently registered.
        for knownKey in self.clientDict:
            logger.log(logging.DEBUG, "Working node connected : " + str(self.clientDict[knownKey].id))

        try:
            node.sendConfig()
            node.run()
            # Wait here until the node clears its active flag.
            while node.isActive:
                time.sleep(1)
        except EOFError:
            # Ignored on purpose; the finally clause does the cleanup.
            pass
        except:
            logger.log(logging.ERROR, "\n" + traceback.format_exc())
        finally:
            node.disconnect()
            del self.clientDict[nodeKey]
Ejemplo n.º 10
0
    def readSocket(self, timeOut=None):
        """Read one marker-terminated message from the node and unpickle it.

        Returns None when the connection is no longer active. Data that
        follows the first "12345ZEEK6789" marker is saved in ``self.data``
        and consumed first on the next call.
        """
        # NOTE(review): pickle.loads runs on bytes read from the network;
        # this is unsafe if the peer is not trusted.
        separator = "\n\n12345ZEEK6789\n"
        self.socket.settimeout(timeOut)
        stream = self.data

        # A whole message may already be waiting in the buffer.
        if separator in stream:
            payload, self.data = stream.split(separator, 1)
            return pickle.loads(payload)

        while self.isActive:
            block = self.socket.recv(buffSize)
            stream += block

            if not block:
                # An empty read is treated as a lost connection.
                logger.log(logging.INFO, logger.RED + self.formattedAddr + "Lost connection" + logger.NOCOLOR)
                self.isActive = False

            if separator in stream:
                payload, self.data = stream.split(separator, 1)
                break

        if not self.isActive:
            return

        logger.log(logging.DEBUG, self.formattedAddr + "Receiving " + str(len(payload)) + " bytes")
        return pickle.loads(payload)
Ejemplo n.º 11
0
    def readSocket(self, timeOut=None):
        """Read one marker-terminated message from the server and unpickle it.

        Returns None when the connection is no longer active. Data that
        follows the first "12345ZEEK6789" marker is saved in ``self.data``
        and consumed first on the next call.
        """
        # NOTE(review): pickle.loads runs on bytes read from the network;
        # this is unsafe if the peer is not trusted.
        frameEnd = "\n\n12345ZEEK6789\n"
        self.s.settimeout(timeOut)

        # A whole message may already be waiting in the buffer.
        backlog = self.data
        body, found, leftover = backlog.partition(frameEnd)
        if found:
            self.data = leftover
            return pickle.loads(body)

        while self.isActive:
            chunk = self.s.recv(buffSize)
            backlog = backlog + chunk

            if not chunk:
                # An empty read is treated as a lost connection.
                logger.log(logging.INFO, "\nLost connection to server " + self.masterNodeFormattedAddr)
                self.isActive = False

            body, found, leftover = backlog.partition(frameEnd)
            if found:
                self.data = leftover
                break

        if not self.isActive:
            return

        logger.log(logging.DEBUG, "Receiving " + str(len(body)) + " bytes from server")

        return pickle.loads(body)
Ejemplo n.º 12
0
 def writeSocket(self, obj):
     """Pickle obj and send the whole serialized payload on the socket.

     Raises a plain Exception("Error writting") when logging, serializing
     or sending fails, so callers keep seeing the same exception type.
     """
     try:
         logger.log(logging.DEBUG, "Write " + self.formattedAddr)
         serializedObj = pickle.dumps(obj)
         # sendall keeps writing until every byte is out; plain send() may
         # transmit only part of the buffer and would truncate the pickle.
         self.socket.sendall(serializedObj)
     except:
         raise Exception("Error writting")
Ejemplo n.º 13
0
 def run(self):
     """Launches main threads"""
     logger.log(logging.INFO, "\n\nStarting Crawling/Scrapping sequence...")
     if not self.isActive:
         return

     # Start order matches the original: output, input, interpreting, crawling.
     workers = (self.outputThread, self.inputThread,
                self.interpretingThread, self.crawlingThread)
     for worker in workers:
         thread.start_new_thread(worker, ())
Ejemplo n.º 14
0
    def disconnect(self):
        """Close the client's socket once; later calls are no-ops."""
        # Nothing to do when the socket was already released.
        if self.socket is None:
            return

        logger.log(logging.INFO, logger.RED + self.formattedAddr + "Disconnecting" + logger.NOCOLOR)
        self.isActive = False
        self.socket.close()
        # Clearing the reference marks the client as already disconnected.
        self.socket = None
Ejemplo n.º 15
0
 def run(self):
     """Start the four worker threads when the node is active."""
     logger.log(logging.INFO, "\n\nStarting Crawling/Scrapping sequence...")
     if not self.isActive:
         return

     # Same start order as before: output, input, interpreting, crawling.
     for routine in (self.outputThread, self.inputThread,
                     self.interpretingThread, self.crawlingThread):
         thread.start_new_thread(routine, ())
Ejemplo n.º 16
0
 def writeSocket(self, obj):
     """Serialize obj with pickle, append the delimiter and send the result.

     Every failure inside the method surfaces as a plain Exception with a
     fixed message.
     """
     try:
         outgoing = pickle.dumps(obj) + delimiter
         sizeNote = "Sending " + str(len(outgoing)) + " bytes"
         logger.log(logging.DEBUG, self.formattedAddr + sizeNote)
         self.socket.sendall(outgoing)
     except:
         raise Exception("Unable to write to socket (client disconnected)")
Ejemplo n.º 17
0
    def urlDispatcher(self):
        """Move every item taken from urlPool into urlToVisit while active."""
        logger.log(logging.INFO, "Starting urlDispatcher")

        while self.isActive:
            # Placeholder from the original: a visited check / verification
            # is meant to happen between the get and the put.
            urlToVisit.put(urlPool.get(True))
Ejemplo n.º 18
0
 def writeSocket(self, obj):
     """Pickle obj, append the delimiter and send it to the server.

     On any failure the traceback is logged as CRITICAL and a plain
     Exception with a fixed message is raised.
     """
     try:
         frame = pickle.dumps(obj) + delimiter
         logger.log(logging.DEBUG, "Sending " + str(len(frame)) + " bytes to server")
         self.s.sendall(frame)
     except:
         logger.log(logging.CRITICAL, traceback.format_exc())
         raise Exception("Unable to write to socket (lost connection to server)")
Ejemplo n.º 19
0
    def readSocket(self, timeOut=None):
        """Receive one buffer from the node and return it unpickled.

        An empty read marks the node inactive; the empty buffer is still
        handed to pickle.loads, whose error then reaches the caller.
        """
        self.socket.settimeout(timeOut)
        received = self.socket.recv(buffSize)

        # broken connection
        if not received:
            logger.log(logging.INFO, "Lost connection - Working node " + self.formattedAddr)
            self.isActive = False

        # NOTE(review): unpickling network data is unsafe with untrusted peers.
        return pickle.loads(received)
Ejemplo n.º 20
0
    def dispatcher(self, packet):
        """Route a received packet to the queue matching its type.

        INFO packets go to ``self.infoQueue`` and URL packets go to
        ``self.urlToVisit``. Any other type is logged as CRITICAL and the
        packet is dropped.
        """
        # Compare by value, not identity: a packet rebuilt by pickle is not
        # guaranteed to carry the very same object as the protocol constant,
        # so `is` could drop valid packets.
        if packet.type == protocol.INFO:
            self.infoQueue.put(packet)
        elif packet.type == protocol.URL:
            self.urlToVisit.put(packet)
        else:
            logger.log(logging.CRITICAL, "Unrecognized packet type : " + str(packet.type) + ". This packet was dropped")
            return

        logger.log(logging.DEBUG, "Dispatched packet of type: " + str(packet.type))
Ejemplo n.º 21
0
    def disconnect(self):
        """Close the socket and mark the client inactive, at most once."""
        # A cleared socket means a previous call already did the work.
        if self.socket is None:
            return

        notice = logger.RED + self.formattedAddr + "Disconnecting" + logger.NOCOLOR
        logger.log(logging.INFO, notice)
        self.isActive = False
        self.socket.close()
        self.socket = None
Ejemplo n.º 22
0
 def writeSocket(self, obj):
     """Send obj to the server as a pickle followed by the delimiter.

     Failures are logged with their traceback at CRITICAL level and then
     reported to the caller as a plain Exception with a fixed message.
     """
     try:
         outgoing = pickle.dumps(obj) + delimiter
         sizeNote = "Sending " + str(len(outgoing)) + " bytes to server"
         logger.log(logging.DEBUG, sizeNote)
         self.s.sendall(outgoing)
     except:
         details = traceback.format_exc()
         logger.log(logging.CRITICAL, details)
         raise Exception("Unable to write to socket (lost connection to server)")
Ejemplo n.º 23
0
    def readSocket(self, timeOut=None):
        """Receive one buffer from the server and return it unpickled.

        An empty read marks this node inactive; the empty buffer is still
        handed to pickle.loads, whose error then reaches the caller.
        """
        self.s.settimeout(timeOut)
        incoming = self.s.recv(buffSize)

        # broken connection
        if not incoming:
            logger.log(logging.INFO, "Lost connection to server " + self.masterNodeFormattedAddr)
            self.isActive = False

        # NOTE(review): unpickling network data is unsafe with untrusted peers.
        return pickle.loads(incoming)
Ejemplo n.º 24
0
    def __init__(self, cId, socket, address):
        """Initialise the per-connection state of a working node and log its arrival.

        address is indexed at [0] and [1] to build the "host:port" label;
        the bracketed label plus a trailing space becomes ``self.formattedAddr``.
        """
        self.id, self.socket, self.address = cId, socket, address
        self.isActive = True

        hostAndPort = ":".join([str(address[0]), str(address[1])])
        self.formattedAddr = logger.formatBrackets(str(hostAndPort)) + " "

        self.sentCount = 0
        self.data = ""  # receive buffer, starts empty
        self.configuration = None

        greeting = logger.GREEN + self.formattedAddr + "Working node connected" + logger.NOCOLOR
        logger.log(logging.INFO, greeting)
Ejemplo n.º 25
0
    def setup(self, configuration):
        """Open the listening TCP socket and remember the configuration payload.

        The socket is bound to (self.host, self.port) with SO_REUSEADDR set
        and a listen backlog of 5. ``configuration.config.requestLimit`` is
        copied to ``self.requestLimit``.
        """
        logger.log(logging.DEBUG, "Socket initialization")

        self.s = listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listener.bind((self.host, self.port))
        listener.listen(5)

        endpoint = str(self.host) + ":" + str(self.port)
        logger.log(logging.INFO, "Listening on [" + endpoint + "]")

        self.configurationPayload = configuration
        self.requestLimit = configuration.config.requestLimit
Ejemplo n.º 26
0
def shoot(cid):
	"""Capture a frame with fswebcam and upload the resulting JPEG.

	The file is written to the 'output' directory next to this module and
	named "<cid>-<unix time>.jpg". It is then posted to UPL_URL with
	GROUPID as the 'id' query parameter. Any exception is logged and
	re-raised to the caller.
	"""
	filename = "%s/output/%s-%d.jpg" % (os.path.dirname(os.path.realpath(__file__)), cid, int(time.time()))
	logger.log("filename: %s"%filename)
	try:
		subprocess.call(["fswebcam", "-r", RESOLUTIONS[RES_USE], "--no-banner", filename])

		# The context manager closes the image file even when the upload
		# fails; the original left the handle open.
		with open(filename, 'rb') as image:
			requests.post(UPL_URL+'?id='+GROUPID, files={'file': image})
	except Exception as e:
		logger.log(e)
		# Bare raise keeps the original traceback intact.
		raise
Ejemplo n.º 27
0
 def listen(self):
     """Accept connections and hand each one to connectionHandler in a new thread.

     Any exception in the accept loop is logged as CRITICAL and stops the
     loop by clearing ``self.isActive``.
     """
     print("- - - - - - - - - - - - - - -")
     logger.log(logging.INFO, "Waiting for working nodes to connect...")
     while self.isActive:
         try:
             nodeSocket, nodeAddress = self.s.accept()
             thread.start_new_thread(self.connectionHandler, (nodeSocket, nodeAddress))
         except:
             logger.log(logging.CRITICAL, traceback.format_exc())
             self.isActive = False
Ejemplo n.º 28
0
    def inputThread(self):
        """Read packets from the client and dispatch them until the client stops."""
        logger.log(logging.DEBUG, self.formattedAddr +  "Listening for packets")

        while self.isActive:
            try:
                self.dispatcher(self.readSocket())
            except EOFError:
                # Unpickling fails with EOFError when the client disconnects;
                # treat that as the end of the session.
                self.isActive = False
Ejemplo n.º 29
0
    def inputThread(self):
        """Keep reading packets from the client and pass each one to the dispatcher."""
        logger.log(logging.DEBUG, self.formattedAddr + "Listening for packets")

        while self.isActive:
            try:
                incoming = self.readSocket()
                self.dispatcher(incoming)
            except EOFError:
                # A disconnecting client surfaces as an EOFError from pickle;
                # stop the loop instead of propagating it.
                self.isActive = False
Ejemplo n.º 30
0
 def outputThread(self):
     """Send every item taken from urlToVisit to the client as a URL packet.

     Any exception is logged as CRITICAL and stops the loop by clearing
     ``self.isActive``.
     """
     while self.isActive:
         try:
             nextSite = urlToVisit.get(True)
             packet = protocol.Packet(protocol.URL, protocol.URLPayload(nextSite))
             self.writeSocket(packet)
             logger.log(logging.DEBUG, "Sending obj of type " + str(packet.type) + " to " + self.formattedAddr)
         except:
             logger.log(logging.CRITICAL, traceback.format_exc())
             self.isActive = False
Ejemplo n.º 31
0
    def setup(self, configuration):
        """Create, bind and start the listening socket, then keep the configuration.

        Uses (self.host, self.port) with SO_REUSEADDR and a backlog of 5;
        ``self.requestLimit`` is read from ``configuration.config``.
        """
        logger.log(logging.DEBUG, "Socket initialization")

        self.s = serverSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        serverSocket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        serverSocket.bind((self.host, self.port))
        serverSocket.listen(5)

        banner = "Listening on [" + str(self.host) + ":" + str(self.port) + "]"
        logger.log(logging.INFO, banner)

        self.configurationPayload = configuration
        self.requestLimit = configuration.config.requestLimit
Ejemplo n.º 32
0
    def __init__(self, cId, socket, address):
        """Set up the state kept for one connected working node and log the connection.

        ``self.formattedAddr`` is the bracketed "host:port" label (built
        from address[0] and address[1]) followed by a single space.
        """
        self.id = cId
        self.socket = socket
        self.address = address
        self.isActive = True

        label = str(address[0]) + ":" + str(address[1])
        self.formattedAddr = logger.formatBrackets(str(label)) + " "

        self.sentCount = 0
        self.data = ""  # receive buffer, starts empty
        self.configuration = None

        logger.log(
            logging.INFO,
            logger.GREEN + self.formattedAddr + "Working node connected" +
            logger.NOCOLOR)
Ejemplo n.º 33
0
    def inputThread(self):
        """Read packets and dispatch them until the node becomes inactive.

        EOFError ends the loop quietly; any other exception is logged as
        CRITICAL before the loop ends.
        """
        logger.log(logging.DEBUG, "InputThread started")

        while self.isActive:
            try:
                self.dispatcher(self.readSocket())
            except EOFError:
                self.isActive = False
            except:
                logger.log(logging.CRITICAL, traceback.format_exc())
                self.isActive = False
Ejemplo n.º 34
0
    def outputThread(self):
        """Send queued packets to the client while fewer than six are outstanding.

        When ``self.sentCount`` exceeds 5 the loop sleeps briefly instead of
        sending. This method only ever increases the counter.
        """
        while self.isActive:
            if self.sentCount > 5:
                time.sleep(0.03)
                continue

            # A falsy result from deQueue means there is nothing to send.
            for packet in protocol.deQueue([outputQueue]) or ():
                self.writeSocket(packet)
                self.sentCount += 1
                logger.log(logging.DEBUG, self.formattedAddr + "Sending URL " + str(packet.payload.urlList[0]))
Ejemplo n.º 35
0
    def inputThread(self):
        """Receive packets from the server and dispatch them while active.

        EOFError ends the loop quietly; any other exception is logged as
        CRITICAL before the loop ends.
        """
        logger.log(logging.DEBUG, "InputThread started")

        while self.isActive:
            try:
                incoming = self.readSocket()
                self.dispatcher(incoming)
            except EOFError:
                self.isActive = False
            except:
                failure = traceback.format_exc()
                logger.log(logging.CRITICAL, failure)
                self.isActive = False
Ejemplo n.º 36
0
    def urlDispatcher(self):
        """Forward each not-yet-seen url from urlPool to urlToVisit.

        A url is recorded in urlVisited before it is queued and is also
        appended to scrappedURLlist. Exceptions are logged as ERROR and the
        loop keeps running.
        """
        logger.log(logging.INFO, "Starting server urlDispatcher")

        while self.isActive:
            try:
                candidate = urlPool.get(True)
                if candidate in urlVisited:
                    continue

                urlVisited[candidate] = True
                #logic if static crawling will come here
                urlToVisit.put(candidate)
                scrappedURLlist.append(candidate)
            except:
                logger.log(logging.ERROR, "\n" + traceback.format_exc())
Ejemplo n.º 37
0
 def listen(self):
     """Accept working nodes and start one connectionHandler thread per connection.

     The loop ends when ``self.isActive`` is cleared, which also happens
     here after any exception (logged as CRITICAL).
     """
     print("- - - - - - - - - - - - - - -")
     logger.log(logging.INFO, "Waiting for working nodes to connect...")
     while self.isActive:
         try:
             connection, peerAddress = self.s.accept()
             handlerArgs = (connection, peerAddress)
             thread.start_new_thread(self.connectionHandler, handlerArgs)
         except:
             failure = traceback.format_exc()
             logger.log(logging.CRITICAL, failure)
             self.isActive = False
Ejemplo n.º 38
0
    def outputThread(self):
        """Drain outputQueue to the client, pausing while more than five sends are counted."""
        while self.isActive:
            # Throttle: wait briefly instead of sending once the counter passes 5.
            if self.sentCount > 5:
                time.sleep(0.03)
                continue

            batch = protocol.deQueue([outputQueue])
            if not batch:
                continue

            for outgoing in batch:
                self.writeSocket(outgoing)
                self.sentCount += 1
                firstUrl = str(outgoing.payload.urlList[0])
                logger.log(
                    logging.DEBUG,
                    self.formattedAddr + "Sending URL " + firstUrl)
Ejemplo n.º 39
0
    def storageRoutine(self):
        """Write every session taken from sessionStorageQueue to storage.

        Exceptions are logged as ERROR and the loop keeps running.
        """
        logger.log(logging.INFO, "Starting server storageRoutine")

        while self.isActive:
            try:
                # A falsy result from deQueue means there is nothing to store.
                for session in protocol.deQueue([sessionStorageQueue]) or ():
                    storage.writeToFile(session, session.dataContainer)
            except:
                logger.log(logging.ERROR, "\n" + traceback.format_exc())
Ejemplo n.º 40
0
    def urlDispatcher(self):
        """Take urls from urlPool and queue the ones not seen before.

        New urls are marked in urlVisited, put on urlToVisit and appended
        to scrappedURLlist. Exceptions are logged as ERROR without stopping
        the loop.
        """
        logger.log(logging.INFO, "Starting server urlDispatcher")

        while self.isActive:
            try:
                nextUrl = urlPool.get(True)
                alreadySeen = nextUrl in urlVisited
                if not alreadySeen:
                    urlVisited[nextUrl] = True
                    #logic if static crawling will come here
                    urlToVisit.put(nextUrl)
                    scrappedURLlist.append(nextUrl)
            except:
                report = "\n" + traceback.format_exc()
                logger.log(logging.ERROR, report)
Ejemplo n.º 41
0
 def __init__(self):
     """Initiates variables for account balances and stocks data.

     Loads the configuration and logger, calls get_token with 1 and 2, then
     stores the (balance, data) pair returned for the RRSP, TFSA and TFSA2
     accounts.
     """
     self.config = config.load()
     self.logger = logger.log()

     for tokenNumber in (1, 2):
         self.get_token(tokenNumber)

     balance, data = self.questradeRRSP()
     self.RRSPbalance = balance
     self.RRSPdata = data

     balance, data = self.questradeTFSA()
     self.TFSAbalance = balance
     self.TFSAdata = data

     balance, data = self.questradeTFSA2()
     self.TFSA2balance = balance
     self.TFSA2data = data
Ejemplo n.º 42
0
    def crawlingThread(self):
        """Visit every url taken from self.urlToVisit and queue the results for sending.

        For a successful visit a VISITED packet is queued, preceded by a
        SCRAPPED_URL packet when dynamic crawling is configured. A failed
        visit queues a SKIPPED packet. Any exception is logged as CRITICAL
        and stops the loop.
        """
        logger.log(logging.DEBUG, "CrawlingThread started")

        settings = self.config
        self.scrapper = scrapping.Scrapper(settings.userAgent,
                                           settings.robotParserEnabled,
                                           settings.domainRestricted,
                                           settings.crawling)

        while self.isActive:
            try:
                pendingUrls = protocol.deQueue([self.urlToVisit])

                if not pendingUrls:
                    time.sleep(0.2)  #temp - For testing
                    continue

                for url in pendingUrls:
                    session = self.scrapper.visit(url)
                    summary = ("Session \n" + str(session.url) +
                               "\nCode : " + str(session.returnCode) +
                               "\nRequest time : " + str(session.requestTime) +
                               "\nBs time : " + str(session.bsParsingTime))
                    logger.log(logging.DEBUG, summary)

                    if session.failed:
                        # Report the failure to the server and move on.
                        logger.log(logging.INFO, "Skipping URL : " + url)
                        skipped = protocol.URLPayload(
                            [url], protocol.URLPayload.SKIPPED, session)
                        self.outputQueue.put(
                            protocol.Packet(protocol.URL, skipped))
                        continue

                    if self.crawlingType == protocol.ConfigurationPayload.DYNAMIC_CRAWLING:
                        # Dynamic crawling: send back the urls found on the page.
                        scrapped = protocol.URLPayload(
                            session.scrappedURLs,
                            protocol.URLPayload.SCRAPPED_URL)
                        self.outputQueue.put(
                            protocol.Packet(protocol.URL, scrapped))

                    visited = protocol.URLPayload(
                        [url],
                        protocol.URLPayload.VISITED,
                        session=session)
                    self.outputQueue.put(
                        protocol.Packet(protocol.URL, visited))

            except:
                logger.log(logging.CRITICAL, traceback.format_exc())
                self.isActive = False
    def storageRoutine(self):
        """Persist sessions pulled from sessionStorageQueue while the server is active.

        Each session is written together with its dataContainer. Exceptions
        are logged as ERROR and do not stop the loop.
        """
        logger.log(logging.INFO, "Starting server storageRoutine")

        while self.isActive:
            try:
                finished = protocol.deQueue([sessionStorageQueue])
                if finished:
                    for session in finished:
                        storage.writeToFile(session, session.dataContainer)
            except:
                report = "\n" + traceback.format_exc()
                logger.log(logging.ERROR, report)
Ejemplo n.º 44
0
    def inputThread(self):
        """Read packets from the working node and feed received urls to urlPool.

        URL packets have their urlList put on urlPool. An INFO packet
        raises an Exception inside the loop (placeholder behaviour kept
        from the original), which is logged as CRITICAL and stops the
        thread, like any other unexpected error. EOFError stops the loop
        without logging.
        """
        logger.log(logging.DEBUG, "Listening for packets " + self.formattedAddr)

        while self.isActive:
            try:
                obj = self.readSocket()

                # Compare by value, not identity: an unpickled packet type
                # is not guaranteed to be the same object as the protocol
                # constant, so `is` could silently ignore valid packets.
                if obj.type == protocol.INFO:
                    print("PACKET INFO")
                    # ie : Treat end of crawl
                    raise Exception("INFO PACKET RECEIVED")
                elif obj.type == protocol.URL:
                    urlPool.put(obj.payload.urlList)

                time.sleep(1)
            except EOFError:
                self.isActive = False
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                message = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
                logger.log(logging.CRITICAL, message)
                self.isActive = False
Ejemplo n.º 45
0
    def dispatcher(self, packet):
        """Put a packet (or its urls) on the queue that matches its type.

        None is ignored. INFO packets go to ``self.infoQueue`` whole; for
        URL packets each entry of payload.urlList is put on
        ``self.urlToVisit``. Any other type is logged as CRITICAL and dropped.
        """
        if packet is None:
            return

        kind = packet.type
        if kind == protocol.INFO:
            logger.log(logging.DEBUG, "Dispatching INFO packet")
            self.infoQueue.put(packet)
        elif kind == protocol.URL:
            urls = packet.payload.urlList
            logger.log(logging.DEBUG, "Dispatching url packet : " + str(urls[0]))
            for site in urls:
                self.urlToVisit.put(site)
        else:
            logger.log(logging.CRITICAL, "Unrecognized packet type : " + str(kind) + ". This packet was dropped")
            return

        logger.log(logging.DEBUG, "Dispatched packet of type: " + str(kind))
Ejemplo n.º 46
0
    def dispatcher(self, packet):
        """Sort an incoming packet into infoQueue or urlToVisit.

        A None packet is a no-op. URL packets are unpacked so that every
        url in payload.urlList is queued individually. Unknown types are
        logged as CRITICAL and discarded.
        """
        if packet is None:
            return

        if packet.type == protocol.INFO:
            logger.log(logging.DEBUG, "Dispatching INFO packet")
            self.infoQueue.put(packet)
        elif packet.type == protocol.URL:
            firstUrl = str(packet.payload.urlList[0])
            logger.log(logging.DEBUG, "Dispatching url packet : " + firstUrl)
            for address in packet.payload.urlList:
                self.urlToVisit.put(address)
        else:
            dropNotice = "Unrecognized packet type : " + str(packet.type) + ". This packet was dropped"
            logger.log(logging.CRITICAL, dropNotice)
            return

        logger.log(logging.DEBUG, "Dispatched packet of type: " + str(packet.type))
Ejemplo n.º 47
0
 def readConfig(self):
     """Wait for the server's CONFIG packet and acknowledge it.

     On a CONFIG packet the crawling type is stored in
     ``self.crawlingType`` and an INFO packet carrying CLIENT_ACK is sent
     back. Any other packet, or any error while reading or writing, is
     logged as CRITICAL and marks the node inactive.
     """
     logger.log(logging.DEBUG, "Waiting for configuration from the server.")
     if self.isActive:
         try:
             deserializedPacket = self.readSocket()
             # Compare by value, not identity: an unpickled packet type is
             # not guaranteed to be the same object as protocol.CONFIG, so
             # `is` could reject a valid configuration.
             if deserializedPacket.type == protocol.CONFIG:
                 self.crawlingType = deserializedPacket.payload.crawlingType
                 payload = protocol.InfoPayload(protocol.InfoPayload.CLIENT_ACK)
                 packet = protocol.Packet(protocol.INFO, payload)
                 self.writeSocket(packet)
                 logger.log(logging.DEBUG, "Configuration received.")
                 logger.log(logging.DEBUG, "Sending ACK for configuration.")
             else:
                 raise Exception("Unable to parse configuration.")
         except:
             exc_type, exc_value, exc_traceback = sys.exc_info()
             message = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
             logger.log(logging.CRITICAL, message)
             self.isActive = False
Ejemplo n.º 48
0
    def crawlingThread(self):
        """Visit the URLs queued in ``self.urlToVisit`` and queue the results.

        Builds ``self.scrapper`` from ``self.config`` and then loops while
        ``self.isActive`` is true. For each visited URL a SKIPPED packet is
        queued on ``self.outputQueue`` when the session failed; otherwise a
        VISITED packet is queued, preceded by a SCRAPPED_URL packet when
        dynamic crawling is configured. Any exception is logged at CRITICAL
        level with its traceback and clears ``self.isActive``.
        """
        logger.log(logging.DEBUG, "CrawlingThread started")

        self.scrapper = scrapping.Scrapper(
            self.config.userAgent,
            self.config.robotParserEnabled,
            self.config.domainRestricted,
            self.config.crawling)

        while self.isActive:
            try:
                pending = protocol.deQueue([self.urlToVisit])
                if not pending:
                    time.sleep(0.2)  # temp - For testing
                    continue

                for url in pending:
                    session = self.scrapper.visit(url)
                    report = "".join([
                        "Session \n", str(session.url),
                        "\nCode : ", str(session.returnCode),
                        "\nRequest time : ", str(session.requestTime),
                        "\nBs time : ", str(session.bsParsingTime),
                    ])
                    logger.log(logging.DEBUG, report)

                    if session.failed:
                        # Failed visits are reported back as SKIPPED.
                        logger.log(logging.INFO, "Skipping URL : " + url)
                        skipped = protocol.URLPayload([url], protocol.URLPayload.SKIPPED, session)
                        self.outputQueue.put(protocol.Packet(protocol.URL, skipped))
                        continue

                    if self.crawlingType == protocol.ConfigurationPayload.DYNAMIC_CRAWLING:
                        # Dynamic crawling also sends back the URLs found on the page.
                        scrapped = protocol.URLPayload(session.scrappedURLs, protocol.URLPayload.SCRAPPED_URL)
                        self.outputQueue.put(protocol.Packet(protocol.URL, scrapped))

                    visited = protocol.URLPayload([url], protocol.URLPayload.VISITED, session=session)
                    self.outputQueue.put(protocol.Packet(protocol.URL, visited))

            except:
                logger.log(logging.CRITICAL, traceback.format_exc())
                self.isActive = False
Ejemplo n.º 49
0
    def outputThread(self):
        """Write every object taken from ``self.outputQueue`` to the socket.

        Loops while ``self.isActive`` is true. Any exception is logged at
        CRITICAL level with its traceback and clears ``self.isActive``.
        """
        logger.log(logging.DEBUG, "OutputThread started")

        while self.isActive:
            try:
                # Blocking get; presumably a Queue-like object (confirm).
                outgoing = self.outputQueue.get(True)
                self.writeSocket(outgoing)
                logger.log(logging.DEBUG, "Sending obj of type " + str(outgoing.type))
            except:
                failure = traceback.format_exc()
                logger.log(logging.CRITICAL, failure)
                self.isActive = False
Ejemplo n.º 50
0
    def outputThread(self):
        """Send the messages waiting in ``self.outputQueue`` to the server.

        Runs until ``self.isActive`` becomes false. An exception while
        fetching or writing is logged at CRITICAL level with its traceback
        and clears ``self.isActive``.
        """
        logger.log(logging.DEBUG, "OutputThread started")

        while self.isActive:
            try:
                # TODO: fix with helper method to prevent block
                message = self.outputQueue.get(True)
                self.writeSocket(message)
                logger.log(logging.DEBUG, "Sending obj of type " + str(message.type))
            except:
                logger.log(logging.CRITICAL, traceback.format_exc())
                self.isActive = False
Ejemplo n.º 51
0
    def mainRoutine(self):
        """Feed URLs to the working nodes until the crawl is complete.

        First queues one TOVISIT packet per configured root URL and marks the
        URL in ``urlVisited``. Then, while ``self.isActive`` is true:

        * dynamic crawling: takes the next URL from ``urlToVisit``, queues it
          as a TOVISIT packet, honours ``crawlDelay`` and stops once
          ``requestLimit`` (when non-zero) is exceeded;
        * static crawling: polls every 0.3 s until the number of visited plus
          skipped URLs reaches the number of root URLs.

        Exceptions inside the loop are logged at ERROR level and the loop
        continues. On exit all clients are disconnected and ``self.isActive``
        is cleared.
        """
        logger.log(logging.INFO, "Starting server mainRoutine")

        for url in self.configurationPayload.config.rootUrls:
            payload = protocol.URLPayload([str(url)],
                                          protocol.URLPayload.TOVISIT)
            packet = protocol.Packet(protocol.URL, payload)
            urlVisited[url] = True
            outputQueue.put(packet)

            # Pace the root URLs in static mode (the delay test was duplicated).
            isStatic = (self.configurationPayload.crawlingType ==
                        protocol.ConfigurationPayload.STATIC_CRAWLING)
            if isStatic and self.configurationPayload.config.crawlDelay != 0:
                time.sleep(self.configurationPayload.config.crawlDelay)

        while self.isActive:
            try:
                crawlingType = self.configurationPayload.crawlingType

                if crawlingType == protocol.ConfigurationPayload.DYNAMIC_CRAWLING:
                    url = urlToVisit.get(True)
                    payload = protocol.URLPayload([str(url)],
                                                  protocol.URLPayload.TOVISIT)
                    packet = protocol.Packet(protocol.URL, payload)
                    outputQueue.put(packet)
                    self.requestCount = self.requestCount + 1

                    if self.configurationPayload.config.crawlDelay != 0:
                        time.sleep(self.configurationPayload.config.crawlDelay)

                    if (self.requestLimit != 0 and
                            len(visitedURLlist) + 1 > self.requestLimit):
                        break

                elif crawlingType == protocol.ConfigurationPayload.STATIC_CRAWLING:
                    # Sum the lengths instead of concatenating both lists on
                    # every poll; ">=" so an overshoot cannot loop forever.
                    processed = len(skippedURLlist) + len(visitedURLlist)
                    if processed >= len(self.configurationPayload.config.rootUrls):
                        break
                    time.sleep(0.3)
            except Exception:
                # Log and keep going: one bad iteration must not stop the crawl.
                logger.log(logging.ERROR, "\n" + traceback.format_exc())

        logger.log(logging.INFO, "Scrapping complete. Terminating...")
        self.disconnectAllClient()
        self.isActive = False
Ejemplo n.º 52
0
    def interpretingThread(self):
        """Interpret the non-URL packets (i.e. INFO) received from the server.

        Polls ``self.infoQueue`` every 0.01 s while ``self.isActive`` is true
        and logs each INFO packet. Any exception is logged at CRITICAL level
        with its traceback and clears ``self.isActive``.
        """
        logger.log(logging.DEBUG, "InterpretingThread started")

        while self.isActive:
            try:
                time.sleep(0.01)  # temp - For testing
                packets = protocol.deQueue([self.infoQueue])

                if not packets:
                    continue

                for packet in packets:
                    if packet.type != protocol.INFO:
                        continue
                    payload = packet.payload
                    # NOTE(review): INFO payloads are read through `.info`
                    # elsewhere in this file, so logging `.urlList` here most
                    # likely raised AttributeError and shut the node down.
                    # Prefer `.info`, then `.urlList`, then the payload itself.
                    detail = getattr(payload, "info",
                                     getattr(payload, "urlList", payload))
                    logger.log(logging.INFO, "Interpreting INFO packet : " + str(detail))
            except:
                logger.log(logging.CRITICAL, traceback.format_exc())
                self.isActive = False
Ejemplo n.º 53
0
    def sendConfig(self, configuration):
        """Send the configuration to the client and wait for its ACK.

        Args:
            configuration: payload wrapped in a CONFIG packet; also stored in
                ``self.configuration``.

        Raises:
            Exception: when the reply is not an INFO packet carrying
                ``CLIENT_ACK``. ``self.isActive`` is cleared first.
        """
        logger.log(logging.DEBUG, self.formattedAddr + "Sending configuration")
        self.configuration = configuration

        packet = protocol.Packet(protocol.CONFIG, self.configuration)
        self.writeSocket(packet)

        logger.log(logging.DEBUG,
                   self.formattedAddr + "Configuration sent waiting for ACK")
        reply = self.readSocket(5)  # presumably a 5 second timeout - confirm

        # Previously a non-INFO reply fell through and counted as success;
        # only an explicit CLIENT_ACK is accepted now.
        acknowledged = (reply is not None
                        and reply.type == protocol.INFO
                        and reply.payload.info == protocol.InfoPayload.CLIENT_ACK)
        if acknowledged:
            logger.log(
                logging.DEBUG, self.formattedAddr +
                "Working node ACK received (configuration)")
            return

        self.isActive = False
        raise Exception("Unable to transmit configuration")
Ejemplo n.º 54
0
 def run(self):
     """Start the server's background routines, each in its own thread.

     Launches ``urlDispatcher``, ``mainRoutine`` and ``storageRoutine``, in
     that order, through ``thread.start_new_thread``.
     """
     logger.log(logging.DEBUG, "Starting beginCrawlingProcedure")
     for routineName in ("urlDispatcher", "mainRoutine", "storageRoutine"):
         # Resolve each routine just before starting it, as the original did.
         thread.start_new_thread(getattr(self, routineName), ())
Ejemplo n.º 55
0
def reset():
	"""Reset the module-level ``countdownThread`` to ``False``."""
	global countdownThread
	countdownThread = False



if __name__ == "__main__":
	# Local import so this entry point does not depend on a file-level import.
	import time

	logger.createlogClient("testing", os.path.dirname(os.path.realpath(__file__)) + "/")

	# Thread used to show the countdown and call back for the actual picture
	# taking process; False means no countdown is running. (A `global`
	# statement is a no-op at module level, so it was dropped.)
	countdownThread = False

	# Button listener thread managing callbacks for different events (photo, shuffle, random)
	buttonThread = ButtonThread(cheese, BUTTONS, shuffle, 'SH', refresh, 'RF')
	buttonThread.daemon = True
	buttonThread.start()

	try:
		# Sleep instead of spinning: `while True: pass` pinned a CPU core.
		while True:
			time.sleep(0.1)
	except Exception as e:
		logger.log(e)
		# Bare raise keeps the original traceback.
		raise
	finally:
		print("stopping")
		try:
			# countdownThread is still False when no countdown ever started;
			# calling .stop() on it raised AttributeError and skipped the
			# rest of the cleanup.
			if countdownThread:
				countdownThread.stop()
			buttonThread.stop()
		finally:
			GPIO.cleanup()
Ejemplo n.º 56
0
    def connect(self, host, port, maxAttempts=5, retryDelay=3):
        """Open the TCP connection to the server, retrying on failure.

        Args:
            host: server address.
            port: server port.
            maxAttempts: number of connection attempts before giving up
                (the original loop made 5, not the 6 its docstring claimed).
            retryDelay: seconds to wait between two attempts.

        On success the connected socket is available as ``self.s``. When
        every attempt fails, a CRITICAL message is logged and the process
        exits through ``sys.exit()``.
        """
        self.host = host
        self.port = port
        self.masterNodeFormattedAddr = "[" + str(self.host) + ":" + str(self.port) + "]"

        logger.log(logging.DEBUG, "Socket initialization")
        for attempt in range(1, maxAttempts + 1):
            # Use a fresh socket per attempt: after a failed connect() the
            # state of the socket is unspecified, so it must not be reused.
            self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                logger.log(logging.DEBUG, "Connecting to host... " + self.masterNodeFormattedAddr)
                self.s.connect((self.host, self.port))
            except socket.error:
                self.s.close()
                logger.log(logging.INFO, "Connection failed to " + self.masterNodeFormattedAddr)
                # Do not wait after the last attempt; we are about to exit.
                if attempt < maxAttempts:
                    logger.log(logging.INFO, "Retrying in " + str(retryDelay) + " seconds.")
                    time.sleep(retryDelay)
            else:
                logger.log(logging.INFO, "Connected to " + self.masterNodeFormattedAddr)
                return

        logger.log(logging.CRITICAL, "Unable to connect to host " + self.masterNodeFormattedAddr)
        sys.exit()
Ejemplo n.º 57
0
    def readConfig(self):
        """Read the configuration from the server, install its modules, ACK it.

        Does nothing beyond logging when ``self.isActive`` is false. On a
        CONFIG packet this stores ``crawlingType`` and ``config``, rewrites
        ``modules/rule.py`` and ``modules/scrapping.py`` (next to the running
        script) with the source shipped in the configuration, checks that each
        compiles, reloads ``modules.scrapping`` and answers with a
        ``CLIENT_ACK`` INFO packet.

        A module that fails to compile, or any other failure, is logged and
        clears ``self.isActive``.
        """
        logger.log(logging.DEBUG, "Waiting for configuration from the server.")
        if not self.isActive:
            return

        try:
            deserializedPacket = self.readSocket()
            logger.log(logging.DEBUG, "Configuration received.")

            isConfig = deserializedPacket.type == protocol.CONFIG
            if not isConfig:
                raise Exception("Unable to parse configuration.")

            self.crawlingType = deserializedPacket.payload.crawlingType
            self.config = deserializedPacket.payload.config

            # SECURITY NOTE(review): the source installed below comes straight
            # from the network and is later executed via reload(). This is
            # only acceptable when the server is fully trusted.
            basePath = os.path.dirname(sys.argv[0])
            shippedModules = (
                ("rule", os.path.join(basePath, "modules", "rule.py"), self.config.rule_py),
                ("scrapping", os.path.join(basePath, "modules", "scrapping.py"), self.config.scrapping_py),
            )
            for moduleName, path, source in shippedModules:
                if not self._installShippedModule(moduleName, path, source):
                    # The original called sys.exit(0) here, but the enclosing
                    # bare except intercepted SystemExit, so the real effect
                    # was just deactivating the node. Do that directly.
                    self.isActive = False
                    return

            # dynamic reload of modules
            # TODO reloading of rule.py should eventually come here
            logger.log(logging.INFO, "Reloading modules imported for server")
            reload(sys.modules["modules.scrapping"])

            logger.log(logging.DEBUG, "Sending ACK for configuration.")
            payload = protocol.InfoPayload(protocol.InfoPayload.CLIENT_ACK)
            packet = protocol.Packet(protocol.INFO, payload)
            self.writeSocket(packet)
        except:
            # Catch-all kept on purpose: any failure here deactivates the node.
            logger.log(logging.CRITICAL, traceback.format_exc())
            self.isActive = False

    def _installShippedModule(self, moduleName, path, source):
        """Write ``source`` to ``path`` and return whether it compiles."""
        logger.log(logging.INFO, "Importing " + moduleName + ".py from server")
        with open(path, 'w') as moduleFile:
            moduleFile.write(source)

        try:
            # Compile what actually landed on disk, not the in-memory copy.
            with open(path, 'rb') as moduleFile:
                compile(moduleFile.read(), moduleName + "_test", "exec")
        except Exception:
            logger.log(logging.CRITICAL, traceback.format_exc())
            logger.log(logging.ERROR, "Unable to compile " + moduleName + ".py (is the syntax right?)")
            return False
        return True
Ejemplo n.º 58
0
    def storageRoutine(self):
        """Store the URL of every successfully crawled session in PostgreSQL.

        Connects to the ``crawler2`` database on localhost, then drains
        ``sessionStorageQueue`` while ``self.isActive`` is true and inserts
        ``session.url`` into the ``url`` table for each session that did not
        fail. When the connection cannot be established the queue is still
        drained but nothing is stored. The cursor and connection are closed
        when the loop ends.
        """
        import time  # local import keeps this method self-contained

        logger.log(logging.INFO, "Starting server storageRoutine")

        connection = None
        cursor = None
        try:
            connection = psycopg2.connect(user="******",
                                          password="******",
                                          host="localhost",
                                          port="5432",
                                          database="crawler2")
            cursor = connection.cursor()
            # Print PostgreSQL Connection properties
            print(connection.get_dsn_parameters(), "\n")
            # Print PostgreSQL version
            cursor.execute("SELECT version();")
            record = cursor.fetchone()
            print("You are connected to - ", record, "\n")
        except Exception as error:
            print("Error while connecting to PostgreSQL", error)
            # Without a usable cursor the loop below only drains the queue;
            # previously `cursor` was unbound and every use raised NameError.
            cursor = None

        try:
            while self.isActive:
                try:
                    sessions = protocol.deQueue([sessionStorageQueue])

                    if not sessions:
                        # Back off instead of spinning on an empty queue.
                        time.sleep(0.2)
                        continue

                    for session in sessions:
                        self._storeSession(connection, cursor, session)
                except Exception:
                    logger.log(logging.ERROR, "\n" + traceback.format_exc())
        finally:
            # Release the database resources however the loop ended.
            for resource in (cursor, connection):
                if resource is None:
                    continue
                try:
                    resource.close()
                except Exception:
                    logger.log(logging.ERROR, "\n" + traceback.format_exc())

    def _storeSession(self, connection, cursor, session):
        """Insert one session's URL; errors are reported, never propagated."""
        try:
            if session.failed:
                # NOTE(review): leftover debug output, kept as-is.
                print("hola")
                return
            if cursor is None:
                return

            try:
                # Parameters must be a sequence: a bare string is treated as
                # one parameter per character and the query fails.
                cursor.execute("INSERT INTO url VALUES (%s)", (session.url,))
                # psycopg2 runs inside a transaction; without commit the
                # insert is discarded when the connection closes.
                connection.commit()
            except Exception as error:
                # Leave the aborted transaction so later inserts can succeed.
                connection.rollback()
                print(error)
            # NOTE(review): leftover debug output, kept as-is.
            print("ez")
        except Exception:
            logger.log(logging.ERROR,
                       "Unhandled exception in storage.py")
Ejemplo n.º 59
0
    def dispatcher(self, packet):
        """Handle one packet received from a working node.

        ``None`` is ignored. INFO packets are only logged. URL packets are
        handled according to ``packet.payload.type``:

        * SCRAPPED_URL: every URL of the payload is put on ``urlPool``;
        * VISITED: ``self.sentCount`` is decremented, the URLs are appended
          to ``visitedURLlist`` and the session, when present and not
          ``None``, is put on ``sessionStorageQueue``;
        * SKIPPED: ``self.sentCount`` is decremented, the URLs are appended
          to ``skippedURLlist``, the session is queued the same way and the
          reason is logged based on ``session.returnCode``.

        A packet of any other type is logged at CRITICAL level and dropped.

        NOTE(review): ``urlPool``, ``scrappedURLlist``, ``skippedURLlist``,
        ``visitedURLlist`` and ``sessionStorageQueue`` are not defined in this
        method; presumably module-level shared state - confirm.
        """
        if packet is None:
            return
        logger.log(logging.DEBUG,
                   "Dispatching packet of type: " + str(packet.type))

        if packet.type == protocol.INFO:
            logger.log(logging.DEBUG,
                       self.formattedAddr + "Received INFO packet")
        elif packet.type == protocol.URL:

            # URLs discovered by the node: feed them to the URL pool.
            if packet.payload.type == protocol.URLPayload.SCRAPPED_URL:
                logger.log(
                    logging.INFO,
                    self.formattedAddr + "Receiving scrapped URLs : " +
                    str(len(packet.payload.urlList)).center(5) + " / " +
                    str(len(scrappedURLlist)).center(7) + " - " +
                    str(len(skippedURLlist)).center(5))
                for url in packet.payload.urlList:
                    urlPool.put(url)

            # URLs the node visited: record them and queue the session.
            if packet.payload.type == protocol.URLPayload.VISITED:
                self.sentCount = self.sentCount - 1
                for url in packet.payload.urlList:
                    logger.log(logging.INFO,
                               self.formattedAddr + "Receiving scrapped data")
                    logger.log(
                        logging.DEBUG,
                        self.formattedAddr + "Receiving scrapped data" + url)
                    visitedURLlist.append(url)
                if hasattr(packet.payload, 'session'):
                    if packet.payload.session is not None:
                        sessionStorageQueue.put(packet.payload.session)

            # URLs the node skipped: record them and log why.
            if packet.payload.type == protocol.URLPayload.SKIPPED:
                self.sentCount = self.sentCount - 1
                for url in packet.payload.urlList:
                    skippedURLlist.append(url)
                # NOTE(review): `url` below is the variable left over from the
                # loop above, i.e. the last URL of the payload; it would be
                # unbound here if urlList were empty.
                if hasattr(packet.payload, 'session'):
                    if packet.payload.session is not None:
                        sessionStorageQueue.put(packet.payload.session)
                        # returnCode selects the message (and colour) logged.
                        if packet.payload.session.returnCode == -1:
                            logger.log(
                                logging.INFO,
                                logger.PINK + self.formattedAddr +
                                "Skipped (timeout) : " + url + logger.NOCOLOR)
                        elif packet.payload.session.returnCode == -2:
                            logger.log(
                                logging.INFO,
                                logger.PINK + self.formattedAddr +
                                "Skipped (request not allowed - robot parser) : "
                                + url + logger.NOCOLOR)
                        elif packet.payload.session.returnCode == -100:
                            logger.log(
                                logging.INFO,
                                logger.YELLOW + self.formattedAddr +
                                "Skipped (unknown error) : " + url +
                                logger.NOCOLOR)
                        else:
                            logger.log(
                                logging.INFO, logger.BLUE +
                                self.formattedAddr + "Skipped (html error " +
                                str(packet.payload.session.returnCode) +
                                ") : " + url + logger.NOCOLOR)
                # This else pairs with the hasattr() test: it only runs when
                # the payload has no `session` attribute at all, not when the
                # session is None.
                else:
                    logger.log(
                        logging.INFO, logger.RED + self.formattedAddr +
                        "No session returned" + url + logger.NOCOLOR)
        else:
            logger.log(
                logging.CRITICAL, "Unrecognized packet type : " +
                str(packet.type) + ". This packet was dropped")
            return