Example #1
def Execute(parse_source, parse_state):
    LOG.info('STARTED: ' + os.path.basename(__file__))
    LOG.info('parse_source: \'' + str(parse_source) + '\'')
    if not parse_state:
        parse_state = db.CommunicationLogs.States.DOWNLOADED  #production mode: parsing newly downloaded logs
    LOG.info('parse_state: \'' + str(parse_state) + '\'')

    communication_logs = db.CommunicationLogs.GetByStateSource(
        state=parse_state,
        source=parse_source,
    )
    LOG.debug(communication_logs)
    for communication_log in communication_logs:
        try:
            file_path = settings.DOWNLOAD_DIR + '/' + communication_log[
                'source'] + '/' + communication_log['file_name']
            if not os.path.exists(file_path):
                LOG.error('Does not exist: ' + file_path)
                continue
            LOG.info('Parsing: ' + file_path)
            parsers = imp.load_source(
                'parsers', 'parsers/' + communication_log['source'] + '.py')
            #class_ = getattr(parsers, settings.COMMUNICATION_LOG_SOURCES[communication_log['source']]['ParserClass'])
            parser_class = getattr(parsers, communication_log['source'])
            parser = parser_class()
            parser.Parse(communication_log)
        except:
            LOG.exception(sys.exc_info()[0])

    LOG.info('COMPLETED')
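The parser module is loaded at run time with imp.load_source, which is deprecated on Python 3. A minimal sketch of an equivalent dynamic load with importlib (load_parser_module is a hypothetical helper, not part of the snippet above):

import importlib.util

def load_parser_module(source):
    #build a module object from parsers/<source>.py, mirroring the imp.load_source call above
    spec = importlib.util.spec_from_file_location('parsers', 'parsers/' + source + '.py')
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module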
 def GetLastFrame(self):
     '''This method is thread safe and can be used to access self.Frames anytime.'''
     with self.frames_lock:
         try:
             index = len(self.Frames) - 1
             if index < 0:
                 return None
             return self.Frames[index]
         except:
             LOG.exception(sys.exc_info()[0])
Example #3
 def __get_communications_from_conversation_node(self, conversation_node):
     communications = []
     participants = {}
     for participantEntered_node in conversation_node.findall(
             './ParticipantEntered'):
         participant = {
             'name':
             participantEntered_node.find('./LoginName').text,
             'login_time':
             datetime.datetime.utcfromtimestamp(
                 float(participantEntered_node.find('./DateTimeUTC').text)),
         }
         participants[participant['name']] = participant
     for participantLeft_node in conversation_node.findall(
             './ParticipantLeft'):
         try:
             participants[participantLeft_node.find('./LoginName').text][
                 'logout_time'] = datetime.datetime.utcfromtimestamp(
                     float(participantLeft_node.find('./DateTimeUTC').text))
         except:
             LOG.exception(sys.exc_info()[0])
             #LOG.info(participants)
             #LOG.info(stringify_children(conversation_node))
             raise
     for message_node in conversation_node.findall('./Message'):
         message_time = get_datetime_from_unix_string(
             message_node.find('./DateTimeUTC').text)
         name = message_node.find('./LoginName').text
         tos = []
         for participant_name in participants:
             try:
                 if participant_name != name and participants[
                         participant_name][
                             'login_time'] <= message_time and participants[
                                 participant_name][
                                     'logout_time'] >= message_time:
                     tos.append({'name': participant_name})
             except:
                 LOG.exception(participant_name)
                 #LOG.info(participants)
                 #LOG.info(tos)
                 raise
         communication = self.create_communication(
             from_={
                 'name': name,
             },
             to=tos,
             message=message_node.find('./Content').text,
             message_time=message_time,
         )
         communications.append(communication)
         #LOG.debug(communication)
     return communications
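For orientation, a conversation node this walker accepts can be built with ElementTree as below (a minimal sketch: the child element names come from the find/findall calls above, while the root tag name, login names, and timestamps are invented for illustration):

import xml.etree.ElementTree as ET

#hypothetical conversation node using the element names the method queries
conversation_node = ET.fromstring('''
<Conversation>
  <ParticipantEntered><LoginName>alice</LoginName><DateTimeUTC>1514764800</DateTimeUTC></ParticipantEntered>
  <ParticipantEntered><LoginName>bob</LoginName><DateTimeUTC>1514764805</DateTimeUTC></ParticipantEntered>
  <Message><LoginName>alice</LoginName><DateTimeUTC>1514764820</DateTimeUTC><Content>hello</Content></Message>
  <ParticipantLeft><LoginName>alice</LoginName><DateTimeUTC>1514764900</DateTimeUTC></ParticipantLeft>
  <ParticipantLeft><LoginName>bob</LoginName><DateTimeUTC>1514764910</DateTimeUTC></ParticipantLeft>
</Conversation>''')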
    def __init__(
            self,
            stream_name,
            time_span_between_frames_in_secs=-1,
            frame_queue_max_length=10,
            save_frames2directory='./_frames',
            catch_frames=True,
            reconnect_max_count=3,  #ignored when refreshing connection due to AWS kinesis time limit
    ):
        try:
            LOG.info('Parser starting for %s' % stream_name)

            self.lock = threading.Lock()
            self.frames_lock = threading.Lock()
            self.tags_lock = threading.Lock()
            self.disposing = False

            self.stream_name = stream_name
            self.TimeSpanBetweenFramesInSecs = time_span_between_frames_in_secs
            self.FrameQueueMaxLength = frame_queue_max_length

            if save_frames2directory is True:
                save_frames2directory = './_frames'
            self.frame_directory = save_frames2directory
            if self.frame_directory:
                if os.path.exists(self.frame_directory):
                    import shutil
                    shutil.rmtree(self.frame_directory)
                os.makedirs(self.frame_directory)

            self.catch_frames = catch_frames
            self.reconnect_max_count = reconnect_max_count

            self.next_frame_time = 0.0
            with self.frames_lock:
                self.Frames = []
            with self.tags_lock:
                self.tags_line = []
                self.last_packet_tags = None
            self.last_frame_id = 0
            self.connection_attempts_count = 0
            self.connection_renewals_count = 0
            self.kinesis_stream = None
            self.libav_input_descriptor = None
            self.kinesis_stream_reader_thread = None
            self.libav_parser_thread = None
            self.kinesis_stream_pipe = r'/tmp/AwsKinesisParserFifo'
            self.starter(False)

        except:
            LOG.exception(sys.exc_info()[0])
        finally:
            pass
Example #5
    def __download_communication_logs(source):
        try:
            LOG.info('Checking: ' + source['Name'])
            transport = paramiko.Transport(source['Host'], source['Port'])
            transport.connect(username=source['User'],
                              password=source['Password'])
            sftp = paramiko.SFTPClient.from_transport(transport)
            sftp.chdir(source['RemoteDir'])
            files = sftp.listdir('.')
            for file in files:
                try:
                    #lstat = sftp.lstat(file)
                    #LOG.debug(file, lstat)
                    #communication_log = db.CommunicationLogs.GetByKey(source['Name'], file, lstat.st_mtime)
                    communication_log = db.CommunicationLogs.GetByKey(
                        source['Name'], file)

                    if not reload_state:  #production mode: only loading new logs, not reloading old ones
                        if communication_log:
                            continue
                    else:  #testing mode: only reloading old logs, not loading new ones
                        if not communication_log:
                            continue
                        if communication_log['state'] != reload_state:
                            continue
                        db.CommunicationLogs.DeleteById(
                            communication_log['_id'])
                        db.Conversations.DeleteByCommunicationLogId(
                            communication_log['_id'])
                        db.Communications.DeleteByCommunicationLogId(
                            communication_log['_id'])

                    unique_file_name = file  #+ '.' + str(lstat.st_mtime)
                    LOG.info('Loading: %s/%s', source['Name'],
                             unique_file_name)
                    directory = settings.DOWNLOAD_DIR + '/' + source['Name']
                    if not os.path.exists(directory):
                        os.makedirs(directory)
                    sftp.get(file, directory + '/' + unique_file_name)
                    db.CommunicationLogs.Add(
                        source=source['Name'],
                        remote_file_name=file,
                        #remote_file_modified_time = lstat.st_mtime,
                        file_name=unique_file_name,
                        enterprise_id=source['EnterpriseId'],
                    )
                except:
                    LOG.exception(sys.exc_info()[0])
        except:
            LOG.exception(sys.exc_info()[0])
    def libav_parser(self):
        try:
            if self.disposing:
                return

            LOG.info('libav_parser started')

            if not self.run_libav_parser:
                return

            self.libav_output_reader = av.open(self.kinesis_stream_pipe)
            LOG.info('kinesis_stream_pipe opened for reading')
            for packet in self.libav_output_reader.demux(video=0):
                if not self.run_libav_parser:
                    return

                with self.tags_lock:
                    tags_i = -1
                    for i, t in enumerate(self.tags_line):
                        if t.position > packet.pos:
                            tags_i = i
                            break
                    #print('len(self.tags_line):%d'%len(self.tags_line))
                    if tags_i < 0:
                        raise Exception('No tag for packet!')
                    else:
                        self.last_packet_tags = self.tags_line[tags_i]
                        del self.tags_line[:tags_i]

                if not self.catch_frames:
                    continue

                for frame in packet.decode():
                    if not self.run_libav_parser:
                        break
                    self.last_frame_id += 1
                    #self.catch_frame(self.last_packet_tags, frame.to_image(), self.last_frame_id)  #roughly 70% slower
                    self.catch_frame(self.last_packet_tags,
                                     frame.to_nd_array(format='bgr24'),
                                     self.last_frame_id)

        except:
            LOG.exception(sys.exc_info()[0])

        finally:
            LOG.info('libav_parser exiting...:\r\nrun_libav_parser=%s' %
                     (self.run_libav_parser, ))
            self.starter(True)
    def on_message(self, stomp_headers, json_encoded_messages):
        LOG.debug('STOMP headers {}'.format(stomp_headers))

        try:
            messages = json.loads(json_encoded_messages)
        except ValueError as e:
            LOG.error('Failed to decode {} bytes as JSON: {}'.format(
                len(json_encoded_messages), json_encoded_messages))
            LOG.exception(e)
            return

        try:
            self._handle_multiple_messages(messages)
        except Exception as e:
            LOG.exception(e)
            return
    def Dispose(self):
        if self.disposing:
            return
        self.disposing = True

        with self.lock:

            LOG.info('Shutting down Parser...\r\n%s' %
                     '\r\n'.join(traceback.format_stack()))

            self.run_kinesis_stream_reader = False
            self.run_libav_parser = False
            time.sleep(1)

            if self.kinesis_stream:
                try:
                    self.kinesis_stream.close()
                except:
                    LOG.exception(sys.exc_info()[0])
                self.kinesis_stream = None

            if self.libav_input_descriptor:
                try:
                    self.libav_input_descriptor.close()
                except:
                    LOG.exception(sys.exc_info()[0])
                self.libav_input_descriptor = None

            self.libav_output_reader = None

            if self.kinesis_stream_reader_thread:
                self.kinesis_stream_reader_thread.join(3)
                if self.kinesis_stream_reader_thread.is_alive():
                    raise Exception(
                        'kinesis_stream_reader_thread has not stopped!')

            if self.libav_parser_thread:
                self.libav_parser_thread.join(3)
                if self.libav_parser_thread.is_alive():
                    raise Exception('libav_parser_thread has not stopped!')

            LOG.info("Parser has been disposed.")
 def readElementHead(self):
     try:
         id = self.readElementId()
     except:  # Invalid EBML header.
         LOG.exception(sys.exc_info()[0])
         id = None
     try:
         size = self.readElementSize()
     except:
         LOG.exception(sys.exc_info()[0])
         size = -1
     try:
         name, type_ = EbmlElementIds2NameType[id]
     except:
         name = None
         type_ = None
     #LOG.info('Position: %d, size:%d, id:%s, name:%s, type_:%s' % (self.Position, size, id, name, type_))
     self.lastElementHead = EbmlElementHead(size, id, name, type_)
     if self.ElementHeadCalback is not None:
         self.ElementHeadCalback(self, size, id, name, type_)
     return (size, id, name, type_)
Example #10
    def Parse(
        self,
        communication_log,
    ):
        try:
            self.communication_log = communication_log

            lexicon = db.Lexicon.GetByEnterprise(
                self.communication_log['enterprise_id'])
            keywords = {}
            for l in lexicon:
                for k in l['keywords']:
                    if k and not k.isspace():
                        keywords[k.strip()] = 1
            if len(keywords) > 0:
                self.keywords_regex = re.compile('|'.join(keywords.keys()),
                                                 re.IGNORECASE)
            else:
                self.keywords_regex = re.compile('---', re.IGNORECASE)
                LOG.warning('Lexicon is empty for enterprise_id=' +
                            str(self.communication_log['enterprise_id']))

            db.Conversations.DeleteByCommunicationLogId(
                self.communication_log['_id'])
            db.Communications.DeleteByCommunicationLogId(
                self.communication_log['_id'])

            self.file_path = db.CommunicationLogs.GetFilePath(
                communication_log)
            self.conversations = []
            self.fill_conversations()
            for communications in self.conversations:
                self.__save_conversation(communications)
            db.CommunicationLogs.SetParsed(communication_log['_id'])
        except:
            LOG.exception(sys.exc_info()[0])
            db.CommunicationLogs.SetParsed(communication_log['_id'],
                                           sys.exc_info()[0])
    def kinesis_stream_reader(self):
        try:
            if self.disposing:
                return

            LOG.info('kinesis_stream_reader started')

            if not self.run_kinesis_stream_reader:
                return

            interestingElementNames = [
                'Segment',
                # 'Cluster',
                'TagName',
                'TagString',
                'DocTypeReadVersion'  #last tag in segment
            ]
            #interestingElementNames = None
            ebmlReader = ebml.EbmlReader(
                self.kinesis_stream,
                interestingElementNames)  #, self.print_ebml_element_head)

            self.libav_input_descriptor = open(self.kinesis_stream_pipe, 'w')

            tags = Tags()
            ebmlReader.CopyBuffer = io.BytesIO()
            lastTagName = None
            while self.run_kinesis_stream_reader:
                size, id, name, type_, value = ebmlReader.ReadNextElement()
                #self.print_ebml_element(size, id, name, type_, value)

                #TEST
                #if ebmlReader.Position > 30000000:
                #    f = self.GetLastFrame()
                #    LOG.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>Last frame: %s' % f)
                #    raise Exception("restart test")

                if name == 'Segment':
                    pass

                elif name == 'TagName':
                    lastTagName = value
                elif name == 'TagString':
                    if hasattr(tags, lastTagName):
                        setattr(tags, lastTagName, value)
                    elif lastTagName == 'AWS_KINESISVIDEO_ERROR_CODE':
                        LOG.error('AWS_KINESISVIDEO_ERROR_CODE: %s' % value)
                    elif lastTagName == 'AWS_KINESISVIDEO_ERROR_ID':
                        LOG.error('AWS_KINESISVIDEO_ERROR_ID: %s' % value)

                elif name == 'DocTypeReadVersion':
                    tags.position = ebmlReader.Position
                    with self.tags_lock:
                        self.tags_line.append(tags)
                    tags = Tags()

                    bs = ebmlReader.CopyBuffer.getvalue()
                    #LOG.info('=====================CopyBuffer: %d, %d'%(len(bs), ebmlReader.Position))
                    self.libav_input_descriptor.write(bs)
                    self.libav_input_descriptor.flush()
                    #os.write(self.libav_input_descriptor, bs)
                    #os.flush(self.libav_input_descriptor)
                    ebmlReader.CopyBuffer.close()
                    ebmlReader.CopyBuffer = io.BytesIO()

        except:
            LOG.exception(sys.exc_info()[0])

        finally:
            LOG.info(
                'kinesis_stream_reader exiting...:\r\nrun_kinesis_stream_reader=%s'
                % (self.run_kinesis_stream_reader))
            if self.last_packet_tags:  #the last connection was successful
                self.starter(False)
            else:
                self.starter(True)
    def starter_(self, count_connection_attempt):
        try:
            if self.disposing:
                return

            #stop and clean everything before [re-]starting

            self.run_kinesis_stream_reader = False
            if self.kinesis_stream:
                try:
                    self.kinesis_stream.close()
                except:
                    LOG.exception(sys.exc_info()[0])
                self.kinesis_stream = None

            if self.libav_input_descriptor:
                try:
                    #verified: after closing the pipe input, libav will still read all remaining packets until EOF
                    LOG.info('Closing libav_input_descriptor...')
                    self.libav_input_descriptor.close()
                except:
                    LOG.exception(sys.exc_info()[0])
                self.libav_input_descriptor = None

            if count_connection_attempt:
                self.connection_attempts_count += 1
            else:
                self.connection_renewals_count += 1
            if self.connection_attempts_count > self.reconnect_max_count:
                LOG.warning('Stopping because reconnect count exceeded %d...' %
                            self.reconnect_max_count)
                return

            self.set_kinesis_stream()

            if self.libav_parser_thread:
                LOG.info(
                    "Wating libav_parser_thread to read remaining packets and stop..."
                )
                self.libav_parser_thread.join()
                self.libav_parser_thread = None
                LOG.info("libav_parser_thread has been stopped.")
            self.libav_output_reader = None

            if self.kinesis_stream_reader_thread:
                LOG.info("Wating kinesis_stream_reader_thread to stop...")
                self.kinesis_stream_reader_thread.join()
                self.kinesis_stream_reader_thread = None
                LOG.info("kinesis_stream_reader_thread has been stopped.")

            with self.tags_lock:
                self.tags_line = []
                self.last_packet_tags = None  #apart from its main use, this also shows whether at least one packet was read after the [re-]connection

            if os.path.exists(self.kinesis_stream_pipe):
                os.remove(
                    self.kinesis_stream_pipe
                )  #remove a leftover pipe (possibly still holding data) after a Parser interruption
            os.mkfifo(self.kinesis_stream_pipe)

            self.run_kinesis_stream_reader = True
            self.kinesis_stream_reader_thread = Thread(
                target=self.kinesis_stream_reader, args=())
            self.kinesis_stream_reader_thread.daemon = True
            self.kinesis_stream_reader_thread.start()

            self.run_libav_parser = True
            self.libav_parser_thread = Thread(target=self.libav_parser,
                                              args=())
            self.libav_parser_thread.daemon = True
            self.libav_parser_thread.start()

        except:
            LOG.exception(sys.exc_info()[0])
            self.dispose()

        finally:
            pass
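Taken together, the stream snippets above imply a simple lifecycle: construct the parser, poll frames from another thread, and dispose when done. A minimal usage sketch under the assumption that the class is named Parser and that process_frame is a caller-supplied consumer:

import time

parser = Parser('my-kinesis-stream')  #stream name is an example value
try:
    for _ in range(100):  #poll for roughly ten seconds
        frame = parser.GetLastFrame()  #thread safe; returns None until the first frame arrives
        if frame is not None:
            process_frame(frame)  #hypothetical consumer
        time.sleep(0.1)
finally:
    parser.Dispose()  #stops the reader and parser threads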