Beispiel #1
0
  def CompileFilter(self, filter_string):
    """Compiles the filter string into an EventObjectFilter matcher.

    Args:
      filter_string: string that contains the filter expression, which is
          tokenized with SelectiveLexer.

    Raises:
      errors.WrongPlugin: if the lexer flags an error while tokenizing or
          the 'END' state is not reached.
    """
    lex = SelectiveLexer(filter_string)

    lex.NextToken()
    if lex.error:
      raise errors.WrongPlugin('Malformed filter string.')

    lex.NextToken()
    if lex.error:
      raise errors.WrongPlugin('No fields defined.')

    # Compare the state by value. An identity test ("is not 'END'") only
    # holds when both strings happen to be the very same object.
    while lex.state != 'END':
      lex.NextToken()
      if lex.error:
        raise errors.WrongPlugin('No filter defined for DynamicFilter.')

    # Defensive check, kept in case the loop above is ever changed.
    if lex.state != 'END':
      raise errors.WrongPlugin(
          'Malformed DynamicFilter, end state not reached.')

    self._fields = lex.fields
    self._limit = lex.limit
    self._separator = unicode(lex.separator)

    # Without a filter part only the field selection applies.
    if lex.lex_filter:
      super(DynamicFilter, self).CompileFilter(lex.lex_filter)
    else:
      self._matcher = None
    self._filter_expression = filter_string
Beispiel #2
0
    def _ParseEntry(self, entry):
        """Parses a single filter entry.

    Args:
      entry (dict[str, dict[str, object]]): dictionary containing one more
        filter rules and associated metadata.

    Raises:
      WrongPlugin: if the entry cannot be parsed.
    """
        # A single file with a list of filters to parse.
        for name, meta in entry.items():
            if 'filter' not in meta:
                raise errors.WrongPlugin(
                    'Entry inside {0:s} does not contain a filter statement.'.
                    format(name))

            meta_filter = meta.get('filter')
            matcher = self._GetMatcher(meta_filter)
            if not matcher:
                raise errors.WrongPlugin(
                    'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
                        meta_filter, name))

            self.filters.append((name, matcher, meta))
Beispiel #3
0
  def CompileFilter(self, filter_string):
    """Compiles a set of ObjectFilters defined in a YAML file.

    The parsed YAML must be either a dict or a list of dicts; every dict is
    handed to _ParseEntry.

    Args:
      filter_string: path of the YAML file that defines the filters.

    Raises:
      errors.WrongPlugin: if filter_string is not a file, the YAML file
          cannot be parsed or its content has an unsupported layout.
    """
    if not os.path.isfile(filter_string):
      raise errors.WrongPlugin((
          'ObjectFilterList requires an YAML file to be passed on, this filter '
          'string is not a file.'))

    yaml.add_constructor('!include', IncludeKeyword,
                         Loader=yaml.loader.SafeLoader)

    with open(filter_string, 'rb') as fh:
      try:
        results = yaml.safe_load(fh)
      except (yaml.scanner.ScannerError, IOError) as exception:
        # !s converts the exception with str(); a :s format specification is
        # not supported by arbitrary objects.
        raise errors.WrongPlugin(
            u'Unable to parse YAML file with error: {0!s}.'.format(exception))

    self.filters = []
    if isinstance(results, dict):
      self._ParseEntry(results)
    elif isinstance(results, list):
      for result in results:
        if not isinstance(result, dict):
          # Report the type of the offending entry, not of the enclosing
          # list.
          raise errors.WrongPlugin(
              u'Wrong format of YAML file, entry not a dict ({0!s})'.format(
                  type(result)))
        self._ParseEntry(result)
    else:
      raise errors.WrongPlugin(
          u'Wrong format of YAML file, entry not a dict ({0!s})'.format(
              type(results)))
Beispiel #4
0
    def Process(self, cookie_name=None, cookie_data=None, **kwargs):
        """Checks that this plugin handles the cookie and extracts entries.

        Args:
          cookie_name: The name of the cookie value.
          cookie_data: The cookie data, as a byte string.

        Returns:
          The result of GetEntries() for the cookie data.

        Raises:
          errors.WrongPlugin: If the cookie name differs from the one
              supplied in COOKIE_NAME.
          ValueError: If cookie_name or cookie_data are not set.
        """
        arguments_missing = cookie_name is None or cookie_data is None
        if arguments_missing:
            raise ValueError(u'Cookie name or data are not set.')

        if cookie_name != self.COOKIE_NAME:
            message = u'Not the correct cookie plugin for: {} [{}]'.format(
                cookie_name, self.plugin_name)
            raise errors.WrongPlugin(message)

        # Remaining keyword arguments are handled by the parent class.
        super(CookiePlugin, self).Process(**kwargs)

        entries = self.GetEntries(cookie_data=cookie_data)
        return entries
Beispiel #5
0
  def Process(self, parser_mediator, date_time, syslog_tokens, **kwargs):
    """Parses the syslog body with the first grammar that accepts it.

    The grammars in MESSAGE_GRAMMARS are tried in order. Processing stops at
    the first grammar that is handled without a pyparsing.ParseException.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      date_time (dfdatetime.DateTimeValues): date and time values.
      syslog_tokens (dict[str, str]): names of the fields extracted by the
          syslog parser and the matching grammar, and values are the values of
          those fields. Updated in place with the tokens of the grammar.

    Raises:
      AttributeError: If the syslog_tokens do not include a 'body' attribute.
      WrongPlugin: If the plugin is unable to parse the syslog tokens.
    """
    body = syslog_tokens.get('body')
    if not body:
      raise AttributeError('Missing required attribute: body')

    for key, grammar in self.MESSAGE_GRAMMARS:
      try:
        parsed_tokens = grammar.parseString(body)
        syslog_tokens.update(parsed_tokens.asDict())
        self._ParseMessage(parser_mediator, key, date_time, syslog_tokens)
      except pyparsing.ParseException:
        # This grammar does not apply, try the next one.
        continue
      return

    message = 'Unable to create event from: {0:s}'.format(body)
    raise errors.WrongPlugin(message)
Beispiel #6
0
    def Process(self, parser_mediator, root_item, item_names, **kwargs):
        """Checks that this plugin handles the OLECF file and parses it.

        The names of the items found in the root of the OLECF file are
        compared with REQUIRED_ITEMS. When all required items are available
        the matching sub items are passed to ParseItems().

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          root_item: Root item of the OLECF file.
          item_names: List of all items discovered in the root.

        Raises:
          errors.WrongPlugin: If the set of required items is not a subset
              of the available items.
          ValueError: If the root_item or items are not set.
        """
        if root_item is None or item_names is None:
            raise ValueError(u'Root item or items are not set.')

        available_items = frozenset(item_names)
        if not available_items >= self.REQUIRED_ITEMS:
            message = u'Not the correct items for: {0:s}'.format(self.NAME)
            raise errors.WrongPlugin(message)

        # Only the parser mediator is handed to the parent class; the keyword
        # arguments are not forwarded.
        super(OlecfPlugin, self).Process(parser_mediator)

        # Keep the required sub items that the root item actually returns.
        sub_items = (
            root_item.get_sub_item_by_name(item_name)
            for item_name in self.REQUIRED_ITEMS)
        items = [sub_item for sub_item in sub_items if sub_item]

        self.ParseItems(parser_mediator, root_item=root_item, items=items)
Beispiel #7
0
    def GetEntries(self,
                   parser_mediator,
                   cookie_data=None,
                   url=None,
                   **unused_kwargs):
        """Produces an event object from the 'utmb' cookie data.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          cookie_data: The cookie data, as a byte string.
          url: The full URL or path where the cookie got set.

        Raises:
          errors.WrongPlugin: If the cookie data does not consist of 4
              fields.
        """
        # The value is structured as:
        #   <domain hash>.<pages viewed>.10.<last time>
        fields = cookie_data.split(u'.')
        number_of_fields = len(fields)

        if number_of_fields != 4:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{0:d} vs. 4]'.format(
                    number_of_fields))

        domain_hash = fields[0]
        pages_viewed = fields[1]
        last_visit = int(fields[3], 10)

        event_object = GoogleAnalyticsEvent(
            last_visit,
            eventdata.EventTimestamp.LAST_VISITED_TIME,
            url,
            'utmb',
            self.COOKIE_NAME,
            domain_hash=domain_hash,
            pages_viewed=int(pages_viewed, 10))
        parser_mediator.ProduceEvent(event_object)
Beispiel #8
0
  def Process(self, database=None, cache=None, **kwargs):
    """Checks that this plugin handles the database and extracts entries.

    Args:
      database: Optional ESE database object (instance of pyesedb.file).
                The default is None.
      cache: Optional cache object (instance of EseDbCache). The default is
             None.

    Returns:
      The result of GetEntries() for the database.

    Raises:
      errors.WrongPlugin: If the database does not contain all the tables
                          defined in the required_tables set.
      ValueError: If the database attribute is not valid.
    """
    if database is None:
      raise ValueError(u'Invalid database.')

    available_tables = frozenset(self._GetTableNames(database))
    missing_tables = self._required_tables.difference(available_tables)
    if missing_tables:
      message = u'[{0:s}] required tables not found.'.format(
          self.plugin_name)
      raise errors.WrongPlugin(message)

    super(EseDbPlugin, self).Process(**kwargs)

    entries = self.GetEntries(database=database, cache=cache, **kwargs)
    return entries
Beispiel #9
0
    def Process(self, parser_mediator, timestamp, syslog_tokens, **kwargs):
        """Parses the syslog body with the first grammar that accepts it.

        The grammars in MESSAGE_GRAMMARS are tried in order. Processing stops
        at the first grammar that is handled without a
        pyparsing.ParseException.

        Args:
          parser_mediator: a parser mediator object (instance of
              ParserMediator).
          timestamp: the timestamp, which is an integer containing the number
              of micro seconds since January 1, 1970, 00:00:00 UTC or 0
              on error.
          syslog_tokens: a dictionary whose keys are the names of the fields
              extracted by the syslog parser, and values are the values
              of those fields. Updated in place with the tokens of the
              grammar.

        Raises:
          AttributeError: If the syslog_tokens do not include a 'body'
              attribute.
          WrongPlugin: If the plugin is unable to parse the syslog tokens.
        """
        body = syslog_tokens.get(u'body')
        if not body:
            raise AttributeError(u'Missing required attribute: body')

        for key, grammar in self.MESSAGE_GRAMMARS:
            try:
                parsed_tokens = grammar.parseString(body)
                syslog_tokens.update(parsed_tokens.asDict())
                self.ParseMessage(
                    parser_mediator, key, timestamp, syslog_tokens)
            except pyparsing.ParseException:
                # This grammar does not apply, try the next one.
                continue
            return

        message = u'Unable to create event from: {0:s}'.format(body)
        raise errors.WrongPlugin(message)
Beispiel #10
0
    def _ParseEntry(self, entry):
        """Parses a single YAML filter entry.

        Every rule for which pfilter.GetMatcher() yields a matcher is
        appended to self.filters as a (name, matcher, metadata) tuple.

        Args:
          entry: dictionary that maps rule names onto their metadata; the
              metadata must contain a 'filter' key.

        Raises:
          errors.WrongPlugin: if a rule has no 'filter' key or no matcher
              could be created for its filter.
        """
        for name, meta in entry.items():
            if 'filter' not in meta:
                raise errors.WrongPlugin(
                    u'Entry inside {} does not contain a filter statement.'.
                    format(name))

            meta_filter = meta.get('filter')
            matcher = pfilter.GetMatcher(meta_filter, True)
            if not matcher:
                # The values come from parsed YAML and are not guaranteed to
                # be strings (for example None), hence !s instead of :s so
                # that formatting the message itself cannot fail.
                raise errors.WrongPlugin(
                    u'Filter entry [{0!s}] malformed for rule: <{1!s}>'.format(
                        meta_filter, name))

            self.filters.append((name, matcher, meta))
Beispiel #11
0
    def Process(self, cache=None, database=None, **kwargs):
        """Checks that this plugin handles the database and extracts entries.

        The tables available in the database are compared with
        REQUIRED_TABLES. When all required tables are present this plugin is
        considered the correct one and the result of GetEntries() is
        returned.

        Args:
          cache: A SQLiteCache object.
          database: A database object (instance of SQLiteDatabase).

        Returns:
          The result of GetEntries() for the database.

        Raises:
          errors.WrongPlugin: If the database does not contain all the tables
              defined in the REQUIRED_TABLES set.
          ValueError: If the database attribute is not passed in.
        """
        if database is None:
            raise ValueError(u'Database is not set.')

        available_tables = frozenset(database.tables)
        if not available_tables >= self.REQUIRED_TABLES:
            message = u'Not the correct database tables for: {}'.format(
                self.plugin_name)
            raise errors.WrongPlugin(message)

        super(SQLitePlugin, self).Process(**kwargs)

        entries = self.GetEntries(cache=cache, database=database)
        return entries
Beispiel #12
0
    def Process(self, parser_mediator, timestamp, syslog_tokens, **kwargs):
        """Parses the syslog body with the first grammar that accepts it.

        The grammars in MESSAGE_GRAMMARS are tried in order. Processing stops
        at the first grammar that is handled without a
        pyparsing.ParseException.

        Args:
          parser_mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort
              signals.
          timestamp (int): number of micro seconds since January 1, 1970,
              00:00:00 UTC or 0 on error.
          syslog_tokens (dict[str, str]): names of the fields extracted by
              the syslog parser and the matching grammar, and values are the
              values of those fields. Updated in place with the tokens of
              the grammar.

        Raises:
          AttributeError: If the syslog_tokens do not include a 'body'
              attribute.
          WrongPlugin: If the plugin is unable to parse the syslog tokens.
        """
        body = syslog_tokens.get('body')
        if not body:
            raise AttributeError('Missing required attribute: body')

        for key, grammar in self.MESSAGE_GRAMMARS:
            try:
                parsed_tokens = grammar.parseString(body)
                syslog_tokens.update(parsed_tokens.asDict())
                self.ParseMessage(
                    parser_mediator, key, timestamp, syslog_tokens)
            except pyparsing.ParseException:
                # This grammar does not apply, try the next one.
                continue
            return

        message = 'Unable to create event from: {0:s}'.format(body)
        raise errors.WrongPlugin(message)
Beispiel #13
0
    def Process(self, parser_mediator, cookie_name, cookie_data, url,
                **kwargs):
        """Checks that this plugin handles the cookie and extracts entries.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          cookie_name: The name of the cookie value.
          cookie_data: The cookie data, as a byte string.
          url: The full URL or path where the cookie got set.

        Raises:
          errors.WrongPlugin: If the cookie name differs from the one
              supplied in COOKIE_NAME.
          ValueError: If cookie_name or cookie_data are not set.
        """
        arguments_missing = cookie_name is None or cookie_data is None
        if arguments_missing:
            raise ValueError(u'Cookie name or data are not set.')

        if cookie_name != self.COOKIE_NAME:
            message = (
                u'Not the correct cookie plugin for: {0:s} [{1:s}]'.format(
                    cookie_name, self.NAME))
            raise errors.WrongPlugin(message)

        # Only the parser mediator is handed to the parent class; the keyword
        # arguments are not forwarded.
        super(CookiePlugin, self).Process(parser_mediator)

        self.GetEntries(parser_mediator, cookie_data=cookie_data, url=url)
Beispiel #14
0
    def Process(self, parser_mediator, database=None, cache=None, **kwargs):
        """Checks that this plugin handles the database and extracts entries.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          database: Optional ESE database object (instance of pyesedb.file).
              The default is None.
          cache: Optional cache object (instance of EseDbCache). The default
              is None.

        Raises:
          errors.WrongPlugin: If the database does not contain all the tables
              defined in the required_tables set.
          ValueError: If the database attribute is not valid.
        """
        if database is None:
            raise ValueError(u'Invalid database.')

        available_tables = frozenset(self._GetTableNames(database))
        missing_tables = self._required_tables.difference(available_tables)
        if missing_tables:
            message = u'[{0:s}] required tables not found.'.format(self.NAME)
            raise errors.WrongPlugin(message)

        # Only the parser mediator is handed to the parent class; the keyword
        # arguments are forwarded to GetEntries() instead.
        super(EseDbPlugin, self).Process(parser_mediator)

        self.GetEntries(
            parser_mediator, database=database, cache=cache, **kwargs)
Beispiel #15
0
    def Process(self, parser_mediator, cookie_name, cookie_data, url,
                **kwargs):
        """Checks that this plugin handles the cookie and extracts entries.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          cookie_name (str): the name of the cookie value.
          cookie_data (bytes): the cookie data, as a byte sequence.
          url (str): the full URL or path where the cookie was set.

        Raises:
          errors.WrongPlugin: If the cookie name differs from the one
              supplied in COOKIE_NAME.
          ValueError: If cookie_name or cookie_data are not set.
        """
        arguments_missing = cookie_name is None or cookie_data is None
        if arguments_missing:
            raise ValueError('Cookie name or data are not set.')

        if cookie_name != self.COOKIE_NAME:
            message = (
                'Not the correct cookie plugin for: {0:s} [{1:s}]'.format(
                    cookie_name, self.NAME))
            raise errors.WrongPlugin(message)

        # Only the parser mediator is handed to the parent class; the keyword
        # arguments are not forwarded.
        super(BaseCookiePlugin, self).Process(parser_mediator)

        self.GetEntries(parser_mediator, cookie_data=cookie_data, url=url)
Beispiel #16
0
    def GetEntries(self,
                   parser_mediator,
                   cookie_data=None,
                   url=None,
                   **unused_kwargs):
        """Produces event objects from the 'utma' cookie data.

        One event is produced for each of the first visit, previous visit and
        last visit times stored in the cookie.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          cookie_data: The cookie data, as a byte string.
          url: The full URL or path where the cookie got set.

        Raises:
          errors.WrongPlugin: If the cookie data does not consist of 6
              fields.
        """
        # The value is structured as:
        # <domain hash>.<visitor ID>.<first visit>.<previous>.<last>.<# of
        # sessions>
        fields = cookie_data.split(u'.')
        number_of_fields = len(fields)

        if number_of_fields != 6:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{0:d} vs. 6]'.format(
                    number_of_fields))

        domain_hash, visitor_id, first_visit, previous, last, sessions = fields

        # TODO: catch int() throwing a ValueError.

        # TODO: Double check this time is stored in UTC and not local time.
        first_epoch = int(first_visit, 10)

        # Values shared by all three events.
        event_values = {
            'domain_hash': domain_hash,
            'visitor_id': visitor_id,
            'sessions': int(sessions, 10)}

        parser_mediator.ProduceEvent(GoogleAnalyticsEvent(
            first_epoch, 'Analytics Creation Time', url, 'utma',
            self.COOKIE_NAME, **event_values))

        parser_mediator.ProduceEvent(GoogleAnalyticsEvent(
            int(previous, 10), 'Analytics Previous Time', url, 'utma',
            self.COOKIE_NAME, **event_values))

        parser_mediator.ProduceEvent(GoogleAnalyticsEvent(
            int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME, url,
            'utma', self.COOKIE_NAME, **event_values))
Beispiel #17
0
    def CompileFilter(self, filter_expression):
        """Compiles the filter expression.

        The filter expression contains the name of a YAML file. The parsed
        YAML must be either a dict or a list of dicts; every dict is handed
        to _ParseEntry.

        Args:
          filter_expression: string that contains the filter expression.

        Raises:
          WrongPlugin: if the filter could not be compiled.
        """
        if not os.path.isfile(filter_expression):
            raise errors.WrongPlugin(
                (u'ObjectFilterList requires an YAML file to be passed on, '
                 u'this filter string is not a file.'))

        yaml.add_constructor(u'!include',
                             self._IncludeKeyword,
                             Loader=yaml.loader.SafeLoader)

        with open(filter_expression, 'rb') as file_object:
            try:
                results = yaml.safe_load(file_object)
            except (yaml.scanner.ScannerError, IOError) as exception:
                # !s converts the exception with str(); a :s format
                # specification is rejected by objects that do not
                # implement it, which would mask the parse error.
                raise errors.WrongPlugin(
                    u'Unable to parse YAML file with error: {0!s}.'.format(
                        exception))

        self.filters = []
        if isinstance(results, dict):
            self._ParseEntry(results)
        elif isinstance(results, list):
            for result in results:
                if not isinstance(result, dict):
                    # Report the type of the offending entry, not of the
                    # enclosing list.
                    raise errors.WrongPlugin(
                        u'Wrong format of YAML file, entry not a dict ({0!s})'.
                        format(type(result)))
                self._ParseEntry(result)
        else:
            raise errors.WrongPlugin(
                u'Wrong format of YAML file, entry not a dict ({0!s})'.format(
                    type(results)))
        self._filter_expression = filter_expression
Beispiel #18
0
    def __init__(self, reg_type):
        """Initializes the plugin.

        Args:
          reg_type: The detected Windows Registry type. This value should
              match the REG_TYPE value defined by the plugins; the
              comparison ignores case.

        Raises:
          errors.WrongPlugin: If reg_type does not match REG_TYPE.
        """
        super(WinRegCachePlugin, self).__init__()

        expected_type = self.REG_TYPE.lower()
        detected_type = reg_type.lower()
        if expected_type != detected_type:
            raise errors.WrongPlugin(u'Not the correct Windows Registry type.')
Beispiel #19
0
    def CompileFilter(self, filter_expression):
        """Compiles the filter expression.

        The filter expression contains an object filter expression extended
        with selective field selection.

        Args:
          filter_expression: string that contains the filter expression.

        Raises:
          WrongPlugin: if the filter could not be compiled.
        """
        lexer_object = SelectiveLexer(filter_expression)

        # The first two tokens each have their own error message.
        for error_message in (
                'Malformed filter string.', 'No fields defined.'):
            lexer_object.NextToken()
            if lexer_object.error:
                raise errors.WrongPlugin(error_message)

        # Consume the remaining tokens until the lexer reaches its end state.
        while lexer_object.state != self._STATE_END:
            lexer_object.NextToken()
            if lexer_object.error:
                raise errors.WrongPlugin(
                    'No filter defined for DynamicFilter.')

        if lexer_object.state != self._STATE_END:
            raise errors.WrongPlugin(
                'Malformed DynamicFilter, end state not reached.')

        self._fields = lexer_object.fields
        self._limit = lexer_object.limit
        self._separator = '{0:s}'.format(lexer_object.separator)

        object_filter = lexer_object.lex_filter
        if object_filter:
            super(DynamicFilter, self).CompileFilter(object_filter)
        else:
            # Without a filter part only the field selection applies.
            self._matcher = None
        self._filter_expression = filter_expression
Beispiel #20
0
    def GetEntries(self,
                   parser_mediator,
                   cookie_data=None,
                   url=None,
                   **unused_kwargs):
        """Produces an event object from the 'utmz' cookie data.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          cookie_data: The cookie data, as a byte string.
          url: The full URL or path where the cookie got set.

        Raises:
          errors.WrongPlugin: If the cookie data has fewer than 5 fields.
        """
        # The value is structured as:
        #   <domain hash>.<last time>.<sessions>.<sources>.<variables>
        fields = cookie_data.split('.')

        # The variables can contain dots themselves, so everything after the
        # fourth field is glued back together.
        if len(fields) > 5:
            fields = fields[:4] + [u'.'.join(fields[4:])]

        number_of_fields = len(fields)
        if number_of_fields != 5:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{0:d} vs. 5]'.format(
                    number_of_fields))

        domain_hash, last, sessions, sources, variables = fields

        # The variables are key=value pairs separated by a pipe character.
        kwargs = {}
        for variable in variables.split(u'|'):
            key, _, value = variable.partition(u'=')
            try:
                decoded_value = unicode(urllib.unquote(str(value)), 'utf-8')
            except UnicodeDecodeError:
                decoded_value = repr(value)

            kwargs[key] = decoded_value

        event_object = GoogleAnalyticsEvent(
            int(last, 10),
            eventdata.EventTimestamp.LAST_VISITED_TIME,
            url,
            'utmz',
            self.COOKIE_NAME,
            domain_hash=domain_hash,
            sessions=int(sessions, 10),
            sources=int(sources, 10),
            **kwargs)
        parser_mediator.ProduceEvent(event_object)
Beispiel #21
0
    def _ParseEntry(self, entry):
        """Parses a single filter entry.

    Args:
      entry: YAML string that defines a single object filter entry.

    Raises:
      WrongPlugin: if the entry cannot be parsed.
    """
        # A single file with a list of filters to parse.
        for name, meta in entry.items():
            if u'filter' not in meta:
                raise errors.WrongPlugin(
                    u'Entry inside {0:s} does not contain a filter statement.'.
                    format(name))

            meta_filter = meta.get(u'filter')
            matcher = self._GetMatcher(meta_filter)
            if not matcher:
                raise errors.WrongPlugin(
                    u'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
                        meta_filter, name))

            self.filters.append((name, matcher, meta))
Beispiel #22
0
  def CompileFilter(self, unused_filter_string):
    """Verifies the filter string and prepares the filter for later usage.

    This implementation rejects every filter string by raising
    errors.WrongPlugin.

    Args:
      unused_filter_string: A string passed in that should be recognized by
                            the filter class.

    Raises:
      errors.WrongPlugin: If this filter string does not match the filter
                          class.
    """
    message = 'Not the correct filter for this string.'
    raise errors.WrongPlugin(message)
Beispiel #23
0
  def CompileFilter(self, filter_expression):
    """Compiles the filter expression.

    The filter expression contains an object filter expression. On success
    both the expression and its matcher are stored on the filter object.

    Args:
      filter_expression: string that contains the filter expression.

    Raises:
      WrongPlugin: if the filter could not be compiled.
    """
    compiled_matcher = self._GetMatcher(filter_expression)
    if compiled_matcher:
      self._filter_expression = filter_expression
      self._matcher = compiled_matcher
      return

    raise errors.WrongPlugin(u'Malformed filter expression.')
Beispiel #24
0
    def GetEntries(self,
                   parser_mediator,
                   cookie_data=None,
                   url=None,
                   **unused_kwargs):
        """Produces an event object from the 'utmb' cookie data.

        Args:
          parser_mediator: A parser mediator object (instance of
              ParserMediator).
          cookie_data: The cookie data, as a byte string.
          url: The full URL or path where the cookie got set.

        Raises:
          errors.WrongPlugin: If the cookie data does not consist of 4
              fields.
        """
        fields = cookie_data.split(u'.')
        number_of_fields = len(fields)

        if number_of_fields != 4:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{0:d} vs. 4]'.format(
                    number_of_fields))

        domain_hash = fields[0]

        try:
            number_of_pages_viewed = int(fields[1], 10)
        except ValueError:
            number_of_pages_viewed = 0

        # An unparsable time is flagged with None so that the check below
        # can mark the event as not having a time. Falling back to 0 here
        # made that branch unreachable.
        try:
            last_visit_posix_time = int(fields[3], 10)
        except ValueError:
            last_visit_posix_time = None

        if last_visit_posix_time is not None:
            timestamp_description = eventdata.EventTimestamp.LAST_VISITED_TIME
        else:
            last_visit_posix_time = timelib.Timestamp.NONE_TIMESTAMP
            timestamp_description = eventdata.EventTimestamp.NOT_A_TIME

        event_object = GoogleAnalyticsEvent(
            last_visit_posix_time,
            timestamp_description,
            u'utmb',
            url,
            domain_hash=domain_hash,
            number_of_pages_viewed=number_of_pages_viewed)
        parser_mediator.ProduceEvent(event_object)
Beispiel #25
0
    def GetEntries(self, cookie_data, **unused_kwargs):
        """Yields an event object extracted from the cookie data.

        Args:
          cookie_data: The cookie data, which is split on '.' into 4 fields.

        Yields:
          A GoogleAnalyticsEvent for the last visited time.

        Raises:
          errors.WrongPlugin: If the cookie data does not consist of 4
              fields.
        """
        # The value is structured as:
        #   <domain hash>.<pages viewed>.10.<last time>
        fields = cookie_data.split('.')
        number_of_fields = len(fields)

        if number_of_fields != 4:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{} vs. 4]'.format(
                    number_of_fields))

        domain_hash, pages_viewed, _, last = fields

        event_object = GoogleAnalyticsEvent(
            int(last, 10),
            eventdata.EventTimestamp.LAST_VISITED_TIME,
            self._data_type,
            domain_hash=domain_hash,
            pages_viewed=int(pages_viewed, 10))
        yield event_object
Beispiel #26
0
    def Process(self, root_item=None, item_names=None, **kwargs):
        """Checks that this plugin handles the OLECF file and extracts entries.

        The names of the items found in the root of the OLECF file are
        compared with REQUIRED_ITEMS. When all required items are available
        this plugin is considered the correct one and the matching sub items
        are passed to GetEntries().

        Args:
          root_item: The root item of the OLECF file.
          item_names: A list of all items discovered in the root.

        Returns:
          The result of GetEntries() for the root item and its sub items.

        Raises:
          errors.WrongPlugin: If the set of required items is not a subset
              of the available items.
          ValueError: If the root_item or items are not set.
        """
        if root_item is None or item_names is None:
            raise ValueError(u'Root item or items are not set.')

        available_items = frozenset(item_names)
        if not available_items >= self.REQUIRED_ITEMS:
            message = u'Not the correct items for: {}'.format(
                self.plugin_name)
            raise errors.WrongPlugin(message)

        super(OlecfPlugin, self).Process(**kwargs)

        # Keep the required sub items that the root item actually returns.
        sub_items = (
            root_item.get_sub_item_by_name(item_name)
            for item_name in self.REQUIRED_ITEMS)
        items = [sub_item for sub_item in sub_items if sub_item]

        return self.GetEntries(root_item=root_item, items=items)
Beispiel #27
0
    def GetEntries(self, cookie_data, **unused_kwargs):
        """Yields an event object extracted from the cookie data.

        Args:
          cookie_data: The cookie data, which is split on '.' into fields.

        Yields:
          A GoogleAnalyticsEvent for the last visited time.

        Raises:
          errors.WrongPlugin: If the cookie data has fewer than 5 fields.
        """
        # The value is structured as:
        #   <domain hash>.<last time>.<sessions>.<sources>.<variables>
        fields = cookie_data.split('.')

        # The variables can contain dots themselves, so everything after the
        # fourth field is glued back together.
        if len(fields) > 5:
            fields = fields[:4] + ['.'.join(fields[4:])]

        number_of_fields = len(fields)
        if number_of_fields != 5:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{} vs. 5]'.format(
                    number_of_fields))

        domain_hash, last, sessions, sources, variables = fields

        # The variables are key=value pairs separated by a pipe character;
        # keys are mapped through GA_UTMZ_TRANSLATION when listed there.
        extra_variables_translated = []
        for variable in variables.split('|'):
            key, _, value = variable.partition('=')
            translation = self.GA_UTMZ_TRANSLATION.get(key, key)
            try:
                decoded_value = unicode(urllib.unquote(str(value)), 'utf-8')
            except UnicodeDecodeError:
                decoded_value = repr(value)

            extra_variables_translated.append(
                u'{} = {}'.format(translation, decoded_value))

        event_object = GoogleAnalyticsEvent(
            int(last, 10),
            eventdata.EventTimestamp.LAST_VISITED_TIME,
            self._data_type,
            domain_hash=domain_hash,
            sessions=int(sessions, 10),
            sources=int(sources, 10),
            extra=extra_variables_translated)
        yield event_object
Beispiel #28
0
    def GetEntries(self, cookie_data, **unused_kwargs):
        """Yields three event objects extracted from the cookie value.

        The cookie value must consist of exactly six dot separated fields:
          <domain hash>.<visitor ID>.<first visit>.<previous>.<last>.
          <# of sessions>

        Args:
          cookie_data: The cookie value string.

        Yields:
          GoogleAnalyticsEvent objects for the first visit, the previous visit
          and the last visit, in that order.

        Raises:
          errors.WrongPlugin: If the cookie value does not have six fields.
          ValueError: If a timestamp or the sessions field is not a base 10
            integer.
        """
        record = cookie_data.split('.')
        field_count = len(record)

        # Anything but six fields is not a record this plugin understands.
        if field_count != 6:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{} vs. 6]'.format(field_count))

        (domain_hash, visitor_id, first_visit,
         previous, last, sessions) = record

        def _BuildEvent(timestamp, description):
            """Creates an event sharing the attributes common to all events."""
            return GoogleAnalyticsEvent(timestamp,
                                        description,
                                        self._data_type,
                                        domain_hash=domain_hash,
                                        visitor_id=visitor_id,
                                        sessions=int(sessions, 10))

        # TODO: Double check this time is stored in UTC and not local time.
        yield _BuildEvent(int(first_visit, 10), 'Analytics Creation Time')

        yield _BuildEvent(int(previous, 10), 'Analytics Previous Time')

        yield _BuildEvent(int(last, 10),
                          eventdata.EventTimestamp.LAST_VISITED_TIME)
Beispiel #29
0
    def GetEntries(self,
                   parser_mediator,
                   cookie_data=None,
                   url=None,
                   **unused_kwargs):
        """Produces three event objects from a six field cookie value.

        The cookie value is split on "." into: domain hash, visitor ID, first
        visit, previous visit, last visit and number of sessions. Numeric
        fields that are not base 10 integers are replaced by 0.

        Args:
          parser_mediator: A parser mediator object (instance of
            ParserMediator) that receives the produced events.
          cookie_data: The cookie data string.
          url: The full URL or path where the cookie got set.

        Raises:
          errors.WrongPlugin: If the cookie data does not have six fields.
        """
        parts = cookie_data.split(u'.')
        field_count = len(parts)

        # Only records with exactly six fields are valid.
        if field_count != 6:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{0:d} vs. 6]'.format(field_count))

        (domain_hash, visitor_id, first_visit,
         previous, last, sessions) = parts

        def _ToInteger(text):
            """Converts a base 10 string to an integer, 0 when malformed."""
            try:
                return int(text, 10)
            except ValueError:
                return 0

        def _Produce(timestamp, description):
            """Creates an event for the timestamp and hands it to the mediator."""
            parser_mediator.ProduceEvent(
                GoogleAnalyticsEvent(timestamp,
                                     description,
                                     url,
                                     u'utma',
                                     domain_hash=domain_hash,
                                     visitor_id=visitor_id,
                                     sessions=session_count))

        # TODO: Double check this time is stored in UTC and not local time.
        first_epoch = _ToInteger(first_visit)
        session_count = _ToInteger(sessions)
        previous_epoch = _ToInteger(previous)
        last_epoch = _ToInteger(last)

        _Produce(first_epoch, u'Analytics Creation Time')
        _Produce(previous_epoch, u'Analytics Previous Time')
        _Produce(last_epoch, eventdata.EventTimestamp.LAST_VISITED_TIME)
Beispiel #30
0
    def GetEntries(self,
                   parser_mediator,
                   cookie_data=None,
                   url=None,
                   **unused_kwargs):
        """Produces an event object from a five field cookie value.

        The cookie value is split on "." into: domain hash, last time,
        sessions, sources and variables. The variables field may contain dots
        itself; it holds "|" separated <key>=<value> pairs that are passed to
        the event as keyword arguments. Numeric fields that are not base 10
        integers are replaced by 0.

        Args:
          parser_mediator: A parser mediator object (instance of
            ParserMediator) that receives the event and any parse errors.
          cookie_data: The cookie data string.
          url: The full URL or path where the cookie got set.

        Raises:
          errors.WrongPlugin: If the cookie data has fewer than five fields.
        """
        parts = cookie_data.split(u'.')

        # Dots inside the variables end up as extra pieces; merge them back.
        if len(parts) > 5:
            parts = parts[:4] + [u'.'.join(parts[4:])]

        if len(parts) != 5:
            raise errors.WrongPlugin(
                u'Wrong number of fields. [{0:d} vs. 5]'.format(len(parts)))

        domain_hash, last, sessions, sources, variables = parts

        extra_attributes = {}
        for pair in variables.split(u'|'):
            name, _, raw_value = pair.partition(u'=')

            # Cookie values are usually ASCII or UTF-8 and can be URL encoded
            # on top of that. urllib only url-decodes ASCII strings correctly,
            # hence the value is reduced to ASCII before unquoting.
            try:
                ascii_value = raw_value.encode(u'ascii')
            except UnicodeEncodeError:
                ascii_value = raw_value.encode(u'ascii', errors=u'ignore')
                parser_mediator.ProduceParseError(
                    u'Cookie contains non 7-bit ASCII characters. The characters have '
                    u'been removed')

            unquoted_value = urllib.unquote(ascii_value)

            try:
                decoded_value = unquoted_value.decode(u'utf-8')
            except UnicodeDecodeError:
                decoded_value = unquoted_value.decode(
                    u'utf-8', errors=u'replace')
                parser_mediator.ProduceParseError(
                    u'Cookie value did not decode to value unicode string. Non UTF-8 '
                    u'characters have been replaced.')

            extra_attributes[name] = decoded_value

        def _ToInteger(text):
            """Converts a base 10 string to an integer, 0 when malformed."""
            try:
                return int(text, 10)
            except ValueError:
                return 0

        last_epoch = _ToInteger(last)
        session_count = _ToInteger(sessions)
        source_count = _ToInteger(sources)

        parser_mediator.ProduceEvent(
            GoogleAnalyticsEvent(last_epoch,
                                 eventdata.EventTimestamp.LAST_VISITED_TIME,
                                 url,
                                 u'utmz',
                                 domain_hash=domain_hash,
                                 sessions=session_count,
                                 sources=source_count,
                                 **extra_attributes))