Exemple #1
0
    def Parse(cls, event):
        """Parses logon event data based on position in event.strings.

        Only event identifiers 4624 and 4648 are handled. Any other identifier
        yields empty event data.

        Args:
          event (dict): dict serialized plaso event.

        Returns:
          event_data.EventData: event data parsed from event. It is empty when
              the event carries no usable strings or has an unhandled
              identifier.
        """
        # Function-scope import so the method does not depend on a file-level
        # import of ast.
        import ast

        strings = event.get(u'strings')
        if not strings:
            return event_data.EventData()

        if not isinstance(strings, list):
            # The strings may arrive serialized as the textual representation
            # of a list. SECURITY: this used to be eval(), which executes any
            # expression found in the event. literal_eval only accepts Python
            # literals, so crafted event content cannot run code.
            try:
                strings = ast.literal_eval(strings)
            except (ValueError, SyntaxError, TypeError):
                return event_data.EventData()
            if not isinstance(strings, (list, tuple)):
                return event_data.EventData()

        data = event_data.EventData()
        event_id = event.get(u'event_identifier')

        # Each entry pairs an (empty) datum with the index of its value in
        # strings. A tuple of pairs is used instead of a dict keyed by datum,
        # because the datum is mutated below and a fixed order is preferable.
        if event_id == 4624:  # An account was successfully logged on.
            storage_file_name = utils.GetImageName(event)
            data.Add(event_data.StorageFileName(
                source=True, value=storage_file_name))
            field_mapper = (
                (event_data.UserId(source=True), 0),
                (event_data.UserName(source=True), 1),
                (event_data.UserId(target=True), 4),
                (event_data.UserName(target=True), 5),
                (event_data.MachineName(target=True), 11),
                (event_data.Ip(target=True), 18),
            )
        elif event_id == 4648:
            # A logon was attempted using explicit credentials.
            # No storage file name is recorded for this event; this is
            # unchanged from the previous behaviour.
            field_mapper = (
                (event_data.UserId(source=True), 0),
                (event_data.UserName(source=True), 1),
                (event_data.UserName(target=True), 5),
                (event_data.MachineName(target=True), 8),
                (event_data.Ip(target=True), 12),
            )
        else:
            return data

        source_machine_name = event.get(u'computer_name', '')
        data.Add(event_data.MachineName(
            source=True, value=source_machine_name))

        available = len(strings)
        for datum, field_index in field_mapper:
            if field_index >= available:
                # A record shorter than expected lacks this field; skip it
                # rather than failing the whole event with IndexError.
                continue
            datum.value = strings[field_index]
            data.Add(datum)

        return data
Exemple #2
0
    def Parse(cls, event):
        """Extracts login details from the event message via MATCH_REGEXP.

        Args:
          event (dict): dict serialized plaso event.

        Returns:
          event_data.EventData: event data parsed from event. It is empty when
              the message does not match MATCH_REGEXP.
        """
        parsed = event_data.EventData()
        message = event.get(u'message', u'')
        matched = cls.MATCH_REGEXP.match(message)
        if not matched:
            return event_data.EventData()

        # Target side: the image and host the login was recorded on.
        parsed.Add(event_data.StorageFileName(
            target=True, value=utils.GetImageName(event)))
        parsed.Add(event_data.MachineName(
            target=True, value=event.get(u'hostname', u'-')))
        parsed.Add(event_data.UserName(
            target=True, value=matched.group(u'user')))

        # Source side: the address captured by the "ip" group.
        # Authentication method and pid are deliberately not extracted.
        parsed.Add(event_data.Ip(source=True, value=matched.group(u'ip')))
        return parsed
Exemple #3
0
    def Parse(cls, event):
        """Extracts user and ip from event.message via MATCH_REGEXP.

        Args:
          event (dict): dict serialized plaso event.

        Returns:
          event_data.EventData: event data parsed from event. It is empty when
              the message does not match MATCH_REGEXP.
        """
        message = event.get(u'message', '')
        matched = cls.MATCH_REGEXP.match(message)
        if not matched:
            return event_data.EventData()

        parsed = event_data.EventData()
        parsed.Add(event_data.StorageFileName(
            target=True, value=utils.GetImageName(event)))
        parsed.Add(event_data.UserName(
            target=True, value=matched.group(u'user')))
        parsed.Add(event_data.Ip(source=True, value=matched.group(u'ip')))
        return parsed
Exemple #4
0
 def test_SysLogSsh(self):
   """Checks the data extracted from a syslog:ssh:login event."""
   source_ip = event_data.Ip(source=True, value=u'10.0.8.6')
   target_machine = event_data.MachineName(target=True, value=u'acserver')
   target_storage = event_data.StorageFileName(
       target=True, value=u'acserver.dd/images/user/usr/')
   target_user = event_data.UserName(target=True, value=u'dean@acserver')

   self._testParser(
       [source_ip, target_machine, target_storage, target_user],
       self._sys_log_ssh)
Exemple #5
0
 def test_LinuxUtmp(self):
   """Checks the data extracted from a linux:utmp:event event."""
   source_data = [
       event_data.Ip(source=True, value=u'192.168.1.11'),
       event_data.MachineName(source=True, value=u'192.168.1.11'),
   ]
   target_data = [
       event_data.UserName(target=True, value=u'dean@acserver'),
       event_data.MachineName(target=True, value=u'acserver'),
       event_data.StorageFileName(
           target=True, value=u'acserver.dd/images/user/usr/'),
   ]
   self._testParser(source_data + target_data, self._linux_utmp_event)
Exemple #6
0
 def test_Bsm(self):
   """Checks the data extracted from a bsm:event event."""
   source_ip = event_data.Ip(source=True, value=u'192.168.1.11')
   target_storage = event_data.StorageFileName(
       target=True, value=u'dean_mac.dd/greendale_images/media/')
   target_user_id = event_data.UserId(
       target=True, value=u'502@dean_mac.dd/greendale_images/media/')
   target_user_name = event_data.UserName(
       target=True, value=u'dean@dean_mac.dd/greendale_images/media/')

   self._testParser(
       [source_ip, target_storage, target_user_id, target_user_name],
       self._bsm_event)
Exemple #7
0
    def Parse(cls, event):
        """Builds event data straight from the fields of the event.

        Args:
          event (dict): dict serialized plaso event.

        Returns:
          event_data.EventData: event data parsed from event.
        """
        parsed = event_data.EventData()

        parsed.Add(event_data.StorageFileName(
            target=True, value=utils.GetImageName(event)))
        parsed.Add(event_data.MachineName(
            target=True, value=event.get(u'hostname')))

        # ip_address is either a dict, whose u'stream' entry is used, or a
        # plain string. Any other type yields no Ip datum at all.
        ip_field = event.get(u'ip_address', {})
        has_ip = True
        if isinstance(ip_field, dict):
            ip_value = ip_field.get(u'stream')
        elif isinstance(ip_field, basestring):
            ip_value = ip_field
        else:
            has_ip = False
        if has_ip:
            parsed.Add(event_data.Ip(source=True, value=ip_value))

        parsed.Add(event_data.MachineName(
            source=True, value=event.get(u'computer_name')))
        parsed.Add(event_data.UserName(
            target=True, value=event.get(u'user')))
        return parsed
Exemple #8
0
 def test_WinEvtx(self):
   """Checks the data extracted from a windows:evtx:record event."""
   source_data = [
       event_data.MachineName(
           source=True, value=u'REGISTRAR.internal.greendale.edu'),
       event_data.UserId(source=True, value=u'*****@*****.**'),
       event_data.StorageFileName(
           source=True, value=u'registrar.dd/greendale_images/media/'),
   ]
   target_data = [
       event_data.MachineName(target=True, value=u'STUDENT-PC1'),
       event_data.Ip(target=True, value=u'192.168.1.11'),
       event_data.UserId(target=True, value=u'S-1-5-7@STUDENT-PC1'),
       event_data.UserName(
           target=True, value=u'ANONYMOUS LOGON@STUDENT-PC1'),
   ]
   self._testParser(source_data + target_data, self._win_evtx_event)
Exemple #9
0
 def test_CreateGraph(self):
     """Checks node and edge counts of a graph built from one event."""
     data_items = [
         event_data.Ip(source=True, value=u'192.168.1.11'),
         event_data.MachineName(source=True, value=u'192.168.1.11'),
         event_data.UserName(target=True, value=u'dean@acserver'),
         event_data.MachineName(target=True, value=u'acserver'),
         event_data.StorageFileName(
             target=True, value=u'acserver.dd/images/user/usr/'),
     ]
     parsed_event = event_data.EventData(
         data=data_items, event_id=1, timestamp=1441559606244560)

     graph = graph_lib.CreateGraph([parsed_event])

     self.assertEqual(len(graph.nodes), 4)
     self.assertEqual(len(graph.edges), 3)
Exemple #10
0
    def Parse(cls, event):
        """Parses event.message with regexps.

        Only events whose event_type is u'OpenSSH login (32800)' and whose
        message matches SUCCESS_REGEXP are parsed. Everything else yields
        empty event data.

        Args:
          event (dict): dict serialized plaso event.

        Returns:
          event_data.EventData: event data parsed from event.
        """
        message = event.get(u'message', '')
        # Reject unsupported events before doing any other work.
        if (event.get(u'event_type') != u'OpenSSH login (32800)'
                or not cls.SUCCESS_REGEXP.match(message)):
            return event_data.EventData()

        data = event_data.EventData()
        storage_file_name = utils.GetImageName(event)
        data.Add(event_data.StorageFileName(
            target=True, value=storage_file_name))

        user = cls.USER_REGEXP.search(message)
        if user:
            data.Add(event_data.UserName(target=True, value=user.group(1)))

        # Tokens are a comma separated list of "key(value)" items.
        token_dict = {}
        raw_tokens = cls.TOKEN_REGEXP.search(message)
        if raw_tokens:
            for token in raw_tokens.group(1).split(u','):
                key, separator, value = token.strip(u' )').partition(u'(')
                if not separator:
                    # A token without "(" has no value. Skip it instead of
                    # failing the whole event on tuple unpacking.
                    continue
                token_dict[key] = value

        # Missing tokens result in datums whose value is None, as before.
        data.Add(event_data.Ip(
            source=True, value=token_dict.get(u'terminal_ip')))
        data.Add(event_data.UserId(
            target=True, value=token_dict.get(u'uid')))

        return data
Exemple #11
0
class Graph(object):
    """Minimal property graph used to store lateral movement event data.

    This is not a general purpose property graph. A property graph is a set
    of nodes and edges where every node and edge may carry key/value
    properties. Here the set of properties is fixed.

    Node properties:
      id: automatically generated unique id.
      type: type/name of the node, corresponding to EventDatum.NAME.
      value: value of the node.
    The pair (type, value) also identifies a node uniquely.

    Edge properties:
      source: id of the source node.
      target: id of the target node.
      type: type of the edge. The usual types are
        "has": machine has a user.
        "is": machine is ip_address (not necessarily true for the whole
            time).
        "access": remote connection.
      events: list of event ids and timestamps of the events responsible for
          creating the edge. Events can be found by id in timesketch or
          filtered by timestamp.
    Every edge therefore stands for multiple edges created by multiple
    events. Those events are listed in the events property of the edge.

    Edges are directed in theory. In practice the direction only matters for
    visualization.

    Nodes and edges are stored as plain dictionaries rather than objects.
    That makes the structure easy to extend, at the price of being prone to
    data integrity errors, and it maps directly onto the data consumed by
    the javascript (d3) visualization.

    Attributes:
      edges (list): list of graph edges.
      edges_ids (defaultdict[tuple, int]): maps tuple serialized edges to
          their ids.
      nodes (list): list of graph nodes.
      nodes_ids (defaultdict[tuple, int]): maps tuple serialized nodes to
          their ids.
    """

    # Supported edge types.
    EDGE_HAS = u'has'
    EDGE_IS = u'is'
    EDGE_ACCESS = u'access'

    # A rule states that an event holding both the source and the target
    # datum produces an edge of the given type between them.
    Rule = namedtuple(u'Rule', [u'source', u'target', u'type'])
    RULES = (
        # "is" edges.
        Rule(event_data.Ip(source=True),
             event_data.MachineName(source=True),
             EDGE_IS),
        Rule(event_data.UserName(source=True),
             event_data.UserId(source=True),
             EDGE_IS),
        Rule(event_data.MachineName(target=True),
             event_data.Ip(target=True),
             EDGE_IS),
        Rule(event_data.UserName(target=True),
             event_data.UserId(target=True),
             EDGE_IS),
        # "has" edges on the source side.
        Rule(event_data.Ip(source=True),
             event_data.UserName(source=True),
             EDGE_HAS),
        Rule(event_data.Ip(source=True),
             event_data.UserId(source=True),
             EDGE_HAS),
        Rule(event_data.MachineName(source=True),
             event_data.UserName(source=True),
             EDGE_HAS),
        Rule(event_data.MachineName(source=True),
             event_data.UserId(source=True),
             EDGE_HAS),
        # "has" edges on the target side.
        Rule(event_data.Ip(target=True),
             event_data.UserName(target=True),
             EDGE_HAS),
        Rule(event_data.Ip(target=True),
             event_data.UserId(target=True),
             EDGE_HAS),
        Rule(event_data.MachineName(target=True),
             event_data.UserName(target=True),
             EDGE_HAS),
        Rule(event_data.MachineName(target=True),
             event_data.UserId(target=True),
             EDGE_HAS),
    )

    # Datum classes usable as remote source/target, most preferred first.
    DATA_PRIORITY = (
        event_data.UserName,
        event_data.UserId,
        event_data.MachineName,
        event_data.Ip,
        event_data.StorageFileName,
    )

    def __init__(self):
        """Initializes an empty graph."""
        self.nodes = []
        self.nodes_ids = defaultdict(int)  # Fast lookup of node ids.
        self.edges = []
        self.edges_ids = defaultdict(int)  # Fast lookup of edge ids.

    def GetAddNode(self, node_type, node_value):
        """Returns the id of the node with given type and value.

        The node is created first when it is not in the graph yet.

        Args:
          node_type (str): type of node derived from event datum names.
          node_value (str): value of the node.

        Returns:
          int: identifier of node. This is the position of the node in
              self.nodes.
        """
        node = Node(node_type, node_value)
        if node.ToTuple() in self.nodes_ids:
            return self.nodes_ids[node.ToTuple()]

        # Unknown node: its id is the next free position in self.nodes.
        node.id = len(self.nodes)
        self.nodes_ids[node.ToTuple()] = node.id
        self.nodes.append(node.ToDict())
        return node.id

    def AddEdge(self, source_id, target_id, edge_type, timestamp, event_id):
        """Records an event on an edge, creating the edge when needed.

        Args:
          edge_type (str): type of the edge (currently "has", "is" or
              "access").
          event_id (int|str): identifier for event responsible for this edge.
          source_id (int): id of source node.
          target_id (int): id of target node.
          timestamp (int): timestamp when event happened.
        """
        event = {u'id': event_id, u'timestamp': timestamp}
        edge_key = (source_id, target_id, edge_type)

        if edge_key not in self.edges_ids:
            # First event for this edge; its id is its position in edges.
            self.edges_ids[edge_key] = len(self.edges)
            self.edges.append({
                u'source': source_id,
                u'target': target_id,
                u'type': edge_type,
                u'events': [event],
            })
            return

        known_edge = self.edges[self.edges_ids[edge_key]]
        known_edge[u'events'].append(event)

    @classmethod
    def GetRemote(cls, data, source=False, target=False):
        """Returns the most specific datum about the remote source/target.

        Knowing that a remote connection came from user Dean is better than
        knowing only the ip address. User names and user ids are extended by
        a machine identifier, so the information about the machine is not
        lost.

        Exactly one of source and target should be True. Other combinations
        do not raise an error, but they do not make sense.

        Args:
          data (event_data.EventData): data about event.
          source (bool): whether to find source.
          target (bool): whether to find target.

        Returns:
          event_data.EventDatum|None: most specific event data about
              source/target. None if no event data were found.
        """
        # Lazy lookup in priority order; stops at the first datum found.
        found = (
            data.Get(data_class(source=source, target=target))
            for data_class in cls.DATA_PRIORITY)
        return next((datum for datum in found if datum), None)

    def AddData(self, source_datum, target_datum, edge_type, event_time,
                event_id):
        """Adds an edge together with the nodes it connects.

        Both nodes are created when they are missing from the graph, then an
        edge between them is added.

        Args:
          edge_type (str): edge type.
          event_id (int|str): event identifier.
          event_time (str): timestamp for event.
          source_datum (event_data.EventDatum): event datum about source
              node.
          target_datum (event_data.EventDatum): event datum about target
              node.
        """
        self.AddEdge(
            self.GetAddNode(source_datum.NAME, source_datum.value),
            self.GetAddNode(target_datum.NAME, target_datum.value),
            edge_type, event_time, event_id)

    def AddEventData(self, parsed_event):
        """Translates one parsed event into nodes and edges.

        The translation follows the rules stored in RULES: when the event
        holds data matching both rule.source and rule.target, an edge of type
        rule.type is created. An "access" edge is added on top of that when
        both a remote source and a remote target are known.

        Args:
          parsed_event (event_data.EventData): event data about event to be
              translated to graph.
        """
        graph_class = self.__class__

        for rule in graph_class.RULES:
            rule_source = parsed_event.Get(rule.source)
            rule_target = parsed_event.Get(rule.target)
            if not rule_source or not rule_target:
                continue
            self.AddData(rule_source, rule_target, rule.type,
                         parsed_event.timestamp, parsed_event.event_id)

        # Access edge between the most specific source and target.
        access_source = graph_class.GetRemote(parsed_event, source=True)
        access_target = graph_class.GetRemote(parsed_event, target=True)
        if access_source and access_target:
            self.AddData(access_source, access_target,
                         graph_class.EDGE_ACCESS, parsed_event.timestamp,
                         parsed_event.event_id)

    def MinimalSerialize(self):
        """Returns only the data that the visualization requires."""
        return {u'nodes': self.nodes, u'links': self.edges}
Exemple #12
0
    def Parse(cls, event):
        """Picks the parser matching the event and post-processes its output.

        The parser is selected by the data_type of the event. After parsing,
        user names and user ids are extended by the first reasonable machine
        identifier of their side, and the timestamp and event id are set.

        Args:
            event (dict): dict serialized plaso event.

        Returns:
            event_data.EventData: event data extracted from event. It is
                empty when no parser is registered for the data_type or the
                parser produced nothing.
        """
        # data_type is either a plain string or a dict with a u'stream' key.
        raw_data_type = event.get(u'data_type')
        if isinstance(raw_data_type, basestring):
            data_type = raw_data_type
        elif isinstance(raw_data_type, dict):
            data_type = raw_data_type.get(u'stream')
        else:
            data_type = None

        if data_type not in cls._parser_clases:
            return event_data.EventData()

        parsed_data = cls._parser_clases[data_type].Parse(event)
        if not parsed_data or parsed_data.IsEmpty():
            return event_data.EventData()

        parsed_data.event_data_type = data_type

        # Target side is handled first, then source side.
        for side in ('target', 'source'):
            role = {side: True}
            machine_candidates = [
                parsed_data.Get(event_data.MachineName(**role)),
                parsed_data.Get(event_data.Ip(**role)),
                parsed_data.Get(event_data.StorageFileName(**role)),
            ]
            machine_id = utils.FirstValidDatum(machine_candidates,
                                               default=u'UNKNOWN')

            user_keys = [
                event_data.UserName(**role),
                event_data.UserId(**role),
            ]
            for user_key in user_keys:
                user_datum = parsed_data.Get(user_key)
                if user_datum:
                    user_datum.value += u'@' + machine_id

        parsed_data.timestamp = event.get(u'timestamp')
        # Note: GetNextEventId() is evaluated even when timesketch_id is
        # present, because it is passed as the default argument.
        parsed_data.event_id = event.get(u'timesketch_id',
                                         cls.GetNextEventId())
        return parsed_data