def testEscapeFieldNames(self):
    """Check that util.escapeFieldNames() leaves no '$'-prefixed or dotted keys.

    Every sample document is escaped, and each key that self.getAllKeys()
    reports for the escaped result must neither start with '$' nor
    contain a '.'.
    """
    content = [
        {'$query': {'_id': '1cba73b8a555ba442a3630ccf735dffd/14'}},
        {'$query': {'_id': {'$in': []}}},
        {'count': '107f3bf172abf9dae6458f1dbb0d4ad6/11',
         'query': {'md5': {'$in': ['c3117f341b734d3ce6e71608480de82d/34']}}},
        {'$query': {'foo.bar': 1234}},
    ]

    for doc in content:
        escaped = util.escapeFieldNames(doc)
        self.assertNotEqual(escaped, None)
        for k in self.getAllKeys(escaped):
            # Show the whole escaped document on failure so the offending
            # key can be located.
            self.assertFalse(k.startswith('$'), pformat(escaped))
            self.assertEqual(-1, k.find("."), pformat(escaped))
        # Parenthesised so the line parses under both Python 2 and Python 3;
        # the printed text is the same either way.
        print(pformat(escaped))
    def testEscapeFieldNames(self):
        """Check that util.escapeFieldNames() leaves no '$'-prefixed or dotted keys.

        Every sample document is escaped, and each key that self.getAllKeys()
        reports for the escaped result must neither start with '$' nor
        contain a '.'.
        """
        content = [
            {
                '$query': {
                    '_id': '1cba73b8a555ba442a3630ccf735dffd/14'
                }
            },
            {
                '$query': {
                    '_id': {
                        '$in': []
                    }
                }
            },
            {
                'count': '107f3bf172abf9dae6458f1dbb0d4ad6/11',
                'query': {
                    'md5': {
                        '$in': ['c3117f341b734d3ce6e71608480de82d/34']
                    }
                }
            },
            {
                '$query': {
                    'foo.bar': 1234
                }
            },
        ]

        for doc in content:
            escaped = util.escapeFieldNames(doc)
            self.assertNotEqual(escaped, None)
            for k in self.getAllKeys(escaped):
                # Show the whole escaped document on failure so the offending
                # key can be located.
                self.assertFalse(k.startswith('$'), pformat(escaped))
                self.assertEqual(-1, k.find("."), pformat(escaped))
            # Parenthesised so the line parses under both Python 2 and
            # Python 3; the printed text is the same either way.
            print(pformat(escaped))
Example #3
0
    def storeCurrentOpInSession(self):
        """Stores the currentOp in a session. We will create a new session if one does not already exist.

        Reads self.currentOp (the header fields of the operation being
        processed) and self.currentContent (its content items, indexed as a
        sequence), then:

        * query/insert/delete/update operations are turned into a new
          operation record that is appended to the session and remembered in
          self.query_response_map under the operation's 'query_id';
        * replies are merged into the pending operation whose key equals the
          reply's 'reply_id', or counted as skipped when there is none;
        * getmore/killcursors operations are ignored.

        Returns None in every case.

        Raises:
            Exception: if the operation has no 'type' and self.stop_on_error
                is set, or if its type is none of the kinds listed above.
        """

        # Check whether it has a busted collection name
        # For now we'll just change the name to our marker so that we can figure out
        # what it really should be after we recreate the schema
        try:
            self.currentOp['collection'].decode('ascii')
        except Exception:
            # Deliberately broad (best effort): any failure to read or decode
            # the name marks the operation as busted.
            if self.debug:
                # op_ctr / line_ctr are attributes of this object; supply them
                # as fallbacks so the message can always be formatted. Keys
                # already present in currentOp take precedence.
                log_args = {'op_ctr': self.op_ctr, 'line_ctr': self.line_ctr}
                log_args.update(self.currentOp)
                LOG.warn(
                    "Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]"
                    % log_args)
            self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
            self.bustedOps.append(self.currentOp)

        # Figure out whether this is a outgoing query from the client
        # Or an incoming response from the server
        if self.currentOp['arrow'] == '-->>':
            ip_client = self.currentOp['ip1']
            ip_server = self.currentOp['ip2']
        else:
            ip_client = self.currentOp['ip2']
            ip_server = self.currentOp['ip1']

            # If this doesn't have a type here, then we know that it's a reply
            if 'type' not in self.currentOp:
                self.currentOp['type'] = constants.OP_TYPE_REPLY
        ## IF

        if 'type' not in self.currentOp:
            msg = "Current operation is incomplete on line %d: Missing 'type' field" % self.line_ctr
            LOG.warn("%s [opCtr=%d]\n%s" %
                     (msg, self.op_ctr, pformat(self.currentOp)))
            if self.stop_on_error:
                raise Exception(msg)
            return
        ## IF

        # Get the session to store this operation in
        session = self.getOrCreateSession(ip_client, ip_server)
        if session["start_time"] is None and "timestamp" in self.currentOp:
            session["start_time"] = self.currentOp['timestamp']

        # Escape any invalid key names
        for i, item in enumerate(self.currentContent):
            # HACK: Rename the 'query' key to '$query'
            if 'query' in item:
                item[constants.OP_TYPE_QUERY] = item['query']
                del item['query']
            # Store the return value back: nothing visible here says whether
            # escapeFieldNames() works in place or returns a new object.
            self.currentContent[i] = util.escapeFieldNames(item)
        ## FOR

        op_type = self.currentOp['type']

        # QUERY: $query, $delete, $insert, $update:
        # Create the operation, add it to the session
        if op_type in (constants.OP_TYPE_QUERY, constants.OP_TYPE_INSERT,
                       constants.OP_TYPE_DELETE, constants.OP_TYPE_UPDATE):
            # create the operation -- corresponds to current
            if self.debug:
                LOG.debug(
                    "Current Operation %d Content:\n%s" %
                    (self.currentOp['query_id'], pformat(self.currentContent)))

            op = Session.operationFactory()
            op['collection'] = self.currentOp['collection']
            op['type'] = op_type
            op['query_time'] = self.currentOp['timestamp']
            op['query_size'] = self.currentOp['size']
            op['query_content'] = self.currentContent
            op['query_id'] = long(self.currentOp['query_id'])
            op['query_aggregate'] = False  # false -not aggregate- by default

            # UPDATE Flags
            if op['type'] == constants.OP_TYPE_UPDATE:
                op['update_upsert'] = self.currentOp['update_upsert']
                op['update_multi'] = self.currentOp['update_multi']

            # QUERY Flags
            elif op['type'] == constants.OP_TYPE_QUERY:
                # SKIP, LIMIT
                op['query_limit'] = self.currentOp['ntoreturn']
                op['query_offset'] = self.currentOp['ntoskip']
                if self.currentOp['hasfields']:
                    # HACK: Convert dot notation into '*'
                    # FIXME: This should really be broke out into a dictionary of 'include' 'exclude'
                    op['query_fields'] = dict(
                        (k.replace(".", "*"), v)
                        for k, v in self.currentOp['hasfields'].iteritems())

                # check for aggregate
                # update collection name, set aggregate type
                # NOTE: '> 0' only matches "$cmd" after the first character
                # (e.g. following a "<db>." prefix); kept as in the original.
                if op['collection'].find("$cmd") > 0:
                    op['query_aggregate'] = True
                    # extract the real collection name
                    ## --> This has to be done at the end after the first pass, because the collection name is hashed up

            # Keep track of operations by their ids so that we can add
            # the response to it later on
            self.query_response_map[self.currentOp['query_id']] = op

            # Append it to the current session
            # TODO: Large traces will cause the sessions to get too big.
            #       We need to split out the operations into a separate collection
            #       Or use multiple sessions
            session['operations'].append(op)
            self.op_ctr += 1
            if self.debug:
                LOG.debug(
                    "Added %s operation %d to session %s from line %d:\n%s" %
                    (op['type'], self.currentOp['query_id'],
                     session['session_id'], self.line_ctr, pformat(op)))

            # store the collection name in known_collections. This will be useful later.
            # see the comment at known_collections
            # HACK: We have to cut off the db name here. We may not want
            #       to do that if the application is querying multiple databases.
            full_name = op['collection']
            # find() returns -1 when there is no '.', so the slice then starts
            # at 0 and the whole name is kept.
            col_name = full_name[full_name.find(".") + 1:]  # cut off the db name
            self.known_collections.add(col_name)

        # RESPONSE - add information to the matching query
        elif op_type == constants.OP_TYPE_REPLY:
            self.resp_ctr += 1
            reply_id = self.currentOp['reply_id']
            # see if the matching query is in the map
            if reply_id in self.query_response_map:
                # fill in missing information
                query_op = self.query_response_map[reply_id]
                query_op['resp_content'] = self.currentContent
                query_op['resp_size'] = self.currentOp['size']
                query_op['resp_time'] = self.currentOp['timestamp']
                query_op['resp_id'] = long(self.currentOp['query_id'])
                # The query has been answered, so it is no longer pending
                del self.query_response_map[reply_id]
            else:
                self.skip_ctr += 1
                if self.debug:
                    LOG.warn(
                        "Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]"
                        % (self.line_ctr, reply_id, self.skip_ctr,
                           self.resp_ctr))

        # These can be safely ignored
        elif op_type in (constants.OP_TYPE_GETMORE,
                         constants.OP_TYPE_KILLCURSORS):
            if self.debug:
                LOG.warn("Skipping '%s' operation %d on line %d" %
                         (op_type, self.currentOp['query_id'],
                          self.line_ctr))

        # UNKNOWN
        else:
            raise Exception("Unexpected message type '%s'" % op_type)
Example #4
0
    def storeCurrentOpInSession(self):
        """Stores the currentOp in a session. We will create a new session if one does not already exist.

        Reads self.currentOp (the header fields of the operation being
        processed) and self.currentContent (its content items, indexed as a
        sequence), then:

        * query/insert/delete/update operations are turned into a new
          operation record that is appended to the session and remembered in
          self.query_response_map under the operation's 'query_id';
        * replies are merged into the pending operation whose key equals the
          reply's 'reply_id', or counted as skipped when there is none;
        * getmore/killcursors operations are ignored.

        Returns None in every case.

        Raises:
            Exception: if the operation has no 'type' and self.stop_on_error
                is set, or if its type is none of the kinds listed above.
        """

        # Check whether it has a busted collection name
        # For now we'll just change the name to our marker so that we can figure out
        # what it really should be after we recreate the schema
        try:
            self.currentOp['collection'].decode('ascii')
        except Exception:
            # Deliberately broad (best effort): any failure to read or decode
            # the name marks the operation as busted.
            if self.debug:
                # op_ctr / line_ctr are attributes of this object; supply them
                # as fallbacks so the message can always be formatted. Keys
                # already present in currentOp take precedence.
                log_args = {'op_ctr': self.op_ctr, 'line_ctr': self.line_ctr}
                log_args.update(self.currentOp)
                LOG.warn("Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]" % log_args)
            self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
            self.bustedOps.append(self.currentOp)

        # Figure out whether this is a outgoing query from the client
        # Or an incoming response from the server
        if self.currentOp['arrow'] == '-->>':
            ip_client = self.currentOp['ip1']
            ip_server = self.currentOp['ip2']
        else:
            ip_client = self.currentOp['ip2']
            ip_server = self.currentOp['ip1']

            # If this doesn't have a type here, then we know that it's a reply
            if 'type' not in self.currentOp:
                self.currentOp['type'] = constants.OP_TYPE_REPLY
        ## IF

        if 'type' not in self.currentOp:
            msg = "Current operation is incomplete on line %d: Missing 'type' field" % self.line_ctr
            LOG.warn("%s [opCtr=%d]\n%s" % (msg, self.op_ctr, pformat(self.currentOp)))
            if self.stop_on_error:
                raise Exception(msg)
            return
        ## IF

        # Get the session to store this operation in
        session = self.getOrCreateSession(ip_client, ip_server)
        if session["start_time"] is None and "timestamp" in self.currentOp:
            session["start_time"] = self.currentOp['timestamp']

        # Escape any invalid key names
        for i, item in enumerate(self.currentContent):
            # HACK: Rename the 'query' key to '$query'
            if 'query' in item:
                item[constants.OP_TYPE_QUERY] = item['query']
                del item['query']
            # Store the return value back: nothing visible here says whether
            # escapeFieldNames() works in place or returns a new object.
            self.currentContent[i] = util.escapeFieldNames(item)
        ## FOR

        op_type = self.currentOp['type']

        # QUERY: $query, $delete, $insert, $update:
        # Create the operation, add it to the session
        if op_type in (constants.OP_TYPE_QUERY, constants.OP_TYPE_INSERT, constants.OP_TYPE_DELETE, constants.OP_TYPE_UPDATE):
            # create the operation -- corresponds to current
            if self.debug:
                LOG.debug("Current Operation %d Content:\n%s" % (self.currentOp['query_id'], pformat(self.currentContent)))

            op = Session.operationFactory()
            op['collection']        = self.currentOp['collection']
            op['type']              = op_type
            op['query_time']        = self.currentOp['timestamp']
            op['query_size']        = self.currentOp['size']
            op['query_content']     = self.currentContent
            op['query_id']          = long(self.currentOp['query_id'])
            op['query_aggregate']   = False # false -not aggregate- by default

            # UPDATE Flags
            if op['type'] == constants.OP_TYPE_UPDATE:
                op['update_upsert'] = self.currentOp['update_upsert']
                op['update_multi'] = self.currentOp['update_multi']

            # QUERY Flags
            elif op['type'] == constants.OP_TYPE_QUERY:
                # SKIP, LIMIT
                op['query_limit'] = self.currentOp['ntoreturn']
                op['query_offset'] = self.currentOp['ntoskip']
                if self.currentOp['hasfields']:
                    # HACK: Convert dot notation into '*'
                    # FIXME: This should really be broke out into a dictionary of 'include' 'exclude'
                    op['query_fields'] = dict((k.replace(".", "*"), v) for k, v in self.currentOp['hasfields'].iteritems())

                # check for aggregate
                # update collection name, set aggregate type
                # NOTE: '> 0' only matches "$cmd" after the first character
                # (e.g. following a "<db>." prefix); kept as in the original.
                if op['collection'].find("$cmd") > 0:
                    op['query_aggregate'] = True
                    # extract the real collection name
                    ## --> This has to be done at the end after the first pass, because the collection name is hashed up

            # Keep track of operations by their ids so that we can add
            # the response to it later on
            self.query_response_map[self.currentOp['query_id']] = op

            # Append it to the current session
            # TODO: Large traces will cause the sessions to get too big.
            #       We need to split out the operations into a separate collection
            #       Or use multiple sessions
            session['operations'].append(op)
            self.op_ctr += 1
            if self.debug:
                LOG.debug("Added %s operation %d to session %s from line %d:\n%s" % (op['type'], self.currentOp['query_id'], session['session_id'], self.line_ctr, pformat(op)))

            # store the collection name in known_collections. This will be useful later.
            # see the comment at known_collections
            # HACK: We have to cut off the db name here. We may not want
            #       to do that if the application is querying multiple databases.
            full_name = op['collection']
            # find() returns -1 when there is no '.', so the slice then starts
            # at 0 and the whole name is kept.
            col_name = full_name[full_name.find(".")+1:] # cut off the db name
            self.known_collections.add(col_name)

        # RESPONSE - add information to the matching query
        elif op_type == constants.OP_TYPE_REPLY:
            self.resp_ctr += 1
            reply_id = self.currentOp['reply_id']
            # see if the matching query is in the map
            if reply_id in self.query_response_map:
                # fill in missing information
                query_op = self.query_response_map[reply_id]
                query_op['resp_content'] = self.currentContent
                query_op['resp_size'] = self.currentOp['size']
                query_op['resp_time'] = self.currentOp['timestamp']
                query_op['resp_id'] = long(self.currentOp['query_id'])
                # The query has been answered, so it is no longer pending
                del self.query_response_map[reply_id]
            else:
                self.skip_ctr += 1
                if self.debug:
                    LOG.warn("Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]" % (self.line_ctr, reply_id, self.skip_ctr, self.resp_ctr))

        # These can be safely ignored
        elif op_type in (constants.OP_TYPE_GETMORE, constants.OP_TYPE_KILLCURSORS):
            if self.debug:
                LOG.warn("Skipping '%s' operation %d on line %d" % (op_type, self.currentOp['query_id'], self.line_ctr))

        # UNKNOWN
        else:
            raise Exception("Unexpected message type '%s'" % op_type)