def testEscapeFieldNames(self):
    """Check that util.escapeFieldNames() output has no '$'-prefixed or dotted keys.

    Each sample document is passed through util.escapeFieldNames(); the
    result must not be None, and none of the keys reported by
    self.getAllKeys() may start with '$' or contain a '.'.
    """
    content = [
        {'$query': {'_id': '1cba73b8a555ba442a3630ccf735dffd/14'}},
        {'$query': {'_id': {'$in': []}}},
        {
            'count': '107f3bf172abf9dae6458f1dbb0d4ad6/11',
            'query': {'md5': {'$in': ['c3117f341b734d3ce6e71608480de82d/34']}},
        },
        {'$query': {'foo.bar': 1234}},
    ]
    for doc in content:
        escaped = util.escapeFieldNames(doc)
        self.assertNotEqual(escaped, None)

        # NOTE(review): getAllKeys() is defined elsewhere; presumably it also
        # returns the keys of nested documents -- confirm.
        keys = self.getAllKeys(escaped)

        # Render the document once; it is reused as the failure message of
        # both assertions and for the trailing debug output.
        dump = pformat(escaped)
        for k in keys:
            self.assertFalse(k.startswith('$'), dump)
            self.assertEqual(-1, k.find("."), dump)
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(dump)
def testEscapeFieldNames(self):
    """Check that util.escapeFieldNames() output has no '$'-prefixed or dotted keys.

    Each sample document is passed through util.escapeFieldNames(); the
    result must not be None, and none of the keys reported by
    self.getAllKeys() may start with '$' or contain a '.'.
    """
    content = [
        {'$query': {'_id': '1cba73b8a555ba442a3630ccf735dffd/14'}},
        {'$query': {'_id': {'$in': []}}},
        {
            'count': '107f3bf172abf9dae6458f1dbb0d4ad6/11',
            'query': {'md5': {'$in': ['c3117f341b734d3ce6e71608480de82d/34']}},
        },
        {'$query': {'foo.bar': 1234}},
    ]
    for doc in content:
        escaped = util.escapeFieldNames(doc)
        self.assertNotEqual(escaped, None)

        # NOTE(review): getAllKeys() is defined elsewhere; presumably it also
        # returns the keys of nested documents -- confirm.
        keys = self.getAllKeys(escaped)

        # Render the document once; it is reused as the failure message of
        # both assertions and for the trailing debug output.
        dump = pformat(escaped)
        for k in keys:
            self.assertFalse(k.startswith('$'), dump)
            self.assertEqual(-1, k.find("."), dump)
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(dump)
def storeCurrentOpInSession(self):
    """Stores the currentOp in a session. We will create a new session if one does not already exist.

    Reads self.currentOp (dict describing the message being processed) and
    self.currentContent (list of its content documents) and dispatches on
    the message type:

      * query / insert / delete / update: build a new operation, append it
        to the session's 'operations' list, remember it in
        self.query_response_map under its query_id, and record its
        collection name in self.known_collections.
      * reply: copy content, size, timestamp and id into the operation
        registered under the reply's 'reply_id'; if there is none, only
        self.skip_ctr is incremented.
      * getmore / killcursors: ignored (logged in debug mode).

    A message without a 'type' entry is treated as a reply.

    Raises:
        Exception: if the message type is none of the above.
    """
    # Check whether it has a busted collection name
    # For now we'll just change the name to our marker so that we can figure out
    # what it really should be after we recreate the schema
    try:
        self.currentOp['collection'].decode('ascii')
    except Exception:
        # Deliberately broad: any failure of the lookup/decode above marks
        # the operation as busted instead of aborting.
        if self.debug:
            # NOTE(review): op_ctr / line_ctr are looked up in self.currentOp
            # by this format string, not on self -- confirm those keys exist.
            LOG.warn("Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]" % self.currentOp)
        self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
        self.bustedOps.append(self.currentOp)

    # Figure out whether this is a outgoing query from the client
    # Or an incoming response from the server
    if self.currentOp['arrow'] == '-->>':
        ip_client = self.currentOp['ip1']
        ip_server = self.currentOp['ip2']
    else:
        ip_client = self.currentOp['ip2']
        ip_server = self.currentOp['ip1']

    # If this doesn't have a type here, then we know that it's a reply.
    # After this statement 'type' is always present, so no further
    # "missing type" handling is needed.
    if 'type' not in self.currentOp:
        self.currentOp['type'] = constants.OP_TYPE_REPLY

    # Get the session to store this operation in
    session = self.getOrCreateSession(ip_client, ip_server)
    if session["start_time"] is None and "timestamp" in self.currentOp:
        session["start_time"] = self.currentOp['timestamp']

    # Escape any invalid key names
    for i, doc in enumerate(self.currentContent):
        # HACK: Rename the 'query' key to '$query'
        if 'query' in doc:
            doc[constants.OP_TYPE_QUERY] = doc['query']
            del doc['query']
        self.currentContent[i] = util.escapeFieldNames(doc)
    ## FOR

    op_type = self.currentOp['type']

    # QUERY: $query, $delete, $insert, $update:
    # Create the operation, add it to the session
    if op_type in (constants.OP_TYPE_QUERY,
                   constants.OP_TYPE_INSERT,
                   constants.OP_TYPE_DELETE,
                   constants.OP_TYPE_UPDATE):
        # create the operation -- corresponds to current
        if self.debug:
            LOG.debug("Current Operation %d Content:\n%s" % (self.currentOp['query_id'], pformat(self.currentContent)))

        op = Session.operationFactory()
        op['collection'] = self.currentOp['collection']
        op['type'] = op_type
        op['query_time'] = self.currentOp['timestamp']
        op['query_size'] = self.currentOp['size']
        op['query_content'] = self.currentContent
        op['query_id'] = long(self.currentOp['query_id'])
        op['query_aggregate'] = False  # false -not aggregate- by default

        # UPDATE Flags
        if op['type'] == constants.OP_TYPE_UPDATE:
            op['update_upsert'] = self.currentOp['update_upsert']
            op['update_multi'] = self.currentOp['update_multi']

        # QUERY Flags
        elif op['type'] == constants.OP_TYPE_QUERY:
            # SKIP, LIMIT
            op['query_limit'] = self.currentOp['ntoreturn']
            op['query_offset'] = self.currentOp['ntoskip']
            if self.currentOp['hasfields']:
                # HACK: Convert dot notation into '*'
                # FIXME: This should really be broke out into a dictionary of 'include' 'exclude'
                op['query_fields'] = dict(
                    (k.replace(".", "*"), v)
                    for k, v in self.currentOp['hasfields'].iteritems())

            # check for aggregate
            # update collection name, set aggregate type
            # NOTE(review): find() > 0 does not match a name that *starts*
            # with "$cmd"; presumably names always carry a db prefix -- confirm.
            if op['collection'].find("$cmd") > 0:
                op['query_aggregate'] = True
                # extract the real collection name
                ## --> This has to be done at the end after the first pass, because the collection name is hashed up

        # Keep track of operations by their ids so that we can add
        # the response to it later on
        self.query_response_map[self.currentOp['query_id']] = op

        # Append it to the current session
        # TODO: Large traces will cause the sessions to get too big.
        #       We need to split out the operations into a separate collection
        #       Or use multiple sessions
        session['operations'].append(op)
        self.op_ctr += 1

        if self.debug:
            LOG.debug("Added %s operation %d to session %s from line %d:\n%s" % (op['type'], self.currentOp['query_id'], session['session_id'], self.line_ctr, pformat(op)))

        # store the collection name in known_collections. This will be useful later.
        # see the comment at known_collections
        # HACK: We have to cut off the db name here. We may not want
        #       to do that if the application is querying multiple databases.
        full_name = op['collection']
        # find() returns -1 when there is no '.', so the slice then starts
        # at 0 and the whole name is kept.
        col_name = full_name[full_name.find(".") + 1:]  # cut off the db name
        self.known_collections.add(col_name)

    # RESPONSE - add information to the matching query
    elif op_type == constants.OP_TYPE_REPLY:
        self.resp_ctr += 1
        reply_id = self.currentOp['reply_id']
        # see if the matching query is in the map
        if reply_id in self.query_response_map:
            # fill in missing information
            query_op = self.query_response_map[reply_id]
            query_op['resp_content'] = self.currentContent
            query_op['resp_size'] = self.currentOp['size']
            query_op['resp_time'] = self.currentOp['timestamp']
            query_op['resp_id'] = long(self.currentOp['query_id'])
            # The entry is dropped once a reply has been attached.
            del self.query_response_map[reply_id]
        else:
            self.skip_ctr += 1
            if self.debug:
                LOG.warn("Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]" % (self.line_ctr, reply_id, self.skip_ctr, self.resp_ctr))

    # These can be safely ignored
    elif op_type in (constants.OP_TYPE_GETMORE, constants.OP_TYPE_KILLCURSORS):
        if self.debug:
            LOG.warn("Skipping '%s' operation %d on line %d" % (self.currentOp['type'], self.currentOp['query_id'], self.line_ctr))

    # UNKNOWN
    else:
        raise Exception("Unexpected message type '%s'" % self.currentOp['type'])
def storeCurrentOpInSession(self):
    """Stores the currentOp in a session. We will create a new session if one does not already exist.

    Reads self.currentOp (dict describing the message being processed) and
    self.currentContent (list of its content documents) and dispatches on
    the message type:

      * query / insert / delete / update: build a new operation, append it
        to the session's 'operations' list, remember it in
        self.query_response_map under its query_id, and record its
        collection name in self.known_collections.
      * reply: copy content, size, timestamp and id into the operation
        registered under the reply's 'reply_id'; if there is none, only
        self.skip_ctr is incremented.
      * getmore / killcursors: ignored (logged in debug mode).

    A message without a 'type' entry is treated as a reply.

    Raises:
        Exception: if the message type is none of the above.
    """
    # Check whether it has a busted collection name
    # For now we'll just change the name to our marker so that we can figure out
    # what it really should be after we recreate the schema
    try:
        self.currentOp['collection'].decode('ascii')
    except Exception:
        # Deliberately broad: any failure of the lookup/decode above marks
        # the operation as busted instead of aborting.
        if self.debug:
            # NOTE(review): op_ctr / line_ctr are looked up in self.currentOp
            # by this format string, not on self -- confirm those keys exist.
            LOG.warn("Operation %(query_id)d has an invalid collection name '%(collection)s'. Will fix later... [opCtr=%(op_ctr)d / lineCtr=%(line_ctr)d]" % self.currentOp)
        self.currentOp['collection'] = constants.INVALID_COLLECTION_MARKER
        self.bustedOps.append(self.currentOp)

    # Figure out whether this is a outgoing query from the client
    # Or an incoming response from the server
    if self.currentOp['arrow'] == '-->>':
        ip_client = self.currentOp['ip1']
        ip_server = self.currentOp['ip2']
    else:
        ip_client = self.currentOp['ip2']
        ip_server = self.currentOp['ip1']

    # If this doesn't have a type here, then we know that it's a reply.
    # After this statement 'type' is always present, so no further
    # "missing type" handling is needed.
    if 'type' not in self.currentOp:
        self.currentOp['type'] = constants.OP_TYPE_REPLY

    # Get the session to store this operation in
    session = self.getOrCreateSession(ip_client, ip_server)
    if session["start_time"] is None and "timestamp" in self.currentOp:
        session["start_time"] = self.currentOp['timestamp']

    # Escape any invalid key names
    for i, doc in enumerate(self.currentContent):
        # HACK: Rename the 'query' key to '$query'
        if 'query' in doc:
            doc[constants.OP_TYPE_QUERY] = doc['query']
            del doc['query']
        self.currentContent[i] = util.escapeFieldNames(doc)
    ## FOR

    op_type = self.currentOp['type']

    # QUERY: $query, $delete, $insert, $update:
    # Create the operation, add it to the session
    if op_type in (constants.OP_TYPE_QUERY,
                   constants.OP_TYPE_INSERT,
                   constants.OP_TYPE_DELETE,
                   constants.OP_TYPE_UPDATE):
        # create the operation -- corresponds to current
        if self.debug:
            LOG.debug("Current Operation %d Content:\n%s" % (self.currentOp['query_id'], pformat(self.currentContent)))

        op = Session.operationFactory()
        op['collection'] = self.currentOp['collection']
        op['type'] = op_type
        op['query_time'] = self.currentOp['timestamp']
        op['query_size'] = self.currentOp['size']
        op['query_content'] = self.currentContent
        op['query_id'] = long(self.currentOp['query_id'])
        op['query_aggregate'] = False  # false -not aggregate- by default

        # UPDATE Flags
        if op['type'] == constants.OP_TYPE_UPDATE:
            op['update_upsert'] = self.currentOp['update_upsert']
            op['update_multi'] = self.currentOp['update_multi']

        # QUERY Flags
        elif op['type'] == constants.OP_TYPE_QUERY:
            # SKIP, LIMIT
            op['query_limit'] = self.currentOp['ntoreturn']
            op['query_offset'] = self.currentOp['ntoskip']
            if self.currentOp['hasfields']:
                # HACK: Convert dot notation into '*'
                # FIXME: This should really be broke out into a dictionary of 'include' 'exclude'
                op['query_fields'] = dict(
                    (k.replace(".", "*"), v)
                    for k, v in self.currentOp['hasfields'].iteritems())

            # check for aggregate
            # update collection name, set aggregate type
            # NOTE(review): find() > 0 does not match a name that *starts*
            # with "$cmd"; presumably names always carry a db prefix -- confirm.
            if op['collection'].find("$cmd") > 0:
                op['query_aggregate'] = True
                # extract the real collection name
                ## --> This has to be done at the end after the first pass, because the collection name is hashed up

        # Keep track of operations by their ids so that we can add
        # the response to it later on
        self.query_response_map[self.currentOp['query_id']] = op

        # Append it to the current session
        # TODO: Large traces will cause the sessions to get too big.
        #       We need to split out the operations into a separate collection
        #       Or use multiple sessions
        session['operations'].append(op)
        self.op_ctr += 1

        if self.debug:
            LOG.debug("Added %s operation %d to session %s from line %d:\n%s" % (op['type'], self.currentOp['query_id'], session['session_id'], self.line_ctr, pformat(op)))

        # store the collection name in known_collections. This will be useful later.
        # see the comment at known_collections
        # HACK: We have to cut off the db name here. We may not want
        #       to do that if the application is querying multiple databases.
        full_name = op['collection']
        # find() returns -1 when there is no '.', so the slice then starts
        # at 0 and the whole name is kept.
        col_name = full_name[full_name.find(".") + 1:]  # cut off the db name
        self.known_collections.add(col_name)

    # RESPONSE - add information to the matching query
    elif op_type == constants.OP_TYPE_REPLY:
        self.resp_ctr += 1
        reply_id = self.currentOp['reply_id']
        # see if the matching query is in the map
        if reply_id in self.query_response_map:
            # fill in missing information
            query_op = self.query_response_map[reply_id]
            query_op['resp_content'] = self.currentContent
            query_op['resp_size'] = self.currentOp['size']
            query_op['resp_time'] = self.currentOp['timestamp']
            query_op['resp_id'] = long(self.currentOp['query_id'])
            # The entry is dropped once a reply has been attached.
            del self.query_response_map[reply_id]
        else:
            self.skip_ctr += 1
            if self.debug:
                LOG.warn("Skipping response on line %d - No matching query_id '%s' [skipCtr=%d/%d]" % (self.line_ctr, reply_id, self.skip_ctr, self.resp_ctr))

    # These can be safely ignored
    elif op_type in (constants.OP_TYPE_GETMORE, constants.OP_TYPE_KILLCURSORS):
        if self.debug:
            LOG.warn("Skipping '%s' operation %d on line %d" % (self.currentOp['type'], self.currentOp['query_id'], self.line_ctr))

    # UNKNOWN
    else:
        raise Exception("Unexpected message type '%s'" % self.currentOp['type'])