def __init__(self):
    """Attach a freshly constructed ``RandomDataGenerator`` to ``self``.

    NOTE(review): this module-level function duplicates the constructor of
    ``SubdocHelper`` -- confirm whether it is still needed.
    """
    generator = RandomDataGenerator()
    self.randomDataGenerator = generator
class SubdocHelper:
    """Plan random sub-document mutations against JSON-like documents.

    A document is a nesting of dicts and lists.  Paths into it use ``.`` to
    descend into a dict field and ``[i]`` to index a list, for example
    ``"a.b[1].c"``.  The ``python_based_*`` methods apply a mutation to the
    in-memory document and report the path that was touched; the ``build_*``
    methods chain such mutations and record, for each one, the name of the
    matching sub-document API operation.
    """

    # Upper bound on planning iterations so that a document on which no
    # mutation can succeed cannot make a builder spin forever.
    _MAX_ATTEMPTS = 10000

    # Operation tables: operation name -> python method, subdoc API name.
    _ARRAY_OPS = {
        "array_add_first": {"python": "python_based_array_add_first", "subdoc_api": "array_add_first", "mutate": True},
        "array_add_last": {"python": "python_based_array_add_last", "subdoc_api": "array_add_last", "mutate": True},
        "array_add_unique": {"python": "python_based_array_add_unique", "subdoc_api": "array_add_unique", "mutate": True},
        "array_add_insert": {"python": "python_based_array_add_insert", "subdoc_api": "array_add_insert", "mutate": True},
        "array_delete": {"python": "python_based_array_delete", "subdoc_api": "delete", "mutate": True},
        "array_replace": {"python": "python_based_array_replace", "subdoc_api": "replace", "mutate": True},
    }
    _DICT_OPS = {
        "dict_add": {"python": "python_based_dict_add", "subdoc_api": "dict_add", "mutate": True},
        "dict_upsert_add": {"python": "python_based_dict_upsert_add", "subdoc_api": "dict_upsert", "mutate": True},
        "dict_upsert_replace": {"python": "python_based_dict_upsert_replace", "subdoc_api": "dict_upsert", "mutate": True},
        "dict_delete": {"python": "python_based_dict_delete", "subdoc_api": "delete", "mutate": True},
        "dict_replace": {"python": "python_based_dict_replace", "subdoc_api": "replace", "mutate": True},
    }

    def __init__(self):
        self.randomDataGenerator = RandomDataGenerator()

    # ------------------------------------------------------------------
    # Path discovery
    # ------------------------------------------------------------------
    def _find_data_type(self, data, data_type="any"):
        """Return True when ``data`` is a container of the requested kind.

        ``data_type`` is ``"dict"``, ``"array"`` (a list) or ``"any"`` (either
        of the two); any other value never matches.
        """
        if data_type == "dict":
            return isinstance(data, dict)
        if data_type == "array":
            return isinstance(data, list)
        if data_type == "any":
            return isinstance(data, (dict, list))
        return False

    @staticmethod
    def _iter_children(data_set, path):
        """Yield ``(child_path, child_value)`` for each direct child."""
        if isinstance(data_set, dict):
            prefix = path + "." if path != "" else ""
            for key in data_set.keys():
                yield prefix + key, data_set[key]
        elif isinstance(data_set, list):
            for index, element in enumerate(data_set):
                yield path + "[" + str(index) + "]", element

    def find_pairs_data_type(self, data_set, path="", pairs=None, data_type="any"):
        """Collect ``path -> value`` for every nested container of a kind.

        Args:
            data_set: dict or list to walk; other values yield nothing.
            path: path of ``data_set`` itself ("" for the document root).
            pairs: dict to fill in place; a new dict is used when omitted.
            data_type: "dict", "array" or "any" (see ``_find_data_type``).

        Returns:
            The ``pairs`` dict that was filled.
        """
        if pairs is None:
            pairs = {}
        for child_path, value in self._iter_children(data_set, path):
            if isinstance(value, (dict, list)):
                # Propagate data_type so nested levels apply the same filter.
                self.find_pairs_data_type(value, child_path, pairs, data_type)
            if self._find_data_type(value, data_type=data_type):
                pairs[child_path] = value
        return pairs

    def find_pairs(self, data_set, path="", pairs=None):
        """Collect ``path -> value`` for every node below ``data_set``.

        Args:
            data_set: dict or list to walk; other values yield nothing.
            path: path of ``data_set`` itself ("" for the document root).
            pairs: dict to fill in place; a new dict is used when omitted.

        Returns:
            The ``pairs`` dict that was filled.
        """
        if pairs is None:
            pairs = {}
        for child_path, value in self._iter_children(data_set, path):
            if isinstance(value, (dict, list)):
                self.find_pairs(value, child_path, pairs)
            pairs[child_path] = value
        return pairs

    # ------------------------------------------------------------------
    # Operation planning
    # ------------------------------------------------------------------
    def _apply_seed(self, seed):
        """Seed ``random`` and the data generator when a seed is given."""
        if seed is not None:
            random.seed(seed)
            self.randomDataGenerator.set_seed(seed)

    def _pick_for_type(self, data, mutation_operation_type, force_operation_type):
        """Pick an operation descriptor for ``data`` per the mutation type."""
        if mutation_operation_type == "any":
            return self.pick_operations(data, operation=force_operation_type)
        if mutation_operation_type == "dict":
            return self.pick_dict_operations(data, operation=force_operation_type)
        return self.pick_array_operations(data, operation=force_operation_type)

    @staticmethod
    def _describe_operation(data, key, new_path, before, after, operation):
        """Build the record describing one applied mutation."""
        return {
            "data_value": data,
            "path_impacted_by_mutation_operation": key,
            "new_path_impacted_after_mutation_operation": new_path,
            "original_dataset": before,
            "mutated_data_set": after,
            "python_based_function_applied": operation["python"],
            "subdoc_api_function_applied": operation["subdoc_api"],
        }

    def build_concurrent_operations(self, data_set=None, max_number_operations=10,
                                    seed=None, mutation_operation_type="any",
                                    force_operation_type=None):
        """Find out combination of operations to be executed for a given JSON document.

        Candidate paths are computed once, up front.  After a mutation the
        touched path (trimmed to the enclosing array when it carries an
        index) is recorded, and candidates at or below a recorded path are
        discarded instead of being mutated.  ``data_set`` is mutated in place.

        Args:
            data_set: document to mutate.
            max_number_operations: stop once this many operations are recorded.
            seed: when not None, seeds ``random`` and the data generator.
            mutation_operation_type: "any", "dict" or "array".
            force_operation_type: operation name to use instead of a random one.

        Returns:
            List of operation records (see ``_describe_operation``).
        """
        self._apply_seed(seed)
        filter_paths = []
        operation_definition = []
        pairs = self.find_pairs_data_type(
            data_set, "", pairs={}, data_type=mutation_operation_type)
        for _ in range(self._MAX_ATTEMPTS):
            if not pairs or len(operation_definition) >= max_number_operations:
                break
            key = random.choice(list(pairs))
            if self.isPathPresent(key, filter_paths):
                pairs.pop(key)
                filter_paths.append(key)
                continue
            operation = self._pick_for_type(
                pairs[key], mutation_operation_type, force_operation_type)
            new_path = None
            data = None
            copy_of_original_dataset = copy.deepcopy(data_set)
            if operation["mutate"]:
                function = getattr(self, operation["python"])
                new_path, data = function(key, data_set)
            if new_path is not None:
                pairs.pop(key)
                filter_path = new_path
                if "[" in filter_path:
                    # An indexed mutation shifts siblings: block the array.
                    filter_path = self.trim_path(filter_path, "[")
                filter_paths.append(filter_path)
                operation_definition.append(self._describe_operation(
                    data, key, new_path, copy_of_original_dataset,
                    copy.deepcopy(data_set), operation))
            elif operation["mutate"]:
                print("mutation failed")
        return operation_definition

    def build_sequence_operations(self, data_set=None, max_number_operations=10,
                                  seed=None, mutation_operation_type="any",
                                  force_operation_type=None):
        """Find out combination of operations to be executed for a given JSON document.

        Candidate paths are recomputed before every step, so each operation
        is planned against the document as left by the previous one.
        ``data_set`` is mutated in place.  Planning gives up after
        ``_MAX_ATTEMPTS`` iterations so that a document on which no mutation
        can succeed does not loop forever.

        Args:
            data_set: document to mutate.
            max_number_operations: stop once this many operations are recorded.
            seed: when not None, seeds ``random`` and the data generator.
            mutation_operation_type: "any", "dict" or "array".
            force_operation_type: operation name to use instead of a random one.

        Returns:
            List of operation records (see ``_describe_operation``).
        """
        self._apply_seed(seed)
        operation_definition = []
        for _ in range(self._MAX_ATTEMPTS):
            if len(operation_definition) >= max_number_operations:
                break
            pairs = self.find_pairs_data_type(
                data_set, "", pairs={}, data_type=mutation_operation_type)
            if not pairs:
                break
            key = random.choice(list(pairs))
            operation = self._pick_for_type(
                pairs[key], mutation_operation_type, force_operation_type)
            if not operation["mutate"]:
                continue
            copy_of_original_dataset = copy.deepcopy(data_set)
            function = getattr(self, operation["python"])
            new_path, data = function(key, data_set)
            if new_path is not None:
                operation_definition.append(self._describe_operation(
                    data, key, new_path, copy_of_original_dataset,
                    copy.deepcopy(data_set), operation))
        return operation_definition

    # ------------------------------------------------------------------
    # Document access
    # ------------------------------------------------------------------
    def parse_and_get_data(self, data_set, path):
        """Return the value stored at ``path`` inside ``data_set``.

        Raises whatever the underlying lookup raises (``KeyError``,
        ``IndexError``, ``TypeError``, ``ValueError``) for a path that does
        not resolve.
        """
        for key in path.split("."):
            if "[" not in key:
                data_set = data_set[key]
                continue
            parts = key.split("[")
            # An empty head means the segment starts with an index ("[0]").
            if parts[0] != '':
                data_set = data_set[parts[0]]
            for raw_index in parts[1:]:
                data_set = data_set[int(raw_index.replace("]", ""))]
        return data_set

    def gen_data(self):
        """Return what the data generator's ``gen_data`` returns."""
        return self.randomDataGenerator.gen_data()

    @staticmethod
    def _dataset_or_empty(original_dataset):
        """Return ``original_dataset``, or a new dict when it is None."""
        return {} if original_dataset is None else original_dataset

    # ------------------------------------------------------------------
    # Mutations: each returns (touched_path, value); (None, None) on failure
    # ------------------------------------------------------------------
    def python_based_dict_add(self, path="", original_dataset=None):
        """Add a generated field to the dict at ``path`` ("" is the root)."""
        original_dataset = self._dataset_or_empty(original_dataset)
        field_name, data_set = self.gen_data()
        if path == "":
            target = original_dataset
            new_path = field_name
        else:
            target = self.parse_and_get_data(original_dataset, path)
            new_path = path + "." + field_name
        target[field_name] = copy.deepcopy(data_set)
        return new_path, data_set

    def python_based_get(self, path="", original_dataset=None):
        """Return ``path`` and the value found there; nothing is mutated."""
        original_dataset = self._dataset_or_empty(original_dataset)
        return path, self.parse_and_get_data(original_dataset, path)

    def python_based_exists(self, path="", original_dataset=None):
        """Return ``(path, None)`` without inspecting the document."""
        return path, None

    def python_based_array_replace(self, path="", original_dataset=None):
        """Replace a random element of the list at ``path``."""
        original_dataset = self._dataset_or_empty(original_dataset)
        field_name, data_set = self.gen_data()
        modify_dataset = self.parse_and_get_data(original_dataset, path)
        if len(modify_dataset) == 0:
            return None, None
        index = random.choice(range(len(modify_dataset)))
        modify_dataset[index] = copy.deepcopy(data_set)
        return path + "[" + str(index) + "]", data_set

    def python_based_array_add_insert(self, path="", original_dataset=None):
        """Insert a generated value at a random index of the list."""
        return self.python_based_array_add(
            path, original_dataset=original_dataset, type="insert")

    def python_based_array_add_first(self, path="", original_dataset=None):
        """Insert a generated value at the front of the list."""
        return self.python_based_array_add(
            path, original_dataset=original_dataset, type="first")

    def python_based_array_add_last(self, path="", original_dataset=None):
        """Append a generated value to the list."""
        return self.python_based_array_add(
            path, original_dataset=original_dataset, type="last")

    def python_based_array_add_unique(self, path="", original_dataset=None):
        """Append a generated value if the list allows a unique add."""
        return self.python_based_array_add(
            path, original_dataset=original_dataset, type="unique")

    def python_based_array_add(self, path="", original_dataset=None, type="insert"):
        """Add a generated value to the list at ``path``.

        ``type`` selects the position: "first", "last", "unique" (append,
        refused when the list holds a dict, a list, or an equal value) or
        anything else for a random index (refused on an empty list).  Only
        the random-index form appends ``[index]`` to the returned path.
        """
        original_dataset = self._dataset_or_empty(original_dataset)
        field_name, data_set = self.randomDataGenerator.gen_data_no_json()
        modify_dataset = self.parse_and_get_data(original_dataset, path)
        if type == "last":
            modify_dataset.append(copy.deepcopy(data_set))
            return path, data_set
        if type == "unique":
            for element in modify_dataset:
                if isinstance(element, (dict, list)) or element == data_set:
                    return None, None
            modify_dataset.append(copy.deepcopy(data_set))
            return path, data_set
        if type == "first":
            index = 0
        else:
            if len(modify_dataset) == 0:
                return None, None
            index = random.choice(range(len(modify_dataset)))
            path = path + "[" + str(index) + "]"
        modify_dataset.insert(index, copy.deepcopy(data_set))
        return path, data_set

    def python_based_dict_delete(self, path="", original_dataset=None):
        """Remove a random field from the dict at ``path`` ("" is the root)."""
        modify_dataset = original_dataset
        if path != "":
            modify_dataset = self.parse_and_get_data(original_dataset, path)
        # list(...) keeps random.choice working on Python 3 key views.
        candidates = list(modify_dataset.keys())
        if not candidates:
            return None, None
        key_to_remove = random.choice(candidates)
        modify_dataset.pop(key_to_remove)
        if path == "":
            return key_to_remove, None
        return path + "." + key_to_remove, None

    def python_based_array_delete(self, path="", original_dataset=None):
        """Remove a random element from the list at ``path`` ("" is the root)."""
        modify_dataset = original_dataset
        if path != "":
            modify_dataset = self.parse_and_get_data(original_dataset, path)
        if len(modify_dataset) == 0:
            return None, None
        index = random.choice(range(len(modify_dataset)))
        modify_dataset.pop(index)
        return path + "[" + str(index) + "]", None

    def python_based_dict_upsert_replace(self, path="", original_dataset=None):
        """Overwrite a random existing field of the dict at ``path``."""
        # The generated field name is discarded; only the value is used.
        field_name, data_set = self.gen_data()
        if path == "":
            modify_dataset = original_dataset
        else:
            modify_dataset = self.parse_and_get_data(original_dataset, path)
        candidates = list(modify_dataset.keys())
        if not candidates:
            return None, None
        field_name = random.choice(candidates)
        modify_dataset[field_name] = copy.deepcopy(data_set)
        if path != "":
            return path + "." + field_name, data_set
        return field_name, data_set

    def python_based_dict_replace(self, path="", original_dataset=None):
        """Same mutation as ``python_based_dict_upsert_replace``."""
        return self.python_based_dict_upsert_replace(
            path=path, original_dataset=self._dataset_or_empty(original_dataset))

    def python_based_dict_upsert_add(self, path="", original_dataset=None):
        """Same mutation as ``python_based_dict_add``."""
        return self.python_based_dict_add(
            path=path, original_dataset=original_dataset)

    # ------------------------------------------------------------------
    # Operation selection
    # ------------------------------------------------------------------
    def pick_operations(self, data=None, operation=None):
        """Pick an operation matching the container type of ``data``.

        Returns ``{"mutate": False}`` when ``data`` is neither list nor dict.
        """
        if isinstance(data, list):
            return self.pick_array_operations(data=data, operation=operation)
        if isinstance(data, dict):
            return self.pick_dict_operations(data=data, operation=operation)
        return {"mutate": False}

    def pick_array_operations(self, data=None, operation=None):
        """Return the descriptor of a (random or named) array operation.

        Returns ``{"mutate": False}`` when ``data`` is not a list.  Raises
        ``KeyError`` for an ``operation`` name that is not an array operation.
        """
        if not isinstance(data, list):
            return {"mutate": False}
        if operation is None:
            operation = random.choice(list(self._ARRAY_OPS))
        # Copy so callers cannot alter the shared table.
        return dict(self._ARRAY_OPS[operation])

    def pick_dict_operations(self, data=None, operation=None):
        """Return the descriptor of a (random or named) dict operation.

        Returns ``{"mutate": False}`` when ``data`` is not a dict.  Raises
        ``KeyError`` for an ``operation`` name that is not a dict operation.
        """
        if not isinstance(data, dict):
            return {"mutate": False}
        if operation is None:
            operation = random.choice(list(self._DICT_OPS))
        # Copy so callers cannot alter the shared table.
        return dict(self._DICT_OPS[operation])

    # ------------------------------------------------------------------
    # Path helpers
    # ------------------------------------------------------------------
    def isPathPresent(self, path, filter_paths=None):
        """Return True when ``path`` equals or lies below a filter path.

        A match must end on a path boundary (``.`` or ``[``), so ``"ab"`` is
        not treated as a child of ``"a"``.  An empty filter path stands for
        the document root and matches every path.
        """
        for path_parent in filter_paths or ():
            if path_parent == "" or path == path_parent:
                return True
            if path.startswith(path_parent) and path[len(path_parent)] in ".[":
                return True
        return False

    def trim_path(self, path, search_string):
        """Return ``path`` cut just before the last ``search_string``."""
        return path[:path.rfind(search_string)]

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    def show_all_operations(self, ops=None):
        """Print every field of every operation record in ``ops``."""
        for operation_index, operation in enumerate(ops or ()):
            print("++++++++++++++++++ OPERATION {0} ++++++++++++++++++++++".format(operation_index))
            for field in operation.keys():
                print("{0} :: {1}".format(field, operation[field]))

    def show_all_paths(self, pairs, data_set):
        """Print each path with its value and a candidate operation.

        Values that are neither dict nor list have no applicable operation;
        only their path line is printed.
        """
        for path in pairs.keys():
            parse_path_data = self.parse_and_get_data(data_set, path)
            print("PATH = {0} || VALUE = {1} || PARSE PATH = {2} ".format(path, pairs[path], parse_path_data))
            ops_info = self.pick_operations(parse_path_data)
            if not ops_info["mutate"]:
                continue
            print("Run python operation {0}".format(ops_info["python"]))
            print("Run equivalent subdoc api operation {0}".format(ops_info["subdoc_api"]))

    def gen_input_file(self, file_name="sample_json.txt", number_of_test_cases=100):
        """Write ``number_of_test_cases`` generated documents, one JSON per line.

        Each line is ``{"seed": <index>, "json_document": <random JSON>}``.
        """
        # "with" closes the file even if generation or serialization fails.
        with open(file_name, 'wb') as dump_file:
            for x in range(number_of_test_cases):
                json_document = {
                    "seed": x,
                    "json_document": self.randomDataGenerator.random_json(),
                }
                dump_file.write(json.dumps(json_document).encode("utf-8"))
                dump_file.write(b'\n')