def _extract_models(self, criterion):
    """Infer the entity models involved in *criterion* and register any model
    missing from the current query as a hidden selection.

    :param criterion: a SQLAlchemy-style criterion (binary expression).
    :return: None (mutates ``self._models`` in place).
    """
    # A bind-parameter marker (":param") means the criterion compares against
    # a constant value: no join information can be extracted from it.
    if ":" in str(criterion):
        return
    tables = []
    # Extract table names from both sides of the expression.
    expressions = [criterion.expression.left, criterion.expression.right] \
        if hasattr(criterion, "expression") else []
    for expression in expressions:
        # Comparisons with NULL carry no join information either.
        if str(expression) == "NULL":
            return
        if hasattr(expression, "foreign_keys"):
            for foreign_key in getattr(expression, "foreign_keys"):
                if hasattr(foreign_key, "column"):
                    tables.append(foreign_key.column.table)
    tables_objects = getattr(criterion, "_from_objects", [])
    tables.extend(str(table) for table in tables_objects)
    tables = list(set(tables))  # remove duplicate names

    # Determine which of the referenced tables have no model in the query.
    current_entities = [selection._model for selection in self._models]
    current_entities = [entity for entity in current_entities if entity is not None]
    # Materialized as a list: a lazy `map` would be exhausted by the first
    # `in` test below on Python 3, corrupting later membership checks.
    current_entities_tablenames = [entity.__tablename__ for entity in current_entities]
    missing_tables = [t for t in tables if t not in current_entities_tablenames]
    missing_entities_objects = [
        get_model_class_from_name(get_model_classname_from_tablename(str(t)))
        for t in missing_tables
    ]

    # Register each missing model as a hidden "id" selection so joins can be
    # computed without altering the user-visible projection.
    self._models += [Selection(entity, "id", is_hidden=True)
                     for entity in missing_entities_objects]
def _extract_fields(self, tablename):
    """Return the sorted list of field names exposed by the model mapped to
    *tablename*, including ROME's internal bookkeeping columns.

    :param tablename: name of the database table.
    :return: sorted list of unique field names (strings).
    """
    from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
    modelclass_name = get_model_classname_from_tablename(tablename)
    klass = get_model_class_from_name(modelclass_name)
    try:
        # Preferred path: instantiate the model and read its SQLAlchemy
        # instrumented attributes.
        fields = ["%s" % (attr.key,) for attr in klass()._sa_instance_state.attrs]
    except Exception:
        # Fallback for models that cannot be instantiated: iterate the class
        # manager, which yields attribute names.
        fields = ["%s" % (attr,) for attr in klass._sa_class_manager]
    # ROME's internal bookkeeping columns are always present.
    fields += ["_pid", "_metadata_novabase_classname", "_rid",
               "_nova_classname", "_rome_version_number"]
    fields = sorted(set(fields))
    # NOTE(review): leftover debug output, kept to preserve behavior —
    # consider switching to logging.
    print("fields@%s => %s" % (tablename, fields))
    return fields
def _extract_table_metadata(self, tablename):
    """Compute per-column type metadata for *tablename* and cache it in
    ``self.table_columns_metadata``.

    :param tablename: name of the database table.
    :return: None (mutates ``self.table_columns_metadata``).
    """
    from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
    modelclass_name = get_model_classname_from_tablename(tablename)
    klass = get_model_class_from_name(modelclass_name)
    fields = self._extract_fields(tablename)
    corrected_columns = [self._correct_badname(field) for field in fields]
    # The internal version counter is not a user-facing column: skip it.
    corrected_columns = [c for c in corrected_columns if c != "_rome_version_number"]
    columns_associated_with_types = {}
    for column_metadata in (process_column(c, klass) for c in corrected_columns):
        # process_column returns a tuple whose first item is the column name.
        columns_associated_with_types[column_metadata[0]] = column_metadata
    self.table_columns_metadata[tablename] = columns_associated_with_types
def _extract_table_metadata(self, tablename):
    """Collect type information for every (corrected) column of *tablename*
    and store the resulting name -> metadata mapping in
    ``self.table_columns_metadata``.

    :param tablename: name of the database table.
    :return: None (mutates ``self.table_columns_metadata``).
    """
    from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
    model_name = get_model_classname_from_tablename(tablename)
    model_class = get_model_class_from_name(model_name)
    column_names = self._extract_fields(tablename)
    metadata = {}
    for name in column_names:
        corrected = self._correct_badname(name)
        # The internal version counter is not a user-facing column.
        if corrected == "_rome_version_number":
            continue
        entry = process_column(corrected, model_class)
        # The first item of the entry tuple is the column name.
        metadata[entry[0]] = entry
    self.table_columns_metadata[tablename] = metadata
def _extract_fields(self, tablename):
    """Return the sorted, de-duplicated field names of the model mapped to
    *tablename*, ROME's internal bookkeeping columns included.

    :param tablename: name of the database table.
    :return: sorted list of unique field names (strings).
    """
    from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
    modelclass_name = get_model_classname_from_tablename(tablename)
    klass = get_model_class_from_name(modelclass_name)
    try:
        # Preferred path: read the instrumented attributes of a fresh
        # model instance.
        fields = ["%s" % (attr.key,) for attr in klass()._sa_instance_state.attrs]
    except Exception:
        # Fallback when the model cannot be instantiated: iterate the
        # SQLAlchemy class manager (yields attribute names).
        fields = ["%s" % (attr,) for attr in klass._sa_class_manager]
    # Internal bookkeeping columns are always exposed.
    fields += [
        "_pid", "_metadata_novabase_classname", "_rid", "_nova_classname",
        "_rome_version_number"
    ]
    fields = sorted(set(fields))
    # NOTE(review): debug print kept to preserve behavior; prefer logging.
    print("fields@%s => %s" % (tablename, fields))
    return fields
def _extract_models(self, criterion):
    """Detect the entity models referenced by *criterion* and add the ones
    that are missing from the current query as hidden selections.

    :param criterion: a SQLAlchemy-style criterion (binary expression).
    :return: None (mutates ``self._models`` in place).
    """
    # A bind-parameter marker (":param") means the criterion involves a
    # constant value: nothing can be learned about a join between tables.
    if ":" in str(criterion):
        return
    tables = []
    # Extract table names from the criterion's left/right expressions.
    expressions = [
        criterion.expression.left, criterion.expression.right
    ] if hasattr(criterion, "expression") else []
    for expression in expressions:
        # A NULL comparison carries no join information.
        if str(expression) == "NULL":
            return
        if hasattr(expression, "foreign_keys"):
            for foreign_key in getattr(expression, "foreign_keys"):
                if hasattr(foreign_key, "column"):
                    tables.append(foreign_key.column.table)
    tables_objects = getattr(criterion, "_from_objects", [])
    tables.extend(str(table) for table in tables_objects)
    tables = list(set(tables))  # remove duplicate names

    # Find tables whose model is not yet part of the query. The tablename
    # list is materialized: a lazy `map` would be exhausted after the first
    # `in` test on Python 3, making later membership checks wrong.
    current_entities = [selection._model for selection in self._models]
    current_entities = [e for e in current_entities if e is not None]
    current_entities_tablenames = [e.__tablename__ for e in current_entities]
    missing_tables = [t for t in tables if t not in current_entities_tablenames]
    missing_entities_objects = [
        get_model_class_from_name(get_model_classname_from_tablename(str(t)))
        for t in missing_tables
    ]

    # Add the missing models as hidden "id" selections so join computation
    # works without changing the user-visible projection.
    self._models += [Selection(entity, "id", is_hidden=True)
                     for entity in missing_entities_objects]
def building_tuples(list_results, labels, criterions, hints=None):
    """Build result tuples (the join operator in relational algebra) using
    the "experimental" strategy: joining criterions are collected from the
    query and from model relationships, candidate objects are indexed by
    their join-column values, and tuples are grown table by table.

    :param list_results: list (one item per label) of lists of row objects.
    :param labels: table names, aligned with *list_results*.
    :param criterions: criterion objects exposing an ``exps`` attribute.
    :param hints: optional list of hint objects (``table_name``,
        ``attribute``, ``value``) used to prune candidates; never mutated.
    :return: list of tuples (lists of row objects), ordered per join steps.
    """
    from lib.rome.core.rows.rows import get_attribute, set_attribute, has_attribute
    # Avoid a mutable default argument; behavior is unchanged for callers.
    hints = hints if hints is not None else []
    mode = "experimental"
    if mode == "cartesian_product":
        # Debug path: plain cartesian product of every table's rows.
        cartesian_product = []
        for element in itertools.product(*list_results):
            cartesian_product += [element]
        return cartesian_product
    # --- experimental mode -------------------------------------------------
    steps = list(zip(list_results, labels))
    candidates_values = {}
    candidates_per_table = {}
    joining_criterions = []
    non_joining_criterions = {}
    # Initialise the candidates-per-table index.
    for label in labels:
        candidates_per_table[label] = {}
    # Collect joining expressions from the query's criterions.
    for criterion in criterions:
        for exp in criterion.exps:
            for joining_criterion in extract_joining_criterion(exp):
                involved = [x for x in joining_criterion if x is not None]
                if len(involved) > 1:
                    joining_criterions += [involved]
                else:
                    # Non-joining criterion: remember it per table so it can
                    # later be used to filter candidate objects.
                    exp_criterions = [x for x in flatten(joining_criterion)
                                      if x is not None]
                    for non_joining_criterion in exp_criterions:
                        tablename = non_joining_criterion["table"]
                        column = non_joining_criterion["column"]
                        if tablename not in non_joining_criterions:
                            non_joining_criterions[tablename] = []
                        non_joining_criterions[tablename] += [{
                            "tablename": tablename,
                            "column": column,
                            "exp": exp,
                            "criterion": criterion
                        }]
    # Consolidate joining criterions with data stored in relationships.
    done_index = {}
    for step in steps:
        tablename = step[1]
        model_classname = get_model_classname_from_tablename(tablename)
        fake_instance = get_model_class_from_name(model_classname)()
        for r in fake_instance.get_relationships():
            criterion = extract_joining_criterion_from_relationship(r, tablename)
            key1 = criterion[0]["table"] + "__" + criterion[1]["table"]
            key2 = criterion[1]["table"] + "__" + criterion[0]["table"]
            # NOTE(review): the original text was garbled here; restored the
            # symmetric de-duplication check ("key2 not in done_index and").
            if (key1 not in done_index and key2 not in done_index
                    and criterion[0]["table"] in labels
                    and criterion[1]["table"] in labels):
                joining_criterions += [criterion]
                done_index[key1] = True
                done_index[key2] = True
    # Collect, for each joining expression, its value <-> objects mapping.
    if len(joining_criterions) > 0:
        for criterion in joining_criterions:
            for part in criterion:
                key = "%s.%s" % (part["table"], part["column"])
                objects = list_results[labels.index(part["table"])]
                if key not in candidates_values:
                    candidates_values[key] = {}
                for obj in objects:
                    value_key = get_attribute(obj, part["column"])
                    skip = False
                    # Hints prune objects whose hinted attribute mismatches.
                    for hint in hints:
                        if (part["table"] == hint.table_name
                                and hint.attribute in obj
                                and obj[hint.attribute] != hint.value):
                            skip = True
                            break
                    if not skip:
                        if value_key not in candidates_values[key]:
                            candidates_values[key][value_key] = {}
                        object_hash = hash(str(obj))
                        object_table = obj["nova_classname"]
                        candidates_values[key][value_key][object_hash] = {
                            "value": value_key, "object": obj}
                        candidates_per_table[object_table][object_hash] = obj
    else:
        # No joining criterion: every object is a candidate.
        for step in steps:
            for obj in step[0]:
                object_hash = hash(str(obj))
                candidates_per_table[obj["nova_classname"]][object_hash] = obj
    # Progressively reduce the list of results.
    results = []
    processed_models = []
    if len(steps) > 0:
        first_label = steps[0][1]
        results = [[candidates_per_table[first_label][h]]
                   for h in candidates_per_table[first_label]]
        processed_models += [first_label]
    remaining_models = [s[1] for s in steps[1:]]
    for step in steps[1:]:
        for criterion in joining_criterions:
            criterion_models = [x["table"] for x in criterion]
            candidate_models = [step[1]] + processed_models
            if len(intersect(candidate_models, criterion_models)) > 1:
                processed_models += [step[1]]
                # Remove the processed model from the remaining ones.
                # NOTE(review): the original read "x == step[1]", which kept
                # only the processed model; "!=" is the intended removal.
                remaining_models = [x for x in remaining_models if x != step[1]]
                current_criterion_option = [x for x in criterion
                                            if x["table"] == step[1]]
                remote_criterion_option = [x for x in criterion
                                           if x["table"] != step[1]]
                if not (len(current_criterion_option) > 0
                        and len(remote_criterion_option) > 0):
                    continue
                current_criterion_part = current_criterion_option[0]
                remote_criterion_part = remote_criterion_option[0]
                new_results = []
                for partial_tuple in results:
                    existing_tuple_index = processed_models.index(
                        remote_criterion_part["table"])
                    existing_value = get_attribute(
                        partial_tuple[existing_tuple_index],
                        remote_criterion_part["column"])
                    if existing_value is not None:
                        key = "%s.%s" % (current_criterion_part["table"],
                                         current_criterion_part["column"])
                        candidates_value_index = candidates_values[key]
                        candidates = candidates_value_index.get(existing_value, {})
                        for candidate_key in candidates:
                            new_results += [partial_tuple +
                                            [candidates[candidate_key]["object"]]]
                results = new_results
                break
    return results
def default_panda_building_tuples(lists_results, labels, criterions, hints=[]): """ Build tuples (join operator in relational algebra). """ """ Create the Dataframe indexes. """ dataframes = [] dataindex = {} substitution_index = {} normal_keys_index = {} refactored_keys_index = {} normal_keys_to_key_index = {} refactored_keys_to_key_index = {} refactored_keys_to_table_index = {} index = 0 classname_index = {} for each in labels: classname_index[each] = get_model_classname_from_tablename(each) # if len(lists_results) == 1: # return map(lambda x: [x], lists_results[0]) for list_results in lists_results: label = labels[index] if len(list_results) == 0: continue keys = map(lambda x: x, list_results[0]) + ["created_at", "updated_at"] dataframe = pd.DataFrame(data=list_results, columns=keys) for value in keys: normal_key = "%s.%s" % (label, value) refactored_keys = "%s___%s" % (label, value) refactored_keys_to_table_index[refactored_keys] = label normal_keys_to_key_index[normal_key] = value refactored_keys_to_key_index[refactored_keys] = value normal_keys = map(lambda x: "%s.%s" % (label, x), keys) normal_keys_index[label] = normal_keys refactored_keys = map(lambda x: "%s___%s" % (label, x), keys) refactored_keys_index[label] = refactored_keys for (a, b) in zip(normal_keys, refactored_keys): substitution_index[a] = b dataframe.columns = refactored_keys dataframes += [dataframe] """ Index the dataframe and create a reverse index. """ dataindex[label] = index index += 1 """ Collecting joining expressions. 
""" joining_pairs = [] non_joining_criterions = [] _joining_pairs_str_index = {} _nonjoining_criterions_str_index = {} for criterion in criterions: _joining_pairs = criterion.extract_joining_pairs() _nonjoining_criterions = criterion.extract_nonjoining_criterions() _nonjoining_criterions_str = str(_nonjoining_criterions) if len(_joining_pairs) > 0: _joining_pairs_str = str(sorted(_joining_pairs[0])) if not _joining_pairs_str in _joining_pairs_str_index: _joining_pairs_str_index[_joining_pairs_str] = 1 joining_pairs += _joining_pairs if not _nonjoining_criterions_str in _nonjoining_criterions_str_index: _nonjoining_criterions_str_index[_nonjoining_criterions_str] = 1 non_joining_criterions += _nonjoining_criterions """ Construct the resulting rows. """ if len(labels) > 1 and len(filter(lambda x: len(x) == 0, lists_results)) > 0: return [] result = None if len(lists_results) > 1: processed_tables = [] for joining_pair in joining_pairs: """ Preparing the tables that will be joined. """ attribute_1 = joining_pair[0].strip() attribute_2 = joining_pair[1].strip() tablename_1 = attribute_1.split(".")[0] tablename_2 = attribute_2.split(".")[0] if tablename_1 not in dataindex or tablename_2 not in dataindex: return [] index_1 = dataindex[tablename_1] index_2 = dataindex[tablename_2] dataframe_1 = dataframes[ index_1] if not tablename_1 in processed_tables else result dataframe_2 = dataframes[ index_2] if not tablename_2 in processed_tables else result refactored_attribute_1 = attribute_1.split( ".")[0] + "___" + attribute_1.split(".")[1] refactored_attribute_2 = attribute_2.split( ".")[0] + "___" + attribute_2.split(".")[1] """ Join the tables. """ try: result = pd.merge(dataframe_1, dataframe_2, left_on=refactored_attribute_1, right_on=refactored_attribute_2, how="outer") drop_y(result) rename_x(result) except KeyError: return [] """ Update the history of processed tables. 
""" processed_tables += [tablename_1, tablename_2] processed_tables = list(set(processed_tables)) """ Filtering rows. """ if result is None: if len(dataframes) == 0: return [] result = dataframes[0] for non_joining_criterion in non_joining_criterions: expression_str = non_joining_criterion.raw_expression for value in substitution_index: if value in expression_str: corresponding_key = substitution_index[value] expression_str = expression_str.replace( value, corresponding_key) try: corrected_expression = correct_boolean_int(expression_str) corrected_expression = correct_expression_containing_none( corrected_expression) result = result.query(corrected_expression) except: pass """ Building the rows. """ rows = [] columns_indexes = {} label_indexes = {} i = 0 for refactored_key in result.columns.values: columns_indexes[refactored_key] = i i += 1 i = 0 for label in labels: label_indexes[label] = i i += 1 transposed_result = result.transpose() dict_values = transposed_result.to_dict() for value in dict_values.values(): row = map(lambda x: {}, labels) for ci in value: table = refactored_keys_to_table_index[ci] table_index = label_indexes[table] key = refactored_keys_to_key_index[ci] v = value[ci] if type(v) is float and math.isnan(v): v = 0 if key == "_metadata_novabase_classname": v = classname_index[table] row[table_index][key] = v rows += [row] return rows
def building_tuples(list_results, labels, criterions, hints=None):
    """Build result tuples (the join operator in relational algebra) by
    following relationship metadata between models.

    Rows of each table are first indexed by their "id" and "uuid" fields;
    tuples are seeded from the first table's rows and then grown by resolving
    each model's relationships against those indexes. Only the largest tuples
    that cover every label are returned, reordered to follow *labels*.

    :param list_results: list (one item per label) of lists of row objects.
    :param labels: table names, aligned with *list_results*.
    :param criterions: unused by this strategy (joins come from relationships).
    :param hints: unused; kept for signature compatibility.
    :return: list of tuples, each ordered like *labels*.
    """
    mode = "not_cartesian_product"
    if mode == "cartesian_product":
        # Debug path: plain cartesian product of every table's rows.
        cartesian_product = []
        for element in itertools.product(*list_results):
            cartesian_product += [element]
        return cartesian_product
    # Construct dicts that keep a ref on objects according to their "id"
    # and "uuid" fields.
    indexed_results = {}
    for (results, label) in zip(list_results, labels):
        dict_result = {"id": {}, "uuid": {}}
        for row in results:
            if has_attribute(row, "id"):
                dict_result["id"][get_attribute(row, "id")] = row
            if has_attribute(row, "uuid"):
                dict_result["uuid"][get_attribute(row, "uuid")] = row
        indexed_results[label] = dict_result
    # Find iteratively pairs that match according to relationship modelling.
    tuples = []
    tuples_labels = []
    # Initialise tuples with the rows of the first table only.
    count = 0
    for (results, label) in zip(list_results, labels):
        tuples_labels += [label]
        for row in results:
            tuples += [{label: row}]
        break
    # Extend the existing tuples with the remaining tables.
    # NOTE(review): the original contained the no-op statement "count == 0"
    # here (count is already 0); it has been dropped.
    for (results, label) in zip(list_results, labels):
        if count == 0:
            # Skip the first table: it seeded the tuples above.
            count += 1
            continue
        tuples_labels += [label]
        for t in tuples:
            # Iterate on a snapshot of the keys: the tuple may grow while we
            # follow relationships (mutating a dict during iteration raises
            # on Python 3).
            for table_name in list(t.keys()):
                model_classname = get_model_classname_from_tablename(table_name)
                fake_instance = get_model_class_from_name(model_classname)()
                for r in fake_instance.get_relationships():
                    # Relationships keyed on id/uuid are the indexes
                    # themselves; skip them.
                    if r.local_fk_field in ["id", "uuid"]:
                        continue
                    remote_label_name = r.remote_object_tablename
                    if remote_label_name in indexed_results:
                        local_value = get_attribute(t[table_name],
                                                    r.local_fk_field)
                        if local_value is not None:
                            try:
                                remote_candidate = indexed_results[
                                    remote_label_name][
                                    r.remote_object_field][local_value]
                                t[remote_label_name] = remote_candidate
                            except Exception as exc:
                                # Renamed from "e": the original shadowed the
                                # key loop variable, which breaks the next
                                # iteration on Python 3.
                                logging.error(exc)
                                traceback.print_exc()
    # Keep only the largest tuples (those that joined the most tables).
    tuple_groupby_size = {}
    for t in tuples:
        tuple_size = len(t)
        if tuple_size not in tuple_groupby_size:
            tuple_groupby_size[tuple_size] = []
        tuple_groupby_size[tuple_size] += [t]
    if len(tuple_groupby_size.keys()) > 0:
        max_size = max(tuple_groupby_size.keys())
        tuples = tuple_groupby_size[max_size]
    else:
        tuples = []
    # Reorder tuples to follow the order of *labels*; incomplete tuples
    # (not covering every label) are discarded.
    results = []
    for t in tuples:
        if len(t) == len(labels):
            ordered_t = [t[i] for i in labels]
            results += [tuple(ordered_t)]
    return results