class TableMapper(Component):
    """
    Abstract class. Maps an OLAP entity to a SQL table: resolves attribute
    mappings into columns and stores incoming messages via lookup/insert on a
    cached SQL table.
    """

    __metaclass__ = ABCMeta

    # Store modes, mirrored from SQLTable. store() implements 'lookup'
    # (lookup then insert if missing) and 'insert'; 'upsert' raises.
    STORE_MODE_LOOKUP = SQLTable.STORE_MODE_LOOKUP
    STORE_MODE_INSERT = SQLTable.STORE_MODE_INSERT
    STORE_MODE_UPSERT = SQLTable.STORE_MODE_UPSERT

    entity = None            # OLAP entity handled by this mapper
    connection = None        # database connection component
    table = None             # name of the backing SQL table
    eval = []                # expressions evaluated on each message before storing
    mappings = []            # attribute-to-column mapping definitions (dicts)
    lookup_cols = None       # lookup columns (list, or comma-separated string)
    auto_store = None        # entities stored automatically before this one
    store_mode = STORE_MODE_LOOKUP
    _sqltable = None         # CachedSQLTable built in initialize()
    _lookup_changed_fields = []  # columns already warned about on lookup mismatch
    _uses_table = True
    olapmapper = None        # owning OlapMapper (resolves mappers for related entities)

    def __init__(self):
        super(TableMapper, self).__init__()
        # Shadow the mutable class attributes with per-instance lists so
        # instances do not share state.
        self.eval = []
        self.mappings = []
        self._lookup_changed_fields = []

    def __str__(self, *args, **kwargs):
        # Include the mapped entity name for easier debugging/log output.
        if (self.entity != None):
            return "%s(%s)" % (self.__class__.__name__, self.entity.name)
        else:
            return super(TableMapper, self).__str__(*args, **kwargs)

    def _mappings_join(self, ctx):
        # Mapping a master table shall use to join to this entity's primary
        # key; an "AutoIncrement" key is exposed as plain "Integer".
        pk = self.pk(ctx)
        if pk == None:
            raise Exception("%s has no primary key and cannot provide join columns." % self)

        ctype = pk["type"]
        if (ctype == "AutoIncrement"):
            ctype = "Integer"

        return [{
            "entity": self.entity,
            "name": self.entity.name,
            "column": self.entity.name + "_id",
            "type": ctype,
            #"value": '${ m["' + self.entity.name + "_id" + '"] }'
            "value": pk['value'] if (pk['value']) else '${ m["' + self.entity.name + "_id" + '"] }'
        }]

    def _mappings(self, ctx):
        """
        Note: _ensure_mappings() shall be called only as the last step in the
        eval resolution chain, to avoid setting defaults before all consumers
        had an opportunity to override values.
        """
        #logger.debug("Calculating eval (TableMapper) for %s" % self)
        # Work on copies so callers can mutate the result freely.
        mappings = [mapping.copy() for mapping in self.mappings]
        return self._ensure_mappings(ctx, mappings)

    def _joins(self, ctx, master = None):
        """ Joins related to this entity.
        """
        if (master != None):
            # Facts delegate the detail column to their fact's mapper.
            return [{
                "master_entity": master,
                "master_column": self.entity.name + "_id",
                "detail_entity": self.entity,
                "detail_column": (self.olapmapper.entity_mapper(self.entity.fact).pk(ctx)["column"]) if (hasattr(self.entity, "fact")) else self.pk(ctx)['column'],
            }]
        else:
            return []

    def _extend_mappings(self, ctx, mappings, newmappings):
        # Merge newmappings into mappings in place: a mapping matches when
        # both name and entity name are equal; matches only fill in missing
        # properties, non-matches are appended.
        for nm in newmappings:
            found = None
            for m in mappings:
                if (not "entity" in m):
                    raise Exception("No entity defined for mapping %s" % m)
                if (not "entity" in nm):
                    raise Exception("No entity defined for mapping %s" % nm)
                if (not isinstance(m["entity"], Component)):
                    raise Exception("No correct entity defined for mapping %s" % m)
                if (not isinstance(nm["entity"], Component)):
                    raise Exception("No correct entity defined for mapping %s" % nm)
                if (m["name"] == nm["name"] and m["entity"].name == nm["entity"].name):
                    found = m
                    break

            if not found:
                mappings.append(nm)
            else:
                # Update missing properties
                if (not "type" in m and "type" in nm): m["type"] = nm["type"]
                if (not "value" in m and "value" in nm): m["value"] = nm["value"]
                if (not "label" in m and "label" in nm): m["label"] = nm["label"]
                if (not "column" in m and "column" in nm): m["column"] = nm["column"]

    def _ensure_mappings(self, ctx, mappings):
        # Fill in defaults: pk flag, column (defaults to name), value, and
        # type ("AutoIncrement" for value-less pk mappings, else "String").
        for mapping in mappings:
            mapping["pk"] = (False if (not "pk" in mapping) else parsebool(mapping["pk"]))
            if (not "column" in mapping): mapping["column"] = mapping["name"]
            if (not "value" in mapping): mapping["value"] = None
            if (mapping["pk"] and not "type" in mapping):
                if (not "value" in mapping or mapping["value"] == None):
                    mapping["type"] = "AutoIncrement"
            if (not "column" in mapping): mapping["column"] = mapping["name"]
            if (not "type" in mapping): mapping["type"] = "String"
        return mappings

    def initialize(self, ctx):
        # Builds the backing CachedSQLTable from the resolved mappings and
        # derives lookup_cols from the primary key when not configured.
        super(TableMapper, self).initialize(ctx)

        if self._uses_table:
            if (self.entity == None):
                raise Exception("No entity defined for %s" % self)
            if (self.connection == None):
                raise Exception("No connection defined for %s" % self)

            ctx.comp.initialize(self.entity)
            ctx.comp.initialize(self.connection)

            self._sqltable = CachedSQLTable()
            self._sqltable.name = self.table
            self._sqltable.connection = self.connection

            # Assert that the sqltable is clean
            #if (len(self._sqltable.columns) != 0): raise AssertionError("SQLTable '%s' columns shall be empty!" % self._sqltable.name)

        # If lookup_cols is a string, split by commas
        if (isinstance(self.lookup_cols, basestring)):
            self.lookup_cols = [ key.strip() for key in self.lookup_cols.split(",") ]

        Mappings.includes(ctx, self.mappings)
        for mapping in self.mappings:
            try:
                # Default each mapping's entity to this mapper's entity.
                if (not "entity" in mapping):
                    mapping["entity"] = self.entity
            except TypeError as e:
                raise Exception("Could not initialize mapping '%s' of '%s': %s" % (mapping, self, e))

        if self._uses_table:
            mappings = self._mappings(ctx)
            for mapping in mappings:
                logger.debug("%s adding column from OLAP mapping: %s" % (self, mapping))
                self._sqltable.columns.append({ "name": mapping["column"], "type": mapping["type"], "pk": mapping["pk"] })

            # If no key, use pk()
            if (self.lookup_cols == None):
                pk = self.pk(ctx)
                if ((pk == None) or (pk["type"] == "AutoIncrement")):
                    raise Exception ("No lookup cols defined for %s (use lookup_cols=[...])" % self)
                self.lookup_cols = [ pk["name"] ]

            ctx.comp.initialize(self._sqltable)

    def finalize(self, ctx):
        # Finalize owned components in reverse order of initialization.
        if self._uses_table:
            ctx.comp.finalize(self._sqltable)
            ctx.comp.finalize(self.connection)
        ctx.comp.finalize(self.entity)
        super(TableMapper, self).finalize(ctx)

    def pk(self, ctx):
        #Returns the primary key mapping.
        # Returns None when no mapping is flagged pk; raises on multiple pks.
        pk_mappings = []
        for mapping in self._mappings(ctx):
            if ("pk" in mapping):
                if parsebool(mapping["pk"]):
                    pk_mappings.append(mapping)

        if (len(pk_mappings) > 1):
            raise Exception("%s has multiple primary keys mapped: %s" % (self, pk_mappings))
        elif (len(pk_mappings) == 1):
            return pk_mappings[0]
        else:
            return None

    def store(self, ctx, m):
        # Stores message m: resolves evals, auto-stores related entities,
        # then looks up / inserts the row and returns its primary key value.

        # Resolve evals
        Eval.process_evals(ctx, m, self.eval)

        # Store automatically or include dimensions
        if (self.auto_store != None):
            logger.debug("Storing automatically: %s" % (self.auto_store))
            for ast in self.auto_store:
                did = self.olapmapper.entity_mapper(ast).store(ctx, m)
                # TODO: Review and use PK properly
                if (did != None): m[ast.name + "_id"] = did
        elif (isinstance(self.entity, cubetl.olap.Fact)):
            logger.debug("Storing automatically: %s" % (self.entity.dimensions))
            for dim in self.entity.dimensions:
                did = self.olapmapper.entity_mapper(dim).store(ctx, m)
                # TODO: review this too, or use rarer prefix
                if (did != None): m[dim.name + "_id"] = did

        logger.debug("Storing entity in %s (mode: %s, lookup: %s)" % (self, self.store_mode, self.lookup_cols))

        data = {}
        mappings = self._mappings(ctx)

        # First try to look it up
        for mapping in mappings:
            if (mapping["column"] in self.lookup_cols):
                if (mapping["type"] != "AutoIncrement"):
                    if (mapping["value"] == None):
                        data[mapping["column"]] = m[mapping["name"]]
                    else:
                        data[mapping["column"]] = ctx.interpolate(m, mapping["value"])

        row = None
        if (self.store_mode == TableMapper.STORE_MODE_LOOKUP):
            row = self._sqltable.lookup(ctx, data)

        # Assemble the full column data (values are either message fields or
        # interpolated expressions).
        for mapping in mappings:
            if (mapping["type"] != "AutoIncrement"):
                if (mapping["value"] == None):
                    if (not mapping["name"] in m):
                        raise Exception("Field '%s' does not exist in message when assigning data for column %s in %s" % (mapping["name"], mapping["column"], self))
                    data[mapping["column"]] = m[mapping["name"]]
                else:
                    data[mapping["column"]] = ctx.interpolate(m, mapping["value"])

        if (not row):
            if (ctx.debug2):
                logger.debug("Storing data in %s (data: %s)" % (self, data))
            if (self.store_mode in [TableMapper.STORE_MODE_LOOKUP, TableMapper.STORE_MODE_INSERT]):
                row = self._sqltable.insert(ctx, data)
            else:
                raise Exception("Update store mode used at %s (%s) not implemented (available 'lookup', 'insert')" % (self, self.store_mode))
        else:
            # Check row is identical
            # Warn (once per column) when a looked-up row differs from the
            # data we would have stored.
            for mapping in self._mappings(ctx):
                if (mapping["type"] != "AutoIncrement"):
                    v1 = row[mapping['column']]
                    v2 = data[mapping['column']]
                    # Compare as strings when either side is a string.
                    if (isinstance(v1, basestring) or isinstance(v2, basestring)):
                        if (not isinstance(v1, basestring)): v1 = str(v1)
                        if (not isinstance(v2, basestring)): v2 = str(v2)
                    if (v1 != v2):
                        if (mapping["column"] not in self._lookup_changed_fields):
                            logger.warn("%s looked up an entity which exists with different attributes (field=%s, existing_value=%r, tried_value=%r) (reported only once per field)" % (self, mapping["column"], v1, v2))
                            self._lookup_changed_fields.append(mapping["column"])

        # NOTE(review): assumes a primary key mapping exists — pk(ctx) may be
        # None here, which would raise TypeError; confirm intended.
        return row[self.pk(ctx)["column"]]
class TableMapper(Component):
    """
    Abstract base class for Olap Entity SQL Mappers.

    An OlapSQLMapper is a container of OlapMappings, which associate Olap
    entities and attributes to SQL tables and columns (and aliases).
    """

    __metaclass__ = ABCMeta

    # Store modes, mirrored from SQLTable. store() implements 'lookup'
    # (lookup then insert if missing) and 'insert'; 'upsert' raises.
    STORE_MODE_LOOKUP = SQLTable.STORE_MODE_LOOKUP
    STORE_MODE_INSERT = SQLTable.STORE_MODE_INSERT
    STORE_MODE_UPSERT = SQLTable.STORE_MODE_UPSERT

    def __init__(self, entity, sqltable, mappings=None, lookup_cols=None):
        """
        Lookup columns are used to lookup an entity when its primary key is not
        available (if the attribute mapped to the primary key is available, it
        will be used instead for lookups).
        """
        super().__init__()
        self.eval = []
        self.entity = entity
        self.sqltable = sqltable
        self.mappings = mappings if mappings else []
        self.lookup_cols = lookup_cols
        self.auto_store = None
        self.store_mode = TableMapper.STORE_MODE_LOOKUP
        self._lookup_changed_fields = []
        self._uses_table = True
        # Initialize eagerly so finalize() can safely test it even when
        # initialize() did not build the cached table (previously this
        # attribute did not exist in that case, causing AttributeError).
        self._sqltable = None

    def __str__(self, *args, **kwargs):
        # Include the mapped entity name for easier debugging/log output.
        if (self.entity != None):
            return "%s(%s)" % (self.__class__.__name__, self.entity.name)
        else:
            return super().__str__(*args, **kwargs)

    def sql_mappings(self, ctx):
        """
        Return the OlapSQLMappings for this entity: its own mappings first,
        then those of related dimensions (recursively), with the dimension
        attribute name prefixed to each path and table alias.
        """
        olapmapper = ctx.find(OlapMapper)[0]

        # Own mappings.
        result = []
        for mapping in self.mappings:
            sqlmapping = OlapSQLMapping(
                [p.name for p in mapping.path], None, mapping.path[-1],
                self.sqltable, mapping.sqlcolumn,
                [], mapping.sqlcolumn.name, mapping.function)
            result.append(sqlmapping)

        # Add related dimension mappings
        # TODO: allow for a "publish: False" setting to avoid publishing dimensions recursively?
        for dimensionattribute in self.entity.get_dimensions():
            dimension = dimensionattribute.dimension
            mapper = olapmapper.entity_mapper(dimension, fail=False)
            if mapper:
                dim_mappings = mapper.sql_mappings(ctx)
                for mapping in dim_mappings:
                    sqlmapping = OlapSQLMapping(
                        [dimensionattribute.name] + mapping.path, mapping.entity, mapping.attribute,
                        mapping.sqltable, mapping.sqlcolumn,
                        [dimensionattribute.name] + mapping.sqltable_alias, mapping.sqlcolumn_alias,
                        mapping.function)
                    result.append(sqlmapping)

        return result

    def sql_joins(self, ctx, master=None):
        """
        Joins related to this entity, collected recursively through its
        dimensions. When `master` is given, also append the join from the
        master table's foreign key to this entity's primary key.
        """
        olapmapper = ctx.find(OlapMapper)[0]

        joins = []
        for dim_attr in self.entity.get_dimensions():
            dim = dim_attr.dimension
            dim_mapper = olapmapper.entity_mapper(dim, fail=False)
            if dim_mapper:
                entity_joins = dim_mapper.sql_joins(ctx, self.entity)
                for join in entity_joins:
                    join['alias'] = [dim_attr.name] + join['alias']
                    joins.append(join)

        # Embedded mappings
        pk = self.pk(ctx)
        if pk is None or pk.sqlcolumn is None:
            pass
        elif master is not None:
            # Search column name of the foreign key that references this primary key
            master_column_name = "<EXPORT ERROR>"  #self.entity.name
            for column in olapmapper.entity_mapper(master).sqltable.columns:
                if hasattr(column, "fk_sqlcolumn"):
                    if column.fk_sqlcolumn == pk.sqlcolumn:
                        master_column_name = column.name

            joins.append({
                "alias": [],
                "master_entity": master,
                "master_column": master_column_name,
                "detail_entity": (olapmapper.entity_mapper(self.entity.fact).pk(ctx).sqlcolumn.sqltable.name) if (hasattr(self.entity, "fact")) else self.pk(ctx).sqlcolumn.sqltable.name,
                "detail_column": (olapmapper.entity_mapper(self.entity.fact).pk(ctx).sqlcolumn.name) if (hasattr(self.entity, "fact")) else self.pk(ctx).sqlcolumn.name,
            })

        return joins

    def initialize(self, ctx):
        """
        Initialize the mapped entity, wrap the backing table in a caching
        layer and derive lookup_cols from the primary key when unset.
        """
        super().initialize(ctx)

        if (self.entity == None):
            raise Exception("No entity defined for %s" % self)
        ctx.comp.initialize(self.entity)

        # Apply a caching layer
        # TODO: shall at least be optional, also, columns are referenced to the backed table
        # another option is that everybody that wants caching adds the wrapper, or maybe that
        # tables natively support caching.
        if self._uses_table:
            self._sqltable = CachedSQLTable(sqltable=self.sqltable)

        # If lookup_cols is a string, split by commas
        if (isinstance(self.lookup_cols, str)):
            self.lookup_cols = [key.strip() for key in self.lookup_cols.split(",")]

        for mapping in self.mappings:
            try:
                if mapping.path is None:
                    raise Exception("Mapping entity is None: %s" % self)
            except TypeError as e:
                raise Exception("Could not initialize mapping '%s' of '%s': %s" % (mapping, self, e))

        if self._uses_table:
            # If no key, use pk()
            if self.lookup_cols is None:
                pk = self.pk(ctx)
                if (pk is None) or (pk.sqlcolumn.type == "AutoIncrement"):
                    # NOTE(review): lookup_cols may end up as [None] when no
                    # pk mapping exists — confirm intended behavior.
                    self.lookup_cols = [pk]

            ctx.comp.initialize(self._sqltable)

    def finalize(self, ctx):
        # Finalize owned components (cached table may be None when
        # _uses_table is False).
        if self._sqltable:
            ctx.comp.finalize(self._sqltable)
        ctx.comp.finalize(self.entity)
        super().finalize(ctx)

    def pk(self, ctx):
        """
        Return the primary key mapping (the mapping whose path ends in a
        Key attribute), or None when there is none or several.
        """
        # TODO: Remove the need for 'ctx': this is usable/used before context is initialized
        pk_mappings = [mapping for mapping in self.mappings if isinstance(mapping.path[-1], Key)]

        if (len(pk_mappings) > 1):
            # Fixed: logging.Logger.warn is deprecated, use warning().
            logger.warning("%s has multiple primary keys mapped: %s (ignoring)" % (self, pk_mappings))
            return None
        elif (len(pk_mappings) == 1):
            return pk_mappings[0]
        else:
            return None

    def query_aggregate(self, ctx, drills, cuts, limit=5000):
        """
        Run an aggregated query on the backing table: avg/sum per measure and
        a record count, grouped by `drills` and filtered by `cuts`
        (dimension name -> value). Returns the fetched rows.
        """
        mappings = self.sql_mappings(ctx)
        # NOTE(review): joins/pk are computed but currently unused here.
        joins = self.sql_joins(ctx, None)
        pk = self.pk(ctx)

        connection = self.sqltable.connection.connection()
        engine = self.sqltable.connection._engine

        # Build query
        Session = sessionmaker()
        Session.configure(bind=engine)
        session = Session()
        q = session.query()

        # Include measures
        for measure in [m for m in mappings if isinstance(m.field, Measure)]:
            sa_column = self.sqltable.sa_table.columns[measure.sqlcolumn.name]
            q = q.add_columns(func.avg(sa_column).label(measure.field.name + "_avg"))
            q = q.add_columns(func.sum(sa_column).label(measure.field.name + "_sum"))
        q = q.add_columns(func.count(self.sqltable.sa_table).label("record_count"))

        # Drills
        for dimension in [m for m in mappings if isinstance(m.field, Dimension)]:
            # We shoulld check the dimension-path here, with drills, and use key/lookup for drill
            if dimension.field.name in drills:
                sa_column = None
                try:
                    sa_column = self.sqltable.sa_table.columns[dimension.sqlcolumn.name]
                except KeyError as e:
                    raise ETLException("Unknown column in backend SQL table (table=%s, column=%s). Columns: %s" % (self.sqltable.sa_table, dimension.sqlcolumn.name, [c.name for c in self.sqltable.sa_table.columns])) from e
                q = q.add_columns(sa_column)
                q = q.group_by(sa_column)

        # Cuts
        # TODO: Filterng on any dimension attribute, not only the key
        # (ie filter cities with type A or icon B), but then again
        # that could be a different (nested) dimension.
        for dimension in [m for m in mappings if isinstance(m.field, Dimension)]:
            # We shoulld check the dimension-path here, with drills
            if dimension.field.name in cuts.keys():
                sa_column = self.sqltable.sa_table.columns[dimension.sqlcolumn.name]
                cut_value = cuts[dimension.field.name]
                q = q.filter(sa_column == cut_value)

        # Limit
        # Fixed: the `limit` parameter was ignored (5000 was hardcoded).
        q = q.limit(limit)

        statement = q.statement
        logger.debug("Statement: %s", str(statement).replace("\n", " "))
        rows = connection.execute(statement).fetchall()

        return rows

    def store(self, ctx, m):
        """
        Store message `m`: resolve evals, auto-store related entities, then
        look up / insert the row. Returns the stored row's primary key value
        (or None when the entity has no primary key mapping).
        """
        olapmapper = ctx.find(OlapMapper)[0]

        # Resolve evals
        Eval.process_evals(ctx, m, self.eval)

        # Store automatically or include dimensions
        if self.auto_store is not None:
            logger.debug("Storing automatically: %s" % (self.auto_store))
            for ast in self.auto_store:
                did = olapmapper.entity_mapper(ast).store(ctx, m)
                # FIXME: Review and use PK
                m[ast.name + "_id"] = did
        else:
            dimensions = self.entity.get_dimensions()
            if dimensions:
                logger.debug("Storing automatically: %s" % ([da.name for da in self.entity.get_dimensions()]))
                for dim_attr in self.entity.get_dimensions():
                    dim = dim_attr.dimension
                    mapper = olapmapper.entity_mapper(dim, False)
                    if mapper:
                        did = mapper.store(ctx, m)
                        # FIXME: shall use the correct foreign key column according to mappings
                        m[dim_attr.name + "_id"] = did

        logger.debug("Storing entity in %s (mode: %s, lookup: %s)" % (self, self.store_mode, self.lookup_cols))

        data = {}
        mappings = self.sql_mappings(ctx)

        # First try to look it up
        for mapping in mappings:
            if (mapping.sqlcolumn.name in self.lookup_cols):
                if (mapping.sqlcolumn.type != "AutoIncrement"):
                    try:
                        data[mapping.sqlcolumn.name] = m[mapping.sqlcolumn.name]
                    except KeyError as e:
                        raise ETLException("Could not find key '%s' on message when storing data in %s (fields: %s)." % (mapping.sqlcolumn.name, self.entity, sorted([str(k) for k in m.keys()]))) from e

        row = None
        if (self.store_mode == TableMapper.STORE_MODE_LOOKUP):
            row = self._sqltable.lookup(ctx, data)

        # Assemble the full column data from the message.
        for mapping in mappings:
            if (mapping.sqlcolumn.type != "AutoIncrement"):
                if mapping.sqlcolumn.name not in m:
                    raise Exception("Key '%s' does not exist in message when assigning data for column %s in %s (fields: %s)" % (mapping.field.name, mapping.sqlcolumn.name, self, [f for f in m.keys()]))
                data[mapping.sqlcolumn.name] = m[mapping.sqlcolumn.name]

        if (not row):
            if (ctx.debug2):
                logger.debug("Storing data in %s (data: %s)" % (self, data))
            if (self.store_mode in [TableMapper.STORE_MODE_LOOKUP, TableMapper.STORE_MODE_INSERT]):
                row = self._sqltable.insert(ctx, data)
            else:
                raise Exception("Update store mode used at %s (%s) not implemented (available 'lookup', 'insert')" % (self, self.store_mode))
        else:
            # Check row is identical to issue a warning
            # TODO: this shall be optional, check is expensive (no check, warning, fail)
            for mapping in mappings:
                if mapping.sqlcolumn.sqltable != self.sqltable:
                    continue
                if mapping.sqlcolumn.type != "AutoIncrement":
                    v1 = row[mapping.sqlcolumn.name]
                    v2 = data[mapping.sqlcolumn.name]
                    # Compare as strings when either side is a string.
                    if (isinstance(v1, str) or isinstance(v2, str)):
                        if not isinstance(v1, str): v1 = str(v1)
                        if not isinstance(v2, str): v2 = str(v2)
                    if v1 != v2:
                        # Give warning just one time for each field
                        if (mapping.sqlcolumn not in self._lookup_changed_fields):
                            # Fixed: logging.Logger.warn is deprecated, use warning().
                            logger.warning("%s looked up an entity which exists with different attributes (field=%s, existing_value=%r, tried_value=%r) (reported only once per field)" % (self, mapping.sqlcolumn, v1, v2))
                            self._lookup_changed_fields.append(mapping.sqlcolumn)

        pk = self.pk(ctx)
        return row[pk.sqlcolumn.name] if pk else None
class TableMapper(Component):
    """
    Abstract base class for Olap Entity SQL Mappers.

    An OlapSQLMapper is a contianer of OlapMappings, which associate Olap
    entities and attributes to SQL tables and columns (and aliases).

    NOTE(review): this definition appears to duplicate an earlier
    TableMapper definition in this module — confirm which one is intended.
    """

    __metaclass__ = ABCMeta

    # Store modes, mirrored from SQLTable. store() implements 'lookup'
    # (lookup then insert if missing) and 'insert'; 'upsert' raises.
    STORE_MODE_LOOKUP = SQLTable.STORE_MODE_LOOKUP
    STORE_MODE_INSERT = SQLTable.STORE_MODE_INSERT
    STORE_MODE_UPSERT = SQLTable.STORE_MODE_UPSERT

    def __init__(self, entity, sqltable, mappings=None, lookup_cols=None):
        """
        Lookup columns are used to lookup an entity when its primary key is not
        available (if the attribute mapped to the primary key is available, it
        will be used instead for lookups).
        """
        super().__init__()
        self.eval = []
        self.entity = entity
        self.sqltable = sqltable
        self.mappings = mappings if mappings else []
        self.lookup_cols = lookup_cols
        self.auto_store = None
        self.store_mode = TableMapper.STORE_MODE_LOOKUP
        self._lookup_changed_fields = []
        self._uses_table = True

    def __str__(self, *args, **kwargs):
        # Include the mapped entity name for easier debugging/log output.
        if (self.entity != None):
            return "%s(%s)" % (self.__class__.__name__, self.entity.name)
        else:
            return super().__str__(*args, **kwargs)

    def sql_mappings(self, ctx):
        # Returns the OlapSQLMappings for this entity: its own mappings plus
        # those of related dimensions (recursively), with the dimension
        # attribute name prefixed to each path and table alias.
        olapmapper = ctx.find(OlapMapper)[0]

        # Return own mappings
        result = []
        for mapping in self.mappings:
            #sqlmapping = OlapSQLMapping(mapping. self.entity, mapping.entity, self.sqltable.name if self.sqltable else None, mapping.sqlcolumn, mapping.function)
            sqlmapping = OlapSQLMapping(
                [p.name for p in mapping.path], None, mapping.path[-1],
                self.sqltable, mapping.sqlcolumn,
                [], mapping.sqlcolumn.name, mapping.function)
            result.append(sqlmapping)

        # Add related dimension mappings
        # TODO: allow for a "publish: False" setting to avoid publishing dimensions recursively?
        for dimensionattribute in self.entity.get_dimensions():
            dimension = dimensionattribute.dimension
            mapper = olapmapper.entity_mapper(dimension, fail=False)
            if mapper:
                dim_mappings = mapper.sql_mappings(ctx)
                for mapping in dim_mappings:
                    sqlmapping = OlapSQLMapping(
                        [dimensionattribute.name] + mapping.path, mapping.entity, mapping.attribute,
                        mapping.sqltable, mapping.sqlcolumn,
                        [dimensionattribute.name] + mapping.sqltable_alias, mapping.sqlcolumn_alias,
                        mapping.function)
                    result.append(sqlmapping)

        return result

    def sql_joins(self, ctx, master=None):
        """ Joins related to this entity. """
        olapmapper = ctx.find(OlapMapper)[0]

        # Collect joins recursively through dimensions, prefixing aliases.
        joins = []
        for dim_attr in self.entity.get_dimensions():
            dim = dim_attr.dimension
            dim_mapper = olapmapper.entity_mapper(dim, fail=False)
            if dim_mapper:
                entity_joins = dim_mapper.sql_joins(ctx, self.entity)
                for join in entity_joins:
                    join['alias'] = [dim_attr.name] + join['alias']
                    joins.append(join)

        # Embedded mappings
        pk = self.pk(ctx)
        if pk is None or pk.sqlcolumn is None:
            pass
        elif master is not None:
            # Search column name of the foreign key that references this primary key
            master_column_name = "<EXPORT ERROR>"  #self.entity.name
            for column in olapmapper.entity_mapper(master).sqltable.columns:
                if hasattr(column, "fk_sqlcolumn"):
                    if column.fk_sqlcolumn == pk.sqlcolumn:
                        master_column_name = column.name

            joins.append({
                "alias": [],
                "master_entity": master,
                "master_column": master_column_name,
                "detail_entity": (olapmapper.entity_mapper(self.entity.fact).pk(ctx).sqlcolumn.sqltable.name) if (hasattr(self.entity, "fact")) else self.pk(ctx).sqlcolumn.sqltable.name,
                "detail_column": (olapmapper.entity_mapper(self.entity.fact).pk(ctx).sqlcolumn.name) if (hasattr(self.entity, "fact")) else self.pk(ctx).sqlcolumn.name,
            })

        return joins

    def initialize(self, ctx):
        # Initializes the entity, wraps the backing table in a caching layer
        # and derives lookup_cols from the primary key when unset.
        super().initialize(ctx)

        if (self.entity == None):
            raise Exception("No entity defined for %s" % self)
        ctx.comp.initialize(self.entity)

        # Apply a caching layer
        # TODO: shall at least be optional, also, columns are referenced to the backed table
        # another option is that everybody that wants caching adds the wrapper, or maybe that
        # tables natively support caching.
        if self._uses_table:
            self._sqltable = CachedSQLTable(sqltable=self.sqltable)
            #self._sqltable = self.sqltable

        # Assert that the sqltable is clean
        #if (len(self._sqltable.columns) != 0): raise AssertionError("SQLTable '%s' columns shall be empty!" % self._sqltable.name)

        # If lookup_cols is a string, split by commas
        if (isinstance(self.lookup_cols, str)):
            self.lookup_cols = [ key.strip() for key in self.lookup_cols.split(",") ]

        #Mappings.includes(ctx, self.mappings)
        for mapping in self.mappings:
            try:
                if mapping.path is None:
                    raise Exception("Mapping entity is None: %s" % self)
            except TypeError as e:
                raise Exception(
                    "Could not initialize mapping '%s' of '%s': %s" % (mapping, self, e))

        if self._uses_table:
            # If no key, use pk()
            if self.lookup_cols is None:
                pk = self.pk(ctx)
                if (pk is None) or (pk.sqlcolumn.type == "AutoIncrement"):
                    #logger.warning("No lookup cols defined for %s", self)
                    # else raise Exception("No lookup cols defined for %s" % self)
                    # NOTE(review): lookup_cols may end up as [None] here —
                    # confirm intended behavior.
                    self.lookup_cols = [pk]

            ctx.comp.initialize(self._sqltable)

    def finalize(self, ctx):
        # Finalizes owned components; _sqltable may be unset/None when the
        # mapper does not use a table.
        if self._sqltable:
            ctx.comp.finalize(self._sqltable)
        ctx.comp.finalize(self.entity)
        super().finalize(ctx)

    def pk(self, ctx):
        #Returns the primary key mapping.
        # The pk mapping is the one whose path ends in a Key attribute.
        # Returns None when there is none, or when several are found.
        # TODO: Remove the need for 'ctx': this is usable/used before context is initialized
        #mappings = self._mappings(ctx)
        pk_mappings = [ mapping for mapping in self.mappings if isinstance(mapping.path[-1], Key) ]

        if (len(pk_mappings) > 1):
            #raise Exception("%s has multiple primary keys mapped: %s" % (self, pk_mappings))
            # NOTE(review): logger.warn is deprecated — should be warning().
            logger.warn("%s has multiple primary keys mapped: %s (ignoring)" % (self, pk_mappings))
            return None
        elif (len(pk_mappings) == 1):
            return pk_mappings[0]
        #elif (len(pk_mappings) == 0 and len(mappings) == 1):
        #    return mappings[0]
        else:
            return None

    def query_aggregate(self, ctx, drills, cuts, limit=5000):
        # Runs an aggregated query on the backing table: avg/sum per measure
        # and a record count, grouped by `drills` and filtered by `cuts`
        # (dimension name -> value). Returns the fetched rows.
        mappings = self.sql_mappings(ctx)
        joins = self.sql_joins(ctx, None)
        pk = self.pk(ctx)

        connection = self.sqltable.connection.connection()
        engine = self.sqltable.connection._engine

        # Build query
        Session = sessionmaker()
        Session.configure(bind=engine)
        session = Session()
        q = session.query()
        #q = q.add_columns(self.sqltable.sa_table.columns['is_bot_id'].label("x"))
        #q = q.add_entity(self.sqltable.sa_table)

        # Include measures
        for measure in [m for m in mappings if isinstance(m.field, Measure)]:
            sa_column = self.sqltable.sa_table.columns[measure.sqlcolumn.name]
            q = q.add_columns(
                func.avg(sa_column).label(measure.field.name + "_avg"))
            q = q.add_columns(
                func.sum(sa_column).label(measure.field.name + "_sum"))
        q = q.add_columns(
            func.count(self.sqltable.sa_table).label("record_count"))

        # Drills
        for dimension in [ m for m in mappings if isinstance(m.field, Dimension) ]:
            # We shoulld check the dimension-path here, with drills, and use key/lookup for drill
            if dimension.field.name in drills:
                sa_column = None
                try:
                    sa_column = self.sqltable.sa_table.columns[
                        dimension.sqlcolumn.name]
                except KeyError as e:
                    raise ETLException(
                        "Unknown column in backend SQL table (table=%s, column=%s). Columns: %s" % (self.sqltable.sa_table, dimension.sqlcolumn.name, [c.name for c in self.sqltable.sa_table.columns]))
                q = q.add_columns(sa_column)
                q = q.group_by(sa_column)

        # Cuts
        # TODO: Filterng on any dimension attribute, not only the key
        # (ie filter cities with type A or icon B), but then again
        # that could be a different (nested) dimension.
        for dimension in [ m for m in mappings if isinstance(m.field, Dimension) ]:
            # We shoulld check the dimension-path here, with drills
            if dimension.field.name in cuts.keys():
                sa_column = self.sqltable.sa_table.columns[
                    dimension.sqlcolumn.name]
                cut_value = cuts[dimension.field.name]
                q = q.filter(sa_column == cut_value)

        # Limit
        # NOTE(review): the `limit` argument is ignored here — the literal
        # 5000 is always used. Confirm and fix.
        q = q.limit(5000)

        statement = q.statement
        logger.debug("Statement: %s", str(statement).replace("\n", " "))
        rows = connection.execute(statement).fetchall()

        return rows

    def store(self, ctx, m):
        # Stores message m: resolves evals, auto-stores related entities,
        # then looks up / inserts the row. Returns the stored row's primary
        # key value (or None when no pk mapping exists).
        olapmapper = ctx.find(OlapMapper)[0]

        # Resolve evals
        Eval.process_evals(ctx, m, self.eval)

        # Store automatically or include dimensions
        if self.auto_store is not None:
            logger.debug("Storing automatically: %s" % (self.auto_store))
            for ast in self.auto_store:
                did = olapmapper.entity_mapper(ast).store(ctx, m)
                # FIXME: Review and use PK
                m[ast.name + "_id"] = did
        else:
            dimensions = self.entity.get_dimensions()
            if dimensions:
                logger.debug("Storing automatically: %s" % ([da.name for da in self.entity.get_dimensions()]))
                for dim_attr in self.entity.get_dimensions():
                    dim = dim_attr.dimension
                    mapper = olapmapper.entity_mapper(dim, False)
                    if mapper:
                        did = mapper.store(ctx, m)
                        # FIXME: shall use the correct foreign key column according to mappings
                        m[dim_attr.name + "_id"] = did

        logger.debug("Storing entity in %s (mode: %s, lookup: %s)" % (self, self.store_mode, self.lookup_cols))

        data = {}
        mappings = self.sql_mappings(ctx)

        # First try to look it up
        for mapping in mappings:
            if (mapping.sqlcolumn.name in self.lookup_cols):
                if (mapping.sqlcolumn.type != "AutoIncrement"):
                    try:
                        data[mapping.sqlcolumn.name] = m[
                            mapping.sqlcolumn.name]
                    except KeyError as e:
                        raise ETLException(
                            "Could not find key '%s' on message when storing data in %s (fields: %s)." % (mapping.sqlcolumn.name, self.entity, sorted([str(k) for k in m.keys()])))

        row = None
        if (self.store_mode == TableMapper.STORE_MODE_LOOKUP):
            row = self._sqltable.lookup(ctx, data)

        # Assemble the full column data from the message.
        for mapping in mappings:
            #print(mapping.sqlcolumn.name + "->" + mapping.field.name)
            if (mapping.sqlcolumn.type != "AutoIncrement"):
                if mapping.sqlcolumn.name not in m:
                    raise Exception(
                        "Key '%s' does not exist in message when assigning data for column %s in %s (fields: %s)" % (mapping.field.name, mapping.sqlcolumn.name, self, [f for f in m.keys()]))
                data[mapping.sqlcolumn.name] = m[mapping.sqlcolumn.name]

        if (not row):
            if (ctx.debug2):
                logger.debug("Storing data in %s (data: %s)" % (self, data))
            if (self.store_mode in [ TableMapper.STORE_MODE_LOOKUP, TableMapper.STORE_MODE_INSERT ]):
                row = self._sqltable.insert(ctx, data)
            else:
                raise Exception(
                    "Update store mode used at %s (%s) not implemented (available 'lookup', 'insert')" % (self, self.store_mode))
        else:
            # Check row is identical to issue a warning
            # TODO: this shall be optional, check is expensive (no check, warning, fail)
            for mapping in mappings:
                if mapping.sqlcolumn.sqltable != self.sqltable:
                    continue
                if mapping.sqlcolumn.type != "AutoIncrement":
                    v1 = row[mapping.sqlcolumn.name]
                    v2 = data[mapping.sqlcolumn.name]
                    # Compare as strings when either side is a string.
                    if (isinstance(v1, str) or isinstance(v2, str)):
                        if not isinstance(v1, str): v1 = str(v1)
                        if not isinstance(v2, str): v2 = str(v2)
                    if v1 != v2:
                        # Give warning just one time for each field
                        if (mapping.sqlcolumn not in self._lookup_changed_fields):
                            # NOTE(review): logger.warn is deprecated — should be warning().
                            logger.warn(
                                "%s looked up an entity which exists with different attributes (field=%s, existing_value=%r, tried_value=%r) (reported only once per field)" % (self, mapping.sqlcolumn, v1, v2))
                            self._lookup_changed_fields.append(
                                mapping.sqlcolumn)

        pk = self.pk(ctx)
        return row[pk.sqlcolumn.name] if pk else None
class TableMapper(Component):
    """
    Abstract class.

    Maps an OLAP entity to a SQL table: builds column definitions from the
    configured `mappings`, and stores messages via lookup/insert on a
    CachedSQLTable. Subclasses provide the concrete entity/mapping behavior.
    """

    __metaclass__ = ABCMeta

    # Store modes, mirrored from SQLTable for convenience
    STORE_MODE_LOOKUP = SQLTable.STORE_MODE_LOOKUP
    STORE_MODE_INSERT = SQLTable.STORE_MODE_INSERT
    STORE_MODE_UPSERT = SQLTable.STORE_MODE_UPSERT

    # Configuration attributes (set by the container / subclass)
    entity = None           # OLAP entity being mapped
    connection = None       # database connection component
    table = None            # target SQL table name
    eval = []               # evals resolved on each message before storing
    mappings = []           # list of mapping dicts (name/column/type/value/pk/...)
    lookup_cols = None      # column names used to look up existing rows (list or comma-separated string)
    auto_store = None       # entities to store automatically before this one
    store_mode = STORE_MODE_LOOKUP

    # Internal state
    _sqltable = None                # CachedSQLTable created in initialize()
    _lookup_changed_fields = []     # columns already warned about on lookup mismatch
    _uses_table = True              # subclasses may disable the backing table

    olapmapper = None

    def __init__(self):
        super(TableMapper, self).__init__()
        # Shadow the mutable class-level defaults with per-instance lists
        self.eval = []
        self.mappings = []
        self._lookup_changed_fields = []

    def __str__(self, *args, **kwargs):
        # Include the mapped entity name when available, for log readability
        if (self.entity != None):
            return "%s(%s)" % (self.__class__.__name__, self.entity.name)
        else:
            return super(TableMapper, self).__str__(*args, **kwargs)

    def _mappings_join(self, ctx):
        """
        Return the mapping(s) a master table needs to join against this
        entity (a single foreign-key column "<entity>_id").
        Raises if this mapper has no primary key.
        """
        pk = self.pk(ctx)
        if pk == None:
            raise Exception("%s has no primary key and cannot provide join columns." % self)

        # An AutoIncrement PK is referenced from the master side as a plain Integer
        ctype = pk["type"]
        if (ctype == "AutoIncrement"):
            ctype = "Integer"

        return [{
            "entity": self.entity,
            "name": self.entity.name,
            "column": self.entity.name + "_id",
            "type": ctype,
            #"value": '${ m["' + self.entity.name + "_id" + '"] }'
            # Default value interpolates the "<entity>_id" key from the message
            "value": pk['value'] if (pk['value']) else '${ m["' + self.entity.name + "_id" + '"] }'
        }]

    def _mappings(self, ctx):
        """
        Note: _ensure_mappings() shall be called only as the last step in the
        eval resolution chain, to avoid setting defaults before all consumers
        had an opportunity to override values.
        """
        #logger.debug("Calculating eval (TableMapper) for %s" % self)
        # Work on copies so callers can mutate the result safely
        mappings = [mapping.copy() for mapping in self.mappings]
        return self._ensure_mappings(ctx, mappings)

    def _joins(self, ctx, master=None):
        """
        Joins related to this entity.

        When a master is given, returns the master->detail join description;
        otherwise returns an empty list.
        """
        if (master != None):
            return [{
                "master_entity": master,
                "master_column": self.entity.name + "_id",
                # Facts join through their fact's PK column; other entities through their own PK
                "detail_entity": self.entity,
                "detail_column": (self.olapmapper.entity_mapper(self.entity.fact).pk(ctx)["column"]) if (hasattr(self.entity, "fact")) else self.pk(ctx)['column'],
            }]
        else:
            return []

    def _extend_mappings(self, ctx, mappings, newmappings):
        """
        Merge `newmappings` into `mappings` in place: append unseen mappings,
        and for matches (same name and entity name) fill in missing properties
        only — existing values are never overwritten.
        """
        for nm in newmappings:
            found = None
            for m in mappings:
                # Validate both sides before comparing
                if (not "entity" in m):
                    raise Exception("No entity defined for mapping %s" % m)
                if (not "entity" in nm):
                    raise Exception("No entity defined for mapping %s" % nm)
                if (not isinstance(m["entity"], Component)):
                    raise Exception("No correct entity defined for mapping %s" % m)
                if (not isinstance(nm["entity"], Component)):
                    raise Exception("No correct entity defined for mapping %s" % nm)
                if (m["name"] == nm["name"] and m["entity"].name == nm["entity"].name):
                    found = m
                    break

            if not found:
                mappings.append(nm)
            else:
                # Update missing properties
                # NOTE(review): relies on `m` still referencing the matched mapping
                # (m == found after the break above)
                if (not "type" in m and "type" in nm):
                    m["type"] = nm["type"]
                if (not "value" in m and "value" in nm):
                    m["value"] = nm["value"]
                if (not "label" in m and "label" in nm):
                    m["label"] = nm["label"]
                if (not "column" in m and "column" in nm):
                    m["column"] = nm["column"]

    def _ensure_mappings(self, ctx, mappings):
        """
        Normalize mapping dicts in place: default "pk" to False, "column" to
        the mapping name, "value" to None, and "type" to AutoIncrement (for
        value-less PKs) or String. Returns the same list.
        """
        for mapping in mappings:
            mapping["pk"] = (False if (not "pk" in mapping) else parsebool(mapping["pk"]))
            if (not "column" in mapping):
                mapping["column"] = mapping["name"]
            if (not "value" in mapping):
                mapping["value"] = None
            # A PK with no explicit type and no value defaults to AutoIncrement
            if (mapping["pk"] and not "type" in mapping):
                if (not "value" in mapping or mapping["value"] == None):
                    mapping["type"] = "AutoIncrement"
            if (not "column" in mapping):
                mapping["column"] = mapping["name"]
            if (not "type" in mapping):
                mapping["type"] = "String"

        return mappings

    def initialize(self, ctx):
        """
        Validate configuration, build the backing CachedSQLTable from the
        resolved mappings and initialize dependent components.
        """
        super(TableMapper, self).initialize(ctx)

        if self._uses_table:
            if (self.entity == None):
                raise Exception("No entity defined for %s" % self)
            if (self.connection == None):
                raise Exception("No connection defined for %s" % self)

            ctx.comp.initialize(self.entity)
            ctx.comp.initialize(self.connection)

            self._sqltable = CachedSQLTable()
            self._sqltable.name = self.table
            self._sqltable.connection = self.connection

            # Assert that the sqltable is clean
            #if (len(self._sqltable.columns) != 0): raise AssertionError("SQLTable '%s' columns shall be empty!" % self._sqltable.name)

        # If lookup_cols is a string, split by commas
        if (isinstance(self.lookup_cols, basestring)):
            self.lookup_cols = [key.strip() for key in self.lookup_cols.split(",")]

        Mappings.includes(ctx, self.mappings)
        for mapping in self.mappings:
            try:
                # Default each mapping's entity to this mapper's entity
                if (not "entity" in mapping):
                    mapping["entity"] = self.entity
            except TypeError as e:
                # Mapping is not a dict-like object
                raise Exception("Could not initialize mapping '%s' of '%s': %s" % (mapping, self, e))

        if self._uses_table:
            # Create one SQL column per resolved mapping
            mappings = self._mappings(ctx)
            for mapping in mappings:
                logger.debug("%s adding column from OLAP mapping: %s" % (self, mapping))
                self._sqltable.columns.append({
                    "name": mapping["column"],
                    "type": mapping["type"],
                    "pk": mapping["pk"]
                })

            # If no key, use pk()
            if (self.lookup_cols == None):
                pk = self.pk(ctx)
                # An AutoIncrement PK cannot serve as a lookup key
                if ((pk == None) or (pk["type"] == "AutoIncrement")):
                    raise Exception("No lookup cols defined for %s (use lookup_cols=[...])" % self)
                self.lookup_cols = [pk["name"]]

            ctx.comp.initialize(self._sqltable)

    def finalize(self, ctx):
        # Finalize in reverse order of initialization
        if self._uses_table:
            ctx.comp.finalize(self._sqltable)
            ctx.comp.finalize(self.connection)
            ctx.comp.finalize(self.entity)
        super(TableMapper, self).finalize(ctx)

    def pk(self, ctx):
        #Returns the primary key mapping.
        # Returns None when no mapping is flagged pk; raises on multiple PKs.
        pk_mappings = []
        for mapping in self._mappings(ctx):
            if ("pk" in mapping):
                if parsebool(mapping["pk"]):
                    pk_mappings.append(mapping)

        if (len(pk_mappings) > 1):
            raise Exception("%s has multiple primary keys mapped: %s" % (self, pk_mappings))
        elif (len(pk_mappings) == 1):
            return pk_mappings[0]
        else:
            return None

    def store(self, ctx, m):
        """
        Store the message `m` in the mapped table (lookup or insert, per
        `store_mode`) and return the value of the primary key column.

        Related entities are stored first (either the `auto_store` list or,
        for Facts, the entity's dimensions) and their ids written back into
        the message as "<name>_id".
        """
        # Resolve evals
        Eval.process_evals(ctx, m, self.eval)

        # Store automatically or include dimensions
        if (self.auto_store != None):
            logger.debug("Storing automatically: %s" % (self.auto_store))
            for ast in self.auto_store:
                did = self.olapmapper.entity_mapper(ast).store(ctx, m)
                # TODO: Review and use PK properly
                if (did != None):
                    m[ast.name + "_id"] = did
        elif (isinstance(self.entity, cubetl.olap.Fact)):
            logger.debug("Storing automatically: %s" % (self.entity.dimensions))
            for dim in self.entity.dimensions:
                did = self.olapmapper.entity_mapper(dim).store(ctx, m)
                # TODO: review this too, or use rarer prefix
                if (did != None):
                    m[dim.name + "_id"] = did

        logger.debug("Storing entity in %s (mode: %s, lookup: %s)" % (self, self.store_mode, self.lookup_cols))

        data = {}
        mappings = self._mappings(ctx)

        # First try to look it up: collect only the lookup columns' values
        for mapping in mappings:
            if (mapping["column"] in self.lookup_cols):
                if (mapping["type"] != "AutoIncrement"):
                    if (mapping["value"] == None):
                        data[mapping["column"]] = m[mapping["name"]]
                    else:
                        # Mappings with an explicit value are interpolated against the message
                        data[mapping["column"]] = ctx.interpolate(m, mapping["value"])

        row = None
        if (self.store_mode == TableMapper.STORE_MODE_LOOKUP):
            row = self._sqltable.lookup(ctx, data)

        # Fill the full data dict (all non-autoincrement columns)
        for mapping in mappings:
            if (mapping["type"] != "AutoIncrement"):
                if (mapping["value"] == None):
                    if (not mapping["name"] in m):
                        raise Exception("Field '%s' does not exist in message when assigning data for column %s in %s" % (mapping["name"], mapping["column"], self))
                    data[mapping["column"]] = m[mapping["name"]]
                else:
                    data[mapping["column"]] = ctx.interpolate(m, mapping["value"])

        if (not row):
            # Not found (or not looking up): insert
            if (ctx.debug2):
                logger.debug("Storing data in %s (data: %s)" % (self, data))
            if (self.store_mode in [TableMapper.STORE_MODE_LOOKUP, TableMapper.STORE_MODE_INSERT]):
                row = self._sqltable.insert(ctx, data)
            else:
                raise Exception("Update store mode used at %s (%s) not implemented (available 'lookup', 'insert')" % (self, self.store_mode))
        else:
            # Check row is identical
            for mapping in self._mappings(ctx):
                if (mapping["type"] != "AutoIncrement"):
                    v1 = row[mapping['column']]
                    v2 = data[mapping['column']]
                    # Compare as strings when either side is a string
                    if (isinstance(v1, basestring) or isinstance(v2, basestring)):
                        if (not isinstance(v1, basestring)):
                            v1 = str(v1)
                        if (not isinstance(v2, basestring)):
                            v2 = str(v2)
                    if (v1 != v2):
                        # Warn only once per column
                        if (mapping["column"] not in self._lookup_changed_fields):
                            logger.warn("%s looked up an entity which exists with different attributes (field=%s, existing_value=%r, tried_value=%r) (reported only once per field)" % (self, mapping["column"], v1, v2))
                            self._lookup_changed_fields.append(mapping["column"])

        return row[self.pk(ctx)["column"]]