def __subquery__(cls, value, timestamp):
    """ Return a SQL SELECT query to use as a subquery within a fact
    INSERT. Does not append parentheses or a LIMIT clause.

    The query selects the primary key of the dimension row whose natural
    key equals `value` and whose `applicable_from` is the latest one that
    is not later than `timestamp`.

    :param value: natural key value to look up; it is matched against every
        natural key in `cls.__naturalkeys__` whose declared type is the
        value's type or a base class of it.
    :param timestamp: upper bound for `applicable_from`; it is interpolated
        into the query between double quotes exactly as given.
    :return: the SELECT statement as a string.
    :raise ValueError: if the type of `value` matches no natural key.
    """
    value_type = type(value)
    # We also check for subclasses for situations like basestring, which
    # matches on either str or unicode.
    natural_keys = [key for key in cls.__naturalkeys__
                    if (key.type is value_type or
                        issubclass(value_type, key.type))]
    if not natural_keys:
        raise ValueError("Value type '%s' does not match type of any "
                         "natural key for dimension "
                         "'%s'" % (value_type.__name__, cls.__name__))

    # The alternatives are wrapped in parentheses because AND binds more
    # tightly than OR in SQL. Without them, when several natural keys match,
    # the `applicable_from` conditions would only apply to the last
    # alternative instead of to the whole selector.
    selector = "(%s)" % " OR ".join(
        "%s = %s" % (escaped(key.name), dump(value))
        for key in natural_keys)

    sql_template = (
        'SELECT {primary_key} FROM {table_name} '
        'WHERE {selector} '
        'AND `applicable_from` = (SELECT max(`applicable_from`) '
        'FROM {table_name} '
        'WHERE {selector} AND `applicable_from` <= "{timestamp}")'
    )
    # NOTE(review): `timestamp` is not passed through dump(); callers are
    # presumably expected to supply a value that is safe to embed - confirm.
    sql = sql_template.format(
        primary_key=escaped(cls.__primarykey__.name),
        table_name=escaped(cls.__tablename__),
        selector=selector,
        timestamp=timestamp
    )
    return sql
def insert(cls, *instances):
    """ Insert one or more instances into the table as records.

    The instances are split into batches by `cls.batch`, and every batch is
    sent as a single multi-row statement covering all columns that are not
    an `AutoColumn`. A successful batch is committed before the next one
    starts.

    If a batch fails with a `BrokenPipeError` on the first attempt, the
    connection is closed and the batch is tried once more on the connection
    returned by a new `Warehouse.get()` call. Any other failure (or a
    failure on the second attempt) is logged and aborts the whole insert,
    skipping the remaining batches.

    :param instances: the records to insert; with no instances this is a
        no-op.
    :return: None
    """
    if not instances:
        return

    columns = [column for column in cls.__columns__
               if not isinstance(column, AutoColumn)]
    sql = "%s INTO %s (\n %s\n)\n" % (
        cls.INSERT, escaped(cls.__tablename__),
        ",\n ".join(escaped(column.name) for column in columns))

    for iteration, batch in enumerate(cls.batch(instances), start=1):
        log.debug('Inserting batch %s' % (iteration),
                  extra={"table": cls.__tablename__})

        # One parenthesised value tuple per instance, joined in a single
        # pass rather than by repeated string concatenation.
        rows = [
            " (\n %s\n)" % ",\n ".join(
                dump(instance[column.name]) for column in columns)
            for instance in batch
        ]
        insert_statement = sql
        if rows:
            insert_statement += "VALUES" + ",".join(rows)

        # At most two attempts per batch.
        for i in range(1, 3):
            connection = Warehouse.get()
            try:
                cursor = connection.cursor()
                try:
                    cursor.execute(insert_statement)
                finally:
                    # Always release the cursor, even when execute() fails.
                    # An error raised by close() is handled by the same
                    # except clause below.
                    cursor.close()
            except Exception as e:
                # classify_error runs before the class check below.
                classify_error(e)
                if e.__class__ == BrokenPipeError and i == 1:
                    log.info(
                        'Trying once more with a fresh connection',
                        extra={"table": cls.__tablename__})
                    connection.close()
                else:
                    log.error(e)
                    return
            else:
                connection.commit()
                break

    log.debug('Finished updating %s' % cls.__tablename__,
              extra={"table": cls.__tablename__})
def insert(cls, *instances):
    """ Insert one or more instances into the table as records.

    Every column that is not an `AutoColumn` is written. The instances are
    divided into batches by `cls.batch`; each batch becomes one multi-row
    statement and is committed on success.

    A batch that fails with a `BrokenPipeError` on its first attempt is
    retried once after closing the connection and asking `Warehouse.get()`
    for a connection again. Any other failure, or a failure of the retry,
    is logged and ends the insert without processing further batches.

    :param instances: the records to insert; nothing happens when no
        instances are given.
    :return: None
    """
    if not instances:
        return

    columns = [column for column in cls.__columns__
               if not isinstance(column, AutoColumn)]
    sql = "%s INTO %s (\n %s\n)\n" % (
        cls.INSERT, escaped(cls.__tablename__),
        ",\n ".join(escaped(column.name) for column in columns))

    batches = cls.batch(instances)
    for iteration, batch in enumerate(batches, start=1):
        log.debug('Inserting batch %s' % (iteration),
                  extra={"table": cls.__tablename__})

        # Render each instance as one value tuple, then assemble the
        # statement with a single join.
        value_tuples = []
        for instance in batch:
            values = [dump(instance[column.name]) for column in columns]
            value_tuples.append(" (\n %s\n)" % ",\n ".join(values))
        insert_statement = sql
        if value_tuples:
            insert_statement += "VALUES" + ",".join(value_tuples)

        # First attempt plus at most one retry.
        for i in range(1, 3):
            connection = Warehouse.get()
            try:
                cursor = connection.cursor()
                try:
                    cursor.execute(insert_statement)
                finally:
                    # Close the cursor on failure as well as on success so
                    # it is not leaked; errors from close() fall through to
                    # the handler below.
                    cursor.close()
            except Exception as e:
                # The error is classified before its class is inspected.
                classify_error(e)
                if e.__class__ == BrokenPipeError and i == 1:
                    log.info(
                        'Trying once more with a fresh connection',
                        extra={"table": cls.__tablename__}
                    )
                    connection.close()
                else:
                    log.error(e)
                    return
            else:
                connection.commit()
                break

    log.debug('Finished updating %s' % cls.__tablename__,
              extra={"table": cls.__tablename__})
def insert(cls, *instances):
    """ Insert fact instances (overridden to handle Dimensions correctly)

    Values of `DimensionKey` columns are written as a parenthesised
    subquery produced by the dimension's `__subquery__`, unless the value
    is falsy and the column is optional, in which case the dumped value is
    written directly. All other columns are written as dumped values.

    Each batch from `cls.batch` is executed as one statement. On failure
    the error and the statement are logged and the connection is rolled
    back; on success the connection is committed.
    """
    if not instances:
        return

    insertable = [col for col in cls.__columns__
                  if not isinstance(col, AutoColumn)]
    column_list = ",\n ".join(escaped(col.name) for col in insertable)
    header = "%s INTO %s (\n %s\n)\n" % (
        cls.INSERT, escaped(cls.__tablename__), column_list)

    def render(instance, column):
        # SQL text for a single column value of a single instance.
        value = instance[column.name]
        if isinstance(column, DimensionKey) and (
                value or not column.optional):
            # TODO This is a bit messy - shouldn't have to pass the
            # instance back in.
            return "(%s)" % column.dimension.__subquery__(
                value,
                instance.__dimension_selector__.timestamp(instance)
            )
        return dump(value)

    for number, batch in enumerate(cls.batch(instances), start=1):
        log.debug('Inserting batch %s' % (number),
                  extra={"table": cls.__tablename__})

        tuples = [
            " (\n %s\n)" % ",\n ".join(
                [render(instance, column) for column in insertable])
            for instance in batch
        ]
        insert_statement = header
        if tuples:
            insert_statement += "VALUES" + ",".join(tuples)

        connection = Warehouse.get()
        try:
            with closing(connection.cursor()) as cursor:
                cursor.execute(insert_statement)
        except Exception as error:
            classify_error(error)
            log.error(error)
            log.error(insert_statement)
            connection.rollback()
        else:
            connection.commit()
def expression(self):
    """ Build the SQL text describing this column.

    The result is the escaped column name and the type expression,
    followed - each only when applicable - by "NOT NULL" (column is not
    optional), the default clause, and a COMMENT clause, all separated by
    single spaces.
    """
    required = [escaped(self.name), self.type_expression]
    extras = (
        "" if self.optional else "NOT NULL",
        self.default_clause,
        "COMMENT %s" % dump(self.comment) if self.comment else "",
    )
    # Only the extra clauses that are actually present are emitted.
    return " ".join(required + [clause for clause in extras if clause])
def expression(self):
    """ Build the SQL text for this key column.

    This is the parent class's expression followed by a comma and a
    FOREIGN KEY clause that references the primary key column of the
    dimension's table.
    """
    target = self.dimension
    local_column = escaped(self.name)
    remote_table = escaped(target.__tablename__)
    remote_column = escaped(target.__primarykey__.name)
    constraint = "FOREIGN KEY (%s) REFERENCES %s (%s)" % (
        local_column, remote_table, remote_column)
    column_definition = super(DimensionKey, self).expression
    return column_definition + ", " + constraint
def __init__(self, *args, **kwargs):
    """ Set the 'hash_key' item to a raw SQL expression.

    The expression is UNHEX(SHA1(...)) over the composite key columns
    joined with commas by CONCAT_WS, where every column is wrapped in
    IFNULL so that NULL is replaced by the string 'NULL'.

    Positional and keyword arguments are accepted but not used here.
    """
    null_safe_columns = ', '.join(
        "IFNULL(%s,'NULL')" % escaped(key_column.name)
        for key_column in self.__compositekey__
    )
    hash_expression = "UNHEX(SHA1(CONCAT_WS(',', %s)))" % null_safe_columns
    self['hash_key'] = raw_sql(hash_expression)