Beispiel #1
0
    def __subquery__(cls, value, timestamp):
        """ Return a SQL SELECT query to use as a subquery within a
        fact INSERT. Does not append parentheses or a LIMIT clause.

        The query selects the primary key of the dimension record whose
        natural key equals `value` and whose `applicable_from` is the
        latest one that is not later than `timestamp`.

        :param value: natural key value to look up; its type decides which
            natural key columns are compared against it.
        :param timestamp: upper bound for `applicable_from`; it is
            interpolated verbatim between double quotes.
        :raises ValueError: if no natural key of this dimension has a type
            matching the type of `value`.
        """
        value_type = type(value)
        # We also check for subclasses for situations like basestring, which
        # matches on either str or unicode.
        natural_keys = [key for key in cls.__naturalkeys__
                        if (key.type is value_type or
                            issubclass(value_type, key.type))]
        if not natural_keys:
            raise ValueError("Value type '%s' does not match type of any "
                             "natural key for dimension "
                             "'%s'" % (value_type.__name__, cls.__name__))

        # The value is the same for every candidate key, so render it once.
        dumped_value = dump(value)
        # The selector is wrapped in parentheses because AND binds tighter
        # than OR: without them, with several natural keys, the
        # `applicable_from` conditions would only apply to the last key.
        selector = "(%s)" % " OR ".join(
            "%s = %s" % (escaped(key.name), dumped_value)
            for key in natural_keys)

        # NOTE(review): `timestamp` is placed into the statement without
        # escaping - confirm that callers only pass trusted values.
        sql_template = (
            'SELECT {primary_key} FROM {table_name} '
            'WHERE {selector} '
            'AND `applicable_from` = (SELECT max(`applicable_from`) '
            'FROM {table_name} '
            'WHERE {selector} AND `applicable_from` <= "{timestamp}")'
            )
        sql = sql_template.format(
            primary_key=escaped(cls.__primarykey__.name),
            table_name=escaped(cls.__tablename__),
            selector=selector,
            timestamp=timestamp
            )
        return sql
Beispiel #2
0
    def insert(cls, *instances):
        """ Insert one or more instances into the table as records.

        The instances are split into batches by `cls.batch`; every batch is
        sent as one multi-row statement and committed on success. If the
        first attempt for a batch fails with a broken pipe, the connection
        is closed and the statement is tried once more. Any other failure
        is logged and ends the insert without raising.
        """
        if instances:
            columns = [
                column for column in cls.__columns__
                if not isinstance(column, AutoColumn)
            ]

            sql = "%s INTO %s (\n  %s\n)\n" % (
                cls.INSERT, escaped(cls.__tablename__), ",\n  ".join(
                    escaped(column.name) for column in columns))

            batches = cls.batch(instances)
            for iteration, batch in enumerate(batches, start=1):
                log.debug('Inserting batch %s' % (iteration),
                          extra={"table": cls.__tablename__})

                # One parenthesised value tuple per instance.
                rows = [
                    " (\n  %s\n)" % ",\n  ".join(
                        dump(instance[column.name]) for column in columns)
                    for instance in batch
                ]
                insert_statement = sql
                if rows:
                    insert_statement += "VALUES" + ",".join(rows)

                # At most two attempts: the second one only happens after a
                # broken pipe on the first.
                for attempt in range(1, 3):
                    # Presumably hands out a new connection once the previous
                    # one has been closed - verify against Warehouse.
                    connection = Warehouse.get()
                    try:
                        cursor = connection.cursor()
                        try:
                            cursor.execute(insert_statement)
                        finally:
                            # Release the cursor even when execute fails.
                            cursor.close()

                    except Exception as e:
                        classify_error(e)
                        if isinstance(e, BrokenPipeError) and attempt == 1:
                            log.info(
                                'Trying once more with a fresh connection',
                                extra={"table": cls.__tablename__})
                            connection.close()
                        else:
                            log.error(e)
                            return
                    else:
                        connection.commit()
                        break

        log.debug('Finished updating %s' % cls.__tablename__,
                  extra={"table": cls.__tablename__})
Beispiel #3
0
    def insert(cls, *instances):
        """ Insert one or more instances into the table as records.

        `cls.batch` splits the instances into batches. Each batch becomes a
        single multi-row statement that is committed when it succeeds. A
        broken pipe on the first attempt closes the connection and triggers
        exactly one retry; every other error is logged and stops the insert
        without raising.
        """
        if instances:
            columns = [column for column in cls.__columns__
                       if not isinstance(column, AutoColumn)]

            sql = "%s INTO %s (\n  %s\n)\n" % (
                cls.INSERT, escaped(cls.__tablename__),
                ",\n  ".join(escaped(column.name) for column in columns))

            batches = cls.batch(instances)
            for iteration, batch in enumerate(batches, start=1):
                log.debug('Inserting batch %s' % (iteration),
                          extra={"table": cls.__tablename__})

                # Render every instance as one parenthesised value tuple.
                value_tuples = []
                for instance in batch:
                    values = [dump(instance[column.name])
                              for column in columns]
                    value_tuples.append(" (\n  %s\n)" % ",\n  ".join(values))

                insert_statement = sql
                if value_tuples:
                    insert_statement += "VALUES" + ",".join(value_tuples)

                # Two attempts at most; the second only follows a broken
                # pipe on the first.
                for attempt in range(1, 3):
                    # Presumably returns a new connection after the old one
                    # was closed - verify against Warehouse.
                    connection = Warehouse.get()
                    try:
                        cursor = connection.cursor()
                        try:
                            cursor.execute(insert_statement)
                        finally:
                            # Do not leak the cursor when execute raises.
                            cursor.close()

                    except Exception as e:
                        classify_error(e)
                        if isinstance(e, BrokenPipeError) and attempt == 1:
                            log.info(
                                'Trying once more with a fresh connection',
                                extra={"table": cls.__tablename__}
                                )
                            connection.close()
                        else:
                            log.error(e)
                            return
                    else:
                        connection.commit()
                        break

        log.debug('Finished updating %s' % cls.__tablename__,
                  extra={"table": cls.__tablename__})
Beispiel #4
0
    def insert(cls, *instances):
        """ Insert fact instances (overridden to handle Dimensions correctly)

        Values of dimension key columns are written as parenthesised
        subqueries built by the dimension's `__subquery__`, except when the
        value is falsy and the column is optional; all other values are
        written with `dump`. Each batch is executed as one statement: it is
        committed on success, and on failure the error and the statement are
        logged and the connection is rolled back.
        """
        if not instances:
            return

        insertable = [col for col in cls.__columns__
                      if not isinstance(col, AutoColumn)]
        header = "%s INTO %s (\n  %s\n)\n" % (
            cls.INSERT, escaped(cls.__tablename__),
            ",\n  ".join(escaped(col.name) for col in insertable))

        for batch_number, batch in enumerate(cls.batch(instances), start=1):
            log.debug('Inserting batch %s' % (batch_number),
                      extra={"table": cls.__tablename__})

            rows = []
            for instance in batch:
                rendered = []
                for col in insertable:
                    value = instance[col.name]
                    if (isinstance(col, DimensionKey) and
                            (value or not col.optional)):
                        build_subquery = col.dimension.__subquery__
                        # TODO This is a bit messy - shouldn't have to pass
                        # the instance back in.
                        moment = instance.__dimension_selector__.timestamp(
                            instance)
                        rendered.append(
                            "(%s)" % build_subquery(value, moment))
                    else:
                        rendered.append(dump(value))
                rows.append(" (\n  %s\n)" % ",\n  ".join(rendered))

            statement = header
            if rows:
                statement += "VALUES" + ",".join(rows)

            connection = Warehouse.get()
            try:
                with closing(connection.cursor()) as cursor:
                    cursor.execute(statement)
            except Exception as error:
                classify_error(error)
                log.error(error)
                log.error(statement)
                connection.rollback()
            else:
                connection.commit()
Beispiel #5
0
 def expression(self):
     """ Return the SQL definition of this column: the escaped name and
     the type expression, followed by NOT NULL (unless the column is
     optional), the default clause and a COMMENT clause where present.
     """
     clauses = [escaped(self.name), self.type_expression]
     clauses += [] if self.optional else ["NOT NULL"]
     default_clause = self.default_clause
     clauses += [default_clause] if default_clause else []
     clauses += ["COMMENT %s" % dump(self.comment)] if self.comment else []
     return " ".join(clauses)
Beispiel #6
0
 def expression(self):
     """ Build the SQL fragment that defines this column.

     The fragment is made of space-separated parts: escaped column name,
     type expression, NOT NULL for non-optional columns, the default
     clause if there is one and a COMMENT clause if a comment is set.
     """
     fragments = (escaped(self.name), self.type_expression)
     if not self.optional:
         fragments = fragments + ("NOT NULL",)
     default_part = self.default_clause
     if default_part:
         fragments = fragments + (default_part,)
     if self.comment:
         fragments = fragments + ("COMMENT %s" % dump(self.comment),)
     return " ".join(fragments)
Beispiel #7
0
 def expression(self):
     """ Return the inherited column expression followed by a FOREIGN KEY
     clause that references the primary key of this key's dimension table.
     """
     target = self.dimension
     local_column = escaped(self.name)
     referenced_table = escaped(target.__tablename__)
     referenced_column = escaped(target.__primarykey__.name)
     constraint = "FOREIGN KEY (%s) REFERENCES %s (%s)" % (
         local_column, referenced_table, referenced_column)
     return ", ".join([super(DimensionKey, self).expression, constraint])
Beispiel #8
0
 def __init__(self, *args, **kwargs):
     """ Set 'hash_key' to a raw SQL expression hashing the composite key.

     Every composite key column is wrapped in IFNULL so that NULL values
     are replaced by the text 'NULL'; the results are joined with
     CONCAT_WS, hashed with SHA1 and converted with UNHEX.
     """
     null_safe_columns = ', '.join(
         "IFNULL(%s,'NULL')" % escaped(column.name)
         for column in self.__compositekey__)
     self['hash_key'] = raw_sql(
         "UNHEX(SHA1(CONCAT_WS(',', %s)))" % null_safe_columns)
Beispiel #9
0
 def expression(self):
     """ Extend the inherited column expression with a FOREIGN KEY clause
     pointing at the primary key column of the dimension's table.
     """
     dim = self.dimension
     reference_args = (
         escaped(self.name),
         escaped(dim.__tablename__),
         escaped(dim.__primarykey__.name),
     )
     fk_clause = "FOREIGN KEY (%s) REFERENCES %s (%s)" % reference_args
     base_expression = super(DimensionKey, self).expression
     return base_expression + ", " + fk_clause