def create_table(cls):
    """Create this table in the current data warehouse.

    The DDL is assembled from the ``expression`` of every entry in
    ``cls.__columns__`` and each key/value pair of ``cls.__tableargs__``
    is appended as a ``key=value`` table option.

    Returns:
        True if the table was created, or False if the table already
        exists.

    Raises:
        Exception: whatever the cursor raised while executing the
            statement, after it has been passed to ``classify_error``.
    """
    if cls.table_exists:
        log.info('%s already exists - skipping.' % cls.__tablename__)
        return False

    # Column definitions go between the parentheses, one per line.
    column_block = ",\n ".join(
        column.expression for column in cls.__columns__)
    statement = "%s %s (\n %s\n)" % (
        "CREATE TABLE", cls.__tablename__, column_block)

    # Table-level options trail the closing parenthesis.
    statement += "".join(
        " %s=%s" % (option, setting)
        for option, setting in cls.__tableargs__.items())

    warehouse = Warehouse.get()
    with closing(warehouse.cursor()) as cursor:
        try:
            cursor.execute(statement)
        except Exception as error:
            classify_error(error)
            raise error
    return True
def create_trigger(cls):
    """Create the BEFORE INSERT trigger that fills in ``created``.

    There's a constraint in earlier versions of MySQL where only one
    timestamp column can have a CURRENT_TIMESTAMP default value. These
    triggers get around that problem by setting ``created`` to ``NOW()``
    whenever a row arrives with the zero timestamp.

    Returns:
        True if a trigger was created, or False if the trigger already
        exists.

    Raises:
        Exception: whatever the cursor raised while executing the
            statement, after it has been passed to ``classify_error``.
    """
    if cls.trigger_name in Warehouse.trigger_names:
        log.info('%s already exists - skipping.' % cls.trigger_name)
        return False

    # The backslash directly after the opening quotes is a line
    # continuation: the statement must start with CREATE, with no stray
    # backslash or blank line in front of it.
    trigger = """\
CREATE TRIGGER %s BEFORE INSERT ON %s
FOR EACH ROW BEGIN
    IF NEW.created = '0000-00-00 00:00:00' THEN
        SET NEW.created = NOW();
    END IF;
END
""" % (cls.trigger_name, cls.__tablename__)

    connection = Warehouse.get()
    with closing(connection.cursor()) as cursor:
        try:
            cursor.execute(trigger)
        except Exception as exception:
            classify_error(exception)
            # Bare raise re-raises the same exception object.
            raise
    log.info('%s created.' % cls.trigger_name)
    return True
def insert(cls, *instances):
    """Insert one or more instances into the table as records.

    Every column in ``cls.__columns__`` that is not an ``AutoColumn`` is
    written. Instances are split up by ``cls.batch`` and each batch is
    sent as a single multi-row statement, committed on success.

    Errors are logged rather than raised. If a batch fails with a
    ``BrokenPipeError`` on the first attempt, the connection is closed
    and the batch is tried once more on whatever ``Warehouse.get()``
    returns next. Any other failure (or a second failure) is logged and
    the remaining batches are abandoned.

    Args:
        *instances: records to insert; each must support
            ``instance[column_name]`` lookup. With no instances the call
            is a no-op.

    Returns:
        None.
    """
    if not instances:
        return

    columns = [column for column in cls.__columns__
               if not isinstance(column, AutoColumn)]
    sql = "%s INTO %s (\n %s\n)\n" % (
        cls.INSERT, escaped(cls.__tablename__),
        ",\n ".join(escaped(column.name) for column in columns))

    for iteration, batch in enumerate(cls.batch(instances), start=1):
        log.debug('Inserting batch %s' % (iteration),
                  extra={"table": cls.__tablename__})

        # One parenthesised value tuple per instance, joined in a single
        # pass instead of growing the statement with repeated +=.
        rows = [
            " (\n %s\n)" % ",\n ".join(
                dump(instance[column.name]) for column in columns)
            for instance in batch
        ]
        if not rows:
            # An INSERT without a VALUES list is never valid SQL, so an
            # empty batch is skipped instead of failing the whole run.
            continue
        insert_statement = sql + "VALUES" + ",".join(rows)

        # At most two attempts: the second only after a broken pipe.
        for attempt in (1, 2):
            connection = Warehouse.get()
            try:
                # closing() releases the cursor even when execute()
                # raises, matching the other statements in this file.
                with closing(connection.cursor()) as cursor:
                    cursor.execute(insert_statement)
            except Exception as e:
                classify_error(e)
                if isinstance(e, BrokenPipeError) and attempt == 1:
                    log.info(
                        'Trying once more with a fresh connection',
                        extra={"table": cls.__tablename__})
                    connection.close()
                else:
                    log.error(e)
                    return
            else:
                connection.commit()
                break

    log.debug('Finished updating %s' % cls.__tablename__,
              extra={"table": cls.__tablename__})
def insert(cls, *instances):
    """Insert one or more instances into the table as records.

    Every column in ``cls.__columns__`` that is not an ``AutoColumn`` is
    written. Instances are split up by ``cls.batch`` and each batch is
    sent as a single multi-row statement, committed on success.

    Errors are logged rather than raised. If a batch fails with a
    ``BrokenPipeError`` on the first attempt, the connection is closed
    and the batch is tried once more on whatever ``Warehouse.get()``
    returns next. Any other failure (or a second failure) is logged and
    the remaining batches are abandoned.

    Args:
        *instances: records to insert; each must support
            ``instance[column_name]`` lookup. With no instances the call
            is a no-op.

    Returns:
        None.
    """
    if not instances:
        return

    columns = [column for column in cls.__columns__
               if not isinstance(column, AutoColumn)]
    sql = "%s INTO %s (\n %s\n)\n" % (
        cls.INSERT, escaped(cls.__tablename__),
        ",\n ".join(escaped(column.name) for column in columns))

    for iteration, batch in enumerate(cls.batch(instances), start=1):
        log.debug('Inserting batch %s' % (iteration),
                  extra={"table": cls.__tablename__})

        # One parenthesised value tuple per instance, joined in a single
        # pass instead of growing the statement with repeated +=.
        rows = [
            " (\n %s\n)" % ",\n ".join(
                dump(instance[column.name]) for column in columns)
            for instance in batch
        ]
        if not rows:
            # An INSERT without a VALUES list is never valid SQL, so an
            # empty batch is skipped instead of failing the whole run.
            continue
        insert_statement = sql + "VALUES" + ",".join(rows)

        # At most two attempts: the second only after a broken pipe.
        for attempt in (1, 2):
            connection = Warehouse.get()
            try:
                # closing() releases the cursor even when execute()
                # raises, matching the other statements in this file.
                with closing(connection.cursor()) as cursor:
                    cursor.execute(insert_statement)
            except Exception as e:
                classify_error(e)
                if isinstance(e, BrokenPipeError) and attempt == 1:
                    log.info(
                        'Trying once more with a fresh connection',
                        extra={"table": cls.__tablename__})
                    connection.close()
                else:
                    log.error(e)
                    return
            else:
                connection.commit()
                break

    log.debug('Finished updating %s' % cls.__tablename__,
              extra={"table": cls.__tablename__})
def insert(cls, *instances):
    """Insert fact instances (overridden to handle Dimensions correctly).

    Plain column values are serialised with ``dump``. A ``DimensionKey``
    column is written as a parenthesised subquery obtained from the
    column's dimension, unless the value is falsy and the column is
    optional, in which case the value itself is dumped.

    Each batch produced by ``cls.batch`` is executed as one statement.
    A failing batch is logged (error and statement), rolled back, and
    processing moves on to the next batch; a successful one is committed.

    Args:
        *instances: fact records supporting ``instance[column_name]``
            lookup. With no instances the call does nothing.

    Returns:
        None.
    """
    if not instances:
        return

    def render(record, column):
        # Produce the SQL text for one column of one record.
        value = record[column.name]
        if not isinstance(column, DimensionKey) or (
                not value and column.optional):
            return dump(value)
        subquery = column.dimension.__subquery__
        # TODO This is a bit messy - shouldn't have to pass the instance
        # back in.
        return "(%s)" % subquery(
            value, record.__dimension_selector__.timestamp(record))

    writable = [col for col in cls.__columns__
                if not isinstance(col, AutoColumn)]
    header = "%s INTO %s (\n %s\n)\n" % (
        cls.INSERT, escaped(cls.__tablename__),
        ",\n ".join(escaped(col.name) for col in writable))

    for number, batch in enumerate(cls.batch(instances), start=1):
        log.debug('Inserting batch %s' % (number),
                  extra={"table": cls.__tablename__})

        tuples = [
            " (\n %s\n)" % ",\n ".join(
                render(record, col) for col in writable)
            for record in batch
        ]
        # With no rows the bare header is sent, exactly as before.
        statement = header
        if tuples:
            statement += "VALUES" + ",".join(tuples)

        connection = Warehouse.get()
        try:
            with closing(connection.cursor()) as cursor:
                cursor.execute(statement)
        except Exception as error:
            classify_error(error)
            log.error(error)
            log.error(statement)
            connection.rollback()
            continue
        connection.commit()