def distinct(self, *args, **_filter):
    """Return the unique (distinct) value combinations for the given columns.

    Each positional argument is either a column name, which is selected and
    used for ascending ordering, or a ``ClauseElement``, which is passed on
    as an additional filter clause. Keyword arguments are handed to
    ``_args_to_clause`` to build the filter.
    ::

        # returns only one row per year, ignoring the rest
        table.distinct('year')
        # works with multiple columns, too
        table.distinct('year', 'country')
        # you can also combine this with a filter
        table.distinct('year', country='China')

    An empty iterator is returned when the table does not exist or when no
    column names were passed. A ``DatasetException`` is raised for a column
    name the table does not have.
    """
    if not self.exists:
        return iter([])

    selected = []
    conditions = []
    for arg in args:
        if isinstance(arg, ClauseElement):
            conditions.append(arg)
            continue
        if not self.has_column(arg):
            raise DatasetException("No such column: %s" % arg)
        selected.append(self.table.c[arg])

    # The filter is built before the empty check so that invalid filter
    # arguments are still reported even when no columns were requested.
    where = self._args_to_clause(_filter, clauses=conditions)
    if len(selected) == 0:
        return iter([])

    ordering = [col.asc() for col in selected]
    query = expression.select(
        selected,
        distinct=True,
        whereclause=where,
        order_by=ordering,
    )
    return self.db.query(query)
def _sync_table(self, columns):
    """Lazy load, create or adapt the table structure in the database.

    If no table object is cached yet, reflection is attempted first. When a
    table is available, any of ``columns`` that it lacks are added and the
    table is reflected again. When no table is available, it is created
    with the primary key column (unless ``_primary_id`` is ``False``) plus
    ``columns``; a ``DatasetException`` is raised instead if ``_auto_create``
    is falsy.
    """
    if self._table is None:
        # Load an existing table from the database.
        self._reflect_table()

    if self._table is not None:
        # The table exists: only add the columns that are still missing.
        if len(columns) == 0:
            return
        with self.db.lock:
            self._threading_warn()
            for new_column in columns:
                if self.has_column(new_column.name):
                    continue
                self.db.op.add_column(self.name, new_column, self.db.schema)
            self._reflect_table()
        return

    # Create the table with an initial set of columns.
    if not self._auto_create:
        raise DatasetException("Table does not exist: %s" % self.name)

    # Keep the lock scope small because this is run very often.
    with self.db.lock:
        self._threading_warn()
        self._table = SQLATable(
            self.name,
            self.db.metadata,
            schema=self.db.schema,
        )
        if self._primary_id is not False:
            # This can go wrong on DBMS like MySQL and SQLite where
            # tables cannot have no columns.
            key_name = self._primary_id or self.PRIMARY_DEFAULT
            key_type = self._primary_type or Types.integer
            auto_increment = key_type in (Types.integer, Types.bigint)
            self._table.append_column(
                Column(
                    key_name,
                    key_type,
                    primary_key=True,
                    autoincrement=auto_increment,
                )
            )
        for new_column in columns:
            self._table.append_column(new_column)
        self._table.create(self.db.executable, checkfirst=True)
def create_index(self, columns, name=None, **kw):
    """Create an index to speed up queries on a table.

    If no ``name`` is given, one is generated from the table name and the
    columns via ``index_name``. Nothing happens when any of the columns is
    missing from the table or when ``has_index`` already reports an index
    for them. A ``DatasetException`` is raised when the table does not
    exist.
    ::

        table.create_index(['name', 'country'])
    """
    column_names = [self._get_column_name(c) for c in ensure_list(columns)]
    with self.db.lock:
        if not self.exists:
            raise DatasetException("Table has not been created yet.")

        # Silently skip the index when a requested column is absent.
        if not all(self.has_column(col_name) for col_name in column_names):
            return

        if self.has_index(column_names):
            return

        self._threading_warn()
        index_label = name or index_name(self.name, column_names)
        indexed = [self.table.c[col_name] for col_name in column_names]

        # MySQL crashes out if you try to index very long text fields,
        # apparently. This defines (a somewhat random) prefix that
        # will be captured by the index, after which I assume the engine
        # conducts a more linear scan:
        kw["mysql_length"] = {
            col.name: 10
            for col in indexed
            if isinstance(col.type, MYSQL_LENGTH_TYPES)
        }

        Index(index_label, *indexed, **kw).create(self.db.executable)
def create_table(self, table_name, primary_id='id', primary_type='Integer'):
    """
    Creates a new table.

    The new table will automatically have an `id` column unless specified
    via optional parameter primary_id, which will be used as the primary
    key of the table. An Integer key is auto-incrementing only when it is
    named `id`. The type of the key is chosen with primary_type, which is
    either 'Integer' or 'String', the latter optionally followed by a
    length in parentheses. The default length of String is 255 and larger
    requested lengths are capped at 255.

    The caller will be responsible for the uniqueness of manual primary_id.

    This custom id feature is only available via direct create_table call.

    Returns a :py:class:`Table <dataset.Table>` instance. Raises a
    ``DatasetException`` when primary_type is not recognised.
    ::

        table = db.create_table('population')

        # custom id and type
        table2 = db.create_table('population2', 'age')
        table3 = db.create_table('population3',
                                 primary_id='race',
                                 primary_type='String')
        # custom length of String
        table4 = db.create_table('population4',
                                 primary_id='race',
                                 primary_type='String(50)')
    """
    table_name = self._valid_table_name(table_name)
    self._acquire()
    try:
        log.debug("Creating table: %s on %r" % (table_name, self.engine))

        parsed = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
        if not parsed:
            raise DatasetException(
                "The primary_type has to be either 'Integer' or 'String'.")

        integer_kind, _string_kind, length_spec = parsed.groups()
        if integer_kind == 'Integer':
            key_column = Column(
                primary_id,
                Integer,
                primary_key=True,
                autoincrement=(primary_id == 'id'),
            )
        else:
            if length_spec:
                # Strip the surrounding parentheses before converting.
                width = min(int(length_spec[1:-1]), 255)
            else:
                width = 255
            key_column = Column(primary_id, String(width), primary_key=True)

        new_table = SQLATable(table_name, self.metadata, schema=self.schema)
        new_table.append_column(key_column)
        new_table.create(self.engine)
        self._tables[table_name] = new_table
        return Table(self, new_table)
    finally:
        self._release()
def has_index(self, columns):
    """Check if an index exists to cover the given ``columns``.

    Returns ``False`` when the table does not exist. Column sets that were
    found to be indexed are remembered in ``self._indexes`` so later calls
    can answer without inspecting the database. Raises a
    ``DatasetException`` if one of the columns is not part of the table.
    """
    if not self.exists:
        return False

    wanted = {normalize_column_name(c) for c in columns}
    if wanted in self._indexes:
        return True

    for col_name in wanted:
        if not self.has_column(col_name):
            raise DatasetException("Column does not exist: %s" % col_name)

    existing = self.db.inspect.get_indexes(self.name, schema=self.db.schema)
    for index_info in existing:
        covered = set(index_info.get('column_names', []))
        if covered == wanted:
            self._indexes.append(wanted)
            return True
    return False
def create_table(self, table_name, primary_id='id', primary_type='Integer'):
    """
    Creates a new table.

    The new table will automatically have an `id` column unless specified
    via optional parameter primary_id, which will be used as the primary
    key of the table. An Integer key is auto-incrementing only when it is
    named `id`, while the type of custom primary_id can be a Text or an
    Integer as specified with primary_type flag.

    The caller will be responsible for the uniqueness of manual primary_id.

    This custom id feature is only available via direct create_table call.

    Returns a :py:class:`Table <dataset.Table>` instance. Raises a
    ``DatasetException`` when primary_type is neither 'Integer' nor 'Text'.
    ::

        table = db.create_table('population')

        # custom id and type
        table2 = db.create_table('population2', 'age')
        table3 = db.create_table('population3',
                                 primary_id='race',
                                 primary_type='Text')
    """
    self._acquire()
    try:
        log.debug("Creating table: %s on %r" % (table_name, self.engine))

        # Compare strings by value. Identity (``is``) only happens to work
        # for interned literals and fails for equal strings built at
        # runtime (e.g. read from configuration).
        if primary_type == 'Integer':
            col = Column(
                primary_id,
                Integer,
                primary_key=True,
                autoincrement=(primary_id == 'id'),
            )
        elif primary_type == 'Text':
            col = Column(primary_id, Text, primary_key=True)
        else:
            raise DatasetException(
                "The primary_type has to be either 'Integer' or 'Text'.")

        # Build the table object only after primary_type was validated, so
        # a rejected call does not leave a column-less table behind
        # (presumably it would otherwise stay registered in self.metadata
        # -- TODO confirm against SQLATable).
        table = SQLATable(table_name, self.metadata)
        table.append_column(col)
        table.create(self.engine)
        self._tables[table_name] = table
        return Table(self, table)
    finally:
        self._release()
def create_index(self, columns, name=None, **kw):
    """Create an index to speed up queries on a table.

    If no ``name`` is given, one is generated from the table name and the
    columns via ``index_name``. Nothing is created when ``has_index``
    already reports an index for the columns. A ``DatasetException`` is
    raised when the table does not exist.
    ::

        table.create_index(['name', 'country'])
    """
    column_names = [normalize_column_name(c) for c in ensure_tuple(columns)]
    with self.db.lock:
        if not self.exists:
            raise DatasetException("Table has not been created yet.")

        if self.has_index(column_names):
            return

        self._threading_warn()
        index_label = name or index_name(self.name, column_names)
        indexed = [self.table.c[col_name] for col_name in column_names]
        Index(index_label, *indexed, **kw).create(self.db.executable)
def _check_dropped(self):
    """Raise a ``DatasetException`` if this table is flagged as dropped."""
    if not self._is_dropped:
        return
    raise DatasetException(
        'the table has been dropped. this object should not be used again.'
    )