Example #1
0
    def distinct(self, *args, **_filter):
        """Return all the unique (distinct) values for the given ``columns``.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        if not self.exists:
            return iter([])

        columns = []
        clauses = []
        for column in args:
            if isinstance(column, ClauseElement):
                clauses.append(column)
            else:
                if not self.has_column(column):
                    raise DatasetException("No such column: %s" % column)
                columns.append(self.table.c[column])

        clause = self._args_to_clause(_filter, clauses=clauses)
        if not len(columns):
            return iter([])

        q = expression.select(columns,
                              distinct=True,
                              whereclause=clause,
                              order_by=[c.asc() for c in columns])
        return self.db.query(q)
Example #2
0
 def _sync_table(self, columns):
     """Lazy load, create or adapt the table structure in the database."""
     if self._table is None:
         # Load an existing table from the database.
         self._reflect_table()
     if self._table is None:
         # Create the table with an initial set of columns.
         if not self._auto_create:
             raise DatasetException("Table does not exist: %s" % self.name)
         # Keep the lock scope small because this is run very often.
         with self.db.lock:
             self._threading_warn()
             self._table = SQLATable(self.name,
                                     self.db.metadata,
                                     schema=self.db.schema)
             if self._primary_id is not False:
                 # This can go wrong on DBMS like MySQL and SQLite where
                 # tables cannot have no columns.
                 primary_id = self._primary_id or self.PRIMARY_DEFAULT
                 primary_type = self._primary_type or Types.integer
                 increment = primary_type in [Types.integer, Types.bigint]
                 column = Column(primary_id, primary_type,
                                 primary_key=True,
                                 autoincrement=increment)
                 self._table.append_column(column)
             for column in columns:
                 self._table.append_column(column)
             self._table.create(self.db.executable, checkfirst=True)
     elif len(columns):
         with self.db.lock:
             self._threading_warn()
             for column in columns:
                 if not self.has_column(column.name):
                     self.db.op.add_column(self.name, column, self.db.schema)
             self._reflect_table()
Example #3
0
    def create_index(self, columns, name=None, **kw):
        """Create an index to speed up queries on a table.

        If no ``name`` is given a random name is created.
        ::

            table.create_index(['name', 'country'])
        """
        columns = [self._get_column_name(c) for c in ensure_list(columns)]
        with self.db.lock:
            if not self.exists:
                raise DatasetException("Table has not been created yet.")

            for column in columns:
                if not self.has_column(column):
                    return

            if not self.has_index(columns):
                self._threading_warn()
                name = name or index_name(self.name, columns)
                columns = [self.table.c[c] for c in columns]

                # MySQL crashes out if you try to index very long text fields,
                # apparently. This defines (a somewhat random) prefix that
                # will be captured by the index, after which I assume the engine
                # conducts a more linear scan:
                mysql_length = {}
                for col in columns:
                    if isinstance(col.type, MYSQL_LENGTH_TYPES):
                        mysql_length[col.name] = 10
                kw["mysql_length"] = mysql_length

                idx = Index(name, *columns, **kw)
                idx.create(self.db.executable)
Example #4
0
    def create_table(self,
                     table_name,
                     primary_id='id',
                     primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column
        unless specified via optional parameter primary_id, which will be used
        as the primary key of the table. Automatic id is set to be an
        auto-incrementing integer, while the type of custom primary_id can be a
        String or an Integer as specified with primary_type flag. The default
        length of String is 255. The caller can specify the length.
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='String')
            # custom length of String
            table4 = db.create_table('population4', primary_id='race', primary_type='String(50)')
        """
        table_name = self._valid_table_name(table_name)
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            match = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
            if match:
                if match.group(1) == 'Integer':
                    auto_flag = False
                    if primary_id == 'id':
                        auto_flag = True
                    col = Column(primary_id,
                                 Integer,
                                 primary_key=True,
                                 autoincrement=auto_flag)
                elif not match.group(3):
                    col = Column(primary_id, String(255), primary_key=True)
                else:
                    len_string = int(match.group(3)[1:-1])
                    len_string = min(len_string, 255)
                    col = Column(primary_id,
                                 String(len_string),
                                 primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'String'.")

            table = SQLATable(table_name, self.metadata, schema=self.schema)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #5
0
 def has_index(self, columns):
     """Check if an index exists to cover the given ``columns``."""
     if not self.exists:
         return False
     columns = set([normalize_column_name(c) for c in columns])
     if columns in self._indexes:
         return True
     for column in columns:
         if not self.has_column(column):
             raise DatasetException("Column does not exist: %s" % column)
     indexes = self.db.inspect.get_indexes(self.name, schema=self.db.schema)
     for index in indexes:
         if columns == set(index.get('column_names', [])):
             self._indexes.append(columns)
             return True
     return False
Example #6
0
    def create_table(self,
                     table_name,
                     primary_id='id',
                     primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column 
        unless specified via optional parameter primary_id, which will be used 
        as the primary key of the table. Automatic id is set to be an 
        auto-incrementing integer, while the type of custom primary_id can be a 
        Text or an Integer as specified with primary_type flag. 
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call. 

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='Text')
        """
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            if primary_type is 'Integer':
                auto_flag = False
                if primary_id is 'id':
                    auto_flag = True
                col = Column(primary_id,
                             Integer,
                             primary_key=True,
                             autoincrement=auto_flag)
            elif primary_type is 'Text':
                col = Column(primary_id, Text, primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'Text'.")

            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #7
0
    def create_index(self, columns, name=None, **kw):
        """Create an index to speed up queries on a table.

        If no ``name`` is given a random name is created.
        ::

            table.create_index(['name', 'country'])
        """
        columns = [normalize_column_name(c) for c in ensure_tuple(columns)]
        with self.db.lock:
            if not self.exists:
                raise DatasetException("Table has not been created yet.")

            if not self.has_index(columns):
                self._threading_warn()
                name = name or index_name(self.name, columns)
                columns = [self.table.c[c] for c in columns]
                idx = Index(name, *columns, **kw)
                idx.create(self.db.executable)
Example #8
0
 def _check_dropped(self):
     if self._is_dropped:
         raise DatasetException('the table has been dropped. this object should not be used again.')