Example #1
 def visitField_list(self, ctx: SQLParser.Field_listContext):
     name_to_column = {}
     foreign_keys = {}
     primary_key = None
     # Modified by Dong: remove reversed
     for field in ctx.field():
         if isinstance(field, SQLParser.Normal_fieldContext):
             name = to_str(field.Identifier())
             type_, size = field.type_().accept(self)
             # not_null = field.getChild(2) == 'NOT'
             # default = to_str(field.value()) if field.value() else None
             name_to_column[name] = ColumnInfo(type_, name, size)
         elif isinstance(field, SQLParser.Foreign_key_fieldContext):
             field_name, table_name, refer_name = field.accept(self)
             if field_name in foreign_keys:
                 raise DataBaseError(
                     f'Foreign key named {field_name} is duplicated')
             foreign_keys[field_name] = table_name, refer_name
         else:
             assert isinstance(field, SQLParser.Primary_key_fieldContext)
             names = field.accept(self)
             for name in names:
                 if name not in name_to_column:
                     raise DataBaseError(f'Unknown field {name} in field list')
             if primary_key:
                 raise DataBaseError('Only one primary key supported')
             primary_key = names
     return list(name_to_column.values()), foreign_keys, primary_key
Example #2
 def encode(size_list, type_list, total_size, value_list):
     if len(value_list) != len(size_list):
         raise DataBaseError(
             f'length of value ({len(value_list)}) != length of columns ({len(size_list)})'
         )
     record_data = np.zeros(shape=total_size, dtype=np.uint8)
     pos = 0
     for size_, type_, value_ in zip(size_list, type_list, value_list):
         if type_ == "VARCHAR":
             if value_ is None:
                 length = 1
                 bytes_ = (1, )
             else:
                 if not isinstance(value_, str):
                     raise DataBaseError(
                         f"Expect VARCHAR({size_ - 1}) but get {value_} instead"
                     )
                 bytes_ = (0, ) + tuple(value_.encode())
                 length = len(bytes_)
                 if length > size_:
                     raise DataBaseError(
                         f"String length {length} exceeds VARCHAR({size_ - 1})"
                     )
             record_data[pos:pos + length] = bytes_
             record_data[pos + length:pos + size_] = 0
         else:
             record_data[pos:pos + size_] = list(
                 Converter.serialize(value_, type_))
         pos += size_
     assert pos == total_size
     return record_data
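
The VARCHAR branch above writes a one-byte null flag (1 = NULL, 0 = value present) followed by the UTF-8 payload, zero-padded to the column's on-disk size. A minimal standalone sketch of that layout (illustrative only, with a made-up column size; not pybase code):

 import numpy as np

 size_ = 8                                # hypothetical column size on disk, including the flag byte
 cell = np.zeros(size_, dtype=np.uint8)
 payload = (0, ) + tuple("abc".encode())  # flag byte 0 (= not NULL) + UTF-8 bytes
 cell[:len(payload)] = payload            # remaining bytes stay zero as padding
 assert bytes(cell) == b"\x00abc\x00\x00\x00\x00"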
Example #3
    def cond_join(self, results_map: dict, conditions) -> QueryResult:
        if self.using_db is None:
            raise DataBaseError(f"No using database to scan.")
        join_pair_map = {}

        def build_join_pair(condition: Condition):
            if condition.target_table and condition.table_name != condition.target_table:
                if condition.operator != '=':
                    raise DataBaseError(
                        'Comparison between different tables must be "="')
                pair = (condition.table_name,
                        condition.column_name), (condition.target_table,
                                                 condition.target_column)
                return tuple(zip(*sorted(pair)))
            return None, None

        for join_pair_key, join_pair_col in map(build_join_pair, conditions):
            if join_pair_col is None:
                continue
            if join_pair_key in join_pair_map:
                join_pair_map[join_pair_key][0].append(join_pair_col[0])
                join_pair_map[join_pair_key][1].append(join_pair_col[1])
            else:
                join_pair_map[join_pair_key] = ([join_pair_col[0]],
                                                [join_pair_col[1]])

        if not join_pair_map:
            raise DataBaseError('Joining multiple tables requires a join condition')
        union_set = {key: key for key in results_map.keys()}

        def union_set_find(x):
            if x != union_set[x]:
                union_set[x] = union_set_find(union_set[x])
            return union_set[x]

        def union_set_union(x, y):
            x = union_set_find(x)
            y = union_set_find(y)
            union_set[x] = y

        results = None
        for join_pair in join_pair_map:
            outer: QueryResult = results_map[join_pair[0]]
            inner: QueryResult = results_map[join_pair[1]]
            outer_joined = tuple(join_pair[0] + "." + col
                                 for col in join_pair_map[join_pair][0])
            inner_joined = tuple(join_pair[1] + "." + col
                                 for col in join_pair_map[join_pair][1])
            new_result = nested_loops_join(outer, inner, outer_joined,
                                           inner_joined)
            union_set_union(join_pair[0], join_pair[1])
            new_key = union_set_find(join_pair[0])
            results_map[new_key] = new_result
            results = new_result
        return results
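
union_set_find / union_set_union above implement a small disjoint-set (union-find) structure over table names, so every joined table ends up under a single representative key in results_map. A standalone sketch of just that bookkeeping, with made-up table names:

 union_set = {name: name for name in ("t1", "t2", "t3")}

 def union_set_find(x):
     # path compression: point x directly at its root
     if x != union_set[x]:
         union_set[x] = union_set_find(union_set[x])
     return union_set[x]

 def union_set_union(x, y):
     union_set[union_set_find(x)] = union_set_find(y)

 union_set_union("t1", "t2")   # join t1 with t2
 union_set_union("t2", "t3")   # merge that result with t3
 assert union_set_find("t1") == union_set_find("t3")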
Example #4
 def drop_index(self, index_name):
     if self.using_db is None:
         raise DataBaseError(f"No using database to create index")
     meta_handle = self._MM.open_meta(self.using_db)
     if not meta_handle.exists_index(index_name):
         raise DataBaseError(f"Index {index_name} does not exist.")
     table_name, column_name = meta_handle.get_index_info(index_name)
     table_info = meta_handle.get_table(table_name)
     table_info.drop_index(column_name)
     meta_handle.drop_index(index_name)
     self._MM.close_meta(self.using_db)
Example #5
File: info.py Project: rcy17/pybase
 def check_value_map(self, value_map: dict):
     for column_name, value in value_map.items():
         column: ColumnInfo = self.column_map.get(column_name)
         if column is None:
             raise DataBaseError(f'Field {column_name} is unknown')
         if type(value) not in ACCEPT_TYPE[column.type]:
             raise DataBaseError(
                 f'Field {column_name} expects {column.type} but get {value} instead'
             )
         if column.type == 'DATE':
             value_map[column_name] = Converter.parse_date(value)
Example #6
 def result_to_value(result: QueryResult, is_in):
     if len(result.headers) > 1:
         raise DataBaseError('Recursive select must return one column')
     value = sum(result.data, ())
     if not is_in:
         if len(result.data) != 1:
             raise DataBaseError(
                 f'One value of {result.headers[0]} expected but got {len(result.data)}'
             )
         value, = value
     return value
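
Since result.data holds one-element row tuples for a single selected column, sum(result.data, ()) concatenates them into a flat tuple of values. An illustration with made-up data:

 rows = ((1, ), (2, ), (3, ))     # hypothetical result.data for one column
 assert sum(rows, ()) == (1, 2, 3)
 value = sum(((42, ), ), ())      # a scalar subquery (is_in=False) yields exactly one row
 value, = value                   # unpack the single value, as in the branch above
 assert value == 42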
Example #7
 def set_table_name(item, table_name_attr, column_name_attr):
     _table = getattr(item, table_name_attr)
     _column = getattr(item, column_name_attr)
     if _column is None:
         return
     if _table is None:
         tables = column_to_table[_column]
         if len(tables) > 1:
             raise DataBaseError(
                 f'Field {_column} is ambiguous when joining tables'
             )
         if not tables:
             raise DataBaseError(f'Field {_column} is unknown')
         setattr(item, table_name_attr, tables[0])
Example #8
 def rename_table(self, old_name, new_name):
     if self.using_db is None:
         raise DataBaseError(f"No using database to rename table")
     meta_handle = self._MM.open_meta(self.using_db)
     meta_handle.rename_table(old_name, new_name)
     self._RM.rename_file(self.get_table_path(old_name),
                          self.get_table_path(new_name))
Example #9
    def drop_column(self, table_name, column_name):
        meta_handle, table_info = self.get_table_info(table_name,
                                                      "drop column")
        index = table_info.get_col_index(column_name)
        if index is None:
            raise DataBaseError(f"Column {column_name} does not exist.")
        old_table_info = deepcopy(table_info)
        meta_handle.drop_column(table_name, column_name)

        self._RM.create_file(self.get_table_path(table_name + ".copy"),
                             table_info.total_size)
        record_handle = self._RM.open_file(self.get_table_path(table_name))
        new_record_handle = self._RM.open_file(
            self.get_table_path(table_name + ".copy"))
        scanner = FileScan(record_handle)
        for record in scanner:
            value_list = list(old_table_info.load_record(record))
            value_list.pop(index)
            data = table_info.build_record(value_list)
            new_record_handle.insert_record(data)
        self._RM.close_file(self.get_table_path(table_name))
        self._RM.close_file(self.get_table_path(table_name + ".copy"))
        # Rename
        self._RM.replace_file(self.get_table_path(table_name + ".copy"),
                              self.get_table_path(table_name))
Example #10
    def add_column(self, table_name, column_info: ColumnInfo):
        meta_handle, table_info = self.get_table_info(table_name, "add column")
        if table_info.get_col_index(column_info.name) is not None:
            raise DataBaseError(f"Column already exists.")
        old_table_info = deepcopy(table_info)
        meta_handle.add_col(table_name, column_info)

        self._RM.create_file(self.get_table_path(table_name + ".copy"),
                             table_info.total_size)
        record_handle = self._RM.open_file(self.get_table_path(table_name))
        new_record_handle = self._RM.open_file(
            self.get_table_path(table_name + ".copy"))
        scanner = FileScan(record_handle)
        for record in scanner:
            value_list = list(old_table_info.load_record(record))
            # column_info.default is already None when no DEFAULT clause was given
            value_list.append(column_info.default)
            data = table_info.build_record(value_list)
            new_record_handle.insert_record(data)
        self._RM.close_file(self.get_table_path(table_name))
        self._RM.close_file(self.get_table_path(table_name + ".copy"))
        # Rename
        self._RM.replace_file(self.get_table_path(table_name + ".copy"),
                              self.get_table_path(table_name))
Example #11
 def show_tables(self):
     if self.using_db is None:
         raise DataBaseError(f"No using database to show tables")
     return [
         file.stem for file in (self._base_path / self.using_db).iterdir()
         if file.suffix == '.table'
     ]
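
show_tables relies on pathlib: iterdir() lists the database directory, suffix filters for the table files, and stem strips the extension. A quick standalone illustration with a made-up path:

 from pathlib import Path

 path = Path("/data/mydb/users.table")    # hypothetical table file
 assert path.suffix == ".table"           # passes the filter above
 assert path.stem == "users"              # value returned by show_tables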
Example #12
 def create_db(self, name):
     if name in self.dbs:
         raise DataBaseError(f"Can't create existing database {name}")
     db_path = self.get_db_path(name)
     assert not db_path.exists()
     db_path.mkdir(parents=True)
     self.dbs.add(name)
Example #13
 def parse_date(value: str):
     value = value.replace("/", "-")
     value_list = value.split("-")
     try:
         return date(*map(int, value_list))
         # return parse_date(value).date()
     except (TypeError, AttributeError, ValueError):
         raise DataBaseError(f"Expect DATE but get {value} instead")
Example #14
 def build_condition_func(condition: Condition):
     if condition.table_name and condition.table_name != table_name:
         return None
     cond_index = table_info.get_col_index(condition.column_name)
     if cond_index is None:
         raise DataBaseError(
             f'Field {condition.column_name} for table {table_name} is unknown'
         )
     type_ = table_info.type_list[cond_index]
     if condition.type == ConditionType.Compare:
         if condition.target_column:
             if condition.target_table != table_name:
                 return None
             cond_index_2 = table_info.get_col_index(
                 condition.target_column)
             return compare_to_attr(cond_index, condition.operator,
                                    cond_index_2)
         else:
             value = condition.value
             if type_ in ('INT', 'FLOAT'):
                 if not isinstance(value, (int, float)):
                     raise DataBaseError(
                         f"Expect {type_} but get '{value}' instead")
             elif type_ == 'DATE':
                 value = Converter.parse_date(value)
             elif type_ == 'VARCHAR':
                 if not isinstance(value, str):
                     raise DataBaseError(
                         f'Expect VARCHAR but get {value} instead')
             return compare_to_value(cond_index, condition.operator,
                                     value)
     elif condition.type == ConditionType.In:
         values = condition.value
         if type_ == 'DATE':
             values = tuple(map(Converter.parse_date, values))
         return in_value_list(cond_index, values)
     elif condition.type == ConditionType.Like:
         if type_ != 'VARCHAR':
             raise DataBaseError(
                 f'Like operator expects VARCHAR but get {condition.column_name}:{type_}'
             )
         return like_pattern(cond_index, condition.value)
     elif condition.type == ConditionType.Null:
         assert isinstance(condition.value, bool)
         return null_check(cond_index, condition.value)
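
compare_to_value, compare_to_attr, in_value_list, like_pattern and null_check are helpers not shown on this page; one plausible shape for such a row predicate (an assumption for illustration, not pybase's actual implementation) is a closure over the column index:

 import operator

 _OPS = {'=': operator.eq, '<>': operator.ne, '<': operator.lt,
         '>': operator.gt, '<=': operator.le, '>=': operator.ge}

 def compare_to_value(index, op, value):
     # hypothetical helper: returns a predicate applied to each record's value list
     func = _OPS[op]
     return lambda row: row[index] is not None and func(row[index], value)

 cond = compare_to_value(1, '>', 10)
 assert cond((None, 42)) and not cond((None, 3))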
Example #15
 def build_join_pair(condition: Condition):
     if condition.target_table and condition.table_name != condition.target_table:
         if condition.operator != '=':
             raise DataBaseError(
                 'Comparison between different tables must be "="')
         pair = (condition.table_name,
                 condition.column_name), (condition.target_table,
                                          condition.target_column)
         return tuple(zip(*sorted(pair)))
     return None, None
Example #16
 def create_index(self, index_name, table_name, column_name):
     meta_handle, table_info = self.get_table_info(table_name,
                                                   "create index")
     if meta_handle.exists_index(index_name):
         raise DataBaseError(f"Indexes {index_name} not exists.")
     if table_info.exists_index(column_name):
         meta_handle.create_index(index_name, table_name, column_name)
         return
     index = self._IM.create_index(self.using_db, table_name)
     table_info.create_index(column_name, index.root_id)
     col_id = table_info.get_col_index(column_name)
     if col_id is None:
         raise DataBaseError(f"Column not exists.")
     record_handle = self._RM.open_file(self.get_table_path(table_name))
     scanner = FileScan(record_handle)
     for record in scanner:
         data = table_info.load_record(record)
         key = data[col_id]
         index.insert(key, record.rid)
     meta_handle.create_index(index_name, table_name, column_name)
Example #17
 def rename_table(self, old_name, new_name):
     if old_name not in self._db_info._tbMap:
         raise DataBaseError(f"Table {old_name} not in database.")
     tbInfo = self._db_info._tbMap[old_name]
     self._db_info._tbMap.pop(old_name)
     self._db_info._tbMap[new_name] = tbInfo
     for index_name in self._db_info._index_map:
         pair = self._db_info._index_map[index_name]
         if pair[0] == old_name:
             self._db_info._index_map[index_name] = (new_name, pair[1])
     self._dump()
Example #18
 def create_table(self, tb_info: TableInfo):
     if self.using_db is None:
         raise DataBaseError(f"No using database to create table")
     meta_handle = self._MM.open_meta(self.using_db)
     meta_handle.add_table(tb_info)
     '''
     # DEBUG INFO
     for i in tb_info._colindex.keys():
         print(i._name, ":", tb_info._colindex[i])
     '''
     record_length = tb_info.total_size
     self._RM.create_file(self.get_table_path(tb_info._name), record_length)
Example #19
 def serialize(value_, type_: str):
     if type_ == "INT":
         if value_ is None:
             value_ = settings.NULL_VALUE
         elif not isinstance(value_, int):
             raise DataBaseError(f"Expect INT but get {value_} instead")
         return struct.pack('<q', value_)
     elif type_ == "FLOAT":
         if value_ is None:
             value_ = settings.NULL_VALUE
         elif not isinstance(value_, Number):
             raise DataBaseError(f"Expect FLOAT but get {value_} instead")
         return struct.pack('<d', value_)
     elif type_ == "DATE":
         if value_ is None:
             day = settings.NULL_VALUE
         else:
             day = Converter.parse_date(value_).toordinal()
         return struct.pack('<q', day)
     else:
         raise DataBaseError("Unsupported type.")
Example #20
 def deserialize(data: np.ndarray, type_):
     if type_ == "VARCHAR":
         value = None if data[0] else data.tobytes()[1:].rstrip(
             b'\x00').decode('utf-8')
     elif type_ == "INT":
         value = struct.unpack('<q', data)[0]
     elif type_ == "FLOAT":
         value = struct.unpack('<d', data)[0]
     elif type_ == "DATE":
         value = struct.unpack('<q', data)[0]
         if value > 0:
             value = date.fromordinal(value)
     else:
         raise DataBaseError("Unsupported type.")
     return None if value == settings.NULL_VALUE else value
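
serialize/deserialize agree on a fixed layout: INT and DATE are 8-byte little-endian integers (dates stored as ordinals) and FLOAT is an 8-byte double. A standalone round-trip sketch of the DATE case, with a made-up date:

 import struct
 from datetime import date
 import numpy as np

 packed = struct.pack('<q', date(2024, 7, 1).toordinal())   # serialize: date -> ordinal -> 8 bytes
 data = np.frombuffer(packed, dtype=np.uint8)                # the on-disk uint8 view
 assert date.fromordinal(struct.unpack('<q', data)[0]) == date(2024, 7, 1)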
Example #21
 def drop_db(self, name):
     if name not in self.dbs:
         raise DataBaseError(f"Can't drop non-existing database {name}")
     db_path = self.get_db_path(name)
     assert db_path.exists()
     self._IM.close_handler(name)
     self._MM.close_meta(name)
     for each in db_path.iterdir():
         if each.suffix == settings.TABLE_FILE_SUFFIX:
             self._RM.close_file(str(each))
         if each.suffix == settings.INDEX_FILE_SUFFIX:
             pass
         each.unlink()
     db_path.rmdir()
     self.dbs.remove(name)
     if self.using_db == name:
         self.using_db = None
         return QueryResult(change_db='None')
Example #22
 def use_db(self, name):
     if name not in self.dbs:
         raise DataBaseError(f"Can't use not existing database {name}")
     self.using_db = name
     return QueryResult(change_db=name)
Example #23
File: info.py Project: rcy17/pybase
 def get_index_info(self, index_name):
     if index_name not in self._index_map:
         raise DataBaseError("Index name not exists.")
     return self._index_map[index_name]
Example #24
File: info.py Project: rcy17/pybase
 def drop_index(self, index_name):
     if index_name not in self._index_map:
         raise DataBaseError("Index name not exists.")
     self._index_map.pop(index_name)
Example #25
File: info.py Project: rcy17/pybase
 def create_index(self, index_name, database_name, column_name):
     if index_name in self._index_map:
         raise DataBaseError("Index name already exists.")
     self._index_map[index_name] = (database_name, column_name)
Example #26
 def drop_table(self, table_name):
     if self.using_db is None:
         raise DataBaseError(f"No using database to create table")
     meta_handle = self._MM.open_meta(self.using_db)
     meta_handle.drop_table(table_name)
     self._RM.remove_file(self.get_table_path(table_name))
Example #27
 def get_table_info(self, table_name, error="execute sql"):
     if self.using_db is None:
         raise DataBaseError(f"No using database to {error}")
     meta_handle = self._MM.open_meta(self.using_db)
     table_info = meta_handle.get_table(table_name)
     return meta_handle, table_info
Example #28
    def select_records(self, selectors: Tuple[Selector],
                       table_names: Tuple[str,
                                          ...], conditions: Tuple[Condition],
                       group_by: Tuple[str, str]) -> QueryResult:
        def get_selected_data(column_to_data):
            column_to_data['*.*'] = next(iter(column_to_data.values()))
            return tuple(
                map(
                    lambda selector: selector.select(column_to_data[
                        selector.target()]), selectors))

        def set_table_name(item, table_name_attr, column_name_attr):
            _table = getattr(item, table_name_attr)
            _column = getattr(item, column_name_attr)
            if _column is None:
                return
            if _table is None:
                tables = column_to_table[_column]
                if len(tables) > 1:
                    raise DataBaseError(
                        f'Field {_column} is ambiguous when joining tables'
                    )
                if not tables:
                    raise DataBaseError(f'Field {_column} is unknown')
                setattr(item, table_name_attr, tables[0])

        if self.using_db is None:
            raise DataBaseError(f"No using database to select.")
        group_table, group_column = group_by

        meta = self._MM.open_meta(self.using_db)
        column_to_table = meta.build_column_to_table_map(table_names)

        for each in conditions + selectors:
            if isinstance(each, Condition):
                set_table_name(each, 'target_table', 'target_column')
            set_table_name(each, 'table_name', 'column_name')

        group_table = group_table or table_names[0]
        group_by = group_table + '.' + group_column
        types = set(selector.type for selector in selectors)
        if not group_column and SelectorType.Field in types and len(types) > 1:
            raise DataBaseError(
                "Select without group by shouldn't contain both field and aggregations"
            )
        if len(selectors) == 1 and not group_column and len(
                table_names) == 1 and selectors[0].type == SelectorType.Counter:
            # COUNT(*) can take a shortcut via table.header['record_number']
            file = self._RM.open_file(self.get_table_path(table_names[0]))
            data = (file.header['record_number'], )
            headers = (selectors[0].to_string(False), )
            return QueryResult(headers, data)

        result_map = {
            table_name: self.cond_scan_index(table_name, conditions)
            for table_name in table_names
        }
        result = result_map[table_names[0]] if len(
            table_names) == 1 else self.cond_join(result_map, conditions)
        prefix = len(table_names) > 1
        if group_column:

            def make_row(group):
                _data_map = {
                    _header: _data
                    for _header, _data in zip(result.headers, zip(*group))
                }
                return get_selected_data(_data_map)

            index = result.get_header_index(group_by)
            groups = defaultdict(list)
            for row in result.data:
                groups[row[index]].append(row)
            if selectors[0].type == SelectorType.All:
                assert len(selectors) == 1
                data = tuple(group[0] for group in groups.values())
                return QueryResult(result.headers, data)
            data = tuple(map(make_row, groups.values()))
        else:
            if selectors[0].type == SelectorType.All:
                assert len(selectors) == 1
                return result
            if SelectorType.Field in types:  # No aggregation

                def take_columns(_row):
                    return tuple(_row[each] for each in indexes)

                headers = tuple(selector.target() for selector in selectors)
                indexes = tuple(
                    result.get_header_index(header) for header in headers)
                data = tuple(map(take_columns, result.data))
            else:
                # Only aggregations
                if not result.data:
                    data = (None, ) * len(result.headers)
                else:
                    data_map = {
                        _header: _data
                        for _header, _data in zip(result.headers,
                                                  zip(*result.data))
                    }
                    data = (get_selected_data(data_map), )
        # Reset headers according to the table-name prefix
        headers = tuple(selector.to_string(prefix) for selector in selectors)
        return QueryResult(headers, data)
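
The GROUP BY path above buckets rows by the grouped column's index with a defaultdict, then runs the selectors over each bucket. A standalone sketch of that grouping step (column names and the aggregation are made up):

 from collections import defaultdict

 headers = ('user.city', 'user.age')
 rows = (('A', 20), ('B', 30), ('A', 40))
 index = headers.index('user.city')

 groups = defaultdict(list)
 for row in rows:
     groups[row[index]].append(row)

 # e.g. AVG(user.age) per city
 avg_age = {city: sum(r[1] for r in group) / len(group)
            for city, group in groups.items()}
 assert avg_age == {'A': 30.0, 'B': 30.0}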
Example #29
 def rename_index(self, old_index, new_index):
     if self.using_db is None:
         raise DataBaseError(f"No using database to remove index")
     meta_handler = self._MM.open_meta(self.using_db)
     meta_handler.rename_index(old_index, new_index)
Example #30
 def get_table(self, tbname) -> TableInfo:
     # print(tbname)
     if tbname not in self._db_info._tbMap:
         raise DataBaseError(f"There is not table named {tbname}")
     return self._db_info._tbMap[tbname]