def visitField_list(self, ctx: SQLParser.Field_listContext):
    name_to_column = {}
    foreign_keys = {}
    primary_key = None
    for field in ctx.field():
        if isinstance(field, SQLParser.Normal_fieldContext):
            name = to_str(field.Identifier())
            type_, size = field.type_().accept(self)
            name_to_column[name] = ColumnInfo(type_, name, size)
        elif isinstance(field, SQLParser.Foreign_key_fieldContext):
            field_name, table_name, refer_name = field.accept(self)
            if field_name in foreign_keys:
                raise DataBaseError(
                    f'Foreign key named {field_name} is duplicated')
            foreign_keys[field_name] = table_name, refer_name
        else:
            assert isinstance(field, SQLParser.Primary_key_fieldContext)
            names = field.accept(self)
            for name in names:
                if name not in name_to_column:
                    raise DataBaseError(f'Unknown field {name} in field list')
            if primary_key:
                raise DataBaseError('Only one primary key supported')
            primary_key = names
    return list(name_to_column.values()), foreign_keys, primary_key
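# A sketch of the visitor's output for a concrete statement (hypothetical,
# not from a test in the source; byte widths depend on how
# type_().accept(self) maps each type):
#   CREATE TABLE t(a INT, b VARCHAR(10), PRIMARY KEY (a))
# would return roughly
#   ([ColumnInfo('INT', 'a', size_a), ColumnInfo('VARCHAR', 'b', size_b)], {}, ['a'])
# i.e. the column list, an empty foreign-key map, and the primary-key names.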
def encode(size_list, type_list, total_size, value_list):
    if len(value_list) != len(size_list):
        raise DataBaseError(
            f'length of values ({len(value_list)}) != length of columns ({len(size_list)})'
        )
    record_data = np.zeros(shape=total_size, dtype=np.uint8)
    pos = 0
    for size_, type_, value_ in zip(size_list, type_list, value_list):
        if type_ == "VARCHAR":
            # First byte is a null flag: 1 for NULL, 0 for a real string.
            if value_ is None:
                length = 1
                bytes_ = (1, )
            else:
                if not isinstance(value_, str):
                    raise DataBaseError(
                        f"Expect VARCHAR({size_ - 1}) but got {value_} instead")
                bytes_ = (0, ) + tuple(value_.encode())
                length = len(bytes_)
            if length > size_:
                raise DataBaseError(
                    f"String length {length} exceeds VARCHAR({size_ - 1})")
            record_data[pos:pos + length] = bytes_
            record_data[pos + length:pos + size_] = 0
        else:
            record_data[pos:pos + size_] = list(
                Converter.serialize(value_, type_))
        pos += size_
    assert pos == total_size
    return record_data
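# A usage sketch with hypothetical values (not from the source): one INT
# column (8 bytes, little-endian int64 via Converter.serialize below) and one
# VARCHAR(8) column stored as a 1-byte null flag plus the string, so its
# on-disk size_ is 9.
row = encode(size_list=[8, 9], type_list=["INT", "VARCHAR"],
             total_size=17, value_list=[42, "abc"])
assert row[0] == 42                  # low byte of little-endian 42
assert row[8] == 0                   # null flag unset
assert bytes(row[9:12]) == b'abc'    # string bytes, zero-padded to size_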
def cond_join(self, results_map: dict, conditions) -> QueryResult:
    if self.using_db is None:
        raise DataBaseError("No using database to scan.")
    join_pair_map = {}

    def build_join_pair(condition: Condition):
        if condition.target_table and condition.table_name != condition.target_table:
            if condition.operator != '=':
                raise DataBaseError(
                    'Comparison between different tables must be "="')
            pair = ((condition.table_name, condition.column_name),
                    (condition.target_table, condition.target_column))
            # Sort the pair so (a, b) and (b, a) produce the same key.
            return tuple(zip(*sorted(pair)))
        return None, None

    for join_pair_key, join_pair_col in map(build_join_pair, conditions):
        if join_pair_col is None:
            continue
        if join_pair_key in join_pair_map:
            join_pair_map[join_pair_key][0].append(join_pair_col[0])
            join_pair_map[join_pair_key][1].append(join_pair_col[1])
        else:
            join_pair_map[join_pair_key] = ([join_pair_col[0]],
                                            [join_pair_col[1]])
    if not join_pair_map:
        raise DataBaseError('Join tables need a join condition')
    # Union-find over table names tracks which tables have already been
    # merged into the same intermediate result.
    union_set = {key: key for key in results_map.keys()}

    def union_set_find(x):
        if x != union_set[x]:
            union_set[x] = union_set_find(union_set[x])
        return union_set[x]

    def union_set_union(x, y):
        x = union_set_find(x)
        y = union_set_find(y)
        union_set[x] = y

    results = None
    for join_pair in join_pair_map:
        # Look up the intermediate result each table currently belongs to,
        # so chained joins build on earlier joins instead of stale inputs.
        outer: QueryResult = results_map[union_set_find(join_pair[0])]
        inner: QueryResult = results_map[union_set_find(join_pair[1])]
        outer_joined = tuple(join_pair[0] + "." + col
                             for col in join_pair_map[join_pair][0])
        inner_joined = tuple(join_pair[1] + "." + col
                             for col in join_pair_map[join_pair][1])
        new_result = nested_loops_join(outer, inner, outer_joined,
                                       inner_joined)
        union_set_union(join_pair[0], join_pair[1])
        new_key = union_set_find(join_pair[0])
        results_map[new_key] = new_result
        results = new_result
    return results
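# nested_loops_join is used above but not defined in this excerpt; a minimal
# sketch of what it is assumed to do (not the source implementation), given
# that QueryResult(headers, data) holds row tuples and exposes
# get_header_index(header) -> column position:
def nested_loops_join_sketch(outer, inner, outer_joined, inner_joined):
    outer_idx = tuple(outer.get_header_index(h) for h in outer_joined)
    inner_idx = tuple(inner.get_header_index(h) for h in inner_joined)
    data = tuple(
        o_row + i_row for o_row in outer.data for i_row in inner.data
        if all(o_row[i] == i_row[j] for i, j in zip(outer_idx, inner_idx)))
    return QueryResult(outer.headers + inner.headers, data)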
def drop_index(self, index_name):
    if self.using_db is None:
        raise DataBaseError("No using database to drop index")
    meta_handle = self._MM.open_meta(self.using_db)
    # Check existence before looking the index up, so the caller gets the
    # specific error message rather than a lookup failure.
    if not meta_handle.exists_index(index_name):
        raise DataBaseError(f"Index {index_name} does not exist.")
    table_name, column_name = meta_handle.get_index_info(index_name)
    table_info = meta_handle.get_table(table_name)
    table_info.drop_index(column_name)
    meta_handle.drop_index(index_name)
    self._MM.close_meta(self.using_db)
def check_value_map(self, value_map: dict):
    for column_name, value in value_map.items():
        column: ColumnInfo = self.column_map.get(column_name)
        if column is None:
            raise DataBaseError(f'Field {column_name} is unknown')
        if type(value) not in ACCEPT_TYPE[column.type]:
            raise DataBaseError(
                f'Field {column_name} expects {column.type} but got {value} instead'
            )
        if column.type == 'DATE':
            value_map[column_name] = Converter.parse_date(value)
def result_to_value(result: QueryResult, is_in):
    if len(result.headers) > 1:
        raise DataBaseError('Nested select must return exactly one column')
    # Flatten the single-column rows into one tuple of values.
    value = sum(result.data, ())
    if not is_in:
        if len(result.data) != 1:
            raise DataBaseError(
                f'One value of {result.headers[0]} expected but got {len(result.data)}'
            )
        value, = value
    return value
def rename_table(self, old_name, new_name):
    if self.using_db is None:
        raise DataBaseError("No using database to rename table")
    meta_handle = self._MM.open_meta(self.using_db)
    meta_handle.rename_table(old_name, new_name)
    self._RM.rename_file(self.get_table_path(old_name),
                         self.get_table_path(new_name))
def drop_column(self, table_name, column_name):
    meta_handle, table_info = self.get_table_info(table_name, "drop column")
    index = table_info.get_col_index(column_name)
    if index is None:
        raise DataBaseError(f"Column {column_name} does not exist.")
    old_table_info = deepcopy(table_info)
    meta_handle.drop_column(table_name, column_name)
    self._RM.create_file(self.get_table_path(table_name + ".copy"),
                         table_info.total_size)
    record_handle = self._RM.open_file(self.get_table_path(table_name))
    new_record_handle = self._RM.open_file(
        self.get_table_path(table_name + ".copy"))
    scanner = FileScan(record_handle)
    # Rewrite every record without the dropped column.
    for record in scanner:
        value_list = list(old_table_info.load_record(record))
        value_list.pop(index)
        data = table_info.build_record(value_list)
        new_record_handle.insert_record(data)
    self._RM.close_file(self.get_table_path(table_name))
    self._RM.close_file(self.get_table_path(table_name + ".copy"))
    # Replace the original file with the rebuilt copy.
    self._RM.replace_file(self.get_table_path(table_name + ".copy"),
                          self.get_table_path(table_name))
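# FileScan (used here and in add_column/create_index below) is not defined in
# this excerpt; a minimal sketch of the record iterator it is assumed to be,
# with hypothetical helper names all_rids()/get_record() on the record handle:
class FileScanSketch:
    def __init__(self, record_handle):
        self.handle = record_handle

    def __iter__(self):
        # Yield every live record in the heap file.
        for rid in self.handle.all_rids():
            yield self.handle.get_record(rid)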
def add_column(self, table_name, column_info: ColumnInfo):
    meta_handle, table_info = self.get_table_info(table_name, "add column")
    if table_info.get_col_index(column_info.name) is not None:
        raise DataBaseError(f"Column {column_info.name} already exists.")
    old_table_info = deepcopy(table_info)
    meta_handle.add_col(table_name, column_info)
    self._RM.create_file(self.get_table_path(table_name + ".copy"),
                         table_info.total_size)
    record_handle = self._RM.open_file(self.get_table_path(table_name))
    new_record_handle = self._RM.open_file(
        self.get_table_path(table_name + ".copy"))
    scanner = FileScan(record_handle)
    for record in scanner:
        value_list = list(old_table_info.load_record(record))
        # The new column gets its default value, or NULL if none was given.
        value_list.append(column_info.default)
        data = table_info.build_record(value_list)
        new_record_handle.insert_record(data)
    self._RM.close_file(self.get_table_path(table_name))
    self._RM.close_file(self.get_table_path(table_name + ".copy"))
    # Replace the original file with the rebuilt copy.
    self._RM.replace_file(self.get_table_path(table_name + ".copy"),
                          self.get_table_path(table_name))
def show_tables(self):
    if self.using_db is None:
        raise DataBaseError("No using database to show tables")
    return [
        file.stem for file in (self._base_path / self.using_db).iterdir()
        if file.suffix == '.table'
    ]
def create_db(self, name):
    if name in self.dbs:
        raise DataBaseError(f"Can't create existing database {name}")
    db_path = self.get_db_path(name)
    assert not db_path.exists()
    db_path.mkdir(parents=True)
    self.dbs.add(name)
def parse_date(value: str):
    value = value.replace("/", "-")
    value_list = value.split("-")
    try:
        return date(*map(int, value_list))
    except (TypeError, AttributeError, ValueError):
        raise DataBaseError(f"Expect DATE but got {value} instead")
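# A quick usage sketch (assuming this is exposed as Converter.parse_date, as
# the call sites elsewhere in this module suggest):
assert Converter.parse_date("2021-06-01") == date(2021, 6, 1)
assert Converter.parse_date("2021/6/1") == date(2021, 6, 1)
# Converter.parse_date("not-a-date") raises DataBaseError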
def build_condition_func(condition: Condition):
    if condition.table_name and condition.table_name != table_name:
        return None
    cond_index = table_info.get_col_index(condition.column_name)
    if cond_index is None:
        raise DataBaseError(
            f'Field {condition.column_name} for table {table_name} is unknown'
        )
    type_ = table_info.type_list[cond_index]
    if condition.type == ConditionType.Compare:
        if condition.target_column:
            # Column-to-column comparison within the same table.
            if condition.target_table != table_name:
                return None
            cond_index_2 = table_info.get_col_index(condition.target_column)
            return compare_to_attr(cond_index, condition.operator,
                                   cond_index_2)
        else:
            value = condition.value
            if type_ in ('INT', 'FLOAT'):
                if not isinstance(value, (int, float)):
                    raise DataBaseError(
                        f"Expect {type_} but got '{value}' instead")
            elif type_ == 'DATE':
                value = Converter.parse_date(value)
            elif type_ == 'VARCHAR':
                if not isinstance(value, str):
                    raise DataBaseError(
                        f'Expect VARCHAR but got {value} instead')
            return compare_to_value(cond_index, condition.operator, value)
    elif condition.type == ConditionType.In:
        values = condition.value
        if type_ == 'DATE':
            values = tuple(map(Converter.parse_date, values))
        return in_value_list(cond_index, values)
    elif condition.type == ConditionType.Like:
        if type_ != 'VARCHAR':
            raise DataBaseError(
                f'LIKE operator expects VARCHAR but got {condition.column_name}:{type_}'
            )
        return like_pattern(cond_index, condition.value)
    elif condition.type == ConditionType.Null:
        assert isinstance(condition.value, bool)
        return null_check(cond_index, condition.value)
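# compare_to_value (and its siblings compare_to_attr, in_value_list,
# like_pattern, null_check) are referenced above but not shown in this
# excerpt; a minimal sketch of how compare_to_value might build a row
# predicate (an assumption, not the source implementation):
import operator

_OPS = {'=': operator.eq, '<>': operator.ne, '<': operator.lt,
        '<=': operator.le, '>': operator.gt, '>=': operator.ge}

def compare_to_value_sketch(col_index, op, value):
    cmp = _OPS[op]
    # NULL never satisfies a comparison, mirroring SQL semantics.
    return lambda row: row[col_index] is not None and cmp(row[col_index], value)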
def create_index(self, index_name, table_name, column_name):
    meta_handle, table_info = self.get_table_info(table_name, "create index")
    if meta_handle.exists_index(index_name):
        raise DataBaseError(f"Index {index_name} already exists.")
    col_id = table_info.get_col_index(column_name)
    if col_id is None:
        raise DataBaseError(f"Column {column_name} does not exist.")
    if table_info.exists_index(column_name):
        # The column already has a physical index; just register the name.
        meta_handle.create_index(index_name, table_name, column_name)
        return
    index = self._IM.create_index(self.using_db, table_name)
    table_info.create_index(column_name, index.root_id)
    record_handle = self._RM.open_file(self.get_table_path(table_name))
    scanner = FileScan(record_handle)
    # Backfill the new index from every existing record.
    for record in scanner:
        data = table_info.load_record(record)
        key = data[col_id]
        index.insert(key, record.rid)
    meta_handle.create_index(index_name, table_name, column_name)
def rename_table(self, old_name, new_name):
    if old_name not in self._db_info._tbMap:
        raise DataBaseError(f"Table {old_name} not in database.")
    self._db_info._tbMap[new_name] = self._db_info._tbMap.pop(old_name)
    # Re-point any index registered on the renamed table.
    for index_name, pair in self._db_info._index_map.items():
        if pair[0] == old_name:
            self._db_info._index_map[index_name] = (new_name, pair[1])
    self._dump()
def create_table(self, tb_info: TableInfo):
    if self.using_db is None:
        raise DataBaseError("No using database to create table")
    meta_handle = self._MM.open_meta(self.using_db)
    meta_handle.add_table(tb_info)
    record_length = tb_info.total_size
    self._RM.create_file(self.get_table_path(tb_info._name), record_length)
def serialize(value_, type_: str):
    if type_ == "INT":
        if value_ is None:
            value_ = settings.NULL_VALUE
        elif not isinstance(value_, int):
            raise DataBaseError(f"Expect INT but got {value_} instead")
        return struct.pack('<q', value_)
    elif type_ == "FLOAT":
        if value_ is None:
            value_ = settings.NULL_VALUE
        elif not isinstance(value_, Number):
            raise DataBaseError(f"Expect FLOAT but got {value_} instead")
        return struct.pack('<d', value_)
    elif type_ == "DATE":
        if value_ is None:
            day = settings.NULL_VALUE
        else:
            day = Converter.parse_date(value_).toordinal()
        return struct.pack('<q', day)
    else:
        raise DataBaseError("Unsupported type.")
def deserialize(data: np.ndarray, type_):
    if type_ == "VARCHAR":
        # First byte is the null flag; the rest is the zero-padded string.
        value = None if data[0] else data.tobytes()[1:].rstrip(
            b'\x00').decode('utf-8')
    elif type_ == "INT":
        value = struct.unpack('<q', data)[0]
    elif type_ == "FLOAT":
        value = struct.unpack('<d', data)[0]
    elif type_ == "DATE":
        value = struct.unpack('<q', data)[0]
        if value > 0:
            value = date.fromordinal(value)
    else:
        raise DataBaseError("Unsupported type.")
    return None if value == settings.NULL_VALUE else value
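# A round-trip sketch (assumes serialize/deserialize live on Converter, as
# the call sites above use, and that settings.NULL_VALUE is a sentinel no
# real value takes):
import numpy as np

raw = np.frombuffer(Converter.serialize(42, "INT"), dtype=np.uint8)
assert Converter.deserialize(raw, "INT") == 42
raw = np.frombuffer(Converter.serialize(None, "INT"), dtype=np.uint8)
assert Converter.deserialize(raw, "INT") is None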
def drop_db(self, name):
    if name not in self.dbs:
        raise DataBaseError(f"Can't drop non-existent database {name}")
    db_path = self.get_db_path(name)
    assert db_path.exists()
    self._IM.close_handler(name)
    self._MM.close_meta(name)
    for each in db_path.iterdir():
        if each.suffix == settings.TABLE_FILE_SUFFIX:
            self._RM.close_file(str(each))
        # Index files need no per-file close; close_handler above covers them.
        each.unlink()
    db_path.rmdir()
    self.dbs.remove(name)
    if self.using_db == name:
        self.using_db = None
    return QueryResult(change_db='None')
def use_db(self, name):
    if name not in self.dbs:
        raise DataBaseError(f"Can't use non-existent database {name}")
    self.using_db = name
    return QueryResult(change_db=name)
def get_index_info(self, index_name):
    if index_name not in self._index_map:
        raise DataBaseError(f"Index {index_name} does not exist.")
    return self._index_map[index_name]
def drop_index(self, index_name):
    if index_name not in self._index_map:
        raise DataBaseError(f"Index {index_name} does not exist.")
    self._index_map.pop(index_name)
def create_index(self, index_name, table_name, column_name):
    if index_name in self._index_map:
        raise DataBaseError(f"Index {index_name} already exists.")
    # Maps index name -> (table_name, column_name); rename_table above relies
    # on the first element being the table name.
    self._index_map[index_name] = (table_name, column_name)
def drop_table(self, table_name):
    if self.using_db is None:
        raise DataBaseError("No using database to drop table")
    meta_handle = self._MM.open_meta(self.using_db)
    meta_handle.drop_table(table_name)
    self._RM.remove_file(self.get_table_path(table_name))
def get_table_info(self, table_name, error="execute sql"):
    if self.using_db is None:
        raise DataBaseError(f"No using database to {error}")
    meta_handle = self._MM.open_meta(self.using_db)
    table_info = meta_handle.get_table(table_name)
    return meta_handle, table_info
def select_records(self, selectors: Tuple[Selector],
                   table_names: Tuple[str, ...],
                   conditions: Tuple[Condition],
                   group_by: Tuple[str, str]) -> QueryResult:
    def get_selected_data(column_to_data):
        column_to_data['*.*'] = next(iter(column_to_data.values()))
        return tuple(
            map(
                lambda selector: selector.select(column_to_data[
                    selector.target()]), selectors))

    def set_table_name(item, table_name_attr, column_name_attr):
        _table = getattr(item, table_name_attr)
        _column = getattr(item, column_name_attr)
        if _column is None:
            return
        if _table is None:
            tables = column_to_table[_column]
            if len(tables) > 1:
                raise DataBaseError(
                    f'Field {_column} is ambiguous when joining tables')
            if not tables:
                raise DataBaseError(f'Field {_column} is unknown')
            setattr(item, table_name_attr, tables[0])

    if self.using_db is None:
        raise DataBaseError("No using database to select.")
    group_table, group_column = group_by
    meta = self._MM.open_meta(self.using_db)
    column_to_table = meta.build_column_to_table_map(table_names)
    # Resolve implicit table names on every condition and selector.
    for each in conditions + selectors:
        if isinstance(each, Condition):
            set_table_name(each, 'target_table', 'target_column')
        set_table_name(each, 'table_name', 'column_name')
    if group_column:
        group_table = group_table or table_names[0]
        group_by = group_table + '.' + group_column
    types = set(selector.type for selector in selectors)
    if not group_column and SelectorType.Field in types and len(types) > 1:
        raise DataBaseError(
            "Select without group by shouldn't contain both fields and aggregations"
        )
    if len(selectors) == 1 and not conditions and not group_column and len(
            table_names) == 1 and selectors[0].type == SelectorType.Counter:
        # Unfiltered COUNT(*) has a shortcut: read the record count straight
        # from the table file header.
        file = self._RM.open_file(self.get_table_path(table_names[0]))
        data = (file.header['record_number'], )
        headers = (selectors[0].to_string(False), )
        return QueryResult(headers, data)
    result_map = {
        table_name: self.cond_scan_index(table_name, conditions)
        for table_name in table_names
    }
    result = result_map[table_names[0]] if len(
        table_names) == 1 else self.cond_join(result_map, conditions)
    prefix = len(table_names) > 1
    if group_column:
        def make_row(group):
            _data_map = {
                _header: _data
                for _header, _data in zip(result.headers, zip(*group))
            }
            return get_selected_data(_data_map)

        index = result.get_header_index(group_by)
        groups = defaultdict(list)
        for row in result.data:
            groups[row[index]].append(row)
        if selectors[0].type == SelectorType.All:
            # SELECT * with GROUP BY keeps one representative row per group.
            assert len(selectors) == 1
            data = tuple(group[0] for group in groups.values())
            return QueryResult(result.headers, data)
        data = tuple(map(make_row, groups.values()))
    else:
        if selectors[0].type == SelectorType.All:
            assert len(selectors) == 1
            return result
        if SelectorType.Field in types:
            # Plain projection, no aggregation.
            def take_columns(_row):
                return tuple(_row[each] for each in indexes)

            headers = tuple(selector.target() for selector in selectors)
            indexes = tuple(
                result.get_header_index(header) for header in headers)
            data = tuple(map(take_columns, result.data))
        else:
            # Aggregations only.
            if not result.data:
                data = (None, ) * len(result.headers)
            else:
                data_map = {
                    _header: _data
                    for _header, _data in zip(result.headers,
                                              zip(*result.data))
                }
                data = (get_selected_data(data_map), )
    # Rebuild headers, prefixing table names only for multi-table queries.
    headers = tuple(selector.to_string(prefix) for selector in selectors)
    return QueryResult(headers, data)
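# Selector is consumed above through .type, .target(), .select(column_data)
# and .to_string(prefix); a minimal sketch of one aggregation selector (an
# assumption for illustration, not the source class):
class CounterSelectorSketch:
    type = SelectorType.Counter

    def __init__(self, table_name, column_name):
        self.table_name = table_name
        self.column_name = column_name

    def target(self):
        return self.table_name + '.' + self.column_name

    def select(self, column_data):
        # COUNT(col): count the non-NULL values in the column.
        return sum(value is not None for value in column_data)

    def to_string(self, prefix):
        return f'COUNT({self.target() if prefix else self.column_name})'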
def rename_index(self, old_index, new_index):
    if self.using_db is None:
        raise DataBaseError("No using database to rename index")
    meta_handler = self._MM.open_meta(self.using_db)
    meta_handler.rename_index(old_index, new_index)
def get_table(self, tbname) -> TableInfo:
    if tbname not in self._db_info._tbMap:
        raise DataBaseError(f"There is no table named {tbname}")
    return self._db_info._tbMap[tbname]