Esempio n. 1
0
    def _inner_join(self, table_right: Table, condition):
        '''
        Join table (left) with a supplied table (right) where condition is met.
        '''
        # get columns and operator
        column_name_left, operator, column_name_right = self._parse_condition(
            condition, both_columns=True)
        # try to find both columns, if you fail raise error
        try:
            column_index_left = self.column_names.index(column_name_left)
            column_index_right = table_right.column_names.index(
                column_name_right)
        except:
            raise Exception(f'Columns dont exist in one or both tables.')

        # get the column names of both tables with the table name in front
        # ex. for left -> name becomes left_table_name_name etc
        left_names = [
            f'{self._name}_{colname}' for colname in self.column_names
        ]
        right_names = [
            f'{table_right._name}_{colname}'
            for colname in table_right.column_names
        ]

        # define the new tables name, its column names and types
        join_table_name = f'{self._name}_join_{table_right._name}'
        join_table_colnames = left_names + right_names
        join_table_coltypes = self.column_types + table_right.column_types
        join_table = Table(name=join_table_name,
                           column_names=join_table_colnames,
                           column_types=join_table_coltypes)

        # count the number of operations (<,> etc)
        no_of_ops = 0
        # this code is dumb on purpose... it needs to illustrate the underline technique
        # for each value in left column and right column, if condition, append the corresponding row to the new table
        for row_left in self.data:
            left_value = row_left[column_index_left]
            for row_right in table_right.data:
                right_value = row_right[column_index_right]
                no_of_ops += 1
                if get_op(operator, left_value, right_value):  #EQ_OP
                    join_table._insert(row_left + row_right)

        print(f'## Select ops no. -> {no_of_ops}')
        print(f'# Left table size -> {len(self.data)}')
        print(f'# Right table size -> {len(table_right.data)}')

        return join_table
Esempio n. 2
0
    def _select_where_with_btree(self, return_columns, bt, condition, order_by=None, asc=False, top_k=None):

        # if * return all columns, else find the column indexes for the columns specified
        if return_columns == '*':
            return_cols = [i for i in range(len(self.column_names))]
        else:
            return_cols = [self.column_names.index(colname) for colname in return_columns]


        column_name, operator, value = self._parse_condition(condition)

        print("1: ", type(value), " 2: ", self.column_types[self.column_names.index(column_name)])

        # if the column in condition is not a primary key, abort the select
        if column_name != self.column_names[self.pk_idx]:
            print('Column is not PK. Aborting')

        # here we run the same select twice, sequentially and using the btree.
        # we then check the results match and compare performance (number of operation)
        column = self.columns[self.column_names.index(column_name)]

        # sequential
        rows1 = []
        opsseq = 0
        for ind, x in enumerate(column):
            opsseq+=1
            if get_op(operator, x, value):
                rows1.append(ind)

        print(f'Without Btree -> {opsseq} comparison operations')
        # btree find
        rows = bt.find(operator, value)
        print('### Seq result ###')
        print(rows1)
        print('### Index result ###')
        print(rows)

        # same as simple select from now on
        rows = rows[:top_k]
        # TODO: this needs to be dumbed down
        dict = {(key):([[self.data[i][j] for j in return_cols] for i in rows] if key=="data" else value) for key,value in self.__dict__.items()}

        dict['column_names'] = [self.column_names[i] for i in return_cols]
        dict['column_types']   = [self.column_types[i] for i in return_cols]
        dict['_no_of_columns'] = len(return_cols)

        if order_by is None:
            return Table(load=dict)
        else:
            return Table(load=dict).order_by(order_by, asc)
Esempio n. 3
0
    def _update_row(self, set_value, set_column, condition):
        '''
        update where Condition
        '''
        # parse the condition
        column_name, operator, value = self._parse_condition(condition)

        # get the condition and the set column
        column = self.column_by_name(column_name)
        set_column_idx = self.column_names.index(set_column)

        # set_columns_indx = [self.column_names.index(set_column_name) for set_column_name in set_column_names]

        # for each value in column, if condition, replace it with set_value
        for row_ind, column_value in enumerate(column):
            if get_op(operator, column_value, value):
                self.data[row_ind][set_column_idx] = set_value
Esempio n. 4
0
    def _select_where(self, return_columns, condition=None, order_by=None, asc=False, top_k=None):
        '''
        Select and return a table containing specified columns and rows where condition is met
        '''

        # if * return all columns, else find the column indexes for the columns specified
        if return_columns == '*':
            return_cols = [i for i in range(len(self.column_names))]
        elif isinstance(return_columns, str):
            raise Exception(
                f'Return columns should be "*" or of type list. (the second parameter is return_columns not condition)')
        else:
            return_cols = [self.column_names.index(colname) for colname in return_columns]

        # if condition is None, return all rows
        # if not, return the rows with values where condition is met for value
        if condition is not None:
            column_name, operator, value = self._parse_condition(condition)
            column = self.columns[self.column_names.index(column_name)]
            rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
        else:
            rows = [i for i in range(len(self.columns[0]))]

        # top k rows
        rows = rows[:top_k]
        # copy the old dict, but only the rows and columns of data with index in rows/columns (the indexes that we want returned)
        dict = {(key): ([[self.data[i][j] for j in return_cols] for i in rows] if key == "data" else value) for
                key, value in self.__dict__.items()}

        # we need to set the new column names/types and no of columns, since we might
        # only return some columns
        dict['column_names'] = [self.column_names[i] for i in return_cols]
        dict['column_types'] = [self.column_types[i] for i in return_cols]
        dict['_no_of_columns'] = len(return_cols)

        # order by the return table if specified
        if order_by is None:
            return Table(load=dict)
        else:
            return Table(load=dict).order_by(order_by, asc)
Esempio n. 5
0
    def _delete_where(self, condition):
        '''
        Deletes rows where condition is met.

        Important: delete replaces the rows to be deleted with rows filled with Nones.
        These rows are then appended to the insert_stack.

        Args:
            condition: string. A condition using the following format:
                'column[<,<=,==,>=,>]value' or
                'value[<,<=,==,>=,>]column'.
                
                Operatores supported: (<,<=,==,>=,>)
        '''
        column_name, operator, value = self._parse_condition(condition)

        indexes_to_del = []

        column = self.column_by_name(column_name)
        for index, row_value in enumerate(column):
            if get_op(operator, row_value, value):
                indexes_to_del.append(index)

        # we pop from highest to lowest index in order to avoid removing the wrong item
        # since we dont delete, we dont have to to pop in that order, but since delete is used
        # to delete from meta tables too, we still implement it.

        for index in sorted(indexes_to_del, reverse=True):
            if self._name[:4] != 'meta':
                # if the table is not a metatable, replace the row with a row of nones
                self.data[index] = [
                    None for _ in range(len(self.column_names))
                ]
            else:
                self.data.pop(index)

        # self._update()
        # we have to return the deleted indexes, since they will be appended to the insert_stack
        return indexes_to_del
Esempio n. 6
0
    def _update_row(self, set_value, set_column, condition):
        '''
        update where Condition
        '''
        # parse the condition
        column_name, operator, value = self._parse_condition(condition)

        # get the condition and the set column
        column = self.columns[self.column_names.index(column_name)]
        if isinstance(set_column, str):
            set_column_idx = self.column_names.index(set_column)
            # set_columns_indx = [self.column_names.index(set_column_name) for set_column_name in set_column_names]

            # check if user tries to set same values for p_k column
            if set_column_idx == self.pk_idx and set_value in self.columns[
                    set_column_idx]:
                raise ValueError(
                    f'## ERROR -> Column {set_column} is primary key and can not have same values.'
                )
            # for each value in column, if condition, replace it with set_value
        for row_ind, column_value in enumerate(column):
            if get_op(operator, column_value, value):
                self.data[row_ind][set_column_idx] = set_value
        self._update()
Esempio n. 7
0
    def _outer_join(self, table_right: Table, condition):
            '''
            Join table (left) with a supplied table (right).
            Show all rows from both tables and match the rows where the condition is met
            '''
            # get columns and operator
            column_name_left, operator, column_name_right = self._parse_condition(condition, join=True)
            # try to find both columns, if you fail raise error
            try:
                column_index_left = self.column_names.index(column_name_left)
                column_index_right = table_right.column_names.index(column_name_right)
            except:
                raise Exception(f'Columns dont exist in one or both tables.')

            # get the column names of both tables with the table name in front
            # ex. for left -> name becomes left_table_name_name etc
            left_names = [f'{self._name}_{colname}' for colname in self.column_names]
            right_names = [f'{table_right._name}_{colname}' for colname in table_right.column_names]

            # define the new tables name, its column names and types
            join_table_name = f'{self._name}_outer_join_{table_right._name}'
            join_table_colnames = left_names+right_names
            join_table_coltypes = self.column_types+table_right.column_types
            join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes)

            # count the number of operations (<,> etc)
            no_of_ops = 0
            # this code is dumb on purpose... it needs to illustrate the underline technique
            # for each value in left column and right column, if condition, append the corresponding row to the new table
            row_null =[]

            #outer join works with a combination of the left and right join
            #ONLY DIFFERENCE when doing the right_join part we dont insert if there is a match. Only when there is not

            for row_left in self.data:
                row_null.clear()
                hasMatch =  False
                left_value = row_left[column_index_left]
                for row_right in table_right.data:
                    right_value = row_right[column_index_right]
                    no_of_ops+=1
                    if get_op(operator, left_value, right_value): #EQ_OP
                        join_table._insert(row_left+row_right)
                        hasMatch = True
                if not hasMatch:
                    for column in table_right.column_types:
                        if column == type(1) or column == type(1.2):
                            row_null.append(0)
                        else:
                            row_null.append(None)
                    join_table._insert(row_left + row_null)

            for row_right in table_right.data:
                row_null.clear()
                hasMatch =  False
                right_value = row_right[column_index_right]
                for row_left in self.data:
                    left_value = row_left[column_index_left]
                    no_of_ops+=1
                    if get_op(operator, left_value, right_value): #EQ_OP
                        hasMatch = True
                if not hasMatch:
                    for column in self.column_types:
                        if column == type(1) or column == type(1.2):
                            row_null.append(0)
                        else:
                            row_null.append(None)
                    join_table._insert(row_null + row_right)




            print(f'## Select ops no. -> {no_of_ops}')
            print(f'# Left table size -> {len(self.data)}')
            print(f'# Right table size -> {len(table_right.data)}')

            return join_table
Esempio n. 8
0
    def _inner_join(self, table_right: Table, condition):
        '''
        Join table (left) with a supplied table (right) where condition is met.

        Args:
            condition: string. A condition using the following format:
                'column[<,<=,==,>=,>]value' or
                'value[<,<=,==,>=,>]column'.
                
                Operatores supported: (<,<=,==,>=,>)
        '''
        # get columns and operator
        column_name_left, operator, column_name_right = self._parse_condition(
            condition, join=True)
        # try to find both columns, if you fail raise error
        try:
            column_index_left = self.column_names.index(column_name_left)
        except:
            raise Exception(
                f'Column "{column_name_left}" dont exist in left table. Valid columns: {self.column_names}.'
            )

        try:
            column_index_right = table_right.column_names.index(
                column_name_right)
        except:
            raise Exception(
                f'Column "{column_name_right}" dont exist in right table. Valid columns: {table_right.column_names}.'
            )

        # get the column names of both tables with the table name in front
        # ex. for left -> name becomes left_table_name_name etc
        left_names = [
            f'{self._name}.{colname}' if self._name != '' else colname
            for colname in self.column_names
        ]
        right_names = [
            f'{table_right._name}.{colname}'
            if table_right._name != '' else colname
            for colname in table_right.column_names
        ]

        # define the new tables name, its column names and types
        join_table_name = ''
        join_table_colnames = left_names + right_names
        join_table_coltypes = self.column_types + table_right.column_types
        join_table = Table(name=join_table_name,
                           column_names=join_table_colnames,
                           column_types=join_table_coltypes)

        # count the number of operations (<,> etc)
        no_of_ops = 0
        # this code is dumb on purpose... it needs to illustrate the underline technique
        # for each value in left column and right column, if condition, append the corresponding row to the new table
        for row_left in self.data:
            left_value = row_left[column_index_left]
            for row_right in table_right.data:
                right_value = row_right[column_index_right]
                no_of_ops += 1
                if get_op(operator, left_value, right_value):  #EQ_OP
                    join_table._insert(row_left + row_right)

        return join_table
Esempio n. 9
0
    def _select_where(self,
                      return_columns,
                      condition=None,
                      order_by=None,
                      desc=True,
                      top_k=None):
        '''
        Select and return a table containing specified columns and rows where condition is met.

        Args:
            return_columns: list. The columns to be returned.
            condition: string. A condition using the following format:
                'column[<,<=,==,>=,>]value' or
                'value[<,<=,==,>=,>]column'.
                
                Operatores supported: (<,<=,==,>=,>)
            order_by: string. A column name that signals that the resulting table should be ordered based on it (no order if None).
            desc: boolean. If True, order_by will return results in descending order (False by default).
            top_k: int. An integer that defines the number of rows that will be returned (all rows if None).
        '''

        # if * return all columns, else find the column indexes for the columns specified
        if return_columns == '*':
            return_cols = [i for i in range(len(self.column_names))]
        else:
            return_cols = [
                self.column_names.index(col.strip())
                for col in return_columns.split(',')
            ]

        # if condition is None, return all rows
        # if not, return the rows with values where condition is met for value
        if condition is not None:
            column_name, operator, value = self._parse_condition(condition)
            column = self.column_by_name(column_name)
            rows = [
                ind for ind, x in enumerate(column)
                if get_op(operator, x, value)
            ]
        else:
            rows = [i for i in range(len(self.data))]

        # top k rows
        # rows = rows[:int(top_k)] if isinstance(top_k,str) else rows
        # copy the old dict, but only the rows and columns of data with index in rows/columns (the indexes that we want returned)
        dict = {(key): ([[self.data[i][j] for j in return_cols]
                         for i in rows] if key == "data" else value)
                for key, value in self.__dict__.items()}

        # we need to set the new column names/types and no of columns, since we might
        # only return some columns
        dict['column_names'] = [self.column_names[i] for i in return_cols]
        dict['column_types'] = [self.column_types[i] for i in return_cols]

        s_table = Table(load=dict)
        if order_by:
            s_table.order_by(order_by, desc)

        s_table.data = s_table.data[:int(top_k)] if isinstance(
            top_k, str) else s_table.data

        return s_table
Esempio n. 10
0
    def _full_outer_join(self, table_right: Table, condition):

        # get columns and operator
        column_name_left, operator, column_name_right = self._parse_condition(
            condition, join=True)
        # try to find both columns, if you fail raise error
        try:
            column_index_left = self.column_names.index(column_name_left)
            column_index_right = table_right.column_names.index(
                column_name_right)
        except:
            raise Exception(f'Columns dont exist in one or both tables.')

        # get the column names of both tables with the table name in front
        # ex. for left -> name becomes left_table_name_name etc
        left_names = [
            f'{self._name}_{colname}' for colname in self.column_names
        ]
        right_names = [
            f'{table_right._name}_{colname}'
            for colname in table_right.column_names
        ]

        # define the new tables name, its column names and types
        join_table_name = f'{self._name}_full_outer_join_{table_right._name}'
        join_table_colnames = left_names + right_names
        join_table_coltypes = self.column_types + table_right.column_types
        join_table = Table(name=join_table_name,
                           column_names=join_table_colnames,
                           column_types=join_table_coltypes)

        # count the number of operations (<,> etc)
        no_of_ops = 0
        nulls = []  #enter 0 as many as the columns of the right table
        SameValues = []  #values that we have already enter in the join_table
        newrow = 0  #counts rows in join_table

        # for each value in left column and right column, if condition, append the corresponding row to the new table
        for row_left in self.data:
            nulls.clear()
            left_value = row_left[column_index_left]
            found = False
            for row_right in table_right.data:
                right_value = row_right[column_index_right]
                no_of_ops += 1
                if get_op(operator, left_value, right_value):  #EQ_OP
                    join_table._insert(row_left + row_right)
                    newrow += 1  # new row in join table
                    found = True
                    SameValues.append(
                        row_right)  #keep the values that we have found match
            if not found:  # if not fount match in the right table, then for every column of the right table enter 0 to nulls
                for columns in range(table_right._no_of_columns):
                    nulls.append(0)
                join_table._insert(
                    row_left + nulls
                )  #append left table's values and nulls into join table
                newrow += 1  # new row in join table
                for columns in range(table_right._no_of_columns
                                     ):  # change every 0 column to Null
                    join_table.data[newrow - 1][columns +
                                                self._no_of_columns] = 'Null'
                    join_table._update()

        #for each value in right column, check if it exists in SameValues. If not then add it to the join_table with null in left columns
        for row_right in table_right.data:
            right = False  #found right column in SameVlaues
            nulls.clear()
            for j in range(len(SameValues)):
                if SameValues[j] == row_right:
                    right = True
            if not right:
                for columns in range(self._no_of_columns):
                    nulls.append(0)
                join_table._insert(
                    nulls + row_right
                )  #append right table's values and nulls into join table
                newrow += 1  # new row in join table
                for columns in range(
                        self._no_of_columns):  # change every 0 column to Null
                    join_table.data[newrow - 1][columns] = 'Null'
                    join_table._update()

        print(f'## Select ops no. -> {no_of_ops}')
        print(f'# Left table size -> {len(self.data)}')
        print(f'# Right table size -> {len(table_right.data)}')

        return join_table
Esempio n. 11
0
    def _update_row(self, set_value, set_column, condition):
        '''
        update where Condition
        '''
        # parse the condition
        column_name, operator, value = self._parse_condition(condition)
        # get the condition and the set column
        column = self.columns[self.column_names.index(column_name)]
        set_column_idx = self.column_names.index(set_column)
        # set_columns_indx = [self.column_names.index(set_column_name) for set_column_name in set_column_names]
        # for each value in column, if condition, replace it with
        set_value
        for row_ind, column_value in enumerate(column):
        if get_op(operator, column_value, value):
        self.data[row_ind][set_column_idx] = set_value
        self._update()


    def _delete_where(self, condition):
        '''
        Deletes rows where condition is met.
        Important: delete replaces the rows to be deleted with rows filled with Nones,
        These rows are then appened to the insert_stack
        '''
        column_name, operator, value = self._parse_condition(condition)

        indexes_to_del = []

        column = self.columns[self.column_names.index(column_name)]
        for index, row_value in enumerate(column):
            if get_op(operator, row_value, value):
                indexes_to_del.append(index)

        # we pop from highest to lowest index in order to avoid removing the wrong item
        # since we dont delete, we dont have to to pop in that order, but since delete is used
        # to delete from meta tables too, we still implement it.

        for index in sorted(indexes_to_del, reverse=True):
            if self._name[:4] != 'meta':
                # if the table is not a metatable, replace the row with a row of nones
                self.data[index] = [None for _ in range(len(self.column_names))]
            else:
                self.data.pop(index)

        self._update()
        print(f"Deleted {len(indexes_to_del)} rows")
        # we have to return the deleted indexes, since they will be appended to the insert_stack
        return indexes_to_del


    def _select_where(self, return_columns, condition=None, order_by=None, asc=False, top_k=None):
        '''
        Select and return a table containing specified columns and rows where condition is met
        '''

        # if * return all columns, else find the column indexes for the columns specified
        if return_columns == '*':
            return_cols = [i for i in range(len(self.column_names))]
        elif isinstance(return_columns, str):
            raise Exception(f'Return columns should be "*" or of type list. (the second parameter is return_columns not condition)')
        else:
            return_cols = [self.column_names.index(colname) for colname in return_columns]

        # if condition is None, return all rows
        # if not, return the rows with values where condition is met for value
        if condition is not None:
            column_name, operator, value = self._parse_condition(condition)
            column = self.columns[self.column_names.index(column_name)]
            rows = [ind for ind, x in enumerate(column) if get_op(operator, x, value)]
        else:
            rows = [i for i in range(len(self.columns[0]))]

        # top k rows
        rows = rows[:top_k]
        # copy the old dict, but only the rows and columns of data with index in rows/columns (the indexes that we want returned)
        dict = {(key):([[self.data[i][j] for j in return_cols] for i in rows] if key=="data" else value) for key,value in self.__dict__.items()}

        # we need to set the new column names/types and no of columns, since we might
        # only return some columns
        dict['column_names'] = [self.column_names[i] for i in return_cols]
        dict['column_types']   = [self.column_types[i] for i in return_cols]
        dict['_no_of_columns'] = len(return_cols)

        # order by the return table if specified
        if order_by is None:
            return Table(load=dict)
        else:
            return Table(load=dict).order_by(order_by, asc)


    def _select_where_with_btree(self, return_columns, bt, condition, order_by=None, asc=False, top_k=None):

        # if * return all columns, else find the column indexes for the columns specified
        if return_columns == '*':
            return_cols = [i for i in range(len(self.column_names))]
        else:
            return_cols = [self.column_names.index(colname) for colname in return_columns]


        column_name, operator, value = self._parse_condition(condition)

        print("1: ", type(value), " 2: ", self.column_types[self.column_names.index(column_name)])

        # if the column in condition is not a primary key, abort the select
        if column_name != self.column_names[self.pk_idx]:
            print('Column is not PK. Aborting')

        # here we run the same select twice, sequentially and using the btree.
        # we then check the results match and compare performance (number of operation)
        column = self.columns[self.column_names.index(column_name)]

        # sequential
        rows1 = []
        opsseq = 0
        for ind, x in enumerate(column):
            opsseq+=1
            if get_op(operator, x, value):
                rows1.append(ind)

        print(f'Without Btree -> {opsseq} comparison operations')
        # btree find
        rows = bt.find(operator, value)
        print('### Seq result ###')
        print(rows1)
        print('### Index result ###')
        print(rows)

        # same as simple select from now on
        rows = rows[:top_k]
        # TODO: this needs to be dumbed down
        dict = {(key):([[self.data[i][j] for j in return_cols] for i in rows] if key=="data" else value) for key,value in self.__dict__.items()}

        dict['column_names'] = [self.column_names[i] for i in return_cols]
        dict['column_types']   = [self.column_types[i] for i in return_cols]
        dict['_no_of_columns'] = len(return_cols)

        if order_by is None:
            return Table(load=dict)
        else:
            return Table(load=dict).order_by(order_by, asc)


    def order_by(self, column_name, asc=False):
        '''
        Order by based on column
        '''
        # get column, sort values and return sorted indexes
        column = self.columns[self.column_names.index(column_name)]
        idx = sorted(range(len(column)), key=lambda k: column[k], reverse=not asc)
        # return table but arange data using idx list (sorted indexes)
        dict = {(key):([self.data[i] for i in idx] if key=="data" else value) for key, value in self.__dict__.items()}
        return Table(load=dict)


    def _sort(self, column_name, asc=False):
        '''
        Same as order by, but its persistant
        '''
        column = self.columns[self.column_names.index(column_name)]
        idx = sorted(range(len(column)), key=lambda k: column[k], reverse=not asc)
        # print(idx)
        self.data = [self.data[i] for i in idx]
        self._update()


    def _inner_join(self, table_right: Table, condition):
        '''
        Join table (left) with a supplied table (right) where condition is met.
        '''
        # get columns and operator
        column_name_left, operator, column_name_right = self._parse_condition(condition, both_columns=True)
        # try to find both columns, if you fail raise error
        try:
            column_index_left = self.column_names.index(column_name_left)
            column_index_right = table_right.column_names.index(column_name_right)
        except:
            raise Exception(f'Columns dont exist in one or both tables.')

        # get the column names of both tables with the table name in front
        # ex. for left -> name becomes left_table_name_name etc
        left_names = [f'{self._name}_{colname}' for colname in self.column_names]
        right_names = [f'{table_right._name}_{colname}' for colname in table_right.column_names]

        # define the new tables name, its column names and types
        join_table_name = f'{self._name}_join_{table_right._name}'
        join_table_colnames = left_names+right_names
        join_table_coltypes = self.column_types+table_right.column_types
        join_table = Table(name=join_table_name, column_names=join_table_colnames, column_types= join_table_coltypes)

        # count the number of operations (<,> etc)
        no_of_ops = 0
        # this code is dumb on purpose... it needs to illustrate the underline technique
        # for each value in left column and right column, if condition, append the corresponding row to the new table
        for row_left in self.data:
            left_value = row_left[column_index_left]
            for row_right in table_right.data:
                right_value = row_right[column_index_right]
                no_of_ops+=1
                if get_op(operator, left_value, right_value): #EQ_OP
                    join_table._insert(row_left+row_right)

        print(f'## Select ops no. -> {no_of_ops}')
        print(f'# Left table size -> {len(self.data)}')
        print(f'# Right table size -> {len(table_right.data)}')

        return join_table


    def show(self, no_of_rows=None, is_locked=False):
        '''
        Pretty print the table
        '''
        # if the table is locked, add locked keyword to title
        if is_locked:
            print(f"\n## {self._name} (locked) ##")
        else:
            print(f"\n## {self._name} ##")

        # headers -> "column name (column type)"
        headers = [f'{col} ({tp.__name__})' for col, tp in zip(self.column_names, self.column_types)]
        if self.pk_idx is not None:
            # table has a primary key, add PK next to the appropriate column
            headers[self.pk_idx] = headers[self.pk_idx]+' #PK#'
        # detect the rows that are no tfull of nones (these rows have been deleted)
        # if we dont skip these rows, the returning table has empty rows at the deleted positions
        non_none_rows = [row for row in self.data if any(row)]
        # print using tabulate
        print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n')


    def _parse_condition(self, condition, join=False):
        '''
        Parse the single string condition and return column/s value and operator
        '''
        # if both_columns (used by the join function) return the names of the names of the columns (left first)
        if join:
            return split_condition(condition)

        # cast the value with the specified column's type and return the column name, the operator and the casted value
        left, op, right = split_condition(condition)
        if left not in self.column_names:
            raise ValueError(f'Condition is not valid (cant find column name)')
        coltype = self.column_types[self.column_names.index(left)]

        return left, op, coltype(right)


    def _load_from_file(self, filename):
        '''
        Load table from a pkl file (not used currently)
        '''
        f = open(filename, 'rb')
        tmp_dict = pickle.load(f)
        f.close()

        self.__dict__.update(tmp_dict)
Esempio n. 12
0
def _select_where_O(tb_object, return_columns, condition=None, order_by=None, asc=False, top_k=None):
    #-->Select all columns
    if return_columns == '*':
        return_cols = [i for i in range(len(tb_object.column_names))]
    #-->Invalid parameter input
    elif isinstance(return_columns, str):
        raise Exception(
            f'Return columns should be "*" or of type list. (the second parameter is return_columns not condition)')
    #-->Select only specified columns
    else:
        return_cols = [tb_object.column_names.index(colname) for colname in return_columns]

    #-->Return records based on condition
    select_data = []
    if condition is not None:
        column_name, operator, value = tb_object._parse_condition(condition)
        #-->Condition's culumn index
        column = tb_object.column_names.index(column_name)

        #-->Check if column_name is the primary key column
        if column_name == tb_object.column_names[tb_object.pk_idx]: #-->PK

            if operator == '==': #-->PK-identity
                #-->Check if record exists in main file
                temp = [row[tb_object.pk_idx] for row in tb_object.sequential_file_data]
                position = binary_search._binary_search(temp, value)
                #-->Record exists in main file
                if position != -1:
                    select_data.append(tb_object.sequential_file_data[position])
                # -->Search in insert stack
                else:
                    if value in [row[tb_object.pk_idx] for row in tb_object.data]:
                        for record in tb_object.data:
                            if get_op('==', record[tb_object.pk_idx], value):
                                select_data.append(record)

            else: #-->PK-range
                temp = [row[1] for row in tb_object.order]
                position, exists = binary_search._binary_search_v2(temp, value)
                #-->Get the starting position (lower limit)
                if operator == "<" or operator == "<=":
                    lower_limit = 0
                elif operator == ">" and exists == True:
                    lower_limit = position + 1
                else:
                    lower_limit = position

                #-->Get corresponding records from both main file and insert stack
                i = lower_limit
                while i < len(temp) and get_op(operator, temp[i], value):
                    #-->Record exists in main file
                    if temp[i] in tb_object.sequential_file_columns[column]:
                        select_data.append(tb_object.sequential_file_data[tb_object.sequential_file_columns[column].index(temp[i])])
                    #-->Record exists in insert stack
                    elif temp[i] in tb_object.columns[column]:
                        select_data.append(tb_object.data[tb_object.columns[column].index(temp[i])])
                    i += 1

        else: #-->Not PK
            #-->Get corresponding records from both main file and insert stack
            for elem in tb_object.sequential_file_data:
                if get_op(operator, elem[column], value):
                    select_data.append(elem)
            for elem in tb_object.data:
                if get_op(operator, elem[column], value):
                    select_data.append(elem)

    #-->Return all records
    else:
        #-->Get all records from both main file and insert stack
        for elem in tb_object.sequential_file_data:
            select_data.append(elem)
        for elem in tb_object.data:
            select_data.append(elem)

    select_columns = [[row[i] for row in select_data] for i in range(tb_object._no_of_columns)]
    #-->Selected records indexes
    rows = [i for i in range(len(select_columns[0]))]
    #-->Select only top k records
    rows = rows[:top_k]
    #-->Store result table's information in a dictionary
    dict = {(key): ([[select_data[i][j] for j in return_cols] for i in rows] if key == "data" else value) for key, value
            in tb_object.__dict__.items()}
    dict['column_names'] = [tb_object.column_names[i] for i in return_cols]
    dict['column_types'] = [tb_object.column_types[i] for i in return_cols]
    dict['_no_of_columns'] = len(return_cols)
    #-->Return result table
    if order_by is None:
        return Table(load=dict)
    #-->Return result table ordered
    else:
        return Table(load=dict).order_by(order_by, asc)
Esempio n. 13
0
    def _select_where_groupby(self,
                              return_columns,
                              aggr_function,
                              where_condition=None,
                              having_condition=None,
                              order_by=None,
                              asc=False,
                              top_k=None):
        '''
        Select and return a table containing specified columns and rows grouped-by (a specific column) where condition is met
        '''

        if (isinstance(return_columns, str)):
            raise Exception(f'Return columns must be inserted as a list')
        indx = self.column_names.index(return_columns[-1])
        typeList = self.column_types
        # if attemps to perform aggregate function (other than count) to non numeric values, raise exception
        if (not (typeList[indx] == int or typeList[indx] == float)
                and not (aggr_function == 'count')):
            raise Exception(f'Can not perform ' + aggr_function +
                            ' to non numeric values')
        if (return_columns == '*'):
            raise Exception(
                f'Return columns should be explicitly written and the aggregate function will affect the last column'
            )
        elif isinstance(return_columns, str):
            raise Exception(
                f'Return columns should be of type list. (the second parameter is return_columns not condition)'
            )
        else:
            return_cols = [
                self.column_names.index(colname) for colname in return_columns
            ]
            # if where_condition is None, return all rows
            # if not, return the rows with values where condition is met for value
        if where_condition is not None:
            column_name, operator, value = self._parse_condition(
                where_condition)
            column = self.columns[self.column_names.index(column_name)]
            rows = [
                ind for ind, x in enumerate(column)
                if get_op(operator, x, value)
            ]
        else:
            rows = [i for i in range(len(self.columns[0]))]
        # copy the old dict, but only the rows and columns of data with index in rows/columns (the indexes that we want to be returned)
        dict = {(key): ([[self.data[i][j] for j in return_cols]
                         for i in rows] if key == "data" else value)
                for key, value in self.__dict__.items()}
        dataList = dict.get('data')
        dictData = {}
        tempList = []
        if (aggr_function == 'sum'):
            for row in dataList:
                key = tuple(row[:-1])
                if (dictData.get(key, 'None') == 'None'):
                    dictData[key] = row[-1]
                else:
                    temp = dictData.get(key)
                    temp += row[-1]
                    dictData[key] = temp
            for k, v in dictData.items():
                tempRow = list(k)
                tempRow.append(v)
                tempList.append(tempRow)
            dict['data'] = tempList
        elif (aggr_function == 'avg'):
            for row in dataList:
                key = tuple(row[:-1])
                if (dictData.get(key, 'None') == 'None'):
                    dictData[key] = [row[-1], 1]
                else:
                    tempRow = dictData.get(key)
                    tempRow[0] = tempRow[0] + row[-1]
                    tempRow[1] = tempRow[1] + 1
                    dictData[key] = tempRow
            for k, v in dictData.items():
                tempRow = list(k)
                val = v[0] / v[1]
                tempRow.append(val)
                tempList.append(tempRow)
            dict['data'] = tempList
        elif (aggr_function == 'count'):
            for row in dataList:
                key = tuple(row[:-1])
                if (dictData.get(key, 'None') == 'None'):
                    dictData[key] = 1
                else:
                    tempVal = dictData.get(key)
                    tempVal += 1
                    dictData[key] = tempVal
            for k, v in dictData.items():
                tempRow = list(k)
                tempRow.append(v)
                tempList.append(tempRow)
            dict['data'] = tempList
        elif (aggr_function == 'max'):
            for row in dataList:
                key = tuple(row[:-1])
                if (dictData.get(key, 'None') == 'None'):
                    dictData[key] = row[-1]
                else:
                    tempVal = dictData.get(key)
                    if (row[-1] > tempVal):
                        tempVal = row[-1]
                    dictData[key] = tempVal
            for k, v in dictData.items():
                tempRow = list(k)
                tempRow.append(v)
                tempList.append(tempRow)
            dict['data'] = tempList
        elif (aggr_function == 'min'):
            for row in dataList:
                key = tuple(row[:-1])
                if (dictData.get(key, 'None') == 'None'):
                    dictData[key] = row[-1]
                else:
                    tempVal = dictData.get(key)
                    if (row[-1] < tempVal):
                        tempVal = row[-1]
                    dictData[key] = tempVal
            for k, v in dictData.items():
                tempRow = list(k)
                tempRow.append(v)
                tempList.append(tempRow)
            dict['data'] = tempList
        if having_condition is not None:
            column_name, operator, value = self._parse_condition(
                having_condition)
            tempList = dict.get('data')
            colList = dict.get('column_names')
            for i in range(len(colList)):
                if colList[i] == column_name:
                    index = return_cols.index(i)
                    break
            tempList = [
                row for row in tempList if get_op(operator, row[index], value)
            ]
            dict['data'] = tempList
        # we need to set the new column names/types and no of columns, since we might
        # only return some columns
        dict['column_names'] = [self.column_names[i] for i in return_cols]
        dict['column_types'] = [self.column_types[i] for i in return_cols]
        dict['_no_of_columns'] = len(return_cols)
        # order by the return table if specified
        if order_by is None:
            return Table(load=dict)
        else:
            return Table(load=dict).order_by(order_by, asc)
Esempio n. 14
0
    def aggregate(self, aggregation: dict, group_by):
        '''
            apply aggregation to grouped data
            example:
            {
                'building1': [capacity1]
                'building2': [capacity2],
                'building3': [capacity3, capacity4]
            }
            '''
        grouping = True if group_by else False

        if not group_by:
            for aggregation_key, aggregation_value in aggregation.items():
                group_by = {}
                aggregation_idx = self.column_names.index(aggregation_key)

                group_by[aggregation_key] = [
                    el[aggregation_idx] for el in self.data
                ]

        keys_to_delete = []
        for aggregation_key, aggregation_value in aggregation.items():
            # if aggregation function is avg or sum, column must be integer or float
            if aggregation_value[0] == 'avg' or aggregation_value[0] == 'sum':
                col_type = self.column_types[self.column_names.index(
                    aggregation_key)]
                if col_type is not int and col_type is not float:
                    raise Exception(
                        f'{aggregation_value[0]} can be applied only to numeric columns'
                    )

            # save aggregation function to a lambda function, so it can be applied later
            if aggregation_value[0] == 'min':
                aggregation_function = lambda values: min(values)
            elif aggregation_value[0] == 'max':
                aggregation_function = lambda values: max(values)
            elif aggregation_value[0] == 'avg':
                aggregation_function = lambda values: sum(values) / len(values)
            elif aggregation_value[0] == 'count':
                aggregation_function = lambda values: len(values)
            elif aggregation_value[0] == 'sum':
                aggregation_function = lambda values: sum(values)
            else:
                raise Exception('Aggregation function was not found')

            # loop through grouped items and apply aggregation function
            for k, v in group_by.items():
                group_by[k] = [aggregation_function(v)]

                # if len > 1, a having condition exists and data should be filtered
                if len(aggregation_value) > 1:
                    column_name, operator, value = self._parse_condition(
                        aggregation_value[1])
                    if not get_op(operator, group_by[k][0], value):
                        # append keys of data that should be filtered out due to having condition
                        keys_to_delete.append(k)

            # delete filtered out keys
            for k in keys_to_delete:
                del group_by[k]

            # add aggregation function as a column to be displayed in final print of data
            # example: sum( capacity)
            self.column_names = [
                f'{aggregation_value[0]} ( {col} )'
                if col == aggregation_key else col for col in self.column_names
            ]

        # if grouping was applied, add grouped items to the final print of data
        if grouping:
            self.data = [v + [k] for k, v in group_by.items()]
        else:
            self.data = [v for k, v in group_by.items()]
Esempio n. 15
0
    def _full_outer_join(self, table_right: Table, condition):
        '''
        Left outer join table (left) with a supplied table (right) where condition is met.
        '''
        # get columns and operator
        column_name_left, operator, column_name_right = self._parse_condition(
            condition, join=True)
        # try to find both columns, if you fail raise error
        try:
            column_index_left = self.column_names.index(column_name_left)
            column_index_right = table_right.column_names.index(
                column_name_right)
        except:
            raise Exception(f'Columns dont exist in one or both tables.')

        # get the column names of both tables with the table name in front
        # ex. for left -> name becomes left_table_name_name etc
        left_names = [
            f'{self._name}_{colname}' for colname in self.column_names
        ]
        right_names = [
            f'{table_right._name}_{colname}'
            for colname in table_right.column_names
        ]

        # define the new tables name, its column names and types
        join_table_name = f'{self._name}_join_{table_right._name}'
        join_table_colnames = left_names + right_names
        join_table_coltypes = self.column_types + table_right.column_types
        join_table = Table(name=join_table_name,
                           column_names=join_table_colnames,
                           column_types=join_table_coltypes)

        # count the number of operations (<,> etc)
        no_of_ops = 0
        # Simplify the full outer join as left outer and right outer
        for row_left in self.data:
            left_value = row_left[column_index_left]
            exist = False
            for row_right in table_right.data:
                right_value = row_right[column_index_right]
                no_of_ops += 1
                if get_op(operator, left_value, right_value):
                    exist = True  # EQ_OP
                    join_table._insert(row_left + row_right)
            if exist != True:
                none_values = []
                for i in range(len(self.columns)):
                    none_values.append(None)
                join_table._insert(row_left + none_values)
        for row_left2 in table_right.data:
            left_value2 = row_left2[column_index_left]
            exist = False
            for row_right2 in self.data:
                right_value2 = row_right2[column_index_right]
                no_of_ops += 1
                if get_op(operator, left_value2, right_value2):
                    exist = True  #EQ_OP
                    join_table._insert(row_right2 + row_left2)
            if exist != True:
                none_values = []
                for i in range(len(self.columns)):
                    none_values.append(None)

                join_table._insert(none_values + row_left2)

        print(f'## Select ops no. -> {no_of_ops}')
        print(f'# Left table size -> {len(self.data)}')
        print(f'# Right table size -> {len(table_right.data)}')

        return join_table