Example #1
def data_sum_by_rules(self, data_rows, sum_rules):
    output = {}
    # Read data column names from the first row
    for col_name, value in data_rows[0]['data'].items():
        col_found = False
        # Every data column must have a matching sum rule
        for rule_item in sum_rules:
            if rule_item['Name'].lower() == col_name.lower():
                rule_name = rule_item['FactScale'].lower()
                col_found = True
                if rule_name in self.sum_func_map:
                    sum_method = self.sum_func_map[rule_name]
                else:
                    raise ex.EmptyInputsError(
                        'No function matches the sum method')
                break
        if not col_found:
            raise ex.EmptyInputsError('No sum rule for the data column ' +
                                      str(col_name))
        # Sum all rows that have col_name
        sum_data = []
        for row in data_rows:
            data_value = row['data'][col_name]
            sum_data.append(data_value)
        result = sum_method(sum_data)
        output[col_name] = result
    return output
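A minimal stand-alone sketch of the inputs data_sum_by_rules appears to expect, inferred from the loop above: each data row carries a 'data' dict keyed by column name, and each sum rule pairs a column 'Name' with a 'FactScale' that selects the aggregation function from sum_func_map. All names and values below are invented, and plain callables stand in for the class's private sum methods.

# Hypothetical inputs, shaped the way the loop above reads them.
data_rows = [
    {'data': {'Spend': 10.0, 'GRP': 2.0}},
    {'data': {'Spend': 30.0, 'GRP': 4.0}},
]
sum_rules = [
    {'Name': 'Spend', 'FactScale': 'sum'},
    {'Name': 'GRP', 'FactScale': 'average'},
]
# Stand-ins for self.sum_func_map (assumed to map rule names to callables).
sum_func_map = {
    'sum': sum,
    'average': lambda values: sum(values) / len(values),
}
output = {}
for col_name in data_rows[0]['data']:
    rule = next(r for r in sum_rules if r['Name'].lower() == col_name.lower())
    sum_method = sum_func_map[rule['FactScale'].lower()]
    output[col_name] = sum_method([row['data'][col_name] for row in data_rows])
print(output)  # {'Spend': 40.0, 'GRP': 3.0}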
Example #2
def jj_extract(wb, meta_cols, data_cols, dates_cols):
    ws = wb.sheet_by_index(0)
    if ws.nrows <= 1:
        raise ex.EmptyInputsError('jj_extract')
    header_row = ws.row(0)
    last_col = get_last_col(ws, header_row)
    # Initialize header column names: empty names are taken from the header row
    for key, val in meta_cols.items():
        if key >= last_col:
            raise ex.NotExistsError('DataProcessing', 'column', key)
        if val == '':
            meta_cols[key] = header_row[key].value
    for key, val in data_cols.items():
        if key >= last_col:
            raise ex.NotExistsError('DataProcessing', 'column', key)
        if val == '':
            data_cols[key] = header_row[key].value
    for key, val in dates_cols.items():
        if key >= last_col:
            raise ex.NotExistsError('DataProcessing', 'column', key)
    # Create output: Append data
    output = []
    data = get_cell_range(0, 0, ws.ncols, ws.nrows, ws)
    for row_index in range(1, ws.nrows):
        new_row = {'meta': {}, 'data': {}, 'dates': 0}
        for key, val in meta_cols.items():
            new_row['meta'][val] = data[row_index][key].value
        for key, val in data_cols.items():
            new_row['data'][val] = data[row_index][key].value
        new_row['dates'] = date_func(dates_cols, data[row_index])
        output.append(new_row)
    return output
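The column maps passed to jj_extract seem to be keyed by zero-based column index, with an empty string meaning "take the name from the header row". Below is a hedged sketch of that configuration and of the row dicts the function returns; every index and label here is invented.

# Hypothetical column maps for jj_extract.
meta_cols = {0: 'Geography', 1: ''}   # column 1 gets its name from the header row
data_cols = {2: 'Spend', 3: 'GRP'}
dates_cols = {4: 'Date'}              # passed to date_func together with the row
# Each element of the returned list has this shape (values invented):
example_row = {
    'meta': {'Geography': 'US', 'Brand': 'BENADRYL'},
    'data': {'Spend': 125.0, 'GRP': 3.4},
    'dates': 0,                       # whatever date_func returns for the row
}
print(example_row['meta'], example_row['data'], example_row['dates'])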
Example #3
def __check_none_obj(**kwargs):
    '''Raise EmptyInputsError for every keyword argument whose value is None.

    Args:
    -- name=object pairs to validate
    '''
    for key, value in kwargs.items():
        if value is None:
            raise ex.EmptyInputsError(key)
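A tiny usage sketch: the keyword name of the first None argument becomes the error payload. EmptyInputsError is stubbed locally so the snippet runs on its own; the real exception lives in the project's ex module.

class EmptyInputsError(Exception):   # local stand-in for ex.EmptyInputsError
    pass

def check_none_obj(**kwargs):        # same logic as __check_none_obj above
    for key, value in kwargs.items():
        if value is None:
            raise EmptyInputsError(key)

check_none_obj(data=[1, 2], rules={'a': 1})   # passes silently
try:
    check_none_obj(data=None)
except EmptyInputsError as err:
    print('missing input:', err)              # missing input: data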
Example #4
def jj_aoc(wb, meta_cols, data_cols, dates_cols):
    ws = wb.sheet_by_name('Report1')
    output = []
    # Initialize data start
    data_header_row_index = 0
    start_meta_row = 0
    for row_index in range(ws.nrows):
        desc_val = str(ws.cell(row_index, 0).value)
        desc_val = desc_val.strip().lower()
        if desc_val == 'description':
            data_header_row_index = row_index
            start_meta_row = row_index + 2
            break
    if data_header_row_index == 0:
        raise ex.EmptyInputsError('data')
    # Assign header_row (dates header) and last column (not higher than ws.ncols)
    header_row = ws.row(data_header_row_index)
    last_col = get_last_col(ws, header_row)
    # Initialize data: meta table and data table
    row_index = data_header_row_index+1
    meta_column = ws.col(0)
    while row_index < ws.nrows:
        desc_val = str(meta_column[row_index].value)
        # Add meta and data to output only if facts exist
        if desc_val.strip().lower() == 'facts':
            last_meta_row = row_index - 1
            last_facts_row = __get_last_facts_row(ws, row_index+1, ws.nrows, 
                                                  last_col)
            # Looking for rows which go by columns
            for col_index in range(1, last_col):
                new_row = {}
                data_column = ws.col(col_index)
                meta = __get_meta(meta_column, meta_cols, start_meta_row, 
                                  last_meta_row)
                data = __get_data(data_column, meta_column, row_index+1, 
                                  last_facts_row) 
                # Append new_row data
                new_row['meta'] = meta
                new_row['data'] = data
                new_row['dates'] = date_func(dates_cols, header_row,
                                             index=col_index)
                output.append(new_row)
            row_index = last_facts_row
            start_meta_row = last_facts_row + 1
        row_index = row_index + 1
    return output
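From the scan above, the 'Report1' sheet appears to be organized as repeated blocks: a 'Description' row holding the date headers, meta rows in column 0, a 'Facts' marker, and then one fact per row with one value per date column. A hypothetical layout (invented labels and numbers), written as plain rows so the sketch runs without xlrd:

report1_rows = [
    ['Report1 title', '', ''],                # anything before the marker row
    ['Description', 'Jan-2020', 'Feb-2020'],  # marker in column 0, dates across
    ['', '', ''],                             # skipped row (start_meta_row = marker + 2)
    ['US', '', ''],                           # meta values live in column 0
    ['BENADRYL', '', ''],
    ['Facts', '', ''],                        # marker: fact rows follow
    ['Spend', 125.0, 140.0],                  # fact name in column 0, one value per date column
    ['GRP', 3.4, 3.9],
    # ...the Description/meta/Facts block may repeat further down the sheet
]
for row in report1_rows:
    print(row)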
Example #5
def __init__(self, data, sum_rules, map_rules, **kwargs):
    '''
    Args:
    - data (obligatory)
    - sum_rules, map_rules (obligatory)
    - source dates type (dates_source): daily, weekly, monthly, etc.
    - target dates type (dates_target): weekly, monthly, yearly, etc.
    '''
    if data is None:
        raise ex.EmptyInputsError('No data to work with')
    self.data = data
    self.sum_rules = sum_rules
    self.map_rules = map_rules
    for key, val in kwargs.items():
        if key == 'dates_source':
            self.dates_source = val
        if key == 'dates_target':
            self.dates_target = val
    self.sum_func_map = {
        'average': self.__average_sum,
        'sum': self.__simple_sum
    }
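A stand-alone stub mirroring this constructor, so the call pattern runs here without the rest of the class; the class name, the lambda sum methods, and the use of kwargs.get are assumptions made for brevity.

class DataAggregator:                # hypothetical name for the class above
    def __init__(self, data, sum_rules, map_rules, **kwargs):
        if data is None:
            raise ValueError('No data to work with')   # stand-in for ex.EmptyInputsError
        self.data = data
        self.sum_rules = sum_rules
        self.map_rules = map_rules
        self.dates_source = kwargs.get('dates_source')
        self.dates_target = kwargs.get('dates_target')
        self.sum_func_map = {
            'average': lambda vals: sum(vals) / len(vals),
            'sum': sum,
        }

agg = DataAggregator(data=[{'data': {'Spend': 10.0}}],
                     sum_rules=[{'Name': 'Spend', 'FactScale': 'sum'}],
                     map_rules=None,
                     dates_source='daily', dates_target='weekly')
print(agg.dates_source, agg.dates_target)   # daily weekly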
Example #6
def jj_brand_extract(warehouse, wb, options_list):
    meta_cols = options_list['meta_cols']
    data_cols = options_list['data_cols']
    dates_info = options_list['dates_cols']
    date_func = options_list['date_func']
    date_col = dates_info['date_col']
    series_name = dates_info['scale']
    ws = wb.sheet_by_index(0)
    if ws.nrows <= 1:
        raise ex.EmptyInputsError('jj_brand_extract')
    data = get_cell_range(0, 0, ws.ncols, ws.nrows, ws)
    header_row_index = 0
    last_col = get_last_col(data, header_row_index)
    # Initialize header column names: empty names are taken from the header row
    for item in meta_cols:
        column_number = item['Col_number']
        if column_number >= last_col:
            raise ex.NotExistsError('DataProcessing', 'column', column_number)
        if item['Dimension_name'] == '':
            item['Dimension_name'] = data[0][column_number].value
    for key, val in data_cols.items():
        # data_cols keys are (column_index, value_type) tuples
        if key[0] >= last_col:
            raise ex.NotExistsError('DataProcessing', 'column', key[0])
        if val == '':
            data_cols[key] = data[0][key[0]].value
    if date_col >= last_col:
        raise ex.NotExistsError('DataProcessing', 'column', date_col)
    # Create output: Append data
    if 'mapping_rule' in options_list:
        mapping_rule = options_list['mapping_rule']
    else:
        mapping_rule = None
    date_values = []
    for row_index in range(1, len(data)):
        date_values.append(data[row_index][date_col].value)
    time_line = get_time_line(date_values)
    times_series = warehouse.add_time_scale(series_name, time_line)
    for row_index in range(1, len(data)):
        meta = []
        for item in meta_cols:
            copy_item = item.copy()
            column_index = copy_item['Col_number']
            copy_item['Name'] = data[row_index][column_index].value
            meta.append(copy_item)
        if mapping_rule is not None:
            new_meta, is_mapped = mapping(meta, mapping_rule)
            if is_mapped:
                meta = new_meta
        num_of_dates = 1
        date_value = data[row_index][date_col].value
        start_label = date_func(date_value, num_of_dates)
        path = [x['Name'] for x in meta]
        item_meta = [(x['Dimension_name'], x['Layer']) for x in meta]
        entity = warehouse.add_entity(path, item_meta)
        for key, val in data_cols.items():
            value = data[row_index][key[0]].value
            value = convert_value(value, key[1])
            variable = warehouse.force_ent_variable(entity, val, key[1])
            time_series = warehouse.force_var_time_series(
                variable, times_series)
            history_value = time_series.get_value(start_label)
            if not history_value:
                new_value = [value]
            else:
                new_value = [history_value + value]
            time_series.set_values(start_label, new_value)
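A hedged sketch of the options_list that jj_brand_extract indexes above: meta_cols is a list of dicts with 'Col_number', 'Dimension_name' and 'Layer'; data_cols is keyed by (column_index, value_type) tuples; dates_cols names the date column and the time scale. Column numbers, names and the date_func stand-in are all invented.

options_list = {
    'meta_cols': [
        {'Col_number': 0, 'Dimension_name': 'Geography', 'Layer': 'Country'},
        {'Col_number': 1, 'Dimension_name': '', 'Layer': 'Brand'},   # name taken from header
    ],
    'data_cols': {(2, 'float'): 'Spend', (3, 'float'): ''},   # (column_index, value_type) keys
    'dates_cols': {'date_col': 4, 'scale': 'weekly'},
    'date_func': lambda date_value, num_of_dates: date_value,  # stand-in returning start_label
    # 'mapping_rule': ...   optional; applied to the collected meta when present
}
print(sorted(k[0] for k in options_list['data_cols']))   # data column indexes: [2, 3]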
Example #7
def jj_brand_media_spend(warehouse, wb, options_list):
    meta_cols = options_list['meta_cols']
    dates_info = options_list['dates_cols']
    date_func = options_list['date_func']
    name_col_num = options_list['name_col']
    series_name = dates_info['scale']
    ws = wb.sheet_by_index(0)
    if ws.nrows <= 1:
        raise ex.EmptyInputsError('jj_brand_media_spend')
    data = get_cell_range(0, 0, ws.ncols, ws.nrows, ws)
    header_row_index = 0
    # Determine the dates column range (not higher than the last header column)
    start_dates_col = dates_info['start_column']
    end_dates_col = dates_info['end_column']
    if end_dates_col == '':
        end_dates_col = get_last_col(data, header_row_index)
    if start_dates_col > end_dates_col:
        raise ex.WrongValueError(start_dates_col,
                                 'value <= ' + str(end_dates_col),
                                 '', 'jj_brand_media_spend')
    # Header-name initialization and mapping rules are not applied here; the
    # entity meta is taken straight from options_list['meta_cols'] below.
    # Initialize data: meta table and data table
    num_of_dates = end_dates_col - start_dates_col
    first_label, time_line = date_func(
        data[header_row_index][start_dates_col].value, wb.datemode,
        num_of_dates)
    times_series = warehouse.add_time_scale(series_name, time_line)
    # Entity meta comes from options_list['meta_cols'], e.g. items like
    # {'Layer': 'Country', 'Dimension_name': 'Geography', 'Name': 'US'}
    path = [x['Name'] for x in options_list['meta_cols']]
    meta = [(x['Dimension_name'], x['Layer'])
            for x in options_list['meta_cols']]
    entity = warehouse.add_entity(path, meta)
    for row_index in range(header_row_index + 1, len(data)):
        # Per-row meta collection and mapping are disabled here; every row is
        # written onto the single entity created above.
        fact_name = data[row_index][name_col_num].value
        variable = warehouse.force_ent_variable(entity, fact_name, 'float')
        values = []
        for col_index in range(start_dates_col, end_dates_col):
            value = convert_value(data[row_index][col_index].value, 'float')
            values.append(value)
        time_series = warehouse.force_var_time_series(variable, times_series)
        history_values = time_series.get_values(first_label)
        if history_values:
        if len(history_values) != len(values):
            raise ex.WrongValueError(
                len(values), len(history_values),
                'values length must be equal to the history '
                'values length', 'jj_brand_media_spend')
            for i in range(len(history_values)):
                if history_values[i] != '':
                    values[i] += history_values[i]
        time_series.set_values(first_label, values)
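A hedged sketch of the options_list that jj_brand_media_spend reads above: the dates run across a contiguous block of header columns, every data row becomes one fact (variable) on a single fixed entity, and the entity meta is taken verbatim from meta_cols. All values and the date_func stand-in are invented.

options_list = {
    'meta_cols': [
        {'Dimension_name': 'Geography', 'Layer': 'Country', 'Name': 'US'},
        {'Dimension_name': 'Products', 'Layer': 'Brand', 'Name': 'BENADRYL'},
    ],
    'name_col': 0,                 # column holding the fact name
    'dates_cols': {
        'scale': 'weekly',
        'start_column': 1,         # first date column in the header row
        'end_column': '',          # '' means "up to the last header column"
    },
    # date_func(first_header_date, wb.datemode, num_of_dates) is expected to
    # return (first_label, time_line); a trivial stand-in:
    'date_func': lambda first_date, datemode, n: (first_date, [first_date] * n),
}
print(options_list['dates_cols'])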
Example #8
def jj_brand(warehouse, wb, options_list):
    date_func = options_list['date_func']
    meta_cols = options_list['meta_cols']
    name_col_num = options_list['name_col']
    dates_info = options_list['dates_cols']

    if 'mapping_rule' in options_list:
        mapping_rule = options_list['mapping_rule']
    else:
        mapping_rule = None

    ws = wb.sheet_by_name('Report1')
    # Initialize data start
    data_header_row_index = 0
    start_meta_row = 0
    data = get_cell_range(0, 0, ws.ncols, ws.nrows, ws)
    for row_index in range(len(data)):
        desc_val = str(data[row_index][name_col_num].value)
        desc_val = desc_val.strip().lower()
        if desc_val == 'description':
            data_header_row_index = row_index
            start_meta_row = row_index + 2
            break
    if data_header_row_index == 0:
        raise ex.EmptyInputsError('data')
    # Determine the dates column range (not higher than the last header column)
    start_dates_col = dates_info['start_column']
    end_dates_col = dates_info['end_column']
    if end_dates_col == '':
        end_dates_col = get_last_col(data, data_header_row_index)
    # Initialize data: meta table and data table
    row_index = data_header_row_index + 1
    num_of_dates = end_dates_col - start_dates_col
    first_label, time_line = date_func(
        data[data_header_row_index][start_dates_col].value, num_of_dates)
    series_name = dates_info['scale']
    times_series = warehouse.add_time_scale(series_name, time_line)
    full_meta = []
    while row_index < len(data):
        desc_val = str(data[row_index][name_col_num].value)
        # Add meta and data to output only if facts exist
        if desc_val.strip().lower() == 'facts':
            last_meta_row = row_index - 1
            last_facts_row = __get_last_facts_row(data, row_index + 1,
                                                  len(data), end_dates_col)
            # Facts go by rows and data values by columns; write them into
            # the warehouse through its interface.
            # Collect the meta rows for this block
            meta = __get_meta(data, name_col_num, meta_cols, start_meta_row,
                              last_meta_row)
            len_meta = len(meta)
            if not full_meta:
                for item in meta:
                    full_meta.append(item.copy())
            elif len_meta > len(full_meta):
                raise ex.WrongValueError(
                    len_meta, 'value <= ' + str(len(full_meta)),
                    'length of sub meta must be <= '
                    'length of meta', 'jj_brand')
            elif len_meta <= len(full_meta):
                for i in range(len_meta):
                    full_meta[-len_meta + i] = meta[i].copy()
                meta = full_meta
            # TODO check __reorder_meta function
            meta2 = __reorder_meta(meta)
            # Applying mapping_rule to meta2 is currently disabled
            # working with WH interface
            path = [x['Name'] for x in meta2]
            item_meta = [(x['Dimension_name'], x['Layer']) for x in meta2]
            entity = warehouse.add_entity(path, item_meta)
            for row_index in range(row_index + 1, last_facts_row + 1):
                fact_name = data[row_index][name_col_num].value
                variable = warehouse.force_ent_variable(
                    entity, fact_name, 'float')
                values = []
                for col_index in range(start_dates_col, end_dates_col):
                    value = convert_value(data[row_index][col_index].value,
                                          'float')
                    values.append(value)
                time_series = warehouse.force_var_time_series(
                    variable, times_series)
                history_values = time_series.get_values(first_label)
                if history_values:
                    if len(history_values) != len(values):
                        raise ex.WrongValueError(
                            len(values), len(history_values),
                            'length values must be equal to the history '
                            'values length', 'jj_brand')
                    for i in range(len(history_values)):
                        if history_values[i] != '':
                            values[i] += history_values[i]
                time_series.set_values(first_label, values)
            row_index = last_facts_row
            start_meta_row = last_facts_row + 1
        row_index += 1
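Both jj_brand_media_spend and jj_brand finish by merging the freshly read values with whatever the time series already holds for the same start label: lengths must match and empty history cells are skipped. A plain-Python sketch of that merge, with ValueError standing in for ex.WrongValueError:

def merge_with_history(values, history_values):
    # New values are added element-wise onto the existing history, if any.
    if not history_values:
        return list(values)
    if len(history_values) != len(values):
        raise ValueError('values length must be equal to the history values length')
    merged = list(values)
    for i, hist in enumerate(history_values):
        if hist != '':                # empty history cells contribute nothing
            merged[i] += hist
    return merged

print(merge_with_history([1.0, 2.0, 3.0], [10.0, '', 5.0]))   # [11.0, 2.0, 8.0]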