Python apply_dim_filters Examples, chatbot.helper_methods.apply_dim_filters Python Examples

Example #1

0

Show file

def action_fact_table_group(df, entities):
    if entities['logic']:
        return fact_table_logic(df, entities)
    if not entities['graph']:
        entities['graph'] = ['table']
    df = helpers.apply_date_condition(df, entities['date_condition'])
    df = helpers.apply_dim_filters(df, entities['dim_filters'])
    df = helpers.apply_fact_condition(df, entities['dim'] + entities['adject'],
                                      entities['fact_condition'])
    facts = df.columns.tolist()
    df = df.reset_index().fillna(0)
    if 'index' in df.columns:
        df = df.drop('index', axis=1)
    data = []
    for graph in entities['graph']:
        if not entities['select_upto']:
            if len(df) == 1 and len(df.columns) == 1 and graph == 'text':
                df = "{0}{1}{2} is {3}".format(
                    df.columns[0],
                    helpers.get_date_text(entities['date_condition']),
                    helpers.get_dim_filter_text(entities['dim_filters']),
                    helpers.format_value_to_language(df.iloc[0, 0]))
                table_data = {
                    'chart': graph,
                    'chart_title': 'message',
                    'data': df
                }
                data.append(table_data)
            else:
                table_data = {
                    'chart': graph,
                    'facts': facts,
                    'dimensions': entities['dim'] + entities['adject'],
                    'table': df
                }
                data.append(table_data)
        else:
            # Apply Selection
            final_df = pd.DataFrame()
            for selection in entities['select_upto']:
                table_data = helpers.dynamic_selection_upto(
                    df, entities['dim'] + entities['adject'], facts,
                    selection['selection'], selection['upto'])
                final_df = final_df.append(table_data)
            table_data = {
                'chart': graph,
                'facts': facts,
                'dimensions': entities['dim'] + entities['adject'],
                'table': final_df
            }
            data.append(table_data)
    return data

Example #2

0

Show file

    def target_achievement(self, df):
        fact_condition = self.fact_condition.copy()
        if self.dim_filters:
            df = helpers.apply_dim_filters(df, dim_filters=self.dim_filters)
        if self.date_condition:
            df = helpers.apply_date_condition(df, self.date_condition)

        # add Sales in Aggregation
        if 'SalesAmount' not in fact_condition['aggregation']:
            fact_condition['aggregation']['SalesAmount'] = ['sum']
            fact_condition['conditions'] += [{
                'fact_name': 'SalesAmount sum',
                'conditions': np.nan,
                'fact_value': np.nan
            }]

        # add Target Amount in Aggregation
        if 'TargetAmount' not in fact_condition['aggregation']:
            fact_condition['aggregation']['TargetAmount'] = fact_condition[
                'aggregation']['SalesAmount']
            fact_condition['conditions'] += [{
                'fact_name': 'TargetAmount sum',
                'conditions': np.nan,
                'fact_value': np.nan
            }]

        # add sum to sales Amount
        if 'sum' not in fact_condition['aggregation']['SalesAmount']:
            fact_condition['aggregation']['SalesAmount'] += ['sum']
            fact_condition['conditions'] += [{
                'fact_name': 'SalesAmount sum',
                'conditions': np.nan,
                'fact_value': np.nan
            }]

        # Apply fact condition
        df = helpers.apply_fact_condition(df, self.dimensions, fact_condition)
        sale_fact_names = [
            c['fact_name'] for c in fact_condition['conditions']
            if c['fact_name'][0:5] == 'Sales'
        ]
        target_fact_names = [
            c['fact_name'] for c in fact_condition['conditions']
            if c['fact_name'][0:6] == 'Target'
        ]

        # target facts for 0th name
        target_facts = pd.DataFrame(100 * df[sale_fact_names[0]] /
                                    df[target_fact_names[0]])
        target_facts.columns = ["% Target Achievement"]
        # return pd.concat([df, target_facts], axis=1, sort=False)
        return target_facts

Example #3

0

Show file

    def ytd(self, df):
        if self.dim_filters:
            df = helpers.apply_dim_filters(df, dim_filters=self.dim_filters)

        # get max date from the user query for ytd
        date = self.end_date if self.end_date > self.start_date else self.start_date
        ytd_facts = helpers.safe_groupby(
            df[(df['Year'] == date.year) & (df['CalendarDate'] <= date)],
            self.dimensions, self.agg)
        ytd_facts = ytd_facts.add_prefix(
            date.replace(day=1, month=1).strftime("%Y-%m-%d") + ' to ' +
            date.strftime("%Y-%m-%d") + " ")
        return ytd_facts

Example #4

0

Show file

    def mtd(self, df):
        if self.dim_filters:
            df = helpers.apply_dim_filters(df, dim_filters=self.dim_filters)

        # get max date from the user query for mtd
        date = self.end_date if self.end_date > self.start_date else self.start_date
        curr_month_year = MONTH_NAMES[
            date.month]['ShortMonthName'] + '-' + str(date.year)
        mtd_facts = helpers.safe_groupby(
            df[(df['MonthYear'] == curr_month_year)
               & (df['CalendarDate'] <= date)], self.dimensions, self.agg)
        mtd_facts = mtd_facts.add_prefix(
            date.replace(day=1).strftime("%Y-%m-%d") + ' to ' +
            date.strftime("%Y-%m-%d") + " ")
        return mtd_facts

Example #5

0

Show file

    def qtd(self, df):
        if self.dim_filters:
            df = helpers.apply_dim_filters(df, dim_filters=self.dim_filters)

        # get max date from the user query for qtd
        date = self.end_date if self.end_date > self.start_date else self.start_date
        curr_qtr_year = MONTH_NAMES[date.month]['QtrName'] + '-' + str(
            date.year)
        qtd_facts = helpers.safe_groupby(
            df[(df['QuarterYear'] == curr_qtr_year)
               & (df['CalendarDate'] <= date)], self.dimensions, self.agg)
        qtr_first_date = datetime(
            date.year, (3 * int(MONTH_NAMES[date.month]['QtrName'][1:]) - 2),
            1)
        qtd_facts = qtd_facts.add_prefix(
            qtr_first_date.strftime("%Y-%m-%d") + ' to ' +
            date.strftime("%Y-%m-%d") + " ")
        return qtd_facts

Example #6

0

Show file

    def mom_facts(self, df):
        if self.dim_filters:
            df = helpers.apply_dim_filters(df, dim_filters=self.dim_filters)

        final_df = pd.DataFrame()
        s_date = self.start_date
        e_date = self.end_date
        if e_date > df['CalendarDate'].max():
            # select max date of data if e_date is too large
            e_date = df['CalendarDate'].max()

        r = relativedelta.relativedelta(e_date, s_date)
        n_months = r.years * 12 + r.months
        # if there is no date or same date then make mom for last 6 months
        if n_months < 1:
            n_months = 6
            e_date = datetime.now()
            s_date = e_date - relativedelta.relativedelta(months=6)

        # show 1 extra month for current month
        for month in range(n_months + 1):
            curr_month_year = MONTH_NAMES[
                s_date.month]['ShortMonthName'] + '-' + str(s_date.year)
            prev_month_year = helpers.get_prev_month_year(curr_month_year)
            curr_month_facts = helpers.safe_groupby(
                df[df['MonthYear'] == curr_month_year],
                self.dimensions + ['MonthYear'], self.agg)
            last_month_facts = helpers.safe_groupby(
                df[df['MonthYear'] == prev_month_year],
                self.dimensions + ['MonthYear'], self.agg)
            if not last_month_facts.empty:
                last_month_facts.index = curr_month_facts.index

            mom_facts = (curr_month_facts / last_month_facts).fillna(
                0
            ) if last_month_facts.empty else 100 * curr_month_facts / last_month_facts
            mom_facts = mom_facts.add_prefix("% MOM ")
            final_df = final_df.append(
                pd.concat([curr_month_facts, mom_facts], axis=1, sort=False))
            s_date = s_date + relativedelta.relativedelta(months=1)
        return final_df

Example #7

0

Show file

def action_fact_kpi(df, entities):
    if entities['dim']:
        return action_fact_table_group(df, entities)
    entities['graph'] = 'text'
    df = helpers.apply_date_condition(df, entities['date_condition'])
    df = helpers.apply_dim_filters(df, entities['dim_filters'])
    date_text = helpers.get_date_text(entities['date_condition'])
    dim_filters_text = helpers.get_dim_filter_text(entities['dim_filters'])
    data = []
    for k, v in entities['fact_condition']['aggregation'].items():
        for agg in v:
            val = df[k].agg(agg)
            val = helpers.format_value_to_language(val)
            text = "The {0}{1}{2}{3} is {4}.".format(agg + ' of ', k,
                                                     date_text,
                                                     dim_filters_text, val)
            data.append({
                'chart': entities['graph'],
                'chart_title': 'Message',
                'data': text
            })
    return data

Example #8

0

Show file

    def qoq_facts(self, df):
        if self.dim_filters:
            df = helpers.apply_dim_filters(df, dim_filters=self.dim_filters)

        final_df = pd.DataFrame()
        s_date = self.start_date
        e_date = self.end_date
        n_qtr = helpers.get_n_quarters(e_date, s_date)
        # if there is no date or same date then make qoq for last 4 quarters
        if n_qtr < 1:
            n_qtr = 4
            e_date = datetime.now()
            s_date = e_date - relativedelta.relativedelta(months=n_qtr * 3)

        # use 1 extra quarter to show current quarter facts alse
        for qtr in range(n_qtr + 1):
            curr_qtr_year = MONTH_NAMES[s_date.month]['QtrName'] + '-' + str(
                s_date.year)
            prev_qtr_year = helpers.get_prev_qtr_year(curr_qtr_year)
            curr_qtr_facts = helpers.safe_groupby(
                df[df['QuarterYear'] == curr_qtr_year],
                self.dimensions + ['QuarterYear'], self.agg)
            last_qtr_facts = helpers.safe_groupby(
                df[df['QuarterYear'] == prev_qtr_year],
                self.dimensions + ['QuarterYear'], self.agg)
            if not last_qtr_facts.empty:
                last_qtr_facts.index = curr_qtr_facts.index

            qoq_facts = (curr_qtr_facts / last_qtr_facts).fillna(
                0
            ) if last_qtr_facts.empty else 100 * curr_qtr_facts / last_qtr_facts
            qoq_facts = qoq_facts.add_prefix("% QOQ ")
            final_df = final_df.append(
                pd.concat([curr_qtr_facts, qoq_facts], axis=1, sort=False))
            s_date = s_date + relativedelta.relativedelta(months=3)

        return final_df

Example #9

0

Show file

    def yoy_fact(self, df):
        if self.dim_filters:
            df = helpers.apply_dim_filters(df, dim_filters=self.dim_filters)

        final_df = pd.DataFrame()
        s_date = self.start_date
        e_date = self.end_date
        n_year = relativedelta.relativedelta(e_date, s_date).years

        # if there is no date or same date then make yoy for last 3 years
        if n_year <= 1:
            n_year = 3
            e_date = datetime.now()
            s_date = e_date - relativedelta.relativedelta(years=3)

        # add 1 to show current year sales also
        for year in range(n_year + 1):
            curr_year = s_date.year
            prev_year = curr_year - 1
            curr_year_facts = helpers.safe_groupby(df[df['Year'] == curr_year],
                                                   self.dimensions + ['Year'],
                                                   self.agg)
            last_year_facts = helpers.safe_groupby(df[df['Year'] == prev_year],
                                                   self.dimensions + ['Year'],
                                                   self.agg)
            if not last_year_facts.empty:
                last_year_facts.index = curr_year_facts.index
            yoy_facts = (curr_year_facts / last_year_facts).fillna(
                0
            ) if last_year_facts.empty else 100 * curr_year_facts / last_year_facts
            yoy_facts = yoy_facts.add_prefix("% YoY ")
            final_df = final_df.append(
                pd.concat([curr_year_facts, yoy_facts], axis=1, sort=False))
            s_date = s_date + relativedelta.relativedelta(years=1)

        return final_df

Example #10

0

Show file

    def contribution(self, df):

        # Raise Error if there is no filters
        if not self.dim_filters and not self.date_condition and not self.dimensions:
            raise NoLeftOperandInPercentage(
                "Could not apply percentage without a filter! Either apply filter in dimension or date"
            )

        all_filtered = df.copy()
        less_filtered = df.copy()
        contribution = pd.DataFrame(
            {'Error': ["Could not applied percentage"]})

        if (self.dim_filters or self.dimensions) and self.date_condition:
            # Do not filter lowest level dimension
            all_filtered = helpers.apply_dim_filters(
                all_filtered, dim_filters=self.dim_filters)
            all_filtered = helpers.apply_date_condition(
                all_filtered, self.date_condition)
            less_filtered = helpers.apply_date_condition(
                less_filtered, self.date_condition)

            all_filtered = helpers.apply_fact_condition(
                all_filtered, self.dimensions, self.fact_condition)
            less_filtered = helpers.apply_fact_condition(
                less_filtered, self.dimensions[1:], self.fact_condition)

            contribution = 100 * all_filtered / less_filtered.iloc[0]
            contribution = contribution.add_prefix("% ")

        elif len(self.dim_filters.keys()) == 1 or self.dimensions:
            # Do not filter lowest level dimension
            less_dim_filters = {
                k: v
                for k, v in self.dim_filters.items()
                if k != list(self.dim_filters.keys())[0]
            }
            if not less_dim_filters:
                less_dim_filters = self.dim_filters
            all_filtered = helpers.apply_dim_filters(
                all_filtered, dim_filters=self.dim_filters)
            less_filtered = helpers.apply_dim_filters(
                less_filtered, dim_filters=less_dim_filters)
            all_filtered = helpers.apply_fact_condition(
                all_filtered, self.dimensions, self.fact_condition)
            less_filtered = helpers.apply_fact_condition(
                less_filtered, self.dimensions[1:], self.fact_condition)

            contribution = 100 * all_filtered / less_filtered.iloc[0]
            contribution = contribution.add_prefix("% ")

        elif len(self.dim_filters.keys()) > 1:
            # Do not filter lowest level dimension
            less_dim_filters = {
                k: v
                for k, v in self.dim_filters.items()
                if k != list(self.dim_filters.keys())[0]
            }
            all_filtered = helpers.apply_dim_filters(
                all_filtered, dim_filters=self.dim_filters)
            less_filtered = helpers.apply_dim_filters(
                less_filtered, dim_filters=less_dim_filters)
            all_filtered = helpers.apply_fact_condition(
                all_filtered, self.dimensions, self.fact_condition)
            less_filtered = helpers.apply_fact_condition(
                less_filtered, self.dimensions[1:], self.fact_condition)

            contribution = 100 * all_filtered / less_filtered
            contribution = contribution.add_prefix("% ")

        elif self.date_condition:
            # Do not filter first level date
            less_date_filters = self.date_condition[1:]
            more_date_filters = self.date_condition[0:1]

            if not len(self.date_condition) % 2:
                less_date_filters = self.date_condition[2:]
                more_date_filters = self.date_condition[0:2]

            all_filtered = helpers.apply_date_condition(
                all_filtered, more_date_filters)
            less_filtered = helpers.apply_date_condition(
                less_filtered, less_date_filters)

            all_filtered = helpers.apply_fact_condition(
                all_filtered, self.dimensions, self.fact_condition)
            less_filtered = helpers.apply_fact_condition(
                less_filtered, self.dimensions[1:], self.fact_condition)

            contribution = 100 * all_filtered / less_filtered
            contribution = contribution.add_prefix("% ")

        return contribution

Example #11

0

Show file

File: actions.py Project: igupta967937/Purchase_Chatbot

def action_po_header(df, entities, *args, **kwargs):
    if not entities['graph']:
        entities['graph'] = ['table']

    if not entities['fact_condition']:

        entities['fact_condition'] = {
            'aggregation': {
                'SubTotal': ['sum']
            },
            'conditions': [{
                'fact_name': 'SubTotal sum',
                'conditions': np.nan,
                'fact_value': np.nan
            }]
        }

        if {
                'ProductName', 'ProductNumber', 'ProductSubcategoryName',
                'ProductCategoryName'
        }.intersection(entities['dim'] + entities['adject'] +
                       list(entities['dim_filters'].keys())):
            entities['fact_condition'] = {
                'aggregation': {
                    'LineTotal': ['sum']
                },
                'conditions': [{
                    'fact_name': 'LineTotal sum',
                    'conditions': np.nan,
                    'fact_value': np.nan
                }]
            }

    df = helpers.apply_date_condition(df, entities['date_condition'])
    df = helpers.apply_dim_filters(df, entities['dim_filters'])
    df = helpers.apply_fact_condition(df, entities['dim'] + entities['adject'],
                                      entities['fact_condition'])
    facts = df.columns.tolist()
    df = df.reset_index().fillna(0)
    if 'index' in df.columns:
        df = df.drop('index', axis=1)
    data = []
    for graph in entities['graph']:
        if not entities['select_upto']:
            if len(df) == 1 and len(df.columns) == 1 and graph == 'text':
                df = "{0}{1}{2} is {3}".format(
                    df.columns[0],
                    helpers.get_date_text(entities['date_condition']),
                    helpers.get_dim_filter_text(entities['dim_filters']),
                    helpers.format_value_to_language(df.iloc[0, 0]))
                table_data = {
                    'chart': graph,
                    'chart_title': 'message',
                    'data': df
                }
                data.append(table_data)
            else:
                table_data = {
                    'chart': graph,
                    'facts': facts,
                    'dimensions': entities['dim'] + entities['adject'],
                    'table': df
                }
                data.append(table_data)
        else:
            # Apply Selection
            final_df = pd.DataFrame()
            for selection in entities['select_upto']:
                table_data = helpers.dynamic_selection_upto(
                    df, entities['dim'] + entities['adject'], facts,
                    selection['selection'], selection['upto'])
                final_df = final_df.append(table_data)
            table_data = {
                'chart': graph,
                'facts': facts,
                'dimensions': entities['dim'] + entities['adject'],
                'table': final_df
            }
            data.append(table_data)
    return data, 1

Example #12

0

Show file

File: actions.py Project: igupta967937/Purchase_Chatbot

def action_product_description(df,
                               entities,
                               page_num=0,
                               length=100,
                               *args,
                               **kwargs):
    df = helpers.apply_date_condition(df, entities['date_condition'])
    df = helpers.apply_dim_filters(df, entities['dim_filters'])

    for col in df:
        dt = df[col].dtype
        if dt == int or dt == float:
            df[col].fillna(0, inplace=True)
        else:
            df[col].fillna("", inplace=True)

    product_group_columns = [
        'ProductID', 'ProductName', 'ProductNumber', 'ProductSubcategoryName',
        'ProductCategoryName'
    ]
    product_agg = {
        'OrderDate': 'max',
        'OrderQty': 'sum',
        'LineTotal': 'sum',
        'ReceivedQty': 'sum',
        'RejectedQty': 'sum'
    }
    product_df = df.groupby(product_group_columns).agg(
        product_agg).reset_index()
    total_pages = len(product_df) // length
    product_df = product_df.sort_values(
        'ProductID',
        ascending=True).iloc[page_num * length:(page_num + 1) * length]

    product_df['LastPurchasedOn'] = product_df['OrderDate']
    product_df.drop('OrderDate', axis=1, inplace=True)
    product_df['ReturnRate'] = (product_df['RejectedQty'] /
                                product_df['OrderQty']).round(2)
    product_df['PendingOrder'] = product_df.apply(lambda x: df[
        (df['Status'] == 'Pending') &
        (df['ProductID'] == x['ProductID'])]['OrderQty'].sum(),
                                                  axis=1)

    product_df['UnitPrice'] = product_df.apply(lambda x: df[
        (df['ProductID'] == x['ProductID']) &
        (df['OrderDate'] == x['LastPurchasedOn'])]['UnitPrice'].mean(),
                                               axis=1)
    product_df = product_df.round(2)

    data = []
    for index, row in product_df.iterrows():
        row_product_details_df = df[df['ProductID'] == row['ProductID']].round(
            2)
    row_product_details_df = row_product_details_df.groupby('Month').agg({
        'LineTotal':
        'sum'
    }).reset_index()
    full_month_names = {k: v['FullMonthName'] for k, v in MONTH_NAMES.items()}
    row_product_details_df = row_product_details_df.replace({
        'Month':
        full_month_names
    }).round(2)
    for col in row_product_details_df:
        dt = row_product_details_df[col].dtype
    if dt == int or dt == float:
        row_product_details_df[col].fillna(0, inplace=True)
    else:
        row_product_details_df[col].fillna("", inplace=True)

    row_product_data = [row_product_details_df.columns.to_list()
                        ] + row_product_details_df.to_numpy().tolist()
    row_data = {
        'ProductData': {
            'type': 'values',
            'value': row.to_dict()
        },
        'ProductTrend': {
            'type': 'line',
            'value': row_product_data
        }
    }
    data.append(row_data)
    if not data:
        return [{
            'chart': 'text',
            'chart_title': 'Message',
            'data': 'No data found!'
        }], 1
    data = [{'chart': 'customProductChart', 'data': data}]
    return data, total_pages

Example #13

0

Show file

File: actions.py Project: igupta967937/Purchase_Chatbot

def action_po_header_details(df,
                             entities,
                             page_num=0,
                             length=100,
                             *args,
                             **kwargs):
    df = helpers.apply_date_condition(df, entities['date_condition'])
    df = helpers.apply_dim_filters(df, entities['dim_filters'])
    total_pages = len(df) // length
    df = df.sort_values('PurchaseOrderID',
                        ascending=False).iloc[page_num *
                                              length:(page_num + 1) * length]
    for col in df:
        dt = df[col].dtype
        if dt == int or dt == float:
            df[col].fillna(0, inplace=True)
        else:
            df[col].fillna("", inplace=True)
    order_group_columns = [
        'PurchaseOrderID', 'EmployeeName', 'JobTitle', 'DepartmentName',
        'VendorAccountNumber', 'VendorName', 'VendorCreditRating', 'Status',
        'ShipMethodName', 'OrderDate', 'ShipDate'
    ]
    order_agg = {
        'SubTotal': 'mean',
        'TaxAmt': 'mean',
        'Freight': 'mean',
        'TotalDue': 'mean'
    }
    order_df = df.groupby(order_group_columns).agg(order_agg)
    order_df = order_df.reset_index()

    order_details_group_columns = [
        'PurchaseOrderID', 'ProductName', 'ProductNumber',
        'ProductSubcategoryName', 'ProductCategoryName'
    ]
    order_details_agg = [
        'OrderQty', 'UnitPrice', 'LineTotal', 'ReceivedQty', 'RejectedQty'
    ]

    order_details_df = df[order_details_group_columns + order_details_agg]

    data = []
    for index, row in order_df.iterrows():
        row_order_details_df = order_details_df[
            order_details_df['PurchaseOrderID'] ==
            row['PurchaseOrderID']].round(2)

        row_order_data = [row_order_details_df.columns.to_list()
                          ] + row_order_details_df.to_numpy().tolist()
        row_data = {
            'PurchaseOrderHeader': {
                'type': 'values',
                'value': row.to_dict()
            },
            'PurchaseOrderDetails': {
                'type': 'table',
                'value': row_order_data
            }
        }
        data.append(row_data)
    if not data:
        return [{
            'chart': 'text',
            'chart_title': 'Message',
            'data': 'No data found!'
        }], 1
    data = [{'chart': 'customPOChart', 'data': data}]
    return data, total_pages