コード例 #1
0
 def get_and_merge_dictionary(self, df):
     """Load this object's dictionary and merge it onto ``df``.

     A ``dct.Dict`` is created from the configured dictionary filename, an
     ``er.ErrorReport`` is built from ``df`` and that dictionary, the
     dictionary's ``auto_functions`` are run against the report, and the
     result of ``dic.merge(df, dctc.FPN)`` is returned.

     :param df: frame handed to the error report and then to the merge.
     :returns: whatever ``dct.Dict.merge`` returns for ``df``.
     """
     params = self.p
     dictionary = dct.Dict(params[vmc.filenamedict])
     placement = params[vmc.placement]
     error_file = params[vmc.filenameerror]
     report = er.ErrorReport(df, dictionary, placement, error_file)
     auto_order = params[vmc.autodicord]
     auto_place = params[vmc.autodicplace]
     dictionary.auto_functions(report, auto_order, auto_place)
     return dictionary.merge(df, dctc.FPN)
コード例 #2
0
 def get_dict_order_df(self):
     """Refresh ``self.df`` from the raw data and return the split error frame.

     ``self.df`` is overwritten with ``self.get_raw_df()``. An empty
     ``dct.Dict`` and an ``er.ErrorReport`` over the raw frame are then
     created, and the value of ``dic.split_error_df`` is returned.

     :returns: the result of ``dct.Dict.split_error_df`` for the report.
     """
     raw_df = self.get_raw_df()
     self.df = raw_df
     blank_dict = dct.Dict()
     report = er.ErrorReport(
         self.df,
         blank_dict,
         self.p[vmc.placement],
         self.p[vmc.filenameerror],
     )
     return blank_dict.split_error_df(
         report,
         self.p[vmc.autodicord],
         self.p[vmc.autodicplace],
     )
コード例 #3
0
def import_plan_data(key, df, plan_omit_list, **kwargs):
    """Build the dictionary table for ``key`` from a plan frame.

    Steps, in order:

    1. A missing or empty ``df`` is replaced by an empty frame holding the
       full-placement columns plus the vendor-key column.
    2. Rows whose vendor key is in ``plan_omit_list`` are removed and only
       the full-placement columns are kept.
    3. ``full_placement_creation`` adds the ``dctc.FPN`` column and
       duplicate rows are dropped.
    4. A ``dct.Dict`` is loaded and an ``er.ErrorReport`` is constructed on
       the FPN column; the report object is discarded, so it is presumably
       run for its side effects only.
    5. The plan rows are left-merged onto the dictionary on every column the
       two frames share, ``apply_functions`` is run, and the date column is
       type-converted.

    :param key: passed through to ``full_placement_creation``.
    :param df: plan frame, may be ``None`` or empty.
    :param plan_omit_list: vendor keys to exclude.
    :param kwargs: must provide ``vmc.fullplacename``, ``vmc.filenamedict``
        and ``vmc.filenameerror``.
    :returns: the resulting ``data_dict`` frame of the dictionary.
    """
    if df is None or df.empty:
        placeholder_cols = kwargs[vmc.fullplacename] + [vmc.vendorkey]
        df = pd.DataFrame(columns=placeholder_cols)

    omitted = df[vmc.vendorkey].isin(plan_omit_list)
    plan_df = df.loc[~omitted]
    plan_df = plan_df.loc[:, kwargs[vmc.fullplacename]]
    plan_df = full_placement_creation(
        plan_df, key, dctc.FPN, kwargs[vmc.fullplacename])
    plan_df = plan_df.drop_duplicates()

    plan_dict = dct.Dict(kwargs[vmc.filenamedict])
    fpn_only = pd.DataFrame(plan_df[dctc.FPN])
    er.ErrorReport(fpn_only, plan_dict, None, kwargs[vmc.filenameerror])

    # Join on every column name present in both frames.
    shared_cols = set(plan_dict.data_dict.columns).intersection(
        plan_df.columns)
    merge_col = list(shared_cols)
    plan_dict.data_dict = utl.data_to_type(
        plan_dict.data_dict, str_col=merge_col)
    plan_dict.data_dict = plan_dict.data_dict.merge(
        plan_df, on=merge_col, how='left')
    plan_dict.apply_functions()
    plan_dict.data_dict = utl.data_to_type(
        plan_dict.data_dict, date_col=vmc.datadatecol)
    return plan_dict.data_dict
コード例 #4
0
def _normalize_merge_key(series):
    """Return ``series`` as strings with a trailing ``.0`` suffix removed.

    Float columns are first filled with 0 and cast to int so that ``12.0``
    becomes ``'12'``. Only a trailing ``.0`` / ``.00`` suffix is removed
    afterwards; a plain ``str.strip('.0')`` would also eat significant
    zeros (``'100'`` -> ``'1'``) and make distinct keys collide.
    """
    if series.dtype == 'float64':
        series = series.fillna(0).astype('int')
    series = series.astype('U')
    return series.str.replace(r'\.0+$', '', regex=True)


def df_single_transform(df, transform):
    """Apply one ``::``-delimited transform instruction to ``df``.

    ``transform`` is split on ``'::'``; the first piece selects the
    transform and the remaining pieces are its arguments:

    * ``MixedDateColumn::<mixed_col>::<date_col>`` - copy ``mixed_col`` to
      ``date_col``, convert it with ``utl.data_to_type``, forward fill it and
      keep only the rows whose converted value was null before the fill.
    * ``Pivot::<pivot_col>::<val1|val2|...>`` - fill nulls with 0 and pivot
      (sum) on ``pivot_col``, indexing by every other non-value column.
      Resulting column labels are flattened to strings.
    * ``Merge::<csv_file>::<left_col>::<right_col>`` - read ``csv_file`` and
      merge it with ``df`` through ``er.ErrorReport`` after normalising both
      key columns to strings.
    * ``DateSplit::<start_col>::<end_col>[::<exempt1|exempt2|...>]`` - spread
      each row over one row per day, dividing numeric columns (except the
      exempt ones) evenly across the days.
    * ``Stack::<header_col_name>::<hold_col_name>`` - stack groups of
      same-named columns on top of each other.
    * ``Melt::<header_col_name>::<var1|var2|...>`` - ``DataFrame.melt`` the
      listed columns.
    * ``RawTranslate`` - apply the translation config read from
      ``dctc.filename_tran_config``.
    * ``AddColumn::<col_name>::<col_val>`` - add a constant column.

    Several branches assign columns on the frame that was passed in, so the
    caller's ``df`` may be modified in place.

    :param df: frame to transform.
    :param transform: instruction string; a value whose ``str()`` is
        ``'nan'`` means "no transform".
    :returns: the transformed frame; ``df`` itself when ``transform`` is nan
        or names an unknown transform type.
    """
    if str(transform) == 'nan':
        return df
    transform = transform.split('::')
    transform_type = transform[0]
    if transform_type == 'MixedDateColumn':
        mixed_col = transform[1]
        date_col = transform[2]
        df[date_col] = df[mixed_col]
        df = utl.data_to_type(df, date_col=[date_col])
        # Snapshot the converted column before forward filling so the rows
        # that supplied a date can be told apart from the rows that got one.
        df['temp'] = df[date_col]
        df[date_col] = df[date_col].ffill()
        # Only rows that were null before the fill are kept.
        df = df[df['temp'].isnull()].reset_index(drop=True)
        df = df.drop('temp', axis=1)
    elif transform_type == 'Pivot':
        pivot_col = transform[1]
        val_col = transform[2].split('|')
        df = df.fillna(0)
        index_cols = [x for x in df.columns if x not in val_col + [pivot_col]]
        df = pd.pivot_table(df,
                            index=index_cols,
                            columns=[pivot_col],
                            aggfunc='sum')
        if len(val_col) != 1:
            # Pivot values may be non-strings (fillna above turns missing
            # values into the integer 0), so stringify before joining.
            df.columns = ['_'.join(str(y) for y in x) for x in df.columns]
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = [' - '.join([str(y) for y in x]) for x in df.columns]
        df = df.reset_index()
    elif transform_type == 'Merge':
        merge_file = transform[1]
        left_merge = transform[2]
        right_merge = transform[3]
        merge_df = pd.read_csv(merge_file)
        # Explicit pairs instead of a dict keyed by column name: when both
        # sides use the same column name a dict would keep only one frame.
        for frame, col in ((df, left_merge), (merge_df, right_merge)):
            frame[col] = _normalize_merge_key(frame[col])
        filename = 'Merge-{}-{}.csv'.format(left_merge, right_merge)
        err = er.ErrorReport(df,
                             merge_df,
                             None,
                             filename,
                             merge_col=[left_merge, right_merge])
        df = err.merge_df
        df = df.drop('_merge', axis=1)
    elif transform_type == 'DateSplit':
        start_date = transform[1]
        end_date = transform[2]
        if len(transform) == 4:
            exempt_col = transform[3].split('|')
        else:
            exempt_col = []
        df = utl.data_to_type(df, date_col=[end_date, start_date])
        # Inclusive number of days covered by each row.
        df['days'] = (df[end_date] - df[start_date]).dt.days + 1
        n_cols = [
            x for x in df.columns
            if df[x].dtype in ['int64', 'float64'] and x not in exempt_col +
            ['days']
        ]
        # Spread numeric values evenly, then emit one row per day.
        df[n_cols] = df[n_cols].div(df['days'], axis=0)
        df = df.loc[df.index.repeat(df['days'])]
        # Repeated rows share their original index label; give each group
        # consecutive dates starting from its start date.
        df[start_date] = (df.groupby(level=0)[start_date].transform(
            lambda x: pd.date_range(start=x.iat[0], periods=len(x))))
        df = df.drop('days', axis=1)
        df = df.reset_index(drop=True)  # type: pd.DataFrame
    elif transform_type == 'Stack':
        header_col_name = transform[1]
        hold_col_name = transform[2]
        # An 'Unnamed' column takes the original label of the column
        # immediately to its left.
        df.columns = [
            df.columns[idx - 1] if 'Unnamed' in x else x
            for idx, x in enumerate(df.columns)
        ]
        hdf = pd.DataFrame(df[hold_col_name])
        # First-appearance order keeps the stacked row order reproducible
        # (iterating a set of strings varies between interpreter runs).
        group_names = dict.fromkeys(
            y for y in df.columns if y != hold_col_name)
        frames = []
        for x in group_names:
            tdf = df[x]
            # The first data row carries the real column names of the group.
            tdf.columns = tdf.loc[0]
            tdf = tdf.iloc[1:].copy()
            tdf[header_col_name] = x
            frames.append(tdf)
        # DataFrame.append no longer exists in pandas 2.x; concat instead.
        ndf = pd.concat(frames) if frames else pd.DataFrame()
        df = pd.concat([ndf, hdf], axis=1, join='inner')
        df = df.reset_index(drop=True)  # type: pd.DataFrame
    elif transform_type == 'Melt':
        header_col_name = transform[1]
        variable_cols = transform[2].split('|')
        df = df.melt(id_vars=[x for x in df.columns if x not in variable_cols],
                     value_vars=[x for x in variable_cols if x in df.columns],
                     var_name='{}-variable'.format(header_col_name),
                     value_name='{}-value'.format(header_col_name))
        df = df.reset_index(drop=True)
    elif transform_type == 'RawTranslate':
        tc = dct.DictTranslationConfig()
        tc.read(dctc.filename_tran_config)
        df = tc.apply_translation_to_dict(df)
    elif transform_type == 'AddColumn':
        col_name = transform[1]
        col_val = transform[2]
        df[col_name] = col_val
    return df