def get_and_merge_dictionary(self, df):
    """Merge this source's dictionary onto ``df`` and return the result.

    Loads a ``dct.Dict`` from the file named by ``self.p[vmc.filenamedict]``,
    builds an ``er.ErrorReport`` for ``df`` against that dictionary, runs the
    dictionary's ``auto_functions`` with the configured order/placement
    settings, and finally returns ``dictionary.merge(df, dctc.FPN)``.

    :param df: frame to merge the dictionary onto
    :returns: whatever ``dct.Dict.merge`` returns for ``df``
    """
    dictionary = dct.Dict(self.p[vmc.filenamedict])
    report = er.ErrorReport(
        df,
        dictionary,
        self.p[vmc.placement],
        self.p[vmc.filenameerror],
    )
    dictionary.auto_functions(
        report,
        self.p[vmc.autodicord],
        self.p[vmc.autodicplace],
    )
    return dictionary.merge(df, dctc.FPN)
def get_dict_order_df(self):
    """Load the raw data and return the dictionary's split error frame.

    Stores the result of ``self.get_raw_df()`` on ``self.df``, builds an
    ``er.ErrorReport`` for it against a default-constructed ``dct.Dict``, and
    returns ``Dict.split_error_df`` evaluated with the configured
    auto-dictionary order and placement settings.

    :returns: the value produced by ``dct.Dict.split_error_df``
    """
    self.df = self.get_raw_df()
    dictionary = dct.Dict()
    report = er.ErrorReport(
        self.df,
        dictionary,
        self.p[vmc.placement],
        self.p[vmc.filenameerror],
    )
    return dictionary.split_error_df(
        report,
        self.p[vmc.autodicord],
        self.p[vmc.autodicplace],
    )
def import_plan_data(key, df, plan_omit_list, **kwargs):
    """Build the plan dictionary frame for ``key`` from the plan data ``df``.

    Steps, in order:

    1. A missing or empty ``df`` is replaced by an empty frame holding the
       full-placement columns plus the vendor-key column.
    2. Rows whose vendor key appears in ``plan_omit_list`` are removed and
       only the full-placement columns are kept.
    3. ``full_placement_creation`` adds the ``dctc.FPN`` column and duplicate
       rows are dropped.
    4. A ``dct.Dict`` is loaded from ``kwargs[vmc.filenamedict]`` and an
       ``er.ErrorReport`` is constructed for the placement-name column
       (constructed for its side effects only; the object is discarded).
    5. The dictionary data is left-merged with the plan rows on every column
       the two frames share, the dictionary's ``apply_functions`` is run, and
       the date column(s) named by ``vmc.datadatecol`` are converted through
       ``utl.data_to_type``.

    :param key: vendor key forwarded to ``full_placement_creation``
    :param df: plan data frame, or ``None``
    :param plan_omit_list: vendor keys whose rows are excluded
    :param kwargs: source parameters; reads ``vmc.fullplacename``,
        ``vmc.filenamedict`` and ``vmc.filenameerror``
    :returns: the dictionary's ``data_dict`` frame after the merge
    """
    if df is None or df.empty:
        df = pd.DataFrame(columns=kwargs[vmc.fullplacename] + [vmc.vendorkey])

    keep_rows = ~df[vmc.vendorkey].isin(plan_omit_list)
    plan = df.loc[keep_rows]
    plan = plan.loc[:, kwargs[vmc.fullplacename]]
    plan = full_placement_creation(plan, key, dctc.FPN,
                                   kwargs[vmc.fullplacename])
    plan = plan.drop_duplicates()

    dictionary = dct.Dict(kwargs[vmc.filenamedict])
    er.ErrorReport(pd.DataFrame(plan[dctc.FPN]), dictionary, None,
                   kwargs[vmc.filenameerror])

    # Merge on every column present in both the dictionary and the plan rows.
    shared_cols = list(
        set(dictionary.data_dict.columns).intersection(plan.columns))
    dictionary.data_dict = utl.data_to_type(dictionary.data_dict,
                                            str_col=shared_cols)
    dictionary.data_dict = dictionary.data_dict.merge(
        plan, on=shared_cols, how='left')
    dictionary.apply_functions()
    dictionary.data_dict = utl.data_to_type(dictionary.data_dict,
                                            date_col=vmc.datadatecol)
    return dictionary.data_dict
def _transform_mixed_date_column(df, args):
    """Spread dates found in a mixed column onto the data rows below them.

    ``args`` is ``[type, mixed_col, date_col]``.  ``mixed_col`` is copied to
    ``date_col`` and passed through ``utl.data_to_type`` (presumably leaving
    non-date values null -- confirm against ``utl``).  Rows where the
    conversion produced a value are treated as date markers: their value is
    forward-filled onto following rows and the marker rows are dropped.
    """
    mixed_col = args[1]
    date_col = args[2]
    df[date_col] = df[mixed_col]
    df = utl.data_to_type(df, date_col=[date_col])
    # Remember which rows had no date *before* forward-filling; only those
    # rows are kept.
    is_data_row = df[date_col].isnull()
    df[date_col] = df[date_col].ffill()
    return df[is_data_row].reset_index(drop=True)


def _transform_pivot(df, args):
    """Pivot ``df`` so each value of the pivot column becomes its own column.

    ``args`` is ``[type, pivot_col, 'val1|val2|...']``.  Every column that is
    neither the pivot column nor a value column is used as the index, values
    are summed, and the resulting column levels are flattened to strings.
    """
    pivot_col = args[1]
    val_col = args[2].split('|')
    df = df.fillna(0)
    excluded = val_col + [pivot_col]
    index_cols = [x for x in df.columns if x not in excluded]
    df = pd.pivot_table(df, index=index_cols, columns=[pivot_col],
                        aggfunc='sum')
    if len(val_col) != 1:
        df.columns = df.columns.map('_'.join)
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [' - '.join([str(y) for y in x]) for x in df.columns]
    return df.reset_index()


def _normalize_merge_key(series):
    """Return ``series`` as strings with one trailing ``.0`` removed.

    Float columns are filled with 0 and cast to int first so that ``12.0``
    and ``12`` compare equal after conversion to text.
    """
    if series.dtype == 'float64':
        series = series.fillna(0).astype('int')
    # NOTE: str.strip('.0') would treat '.0' as a *set* of characters and
    # turn '100' into '1'; only the literal '.0' suffix must go.
    return series.astype('U').str.replace(r'\.0$', '', regex=True)


def _transform_merge(df, args):
    """Merge ``df`` with a CSV file on a pair of key columns.

    ``args`` is ``[type, merge_file, left_col, right_col]``.  Both key
    columns are normalised to strings, then the merge itself is delegated to
    ``er.ErrorReport`` whose ``merge_df`` attribute is returned without its
    ``_merge`` column.
    """
    merge_file = args[1]
    left_merge = args[2]
    right_merge = args[3]
    merge_df = pd.read_csv(merge_file)
    # Normalise each side explicitly; a dict keyed by column name would drop
    # the left frame whenever both key columns share the same name.
    df[left_merge] = _normalize_merge_key(df[left_merge])
    merge_df[right_merge] = _normalize_merge_key(merge_df[right_merge])
    filename = 'Merge-{}-{}.csv'.format(left_merge, right_merge)
    err = er.ErrorReport(df, merge_df, None, filename,
                         merge_col=[left_merge, right_merge])
    return err.merge_df.drop('_merge', axis=1)


def _transform_date_split(df, args):
    """Expand each row into one row per day between its start and end date.

    ``args`` is ``[type, start_col, end_col]`` with an optional fourth item
    ``'col1|col2'`` naming numeric columns that must not be divided.  All
    other ``int64``/``float64`` columns are divided evenly by the number of
    days (inclusive of both ends) and the start column receives consecutive
    daily dates.
    """
    start_date = args[1]
    end_date = args[2]
    exempt_col = args[3].split('|') if len(args) == 4 else []
    df = utl.data_to_type(df, date_col=[end_date, start_date])
    # groupby(level=0) below identifies the copies of a source row by index
    # label, so labels must be unique; the index is discarded at the end
    # anyway.
    df = df.reset_index(drop=True)
    df['days'] = (df[end_date] - df[start_date]).dt.days + 1
    skipped = exempt_col + ['days']
    n_cols = [x for x in df.columns
              if df[x].dtype in ['int64', 'float64'] and x not in skipped]
    df[n_cols] = df[n_cols].div(df['days'], axis=0)
    df = df.loc[df.index.repeat(df['days'])]
    df[start_date] = df.groupby(level=0)[start_date].transform(
        lambda x: pd.date_range(start=x.iat[0], periods=len(x)))
    df = df.drop('days', axis=1)
    return df.reset_index(drop=True)


def _transform_stack(df, args):
    """Stack side-by-side column groups that sit under a two-row header.

    ``args`` is ``[type, header_col_name, hold_col_name]``.  Columns whose
    name contains ``'Unnamed'`` take the name of the column to their left,
    so every group shares one header.  For each header the first row
    supplies the real column names, the remaining rows are the data, and
    the header itself is written to ``header_col_name``.  The groups are
    stacked vertically (in first-appearance order) and the hold column is
    repeated alongside each group.

    Note: the column labels of the passed-in ``df`` are renamed in place.
    """
    header_col_name = args[1]
    hold_col_name = args[2]
    df.columns = [
        df.columns[idx - 1] if 'Unnamed' in str(x) else x
        for idx, x in enumerate(df.columns)
    ]
    hdf = pd.DataFrame(df[hold_col_name])
    # dict.fromkeys de-duplicates while keeping column order, which makes
    # the stacked row order deterministic (a set would not).
    headers = list(dict.fromkeys(
        y for y in df.columns if y != hold_col_name))
    frames = []
    for header in headers:
        # Boolean mask always yields a DataFrame, even for a single column.
        tdf = df.loc[:, df.columns == header]
        labels = list(tdf.iloc[0])
        tdf = tdf.iloc[1:].copy()
        tdf.columns = labels
        tdf[header_col_name] = header
        frames.append(tdf)
    if not frames:
        return hdf.iloc[0:0].reset_index(drop=True)
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    ndf = pd.concat(frames).reset_index(drop=True)
    # Every group holds the same source rows (all but the header row), so
    # the hold column is repeated once per group and aligned by position.
    # An index-based inner join fails on the duplicated row labels.
    held = pd.concat([hdf.iloc[1:]] * len(frames)).reset_index(drop=True)
    return pd.concat([ndf, held], axis=1)


def _transform_melt(df, args):
    """Unpivot the listed columns into variable/value columns.

    ``args`` is ``[type, header_col_name, 'col1|col2|...']``.  Listed
    columns missing from ``df`` are ignored; all other columns are kept as
    identifiers.  Output columns are ``'<header>-variable'`` and
    ``'<header>-value'``.
    """
    header_col_name = args[1]
    variable_cols = args[2].split('|')
    df = df.melt(
        id_vars=[x for x in df.columns if x not in variable_cols],
        value_vars=[x for x in variable_cols if x in df.columns],
        var_name='{}-variable'.format(header_col_name),
        value_name='{}-value'.format(header_col_name))
    return df.reset_index(drop=True)


def _transform_raw_translate(df, args):
    """Apply the translation config read from ``dctc.filename_tran_config``."""
    tc = dct.DictTranslationConfig()
    tc.read(dctc.filename_tran_config)
    return tc.apply_translation_to_dict(df)


def _transform_add_column(df, args):
    """Add (or overwrite) column ``args[1]`` with the constant ``args[2]``."""
    col_name = args[1]
    col_val = args[2]
    df[col_name] = col_val
    return df


# Maps the transform type (first '::'-separated field) to its handler.
_TRANSFORM_HANDLERS = {
    'MixedDateColumn': _transform_mixed_date_column,
    'Pivot': _transform_pivot,
    'Merge': _transform_merge,
    'DateSplit': _transform_date_split,
    'Stack': _transform_stack,
    'Melt': _transform_melt,
    'RawTranslate': _transform_raw_translate,
    'AddColumn': _transform_add_column,
}


def df_single_transform(df, transform):
    """Apply one ``'::'``-delimited transform specification to ``df``.

    The first field of ``transform`` selects the transform type; remaining
    fields are its arguments:

    * ``MixedDateColumn::<mixed_col>::<date_col>``
    * ``Pivot::<pivot_col>::<val1|val2|...>``
    * ``Merge::<csv_file>::<left_col>::<right_col>``
    * ``DateSplit::<start_col>::<end_col>[::<exempt1|exempt2|...>]``
    * ``Stack::<header_col_name>::<hold_col_name>``
    * ``Melt::<header_col_name>::<col1|col2|...>``
    * ``RawTranslate``
    * ``AddColumn::<col_name>::<value>``

    :param df: frame to transform; several transforms modify it in place
    :param transform: specification string, or a NaN value meaning "none"
    :returns: the transformed frame; ``df`` itself when ``transform`` is NaN
        or names an unknown transform type
    """
    if str(transform) == 'nan':
        return df
    transform = transform.split('::')
    handler = _TRANSFORM_HANDLERS.get(transform[0])
    if handler is None:
        return df
    return handler(df, transform)