def xml2dict(input_file_name: str, key_tag_name: str, value_tag_name: str, *,
             progress_indicator=None, estimated_items_count=0) -> (dict, Error):
    """Load an XML file through xml2list() and fold its items into one dict.

    Each item returned by xml2list() that contains both ``key_tag_name`` and
    ``value_tag_name`` contributes ``item[key_tag_name] -> item[value_tag_name]``;
    a later item with the same key overwrites an earlier one. Items missing
    either tag are skipped with a warning.

    :param input_file_name: path handed to xml2list().
    :param key_tag_name: item entry used as the dictionary key.
    :param value_tag_name: item entry used as the dictionary value.
    :param progress_indicator: optional callable; forwarded to xml2list() and
        also called here as ``progress_indicator(item_position, total_items)``
        before each item is processed.
    :param estimated_items_count: forwarded to xml2list() unchanged.
    :return: ``(mapping, Error(None))`` on success, or ``(None, Error(text))``
        when xml2list() reports an error.
    """
    mylog.info("Reading file {0}...".format(input_file_name))
    items, read_error = xml2list(
        input_file_name,
        progress_indicator=progress_indicator,
        estimated_items_count=estimated_items_count,
    )
    if read_error:
        return None, Error(str(read_error) + "File: {0}".format(input_file_name))

    print('\n')
    mylog.info("Building dictionary...")

    mapping = {}
    total = len(items)
    for position, entry in enumerate(items):
        if progress_indicator:
            progress_indicator(position, total)
        if key_tag_name not in entry or value_tag_name not in entry:
            mylog.warning("No tag-value match!")
            continue
        mapping[entry[key_tag_name]] = entry[value_tag_name]
    return mapping, Error(None)
def exclude_data(df, col_name, val_list):
    """Filter *df* once per entry of *val_list* and return the result.

    For every value, the rows kept are those selected by the mask
    ``no_matching(column, value)``. If a filtering step leaves the frame's
    shape unchanged, a "value not found" warning is logged for that value.

    :param df: DataFrame to filter; it is not modified in place.
    :param col_name: column whose values are tested.
    :param val_list: values to filter out, applied one after another.
    :return: the filtered DataFrame, or *df* itself when *col_name* is not one
        of its columns (an error is logged in that case).
    """
    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df

    remaining = df
    for unwanted in val_list:
        keep_mask = no_matching(remaining[col_name], unwanted)
        narrowed = remaining.loc[keep_mask]
        # Same shape before and after means this value removed nothing.
        if narrowed.shape == remaining.shape:
            mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(unwanted, col_name))
        remaining = narrowed
    return remaining
def set_parameter_by_ispn(*, df: pd.DataFrame, destination_col: str, source_cols: tuple, **options):
    """Set a single cell of *df*, addressed by Ispn value and column name.

    The row is the first one whose ``'Ispn'`` column equals ``source_cols[0]``;
    the cell in *destination_col* of that row is set to ``source_cols[1]``.
    *df* is modified in place and nothing is returned.

    A warning is logged when the cell being overwritten is not the empty
    string. An unknown Ispn or an unknown column is logged as an error and
    leaves *df* untouched, instead of escaping as a bare IndexError/KeyError.

    :param df: DataFrame to update in place; must have an ``'Ispn'`` column.
    :param destination_col: name of the column to write to.
    :param source_cols: ``(ispn, new_value)``.
    :param options: accepted and ignored.
    """
    del options
    ispn, new_value = source_cols[0], source_cols[1]

    try:
        index = df.index[df['Ispn'] == ispn].tolist()[0]
    except (KeyError, IndexError) as e:
        # KeyError: no 'Ispn' column; IndexError: no row carries this Ispn.
        mylog.error("Invalid Ispn {0}: {1}".format(ispn, e))
        return

    if destination_col not in df.columns:
        mylog.error("Invalid Parameter '{0}' in {1}: column not found".format(
            destination_col, ispn))
        return

    current_value = df.at[index, destination_col]
    if current_value != '':
        mylog.warning("Replacing non-blank value at {0} : {1} to {2}".format(
            ispn, current_value, new_value))
    df.at[index, destination_col] = new_value
def update_excel_sheet(updated_sheet_name: str, file_name: str, df: pd.DataFrame, prompt=False,
                       convert_strings_to_urls=True) -> Error:
    """Write *df* into one sheet of an Excel file, keeping the other sheets.

    If read_sheet_names() reports an error, a new file is written containing
    only *df*. Otherwise every existing sheet is read, the sheet named
    *updated_sheet_name* is replaced (or added) with *df*, and all sheets are
    written back.

    :param updated_sheet_name: sheet to replace. When empty, the first
        existing sheet is replaced; when the file is being created, the sheet
        name is left to write_excel().
    :param file_name: path of the Excel file.
    :param df: data for the updated sheet.
    :param prompt: forwarded to write_excel().
    :param convert_strings_to_urls: forwarded to write_excel().
    :return: the error object returned by write_excel().
    """
    original_sheet_list, error = read_sheet_names(file_name)

    # The error must be checked before original_sheet_list is touched: on
    # failure there is no sheet list to take a default name from.
    if error:
        # file doesn't exist yet, try to create new
        mylog.warning("File {0} doesn't exist. Creating new".format(file_name))
        write_kwargs = {'prompt': prompt,
                        'convert_strings_to_urls': convert_strings_to_urls}
        if updated_sheet_name:
            write_kwargs['sheet_name'] = updated_sheet_name
        return write_excel(file_name, df, **write_kwargs)

    # overwrite first sheet if updated_sheet_name is empty
    if not updated_sheet_name:
        updated_sheet_name = original_sheet_list[0]

    # read all existing sheets
    excel_with_sheets_dict = OrderedDict()
    for sheet in original_sheet_list:
        next_sheet, error = read_excel(file_name, replace_nan='', sheet_name=sheet)
        if error:
            # NOTE(review): a sheet that fails to read is left out of the
            # rewritten file -- confirm this best-effort behaviour is intended.
            mylog.error("Can't read {0} - {1}: {2}".format(
                file_name, sheet, error))
        else:
            excel_with_sheets_dict[sheet] = next_sheet

    excel_with_sheets_dict[updated_sheet_name] = df
    mylog.debug("excel_with_sheets_dict={0}".format(
        list(excel_with_sheets_dict)))
    return write_excel(file_name, excel_with_sheets_dict, prompt=prompt,
                       convert_strings_to_urls=convert_strings_to_urls)
def filter_and_remove_empty(*, df: pd.DataFrame, destination_col: str, source_cols: tuple, **options):
    """Keep only matching rows of *df*, then drop its empty columns, in place.

    Rows whose *destination_col* value is not in *source_cols* are dropped.
    If *destination_col* does not exist, a warning is logged and no rows are
    dropped. Afterwards, columns that are entirely NaN are removed, as are
    columns whose only distinct value is the empty string.

    :param df: DataFrame to modify in place.
    :param destination_col: column whose values are tested.
    :param source_cols: values to keep.
    :param options: accepted and ignored.
    """
    del options
    if destination_col in df.columns:
        keep = df[destination_col].isin(source_cols)
        df.drop(df[~keep].index, inplace=True)
    else:
        mylog.warning(
            "Column {0} doesn't exist. Can't filter".format(destination_col))

    df.dropna(axis=1, how='all', inplace=True)

    # nunique() ignores NaN, so a column counts as blank when its single
    # distinct value is '' and its first entry is ''.
    blank_cols = [col for col in df.columns
                  if df[col].nunique() == 1 and df[col].tolist()[0] == '']
    if blank_cols:
        # Keyword form: drop() no longer accepts axis as a positional argument.
        df.drop(columns=blank_cols, inplace=True)

    mylog.debug("Dropped empty columns. Remaining columns: {0}".format(
        list(df.columns)))
def set_multiple_parameters_by_ispn(*, df: pd.DataFrame, destination_col: str, source_cols: tuple, **options):
    """Set several cells in the row of *df* identified by an Ispn value.

    Here *destination_col* carries the Ispn value itself: the row is the first
    one whose ``'Ispn'`` column equals it. *source_cols* is a flat sequence
    ``(column, value, column, value, ...)``; an unpaired trailing element is
    ignored. *df* is modified in place and nothing is returned.

    A failed row lookup is logged as an error and ends the call. A failure on
    one column is logged as an error and the remaining pairs are still
    processed. Overwriting a cell that is not the empty string logs a warning.

    :param df: DataFrame to update in place.
    :param destination_col: Ispn value selecting the row.
    :param source_cols: alternating column names and new values.
    :param options: accepted and ignored.
    """
    del options

    try:
        row_labels = df.index[df['Ispn'] == destination_col].tolist()
        index = row_labels[0]
    except Exception as e:
        mylog.error("Invalid Ispn {0}: {1}".format(destination_col, e))
        return

    # Zipping one iterator with itself yields consecutive (name, value) pairs.
    flat = iter(source_cols)
    for param_name, new_value in zip(flat, flat):
        try:
            existing = df.at[index, param_name]
            if existing != '':
                mylog.warning(
                    "Replacing non-blank value at {0} : {1} to {2}".format(
                        destination_col, existing, new_value))
            df.at[index, param_name] = new_value
        except Exception as e:
            mylog.error("Invalid Parameter '{0}' in {1}: {2}".format(
                param_name, destination_col, e))
def include_only_data(df, col_name, val_list):
    """Return the rows of *df* selected for the values in *val_list*.

    For each value, in order, the rows selected by the mask
    ``matching(column, value)`` are collected; the collected groups are
    concatenated in that order, so a row selected for two values appears
    twice. A warning is logged for every value that selects no rows.

    *df* is returned unchanged, with a logged message, when *val_list* is
    empty or when *col_name* is not one of its columns.

    :param df: DataFrame to filter; it is not modified in place.
    :param col_name: column whose values are tested.
    :param val_list: values to include.
    :return: the filtered DataFrame, or *df* itself in the two cases above.
    """
    if not val_list:
        mylog.warning('Warning: Empty value list for "{0}"'.format(col_name))
        # Return a DataFrame here as well, so every path has the same type.
        return df

    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df

    selected = []
    for val in val_list:
        filtered_df = df.loc[matching(df[col_name], val)]
        if filtered_df.empty:
            mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(val, col_name))
        selected.append(filtered_df)
    # pd.concat replaces the chained DataFrame.append calls (removed in pandas 2.0).
    return pd.concat(selected)