Esempio n. 1
0
def xml2dict(input_file_name: str,
             key_tag_name: str,
             value_tag_name: str,
             *,
             progress_indicator=None,
             estimated_items_count=0) -> (dict, Error):
    """Build a ``{key: value}`` dictionary from the items of an XML file.

    The file is read with ``xml2list``. Every returned item that contains
    both ``key_tag_name`` and ``value_tag_name`` contributes one entry; items
    missing either tag are skipped with a warning. If the same key occurs
    more than once, the last occurrence wins.

    Args:
        input_file_name: Path of the XML file, passed to ``xml2list``.
        key_tag_name: Item key whose value becomes the dictionary key.
        value_tag_name: Item key whose value becomes the dictionary value.
        progress_indicator: Optional callable. It is forwarded to
            ``xml2list`` and, while the dictionary is built, invoked as
            ``progress_indicator(index, total)`` before each item is handled
            (``index`` runs from 0 to ``total - 1``).
        estimated_items_count: Forwarded unchanged to ``xml2list``.

    Returns:
        ``(mapping, Error(None))`` on success, or ``(None, Error(message))``
        when ``xml2list`` reports an error.
    """
    mylog.info("Reading file {0}...".format(input_file_name))
    item_list, error = xml2list(input_file_name,
                                progress_indicator=progress_indicator,
                                estimated_items_count=estimated_items_count)
    if error:
        # NOTE(review): the two parts are joined without a separator; confirm
        # that str(error) already ends with whitespace or punctuation.
        return None, Error(str(error) + "File: {0}".format(input_file_name))

    # Presumably terminates the progress output left by xml2list.
    print('\n')
    mylog.info("Building dictionary...")

    res = {}
    total_item_count = len(item_list)
    for count, item in enumerate(item_list):
        if progress_indicator:
            progress_indicator(count, total_item_count)

        if key_tag_name in item and value_tag_name in item:
            res[item[key_tag_name]] = item[value_tag_name]
        else:
            mylog.warning("No tag-value match!")

    return res, Error(None)
Esempio n. 2
0
def exclude_data(df, col_name, val_list):
    """Filter ``df`` once per value in ``val_list`` and return the result.

    For each value the frame is narrowed to the rows selected by
    ``no_matching(df[col_name], value)`` (presumably the rows that do *not*
    match the value; see that helper). A warning is logged when a value
    removes no rows. If ``col_name`` is not a column of ``df``, an error is
    logged and ``df`` is returned unchanged.
    """
    known_columns = df.columns.values.tolist()
    if col_name not in known_columns:
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df

    for unwanted in val_list:
        remaining = df.loc[no_matching(df[col_name], unwanted)]

        # An unchanged shape means this value did not remove any row.
        if remaining.shape == df.shape:
            mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(unwanted, col_name))
        df = remaining

    return df
def set_parameter_by_ispn(*, df: pd.DataFrame, destination_col: str,
                          source_cols: tuple, **options):
    """Set one cell of ``df``, addressing the row by its ``'Ispn'`` value.

    ``df`` is modified in place. A warning is logged when the cell being
    overwritten is not the empty string.

    Args:
        df: Frame to modify; the row is looked up in its ``'Ispn'`` column.
        destination_col: Column whose cell is overwritten.
        source_cols: ``(ispn, new_value)``: the Ispn identifying the row
            (the first matching row is used) and the value to store.
        **options: Accepted and ignored.

    Returns:
        None. If the Ispn cannot be found, or ``destination_col`` is not a
        column of ``df``, an error is logged and ``df`` is left untouched,
        mirroring ``set_multiple_parameters_by_ispn``.
    """
    del options

    ispn = source_cols[0]
    new_value = source_cols[1]

    try:
        index = df.index[df['Ispn'] == ispn].tolist()[0]
    except (KeyError, IndexError) as e:
        # KeyError: no 'Ispn' column; IndexError: no row carries this Ispn.
        mylog.error("Invalid Ispn {0}: {1}".format(ispn, e))
        return

    try:
        current_value = df.at[index, destination_col]
    except KeyError as e:
        mylog.error("Invalid Parameter '{0}' in {1}: {2}".format(
            destination_col, ispn, e))
        return

    if current_value != '':
        mylog.warning("Replacing non-blank value at {0} : {1} to {2}".format(
            ispn, current_value, new_value))

    df.at[index, destination_col] = new_value
Esempio n. 4
0
def update_excel_sheet(updated_sheet_name: str,
                       file_name: str,
                       df: pd.DataFrame,
                       prompt=False,
                       convert_strings_to_urls=True) -> Error:
    """Write ``df`` into one sheet of an Excel file, keeping the other sheets.

    The existing sheet names are read with ``read_sheet_names``. If that
    fails, the file is treated as missing and a new one is written containing
    only ``df``. Otherwise every existing sheet is read, the sheet named
    ``updated_sheet_name`` is replaced (or added) with ``df``, and the whole
    set is written back with ``write_excel``.

    Args:
        updated_sheet_name: Sheet to replace or create. When empty, the first
            existing sheet is overwritten; if the file has to be created, the
            name is left for ``write_excel`` to choose.
        file_name: Path of the Excel file.
        df: Data to store in the sheet.
        prompt: Forwarded to ``write_excel``.
        convert_strings_to_urls: Forwarded to ``write_excel``.

    Returns:
        The error object returned by ``write_excel``.
    """
    original_sheet_list, error = read_sheet_names(file_name)

    if error:
        # The sheet list is not usable here, so it must not be consulted for
        # a default sheet name (doing so crashed on an empty name).
        # file doesn't exist yet, try to create new
        mylog.warning("File {0} doesn't exist. Creating new".format(file_name))
        write_options = {
            'prompt': prompt,
            'convert_strings_to_urls': convert_strings_to_urls,
        }
        if updated_sheet_name:
            write_options['sheet_name'] = updated_sheet_name
        # With an empty name, sheet_name is omitted; write_excel is already
        # called without it below, so presumably it has a usable default.
        return write_excel(file_name, df, **write_options)

    # overwrite first sheet if updated_sheet_name is empty
    if not updated_sheet_name:
        updated_sheet_name = original_sheet_list[0]

    # read all existing sheets, preserving their order
    excel_with_sheets_dict = OrderedDict()
    for sheet in original_sheet_list:
        next_sheet, error = read_excel(file_name,
                                       replace_nan='',
                                       sheet_name=sheet)
        if error:
            # NOTE(review): a sheet that cannot be read is left out of the
            # rewritten file; confirm this best-effort behaviour is intended.
            mylog.error("Can't read {0} - {1}: {2}".format(
                file_name, sheet, error))
        else:
            excel_with_sheets_dict[sheet] = next_sheet

    # Replaces the sheet in place if it existed, otherwise appends it.
    excel_with_sheets_dict[updated_sheet_name] = df

    mylog.debug("excel_with_sheets_dict={0}".format(
        list(excel_with_sheets_dict)))
    error = write_excel(file_name,
                        excel_with_sheets_dict,
                        prompt=prompt,
                        convert_strings_to_urls=convert_strings_to_urls)

    return error
def filter_and_remove_empty(*, df: pd.DataFrame, destination_col: str,
                            source_cols: tuple, **options):
    """Keep only the wanted rows of ``df``, then drop its empty columns.

    ``df`` is modified in place and nothing is returned.

    1. Rows whose ``destination_col`` value is not in ``source_cols`` are
       dropped. If the column does not exist, a warning is logged and no row
       is removed.
    2. Columns containing only NaN are dropped.
    3. Columns whose single distinct non-NaN value is the empty string, and
       whose first cell is the empty string, are dropped.

    Args:
        df: Frame to filter in place.
        destination_col: Column the row filter is applied to.
        source_cols: Values of ``destination_col`` to keep.
        **options: Accepted and ignored.
    """
    del options

    if destination_col in df.columns:
        df.drop(df[~df[destination_col].isin(source_cols)].index, inplace=True)
    else:
        mylog.warning(
            "Column {0} doesn't exist. Can't filter".format(destination_col))

    df.dropna(axis=1, how='all', inplace=True)

    # nunique() ignores NaN, so it is checked first; it also guarantees the
    # column is non-empty before its first cell is inspected.
    blank_cols = [
        col for col in df.columns
        if df[col].nunique() == 1 and df[col].iloc[0] == ''
    ]
    # `columns=` instead of a positional axis: pandas 2.0 no longer accepts
    # `df.drop(col, 1)`.
    df.drop(columns=blank_cols, inplace=True)

    mylog.debug("Dropped empty columns. Remaining columns: {0}".format(
        list(df.columns)))
def set_multiple_parameters_by_ispn(*, df: pd.DataFrame, destination_col: str,
                                    source_cols: tuple, **options):
    """Set several cells in the row of ``df`` identified by an Ispn.

    Despite its name, ``destination_col`` holds the Ispn value used to find
    the row in the ``'Ispn'`` column (the first match is used).
    ``source_cols`` is a flat sequence ``(column, value, column, value, ...)``
    and a trailing unpaired element is ignored. ``df`` is modified in place
    and nothing is returned.

    A warning is logged whenever a cell that is not the empty string gets
    overwritten. If the row cannot be located, an error is logged and nothing
    is changed. A failure on one column is logged and the remaining pairs are
    still processed.
    """
    del options

    try:
        matching_labels = df.index[df['Ispn'] == destination_col].tolist()
        row_label = matching_labels[0]
    except Exception as lookup_error:
        mylog.error("Invalid Ispn {0}: {1}".format(destination_col, lookup_error))
        return

    # Consuming the same iterator twice per step yields consecutive pairs.
    flat_items = iter(source_cols)
    for param_name, new_value in zip(flat_items, flat_items):
        try:
            previous_value = df.at[row_label, param_name]
            if previous_value != '':
                mylog.warning(
                    "Replacing non-blank value at {0} : {1} to {2}".format(
                        destination_col, previous_value, new_value))
            df.at[row_label, param_name] = new_value
        except Exception as cell_error:
            mylog.error("Invalid Parameter '{0}' in {1}: {2}".format(
                param_name, destination_col, cell_error))
Esempio n. 7
0
def include_only_data(df, col_name, val_list):
    """Return the rows of ``df`` selected for any value in ``val_list``.

    For each value, the rows selected by ``matching(df[col_name], value)``
    are collected (see that helper for the exact matching rule), and the
    per-value results are concatenated in ``val_list`` order. A row selected
    by several values therefore appears once per value. A warning is logged
    for every value that selects no rows.

    Args:
        df: Frame to filter; it is not modified.
        col_name: Column the values are matched against.
        val_list: Values to keep.

    Returns:
        The filtered frame. ``df`` itself is returned unchanged when
        ``val_list`` is empty (a warning is logged) or when ``col_name`` is
        not a column of ``df`` (an error is logged).
    """
    # Imported locally so the function does not rely on a module-level alias.
    import pandas as pd

    if not val_list:
        mylog.warning('Warning: Empty value list for "{0}"'.format(col_name))
        # Return the frame like every other path, so callers that rebind the
        # result never receive None.
        return df

    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df

    selected_frames = []
    for val in val_list:
        filtered_df = df.loc[matching(df[col_name], val)]

        if filtered_df.empty:
            mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(val, col_name))
        selected_frames.append(filtered_df)

    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    return pd.concat(selected_frames)