Exemple #1
0
def validate(id):
    """Perform validation check on file.

    Reads the CSV stored for the ``File`` record, generates its error data,
    saves that data with ``save_validation_data`` and returns a summary of
    the column information.

    Args:
        id (int): ID of file.

    Returns:
        context ({str:data}): information related to the validation of file.
            Keys: ``success`` (always 1), ``found_list``, ``summary`` (a list
            of ``(summary_index, summary_result)`` pairs) and
            ``missing_list``.
    """

    print("Begining Validation")

    # Future: loop here if uploaded multiple files or distribute with tasks
    file_path = str(File.objects.get(id=id).file)

    df_file = pd.read_csv(file_path)
    error_data, dtypes_dict = generate_error_data(df_file)
    # The fifth returned value is not used by this function.
    zip_list, summary_results, summary_indexes, remaining_mapping, _ = \
        get_column_information(df_file, dtypes_dict)

    print("Saving Error Information")
    save_validation_data(error_data, id, dtypes_dict)

    return {
        'success': 1,
        "found_list": zip_list,
        # Materialised as a list: on Python 3 a bare zip() is a one-shot
        # iterator, so a consumer iterating the summary a second time would
        # silently see nothing.
        "summary": list(zip(summary_indexes, summary_results)),
        "missing_list": remaining_mapping
    }
Exemple #2
0
def find_and_replace(df_data, data):
    """Search one column for a value and replace it if indicated.

    Args:
        df_data (pandas.DataFrame): data to search. Modified in place when a
            replacement is carried out.
        data (dict): request with the keys ``file_id``,
            ``filter_column_heading``, ``find_value``, ``replace_pressed``
            and, when replacing, ``replace_value``.

    Returns:
        pandas.DataFrame: when ``replace_pressed`` is falsy, the rows that
        match the search; otherwise the rows whose column equals
        ``replace_value`` after the replacement.
    """
    file_id = data['file_id']
    # Heading of the column where find and replace is being carried out.
    # Might be better to just find it throughout df_data.
    heading = data['filter_column_heading']

    if data['find_value'] == 'nan':
        # The literal string 'nan' means "find the missing values".
        filter_applied = df_data[heading].isnull()
    else:
        # Case-insensitive match on the string form of each cell.
        lowered = df_data[heading].astype('str').str.lower()
        filter_applied = np.array(lowered == str(data['find_value']).lower())

    if not data['replace_pressed']:
        return df_data[filter_applied]

    # Single .loc assignment: the chained form df[col][mask] = value may
    # write to a temporary copy and leave df_data unchanged.
    df_data.loc[filter_applied, heading] = data['replace_value']
    column_values = df_data.loc[filter_applied, heading]

    if len(column_values) > 0:
        # Re-check the replaced cells and store the refreshed validation
        # data, then write the modified frame back to the file.
        error_data, dtypes_dict = get_dtype_data(file_id)
        dicts = get_geolocation_dictionary()
        temp_prob_list, temp_error_counter = identify_col_dtype(
            column_values, heading, dicts)
        dtypes_dict[heading][filter_applied] = temp_error_counter
        error_data[heading] = get_prob_list(dtypes_dict[heading])
        save_validation_data(error_data, file_id, dtypes_dict)
        update_data(File.objects.get(id=file_id).file, df_data)

    return df_data[df_data[heading] == data['replace_value']]
Exemple #3
0
def update(id, data):
    """Updates a header or a cell that the user edits.

    When ``data['changeHeader']`` is not an empty string the column
    ``data['header_tobe_changed']`` is renamed to ``data['header_value']``;
    otherwise the cell at ``data['column']`` / ``data['line_no']`` is set to
    ``data['cell_value']``. In both cases the validation data and the file
    are saved afterwards.

    Args:
        id (int): ID of file.
        data (dict): edit request. ``data['header_value']`` is overwritten
            with the de-duplicated name when a header is renamed.

    Returns:
        dict: ``{'success': 1}``.
    """
    df_data = get_file_data(id)
    error_data, dtypes_dict = get_dtype_data(id)

    if data['changeHeader'] != '':
        old_header = data['header_tobe_changed']
        # Append 2, 3, ... until the new name no longer clashes with an
        # existing column.
        count = 2
        new_header = data['header_value']
        while new_header in df_data.columns:
            new_header = data['header_value'] + str(count)
            count += 1
        data['header_value'] = new_header

        df_data = df_data.rename(columns={old_header: new_header})
        # Move the validation entries over to the new column name.
        dtypes_dict[new_header] = dtypes_dict[old_header]
        dtypes_dict.pop(old_header, None)
        error_data[new_header] = error_data[old_header]
        error_data.pop(old_header, None)
    else:
        heading = data['column']
        line_no = data['line_no']
        # Single .loc assignment: the chained form df[col][row] = value may
        # write to a temporary copy and leave df_data unchanged.
        # NOTE(review): assumes line_no is a label of df_data's index.
        df_data.loc[line_no, heading] = data['cell_value']

        prob_list, error_count = update_cell_type(
            df_data.loc[line_no, heading], dtypes_dict[heading], line_no,
            heading)
        dtypes_dict[heading] = error_count
        error_data[heading] = prob_list

    save_validation_data(error_data, id, dtypes_dict)
    update_data(File.objects.get(id=id).file, df_data)

    return {'success': 1}
Exemple #4
0
def delete_data(id, data):
    """Delete the requested rows from a file and from its validation data.

    Args:
        id (int): ID of file.
        data (dict): request whose ``row_keys`` entry lists the rows to
            delete; each key is converted with ``int``.

    Returns:
        dict: ``{'success': 1}``.
    """
    frame = get_file_data(id)
    positions = [int(key) for key in data['row_keys']]
    # Drop the index labels found at those positions, then renumber the rows.
    frame = frame.drop(frame.index[positions]).reset_index(drop=True)

    # Remove the same rows from the stored validation data.
    errors, dtypes = get_dtype_data(id)
    errors, dtypes = remove_entries(errors, dtypes, positions)
    save_validation_data(errors, id, dtypes)

    stored_file = File.objects.get(id=id).file
    update_data(stored_file, frame)
    return {'success': 1}
Exemple #5
0
def update(id, data):
    """Updates a header or a cell that the user edits.

    When ``data['changeHeader']`` is not an empty string the column
    ``data['header_tobe_changed']`` is renamed to ``data['header_value']``;
    otherwise the cell at ``data['column']`` / ``data['line_no']`` is set to
    ``data['cell_value']``. In both cases the validation data and the file
    are saved afterwards.

    Args:
        id (int): ID of file.
        data (dict): edit request. ``data['header_value']`` is overwritten
            with the de-duplicated name when a header is renamed.

    Returns:
        dict: ``{'success': 1}``.
    """
    df_data = get_file_data(id)
    error_data, dtypes_dict = get_dtype_data(id)

    if data['changeHeader'] != '':
        old_header = data['header_tobe_changed']
        # Append 2, 3, ... until the new name no longer clashes with an
        # existing column.
        count = 2
        new_header = data['header_value']
        while new_header in df_data.columns:
            new_header = data['header_value'] + str(count)
            count += 1
        data['header_value'] = new_header

        df_data = df_data.rename(columns={old_header: new_header})
        # Move the validation entries over to the new column name.
        dtypes_dict[new_header] = dtypes_dict[old_header]
        dtypes_dict.pop(old_header, None)
        error_data[new_header] = error_data[old_header]
        error_data.pop(old_header, None)
    else:
        heading = data['column']
        line_no = data['line_no']
        # Single .loc assignment: the chained form df[col][row] = value may
        # write to a temporary copy and leave df_data unchanged.
        # NOTE(review): assumes line_no is a label of df_data's index.
        df_data.loc[line_no, heading] = data['cell_value']
        cell = df_data.loc[line_no, heading]

        # TODO: find out why this branch is needed. The entries of
        # error_data apparently have different types depending on the
        # caller: lists when run from the test code (first branch), something
        # else when run through GraphQL (second branch). The inconsistency
        # possibly originates in save_validation_data -- to be confirmed.
        if isinstance(error_data[next(iter(error_data))], list):
            prob_list, error_count = update_cell_type(
                cell, dtypes_dict[heading], line_no, heading)
        else:
            prob_list, error_count = update_cell_type(
                cell, error_data[heading], line_no, heading)

        dtypes_dict[heading] = error_count
        error_data[heading] = prob_list

    save_validation_data(error_data, id, dtypes_dict)
    update_data(File.objects.get(id=id).file, df_data)

    return {'success': 1}