def validate(id):
    """Validate an uploaded CSV file and store the resulting error data.

    Args:
        id (int): ID of the ``File`` record whose file should be validated.

    Returns:
        dict: Validation context with the keys ``'success'`` (always 1),
        ``'found_list'``, ``'summary'`` (a ``zip`` pairing summary indexes
        with summary results) and ``'missing_list'``.
    """
    print("Begining Validation")

    # Future: keep a list of paths here (and loop over it, or distribute the
    # work with tasks) once a user can upload multiple files.
    # Future: automatic mapping.
    csv_path = str(File.objects.get(id=id).file)
    frame = pd.read_csv(csv_path)

    error_data, dtypes_dict = generate_error_data(frame)

    # The fifth value (the file heading list) is not used here.
    column_info = get_column_information(frame, dtypes_dict)
    zip_list, summary_results, summary_indexes, remaining_mapping, _ = \
        column_info

    print("Saving Error Information")
    save_validation_data(error_data, id, dtypes_dict)

    # NOTE(review): under Python 3 ``zip`` is a one-shot iterator, so
    # 'summary' can only be consumed once by whoever receives the context.
    return {
        'success': 1,
        "found_list": zip_list,
        "summary": zip(summary_indexes, summary_results),
        "missing_list": remaining_mapping,
    }
def find_and_replace(df_data, data):
    """Search one column for a value and, if requested, replace every match.

    Args:
        df_data (pandas.DataFrame): Data of the file being edited. It is
            modified in place when a replacement is carried out.
        data (dict): Request parameters. Keys read here:
            ``'file_id'``: ID of the file the data belongs to.
            ``'filter_column_heading'``: column to search in.
            ``'find_value'``: value to look for; the literal string
            ``'nan'`` selects empty (null) cells, anything else is compared
            case-insensitively against the string form of each cell.
            ``'replace_pressed'``: truthy when matches should be replaced.
            ``'replace_value'``: replacement value (only read when
            ``'replace_pressed'`` is truthy).

    Returns:
        pandas.DataFrame: When only searching, the rows that matched.
        When replacing, the rows whose column equals the replacement value.
    """
    file_id = data['file_id']
    # Heading of the column the find and replace is carried out on.
    heading = data['filter_column_heading']

    # Might be better to just find it throughout df_data.
    if data['find_value'] == 'nan':
        filter_applied = df_data[heading].isnull()
    else:
        lowered = df_data[heading].astype('str').str.lower()
        filter_applied = np.array(lowered == str(data['find_value']).lower())

    if not data['replace_pressed']:
        return df_data[filter_applied]

    # Write through one .loc call: the chained form
    # ``df_data[heading][filter_applied] = ...`` may assign to a temporary
    # copy and leave df_data untouched (always so under Copy-on-Write).
    df_data.loc[filter_applied, heading] = data['replace_value']
    column_values = df_data.loc[filter_applied, heading]

    if not column_values.empty:
        # Refresh the stored validation data for the replaced cells and
        # persist both the validation data and the edited file.
        error_data, dtypes_dict = get_dtype_data(file_id)
        dicts = get_geolocation_dictionary()
        _, replaced_error_counter = identify_col_dtype(
            column_values, heading, dicts)
        dtypes_dict[heading][filter_applied] = replaced_error_counter
        error_data[heading] = get_prob_list(dtypes_dict[heading])
        save_validation_data(error_data, file_id, dtypes_dict)
        update_data(File.objects.get(id=file_id).file, df_data)

    return df_data[df_data[heading] == data['replace_value']]
def update(id, data):
    """Apply a user edit: rename a column header or change a single cell.

    NOTE(review): a second ``update`` with the same signature appears later
    in this file; if both live in one module, the later definition replaces
    this one.

    Args:
        id (int): ID of the file being edited.
        data (dict): Edit request. When ``data['changeHeader']`` is not the
            empty string, the column ``data['header_tobe_changed']`` is
            renamed to ``data['header_value']``. Otherwise the cell at row
            ``data['line_no']`` of column ``data['column']`` is set to
            ``data['cell_value']``.

    Returns:
        dict: ``{'success': 1}``.
    """
    df_data = get_file_data(id)
    error_data, dtypes_dict = get_dtype_data(id)

    if data['changeHeader'] != '':
        # Append 2, 3, ... to the requested name until it no longer clashes
        # with an existing column.
        requested = data['header_value']
        new_header = requested
        count = 2
        while new_header in df_data.columns:
            new_header = requested + str(count)
            count += 1
        # The resolved name is written back into the request dict.
        data['header_value'] = new_header

        old_header = data['header_tobe_changed']
        df_data = df_data.rename(columns={old_header: new_header})
        # Move the validation entries over to the new column name.
        dtypes_dict[new_header] = dtypes_dict[old_header]
        dtypes_dict.pop(old_header, None)
        error_data[new_header] = error_data[old_header]
        error_data.pop(old_header, None)
    else:
        heading = data['column']
        line_no = data['line_no']
        # Single .loc write: the chained ``df_data[heading][line_no] = ...``
        # may assign to a temporary copy and leave df_data unchanged.
        df_data.loc[line_no, heading] = data['cell_value']
        prob_list, error_count = update_cell_type(
            df_data.loc[line_no, heading], dtypes_dict[heading],
            line_no, heading)
        dtypes_dict[heading] = error_count
        error_data[heading] = prob_list

    save_validation_data(error_data, id, dtypes_dict)
    update_data(File.objects.get(id=id).file, df_data)
    return {'success': 1}
def delete_data(id, data):
    """Remove the requested rows from a file and from its validation data.

    Args:
        id (int): ID of the file being edited.
        data (dict): Request whose ``'row_keys'`` entry lists the positions
            of the rows to delete; each entry is converted with ``int``.

    Returns:
        dict: ``{'success': 1}``.
    """
    frame = get_file_data(id)
    row_keys = [int(key) for key in data['row_keys']]

    # Drop by position, then renumber the remaining rows from zero.
    doomed_labels = frame.index[row_keys]
    frame = frame.drop(doomed_labels).reset_index(drop=True)

    stored_errors, stored_dtypes = get_dtype_data(id)
    stored_errors, stored_dtypes = remove_entries(
        stored_errors, stored_dtypes, row_keys)

    save_validation_data(stored_errors, id, stored_dtypes)
    update_data(File.objects.get(id=id).file, frame)
    return {'success': 1}
def update(id, data):
    """Apply a user edit: rename a column header or change a single cell.

    Args:
        id (int): ID of the file being edited.
        data (dict): Edit request. When ``data['changeHeader']`` is not the
            empty string, the column ``data['header_tobe_changed']`` is
            renamed to ``data['header_value']``. Otherwise the cell at row
            ``data['line_no']`` of column ``data['column']`` is set to
            ``data['cell_value']``.

    Returns:
        dict: ``{'success': 1}``.
    """
    df_data = get_file_data(id)
    error_data, dtypes_dict = get_dtype_data(id)

    if data['changeHeader'] != '':
        # Append 2, 3, ... to the requested name until it no longer clashes
        # with an existing column.
        requested = data['header_value']
        new_header = requested
        count = 2
        while new_header in df_data.columns:
            new_header = requested + str(count)
            count += 1
        # The resolved name is written back into the request dict.
        data['header_value'] = new_header

        old_header = data['header_tobe_changed']
        df_data = df_data.rename(columns={old_header: new_header})
        # Move the validation entries over to the new column name.
        dtypes_dict[new_header] = dtypes_dict[old_header]
        dtypes_dict.pop(old_header, None)
        error_data[new_header] = error_data[old_header]
        error_data.pop(old_header, None)
    else:
        heading = data['column']
        line_no = data['line_no']
        # Single .loc write: the chained ``df_data[heading][line_no] = ...``
        # may assign to a temporary copy and leave df_data unchanged.
        df_data.loc[line_no, heading] = data['cell_value']
        new_cell = df_data.loc[line_no, heading]

        # TODO: logic bugs - check why this branch is needed at all; the
        # first suspect is what gets stored by save_validation_data.
        # The test code reaches the list branch, GraphQL the other one.
        if isinstance(error_data[next(iter(error_data))], list):
            prob_list, error_count = update_cell_type(
                new_cell, dtypes_dict[heading], line_no, heading)
        else:
            prob_list, error_count = update_cell_type(
                new_cell, error_data[heading], line_no, heading)

        dtypes_dict[heading] = error_count
        error_data[heading] = prob_list

    save_validation_data(error_data, id, dtypes_dict)
    update_data(File.objects.get(id=id).file, df_data)
    return {'success': 1}