# NOTE: The imports below are reconstructed so these snippets parse on their
# own. The project-local names (RedcapField, RedcapApi, serializer, linter,
# utils, calculate_row_info, calculate_field_errors) are assumed to be
# provided by the surrounding project; their import paths are not shown here.
import io
import json
import logging
import ntpath
import os
from collections import OrderedDict
from datetime import datetime

import flask
import pandas as pd
from flask import request
from fuzzywuzzy import fuzz
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from pandas.testing import assert_frame_equal


def test_serializer_with_record_id():
    data_dictionary = [{
        "field_name": "patient_id",
        "form_name": "demographics",
        "field_type": "text"
    }, {
        "field_name": "gender",
        "form_name": "demographics",
        "field_type": "radio",
        "choices": "1, female | 2, male | 3, unknown | 4, unspecified | 5, not reported"
    }]
    data_dictionary = [RedcapField.from_json(field) for field in data_dictionary]
    project_info = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 0,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }
    records = [{
        "patient_id": "123",
        "gender": "male"
    }, {
        "patient_id": "456",
        "gender": "female"
    }]
    records = pd.DataFrame(records)
    records.fillna('', inplace=True)
    encoded_rows = serializer.encode_sheet(data_dictionary, project_info, records)
    encoded_rows = encoded_rows.sort_index(axis=1)
    d = {
        'patient_id': ["123", "456"],
        'redcap_repeat_instrument': ['', ''],
        'redcap_repeat_instance': ['', ''],
        'gender': ['2', '1']
    }
    expected = pd.DataFrame(data=d)
    expected = expected.sort_index(axis=1)
    assert len(encoded_rows) == 2
    assert_frame_equal(encoded_rows, expected, check_dtype=False)
def encode_records():
    form = request.form.to_dict()
    csv_headers = json.loads(form.get('csvHeaders'))
    malformed_sheets = json.loads(form.get('malformedSheets', '[]'))
    decoded_records = json.loads(form.get('decodedRecords'))
    matching_repeat_instances = json.loads(form.get('matchingRepeatInstances'))
    matching_record_ids = json.loads(form.get('matchingRecordIds'))
    project_info = json.loads(form.get('projectInfo'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)
    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]
    records = {}
    for sheet in json_data:
        frame = pd.DataFrame(json_data[sheet])
        frame = frame[csv_headers[sheet]]
        frame.fillna('', inplace=True)
        records[sheet] = frame
    datafile_errors = linter.lint_datafile(data_dictionary, project_info, records)
    cells_with_errors = datafile_errors['cells_with_errors']
    rows_in_error = utils.get_rows_with_errors(cells_with_errors, records)
    options = {
        'rows_in_error': rows_in_error,
        'decoded_records': decoded_records,
        'matching_repeat_instances': matching_repeat_instances,
        'matching_record_ids': matching_record_ids
    }
    encoded_records = serializer.encode_datafile(data_dictionary, project_info,
                                                 records, options)
    output_records = {}
    encoded_record_headers = {}
    for sheet_name in encoded_records:
        # Skip sheets that could not be matched to a REDCap instrument.
        if malformed_sheets and sheet_name in malformed_sheets:
            continue
        output_records[sheet_name] = json.loads(
            encoded_records[sheet_name].to_json(orient='records'))
        encoded_record_headers[sheet_name] = list(
            encoded_records[sheet_name].columns)
    results = {
        'encodedRecords': output_records,
        'encodedRecordHeaders': encoded_record_headers,
    }
    return flask.jsonify(results)
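# Usage sketch for encode_records. Everything here is an assumption for
# illustration: the view must be registered on a Flask `app`, and the route
# path '/encode_records' is hypothetical since no decorator appears in this
# excerpt. Each form field is a JSON string, mirroring the json.loads calls
# in the view above:
#
#     client = app.test_client()
#     response = client.post('/encode_records', data={
#         'csvHeaders': json.dumps({'Sheet1': ['patient_id', 'gender']}),
#         'decodedRecords': json.dumps({}),
#         'matchingRepeatInstances': json.dumps({}),
#         'matchingRecordIds': json.dumps({}),
#         'projectInfo': json.dumps(project_info),   # as returned by post_form
#         'jsonData': json.dumps({'Sheet1': [{'patient_id': '123', 'gender': 'male'}]}),
#         'ddData': json.dumps(dd_json),             # raw data-dictionary fields
#     })
#     encoded = response.get_json()['encodedRecords']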
def test_integer_validation_with_range(client):
    """Test integer validation with a min/max range."""
    data_dictionary = [{
        "field_name": "record_id",
        "form_name": "demographics",
        "field_type": "text"
    }, {
        "field_name": "number",
        "form_name": "demographics",
        "field_type": "text",
        "text_validation": "integer",
        "text_min": "1",
        "text_max": "5",
    }]
    data_dictionary = [RedcapField.from_json(field) for field in data_dictionary]
    project_info = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 1,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }
    records = [
        {"number": "1"},
        {"number": "6"},
    ]
    records = pd.DataFrame(records)
    records.fillna('', inplace=True)
    results = linter.lint_sheet(data_dictionary, project_info, records)
    assert len(results['all_errors']) == 1
    # Note: the linter's message says "date validation" even for integer
    # fields; the assertion matches that output verbatim.
    assert results['all_errors'][0] == "6 did not pass date validation integer. Min: 1 | Max: 5"
def test_date_validation_failure(client):
    """Test date validation"""
    data_dictionary = [{
        "field_name": "record_id",
        "form_name": "demographics",
        "field_type": "text"
    }, {
        "field_name": "treatment_dx",
        "form_name": "treatment",
        "field_type": "text",
        "text_validation": "date_mdy"
    }]
    data_dictionary = [RedcapField.from_json(field) for field in data_dictionary]
    project_info = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 1,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }
    records = [
        {"treatment_dx": "abcd"},
        {"treatment_dx": "05-05-2018"},
    ]
    records = pd.DataFrame(records)
    records.fillna('', inplace=True)
    results = linter.lint_sheet(data_dictionary, project_info, records)
    assert len(results['all_errors']) == 1
def test_validation_permissible_value_failure(client):
    """Test permissible value validation."""
    data_dictionary = [{
        "field_name": "record_id",
        "form_name": "demographics",
        "field_type": "text"
    }, {
        "field_name": "gender",
        "form_name": "demographics",
        "field_type": "radio",
        "choices": "1, female | 2, male | 3, unknown | 4, unspecified | 5, not reported"
    }]
    data_dictionary = [RedcapField.from_json(field) for field in data_dictionary]
    project_info = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 1,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }
    records = [{"gender": "dog"}, {"gender": "male"}]
    records = pd.DataFrame(records)
    records.fillna('', inplace=True)
    results = linter.lint_sheet(data_dictionary, project_info, records)
    assert len(results['all_errors']) == 1
    assert "dog not found in Permissible Values" in results['all_errors'][0]
def test_serializer_with_repeatable_instrument_and_matching_repeat_instances():
    data_dictionary = [
        {
            "field_name": "patient_id",
            "form_name": "demographics",
            "field_type": "text"
        },
        {
            "field_name": "gender",
            "form_name": "demographics",
            "field_type": "radio",
            "choices": "1, female | 2, male | 3, unknown | 4, unspecified | 5, not reported"
        },
        {
            "field_name": "treatment_dx",
            "form_name": "treatment",
            "field_type": "text",
        },
        {
            "field_name": "treatment",
            "form_name": "treatment",
            "field_type": "radio",
            "choices": "1, chemotherapy | 2, immunotherapy"
        },
    ]
    data_dictionary = [RedcapField.from_json(field) for field in data_dictionary]
    project_info = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 0,
        'next_record_name': 1,
        'repeatable_instruments': ['treatment'],
    }
    records = [{
        "patient_id": "123",
        "gender": "male",
        "treatment_dx": "2018-09-01",
        "treatment": "chemotherapy"
    }, {
        "patient_id": "123",
        "gender": "male",
        "treatment_dx": "2019-09-01",
        "treatment": "immunotherapy"
    }]
    records = pd.DataFrame(records)
    records.fillna('', inplace=True)
    # Row index '0' already matches an existing 'treatment' instance in REDCap,
    # so its repeat row is written as instance 7; the next repeat row gets
    # instance 2 from the serializer's own numbering (see `expected` below).
    options = {'matching_repeat_instances': {'0': {'treatment': 7}}}
    encoded_rows = serializer.encode_sheet(data_dictionary, project_info,
                                           records, options)
    encoded_rows = encoded_rows.sort_index(axis=1)
    d = {
        'patient_id': ['123', '123', '123'],
        'redcap_repeat_instrument': ['', 'treatment', 'treatment'],
        'redcap_repeat_instance': ['', 7, 2],
        'gender': ['2', '', ''],
        'treatment': ['', '1', '2'],
        'treatment_dx': ['', '2018-09-01', '2019-09-01']
    }
    expected = pd.DataFrame(data=d)
    expected = expected.sort_index(axis=1)
    assert len(encoded_rows) == 3
    assert_frame_equal(encoded_rows, expected, check_dtype=False, check_like=True)
def save_fields():
    form = request.form.to_dict()
    data_field_to_redcap_field_map = json.loads(
        form.get('dataFieldToRedcapFieldMap'))
    csv_headers = json.loads(form.get('csvHeaders'))
    existing_records = json.loads(form.get('existingRecords'))
    recordid_field = json.loads(form.get('recordidField'))
    project_info = json.loads(form.get('projectInfo'))
    token = json.loads(form.get('token'))
    env = json.loads(form.get('env'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)
    records = {}
    for sheet in json_data:
        matched_field_dict = data_field_to_redcap_field_map.get(sheet, {})
        # Drop headers explicitly mapped to '' (no match) and rename the rest.
        csv_headers[sheet] = [
            matched_field_dict.get(c) or c
            for c in csv_headers[sheet]
            if matched_field_dict.get(c) != ''
        ]
        frame = pd.DataFrame(json_data[sheet])
        frame.fillna('', inplace=True)
        frame = frame.rename(index=str, columns=matched_field_dict)
        frame = frame[csv_headers[sheet]]
        records[sheet] = frame
    dd_data = json.loads(form.get('ddData'))
    dd = [RedcapField.from_json(field) for field in dd_data]
    if token:
        redcap_api = RedcapApi(env)
        if project_info['record_autonumbering_enabled'] == 1:
            if not project_info.get('secondary_unique_field'):
                existing_records = None
            else:
                secondary_unique_field_values = utils.get_field_values(
                    project_info.get('secondary_unique_field', []), records)
                options = {
                    'secondary_unique_field':
                        project_info.get('secondary_unique_field', []),
                    'secondary_unique_field_values':
                        secondary_unique_field_values
                }
                existing_records = redcap_api.export_records(token, options)
                record_ids = []
                for r in existing_records:
                    record_id = r[recordid_field]
                    # Normalize float-like ids (e.g. 123.0) to integer strings.
                    record_id = str(int(record_id)) if isinstance(
                        record_id, float) and record_id.is_integer() else record_id
                    record_ids.append(record_id)
                options = {'records': record_ids}
                existing_records = redcap_api.export_records(token, options)
        else:
            record_ids = utils.get_field_values([recordid_field], records)
            options = {'records': record_ids}
            existing_records = redcap_api.export_records(token, options)
    datafile_errors = linter.lint_datafile(dd, project_info, records)
    cells_with_errors = datafile_errors['cells_with_errors']
    rows_in_error = utils.get_rows_with_errors(cells_with_errors, records)
    columns_in_error = utils.get_columns_with_errors(cells_with_errors, records)
    all_errors = [{"Error": error} for error in datafile_errors['linting_errors']]
    json_data = {}
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))
        cells_with_errors[sheet_name] = json.loads(
            cells_with_errors[sheet_name].to_json(orient='records'))
    records_to_reconcile = {}
    if existing_records:
        for record in existing_records:
            if record.get(recordid_field):
                if not records_to_reconcile.get(record[recordid_field]):
                    records_to_reconcile[record[recordid_field]] = []
                records_to_reconcile[record[recordid_field]].append(record)
    decoded_records = {}
    for recordid, encoded_rows in records_to_reconcile.items():
        decoded_rows = serializer.decode_sheet(dd, encoded_rows)
        decoded_records[recordid] = decoded_rows
    results = {
        'jsonData': json_data,
        'rowsInError': rows_in_error,
        'cellsWithErrors': cells_with_errors,
        'allErrors': all_errors,
        'csvHeaders': csv_headers,
        'existingRecords': existing_records,
        'columnsInError': columns_in_error,
        'decodedRecords': decoded_records,
        'fieldsSaved': True,
    }
    return flask.jsonify(results)
def post_form():
    form = request.form.to_dict()
    datafile_name = form.get('dataFileName')
    # records = pd.read_excel(request.files['dataFile'], sheet_name=None)
    records = utils.read_spreadsheet(request.files['dataFile'], datafile_name)
    date_cols = []
    if datafile_name.endswith('.xlsx') or datafile_name.endswith('.xls'):
        records_with_format = load_workbook(request.files['dataFile'])
        for sheet in records_with_format.sheetnames:
            for row in records_with_format[sheet].iter_rows(min_row=2):
                for cell in row:
                    column_letter = get_column_letter(cell.column)
                    column_header = records_with_format[sheet][column_letter + '1'].value
                    # MRN columns: re-pad integers that lost leading zeros.
                    if column_header in records[sheet].columns and cell.number_format == '00000000':
                        current_list = list(records[sheet][column_header])
                        current_list = [
                            str(i).rjust(8, '0') if isinstance(i, int) else i
                            for i in current_list
                        ]
                        records[sheet][column_header] = current_list
                    if column_header in records[sheet].columns and cell.number_format == 'mm-dd-yy':
                        date_cols.append(column_header)
                        current_list = list(records[sheet][column_header])
                        current_list = [
                            i.strftime('%m/%d/%Y')
                            if isinstance(i, datetime) and not pd.isnull(i) else i
                            for i in current_list
                        ]
                        records[sheet][column_header] = current_list
                # Number formats are column-wide; the first data row is enough.
                break
    token = form.get('token')
    env = form.get('env')
    mappings = None
    existing_records = None
    form_names = set()
    form_name_to_dd_fields = {}
    data_field_to_redcap_field_map = {}
    data_field_to_choice_map = {}
    original_to_correct_value_map = {}
    no_match_redcap_fields = []
    if 'mappingsFile' in request.files:
        mappings = pd.read_excel(request.files['mappingsFile'], sheet_name="Sheet1")
        if list(mappings["dataFieldToRedcapFieldMap"]):
            data_field_to_redcap_field_map = json.loads(
                list(mappings["dataFieldToRedcapFieldMap"])[0])
        if list(mappings["dataFieldToChoiceMap"]):
            data_field_to_choice_map = json.loads(
                list(mappings["dataFieldToChoiceMap"])[0])
        if list(mappings["originalToCorrectedValueMap"]):
            original_to_correct_value_map = json.loads(
                list(mappings["originalToCorrectedValueMap"])[0])
        if list(mappings["noMatchRedcapFields"]):
            no_match_redcap_fields = json.loads(
                list(mappings["noMatchRedcapFields"])[0])
    redcap_api = RedcapApi(env)
    project_info = {
        'secondary_unique_field': '',
        'record_autonumbering_enabled': 0,
        'repeatable_instruments': [],
        'next_record_name': 1
    }
    data_dictionary = None
    existing_records = None
    if token:
        try:
            data_dictionary = redcap_api.fetch_data_dictionary(token)
            project_info = redcap_api.fetch_project_info(token)
            project_info['next_record_name'] = redcap_api.generate_next_record_name(token)
            if project_info.get('secondary_unique_field'):
                project_info['secondary_unique_field'] = [
                    project_info.get('secondary_unique_field')
                ]
            if project_info['has_repeating_instruments_or_events'] == 1:
                repeatable_instruments = redcap_api.fetch_repeatable_instruments(token)
                project_info['repeatable_instruments'] = [
                    i['form_name'] for i in repeatable_instruments
                ]
            if project_info['record_autonumbering_enabled'] == 0:
                data_dictionary[0]['required'] = 'Y'
            dd = [RedcapField.from_json(field) for field in data_dictionary]
        except Exception as e:
            logging.warning(e)
            results = {'error': "Error: {0}".format(e)}
            response = flask.jsonify(results)
            response.headers.add('Access-Control-Allow-Origin', '*')
            return response
    else:
        data_dictionary_name = form.get('dataDictionaryName')
        if data_dictionary_name.endswith('.csv'):
            dd_df = pd.read_csv(request.files['dataDictionary'])
            dd_df.fillna('', inplace=True)
        elif data_dictionary_name.endswith('.xlsx') or data_dictionary_name.endswith('.xls'):
            dd_df = pd.read_excel(request.files['dataDictionary'])
        dd = [
            RedcapField.from_data_dictionary(dd_df, field)
            for field in list(dd_df['Variable / Field Name'])
        ]
        if dd[0].field_name == 'record_id':
            project_info['record_autonumbering_enabled'] = 1
    if 'existingRecordsFile' in request.files:
        existing_records = pd.read_csv(request.files['existingRecordsFile'])
        existing_records = json.loads(
            existing_records.to_json(orient='records', date_format='iso'))
    all_csv_headers = []
    dd_headers = []
    dd_data = {}
    dd_data_raw = {}
    if data_dictionary is not None:
        dd_headers = list(data_dictionary[0].keys())
        dd_data_raw = data_dictionary
    else:
        dd_headers = list(dd_df.columns)
        dd_data_raw = json.loads(dd_df.to_json(orient='records', date_format='iso'))
    dd_data = [field.__dict__ for field in dd]
    for dd_field in dd:
        if not form_name_to_dd_fields.get(dd_field.form_name):
            form_name_to_dd_fields[dd_field.form_name] = []
        form_name_to_dd_fields.get(dd_field.form_name).append(dd_field.field_name)
        form_names.add(dd_field.form_name)
    recordid_field = dd[0].field_name
    form_names = list(form_names)
    for sheet_name, sheet in records.items():
        all_csv_headers += list(sheet.columns)
    all_csv_headers = [i for i in all_csv_headers if 'Unnamed' not in i]
    all_field_names = [f.field_name for f in dd]
    redcap_field_candidates = {}
    data_field_candidates = {}
    csv_headers = {}
    fields_not_in_redcap = {}
    duplicate_fields = {}
    for sheet_name, sheet in records.items():
        duplicate_fields[sheet_name] = {}
        # Remove empty rows
        sheet.dropna(axis=0, how='all', inplace=True)
        csv_headers[sheet_name] = list(sheet.columns)
        csv_headers[sheet_name] = [
            item for item in csv_headers[sheet_name] if 'Unnamed' not in item
        ]
        for header in csv_headers[sheet_name]:
            duplicate_fields[sheet_name][header] = duplicate_fields[sheet_name].get(header, 0) + 1
        duplicate_fields[sheet_name] = [
            k for k, v in duplicate_fields[sheet_name].items() if v > 1
        ]
        normalized_headers = utils.parameterize_list(csv_headers[sheet_name])
        fields_not_in_redcap[sheet_name] = [
            header for header, normalized_header in zip(
                csv_headers[sheet_name], normalized_headers)
            if normalized_header not in all_field_names
        ]
    all_csv_headers = list(set(all_csv_headers))
    unmatched_data_fields = {}
    for sheet in csv_headers:
        data_field_to_redcap_field_map[sheet] = data_field_to_redcap_field_map.get(sheet, {})
        unmatched_data_fields[sheet] = unmatched_data_fields.get(sheet, [])
        for header in csv_headers[sheet]:
            normalized_header = utils.parameterize(header)
            if data_field_to_redcap_field_map[sheet].get(header):
                continue
            if normalized_header in all_field_names:
                data_field_to_redcap_field_map[sheet][header] = normalized_header
            else:
                unmatched_data_fields[sheet].append(header)
    selected_columns = {}
    matched_redcap_fields = []
    matched_redcap_fields += no_match_redcap_fields
    for sheet_name, field_map in data_field_to_redcap_field_map.items():
        selected_columns[sheet_name] = field_map.keys()
        matched_redcap_fields += field_map.values()
    unmatched_redcap_fields = [
        f for f in all_field_names
        if f not in matched_redcap_fields and f != 'record_id'
    ]
    for f1 in all_field_names:
        dd_field = [f for f in dd_data if f['field_name'] == f1][0]
        redcap_field_candidates[f1] = []
        for sheet in csv_headers:
            for f2 in csv_headers[sheet]:
                redcap_field_candidates[f1].append({
                    'candidate': f2,
                    'sheets': [sheet],
                    'score': max(fuzz.token_set_ratio(f1, f2),
                                 fuzz.token_set_ratio(dd_field['field_label'], f2))
                })
    for sheet in csv_headers:
        for f1 in csv_headers[sheet]:
            if data_field_candidates.get(f1):
                continue
            data_field_candidates[f1] = []
            for f2 in all_field_names:
                dd_field = [f for f in dd_data if f['field_name'] == f2][0]
                data_field_candidates[f1].append({
                    'candidate': f2,
                    'form_name': dd_field['form_name'],
                    'score': max(fuzz.token_set_ratio(f1, f2),
                                 fuzz.token_set_ratio(dd_field['field_label'], f1))
                })
    malformed_sheets = []
    form_names = [redcap_field.form_name for redcap_field in dd]
    form_names = list(set(form_names))
    for sheet_name in records.keys():
        sheet = records.get(sheet_name)
        redcap_field_names = [f.field_name for f in dd]
        matching_fields = [f for f in sheet.columns if f in redcap_field_names]
        if not matching_fields and not data_field_to_redcap_field_map.get(sheet_name):
            malformed_sheets.append(sheet_name)
    json_data = {}
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))
    results = {
        'csvHeaders': csv_headers,
        'jsonData': json_data,
        'ddHeaders': dd_headers,
        'ddData': dd_data,
        'ddDataRaw': dd_data_raw,
        'formNames': form_names,
        'dateColumns': date_cols,
        'duplicateFields': duplicate_fields,
        'malformedSheets': malformed_sheets,
        'recordFieldsNotInRedcap': fields_not_in_redcap,
        'formNameToDdFields': form_name_to_dd_fields,
        'projectInfo': project_info,
        'existingRecords': existing_records,
        'recordidField': recordid_field,
        'redcapFieldCandidates': redcap_field_candidates,
        'dataFieldCandidates': data_field_candidates,
        'unmatchedRedcapFields': unmatched_redcap_fields,
        'unmatchedDataFields': unmatched_data_fields,
        'dataFileName': datafile_name,
        'token': token,
    }
    if data_field_to_redcap_field_map:
        results['dataFieldToRedcapFieldMap'] = data_field_to_redcap_field_map
    if data_field_to_choice_map:
        results['dataFieldToChoiceMap'] = data_field_to_choice_map
    if original_to_correct_value_map:
        results['originalToCorrectedValueMap'] = original_to_correct_value_map
    if no_match_redcap_fields:
        results['noMatchRedcapFields'] = no_match_redcap_fields
    response = flask.jsonify(results)
    return response
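# A small illustrative test (not from the original suite) of the fuzzy scoring
# that ranks candidates in post_form above. fuzz.token_set_ratio lowercases,
# strips punctuation, and ignores token order, which is why a spreadsheet
# header like 'Patient ID' matches the REDCap variable name 'patient_id' with
# a perfect score, and why both field_name and field_label are tried with the
# max kept.
def test_token_set_ratio_matching_sketch():
    assert fuzz.token_set_ratio('Patient ID', 'patient_id') == 100
    assert fuzz.token_set_ratio('Gender', 'gender') == 100
    # Partial token overlap yields a lower (but nonzero) score.
    assert fuzz.token_set_ratio('Treatment Date', 'treatment_dx') < 100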
def resolve_merge_row():
    form = request.form.to_dict()
    csv_headers = json.loads(form.get('csvHeaders'))
    # Working merge row is the row being saved.
    action = json.loads(form.get('action', '""'))
    next_merge_row = json.loads(form.get('nextMergeRow', '-1'))
    next_sheet_name = json.loads(form.get('nextSheetName', '""'))
    working_merge_row = json.loads(form.get('workingMergeRow', '-1'))
    working_sheet_name = json.loads(form.get('workingSheetName', '""'))
    merge_map = json.loads(form.get('mergeMap', '{}'))
    merge_conflicts = json.loads(form.get('mergeConflicts', '{}'))
    project_info = json.loads(form.get('projectInfo', '{}'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)
    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]
    row_merge_map = merge_map.get(working_sheet_name, {}).get(str(working_merge_row), {})
    records = {}
    for sheet in json_data:
        frame = pd.DataFrame(json_data[sheet])
        frame = frame[csv_headers[sheet]]
        frame.fillna('', inplace=True)
        if sheet == working_sheet_name:
            for field in row_merge_map:
                dd_field = [f for f in data_dictionary if f.field_name == field][0]
                value = row_merge_map[field]
                if dd_field.text_validation == 'integer':
                    value = int(value) if value else value
                elif dd_field.text_validation == 'number_2dp':
                    value = float(value) if value else value
                frame.iloc[working_merge_row, frame.columns.get_loc(field)] = value
        records[sheet] = frame
    # .get() avoids a KeyError when the sheet has no recorded conflicts.
    if working_sheet_name and merge_conflicts.get(working_sheet_name):
        del merge_conflicts[working_sheet_name][str(working_merge_row)]
    datafile_errors = linter.lint_datafile(data_dictionary, project_info, records)
    cells_with_errors = datafile_errors['cells_with_errors']
    all_errors = [{"Error": error} for error in datafile_errors['linting_errors']]
    json_data = {}
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))
        cells_with_errors[sheet_name] = json.loads(
            cells_with_errors[sheet_name].to_json(orient='records'))
    results = {
        'jsonData': json_data,
        'allErrors': all_errors,
        'mergeMap': merge_map,
        'mergeConflicts': merge_conflicts,
        'cellsWithErrors': cells_with_errors,
    }
    if action == 'continue':
        results['workingMergeRow'] = next_merge_row
        results['workingSheetName'] = next_sheet_name
    return flask.jsonify(results)
def resolve_column():
    form = request.form.to_dict()
    csv_headers = json.loads(form.get('csvHeaders'))
    action = json.loads(form.get('action', '""'))
    next_column = json.loads(form.get('nextColumn', '""'))
    next_sheet_name = json.loads(form.get('nextSheetName', '""'))
    working_column = json.loads(form.get('workingColumn', '""'))
    working_sheet_name = json.loads(form.get('workingSheetName', '""'))
    data_field_to_choice_map = json.loads(form.get('dataFieldToChoiceMap', '{}'))
    original_to_correct_value_map = json.loads(
        form.get('originalToCorrectedValueMap', '{}'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)
    transform_map = {}
    has_transforms = False
    for sheet in json_data:
        transform_map[sheet] = {
            **data_field_to_choice_map.get(working_sheet_name, {}).get(working_column, {}),
            **original_to_correct_value_map.get(working_sheet_name, {}).get(working_column, {})
        }
        if transform_map[sheet]:
            has_transforms = True
    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]
    records = {}
    for sheet in json_data:
        frame = pd.DataFrame(json_data[sheet])
        frame = frame[csv_headers[sheet]]
        frame.fillna('', inplace=True)
        if sheet == working_sheet_name:
            new_list = []
            for field in list(frame[working_column]):
                new_value = transform_map[sheet].get(str(field)) or field
                if isinstance(new_value, list):
                    new_value = ', '.join([str(i) for i in new_value])
                new_list.append(new_value)
            frame[working_column] = new_list
        records[sheet] = frame
    project_info = json.loads(form.get('projectInfo', '{}'))
    field_errors = {}
    if next_column:
        field_errors = calculate_field_errors(next_column, next_sheet_name,
                                              data_dictionary, records)
    row_info = {}
    json_data = {}
    next_row = -1
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))
    results = {
        'jsonData': json_data,
    }
    if has_transforms:
        datafile_errors = linter.lint_datafile(data_dictionary, project_info, records)
        cells_with_errors = datafile_errors['cells_with_errors']
        rows_in_error = utils.get_rows_with_errors(cells_with_errors, records)
        columns_in_error = utils.get_columns_with_errors(cells_with_errors, records)
        for sheet_name, sheet in records.items():
            cells_with_errors[sheet_name] = json.loads(
                cells_with_errors[sheet_name].to_json(orient='records'))
        # TODO: check whether next_row is still in error.
        if not next_column and rows_in_error:
            next_sheet_name = list(rows_in_error.keys())[0]
            next_row = rows_in_error[next_sheet_name][0]
            row_info = calculate_row_info(next_row, next_sheet_name,
                                          data_dictionary, records)
        all_errors = [{"Error": error} for error in datafile_errors['linting_errors']]
        results['allErrors'] = all_errors
        results['columnsInError'] = columns_in_error
        results['cellsWithErrors'] = cells_with_errors
        results['rowsInError'] = rows_in_error
    if action == 'continue':
        results['workingColumn'] = next_column
        results['workingSheetName'] = next_sheet_name
        results['fieldErrors'] = field_errors
        results['rowInfo'] = row_info
        results['workingRow'] = next_row
    return flask.jsonify(results)
def resolve_row():
    form = request.form.to_dict()
    csv_headers = json.loads(form.get('csvHeaders'))
    action = json.loads(form.get('action', '""'))
    next_row = json.loads(form.get('nextRow', '-1'))
    next_sheet_name = json.loads(form.get('nextSheetName', '""'))
    working_row = json.loads(form.get('workingRow', '-1'))
    working_sheet_name = json.loads(form.get('workingSheetName', '""'))
    field_to_value_map = json.loads(form.get('fieldToValueMap'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)
    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]
    value_map = field_to_value_map.get(working_sheet_name, {}).get(str(working_row), {})
    records = {}
    for sheet in json_data:
        frame = pd.DataFrame(json_data[sheet])
        frame = frame[csv_headers[sheet]]
        frame.fillna('', inplace=True)
        if sheet == working_sheet_name:
            for field in value_map:
                dd_field = [f for f in data_dictionary if f.field_name == field][0]
                value = value_map[field]
                if dd_field.text_validation == 'integer':
                    value = int(value) if value else value
                elif dd_field.text_validation == 'number_2dp':
                    value = float(value) if value else value
                frame.iloc[working_row, frame.columns.get_loc(field)] = value
        records[sheet] = frame
    project_info = json.loads(form.get('projectInfo'))
    datafile_errors = linter.lint_datafile(data_dictionary, project_info, records)
    cells_with_errors = datafile_errors['cells_with_errors']
    # TODO: figure out why errors on malformed sheets get added here.
    rows_in_error = utils.get_rows_with_errors(cells_with_errors, records)
    columns_in_error = utils.get_columns_with_errors(cells_with_errors, records)
    # TODO: check whether next_column is still in error.
    row_info = {}
    field_errors = {}
    next_column = ''
    if next_row >= 0:
        row_info = calculate_row_info(next_row, next_sheet_name,
                                      data_dictionary, records)
    elif columns_in_error:
        # Get next column
        next_sheet_name = list(columns_in_error.keys())[0]
        next_column = columns_in_error[next_sheet_name][0]
        field_errors = calculate_field_errors(next_column, next_sheet_name,
                                              data_dictionary, records)
    all_errors = [{"Error": error} for error in datafile_errors['linting_errors']]
    json_data = {}
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))
        cells_with_errors[sheet_name] = json.loads(
            cells_with_errors[sheet_name].to_json(orient='records'))
    results = {
        'jsonData': json_data,
        'allErrors': all_errors,
        'rowsInError': rows_in_error,
        'columnsInError': columns_in_error,
        'rowInfo': row_info,
        'cellsWithErrors': cells_with_errors,
    }
    if action == 'continue':
        results['workingRow'] = next_row
        results['workingSheetName'] = next_sheet_name
        results['workingColumn'] = next_column
        results['fieldErrors'] = field_errors
    return flask.jsonify(results)
def download_progress():
    form = request.form.to_dict()
    datafile_name = form.get('dataFileName')
    data_field_to_redcap_field_map = json.loads(
        form.get('dataFieldToRedcapFieldMap'))
    csv_headers = json.loads(form.get('csvHeaders'))
    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]
    cells_with_errors = json.loads(form.get('cellsWithErrors'))
    record_fields_not_in_redcap = json.loads(form.get('recordFieldsNotInRedcap'))
    datafile_name = os.path.splitext(ntpath.basename(datafile_name))[0]
    current_date = datetime.now().strftime("%m-%d-%Y")
    new_datafile_name = datafile_name + '-' + current_date + '-Edited.xlsx'
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)
    output = io.BytesIO()
    writer = pd.ExcelWriter(output, engine='xlsxwriter')
    error_format = writer.book.add_format({'bg_color': '#ffbf00'})  # Amber
    empty_format = writer.book.add_format({'bg_color': '#FFE300'})  # Yellow
    missing_column_format = writer.book.add_format({'bg_color': '#E5153E'})  # Red
    for sheet in json_data:
        matched_field_dict = data_field_to_redcap_field_map.get(sheet, {})
        csv_headers[sheet] = [
            matched_field_dict.get(c) or c for c in csv_headers[sheet]
        ]
        error_df = pd.DataFrame(cells_with_errors[sheet])
        frame = pd.DataFrame(json_data[sheet])
        frame.fillna('', inplace=True)
        frame.rename(columns=matched_field_dict, inplace=True)
        error_df.rename(columns=matched_field_dict, inplace=True)
        frame = frame[csv_headers[sheet]]
        error_df = error_df[csv_headers[sheet]]
        frame.to_excel(writer, sheet_name=sheet, index=False)
        data_worksheet = writer.sheets[sheet]
        for j, col in enumerate(error_df.columns):
            if col in record_fields_not_in_redcap.get(sheet, []):
                data_worksheet.write(0, j, frame.columns[j], missing_column_format)
                continue
            for index, _ in error_df.iterrows():
                error_cell = error_df.iloc[index][col]
                required = False
                dd_field = [f for f in data_dictionary if f.field_name == col]
                if dd_field:
                    required = dd_field[0].required
                if error_cell is None and required:
                    data_worksheet.write(index + 1, j, '', empty_format)
                elif error_cell:
                    cell = frame.iloc[index][frame.columns[j]]
                    target_string = cell or ''
                    cell_format = None
                    if cell:
                        cell_format = error_format
                    elif required:
                        cell_format = empty_format
                    data_worksheet.write(index + 1, j, target_string, cell_format)
    writer.close()
    output.seek(0)
    # Kept as in the original; Flask 2.0 renamed attachment_filename to download_name.
    return flask.send_file(output,
                           attachment_filename=new_datafile_name,
                           as_attachment=True)
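# A minimal standalone sketch (not part of the original module) of the
# xlsxwriter highlighting pattern used in download_progress: write a frame
# with pandas, then re-write individual cells with a background format.
def _highlight_demo_sketch():
    buf = io.BytesIO()
    writer = pd.ExcelWriter(buf, engine='xlsxwriter')
    pd.DataFrame({'value': ['ok', 'bad']}).to_excel(writer, sheet_name='Demo', index=False)
    amber = writer.book.add_format({'bg_color': '#ffbf00'})
    # Row 0 holds the header, so the second data row is worksheet row index 2.
    writer.sheets['Demo'].write(2, 0, 'bad', amber)
    writer.close()
    buf.seek(0)
    return buf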