예제 #1
0
def test_serializer_with_record_id():
    """Encode two single-form rows that carry an explicit record id.

    The expected frame holds the coded ``gender`` values ('2' for male,
    '1' for female) and empty repeat-instrument / repeat-instance columns.
    """
    raw_fields = [
        {
            "field_name": "patient_id",
            "form_name": "demographics",
            "field_type": "text",
        },
        {
            "field_name": "gender",
            "form_name": "demographics",
            "field_type": "radio",
            "choices": "1, female | 2, male | 3, unknown | 4, unspecified | 5, not reported",
        },
    ]
    fields = [RedcapField.from_json(raw) for raw in raw_fields]

    project = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 0,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }

    sheet = pd.DataFrame([
        {"patient_id": "123", "gender": "male"},
        {"patient_id": "456", "gender": "female"},
    ])
    sheet.fillna('', inplace=True)

    # Sort columns on both sides so column order cannot affect the comparison.
    actual = serializer.encode_sheet(fields, project, sheet)
    actual = actual.sort_index(axis=1)

    expected = pd.DataFrame(data={
        'patient_id': ["123", "456"],
        'redcap_repeat_instrument': ['', ''],
        'redcap_repeat_instance': ['', ''],
        'gender': ['2', '1'],
    })
    expected = expected.sort_index(axis=1)

    assert len(actual) == 2
    assert_frame_equal(actual, expected, check_dtype=False)
예제 #2
0
def encode_records():
    """Lint and encode the posted datafile, returning the encoded sheets.

    Reads JSON-encoded form fields from the current request, rebuilds one
    DataFrame per sheet (restricted to that sheet's ``csvHeaders``), lints
    them, and passes the rows in error plus the caller-supplied matching
    information to ``serializer.encode_datafile``.

    Returns:
        A ``flask.jsonify`` response with ``encodedRecords`` (rows per
        sheet) and ``encodedRecordHeaders`` (column names per sheet).
        Sheets listed in ``malformedSheets`` are left out of both.
    """
    form = request.form.to_dict()

    def _parse(key, *default):
        # Every form value arrives as a JSON string.
        return json.loads(form.get(key, *default))

    csv_headers = _parse('csvHeaders')
    malformed_sheets = _parse('malformedSheets', '[]')
    decoded_records = _parse('decodedRecords')
    matching_repeat_instances = _parse('matchingRepeatInstances')
    matching_record_ids = _parse('matchingRecordIds')
    project_info = _parse('projectInfo')
    # OrderedDict keeps the posted key order of sheets and of row fields.
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)

    data_dictionary = [
        RedcapField.from_json(raw_field) for raw_field in _parse('ddData')
    ]

    records = {}
    for sheet_name, rows in json_data.items():
        frame = pd.DataFrame(rows)[csv_headers[sheet_name]]
        frame.fillna('', inplace=True)
        records[sheet_name] = frame

    lint_result = linter.lint_datafile(data_dictionary, project_info, records)
    rows_in_error = utils.get_rows_with_errors(
        lint_result['cells_with_errors'], records)

    encoded_records = serializer.encode_datafile(
        data_dictionary,
        project_info,
        records,
        {
            'rows_in_error': rows_in_error,
            'decoded_records': decoded_records,
            'matching_repeat_instances': matching_repeat_instances,
            'matching_record_ids': matching_record_ids
        },
    )

    output_records = {}
    encoded_record_headers = {}
    for sheet_name in encoded_records:
        is_malformed = bool(malformed_sheets) and sheet_name in malformed_sheets
        if not is_malformed:
            encoded_sheet = encoded_records[sheet_name]
            output_records[sheet_name] = json.loads(
                encoded_sheet.to_json(orient='records'))
            encoded_record_headers[sheet_name] = list(encoded_sheet.columns)

    return flask.jsonify({
        'encodedRecords': output_records,
        'encodedRecordHeaders': encoded_record_headers,
    })
예제 #3
0
def test_integer_validation_with_range(client):
    """Lint an integer field limited to 1..5 and expect one range error."""
    raw_fields = [
        {
            "field_name": "record_id",
            "form_name": "demographics",
            "field_type": "text",
        },
        {
            "field_name": "number",
            "form_name": "demographics",
            "field_type": "text",
            "text_validation": "integer",
            "text_min": "1",
            "text_max": "5",
        },
    ]
    fields = [RedcapField.from_json(raw) for raw in raw_fields]

    project = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 1,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }

    # "1" is inside the range, "6" is above text_max.
    sheet = pd.DataFrame([{"number": "1"}, {"number": "6"}])
    sheet.fillna('', inplace=True)

    errors = linter.lint_sheet(fields, project, sheet)['all_errors']

    assert len(errors) == 1
    expected_message = "6 did not pass date validation integer. Min: 1 | Max: 5"
    assert errors[0] == expected_message
예제 #4
0
def test_date_validation_failure(client):
    """Lint a ``date_mdy`` field and expect exactly one validation error."""
    raw_fields = [
        {
            "field_name": "record_id",
            "form_name": "demographics",
            "field_type": "text",
        },
        {
            "field_name": "treatment_dx",
            "form_name": "treatment",
            "field_type": "text",
            "text_validation": "date_mdy",
        },
    ]
    fields = [RedcapField.from_json(raw) for raw in raw_fields]

    project = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 1,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }

    sheet = pd.DataFrame([
        {"treatment_dx": "abcd"},
        {"treatment_dx": "05-05-2018"},
    ])
    sheet.fillna('', inplace=True)

    lint_result = linter.lint_sheet(fields, project, sheet)

    assert len(lint_result['all_errors']) == 1
예제 #5
0
def test_validation_permissible_value_failure(client):
    """Lint a radio field and expect one permissible-value error for 'dog'."""
    raw_fields = [
        {
            "field_name": "record_id",
            "form_name": "demographics",
            "field_type": "text",
        },
        {
            "field_name": "gender",
            "form_name": "demographics",
            "field_type": "radio",
            "choices": "1, female | 2, male | 3, unknown | 4, unspecified | 5, not reported",
        },
    ]
    fields = [RedcapField.from_json(raw) for raw in raw_fields]

    project = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 1,
        'next_record_name': 1,
        'repeatable_instruments': [],
    }

    sheet = pd.DataFrame([{"gender": "dog"}, {"gender": "male"}])
    sheet.fillna('', inplace=True)

    errors = linter.lint_sheet(fields, project, sheet)['all_errors']

    assert len(errors) == 1
    assert "dog not found in Permissible Values" in errors[0]
예제 #6
0
def test_serializer_with_repeatable_instrument_and_matching_repeat_instances():
    """Encode rows of a repeatable instrument with a preset repeat instance.

    ``matching_repeat_instances`` maps row '0' / instrument 'treatment' to
    instance 7. The expected frame has one non-repeating row followed by
    two 'treatment' rows with repeat instances 7 and 2.
    """
    gender_choices = (
        "1, female | 2, male | 3, unknown | 4, unspecified | 5, not reported")
    raw_fields = [
        {
            "field_name": "patient_id",
            "form_name": "demographics",
            "field_type": "text",
        },
        {
            "field_name": "gender",
            "form_name": "demographics",
            "field_type": "radio",
            "choices": gender_choices,
        },
        {
            "field_name": "treatment_dx",
            "form_name": "treatment",
            "field_type": "text",
        },
        {
            "field_name": "treatment",
            "form_name": "treatment",
            "field_type": "radio",
            "choices": "1, chemotherapy | 2, immunotherapy",
        },
    ]
    fields = [RedcapField.from_json(raw) for raw in raw_fields]

    project = {
        'secondary_unique_field': [],
        'record_autonumbering_enabled': 0,
        'next_record_name': 1,
        'repeatable_instruments': ['treatment'],
    }

    # Same patient twice, differing only in the repeatable 'treatment' form.
    sheet = pd.DataFrame([
        {
            "patient_id": "123",
            "gender": "male",
            "treatment_dx": "2018-09-01",
            "treatment": "chemotherapy",
        },
        {
            "patient_id": "123",
            "gender": "male",
            "treatment_dx": "2019-09-01",
            "treatment": "immunotherapy",
        },
    ])
    sheet.fillna('', inplace=True)

    encode_options = {'matching_repeat_instances': {'0': {'treatment': 7}}}
    actual = serializer.encode_sheet(fields, project, sheet, encode_options)
    actual = actual.sort_index(axis=1)

    expected = pd.DataFrame(data={
        'patient_id': ['123', '123', '123'],
        'redcap_repeat_instrument': ['', 'treatment', 'treatment'],
        'redcap_repeat_instance': ['', 7, 2],
        'gender': ['2', '', ''],
        'treatment': ['', '1', '2'],
        'treatment_dx': ['', '2018-09-01', '2019-09-01'],
    })
    expected = expected.sort_index(axis=1)

    assert len(actual) == 3
    assert_frame_equal(actual, expected, check_dtype=False, check_like=True)
예제 #7
0
def save_fields():
    """Apply the chosen column-to-field mapping, then lint and reconcile.

    Reads JSON-encoded form fields from the current request. For every
    sheet the data columns are renamed to their mapped REDCap field names;
    columns mapped to the empty string are dropped. When a token is given,
    existing records are exported through ``RedcapApi`` so they can be
    grouped by record id and decoded for reconciliation.

    Returns:
        A ``flask.jsonify`` response with the renamed sheet data, the
        linting results, the existing and decoded records, and
        ``fieldsSaved`` set to True.
    """
    form = request.form.to_dict()
    data_field_to_redcap_field_map = json.loads(
        form.get('dataFieldToRedcapFieldMap'))
    csv_headers = json.loads(form.get('csvHeaders'))
    existing_records = json.loads(form.get('existingRecords'))
    recordid_field = json.loads(form.get('recordidField'))
    project_info = json.loads(form.get('projectInfo'))
    token = json.loads(form.get('token'))
    env = json.loads(form.get('env'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)

    records = {}
    for sheet_name, rows in json_data.items():
        field_map = data_field_to_redcap_field_map.get(sheet_name, {})
        renamed_headers = []
        for header in csv_headers[sheet_name]:
            target = field_map.get(header)
            if target == '':
                # Mapped to the empty string: the column is dropped.
                continue
            renamed_headers.append(target or header)
        csv_headers[sheet_name] = renamed_headers

        frame = pd.DataFrame(rows)
        frame.fillna('', inplace=True)
        frame = frame.rename(index=str, columns=field_map)
        records[sheet_name] = frame[renamed_headers]

    dd = [
        RedcapField.from_json(raw_field)
        for raw_field in json.loads(form.get('ddData'))
    ]

    if token:
        redcap_api = RedcapApi(env)
        autonumbered = project_info['record_autonumbering_enabled'] == 1
        unique_field = project_info.get('secondary_unique_field')
        if not autonumbered:
            # Record ids come straight from the uploaded data.
            record_ids = utils.get_field_values([recordid_field], records)
            existing_records = redcap_api.export_records(
                token, {'records': record_ids})
        elif not unique_field:
            existing_records = None
        else:
            # First find records by the secondary unique field, then export
            # those same records again by record id.
            unique_values = utils.get_field_values(unique_field, records)
            matches = redcap_api.export_records(
                token, {
                    'secondary_unique_field': unique_field,
                    'secondary_unique_field_values': unique_values
                })
            record_ids = []
            for match in matches:
                record_id = match[recordid_field]
                if isinstance(record_id, float) and record_id.is_integer():
                    # Whole-number floats become plain integer strings.
                    record_id = str(int(record_id))
                record_ids.append(record_id)
            existing_records = redcap_api.export_records(
                token, {'records': record_ids})

    datafile_errors = linter.lint_datafile(dd, project_info, records)
    cells_with_errors = datafile_errors['cells_with_errors']

    rows_in_error = utils.get_rows_with_errors(cells_with_errors, records)
    columns_in_error = utils.get_columns_with_errors(cells_with_errors,
                                                     records)

    all_errors = []
    for message in datafile_errors['linting_errors']:
        all_errors.append({"Error": message})

    # Convert the frames to plain JSON structures for the response.
    json_data = {}
    for sheet_name, frame in records.items():
        json_data[sheet_name] = json.loads(
            frame.to_json(orient='records', date_format='iso'))
        error_frame = cells_with_errors[sheet_name]
        cells_with_errors[sheet_name] = json.loads(
            error_frame.to_json(orient='records'))

    # Group existing records by record id; records without one are skipped.
    records_to_reconcile = {}
    for record in existing_records or []:
        if not record.get(recordid_field):
            continue
        records_to_reconcile.setdefault(record[recordid_field],
                                        []).append(record)

    decoded_records = {
        record_id: serializer.decode_sheet(dd, encoded_rows)
        for record_id, encoded_rows in records_to_reconcile.items()
    }

    return flask.jsonify({
        'jsonData': json_data,
        'rowsInError': rows_in_error,
        'cellsWithErrors': cells_with_errors,
        'allErrors': all_errors,
        'csvHeaders': csv_headers,
        'existingRecords': existing_records,
        'columnsInError': columns_in_error,
        'decodedRecords': decoded_records,
        'fieldsSaved': True,
    })
예제 #8
0
def post_form():
    """Handle the initial upload of a data file and its data dictionary.

    Reads the ``dataFileName``, ``token``, ``env`` and ``dataDictionaryName``
    form fields plus the uploaded ``dataFile`` and the optional
    ``mappingsFile``, ``dataDictionary`` and ``existingRecordsFile``.

    With a token, the data dictionary and project info are fetched through
    ``RedcapApi``; without one they are derived from the uploaded data
    dictionary file. The function then matches data-file headers to REDCap
    field names, scores fuzzy candidates for every header/field pair, and
    flags sheets that share no field with the data dictionary.

    Returns:
        A ``flask.jsonify`` response with the parsed sheets, the data
        dictionary, project info and all matching results. If a call in the
        token branch raises, the response is ``{'error': ...}`` with an
        ``Access-Control-Allow-Origin: *`` header instead.
    """
    form = request.form.to_dict()
    datafile_name = form.get('dataFileName')
    # records = pd.read_excel(request.files['dataFile'], sheet_name=None)
    records = utils.read_spreadsheet(request.files['dataFile'], datafile_name)
    date_cols = []
    # For Excel uploads, use the cell number formats to restore values that
    # the plain spreadsheet read does not keep as formatted.
    if datafile_name.endswith('.xlsx') or datafile_name.endswith('.xls'):
        records_with_format = load_workbook(request.files['dataFile'])
        for sheet in records_with_format.sheetnames:
            # Only the first data row (row 2) is inspected: the `break` at
            # the end of this loop body exits after one row.
            for row in records_with_format[sheet].iter_rows(min_row=2):
                for cell in row:
                    # MRN: columns formatted as '00000000' get their ints
                    # left-padded with zeros to 8 characters below.
                    column_letter = get_column_letter(cell.column)
                    column_header = records_with_format[sheet][column_letter +
                                                               '1'].value
                    if column_header in records[
                            sheet].columns and cell.number_format == '00000000':
                        current_list = list(records[sheet][column_header])
                        current_list = [
                            str(i).rjust(8, '0') if isinstance(i, int) else i
                            for i in current_list
                        ]
                        records[sheet][column_header] = current_list
                    # Columns formatted as 'mm-dd-yy' are reported as date
                    # columns and their datetimes rendered as MM/DD/YYYY.
                    if column_header in records[
                            sheet].columns and cell.number_format == 'mm-dd-yy':
                        date_cols.append(column_header)
                        current_list = list(records[sheet][column_header])
                        current_list = [
                            i.strftime('%m/%d/%Y') if isinstance(i, datetime)
                            and not pd.isnull(i) else i for i in current_list
                        ]
                        records[sheet][column_header] = current_list
                break
    token = form.get('token')
    env = form.get('env')
    mappings = None
    existing_records = None
    form_names = set()
    form_name_to_dd_fields = {}
    data_field_to_redcap_field_map = {}
    data_field_to_choice_map = {}
    original_to_correct_value_map = {}
    no_match_redcap_fields = []

    # Optional mappings file: each column of "Sheet1" holds one JSON
    # document in its first row.
    if 'mappingsFile' in request.files:
        mappings = pd.read_excel(request.files['mappingsFile'],
                                 sheet_name="Sheet1")

        if list(mappings["dataFieldToRedcapFieldMap"]):
            data_field_to_redcap_field_map = json.loads(
                list(mappings["dataFieldToRedcapFieldMap"])[0])
        if list(mappings["dataFieldToChoiceMap"]):
            data_field_to_choice_map = json.loads(
                list(mappings["dataFieldToChoiceMap"])[0])
        if list(mappings["originalToCorrectedValueMap"]):
            original_to_correct_value_map = json.loads(
                list(mappings["originalToCorrectedValueMap"])[0])
        if list(mappings["noMatchRedcapFields"]):
            no_match_redcap_fields = json.loads(
                list(mappings["noMatchRedcapFields"])[0])

    redcap_api = RedcapApi(env)

    # Defaults used when no token is supplied.
    project_info = {
        'secondary_unique_field': '',
        'record_autonumbering_enabled': 0,
        'repeatable_instruments': [],
        'next_record_name': 1
    }

    data_dictionary = None
    # NOTE(review): existing_records was already set to None above.
    existing_records = None
    if token:
        try:
            data_dictionary = redcap_api.fetch_data_dictionary(token)
            project_info = redcap_api.fetch_project_info(token)
            project_info[
                'next_record_name'] = redcap_api.generate_next_record_name(
                    token)
            # Wrap a non-empty secondary unique field in a list.
            if project_info.get('secondary_unique_field'):
                project_info['secondary_unique_field'] = [
                    project_info.get('secondary_unique_field')
                ]
            if project_info['has_repeating_instruments_or_events'] == 1:
                repeatable_instruments = redcap_api.fetch_repeatable_instruments(
                    token)
                project_info['repeatable_instruments'] = [
                    i['form_name'] for i in repeatable_instruments
                ]
            # Without autonumbering, the first data dictionary field is
            # marked as required.
            if project_info['record_autonumbering_enabled'] == 0:
                data_dictionary[0]['required'] = 'Y'
            dd = [RedcapField.from_json(field) for field in data_dictionary]
        except Exception as e:
            # Any failure above is logged and returned to the client as an
            # error payload rather than raised.
            logging.warning(e)
            results = {'error': "Error: {0}".format(e)}
            response = flask.jsonify(results)
            response.headers.add('Access-Control-Allow-Origin', '*')
            return response
    else:
        # No token: build the data dictionary from the uploaded file.
        data_dictionary_name = form.get('dataDictionaryName')
        # NOTE(review): dd_df stays unbound when the name ends with none of
        # these extensions, and only the CSV branch fills NaN with ''.
        if data_dictionary_name.endswith('.csv'):
            dd_df = pd.read_csv(request.files['dataDictionary'])
            dd_df.fillna('', inplace=True)
        elif data_dictionary_name.endswith(
                '.xlsx') or data_dictionary_name.endswith('.xls'):
            dd_df = pd.read_excel(request.files['dataDictionary'])
        dd = [
            RedcapField.from_data_dictionary(dd_df, field)
            for field in list(dd_df['Variable / Field Name'])
        ]
        # A first field named 'record_id' is treated as autonumbering.
        if dd[0].field_name == 'record_id':
            project_info['record_autonumbering_enabled'] = 1
        if 'existingRecordsFile' in request.files:
            existing_records = pd.read_csv(
                request.files['existingRecordsFile'])
            existing_records = json.loads(
                existing_records.to_json(orient='records', date_format='iso'))

    all_csv_headers = []
    dd_headers = []
    dd_data = {}
    dd_data_raw = {}
    # Raw data dictionary: the API payload when a token was used, otherwise
    # the uploaded frame converted to records.
    if data_dictionary is not None:
        dd_headers = list(data_dictionary[0].keys())
        dd_data_raw = data_dictionary
    else:
        dd_headers = list(dd_df.columns)
        dd_data_raw = json.loads(
            dd_df.to_json(orient='records', date_format='iso'))

    dd_data = [field.__dict__ for field in dd]

    # Group field names by form and collect the distinct form names.
    for dd_field in dd:
        if not form_name_to_dd_fields.get(dd_field.form_name):
            form_name_to_dd_fields[dd_field.form_name] = []
        form_name_to_dd_fields.get(dd_field.form_name).append(
            dd_field.field_name)
        form_names.add(dd_field.form_name)

    # The first data dictionary field is used as the record id field.
    recordid_field = dd[0].field_name

    form_names = list(form_names)

    # NOTE(review): all_csv_headers is built here and de-duplicated further
    # down, but is not part of the returned results.
    for sheet_name, sheet in records.items():
        all_csv_headers += list(sheet.columns)
        all_csv_headers = [i for i in all_csv_headers if 'Unnamed' not in i]

    all_field_names = [f.field_name for f in dd]

    redcap_field_candidates = {}
    data_field_candidates = {}
    csv_headers = {}
    fields_not_in_redcap = {}
    duplicate_fields = {}

    for sheet_name, sheet in records.items():
        duplicate_fields[sheet_name] = {}
        # Remove empty rows
        sheet.dropna(axis=0, how='all', inplace=True)
        # Keep only headers whose name does not contain 'Unnamed'.
        csv_headers[sheet_name] = list(sheet.columns)
        csv_headers[sheet_name] = [
            item for item in csv_headers[sheet_name] if 'Unnamed' not in item
        ]
        # Count header occurrences, then keep the headers seen more than once.
        for header in csv_headers[sheet_name]:
            duplicate_fields[sheet_name][
                header] = duplicate_fields[sheet_name].get(header, 0) + 1
        duplicate_fields[sheet_name] = [
            k for k, v in duplicate_fields[sheet_name].items() if v > 1
        ]
        # Headers whose normalized form is not a data dictionary field name.
        normalized_headers = utils.parameterize_list(csv_headers[sheet_name])
        fields_not_in_redcap[sheet_name] = [
            header for header, normalized_header in zip(
                csv_headers[sheet_name], normalized_headers)
            if normalized_header not in all_field_names
        ]

    all_csv_headers = list(set(all_csv_headers))

    unmatched_data_fields = {}

    # Auto-match headers to fields by normalized name; mappings loaded from
    # the mappings file take precedence and are left as they are.
    for sheet in csv_headers:
        data_field_to_redcap_field_map[
            sheet] = data_field_to_redcap_field_map.get(sheet, {})
        unmatched_data_fields[sheet] = unmatched_data_fields.get(sheet, [])
        for header in csv_headers[sheet]:
            normalized_header = utils.parameterize(header)
            if data_field_to_redcap_field_map[sheet].get(header):
                continue
            if normalized_header in all_field_names:
                data_field_to_redcap_field_map[sheet][
                    header] = normalized_header
            else:
                unmatched_data_fields[sheet].append(header)

    # NOTE(review): selected_columns is filled below but not used afterwards
    # in this function.
    selected_columns = {}

    matched_redcap_fields = []
    matched_redcap_fields += no_match_redcap_fields
    for sheet_name, field_map in data_field_to_redcap_field_map.items():
        selected_columns[sheet_name] = field_map.keys()
        matched_redcap_fields += field_map.values()
    # Fields that are neither matched nor listed as "no match"; the literal
    # 'record_id' is always excluded.
    unmatched_redcap_fields = [
        f for f in all_field_names
        if f not in matched_redcap_fields and f != 'record_id'
    ]
    # Score every data header as a candidate for each REDCap field, taking
    # the better of the field-name and field-label ratios.
    for f1 in all_field_names:
        dd_field = [f for f in dd_data if f['field_name'] == f1][0]
        redcap_field_candidates[f1] = []
        for sheet in csv_headers:
            for f2 in csv_headers[sheet]:
                redcap_field_candidates[f1].append({
                    'candidate':
                    f2,
                    'sheets': [sheet],
                    'score':
                    max(fuzz.token_set_ratio(f1, f2),
                        fuzz.token_set_ratio(dd_field['field_label'], f2))
                })

    # The reverse direction: score every REDCap field for each data header.
    # A header seen in an earlier sheet is not scored again.
    for sheet in csv_headers:
        for f1 in csv_headers[sheet]:
            if data_field_candidates.get(f1):
                continue
            data_field_candidates[f1] = []
            for f2 in all_field_names:
                dd_field = [f for f in dd_data if f['field_name'] == f2][0]
                data_field_candidates[f1].append({
                    'candidate':
                    f2,
                    'form_name':
                    dd_field['form_name'],
                    'score':
                    max(fuzz.token_set_ratio(f1, f2),
                        fuzz.token_set_ratio(dd_field['field_label'], f1))
                })

    malformed_sheets = []

    # NOTE(review): form_names is recomputed here from the same dd list.
    form_names = [redcap_field.form_name for redcap_field in dd]
    form_names = list(set(form_names))
    # A sheet is malformed when none of its columns is a REDCap field name
    # and it has no header-to-field mapping.
    for sheet_name in records.keys():
        sheet = records.get(sheet_name)

        redcap_field_names = [f.field_name for f in dd]

        matching_fields = [f for f in sheet.columns if f in redcap_field_names]
        if not matching_fields and not data_field_to_redcap_field_map.get(
                sheet_name):
            malformed_sheets.append(sheet_name)

    json_data = {}

    # Convert each sheet to a list of row dicts for the response.
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))

    results = {
        'csvHeaders': csv_headers,
        'jsonData': json_data,
        'ddHeaders': dd_headers,
        'ddData': dd_data,
        'ddDataRaw': dd_data_raw,
        'formNames': form_names,
        'dateColumns': date_cols,
        'duplicateFields': duplicate_fields,
        'malformedSheets': malformed_sheets,
        'recordFieldsNotInRedcap': fields_not_in_redcap,
        'formNameToDdFields': form_name_to_dd_fields,
        'projectInfo': project_info,
        'existingRecords': existing_records,
        'recordidField': recordid_field,
        'redcapFieldCandidates': redcap_field_candidates,
        'dataFieldCandidates': data_field_candidates,
        'unmatchedRedcapFields': unmatched_redcap_fields,
        'unmatchedDataFields': unmatched_data_fields,
        'dataFileName': datafile_name,
        'token': token,
    }
    # The mapping entries are only included when they are non-empty.
    if data_field_to_redcap_field_map:
        results['dataFieldToRedcapFieldMap'] = data_field_to_redcap_field_map
    if data_field_to_choice_map:
        results['dataFieldToChoiceMap'] = data_field_to_choice_map
    if original_to_correct_value_map:
        results['originalToCorrectedValueMap'] = original_to_correct_value_map
    if no_match_redcap_fields:
        results['noMatchRedcapFields'] = no_match_redcap_fields

    response = flask.jsonify(results)
    return response
예제 #9
0
def resolve_merge_row():
    """Apply the merge choices for one row and re-lint the datafile.

    Reads JSON-encoded form fields from the current request. The values in
    ``mergeMap[workingSheetName][workingMergeRow]`` are written into that
    row of the working sheet, the row's entry is removed from
    ``mergeConflicts``, and the whole datafile is linted again.

    Returns:
        A ``flask.jsonify`` response with the updated sheet data, linting
        errors, ``mergeMap`` and ``mergeConflicts``. When ``action`` is
        'continue' it also carries the next working row and sheet name.

    Raises:
        IndexError: if a merged field is missing from the data dictionary.
        KeyError: if a merged field is not a column of the working sheet.
    """
    form = request.form.to_dict()
    csv_headers = json.loads(form.get('csvHeaders'))
    action = json.loads(form.get('action', '""'))
    next_merge_row = json.loads(form.get('nextMergeRow', '-1'))
    next_sheet_name = json.loads(form.get('nextSheetName', '""'))
    # The working merge row is the row whose merge choices are being saved.
    working_merge_row = json.loads(form.get('workingMergeRow', '-1'))
    working_sheet_name = json.loads(form.get('workingSheetName', '""'))
    merge_map = json.loads(form.get('mergeMap', '{}'))
    merge_conflicts = json.loads(form.get('mergeConflicts', '{}'))
    project_info = json.loads(form.get('projectInfo', '{}'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)

    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]

    # JSON object keys are strings, so the row index is looked up as str.
    row_merge_map = merge_map.get(working_sheet_name,
                                  {}).get(str(working_merge_row), {})

    records = {}
    for sheet in json_data:
        frame = pd.DataFrame(json_data[sheet])
        frame = frame[csv_headers[sheet]]
        frame.fillna('', inplace=True)
        if sheet == working_sheet_name:
            for field in row_merge_map:
                dd_field = [
                    f for f in data_dictionary if f.field_name == field
                ][0]
                value = row_merge_map[field]
                # Cast non-empty values to match the field's validation type.
                if dd_field.text_validation == 'integer':
                    value = int(value) if value else value
                elif dd_field.text_validation == 'number_2dp':
                    value = float(value) if value else value
                frame.iloc[working_merge_row,
                           frame.columns.get_loc(field)] = value
        records[sheet] = frame

    # Drop the resolved row from the conflicts. A sheet or row that is not
    # listed is ignored instead of raising KeyError.
    if working_sheet_name and merge_conflicts:
        sheet_conflicts = merge_conflicts.get(working_sheet_name)
        if sheet_conflicts:
            sheet_conflicts.pop(str(working_merge_row), None)

    datafile_errors = linter.lint_datafile(data_dictionary, project_info,
                                           records)
    cells_with_errors = datafile_errors['cells_with_errors']

    all_errors = [{
        "Error": error
    } for error in datafile_errors['linting_errors']]

    # Convert the frames to plain JSON structures for the response.
    json_data = {}
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))
        cells_with_errors[sheet_name] = json.loads(
            cells_with_errors[sheet_name].to_json(orient='records'))

    results = {
        'jsonData': json_data,
        'allErrors': all_errors,
        'mergeMap': merge_map,
        'mergeConflicts': merge_conflicts,
        'cellsWithErrors': cells_with_errors,
    }
    if action == 'continue':
        results['workingMergeRow'] = next_merge_row
        results['workingSheetName'] = next_sheet_name
    return flask.jsonify(results)
예제 #10
0
def resolve_column():
    """Apply value transforms to the working column and re-lint if needed.

    Reads JSON-encoded form fields from the current request. The choice map
    and the corrected-value map for the working sheet and column are merged
    (corrected values win on key clashes) and applied to every cell of that
    column. The datafile is linted again only when the merged map is
    non-empty.

    Returns:
        A ``flask.jsonify`` response with the updated sheet data, the
        linting results when transforms were applied, and the next working
        column / row details when ``action`` is 'continue'.
    """
    form = request.form.to_dict()
    csv_headers = json.loads(form.get('csvHeaders'))
    action = json.loads(form.get('action', '""'))
    next_column = json.loads(form.get('nextColumn', '""'))
    next_sheet_name = json.loads(form.get('nextSheetName', '""'))
    working_column = json.loads(form.get('workingColumn', '""'))
    working_sheet_name = json.loads(form.get('workingSheetName', '""'))
    data_field_to_choice_map = json.loads(
        form.get('dataFieldToChoiceMap', '{}'))
    original_to_correct_value_map = json.loads(
        form.get('originalToCorrectedValueMap', '{}'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)

    def _working_column_transforms():
        # Fresh dict on every call; later entries override earlier ones.
        merged = dict(
            data_field_to_choice_map.get(working_sheet_name,
                                         {}).get(working_column, {}))
        merged.update(
            original_to_correct_value_map.get(working_sheet_name,
                                              {}).get(working_column, {}))
        return merged

    def _transformed(cell, mapping):
        # A falsy or missing replacement leaves the cell unchanged.
        replacement = mapping.get(str(cell)) or cell
        if isinstance(replacement, list):
            return ', '.join([str(item) for item in replacement])
        return replacement

    # Every sheet gets the working column's transforms.
    transform_map = {
        sheet_name: _working_column_transforms() for sheet_name in json_data
    }
    has_transforms = any(transform_map.values())

    data_dictionary = [
        RedcapField.from_json(raw_field)
        for raw_field in json.loads(form.get('ddData'))
    ]

    records = {}
    for sheet_name, rows in json_data.items():
        frame = pd.DataFrame(rows)
        frame = frame[csv_headers[sheet_name]]
        frame.fillna('', inplace=True)
        if sheet_name == working_sheet_name:
            mapping = transform_map[sheet_name]
            frame[working_column] = [
                _transformed(cell, mapping)
                for cell in list(frame[working_column])
            ]
        records[sheet_name] = frame

    project_info = json.loads(form.get('projectInfo', '{}'))

    if next_column:
        field_errors = calculate_field_errors(next_column, next_sheet_name,
                                              data_dictionary, records)
    else:
        field_errors = {}

    row_info = {}
    next_row = -1

    results = {
        'jsonData': {
            sheet_name: json.loads(
                frame.to_json(orient='records', date_format='iso'))
            for sheet_name, frame in records.items()
        },
    }

    if has_transforms:
        datafile_errors = linter.lint_datafile(data_dictionary, project_info,
                                               records)
        cells_with_errors = datafile_errors['cells_with_errors']
        # Row and column summaries are computed before the error frames are
        # converted to JSON.
        rows_in_error = utils.get_rows_with_errors(cells_with_errors, records)
        columns_in_error = utils.get_columns_with_errors(
            cells_with_errors, records)

        for sheet_name in records:
            error_frame = cells_with_errors[sheet_name]
            cells_with_errors[sheet_name] = json.loads(
                error_frame.to_json(orient='records'))

        #TODO Check if next_row and next_row is still in error
        if rows_in_error and not next_column:
            # With no next column, move on to the first row in error.
            next_sheet_name = next(iter(rows_in_error))
            next_row = rows_in_error[next_sheet_name][0]
            row_info = calculate_row_info(next_row, next_sheet_name,
                                          data_dictionary, records)

        results['allErrors'] = [
            {"Error": message}
            for message in datafile_errors['linting_errors']
        ]
        results['columnsInError'] = columns_in_error
        results['cellsWithErrors'] = cells_with_errors
        results['rowsInError'] = rows_in_error

    if action == 'continue':
        results.update({
            'workingColumn': next_column,
            'workingSheetName': next_sheet_name,
            'fieldErrors': field_errors,
            'rowInfo': row_info,
            'workingRow': next_row,
        })

    return flask.jsonify(results)
예제 #11
0
def resolve_row():
    """Apply the user's edits to one row, re-lint the datafile, and report.

    Reads these JSON-encoded fields from the posted form: ``csvHeaders``,
    ``action``, ``nextRow``, ``nextSheetName``, ``workingRow``,
    ``workingSheetName``, ``fieldToValueMap``, ``jsonData``, ``ddData`` and
    ``projectInfo``.

    The values found in ``fieldToValueMap[workingSheetName][str(workingRow)]``
    are written into that row of the working sheet, the whole datafile is
    linted again, and the refreshed data and error summaries are returned.

    Returns:
        A ``flask.jsonify`` response with ``jsonData``, ``allErrors``,
        ``rowsInError``, ``columnsInError``, ``rowInfo`` and
        ``cellsWithErrors``.  When ``action`` is ``'continue'`` it also
        carries ``workingRow``, ``workingSheetName``, ``workingColumn`` and
        ``fieldErrors`` describing the next item to work on.
    """
    form = request.form.to_dict()
    csv_headers = json.loads(form.get('csvHeaders'))
    action = json.loads(form.get('action', '""'))
    next_row = json.loads(form.get('nextRow', '-1'))
    next_sheet_name = json.loads(form.get('nextSheetName', '""'))
    working_row = json.loads(form.get('workingRow', '-1'))
    working_sheet_name = json.loads(form.get('workingSheetName', '""'))
    field_to_value_map = json.loads(form.get('fieldToValueMap'))
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)

    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]

    # Name -> field lookup built once; setdefault keeps the FIRST entry for a
    # name, matching the previous "first match in the list" behaviour.
    fields_by_name = {}
    for dd_field in data_dictionary:
        fields_by_name.setdefault(dd_field.field_name, dd_field)

    # JSON object keys are strings, hence str(working_row).
    value_map = field_to_value_map.get(working_sheet_name,
                                       {}).get(str(working_row), {})

    records = {}
    for sheet, rows in json_data.items():
        # Restrict/reorder to the known headers and blank out missing cells.
        frame = pd.DataFrame(rows)[csv_headers[sheet]].fillna('')
        if sheet == working_sheet_name:
            for field, value in value_map.items():
                dd_field = fields_by_name.get(field)
                # A field without a data-dictionary entry is written as-is
                # instead of aborting the request with an IndexError.
                validation = (dd_field.text_validation
                              if dd_field is not None else None)
                # Empty values are left untouched so blanks stay blank.
                if value:
                    if validation == 'integer':
                        value = int(value)
                    elif validation == 'number_2dp':
                        value = float(value)
                frame.iloc[working_row, frame.columns.get_loc(field)] = value
        records[sheet] = frame

    project_info = json.loads(form.get('projectInfo'))

    datafile_errors = linter.lint_datafile(data_dictionary, project_info,
                                           records)
    cells_with_errors = datafile_errors['cells_with_errors']
    # TODO Figure out why errors on malformed sheets get added here
    rows_in_error = utils.get_rows_with_errors(cells_with_errors, records)
    columns_in_error = utils.get_columns_with_errors(cells_with_errors,
                                                     records)

    # TODO Check if next_column and next_column is still in error
    row_info = {}
    field_errors = {}
    next_column = ''
    if next_row >= 0:
        # The client already chose the next row to work on.
        row_info = calculate_row_info(next_row, next_sheet_name,
                                      data_dictionary, records)
    elif columns_in_error:
        # No row requested: move on to the first column still in error.
        next_sheet_name = list(columns_in_error.keys())[0]
        next_column = columns_in_error[next_sheet_name][0]
        field_errors = calculate_field_errors(next_column, next_sheet_name,
                                              data_dictionary, records)

    all_errors = [{
        "Error": error
    } for error in datafile_errors['linting_errors']]

    # Convert every DataFrame to plain JSON-compatible structures.
    json_data = {}
    for sheet_name, sheet in records.items():
        json_data[sheet_name] = json.loads(
            sheet.to_json(orient='records', date_format='iso'))
        cells_with_errors[sheet_name] = json.loads(
            cells_with_errors[sheet_name].to_json(orient='records'))

    results = {
        'jsonData': json_data,
        'allErrors': all_errors,
        'rowsInError': rows_in_error,
        'columnsInError': columns_in_error,
        'rowInfo': row_info,
        'cellsWithErrors': cells_with_errors,
    }
    if action == 'continue':
        results['workingRow'] = next_row
        results['workingSheetName'] = next_sheet_name
        results['workingColumn'] = next_column
        results['fieldErrors'] = field_errors
    return flask.jsonify(results)
예제 #12
0
def _highlight_error_cells(worksheet, frame, error_df, missing_columns,
                           fields_by_name, formats):
    """Re-write flagged cells of an already-written sheet with highlights.

    Args:
        worksheet: The xlsxwriter worksheet that ``frame`` was written to.
        frame: The data that was written; row ``i`` sits on sheet row
            ``i + 1`` because row 0 holds the header.
        error_df: Per-cell error flags with the same column order as
            ``frame``.
        missing_columns: Column names whose header is marked instead of
            their cells.
        fields_by_name: Data-dictionary fields keyed by ``field_name``.
        formats: Dict with the ``'error'``, ``'empty'`` and
            ``'missing_column'`` cell formats.
    """
    for j, col in enumerate(error_df.columns):
        if col in missing_columns:
            worksheet.write(0, j, frame.columns[j],
                            formats['missing_column'])
            continue

        # Whether the field is required depends only on the column, so it is
        # resolved once here rather than once per cell.
        dd_field = fields_by_name.get(col)
        required = dd_field.required if dd_field is not None else False

        # Positional column access keeps this a Series; the frames are built
        # from lists of records, so position and index label coincide.
        for row_idx, error_cell in enumerate(error_df.iloc[:, j]):
            if error_cell is None and required:
                worksheet.write(row_idx + 1, j, '', formats['empty'])
            elif error_cell:
                cell = frame.iloc[row_idx][frame.columns[j]]
                target_string = cell or ''
                cell_format = None
                if cell:
                    cell_format = formats['error']
                elif required:
                    cell_format = formats['empty']
                worksheet.write(row_idx + 1, j, target_string, cell_format)


def download_progress():
    """Return the current data as an Excel download with issues highlighted.

    Reads ``dataFileName``, ``dataFieldToRedcapFieldMap``, ``csvHeaders``,
    ``ddData``, ``cellsWithErrors``, ``recordFieldsNotInRedcap`` and
    ``jsonData`` from the posted form.  Each sheet is written with its
    columns renamed through the data-field -> REDCap-field map, then:

    * headers of columns listed in ``recordFieldsNotInRedcap`` are red,
    * flagged cells that hold a value are amber,
    * empty cells of required fields are yellow.

    Returns:
        A ``flask.send_file`` attachment named
        ``<original name>-<MM-DD-YYYY>-Edited.xlsx``.
    """
    form = request.form.to_dict()
    datafile_name = form.get('dataFileName')
    data_field_to_redcap_field_map = json.loads(
        form.get('dataFieldToRedcapFieldMap'))
    csv_headers = json.loads(form.get('csvHeaders'))
    data_dictionary = [
        RedcapField.from_json(field)
        for field in json.loads(form.get('ddData'))
    ]
    cells_with_errors = json.loads(form.get('cellsWithErrors'))
    record_fields_not_in_redcap = json.loads(
        form.get('recordFieldsNotInRedcap'))

    # ntpath.basename also strips Windows-style directory prefixes.
    datafile_name = os.path.splitext(ntpath.basename(datafile_name))[0]
    current_date = datetime.now().strftime("%m-%d-%Y")
    new_datafile_name = datafile_name + '-' + current_date + '-Edited.xlsx'
    json_data = json.loads(form.get('jsonData'), object_pairs_hook=OrderedDict)

    # First entry wins for a repeated field name, as with a linear search.
    fields_by_name = {}
    for dd_field in data_dictionary:
        fields_by_name.setdefault(dd_field.field_name, dd_field)

    output = io.BytesIO()
    # The context manager closes (and thereby saves) the workbook even when
    # writing or formatting a sheet raises.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        formats = {
            'error': writer.book.add_format({'bg_color': '#ffbf00'}),  # Amber
            'empty': writer.book.add_format({'bg_color': '#FFE300'}),  # Yellow
            'missing_column':
            writer.book.add_format({'bg_color': '#E5153E'}),  # Red
        }
        for sheet, rows in json_data.items():
            matched_field_dict = data_field_to_redcap_field_map.get(sheet, {})
            csv_headers[sheet] = [
                matched_field_dict.get(c) or c for c in csv_headers[sheet]
            ]
            # Only rename columns that have a usable target.  The headers
            # above keep the original name for an empty/None mapping, so
            # renaming such a column would make the selection below fail.
            rename_map = {
                source: target
                for source, target in matched_field_dict.items() if target
            }
            error_df = pd.DataFrame(cells_with_errors[sheet])
            frame = pd.DataFrame(rows).fillna('')
            frame = frame.rename(columns=rename_map)[csv_headers[sheet]]
            error_df = error_df.rename(
                columns=rename_map)[csv_headers[sheet]]
            frame.to_excel(writer, sheet_name=sheet, index=False)

            _highlight_error_cells(
                writer.sheets[sheet], frame, error_df,
                record_fields_not_in_redcap.get(sheet, []), fields_by_name,
                formats)

    output.seek(0)
    # NOTE(review): newer Flask releases renamed `attachment_filename` to
    # `download_name` - confirm against the Flask version this app pins.
    return flask.send_file(output,
                           attachment_filename=new_datafile_name,
                           as_attachment=True)