def warn_good_import(added, deleted, form, modified):
    """Announce a clean import when nothing was deleted or modified.

    When both ``deleted`` and ``modified`` are empty, a "success" box
    reporting ``len(added)`` is shown, followed by the ``added`` frame
    itself. Otherwise the function does nothing.
    """
    has_unexpected_changes = not (deleted.empty and modified.empty)
    if has_unexpected_changes:
        return

    message = """There are %s rows of new data and no unexpected changes to old data. Please proceed with <code>data["raw"]</code>.""" % len(added)
    title = form + ": Importing Data"
    h.showbox(message, title, "success")
    h.showdataframe(added)
def show_modified_rows(self):
    """Warn about rows of the old data that differ in the new data.

    Reads ``self.modified`` and ``self.form``. When ``self.modified`` is
    not empty, a "danger" box with the row count is shown, followed by the
    modified rows. Nothing is displayed otherwise.
    """
    modified = self.modified
    if modified.empty:
        return

    h.showbox(
        """There are %s rows in the old data that has been modified in the new data. If this is expected, you can ignore this message. To further inspect rows type <code>data["modified"]</code>"""
        % len(modified),
        self.form + ": Modified",
        "danger",
    )
    h.showdataframe(modified)
def update(self):
    """Recompute ``deleted``, ``modified``, ``added`` and ``merged``.

    Reads ``self.old``, ``self.new``, ``self.redcap_df`` and
    ``self.form_complete``; writes ``self.deleted``, ``self.modified``,
    ``self.added`` and ``self.merged``.
    """
    # Local import: DataFrame.append was removed in pandas 2.0, so the
    # concatenation below needs pd.concat regardless of file-level imports.
    import pandas as pd

    make_similar(self.old, self.new)
    self.deleted = h.diff(self.old, self.new.id)
    self.modified = h.intersection(self.old, self.new, "id").sort_values("id")

    added = h.diff(self.new, self.old.id)
    # Every newly added row gets both completion columns set to 1.
    added[self.form_complete] = 1
    added["common_complete"] = 1

    make_similar(added, self.redcap_df)
    # Remove rows that h.diff matches against the current Redcap frame.
    added = h.diff(added, self.redcap_df)
    self.added = added

    if added is not None and not added.empty:
        self.merged = pd.concat([self.redcap_df, added], sort=False)
    else:
        self.merged = self.redcap_df
def show_modified(self):
    """Let the user pick which version of each conflicting record to keep.

    Records present in both ``self.redcap_df`` and ``self.added`` (matched
    on ``patientid`` + ``patienttype``) are grouped by ``id``. For each
    group a sheet of the differing columns and a "Keep version" dropdown
    are displayed. Pressing "Submit changes" rewrites ``self.added`` so the
    chosen versions take precedence.
    """
    # Local import: DataFrame.append was removed in pandas 2.0.
    import pandas as pd

    def update(btn):
        # Rows chosen in the dropdowns, minus the trailing column
        # (presumably the "_merge" source marker -- confirm against
        # h.intersection).
        chosen = dups.loc[[dd.value for dd in selections]].iloc[:, :-1]
        # Chosen rows come first so drop_duplicates (keep="first") prefers
        # them over the matching rows already in self.added.
        self.added = pd.concat([chosen, self.added], sort=False).drop_duplicates(
            subset=["patientid", "patienttype"])

    btn = wg.Button(description="Submit changes")
    btn.on_click(update)

    dups = h.intersection(
        self.redcap_df,
        self.added,
        ["patientid", "patienttype"],
        sources=("current", "new"),
    ).reset_index(drop=True)

    selections = []
    output = []
    for _record_id, group in dups.groupby("id"):
        cols = ["id", "patientid"] + h.unequal_columns(group)
        show = group[cols].set_index("_merge")
        sheet = ipysheet.sheet(ipysheet.from_dataframe(show))
        # Dropdown label is the source name; its value is the row's index
        # label in ``dups``, which ``update`` feeds to ``dups.loc``.
        options = [(k, v) for v, k in group._merge.items()]
        dd = wg.Dropdown(
            options=options,
            value=options[0][1],
            description="Keep version:",
            disabled=False,
        )
        selections.append(dd)
        output.append(sheet)
        output.append(dd)

    if output:
        display(
            HTML(
                "<H3>Should New Data Overwrite Old Data</H3><SMALL>Please select which version to keep.</SMALL>"
            ))
        for x in output:
            display(x)
        display(btn)
def test_diff_subtracting_conflicting_rows_but_using_id_rows(A, B2):
    """Passing only column 0 of B2 restricts matching to that column."""
    remaining = h.diff(A, B2[0])
    surviving_rows = [(1, 1), (3, 9)]
    assert remaining.equals(pd.DataFrame(surviving_rows))
def test_diff_subtracting_nonconflicting_rows(A, B):
    """Rows of A that B matches exactly must be absent from the result."""
    remaining = h.diff(A, B)
    surviving_rows = [(1, 1), (3, 9)]
    assert remaining.equals(pd.DataFrame(surviving_rows))
def test_conflict_on_conflicting_rows(A, B2):
    """Conflicting rows come back from both sides, tagged "left"/"right"."""
    conflicting_rows = [
        (0, 0, "left"),
        (2, 4, "left"),
        (0, 99, "right"),
        (2, 99, "right"),
    ]
    expected = pd.DataFrame(
        conflicting_rows,
        index=[0, 2, 0, 1],
        columns=[0, 1, "_merge"],
    )
    assert h.conflict(A, B2, [0]).equals(expected)
def show_missing_subjects(self):
    """Report study subjects that have no row in the merged data.

    A ``subject`` key is derived from ``patientid`` (the text before the
    first underscore, stripped). ``studydata`` rows that ``h.diff`` does
    not match against those subjects are then narrowed to rows that are
    unflagged, have ``interview_date`` before 2019-05-01 and whose
    ``study`` is not "hcpa".

    Returns:
        The filtered frame, with a ``reason`` column set to
        "Missing in Box". It is also passed to ``self.warn_missing`` and
        displayed.
    """
    df = self.merged.copy()
    # ``n`` must be passed by keyword: pandas >= 2.0 makes every
    # ``str.split`` argument except ``pat`` keyword-only.
    df["subject"] = df["patientid"].str.split("_", n=1, expand=True)[0].str.strip()

    missing = h.diff(studydata, df.subject)
    missing = missing[missing.flagged.isnull()]
    missing = missing[missing.interview_date < "2019-05-01"]
    # Copy so the column assignment below writes to an independent frame
    # rather than to a filtered view of ``studydata``.
    missing = missing[missing.study != "hcpa"].copy()
    missing["reason"] = "Missing in Box"

    self.warn_missing(missing, self.form)
    display(missing)
    return missing
def on_update(btn):
    """Send the sheet's edits to ``table``: updates first, then deletions.

    The sheet is read back as a frame, the literal string "nan" is
    replaced with ``np.nan``, and ``h.diff`` against ``not_in_redcap``
    yields the changed rows. Rows not marked ``delete`` are sent via
    ``table.send_frame`` (without the first column); the ``id`` values of
    rows marked ``delete`` go to ``table.delete_records``.
    """
    edited = ipysheet.to_dataframe(sheet).replace("nan", np.nan)

    # Only the rows h.diff reports relative to the original frame.
    changed = h.diff(edited, not_in_redcap)

    to_send = changed[~changed.delete].iloc[:, 1:]
    if not to_send.empty:
        response = table.send_frame(to_send)
        print("Updates: ", response.status_code, response.content)

    ids_to_delete = changed[changed.delete].id.tolist()
    if ids_to_delete:
        response = table.delete_records(ids_to_delete)
        print("Delete Records: ", response.status_code, response.content)
def generate_code_to_revert_modifications(df):
    """Write one ``modify(...)`` line per differing column of each record.

    Args:
        df: Frame of conflicting rows with an ``id`` column and a
            ``_merge`` column that marks the Redcap-side row as "redcap".
            The original body ignored this argument and read a global
            ``modified`` instead; it now uses the frame it is given.

    For every ``id`` group, the first "redcap" row and the first
    non-"redcap" row are compared on the columns reported by
    ``h.unequal_columns``. Columns where both values are NA are skipped.
    Output goes to the module-level ``file`` handle.
    """
    for id_, group in df.groupby('id'):
        redcap = group[group._merge == "redcap"].iloc[0]
        ksads = group[group._merge != "redcap"].iloc[0]
        # The last column is excluded from the comparison (presumably the
        # "_merge" marker, which always differs -- confirm).
        cols = h.unequal_columns(group.iloc[:, :-1])
        print(f"\n # {id_}", file=file)
        for col in cols:
            vr, vk = redcap[col], ksads[col]
            if isna(vr) and isna(vk):
                continue
            escaped_new = escape_value(vk)
            escaped_permanent = escape_value(vr)
            print(f' modify(df, {id_}, "{col}", {escaped_new}, {escaped_permanent})', file=file)
def warn_missing(self, missing, form):
    """Show a status box about Redcap subjects absent from the data.

    An empty ``missing`` frame yields a "success" box. Otherwise a
    "danger" box with the row count is shown, followed by the frame.
    """
    if missing.empty:
        h.showbox(
            """All patientid's are in New Data.""",
            form + ": No Missing Redcap Subjects",
            "success",
        )
        return

    message = """There are %s Redcap subjects missing from the current data.""" % len(missing)
    h.showbox(message, form + ": Redcap Subjects Missing", "danger")
    h.showdataframe(missing)
def warn_not_in_redcap(not_in_redcap, form):
    """Show a status box about rows whose patientid is unknown to Redcap.

    An empty result yields a "success" box. Otherwise a "danger" box with
    the row count is shown, followed by the offending rows.
    """
    # NOTE(review): the ``not_in_redcap`` argument is discarded here and
    # recomputed from the module-level ``df`` and ``studyids`` -- confirm
    # whether callers expect their own frame to be used.
    not_in_redcap = h.difference(df, studyids.subject).copy()
    not_in_redcap['reason'] = 'PatientID not in Redcap'
    not_in_redcap = not_in_redcap.rename(columns={'sitename': 'site'})

    if not_in_redcap.empty:
        h.showbox(
            """All patientid's are in Redcap.""",
            form + ": No Subject Missing from Redcap",
            "success",
        )
        return

    message = """There are %s rows with patientid missing from Redcap.""" % len(not_in_redcap)
    h.showbox(message, form + ": Subjects Missing from Redcap", "danger")
    h.showdataframe(not_in_redcap)
def warn_duplicates(self):
    """Warn about rows sharing the same patientid + patienttype.

    The original body read an undefined local ``df``. This version checks
    ``self.merged``, the frame the sibling methods ``show_missing_subjects``
    and ``show_not_in_redcap`` work from.

    Every row taking part in a duplicate (``keep=False``) is reported. No
    duplicates yields a "success" box; otherwise a "danger" box with the
    row count is shown, followed by the rows.
    """
    form = self.form
    df = self.merged

    duplicate_mask = df.duplicated(['patientid', 'patienttype'], keep=False)
    # Copy so adding the "reason" column does not write into a slice of
    # ``self.merged``.
    duplicates = df[duplicate_mask].copy()
    duplicates['reason'] = 'Duplicate IDs'

    if duplicates.empty:
        h.showbox(
            """All patientid + patienttype combos are unique.""",
            form + ": No Duplicates",
            "success",
        )
    else:
        h.showbox(
            """There are %s rows that contain the same patientid + patienttype."""
            % len(duplicates),
            form + ": Duplicates",
            "danger",
        )
        h.showdataframe(duplicates)
def test_diff_from_same(A, B):
    """A frame diffed against itself leaves no rows."""
    for frame in (A, B):
        assert h.diff(frame, frame).empty
# modified = h.conflict(current_redcap, ksads, intersect_on="id", sources=("redcap", "ksads.net")) # # print(ksads.shape, added.shape, deleted.shape, modified.shape) # %% import changes.accepted df = ksads.set_index('id') changes.accepted.execute(df) # ACTION! ksads = df.reset_index() # %% [markdown] # # Capture Changes # %% # print new shapes added = h.diff(ksads, current_redcap.id) deleted = h.diff(current_redcap, ksads.id) modified = h.conflict(current_redcap, ksads, intersect_on="id", sources=("redcap", "ksads.net")) print(ksads.shape, added.shape, deleted.shape, modified.shape) # %% # deleted is always empty, if not throw error assert deleted.empty, "KSADS.net data has been deleted, take a look at `deleted` dataframe and figure out whether to delete those rows from redcap, Invalidate, mark a flag, or notify users etc." deleted file = open("changes/to_review.py", "w") file.write("from changes.utils import block, modify\n\n\ndef execute(df):\n pass\n") # %% def generate_blocking_code_for_added_rows(added):
def test_diff_subtracting_conflicting_rows(A, B2):
    """With no exact row match in B2, diff must hand back A unchanged."""
    remaining = h.diff(A, B2)
    assert remaining.equals(A)
def test_conflict_on_nonconflicting_rows(A, B):
    """No conflicting rows between A and B means an empty result."""
    conflicts = h.conflict(A, B, [0])
    assert conflicts.empty
def show_not_in_redcap(self):
    """Show an editable sheet of rows whose subject is unknown to Redcap.

    A ``subject`` key is derived from ``patientid`` (the text before the
    first underscore, stripped). Rows of ``self.merged`` that ``h.diff``
    does not match against ``studyids.subject`` are placed in an ipysheet
    with a ``delete`` checkbox column and a ``link`` column.

    "Update" sends edited rows to ``table`` and deletes rows marked
    ``delete``. "Reset" restores the sheet's cells from the original
    frame. The widget is displayed only when there is at least one such
    row.
    """
    df = self.merged.copy()
    # ``n`` must be passed by keyword: pandas >= 2.0 makes every
    # ``str.split`` argument except ``pat`` keyword-only.
    df["subject"] = df["patientid"].str.split("_", n=1, expand=True)[0].str.strip()

    # Drop the last column (presumably the helper "subject" column added
    # above -- confirm against h.diff's output).
    not_in_redcap = h.diff(df, studyids.subject).iloc[:, :-1]
    h.asInt(not_in_redcap, "id", "common_complete", self.form_complete)
    not_in_redcap.insert(0, "delete", False)
    not_in_redcap.insert(1, "link", "view")

    sheet = ipysheet.sheet(ipysheet.from_dataframe(not_in_redcap))
    spaced = wg.Layout(margin="30px 0 20px 0")
    save_btn = wg.Button(description="Update", icon="save")
    reset_btn = wg.Button(description="Reset", icon="trash")
    btns = wg.HBox([save_btn, reset_btn], layout=spaced)

    def on_reset(btn):
        # Replace the cells in place so the displayed sheet is reused.
        sheet.cells = ipysheet.from_dataframe(not_in_redcap).cells

    reset_btn.on_click(on_reset)

    def on_update(btn):
        edited = ipysheet.to_dataframe(sheet)
        edited = edited.replace("nan", np.nan)
        # delta of changes
        z = h.diff(edited, not_in_redcap)
        # Skip the first column ("delete") when sending updates.
        updates = z[~z.delete].iloc[:, 1:]
        if not updates.empty:
            r = table.send_frame(updates)
            print("Updates: ", r.status_code, r.content)
        delete = z[z.delete].id.tolist()
        if delete:
            r = table.delete_records(delete)
            print("Delete Records: ", r.status_code, r.content)

    save_btn.on_click(on_update)
    fancy_widget = wg.VBox([sheet, btns])

    def convert_to_links():
        # Column 1 ("link") becomes an anchor built from the values of
        # sheet column 2 (presumably the record id -- confirm).
        values = [
            wg.HTML(
                '<a target="_blank" href="https://redcap.wustl.edu/redcap/redcap_v8.11.0/DataEntry/record_home.php?pid=3355&arm=1&id=%s">view</a>'
                % x) for x in sheet.cells[2].value
        ]
        ipysheet.column(1, values)

    convert_to_links()

    if not not_in_redcap.empty:
        display(
            HTML(
                "<H3>Subject IDs not in Redcap</H3><SMALL>Please either change patientid to match an ID in redcap or delete the row.</SMALL>"
            ))
        display(fancy_widget)