def update(self):
    """Recompute the deleted, modified, added and merged row sets.

    Reads ``self.old``, ``self.new``, ``self.redcap_df`` and
    ``self.form_complete``; writes ``self.deleted``, ``self.modified``,
    ``self.added`` and ``self.merged``.

    ``self.merged`` is ``self.redcap_df`` with the added rows concatenated
    below it, or ``self.redcap_df`` itself when there is nothing to add.
    """
    # Imported locally so this method does not depend on a module-level
    # ``pd`` alias being present.
    import pandas as pd

    # NOTE(review): make_similar presumably aligns the two frames in place
    # (its return value is not used) -- confirm against its definition.
    make_similar(self.old, self.new)
    self.deleted = h.diff(self.old, self.new.id)
    self.modified = h.intersection(self.old, self.new, "id").sort_values("id")

    added = h.diff(self.new, self.old.id)
    # Mark every added row as complete before comparing against redcap.
    added[self.form_complete] = 1
    added["common_complete"] = 1
    make_similar(added, self.redcap_df)
    added = h.diff(added, self.redcap_df)
    self.added = added

    if added is not None and not added.empty:
        # DataFrame.append was removed in pandas 2.0; pd.concat with the
        # original index kept is the equivalent operation.
        self.merged = pd.concat([self.redcap_df, added], sort=False)
    else:
        self.merged = self.redcap_df
def test_diff_subtracting_conflicting_rows_but_using_id_rows(A, B2):
    """Diffing against the single column ``B2[0]`` matches on that column only."""
    remaining_rows = [(1, 1), (3, 9)]
    actual = h.diff(A, B2[0])
    assert actual.equals(pd.DataFrame(remaining_rows))
def test_diff_subtracting_nonconflicting_rows(A, B):
    """Rows of ``A`` with an exact counterpart in ``B`` are dropped by ``diff``."""
    remaining_rows = [(1, 1), (3, 9)]
    actual = h.diff(A, B)
    assert actual.equals(pd.DataFrame(remaining_rows))
def show_missing_subjects(self):
    """Report rows of ``studydata`` whose subject is absent from the merged data.

    The subject is the part of ``patientid`` before the first underscore,
    stripped of surrounding whitespace. The remaining ``studydata`` rows are
    kept only if they are unflagged, have an ``interview_date`` before
    2019-05-01 and a ``study`` other than ``"hcpa"``. Each kept row gets a
    ``reason`` of ``"Missing in Box"``.

    Returns:
        The filtered frame, after it has been passed to ``self.warn_missing``
        and displayed.
    """
    df = self.merged.copy()
    # ``n`` must be a keyword: pandas 2.0 made every ``str.split`` argument
    # other than ``pat`` keyword-only.
    df["subject"] = df["patientid"].str.split("_", n=1, expand=True)[0].str.strip()

    missing = h.diff(studydata, df.subject)
    missing = missing[missing.flagged.isnull()]
    missing = missing[missing.interview_date < "2019-05-01"]
    # Take an explicit copy so the column assignment below is not made on a
    # filtered slice (avoids pandas' SettingWithCopyWarning).
    missing = missing[missing.study != "hcpa"].copy()
    missing["reason"] = "Missing in Box"

    self.warn_missing(missing, self.form)
    display(missing)
    return missing
def on_update(btn):
    """Push the edits made in ``sheet`` to ``table``.

    Rows of the sheet that differ from ``not_in_redcap`` are split on their
    ``delete`` flag: unflagged rows are sent with ``table.send_frame`` and the
    ids of flagged rows are passed to ``table.delete_records``. ``btn`` is the
    clicked widget and is not used.
    """
    edited = ipysheet.to_dataframe(sheet).replace("nan", np.nan)

    # delta of changes
    changed = h.diff(edited, not_in_redcap)

    # Everything but the first column is sent (presumably dropping the
    # ``delete`` flag -- confirm the column order of the sheet).
    to_send = changed[~changed.delete].iloc[:, 1:]
    if not to_send.empty:
        response = table.send_frame(to_send)
        print("Updates: ", response.status_code, response.content)

    ids_to_delete = changed[changed.delete].id.tolist()
    if ids_to_delete:
        response = table.delete_records(ids_to_delete)
        print("Delete Records: ", response.status_code, response.content)
# modified = h.conflict(current_redcap, ksads, intersect_on="id", sources=("redcap", "ksads.net")) # # print(ksads.shape, added.shape, deleted.shape, modified.shape) # %% import changes.accepted df = ksads.set_index('id') changes.accepted.execute(df) # ACTION! ksads = df.reset_index() # %% [markdown] # # Capture Changes # %% # print new shapes added = h.diff(ksads, current_redcap.id) deleted = h.diff(current_redcap, ksads.id) modified = h.conflict(current_redcap, ksads, intersect_on="id", sources=("redcap", "ksads.net")) print(ksads.shape, added.shape, deleted.shape, modified.shape) # %% # deleted is always empty, if not throw error assert deleted.empty, "KSADS.net data has been deleted, take a look at `deleted` dataframe and figure out whether to delete those rows from redcap, Invalidate, mark a flag, or notify users etc." deleted file = open("changes/to_review.py", "w") file.write("from changes.utils import block, modify\n\n\ndef execute(df):\n pass\n") # %% def generate_blocking_code_for_added_rows(added):
def show_not_in_redcap(self):
    """Show an editable sheet of merged rows whose subject is not in ``studyids``.

    The subject is the part of ``patientid`` before the first underscore,
    stripped of surrounding whitespace. Rows whose subject is not found in
    ``studyids.subject`` are placed in an ipysheet widget with two extra
    leading columns: a ``delete`` checkbox and a ``link`` to the record in
    REDCap. "Update" sends the edited rows to ``table`` and deletes the
    flagged ones; "Reset" restores the sheet's original cells.

    The widget is displayed only when there is at least one such row.
    """
    df = self.merged.copy()
    # ``n`` must be a keyword: pandas 2.0 made every ``str.split`` argument
    # other than ``pat`` keyword-only.
    df["subject"] = df["patientid"].str.split("_", n=1, expand=True)[0].str.strip()

    # The last column is dropped again (presumably the helper ``subject``
    # column added above -- confirm that h.diff keeps column order).
    not_in_redcap = h.diff(df, studyids.subject).iloc[:, :-1]
    h.asInt(not_in_redcap, "id", "common_complete", self.form_complete)
    not_in_redcap.insert(0, "delete", False)
    not_in_redcap.insert(1, "link", "view")

    sheet = ipysheet.sheet(ipysheet.from_dataframe(not_in_redcap))

    spaced = wg.Layout(margin="30px 0 20px 0")
    save_btn = wg.Button(description="Update", icon="save")
    reset_btn = wg.Button(description="Reset", icon="trash")
    btns = wg.HBox([save_btn, reset_btn], layout=spaced)

    def on_reset(btn):
        """Restore the sheet's cells from the unedited frame."""
        sheet.cells = ipysheet.from_dataframe(not_in_redcap).cells

    reset_btn.on_click(on_reset)

    def on_update(btn):
        """Send edited rows to ``table`` and delete the rows flagged for deletion."""
        df = ipysheet.to_dataframe(sheet)
        df = df.replace("nan", np.nan)

        # delta of changes
        z = h.diff(df, not_in_redcap)

        # Skip the first column (the ``delete`` flag inserted at position 0).
        updates = z[~z.delete].iloc[:, 1:]
        if not updates.empty:
            r = table.send_frame(updates)
            print("Updates: ", r.status_code, r.content)

        delete = z[z.delete].id.tolist()
        if delete:
            r = table.delete_records(delete)
            print("Delete Records: ", r.status_code, r.content)

    save_btn.on_click(on_update)

    fancy_widget = wg.VBox([sheet, btns])

    def convert_to_links():
        """Replace column 1 of the sheet with one "view" hyperlink per value of cell 2."""
        values = [
            wg.HTML(
                '<a target="_blank" href="https://redcap.wustl.edu/redcap/redcap_v8.11.0/DataEntry/record_home.php?pid=3355&arm=1&id=%s">view</a>'
                % x)
            for x in sheet.cells[2].value
        ]
        ipysheet.column(1, values)

    convert_to_links()

    if not not_in_redcap.empty:
        display(
            HTML(
                "<H3>Subject IDs not in Redcap</H3><SMALL>Please either change patientid to match an ID in redcap or delete the row.</SMALL>"
            ))
        display(fancy_widget)
def test_diff_subtracting_conflicting_rows(A, B2):
    """With no row of ``B2`` matching exactly, ``diff`` must return ``A`` unchanged."""
    actual = h.diff(A, B2)
    assert actual.equals(A)
def test_diff_from_same(A, B):
    """Diffing a dataframe against itself must leave no rows."""
    for frame in (A, B):
        assert h.diff(frame, frame).empty