Example #1
0
 def warn_good_import(added, deleted, form, modified):
     """Show a success box and the new rows when the import is clean.

     Nothing is displayed unless both ``deleted`` and ``modified`` are empty.

     Args:
         added: Frame of newly added rows; its length is reported and it is
             rendered below the message box.
         deleted: Frame of deleted rows; must be empty for the box to show.
         form: Form name used as the prefix of the box title.
         modified: Frame of modified rows; must be empty for the box to show.
     """
     if not (deleted.empty and modified.empty):
         return

     message = """There are %s rows of new data and no unexpected changes to old data.
                         Please proceed with <code>data["raw"]</code>.""" % len(added)
     title = form + ": Importing Data"
     h.showbox(message, title, "success")
     h.showdataframe(added)
Example #2
0
 def show_modified_rows(self):
     """Warn about rows of the old data that differ in the new data.

     Reads ``self.modified``; when it is non-empty, shows a "danger" box
     titled with ``self.form`` and renders the modified rows. Does nothing
     when there are no modified rows.
     """
     modified = self.modified
     if modified.empty:
         return

     h.showbox(
         """There are %s rows in the old data that has been modified in the new data.
                         If this is expected, you can ignore this message.
                         To further inspect rows type <code>data["modified"]</code>"""
         % len(modified),
         self.form + ": Modified",
         "danger",
     )
     h.showdataframe(modified)
Example #3
0
    def update(self):
        """Recompute the deleted / modified / added row sets and ``self.merged``.

        Sets:
            self.deleted: ``h.diff(self.old, self.new.id)``, i.e. old rows
                whose id is presumably no longer present in the new data.
            self.modified: ``h.intersection`` of old and new on ``"id"``,
                sorted by ``id``.
            self.added: new rows (by id) that are also not already in
                ``self.redcap_df``; flagged complete via ``self.form_complete``
                and ``"common_complete"``.
            self.merged: ``self.redcap_df`` with the added rows appended, or
                ``self.redcap_df`` itself when there is nothing to add.
        """
        # Function-scope import keeps this block self-contained.
        import pandas as pd

        # NOTE(review): make_similar is defined elsewhere; presumably it aligns
        # the two frames (columns/dtypes) in place so they can be compared.
        make_similar(self.old, self.new)
        self.deleted = h.diff(self.old, self.new.id)
        self.modified = h.intersection(self.old, self.new, "id").sort_values("id")

        added = h.diff(self.new, self.old.id)
        added[self.form_complete] = 1
        added["common_complete"] = 1
        make_similar(added, self.redcap_df)
        added = h.diff(added, self.redcap_df)
        self.added = added

        if added is not None and not added.empty:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # equivalent that works on both old and new pandas versions.
            self.merged = pd.concat([self.redcap_df, added], sort=False)
        else:
            self.merged = self.redcap_df
Example #4
0
    def show_modified(self):
        """Let the user pick which version to keep for rows that clash.

        Rows of ``self.redcap_df`` and ``self.added`` sharing the same
        ``patientid`` / ``patienttype`` are collected with ``h.intersection``
        (tagged "current" / "new"). For every ``id`` group a sheet of the
        differing columns and a "Keep version:" dropdown are displayed,
        followed by a "Submit changes" button. Nothing is displayed when there
        are no clashing rows.

        Clicking the button rewrites ``self.added``: the selected rows are put
        in front of the existing ``self.added`` and duplicates on
        ``patientid`` / ``patienttype`` are dropped, so the selected version
        wins (``drop_duplicates`` keeps the first occurrence by default).
        """
        # Function-scope import keeps this block self-contained.
        import pandas as pd

        def update(btn):
            # Each dropdown's value is the index label of the chosen row in
            # ``dups``. The last column is dropped before merging (presumably
            # the ``_merge`` source tag -- confirm against h.intersection).
            chosen = dups.loc[[dd.value for dd in selections]].iloc[:, :-1]
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # equivalent replacement.
            self.added = pd.concat([chosen, self.added], sort=False).drop_duplicates(
                ["patientid", "patienttype"])

        btn = wg.Button(description="Submit changes")
        btn.on_click(update)

        dups = h.intersection(
            self.redcap_df,
            self.added,
            ["patientid", "patienttype"],
            sources=("current", "new"),
        ).reset_index(drop=True)
        selections = []
        output = []
        for _, group in dups.groupby("id"):
            cols = ["id", "patientid"] + h.unequal_columns(group)
            # NOTE(review): relies on h.unequal_columns including "_merge",
            # otherwise set_index raises KeyError.
            show = group[cols].set_index("_merge")
            sheet = ipysheet.sheet(ipysheet.from_dataframe(show))
            # Dropdown entries are (label, value) = (source tag, row label).
            options = [(source, row_label)
                       for row_label, source in group._merge.items()]

            dd = wg.Dropdown(
                options=options,
                value=options[0][1],
                description="Keep version:",
                disabled=False,
            )
            selections.append(dd)
            output.append(sheet)
            output.append(dd)

        if output:
            display(
                HTML(
                    "<H3>Should New Data Overwrite Old Data</H3><SMALL>Please select which version to keep.</SMALL>"
                ))
            for widget in output:
                display(widget)
            display(btn)
Example #5
0
def test_diff_subtracting_conflicting_rows_but_using_id_rows(A, B2):
    """Passing only column 0 of B2 makes diff match on that column alone."""
    expected = pd.DataFrame([(1, 1), (3, 9)])
    assert h.diff(A, B2[0]).equals(expected)
Example #6
0
def test_diff_subtracting_nonconflicting_rows(A, B):
    """Rows of A that match a row of B exactly are removed from the result."""
    expected = pd.DataFrame([(1, 1), (3, 9)])
    assert h.diff(A, B).equals(expected)
Example #7
0
def test_conflict_on_conflicting_rows(A, B2):
    """Conflicting rows come back from both frames, tagged by their side."""
    left_rows = [(0, 0, "left"), (2, 4, "left")]
    right_rows = [(0, 99, "right"), (2, 99, "right")]
    expected = pd.DataFrame(
        left_rows + right_rows,
        index=[0, 2, 0, 1],
        columns=[0, 1, "_merge"],
    )
    assert h.conflict(A, B2, [0]).equals(expected)
Example #8
0
    def show_missing_subjects(self):
        """Report study subjects that have no row in the merged data.

        The subject id is taken as the part of ``patientid`` before the first
        underscore (whitespace stripped). ``studydata`` rows whose subject is
        not among them are then narrowed to rows that are not flagged, have
        ``interview_date`` before 2019-05-01, and do not belong to study
        ``"hcpa"``.

        Returns:
            The filtered frame of missing subjects, with a ``reason`` column
            set to ``"Missing in Box"``. It is also passed to
            ``self.warn_missing`` and displayed.
        """
        df = self.merged.copy()
        # ``n`` must be passed by keyword: positional use was deprecated and
        # then removed in pandas 2.0.
        df["subject"] = df["patientid"].str.split("_", n=1, expand=True)[0].str.strip()
        missing = h.diff(studydata, df.subject)

        missing = missing[missing.flagged.isnull()]
        missing = missing[missing.interview_date < "2019-05-01"]
        # Explicit copy so the column assignment below targets an independent
        # frame rather than a filtered slice (avoids SettingWithCopyWarning).
        missing = missing[missing.study != "hcpa"].copy()
        missing["reason"] = "Missing in Box"
        self.warn_missing(missing, self.form)
        display(missing)
        return missing
Example #9
0
        def on_update(btn):
            """Push the sheet's edits and deletions when the button is clicked.

            Rows of the edited sheet that differ from ``not_in_redcap`` are
            split on their ``delete`` flag: unflagged rows are sent with
            ``table.send_frame`` (first column dropped), flagged rows have
            their ids passed to ``table.delete_records``.
            """
            edited = ipysheet.to_dataframe(sheet).replace("nan", np.nan)

            # Only rows the user actually changed.
            changed = h.diff(edited, not_in_redcap)

            to_send = changed[~changed.delete].iloc[:, 1:]
            if not to_send.empty:
                response = table.send_frame(to_send)
                print("Updates: ", response.status_code, response.content)

            ids_to_delete = changed[changed.delete].id.tolist()
            if ids_to_delete:
                response = table.delete_records(ids_to_delete)
                print("Delete Records: ", response.status_code, response.content)
Example #10
0
def generate_code_to_revert_modifications(df):
    """Write one ``modify(...)`` line per differing cell of each conflicting id.

    Args:
        df: Conflict frame with an ``id`` column and a ``_merge`` column whose
            value is ``"redcap"`` for the REDCap version of a row. The
            previous code ignored this argument and read a module-level
            ``modified`` frame instead; the argument is now used.

    Output goes to the module-level ``file`` handle: a ``# <id>`` comment per
    group, then one ``modify(df, id, "col", new, old)`` line for every column
    where the two versions differ and are not both missing.

    Raises:
        IndexError: if a group lacks either a ``"redcap"`` row or a
            non-``"redcap"`` row.
    """
    for id_, group in df.groupby('id'):
        redcap = group[group._merge == "redcap"].iloc[0]
        ksads = group[group._merge != "redcap"].iloc[0]

        # Compare everything except the last column (presumably ``_merge``).
        cols = h.unequal_columns(group.iloc[:, :-1])
        print(f"\n    # {id_}", file=file)
        for col in cols:
            vr, vk = redcap[col], ksads[col]
            # Both missing means no real difference to record.
            if isna(vr) and isna(vk):
                continue

            escaped_new = escape_value(vk)
            escaped_permanent = escape_value(vr)

            print(f'    modify(df, {id_}, "{col}", {escaped_new}, {escaped_permanent})', file=file)
Example #11
0
 def warn_missing(self, missing, form):
     """Show whether any Redcap subjects are missing from the current data.

     Args:
         missing: Frame of missing subjects. When empty a success box is
             shown; otherwise a danger box with the row count, followed by
             the frame itself.
         form: Form name used as the prefix of the box title.
     """
     if not missing.empty:
         h.showbox(
             """There are %s Redcap subjects missing from the current data."""
             % len(missing),
             form + ": Redcap Subjects Missing",
             "danger",
         )
         h.showdataframe(missing)
         return

     h.showbox(
         """All patientid's are in New Data.""",
         form + ": No Missing Redcap Subjects",
         "success",
     )
Example #12
0
    def warn_not_in_redcap(not_in_redcap, form):
        """Show whether any rows carry a patientid that is unknown to Redcap.

        Args:
            not_in_redcap: Ignored. NOTE(review): the value passed in is
                discarded and recomputed from ``df`` and ``studyids``, which
                are not defined in this function and must come from an outer
                scope. Confirm whether that is intended.
            form: Form name used as the prefix of the box title.
        """
        rows = h.difference(df, studyids.subject).copy()
        rows['reason'] = 'PatientID not in Redcap'
        rows.rename(columns={'sitename': 'site'}, inplace=True)

        if not rows.empty:
            h.showbox(
                """There are %s rows with patientid missing from Redcap.""" %
                len(rows),
                form + ": Subjects Missing from Redcap",
                "danger",
            )
            h.showdataframe(rows)
            return

        h.showbox(
            """All patientid's are in Redcap.""",
            form + ": No Subject Missing from Redcap",
            "success",
        )
Example #13
0
 def warn_duplicates(self):
     """Show whether any patientid + patienttype combination occurs twice.

     Every row taking part in a duplicate is kept (``keep=False``) and
     tagged with ``reason = 'Duplicate IDs'``. A success box is shown when
     there are none; otherwise a danger box with the row count, followed by
     the duplicate rows.
     """
     form = self.form
     # NOTE(review): ``df`` is not defined in this method and must come from
     # an outer scope. Confirm which frame is meant.
     # Explicit copy so the ``reason`` column is added to an independent
     # frame rather than a filtered slice (avoids SettingWithCopyWarning).
     duplicates = df[df.duplicated(['patientid', 'patienttype'],
                                   keep=False)].copy()
     duplicates['reason'] = 'Duplicate IDs'
     if duplicates.empty:
         h.showbox(
             """All patientid + patienttype combos are unique.""",
             form + ": No Duplicates",
             "success",
         )
     else:
         h.showbox(
             """There are %s rows that contain the same patientid + patienttype."""
             % len(duplicates),
             form + ": Duplicates",
             "danger",
         )
         h.showdataframe(duplicates)
Example #14
0
def test_diff_from_same(A, B):
    """A frame diffed against itself leaves no rows."""
    for frame in (A, B):
        assert h.diff(frame, frame).empty
Example #15
0
# modified = h.conflict(current_redcap, ksads, intersect_on="id", sources=("redcap", "ksads.net"))
#
# print(ksads.shape, added.shape, deleted.shape, modified.shape)

# %%
# Run the accepted changes against the KSADS frame, indexed by ``id``.
# NOTE(review): the return value of execute() is ignored and ``df`` is reused
# afterwards, so execute() presumably mutates ``df`` in place -- confirm.
import changes.accepted
df = ksads.set_index('id')
changes.accepted.execute(df)   # ACTION!
ksads = df.reset_index()

# %% [markdown]
# # Capture Changes

# %%
# print new shapes
# The three change sets are recomputed against ``current_redcap`` first:
# diff by id in both directions, plus conflicts on ``id`` tagged by source.
added = h.diff(ksads, current_redcap.id)
deleted = h.diff(current_redcap, ksads.id)
modified = h.conflict(current_redcap, ksads, intersect_on="id", sources=("redcap", "ksads.net"))

print(ksads.shape, added.shape, deleted.shape, modified.shape)

# %%
# deleted is always empty, if not throw error
# NOTE(review): ``assert`` is stripped when Python runs with -O, so this
# guard would silently disappear in that mode.
assert deleted.empty, "KSADS.net data has been deleted, take a look at `deleted` dataframe and figure out whether to delete those rows from redcap, Invalidate, mark a flag, or notify users etc."
# NOTE(review): bare expression; a notebook only displays the last expression
# of a cell, and more statements follow in this cell.
deleted


# Start a fresh changes/to_review.py ("w" truncates any existing file) holding
# a stub ``execute(df)``.
# NOTE(review): the handle is not closed anywhere in the visible part of the
# file; make sure it is closed after the generated code has been written.
file = open("changes/to_review.py", "w")
file.write("from changes.utils import block, modify\n\n\ndef execute(df):\n    pass\n")
# %%
def generate_blocking_code_for_added_rows(added):
Example #16
0
def test_diff_subtracting_conflicting_rows(A, B2):
    """No row of B2 matches A exactly, so diff must return A unchanged."""
    assert h.diff(A, B2).equals(A)
Example #17
0
def test_conflict_on_nonconflicting_rows(A, B):
    """With nothing in conflict the returned frame is empty."""
    assert h.conflict(A, B, [0]).empty
Example #18
0
    def show_not_in_redcap(self):
        """Show an editable sheet of merged rows whose subject is not in Redcap.

        The subject id is taken as the part of ``patientid`` before the first
        underscore (whitespace stripped). Rows whose subject is not in
        ``studyids.subject`` are loaded into an ipysheet with two extra
        leading columns, ``delete`` (checkbox, default False) and ``link``.

        Buttons:
            Update: rows the user changed are sent with ``table.send_frame``;
                rows flagged ``delete`` have their ids passed to
                ``table.delete_records``.
            Reset: restores the sheet cells from the original frame.

        The heading and widget are displayed only when there is at least one
        such row.
        """
        df = self.merged.copy()
        # ``n`` must be passed by keyword: positional use was deprecated and
        # then removed in pandas 2.0.
        df["subject"] = df["patientid"].str.split("_", n=1, expand=True)[0].str.strip()
        # Drop the last column again (presumably the helper ``subject`` column
        # added above -- confirm h.diff keeps column order).
        not_in_redcap = h.diff(df, studyids.subject).iloc[:, :-1]
        h.asInt(not_in_redcap, "id", "common_complete", self.form_complete)
        not_in_redcap.insert(0, "delete", False)
        not_in_redcap.insert(1, "link", "view")

        sheet = ipysheet.sheet(ipysheet.from_dataframe(not_in_redcap))

        spaced = wg.Layout(margin="30px 0 20px 0")

        save_btn = wg.Button(description="Update", icon="save")
        reset_btn = wg.Button(description="Reset", icon="trash")
        btns = wg.HBox([save_btn, reset_btn], layout=spaced)

        def on_reset(btn):
            # Rebuild the cells from the untouched frame, discarding edits.
            sheet.cells = ipysheet.from_dataframe(not_in_redcap).cells

        reset_btn.on_click(on_reset)

        def on_update(btn):
            edited = ipysheet.to_dataframe(sheet)
            edited = edited.replace("nan", np.nan)

            # Only rows that differ from what was originally shown.
            z = h.diff(edited, not_in_redcap)

            # Unflagged rows are updates; the leading ``delete`` column is
            # dropped before sending.
            updates = z[~z.delete].iloc[:, 1:]
            if not updates.empty:
                r = table.send_frame(updates)
                print("Updates: ", r.status_code, r.content)

            delete = z[z.delete].id.tolist()
            if delete:
                r = table.delete_records(delete)
                print("Delete Records: ", r.status_code, r.content)

        save_btn.on_click(on_update)

        fancy_widget = wg.VBox([sheet, btns])

        def convert_to_links():
            # Replace column 1 ("link") with anchors built from the values of
            # the third cell group (presumably the ``id`` column -- confirm).
            values = [
                wg.HTML(
                    '<a target="_blank" href="https://redcap.wustl.edu/redcap/redcap_v8.11.0/DataEntry/record_home.php?pid=3355&arm=1&id=%s">view</a>'
                    % x) for x in sheet.cells[2].value
            ]
            ipysheet.column(1, values)

        convert_to_links()
        if not not_in_redcap.empty:
            display(
                HTML(
                    "<H3>Subject IDs not in Redcap</H3><SMALL>Please either change patientid to match an ID in redcap or delete the row.</SMALL>"
                ))
            display(fancy_widget)