예제 #1
0
def get_json_file(dIndex):
    """Download one donor's Tidepool data into a PHI-<userID>.json file.

    Parameters
    ----------
    dIndex : int
        Row index into the module-level ``uniqueDonors`` frame.

    Notes
    -----
    A blank placeholder json is written first so that concurrent workers
    skip this donor while the download is in progress.  The exists-check
    plus placeholder-write is not atomic, so a narrow race window remains
    between processes — TODO confirm this is acceptable for this pipeline.
    """
    userID = uniqueDonors.userID[dIndex]
    donorGroup = uniqueDonors.donorGroup[dIndex]

    outputFilePathName = os.path.join(args.donorJsonDataFolder,
                                      "PHI-" + userID + ".json")

    # if the json file already exists, do NOT pull it again
    if not os.path.exists(outputFilePathName):

        # create a temp file so that other processes know the download is
        # in progress
        blankDF.to_json(outputFilePathName)

        # case where donorGroup is bigdata, but should be ""
        if pd.isnull(donorGroup):
            donorGroup = ""

        try:
            # get environmental variables
            email, password = \
                environmentalVariables.get_environmental_variables(donorGroup)

            # get json data
            get_user_data(email, password, userID, outputFilePathName)
        except Exception:
            # BUG FIX: if the download fails, remove the blank placeholder;
            # otherwise every later run finds the file, believes the data
            # was already downloaded, and never retries this donor.
            if os.path.exists(outputFilePathName):
                os.remove(outputFilePathName)
            raise

        print(userID, "complete")

    else:
        print(userID, "data already downloaded")

    return
예제 #2
0
def get_secrets(donorGroup):
    """Return the (email, password) credentials for a donor group.

    The "bigdata" group is stored under the empty-string key in the
    environment variables, so it is remapped before the lookup.
    """
    group_key = "" if donorGroup == "bigdata" else donorGroup
    return environmentalVariables.get_environmental_variables(group_key)
예제 #3
0
        print(donorGroup, "ERROR", myResponse.status_code)
        sys.exit("Error with" + donorGroup + ":" + str(myResponse.status_code))

    return donorMetaData


# %% loop through each donor group to get a list of donors, bdays, and ddays
for donorGroup in donorGroups:
    # per-group output CSV path (built from the ORIGINAL group name,
    # before the bigdata -> "" remap below)
    outputDonorList = os.path.join(donorListFolder, donorGroup + "-donors.csv")

    # "bigdata" credentials are stored under the empty-string key
    if donorGroup == "bigdata":
        donorGroup = ""

    # get environmental variables
    email, password = \
        environmentalVariables.get_environmental_variables(donorGroup)

    # load in bdays and ddays and append to all donor list
    donorMetadataList = get_donor_info(email, password, donorMetadataColumns)

    donorMetadataList.to_csv(outputDonorList)
    # NOTE(review): for the bigdata group this prints just "BIGDATA_" since
    # donorGroup was remapped to "" — confirm that is the intended label
    print("BIGDATA_" + donorGroup, "complete")
    donorMetadataList["donorGroup"] = donorGroup

    # BUG FIX: DataFrame.append was removed in pandas 2.0; pd.concat is the
    # documented replacement (and matches usage elsewhere in this project)
    alldonorMetadataList = pd.concat([alldonorMetadataList, donorMetadataList],
                                     ignore_index=True,
                                     sort=False)

# %% save output
alldonorMetadataList.sort_values(by=['name', 'donorGroup'], inplace=True)
uniqueDonors = alldonorMetadataList.loc[~alldonorMetadataList["userID"].
예제 #4
0
                                                     ignore_index=True)
        else:
            print(donorGroup, "ERROR", myResponse2.status_code)
            sys.exit("Error with 2nd API call" + donorGroup + ":" +
                     str(myResponse2.status_code))
    else:
        print(donorGroup, "ERROR", myResponse.status_code)
        sys.exit("Error with 1st API call" + donorGroup + ":" +
                 str(myResponse.status_code))

    return donorMetaData


# NOTE(review): elsewhere in this file get_environmental_variables(...) is
# unpacked as an (email, password) pair; here the whole return value is
# passed as `password`, which looks suspicious — confirm against
# get_data_from_api's signature before relying on this call.
data, responses = get_data_from_api(
    email="*****@*****.**",
    password=environmentalVariables.get_environmental_variables(""))

# %% loop through each donor group to get a list of donors, bdays, and ddays
# NOTE(review): this loop appears truncated at the chunk boundary — the
# sibling loop earlier in this file also writes a CSV and accumulates the
# per-group frames after get_donor_info; verify the full body upstream.
for donorGroup in donorGroups:
    # per-group output CSV path (uses the ORIGINAL group name, before remap)
    outputDonorList = os.path.join(donorListFolder, donorGroup + "-donors.csv")

    # "bigdata" credentials are stored under the empty-string key
    if donorGroup == "bigdata":
        donorGroup = ""

    # get environmental variables
    email, password = \
        environmentalVariables.get_environmental_variables(donorGroup)

    # load in bdays and ddays and append to all donor list
    donorMetadataList = get_donor_info(email, password, donorMetadataColumns)
예제 #5
0
def accept_and_get_list(args):
    """Accept pending donors in every donor group and build the unique list.

    For each donor group, accepts new data-share invitations, collects the
    resulting donor userIDs, drops duplicates (donors who gave to more than
    one group) and QA testing accounts, and returns the polished list.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``date_stamp``, ``data_path`` and ``save_donor_list``.

    Returns
    -------
    pandas.DataFrame
        Columns ``userID`` and ``donorGroup``, sorted by donorGroup.
    """
    # create output folders
    date_stamp = args.date_stamp  # dt.datetime.now().strftime("%Y-%m-%d")
    phi_date_stamp = "PHI-" + date_stamp
    donor_folder = os.path.join(args.data_path, phi_date_stamp + "-donor-data")
    make_folder_if_doesnt_exist(donor_folder)

    uniqueDonorList_path = os.path.join(
        donor_folder, phi_date_stamp + "-uniqueDonorList.csv")

    # define the donor groups
    donor_groups = [
        "bigdata",
        "AADE",
        "BT1",
        "carbdm",
        "CDN",
        "CWD",
        "DHF",
        "DIATRIBE",
        "diabetessisters",
        "DYF",
        "JDRF",
        "NSF",
        "T1DX",
    ]

    all_donors_df = pd.DataFrame(columns=["userID", "donorGroup"])

    # accounts to ignore (QA testing)
    # NOTE(review): this list contains duplicate ids ('275ffa345f',
    # 'dccc3baf63'); harmless because it is only consumed via set() below,
    # but worth deduplicating at the source.
    accounts_to_ignore = [
        'f597f21dcd', '0ef51a0121', '38c3795fcb', '69c99b51f6', '84c2cdd947',
        '9cdebdc316', '9daaf4d4c1', 'bdf4724bed', 'c7415b5097', 'dccc3baf63',
        'ee145393b0', '00cd0ffada', '122a0bf6c5', '898c3d8056', '9e4f3fbc2a',
        '1ebe2a2790', '230650bb9c', '3f8fdabcd7', '636aad0f58', '70df39aa43',
        '92a3c903fe', '3043996405', '0239c1cfb2', '03852a5acc', '03b1953135',
        '0ca5e75e4a', '0d8bdb05eb', '19123d4d6a', '19c25d34b5', '1f6866bebc',
        '1f851c13a5', '275ffa345f', '275ffa345f', '3949134b4a', '410865ba56',
        '57e2b2ed3d', '59bd6891e9', '5acf17a80a', '627d0f4bf1', '65247f8257',
        '6e5287d4c4', '6fc3a4ad44', '78ea6c3cad', '7d8a80e8ce', '8265248ea3',
        '8a411facd2', '98f81fae18', '9d601a08a3', 'aa9fbc4ef5', 'aaac56022a',
        'adc00844c3', 'aea4b3d8ea', 'bc5ee641a3', 'c8328622d0', 'cfef0b91ac',
        'df54366b1c', 'e67aa71493', 'f2103a44d5', 'dccc3baf63'
    ]

    for donor_group in donor_groups:
        # "bigdata" credentials are stored under the empty-string key
        dg = "" if donor_group == "bigdata" else donor_group

        nNewDonors, donors_df = accept_new_donors_and_get_donor_list(
            environmentalVariables.get_environmental_variables(dg))

        donors_df["donorGroup"] = donor_group
        print(donor_group, "complete, there are %d new donors\n" % nNewDonors)
        all_donors_df = pd.concat([all_donors_df, donors_df])

    all_donors_df.sort_values(by=['userID', 'donorGroup'], inplace=True)
    # keep the first group each userID appears in (groups were just sorted)
    unique_donors = all_donors_df.loc[~all_donors_df["userID"].duplicated()]

    # IMPROVED: the original computed this set difference twice; do it once
    keep_ids = set(unique_donors["userID"]) - set(accounts_to_ignore)
    total_donors = len(keep_ids)

    final_donor_list = pd.DataFrame(list(keep_ids), columns=["userID"])

    # re-attach each userID's donorGroup
    final_donor_list = pd.merge(final_donor_list,
                                unique_donors,
                                how="left",
                                on="userID")

    # polish up the final donor list
    final_donor_list.sort_values(by="donorGroup", inplace=True)
    final_donor_list.reset_index(drop=True, inplace=True)

    if args.save_donor_list:
        print("saving donor list ...\n")
        final_donor_list.to_csv(uniqueDonorList_path)
    else:
        print("donor list is NOT being saved ...\n")

    print("There are %d total donors," % total_donors)
    print("after removing donors that donated to more than 1 group,")
    print("and after removing QA testing accounts.")

    return final_donor_list
def get_shared_metadata(
    donor_group=np.nan,
    userid_of_shared_user=np.nan,
    auth=np.nan,
    email=np.nan,
    password=np.nan,
):
    """Fetch Tidepool profile metadata for a shared (or the master) user.

    Credentials are resolved in priority order: donor_group env vars,
    then an explicit ``auth`` tuple, then ``email``/``password``, and
    finally an interactive prompt.

    Returns
    -------
    (pandas.DataFrame, str)
        A frame indexed by userid holding the profile fields, and the
        userid the metadata belongs to.
    """
    # login
    if pd.notnull(donor_group):
        # "bigdata" credentials are stored under the empty-string key
        dg = "" if donor_group == "bigdata" else donor_group
        auth = environmentalVariables.get_environmental_variables(dg)

    # BUG FIX: pd.isnull() on a tuple returns an element-wise bool array,
    # so `if pd.isnull(auth)` raised ValueError whenever a caller actually
    # passed auth=(email, password). Apply the scalar null check only when
    # auth is not already a credentials tuple.
    if not isinstance(auth, tuple) and pd.isnull(auth):
        if pd.isnull(email):
            email = input("Enter Tidepool email address:\n")

        if pd.isnull(password):
            password = getpass.getpass("Enter password:\n")

        auth = (email, password)

    api_call = "https://api.tidepool.org/auth/login"
    api_response = requests.post(api_call, auth=auth)
    if api_response.ok:
        xtoken = api_response.headers["x-tidepool-session-token"]
        userid_master = json.loads(api_response.content.decode())["userid"]
        headers = {
            "x-tidepool-session-token": xtoken,
            "Content-Type": "application/json"
        }
    else:
        sys.exit("Error with " + auth[0] + ":" + str(api_response.status_code))

    if pd.isnull(userid_of_shared_user):
        userid_of_shared_user = userid_master
        print("getting metadata for the master account since no shared " +
              "user account was given")

    print("logging into", auth[0], "...")

    # get shared or donor metadata
    print("get donor metadata for %s ..." % userid_of_shared_user)
    api_call = ("https://api.tidepool.org/metadata/%s/profile" %
                userid_of_shared_user)
    api_response = requests.get(api_call, headers=headers)
    df = pd.DataFrame(dtype=object,
                      columns=[
                          "diagnosisType", "diagnosisDate", "biologicalSex",
                          "birthday", "targetTimezone", "targetDevices",
                          "isOtherPerson", "about"
                      ])

    if api_response.ok:
        user_profile = json.loads(api_response.content.decode())
        # only accounts with a "patient" section carry these fields
        if "patient" in user_profile.keys():
            for k, d in user_profile["patient"].items():
                df.at[userid_of_shared_user, k] = d
    else:
        sys.exit("Error getting metadata API " + str(api_response.status_code))

    # logout
    api_call = "https://api.tidepool.org/auth/logout"
    api_response = requests.post(api_call, auth=auth)

    if api_response.ok:
        print("successfully logged out of", auth[0])

    else:
        sys.exit("Error with logging out for " + auth[0] + ":" +
                 str(api_response.status_code))
    df.index.rename("userid", inplace=True)

    return df, userid_of_shared_user
예제 #7
0
def get_data(
    weeks_of_data=10 * 52,
    donor_group=np.nan,
    userid_of_shared_user=np.nan,
    auth=np.nan,
    email=np.nan,
    password=np.nan,
):
    """Download Tidepool device data for a shared (or the master) user.

    Parameters
    ----------
    weeks_of_data : int
        Weeks of history to pull (default 10 years); pulled in one-year
        chunks when more than 52 weeks are requested.
    donor_group, auth, email, password
        Credential sources, resolved in that priority order; interactive
        prompt is the last resort.
    userid_of_shared_user : str or nan
        User to pull data for; defaults to the master (login) account.

    Returns
    -------
    (pandas.DataFrame, str)
        The downloaded data and the userid it belongs to.
    """
    # login
    if pd.notnull(donor_group):
        # "bigdata" credentials are stored under the empty-string key
        dg = "" if donor_group == "bigdata" else donor_group
        auth = environmentalVariables.get_environmental_variables(dg)

    # BUG FIX: pd.isnull() on a tuple returns an element-wise bool array,
    # so `if pd.isnull(auth)` raised ValueError whenever a caller actually
    # passed auth=(email, password). Apply the scalar null check only when
    # auth is not already a credentials tuple.
    if not isinstance(auth, tuple) and pd.isnull(auth):
        if pd.isnull(email):
            email = input("Enter Tidepool email address:\n")

        if pd.isnull(password):
            password = getpass.getpass("Enter password:\n")

        auth = (email, password)

    api_call = "https://api.tidepool.org/auth/login"
    api_response = requests.post(api_call, auth=auth)
    if api_response.ok:
        xtoken = api_response.headers["x-tidepool-session-token"]
        userid_master = json.loads(api_response.content.decode())["userid"]
        headers = {
            "x-tidepool-session-token": xtoken,
            "Content-Type": "application/json"
        }
    else:
        sys.exit("Error with " + auth[0] + ":" + str(api_response.status_code))

    if pd.isnull(userid_of_shared_user):
        userid_of_shared_user = userid_master
        print("getting data for the master account since no shared " +
              "user account was given")

    print("logging into", auth[0], "...")

    # download user data
    print("downloading data ...")
    df = pd.DataFrame()
    # BUG FIX: pd.datetime was removed from pandas; pd.Timestamp.now() is
    # the supported equivalent
    endDate = pd.Timestamp.now() + pd.Timedelta(1, unit="d")

    if weeks_of_data > 52:
        years_of_data = int(np.floor(weeks_of_data / 52))

        # walk backwards from endDate one year at a time
        for _ in range(0, years_of_data + 1):
            # BUG FIX: the original built pd.datetime(year-1, month, day+1),
            # which raises ValueError when endDate falls on the last day of
            # a month (day 31 -> day 32); DateOffset handles month ends.
            startDate = (pd.Timestamp(endDate) - pd.DateOffset(years=1) +
                         pd.Timedelta(1, unit="d"))
            year_df, endDate = get_data_api(userid_of_shared_user, startDate,
                                            endDate, headers)

            df = pd.concat([df, year_df], ignore_index=True, sort=False)

    else:
        startDate = (pd.to_datetime(endDate) -
                     pd.Timedelta(weeks_of_data * 7, "d"))

        df, _ = get_data_api(userid_of_shared_user, startDate, endDate,
                             headers)

    # logout
    api_call = "https://api.tidepool.org/auth/logout"
    api_response = requests.post(api_call, auth=auth)

    if api_response.ok:
        print("successfully logged out of", auth[0])

    else:
        sys.exit("Error with logging out for " + auth[0] + ":" +
                 str(api_response.status_code))

    return df, userid_of_shared_user
예제 #8
0
# empty metadata frame; one row per user is filled in as metadata arrives
meta_df = pd.DataFrame(
    dtype=object,
    columns=[
        "diagnosisType",
        "diagnosisDate",
        "biologicalSex",
        "birthday",
        "targetTimezone",
        "targetDevices",
        "isOtherPerson",
        "about",
    ],
)

# % loop through each user and get data
for donor_group in donor_list["donorGroup"].unique():
    if donor_group == "bigdata":
        dg = ""
    else:
        dg = donor_group

    headers, userid = login_api(
        environmentalVariables.get_environmental_variables(dg))

    userids = donor_list.loc[donor_list["donorGroup"] == donor_group, "userID"]
    for userid in userids:
        temp_df = get_metadata_api(userid, headers)

        csv_output_path = os.path.join(csv_path, "PHI-" + userid + ".csv")

        skip_if_no_overwrite_and_file_exists = (
            (not args.overwrite) & (os.path.exists(csv_output_path)))

        if not args.metadata_only:
            if not skip_if_no_overwrite_and_file_exists:
                data = get_all_data_api(userid)
                data_rows = len(data)
                if data_rows > 0: