def get_json_file(dIndex):
    """Download one donor's json data file, skipping donors already fetched.

    dIndex is a row index into the global uniqueDonors frame; output goes to
    args.donorJsonDataFolder as "PHI-<userID>.json".
    """
    user_id = uniqueDonors.userID[dIndex]
    group = uniqueDonors.donorGroup[dIndex]
    out_path = os.path.join(args.donorJsonDataFolder,
                            "PHI-" + user_id + ".json")

    # if the json file already exists, do NOT pull it again
    if os.path.exists(out_path):
        print(user_id, "data already downloaded")
        return

    # drop a placeholder file immediately so that other processes can tell
    # this download is in progress
    blankDF.to_json(out_path)

    # a null donorGroup stands for the "bigdata" group, which uses ""
    if pd.isnull(group):
        group = ""

    # get environmental variables
    email, password = \
        environmentalVariables.get_environmental_variables(group)

    # get json data
    get_user_data(email, password, user_id, out_path)
    print(user_id, "complete")
    return
def get_secrets(donorGroup):
    """Return the (email, password) credentials for a donor group.

    The "bigdata" group maps to the unprefixed environment variables (i.e.
    an empty group name).
    """
    group = "" if donorGroup == "bigdata" else donorGroup
    email, password = \
        environmentalVariables.get_environmental_variables(group)
    return email, password
print(donorGroup, "ERROR", myResponse.status_code) sys.exit("Error with" + donorGroup + ":" + str(myResponse.status_code)) return donorMetaData # %% loop through each donor group to get a list of donors, bdays, and ddays for donorGroup in donorGroups: outputDonorList = os.path.join(donorListFolder, donorGroup + "-donors.csv") if donorGroup == "bigdata": donorGroup = "" # get environmental variables email, password = \ environmentalVariables.get_environmental_variables(donorGroup) # load in bdays and ddays and append to all donor list donorMetadataList = get_donor_info(email, password, donorMetadataColumns) donorMetadataList.to_csv(outputDonorList) print("BIGDATA_" + donorGroup, "complete") donorMetadataList["donorGroup"] = donorGroup alldonorMetadataList = alldonorMetadataList.append(donorMetadataList, ignore_index=True, sort=False) # %% save output alldonorMetadataList.sort_values(by=['name', 'donorGroup'], inplace=True) uniqueDonors = alldonorMetadataList.loc[~alldonorMetadataList["userID"].
ignore_index=True) else: print(donorGroup, "ERROR", myResponse2.status_code) sys.exit("Error with 2nd API call" + donorGroup + ":" + str(myResponse2.status_code)) else: print(donorGroup, "ERROR", myResponse.status_code) sys.exit("Error with 1st API call" + donorGroup + ":" + str(myResponse.status_code)) return donorMetaData data, responses = get_data_from_api( email="*****@*****.**", password=environmentalVariables.get_environmental_variables("")) # %% loop through each donor group to get a list of donors, bdays, and ddays for donorGroup in donorGroups: outputDonorList = os.path.join(donorListFolder, donorGroup + "-donors.csv") if donorGroup == "bigdata": donorGroup = "" # get environmental variables email, password = \ environmentalVariables.get_environmental_variables(donorGroup) # load in bdays and ddays and append to all donor list donorMetadataList = get_donor_info(email, password, donorMetadataColumns)
def accept_and_get_list(args):
    """Accept pending donors for every donor group and return the combined,
    de-duplicated donor list as a DataFrame (optionally saved to csv).
    """
    # create output folders
    date_stamp = args.date_stamp  # dt.datetime.now().strftime("%Y-%m-%d")
    phi_date_stamp = "PHI-" + date_stamp
    donor_folder = os.path.join(args.data_path, phi_date_stamp + "-donor-data")
    make_folder_if_doesnt_exist(donor_folder)
    uniqueDonorList_path = os.path.join(
        donor_folder,
        phi_date_stamp + "-uniqueDonorList.csv")

    # define the donor groups
    donor_groups = [
        "bigdata", "AADE", "BT1", "carbdm", "CDN", "CWD", "DHF", "DIATRIBE",
        "diabetessisters", "DYF", "JDRF", "NSF", "T1DX",
    ]

    all_donors_df = pd.DataFrame(columns=["userID", "donorGroup"])

    # accounts to ignore (QA testing)
    accounts_to_ignore = [
        'f597f21dcd', '0ef51a0121', '38c3795fcb', '69c99b51f6', '84c2cdd947',
        '9cdebdc316', '9daaf4d4c1', 'bdf4724bed', 'c7415b5097', 'dccc3baf63',
        'ee145393b0', '00cd0ffada', '122a0bf6c5', '898c3d8056', '9e4f3fbc2a',
        '1ebe2a2790', '230650bb9c', '3f8fdabcd7', '636aad0f58', '70df39aa43',
        '92a3c903fe', '3043996405', '0239c1cfb2', '03852a5acc', '03b1953135',
        '0ca5e75e4a', '0d8bdb05eb', '19123d4d6a', '19c25d34b5', '1f6866bebc',
        '1f851c13a5', '275ffa345f', '275ffa345f', '3949134b4a', '410865ba56',
        '57e2b2ed3d', '59bd6891e9', '5acf17a80a', '627d0f4bf1', '65247f8257',
        '6e5287d4c4', '6fc3a4ad44', '78ea6c3cad', '7d8a80e8ce', '8265248ea3',
        '8a411facd2', '98f81fae18', '9d601a08a3', 'aa9fbc4ef5', 'aaac56022a',
        'adc00844c3', 'aea4b3d8ea', 'bc5ee641a3', 'c8328622d0', 'cfef0b91ac',
        'df54366b1c', 'e67aa71493', 'f2103a44d5', 'dccc3baf63'
    ]

    for group in donor_groups:
        # "bigdata" uses the unprefixed environment variables
        env_group = "" if group == "bigdata" else group
        nNewDonors, donors_df = accept_new_donors_and_get_donor_list(
            environmentalVariables.get_environmental_variables(env_group))
        donors_df["donorGroup"] = group
        print(group, "complete, there are %d new donors\n" % nNewDonors)
        all_donors_df = pd.concat([all_donors_df, donors_df])

    # sort by userID then donorGroup so that, for a donor appearing in more
    # than one group, the first (kept) row is deterministic
    all_donors_df.sort_values(by=['userID', 'donorGroup'], inplace=True)
    unique_donors = all_donors_df.loc[~all_donors_df["userID"].duplicated()]

    kept_ids = set(unique_donors["userID"]) - set(accounts_to_ignore)
    total_donors = len(kept_ids)
    final_donor_list = pd.DataFrame(list(kept_ids), columns=["userID"])
    final_donor_list = pd.merge(final_donor_list, unique_donors,
                                how="left", on="userID")

    # polish up the final donor list
    final_donor_list.sort_values(by="donorGroup", inplace=True)
    final_donor_list.reset_index(drop=True, inplace=True)

    if args.save_donor_list:
        print("saving donor list ...\n")
        final_donor_list.to_csv(uniqueDonorList_path)
    else:
        print("donor list is NOT being saved ...\n")

    print("There are %d total donors," % total_donors)
    print("after removing donors that donated to more than 1 group,")
    print("and after removing QA testing accounts.")

    return final_donor_list
def get_shared_metadata(
    donor_group=np.nan,
    userid_of_shared_user=np.nan,
    auth=np.nan,
    email=np.nan,
    password=np.nan,
):
    """Log into Tidepool and fetch profile metadata for a shared user.

    Credentials are resolved in this order: donor_group environment
    variables, an explicit (email, password) auth tuple, explicit
    email/password arguments, then interactive prompts.  If no shared
    userid is given, the master account's own metadata is returned.

    Returns
    -------
    (df, userid_of_shared_user) : a single-row DataFrame of profile fields
        indexed by "userid", and the userid the metadata belongs to.

    Exits via sys.exit on any failing API call.
    """
    # login
    if pd.notnull(donor_group):
        if donor_group == "bigdata":
            dg = ""
        else:
            dg = donor_group
        auth = environmentalVariables.get_environmental_variables(dg)

    # FIX: auth may already be an (email, password) tuple here, and
    # pd.isnull(<tuple>) returns an element-wise array whose truth value is
    # ambiguous (raises ValueError) — guard with isinstance first.
    if not isinstance(auth, tuple) and pd.isnull(auth):
        if pd.isnull(email):
            email = input("Enter Tidepool email address:\n")
        if pd.isnull(password):
            password = getpass.getpass("Enter password:\n")
        auth = (email, password)

    api_call = "https://api.tidepool.org/auth/login"
    api_response = requests.post(api_call, auth=auth)
    if api_response.ok:
        xtoken = api_response.headers["x-tidepool-session-token"]
        userid_master = json.loads(api_response.content.decode())["userid"]
        headers = {
            "x-tidepool-session-token": xtoken,
            "Content-Type": "application/json"
        }
    else:
        sys.exit("Error with " + auth[0] + ":" + str(api_response.status_code))

    if pd.isnull(userid_of_shared_user):
        userid_of_shared_user = userid_master
        print("getting metadata for the master account since no shared " +
              "user account was given")
    print("logging into", auth[0], "...")

    # get shared or donor metadata
    print("get donor metadata for %s ..." % userid_of_shared_user)
    api_call = ("https://api.tidepool.org/metadata/%s/profile"
                % userid_of_shared_user)
    api_response = requests.get(api_call, headers=headers)

    df = pd.DataFrame(
        dtype=object,
        columns=[
            "diagnosisType", "diagnosisDate", "biologicalSex", "birthday",
            "targetTimezone", "targetDevices", "isOtherPerson", "about"
        ])
    if api_response.ok:
        user_profile = json.loads(api_response.content.decode())
        if "patient" in user_profile.keys():
            # copy every patient profile field into the single-row frame;
            # fields outside the predeclared columns are added as new columns
            for k, d in user_profile["patient"].items():
                df.at[userid_of_shared_user, k] = d
    else:
        sys.exit("Error getting metadata API " +
                 str(api_response.status_code))

    # logout
    api_call = "https://api.tidepool.org/auth/logout"
    api_response = requests.post(api_call, auth=auth)
    if api_response.ok:
        print("successfully logged out of", auth[0])
    else:
        sys.exit("Error with logging out for " + auth[0] + ":" +
                 str(api_response.status_code))

    df.index.rename("userid", inplace=True)
    return df, userid_of_shared_user
def get_data(
    weeks_of_data=10 * 52,
    donor_group=np.nan,
    userid_of_shared_user=np.nan,
    auth=np.nan,
    email=np.nan,
    password=np.nan,
):
    """Log into Tidepool and download device data for a shared user.

    Credentials are resolved in this order: donor_group environment
    variables, an explicit (email, password) auth tuple, explicit
    email/password arguments, then interactive prompts.  Data is pulled in
    one-year chunks when more than 52 weeks are requested.

    Returns
    -------
    (df, userid_of_shared_user) : the downloaded data as a DataFrame and
        the userid it belongs to.

    Exits via sys.exit on any failing API call.
    """
    # login
    if pd.notnull(donor_group):
        if donor_group == "bigdata":
            dg = ""
        else:
            dg = donor_group
        auth = environmentalVariables.get_environmental_variables(dg)

    # FIX: auth may already be an (email, password) tuple here, and
    # pd.isnull(<tuple>) returns an element-wise array whose truth value is
    # ambiguous (raises ValueError) — guard with isinstance first.
    if not isinstance(auth, tuple) and pd.isnull(auth):
        if pd.isnull(email):
            email = input("Enter Tidepool email address:\n")
        if pd.isnull(password):
            password = getpass.getpass("Enter password:\n")
        auth = (email, password)

    api_call = "https://api.tidepool.org/auth/login"
    api_response = requests.post(api_call, auth=auth)
    if api_response.ok:
        xtoken = api_response.headers["x-tidepool-session-token"]
        userid_master = json.loads(api_response.content.decode())["userid"]
        headers = {
            "x-tidepool-session-token": xtoken,
            "Content-Type": "application/json"
        }
    else:
        sys.exit("Error with " + auth[0] + ":" + str(api_response.status_code))

    if pd.isnull(userid_of_shared_user):
        userid_of_shared_user = userid_master
        print("getting data for the master account since no shared " +
              "user account was given")
    print("logging into", auth[0], "...")

    # download user data
    print("downloading data ...")
    df = pd.DataFrame()
    # FIX: the pd.datetime alias was removed in pandas 1.0 — use pd.Timestamp
    endDate = pd.Timestamp.now() + pd.Timedelta(1, unit="d")

    if weeks_of_data > 52:
        # pull the data one year at a time, walking backwards from endDate
        years_of_data = int(np.floor(weeks_of_data / 52))
        for years in range(0, years_of_data + 1):
            # FIX: the original built pd.datetime(year - 1, month, day + 1),
            # which raises at month ends (day + 1 past the last day of the
            # month) and on leap days; DateOffset arithmetic gives the same
            # date on valid inputs and is well-defined on the edge cases.
            startDate = (pd.Timestamp(endDate) - pd.DateOffset(years=1)
                         + pd.Timedelta(1, unit="d"))
            year_df, endDate = get_data_api(
                userid_of_shared_user, startDate, endDate, headers)
            df = pd.concat([df, year_df], ignore_index=True, sort=False)
    else:
        startDate = (pd.to_datetime(endDate)
                     - pd.Timedelta(weeks_of_data * 7, "d"))
        df, _ = get_data_api(
            userid_of_shared_user, startDate, endDate, headers)

    # logout
    api_call = "https://api.tidepool.org/auth/logout"
    api_response = requests.post(api_call, auth=auth)
    if api_response.ok:
        print("successfully logged out of", auth[0])
    else:
        sys.exit("Error with logging out for " + auth[0] + ":" +
                 str(api_response.status_code))

    return df, userid_of_shared_user
meta_df = pd.DataFrame(dtype=object, columns=[ "diagnosisType", "diagnosisDate", "biologicalSex", "birthday", "targetTimezone", "targetDevices", "isOtherPerson", "about" ]) # % loop through each user and get data for donor_group in donor_list["donorGroup"].unique(): if donor_group == "bigdata": dg = "" else: dg = donor_group headers, userid = login_api( environmentalVariables.get_environmental_variables(dg)) userids = donor_list.loc[donor_list["donorGroup"] == donor_group, "userID"] for userid in userids: temp_df = get_metadata_api(userid, headers) csv_output_path = os.path.join(csv_path, "PHI-" + userid + ".csv") skip_if_no_overwrite_and_file_exists = ( (not args.overwrite) & (os.path.exists(csv_output_path))) if not args.metadata_only: if not skip_if_no_overwrite_and_file_exists: data = get_all_data_api(userid) data_rows = len(data) if data_rows > 0: