def counts(num_orgs, org_info, worksheet, rows):
    """Count documents per organization and write the totals into column B.

    For each of the first ``num_orgs`` entries of ``org_info`` the documents
    endpoint is queried with that entry's organization id.  The size of the
    first page's ``data`` list seeds the count and
    ``content_counts.next_page`` adds the following pages.  The totals are
    then written to ``B3:B<rows>`` in one batch and the value returned by
    ``content_counts.update_timestamp`` is stored in ``B1``.

    Args:
        num_orgs: Number of leading ``org_info`` entries to process.
        org_info: Indexable sequence; position 0 of each item is the
            organization id.
        worksheet: Object exposing ``range``, ``update_cells`` and
            ``update_acell`` (presumably a gspread worksheet - confirm).
        rows: Last row of the target range.  A string as before, or any
            value ``str()`` can render (e.g. an int).

    Raises:
        IndexError: If ``num_orgs`` exceeds ``len(org_info)``.
    """
    docs_base_url = 'https://www.humanitarianresponse.info/en/api/v1.0/documents?fields=organizations.id&filter[organizations]='

    doc_counts = []
    for index in range(num_orgs):
        org_id = org_info[index][0]
        content = base.open_url(docs_base_url + str(org_id))
        # The first page is counted here; next_page adds every later page.
        _, count = content_counts.next_page(content, len(content['data']))
        doc_counts.append((org_id, count))

    doc_cells = worksheet.range('B3:B' + str(rows))
    # zip stops at the shorter sequence, so cells beyond the collected
    # counts keep their current value instead of raising IndexError.
    for cell, (_, count) in zip(doc_cells, doc_counts):
        cell.value = count
    worksheet.update_cells(doc_cells)

    updated = content_counts.update_timestamp(worksheet)
    worksheet.update_acell('B1', updated)
def main():
    """Collect organization records from the organizations endpoint.

    The first page is requested with the ``id`` and ``label`` fields and
    passed to ``collect_orgs``; the module's ``next`` helper is then called
    with the result to process any further pages.

    Returns:
        The ``org_info`` value handed back by ``next``.
    """
    first_page = base.open_url(
        'https://www.humanitarianresponse.info/en/api/v1.0/organizations?fields=id,label')
    page, collected = collect_orgs(first_page, [])
    # NOTE: ``next`` is expected to be this module's pagination helper,
    # not the builtin of the same name.
    _, collected = next(page, collected)
    return collected
def next_page(content, count): try: url = content['next']['href'] content = base.open_url(url) count += len(content['data']) content, count = next_page(content, count) except KeyError: pass return content, count
def next_page(content, op_status):
    """Follow ``next`` links, passing every fetched page through ``op_stat``.

    Args:
        content: Mapping for the current page; the following page is read
            from ``content['next']['href']``.
        op_status: Accumulator passed to and replaced by ``op_stat``.

    Returns:
        The accumulator as it stands when a ``KeyError`` ends the walk
        (typically because the page has no ``next`` link).
    """
    while True:
        try:
            following_url = content['next']['href']
            content = base.open_url(following_url)
            op_status = op_stat(content, op_status)
        except KeyError:
            break
    return op_status
def next(content, org_info): try: url = content['next']['href'] content = base.open_url(url) content, org_info = collect_orgs(content, org_info) content, org_info = next(content, org_info) except KeyError: pass return content, org_info
def next_page(content, years):
    """Process the following pages with ``work``, retrying once on HTTP errors.

    The next page URL is read from ``content['next']['href']``, printed,
    fetched, passed to ``work`` and the function then recurses on the new
    page.  If that sequence raises ``urllib.request.HTTPError`` it is run
    a second time from the current ``content``; a second HTTP error
    propagates to the caller.

    Args:
        content: Mapping for the current page.
        years: Accumulator passed to and replaced by ``work``.

    Returns:
        The accumulator as it stands when the walk ends; a ``KeyError``
        (typically a missing ``next`` link) ends it silently.
    """
    for attempt in (1, 2):
        try:
            url = content['next']['href']
            print(url)
            content = base.open_url(url)
            years = work(content, years)
            years = next_page(content, years)
        except KeyError:
            # No further page to follow.
            break
        except urllib.request.HTTPError:
            if attempt == 2:
                raise
            continue
        else:
            break
    return years
def next_page(content, dict, index):
    """Fetch the next numbered bundles page and merge it into ``dict``.

    ``index`` is incremented and used as the page number.  A non-empty
    page is passed to ``compile_clusters`` and the function recurses for
    the page after it.  An empty ``data`` list is treated as the end of
    the listing: ``compiling(dict)`` is called once and the walk stops.
    Previously execution continued past that call and kept requesting
    further pages, calling ``compiling`` again for each empty one.

    Args:
        content: Current page object; replaced by the fetched page.
        dict: Accumulator passed to ``compile_clusters`` and
            ``compiling``.  The name shadows the builtin but is kept so
            existing callers are unaffected.
        index: Number of the page that has already been processed.

    Returns:
        Tuple ``(dict, index)`` as they stand when the walk ends.  A
        ``KeyError`` at any step also ends the walk.
    """
    index += 1
    url = 'https://www.humanitarianresponse.info/en/api/v1.0/bundles?page%5Bnumber%5D=' + str(index)
    try:
        content = base.open_url(url)
        if len(content['data']) == 0:
            # Past the last page: emit the compiled result once and stop.
            compiling(dict)
            return dict, index
        dict, index = compile_clusters(content, dict, index)
        dict, index = next_page(content, dict, index)
    except KeyError:
        pass
    return dict, index
def main():
    """List user names that occur more than once and write them to a sheet.

    Users are paged from the user endpoint sorted by name.  Given and
    family names are title-cased and joined with a space; a name seen a
    second time is recorded once, in first-repeat order.  The result goes
    to the "Duplicates" worksheet from row 4 down, and the value returned
    by ``base.update_timestamp`` is stored in ``A1``.
    """
    LIMIT = 1000
    OFFSET = 0
    names = set()
    dupes = []
    # Mirror of ``dupes`` for O(1) membership tests; the list keeps order.
    dupes_seen = set()

    while True:
        base_url = 'https://api.humanitarian.id/api/v2/user?limit='+str(LIMIT)+'&offset='\
            +str(OFFSET)+'&sort=name&access_token='
        content = base.open_url(base_url + base.API_KEY)
        if len(content) == 0:
            break
        for person in content:
            first = person['given_name']
            last = person['family_name']
            full = first.title() + ' ' + last.title()
            if full not in names:
                names.add(full)
            elif full not in dupes_seen:
                dupes_seen.add(full)
                dupes.append(full)
        # Advance by the page size so the two cannot drift apart.
        OFFSET += LIMIT

    # Update Google Sheet: create it with headers, or reuse the existing one.
    try:
        worksheet = base.wks.add_worksheet(title="Duplicates", rows=(len(dupes) + 4), cols=10)
        worksheet.update_acell(
            'A2', "Duplicate accounts (Based on given and family name matching)")
        worksheet.update_acell('A3', "Name")
    except APIError:
        worksheet = base.wks.worksheet("Duplicates")

    names_list = worksheet.range('A4:A' + str(len(dupes) + 3))
    # zip keeps the fill from over-running ``dupes`` (e.g. when it is empty).
    for cell, name in zip(names_list, dupes):
        cell.value = name
    worksheet.update_cells(names_list)

    # Update last modified
    updated = base.update_timestamp(worksheet)
    worksheet.update_acell('A1', updated)
def main():
    """Print how many users limit e-mail visibility to verified users.

    Pages of 100 are requested from the user endpoint with
    ``emailsVisibility=verified`` until an empty page comes back; the sum
    of the page lengths is printed.
    """
    page_size = 100
    offset = 0
    total = 0  # users whose e-mail is visible to verified users only

    while True:
        query = 'https://api.humanitarian.id/api/v2/user?emailsVisibility=verified&limit=' \
            + str(page_size) + '&offset=' + str(offset) + '&access_token='
        page = base.open_url(query + base.API_KEY)
        fetched = len(page)
        if fetched == 0:
            break
        total += fetched
        offset += 100

    print(total)
def main():
    """Print how many entries the list endpoint returns for ``visibility=verified``.

    Pages of 100 are requested until an empty page comes back; the sum of
    the page lengths is printed.
    """
    page_size = 100
    offset = 0
    total = 0

    while True:
        query = 'https://api.humanitarian.id/api/v2/list?visibility=verified&limit=' \
            + str(page_size) + '&offset=' + str(offset) + '&access_token='
        page = base.open_url(query + base.API_KEY)
        fetched = len(page)
        if fetched == 0:
            break
        total += fetched
        offset += 100

    print(total)
def spaces(url):
    """Write each space's label and last-modified date to "Global Spaces".

    Args:
        url: Endpoint whose response carries a ``data`` list; every item
            must provide ``label`` and ``changed`` (an epoch value that
            ``int()`` accepts).

    The worksheet is created, or fetched when creation raises
    ``APIError``.  ``A1`` receives a "last updated" stamp, row 2 the
    column headings, and rows 3 onward the data.
    """
    content = base.open_url(url)

    labels = []
    modified = []
    for entry in content['data']:
        changed_at = time.localtime(int(entry['changed']))
        rendered = time.strftime("%Y %d %b", changed_at)
        labels.append(entry['label'])
        modified.append(rendered)

    spreadsheet = base.get_spreadsheet()
    try:
        worksheet = spreadsheet.add_worksheet(title="Global Spaces", rows=len(labels)+10, cols="5")
    except APIError:
        worksheet = spreadsheet.worksheet("Global Spaces")

    # Stamp the sheet with the time of this run.
    # NOTE(review): 'Etc/GMT-2' is presumably a pytz zone two hours ahead
    # of GMT, matching the '(GMT+2)' label - confirm.
    geneva = timezone('Etc/GMT-2')
    stamp = datetime.now(geneva).strftime("%d %m %Y %H:%M:%S")
    worksheet.update_acell('A1', "Sheet Last Updated: " + stamp + ' (GMT+2)')

    # Column headings
    worksheet.update_acell('A2', 'Space')
    worksheet.update_acell('B2', 'Last Modified')

    # One cell range per column, starting at row 3
    label_cells = worksheet.range('A3:A'+str(len(labels)+2))
    modified_cells = worksheet.range('B3:B'+str(len(modified)+2))

    for position, cell in enumerate(label_cells):  # space labels
        cell.value = labels[position]
    for position, cell in enumerate(modified_cells):  # last-modified dates
        cell.value = modified[position]

    # Batch writes - avoids the API timeout problem
    worksheet.update_cells(label_cells)
    worksheet.update_cells(modified_cells)
def main():
    """List verified users without a location in "Verified - No Location".

    Users are paged from the user endpoint (``verified=true``,
    ``authOnly=false``).  A user is kept when ``locations`` is missing,
    ``None`` or empty.  Ids and names are written from row 4 down and the
    value returned by ``base.update_timestamp`` is stored in ``A1``.

    The worksheet is obtained the same way as in the sibling reports:
    create it with headers, and fetch the existing one when creation
    raises ``APIError``.  The earlier lookup-first version hid every
    failure behind bare ``except:`` clauses and then failed on an unbound
    ``worksheet``.
    """
    LIMIT = 1000
    OFFSET = 0
    ids = []
    firsts = []
    lasts = []

    while True:
        base_url = 'https://api.humanitarian.id/api/v2/user?verified=true&authOnly=false&limit='+\
            str(LIMIT)+'&offset='+str(OFFSET)+'&access_token='
        content = base.open_url(base_url + base.API_KEY)
        print('Getting', LIMIT, 'new records...')
        if len(content) == 0:
            break
        for user in content:
            # A missing key and an explicit None/empty value are alike.
            location = user.get('locations')
            if location is None or len(location) == 0:
                ids.append(user['_id'])
                firsts.append(user['given_name'])
                lasts.append(user['family_name'])
        OFFSET += LIMIT

    # Update Google Sheet
    try:
        worksheet = base.wks.add_worksheet(title="Verified - No Location", rows=(len(ids) + 4), cols=10)
        worksheet.update_acell(
            'A2', "Verified profiles without location (excludes auth users)")
        worksheet.update_acell('A3', "User ID")
        worksheet.update_acell('B3', "Given Name")
        worksheet.update_acell('C3', "Family Name")
    except APIError:
        # Creation is rejected when the sheet already exists; reuse it.
        worksheet = base.wks.worksheet("Verified - No Location")

    # Select range
    id_list = worksheet.range('A4:A' + str(len(ids) + 3))
    first_list = worksheet.range('B4:B' + str(len(firsts) + 3))
    last_list = worksheet.range('C4:C' + str(len(lasts) + 3))

    # zip keeps each fill within its value list, also when it is empty.
    for cells, values in ((id_list, ids), (first_list, firsts), (last_list, lasts)):
        for cell, value in zip(cells, values):
            cell.value = value

    # Update in batch - avoids API timeout problem
    worksheet.update_cells(id_list)
    worksheet.update_cells(first_list)
    worksheet.update_cells(last_list)

    # Update last modified
    updated = base.update_timestamp(worksheet)
    worksheet.update_acell('A1', updated)
def main():
    """List verified users with organization type 443 in "Verified - Other".

    Pages of 100 are requested from the user endpoint
    (``verified=true``, ``organization.orgTypeId=443``) until an empty
    page comes back.  Ids and names are written from row 4 down and the
    value returned by ``base.update_timestamp`` is stored in ``A1``.
    """
    page_size = 100
    offset = 0
    ids, firsts, lasts = [], [], []

    while True:
        query = 'https://api.humanitarian.id/api/v2/user?verified=true&organization.orgTypeId=443&limit=' \
            + str(page_size) + '&offset=' + str(offset) + '&access_token='
        page = base.open_url(query + base.API_KEY)
        if len(page) == 0:
            break
        for user in page:
            ids.append(user['_id'])
            firsts.append(user['given_name'])
            lasts.append(user['family_name'])
        offset += 100

    # Create the sheet with its headings, or reuse the existing one.
    try:
        worksheet = base.wks.add_worksheet(title="Verified - Other", rows=(len(ids) + 4), cols=10)
        worksheet.update_acell(
            'A2', "Verified users with OTHER as organization type")
        worksheet.update_acell('A3', "User ID")
        worksheet.update_acell('B3', "Given Name")
        worksheet.update_acell('C3', "Family Name")
    except APIError:
        worksheet = base.wks.worksheet("Verified - Other")

    # One cell range per column, data starting at row 4.
    columns = (('A4:A', ids), ('B4:B', firsts), ('C4:C', lasts))
    cell_ranges = [worksheet.range(prefix + str(len(values) + 3))
                   for prefix, values in columns]

    for cells, (_, values) in zip(cell_ranges, columns):
        for position, cell in enumerate(cells):
            cell.value = values[position]

    # Batch writes - avoids the API timeout problem
    for cells in cell_ranges:
        worksheet.update_cells(cells)

    # Last-modified stamp
    worksheet.update_acell('A1', base.update_timestamp(worksheet))
def ops_by_status(url):
    """Write every operation with its status and change time to "Ops By Status".

    Args:
        url: First page of the listing; it is passed through ``op_stat``
            and later pages are handled by ``next_page``.

    The accumulated mapping is flattened into ``[key, value]`` pairs.
    Each value is indexed with ``[0]`` for the "Status" column and
    ``[1]`` for the "Last Modified" column.
    """
    content = base.open_url(url)
    op_status = next_page(content, op_stat(content, {}))

    # Mapping -> list of [operation, value] pairs
    pairs = [[operation, details] for operation, details in op_status.items()]
    rows = len(pairs) + 5

    spreadsheet = base.get_spreadsheet()

    # Create the sheet, or reuse the existing one.
    try:
        worksheet = spreadsheet.add_worksheet(title="Ops By Status", rows=str(rows), cols="10")
    except APIError:
        worksheet = spreadsheet.worksheet("Ops By Status")

    # Stamp the sheet with the time of this run.
    geneva = timezone('Etc/GMT-2')
    stamp = datetime.now(geneva).strftime("%d %m %Y %H:%M:%S")
    worksheet.update_acell('A1', "Sheet Last Updated: " + stamp + ' (GMT+2)')

    # Column headings
    for address, heading in (('A2', 'Operation'), ('B2', 'Status'), ('C2', 'Last Modified')):
        worksheet.update_acell(address, heading)

    # One cell range per column, data starting at row 3
    last_row = str(len(op_status) + 2)
    org_list = worksheet.range('A3:A' + last_row)
    status_list = worksheet.range('B3:B' + last_row)
    changed_list = worksheet.range('C3:C' + last_row)

    for position, cell in enumerate(org_list):      # operation
        cell.value = pairs[position][0]
    for position, cell in enumerate(status_list):   # status
        cell.value = pairs[position][1][0]
    for position, cell in enumerate(changed_list):  # time changed
        cell.value = pairs[position][1][1]

    # Batch writes (to avoid API timeout)
    worksheet.update_cells(org_list)
    worksheet.update_cells(status_list)
    worksheet.update_cells(changed_list)
def main():
    """List unverified users without an organization in "Unverified - No Org".

    Pages of 1000 are requested from the user endpoint
    (``verified=false``, ``authOnly=false``) until an empty page comes
    back.  A user is kept when ``organization`` is missing, ``None`` or
    empty.  Ids and names are written from row 4 down and the value
    returned by ``base.update_timestamp`` is stored in ``A1``.
    """
    page_size = 1000
    offset = 0
    ids, firsts, lasts = [], [], []

    while True:
        query = 'https://api.humanitarian.id/api/v2/user?verified=false&authOnly=false&limit=' \
            + str(page_size) + '&offset=' + str(offset) + '&access_token='
        page = base.open_url(query + base.API_KEY)
        if len(page) == 0:
            break
        for user in page:
            try:
                org = user['organization']
            except KeyError:
                org = None
            if org is not None and len(org) != 0:
                continue  # has an organization: not of interest
            ids.append(user['_id'])
            firsts.append(user['given_name'])
            lasts.append(user['family_name'])
        offset += 1000

    # Create the sheet with its headings, or reuse the existing one.
    try:
        worksheet = base.wks.add_worksheet(title="Unverified - No Org", rows=(len(ids) + 4), cols=10)
        worksheet.update_acell(
            'A2', "Unverified users without organization (excludes auth users)")
        worksheet.update_acell('A3', "User ID")
        worksheet.update_acell('B3', "Given Name")
        worksheet.update_acell('C3', "Family Name")
    except APIError:
        worksheet = base.wks.worksheet("Unverified - No Org")

    # One cell range per column, data starting at row 4.
    columns = (('A4:A', ids), ('B4:B', firsts), ('C4:C', lasts))
    cell_ranges = [worksheet.range(prefix + str(len(values) + 3))
                   for prefix, values in columns]

    for cells, (_, values) in zip(cell_ranges, columns):
        for position, cell in enumerate(cells):
            cell.value = values[position]

    # Batch writes - avoids the API timeout problem
    for cells in cell_ranges:
        worksheet.update_cells(cells)

    # Last-modified stamp
    worksheet.update_acell('A1', base.update_timestamp(worksheet))
def main():
    """List contact lists with at least 20 contacts in "20+ Contacts".

    Lists are requested sorted by descending ``count``, so paging stops
    at the first list whose count is below 20.  It also stops on an empty
    page; previously an empty page left the loop flag untouched, so the
    loop never ended when every list had 20 or more contacts.  Ids,
    labels and counts are written from row 4 down and the value returned
    by ``base.update_timestamp`` is stored in ``A1``.
    """
    LIMIT = 100
    OFFSET = 0
    more = True
    ids = []
    names = []
    counts = []

    while more:
        base_url = 'https://api.humanitarian.id/api/v2/list?limit='+str(LIMIT)+'&offset='+str(OFFSET)+\
            '&sort=-count&access_token='
        content = base.open_url(base_url + base.API_KEY)
        if len(content) == 0:
            # Ran out of lists before any count dropped below 20.
            break
        for entry in content:
            count = entry['count']
            if count < 20:
                # Sorted by -count: everything after this is smaller too.
                more = False
                break
            ids.append(entry['_id'])
            names.append(entry['label'])
            counts.append(count)
        OFFSET += LIMIT

    # Update Google Sheet: create it with headers, or reuse the existing one.
    try:
        worksheet = base.wks.add_worksheet(title="20+ Contacts", rows=(len(ids) + 4), cols=10)
        worksheet.update_acell('A2', "Lists with 20+ contacts")
        worksheet.update_acell('A3', "List ID")
        worksheet.update_acell('B3', "List Label")
        worksheet.update_acell('C3', "Num Contacts")
    except APIError:
        worksheet = base.wks.worksheet("20+ Contacts")

    # Select range
    id_list = worksheet.range('A4:A' + str(len(ids) + 3))
    name_list = worksheet.range('B4:B' + str(len(names) + 3))
    count_list = worksheet.range('C4:C' + str(len(counts) + 3))

    # zip keeps each fill within its value list, also when it is empty.
    for cells, values in ((id_list, ids), (name_list, names), (count_list, counts)):
        for cell, value in zip(cells, values):
            cell.value = value

    # Update in batch - avoids API timeout problem
    worksheet.update_cells(id_list)
    worksheet.update_cells(name_list)
    worksheet.update_cells(count_list)

    # Update last modified
    updated = base.update_timestamp(worksheet)
    worksheet.update_acell('A1', updated)
def groups_by_op(base_url):
    """Start the page-by-page cluster compilation at page 1.

    Args:
        base_url: URL prefix; the page number is appended to it as text.

    Page 1 is fetched and passed to ``compile_clusters`` with an empty
    dict; the results are then handed to ``next_page`` for the remaining
    pages.  Nothing is returned.
    """
    first_page_number = 1
    first_page = base.open_url(base_url + str(first_page_number))
    clusters, page_number = compile_clusters(first_page, {}, first_page_number)
    next_page(first_page, clusters, page_number)
def contribs_by_year():
    """Refresh the 2019 column of "Contributing Orgs By Year".

    Events, documents and infographics created on or after
    2019-01-01 00:00:00 UTC are fetched in that order.  Each first page
    goes through ``work`` and later pages through ``next_page``, all
    feeding the same ``years`` accumulator.  The worksheet is created
    with the year headings 2012-2019 in row 2, or fetched when that
    raises ``APIError``.  ``A1`` gets a "last updated" stamp and
    ``update_worksheet_year`` writes the 2019 data into ``H3:H``.
    Columns for 2012-2018 are not rewritten by this function.
    """
    years = {}

    # Lower bound for the 'created' filter, as an epoch-seconds string.
    utc = timezone('UTC')
    tstamp = str(int(datetime(2019, 1, 1, 0, 0, 0, tzinfo=utc).timestamp()))

    # events, docs, maps/infographics - in this order
    url_prefixes = (
        'https://www.humanitarianresponse.info/en/api/v1.0/events?fields=organizations.label,created&filter[created][value]=',
        'https://www.humanitarianresponse.info/api/v1.0/documents?fields=organizations.label,created&filter[created][value]=',
        'https://www.humanitarianresponse.info/api/v1.0/infographics?fields=organizations.label,created&filter[created][value]=',
    )
    for prefix in url_prefixes:
        content = base.open_url(prefix + tstamp + '&filter[created][operator]=>=')
        years = work(content, years)
        years = next_page(content, years)

    spreadsheet = base.get_spreadsheet()
    try:
        worksheet = spreadsheet.add_worksheet(
            title="Contributing Orgs By Year", rows=100, cols=10)
        # Year headings: A2 = 2012 ... H2 = 2019
        for column, year in zip('ABCDEFGH', range(2012, 2020)):
            worksheet.update_acell(column + '2', str(year))
    except APIError:
        worksheet = spreadsheet.worksheet("Contributing Orgs By Year")

    # Stamp the sheet with the time of this run.
    geneva = timezone('Etc/GMT-2')
    stamp = datetime.now(geneva).strftime("%d %m %Y %H:%M:%S")
    worksheet.update_acell('A1', "Sheet Last Updated: " + stamp + ' (GMT+2)')

    # Only the 2019 column is refreshed.
    update_worksheet_year(years, '2019', 'H3:H', worksheet)
def main():
    """List unverified users with very short names in "Unverified - Incomplete".

    Pages of 1000 are requested from the user endpoint
    (``verified=false``, sorted by name) until an empty page comes back.
    A user is kept when the given name or the family name has at most one
    character.  Ids and names are written from row 4 down and the value
    returned by ``base.update_timestamp`` is stored in ``A1``.
    """
    page_size = 1000
    offset = 0
    ids, firsts, lasts = [], [], []

    while True:
        query = 'https://api.humanitarian.id/api/v2/user?verified=false&sort=name&limit=' \
            + str(page_size) + '&offset=' + str(offset) + '&access_token='
        page = base.open_url(query + base.API_KEY)
        if len(page) == 0:
            break
        for user in page:
            user_id = user['_id']
            given = user['given_name']
            family = user['family_name']
            if len(given) > 1 and len(family) > 1:
                continue  # both name parts look complete
            ids.append(user_id)
            firsts.append(given)
            lasts.append(family)
        offset += 1000

    # Create the sheet with its headings, or reuse the existing one.
    try:
        worksheet = base.wks.add_worksheet(title="Unverified - Incomplete", rows=(len(ids) + 4), cols=10)
        worksheet.update_acell(
            'A2', "Unverified profiles with potentially incomplete names")
        worksheet.update_acell('A3', "User ID")
        worksheet.update_acell('B3', "Given Name")
        worksheet.update_acell('C3', "Family Name")
    except APIError:
        worksheet = base.wks.worksheet("Unverified - Incomplete")

    # One cell range per column, data starting at row 4.
    columns = (('A4:A', ids), ('B4:B', firsts), ('C4:C', lasts))
    cell_ranges = [worksheet.range(prefix + str(len(values) + 3))
                   for prefix, values in columns]

    for cells, (_, values) in zip(cell_ranges, columns):
        for position, cell in enumerate(cells):
            cell.value = values[position]

    # Batch writes - avoids the API timeout problem
    for cells in cell_ranges:
        worksheet.update_cells(cells)

    # Last-modified stamp
    worksheet.update_acell('A1', base.update_timestamp(worksheet))