def upload_local_file_to_oh(file_path, file_name, file_metadata, access_token, member_id):
    """
    Uploads a local file to the member's Open Humans account.

    :param file_path: The location of the local file to be uploaded to Open Humans.
    :param file_name: The name of the file to be uploaded.
    :param file_metadata: The metadata of the file to be uploaded.
    :param access_token: The project access token for the given member.
    :param member_id: The Open Humans ID of the member.
    :return: boolean. True if successful, else False.
    """
    try:
        with open(file_path, 'rb') as fs:
            upload_stream(fs, file_name, file_metadata, access_token)
        return True
    except Exception:
        # Catch Exception rather than using a bare except, so system-exiting
        # exceptions (KeyboardInterrupt, SystemExit) still propagate.
        print(f'Failed to upload {file_path} to OH for OH member {member_id}')
        return False
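# A minimal usage sketch (not part of the original module): the path, metadata,
# token, and member ID below are hypothetical placeholder values.
#
#     example_metadata = {'description': 'Example export', 'tags': ['example']}
#     upload_local_file_to_oh('/tmp/example-export.json', 'example-export.json',
#                             example_metadata, '<project-access-token>',
#                             '<oh-member-id>')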
def write_new_tweets(oh_member, twitter_api, month, new_data):
    """
    Merge newly fetched tweets and likes into the member's existing monthly
    Twitter data file on Open Humans, then replace that file.
    """
    existing_files = api.exchange_oauth2_member(
        oh_member.get_access_token(), all_files=True)
    old_data = None
    file_id = None
    # Look for an existing data file for this month.
    for dfile in existing_files['data']:
        if dfile['basename'] == 'twitter-data-{}.json'.format(month):
            old_data = requests.get(dfile['download_url']).json()
            file_id = dfile['id']
            break
    if old_data:
        # Prepend the new items to the previously stored ones.
        old_data['tweets'] = new_data['tweets'] + old_data['tweets']
        old_data['likes'] = new_data['likes'] + old_data['likes']
    else:
        old_data = {'tweets': new_data['tweets'],
                    'likes': new_data['likes'],
                    'followers': [],
                    'following': []}
    # For the current month, also record a follower/following count snapshot.
    if month == str(datetime.datetime.today())[:7]:
        me = twitter_api.me()
        old_data['followers'].append(
            {'timestamp': str(datetime.datetime.today()),
             'value': me.followers_count})
        old_data['following'].append(
            {'timestamp': str(datetime.datetime.today()),
             'value': me.friends_count})
    # Write the merged data to a temporary file and upload it.
    with tempfile.TemporaryFile() as f:
        js = json.dumps(old_data)
        js = str.encode(js)
        f.write(js)
        f.flush()
        f.seek(0)
        api.upload_stream(
            f, "twitter-data-{}.json".format(month),
            metadata={
                "description": "Twitter Data",
                "tags": ["Twitter"]
            },
            access_token=oh_member.get_access_token())
    # Remove the superseded file, if one existed.
    if file_id:
        api.delete_file(
            oh_member.get_access_token(),
            project_member_id=oh_member.oh_id,
            file_id=file_id)
def test_upload_stream_valid(self):
    stream = None
    with open(TARGET_FILEPATH, 'rb') as testfile:
        testdata = testfile.read()
        stream = io.BytesIO(testdata)
    response = upload_stream(stream=stream,
                             filename=TARGET_FILEPATH.split('/')[-1],
                             metadata=FILE_METADATA,
                             access_token=ACCESS_TOKEN,
                             project_member_id=VALID_PMI1)
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.json(), {'size': 446, 'status': 'ok'})
def serialize_accesslogs(api_endpoint, oh_member, access_token, start_date, end_date):
    """
    Groups logs by project, converts them from dicts to CSV, and uploads the
    resulting CSV files to the member's Open Humans account.
    """
    accesslog_api_url = "{0}/data-management/{1}/?access_token={2}".format(
        settings.OPENHUMANS_OH_BASE_URL, api_endpoint, oh_member.get_access_token())
    if start_date:
        accesslog_api_url = "{0}&start_date={1}".format(accesslog_api_url, start_date)
    if end_date:
        accesslog_api_url = "{0}&end_date={1}".format(accesslog_api_url, end_date)

    if api_endpoint == "newdatafileaccesslog":
        access_point = "open-humans"
        headers = [
            "date", "ip_address", "user",
            "datafile_id", "datafile_source", "datafile_created",
            "datafile_user_id", "datafile_basename", "datafile_download_url",
            "key_id", "key_key", "key_created", "key_project_id",
            "key_datafile_id", "key_access_token", "key_key_creation_ip_address",
        ]
    else:
        access_point = "aws"
        headers = [
            "time", "remote_ip", "request_id", "operation", "bucket_key",
            "request_uri", "status", "bytes_sent", "object_size", "total_time",
            "turn_around_time", "referrer", "user_agent", "cipher_suite",
            "host_header",
            "datafile_id", "datafile_source", "datafile_created",
            "datafile_user_id", "datafile_basename", "datafile_download_url",
        ]

    timestamp = str(datetime.now(timezone.utc).isoformat())
    accesslogs = get_all_results(accesslog_api_url)

    # Group log events by project and serialize to lists
    log_events = {}
    for access_log in accesslogs:
        try:
            if access_log["datafile"]:
                project = access_log["datafile"]["source"]
            else:
                continue
        except KeyError:
            # Sometimes, a log file gets deleted between an access event and
            # log retrieval. In these instances, skip the log.
            continue

        row = []
        for header in headers:
            field = "-"  # Default, so a missing key never leaves field unset
            if header in access_log:
                if access_log[header] is not None:
                    field = str(access_log[header])
            elif "datafile_" in header:
                key = header[9:]
                if key in access_log["datafile"]:
                    if access_log["datafile"][key] is not None:
                        field = str(access_log["datafile"][key])
            elif "key_" in header:
                key = header[4:]
                if key in access_log["key"]:
                    if access_log["key"][key] is not None:
                        field = str(access_log["key"][key])
            row.append(field.strip(","))

        if project in log_events.keys():
            log_events[project].append(row)
        else:
            log_events[project] = [row]

    # Combine lists for each project as csv files and upload
    for project, items in log_events.items():
        filename = "datalogs_{0}_{1}_{2}_{3}_{4}.csv".format(
            access_point, project, start_date, end_date, timestamp)
        csv = ""
        for row in items:
            if csv:
                csv = "{0}\n{1}".format(csv, ",".join(row))
            else:
                csv = ",".join(row)
        csv = "{0}\n{1}".format(",".join(headers), csv)  # Prepend the headers

        f = io.StringIO(csv)
        logger.info("Writing {0}".format(filename))
        upload_stream(
            f,
            filename,
            metadata={
                "description": "Open Humans access logs: AWS side",
                "tags": ["logs", "access logs", "AWS access logs"],
            },
            access_token=access_token,
        )