def upload_local_file_to_oh(file_path, file_name, file_metadata, access_token,
                            member_id):
    """
    Uploads a local file to the member's Open Humans account.

    :param file_path: The location of the local file to be uploaded to Open Humans.
    :param file_name: The name of the file to be uploaded.
    :param file_metadata: The metadata of the file to be uploaded.
    :param access_token: The project access token for the given member.
    :param member_id: The Open Humans ID of the member.
    :return: boolean. True if successful, else False
    """
    try:
        with open(file_path, 'rb') as fs:
            upload_stream(fs, file_name, file_metadata, access_token)
        return True
    except Exception as e:
        print(f'Failed to upload {file_path} to OH for OH member {member_id}: {e}')
        return False
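
A minimal usage sketch for the helper above, assuming upload_stream comes from the ohapi package; the path, metadata, token, and member ID below are placeholders:

# Hedged usage sketch; all values are placeholders, and the import location is
# an assumption (ohapi is the Open Humans API client these examples appear to use).
from ohapi.api import upload_stream

uploaded = upload_local_file_to_oh(
    file_path='/tmp/report.json',
    file_name='report.json',
    file_metadata={'description': 'Example report', 'tags': ['example']},
    access_token='PROJECT_MEMBER_ACCESS_TOKEN',
    member_id='12345678',
)
if not uploaded:
    print('upload failed')
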
Example #2
def write_new_tweets(oh_member, twitter_api, month, new_data):
    existing_files = api.exchange_oauth2_member(
        oh_member.get_access_token(),
        all_files=True)
    old_data = None
    file_id = None
    for dfile in existing_files['data']:
        if dfile['basename'] == 'twitter-data-{}.json'.format(month):
            old_data = requests.get(dfile['download_url']).json()
            file_id = dfile['id']
            break
    if old_data:
        old_data['tweets'] = new_data['tweets'] + old_data['tweets']
        old_data['likes'] = new_data['likes'] + old_data['likes']
    else:
        old_data = {'tweets': new_data['tweets'], 'likes': new_data['likes'],
                    'followers': [], 'following': []}
    if month == str(datetime.datetime.today())[:7]:
        me = twitter_api.me()
        old_data['followers'].append(
            {'timestamp': str(datetime.datetime.today()),
             'value': me.followers_count})
        old_data['following'].append(
            {'timestamp': str(datetime.datetime.today()),
             'value': me.friends_count})
    with tempfile.TemporaryFile() as f:
        js = str.encode(json.dumps(old_data))
        f.write(js)
        f.flush()
        f.seek(0)
        api.upload_stream(
            f, "twitter-data-{}.json".format(month),
            metadata={
                "description": "Twitter Data",
                "tags": ["Twitter"]
            }, access_token=oh_member.get_access_token())
    if file_id:
        api.delete_file(
            oh_member.get_access_token(),
            project_member_id=oh_member.oh_id,
            file_id=file_id)
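
A hedged sketch of how the updater above might be invoked; oh_member and twitter_api stand in for objects the surrounding project provides (an Open Humans member model with get_access_token()/oh_id and a tweepy client), and new_data mirrors the structure the function expects:

# Hypothetical invocation; oh_member and twitter_api are supplied by the
# surrounding project, and the empty lists are placeholders for freshly
# fetched tweet/like dicts.
import datetime

month = str(datetime.datetime.today())[:7]   # e.g. "2024-05", the format the function expects
new_data = {'tweets': [], 'likes': []}
write_new_tweets(oh_member, twitter_api, month, new_data)
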
Example #3
    def test_upload_stream_valid(self):
        stream = None
        with open(TARGET_FILEPATH, 'rb') as testfile:
            testdata = testfile.read()
            stream = io.BytesIO(testdata)
        response = upload_stream(stream=stream,
                                 filename=TARGET_FILEPATH.split('/')[-1],
                                 metadata=FILE_METADATA,
                                 access_token=ACCESS_TOKEN,
                                 project_member_id=VALID_PMI1)
        self.assertEqual(response.status_code, 200)
        assert response.json() == {'size': 446, 'status': 'ok'}
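
The test above relies on a few module-level fixtures; a minimal sketch of what they might look like, with placeholder values (the real fixture file is 446 bytes, matching the asserted size, and the upload_stream import location is an assumption):

# Placeholder fixtures assumed by the test; actual values live in the original
# project's test setup.
import io
from ohapi.api import upload_stream  # assumed import location

TARGET_FILEPATH = 'tests/data/example.txt'      # placeholder 446-byte test file
FILE_METADATA = {'description': 'Example file', 'tags': ['test']}
ACCESS_TOKEN = 'VALID_PROJECT_ACCESS_TOKEN'     # placeholder project token
VALID_PMI1 = '12345678'                         # placeholder project member ID
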
Example #4
def serialize_accesslogs(api_endpoint, oh_member, access_token, start_date,
                         end_date):
    """
    Groups logs by project, then converts from dict to csv, and finally uploads the
    resultant csv files to aws.
    """
    accesslog_api_url = "{0}/data-management/{1}/?access_token={2}".format(
        settings.OPENHUMANS_OH_BASE_URL, api_endpoint,
        oh_member.get_access_token())
    if start_date:
        accesslog_api_url = "{0}&start_date={1}".format(
            accesslog_api_url, start_date)
    if end_date:
        accesslog_api_url = "{0}&end_date={1}".format(accesslog_api_url,
                                                      end_date)
    if api_endpoint == "newdatafileaccesslog":
        access_point = "open-humans"
        headers = [
            "date",
            "ip_address",
            "user",
            "datafile_id",
            "datafile_source",
            "datafile_created",
            "datafile_user_id",
            "datafile_basename",
            "datafile_download_url",
            "key_id",
            "key_key",
            "key_created",
            "key_project_id",
            "key_datafile_id",
            "key_access_token",
            "key_key_creation_ip_address",
        ]
    else:
        access_point = "aws"
        headers = [
            "time",
            "remote_ip",
            "request_id",
            "operation",
            "bucket_key",
            "request_uri",
            "status",
            "bytes_sent",
            "object_size",
            "total_time",
            "turn_around_time",
            "referrer",
            "user_agent",
            "cipher_suite",
            "host_header",
            "datafile_id",
            "datafile_source",
            "datafile_created",
            "datafile_user_id",
            "datafile_basename",
            "datafile_download_url",
        ]
    timestamp = str(datetime.now(timezone.utc).isoformat())
    accesslogs = get_all_results(accesslog_api_url)

    # Group log events by project and serialize to lists
    log_events = {}
    for access_log in accesslogs:
        try:
            if access_log["datafile"]:
                project = access_log["datafile"]["source"]
            else:
                continue
        except KeyError:
            # Sometimes, a log file gets deleted between an access event and log retrieval
            # In these instances, skip the log
            continue

        row = []
        for header in headers:
            field = "-"  # default when a value is missing or None
            if header in access_log:
                if access_log[header] is not None:
                    field = str(access_log[header])
            elif header.startswith("datafile_"):
                # startswith avoids mis-routing headers like "key_datafile_id"
                key = header[len("datafile_"):]
                if access_log["datafile"].get(key) is not None:
                    field = str(access_log["datafile"][key])
            elif header.startswith("key_"):
                key = header[len("key_"):]
                if access_log["key"].get(key) is not None:
                    field = str(access_log["key"][key])
            row.append(field.strip(","))

        if project in log_events:
            log_events[project].append(row)
        else:
            log_events[project] = [row]

    # Combine lists for each project as csv files and upload
    for project, items in log_events.items():
        filename = "datalogs_{0}_{1}_{2}_{3}_{4}.csv".format(
            access_point, project, start_date, end_date, timestamp)

        csv = ""
        for row in items:
            if csv:
                csv = "{0}\n{1}".format(csv, ",".join(row))
            else:
                csv = ",".join(row)
        csv = "{0}\n{1}".format(",".join(headers), csv)  # Prepend the headers
        f = io.StringIO(csv)
        logger.info("Writing {0}".format(filename))
        upload_stream(
            f,
            filename,
            metadata={
                "description": "Open Humans access logs:  AWS side",
                "tags": ["logs", "access logs", "AWS access logs"],
            },
            access_token=access_token,
        )
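
A hedged sketch of calling the serializer above; the member object, token, and dates are placeholders, and any api_endpoint value other than "newdatafileaccesslog" is treated as the AWS access-log endpoint:

# Hypothetical invocation; oh_member, access_token, and the date range are
# placeholders supplied by the surrounding project.
serialize_accesslogs(
    api_endpoint="newdatafileaccesslog",
    oh_member=oh_member,
    access_token=access_token,
    start_date="2024-01-01",
    end_date="2024-01-31",
)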