Example #1
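A unit test from the ohapi client itself: it replays recorded HTTP responses from a vcrpy cassette, mocks the file open, and monkeypatches os.stat so upload_aws sees a plausible size for the fake file.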
    def test_upload_aws_valid_access_token(self):
        with my_vcr.use_cassette('ohapi/cassettes/test_upload_aws_valid_' +
                                 'access_token') as cass:
            with patch('ohapi.api.open', mock_open(read_data=b'some stuff')):
                try:

                    # Fake os.stat for 'foo' so upload_aws reports the
                    # size of the mocked file content.
                    def fake_stat(arg):
                        if arg == "foo":
                            faked = list(orig_os_stat('/tmp'))
                            faked[stat.ST_SIZE] = len('some stuff')
                            return stat_result(faked)
                        else:
                            return orig_os_stat(arg)
                    orig_os_stat = os.stat
                    os.stat = fake_stat
                    upload_aws(target_filepath='foo',
                               metadata=FILE_METADATA,
                               access_token=ACCESS_TOKEN,
                               project_member_id=VALID_PMI1
                               )
                    self.assertEqual(
                        cass.responses[0]["status"]["code"], 201)
                    self.assertEqual(
                        cass.responses[1]["status"]["code"], 200)
                    self.assertEqual(
                        cass.responses[2]["status"]["code"], 200)
                finally:
                    os.stat = orig_os_stat
Example #2
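Pulls per-trip semantic JSON files out of a Google Takeout zip and uploads each one to Open Humans.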
def get_semantic_data(tf_in, tmp_dir, member, access_token):
    # Get the new semantic files and save those too
    if tf_in.name.endswith('.zip'):
        zf = zipfile.ZipFile(tf_in)
        for f in zf.filelist:
            if f.filename.endswith('.json') and len(
                    f.filename.split('/')) == 5:
                file_content = json.loads(zf.read(f))
                file_name = tmp_dir + '/' + f.filename.split('/')[-1]
                with open(file_name, 'w') as raw_file:
                    json.dump(file_content, raw_file)
                metadata = {
                    'description': 'Semantic Google Location History JSON',
                    'tags': ['google location history', 'gps',
                             'semantic data'],
                    'creation_date': arrow.get().format(),
                }
                api.upload_aws(file_name,
                               metadata,
                               access_token,
                               base_url=OH_BASE_URL,
                               project_member_id=str(
                                   member['project_member_id']))
    else:
        return None
Example #3
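A background task that uploads monthly Google Fit files, deletes any previously uploaded file with the same basename, and re-queues itself an hour later on failure.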
def fetch_googlefit_data(oh_id):
    '''
    Fetches all of the googlefit data for a given user
    '''
    print("Started googlefit update task")
    try:
        current_dt = datetime.utcnow()
        oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
        gf_member = oh_member.googlefit_member
        oh_access_token = oh_member.get_access_token()
        gf_access_token = gf_member.get_access_token()

        basenames_to_ids = get_existing_basenames_to_ids(oh_member)

        filesmonth = get_googlefit_data(oh_access_token, gf_access_token,
                                        current_dt)
        for fn, month in filesmonth:
            api.upload_aws(fn, create_metadata(month),
                           oh_access_token,
                           project_member_id=oh_id)
            basename = os.path.basename(fn)
            if basename in basenames_to_ids:
                file_id_to_delete = basenames_to_ids[basename]
                api.delete_file(oh_access_token, file_id=file_id_to_delete)

        gf_member.last_updated = arrow.now().format()
        gf_member.save()

    except Exception as e:
        import traceback
        print("Fetching googlefit data failed: {}".format(e))
        print(traceback.format_exc())
        # queue to retry later
        fetch_googlefit_data.apply_async(args=[oh_id], countdown=3600)
        raise
Example #4
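Uploads a Garmin Health API summary file, deletes the file it replaces, and cleans up its temporary directory.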
def upload_summaries(oh_user, summaries, file_name, existing_file_id):
    temp_dir, file_path = write_json_data_to_tmp_file(
        f'garmin-health-api-{file_name}.json', summaries)
    api.upload_aws(file_path, create_metadata(file_name),
                   oh_user.get_access_token(),
                   project_member_id=oh_user.oh_id,
                   max_bytes=MAX_FILE_BYTES)
    if existing_file_id:
        api.delete_file(oh_user.get_access_token(), file_id=existing_file_id)
    os.remove(file_path)
    os.rmdir(temp_dir)
Example #5
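Uploads uBiome 16S taxonomy data as JSON; if the source file fails verification, the member is messaged and the broken file is deleted.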
def process_file(dfile, access_token, member, metadata, taxonomy):
    try:
        verify_ubiome(dfile)
        tmp_directory = tempfile.mkdtemp()
        base_filename = dfile['basename'].replace('.zip', '')
        taxonomy_file = base_filename + '.taxonomy.json'
        raw_filename = temp_join(tmp_directory, taxonomy_file)
        metadata = {
            'description': 'uBiome 16S taxonomy data, JSON format.',
            'tags': ['json', 'uBiome', '16S']
        }
        with open(raw_filename, 'w') as raw_file:
            json.dump(taxonomy, raw_file)
            raw_file.flush()

        api.upload_aws(raw_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
    except Exception:
        api.message("uBiome integration: A broken file was deleted",
                    "While processing your uBiome file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Email us at [email protected] if "
                    "you think this file should be valid.",
                    access_token,
                    base_url=OH_BASE_URL)
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
        raise
Example #6
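Replaces a per-endpoint Jawbone data file, choosing the metadata description and tags by endpoint (moves, sleeps, or heartrates).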
def add_jawbone_data(oh_member, data, endpoint):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'tags': ['Jawbone'],
        'updated_at': str(datetime.utcnow()),
    }
    if endpoint == 'moves':
        metadata['description'] = ('Jawbone "moves" data, including steps, '
                                   'calories, and activity')
        metadata['tags'].append('steps')
    elif endpoint == 'sleeps':
        metadata['description'] = ('Jawbone "sleeps" data, including time, '
                                   'duration, and depth estimates.')
        metadata['tags'].append('sleep')
    elif endpoint == 'heartrates':
        metadata['description'] = ('Jawbone "heartrates" data, including '
                                   'resting heartrates')
        metadata['tags'].append('heartrate')
    out_file = os.path.join(
        tmp_directory,
        'jawbone-{}-data.json'.format(endpoint))
    logger.debug('deleted old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token,
                    oh_member.oh_id,
                    file_basename='jawbone-{}-data.json'.format(endpoint))
    with open(out_file, 'w') as json_file:
        json.dump(data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata,
                   oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('added new jawbone {} file for {}'.format(
        endpoint, oh_member.oh_id))
Example #7
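A per-month variant of the Garmin summary upload, capping uploads with max_bytes.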
def upload_summaries(oh_user, summaries, file_name, month, existing_file_id):
    fn = write_json_data_to_tmp_file(f'garmin-health-api-{file_name}-{month}.json', summaries)
    api.upload_aws(fn, create_metadata(file_name, month),
                   oh_user.get_access_token(),
                   project_member_id=oh_user.oh_id,
                   max_bytes=MAX_FILE_BYTES)
    if existing_file_id:
        api.delete_file(oh_user.get_access_token(), file_id=existing_file_id)
Example #8
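Verifies an uploaded VCF, uploads a derived metadata JSON, and deletes the original file if either verification or upload fails.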
def process_file(dfile, access_token, member, metadata):
    try:
        vcf_metadata = verify_vcf(dfile)
    except Exception:
        api.message("VCF integration: A broken file was deleted",
                    "While processing your VCF file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Email us at [email protected] if "
                    "you think this file should be valid.",
                    access_token,
                    base_url=OH_BASE_URL)
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
        raise
    try:
        tmp_directory = tempfile.mkdtemp()
        base_filename = dfile['basename']

        # Save the VCF metadata JSON to a temp file.
        if base_filename.endswith('.gz'):
            base_filename = base_filename[0:-3]
        elif base_filename.endswith('.bz2'):
            base_filename = base_filename[0:-4]
        meta_filename = base_filename + '.metadata.json'
        raw_filename = temp_join(tmp_directory, meta_filename)
        metadata = {'description': 'VCF file metadata', 'tags': ['vcf']}
        with open(raw_filename, 'w') as raw_file:
            json.dump(vcf_metadata, raw_file)
            raw_file.flush()

        api.upload_aws(raw_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
    except Exception:
        api.message("VCF integration: File could not be uploaded",
                    "Something went wrong when processing your "
                    "file. Please try to upload it again. "
                    "Please email us at [email protected] if "
                    "this keeps happening.",
                    access_token,
                    base_url=OH_BASE_URL)
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
        raise
Example #9
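Replaces a member's Fitbit data file; the print calls are debugging output left in the original source.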
def replace_fitbit(oh_member, fitbit_data):
    print("replace function started")
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Fitbit data.',
        'tags': ['Fitbit', 'activity', 'steps'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'fitbit-data.json')
    logger.debug('deleted old file for {}'.format(oh_member.oh_id))
    deleter = api.delete_file(oh_member.access_token,
                              oh_member.oh_id,
                              file_basename="fitbit-data.json")
    print("delete response")
    print(deleter)
    print("trying to write to file")
    with open(out_file, 'w') as json_file:
        print("inside open file")
        json_file.write(json.dumps(fitbit_data))
        print("dumped, trying to flush")
        json_file.flush()
    print("attempting add response")
    addr = api.upload_aws(out_file,
                          metadata,
                          oh_member.access_token,
                          project_member_id=oh_member.oh_id)
    print("add response")
    print(addr)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
Example #10
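Asserts that upload_aws raises when called with an invalid access token.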
    def test_upload_aws_invalid_access_token(self):
        with self.assertRaises(Exception):
            response = upload_aws(
                target_filepath='foo',
                metadata=FILE_METADATA,
                access_token=ACCESS_TOKEN_INVALID,
                project_member_id=VALID_PMI1)
            assert response.json() == {"detail": "Invalid token."}
Example #11
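Like Example #3, but checks that a Google Fit connection exists, caps uploads with max_bytes, and optionally sends a first-run email.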
def fetch_googlefit_data(oh_id, send_email=False):
    '''
    Fetches all of the googlefit data for a given user
    '''
    print("Started googlefit update task")
    try:
        current_dt = datetime.utcnow()
        oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
        if not hasattr(oh_member, 'googlefit_member'):
            print("No googlefit connection exists for member")
            return
        gf_member = oh_member.googlefit_member
        oh_access_token = oh_member.get_access_token()
        gf_access_token = gf_member.get_access_token()

        basenames_to_ids = get_existing_basenames_to_ids(oh_member)

        filesmonth = get_googlefit_data(oh_access_token, gf_access_token,
                                        current_dt)
        for fn, month in filesmonth:
            api.upload_aws(fn,
                           create_metadata(month),
                           oh_access_token,
                           project_member_id=oh_id,
                           max_bytes=MAX_FILE_BYTES)
            basename = os.path.basename(fn)
            if basename in basenames_to_ids:
                file_id_to_delete = basenames_to_ids[basename]
                api.delete_file(oh_access_token, file_id=file_id_to_delete)

        gf_member.last_updated = arrow.now().format()
        gf_member.save()

        if send_email and len(filesmonth) > 0:
            send_first_success_email(oh_id, oh_access_token)
        elif send_email and len(filesmonth) == 0:
            send_first_no_data_email(oh_id, oh_access_token)

    except Exception as e:
        import traceback
        print("Fetching googlefit data failed: {}".format(e))
        print(traceback.format_exc())
        # queue to retry later
        fetch_googlefit_data.apply_async(args=[oh_id], countdown=3600)
        raise
Example #12
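Asserts that upload_aws raises when the access token has expired.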
    def test_upload_aws_expired_access_token(self):
        with self.assertRaises(Exception):
            with patch('ohapi.api.open', mock_open(), create=True):
                response = upload_aws(
                    target_filepath='foo',
                    metadata=FILE_METADATA,
                    access_token=ACCESS_TOKEN_EXPIRED,
                    project_member_id=VALID_PMI1)
                assert response.json() == {"detail": "Expired token."}
Example #13
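Uploads Garmin daily summaries and tracks the earliest date for which the member has data.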
def upload_user_dailies(garmin_user_id, user_map, existing_file_id):
    min_date = earliest_date(user_map)
    fn = write_jsonfile_to_tmp_dir('garmin-dailies.json', user_map)
    oh_user = get_oh_user_from_garmin_id(garmin_user_id)
    api.upload_aws(fn,
                   create_metadata(),
                   oh_user.get_access_token(),
                   project_member_id=oh_user.oh_id,
                   max_bytes=MAX_FILE_BYTES)

    oh_user.garmin_member.last_updated = datetime.now()
    earliest = oh_user.garmin_member.earliest_available_data
    if not earliest or min_date < earliest.replace(tzinfo=None):
        oh_user.garmin_member.earliest_available_data = min_date
    oh_user.garmin_member.save()
    if existing_file_id:
        api.delete_file(oh_user.get_access_token(), file_id=existing_file_id)
Example #14
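Parses a Google Location History upload, re-uploads it as normalized JSON plus semantic data, and deletes the member's original file either way.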
def process_file(dfile, access_token, member, metadata):
    infile_suffix = dfile['basename'].split(".")[-1]
    tf_in = tempfile.NamedTemporaryFile(suffix="." + infile_suffix)
    tf_in.write(requests.get(dfile['download_url']).content)
    tf_in.flush()
    tmp_directory = tempfile.mkdtemp()
    filename_base = 'Location History.json'
    location_data = get_json(tf_in)
    if location_data:
        location_json = json.loads(location_data)
        output_file = tmp_directory + '/' + filename_base
        with open(output_file, 'w') as raw_file:
            json.dump(location_json, raw_file)
        metadata = {
            'description': 'Google Location History JSON',
            'tags': ['google location history', 'gps'],
            'creation_date': arrow.get().format(),
        }
        api.upload_aws(output_file,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
        get_semantic_data(tf_in, tmp_directory, member, access_token)
    else:
        api.message("Google Location History: A broken file was deleted",
                    "While processing your Google Location History file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Please make sure you upload "
                    "the right file:\nWe expect the file to be a "
                    "single json file "
                    "or a .zip file as downloaded from Google Takeout."
                    " Please "
                    "do not alter the original file, as unexpected "
                    "additions can invalidate the file.",
                    access_token,
                    base_url=OH_BASE_URL)
    api.delete_file(access_token,
                    str(member['project_member_id']),
                    file_id=str(dfile['id']),
                    base_url=OH_BASE_URL)
Example #15
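A template "replace" task with dummy demo data: delete the old file, write a fresh temp file, upload it.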
def replace_datasource(oh_member, source_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Dummy data for demo.',
        'tags': ['demo', 'dummy', 'test'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'dummy-data.json')
    logger.debug('deleted old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token,
                    oh_member.oh_id,
                    file_basename="dummy-data.json")
    with open(out_file, 'w') as json_file:
        json.dump(source_data, json_file)
        json_file.flush()
    api.upload_aws(out_file,
                   metadata,
                   oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
Example #16
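Asserts that upload_aws rejects metadata lacking the required 'description' field.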
    def test_upload_aws_invalid_metadata_without_description(self):
        with self.assertRaises(Exception):
            with patch('ohapi.api.open', mock_open(), create=True):
                response = upload_aws(
                    target_filepath='foo',
                    metadata=FILE_METADATA_INVALID_WITHOUT_DESC,
                    access_token=ACCESS_TOKEN,
                    project_member_id=VALID_PMI1)
                assert response.json() == {
                    "metadata": ["\"description\" is a "
                                 "required field of the metadata"]}
Example #17
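Replaces a member's Moves storyline data file.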
def replace_moves(oh_member, moves_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Moves GPS maps, locations, and steps data.',
        'tags': ['GPS', 'Moves', 'steps'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'moves-storyline-data.json')
    logger.debug('deleted old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token,
                    oh_member.oh_id,
                    file_basename="moves-storyline-data.json")
    with open(out_file, 'w') as json_file:
        json.dump(moves_data, json_file)
        json_file.flush()
    api.upload_aws(out_file,
                   metadata,
                   oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
Example #18
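Replaces a member's RescueTime productivity data file.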
def replace_rescuetime(oh_member, rescuetime_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'RescueTime productivity data.',
        'tags': ['Rescuetime', 'productivity'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'rescuetime.json')
    logger.debug('deleted old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token,
                    oh_member.oh_id,
                    file_basename="rescuetime.json")
    with open(out_file, 'w') as json_file:
        json.dump(rescuetime_data, json_file)
        json_file.flush()
    api.upload_aws(out_file,
                   metadata,
                   oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
Example #19
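Refreshes a member's GitHub data file, deletes all previously uploaded files, and re-queues itself after four hours on failure.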
def process_github(oh_id):
    """
    Update the github file for a given OH user
    """
    try:
        logger.debug('Starting github processing for {}'.format(oh_id))
        oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
        oh_access_token = oh_member.get_access_token(
            client_id=settings.OPENHUMANS_CLIENT_ID,
            client_secret=settings.OPENHUMANS_CLIENT_SECRET)
        github_member = oh_member.datasourcemember
        github_access_token = github_member.get_access_token(
            client_id=settings.GITHUB_CLIENT_ID,
            client_secret=settings.GITHUB_CLIENT_SECRET)

        #print("OH access token: {}".format(oh_access_token))

        gh_file = gh_api.get_github_data(oh_access_token, github_access_token)

        existing_file_ids = get_existing_file_ids(oh_member)
        print(existing_file_ids)
        api.upload_aws(gh_file,
                       create_file_metadata(),
                       oh_access_token,
                       project_member_id=oh_id,
                       max_bytes=MAX_FILE_BYTES)

        for file_id in existing_file_ids:
            api.delete_file(oh_access_token, file_id=file_id)

        github_member.last_updated = arrow.now().format()
        github_member.save()
    except Exception as e:
        import traceback
        print("Fetching github data failed: {}".format(e))
        print(traceback.format_exc())
        # queue to retry later
        process_github.apply_async(args=[oh_id], countdown=4 * 3600)
        raise
Example #20
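Replaces a member's Twitter data file.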
def replace_twitter(oh_member, twitter_data):
    # delete old file and upload new to open humans
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'Twitter activity feed data.',
        'tags': ['demo', 'Twitter', 'test'],
        'updated_at': str(datetime.utcnow()),
    }
    out_file = os.path.join(tmp_directory, 'twitter-data.json')
    logger.debug('deleted old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token,
                    oh_member.oh_id,
                    file_basename="twitter-data.json")
    with open(out_file, 'w') as json_file:
        json.dump(twitter_data, json_file)
        json_file.flush()
    api.upload_aws(out_file,
                   metadata,
                   oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
Example #21
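Replaces a member's Nokia Health data file.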
def replace_nokia(oh_member, nokia_data):
    """
    Delete any old file and upload new
    """
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'tags': ['nokiahealthdata', 'health', 'measure'],
        'description': 'File with Nokia Health data',
        'updated_at': str(datetime.utcnow()),
    }
    filename = 'nokiahealthdata.json'
    out_file = os.path.join(tmp_directory, filename)
    logger.debug('deleted old file for {}'.format(oh_member.oh_id))
    api.delete_file(oh_member.access_token,
                    oh_member.oh_id,
                    file_basename=filename)
    with open(out_file, 'w') as json_file:
        json.dump(nokia_data, json_file)
        json_file.flush()
    api.upload_aws(out_file, metadata,
                   oh_member.access_token,
                   project_member_id=oh_member.oh_id)
    logger.debug('uploaded new file for {}'.format(oh_member.oh_id))
Example #22
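Uploads an imputed VCF produced by Imputer, tolerating a missing previous file and raising max_bytes to 256 MB.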
def process_source(oh_id):
    oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
    OUT_DIR = os.environ.get('OUT_DIR')
    metadata = {
        'description': 'Imputed genotypes from Imputer',
        'tags': ['genomics'],
        'updated_at': str(datetime.utcnow()),
    }
    oh_access_token = oh_member.get_access_token(
        client_id=settings.OPENHUMANS_CLIENT_ID,
        client_secret=settings.OPENHUMANS_CLIENT_SECRET)

    # Delete any previous upload; a missing file raises FileNotFoundError.
    try:
        api.delete_file(oh_member.access_token,
                        oh_member.oh_id,
                        file_basename="member.imputed.vcf.bz2")
    except FileNotFoundError:
        logger.info('New Source File')
    api.upload_aws('{}/{}/member.imputed.vcf.bz2'.format(OUT_DIR, oh_id),
                   metadata,
                   oh_access_token,
                   project_member_id=oh_member.oh_id,
                   max_bytes=256000000)
Example #23
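Writes notebook content to a temporary file and returns the upload_aws response.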
def upload_notebook(notebook_content,
                    notebook_name,
                    access_token,
                    project_member_id):
    """
    Upload a notebook to the Personal Data Notebook project on Open Humans.
    """
    tmp_directory = tempfile.mkdtemp()
    metadata = {
        'description': 'A Personal Data Notebook',
        'tags': ['personal data notebook', 'notebook', 'jupyter']
    }
    out_file = os.path.join(tmp_directory, notebook_name)
    with open(out_file, 'wb') as tmp_notebook:
        tmp_notebook.write(notebook_content)
        tmp_notebook.flush()
    print(out_file)
    upload_response = api.upload_aws(out_file,
                                     metadata,
                                     access_token,
                                     project_member_id=project_member_id)
    return upload_response
Example #24
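Splits Runkeeper activity data into per-year JSON files and, for each year, deletes the old file before uploading the new one.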
def process_runkeeper(oh_id):
    """
    Data is split per-year, in JSON format.
    Each JSON is an object (dict) in the following format (pseudocode):

    { 'background_activities':
        [
          { key: value for each of BACKGROUND_DATA_KEYS },
          { key: value for each of BACKGROUND_DATA_KEYS },
          ...
        ],
      'fitness_activities':
        [
          { 'path': { key: value for each of FITNESS_PATH_KEYS },
             and key: value for each of the FITNESS_ACTIVITY_KEYS },
          { 'path': { key: value for each of FITNESS_PATH_KEYS },
             and key: value for each of the FITNESS_ACTIVITY_KEYS },
          ...
        ]
    }

    Notes:
        - items are sorted according to start_time or timestamp
        - The item_uri for fitness_activities matches item_uri in
          fitness_activity_sharing.
    """
    oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
    oh_access_token = oh_member.get_access_token(
        client_id=settings.OPENHUMANS_CLIENT_ID,
        client_secret=settings.OPENHUMANS_CLIENT_SECRET)
    runkeeper_member = oh_member.datasourcemember
    print('start processing data for {}'.format(runkeeper_member.runkeeper_id))

    access_token = runkeeper_member.access_token
    user_data = runkeeper_query('/user', access_token)
    runkeeper_member.runkeeper_id = user_data['userID']

    # Get activity data.
    fitness_activity_path = '{}?pageSize={}'.format(
        user_data['fitness_activities'], PAGESIZE)
    fitness_activity_items, complete_fitness_activity_years = yearly_items(
        get_items(path=fitness_activity_path, access_token=access_token))

    # Background activities.
    background_activ_path = '{}?pageSize={}'.format(
        user_data['background_activities'], PAGESIZE)
    background_activ_items, complete_background_activ_years = yearly_items(
        get_items(background_activ_path, access_token))

    all_years = sorted(
        set(
            list(fitness_activity_items.keys()) +
            list(background_activ_items.keys())))
    all_completed_years = set(complete_fitness_activity_years +
                              complete_background_activ_years)

    for year in all_years:
        outdata = {'fitness_activities': [], 'background_activities': []}

        fitness_items = sorted(
            fitness_activity_items.get(year, []),
            key=lambda item: datetime.strptime(item['start_time'],
                                               '%a, %d %b %Y %H:%M:%S'))
        for item in fitness_items:
            item_data = runkeeper_query(item['uri'], access_token)
            item_data_out = data_for_keys(item_data, FITNESS_SUMMARY_KEYS)
            item_data_out['path'] = [
                data_for_keys(datapoint, FITNESS_PATH_KEYS)
                for datapoint in item_data['path']
            ]
            outdata['fitness_activities'].append(item_data_out)
        background_items = sorted(
            background_activ_items.get(year, []),
            key=lambda item: datetime.strptime(item['timestamp'],
                                               '%a, %d %b %Y %H:%M:%S'))

        for item in background_items:
            outdata['background_activities'].append(
                data_for_keys(item, BACKGROUND_DATA_KEYS))

        filename = 'Runkeeper-activity-data-{}.json'.format(str(year))
        temp_directory = tempfile.mkdtemp()
        filepath = os.path.join(temp_directory, filename)
        with open(filepath, 'w') as f:
            json.dump(outdata, f, indent=2, sort_keys=True)
            f.flush()

        metadata = {
            'description': ('Runkeeper GPS maps and imported '
                            'activity data.'),
            'tags': ['GPS', 'Runkeeper'],
            'dataYear': year,
            'complete': year in all_completed_years,
        }
        api.delete_file(oh_member.access_token,
                        oh_member.oh_id,
                        file_basename=filename)
        api.upload_aws(filepath,
                       metadata,
                       oh_access_token,
                       project_member_id=oh_member.oh_id)
    runkeeper_member.last_updated = arrow.now().format()
    runkeeper_member.save()
    print('finished processing data for {}'.format(
        runkeeper_member.runkeeper_id))
Example #25
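Converts raw AncestryDNA data to both its original text format and a sorted VCF, uploads both, and always deletes the source file afterwards.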
def process_file(dfile, access_token, member, metadata):
    try:
        infile_suffix = dfile['basename'].split(".")[-1]
        tf_in = tempfile.NamedTemporaryFile(suffix="." + infile_suffix)
        tf_in.write(requests.get(dfile['download_url']).content)
        tf_in.flush()
        tmp_directory = tempfile.mkdtemp()
        filename_base = 'AncestryDNA-genotyping'
        raw_ancestry, chr_sex = clean_raw_ancestrydna(tf_in)
        raw_ancestry.seek(0)
        vcf_ancestry_unsort = vcf_from_raw_ancestrydna(raw_ancestry, chr_sex)

        # Save raw Ancestry genotyping to temp file.
        raw_filename = filename_base + '.txt'
        raw_filename = temp_join(tmp_directory, raw_filename)
        metadata = {
            'description': 'AncestryDNA full genotyping data, original format',
            'tags': ['AncestryDNA', 'genotyping'],
            'creation_date': arrow.get().format(),
        }
        with open(raw_filename, 'w') as raw_file:
            raw_ancestry.seek(0)
            shutil.copyfileobj(raw_ancestry, raw_file)
            raw_file.flush()

        api.upload_aws(raw_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))

        # Save VCF Ancestry genotyping to temp file.
        vcf_filename = filename_base + '.vcf.bz2'
        vcf_filename = temp_join(tmp_directory, vcf_filename)

        metadata = {
            'description': 'AncestryDNA full genotyping data, VCF format',
            'tags': ['AncestryDNA', 'genotyping', 'vcf'],
            'creation_date': arrow.get().format()
        }

        vcf_ancestry_unsort.seek(0)
        vcf_ancestry = sort_vcf(vcf_ancestry_unsort)

        with bz2.BZ2File(vcf_filename, 'w') as vcf_file:
            vcf_ancestry.seek(0)
            for i in vcf_ancestry:
                vcf_file.write(i)

        api.upload_aws(vcf_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))

    except Exception:
        api.message("AncestryDNA integration: A broken file was deleted",
                    "While processing your AncestryDNA file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Please make sure you upload "
                    "the right file:\nWe expect the file to be a "
                    "single txt file (either unzipped, bz2 zipped or gzipped) "
                    "or a .zip file that contains a single txt file (this is "
                    " what you can download from Ancestry right away) Please "
                    "do not alter the original txt file, as unexpected "
                    "additions can invalidate the file.",
                    access_token,
                    base_url=OH_BASE_URL)
        raise

    finally:
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
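
Taken together, these examples share one shape: write the payload to a temporary file, call api.upload_aws with metadata, an access token, and a project member ID, and optionally delete the file it replaces via api.delete_file. Below is a minimal sketch of that pattern, assuming the ohapi client is installed; upload_json and its metadata values are illustrative placeholders, not part of ohapi.

import json
import os
import tempfile

from ohapi import api


def upload_json(data, basename, access_token, project_member_id):
    # upload_aws takes a filepath, so write the payload to a temp file first.
    out_file = os.path.join(tempfile.mkdtemp(), basename)
    with open(out_file, 'w') as f:
        json.dump(data, f)
    # 'description' and 'tags' are the metadata fields these examples set;
    # Example #16 suggests 'description' is required by the API.
    metadata = {
        'description': 'Example JSON payload',
        'tags': ['example'],
    }
    return api.upload_aws(out_file, metadata, access_token,
                          project_member_id=project_member_id)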