Example #1
def process_file(dfile, access_token, member, metadata, taxonomy):
    try:
        verify_ubiome(dfile)
        tmp_directory = tempfile.mkdtemp()
        base_filename = dfile['basename'].replace('.zip', '')
        taxonomy_file = base_filename + '.taxonomy.json'
        raw_filename = temp_join(tmp_directory, taxonomy_file)
        metadata = {
            'description': 'uBiome 16S taxonomy data, JSON format.',
            'tags': ['json', 'uBiome', '16S']
        }
        with open(raw_filename, 'w') as raw_file:
            json.dump(taxonomy, raw_file)
            raw_file.flush()

        api.upload_aws(raw_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
    except:
        api.message("uBiome integration: A broken file was deleted",
                    "While processing your uBiome file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Email us as [email protected] if "
                    "you think this file should be valid.",
                    access_token,
                    base_url=OH_BASE_URL)
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
        raise
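Note: several of these handlers call a temp_join helper that the snippets do not define; a minimal sketch, assuming it simply wraps os.path.join for a directory created with tempfile.mkdtemp():

import os

def temp_join(tmp_directory, filename):
    # Hypothetical helper (not shown in the source): join a temp
    # directory created by tempfile.mkdtemp() with a target file name.
    return os.path.join(tmp_directory, filename)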
Example #2
def process_file(dfile, access_token, member, metadata):
    try:
        vcf_metadata = verify_vcf(dfile)
    except:
        api.message("VCF integration: A broken file was deleted",
                    "While processing your VCF file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Email us as [email protected] if "
                    "you think this file should be valid.",
                    access_token,
                    base_url=OH_BASE_URL)
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
        raise
    try:
        tmp_directory = tempfile.mkdtemp()
        base_filename = dfile['basename']

        # Save the VCF metadata to a temp file.
        if base_filename.endswith('.gz'):
            base_filename = base_filename[0:-3]
        elif base_filename.endswith('.bz2'):
            base_filename = base_filename[0:-4]
        meta_filename = base_filename + '.metadata.json'
        raw_filename = temp_join(tmp_directory, meta_filename)
        metadata = {'description': 'VCF file metadata', 'tags': ['vcf']}
        with open(raw_filename, 'w') as raw_file:
            json.dump(vcf_metadata, raw_file)
            raw_file.flush()

        api.upload_aws(raw_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
    except:
        api.message("VCF integration: File could not be uploaded",
                    "Something went wrong when processing your "
                    "file. Please try to upload it again. "
                    "Please email us as [email protected] if "
                    "this keeps happening.",
                    access_token,
                    base_url=OH_BASE_URL)
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
        raise
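verify_vcf is likewise not shown. A rough sketch under the assumption that it downloads the member's file and collects the ##key=value VCF header lines into a metadata dict, raising if the file does not start like a VCF; the project's actual checks may differ:

import bz2
import gzip
import tempfile

import requests

def verify_vcf(dfile):
    # Hypothetical sketch: download the file, then parse '##key=value'
    # header lines into a metadata dict.
    tf = tempfile.NamedTemporaryFile(suffix='-' + dfile['basename'])
    tf.write(requests.get(dfile['download_url']).content)
    tf.flush()
    if dfile['basename'].endswith('.gz'):
        opener = gzip.open
    elif dfile['basename'].endswith('.bz2'):
        opener = bz2.open
    else:
        opener = open
    metadata = {}
    with opener(tf.name, 'rt') as vcf:
        first = next(vcf)
        if not first.startswith('##fileformat=VCF'):
            raise ValueError('not a VCF file')
        metadata['fileformat'] = first.rstrip().split('=', 1)[1]
        for line in vcf:
            if not line.startswith('##'):
                break
            key, _, value = line[2:].rstrip().partition('=')
            metadata.setdefault(key, value)
    return metadata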
Example #3
 def test_message_all_members_false_projectmemberid_has_invalid_char(self):
     with self.assertRaises(Exception):
         response = message(project_member_ids=['abcdef1', 'test'],
                            subject=SUBJECT, message=MESSAGE,
                            access_token=MASTER_ACCESS_TOKEN)
         assert response.json() == {"errors":
                                    {"project_member_ids":
                                     ["Project member IDs are always 8" +
                                      " digits long."]}}
Example #4
 def test_message_all_members_false_projectmemberid_has_invalid_digit(self):
     with self.assertRaises(Exception):
         response = message(project_member_ids=[INVALID_PMI1,
                                                INVALID_PMI2],
                            subject=SUBJECT, message=MESSAGE,
                            access_token=MASTER_ACCESS_TOKEN)
         assert response.json() == {"errors":
                                    {"project_member_ids":
                                     ["Invalid project member ID(s):" +
                                      " invalidPMI2"]}}
Example #5
def process_file(dfile, access_token, member, metadata):
    infile_suffix = dfile['basename'].split(".")[-1]
    tf_in = tempfile.NamedTemporaryFile(suffix="." + infile_suffix)
    tf_in.write(requests.get(dfile['download_url']).content)
    tf_in.flush()
    tmp_directory = tempfile.mkdtemp()
    filename_base = 'Location History.json'
    location_data = get_json(tf_in)
    if location_data:
        location_json = json.loads(location_data)
        output_file = tmp_directory + '/' + filename_base
        with open(output_file, 'w') as raw_file:
            json.dump(location_json, raw_file)
        metadata = {
            'description': 'Google Location History JSON',
            'tags': ['google location history', 'gps'],
            'creation_date': arrow.get().format(),
        }
        api.upload_aws(output_file,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))
        get_semantic_data(tf_in, tmp_directory, member, access_token)
    else:
        api.message("Google Location History: A broken file was deleted",
                    "While processing your Google Location History file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Please make sure you upload "
                    "the right file:\nWe expect the file to be a "
                    "single json file "
                    "or a .zip file as downloaded from Google Takeout."
                    " Please "
                    "do not alter the original file, as unexpected "
                    "additions can invalidate the file.",
                    access_token,
                    base_url=OH_BASE_URL)
    api.delete_file(access_token,
                    str(member['project_member_id']),
                    file_id=str(dfile['id']),
                    base_url=OH_BASE_URL)
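get_json is not defined in this snippet either; a hedged sketch, assuming it returns the raw JSON text when the upload is a plain JSON file or the first .json member of a Takeout .zip, and None otherwise:

import json
import zipfile

def get_json(tf_in):
    # Hypothetical helper: extract JSON text from the downloaded file,
    # or return None if nothing parseable is found.
    tf_in.seek(0)
    if zipfile.is_zipfile(tf_in.name):
        with zipfile.ZipFile(tf_in.name) as zf:
            names = [n for n in zf.namelist() if n.endswith('.json')]
            if not names:
                return None
            data = zf.read(names[0]).decode('utf-8')
    else:
        tf_in.seek(0)
        data = tf_in.read().decode('utf-8')
    try:
        json.loads(data)
    except ValueError:
        return None
    return data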
Example #6
def process_target(data_file, access_token, member, metadata):
    try:
        tf = tempfile.NamedTemporaryFile(suffix=".gz")
        tf_out = tempfile.NamedTemporaryFile(prefix="ftdna-",
                                             suffix=".csv",
                                             mode="w+b")
        print("downloading ftdna file from oh")
        tf.write(requests.get(data_file['download_url']).content)
        tf.flush()
        print('read ftdna file')
        with gzip.open(tf.name, "rt", newline="\n") as ftdna_file:
            for line in ftdna_file:
                if valid_line(line):
                    tf_out.write(line.encode('ascii'))
        tf_out.flush()
        tf_out.seek(0)
        print('cleaned file')
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(data_file['id']))
        print('deleted old')
        upload_new_file(tf_out, access_token, str(member['project_member_id']),
                        data_file['metadata'])
    except:
        print('delete broken file')
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(data_file['id']))
        api.message(
            "A broken file was deleted",
            "While processing your FamilyTreeDNA file "
            "we noticed that your file does not conform "
            "to the expected specifications and it was "
            "thus deleted. Please make sure you upload "
            "the right file:\nWe expect the file to be a "
            "single, - gzipped (ends in .gz) - file as "
            "you can download from FamilyTreeDNA. Please "
            "do not alter or unzip this file, as unexpected additions "
            "also invalidate the file.", access_token)
Example #7
 def test_message_all_members_false_project_member_id_not_none_valid(self):
     response = message(project_member_ids=[VALID_PMI1, VALID_PMI2],
                        subject=SUBJECT,
                        message=MESSAGE,
                        access_token=ACCESS_TOKEN)
     self.assertEqual(response.status_code, 200)
Example #8
 def test_message_all_members_true_project_member_id_none(self):
     response = message(all_members=True,
                        subject=SUBJECT,
                        message=MESSAGE,
                        access_token=ACCESS_TOKEN)
     self.assertEqual(response.status_code, 200)
Example #9
 def test_message_invalid_access_token(self):
     response = message(subject=SUBJECT,
                        message=MESSAGE,
                        access_token=ACCESS_TOKEN_INVALID)
     assert response.json() == {"detail": "Invalid token."}
Example #10
 def test_message_expired_access_token(self):
     response = message(subject=SUBJECT,
                        message=MESSAGE,
                        access_token=ACCESS_TOKEN_EXPIRED)
     assert response.json() == {"detail": "Expired token."}
Example #11
 def test_message_valid_access_token(self):
     response = message(subject=SUBJECT,
                        message=MESSAGE,
                        access_token=ACCESS_TOKEN)
     self.assertEqual(response.status_code, 200)
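The message tests above rely on module-level fixtures that this listing omits; a minimal sketch with placeholder values (a real suite would load them from test settings or the environment):

# Hypothetical fixture values; only INVALID_PMI2's spelling is implied
# by the expected error message in Example #4.
SUBJECT = 'Test subject'
MESSAGE = 'Test message body.'
MASTER_ACCESS_TOKEN = 'master-access-token'
ACCESS_TOKEN = 'access-token'
ACCESS_TOKEN_INVALID = 'invalid-token'
ACCESS_TOKEN_EXPIRED = 'expired-token'
VALID_PMI1, VALID_PMI2 = '12345678', '87654321'
INVALID_PMI1, INVALID_PMI2 = 'invalidPMI1', 'invalidPMI2'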
Example #12
def upload_to_oh(oh_id):
    logger.info('{}: now uploading to OpenHumans'.format(oh_id))

    with open('{}/{}/header.txt'.format(OUT_DIR, oh_id), 'r') as headerobj:
        headiter = takewhile(lambda s: s.startswith('#'), headerobj)
        header = list(headiter)

    # construct the final header
    new_header = [
        '##FORMAT=<ID=GP,Number=3,Type=Float,Description="Estimated Posterior Probabilities (rounded to 3 digits) for Genotypes 0/0, 0/1 and 1/1">\n',
        '##INFO=<ID=INFO,Number=1,Type=Float,Description="Impute2 info metric">\n',
        '##imputerdate={}\n'.format(datetime.date.today().strftime("%m-%d-%y"))
    ]
    header.insert(-2, new_header[0])
    header.insert(-4, new_header[1])
    header.insert(1, new_header[2])
    # get all the contig info from the fasta index file
    fai = 'hg19.fasta.fai'
    contigs = pd.read_csv('{}/{}'.format(REF_FA, fai),
                          sep='\t',
                          names=['ID', 'length'],
                          usecols=[0, 1])
    contigs['ID'] = contigs['ID'].str.replace('chr', '')
    for row in contigs.itertuples():
        chrom = row[1]
        length = row[2]
        header.insert(-1, f'##contig=<ID={chrom},length={length}>\n')
    header = ''.join(header)

    # combine all vcfs
    os.chdir(settings.BASE_DIR)
    with open('{}/{}/member.imputed.vcf'.format(OUT_DIR, oh_id),
              'w') as outfile:
        for chrom in CHROMOSOMES:
            fname = '{}/{}/chr{}/chr{}/final_impute2/chr{}.member.imputed.vcf'.format(
                OUT_DIR, oh_id, chrom, chrom, chrom)
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)

    member_vcf_fp = '{}/{}/member.imputed.vcf'.format(OUT_DIR, oh_id)
    # add the header to the combined vcf file.
    with open(member_vcf_fp, 'r') as original:
        data = original.read()
    with open(member_vcf_fp, 'w') as modified:
        modified.write(header + data)

    # bzip the file
    with open(member_vcf_fp, 'rb') as input_:
        with bz2.BZ2File(member_vcf_fp + '.bz2', 'wb',
                         compresslevel=9) as output:
            copyfileobj(input_, output)

    # upload file to OpenHumans
    process_source(oh_id)

    # Message Member
    oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
    project_page = environ.get('OH_ACTIVITY_PAGE')
    explore_url = 'https://exploratory.openhumans.org/notebook/21/'
    visualization_url = 'https://exploratory.openhumans.org/notebook/26/'
    body = "Check {} to see your imputed genotype results from Open Humans.\nVisualize your results here: {}\nFurther explore your results here: {}".format(
        project_page, visualization_url, explore_url)
    api.message('Open Humans Imputation Complete',
                body,
                oh_member.access_token,
                project_member_ids=[oh_id])
    logger.info('{} emailed member'.format(oh_id))

    # check that the vcf file was uploaded.
    user_details = api.exchange_oauth2_member(oh_member.get_access_token())
    username = user_details['username']
    imputed = False
    for data in user_details['data']:
        if data['basename'] == 'member.imputed.vcf.bz2':
            imputed = True
    # alert the sentry admin that the pipeline has completed
    if imputed:
        # this is not really any error...properly log me please!
        logging.error(
            'Pipeline finished for member user name: {}, oh_id: {}'.format(
                username, oh_id))
    else:
        logging.error(
            'Error uploading imputed vcf for member -- user name: {}, oh_id: {}'
            .format(username, oh_id))

    # clean users files
    if not settings.DEBUG:
        os.chdir(settings.BASE_DIR)
        clean_command = ['imputer/clean_files.sh', '{}'.format(oh_id)]
        process = run(clean_command, stdout=PIPE, stderr=PIPE)
        logger.debug(process.stderr)
        logger.info('{} finished removing files'.format(oh_id))

    imputer_record = ImputerMember.objects.get(oh_id=oh_id, active=True)
    imputer_record.step = 'complete'
    imputer_record.active = False
    imputer_record.save()
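The header.insert calls in this example lean on Python's negative insert indices: list.insert(-2, x) places x before the second-to-last element rather than at the end. A tiny demonstration:

# insert(-2, ...) inserts *before* the second-to-last element.
lines = ['##a', '##b', '##c', '#CHROM']
lines.insert(-2, '##NEW')
print(lines)  # ['##a', '##b', '##NEW', '##c', '#CHROM']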
Example #13
def send_first_no_data_email(oh_id, oh_access_token):
    api.message('[GoogleFit] Data Import: No Data', 'No GoogleFit data was found to import. You need to be using the GoogleFit app on your Android device to collect data.',
                oh_access_token, project_member_ids=[oh_id])
Example #14
def send_first_success_email(oh_id, oh_access_token):
    api.message('[GoogleFit] Data Import: Success', 'Your GoogleFit data was imported successfully to OpenHumans. Go to your dashboard to view: https://googlefit.openhumans.org',
                oh_access_token, project_member_ids=[oh_id])
Example #15
def upload_to_oh(oh_id):
    logger.info('{}: now uploading to OpenHumans'.format(oh_id))

    with open('{}/{}/header.txt'.format(OUT_DIR, oh_id), 'r') as headerobj:
        headiter = takewhile(lambda s: s.startswith('#'), headerobj)
        header = list(headiter)

    # construct the final header
    new_header = [
        '##FORMAT=<ID=GP,Number=3,Type=Float,Description="Estimated Posterior Probabilities (rounded to 3 digits) for Genotypes 0/0, 0/1 and 1/1">\n',
        '##INFO=<ID=INFO,Number=1,Type=Float,Description="Impute2 info metric">\n',
        '##imputerdate={}\n'.format(datetime.date.today().strftime("%m-%d-%y"))
    ]
    header.insert(-2, new_header[0])
    header.insert(-4, new_header[1])
    header.insert(1, new_header[2])
    header = ''.join(header)

    # combine all vcfs
    os.chdir(settings.BASE_DIR)
    combine_command = ['imputer/combine_chrom.sh', '{}'.format(oh_id)]
    process = run(combine_command, stdout=PIPE, stderr=PIPE)
    logger.debug(process.stderr)

    member_vcf_fp = '{}/{}/member.imputed.vcf'.format(OUT_DIR, oh_id)
    # add the header to the combined vcf file.
    with open(member_vcf_fp, 'r') as original:
        data = original.read()
    with open(member_vcf_fp, 'w') as modified:
        modified.write(header + data)

    # bzip the file
    with open(member_vcf_fp, 'rb') as input_:
        with bz2.BZ2File(member_vcf_fp + '.bz2', 'wb',
                         compresslevel=9) as output:
            copyfileobj(input_, output)

    # upload file to OpenHumans
    process_source(oh_id)

    # Message Member
    oh_member = OpenHumansMember.objects.get(oh_id=oh_id)
    project_page = environ.get('OH_ACTIVITY_PAGE')
    api.message(
        'Open Humans Imputation Complete',
        'Check {} to see your imputed genotype results from Open Humans.'.
        format(project_page),
        oh_member.access_token,
        project_member_ids=[oh_id])
    logger.info('{} emailed member'.format(oh_id))

    # clean users files
    if not settings.DEBUG:
        os.chdir(settings.BASE_DIR)
        clean_command = ['imputer/clean_files.sh', '{}'.format(oh_id)]
        process = run(clean_command, stdout=PIPE, stderr=PIPE)
        logger.debug(process.stderr)
        logger.info('{} finished removing files'.format(oh_id))

    imputer_record = ImputerMember.objects.get(oh_id=oh_id, active=True)
    imputer_record.step = 'complete'
    imputer_record.active = False
    imputer_record.save()
Example #16
def process_file(dfile, access_token, member, metadata):
    try:
        infile_suffix = dfile['basename'].split(".")[-1]
        tf_in = tempfile.NamedTemporaryFile(suffix="." + infile_suffix)
        tf_in.write(requests.get(dfile['download_url']).content)
        tf_in.flush()
        tmp_directory = tempfile.mkdtemp()
        filename_base = 'AncestryDNA-genotyping'
        raw_ancestry, chr_sex = clean_raw_ancestrydna(tf_in)
        raw_ancestry.seek(0)
        vcf_ancestry_unsort = vcf_from_raw_ancestrydna(raw_ancestry, chr_sex)

        # Save raw Ancestry genotyping to temp file.
        raw_filename = filename_base + '.txt'
        raw_filename = temp_join(tmp_directory, raw_filename)
        metadata = {
            'description': 'AncestryDNA full genotyping data, original format',
            'tags': ['AncestryDNA', 'genotyping'],
            'creation_date': arrow.get().format(),
        }
        with open(raw_filename, 'w') as raw_file:
            raw_ancestry.seek(0)
            shutil.copyfileobj(raw_ancestry, raw_file)
            raw_file.flush()

        api.upload_aws(raw_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))

        # Save VCF Ancestry genotyping to temp file.
        vcf_filename = filename_base + '.vcf.bz2'
        vcf_filename = temp_join(tmp_directory, vcf_filename)

        metadata = {
            'description': 'AncestryDNA full genotyping data, VCF format',
            'tags': ['AncestryDNA', 'genotyping', 'vcf'],
            'creation_date': arrow.get().format()
        }

        vcf_ancestry_unsort.seek(0)
        vcf_ancestry = sort_vcf(vcf_ancestry_unsort)

        with bz2.BZ2File(vcf_filename, 'w') as vcf_file:
            vcf_ancestry.seek(0)
            for i in vcf_ancestry:
                vcf_file.write(i)

        api.upload_aws(vcf_filename,
                       metadata,
                       access_token,
                       base_url=OH_BASE_URL,
                       project_member_id=str(member['project_member_id']))

    except:
        api.message("AncestryDNA integration: A broken file was deleted",
                    "While processing your AncestryDNA file "
                    "we noticed that your file does not conform "
                    "to the expected specifications and it was "
                    "thus deleted. Please make sure you upload "
                    "the right file:\nWe expect the file to be a "
                    "single txt file (either unzipped, bz2 zipped or gzipped) "
                    "or a .zip file that contains a single txt file (this is "
                    " what you can download from Ancestry right away) Please "
                    "do not alter the original txt file, as unexpected "
                    "additions can invalidate the file.",
                    access_token,
                    base_url=OH_BASE_URL)
        raise

    finally:
        api.delete_file(access_token,
                        str(member['project_member_id']),
                        file_id=str(dfile['id']),
                        base_url=OH_BASE_URL)
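All of these process_file handlers share one shape: they take a single entry from the member's file list plus an access token. A hedged driver sketch, assuming ohapi's exchange_oauth2_member (already used in Example #12) to fetch that list; the wiring itself is illustrative, not from the source:

from ohapi import api

def process_all_files(access_token):
    # Hypothetical driver: fetch the member record via ohapi, then run
    # the handler on each stored file entry.
    member = api.exchange_oauth2_member(access_token)
    for dfile in member['data']:
        process_file(dfile, access_token, member, dfile.get('metadata'))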