Esempio n. 1
0
    def test_pulldown_third_party(self):
        """Check that third-party survey data appears in pulldown on request.

        Stores the external survey fixture under the name 'Vioscreen' and
        then inspects entry ``1`` of the pulldown output for each combination
        of the ``blanks`` and ``external`` arguments.
        """
        # Add survey answers
        with open(self.ext_survey_fp, 'rU') as f:
            obs = db.store_external_survey(f,
                                           'Vioscreen',
                                           separator=',',
                                           survey_id_col='SubjectId',
                                           trim='-160')
        self.assertEqual(obs, 3)

        barcodes = ['000029429', '000018046', '000023299', '000023300']

        # assertIn / assertNotIn are used throughout so that a failure
        # reports both operands instead of a bare "True is not false".

        # Test without third party
        obs, _ = db.pulldown(barcodes)
        survey = obs[1]
        self.assertNotIn('VIOSCREEN', survey)

        obs, _ = db.pulldown(barcodes, blanks=['BLANK.01'])
        survey = obs[1]
        self.assertNotIn('VIOSCREEN', survey)
        self.assertIn('BLANK.01', survey)

        # Test with third party
        obs, _ = db.pulldown(barcodes, external=['Vioscreen'])
        survey = obs[1]
        self.assertIn('VIOSCREEN', survey)

        obs, _ = db.pulldown(barcodes,
                             blanks=['BLANK.01'],
                             external=['Vioscreen'])
        survey = obs[1]
        self.assertIn('VIOSCREEN', survey)
        self.assertIn('BLANK.01', survey)
Esempio n. 2
0
    def test_pulldown_third_party(self):
        """Third-party columns show up in survey 1 only when asked for.

        The external survey fixture is stored as 'Vioscreen'; the pulldown is
        then run with and without the ``blanks`` / ``external`` arguments.
        """
        # Add survey answers
        with open(self.ext_survey_fp, 'rU') as handle:
            n_stored = db.store_external_survey(
                handle, 'Vioscreen', separator=',',
                survey_id_col='SubjectId', trim='-160')
        self.assertEqual(n_stored, 3)

        barcodes = ['000029429', '000018046', '000023299', '000023300']

        def primary_survey(**options):
            # run the pulldown and hand back entry 1 of its metadata
            metadata, _ = db.pulldown(barcodes, **options)
            return metadata[1]

        # Test without third party
        text = primary_survey()
        self.assertFalse('VIOSCREEN' in text)

        text = primary_survey(blanks=['BLANK.01'])
        self.assertFalse('VIOSCREEN' in text)
        self.assertTrue('BLANK.01' in text)

        # Test with third party
        text = primary_survey(external=['Vioscreen'])
        self.assertTrue('VIOSCREEN' in text)

        text = primary_survey(blanks=['BLANK.01'], external=['Vioscreen'])
        self.assertTrue('VIOSCREEN' in text)
        self.assertTrue('BLANK.01' in text)
Esempio n. 3
0
    def test_align_with_qiita_categories(self):
        """Compare align_with_qiita_categories against a hand-built table.

        The expectation is derived from entry 1 of a raw pulldown: columns
        are lower-cased, the sample known to fail is added as a
        'pulldown-issue' row, and categories absent from the pulldown are
        filled with 'Missing: Not provided'.
        """
        samples = ['000004216', '000017291', '000004215']

        # apparently the call to pulldown is not idempotent
        # the first call is != to the second, but the second
        # is equal to the third. So warm up once and use the second result.
        db.pulldown(samples)
        primary_text = db.pulldown(samples)[0][1]

        frame = pd.read_csv(StringIO.StringIO(primary_text),
                            sep='\t', dtype=str)
        frame = frame.set_index('sample_name')
        frame.columns = [name.lower() for name in frame.columns]

        # as of 15august2019, 000017291 does not successfully pulldown. this
        # sample has an inconsistency in the metadata that triggers a failure
        # condition. This test SHOULD fail when metadata pulldown is
        # successfully revisited.
        self.assertFalse('000017291' in frame.index)
        placeholder = pd.Series(['pulldown-issue'] * len(frame.columns),
                                index=frame.columns,
                                name='000017291')
        frame = frame.append(placeholder)

        # per a request from Gail
        frame.loc['000017291', 'env_package'] = 'Air'

        absent = set(AG_DEBUG_OBSERVED_CATEGORIES) - set(frame.columns)
        for category in absent:
            frame[category] = 'Missing: Not provided'

        exp = {sample: frame.loc[sample].to_dict() for sample in samples}

        obs = align_with_qiita_categories(samples,
                                          AG_DEBUG_OBSERVED_CATEGORIES)

        # for an undetermined reason, simply testing equality on the obs
        # and exp dicts is very time consuming, so compare sample by sample.
        self.assertEqual(sorted(obs), sorted(exp))
        for sample in obs:
            self.assertEqual(sorted(obs[sample].items()),
                             sorted(exp[sample].items()))
Esempio n. 4
0
    def post(self):
        """Send a zip archive with the pulled-down metadata for a request.

        Reads the comma separated ``barcodes``, ``blanks`` and ``external``
        request arguments. The archive holds ``failures.txt`` plus one
        ``survey_<id>_md.txt`` file per entry of the pulldown metadata.
        """
        barcodes = self.get_argument('barcodes').split(',')

        # an empty 'blanks' / 'external' argument means "none requested"
        raw_blanks = self.get_argument('blanks')
        blanks = raw_blanks.split(',') if raw_blanks else []
        raw_external = self.get_argument('external')
        external = raw_external.split(',') if raw_external else []

        # Get metadata and create zip file
        metadata, failures = db.pulldown(barcodes, blanks, external)

        meta_zip = InMemoryZip()
        failure_lines = ['\t'.join(entry) for entry in viewitems(failures)]
        meta_zip.append("failures.txt",
                        "The following barcodes were not retrieved "
                        "for any survey:\n%s" % '\n'.join(failure_lines))
        for survey, meta in viewitems(metadata):
            meta_zip.append('survey_%s_md.txt' % survey, meta)

        # write out zip file
        headers = [
            ('Content-type', 'application/octet-stream'),
            ('Content-Transfer-Encoding', 'binary'),
            ('Accept-Ranges', 'bytes'),
            ('Content-Encoding', 'none'),
            ('Content-Disposition', 'attachment; filename=metadata.zip'),
        ]
        for header_name, header_value in headers:
            self.add_header(header_name, header_value)
        self.write(meta_zip.write_to_buffer())
        self.flush()
        self.finish()
Esempio n. 5
0
    def post(self):
        """Answer the request with ``metadata.zip`` built from a pulldown.

        ``barcodes``, ``blanks`` and ``external`` are read from the request
        as comma separated strings; the zip gets a ``failures.txt`` and one
        metadata file per survey returned by the pulldown.
        """
        def optional_list(name):
            # empty argument -> empty list, otherwise split on commas
            value = self.get_argument(name)
            if not value:
                return []
            return value.split(',')

        barcodes = self.get_argument('barcodes').split(',')
        blanks = optional_list('blanks')
        external = optional_list('external')

        # Get metadata and create zip file
        metadata, failures = db.pulldown(barcodes, blanks, external)

        archive = InMemoryZip()
        failed = '\n'.join('\t'.join(pair) for pair in viewitems(failures))
        archive.append("failures.txt",
                       ("The following barcodes were not retrieved "
                        "for any survey:\n%s" % failed))
        for survey_key, survey_text in viewitems(metadata):
            archive.append('survey_%s_md.txt' % survey_key, survey_text)

        # write out zip file
        self.add_header('Content-type', 'application/octet-stream')
        self.add_header('Content-Transfer-Encoding', 'binary')
        self.add_header('Accept-Ranges', 'bytes')
        self.add_header('Content-Encoding', 'none')
        self.add_header(
            'Content-Disposition', 'attachment; filename=metadata.zip')
        payload = archive.write_to_buffer()
        self.write(payload)
        self.flush()
        self.finish()
Esempio n. 6
0
    def get_ag_details(self, barcode):
        """Derive the American Gut status of a barcode.

        Fetches the barcode's details and runs a metadata pulldown for it,
        then classifies the barcode from the two results.

        Parameters
        ----------
        barcode
            Barcode to look up; also the key into the pulldown failures.

        Returns
        -------
        tuple
            ``(div_id, message, ag_details)``. ``div_id`` is one of
            "no_metadata", "verified", "md_pulldown_error" or
            "not_assigned"; ``ag_details`` is the fetched details, extended
            with ``*_checked``, ``login_user`` and ``email_type`` entries
            where the matching branch sets them.
        """
        ag_details = db.getAGBarcodeDetails(barcode)
        _, failures = db.pulldown([barcode], [])

        if len(ag_details) == 0:
            if failures:
                reason = "Cannot retrieve metadata: %s" % failures[barcode]
                return "no_metadata", reason, ag_details
            # TODO: Stefan Janssen: I cannot see how this case should ever be
            # reached, since failures will be set to 'Unknown reason' at the
            # outmost.
            not_assigned = ("In American Gut project group but no "
                            "American Gut info for barcode")
            ag_details['email_type'] = "-1"
            return "not_assigned", not_assigned, ag_details

        # replace missing values with empty strings
        for col in [c for c, v in ag_details.iteritems() if v is None]:
            ag_details[col] = ''

        # start with every flag unchecked, then tick those stored as 'Y'
        for flag in ('other', 'overloaded', 'moldy'):
            ag_details[flag + '_checked'] = ''
        ag_details['login_user'] = ag_details['name']
        for flag in ('moldy', 'overloaded', 'other'):
            if ag_details[flag] == 'Y':
                ag_details[flag + '_checked'] = 'checked'

        survey_id = db.get_barcode_survey(barcode)

        if failures:
            reason = "Cannot retrieve metadata: %s" % failures[barcode]
            ag_details['email_type'] = "-1"
            return "no_metadata", reason, ag_details

        # it has all sample details
        # (sample time, date, site)
        environmental = (survey_id is None and
                         ag_details['environment_sampled'])
        if environmental or survey_id in survey_type:
            ag_details['email_type'] = "1"
            return "verified", "All good", ag_details

        # should never get here (this would happen if the metadata
        # pulldown returned more than one row for a single barcode)
        duplicate = ("This barcode has multiple entries "
                     "in the database, which should "
                     "never happen. Please notify "
                     "someone on the database crew.")
        ag_details['email_type'] = "-1"
        return "md_pulldown_error", duplicate, ag_details
Esempio n. 7
0
    def get_ag_details(self, barcode):
        """Look up a barcode and report whether its metadata is usable.

        Returns a ``(div_id, message, ag_details)`` tuple. ``ag_details`` is
        the result of ``db.getAGBarcodeDetails``; when it is non-empty its
        ``None`` values are replaced by '' and the ``*_checked``,
        ``login_user`` and ``email_type`` entries are filled in.
        """
        ag_details = db.getAGBarcodeDetails(barcode)
        _, failures = db.pulldown([barcode], [])

        if len(ag_details) > 0:
            for key, value in ag_details.iteritems():
                if value is None:
                    ag_details[key] = ''
            ag_details['other_checked'] = ''
            ag_details['overloaded_checked'] = ''
            ag_details['moldy_checked'] = ''
            ag_details['login_user'] = ag_details['name']
            # tick the checkbox of every flag stored as 'Y'
            if ag_details['moldy'] == 'Y':
                ag_details['moldy_checked'] = 'checked'
            if ag_details['overloaded'] == 'Y':
                ag_details['overloaded_checked'] = 'checked'
            if ag_details['other'] == 'Y':
                ag_details['other_checked'] = 'checked'

            survey_id = db.get_barcode_survey(barcode)

            # it has all sample details
            # (sample time, date, site)
            if failures:
                div_id = "no_metadata"
                message = "Cannot retrieve metadata: %s" % failures[barcode]
                email_type = "-1"
            elif ((survey_id is None and ag_details['environment_sampled'])
                    or survey_id in survey_type):
                div_id, message, email_type = "verified", "All good", "1"
            else:
                # should never get here (this would happen if the metadata
                # pulldown returned more than one row for a single barcode)
                div_id = "md_pulldown_error"
                message = ("This barcode has multiple entries "
                           "in the database, which should "
                           "never happen. Please notify "
                           "someone on the database crew.")
                email_type = "-1"
            ag_details['email_type'] = email_type
        elif failures:
            # no details at all, and the pulldown says why
            div_id = "no_metadata"
            message = "Cannot retrieve metadata: %s" % failures[barcode]
        else:
            # TODO: Stefan Janssen: I cannot see how this case should ever be
            # reached, since failures will be set to 'Unknown reason' at the
            # outmost.
            div_id = "not_assigned"
            message = ("In American Gut project group but no "
                       "American Gut info for barcode")
            ag_details['email_type'] = "-1"

        return div_id, message, ag_details
Esempio n. 8
0
    def test_scrubb_pet_freetext(self):
        """The 'pets_other_freetext' answer must not leak into the pulldown.

        Survey question 150 = 'pets_other_freetext' used to be exported for
        pulldown although it has the potential to carry personal information.
        """
        # a barcode for which an answer to this question is stored in DB
        barcode = '000037487'
        barcodes = [barcode]

        # get free text value from DB
        survey_info = db.get_surveys(barcodes)
        secret = survey_info[1][barcode]['pets_other_freetext']

        # make sure free text value does NOT show up in any pulldown entry
        pulled = db.pulldown(barcodes)[0]
        for survey_text in pulled.values():
            self.assertNotIn(secret, survey_text)
Esempio n. 9
0
    def test_pulldown_third_party(self):
        """Sanity-check pulldown content and third-party inclusion.

        After storing the external survey fixture as 'Vioscreen', entry 1 of
        a plain pulldown is parsed and checked for some invariants; then the
        ``blanks`` / ``external`` combinations are checked for the expected
        markers.
        """
        # Add survey answers
        with open(self.ext_survey_fp, 'rU') as handle:
            n_stored = db.store_external_survey(
                handle, 'Vioscreen', separator=',',
                survey_id_col='SubjectId', trim='-160')
        self.assertEqual(n_stored, 3)

        barcodes = ['000029429', '000018046', '000023299', '000023300']
        # Test without third party
        metadata, _ = db.pulldown(barcodes)
        primary = metadata[1]

        # Parse the metadata into a pandas dataframe to test some invariants
        # This tests does not ensure that the columns have the exact value
        # but at least ensure that the contents looks as expected
        survey_df = pd.read_csv(
            StringIO(primary), delimiter='\t', dtype=str, encoding='utf-8')
        survey_df = survey_df.set_index('sample_name', drop=True)

        # Make sure that the prohibited columns from EBI are not in the
        # pulldown
        prohibited = set(survey_df.columns).intersection(ebi_remove)
        self.assertEqual(prohibited, set())

        freq_accepted_vals = {
            'Never',
            'Rarely (a few times/month)',
            'Regularly (3-5 times/week)',
            'Occasionally (1-2 times/week)',
            'Unspecified',
            'Daily',
        }
        freq_cols = ('ALCOHOL_FREQUENCY',
                     'PROBIOTIC_FREQUENCY',
                     'ONE_LITER_OF_WATER_A_DAY_FREQUENCY',
                     'POOL_FREQUENCY',
                     'FLOSSING_FREQUENCY',
                     'COSMETICS_FREQUENCY')
        for col in freq_cols:
            observed = set(survey_df[col])
            self.assertTrue(
                all(value in freq_accepted_vals for value in observed))

        # This astype is making sure that the values in the BMI column are
        # values that can be casted to float.
        bmi_given = survey_df.BMI != 'Unspecified'
        survey_df[bmi_given].BMI.astype(float)

        body_products = set(survey_df.BODY_PRODUCT)
        self.assertTrue(all(
            product.startswith('UBERON') or product == 'Unspecified'
            for product in body_products))

        self.assertFalse('VIOSCREEN' in primary)

        metadata, _ = db.pulldown(barcodes, blanks=['BLANK.01'])
        primary = metadata[1]
        self.assertFalse('VIOSCREEN' in primary)
        self.assertTrue('BLANK.01' in primary)

        # Test with third party
        metadata, _ = db.pulldown(barcodes, external=['Vioscreen'])
        primary = metadata[1]
        self.assertTrue('VIOSCREEN' in primary)

        metadata, _ = db.pulldown(
            barcodes, blanks=['BLANK.01'], external=['Vioscreen'])
        primary = metadata[1]
        self.assertTrue('VIOSCREEN' in primary)
        self.assertTrue('BLANK.01' in primary)
Esempio n. 10
0
    def post(self):
        """Build and send a zip archive of pulled-down survey metadata.

        Request arguments read: ``barcodes``, ``blanks``,
        ``selected_ag_surveys``, ``external`` and the optional ``merged``
        flag (the string 'True' enables the merged table).

        The archive always contains ``failures.txt``. It contains one
        ``survey_<name>_md.txt`` per survey that was selected by the user or
        that is not listed by ``db.list_ag_surveys()``, and, if ``merged`` is
        requested and at least one survey file was written,
        ``surveys_merged_md.txt`` with all of them joined on sample name.
        """
        barcodes = listify(self.get_arguments('barcodes'))
        blanks = listify(self.get_arguments('blanks'))
        # query which surveys have been selected by the user
        selected_ag_surveys = listify(
            self.get_arguments('selected_ag_surveys'))
        external = listify(self.get_arguments('external'))

        selected_ag_surveys = list(map(int, selected_ag_surveys))

        # Get metadata and create zip file
        metadata, failures = db.pulldown(barcodes, blanks, external)

        meta_zip = InMemoryZip()
        failed = '\n'.join(['\t'.join(bc) for bc in viewitems(failures)])
        failtext = ("The following barcodes were not retrieved "
                    "for any survey:\n%s" % failed)
        meta_zip.append("failures.txt", failtext)

        # check database about what surveys are available
        available_agsurveys = {
            _id: name.replace(' ', '_')
            for (_id, name, _) in db.list_ag_surveys()}

        results_as_pd = []
        for survey, meta in viewitems(metadata):
            # only create files for those surveys that have been selected by
            # the user. Note that ids from the DB are negative, in metadata
            # they are positive!
            survey_id = -1 * survey
            known = survey_id in available_agsurveys
            if known and survey_id not in selected_ag_surveys:
                continue

            # Surveys the DB does not list (e.g. external ones) must not be
            # blocked. They have no entry in available_agsurveys, so indexing
            # it directly raised KeyError for exactly those surveys; fall
            # back to the raw metadata key for the file name instead.
            label = available_agsurveys[survey_id] if known else survey
            meta_zip.append('survey_%s_md.txt' % label, meta)

            # transform each survey into a pandas dataframe for later merge
            # read all columns as string to avoid unintended conversions,
            # like cutting leading zeros of barcodes
            pd_meta = pd.read_csv(StringIO(meta), sep="\t", dtype=str)
            # reset the index to barcodes = here sample_name
            pd_meta.set_index('sample_name', inplace=True)
            results_as_pd.append(pd_meta)

        # add the merged table of all selected surveys to the zip archive
        merge_requested = self.get_argument('merged', default='False')
        if merge_requested == 'True' and results_as_pd:
            pd_all = pd.concat(results_as_pd, join='outer', axis=1)
            meta_zip.append(
                'surveys_merged_md.txt',
                pd_all.to_csv(sep='\t', index_label='sample_name'))

        # write out zip file
        self.add_header('Content-type', 'application/octet-stream')
        self.add_header('Content-Transfer-Encoding', 'binary')
        self.add_header('Accept-Ranges', 'bytes')
        self.add_header('Content-Encoding', 'none')
        self.add_header('Content-Disposition',
                        'attachment; filename=metadata.zip')
        self.write(meta_zip.write_to_buffer())
        self.flush()
        self.finish()
Esempio n. 11
0
def align_with_qiita_categories(samples,
                                categories,
                                failure_value='pulldown-issue',
                                omitted_value='Missing: Not provided'):
    """Pull down sample metadata and restrict it to the Qiita categories

    Parameters
    ----------
    samples : list of str
        The samples to get metadata for
    categories : Iterable of str
        The categories to align against
    failure_value : str, optional
        The value recorded for every overlapping category of a sample that
        failed pulldown.
    omitted_value : str, optional
        The value recorded for a category that is requested but not present
        in the extracted metadata.

    Notes
    -----
    The env_package variable for failures will be autofilled with "Air" per a
    request from Gail.

    Any variable in the extracted metadata that is not among the requested
    categories is silently dropped (e.g., PM_USEFUL).

    Any requested category that is not represented in the extracted metadata
    (e.g., qiita_empo_1) is filled with the omitted_value.

    Returns
    -------
    dict of dict
        A structure of the metadata per sample. {sample-id: {category: value}}
    """
    pulled, failed = db.pulldown(samples)

    # pulldown hands back one tab delimited text per survey (e.g., primary,
    # fermented food, etc). Each is parsed into its own frame indexed by
    # sample ID, and the frames are then placed side by side so that the
    # columns are the variables of all survey types.
    frames = [
        pd.read_csv(StringIO.StringIO(text), sep='\t', dtype=str)
          .set_index('sample_name')
        for _, text in sorted(pulled.items())
    ]
    table = pd.concat(frames, axis=1)

    # oddly, it seems possible in the present pulldown code for an ID to be
    # successful and a failure; such IDs are not treated as failures here
    failed = set(name for name in failed if name not in table.index)

    # columns in Qiita are lower case
    table.columns = [name.lower() for name in table.columns]

    # keep only the columns that are also requested categories; whatever is
    # requested but not pulled down has to be filled in later
    categories = set(categories)
    shared = table.columns.intersection(categories)
    table = table[shared]
    absent = categories - set(shared)

    # one row per failed sample, carrying the failure marker everywhere
    failure_rows = pd.DataFrame(index=list(failed), columns=table.columns)
    failure_rows.fillna(failure_value, inplace=True)
    failure_rows['env_package'] = 'Air'  # per request from Gail

    # append will add rows aligned on the columns
    table = table.append(failure_rows)

    # one column per absent category, carrying the omitted marker
    filler = pd.DataFrame(index=list(table.index), columns=sorted(absent))
    filler.fillna(omitted_value, inplace=True)

    # join will add columns aligned on the index
    return table.join(filler).to_dict(orient='index')
Esempio n. 12
0
    def test_pulldown_third_party(self):
        """Sanity-check pulldown content and third-party inclusion.

        Stores the external survey fixture as "Vioscreen", checks some
        invariants on entry 1 of a plain pulldown, and then checks that the
        "VIOSCREEN" and "BLANK.01" markers appear exactly when ``external``
        and ``blanks`` are passed.
        """
        # Add survey answers
        with open(self.ext_survey_fp, "rU") as f:
            obs = db.store_external_survey(
                f, "Vioscreen", separator=",", survey_id_col="SubjectId",
                trim="-160")
        self.assertEqual(obs, 3)

        barcodes = ["000029429", "000018046", "000023299", "000023300"]
        # Test without third party
        obs, _ = db.pulldown(barcodes)

        # Parse the metadata into a pandas dataframe to test some invariants
        # This tests does not ensure that the columns have the exact value
        # but at least ensure that the contents looks as expected
        survey_df = pd.read_csv(
            StringIO(obs[1]), delimiter="\t", dtype=str, encoding="utf-8")
        survey_df.set_index("sample_name", inplace=True, drop=True)

        # Make sure that the prohibited columns from EBI are not in the
        # pulldown
        self.assertEqual(
            set(survey_df.columns).intersection(ebi_remove), set())

        freq_accepted_vals = {
            "Never",
            "Rarely (a few times/month)",
            "Regularly (3-5 times/week)",
            "Occasionally (1-2 times/week)",
            "Unspecified",
            "Daily",
        }

        freq_cols = [
            "ALCOHOL_FREQUENCY",
            "PROBIOTIC_FREQUENCY",
            "ONE_LITER_OF_WATER_A_DAY_FREQUENCY",
            "POOL_FREQUENCY",
            "FLOSSING_FREQUENCY",
            "COSMETICS_FREQUENCY",
        ]

        for col in freq_cols:
            vals = set(survey_df[col])
            self.assertTrue(all([x in freq_accepted_vals for x in vals]))

        # This astype is making sure that the values in the BMI column are
        # values that can be casted to float. 'Unspecified' is the
        # placeholder accepted for the other columns checked here, so it is
        # excluded first; casting it would raise ValueError.
        survey_df[survey_df.BMI != "Unspecified"].BMI.astype(float)

        body_product_values = set(survey_df.BODY_PRODUCT)
        self.assertTrue(
            all([x.startswith("UBERON") or x == "Unspecified"
                 for x in body_product_values]))

        # assertIn / assertNotIn report both operands when they fail
        survey = obs[1]
        self.assertNotIn("VIOSCREEN", survey)

        obs, _ = db.pulldown(barcodes, blanks=["BLANK.01"])
        survey = obs[1]
        self.assertNotIn("VIOSCREEN", survey)
        self.assertIn("BLANK.01", survey)

        # Test with third party
        obs, _ = db.pulldown(barcodes, external=["Vioscreen"])
        survey = obs[1]
        self.assertIn("VIOSCREEN", survey)

        obs, _ = db.pulldown(
            barcodes, blanks=["BLANK.01"], external=["Vioscreen"])
        survey = obs[1]
        self.assertIn("VIOSCREEN", survey)
        self.assertIn("BLANK.01", survey)
Esempio n. 13
0
    def test_pulldown_third_party(self):
        """Pulldown output looks sane and includes Vioscreen only on request.

        The external survey fixture is stored as 'Vioscreen'. Entry 1 of a
        plain pulldown is parsed to check a few invariants, after which each
        ``blanks`` / ``external`` combination is checked for its markers.
        """
        # Add survey answers
        with open(self.ext_survey_fp, 'rU') as handle:
            stored = db.store_external_survey(handle,
                                              'Vioscreen',
                                              separator=',',
                                              survey_id_col='SubjectId',
                                              trim='-160')
        self.assertEqual(stored, 3)

        barcodes = ['000029429', '000018046', '000023299', '000023300']

        def primary_survey(**options):
            # run the pulldown and hand back entry 1 of its metadata
            metadata, _ = db.pulldown(barcodes, **options)
            return metadata[1]

        # Test without third party
        survey = primary_survey()

        # Parse the metadata into a pandas dataframe to test some invariants
        # This tests does not ensure that the columns have the exact value
        # but at least ensure that the contents looks as expected
        survey_df = pd.read_csv(StringIO(survey),
                                delimiter='\t',
                                dtype=str,
                                encoding='utf-8')
        survey_df = survey_df.set_index('sample_name', drop=True)

        # Make sure that the prohibited columns from EBI are not in the
        # pulldown
        leaked = set(survey_df.columns).intersection(ebi_remove)
        self.assertEqual(leaked, set())

        freq_accepted_vals = {
            'Never',
            'Rarely (a few times/month)',
            'Regularly (3-5 times/week)',
            'Occasionally (1-2 times/week)',
            'Unspecified',
            'Daily',
        }
        freq_cols = [
            'ALCOHOL_FREQUENCY',
            'PROBIOTIC_FREQUENCY',
            'ONE_LITER_OF_WATER_A_DAY_FREQUENCY',
            'POOL_FREQUENCY',
            'FLOSSING_FREQUENCY',
            'COSMETICS_FREQUENCY',
        ]
        for col in freq_cols:
            accepted = [v in freq_accepted_vals for v in set(survey_df[col])]
            self.assertTrue(all(accepted))

        # This astype is making sure that the values in the BMI column are
        # values that can be casted to float.
        with_bmi = survey_df[survey_df.BMI != 'Unspecified']
        with_bmi.BMI.astype(float)

        well_formed = [v.startswith('UBERON') or v == 'Unspecified'
                       for v in set(survey_df.BODY_PRODUCT)]
        self.assertTrue(all(well_formed))

        self.assertFalse('VIOSCREEN' in survey)

        survey = primary_survey(blanks=['BLANK.01'])
        self.assertFalse('VIOSCREEN' in survey)
        self.assertTrue('BLANK.01' in survey)

        # Test with third party
        survey = primary_survey(external=['Vioscreen'])
        self.assertTrue('VIOSCREEN' in survey)

        survey = primary_survey(blanks=['BLANK.01'], external=['Vioscreen'])
        self.assertTrue('VIOSCREEN' in survey)
        self.assertTrue('BLANK.01' in survey)
Esempio n. 14
0
    def get_ag_details(self, barcode):
        """Derive the American Gut status of a barcode.

        Parameters
        ----------
        barcode
            Barcode to look up.

        Returns
        -------
        tuple
            ``(div_id, message, ag_details)``. ``div_id`` is one of
            "not_assigned", "no_metadata", "verified", "verified_animal" or
            "md_pulldown_error". ``ag_details`` is the result of
            ``db.getAGBarcodeDetails`` with ``email_type`` set and, when it
            was non-empty, ``None`` values blanked and the ``*_checked`` and
            ``login_user`` entries added.
        """
        no_ag_info = ("In American Gut project group but No "
                      "American Gut info for barcode")

        ag_details = db.getAGBarcodeDetails(barcode)
        if not len(ag_details) > 0:
            ag_details['email_type'] = "-1"
            return "not_assigned", no_ag_info, ag_details

        # replace missing values with empty strings
        for col in [c for c, v in ag_details.iteritems() if v is None]:
            ag_details[col] = ''

        # start with every flag unchecked, then tick those stored as 'Y'
        for flag in ('other', 'overloaded', 'moldy'):
            ag_details[flag + '_checked'] = ''
        ag_details['login_user'] = ag_details['name']
        for flag in ('moldy', 'overloaded', 'other'):
            if ag_details[flag] == 'Y':
                ag_details[flag + '_checked'] = 'checked'

        survey_id = db.get_barcode_survey(barcode)
        _, failures = db.pulldown([barcode])

        # sample time, date and site all empty: nothing was logged
        if (ag_details['sample_date'] == ag_details['site_sampled'] ==
                ag_details['sample_time'] == ''):
            ag_details['email_type'] = "-1"
            return "not_assigned", no_ag_info, ag_details

        # from here on it has all sample details (sample time, date, site)
        if survey_id is None:
            ag_details['email_type'] = "0"
            return "not_assigned", "Missing info", ag_details

        if barcode in failures:
            ag_details['email_type'] = "-1"
            return "no_metadata", "Cannot retrieve metadata", ag_details

        # and we can successfully retrieve sample metadata
        kind = survey_type[survey_id]
        if kind == 'Human':
            ag_details['email_type'] = "1"
            return "verified", "All good", ag_details
        if kind == 'Animal':
            ag_details['email_type'] = "1"
            return "verified_animal", "All good", ag_details

        # should never get here (this would happen if the metadata
        # pulldown returned more than one row for a single barcode)
        duplicate = ("This barcode has multiple entries "
                     "in the database, which should "
                     "never happen. Please notify "
                     "someone on the database crew.")
        ag_details['email_type'] = "-1"
        return "md_pulldown_error", duplicate, ag_details
Esempio n. 15
0
    def get_ag_details(self, barcode):
        """Look up a barcode and classify its American Gut status.

        Returns a ``(div_id, message, ag_details)`` tuple. Every path stores
        an ``email_type`` of "1", "0" or "-1" in ``ag_details``; when the
        looked-up details are non-empty their ``None`` values are replaced by
        '' and the ``*_checked`` / ``login_user`` entries are added.
        """
        ag_details = db.getAGBarcodeDetails(barcode)

        # outcome used whenever no American Gut information is available
        div_id = "not_assigned"
        message = ("In American Gut project group but No "
                   "American Gut info for barcode")
        email_type = "-1"

        if len(ag_details) > 0:
            for key, value in ag_details.iteritems():
                if value is None:
                    ag_details[key] = ''
            ag_details['other_checked'] = ''
            ag_details['overloaded_checked'] = ''
            ag_details['moldy_checked'] = ''
            ag_details['login_user'] = ag_details['name']
            # tick the checkbox of every flag stored as 'Y'
            if ag_details['moldy'] == 'Y':
                ag_details['moldy_checked'] = 'checked'
            if ag_details['overloaded'] == 'Y':
                ag_details['overloaded_checked'] = 'checked'
            if ag_details['other'] == 'Y':
                ag_details['other_checked'] = 'checked'

            survey_id = db.get_barcode_survey(barcode)
            _, failures = db.pulldown([barcode])

            nothing_logged = (ag_details['sample_date'] ==
                              ag_details['site_sampled'] ==
                              ag_details['sample_time'] == '')
            if not nothing_logged:
                # it has all sample details
                # (sample time, date, site)
                if survey_id is None:
                    message, email_type = "Missing info", "0"
                elif barcode in failures:
                    div_id = "no_metadata"
                    message = "Cannot retrieve metadata"
                elif survey_type[survey_id] == 'Human':
                    # and we can successfully retrieve sample
                    # metadata
                    div_id, message, email_type = "verified", "All good", "1"
                elif survey_type[survey_id] == 'Animal':
                    div_id = "verified_animal"
                    message, email_type = "All good", "1"
                else:
                    # should never get here (this would happen if the
                    # metadata pulldown returned more than one row for a
                    # single barcode)
                    div_id = "md_pulldown_error"
                    message = ("This barcode has multiple entries "
                               "in the database, which should "
                               "never happen. Please notify "
                               "someone on the database crew.")

        ag_details['email_type'] = email_type
        return div_id, message, ag_details
Esempio n. 16
0
    def post(self):
        """Send a zip archive with survey metadata for the posted barcodes.

        Request arguments read: ``barcodes``, ``blanks``,
        ``selected_ag_surveys`` (integer ids), ``external`` and the optional
        ``merged`` flag (the string ``'True'`` enables the merged table).

        The archive always contains ``failures.txt``. It contains one
        ``survey_<name>_md.txt`` per survey that was either selected or is not
        listed by ``db.list_ag_surveys()``, and, when ``merged`` is requested
        and at least one survey was written, ``surveys_merged_md.txt`` with
        all written surveys joined on ``sample_name``.
        """
        # Function-scope import keeps this method self-contained.
        from numbers import Number

        barcodes = listify(self.get_arguments('barcodes'))
        blanks = listify(self.get_arguments('blanks'))
        # query which surveys have been selected by the user
        selected_ag_surveys = listify(
            self.get_arguments('selected_ag_surveys'))
        external = listify(self.get_arguments('external'))

        # a set gives constant time membership tests in the loop below
        selected_ag_surveys = set(map(int, selected_ag_surveys))

        # Get metadata and create zip file
        metadata, failures = db.pulldown(barcodes, blanks, external)

        meta_zip = InMemoryZip()
        failed = '\n'.join('\t'.join(bc) for bc in viewitems(failures))
        failtext = ("The following barcodes were not retrieved "
                    "for any survey:\n%s" % failed)
        meta_zip.append("failures.txt", failtext)

        # check database about what surveys are available
        available_agsurveys = {
            _id: name.replace(' ', '_')
            for (_id, name, _) in db.list_ag_surveys()}

        results_as_pd = []
        for survey, meta in viewitems(metadata):
            # Note that ids from the DB are negative, in metadata they are
            # positive! Only numeric keys are negated: multiplying a string
            # key (e.g. an external survey name) by -1 would silently turn it
            # into ''.
            if isinstance(survey, Number):
                survey = -1 * survey

            # Only create files for surveys selected by the user. Surveys
            # unknown to the database (presumably external ones) are never
            # skipped, so their pulldown is not blocked.
            if (survey not in selected_ag_surveys and
                    survey in available_agsurveys):
                continue

            # Unknown surveys have no registered name; fall back to their key
            # instead of failing with a KeyError.
            label = available_agsurveys.get(survey, survey)
            meta_zip.append('survey_%s_md.txt' % label, meta)

            # transform each survey into a pandas dataframe for later merge
            # read all columns as string to avoid unintended conversions,
            # like cutting leading zeros of barcodes
            pd_meta = pd.read_csv(StringIO(meta), sep="\t", dtype=str)
            # reset the index to barcodes = here sample_name
            pd_meta.set_index('sample_name', inplace=True)
            results_as_pd.append(pd_meta)

        # add the merged table of all selected surveys to the zip archive
        merge_requested = self.get_argument('merged', default='False') == 'True'
        if merge_requested and len(results_as_pd) > 0:
            pd_all = pd.concat(results_as_pd, join='outer', axis=1)
            meta_zip.append('surveys_merged_md.txt',
                            pd_all.to_csv(sep='\t',
                                          index_label='sample_name'))

        # write out zip file
        self.add_header('Content-type',  'application/octet-stream')
        self.add_header('Content-Transfer-Encoding', 'binary')
        self.add_header('Accept-Ranges', 'bytes')
        self.add_header('Content-Encoding', 'none')
        self.add_header('Content-Disposition',
                        'attachment; filename=metadata.zip')
        self.write(meta_zip.write_to_buffer())
        self.flush()
        self.finish()