Example #1
    def test_get(self):
        # Create the usernames key so we can do autocomplete
        r_client.zadd('qiita-usernames', **{u: 0 for u in User.iter()})
        response = self.get(self.base_url % 't')
        self.assertEqual(response.code, 200)
        self.assertEqual(
            loads(response.body),
            {'results': [{
                "id": "*****@*****.**",
                "text": "*****@*****.**"
            }]})

        response = self.get(self.base_url % 'admi')
        self.assertEqual(response.code, 200)
        self.assertEqual(
            loads(response.body),
            {'results': [{
                "id": "*****@*****.**",
                "text": "*****@*****.**"
            }]})

        response = self.get(self.base_url % 'tesq')
        self.assertEqual(response.code, 200)
        self.assertEqual(loads(response.body), {'results': []})

        r_client.delete('qiita-usernames')
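The handler under test is not shown in this example. As a rough sketch only (the helper name is made up, and it assumes a redis-py client that supports ZRANGEBYLEX; note that the zadd(**mapping) call above is the older redis-py 2.x convention), a prefix lookup against the qiita-usernames sorted set could look like this:

# Illustrative helper, not the actual Qiita handler: with every username
# stored at score 0, ZRANGEBYLEX returns the members matching a prefix.
def suggest_usernames(prefix):
    # '[' makes the bound inclusive; '\xff' caps the range at the prefix
    return r_client.zrangebylex(
        'qiita-usernames', '[' + prefix, '[' + prefix + '\xff')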
Example #2
    def test_post_select_samples(self):
        # just making sure that the key is not set in redis
        r_client.delete('maintenance')
        response = self.get('/auth/reset/')
        self.assertEqual(response.code, 200)
        self.assertIn(('<label for="newpass2" class="col-sm-2 '
                       'control-label">Repeat New Password'
                       '</label>'), response.body)

        # not displaying due to maintenance
        r_client.set('maintenance', 'This is my error message')
        response = self.get('/auth/reset/')
        self.assertEqual(response.code, 200)
        self.assertNotIn(('<label for="newpass2" class="col-sm-2 '
                          'control-label">Repeat New Password'
                          '</label>'), response.body)
        r_client.delete('maintenance')
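The /auth/reset/ handler itself is not part of this example. A minimal sketch of the behaviour being tested, with made-up handler and template names, might look like:

# Illustrative Tornado-style handler (BaseHandler and the template names are
# placeholders), not Qiita's actual implementation: if the 'maintenance' key
# holds a message, render it instead of the password-reset form.
class ResetHandler(BaseHandler):
    def get(self):
        message = r_client.get('maintenance')
        if message:
            self.render('maintenance.html', message=message)
        else:
            self.render('reset.html')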
Example #4
    def test_post_select_samples(self):
        # just making sure that the key is not set in redis
        r_client.delete("maintenance")
        response = self.get("/auth/reset/")
        self.assertEqual(response.code, 200)
        self.assertTrue(
            ('<label for="newpass2" class="col-sm-10 ' 'control-label">Repeat New Password' "</label>") in response.body
        )

        # not displaying due to maintenance
        r_client.set("maintenance", "This is my error message")
        response = self.get("/auth/reset/")
        self.assertEqual(response.code, 200)
        self.assertFalse(
            ('<label for="newpass2" class="col-sm-10 ' 'control-label">Repeat New Password' "</label>") in response.body
        )
        r_client.delete("maintenance")
Example #6
    def tearDown(self):
        r_client.delete('testing:jobs')

    def test_traverse_removed_child(self):
        r_client.delete('b')
        exp = {'a', 'c'}
        obs = {obj['id'] for obj in self.obj.traverse('testing')}
        self.assertEqual(obs, exp)
        self.assertEqual(r_client.smembers('testing:children'), exp)

    def tearDown(self):
        for key in self.to_delete:
            r_client.delete(key)
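These snippets share one pattern: every Redis key a test creates is deleted again in tearDown. A generic version of that bookkeeping, with illustrative class and helper names, could be written as:

from unittest import TestCase

class RedisCleanupTest(TestCase):
    """Illustrative base class: track created keys, delete them on exit.

    Assumes the same module-level r_client redis connection used in the
    examples above.
    """
    def setUp(self):
        self.to_delete = []

    def track_key(self, key):
        # remember a key so tearDown can remove it later
        self.to_delete.append(key)

    def tearDown(self):
        for key in self.to_delete:
            r_client.delete(key)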
Example #9
def generate_biom_and_metadata_release(study_status='public'):
    """Generate a list of biom/meatadata filepaths and a tgz of those files

    Parameters
    ----------
    study_status : str, optional
        The study status to search for. Note that this should always be set
        to 'public' but having this exposed helps with testing. The other
        options are 'private' and 'sandbox'
    """
    studies = qdb.study.Study.get_by_status(study_status)
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir
    portal = qiita_config.portal
    bdir = qdb.util.get_db_files_base_dir()
    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    data = []
    for s in studies:
        # [0] latest is first, [1] only getting the filepath
        sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir)

        for a in s.artifacts(artifact_type='BIOM'):
            if a.processing_parameters is None:
                continue

            cmd_name = a.processing_parameters.command.name

            # this loop is necessary as in theory an artifact can be
            # generated from multiple prep info files
            human_cmd = []
            for p in a.parents:
                pp = p.processing_parameters
                pp_cmd_name = pp.command.name
                if pp_cmd_name == 'Trimming':
                    human_cmd.append('%s @ %s' % (
                        cmd_name, str(pp.values['length'])))
                else:
                    human_cmd.append('%s, %s' % (cmd_name, pp_cmd_name))
            human_cmd = ', '.join(human_cmd)

            for _, fp, fp_type in a.filepaths:
                if fp_type != 'biom' or 'only-16s' in fp:
                    continue
                fp = relpath(fp, bdir)
                # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id,
                #          human readable name)
                for pt in a.prep_templates:
                    for _, prep_fp in pt.get_filepaths():
                        if 'qiime' not in prep_fp:
                            break
                    prep_fp = relpath(prep_fp, bdir)
                    data.append((fp, sample_fp, prep_fp, a.id, human_cmd))

    # writing text and tgz file
    ts = datetime.now().strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases')
    if not exists(tgz_dir):
        makedirs(tgz_dir)
    tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status))
    tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status))
    txt_hd = StringIO()
    with topen(tgz_name, "w|gz") as tgz:
        # writing header for txt
        txt_hd.write(
            "biom_fp\tsample_fp\tprep_fp\tqiita_artifact_id\tcommand\n")
        for biom_fp, sample_fp, prep_fp, artifact_id, human_cmd in data:
            txt_hd.write("%s\t%s\t%s\t%s\t%s\n" % (
                biom_fp, sample_fp, prep_fp, artifact_id, human_cmd))
            tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False)
            tgz.add(join(bdir, sample_fp), arcname=sample_fp, recursive=False)
            tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False)

        txt_hd.seek(0)
        info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts))
        info.size = len(txt_hd.buf)
        tgz.addfile(tarinfo=info, fileobj=txt_hd)

    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [
        ('filepath', tgz_name_final[len(working_dir):], r_client.set),
        ('md5sum', md5sum.hexdigest(), r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:release:%s:%s' % (portal, study_status, k)
        # delete any stale value under this key first to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
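The three values written at the end of the function follow the key pattern '<portal>:release:<study_status>:<name>'. A hypothetical consumer (not part of the code above) could read them back like this:

# Hypothetical read-back of the release metadata written above
def get_release_info(portal, study_status='public'):
    return {name: r_client.get('%s:release:%s:%s'
                               % (portal, study_status, name))
            for name in ('filepath', 'md5sum', 'time')}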
Example #10
def update_redis_stats():
    """Generate the system stats and save them in redis

    Returns
    -------
    list of str
        artifact filepaths that are not present in the file system
    """
    STUDY = qdb.study.Study
    studies = {'public': STUDY.get_by_status('public'),
               'private': STUDY.get_by_status('private'),
               'sandbox': STUDY.get_by_status('sandbox')}
    number_studies = {k: len(v) for k, v in viewitems(studies)}

    number_of_samples = {}
    ebi_samples_prep = {}
    num_samples_ebi = 0
    for k, sts in viewitems(studies):
        number_of_samples[k] = 0
        for s in sts:
            st = s.sample_template
            if st is not None:
                number_of_samples[k] += len(list(st.keys()))

            ebi_samples_prep_count = 0
            for pt in s.prep_templates():
                ebi_samples_prep_count += len([
                    1 for _, v in viewitems(pt.ebi_experiment_accessions)
                    if v is not None and v != ''])
            ebi_samples_prep[s.id] = ebi_samples_prep_count

            if s.sample_template is not None:
                num_samples_ebi += len([
                    1 for _, v in viewitems(
                        s.sample_template.ebi_sample_accessions)
                    if v is not None and v != ''])

    num_users = qdb.util.get_count('qiita.qiita_user')

    lat_longs = get_lat_longs()

    num_studies_ebi = len([k for k, v in viewitems(ebi_samples_prep)
                           if v >= 1])
    number_samples_ebi_prep = sum([v for _, v in viewitems(ebi_samples_prep)])

    # generating file size stats
    stats = []
    missing_files = []
    for k, sts in viewitems(studies):
        for s in sts:
            for a in s.artifacts():
                for _, fp, dt in a.filepaths:
                    try:
                        finfo = stat(fp)
                        stats.append((dt, finfo.st_size,
                                      strftime('%Y-%m',
                                               localtime(finfo.st_ctime))))
                    except OSError:
                        missing_files.append(fp)

    summary = {}
    all_dates = []
    for ft, size, ym in stats:
        if ft not in summary:
            summary[ft] = {}
        if ym not in summary[ft]:
            summary[ft][ym] = 0
            all_dates.append(ym)
        summary[ft][ym] += size
    all_dates = sorted(set(all_dates))

    # sorting summaries
    rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
                    'biom', 'raw_sff', 'raw_qual']
    ordered_summary = {}
    for dt in summary:
        if dt in rm_from_data:
            continue
        new_list = []
        current_value = 0
        for ad in all_dates:
            if ad in summary[dt]:
                current_value += summary[dt][ad]
            new_list.append(current_value)
        ordered_summary[dt] = new_list

    plot_order = sorted([(k, ordered_summary[k][-1]) for k in ordered_summary],
                        key=lambda x: x[1])

    # helper function to generate y axis, modified from:
    # http://stackoverflow.com/a/1094933
    def sizeof_fmt(value, position):
        number = None
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(value) < 1024.0:
                number = "%3.1f%s" % (value, unit)
                break
            value /= 1024.0
        if number is None:
            number = "%.1f%s" % (value, 'Yi')
        return number

    all_dates_axis = range(len(all_dates))
    plt.locator_params(axis='y', nbins=10)
    plt.figure(figsize=(20, 10))
    for k, v in plot_order:
        plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)

    plt.xticks(all_dates_axis, all_dates)
    plt.legend()
    plt.grid()
    ax = plt.gca()
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
    plt.xlabel('Date')
    plt.ylabel('Storage space per data type')

    plot = StringIO()
    plt.savefig(plot, format='png')
    plot.seek(0)
    img = 'data:image/png;base64,' + quote(b64encode(plot.buf))

    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    portal = qiita_config.portal
    vals = [
        ('number_studies', number_studies, r_client.hmset),
        ('number_of_samples', number_of_samples, r_client.hmset),
        ('num_users', num_users, r_client.set),
        ('lat_longs', lat_longs, r_client.set),
        ('num_studies_ebi', num_studies_ebi, r_client.set),
        ('num_samples_ebi', num_samples_ebi, r_client.set),
        ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
        ('img', img, r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:stats:%s' % (portal, k)
        # delete any stale value under this key first to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)

    return missing_files
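The stats land under '<portal>:stats:<name>': the per-status counters are stored as hashes (hmset), everything else as plain strings (set). A hypothetical read-back, again not part of the function above, could be:

# Hypothetical consumer of the keys written by update_redis_stats
def get_portal_stats(portal):
    stats = {name: r_client.hgetall('%s:stats:%s' % (portal, name))
             for name in ('number_studies', 'number_of_samples')}
    for name in ('num_users', 'lat_longs', 'num_studies_ebi',
                 'num_samples_ebi', 'number_samples_ebi_prep',
                 'img', 'time'):
        stats[name] = r_client.get('%s:stats:%s' % (portal, name))
    return stats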
    def tearDown(self):
        for k in self.test_keys:
            r_client.delete(k)