def delete_analysis(job): """Deletes a full analysis Parameters ---------- job : qiita_db.processing_job.ProcessingJob The processing job performing the task """ with qdb.sql_connection.TRN: analysis_id = job.parameters.values['analysis_id'] analysis = qdb.analysis.Analysis(analysis_id) # selecting roots of the analysis, can be multiple artifacts = [a for a in analysis.artifacts if a.processing_parameters is None] # deleting each of the processing graphs for a in artifacts: to_delete = list(a.descendants.nodes()) to_delete.reverse() for td in to_delete: qdb.artifact.Artifact.delete(td.id) qdb.analysis.Analysis.delete(analysis_id) r_client.delete('analysis_delete_%d' % analysis_id) job._set_status('success')
def test_get(self):
    # Create the usernames key so we can do autocomplete
    r_client.zadd('qiita-usernames', **{u: 0 for u in User.iter()})
    response = self.get(self.base_url % 't')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body),
                     {'results': [{"id": "*****@*****.**",
                                   "text": "*****@*****.**"}]})

    response = self.get(self.base_url % 'admi')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body),
                     {'results': [{"id": "*****@*****.**",
                                   "text": "*****@*****.**"}]})

    response = self.get(self.base_url % 'tesq')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body), {'results': []})

    r_client.delete('qiita-usernames')
def test_get(self):
    base_url = '/study/sharing/autocomplete/?text=%s'

    r_client.zadd('qiita-usernames', {e: 0 for e, n in User.iter()})
    response = self.get(base_url % 't')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body),
                     {'results': [{"id": "*****@*****.**",
                                   "text": "*****@*****.**"}]})

    response = self.get(base_url % 'admi')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body),
                     {'results': [{"id": "*****@*****.**",
                                   "text": "*****@*****.**"}]})

    response = self.get(base_url % 'tesq')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body), {'results': []})

    r_client.delete('qiita-usernames')
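# The tests above imply a prefix-autocomplete lookup over the
# 'qiita-usernames' sorted set. With every member stored at score 0, a
# lexicographic range query implements prefix matching; zrangebylex is a
# standard redis-py call, but the handler below is only a hypothetical
# sketch (it assumes ASCII usernames for the '\xff' upper bound).
def autocomplete_usernames(prefix):
    matches = r_client.zrangebylex(
        'qiita-usernames', '[' + prefix, '[' + prefix + '\xff')
    return {'results': [{'id': m, 'text': m} for m in matches]}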
def delete_analysis(job): """Deletes a full analysis Parameters ---------- job : qiita_db.processing_job.ProcessingJob The processing job performing the task """ with qdb.sql_connection.TRN: analysis_id = job.parameters.values['analysis_id'] analysis = qdb.analysis.Analysis(analysis_id) # selecting roots of the analysis, can be multiple artifacts = [ a for a in analysis.artifacts if a.processing_parameters is None ] # deleting each of the processing graphs for a in artifacts: to_delete = list(a.descendants.nodes()) to_delete.reverse() for td in to_delete: qdb.artifact.Artifact.delete(td.id) qdb.analysis.Analysis.delete(analysis_id) r_client.delete('analysis_delete_%d' % analysis_id) job._set_status('success')
def test_post_select_samples(self):
    # just making sure that the key is not set in redis
    r_client.delete('maintenance')
    response = self.get('/auth/reset/')
    self.assertEqual(response.code, 200)
    self.assertIn(('<label for="newpass2" class="col-sm-2 '
                   'control-label">Repeat New Password'
                   '</label>'), response.body)

    # not displaying due to maintenance
    r_client.set('maintenance', 'This is my error message')
    response = self.get('/auth/reset/')
    self.assertEqual(response.code, 200)
    self.assertNotIn(('<label for="newpass2" class="col-sm-2 '
                      'control-label">Repeat New Password'
                      '</label>'), response.body)
    r_client.delete('maintenance')
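# Sketch of the server-side behavior this test exercises (hypothetical
# handler logic; it assumes the page template hides the reset form whenever
# a maintenance message is present):
def get_maintenance_message():
    # returns None when the 'maintenance' key is unset
    msg = r_client.get('maintenance')
    return msg.decode('utf-8') if isinstance(msg, bytes) else msg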
def delete_analysis(job): """Deletes a full analysis Parameters ---------- job : qiita_db.processing_job.ProcessingJob The processing job performing the task """ with qdb.sql_connection.TRN: analysis_id = job.parameters.values['analysis_id'] analysis = qdb.analysis.Analysis(analysis_id) _delete_analysis_artifacts(analysis) r_client.delete('analysis_delete_%d' % analysis_id) job._set_status('success')
def test_get(self):
    # Create the usernames key so we can do autocomplete
    r_client.zadd('qiita-usernames', **{e: 0 for e, n in User.iter()})
    response = self.get(self.base_url % 't')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body),
                     {'results': [{"id": "*****@*****.**",
                                   "text": "*****@*****.**"}]})

    response = self.get(self.base_url % 'admi')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body),
                     {'results': [{"id": "*****@*****.**",
                                   "text": "*****@*****.**"}]})

    response = self.get(self.base_url % 'tesq')
    self.assertEqual(response.code, 200)
    self.assertEqual(loads(response.body), {'results': []})

    r_client.delete('qiita-usernames')
def delete_analysis(job): """Deletes a full analysis Parameters ---------- job : qiita_db.processing_job.ProcessingJob The processing job performing the task """ with qdb.sql_connection.TRN: analysis_id = job.parameters.values['analysis_id'] analysis = qdb.analysis.Analysis(analysis_id) artifacts = sorted( analysis.artifacts, key=lambda a: a.id, reverse=True) for artifact in artifacts: qdb.artifact.Artifact.delete(artifact.id) qdb.analysis.Analysis.delete(analysis_id) r_client.delete('analysis_delete_%d' % analysis_id) job._set_status('success')
def correct_redis_data(key, cmd, values_dict, user):
    """Corrects the data stored in the redis DB

    Parameters
    ----------
    key : str
        The redis key to fix
    cmd : qiita_db.software.Command
        Command to use to create the processing job
    values_dict : dict
        Dictionary used to instantiate the parameters of the command
    user : qiita_db.user.User
        The user that will own the job
    """
    info = r_client.get(key)
    if info:
        info = loads(info)
        if info['job_id'] is not None:
            if 'is_qiita_job' in info:
                if info['is_qiita_job']:
                    try:
                        # instantiating the job validates that it still
                        # exists
                        job = ProcessingJob(info['job_id'])
                        payload = {'job_id': info['job_id'],
                                   'alert_type': info['status'],
                                   'alert_msg': info['alert_msg']}
                        r_client.set(key, dumps(payload))
                    except (QiitaDBUnknownIDError, KeyError):
                        # We somehow lost the information of this job.
                        # Simply delete the key
                        r_client.delete(key)
                else:
                    # These jobs don't contain any information on the live
                    # dump. We can safely delete the key
                    r_client.delete(key)
            else:
                # These jobs don't contain any information on the live
                # dump. We can safely delete the key
                r_client.delete(key)
        else:
            # Job is null, we have the information here
            if info['status'] == 'success':
                # In the success case no information is stored. We can
                # safely delete the key
                r_client.delete(key)
            elif info['status'] == 'warning':
                # In case of warning the key message stores the warning
                # message. We need to create a new job, mark it as
                # successful and store the warning message as expected by
                # the new structure
                params = Parameters.load(cmd, values_dict=values_dict)
                job = ProcessingJob.create(user, params)
                job._set_status('success')
                payload = {'job_id': job.id,
                           'alert_type': 'warning',
                           'alert_msg': info['message']}
                r_client.set(key, dumps(payload))
            else:
                # The status is error. The key message stores the error
                # message. We need to create a new job and mark it as
                # failed with the given error message
                params = Parameters.load(cmd, values_dict=values_dict)
                job = ProcessingJob.create(user, params)
                job._set_error(info['message'])
                payload = {'job_id': job.id}
                r_client.set(key, dumps(payload))
    else:
        # The key doesn't contain any information. Delete the key
        r_client.delete(key)
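# Shape of the migration handled above (example payloads inferred from the
# branches of correct_redis_data, not taken from a live dump):
#   old: '{"job_id": null, "status": "warning",
#          "message": "2 columns dropped"}'
#   new: '{"job_id": "<new job uuid>", "alert_type": "warning",
#          "alert_msg": "2 columns dropped"}'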
# Create the command to complete a job
parameters = {'job_id': ['string', None], 'payload': ['string', None]}
create_command(qiita_plugin, "complete_job", "Completes a given job",
               parameters)

# Assumptions on the structure of the data in the redis database have
# changed, so we need to fix them to avoid failures

# Get all the sample template keys
for key in r_client.keys('sample_template_[0-9]*'):
    try:
        study = Study(int(key.split('_')[-1]))
        user = study.owner
    except QiitaDBUnknownIDError:
        # This means that the study no longer exists - delete the key
        # and continue
        r_client.delete(key)
        continue
    values_dict = {'study': study.id, 'template_fp': 'ignored-patch58'}
    correct_redis_data(key, st_cmd, values_dict, user)

# Get all the prep template keys
for key in r_client.keys('prep_template_[0-9]*'):
    try:
        pt = PrepTemplate(int(key.split('_')[-1]))
        user = Study(pt.study_id).owner
    except QiitaDBUnknownIDError:
        # This means that the prep template no longer exists - delete the
        # key and continue
        r_client.delete(key)
        continue
    values_dict = {'prep_template': pt.id,
def update_redis_stats():
    """Generate the system stats and save them in redis

    Returns
    -------
    list of str
        artifact filepaths that are not present in the file system
    """
    STUDY = qdb.study.Study
    studies = {'public': STUDY.get_by_status('public'),
               'private': STUDY.get_by_status('private'),
               'sandbox': STUDY.get_by_status('sandbox')}
    number_studies = {k: len(v) for k, v in viewitems(studies)}

    number_of_samples = {}
    ebi_samples_prep = {}
    num_samples_ebi = 0
    for k, sts in viewitems(studies):
        number_of_samples[k] = 0
        for s in sts:
            st = s.sample_template
            if st is not None:
                number_of_samples[k] += len(list(st.keys()))

            ebi_samples_prep_count = 0
            for pt in s.prep_templates():
                ebi_samples_prep_count += len([
                    1 for _, v in viewitems(pt.ebi_experiment_accessions)
                    if v is not None and v != ''])
            ebi_samples_prep[s.id] = ebi_samples_prep_count

            if s.sample_template is not None:
                num_samples_ebi += len([
                    1 for _, v in viewitems(
                        s.sample_template.ebi_sample_accessions)
                    if v is not None and v != ''])

    num_users = qdb.util.get_count('qiita.qiita_user')
    num_processing_jobs = qdb.util.get_count('qiita.processing_job')

    lat_longs = dumps(get_lat_longs())

    num_studies_ebi = len(
        [k for k, v in viewitems(ebi_samples_prep) if v >= 1])
    number_samples_ebi_prep = sum(
        [v for _, v in viewitems(ebi_samples_prep)])

    # generating file size stats
    stats = []
    missing_files = []
    for k, sts in viewitems(studies):
        for s in sts:
            for a in s.artifacts():
                for x in a.filepaths:
                    try:
                        s = stat(x['fp'])
                        stats.append(
                            (x['fp_type'], s.st_size,
                             strftime('%Y-%m', localtime(s.st_ctime))))
                    except OSError:
                        missing_files.append(x['fp'])

    summary = {}
    all_dates = []
    for ft, size, ym in stats:
        if ft not in summary:
            summary[ft] = {}
        if ym not in summary[ft]:
            summary[ft][ym] = 0
            all_dates.append(ym)
        summary[ft][ym] += size
    all_dates = sorted(set(all_dates))

    # sorting summaries
    rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
                    'biom', 'raw_sff', 'raw_qual']
    ordered_summary = {}
    for dt in summary:
        if dt in rm_from_data:
            continue
        new_list = []
        current_value = 0
        for ad in all_dates:
            if ad in summary[dt]:
                current_value += summary[dt][ad]
            new_list.append(current_value)
        ordered_summary[dt] = new_list

    plot_order = sorted([(k, ordered_summary[k][-1])
                         for k in ordered_summary], key=lambda x: x[1])

    # helper function to generate y axis, modified from:
    # http://stackoverflow.com/a/1094933
    def sizeof_fmt(value, position):
        number = None
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(value) < 1024.0:
                number = "%3.1f%s" % (value, unit)
                break
            value /= 1024.0
        if number is None:
            number = "%.1f%s" % (value, 'Yi')
        return number

    all_dates_axis = range(len(all_dates))
    plt.locator_params(axis='y', nbins=10)
    plt.figure(figsize=(20, 10))
    for k, v in plot_order:
        plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)

    plt.xticks(all_dates_axis, all_dates)
    plt.legend()
    plt.grid()
    ax = plt.gca()
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
    plt.xticks(rotation=90)
    plt.xlabel('Date')
    plt.ylabel('Storage space per data type')

    plot = BytesIO()
    plt.savefig(plot, format='png')
    plot.seek(0)
    img = 'data:image/png;base64,' + quote(b64encode(plot.getbuffer()))

    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    portal = qiita_config.portal
    vals = [
        ('number_studies', number_studies, r_client.hmset),
        ('number_of_samples', number_of_samples, r_client.hmset),
        ('num_users', num_users, r_client.set),
        ('lat_longs', lat_longs, r_client.set),
        ('num_studies_ebi', num_studies_ebi, r_client.set),
        ('num_samples_ebi', num_samples_ebi, r_client.set),
        ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
        ('img', img, r_client.set),
        ('time', time, r_client.set),
        ('num_processing_jobs', num_processing_jobs, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:stats:%s' % (portal, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)

    return missing_files
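# Reading the cached stats back (a sketch; the key names mirror the writes
# above, and hgetall/get are standard redis-py calls; the helper name is
# hypothetical):
def read_cached_stats(portal):
    prefix = '%s:stats:' % portal
    return {'number_studies': r_client.hgetall(prefix + 'number_studies'),
            'num_users': r_client.get(prefix + 'num_users'),
            'time': r_client.get(prefix + 'time')}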
def generate_plugin_releases():
    """Generate releases for plugins
    """
    ARCHIVE = qdb.archive.Archive
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir

    commands = [c for s in qdb.software.Software.iter(active=True)
                for c in s.commands if c.post_processing_cmd is not None]

    tnow = datetime.now()
    ts = tnow.strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases', 'archive')
    create_nested_path(tgz_dir)
    tgz_dir_release = join(tgz_dir, ts)
    create_nested_path(tgz_dir_release)
    for cmd in commands:
        cmd_name = cmd.name
        mschemes = [v for _, v in ARCHIVE.merging_schemes().items()
                    if cmd_name in v]
        for ms in mschemes:
            ms_name = sub('[^0-9a-zA-Z]+', '', ms)
            ms_fp = join(tgz_dir_release, ms_name)
            create_nested_path(ms_fp)

            pfp = join(ms_fp, 'archive.json')
            archives = {k: loads(v)
                        for k, v in ARCHIVE.retrieve_feature_values(
                            archive_merging_scheme=ms).items()
                        if v != ''}
            with open(pfp, 'w') as f:
                dump(archives, f)

            # now let's run the post_processing_cmd
            ppc = cmd.post_processing_cmd

            # concatenate any other parameters into a string
            params = ' '.join(["%s=%s" % (k, v) for k, v in
                               ppc['script_params'].items()])
            # append archives file and output dir parameters
            params = ("%s --fp_archive=%s --output_dir=%s" % (
                params, pfp, ms_fp))

            ppc_cmd = "%s %s %s" % (
                ppc['script_env'], ppc['script_path'], params)

            p_out, p_err, rv = qdb.processing_job._system_call(ppc_cmd)
            p_out = p_out.rstrip()
            if rv != 0:
                raise ValueError('Error %d: %s' % (rv, p_out))
            p_out = loads(p_out)

    # tgz-ing all files
    tgz_name = join(tgz_dir, 'archive-%s-building.tgz' % ts)
    tgz_name_final = join(tgz_dir, 'archive.tgz')
    with topen(tgz_name, "w|gz") as tgz:
        tgz.add(tgz_dir_release, arcname=basename(tgz_dir_release))

    # getting the release md5
    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [
        ('filepath', tgz_name_final[len(working_dir):], r_client.set),
        ('md5sum', md5sum.hexdigest(), r_client.set),
        ('time', tnow.strftime('%m-%d-%y %H:%M:%S'), r_client.set)]
    for k, v, f in vals:
        redis_key = 'release-archive:%s' % k
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
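# Consumer-side sketch: the three keys written above are enough to serve
# the latest plugin-archive release (hypothetical helper):
def latest_archive_release():
    return {k: r_client.get('release-archive:%s' % k)
            for k in ('filepath', 'md5sum', 'time')}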
def generate_biom_and_metadata_release(study_status='public'):
    """Generate a list of biom/metadata filepaths and a tgz of those files

    Parameters
    ----------
    study_status : str, optional
        The study status to search for. Note that this should always be set
        to 'public' but having this exposed helps with testing. The other
        options are 'private' and 'sandbox'
    """
    studies = qdb.study.Study.get_by_status(study_status)
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir
    portal = qiita_config.portal
    bdir = qdb.util.get_db_files_base_dir()
    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    data = []
    for s in studies:
        # [0] latest is first, [1] only getting the filepath
        sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir)

        for a in s.artifacts(artifact_type='BIOM'):
            if a.processing_parameters is None:
                continue

            cmd_name = a.processing_parameters.command.name

            # this loop is necessary as in theory an artifact can be
            # generated from multiple prep info files
            human_cmd = []
            for p in a.parents:
                pp = p.processing_parameters
                pp_cmd_name = pp.command.name
                if pp_cmd_name == 'Trimming':
                    human_cmd.append('%s @ %s' % (
                        cmd_name, str(pp.values['length'])))
                else:
                    human_cmd.append('%s, %s' % (cmd_name, pp_cmd_name))
            human_cmd = ', '.join(human_cmd)

            for _, fp, fp_type in a.filepaths:
                if fp_type != 'biom' or 'only-16s' in fp:
                    continue
                fp = relpath(fp, bdir)
                # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id,
                #          human readable name)
                for pt in a.prep_templates:
                    for _, prep_fp in pt.get_filepaths():
                        if 'qiime' not in prep_fp:
                            break
                    prep_fp = relpath(prep_fp, bdir)
                    data.append((fp, sample_fp, prep_fp, a.id, human_cmd))

    # writing text and tgz file
    ts = datetime.now().strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases')
    if not exists(tgz_dir):
        makedirs(tgz_dir)
    tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status))
    tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status))
    txt_hd = StringIO()
    with topen(tgz_name, "w|gz") as tgz:
        # writing header for txt
        txt_hd.write(
            "biom_fp\tsample_fp\tprep_fp\tqiita_artifact_id\tcommand\n")
        for biom_fp, sample_fp, prep_fp, artifact_id, human_cmd in data:
            txt_hd.write("%s\t%s\t%s\t%s\t%s\n" % (
                biom_fp, sample_fp, prep_fp, artifact_id, human_cmd))
            tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False)
            tgz.add(join(bdir, sample_fp), arcname=sample_fp,
                    recursive=False)
            tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False)

        txt_hd.seek(0)
        info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts))
        info.size = len(txt_hd.buf)
        tgz.addfile(tarinfo=info, fileobj=txt_hd)

    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [
        ('filepath', tgz_name_final[len(working_dir):], r_client.set),
        ('md5sum', md5sum.hexdigest(), r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:release:%s:%s' % (portal, study_status, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
def generate_biom_and_metadata_release(study_status='public'):
    """Generate a list of biom/metadata filepaths and a tgz of those files

    Parameters
    ----------
    study_status : str, optional
        The study status to search for. Note that this should always be set
        to 'public' but having this exposed helps with testing. The other
        options are 'private' and 'sandbox'
    """
    studies = qdb.study.Study.get_by_status(study_status)
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir
    portal = qiita_config.portal
    bdir = qdb.util.get_db_files_base_dir()
    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    data = []
    for s in studies:
        # [0] latest is first, [1] only getting the filepath
        sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir)

        for a in s.artifacts(artifact_type='BIOM'):
            if a.processing_parameters is None:
                continue

            processing_params = a.processing_parameters
            cmd_name = processing_params.command.name
            ms = processing_params.command.merging_scheme
            software = processing_params.command.software
            software = '%s v%s' % (software.name, software.version)

            # this loop is necessary as in theory an artifact can be
            # generated from multiple prep info files
            afps = [fp for _, fp, _ in a.filepaths if fp.endswith('biom')]
            merging_schemes = []
            parent_softwares = []
            for p in a.parents:
                pparent = p.processing_parameters
                # if parent is None, then is a direct upload; for example
                # per_sample_FASTQ in shotgun data
                if pparent is None:
                    parent_cmd_name = None
                    parent_merging_scheme = None
                    parent_pp = None
                    parent_software = 'N/A'
                else:
                    parent_cmd_name = pparent.command.name
                    parent_merging_scheme = pparent.command.merging_scheme
                    parent_pp = pparent.values
                    psoftware = pparent.command.software
                    parent_software = '%s v%s' % (
                        psoftware.name, psoftware.version)

                merging_schemes.append(qdb.util.human_merging_scheme(
                    cmd_name, ms, parent_cmd_name, parent_merging_scheme,
                    processing_params.values, afps, parent_pp))
                parent_softwares.append(parent_software)
            merging_schemes = ', '.join(merging_schemes)
            parent_softwares = ', '.join(parent_softwares)

            for _, fp, fp_type in a.filepaths:
                if fp_type != 'biom' or 'only-16s' in fp:
                    continue
                fp = relpath(fp, bdir)
                for pt in a.prep_templates:
                    categories = pt.categories()
                    platform = ''
                    target_gene = ''
                    if 'platform' in categories:
                        platform = ', '.join(
                            set(pt.get_category('platform').values()))
                    if 'target_gene' in categories:
                        target_gene = ', '.join(
                            set(pt.get_category('target_gene').values()))
                    for _, prep_fp in pt.get_filepaths():
                        if 'qiime' not in prep_fp:
                            break
                    prep_fp = relpath(prep_fp, bdir)
                    # format: (biom_fp, sample_fp, prep_fp,
                    #          qiita_artifact_id, platform, target gene,
                    #          merging schemes,
                    #          artifact software/version,
                    #          parent software/version)
                    data.append((fp, sample_fp, prep_fp, a.id, platform,
                                 target_gene, merging_schemes, software,
                                 parent_softwares))

    # writing text and tgz file
    ts = datetime.now().strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases')
    create_nested_path(tgz_dir)
    tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status))
    tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status))
    txt_hd = StringIO()
    with topen(tgz_name, "w|gz") as tgz:
        txt_hd.write(
            "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t"
            "target gene\tmerging scheme\tartifact software\t"
            "parent software\n")
        for biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv in \
                data:
            txt_hd.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
                biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv))
            tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False)
            tgz.add(join(bdir, sample_fp), arcname=sample_fp,
                    recursive=False)
            tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False)

        txt_hd.seek(0)
        info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts))
        info.size = len(txt_hd.buf)
        tgz.addfile(tarinfo=info, fileobj=txt_hd)

    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [
        ('filepath', tgz_name_final[len(working_dir):], r_client.set),
        ('md5sum', md5sum.hexdigest(), r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:release:%s:%s' % (portal, study_status, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
def generate_biom_and_metadata_release(study_status='public'):
    """Generate a list of biom/metadata filepaths and a tgz of those files

    Parameters
    ----------
    study_status : str, optional
        The study status to search for. Note that this should always be set
        to 'public' but having this exposed helps with testing. The other
        options are 'private' and 'sandbox'
    """
    studies = qdb.study.Study.get_by_status(study_status)
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir
    portal = qiita_config.portal
    bdir = qdb.util.get_db_files_base_dir()
    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    data = []
    for s in studies:
        # [0] latest is first, [1] only getting the filepath
        sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir)

        for a in s.artifacts(artifact_type='BIOM'):
            if a.processing_parameters is None or \
                    a.visibility != study_status:
                continue

            merging_schemes, parent_softwares = a.merging_scheme
            software = a.processing_parameters.command.software
            software = '%s v%s' % (software.name, software.version)

            for x in a.filepaths:
                if x['fp_type'] != 'biom' or 'only-16s' in x['fp']:
                    continue
                fp = relpath(x['fp'], bdir)
                for pt in a.prep_templates:
                    categories = pt.categories()
                    platform = ''
                    target_gene = ''
                    if 'platform' in categories:
                        platform = ', '.join(
                            set(pt.get_category('platform').values()))
                    if 'target_gene' in categories:
                        target_gene = ', '.join(
                            set(pt.get_category('target_gene').values()))
                    for _, prep_fp in pt.get_filepaths():
                        if 'qiime' not in prep_fp:
                            break
                    prep_fp = relpath(prep_fp, bdir)
                    # format: (biom_fp, sample_fp, prep_fp,
                    #          qiita_artifact_id, platform, target gene,
                    #          merging schemes,
                    #          artifact software/version,
                    #          parent software/version)
                    data.append(
                        (fp, sample_fp, prep_fp, a.id, platform,
                         target_gene, merging_schemes, software,
                         parent_softwares))

    # writing text and tgz file
    ts = datetime.now().strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases')
    create_nested_path(tgz_dir)
    tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status))
    tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status))
    txt_lines = [
        "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t"
        "target gene\tmerging scheme\tartifact software\tparent software"]
    with topen(tgz_name, "w|gz") as tgz:
        for biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv in \
                data:
            txt_lines.append("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
                biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv))
            tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False)
            tgz.add(join(bdir, sample_fp), arcname=sample_fp,
                    recursive=False)
            tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False)

        info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts))
        txt_hd = BytesIO()
        txt_hd.write(bytes('\n'.join(txt_lines), 'ascii'))
        txt_hd.seek(0)
        info.size = len(txt_hd.read())
        txt_hd.seek(0)
        tgz.addfile(tarinfo=info, fileobj=txt_hd)

    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [
        ('filepath', tgz_name_final[len(working_dir):], r_client.set),
        ('md5sum', md5sum.hexdigest(), r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:release:%s:%s' % (portal, study_status, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
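# Sketch of verifying a downloaded release against the md5sum stored above
# (hypothetical helper; it reuses the same 4 KiB read loop as the writer):
def verify_release(local_fp, portal, study_status='public'):
    expected = r_client.get('%s:release:%s:md5sum' % (portal, study_status))
    if isinstance(expected, bytes):
        expected = expected.decode('utf-8')
    digest = md5()
    with open(local_fp, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected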
def update_redis_stats():
    """Generate the system stats and save them in redis

    Returns
    -------
    list of str
        artifact filepaths that are not present in the file system
    """
    STUDY = qdb.study.Study

    number_studies = {'public': 0, 'private': 0, 'sandbox': 0}
    number_of_samples = {'public': 0, 'private': 0, 'sandbox': 0}
    num_studies_ebi = 0
    num_samples_ebi = 0
    number_samples_ebi_prep = 0
    stats = []
    missing_files = []
    per_data_type_stats = Counter()
    for study in STUDY.iter():
        st = study.sample_template
        if st is None:
            continue

        # counting samples submitted to EBI-ENA
        len_samples_ebi = sum(
            [esa is not None for esa in st.ebi_sample_accessions.values()])
        if len_samples_ebi != 0:
            num_studies_ebi += 1
            num_samples_ebi += len_samples_ebi

        samples_status = defaultdict(set)
        for pt in study.prep_templates():
            pt_samples = list(pt.keys())
            pt_status = pt.status
            if pt_status == 'public':
                per_data_type_stats[pt.data_type()] += len(pt_samples)
            samples_status[pt_status].update(pt_samples)
            # counting experiments (samples in preps) submitted to EBI-ENA
            number_samples_ebi_prep += sum([
                esa is not None
                for esa in pt.ebi_experiment_accessions.values()])

        # counting studies
        if 'public' in samples_status:
            number_studies['public'] += 1
        elif 'private' in samples_status:
            number_studies['private'] += 1
        else:
            # note that this is a catch all for other status; at time of
            # writing there is status: awaiting_approval
            number_studies['sandbox'] += 1

        # counting samples; note that some of these lines could be merged
        # with the block above but I decided to split it in 2 for clarity
        if 'public' in samples_status:
            number_of_samples['public'] += len(samples_status['public'])
        if 'private' in samples_status:
            number_of_samples['private'] += len(samples_status['private'])
        if 'sandbox' in samples_status:
            number_of_samples['sandbox'] += len(samples_status['sandbox'])

        # processing filepaths
        for artifact in study.artifacts():
            for adata in artifact.filepaths:
                try:
                    s = stat(adata['fp'])
                except OSError:
                    missing_files.append(adata['fp'])
                else:
                    stats.append(
                        (adata['fp_type'], s.st_size,
                         strftime('%Y-%m', localtime(s.st_mtime))))

    num_users = qdb.util.get_count('qiita.qiita_user')
    num_processing_jobs = qdb.util.get_count('qiita.processing_job')

    lat_longs = dumps(get_lat_longs())

    summary = {}
    all_dates = []
    # these are some filetypes that are too small to plot alone so we'll
    # merge them into 'other'
    group_other = {'html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
                   'biom', 'raw_sff', 'raw_qual', 'qza', 'html_summary_dir',
                   'plain_text', 'raw_barcodes'}
    for ft, size, ym in stats:
        if ft in group_other:
            ft = 'other'
        if ft not in summary:
            summary[ft] = {}
        if ym not in summary[ft]:
            summary[ft][ym] = 0
            all_dates.append(ym)
        summary[ft][ym] += size
    all_dates = sorted(set(all_dates))

    # sorting summaries
    ordered_summary = {}
    for dt in summary:
        new_list = []
        current_value = 0
        for ad in all_dates:
            if ad in summary[dt]:
                current_value += summary[dt][ad]
            new_list.append(current_value)
        ordered_summary[dt] = new_list

    plot_order = sorted([(k, ordered_summary[k][-1])
                         for k in ordered_summary], key=lambda x: x[1])

    # helper function to generate y axis, modified from:
    # http://stackoverflow.com/a/1094933
    def sizeof_fmt(value, position):
        number = None
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(value) < 1024.0:
                number = "%3.1f%s" % (value, unit)
                break
            value /= 1024.0
        if number is None:
            number = "%.1f%s" % (value, 'Yi')
        return number

    all_dates_axis = range(len(all_dates))
    plt.locator_params(axis='y', nbins=10)
    plt.figure(figsize=(20, 10))
    for k, v in plot_order:
        plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)

    plt.xticks(all_dates_axis, all_dates)
    plt.legend()
    plt.grid()
    ax = plt.gca()
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
    plt.xticks(rotation=90)
    plt.xlabel('Date')
    plt.ylabel('Storage space per data type')

    plot = BytesIO()
    plt.savefig(plot, format='png')
    plot.seek(0)
    img = 'data:image/png;base64,' + quote(b64encode(plot.getbuffer()))

    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    portal = qiita_config.portal
    # making sure per_data_type_stats has some data so hmset doesn't fail
    if per_data_type_stats == {}:
        per_data_type_stats['No data'] = 0
    vals = [
        ('number_studies', number_studies, r_client.hmset),
        ('number_of_samples', number_of_samples, r_client.hmset),
        ('per_data_type_stats', dict(per_data_type_stats), r_client.hmset),
        ('num_users', num_users, r_client.set),
        ('lat_longs', lat_longs, r_client.set),
        ('num_studies_ebi', num_studies_ebi, r_client.set),
        ('num_samples_ebi', num_samples_ebi, r_client.set),
        ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
        ('img', img, r_client.set),
        ('time', time, r_client.set),
        ('num_processing_jobs', num_processing_jobs, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:stats:%s' % (portal, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)

    # preparing vals to insert into DB
    vals = dumps(dict([x[:-1] for x in vals]))
    sql = """INSERT INTO qiita.stats_daily (stats, stats_timestamp)
             VALUES (%s, NOW())"""
    qdb.sql_connection.perform_as_transaction(sql, [vals])

    return missing_files
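# The per-data-type hash written above can be read back for display (a
# sketch; it assumes decode_responses is off, so redis returns bytes, and
# int() accepts the byte strings directly):
def read_per_data_type_stats(portal):
    raw = r_client.hgetall('%s:stats:per_data_type_stats' % portal)
    return {k.decode('utf-8'): int(v) for k, v in raw.items()}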
def update_redis_stats():
    """Generate the system stats and save them in redis

    Returns
    -------
    list of str
        artifact filepaths that are not present in the file system
    """
    STUDY = qdb.study.Study
    studies = {'public': STUDY.get_by_status('public'),
               'private': STUDY.get_by_status('private'),
               'sandbox': STUDY.get_by_status('sandbox')}
    number_studies = {k: len(v) for k, v in viewitems(studies)}

    number_of_samples = {}
    ebi_samples_prep = {}
    num_samples_ebi = 0
    for k, sts in viewitems(studies):
        number_of_samples[k] = 0
        for s in sts:
            st = s.sample_template
            if st is not None:
                number_of_samples[k] += len(list(st.keys()))

            ebi_samples_prep_count = 0
            for pt in s.prep_templates():
                ebi_samples_prep_count += len([
                    1 for _, v in viewitems(pt.ebi_experiment_accessions)
                    if v is not None and v != ''])
            ebi_samples_prep[s.id] = ebi_samples_prep_count

            if s.sample_template is not None:
                num_samples_ebi += len([
                    1 for _, v in viewitems(
                        s.sample_template.ebi_sample_accessions)
                    if v is not None and v != ''])

    num_users = qdb.util.get_count('qiita.qiita_user')

    lat_longs = get_lat_longs()

    num_studies_ebi = len(
        [k for k, v in viewitems(ebi_samples_prep) if v >= 1])
    number_samples_ebi_prep = sum(
        [v for _, v in viewitems(ebi_samples_prep)])

    # generating file size stats
    stats = []
    missing_files = []
    for k, sts in viewitems(studies):
        for s in sts:
            for a in s.artifacts():
                for _, fp, dt in a.filepaths:
                    try:
                        s = stat(fp)
                        stats.append(
                            (dt, s.st_size,
                             strftime('%Y-%m', localtime(s.st_ctime))))
                    except OSError:
                        missing_files.append(fp)

    summary = {}
    all_dates = []
    for ft, size, ym in stats:
        if ft not in summary:
            summary[ft] = {}
        if ym not in summary[ft]:
            summary[ft][ym] = 0
            all_dates.append(ym)
        summary[ft][ym] += size
    all_dates = sorted(set(all_dates))

    # sorting summaries
    rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
                    'biom', 'raw_sff', 'raw_qual']
    ordered_summary = {}
    for dt in summary:
        if dt in rm_from_data:
            continue
        new_list = []
        current_value = 0
        for ad in all_dates:
            if ad in summary[dt]:
                current_value += summary[dt][ad]
            new_list.append(current_value)
        ordered_summary[dt] = new_list

    plot_order = sorted([(k, ordered_summary[k][-1])
                         for k in ordered_summary], key=lambda x: x[1])

    # helper function to generate y axis, modified from:
    # http://stackoverflow.com/a/1094933
    def sizeof_fmt(value, position):
        number = None
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(value) < 1024.0:
                number = "%3.1f%s" % (value, unit)
                break
            value /= 1024.0
        if number is None:
            number = "%.1f%s" % (value, 'Yi')
        return number

    all_dates_axis = range(len(all_dates))
    plt.locator_params(axis='y', nbins=10)
    plt.figure(figsize=(20, 10))
    for k, v in plot_order:
        plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)

    plt.xticks(all_dates_axis, all_dates)
    plt.legend()
    plt.grid()
    ax = plt.gca()
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
    plt.xticks(rotation=90)
    plt.xlabel('Date')
    plt.ylabel('Storage space per data type')

    plot = StringIO()
    plt.savefig(plot, format='png')
    plot.seek(0)
    img = 'data:image/png;base64,' + quote(b64encode(plot.buf))

    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    portal = qiita_config.portal
    vals = [
        ('number_studies', number_studies, r_client.hmset),
        ('number_of_samples', number_of_samples, r_client.hmset),
        ('num_users', num_users, r_client.set),
        ('lat_longs', lat_longs, r_client.set),
        ('num_studies_ebi', num_studies_ebi, r_client.set),
        ('num_samples_ebi', num_samples_ebi, r_client.set),
        ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
        ('img', img, r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:stats:%s' % (portal, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)

    return missing_files
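# Note: r_client.hmset is deprecated in redis-py >= 3.x. A sketch of the
# same flush-then-write loop using the modern API (hset with a mapping for
# hash values, plain set otherwise; the helper name is hypothetical):
def flush_and_write(vals, portal):
    for k, v, f in vals:
        redis_key = '%s:stats:%s' % (portal, k)
        r_client.delete(redis_key)  # flush stale values first
        if isinstance(v, dict):
            r_client.hset(redis_key, mapping=v)
        else:
            r_client.set(redis_key, v)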