def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Rebuild an NGIProject object tree from the records stored in Charon.

    Every sample, libprep and seqrun that Charon lists for the project is
    added beneath a new NGIProject. Each added object gets a ``status``
    attribute taken from the "status" field of its Charon record, or
    "unknown" when that field is absent.

    :param str analysis_top_dir: Passed to NGIProject as its base_path
    :param str project_name: Used as both the name and dirname of the project
    :param str project_id: The id of the project in Charon

    :returns: The populated project object
    :rtype: NGIProject
    :raises RuntimeError: If a Charon query fails with a CharonError
    """
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project "
                           "{}: {}".format(project_id, e))
    for sample in samples:
        sample_id = sample.get("sampleid")
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps = charon_session.sample_get_libpreps(project_id,
                                                          sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample "
                               "{}: {}".format(project_id, sample_id, e))
        for libprep in libpreps:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id,
                                                 dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns = charon_session.libprep_get_seqruns(project_id,
                                                             sample_id,
                                                             libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id,
                                                           libprep_id, e))
            for seqrun in seqruns:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id,
                                                    dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
def get_finished_seqruns_for_sample(project_id, sample_id,
                                    include_failed_libpreps=False):
    """Find all the finished seqruns for a particular sample.

    A seqrun is considered finished when the "alignment_status" of its
    Charon record is "DONE". Libpreps without any finished seqrun do not
    appear in the result.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Also consider libpreps whose qc is "FAILED"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict
    """
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                aln_status = charon_session.seqrun_get(projectid=project_id,
                                                       sampleid=sample_id,
                                                       libprepid=libprep_id,
                                                       seqrunid=seqrun_id).get('alignment_status')
                if aln_status == "DONE":
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    LOG.debug('Skipping seqrun "{}" due to alignment_status '
                              '"{}"'.format(seqrun_id, aln_status))
        else:
            # Log the libprep id rather than the whole Charon record
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep.get('libprepid'), libprep.get("qc")))
    # Hand back a plain dict so lookups of absent libpreps raise KeyError
    return dict(libpreps)
def update_analysis(project_id, status):
    """Record the outcome of an rna_ngi analysis in Charon.

    A mail is sent through mail_analysis first (level INFO on success, ERROR
    otherwise). Then every sample of the project whose analysis_status is
    "UNDER_ANALYSIS" is marked "ANALYZED" or "FAILED", and each of its
    seqruns with alignment_status "RUNNING" (in libpreps that have not
    failed qc) is marked "DONE" or "FAILED".

    :param str project_id: The id of the project
    :param status: Truthy if the analysis succeeded, falsy if it failed
    """
    session = CharonSession()
    if status:
        mail_level, sample_outcome, seqrun_outcome = 'INFO', 'ANALYZED', 'DONE'
    else:
        mail_level, sample_outcome, seqrun_outcome = 'ERROR', 'FAILED', 'FAILED'
    mail_analysis(project_id, engine_name='rna_ngi', level=mail_level)

    samples = session.project_get_samples(project_id).get("samples", {})
    for sample in samples:
        # Only samples that are currently being analyzed are touched
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        sample_id = sample.get('sampleid')
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample_id, sample_outcome))
        session.sample_update(project_id, sample_id,
                              analysis_status=sample_outcome)

        libpreps = session.sample_get_libpreps(
            project_id, sample_id).get('libpreps', {})
        for libprep in libpreps:
            if libprep.get('qc') == 'FAILED':
                continue
            libprep_id = libprep.get('libprepid')
            seqruns = session.libprep_get_seqruns(
                project_id, sample_id, libprep_id).get('seqruns', {})
            for seqrun in seqruns:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                seqrun_id = seqrun.get('seqrunid')
                LOG.info("Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                    project_id, sample_id, libprep_id, seqrun_id,
                    seqrun_outcome))
                session.seqrun_update(project_id, sample_id, libprep_id,
                                      seqrun_id,
                                      alignment_status=seqrun_outcome)
def recreate_project_from_db(analysis_top_dir, project_name, project_id):
    """Rebuild an NGIProject object tree from the records stored in Charon.

    Every sample, libprep and seqrun that Charon lists for the project is
    added beneath a new NGIProject. Each added object gets a ``status``
    attribute taken from the "status" field of its Charon record, or
    "unknown" when that field is absent.

    :param str analysis_top_dir: Passed to NGIProject as its base_path
    :param str project_name: Used as both the name and dirname of the project
    :param str project_id: The id of the project in Charon

    :returns: The populated project object
    :rtype: NGIProject
    :raises RuntimeError: If a Charon query fails with a CharonError
    """
    project_obj = NGIProject(name=project_name,
                             dirname=project_name,
                             project_id=project_id,
                             base_path=analysis_top_dir)
    charon_session = CharonSession()
    try:
        samples = charon_session.project_get_samples(project_id)["samples"]
    except CharonError as e:
        raise RuntimeError("Could not access samples for project "
                           "{}: {}".format(project_id, e))
    for sample in samples:
        sample_id = sample.get("sampleid")
        sample_obj = project_obj.add_sample(name=sample_id, dirname=sample_id)
        sample_obj.status = sample.get("status", "unknown")
        try:
            libpreps = charon_session.sample_get_libpreps(project_id,
                                                          sample_id)["libpreps"]
        except CharonError as e:
            raise RuntimeError("Could not access libpreps for project {} / sample "
                               "{}: {}".format(project_id, sample_id, e))
        for libprep in libpreps:
            libprep_id = libprep.get("libprepid")
            libprep_obj = sample_obj.add_libprep(name=libprep_id,
                                                 dirname=libprep_id)
            libprep_obj.status = libprep.get("status", "unknown")
            try:
                seqruns = charon_session.libprep_get_seqruns(project_id,
                                                             sample_id,
                                                             libprep_id)["seqruns"]
            except CharonError as e:
                raise RuntimeError("Could not access seqruns for project {} / sample {} / "
                                   "libprep {}: {}".format(project_id, sample_id,
                                                           libprep_id, e))
            for seqrun in seqruns:
                # e.g. 140528_D00415_0049_BC423WACXX
                seqrun_id = seqrun.get("seqrunid")
                seqrun_obj = libprep_obj.add_seqrun(name=seqrun_id,
                                                    dirname=seqrun_id)
                seqrun_obj.status = seqrun.get("status", "unknown")
    return project_obj
def check_for_preexisting_sample_runs(project_obj, sample_obj,
                                      restart_running_jobs,
                                      restart_finished_jobs):
    """Refuse to proceed when a seqrun of this sample is already being
    analyzed or has been analyzed, unless the restart flags permit it.

    The alignment_status of every seqrun of the sample is read from Charon;
    the first seqrun that blocks the analysis aborts the check.

    :param NGIProject project_obj: The project object
    :param NGISample sample_obj: The sample object
    :param boolean restart_running_jobs: Allow seqruns with status RUNNING
    :param boolean restart_finished_jobs: Allow seqruns with status DONE

    :raises RuntimeError: If a seqrun is RUNNING or DONE and the matching
                          flag is not set
    """
    project_id = project_obj.project_id
    sample_id = sample_obj.name
    session = CharonSession()
    libpreps = session.sample_get_libpreps(projectid=project_id,
                                           sampleid=sample_id)['libpreps']
    for libprep in libpreps:
        libprep_id = libprep['libprepid']
        seqruns = session.libprep_get_seqruns(projectid=project_id,
                                              sampleid=sample_id,
                                              libprepid=libprep_id)['seqruns']
        for seqrun in seqruns:
            seqrun_id = seqrun['seqrunid']
            seqrun_record = session.seqrun_get(projectid=project_id,
                                               sampleid=sample_id,
                                               libprepid=libprep_id,
                                               seqrunid=seqrun_id)
            aln_status = seqrun_record.get('alignment_status')
            blocked_running = (aln_status == "RUNNING"
                               and not restart_running_jobs)
            blocked_done = (aln_status == "DONE"
                            and not restart_finished_jobs)
            if blocked_running or blocked_done:
                raise RuntimeError('Project/Sample "{}/{}" has a preexisting '
                                   'seqrun "{}" with status "{}"'.format(
                                       project_obj, sample_obj,
                                       seqrun_id, aln_status))
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Find all the valid seqruns for a particular sample.

    A seqrun is valid when the chosen status field of its Charon record is
    anything other than "DONE" (or always, if include_done_seqruns is set).
    Seqruns whose record lacks the field are treated as valid. Libpreps
    without any valid seqrun do not appear in the result.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libpreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: The seqrun field to inspect; either
                             "alignment_status" or "genotype_status"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status",)
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(", ".join(valid_status_values),
                                                          status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                try:
                    aln_status = charon_session.seqrun_get(projectid=project_id,
                                                           sampleid=sample_id,
                                                           libprepid=libprep_id,
                                                           seqrunid=seqrun_id)[status_field]
                except KeyError:
                    LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                              'for project "{}" / sample "{}". Including as '
                              'valid.'.format(status_field, seqrun_id,
                                              project_id, sample_id))
                    # None never equals "DONE", so the seqrun is kept below
                    aln_status = None
                if aln_status != "DONE" or include_done_seqruns:
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    # A space separates the field name from its quoted value
                    LOG.info('Skipping seqrun "{}" due to {} '
                             '"{}"'.format(seqrun_id, status_field, aln_status))
        else:
            # Log the libprep id rather than the whole Charon record
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep.get('libprepid'), libprep.get("qc")))
    return dict(libpreps)
def reset_charon_records_by_name(project_id, restrict_to_samples=None,
                                 restrict_to_libpreps=None,
                                 restrict_to_seqruns=None):
    """Reset the Charon records of a project and of the records below it.

    The project record itself is always reset. Samples, libpreps and seqruns
    are reset in turn; at each level a non-empty restriction list limits the
    reset to the ids it names. Skipping a record also skips everything
    beneath it.

    :param str project_id: The id of the project
    :param list restrict_to_samples: Sample ids to reset (all if empty/None)
    :param list restrict_to_libpreps: Libprep ids to reset (all if empty/None)
    :param list restrict_to_seqruns: Seqrun ids to reset (all if empty/None)
    """
    if not restrict_to_samples:
        restrict_to_samples = []
    if not restrict_to_libpreps:
        restrict_to_libpreps = []
    if not restrict_to_seqruns:
        restrict_to_seqruns = []
    charon_session = CharonSession()
    LOG.info("Resetting Charon record for project {}".format(project_id))
    charon_session.project_reset(projectid=project_id)
    LOG.info("Charon record for project {} reset".format(project_id))
    for sample in charon_session.project_get_samples(projectid=project_id).get('samples', []):
        sample_id = sample['sampleid']
        if restrict_to_samples and sample_id not in restrict_to_samples:
            LOG.info("Skipping project/sample {}/{}: not in list of samples to use "
                     "({})".format(project_id, sample_id, ", ".join(restrict_to_samples)))
            continue
        LOG.info("Resetting Charon record for project/sample {}/{}".format(project_id,
                                                                           sample_id))
        charon_session.sample_reset(projectid=project_id, sampleid=sample_id)
        LOG.info("Charon record for project/sample {}/{} reset".format(project_id,
                                                                       sample_id))
        for libprep in charon_session.sample_get_libpreps(projectid=project_id,
                                                          sampleid=sample_id).get('libpreps', []):
            libprep_id = libprep['libprepid']
            if restrict_to_libpreps and libprep_id not in restrict_to_libpreps:
                LOG.info("Skipping project/sample/libprep {}/{}/{}: not in list "
                         "of libpreps to use ({})".format(project_id, sample_id,
                                                          libprep_id,
                                                          ", ".join(restrict_to_libpreps)))
                continue
            # The "/" between "sample" and "libprep" was missing in this message
            LOG.info("Resetting Charon record for project/sample/"
                     "libprep {}/{}/{}".format(project_id, sample_id, libprep_id))
            charon_session.libprep_reset(projectid=project_id, sampleid=sample_id,
                                         libprepid=libprep_id)
            LOG.info("Charon record for project/sample/libprep {}/{}/{} "
                     "reset".format(project_id, sample_id, libprep_id))
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id).get('seqruns', []):
                seqrun_id = seqrun['seqrunid']
                if restrict_to_seqruns and seqrun_id not in restrict_to_seqruns:
                    LOG.info("Skipping project/sample/libprep/seqrun {}/{}/{}/{}: "
                             "not in list of seqruns to use ({})".format(
                                 project_id, sample_id, libprep_id, seqrun_id,
                                 ", ".join(restrict_to_seqruns)))
                    continue
                LOG.info("Resetting Charon record for project/sample/libprep/"
                         "seqrun {}/{}/{}/{}".format(project_id, sample_id,
                                                     libprep_id, seqrun_id))
                charon_session.seqrun_reset(projectid=project_id, sampleid=sample_id,
                                            libprepid=libprep_id, seqrunid=seqrun_id)
                LOG.info("Charon record for project/sample/libprep/seqrun "
                         "{}/{}/{}/{} reset".format(project_id, sample_id,
                                                    libprep_id, seqrun_id))
def get_valid_seqruns_for_sample(project_id, sample_id,
                                 include_failed_libpreps=False,
                                 include_done_seqruns=False,
                                 status_field="alignment_status"):
    """Find all the valid seqruns for a particular sample.

    A seqrun is valid when the chosen status field of its Charon record is
    anything other than "DONE" (or always, if include_done_seqruns is set).
    Seqruns whose record lacks the field are treated as valid. Libpreps
    without any valid seqrun do not appear in the result.

    :param str project_id: The id of the project
    :param str sample_id: The id of the sample
    :param bool include_failed_libpreps: Include seqruns for libpreps that have failed QC
    :param bool include_done_seqruns: Include seqruns that are already marked DONE
    :param str status_field: The seqrun field to inspect; either
                             "alignment_status" or "genotype_status"

    :returns: A dict of {libprep_01: [seqrun_01, ..., seqrun_nn], ...}
    :rtype: dict

    :raises ValueError: If status_field is not a valid value
    """
    valid_status_values = ("alignment_status", "genotype_status",)
    if status_field not in valid_status_values:
        raise ValueError('"status_field" argument must be one of {} '
                         '(value passed was "{}")'.format(", ".join(valid_status_values),
                                                          status_field))
    charon_session = CharonSession()
    sample_libpreps = charon_session.sample_get_libpreps(projectid=project_id,
                                                         sampleid=sample_id)
    libpreps = collections.defaultdict(list)
    for libprep in sample_libpreps['libpreps']:
        if libprep.get('qc') != "FAILED" or include_failed_libpreps:
            libprep_id = libprep['libprepid']
            for seqrun in charon_session.libprep_get_seqruns(projectid=project_id,
                                                             sampleid=sample_id,
                                                             libprepid=libprep_id)['seqruns']:
                seqrun_id = seqrun['seqrunid']
                try:
                    aln_status = charon_session.seqrun_get(projectid=project_id,
                                                           sampleid=sample_id,
                                                           libprepid=libprep_id,
                                                           seqrunid=seqrun_id)[status_field]
                except KeyError:
                    LOG.error('Field "{}" not available for seqrun "{}" in Charon '
                              'for project "{}" / sample "{}". Including as '
                              'valid.'.format(status_field, seqrun_id,
                                              project_id, sample_id))
                    # None never equals "DONE", so the seqrun is kept below
                    aln_status = None
                if aln_status != "DONE" or include_done_seqruns:
                    libpreps[libprep_id].append(seqrun_id)
                else:
                    # A space separates the field name from its quoted value
                    LOG.info('Skipping seqrun "{}" due to {} '
                             '"{}"'.format(seqrun_id, status_field, aln_status))
        else:
            # Log the libprep id rather than the whole Charon record
            LOG.info('Skipping libprep "{}" due to qc status '
                     '"{}"'.format(libprep.get('libprepid'), libprep.get("qc")))
    return dict(libpreps)
def main(project):
    """Put every sample and seqrun of a project back to a not-yet-analyzed
    state in Charon.

    Samples get analysis_status "TO_ANALYZE", no genotype_status and zeroed
    coverage/read totals; seqruns get alignment_status "NOT_RUNNING" and a
    zeroed mean coverage.

    :param project: The project identifier passed to the Charon calls
    """
    session = CharonSession()
    project_samples = session.project_get_samples(project)
    for sample in project_samples["samples"]:
        sample_id = sample["sampleid"]
        session.sample_update(project, sample_id,
                              analysis_status="TO_ANALYZE",
                              genotype_status=None,
                              total_autosomal_coverage="0",
                              total_sequenced_reads="0")
        libpreps = session.sample_get_libpreps(project, sample_id)['libpreps']
        for libprep in libpreps:
            libprep_id = libprep["libprepid"]
            seqruns = session.libprep_get_seqruns(project, sample_id,
                                                  libprep_id)['seqruns']
            for seqrun in seqruns:
                session.seqrun_update(project, sample_id, libprep_id,
                                      seqrun["seqrunid"],
                                      mean_autosomal_coverage="0",
                                      alignment_status="NOT_RUNNING")
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Look up in Charon which library prep of a sample was sequenced on a
    given flowcell.

    The libpreps of the sample are walked in the order Charon returns them
    and the first one holding a seqrun whose id equals fcid is returned.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found, or Charon reported an error.
    """
    session = CharonSession()
    try:
        libpreps = session.sample_get_libpreps(project_id, sample_name)['libpreps']
        if not libpreps:
            raise CharonError("No libpreps found!", 404)
        for libprep in libpreps:
            # Fetch this libprep's sequencing runs and compare against the FCID
            seqruns = session.libprep_get_seqruns(project_id, sample_name,
                                                  libprep['libprepid'])['seqruns']
            if not seqruns:
                raise CharonError("No seqruns found!", 404)
            for seqrun in seqruns:
                if seqrun["seqrunid"] == fcid:
                    ## BUG if we have one sample with two libpreps on the same flowcell,
                    ##     this just picks the first one it encounters; instead,
                    ##     it should raise an Exception. Requires restructuring.
                    return libprep['libprepid']
        # Every libprep was inspected and none carried the flowcell
        raise CharonError("No match", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        raise ValueError('Could not determine library prep for project "{}" '
                         '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                sample_name,
                                                                fcid, e))
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Resolve the library prep id of a sample from the flowcell it ran on,
    using the records in Charon.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found, or Charon reported an error.
    """
    charon = CharonSession()
    try:
        sample_libpreps = charon.sample_get_libpreps(project_id,
                                                     sample_name)["libpreps"]
        if not sample_libpreps:
            raise CharonError("No libpreps found!", 404)
        for libprep in sample_libpreps:
            libprep_id = libprep["libprepid"]
            # Get the sequencing runs and see if they match the FCID we have
            libprep_seqruns = charon.libprep_get_seqruns(project_id,
                                                         sample_name,
                                                         libprep_id)["seqruns"]
            if not libprep_seqruns:
                raise CharonError("No seqruns found!", 404)
            ## BUG if we have one sample with two libpreps on the same flowcell,
            ##     this just picks the first one it encounters; instead,
            ##     it should raise an Exception. Requires restructuring.
            if any(seqrun["seqrunid"] == fcid for seqrun in libprep_seqruns):
                return libprep_id
        raise CharonError("No match", 404)
    except CharonError as e:
        # Both outcomes surface to the caller as ValueError; only the text differs
        if e.status_code == 404:
            message = ('No library prep found for project "{}" / sample "{}" '
                       '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            message = ('Could not determine library prep for project "{}" '
                       '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                              sample_name,
                                                              fcid, e))
        raise ValueError(message)
def determine_library_prep_from_fcid(project_id, sample_name, fcid):
    """Use the information in the database to get the library prep id
    from the project name, sample name, and flowcell id.

    The libpreps of the sample are searched in the order Charon returns
    them; the first one with a seqrun whose id equals fcid is returned.

    :param str project_id: The ID of the project
    :param str sample_name: The name of the sample
    :param str fcid: The flowcell ID

    :returns: The library prep (e.g. "A")
    :rtype str
    :raises ValueError: If no match was found, or Charon reported an error.
    """
    charon_session = CharonSession()
    try:
        libpreps = charon_session.sample_get_libpreps(project_id, sample_name)['libpreps']
        if libpreps:
            for libprep in libpreps:
                # Get the sequencing runs and see if they match the FCID we have
                seqruns = charon_session.libprep_get_seqruns(project_id,
                                                             sample_name,
                                                             libprep['libprepid'])['seqruns']
                if seqruns:
                    for seqrun in seqruns:
                        seqrun_runid = seqrun["seqrunid"]
                        if seqrun_runid == fcid:
                            return libprep['libprepid']
                else:
                    raise CharonError("No seqruns found!", 404)
            else:
                # Only give up once every libprep has been searched; raising
                # after the first libprep would hide matches in later ones.
                raise CharonError("No match", 404)
        else:
            raise CharonError("No libpreps found!", 404)
    except CharonError as e:
        if e.status_code == 404:
            raise ValueError('No library prep found for project "{}" / sample "{}" '
                             '/ fcid "{}"'.format(project_id, sample_name, fcid))
        else:
            raise ValueError('Could not determine library prep for project "{}" '
                             '/ sample "{}" / fcid "{}": {}'.format(project_id,
                                                                    sample_name,
                                                                    fcid, e))
def main(project):
    """Reset the analysis bookkeeping of a whole project in Charon.

    Each sample is updated with the sample-level reset values and each
    seqrun beneath it with the seqrun-level reset values defined below.

    :param project: The project identifier passed to the Charon calls
    """
    # Values written to every sample record
    sample_reset_fields = {
        "analysis_status": "TO_ANALYZE",
        "genotype_status": None,
        "total_autosomal_coverage": "0",
        "total_sequenced_reads": "0",
    }
    # Values written to every seqrun record
    seqrun_reset_fields = {
        "mean_autosomal_coverage": "0",
        "alignment_status": "NOT_RUNNING",
    }
    charon = CharonSession()
    for sample_record in charon.project_get_samples(project)["samples"]:
        charon.sample_update(project, sample_record["sampleid"],
                             **sample_reset_fields)
        preps = charon.sample_get_libpreps(
            project, sample_record["sampleid"])['libpreps']
        for prep_record in preps:
            runs = charon.libprep_get_seqruns(
                project, sample_record["sampleid"],
                prep_record["libprepid"])['seqruns']
            for run_record in runs:
                charon.seqrun_update(project,
                                     sample_record["sampleid"],
                                     prep_record["libprepid"],
                                     run_record["seqrunid"],
                                     **seqrun_reset_fields)
def project_summarize(projects, verbosity=0):
    """Print a status summary of one or more projects via print_stderr.

    Local job statuses are first pushed to Charon (quietly, no mails). The
    project, sample, libprep and seqrun records are then read back from
    Charon and printed. Projects that cannot be located or are missing from
    Charon are reported and skipped.

    :param projects: Iterable of project identifiers, each resolved with
                     locate_project
    :param int verbosity: 0 prints the number of samples/libpreps/seqruns per
                          status; 1 also lists the ids under each status;
                          2 or more prints every record in full. Anything
                          that is not a non-negative int falls back to 0.
    """
    if type(verbosity) is not int or verbosity < 0:
        # Interpolate the rejected value so the message shows what was passed
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0'.format(verbosity))
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True) # Don't send mails
    charon_session = CharonSession()
    projects_list = []
    for project in projects:
        try:
            project = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr('Gathering information for project "{}"...'.format(project))
        try:
            # From here on "project" is the Charon record, not the name
            project = charon_session.project_get(project)
        except CharonError as e:
            print_stderr('Project "{}" not found in Charon; skipping ({})'.format(project, e),
                         file=sys.stderr)
            continue
        project_dict = {'name': project['name'],
                        'id': project['projectid'],
                        'status': project['status']}
        samples_list = project_dict['samples'] = []
        for sample in charon_session.project_get_samples(
                project['projectid']).get('samples', []):
            sample_dict = {'id': sample['sampleid'],
                           'analysis_status': sample['analysis_status'],
                           'coverage': sample['total_autosomal_coverage']}
            libpreps_list = sample_dict['libpreps'] = []
            samples_list.append(sample_dict)
            for libprep in charon_session.sample_get_libpreps(
                    project['projectid'],
                    sample['sampleid']).get('libpreps', []):
                libprep_dict = {'id': libprep['libprepid'],
                                'qc': libprep['qc']}
                seqruns_list = libprep_dict['seqruns'] = []
                libpreps_list.append(libprep_dict)
                for seqrun in charon_session.libprep_get_seqruns(
                        project['projectid'], sample['sampleid'],
                        libprep['libprepid']).get('seqruns', []):
                    seqrun_dict = {'id': seqrun['seqrunid'],
                                   'alignment_status': seqrun['alignment_status'],
                                   'coverage': seqrun['mean_autosomal_coverage']}
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append(project_dict)

    if verbosity in (0, 1):
        # Group the ids of samples, libpreps and seqruns by their status
        projects_status_list = []
        for project_dict in projects_list:
            project_status_dict = {}
            project_status_dict['name'] = "{} ({})".format(project_dict['name'],
                                                           project_dict['id'])
            project_status_dict['status'] = project_dict['status']
            samples_by_status = project_status_dict['samples_by_status'] = \
                collections.defaultdict(set)
            libpreps_by_status = project_status_dict['libpreps_by_status'] = \
                collections.defaultdict(set)
            seqruns_by_status = project_status_dict['seqruns_by_status'] = \
                collections.defaultdict(set)
            for sample_dict in project_dict.get('samples', []):
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict.get('libpreps')
                if libpreps:
                    if not any(libprep["seqruns"] for libprep in libpreps):
                        # Libpreps of such samples are not counted at all
                        sample_status = "NO_SEQRUNS"
                    else:
                        for libprep_dict in libpreps:
                            libpreps_by_status[libprep_dict['qc']].add(libprep_dict['id'])
                            for seqrun_dict in libprep_dict.get('seqruns', []):
                                seqruns_by_status[seqrun_dict['alignment_status']].add(
                                    seqrun_dict['id'])
                else:
                    sample_status = "NO_LIBPREPS"
                samples_by_status[sample_status].add(sample_dict['id'])
            projects_status_list.append(project_status_dict)

        print_items = (("Samples", "samples_by_status"),
                       ("Libpreps", "libpreps_by_status"),
                       ("Seqruns", "seqruns_by_status"),)
        for project_dict in projects_status_list:
            print_stderr("\nProject\n-------")
            print_stderr(" Name: {:>40}".format(project_dict['name']))
            print_stderr(" Status: {:>40}".format(project_dict['status']))
            for name, dict_key in print_items:
                status_dict = project_dict[dict_key]
                print_stderr("{}\n{}".format(name, "-"*len(name)))
                total_items = sum(map(len, status_dict.values()))
                # Sort by status name. Every set holds at least one id, so
                # total_items is non-zero whenever this loop runs.
                for status, item_set in sorted(status_dict.items(),
                                               key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr(" Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                        status, num_items, total_items, percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr(" {}".format(item))
                print_stderr("")
    else: # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{:>{rspace}}"

        def print_field(offset, label, value):
            # Indent by "offset" spaces and narrow the value column to match
            print_stderr(output_template.format(" " * offset, label, value,
                                                rspace=80 - offset))

        for project_dict in projects_list:
            print_field(0, "Project name:", project_dict['name'])
            print_field(0, "Project ID:", project_dict['id'])
            print_field(0, "Project status:", project_dict['status'])
            for sample_dict in project_dict['samples']:
                print_stderr("")
                print_field(4, "Sample ID:", sample_dict['id'])
                print_field(4, "Sample analysis status:",
                            sample_dict['analysis_status'])
                print_field(4, "Sample coverage:", sample_dict['coverage'])
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    print_field(8, "Libprep ID:", libprep_dict['id'])
                    print_field(8, "Libprep qc status:", libprep_dict['qc'])
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        print_field(12, "Seqrun ID:", seqrun_dict['id'])
                        print_field(12, "Seqrun alignment status:",
                                    seqrun_dict['alignment_status'])
                        print_field(12, "Seqrun mean auto. coverage:",
                                    seqrun_dict['coverage'])
                        if "total_reads" in seqrun_dict:
                            print_field(12, "Seqrun total reads:",
                                        seqrun_dict['total_reads'])
            print_stderr("\n")
def project_summarize(projects, verbosity=0):
    """Print a status summary of one or more projects via print_stderr.

    Local job statuses are first pushed to Charon (quietly, no mails). The
    project, sample, libprep and seqrun records are then read back from
    Charon and printed. Projects that cannot be located or are missing from
    Charon are reported and skipped.

    :param projects: Iterable of project identifiers, each resolved with
                     locate_project
    :param int verbosity: 0 prints the number of samples/libpreps/seqruns per
                          status; 1 also lists the ids under each status;
                          2 or more prints every record in full. Anything
                          that is not a non-negative int falls back to 0.
    """
    if type(verbosity) is not int or verbosity < 0:
        # Interpolate the rejected value so the message shows what was passed
        print_stderr('Invalid verbosity level ("{}"); must be a positive '
                     'integer; falling back to 0'.format(verbosity))
        verbosity = 0
    update_charon_with_local_jobs_status(quiet=True) # Don't send mails
    charon_session = CharonSession()
    projects_list = []
    for project in projects:
        try:
            project = os.path.basename(locate_project(project))
        except ValueError as e:
            print_stderr("Skipping project: {}".format(e))
            continue
        print_stderr(
            'Gathering information for project "{}"...'.format(project))
        try:
            # From here on "project" is the Charon record, not the name
            project = charon_session.project_get(project)
        except CharonError as e:
            print_stderr(
                'Project "{}" not found in Charon; skipping ({})'.format(
                    project, e),
                file=sys.stderr)
            continue
        project_dict = {'name': project['name'],
                        'id': project['projectid'],
                        'status': project['status']}
        samples_list = project_dict['samples'] = []
        for sample in charon_session.project_get_samples(
                project['projectid']).get('samples', []):
            sample_dict = {'id': sample['sampleid'],
                           'analysis_status': sample['analysis_status'],
                           'coverage': sample['total_autosomal_coverage']}
            libpreps_list = sample_dict['libpreps'] = []
            samples_list.append(sample_dict)
            for libprep in charon_session.sample_get_libpreps(
                    project['projectid'],
                    sample['sampleid']).get('libpreps', []):
                libprep_dict = {'id': libprep['libprepid'],
                                'qc': libprep['qc']}
                seqruns_list = libprep_dict['seqruns'] = []
                libpreps_list.append(libprep_dict)
                for seqrun in charon_session.libprep_get_seqruns(
                        project['projectid'], sample['sampleid'],
                        libprep['libprepid']).get('seqruns', []):
                    seqrun_dict = {
                        'id': seqrun['seqrunid'],
                        'alignment_status': seqrun['alignment_status'],
                        'coverage': seqrun['mean_autosomal_coverage'],
                    }
                    if seqrun.get('total_reads'):
                        seqrun_dict['total_reads'] = seqrun['total_reads']
                    seqruns_list.append(seqrun_dict)
        projects_list.append(project_dict)

    if verbosity in (0, 1):
        # Group the ids of samples, libpreps and seqruns by their status
        projects_status_list = []
        for project_dict in projects_list:
            project_status_dict = {}
            project_status_dict['name'] = "{} ({})".format(
                project_dict['name'], project_dict['id'])
            project_status_dict['status'] = project_dict['status']
            samples_by_status = project_status_dict[
                'samples_by_status'] = collections.defaultdict(set)
            libpreps_by_status = project_status_dict[
                'libpreps_by_status'] = collections.defaultdict(set)
            seqruns_by_status = project_status_dict[
                'seqruns_by_status'] = collections.defaultdict(set)
            for sample_dict in project_dict.get('samples', []):
                sample_status = sample_dict['analysis_status']
                libpreps = sample_dict.get('libpreps')
                if libpreps:
                    if not any(libprep["seqruns"] for libprep in libpreps):
                        # Libpreps of such samples are not counted at all
                        sample_status = "NO_SEQRUNS"
                    else:
                        for libprep_dict in libpreps:
                            libpreps_by_status[libprep_dict['qc']].add(
                                libprep_dict['id'])
                            for seqrun_dict in libprep_dict.get('seqruns', []):
                                seqruns_by_status[
                                    seqrun_dict['alignment_status']].add(
                                        seqrun_dict['id'])
                else:
                    sample_status = "NO_LIBPREPS"
                samples_by_status[sample_status].add(sample_dict['id'])
            projects_status_list.append(project_status_dict)

        print_items = (
            ("Samples", "samples_by_status"),
            ("Libpreps", "libpreps_by_status"),
            ("Seqruns", "seqruns_by_status"),
        )
        for project_dict in projects_status_list:
            print_stderr("\nProject\n-------")
            print_stderr(" Name: {:>40}".format(project_dict['name']))
            print_stderr(" Status: {:>40}".format(project_dict['status']))
            for name, dict_key in print_items:
                status_dict = project_dict[dict_key]
                print_stderr("{}\n{}".format(name, "-" * len(name)))
                total_items = sum(map(len, status_dict.values()))
                # Sort by status name. Every set holds at least one id, so
                # total_items is non-zero whenever this loop runs.
                for status, item_set in sorted(
                        status_dict.items(),
                        key=lambda key_value: key_value[0]):
                    num_items = len(item_set)
                    percent = (100.00 * num_items) / total_items
                    print_stderr(
                        " Status: {:<20} ({:>3}/{:<3}) ({:>6.2f}%)".format(
                            status, num_items, total_items, percent))
                    if verbosity == 1:
                        for item in sorted(item_set):
                            print_stderr(" {}".format(item))
                print_stderr("")
    else:  # Verbosity is 2+, maximum verbosity
        output_template = "{}{:<30}{:>{rspace}}"

        def print_field(offset, label, value):
            # Indent by "offset" spaces and narrow the value column to match
            print_stderr(
                output_template.format(" " * offset, label, value,
                                       rspace=80 - offset))

        for project_dict in projects_list:
            print_field(0, "Project name:", project_dict['name'])
            print_field(0, "Project ID:", project_dict['id'])
            print_field(0, "Project status:", project_dict['status'])
            for sample_dict in project_dict['samples']:
                print_stderr("")
                print_field(4, "Sample ID:", sample_dict['id'])
                print_field(4, "Sample analysis status:",
                            sample_dict['analysis_status'])
                print_field(4, "Sample coverage:", sample_dict['coverage'])
                for libprep_dict in sample_dict['libpreps']:
                    print_stderr("")
                    print_field(8, "Libprep ID:", libprep_dict['id'])
                    print_field(8, "Libprep qc status:", libprep_dict['qc'])
                    for seqrun_dict in libprep_dict['seqruns']:
                        print_stderr("")
                        print_field(12, "Seqrun ID:", seqrun_dict['id'])
                        print_field(12, "Seqrun alignment status:",
                                    seqrun_dict['alignment_status'])
                        print_field(12, "Seqrun mean auto. coverage:",
                                    seqrun_dict['coverage'])
                        if "total_reads" in seqrun_dict:
                            print_field(12, "Seqrun total reads:",
                                        seqrun_dict['total_reads'])
            print_stderr("\n")