def main(inbox=None, num_days=14, genotype_files=None, config=None, config_file_path=None):
    """Set genotype_status to "AVAILABLE" in Charon for samples found in genotype files.

    Genotype files are either given explicitly or discovered by scanning one
    or more inbox directories for recently modified files whose path matches
    GENOTYPE_FILE_RE. Every sample parsed from those files whose current
    genotype_status is None or "NOT_AVAILABLE" is updated; other samples are
    left untouched.

    :param inbox: Directory (or list/tuple of directories) to scan. When
        falsy, config["environment"]["flowcell_inbox"] is used instead.
    :param num_days: Only files modified within this many days are used
        (applies to the inbox scan only).
    :param genotype_files: Explicit genotype file paths; when non-empty the
        inbox scan is skipped entirely.
    :param config: Configuration mapping consulted when no inbox is given.
    :param config_file_path: Only used in the error message.
    :raises ValueError: if no files are given and no inbox can be determined
        from the argument or the configuration.
    """
    if genotype_files:
        gt_files_valid = [os.path.abspath(gt_file) for gt_file in genotype_files]
    else:
        inboxes = inbox
        if not inboxes:
            try:
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                inboxes = None
        if not inboxes:
            raise ValueError("No path to delivery inbox specified by argument "
                             "or in configuration file ({}). Exiting.".format(config_file_path))
        # Accept a single path as well as a list of paths; iterating over a
        # bare string would otherwise walk it character by character.
        if not isinstance(inboxes, (list, tuple)):
            inboxes = [inboxes]

        # Absolute timestamp (seconds since the epoch) of the oldest
        # modification time that is still accepted.
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Accumulate across *all* inboxes rather than restarting per inbox.
        gt_files_valid = []
        searched_dirs = []
        for inbox_dir in inboxes:
            inbox_dir = os.path.abspath(inbox_dir)
            searched_dirs.append(inbox_dir)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_dir, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match,
                                  glob.glob(os.path.join(inbox_dir, "*"))):
                # cutoff_age is already an absolute time, so compare the
                # mtime against it directly.
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))
        if not gt_files_valid:
            LOG.info("No genotype files found under {} newer than "
                     "{}".format(", ".join(searched_dirs), time.ctime(cutoff_age)))
            return

    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
            find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        # .items() works on both Python 2 and Python 3 mappings.
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample,
                                                     genotype_status="AVAILABLE")
                    else:
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(sample, genotype_status))
                except CharonError as e:
                    # Best effort: a failure on one sample must not stop the rest.
                    LOG.error('Could not update genotype status to "AVAILABLE" '
                              'for project/sample "{}/{}": {}'.format(project_id, sample, e))
) except Exception as e: LOG.error(e.message) print(e, file=sys.stderr) LOG.info("Done with organization.") elif "genotype_project_dirs" in args: from ngi_pipeline.engines import piper_ngi genotype_file_path = args.genotype_file project_obj_list = [] if not args.genotype_project_dirs: LOG.info("No projects specified; running genotype analysis for all " "samples present in VCF file.") # User passed only the genotype file; try to determine samples/projects # from vcf file projects_samples_dict = find_projects_from_samples(parse_samples_from_vcf(genotype_file_path)) for project_id, samples in projects_samples_dict.iteritems(): try: path_to_project = locate_project(project_id) except ValueError: # Project has not yet been organized from flowcell level LOG.warn( 'Project "{}" has not yet been organized from ' "flowcell to project level; skipping.".format(project_id) ) continue project = recreate_project_from_filesystem(project_dir=path_to_project, restrict_to_samples=samples) project_obj_list.append(project) else: for genotype_project_dir in args.genotype_project_dirs: LOG.info(
def main(inbox=None, num_days=14, genotype_files=None, config=None, config_file_path=None):
    """Set genotype_status to "AVAILABLE" in Charon for samples found in genotype files.

    Genotype files are either given explicitly or discovered by scanning one
    or more inbox directories for recently modified files whose path matches
    GENOTYPE_FILE_RE. Every sample parsed from those files whose current
    genotype_status is None or "NOT_AVAILABLE" is updated; other samples are
    left untouched.

    :param inbox: Directory (or list/tuple of directories) to scan. When
        falsy, config["environment"]["flowcell_inbox"] is used instead.
    :param num_days: Only files modified within this many days are used
        (applies to the inbox scan only).
    :param genotype_files: Explicit genotype file paths; when non-empty the
        inbox scan is skipped entirely.
    :param config: Configuration mapping consulted when no inbox is given.
    :param config_file_path: Only used in the error message.
    :raises ValueError: if no files are given and no inbox can be determined
        from the argument or the configuration.
    """
    if genotype_files:
        gt_files_valid = [os.path.abspath(gt_file) for gt_file in genotype_files]
    else:
        inboxes = inbox
        if not inboxes:
            try:
                inboxes = config["environment"]["flowcell_inbox"]
            except (KeyError, TypeError):
                inboxes = None
        if not inboxes:
            raise ValueError("No path to delivery inbox specified by argument "
                             "or in configuration file ({}). Exiting.".format(config_file_path))
        # Accept a single path as well as a list of paths; iterating over a
        # bare string would otherwise walk it character by character.
        if not isinstance(inboxes, (list, tuple)):
            inboxes = [inboxes]

        # Absolute timestamp (seconds since the epoch) of the oldest
        # modification time that is still accepted.
        cutoff_age = time.time() - (int(num_days) * 24 * 60 * 60)
        # Accumulate across *all* inboxes rather than restarting per inbox.
        gt_files_valid = []
        searched_dirs = []
        for inbox_dir in inboxes:
            inbox_dir = os.path.abspath(inbox_dir)
            searched_dirs.append(inbox_dir)
            LOG.info("Searching for genotype files under {} modified after "
                     "{}".format(inbox_dir, time.ctime(cutoff_age)))
            for gt_file in filter(GENOTYPE_FILE_RE.match,
                                  glob.glob(os.path.join(inbox_dir, "*"))):
                # cutoff_age is already an absolute time, so compare the
                # mtime against it directly.
                if os.stat(gt_file).st_mtime > cutoff_age:
                    gt_files_valid.append(os.path.abspath(gt_file))
        if not gt_files_valid:
            LOG.info("No genotype files found under {} newer than "
                     "{}".format(", ".join(searched_dirs), time.ctime(cutoff_age)))
            return

    charon_session = CharonSession()
    for gt_file_path in gt_files_valid:
        project_samples_dict = \
            find_projects_from_samples(parse_samples_from_vcf(gt_file_path))
        # .items() works on both Python 2 and Python 3 mappings.
        for project_id, samples in project_samples_dict.items():
            LOG.info("Updating project {}...".format(project_id))
            for sample in samples:
                try:
                    genotype_status = \
                        charon_session.sample_get(projectid=project_id,
                                                  sampleid=sample).get("genotype_status")
                    if genotype_status in (None, "NOT_AVAILABLE"):
                        LOG.info('Updating sample {} genotype_status '
                                 'to "AVAILABLE"...'.format(sample))
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample,
                                                     genotype_status="AVAILABLE")
                    else:
                        LOG.info('Not updating sample {} genotype_status '
                                 '(already "{}")'.format(sample, genotype_status))
                except CharonError as e:
                    # Best effort: a failure on one sample must not stop the rest.
                    LOG.error('Could not update genotype status to "AVAILABLE" '
                              'for project/sample "{}/{}": {}'.format(project_id, sample, e))
except Exception as e: LOG.error(e.message) print(e, file=sys.stderr) LOG.info("Done with organization.") elif 'genotype_project_dirs' in args: from ngi_pipeline.engines import piper_ngi genotype_file_path = args.genotype_file project_obj_list = [] if not args.genotype_project_dirs: LOG.info('No projects specified; running genotype analysis for all ' 'samples present in VCF file.') # User passed only the genotype file; try to determine samples/projects # from vcf file projects_samples_dict = \ find_projects_from_samples(parse_samples_from_vcf(genotype_file_path)) for project_id, samples in projects_samples_dict.iteritems(): try: path_to_project = locate_project(project_id) except ValueError: # Project has not yet been organized from flowcell level LOG.warn('Project "{}" has not yet been organized from ' 'flowcell to project level; skipping.'.format(project_id)) continue project = recreate_project_from_filesystem(project_dir=path_to_project, restrict_to_samples=samples) project_obj_list.append(project) else: for genotype_project_dir in args.genotype_project_dirs: LOG.info("Starting genotype analysis of project {} with genotype " "file {}".format(genotype_project_dir, genotype_file_path))