def get_sample_info(child, rows, mux_analysis_list, mux_id, fastq_data_dir,
                    run_num_flowcell, sample_info):
    """Collect per-library sample config from the ELM JSON for one MUX child.

    Builds a config dict (requestor, pipeline name/version, reference config,
    optional command line, read units) for the library described by ``child``
    and merges it into ``sample_info`` keyed by the child's ``libraryId``.
    Adds ``mux_id`` to ``mux_analysis_list`` as a side effect.

    Returns the (possibly updated) ``sample_info`` dict. Returns early --
    without adding the sample -- when the pipeline name cannot be mapped or
    when no reference info is available.
    """
    sample_cfg = {}
    site = get_site()
    ctime, _ = generate_window(1)
    # run_num_flowcell encodes machine/run/flowcell; only flowcellid is used here
    _, _, flowcellid = get_machine_run_flowcell_id(run_num_flowcell)
    mux_analysis_list.add(mux_id)
    sample_id = child['libraryId']
    sample_cfg['requestor'] = rows['requestor']
    sample_cfg['ctime'] = ctime
    sample_cfg['site'] = site
    try:
        # map the legacy ELM analysis label onto the current pipeline name
        sample_cfg['pipeline_name'] = legacy_mapper['pipeline_mapper'][
            child['Analysis']]
    except KeyError as e:
        sample_cfg['pipeline_name'] = child['Analysis']
        logger.warning(str(e) + " Pipeline not mappped to newer version")
        # unmapped pipeline: skip this sample entirely
        return sample_info
    # NOTE(review): membership is tested on `rows` but the value is read from
    # `child` -- looks inconsistent; confirm which dict actually carries
    # 'pipeline_version' in the ELM payload.
    pipeline_version = get_pipeline_version(child['pipeline_version'] \
        if 'pipeline_version' in rows else None)
    sample_cfg['pipeline_version'] = pipeline_version
    #sample_cfg['pipeline_params'] = 'params'
    ref_info = get_reference_info(child['Analysis'], \
        sample_cfg['pipeline_version'], child['genome'])
    if not ref_info:
        logger.info("ref_info not available")
        # cannot run a pipeline without reference config -- skip sample
        return sample_info
    cmdline_info = get_cmdline_info(child)
    sample_cfg['references_cfg'] = ref_info
    if cmdline_info:
        sample_cfg['cmdline'] = cmdline_info
    readunits_dict = {}
    # probe the demux output dir for this library/lane's fastq file(s)
    status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'], \
        rows['laneId'])
    if status:
        ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'], \
            rows['laneId'], None, fq1, fq2)
        k = key_for_readunit(ru)
        readunits_dict[k] = dict(ru._asdict())
        sample_cfg['readunits'] = readunits_dict
    if sample_info.get(sample_id, {}).get('readunits', {}):
        # sample seen before (e.g. on another lane): merge read units only
        sample_info[sample_id]['readunits'].update(readunits_dict)
    else:
        sample_info[sample_id] = sample_cfg
    return sample_info
def main():
    """Command-line entry point: turn a per-sample CSV into a YAML config.

    Reads one read unit per CSV row, derives missing read-group ids, and
    writes a YAML file with a `samples` mapping followed by a `readunits`
    mapping. Exits non-zero on a bad delimiter, a missing input file, or an
    existing output file without --force-overwrite.
    """
    column_names = ['sample'] + list(ReadUnit._fields)
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-i', "--csv", required=True,
        help="CSV input file describing your samples using the"
             " following columns: {} (sample and fq1 are mandatory; leave unknown fields empty)".format(
                 ", ".join("{}:{}".format(i+1, c) for i, c in enumerate(column_names))))
    parser.add_argument('-o', "--yaml", required=True,
                        help="Output config (yaml) file")
    parser.add_argument('-d', '--delimiter', default="\t",
                        help="Use this delimiter for CSV (default is <tab>)")
    parser.add_argument('-f', '--force-overwrite', action='store_true',
                        help="Force overwriting of existing file")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    opts = parser.parse_args()

    # Repeatable -v / -q shift the log level in steps of 10:
    # -vv DEBUG, -v INFO, default WARNING, -q ERROR, -qq CRITICAL,
    # -qqq effectively disables logging.
    logger.setLevel(logging.WARN + 10*opts.quiet - 10*opts.verbose)

    if len(opts.delimiter) != 1:
        logger.fatal("Delimiter needs to be exactly one character")
        sys.exit(1)
    if not os.path.exists(opts.csv):
        logger.fatal("Input file %s does not exist", opts.csv)
        sys.exit(1)
    if os.path.exists(opts.yaml) and not opts.force_overwrite:
        logger.fatal("Cowardly refusing to overwrite existing file %s", opts.yaml)
        sys.exit(1)

    samples = {}
    readunits = {}
    with open(opts.csv) as infile:
        for record in csv.reader(infile, delimiter=opts.delimiter):
            if not record:
                continue
            logger.debug("DEBUG row %s", "\t".join(
                "{}:{}".format(k, v) for k, v in zip(column_names, record)))
            if len(record) != len(column_names):
                logger.fatal("Only found %s fields (require %s) in row: %s",
                             len(record), len(column_names), '\t'.join(record))
                sys.exit(1)
            sample_name = record[0]
            # blank CSV cells become None so optional ReadUnit fields stay unset
            unit_values = [field if field.strip() else None for field in record[1:]]
            unit = ReadUnit._make(unit_values)
            if not unit.rg_id:
                # derive a read-group id when the CSV did not provide one
                unit = unit._replace(rg_id=create_rg_id_from_ru(unit))
            unit_key = key_for_readunit(unit)
            readunits[unit_key] = dict(unit._asdict())
            samples.setdefault(sample_name, []).append(unit_key)

    # two separate dumps so 'samples' precedes 'readunits' in the output file
    with open(opts.yaml, 'w') as outfh:
        yaml.dump(dict(samples=samples), outfh, default_flow_style=False)
        yaml.dump(dict(readunits=readunits), outfh, default_flow_style=False)
def main():
    """Command-line entry point.

    Parses a per-sample CSV (one read unit per row) and writes a YAML config
    containing a `samples` mapping (sample -> list of read-unit keys) and a
    `readunits` mapping (key -> read-unit fields). Exits non-zero on a bad
    delimiter, missing input, or an existing output file without -f.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # generic args
    csv_cols = ['sample'] + list(ReadUnit._fields)
    parser.add_argument(
        '-i', "--csv", required=True,
        help="CSV input file describing your samples using the"
        " following columns: {} (sample and fq1 are mandatory; leave unknown fields empty)"
        .format(", ".join("{}:{}".format(i + 1, c)
                          for i, c in enumerate(csv_cols))))
    parser.add_argument('-o', "--yaml", required=True,
                        help="Output config (yaml) file")
    parser.add_argument('-d', '--delimiter', default="\t",
                        help="Use this delimiter for CSV (default is <tab>)")
    parser.add_argument('-f', '--force-overwrite', action='store_true',
                        help="Force overwriting of existing file")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if len(args.delimiter) != 1:
        logger.fatal("Delimiter needs to be exactly one character")
        sys.exit(1)
    if not os.path.exists(args.csv):
        logger.fatal("Input file %s does not exist", args.csv)
        sys.exit(1)
    if os.path.exists(args.yaml) and not args.force_overwrite:
        logger.fatal("Cowardly refusing to overwrite existing file %s",
                     args.yaml)
        sys.exit(1)

    samples = dict()
    readunits = dict()
    with open(args.csv) as csvfile:
        csvreader = csv.reader(csvfile, delimiter=args.delimiter)
        for row in csvreader:
            if len(row) == 0:
                # silently skip blank lines
                continue
            logger.debug(
                "DEBUG row %s",
                "\t".join("{}:{}".format(k, v)
                          for k, v in zip(csv_cols, row)))
            if len(row) != len(csv_cols):
                logger.fatal("Only found %s fields (require %s) in row: %s",
                             len(row), len(csv_cols), '\t'.join(row))
                sys.exit(1)
            sample_name = row[0]
            ru_fields = row[1:]
            # empty cells become None so optional ReadUnit fields stay unset
            ru_fields = [x if len(x.strip()) else None for x in ru_fields]
            #sys.stderr.write("ru_fields={}".format(ru_fields) + "\n")
            ru = ReadUnit._make(ru_fields)
            if not ru.rg_id:
                # derive a read-group id when the CSV did not provide one
                ru = ru._replace(rg_id=create_rg_id_from_ru(ru))
            ru_key = key_for_readunit(ru)
            readunits[ru_key] = dict(ru._asdict())
            if sample_name not in samples:
                samples[sample_name] = []
            samples[sample_name].append(ru_key)

    # two separate dumps so 'samples' precedes 'readunits' in the output file
    with open(args.yaml, 'w') as fh:
        yaml.dump(dict(samples=samples), fh, default_flow_style=False)
        yaml.dump(dict(readunits=readunits), fh, default_flow_style=False)
def get_lib_details(run_num_flowcell, mux_list, testing):
    """Collect per-run library info from the ELM REST service.

    Queries the run-details REST endpoint (testing or production, selected by
    ``testing``), then walks each (mux_id, out_dir) pair in ``mux_list`` and
    each matching lane in the response, building a dict of sample configs
    keyed by libraryId. MUX libraries are expanded into their children;
    non-MUX libraries are recorded with read units only. Libraries marked
    "Sequence only" are skipped.

    Returns the sample_info dict (empty if the run JSON has no 'runId').
    Raises requests.HTTPError on a non-OK REST response.
    """
    _, run_num, flowcellid = get_machine_run_flowcell_id(run_num_flowcell)
    # Call rest service to get component libraries
    if testing:
        print(run_num)
        # the URL template contains the literal placeholder "run_num"
        rest_url = rest_services['run_details']['testing'].replace("run_num", run_num)
        logger.info("development server")
    else:
        rest_url = rest_services['run_details']['production'].replace("run_num", run_num)
        logger.info("production server")
    response = requests.get(rest_url)
    if response.status_code != requests.codes.ok:
        response.raise_for_status()
    rest_data = response.json()
    logger.debug("rest_data from %s: %s", rest_url, rest_data)
    sample_info = {}
    if rest_data.get('runId') is None:
        logger.info("JSON data is empty for run num %s", run_num)
        return sample_info
    for mux_id, out_dir in mux_list:
        # out_dir appears to be a sequence whose first element is the base dir
        # -- TODO confirm against callers
        fastq_data_dir = os.path.join(out_dir[0], 'out', "Project_"+mux_id)
        if os.path.exists(fastq_data_dir):
            for rows in rest_data['lanes']:
                if mux_id in rows['libraryId']:
                    if "MUX" in rows['libraryId']:
                        # multiplexed library: one sample config per child
                        for child in rows['Children']:
                            if child['Analysis'] != "Sequence only":
                                ctime, _ = generate_window(1)
                                sample_dict = {}
                                sample = child['libraryId']
                                sample_dict['requestor'] = rows['requestor']
                                sample_dict['ctime'] = ctime
                                sample_dict['pipeline_name'] = child['Analysis']
                                # NOTE(review): key presence is tested on `rows`
                                # but the value is read from `child`; confirm
                                # which dict carries 'pipeline_version'.
                                if 'pipeline_version' in rows:
                                    sample_dict['pipeline_version'] = child['pipeline_version']
                                else:
                                    sample_dict['pipeline_version'] = None
                                sample_dict['pipeline_params'] = 'params'
                                sample_dict['site'] = get_site()
                                # NOTE(review): rebinds the loop variable
                                # `out_dir`; harmless (rebound next iteration)
                                # but confusing -- consider a different name.
                                out_dir = get_downstream_outdir(sample_dict['requestor'], \
                                    sample_dict['pipeline_version'], sample_dict['pipeline_name'])
                                sample_dict['out_dir'] = out_dir
                                readunits_dict = {}
                                status, fq1, fq2 = check_fastq(fastq_data_dir, child['libraryId'], \
                                    rows['laneId'])
                                if status:
                                    ru = ReadUnit(run_num_flowcell, flowcellid, child['libraryId'], \
                                        rows['laneId'], None, fq1, fq2)
                                    k = key_for_readunit(ru)
                                    readunits_dict[k] = dict(ru._asdict())
                                    sample_dict['readunits'] = readunits_dict
                                if sample_info.get(sample, {}).get('readunits'):
                                    # sample seen on an earlier lane: merge read units
                                    sample_info[sample]['readunits'].update(readunits_dict)
                                else:
                                    sample_info[sample] = sample_dict
                    else:
                        # non-MUX library: only read units are recorded
                        if rows['Analysis'] != "Sequence only":
                            sample = rows['libraryId']
                            status, fq1, fq2 = check_fastq(fastq_data_dir, rows['libraryId'], \
                                rows['laneId'])
                            if status:
                                ctime, _ = generate_window(1)
                                sample_dict = {}
                                readunits_dict = {}
                                ru = ReadUnit(run_num_flowcell, flowcellid, rows['libraryId'], \
                                    rows['laneId'], None, fq1, fq2)
                                k = key_for_readunit(ru)
                                readunits_dict[k] = dict(ru._asdict())
                                sample_dict['readunits'] = readunits_dict
                                sample_info[sample] = sample_dict
    return sample_info
def _collect_mux_readunits(bcl2fastq_dir, mux, units, run_num):
    """Collect samples and read units for one demultiplexed MUX directory.

    Scans Sample_* subdirectories of <bcl2fastq_dir>/out/<mux> and probes
    each configured lane for fastq files.

    Returns a tuple (samples, readunits): samples maps sample id -> list of
    read-unit keys; readunits maps read-unit key -> read-unit dict.
    """
    samples = {}
    readunits = {}
    mux_folder = os.path.join(bcl2fastq_dir, "out", mux)
    for child in os.listdir(mux_folder):
        # only per-sample subdirectories are of interest
        if not child.startswith('Sample'):
            continue
        sample_id = child.split('_')[-1]
        samples[sample_id] = []
        sample_path = os.path.join(mux_folder, child)
        for lane_id in units["lane_ids"]:
            status, fq1, fq2 = check_fastq(sample_path, lane_id)
            if not status:
                # FIXME throw error?
                continue
            ru = ReadUnit(run_num, units["flowcell_id"], sample_id,
                          lane_id, None, fq1, fq2)
            ru = ru._replace(rg_id=create_rg_id_from_ru(ru))
            k = key_for_readunit(ru)
            readunits[k] = dict(ru._asdict())
            samples[sample_id].append(k)
    return samples, readunits


def _write_mux_cfg(muxinfo_cfg, samples, readunits, overwrite):
    """Write the samples and readunits mappings to one per-MUX yaml file.

    Exits the program if the file already exists and overwrite is not set.
    """
    if os.path.exists(muxinfo_cfg) and not overwrite:
        logger.fatal("Refusing to overwrite existing file %s", muxinfo_cfg)
        sys.exit(1)
    # two separate dumps so 'samples' precedes 'readunits' in the output file
    with open(muxinfo_cfg, 'w') as fh:
        fh.write(yaml.dump(dict(samples=samples), default_flow_style=False))
        fh.write(yaml.dump(dict(readunits=readunits), default_flow_style=False))
    logger.info("Created %s", muxinfo_cfg)


def main():
    """Command-line entry point.

    Reads conf.yaml from a bcl2fastq run directory and writes one yaml
    config file (samples + readunits) per MUX found in its out/ folder.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-b", "--bcl2fastq", required=True,
                        help="bcl2fastq directory")
    parser.add_argument("-o", "--outpref",
                        help="Output prefix used for created yaml files per MUX (default: bcl2fastq dir)")
    parser.add_argument('-f', "--overwrite", action='store_true',
                        help="Overwrite existing files")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeateable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if not os.path.exists(args.bcl2fastq):
        logger.fatal("out_dir %s does not exist", args.bcl2fastq)
        sys.exit(1)
    # was os.path.join(args.bcl2fastq + '/conf.yaml'): join with proper args
    confinfo = os.path.join(args.bcl2fastq, 'conf.yaml')
    if not os.path.exists(confinfo):
        logger.fatal("conf info '%s' does not exist under Run directory.\n",
                     confinfo)
        sys.exit(1)
    outprefix = args.outpref if args.outpref else args.bcl2fastq

    with open(confinfo) as fh_cfg:
        yaml_data = yaml.safe_load(fh_cfg)
    assert "units" in yaml_data
    assert "run_num" in yaml_data
    run_num = yaml_data["run_num"]

    for mux, units in yaml_data["units"].items():
        mux_id = mux.split("_")[-1]
        # skip MUXes that were not demultiplexed into out/
        if not os.path.exists(os.path.join(args.bcl2fastq, "out", mux)):
            continue
        samples, readunits = _collect_mux_readunits(
            args.bcl2fastq, mux, units, run_num)
        muxinfo_cfg = outprefix + mux_id + ".yaml"
        if args.dry_run:
            logger.warning("Skipped creation of %s", muxinfo_cfg)
        else:
            _write_mux_cfg(muxinfo_cfg, samples, readunits, args.overwrite)