def _detect_rRNA(data):
    sample = dd.get_sample_name(data)
    gtf_file = dd.get_gtf_file(data)
    tidy_file = dd.get_sailfish_tidy(data)
    rrna_features = gtf.get_rRNA(gtf_file)
    transcripts = set([x[1] for x in rrna_features if x])
    if not transcripts:
        return {'rRNA': "NA", "rRNA_rate": "NA"}
    count_table = pd.read_csv(tidy_file, sep="\t")
    sample_table = count_table[count_table["sample"].isin([sample])]
    rrna_exp = map(float, sample_table[sample_table["id"].isin(transcripts)]["numreads"])
    total_exp = map(float, sample_table["numreads"])
    rrna = sum(rrna_exp)
    rrna_rate = float(rrna) / sum(total_exp)
    return {'rRNA': str(rrna), 'rRNA_rate': str(rrna_rate)}

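# Minimal, self-contained sketch of the same rRNA-rate calculation on a toy
# tidy counts table. The column names ("sample", "id", "numreads") mirror the
# ones _detect_rRNA reads; the transcript IDs and counts are invented purely
# for illustration and are not part of the pipeline.
def _example_rrna_rate():
    import pandas as pd
    count_table = pd.DataFrame({"sample": ["s1", "s1", "s1"],
                                "id": ["rRNA_tx1", "tx2", "tx3"],
                                "numreads": [50.0, 100.0, 850.0]})
    rrna_transcripts = {"rRNA_tx1"}  # would come from gtf.get_rRNA() in the code above
    sample_table = count_table[count_table["sample"] == "s1"]
    rrna = sample_table[sample_table["id"].isin(rrna_transcripts)]["numreads"].sum()
    rrna_rate = float(rrna) / sample_table["numreads"].sum()
    return {"rRNA": str(rrna), "rRNA_rate": str(rrna_rate)}  # rate: 50 / 1000 = 0.05
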
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    if os.path.exists(tz.get_in(["provenance", "data"], sample) or ""):
        out.append({"path": sample["provenance"]["data"]})
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log", "ext": ""})
    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})
    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report,
                    "type": "directory", "ext": "report"})
    multiqc = tz.get_in(["summary", "multiqc"], sample)
    if multiqc:
        out.extend(_flatten_file_with_secondary(multiqc, "multiqc"))
    if sample.get("seqcluster", None):
        out.append({"path": sample["seqcluster"],
                    "type": "directory", "ext": "seqcluster"})
    if sample.get("report", None):
        out.append({"path": os.path.dirname(sample["report"]),
                    "type": "directory", "ext": "seqclusterViz"})
    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            out.extend(_get_variant_file(x, ("population", "vcf")))
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({"path": all_coverage, "type": "bed", "ext": "coverage"})
    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_novel_mirna_counts(sample):
        out.append({"path": dd.get_novel_mirna_counts(sample)})
    if dd.get_novel_isomir_counts(sample):
        out.append({"path": dd.get_novel_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_transcript_assembler(sample):
        out.append({"path": dd.get_merged_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_tidy(sample):
        out.append({"path": dd.get_sailfish_tidy(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    if dd.get_tx2gene(sample):
        out.append({"path": dd.get_tx2gene(sample)})
    return _add_meta(out, config=upload_config)

def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    if os.path.exists(tz.get_in(["provenance", "data"], sample) or ""):
        out.append({"path": sample["provenance"]["data"]})
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log", "ext": ""})
    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})
    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report,
                    "type": "directory", "ext": "report"})
    multiqc = tz.get_in(["summary", "multiqc"], sample)
    if multiqc:
        out.extend(_flatten_file_with_secondary(multiqc, "multiqc"))
    if sample.get("seqcluster", {}):
        out.append({"path": sample["seqcluster"].get("out_dir"),
                    "type": "directory", "ext": "seqcluster"})
    if sample.get("report", None):
        out.append({"path": os.path.dirname(sample["report"]),
                    "type": "directory", "ext": "seqclusterViz"})
    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            suffix = "-annotated-decomposed" if tz.get_in(("population", "decomposed"), x) else "-annotated"
            out.extend([_add_batch(x, sample)
                        for x in _get_variant_file(x, ("population", "vcf"), suffix=suffix)])
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({"path": all_coverage, "type": "bed", "ext": "coverage"})
    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_novel_mirna_counts(sample):
        out.append({"path": dd.get_novel_mirna_counts(sample)})
    if dd.get_novel_isomir_counts(sample):
        out.append({"path": dd.get_novel_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        count_file = dd.get_combined_counts(sample)
        if sample["analysis"].lower() == "scrna-seq":
            out.append({"path": count_file, "type": "mtx"})
            out.append({"path": count_file + ".rownames", "type": "rownames"})
            out.append({"path": count_file + ".colnames", "type": "colnames"})
        else:
            out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_transcript_assembler(sample):
        out.append({"path": dd.get_merged_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_tidy(sample):
        out.append({"path": dd.get_sailfish_tidy(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    if dd.get_tx2gene(sample):
        out.append({"path": dd.get_tx2gene(sample)})
    if dd.get_spikein_counts(sample):
        out.append({"path": dd.get_spikein_counts(sample)})
    return _add_meta(out, config=upload_config)

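# The scRNA-seq branch above uploads the combined counts as a sparse matrix
# plus ".rownames"/".colnames" sidecar files. A minimal sketch of reading such
# a triple back, assuming Matrix Market format for the counts and plain
# one-name-per-line text for the sidecars (the file layout is an assumption
# here, not taken from the code above):
def _example_load_sparse_counts(count_file):
    import scipy.io
    import pandas as pd
    matrix = scipy.io.mmread(count_file).tocsr()
    with open(count_file + ".rownames") as in_handle:
        rownames = [line.strip() for line in in_handle]
    with open(count_file + ".colnames") as in_handle:
        colnames = [line.strip() for line in in_handle]
    # Dense conversion only for illustration; real single-cell matrices stay sparse.
    return pd.DataFrame(matrix.toarray(), index=rownames, columns=colnames)
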
def _get_files_project(sample, upload_config):
    """Retrieve output files associated with an entire analysis project.
    """
    out = [{"path": sample["provenance"]["programs"]}]
    for fname in ["bcbio-nextgen.log", "bcbio-nextgen-commands.log"]:
        if os.path.exists(os.path.join(log.get_log_dir(sample["config"]), fname)):
            out.append({"path": os.path.join(log.get_log_dir(sample["config"]), fname),
                        "type": "external_command_log", "ext": ""})
    if "summary" in sample and sample["summary"].get("project"):
        out.append({"path": sample["summary"]["project"]})
    mixup_check = tz.get_in(["summary", "mixup_check"], sample)
    if mixup_check:
        out.append({"path": sample["summary"]["mixup_check"],
                    "type": "directory", "ext": "mixup_check"})
    report = os.path.join(dd.get_work_dir(sample), "report")
    if utils.file_exists(report):
        out.append({"path": report,
                    "type": "directory", "ext": "report"})
    if sample.get("seqcluster", None):
        out.append({"path": sample["seqcluster"],
                    "type": "directory", "ext": "seqcluster"})
    for x in sample.get("variants", []):
        if "pop_db" in x:
            out.append({"path": x["pop_db"],
                        "type": "sqlite",
                        "variantcaller": x["variantcaller"]})
    for x in sample.get("variants", []):
        if "population" in x:
            pop_db = tz.get_in(["population", "db"], x)
            if pop_db:
                out.append({"path": pop_db,
                            "type": "sqlite",
                            "variantcaller": x["variantcaller"]})
            out.extend(_get_variant_file(x, ("population", "vcf")))
    for x in sample.get("variants", []):
        if x.get("validate") and x["validate"].get("grading_summary"):
            out.append({"path": x["validate"]["grading_summary"]})
            break
    if "coverage" in sample:
        cov_db = tz.get_in(["coverage", "summary"], sample)
        if cov_db:
            out.append({"path": cov_db, "type": "sqlite", "ext": "coverage"})
        all_coverage = tz.get_in(["coverage", "all"], sample)
        if all_coverage:
            out.append({"path": all_coverage, "type": "bed", "ext": "coverage"})
    if dd.get_mirna_counts(sample):
        out.append({"path": dd.get_mirna_counts(sample)})
    if dd.get_isomir_counts(sample):
        out.append({"path": dd.get_isomir_counts(sample)})
    if dd.get_combined_counts(sample):
        out.append({"path": dd.get_combined_counts(sample)})
    if dd.get_annotated_combined_counts(sample):
        out.append({"path": dd.get_annotated_combined_counts(sample)})
    if dd.get_combined_fpkm(sample):
        out.append({"path": dd.get_combined_fpkm(sample)})
    if dd.get_combined_fpkm_isoform(sample):
        out.append({"path": dd.get_combined_fpkm_isoform(sample)})
    if dd.get_assembled_gtf(sample):
        out.append({"path": dd.get_assembled_gtf(sample)})
    if dd.get_dexseq_counts(sample):
        out.append({"path": dd.get_dexseq_counts(sample)})
    if dd.get_express_counts(sample):
        out.append({"path": dd.get_express_counts(sample)})
    if dd.get_express_fpkm(sample):
        out.append({"path": dd.get_express_fpkm(sample)})
    if dd.get_express_tpm(sample):
        out.append({"path": dd.get_express_tpm(sample)})
    if dd.get_isoform_to_gene(sample):
        out.append({"path": dd.get_isoform_to_gene(sample)})
    if dd.get_square_vcf(sample):
        out.append({"path": dd.get_square_vcf(sample)})
    if dd.get_sailfish_tidy(sample):
        out.append({"path": dd.get_sailfish_tidy(sample)})
    if dd.get_sailfish_transcript_tpm(sample):
        out.append({"path": dd.get_sailfish_transcript_tpm(sample)})
    if dd.get_sailfish_gene_tpm(sample):
        out.append({"path": dd.get_sailfish_gene_tpm(sample)})
    return _add_meta(out, config=upload_config)

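# Each version of _get_files_project above builds the same kind of record: a
# list of dicts keyed by "path" with optional "type"/"ext"/"variantcaller"
# metadata that is handed to _add_meta. A minimal, hypothetical sketch of one
# downstream use of that structure, filtering records to paths that exist; this
# is not bcbio's actual _add_meta implementation:
def _example_existing_records(records):
    import os
    return [rec for rec in records if rec.get("path") and os.path.exists(rec["path"])]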