def test_named_QCMetricRecord_repr(obj_a1, obj_b):
    """The repr of a named record lists its metrics followed by the name."""
    expected_repr = (
        "QCMetricRecord(["
        "QCMetric('a', OrderedDict([(1, 2)])), "
        "QCMetric('b', OrderedDict([(3, 4)]))"
        "], name='dis_my_name')"
    )
    named_record = QCMetricRecord([obj_a1, obj_b], name="dis_my_name")
    assert named_record.__repr__() == expected_repr
def main(args):
    """Run RSEM on an annotation BAM and write a genes-detected QC JSON.

    Steps, in order:
      1. ``bam_root`` is the basename of ``args.anno_bam`` with a trailing
         ``.bam`` removed.
      2. The gzipped tar archive ``args.rsem_index`` is extracted into the
         current directory; the member list is produced by
         ``make_modified_TarInfo(archive, "rsem_index")``.
      3. ``RSEM_COMMAND`` is formatted with the run parameters, split with
         ``shlex`` and executed.
      4. The number of detected genes is computed from
         ``<bam_root>_rsem.genes.results`` and written, wrapped in a
         ``QCMetricRecord``, to ``<bam_root>_number_of_genes_detected.json``.

    Args:
        args: Object exposing ``anno_bam``, ``rsem_index``, ``rnd_seed``,
            ``ncpus``, ``ramGB``, ``read_strand`` and ``endedness``
            attributes (presumably an ``argparse.Namespace`` -- confirm
            against the caller).
    """
    # Raw string: "\." in a plain literal is an invalid escape sequence that
    # newer Python versions warn about.
    remove_bam_from_end_re = re.compile(r"\.bam$")
    bam_root = remove_bam_from_end_re.sub("", os.path.basename(args.anno_bam))

    with tarfile.open(args.rsem_index, "r:gz") as archive:
        archive.extractall(
            ".", members=make_modified_TarInfo(archive, "rsem_index")
        )

    rsem_call = shlex.split(
        RSEM_COMMAND.format(
            rnd_seed=args.rnd_seed,
            ncpus=args.ncpus,
            ramGB=args.ramGB,
            fwd_prob=strand_to_fwd_prob(args.read_strand),
            paired_end=format_endedness(args.endedness),
            anno_bam=args.anno_bam,
            bam_root=bam_root,
        )
    )
    logger.info("Running RSEM command %s", " ".join(rsem_call))
    return_code = subprocess.call(rsem_call)
    if return_code != 0:
        # The exit status used to be dropped silently.  Record it so a later
        # failure while reading the results file can be traced back to RSEM;
        # control flow is otherwise unchanged.
        logger.error("RSEM command exited with non-zero status %s", return_code)

    gene_quant_fn = str(bam_root) + "_rsem.genes.results"
    number_of_genes_detected = calculate_number_of_genes_detected(gene_quant_fn)

    qc_record = QCMetricRecord()
    number_of_genes_QC = QCMetric(
        "number_of_genes_detected",
        {"number_of_genes_detected": number_of_genes_detected},
    )
    qc_record.add(number_of_genes_QC)

    with open(str(bam_root) + "_number_of_genes_detected.json", "w") as f:
        json.dump(qc_record.to_ordered_dict(), f)
def test_QCMetricRecord_repr(obj_a1, obj_b):
    """The repr of an unnamed record lists only its metrics."""
    expected_repr = (
        "QCMetricRecord(["
        "QCMetric('a', OrderedDict([(1, 2)])), "
        "QCMetric('b', OrderedDict([(3, 4)]))"
        "])"
    )
    unnamed_record = QCMetricRecord([obj_a1, obj_b])
    assert unnamed_record.__repr__() == expected_repr
def main(args):
    """Write the Spearman correlation of two quantification tables as JSON.

    Both ``args.quants[0]`` and ``args.quants[1]`` are read as headerless,
    tab-separated tables with the first four rows skipped.  Column ``1`` of
    each table is correlated (Spearman) and the result is stored under the
    ``spearman_correlation`` key of a ``QCMetric`` of the same name, which
    is serialised to ``args.output_filename``.

    Args:
        args: Object exposing ``quants`` (indexable, at least two paths)
            and ``output_filename``.
    """
    read_options = {"sep": "\t", "header": None, "skiprows": 4}
    first_table = pd.read_csv(args.quants[0], **read_options)
    second_table = pd.read_csv(args.quants[1], **read_options)
    correlation = first_table[1].corr(second_table[1], method="spearman")

    record = QCMetricRecord()
    record.add(
        QCMetric("spearman_correlation", {"spearman_correlation": correlation})
    )

    with open(args.output_filename, "w") as out_handle:
        json.dump(record.to_ordered_dict(), out_handle)
def main(args):
    """Compute gene type counts for a BAM and write them as a QC JSON.

    The mapping loaded by ``read_dict_from_tsv`` from
    ``args.tr_id_to_gene_type_tsv`` and the path ``args.input_bam`` are
    passed to ``get_gene_type_counts``; the result becomes the content of
    a ``QCMetric`` named ``gene_type_count`` that is serialised to
    ``args.output_filename``.

    Args:
        args: Object exposing ``tr_id_to_gene_type_tsv``, ``input_bam`` and
            ``output_filename``.
    """
    record = QCMetricRecord()

    mapping_path = args.tr_id_to_gene_type_tsv
    logger.info("Reading transcript id to gene type mapping from %s", mapping_path)
    transcript_to_gene_type = read_dict_from_tsv(mapping_path)

    logger.info("Calculating gene type counts for bam %s", args.input_bam)
    counts_by_gene_type = get_gene_type_counts(
        transcript_to_gene_type, args.input_bam
    )
    record.add(QCMetric("gene_type_count", counts_by_gene_type))

    logger.info("Writing QC output into %s", args.output_filename)
    with open(args.output_filename, "wt") as out_handle:
        json.dump(record.to_ordered_dict(), out_handle)
def main(args):
    """Count detected genes in an abundance table and write a QC JSON.

    The tab-separated table at ``args.abundance`` is passed through
    ``remove_genomic_transcripts`` and ``filter_startswith_prefix`` (with
    ``args.idprefix``), then aggregated per gene over the column named by
    ``args.counts_colname``.  Entries whose aggregated value is at least 1
    are counted, and the count is stored under ``number_of_genes_detected``
    in the JSON written to ``args.outfile``.

    Args:
        args: Object exposing ``abundance``, ``idprefix``,
            ``counts_colname`` and ``outfile``.
    """
    raw_table = pd.read_csv(args.abundance, sep="\t")
    kept_rows = filter_startswith_prefix(
        remove_genomic_transcripts(raw_table), args.idprefix
    )
    per_gene = calculate_abundances_aggregated_by_gene(
        kept_rows, args.counts_colname
    )
    # Builtin sum over the comparison result counts the entries that pass.
    detected = sum(per_gene >= 1)

    record = QCMetricRecord()
    metric = QCMetric(
        "number_of_genes_detected", {"number_of_genes_detected": detected}
    )
    record.add(metric)

    with open(args.outfile, "w") as out_handle:
        json.dump(record.to_ordered_dict(), out_handle)
def main(args):
    """Write the Spearman correlation between two replicates as a QC JSON.

    Each replicate's tab-separated abundance table is passed through
    ``remove_genomic_transcripts`` and ``filter_startswith_prefix`` (with
    the replicate's own id prefix) and aggregated per gene over the last
    column of the filtered table.  The two aggregated results are aligned
    with an outer join, missing entries filled with 0, and correlated
    (Spearman).  The value is stored under ``spearman_correlation`` in a
    ``QCMetric`` named ``replicates_correlation`` and written to
    ``args.outfile``.

    Args:
        args: Object exposing ``rep1_abundance``, ``rep2_abundance``,
            ``rep1_idprefix``, ``rep2_idprefix`` and ``outfile``.
    """
    first_raw = pd.read_csv(args.rep1_abundance, sep="\t")
    second_raw = pd.read_csv(args.rep2_abundance, sep="\t")

    first_kept = filter_startswith_prefix(
        remove_genomic_transcripts(first_raw), args.rep1_idprefix
    )
    second_kept = filter_startswith_prefix(
        remove_genomic_transcripts(second_raw), args.rep2_idprefix
    )
    # Drop the unfiltered tables as soon as they are no longer needed.
    del first_raw, second_raw

    first_per_gene = calculate_abundances_aggregated_by_gene(
        first_kept, first_kept.columns[-1]
    )
    second_per_gene = calculate_abundances_aggregated_by_gene(
        second_kept, second_kept.columns[-1]
    )
    # Same for the filtered tables once the per-gene values exist.
    del first_kept, second_kept

    first_aligned, second_aligned = first_per_gene.align(
        second_per_gene, join="outer", fill_value=0
    )
    rho = first_aligned.corr(second_aligned, method="spearman")

    metric = QCMetric("replicates_correlation", {"spearman_correlation": rho})
    record = QCMetricRecord([metric])

    with open(args.outfile, "w") as out_handle:
        json.dump(record.to_ordered_dict(), out_handle)
def main(args):
    """Combine miRNA expression and STAR log metrics into one QC JSON.

    The table at ``args.quants`` is read as a headerless, tab-separated
    file with the first four rows skipped.  Column ``1`` is scaled to
    counts per million (CPM) and the rows with CPM >= 2 are counted as
    ``expressed_mirnas``.  A second metric, ``star_qc_metric``, is built
    from ``args.star_log`` with ``parse_starlog``; two of its entries are
    summed into ``aligned_reads``.  All three metrics are written to
    ``args.output_filename``.

    Args:
        args: Object exposing ``quants``, ``star_log`` and
            ``output_filename``.
    """
    # Lazy %-style arguments, matching the other logging calls in this file.
    logger.info("Reading input tsv: %s", args.quants)
    quants_tsv = pd.read_csv(args.quants, sep="\t", header=None, skiprows=4)

    # calculate number of mirnas expressed at cpm>=2
    per_million = quants_tsv[1].sum() / 1000000
    quants_tsv["cpm"] = quants_tsv[1] / per_million
    cpm_gte2 = sum(quants_tsv["cpm"] >= 2)

    star_qc_record = QCMetricRecord()
    cpm_metric = QCMetric("expressed_mirnas", {"expressed_mirnas": cpm_gte2})

    # get metrics from star log
    star_qc = QCMetric("star_qc_metric", args.star_log, parse_starlog)
    star_qc_record.add_all([cpm_metric, star_qc])

    # calculate number of reads (unique + multimapping)
    reads_mapped = int(star_qc.content["Uniquely mapped reads number"]) + int(
        star_qc.content["Number of reads mapped to multiple loci"]
    )
    reads_mapped_qc = QCMetric("aligned_reads", {"aligned_reads": reads_mapped})
    star_qc_record.add(reads_mapped_qc)

    logger.info("Writing output json %s", args.output_filename)
    with open(args.output_filename, "w") as fp:
        json.dump(star_qc_record.to_ordered_dict(), fp)
def qc_record():
    """Return a new ``QCMetricRecord`` built with the constructor defaults.

    NOTE(review): presumably registered as a pytest fixture, since a test
    in this file takes a ``qc_record`` argument -- confirm against the
    decorator, which is not visible here.
    """
    fresh_record = QCMetricRecord()
    return fresh_record
def test_QCMetricRecord_getname():
    """A name passed to the constructor is exposed through ``name``."""
    given_name = "dis_my_name"
    record = QCMetricRecord(name=given_name)
    assert record.name == given_name
def test_add_all_failure_because_not_unique(obj_a1, obj_a2, obj_b):
    """A rejected ``add_all`` raises and leaves the record length at 1."""
    record = QCMetricRecord([obj_a1])
    incoming = [obj_b, obj_a2]
    with pytest.raises(AssertionError):
        record.add_all(incoming)
    # The failed batch must not change the number of stored metrics.
    assert len(record) == 1
def test_add_all_to_nonempty_success(qc_record, obj_a1, obj_b, obj_c, obj_d):
    """Adding two more metrics to a two-metric record gives length 4."""
    # ``qc_record`` is requested but not used in the body; it is kept so the
    # test's signature stays unchanged.
    record = QCMetricRecord([obj_a1, obj_b])
    extra_metrics = [obj_c, obj_d]
    record.add_all(extra_metrics)
    assert len(record) == 4
def test_init_from_list_success(obj_a1, obj_b):
    """Metrics given to the constructor are stored in order, by identity."""
    record = QCMetricRecord([obj_a1, obj_b])
    stored = record.metrics
    assert stored[0] is obj_a1
    assert stored[1] is obj_b
def test_init_from_list_not_unique(obj_a1, obj_a2):
    """Constructing a record from ``obj_a1`` and ``obj_a2`` raises."""
    clashing_metrics = [obj_a1, obj_a2]
    with pytest.raises(AssertionError):
        QCMetricRecord(clashing_metrics)