def test_merge(self):
    """Report.merge should sum numeric attributes and keep dataset UUIDs."""
    expected_values = {"n_reads": 300, "n_zmws": 60}
    attr_names = {"n_reads": "Number of reads", "n_zmws": "Number of ZMWs"}
    first_chunk = Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=50, name="Number of reads"),
            Attribute(id_="n_zmws", value=10, name="Number of ZMWs"),
        ],
        dataset_uuids=["12345"])
    second_chunk = Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=250, name="Number of reads"),
            Attribute(id_="n_zmws", value=50, name="Number of ZMWs"),
        ])
    merged = Report.merge([first_chunk, second_chunk])
    self.assertEqual([a.id for a in merged.attributes],
                     ["n_reads", "n_zmws"])
    # UUIDs from either chunk must survive the merge
    self.assertEqual(merged._dataset_uuids, ["12345"])
    for attr in merged.attributes:
        self.assertEqual(attr.value, expected_values[attr.id])
        self.assertEqual(attr.name, attr_names[attr.id])
    for table in merged.tables:
        for column in table.columns:
            self.assertEqual(column.header, attr_names[column.id])
def test_merge(self):
    """Merging two report chunks sums attribute values; names are kept."""
    totals = {"n_reads": 300, "n_zmws": 60}
    labels = {"n_reads": "Number of reads", "n_zmws": "Number of ZMWs"}
    chunks = []
    chunks.append(Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=50, name="Number of reads"),
            Attribute(id_="n_zmws", value=10, name="Number of ZMWs")],
        dataset_uuids=["12345"]))
    chunks.append(Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=250, name="Number of reads"),
            Attribute(id_="n_zmws", value=50, name="Number of ZMWs")]))
    r = Report.merge(chunks)
    self.assertEqual([a.id for a in r.attributes], ["n_reads", "n_zmws"])
    # the UUID list present on only one chunk still propagates
    self.assertEqual(r._dataset_uuids, ["12345"])
    for attr in r.attributes:
        self.assertEqual(attr.value, totals[attr.id])
        self.assertEqual(attr.name, labels[attr.id])
    for table in r.tables:
        for column in table.columns:
            self.assertEqual(column.header, labels[column.id])
def test_merge_tables(self):
    """Merged LAA chunk reports should concatenate their table rows."""
    names = ['laa_report1.json', 'laa_report2.json']
    r = Report.merge([_to_report(fn) for fn in names])
    table = r.tables[0]
    expected_headers = [
        'BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
        'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'
    ]
    self.assertEqual(len(table.columns), 7)
    self.assertEqual([col.header for col in table.columns],
                     expected_headers)
    for col in table.columns:
        # two rows per chunk -> four rows total
        self.assertEqual(len(col.values), 4)
        if col.header == 'BarcodeName':
            self.assertEqual(
                col.values,
                ['Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'])
        elif col.header == 'FastaName':
            self.assertEqual(
                col.values,
                ['BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                 'BarcodeFasta3'])
        else:
            self.assertEqual(col.values, [1, 2, 4, 3])
    column_list_d = table.to_columns_d()
    self.assertEqual(len(column_list_d), 4)
def test_merge_tables(self):
    """Table rows from both LAA chunks should land in a single table."""
    names = ['laa_report1.json', 'laa_report2.json']
    chunk_a = _to_report(names[0])
    chunk_b = _to_report(names[1])
    r = Report.merge([chunk_a, chunk_b])
    table = r.tables[0]
    headers = [col.header for col in table.columns]
    assert len(table.columns) == 7
    assert headers == ['BarcodeName', 'FastaName', 'CoarseCluster',
                       'Phase', 'TotalCoverage', 'SequenceLength',
                       'PredictedAccuracy']
    expected_by_header = {
        'BarcodeName': ['Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'],
        'FastaName': ['BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                      'BarcodeFasta3'],
    }
    for col in table.columns:
        # each chunk contributes two rows
        assert len(col.values) == 4
        # numeric columns all carry the same row indices
        assert col.values == expected_by_header.get(col.header,
                                                    [1, 2, 4, 3])
    column_list_d = table.to_columns_d()
    assert len(column_list_d) == 4
def test_merge_tables(self):
    """Merging two LAA reports should stack the rows of their tables."""
    names = ["laa_report1.json", "laa_report2.json"]
    merged = Report.merge([_to_report(names[0]), _to_report(names[1])])
    table = merged.tables[0]
    self.assertEqual(len(table.columns), 7)
    self.assertEqual(
        [col.header for col in table.columns],
        [
            "BarcodeName",
            "FastaName",
            "CoarseCluster",
            "Phase",
            "TotalCoverage",
            "SequenceLength",
            "PredictedAccuracy",
        ],
    )
    for col in table.columns:
        # four rows total: two from each chunk
        self.assertEqual(len(col.values), 4)
        if col.header == "BarcodeName":
            expected = ["Barcode1", "Barcode2", "Barcode4", "Barcode3"]
        elif col.header == "FastaName":
            expected = ["BarcodeFasta1", "BarcodeFasta2",
                        "BarcodeFasta4", "BarcodeFasta3"]
        else:
            expected = [1, 2, 4, 3]
        self.assertEqual(col.values, expected)
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report

    :param json_files: paths of chunked report JSON files to merge
    :param output_file: path the merged report JSON is written to
    :return: the output file path
    """
    merged = Report.merge([load_report_from_json(fn) for fn in json_files])
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report

    :param json_files: chunked report JSON file paths
    :param output_file: destination path for the merged report
    :return: path of the written output file
    """
    chunk_reports = []
    for fn in json_files:
        chunk_reports.append(load_report_from_json(fn))
    merged = Report.merge(chunk_reports)
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
def test_merge(self):
    """Namespaced attributes built via from_simple_dict should sum on merge."""
    chunk_one = Report.from_simple_dict(
        "pbcommand_test", {"n_reads": 50, "n_zmws": 10}, "pbcommand")
    chunk_two = Report.from_simple_dict(
        "pbcommand_test", {"n_reads": 250, "n_zmws": 50}, "pbcommand")
    merged = Report.merge([chunk_one, chunk_two])
    # ids are prefixed with the "pbcommand" namespace by from_simple_dict
    values = {a.id: a.value for a in merged.attributes}
    self.assertEqual(values['pbcommand_n_reads'], 300)
    self.assertEqual(values['pbcommand_n_zmws'], 60)
def gather_report(json_files, output_file, dataset_xml=None):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report

    :param json_files: chunked report JSON file paths
    :param output_file: destination path for the merged report
    :param dataset_xml: optional dataset XML; when given, its UUID replaces
        the merged report's dataset UUID list
    :return: path of the written output file
    """
    merged = Report.merge([load_report_from_json(fn) for fn in json_files])
    if dataset_xml is not None:
        # stamp the merged report with the parent dataset's UUID so it can
        # be linked back to the dataset it describes
        ds_md = get_dataset_metadata(dataset_xml)
        merged._dataset_uuids = [ds_md.uuid]
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
def test_merge(self):
    """Merging sums numeric attributes and comma-joins string attributes."""
    expected = {
        "n_reads": 300,
        "n_zmws": 60,
        "sample": "Person1,Person2",
    }
    labels = {
        "n_reads": "Number of reads",
        "n_zmws": "Number of ZMWs",
        "sample": "Sample",
    }
    first = Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=50, name="Number of reads"),
            Attribute(id_="n_zmws", value=10, name="Number of ZMWs"),
            Attribute(id_="sample", value="Person1", name="Sample"),
        ],
        dataset_uuids=["12345"])
    second = Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=250, name="Number of reads"),
            Attribute(id_="n_zmws", value=50, name="Number of ZMWs"),
            Attribute(id_="sample", value="Person2", name="Sample"),
        ])
    r = Report.merge([first, second])
    assert [a.id for a in r.attributes] == ["n_reads", "n_zmws", "sample"]
    # a UUID present on only one chunk still survives the merge
    assert r._dataset_uuids == ["12345"]
    for attr in r.attributes:
        assert attr.value == expected[attr.id]
        assert attr.name == labels[attr.id]
    for table in r.tables:
        for column in table.columns:
            assert column.header == labels[column.id]
def test_merge_tables(self):
    """Rows from both LAA chunk reports should appear in one merged table."""
    names = ['laa_report1.json', 'laa_report2.json']
    reports = [_to_report(fn) for fn in names]
    merged = Report.merge(reports)
    table = merged.tables[0]
    self.assertEqual(len(table.columns), 7)
    self.assertEqual(
        [col.header for col in table.columns],
        ['BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
         'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'])
    string_columns = {
        'BarcodeName': ['Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'],
        'FastaName': ['BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                      'BarcodeFasta3'],
    }
    for col in table.columns:
        # two rows per chunk, so four merged rows
        self.assertEqual(len(col.values), 4)
        if col.header in string_columns:
            self.assertEqual(col.values, string_columns[col.header])
        else:
            self.assertEqual(col.values, [1, 2, 4, 3])
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report

    The avg_flnc_len attribute cannot be merged by summation, so it is
    recomputed here from the per-chunk totals (mimicking
    pbtranscript.io.Summary).

    :param json_files: chunked report JSON file paths
    :param output_file: destination path for the merged report
    :return: path of the written output file
    """
    reports = [load_report_from_json(fn) for fn in json_files]
    merged = Report.merge(reports)
    total_num_flnc = 0
    total_num_flnc_bases = 0
    for rpt in reports:
        attrs = {a.id: a.value for a in rpt.attributes}
        total_num_flnc += attrs["num_flnc"]
        total_num_flnc_bases += attrs["num_flnc_bases"]
    if total_num_flnc > 0:
        for a in merged.attributes:
            if a.id == "avg_flnc_len":
                # overwrite the (meaningless) summed average with the true
                # weighted average across all chunks
                a._value = int(total_num_flnc_bases / total_num_flnc)
                log.info("Setting avg_flnc_len = {v}".format(v=a.value))
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report

    avg_flnc_len is a mean, so it is recomputed from the summed totals
    rather than taken from the naive merge (mimicking
    pbtranscript.io.Summary).

    :param json_files: chunked report JSON file paths
    :param output_file: destination path for the merged report
    :return: path of the written output file
    """
    reports = []
    for fn in json_files:
        reports.append(load_report_from_json(fn))
    merged = Report.merge(reports)
    total_num_flnc_bases = 0
    total_num_flnc = 0
    for chunk in reports:
        chunk_attrs = {a.id: a.value for a in chunk.attributes}
        total_num_flnc += chunk_attrs["num_flnc"]
        total_num_flnc_bases += chunk_attrs["num_flnc_bases"]
    if total_num_flnc > 0:
        for a in merged.attributes:
            if a.id != "avg_flnc_len":
                continue
            # replace the summed value with the true weighted average
            a._value = int(total_num_flnc_bases / total_num_flnc)
            log.info("Setting avg_flnc_len = {v}".format(v=a.value))
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file