Example #1
 def test_merge(self):
     EXPECTED_VALUES = {
         "n_reads": 300,
         "n_zmws": 60,
     }
     NAMES = {
         "n_reads": "Number of reads",
         "n_zmws": "Number of ZMWs"
     }
     chunks = [
         Report("pbcommand_test",
             attributes=[
                 Attribute(id_="n_reads", value=50, name="Number of reads"),
                 Attribute(id_="n_zmws", value=10, name="Number of ZMWs")],
             dataset_uuids=["12345"]),
         Report("pbcommand_test",
             attributes=[
                 Attribute(id_="n_reads", value=250, name="Number of reads"),
                 Attribute(id_="n_zmws", value=50, name="Number of ZMWs")]),
     ]
     r = Report.merge(chunks)
     self.assertEqual([a.id for a in r.attributes], ["n_reads", "n_zmws"])
     self.assertEqual(r._dataset_uuids, ["12345"])
     for attr in r.attributes:
         self.assertEqual(attr.value, EXPECTED_VALUES[attr.id])
         self.assertEqual(attr.name, NAMES[attr.id])
     for table in r.tables:
         for column in table.columns:
             self.assertEqual(column.header, NAMES[column.id])
Example #2
 def test_merge(self):
     EXPECTED_VALUES = {
         "n_reads": 300,
         "n_zmws": 60,
     }
     NAMES = {"n_reads": "Number of reads", "n_zmws": "Number of ZMWs"}
     chunks = [
         Report("pbcommand_test",
                attributes=[
                    Attribute(id_="n_reads",
                              value=50,
                              name="Number of reads"),
                    Attribute(id_="n_zmws", value=10, name="Number of ZMWs")
                ],
                dataset_uuids=["12345"]),
         Report("pbcommand_test",
                attributes=[
                    Attribute(id_="n_reads",
                              value=250,
                              name="Number of reads"),
                    Attribute(id_="n_zmws", value=50, name="Number of ZMWs")
                ]),
     ]
     r = Report.merge(chunks)
     self.assertEqual([a.id for a in r.attributes], ["n_reads", "n_zmws"])
     self.assertEqual(r._dataset_uuids, ["12345"])
     for attr in r.attributes:
         self.assertEqual(attr.value, EXPECTED_VALUES[attr.id])
         self.assertEqual(attr.name, NAMES[attr.id])
     for table in r.tables:
         for column in table.columns:
             self.assertEqual(column.header, NAMES[column.id])
Example #3
    def test_merge_tables(self):
        names = ['laa_report1.json', 'laa_report2.json']
        r = Report.merge([_to_report(names[0]), _to_report(names[1])])
        table = r.tables[0]
        self.assertEqual(len(table.columns), 7)
        self.assertEqual([col.header for col in table.columns], [
            'BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
            'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'
        ])
        for col in table.columns:
            self.assertEqual(len(col.values), 4)
            if col.header == 'BarcodeName':
                self.assertEqual(
                    col.values,
                    ['Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'])
            elif col.header == 'FastaName':
                self.assertEqual(col.values, [
                    'BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                    'BarcodeFasta3'
                ])
            else:
                self.assertEqual(col.values, [1, 2, 4, 3])

        column_list_d = table.to_columns_d()
        self.assertEqual(len(column_list_d), 4)
Example #4
    def test_merge_tables(self):
        names = ['laa_report1.json', 'laa_report2.json']
        r = Report.merge([_to_report(names[0]), _to_report(names[1])])
        table = r.tables[0]
        assert len(table.columns) == 7
        assert [col.header for col in table.columns] == [
            'BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
            'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'
        ]
        for col in table.columns:
            assert len(col.values) == 4
            if col.header == 'BarcodeName':
                assert col.values == [
                    'Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'
                ]
            elif col.header == 'FastaName':
                assert col.values == [
                    'BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                    'BarcodeFasta3'
                ]
            else:
                assert col.values == [1, 2, 4, 3]

        column_list_d = table.to_columns_d()
        assert len(column_list_d) == 4
Example #5
 def test_merge_tables(self):
     names = ["laa_report1.json", "laa_report2.json"]
     r = Report.merge([_to_report(names[0]), _to_report(names[1])])
     table = r.tables[0]
     self.assertEqual(len(table.columns), 7)
     self.assertEqual(
         [col.header for col in table.columns],
         [
             "BarcodeName",
             "FastaName",
             "CoarseCluster",
             "Phase",
             "TotalCoverage",
             "SequenceLength",
             "PredictedAccuracy",
         ],
     )
     for col in table.columns:
         self.assertEqual(len(col.values), 4)
         if col.header == "BarcodeName":
             self.assertEqual(col.values, ["Barcode1", "Barcode2", "Barcode4", "Barcode3"])
         elif col.header == "FastaName":
             self.assertEqual(col.values, ["BarcodeFasta1", "BarcodeFasta2", "BarcodeFasta4", "BarcodeFasta3"])
         else:
             self.assertEqual(col.values, [1, 2, 4, 3])
Example #6
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    reports = [ load_report_from_json(fn) for fn in json_files ]
    merged = Report.merge(reports)
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
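A minimal, hypothetical call to the gather function above; the chunk and output file names are made up for illustration.
# Hypothetical usage (file names are invented): merge two per-chunk
# report JSON files into one combined report.
merged_path = gather_report(
    ["chunk-1.report.json", "chunk-2.report.json"],
    "merged.report.json",
)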
Example #7
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    reports = [load_report_from_json(fn) for fn in json_files]
    merged = Report.merge(reports)
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
Example #8
 def test_merge(self):
     r = Report.merge([
         Report.from_simple_dict("pbcommand_test",
                                 {"n_reads": 50, "n_zmws": 10},
                                 "pbcommand"),
         Report.from_simple_dict("pbcommand_test",
                                 {"n_reads": 250, "n_zmws": 50},
                                 "pbcommand")])
     attr = {a.id: a.value for a in r.attributes}
     self.assertEqual(attr['pbcommand_n_reads'], 300)
     self.assertEqual(attr['pbcommand_n_zmws'], 60)
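The attribute ids looked up above suggest that Report.from_simple_dict prefixes each key with the supplied namespace; a minimal sketch of that assumption:
# Sketch (assumed behavior, inferred from the test above): the "pbcommand"
# namespace is prepended to each raw key when attribute ids are built.
r = Report.from_simple_dict("pbcommand_test", {"n_reads": 50}, "pbcommand")
print(sorted(a.id for a in r.attributes))  # expected: ['pbcommand_n_reads']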
Example #9
def gather_report(json_files, output_file, dataset_xml=None):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    reports = [load_report_from_json(fn) for fn in json_files]
    merged = Report.merge(reports)
    if dataset_xml is not None:
        ds_md = get_dataset_metadata(dataset_xml)
        merged._dataset_uuids = [ds_md.uuid]
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
Example #10
 def test_merge(self):
     EXPECTED_VALUES = {
         "n_reads": 300,
         "n_zmws": 60,
         "sample": "Person1,Person2"
     }
     NAMES = {
         "n_reads": "Number of reads",
         "n_zmws": "Number of ZMWs",
         "sample": "Sample"
     }
     chunks = [
         Report("pbcommand_test",
                attributes=[
                    Attribute(id_="n_reads",
                              value=50,
                              name="Number of reads"),
                    Attribute(id_="n_zmws", value=10,
                              name="Number of ZMWs"),
                    Attribute(id_="sample", value="Person1", name="Sample")
                ],
                dataset_uuids=["12345"]),
         Report("pbcommand_test",
                attributes=[
                    Attribute(id_="n_reads",
                              value=250,
                              name="Number of reads"),
                    Attribute(id_="n_zmws", value=50,
                              name="Number of ZMWs"),
                    Attribute(id_="sample", value="Person2", name="Sample")
                ]),
     ]
     r = Report.merge(chunks)
     assert [a.id for a in r.attributes] == ["n_reads", "n_zmws", "sample"]
     assert r._dataset_uuids == ["12345"]
     for attr in r.attributes:
         assert attr.value == EXPECTED_VALUES[attr.id]
         assert attr.name == NAMES[attr.id]
     for table in r.tables:
         for column in table.columns:
             assert column.header == NAMES[column.id]
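Note how this variant also merges a string attribute: the expected value "Person1,Person2" suggests that Report.merge sums numeric attribute values but joins string values with a comma, and that the merged report keeps whatever dataset UUIDs the chunks contribute.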
Example #11
 def test_merge_tables(self):
     names = ['laa_report1.json', 'laa_report2.json']
     r = Report.merge([_to_report(names[0]), _to_report(names[1])])
     table = r.tables[0]
     self.assertEqual(len(table.columns), 7)
     self.assertEqual(
         [col.header for col in table.columns],
         ['BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
          'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'])
     for col in table.columns:
         self.assertEqual(len(col.values), 4)
         if col.header == 'BarcodeName':
             self.assertEqual(
                 col.values,
                 ['Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'])
         elif col.header == 'FastaName':
             self.assertEqual(
                 col.values,
                 ['BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                  'BarcodeFasta3'])
         else:
             self.assertEqual(col.values, [1, 2, 4, 3])
Example #12
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    reports = [ load_report_from_json(fn) for fn in json_files ]
    merged = Report.merge(reports)
    total_num_flnc_bases = 0
    total_num_flnc = 0
    for r in reports:
        attrs = {a.id:a.value for a in r.attributes}
        num_flnc = attrs["num_flnc"]
        num_flnc_bases = attrs["num_flnc_bases"]
        total_num_flnc += num_flnc
        total_num_flnc_bases += num_flnc_bases
    if total_num_flnc > 0:
        for a in merged.attributes:
            if a.id == "avg_flnc_len":
                # mimicking pbtranscript.io.Summary
                a._value = int(total_num_flnc_bases / total_num_flnc)
                log.info("Setting avg_flnc_len = {v}".format(v=a.value))
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
Example #13
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    reports = [load_report_from_json(fn) for fn in json_files]
    merged = Report.merge(reports)
    total_num_flnc_bases = 0
    total_num_flnc = 0
    for r in reports:
        attrs = {a.id: a.value for a in r.attributes}
        num_flnc = attrs["num_flnc"]
        num_flnc_bases = attrs["num_flnc_bases"]
        total_num_flnc += num_flnc
        total_num_flnc_bases += num_flnc_bases
    if total_num_flnc > 0:
        for a in merged.attributes:
            if a.id == "avg_flnc_len":
                # mimicking pbtranscript.io.Summary
                a._value = int(total_num_flnc_bases / total_num_flnc)
                log.info("Setting avg_flnc_len = {v}".format(v=a.value))
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
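A worked example, with made-up numbers, of why avg_flnc_len is recomputed above: Report.merge can sum counts such as num_flnc and num_flnc_bases, but an average cannot be summed, so it is rebuilt from the merged totals.
# Invented chunk totals to illustrate the recomputation.
num_flnc = [100, 300]             # full-length non-chimeric reads per chunk
num_flnc_bases = [90000, 330000]  # bases in those reads, per chunk
avg_flnc_len = sum(num_flnc_bases) // sum(num_flnc)
print(avg_flnc_len)  # 420000 // 400 == 1050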