Example #1
def run_consolidate(dataset_file,
                    output_file,
                    datastore_file,
                    consolidate,
                    n_files,
                    task_id=Constants.TOOL_ID):
    datastore_files = []
    with openDataSet(dataset_file) as ds_in:
        if consolidate:
            if len(ds_in.toExternalFiles()) != 1:
                new_resource_file = op.splitext(output_file)[0] + ".bam"
                ds_in.consolidate(new_resource_file, numFiles=n_files)
            # always display the BAM/BAI if consolidation is enabled
            # XXX there is no uniqueness constraint on the sourceId, but this
            # seems sloppy nonetheless - unfortunately I don't know how else to
            # make view rule whitelisting work
            for ext_res in ds_in.externalResources:
                if ext_res.resourceId.endswith(".bam"):
                    ds_file = DataStoreFile(ext_res.uniqueId,
                                            task_id + "-out-2",
                                            ext_res.metaType, ext_res.bam)
                    datastore_files.append(ds_file)
                    for index in ext_res.indices:
                        if index.metaType in Constants.BAI_FILE_TYPES:
                            ds_file = DataStoreFile(index.uniqueId,
                                                    task_id + "-out-3",
                                                    index.metaType,
                                                    index.resourceId)
                            datastore_files.append(ds_file)
        ds_in.newUuid()
        ds_in.write(output_file)
    datastore = DataStore(datastore_files)
    datastore.write_json(datastore_file)
    return 0
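A minimal invocation sketch for the function above; the file names below are assumptions for illustration, not taken from the source, and the default task_id (Constants.TOOL_ID) is left in place.

# Hypothetical call; input/output paths are assumed for illustration only.
run_consolidate(dataset_file="movie.subreadset.xml",
                output_file="consolidated.subreadset.xml",
                datastore_file="consolidate.datastore.json",
                consolidate=True,
                n_files=1)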
Example #2
def _make_datastore(subreads):
    files = [
        DataStoreFile(uuid.uuid4(), "barcoding.tasks.lima-out-0",
                      FileTypes.DS_SUBREADS.file_type_id, subreads)
    ]
    ds = DataStore(files)
    ds_path = tempfile.NamedTemporaryFile(suffix=".datastore.json").name
    ds.write_json(ds_path)
    return ds_path
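For context, a datastore written this way can be loaded back with DataStore.load_from_json, as later examples do. A minimal round-trip sketch, assuming a SubreadSet XML path (the path is illustrative):

# Hypothetical round trip; the subreads path is an assumption.
ds_path = _make_datastore("movie.subreadset.xml")
ds = DataStore.load_from_json(ds_path)
for f in ds.files.values():
    print(f.file_type_id, f.path)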
Example #3
def dataset_to_datastore(dataset_file,
                         datastore_file,
                         source_id="dataset_to_datastore"):
    """Copied from pbcoretools.tasks.barcoding"""
    # FIXME: replace barcoding
    dsmd = get_dataset_metadata(dataset_file)
    ds_file = DataStoreFile(dsmd.uuid, source_id, dsmd.metatype, dataset_file)
    ds_out = DataStore([ds_file])
    ds_out.write_json(datastore_file)
    return 0
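A sketch of calling the helper above; the paths and source id are assumptions for illustration:

# Hypothetical usage; file names and source_id are assumed.
dataset_to_datastore("movie.subreadset.xml",
                     "movie.datastore.json",
                     source_id="my_task-out-0")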
Example #4
def run_consolidate(dataset_file,
                    output_file,
                    datastore_file,
                    consolidate,
                    n_files,
                    consolidate_f=lambda ds: ds.consolidate):
    # XXX https://github.com/pysam-developers/pysam/issues/939
    pysam.set_verbosity(0)  # pylint: disable=no-member
    datastore_files = []
    with openDataSet(dataset_file) as ds_in:
        if consolidate:
            if len(ds_in.toExternalFiles()) <= 0:
                raise ValueError(
                    "DataSet {} must contain one or more files!".format(
                        dataset_file))
            new_resource_file = bam_of_dataset(output_file)
            consolidate_f(ds_in)(new_resource_file,
                                 numFiles=n_files,
                                 useTmp=False)
            # always display the BAM/BAI if consolidation is enabled
            # XXX there is no uniqueness constraint on the sourceId, but this
            # seems sloppy nonetheless - unfortunately I don't know how else to
            # make view rule whitelisting work
            reads_name = get_reads_name(ds_in)
            for ext_res in ds_in.externalResources:
                if ext_res.resourceId.endswith(".bam"):
                    ds_file = DataStoreFile(ext_res.uniqueId,
                                            Constants.TOOL_ID + "-out-2",
                                            ext_res.metaType,
                                            ext_res.bam,
                                            name=reads_name,
                                            description=reads_name)
                    datastore_files.append(ds_file)
                    # Prevent duplicated index files being added to datastore, since consolidated
                    # dataset may contain multiple indices pointing to the same physical file
                    added_resources = set()
                    for index in ext_res.indices:
                        if (index.metaType in Constants.BAI_FILE_TYPES
                                and index.resourceId not in added_resources):
                            added_resources.add(index.resourceId)
                            ds_file = DataStoreFile(
                                index.uniqueId,
                                Constants.TOOL_ID + "-out-3",
                                index.metaType,
                                index.resourceId,
                                name="Index of {}".format(reads_name.lower()),
                                description="Index of {}".format(
                                    reads_name.lower()))
                            datastore_files.append(ds_file)
        ds_in.newUuid()
        ds_in.write(output_file)
    datastore = DataStore(datastore_files)
    datastore.write_json(datastore_file)
    return 0
Example #5
    def test_datastore_paths(self):
        tmpfile = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        base_dir = os.path.dirname(tmpfile)
        tmp_ds = os.path.join(base_dir, "datastore.json")
        dsf = DataStoreFile(str(uuid.uuid4()), "pbcommand.tasks.dev_task",
                            FileTypes.DS_SUBREADS.file_type_id,
                            os.path.basename(tmpfile), False, "Subreads",
                            "Subread DataSet XML")
        ds = DataStore([dsf])
        ds.write_json(tmp_ds)
        with open(tmp_ds) as json_in:
            d = json.loads(json_in.read())
            self.assertFalse(os.path.isabs(d['files'][0]['path']))
        ds = DataStore.load_from_json(tmp_ds)
        self.assertEqual(list(ds.files.values())[0].path, tmpfile)
Example #6
def datastore_to_datastorefile_objs(in_datastore_json,
                                    allowed_types=ALLOWED_TYPES):
    """Return (datastorefile_objs, type_id, cls, ext)
    datastorefile_objs -- a list of DataStoreFile objects.
    type_id -- id
    cls -- e.g., SubreadSet
    ext -- e.g., subreadset.xml
    """
    datastore = DataStore.load_from_json(in_datastore_json)
    allowed_type_ids = [t.file_type_id for t in allowed_types]
    # Is input datastore empty?
    if len(datastore.files) == 0:
        raise ValueError(
            "Expected one or more dataset files in datastore {}".format(
                in_datastore_json))

    # Do all files share the same type?
    observed_type_ids = list(
        set([f.file_type_id for f in datastore.files.values()]))
    if len(observed_type_ids) != 1:
        raise ValueError(
            "Could not handle datastore of mixed types: {}!".format(
                observed_type_ids))

    # Is it an allowed file type?
    type_id = observed_type_ids[0]
    if type_id not in allowed_type_ids:
        raise ValueError(
            "Could not handle {} dataset in datastore file {}, only support {}!"
            .format(type_id, in_datastore_json, allowed_type_ids))

    cls = _type_id_to_cls(type_id)
    ext = _type_id_to_ext(type_id)
    return list(datastore.files.values()), type_id, cls, ext
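A sketch of consuming the returned tuple, assuming a barcoded datastore JSON; the input path and output name are illustrative:

# Hypothetical usage; the datastore path and output name are assumptions.
files, type_id, cls, ext = datastore_to_datastorefile_objs("lima.datastore.json")
merged = cls(*[f.path for f in files])  # e.g., SubreadSet(...)
merged.write("merged." + ext)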
Example #7
def to_reports(subreads, output_dir):
    output_files = []
    log.info("Loading {f}".format(f=subreads))
    ds = SubreadSet(subreads)
    ds.loadStats()
    for base, module in [("filter_stats_xml", filter_stats_xml),
                         ("adapter_xml", adapter_xml),
                         ("loading_xml", loading_xml),
                         ("control", control)]:
        constants = getattr(module, "Constants")
        task_id = constants.TOOL_ID
        to_report = getattr(module, "to_report_impl")
        try:
            rpt_output_dir = os.path.join(output_dir, base)
            os.mkdir(rpt_output_dir)
            file_name = os.path.join(rpt_output_dir, "{b}.json".format(b=base))
            report = to_report(ds, rpt_output_dir)
            log.info("Writing {f}".format(f=file_name))
            report.write_json(file_name)
            output_files.append(DataStoreFile(
                uuid=report.uuid,
                source_id=task_id,
                type_id=FileTypes.REPORT.file_type_id,
                path=file_name,
                is_chunked=False,
                name=base))
        except InvalidStatsError as e:
            log.error("This dataset lacks some required statistics")
            log.error("Skipping generation of {b} report".format(b=base))
    datastore = DataStore(output_files)
    return datastore
Example #8
    def test_load_datastore_from_file(self):
        """
        Can load a DataStore from a JSON file.
        """
        ds = DataStore.load_from_json(_to_ds_json(self.job_dir))
        self.assertIsInstance(ds, DataStore)
Example #9
def mock_update_barcoded_sample_metadata(base_dir,
                                         datastore_file,
                                         input_reads,
                                         barcode_set,
                                         use_barcode_uuids=True):
    """
    Function to mimic the actual update function, without actually reading
    any barcoding information from the datasets.  Instead, the barcodes
    defined in the input dataset will be applied sequentially.
    """
    barcode_names, bio_samples_d, barcode_uuids_d, update_files, parent_info = _load_files_for_update(
        input_reads, barcode_set, datastore_file, None)
    barcode_ids = {name: i for i, name in enumerate(barcode_names)}
    bc_pairs = []
    ds_files = {}
    for bc_label in barcode_uuids_d.keys():
        bc_fw_label, bc_rev_label = bc_label.split("--")
        bc_pairs.append((barcode_ids[bc_fw_label], barcode_ids[bc_rev_label]))
        suffix = ".{l}.subreadset.xml".format(l=bc_label)
        for ds_file in update_files:
            if ds_file.path.endswith(suffix):
                ds_files[bc_pairs[-1]] = ds_file
    new_files = []
    assert len(bc_pairs) >= len(update_files)
    for bc_pair in bc_pairs:
        ds_file = ds_files[bc_pair]
        new_files.append(
            _mock_update_barcoded_sample_metadata(base_dir, ds_file,
                                                  barcode_names, parent_info,
                                                  use_barcode_uuids, bc_pair,
                                                  bio_samples_d,
                                                  barcode_uuids_d))
    return DataStore(new_files)
Example #10
def run_dev_txt_to_datastore(rtc):

    p = os.path.dirname(rtc.task.output_files[0])

    sleep_multiplier = rtc.task.options[
        'pbsmrtpipe.task_options.sleep_multiplier']
    t_sleep = sleep_multiplier * random.random()
    log.info("Sleeping for %.1f seconds", t_sleep)
    time.sleep(t_sleep)

    from pbcore.io import SubreadSet

    num_subreadsets = rtc.task.options[
        'pbsmrtpipe.task_options.num_subreadsets']

    sset = SubreadSet(rtc.task.input_files[0])
    add_parent = True
    if len(sset.metadata.provenance) > 0:
        log.warning("Not adding provenance since input already has a parent")
        add_parent = False

    def to_f(x):
        source_id = "out-1"
        sset_out = sset.copy()
        sset_out.newUuid(random=True)
        if add_parent:
            sset_out.metadata.addParentDataSet(sset.uuid,
                                               sset.datasetType,
                                               createdBy="AnalysisJob",
                                               timeStampedName="")
        file_name = "file-{x:03d}.subreadset.xml".format(x=x)
        out_path = os.path.join(p, file_name)
        sset_out.write(out_path)
        sset_uuid = sset_out.uniqueId
        name = "subreadset-{}".format(x)
        dsf = DataStoreFile(sset_uuid,
                            source_id,
                            FileTypes.DS_SUBREADS.file_type_id,
                            file_name,
                            name=name,
                            description="{} Example Description".format(name))
        return dsf

    files = [to_f(i + 1) for i in range(num_subreadsets)]
    ds = DataStore(files)
    ds.write_json(rtc.task.output_files[0])
    return 0
Example #11
    def test_datastore_file_name_and_description(self):
        """
        Make sure output files have non-blank name and description.
        """
        ds = DataStore.load_from_json(_to_ds_json(self.job_dir))
        rx = re.compile(r'[a-zA-Z0-9]{1,}')

        for fd in ds.files.values():
            for x in (fd.name, fd.description):
                self.assertTrue(rx.search(x))
Example #12
def iterate_datastore_read_set_files(
        datastore_file, allowed_read_types=Constants.ALLOWED_BC_TYPES):
    """
    Iterate over dataset (e.g., SubreadSet or ConsensusReadSet) files listed in a datastore JSON.
    """
    ds = DataStore.load_from_json(datastore_file)
    files = ds.files.values()
    for f in files:
        if f.file_type_id in allowed_read_types:
            yield f
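A sketch of consuming the generator above, assuming a barcoding datastore JSON path (illustrative only):

# Hypothetical usage; the datastore path is an assumption.
for ds_file in iterate_datastore_read_set_files("lima.datastore.json"):
    log.info("Found read set %s (%s)", ds_file.path, ds_file.file_type_id)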
Example #13
def _run_auto_ccs_outputs_barcoded(datastore_in, datastore_out, nproc=Constants.MAX_NPROC):
    base_dir = op.dirname(datastore_out)
    files = DataStore.load_from_json(datastore_in).files.values()
    ccs_files = []
    for ds_file in files:
        # FIXME use a better file_id
        if ds_file.file_type_id == FileTypes.DS_CCS.file_type_id and ds_file.file_id == "barcoding.tasks.lima-0":
            ccs_files.append(ds_file.path)
            log.info("Exporting %s", ds_file.path)
    log.info("Exporting %d CCS datasets", len(ccs_files))
    args = [(f, base_dir) for f in ccs_files]
    output_files = list(itertools.chain.from_iterable(
        pool_map(__run_ccs_bam_fastq_exports, args, nproc)))
    output_files.extend([
        _create_zipped_fastq(output_files, "all_barcodes.fastq.tar.gz"),
        _create_zipped_fasta(output_files, "all_barcodes.fasta.tar.gz")
    ])
    DataStore(output_files).write_json(datastore_out)
    return 0
Example #14
def run_args(args):
    dstore = DataStore.load_from_json(os.path.realpath(args.datastore))
    ds_in = ConsensusReadSet(args.ccs_in, trustCounts=True)
    ds_out = ConsensusReadSet(*([f.path for f in dstore.files.values()]),
                              trustCounts=True)
    sanitize_dataset_tags(ds_out, remove_hidden=True)
    ds_out.name = ds_in.name.replace(" (filtered)", "") + " (trimmed)"
    ds_out.subdatasets = []
    ds_out.write("trimmed.consensusreadset.xml")
    return 0
Example #15
    def run_after(self, rtc, output_dir):
        with openDataSet(rtc.task.output_files[0]) as f:
            f.assertIndexed()
            self.assertEqual(len(f.toExternalFiles()), 1)
            # test for bug 33778
            qnames = set()
            for rec in f:
                qnames.add(rec.qName)
            self.assertEqual(len(qnames), len(f))
        ds = DataStore.load_from_json(rtc.task.output_files[1])
        self.assertEqual(len(ds.files), 2)
Example #16
    def wrapper(self):
        ds_path = os.path.join(self.job_dir, "workflow", "datastore.json")
        ds = DataStore.load_from_json(ds_path)
        # log.info("Loaded datastore {d}".format(d=ds))
        for ds_file in ds.files.values():
            if ds_file.file_type_id == file_type_id:
                started_at = time.time()
                validator_func(ds_file.path, **kwargs)
                run_time = time.time() - started_at
                log.debug("Successfully validated in {s:.2f} sec {p}".format(p=ds_file.path, s=run_time))
        self.assertTrue(True)
Example #17
    def wrapper(self):
        ds_path = os.path.join(self.job_dir, "workflow", "datastore.json")
        ds = DataStore.load_from_json(ds_path)
        # log.info("Loaded datastore {d}".format(d=ds))
        for ds_file in ds.files.values():
            if ds_file.file_type_id == file_type_id:
                started_at = time.time()
                validator_func(ds_file.path, **kwargs)
                run_time = time.time() - started_at
                log.debug("Successfully validated in {s:.2f} sec {p}".format(p=ds_file.path, s=run_time))
        self.assertTrue(True)
Example #18
def gather_chunks(chunks, output_file, nproc=1):
    if len(chunks) == 1:
        datastore = DataStore.load_from_json(op.realpath(chunks[0]))
        log.info("Writing datastore to %s", output_file)
        datastore.write_json(output_file)
        return len(datastore.files)
    file_names_by_bc = defaultdict(list)
    datastore_files_by_bc = {}
    for file_name in chunks:
        log.info("Reading datastore from %s", file_name)
        datastore = DataStore.load_from_json(op.realpath(file_name))
        for ds_file in datastore.files.values():
            ds_file_name = op.realpath(ds_file.path)
            base_name = op.basename(ds_file_name)
            fields = base_name.split(".")
            bc_pair = fields[-3]
            file_names_by_bc[bc_pair].append(ds_file_name)
            datastore_files_by_bc[bc_pair] = ds_file
    log.info("Found %d unique barcode pairs", len(file_names_by_bc))
    _results = []
    pool = multiprocessing.Pool(nproc)
    for bc_pair, file_names in file_names_by_bc.items():
        _results.append(
            pool.apply_async(_merge_chunks,
                             (file_names, datastore_files_by_bc[bc_pair])))
    pool.close()
    pool.join()
    datastore_files = [r.get() for r in _results]
    datastore_out = DataStore(datastore_files)
    log.info("Writing datastore to %s", output_file)
    datastore_out.write_json(output_file)
    return len(datastore_files)
Example #19
def test_gather_datastore_json():
    import subprocess
    from pbcommand.models import DataStore
    d = '/pbi/dept/secondary/siv/testdata/pbsvtools-unittest/data/test_scatter_align_datastore/'
    if1 = op.join(d, '1.aln.datastore.json')
    if2 = op.join(d, '2.aln.datastore.json')
    of = tempfile.NamedTemporaryFile(suffix=".datastore.json").name
    args = ['python', '-m', 'pbcoretools.tasks.gather', of, if1, if2]
    subprocess.check_call(args)
    out_fns = DataStore.load_from_json(of).to_dict()['files']
    expected_bam_1 = op.join(d, '1.bam')
    expected_bam_2 = op.join(d, '2.bam')
    assert out_fns[0]['path'] == expected_bam_1
    assert out_fns[1]['path'] == expected_bam_2
Example #20
def update_barcoded_sample_metadata(
        base_dir,
        datastore_file,
        input_reads,
        barcode_set,
        isoseq_mode=False,
        use_barcode_uuids=True,
        nproc=1,
        min_score_filter=Constants.BARCODE_QUALITY_GREATER_THAN):
    """
    Given a datastore JSON of SubreadSets produced by barcoding, apply the
    following updates to each:
    1. Include only the BioSample(s) corresponding to its barcode
    2. Add the BioSample name to the dataset name
    3. Add a ParentDataSet record in the Provenance section.
    """
    barcode_names, bio_samples_d, barcode_uuids_d, update_files, parent_info = _load_files_for_update(
        input_reads, barcode_set, datastore_file)
    pool = multiprocessing.Pool(nproc)
    _results = []
    for ds_file in update_files:
        _results.append(
            pool.apply_async(_update_barcoded_sample_metadata,
                             (base_dir, ds_file, barcode_names, parent_info,
                              isoseq_mode, use_barcode_uuids, bio_samples_d,
                              barcode_uuids_d, min_score_filter)))
    pool.close()
    pool.join()
    datastore_files = [r.get() for r in _results]
    # copy over the un-barcoded reads BAM
    dstore = DataStore.load_from_json(datastore_file)
    files = dstore.files.values()
    for f in files:
        if f.file_id != "barcoding.tasks.lima-0":
            datastore_files.append(f)
    return DataStore(datastore_files)
Example #21
    def test_integration(self):
        ds_out = op.join(self._output_dir, "datastore.json")
        args = [
            "python", "-m", "pbreports.report.subreads_reports",
            pbtestdata.get_file("subreads-sequel"), ds_out
        ]
        o, c, m = backticks(" ".join(args))
        self.assertEqual(c, 0)
        self.assertTrue(op.exists(ds_out))
        datastore = DataStore.load_from_json(ds_out)
        datastore_files = [f for u, f in datastore.files.items()]
        self.assertEqual(sorted([f.file_id for f in datastore_files]), [
            "pbreports.tasks.adapter_report_xml",
            "pbreports.tasks.filter_stats_report_xml",
            "pbreports.tasks.loading_report_xml"
        ])
Example #22
def _get_barcoded_datasets(reads_file):
    dir_name = os.path.dirname(os.path.abspath(reads_file))
    if reads_file.endswith(".datastore.json"):
        datastore = DataStore.load_from_json(reads_file)
        datasets = [
            _to_abs_path(dir_name, f.path)
            for u, f in datastore.files.items()
            if f.file_type_id in Constants.VALID_FT_IDS
        ]
        if len(datasets) == 0:
            raise ValueError("No datasets containing barcoded reads were " +
                             "present in the input.  This could mean that " +
                             "demultiplexing was run with incorrect inputs " +
                             "or an overly restrictive minimum barcode score.")
        return datasets
    else:
        return [reads_file]
Example #23
    def test_datastore_report_file_uuid(self):
        """Test that the DataStore file and the Underlying Report have the same UUID"""
        ds = DataStore.load_from_json(_to_ds_json(self.job_dir))
        n_tested = 0
        for ds_file in ds.files.values():
            if ds_file.file_type_id == FileTypes.REPORT.file_type_id:
                rpt = load_report_from_json(ds_file.path)
                emsg = "{p}: {u1} != {u2}".format(p=ds_file.path,
                                                  u1=rpt.uuid,
                                                  u2=ds_file.uuid)
                # by convention the DS UUID and the Report UUID should be the same value
                self.assertEqual(rpt.uuid, ds_file.uuid, emsg)
                n_tested += 1

        if n_tested == 0:
            raise unittest.SkipTest(
                "Warning. No Report JSON files in datastore.")
Example #24
    def _validate_datastore_reports(self, validate_func):

        ds = DataStore.load_from_json(_to_ds_json(self.job_dir))

        # set to True once we find one or more valid Reports
        have_reports = False

        for ds_file in ds.files.values():
            if ds_file.file_type_id == FileTypes.REPORT.file_type_id:
                try:
                    _ = validate_func(ds_file.path)
                except ValueError as e:
                    self.fail(
                        "Report validation failed:\n{e}".format(e=str(e)))
                else:
                    have_reports = True

        if not have_reports:
            raise unittest.SkipTest("No Report JSON files in datastore.")
        return have_reports
Example #25
def run_args(args):
    sample_name = None
    if not args.single_sample and not args.all_samples:
        bam = openDataFile(args.samples_file)
        sample_name = bam.readGroupTable[0].SampleName
        log.info("Sample name is {}".format(sample_name))
    elif args.all_samples:
        sample_name = "All Samples"
    files = []
    for file_id, file_type, label in FILE_IDS_AND_NAMES:
        file_path = getattr(args, file_id)
        if file_path is None:
            log.info("Skipping {}".format(file_id))
            continue
        assert file_path is not None and op.exists(file_path)
        if sample_name:
            label += " ({})".format(sample_name)
        files.append(to_datastore_file(file_path, file_id, file_type, label))
    DataStore(files).write_json(args.datastore)
    return 0
Example #26
def run_args(args):
    datastore_out = op.abspath(args.datastore_out)
    base_dir = op.dirname(datastore_out)
    datastore_files = []
    with ConsensusReadSet(args.dataset_file, strict=True) as ds:
        bam_file_name, file_prefix = get_prefix_and_bam_file_name(
            ds, is_barcoded=False)
        if args.mode == "fasta":
            datastore_files.extend(to_fastx_files(
                FileTypes.FASTA, ds, args.dataset_file, Constants.FASTA_FILE_IDS, base_dir, file_prefix, args.min_rq, no_zip=args.no_zip))
        elif args.mode == "fastq":
            datastore_files.extend(to_fastx_files(
                FileTypes.FASTQ, ds, args.dataset_file, Constants.FASTQ_FILE_IDS, base_dir, file_prefix, args.min_rq, no_zip=args.no_zip))
        elif args.mode == "consolidate":
            if bam_file_name is None:
                datastore_files.append(
                    consolidate_bam(base_dir, file_prefix, ds,
                                    min_rq=args.min_rq))
    DataStore(datastore_files).write_json(datastore_out)
    return 0
Example #27
    def test_datastore_dataset_file_uuid(self):
        """Test that the DataStore file and the Underlying Report have the same UUID"""
        dataset_type_ids = FileTypes.ALL_DATASET_TYPES().keys()

        ds = DataStore.load_from_json(_to_ds_json(self.job_dir))

        n_tested = 0
        for ds_file in ds.files.values():
            if ds_file.file_type_id in dataset_type_ids:
                path = ds_file.path
                dsf_uuid = ds_file.uuid
                uuid = getDataSetUuid(path)
                self.assertEqual(
                    uuid, dsf_uuid, "{p}: {u1} != {u2}".format(p=path,
                                                               u1=uuid,
                                                               u2=dsf_uuid))
                n_tested += 1

        if n_tested == 0:
            raise unittest.SkipTest(
                "Warning. No DataSet XML files in datastore.")
Example #28
    def test_failure_no_inputs(self):
        ds = DataStore([])
        ds_path = tempfile.NamedTemporaryFile(suffix=".datastore.json").name
        ds.write_json(ds_path)
        with self.assertRaises(ValueError) as err:
            report = run_to_report(ds_path, self.barcodes, self.subreads)
Example #29
    def _check_datastore(self, file_name):
        ds = DataStore.load_from_json(file_name)
        files = sorted([f.source_id for f in ds.files.values()])
        assert files == ["mapped_bam", "mapped_bam_bai"]
Example #30
def write_and_initialize_data_store_json(file_name, ds_files):
    ds = DataStore(ds_files)
    ds.write_json(file_name)
    return ds
Example #31
def gather_datastore(input_files, output_file, skip_empty=True):
    ds = DataStore([])
    for i_fn in input_files:
        for uuid, f in DataStore.load_from_json(i_fn).files.items():
            ds.add(f)
    ds.write_json(output_file)
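A sketch of merging two chunk datastores with the function above; the input and output paths are assumptions:

# Hypothetical usage; chunk paths are assumed for illustration.
gather_datastore(["chunk-1.datastore.json", "chunk-2.datastore.json"],
                 "gathered.datastore.json")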
Example #32
def _to_datastore(dx):
    # Friction to get around service endpoint not returning a list of files
    ds_files = [_to_ds_file(d) for d in dx]
    return DataStore(ds_files)