def pb_isoseq_collapse():
    """
    Internal Iso-Seq pipeline, starting from an existing Iso-Seq job and
    continuing to collapse, count and filter isoforms, requiring a reference
    genome GMAP dataset.

    The HQ isoforms FASTQ and the sample prefix pickle are taken from the
    "hq_isoforms_fq" and "sample_prefix_pickle" entries; the GMAP reference
    comes from Constants.ENTRY_DS_GMAPREF.

    :return: the result of _core_isoseq_collapse, unchanged.
    """
    # This function used to be defined twice in a row with identical
    # behaviour; only one definition is kept so the name is bound once.
    return _core_isoseq_collapse(
        hq_isoforms_fq=to_entry("hq_isoforms_fq"),
        gmap_ref_ds=Constants.ENTRY_DS_GMAPREF,
        sample_prefix_pickle=to_entry("sample_prefix_pickle"))
def pb_isoseq2_collapse():
    """
    Internal Iso-Seq2 pipeline, starting from an existing Iso-Seq2 job and
    continuing to collapse, count and filter isoforms, requiring a reference
    genome GMAP dataset.

    Inputs are taken from the 'e_ws_json', 'e_hq_isoforms_fq' and
    'e_sample_uc_json' entries; the GMAP reference comes from
    Constants.ENTRY_DS_GMAPREF.

    :return: the result of _core_isoseq2_collapse, unchanged.
    """
    # This function used to be defined twice in a row with identical
    # behaviour; only one definition is kept so the name is bound once.
    return _core_isoseq2_collapse(
        ws_json=to_entry('e_ws_json'),
        hq_isoforms_fq=to_entry('e_hq_isoforms_fq'),
        gmap_ref_ds=Constants.ENTRY_DS_GMAPREF,
        sample_to_uc_pickle_json=to_entry('e_sample_uc_json'))
def pb_isoseq_cluster_with_genome():
    """
    Internal Iso-Seq pipeline, starting from existing isoseq_flnc and
    isoseq_nfl datasets: run the cluster stage, then continue to collapse,
    count and filter isoforms, requiring a reference genome GMAP dataset.

    The collapse stage is fed from "pbtranscript.tasks.combine_cluster_bins"
    (":4" for the HQ isoforms FASTQ, ":7" for the sample prefix pickle).
    NOTE(review): the ":N" suffix presumably selects an output of that task;
    confirm against the task definition.

    :return: the cluster-stage result followed by the collapse-stage result,
             joined with ``+``.
    """
    # This function used to be defined twice in a row with identical
    # behaviour; only one definition is kept so the name is bound once.
    cluster_stage = _core_isoseq_cluster_chunk_by_bins(
        subreads_ds=Constants.ENTRY_DS_SUBREAD,
        ccs_ds=Constants.ENTRY_DS_CCS,
        flnc_ds=to_entry("e_flnc_fa"),
        nfl_ds=to_entry("e_nfl_fa"))
    collapse_stage = _core_isoseq_collapse(
        hq_isoforms_fq="pbtranscript.tasks.combine_cluster_bins:4",
        gmap_ref_ds=Constants.ENTRY_DS_GMAPREF,
        sample_prefix_pickle="pbtranscript.tasks.combine_cluster_bins:7")
    return cluster_stage + collapse_stage
def validate_entry_points(d):
    """
    Check the declared file type of every entry point listed in ``d``.

    Each item of ``d['entryPoints']`` is read through its 'entryId' and
    'fileTypeId' keys. The entry id is converted with ``to_entry``; when the
    converted id is a key of ``Constants.ENTRY_FILE_TYPES``, the item's
    'fileTypeId' must equal the ``file_type_id`` registered there. Entry ids
    that are not in that table are not checked.

    :param d: mapping with an 'entryPoints' iterable of entry point mappings
    :raises ValueError: on the first entry point whose 'fileTypeId' differs
                        from the registered file type id
    """
    from pbsmrtpipe.pb_pipelines.pb_pipeline_constants import Constants, to_entry
    for entry_point in d['entryPoints']:
        entry_id = to_entry(entry_point['entryId'])
        if entry_id not in Constants.ENTRY_FILE_TYPES:
            # No registered file type for this entry id: nothing to compare.
            continue
        expected_type_id = Constants.ENTRY_FILE_TYPES[entry_id].file_type_id
        declared_type_id = entry_point['fileTypeId']
        if declared_type_id != expected_type_id:
            message = "Expected {r} for {e}, got {t}".format(
                r=expected_type_id, e=entry_id, t=declared_type_id)
            raise ValueError(message)
def pb_isoseq_cluster():
    """
    Delegate to _core_isoseq_cluster_chunk_by_bins using the subread and CCS
    dataset entry points from Constants together with the "e_flnc_fa" and
    "e_nfl_fa" entries.

    :return: the result of _core_isoseq_cluster_chunk_by_bins, unchanged.
    """
    cluster_inputs = dict(
        subreads_ds=Constants.ENTRY_DS_SUBREAD,
        ccs_ds=Constants.ENTRY_DS_CCS,
        flnc_ds=to_entry("e_flnc_fa"),
        nfl_ds=to_entry("e_nfl_fa"))
    return _core_isoseq_cluster_chunk_by_bins(**cluster_inputs)
def pb_isoseq2_cluster():
    """
    Delegate to _core_isoseq2_cluster using the subread and CCS dataset entry
    points from Constants together with the "e_flnc_fa" and "e_nfl_fa"
    entries.

    :return: the result of _core_isoseq2_cluster, unchanged.
    """
    cluster_inputs = dict(
        subreads_ds=Constants.ENTRY_DS_SUBREAD,
        ccs_ds=Constants.ENTRY_DS_CCS,
        flnc_ds=to_entry("e_flnc_fa"),
        nfl_ds=to_entry("e_nfl_fa"))
    return _core_isoseq2_cluster(**cluster_inputs)