Example #1
File: cli.py Project: lpp1985/lpp_Script
def _wrapper(path):
    p = validate_file(path)
    sx = _get_size_mb(path)
    if sx > max_size_mb:
        raise argparse.ArgumentTypeError(
            "Fasta file is too large {s:.2f} MB > {m:.2f} MB. Create a ReferenceSet using {e}, then import using `pbservice import-dataset /path/to/referenceset.xml` "
            .format(e=Constants.FASTA_TO_REFERENCE,
                    s=sx,
                    m=Constants.MAX_FASTA_FILE_MB))
    else:
        return p
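`_wrapper` closes over `max_size_mb`, so it reads like the inner function of a validator factory handed to argparse as a `type=` callable. A minimal self-contained sketch of that pattern; `make_max_size_validator` and the `os`-based stand-ins for `validate_file` and `_get_size_mb` are assumptions for illustration, not the project's actual API:

import argparse
import os

def make_max_size_validator(max_size_mb):
    # Hypothetical factory: build a type= callable that rejects oversized files.
    def _wrapper(path):
        # Stand-in for validate_file: require an existing regular file.
        if not os.path.isfile(path):
            raise argparse.ArgumentTypeError("'{p}' is not a file".format(p=path))
        # Stand-in for _get_size_mb.
        size_mb = os.path.getsize(path) / (1024.0 * 1024.0)
        if size_mb > max_size_mb:
            raise argparse.ArgumentTypeError(
                "File is too large {s:.2f} MB > {m:.2f} MB".format(
                    s=size_mb, m=max_size_mb))
        return path
    return _wrapper

parser = argparse.ArgumentParser()
parser.add_argument("fasta", type=make_max_size_validator(100.0))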
Example #2
File: cli.py Project: lpp1985/lpp_Script
def _validate_analysis_job_json(path):
    px = validate_file(path)
    with open(px, 'r') as f:
        d = json.loads(f.read())

    try:
        load_analysis_job_json(d)
        return px
    except (KeyError, TypeError, ValueError) as e:
        raise argparse.ArgumentTypeError("Invalid analysis.json format for '{p}' {e}".format(p=px, e=repr(e)))
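A `type=` callable that raises `argparse.ArgumentTypeError` makes argparse print a clean usage error and exit, rather than dumping a traceback, which is why the validator returns the path on success. A sketch of the wiring, assuming `_validate_analysis_job_json` above is in scope; the argument name is illustrative:

import argparse

p = argparse.ArgumentParser()
# On a bad file argparse reports:
#   error: argument analysis_json: Invalid analysis.json format for '...' ...
p.add_argument("analysis_json", type=_validate_analysis_job_json,
               help="Path to an analysis.json job description")
args = p.parse_args()
# args.analysis_json is the validated path returned by the callable.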
Example #3
File: cli.py Project: knyquist/pbcommand
def _wrapper(path):
    p = validate_file(path)
    sx = _get_size_mb(path)
    if sx > max_size_mb:
        raise argparse.ArgumentTypeError(
            "Fasta file is too large {s:.2f} MB > {m:.2f} MB. Create a ReferenceSet using {e}, then import using `pbservice import-dataset /path/to/referenceset.xml` "
            .format(e=Constants.FASTA_TO_REFERENCE,
                    s=sx,
                    m=Constants.MAX_FASTA_FILE_MB))
    else:
        return p
Example #4
def _validate_report(path):
    p = validate_file(path)
    _ = load_report_from_json(path)
    return p
Example #5
def _f(path):
    # Validate, then return the (possibly normalized) path; as scraped,
    # this snippet returned _f(path), which would recurse forever.
    path = validate_file(path)
    return path
Example #6
def to_report(filtered_csv,
              output_dir,
              dpi=DEFAULT_DPI,
              thumb_dpi=DEFAULT_THUMB_DPI):
    """
    Run Report
    """
    validate_file(filtered_csv)
    validate_dir(output_dir)

    aggregators = {
        'nbases': SumAggregator('length'),
        'nreads': CountAggregator('length'),
        'mean_subreadlength': MeanSubreadLengthAggregator('length'),
        'max_readlength': MaxAggregator('length'),
        'n50': N50Aggregator('length'),
        'readlength_histogram': HistogramAggregator('length',
                                                    0,
                                                    100,
                                                    nbins=10000),
        'subread': SubreadLengthHistogram(dx=100)
    }

    passed_filter = lambda record: record.passed_filter is True

    passed_filter_func = functools.partial(_apply, [passed_filter],
                                           aggregators.values())

    all_subread_aggregators = {
        'raw_nreads':
        SumAggregator('length'),
        'max_raw_readlength':
        MaxAggregator('length'),
        'raw_readlength_histogram':
        HistogramAggregator('length', 0, 100, nbins=10000)
    }

    all_filter_func = functools.partial(_apply, [null_filter],
                                        all_subread_aggregators.values())

    funcs = [passed_filter_func, all_filter_func]

    with open(filtered_csv, 'r') as f:
        # read in header
        header = f.readline()
        # validate_header(header)
        applyer(to_record, f, funcs)

    for aggregator in itertools.chain(aggregators.values(),
                                      all_subread_aggregators.values()):
        log.info(aggregator)

    # Check if any reads are found
    if all_subread_aggregators['raw_nreads'].attribute == 0:
        raise NoSubreadsFound(
            "No subreads found in {f}".format(f=filtered_csv))

    # Now check
    if aggregators['nreads'].attribute == 0:
        msg = "No subreads passed the filter in {f}.".format(f=filtered_csv)
        raise NoSubreadsPassedFilter(msg)

    # this is where you change the plotting options
    plot_view = PlotViewProperties(Constants.P_POST_FILTER,
                                   Constants.PG_SUBREAD_LENGTH,
                                   custom_subread_length_histogram,
                                   Constants.I_FILTER_SUBREADS_HIST,
                                   xlabel=get_plot_xlabel(
                                       spec, Constants.PG_SUBREAD_LENGTH,
                                       Constants.P_POST_FILTER),
                                   ylabel="Subreads",
                                   rlabel="bp > Subread Length",
                                   thumb="filtered_subread_report_thmb.png",
                                   use_group_thumb=True,
                                   color=get_green(3),
                                   edgecolor=get_green(2))

    view_config_d = {'post_filter': plot_view}
    id_aggregators = {'post_filter': aggregators['subread']}

    plot_groups = to_plot_groups(view_config_d, output_dir, id_aggregators)

    to_a = lambda n: aggregators[n].attribute

    attributes = _to_attributes(to_a('nreads'), to_a('nbases'),
                                to_a('mean_subreadlength'), to_a('n50'))

    report = Report(Constants.R_ID,
                    plotgroups=plot_groups,
                    attributes=attributes)

    log.debug(str(report))

    return spec.apply_view(report)
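The helpers `_apply`, `applyer`, `to_record`, and `null_filter` are not shown, but the call shapes imply a single streaming pass: each CSV line becomes a record, each filter gates it, and every aggregator consumes the records that survive. A self-contained sketch under those inferred signatures (including the assumption that aggregators expose an `apply(record)` method):

def null_filter(record):
    # Pass everything through (used for the "all subreads" aggregators).
    return True

def _apply(filters, aggregators, record):
    # Feed the record to every aggregator iff it passes every filter.
    if all(f(record) for f in filters):
        for aggregator in aggregators:
            aggregator.apply(record)

def applyer(to_record, file_handle, funcs):
    # Single pass over the file: parse each line once, then fan the record
    # out to each partially-applied _apply (filtered and unfiltered groups).
    for line in file_handle:
        record = to_record(line)
        for func in funcs:
            func(record)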
Example #7
File: runner.py Project: wenmm/pbsmrtpipe
def validate_file_and_load_manifest(path):
    rt = RunnableTask.from_manifest_json(validate_file(path))
    # if we got here everything is valid
    return path
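Note the design choice: the manifest is parsed purely to prove it loads, and the original path (not the `RunnableTask`) is returned, so the function stays usable as an argparse `type=` validator. The same validate-by-loading pattern can be factored out; a hypothetical sketch, not pbsmrtpipe's API:

def validated_by(loader):
    # Return a type= callable that parses the file as a validity check,
    # then hands back the path for downstream code to re-load as needed.
    def _validate(path):
        loader(path)  # raises on an invalid or unreadable file
        return path
    return _validate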