def test_basic_construction(tmpdir, from_file, bundled_piface):
    """ PipelineInterface constructor handles Mapping or filepath. """

    if from_file:
        # Serialize the bundled data to a temp YAML file and construct
        # the interface from that path instead of the raw mapping.
        conf_path = tmpdir.join("pipe-iface-conf.yaml").strpath
        with open(conf_path, 'w') as conf_file:
            yaml.safe_dump(bundled_piface, conf_file)
        pipe_iface_config = conf_path
    else:
        pipe_iface_config = bundled_piface

    pi = PipelineInterface(pipe_iface_config)

    # Both top-level sections must be present regardless of input form.
    assert PL_KEY in pi, "Missing pipeline key ({})".format(PL_KEY)
    assert PROTOMAP_KEY in pi, \
        "Missing protocol mapping key: ({})".format(PROTOMAP_KEY)

    # Only file-based construction records the source file and its folder.
    assert pi.pipe_iface_file == (pipe_iface_config if from_file else None)
    if from_file:
        assert pi.pipelines_path == tmpdir.strpath
    else:
        assert pi.pipelines_path is None

    # Parsed sections should mirror the raw bundled data.
    assert AttributeDict(bundled_piface[PL_KEY]) == pi[PL_KEY]
    assert AttributeDict(bundled_piface[PROTOMAP_KEY]) == pi[PROTOMAP_KEY]

    # Different access modes for the same data should agree.
    assert pi.pipelines == pi[PL_KEY]
    assert list(pi.pipelines.keys()) == pi.pipeline_names
# ---- Example 2 ----
def main():
    """Entry point for the STARR-seq pipeline.

    Parses command-line arguments, builds the sample from its YAML config,
    normalizes sample attributes, and hands off to ``process``.
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="starrseq-pipeline",
                            description="STARR-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml config. safe_load because yaml.load without an explicit
    # Loader is deprecated/removed in PyYAML >= 5.1 (and unsafe on untrusted
    # input); the context manager closes the handle deterministically.
    with open(args.sample_config, "r") as handle:
        sample = STARRSeqSample(pd.Series(yaml.safe_load(handle)))

    # A space-separated data_path means the sample was merged from
    # multiple input files.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Infer NGS inputs / read type when the sample config didn't provide them.
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="starrseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
# ---- Example 3 ----
def main():
    """Entry point for the Drop-seq pipeline.

    Parses command-line arguments, loads the sample and pipeline YAML
    configs, and hands off to ``process``. Returns 1 when no sample
    config is supplied.
    """
    # Parse command-line arguments
    parser = ArgumentParser(
        prog="dropseq-pipeline",
        description="Drop-seq pipeline."
    )
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs. safe_load because yaml.load without an explicit
    # Loader is deprecated/removed in PyYAML >= 5.1 (and unsafe on untrusted
    # input); the context managers close the handles deterministically.
    with open(args.sample_config, "r") as handle:
        sample = AttributeDict(yaml.safe_load(handle))
    config_path = os.path.join(os.path.dirname(__file__), args.config_file)
    with open(config_path, "r") as handle:
        pipeline_config = AttributeDict(yaml.safe_load(handle))

    # Start main function
    process(sample, pipeline_config, args)
# ---- Example 4 ----
def main():
    """Entry point for the ChIP-seq pipeline.

    Builds the sample from its YAML config, runs the processing stage
    (unless ``--only-peaks``), and then calls peaks against the comparison
    sample when one is configured.
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="chipseq-pipeline",
                            description="ChIP-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["all"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml config. safe_load because yaml.load without an explicit
    # Loader is deprecated/removed in PyYAML >= 5.1 (and unsafe on untrusted
    # input); the context manager closes the handle deterministically.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!"
        )

    # Create Sample object
    if series[key] != "ChIPmentation":
        sample = ChIPseqSample(series)
    else:
        sample = ChIPmentation(series)

    # A space-separated data_path means the sample was merged from
    # multiple input files.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Infer NGS inputs; (re)derive read type when missing or invalid.
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type") or \
            sample.read_type not in ['single', 'paired']:
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="chipseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)

    # Start main function
    if not args.only_peaks:
        pipe_manager = process(sample, pipe_manager, args)
    else:
        print("Skipped processing sample '{}'.".format(sample.name))

    # Without a (non-empty) comparison sample there is nothing left to do;
    # getattr covers both the missing-attribute and empty-string cases.
    if getattr(sample, "compare_sample", "") == "":
        pipe_manager.stop_pipeline()
        print("Finished processing sample '{}'.".format(sample.name))
        return

    # The pipeline will now wait for the comparison sample file to be completed
    pipe_manager._wait_for_file(
        sample.filtered.replace(sample.name, sample.compare_sample))

    # Start peak calling function
    call_peaks(sample, pipe_manager, args)
# ---- Example 5 ----
def main():
    """Entry point for the ATAC-seq pipeline.

    Builds the sample (ATAC-seq or DNase-seq) from its YAML config,
    normalizes sample attributes, and hands off to ``process``.
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="atacseq-pipeline",
                            description="ATAC-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml config. safe_load because yaml.load without an explicit
    # Loader is deprecated/removed in PyYAML >= 5.1 (and unsafe on untrusted
    # input); the context manager closes the handle deterministically.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!"
        )

    # Create Sample object
    if series[key] != "DNase-seq":
        sample = ATACseqSample(series)
    else:
        sample = DNaseSample(series)

    # A space-separated data_path means the sample was merged from
    # multiple input files.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Infer NGS inputs; (re)derive read type when missing or invalid.
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type") or \
            sample.read_type not in ['single', 'paired']:
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="atacseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)