def test_basic_construction(tmpdir, from_file, bundled_piface):
    """ PipelineInterface constructor handles Mapping or filepath. """
    if from_file:
        pipe_iface_config = tmpdir.join("pipe-iface-conf.yaml").strpath
        with open(pipe_iface_config, 'w') as f:
            yaml.safe_dump(bundled_piface, f)
    else:
        pipe_iface_config = bundled_piface
    pi = PipelineInterface(pipe_iface_config)

    # Check for the protocol mapping and pipeline interface keys.
    assert PL_KEY in pi, "Missing pipeline key ({})".format(PL_KEY)
    assert PROTOMAP_KEY in pi, \
        "Missing protocol mapping key ({})".format(PROTOMAP_KEY)

    # The source filepath and pipelines path are set only when built from a file.
    assert pi.pipe_iface_file == (pipe_iface_config if from_file else None)
    if from_file:
        assert pi.pipelines_path == tmpdir.strpath
    else:
        assert pi.pipelines_path is None

    # Validate protocol mapping and interfaces contents.
    assert AttributeDict(bundled_piface[PL_KEY]) == pi[PL_KEY]
    assert AttributeDict(bundled_piface[PROTOMAP_KEY]) == pi[PROTOMAP_KEY]

    # Certain access modes should agree with one another.
    assert pi.pipelines == pi[PL_KEY]
    assert list(pi.pipelines.keys()) == pi.pipeline_names
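# A minimal sketch of the fixtures the test above assumes; the real
# definitions would live in the suite's conftest.py. The parametrized
# "from_file" fixture exercises both construction modes, and the
# "bundled_piface" payload shown here is a hypothetical minimal bundle,
# not taken from the source.
import pytest

@pytest.fixture(params=[False, True])
def from_file(request):
    # Run each test once with a raw Mapping and once with a YAML filepath.
    return request.param

@pytest.fixture
def bundled_piface():
    # Hypothetical minimal bundle: one protocol mapped to one pipeline.
    return {
        PROTOMAP_KEY: {"ATAC": "atacseq.py"},
        PL_KEY: {"atacseq.py": {"name": "atacseq", "path": "atacseq.py"}},
    }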
def main(): # Parse command-line arguments parser = ArgumentParser(prog="starrseq-pipeline", description="STARR-seq pipeline.") parser = arg_parser(parser) parser = pypiper.add_pypiper_args( parser, groups=["ngs", "looper", "resource", "pypiper"]) args = parser.parse_args() # Read in yaml configs sample = STARRSeqSample(pd.Series(yaml.load(open(args.sample_config, "r")))) # Check if merged if len(sample.data_path.split(" ")) > 1: sample.merged = True else: sample.merged = False sample.prj = AttributeDict(sample.prj) sample.paths = AttributeDict(sample.paths.__dict__) # Check read type if not provided if not hasattr(sample, "ngs_inputs"): sample.ngs_inputs = [sample.data_source] if not hasattr(sample, "read_type"): sample.set_read_type() # Shorthand for read_type if sample.read_type == "paired": sample.paired = True else: sample.paired = False # Set file paths sample.set_file_paths() # sample.make_sample_dirs() # should be fixed to check if values of paths are strings and paths indeed # Start Pypiper object # Best practice is to name the pipeline with the name of the script; # or put the name in the pipeline interface. pipe_manager = pypiper.PipelineManager(name="starrseq", outfolder=sample.paths.sample_root, args=args) pipe_manager.config.tools.scripts_dir = os.path.join( os.path.dirname(os.path.realpath(__file__)), "tools") # Start main function process(sample, pipe_manager, args)
def main(): # Parse command-line arguments parser = ArgumentParser( prog="dropseq-pipeline", description="Drop-seq pipeline." ) parser = arg_parser(parser) parser = pypiper.add_pypiper_args(parser, groups=["ngs", "looper", "resource", "pypiper"]) args = parser.parse_args() if args.sample_config is None: parser.print_help() return 1 # Read in yaml configs sample = AttributeDict(yaml.load(open(args.sample_config, "r"))) pipeline_config = AttributeDict(yaml.load(open(os.path.join(os.path.dirname(__file__), args.config_file), "r"))) # Start main function process(sample, pipeline_config, args)
def main(): # Parse command-line arguments parser = ArgumentParser(prog="chipseq-pipeline", description="ChIP-seq pipeline.") parser = arg_parser(parser) parser = pypiper.add_pypiper_args(parser, groups=["all"]) args = parser.parse_args() if args.sample_config is None: parser.print_help() return 1 # Read in yaml configs series = pd.Series(yaml.load(open(args.sample_config, "r"))) # looper 0.6/0.7 compatibility: if "protocol" in series.index: key = "protocol" elif "library" in series.index: key = "library" else: raise KeyError( "Sample does not contain either a 'protocol' or 'library' attribute!" ) # Create Sample object if series[key] != "ChIPmentation": sample = ChIPseqSample(series) else: sample = ChIPmentation(series) # Check if merged if len(sample.data_path.split(" ")) > 1: sample.merged = True else: sample.merged = False sample.prj = AttributeDict(sample.prj) sample.paths = AttributeDict(sample.paths.__dict__) # Check read type if not provided if not hasattr(sample, "ngs_inputs"): sample.ngs_inputs = [sample.data_source] if not hasattr(sample, "read_type"): sample.set_read_type() else: if sample.read_type not in ['single', 'paired']: sample.set_read_type() # Shorthand for read_type if sample.read_type == "paired": sample.paired = True else: sample.paired = False # Set file paths sample.set_file_paths() # sample.make_sample_dirs() # should be fixed to check if values of paths are strings and paths indeed # Start Pypiper object # Best practice is to name the pipeline with the name of the script; # or put the name in the pipeline interface. pipe_manager = pypiper.PipelineManager(name="chipseq", outfolder=sample.paths.sample_root, args=args) # Start main function if not args.only_peaks: pipe_manager = process(sample, pipe_manager, args) else: print("Skipped processing sample '{}'.".format(sample.name)) # If sample does not have "ctrl" attribute, finish processing it. if not hasattr(sample, "compare_sample"): pipe_manager.stop_pipeline() print("Finished processing sample '{}'.".format(sample.name)) return # If compare_sample is empty string, finish processing. if sample.compare_sample == "": pipe_manager.stop_pipeline() print("Finished processing sample '{}'.".format(sample.name)) return # The pipeline will now wait for the comparison sample file to be completed pipe_manager._wait_for_file( sample.filtered.replace(sample.name, sample.compare_sample)) # Start peak calling function call_peaks(sample, pipe_manager, args)
def main(): # Parse command-line arguments parser = ArgumentParser(prog="atacseq-pipeline", description="ATAC-seq pipeline.") parser = arg_parser(parser) parser = pypiper.add_pypiper_args( parser, groups=["ngs", "looper", "resource", "pypiper"]) args = parser.parse_args() # Read in yaml configs series = pd.Series(yaml.load(open(args.sample_config, "r"))) # looper 0.6/0.7 compatibility: if "protocol" in series.index: key = "protocol" elif "library" in series.index: key = "library" else: raise KeyError( "Sample does not contain either a 'protocol' or 'library' attribute!" ) # Create Sample object if series[key] != "DNase-seq": sample = ATACseqSample(series) else: sample = DNaseSample(series) # Check if merged if len(sample.data_path.split(" ")) > 1: sample.merged = True else: sample.merged = False sample.prj = AttributeDict(sample.prj) sample.paths = AttributeDict(sample.paths.__dict__) # Check read type if not provided if not hasattr(sample, "ngs_inputs"): sample.ngs_inputs = [sample.data_source] if not hasattr(sample, "read_type"): sample.set_read_type() else: if sample.read_type not in ['single', 'paired']: sample.set_read_type() # Shorthand for read_type if sample.read_type == "paired": sample.paired = True else: sample.paired = False # Set file paths sample.set_file_paths() # sample.make_sample_dirs() # should be fixed to check if values of paths are strings and paths indeed # Start Pypiper object # Best practice is to name the pipeline with the name of the script; # or put the name in the pipeline interface. pipe_manager = pypiper.PipelineManager(name="atacseq", outfolder=sample.paths.sample_root, args=args) pipe_manager.config.tools.scripts_dir = os.path.join( os.path.dirname(os.path.realpath(__file__)), "tools") # Start main function process(sample, pipe_manager, args)