Example #1
0
def main():
    """Hi-C pipeline entry point.

    Parses command-line arguments, builds a HiCSample/HiChIPSample from
    the YAML sample config, configures paths and a Pypiper manager, then
    dispatches to process().
    """
    # Parse command-line arguments
    parser = ArgumentParser(
        prog="hic-pipeline",
        description="Hi-C pipeline."
    )
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    # Consistent with the sibling pipelines (e.g. chipseq/dropseq):
    # show usage instead of crashing when no sample config is given.
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml config. safe_load avoids arbitrary object
    # construction (yaml.load without an explicit Loader is deprecated
    # and unsafe), and the context manager closes the file handle.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility: the protocol may be stored under
    # either attribute name depending on looper version.
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!")

    # Create Sample object of the type matching the protocol
    if series[key] != "HiChIP":
        sample = HiCSample(series)
    else:
        sample = HiChIPSample(series)

    # A space-separated data_path means several input files were merged
    sample.merged = " " in sample.data_path
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="hic", outfolder=sample.paths.sample_root, args=args)
    pipe_manager.config.tools.scripts_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
Example #2
0
    def test_no_default_env_settings_provided(
            self, minimal_project_conf_path,
            explicit_null, compute_env_attname):
        """ Project doesn't require default environment settings. """

        # Either pass an explicit null for the default compute setting,
        # or omit the keyword entirely; both cases must be accepted.
        ctor_kwargs = {}
        if explicit_null:
            ctor_kwargs["default_compute"] = None
        prj = Project(minimal_project_conf_path, **ctor_kwargs)

        observed = getattr(prj, compute_env_attname)
        expected = self.default_compute_settings(prj)[compute_env_attname]

        if compute_env_attname == "compute":
            # 'compute' names both a section in the default environment
            # settings file and a Project attribute. A Project instance
            # selects just one of the options in the file's 'compute'
            # section as the value for its own 'compute' attribute.
            expected = expected["default"]
            observed = _compute_paths_to_names(observed)
        elif compute_env_attname == "environment":
            observed = AttributeDict({
                "compute": _env_paths_to_names(observed["compute"])})

        assert expected == observed
Example #3
0
def main():
    """RNA-seq pipeline entry point.

    Parses command-line arguments, builds an RNASeqSample from the YAML
    sample config, configures paths and a Pypiper manager, then
    dispatches to process().
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="rnaseq-pipeline",
                            description="RNA-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["all"])
    args = parser.parse_args()
    # Consistent with the sibling pipelines (e.g. chipseq/dropseq):
    # show usage instead of crashing when no sample config is given.
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml config. safe_load avoids arbitrary object
    # construction (yaml.load without an explicit Loader is deprecated
    # and unsafe), and the context manager closes the file handle.
    with open(args.sample_config, "r") as handle:
        sample = RNASeqSample(pd.Series(yaml.safe_load(handle)))

    # A space-separated data_path means several input files were merged
    sample.merged = " " in sample.data_path
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="rnaseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
Example #4
0
def main():
    """Drop-seq pipeline entry point.

    Parses command-line arguments, loads the sample and pipeline YAML
    configs, then dispatches to process().
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="dropseq-pipeline",
                            description="Drop-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["all"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs. safe_load avoids arbitrary object
    # construction (yaml.load without an explicit Loader is deprecated
    # and unsafe), and the context managers close the file handles.
    with open(args.sample_config, "r") as handle:
        sample = AttributeDict(yaml.safe_load(handle))
    # The pipeline config lives next to this script.
    config_path = os.path.join(os.path.dirname(__file__), args.config_file)
    with open(config_path, "r") as handle:
        pipeline_config = AttributeDict(yaml.safe_load(handle))

    # Start main function
    process(sample, pipeline_config, args)
Example #5
0
def main():
    """ChIP-seq pipeline entry point.

    Parses command-line arguments, builds a ChIPseqSample/ChIPmentation
    sample from the YAML sample config, runs processing, then (when a
    comparison sample exists) waits for it and calls peaks.
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="chipseq-pipeline",
                            description="ChIP-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["all"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml config. safe_load avoids arbitrary object
    # construction (yaml.load without an explicit Loader is deprecated
    # and unsafe), and the context manager closes the file handle.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility: the protocol may be stored under
    # either attribute name depending on looper version.
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!"
        )

    # Create Sample object of the type matching the protocol
    if series[key] != "ChIPmentation":
        sample = ChIPseqSample(series)
    else:
        sample = ChIPmentation(series)

    # A space-separated data_path means several input files were merged
    sample.merged = " " in sample.data_path
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    # sample.make_sample_dirs()  # should be fixed to check if values of paths are strings and paths indeed

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="chipseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)

    # Start main function
    if not args.only_peaks:
        pipe_manager = process(sample, pipe_manager, args)
    else:
        print("Skipped processing sample '{}'.".format(sample.name))

    # If sample does not have "ctrl" attribute, finish processing it.
    if not hasattr(sample, "compare_sample"):
        pipe_manager.stop_pipeline()
        print("Finished processing sample '{}'.".format(sample.name))
        return

    # The pipeline will now wait for the comparison sample file to be
    # completed. NOTE(review): _wait_for_file is a private pypiper API;
    # confirm it is still available in the pinned pypiper version.
    pipe_manager._wait_for_file(
        sample.filtered.replace(sample.name, sample.compare_sample))

    # Start peak calling function
    call_peaks(sample, pipe_manager, args)
Example #6
0
 def test_AttributeDict_representations_smoke(self, data, funcname):
     """ Text representation of base AttributeDict doesn't fail. """
     # Merely invoking the named representation method must not raise.
     getattr(AttributeDict(data), funcname)()