Example #1
    def test_single_sample(self, tmpdir, path_proj_conf_file,
                           which_sample_index):
        """ Single Sample is perfectly valid for Project and sheet. """

        # Pull out the values for the current sample.
        values = DATA[which_sample_index]

        # Write the annotations.
        anns_path = os.path.join(tmpdir.strpath, NAME_ANNOTATIONS_FILE)
        with open(anns_path, 'w') as anns_file:
            anns_file.write("{}\n".format(",".join(COLUMNS)))
            anns_file.write("{}\n".format(",".join([str(v) for v in values])))

        # Build the sheet.
        p = Project(path_proj_conf_file)
        sheet = p.build_sheet()

        # It should be a single-row DataFrame.
        assert isinstance(sheet, pd.DataFrame)
        assert 1 == len(sheet)
        assert 1 == p.num_samples

        # There will be additional values added from the Project,
        # but the core data values will have remained the same.
        sample = list(p.samples)[0]
        for attr, exp_val in zip(COLUMNS, values):
            obs_val = getattr(sample, attr)
            try:
                assert exp_val == obs_val
            except AssertionError as e:
                try:
                    assert exp_val == int(obs_val)
                except AssertionError:
                    raise e
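The two write calls above emit a header row plus one data row. A minimal runnable sketch of what the annotations file ends up holding, using hypothetical COLUMNS and values (the real module-level test data is defined elsewhere):

import io

# Hypothetical stand-ins for the module-level COLUMNS/DATA used by the test.
COLUMNS = ["sample_name", "library", "file"]
values = ["sample1", "ATAC-seq", "data/sample1.bam"]

buf = io.StringIO()
buf.write("{}\n".format(",".join(COLUMNS)))
buf.write("{}\n".format(",".join([str(v) for v in values])))
print(buf.getvalue())
# sample_name,library,file
# sample1,ATAC-seq,data/sample1.bam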
    def test_nonexistent_env_settings_file(
            self, tmpdir, minimal_project_conf_path,
            env_config_filepath, envconf_filename):
        """ Project doesn't require default environment settings. """

        # Create a path to a nonexistent file based on the true default file.
        envconf_dirpath, _ = os.path.split(env_config_filepath)
        misnamed_envconf = os.path.join(envconf_dirpath, envconf_filename)

        # Create and add log message handler for expected errors.
        logfile = tmpdir.join("project-error-messages.log").strpath
        expected_error_message_handler = logging.FileHandler(logfile, mode='w')
        expected_error_message_handler.setLevel(logging.ERROR)
        looper.models._LOGGER.handlers.append(expected_error_message_handler)

        # Create Project, expecting to generate error messages.
        project = Project(minimal_project_conf_path,
                          default_compute=misnamed_envconf)

        # Remove the temporary message handler.
        del looper.models._LOGGER.handlers[-1]

        # Ensure nulls for all relevant Project attributes.
        self._assert_null_compute_environment(project)
        # We should have two error messages, describing the exception caught
        # during default environment parsing and that it couldn't be set.
        with open(logfile, 'r') as messages:
            exception_messages = messages.readlines()
        try:
            assert 2 == len(exception_messages)
        except AssertionError:
            print("Exception messages: {}".format(exception_messages))
            raise
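The append/delete handler dance above is a general pattern for asserting on log output; a minimal self-contained sketch of the same idea, using a throwaway logger rather than looper's module logger:

import logging
import os
import tempfile

# Temporary file to capture messages at ERROR level and above.
logger = logging.getLogger("demo")
fd, logfile = tempfile.mkstemp(suffix=".log")
os.close(fd)

handler = logging.FileHandler(logfile, mode='w')
handler.setLevel(logging.ERROR)
logger.addHandler(handler)
try:
    logger.error("expected failure message")
finally:
    logger.removeHandler(handler)  # same effect as del handlers[-1] above
    handler.close()

with open(logfile) as messages:
    assert "expected failure message" in messages.read()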
    def test_minimal_configuration_name_inference(
            self, tmpdir, minimal_project_conf_path, env_config_filepath):
        """ Project infers name from where its configuration lives. """
        project = Project(minimal_project_conf_path,
                          default_compute=env_config_filepath)
        _, expected_name = os.path.split(tmpdir.strpath)
        assert expected_name == project.name
    def test_no_default_env_settings_provided(
            self, minimal_project_conf_path,
            explicit_null, compute_env_attname):
        """ Project doesn't require default environment settings. """

        kwargs = {"default_compute": None} if explicit_null else {}
        project = Project(minimal_project_conf_path, **kwargs)

        observed_attribute = getattr(project, compute_env_attname)
        expected_attribute = \
                self.default_compute_settings(project)[compute_env_attname]

        if compute_env_attname == "compute":
            # 'compute' refers to a section in the default environment
            # settings file and also to a Project attribute. A Project
            # instance selects just one of the options in the 'compute'
            # section of the file as the value for its 'compute' attribute.
            expected_attribute = expected_attribute["default"]
            observed_attribute = _compute_paths_to_names(observed_attribute)
        elif compute_env_attname == "environment":
            envs_with_reduced_filepaths = \
                    _env_paths_to_names(observed_attribute["compute"])
            observed_attribute = AttributeDict(
                    {"compute": envs_with_reduced_filepaths})

        assert expected_attribute == observed_attribute
Example #5
def project(request, tmpdir, env_config_filepath):
    """ Provide requesting test case with a basic Project instance. """

    # Write a minimal annotations table (sample names plus one data column).
    annotations_filename = "anns-fill.csv"
    anns_path = tmpdir.join(annotations_filename).strpath
    num_samples = request.getfixturevalue("num_samples")
    df = pd.DataFrame(
        OrderedDict([("sample_name",
                      ["sample{}".format(i) for i in range(num_samples)]),
                     ("data", range(num_samples))]))
    with open(anns_path, 'w') as anns_file:
        df.to_csv(anns_file, sep="\t", index=False)

    # Create the Project config data.
    config_data = {"metadata": {SAMPLE_ANNOTATIONS_KEY: annotations_filename}}
    if request.getfixturevalue(request.cls.CONFIG_DATA_PATHS_HOOK):
        config_data["paths"] = {}
        paths_dest = config_data["paths"]
    else:
        paths_dest = config_data["metadata"]

    # Add the paths data to the Project config.
    for path_name, path in PATH_BY_TYPE.items():
        paths_dest[path_name] = os.path.join(tmpdir.strpath, path)

    # Write the Project config file.
    conf_path = tmpdir.join("proj-conf.yaml").strpath
    with open(conf_path, 'w') as conf_file:
        yaml.safe_dump(config_data, conf_file)

    return Project(conf_path, default_compute=env_config_filepath)
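A sketch of how a test class might drive this fixture; the num_samples fixture and the CONFIG_DATA_PATHS_HOOK value below are assumptions about the surrounding test module, not taken from it:

import pytest

@pytest.fixture(params=[1, 3])
def num_samples(request):
    """ Parameterize how many samples the annotations file gets. """
    return request.param

class TestProjectPathsLayout:
    # Name of the fixture consulted via request.getfixturevalue above;
    # both this hook value and the fixture below are hypothetical.
    CONFIG_DATA_PATHS_HOOK = "use_paths_section"

    @pytest.fixture(params=[True, False])
    def use_paths_section(self, request):
        """ Whether path values land in 'paths' or in 'metadata'. """
        return request.param

    def test_sample_count(self, project, num_samples):
        assert num_samples == project.num_samples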
def main():
    args = parse_arguments()

    # Start project object
    prj = Project(args.project_config_file)

    if "trackhubs" not in prj:
        raise ValueError(
            "Project configuration does not have a trackhub section.")
    if "trackhub_dir" not in prj.trackhubs:
        raise ValueError(
            "Project trackhub configuration does not have a trackhub_dir attribute."
        )

    # Set up paths and hub files.
    bigwig_dir = prj.trackhubs.trackhub_dir
    track_hub = os.path.join(bigwig_dir, "hub.txt")
    genomes_hub = os.path.join(bigwig_dir, "genomes.txt")
    # Create (or truncate) the genomes file.
    with open(genomes_hub, 'w'):
        pass

    # Set up attributes.
    proj_name = prj['project_name'] if "project_name" in prj \
        else os.path.basename(prj['paths']['output_dir'])
    proj_desc = prj['project_description'] \
        if "project_description" in prj else proj_name
    user_email = prj['email'] if "email" in prj else ""

    # In the future there will be more actions than this
    make_ucsc_trackhub(args, prj, track_hub, bigwig_dir, genomes_hub,
                       proj_name, proj_desc, user_email)

    track_file = os.path.join(bigwig_dir, "igv", "index.html")
    track_url = os.path.join(prj['trackhubs']['url'], "igv")
    make_igv_tracklink(prj, track_file, track_url)
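make_ucsc_trackhub is defined elsewhere; for orientation, a minimal sketch of the hub.txt stanza such a function would write. Field names follow the UCSC track hub format; the helper itself is hypothetical:

def write_hub_file(track_hub, proj_name, proj_desc, user_email):
    # Minimal UCSC hub.txt stanza; genomes.txt is expected next to hub.txt.
    with open(track_hub, 'w') as handle:
        handle.write("hub {}\n".format(proj_name))
        handle.write("shortLabel {}\n".format(proj_name))
        handle.write("longLabel {}\n".format(proj_desc))
        handle.write("genomesFile genomes.txt\n")
        handle.write("email {}\n".format(user_email))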
    def test_sample_name_availability(
            self, path_project_conf, path_sample_anns, lazy):
        """ Sample names always available on Project. """
        with open(path_sample_anns, 'r') as anns_file:
            expected_sample_names = \
                [l.split(",")[0] for l in anns_file.readlines()[1:] if l]
        p = Project(path_project_conf, defer_sample_construction=lazy)
        assert expected_sample_names == list(p.sample_names)
Example #8
def test_samples_are_generic(path_anns_file, path_proj_conf_file):
    """ Regardless of protocol, Samples for sheet are generic. """
    # The annotations filepath fixture also writes that file, so it's
    # needed even though its return value isn't used locally.
    p = Project(path_proj_conf_file)
    assert len(SAMPLE_NAMES) == p.num_samples
    samples = list(p.samples)
    assert p.num_samples == len(samples)
    assert all([Sample is type(s) for s in samples])
    def project(self, tmpdir, minimal_project_conf_path):
        """ Create a Project with base/default environment. """
        # Write base/default environment data to disk.
        env_config_filename = "env-conf.yaml"
        env_config_filepath = tmpdir.join(env_config_filename).strpath
        with open(env_config_filepath, 'w') as env_conf:
            yaml.safe_dump(self.ENVIRONMENT_CONFIG_DATA, env_conf)
        return Project(minimal_project_conf_path,
                       default_compute=env_config_filepath)
Example #10
    def test_multiple_samples(self, protocols, path_anns_file,
                              path_proj_conf_file):
        """ Project also processes multiple Sample fine. """

        p = Project(path_proj_conf_file)

        # Total sample count is constant.
        assert len(SAMPLE_NAMES) == sum(1 for _ in p.samples)

        # But the sheet permits filtering to specific protocol(s).
        exp_num_samples = len(SAMPLE_NAMES) if not protocols else \
            sum(sum(1 for l in LIBRARIES if l == proto) for proto in protocols)
        sheet = p.build_sheet(*protocols)
        assert exp_num_samples == len(sheet)
        if protocols:
            fuzzy_protos = {alpha_cased(p) for p in protocols}
            for _, sample_data in sheet.iterrows():
                assert alpha_cased(sample_data.library) in fuzzy_protos
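The fuzzy matching leans on looper's alpha_cased helper; a plausible sketch of its behavior, assuming it keeps only alphabetic characters and normalizes case (an assumption, not looper's actual implementation):

def alpha_cased(text, lower=False):
    # Plausible sketch: drop non-alphabetic characters and homogenize case,
    # so "ATAC-seq", "ATACSEQ", and "atacseq" all compare equal.
    text = "".join(filter(str.isalpha, text))
    return text.lower() if lower else text.upper()

assert alpha_cased("ATAC-seq") == alpha_cased("atacseq")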
Example #11
def interactive(prj_lines=PROJECT_CONFIG_LINES,
                iface_lines=PIPELINE_INTERFACE_CONFIG_LINES,
                merge_table_lines=MERGE_TABLE_LINES,
                annotation_lines=SAMPLE_ANNOTATION_LINES,
                project_kwargs=None,
                logger_kwargs=None):
    """
    Create Project and PipelineInterface instances from default or given data.

    This is intended to provide easy access to instances of fundamental looper
    objects for interactive, test-authorship-motivated work in an IPython
    interpreter or notebook. Test authorship is simplified if we provide
    easy access to viable instances of these objects.

    :param Iterable[str] prj_lines: project config lines
    :param Iterable[str] iface_lines: pipeline interface config lines
    :param Iterable[str] merge_table_lines: lines for a merge table file
    :param Iterable[str] annotation_lines: lines for a sample annotations file
    :param dict project_kwargs: keyword arguments for Project constructor
    :param dict logger_kwargs: keyword arguments for logging configuration
    :return (Project, PipelineInterface): one Project and one PipelineInterface
    """

    # Establish logging for interactive session.
    looper_logger_kwargs = {"level": "DEBUG"}
    looper_logger_kwargs.update(logger_kwargs or {})
    setup_looper_logger(**looper_logger_kwargs)

    # TODO: don't work with tempfiles once ctors tolerate Iterable.
    dirpath = tempfile.mkdtemp()
    path_conf_file = _write_temp(prj_lines,
                                 dirpath=dirpath,
                                 fname=P_CONFIG_FILENAME)
    path_iface_file = _write_temp(iface_lines,
                                  dirpath=dirpath,
                                  fname="pipeline_interface.yaml")
    path_merge_table_file = _write_temp(merge_table_lines,
                                        dirpath=dirpath,
                                        fname=MERGE_TABLE_FILENAME)
    path_sample_annotation_file = _write_temp(annotation_lines,
                                              dirpath=dirpath,
                                              fname=ANNOTATIONS_FILENAME)

    prj = Project(path_conf_file, **(project_kwargs or {}))
    iface = PipelineInterface(path_iface_file)
    for path in [
            path_conf_file, path_iface_file, path_merge_table_file,
            path_sample_annotation_file
    ]:
        os.unlink(path)
    return prj, iface
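Typical use in an IPython session, per the docstring; the quieter log level here is purely illustrative:

prj, iface = interactive(logger_kwargs={"level": "INFO"})
print(prj.num_samples)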
    def observed_argstring_elements(
            self, confdata, pipeline, confpath, envpath):
        """
        Write config, build project, and validate argstring for pipeline.
        
        :param dict confdata: project configuration data
        :param str pipeline: name of pipeline for which to build argstring
        :param str confpath: where to write project config file
        :param str envpath: pointer to default environment file
        :return Iterable[str]: argstring components
        """
        conf_file_path = _write_project_config(confdata, dirpath=confpath)

        # Subvert requirement for sample annotations file.
        with mock.patch("looper.models.check_sheet"):
            project = Project(conf_file_path, default_compute=envpath)

        argstring = project.get_arg_string(pipeline)
        return argstring.split(" ")
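A hypothetical test built on this helper; the config data, pipeline name, and fixtures here are illustrative, not from the source:

    def test_argstring_is_tokenized(self, tmpdir, env_config_filepath):
        # Hypothetical usage of observed_argstring_elements.
        confdata = {"metadata": {"sample_annotation": "anns.csv"}}
        elements = self.observed_argstring_elements(
            confdata, pipeline="pipeline.py",
            confpath=tmpdir.strpath, envpath=env_config_filepath)
        # get_arg_string output was split on spaces, so no element has one.
        assert all(" " not in e for e in elements)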
    def test_counting_samples_doesnt_create_samples(
            self, sample_annotation_lines,
            path_project_conf, path_sample_anns):
        """ User can ask about sample count without creating samples. """
        # We're not parameterized in terms of Sample creation laziness here
        # because a piece of the test's essence is Sample collection absence.
        p = Project(path_project_conf, defer_sample_construction=True)
        assert p._samples is None
        expected_sample_count = sum(1 for _ in sample_annotation_lines) - 1
        assert expected_sample_count == p.num_samples
        assert p._samples is None
    def test_lacks_sample_annotations(
            self, project_config_data, env_config_filepath, tmpdir):
        """ Lack of sample annotations precludes Project construction. """

        # Remove sample annotations KV pair from config data for this test.
        del project_config_data["metadata"][SAMPLE_ANNOTATIONS_KEY]

        # Write the config and assert the expected exception for Project ctor.
        conf_path = _write_project_config(
            project_config_data, dirpath=tmpdir.strpath)
        with pytest.raises(_MissingMetadataException):
            Project(conf_path, default_compute=env_config_filepath)
    def create_project(
            self, project_config_data, default_env_path, case_type, dirpath):
        """
        For a test case, determine expectations and create Project instance.
        
        :param dict project_config_data: the actual data to write to the 
            Project configuration file
        :param str default_env_path: path to the default environment config 
            file to pass to Project constructor
        :param str case_type: type of test case to execute; this determines 
            how to specify the derived columns in the config file
        :param str dirpath: path in which to write config file
        :return (Iterable[str], Project): collection of names of derived 
            columns to expect, along with Project instance with which to test
        """

        # Ensure valid parameterization.
        if case_type not in self.DERIVED_COLUMNS_CASE_TYPES:
            raise ValueError(
                "Unexpected derived_columns case type: '{}' (known={})".
                format(case_type, self.DERIVED_COLUMNS_CASE_TYPES))

        # Parameterization specifies expectation and explicit specification.
        expected_derived_columns = copy.copy(Project.DERIVED_COLUMNS_DEFAULT)
        if case_type == "implicit":
            # Negative control; ensure config data lacks derived columns.
            assert "derived_columns" not in project_config_data
        else:
            explicit_derived_columns = \
                    copy.copy(self.ADDITIONAL_DERIVED_COLUMNS)
            expected_derived_columns.extend(self.ADDITIONAL_DERIVED_COLUMNS)
            # Determine explicit inclusion of default derived columns.
            if case_type == "intersection":
                explicit_derived_columns.extend(
                        Project.DERIVED_COLUMNS_DEFAULT)
            project_config_data["derived_columns"] = explicit_derived_columns

        # Write the config and build the Project.
        conf_file_path = _write_project_config(
                project_config_data, dirpath=dirpath)
        with mock.patch("looper.models.check_sheet"):
            project = Project(conf_file_path, default_compute=default_env_path)
        return expected_derived_columns, project
    def test_sample_creation_laziness(
            self, path_project_conf, path_sample_anns, lazy):
        """ Project offers control over whether to create base Sample(s). """

        p = Project(path_project_conf, defer_sample_construction=lazy)

        if lazy:
            # Samples should remain null during lazy Project construction.
            assert p._samples is None

        else:
            # Eager Project construction builds Sample objects.
            assert p._samples is not None
            with open(path_sample_anns, 'r') as anns_file:
                anns_file_lines = anns_file.readlines()

            # Sum excludes the header line.
            num_samples_expected = sum(1 for l in anns_file_lines[1:] if l)
            assert num_samples_expected == len(p._samples)
            assert all([Sample == type(s) for s in p._samples])
    def test_no_merge_table_in_config(
            self, tmpdir, spec_type, lazy, proj_conf_data, path_sample_anns):
        """ Merge table attribute remains null if config lacks merge_table. """
        metadata = proj_conf_data["metadata"]
        try:
            assert "merge_table" in metadata
        except AssertionError:
            print("Project metadata section lacks 'merge_table'")
            print("All config data: {}".format(proj_conf_data))
            print("Config metadata section: {}".format(metadata))
            raise
        if spec_type == "as_null":
            metadata["merge_table"] = None
        elif spec_type == "missing":
            del metadata["merge_table"]
        else:
            raise ValueError("Unknown way to specify no merge table: {}".
                             format(spec_type))
        path_config_file = os.path.join(tmpdir.strpath, "project_config.yaml")
        with open(path_config_file, 'w') as conf_file:
            yaml.safe_dump(proj_conf_data, conf_file)
        p = Project(path_config_file, defer_sample_construction=lazy)
        assert p.merge_table is None
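For orientation, a minimal sketch of the kind of config data this test receives; the keys mirror the metadata section manipulated above, while the path values are hypothetical:

import yaml

proj_conf_data = {
    "metadata": {
        "sample_annotation": "annotations.csv",  # hypothetical path
        "merge_table": "merge.csv",  # nulled out or deleted by the test
        "output_dir": "results",
    }
}
print(yaml.safe_dump(proj_conf_data, default_flow_style=False))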
Example #18
                sep="\t", header=None, skiprows=1,
                names=["gene_name", "chr", "strand", sample.name]).set_index("gene_name")[sample.name]
        except IOError:
            print("Sample {} is missing.".format(sample.name))
            continue
        # add gene index
        if first:
            expr = pd.DataFrame(counts)
            first = False
        else:
            expr[sample.name] = counts

    return expr


prj = Project(os.path.join("metadata", "config.yaml"))
prj.add_sample_sheet()
prj.paths.results_dir = "results"

# get guide annotation
guide_annotation = pd.read_csv(os.path.join("metadata", "guide_annotation.csv"))


# Gather gRNA assignment info across all samples used
for experiment, rows in prj.sheet.df.groupby(['experiment']):
    # merge gRNA data
    reads = pd.DataFrame()
    scores = pd.DataFrame()
    assignment = pd.DataFrame()

    for sample_name in rows["sample_name"]:
Example #19
                             "duplets_assignment_overlap.bothlog.svg"),
                bbox_inches="tight")
    plt.close("all")
    sns.jointplot(overlap_others,
                  overlap_assignment,
                  xlim=(-100, overlap_assignment.max() + 100),
                  ylim=(-100, overlap_assignment.max() + 100),
                  alpha=0.1)
    plt.savefig(os.path.join(output_dir,
                             "duplets_assignment_overlap.lims.svg"),
                bbox_inches="tight")
    plt.close("all")


# Start project, add samples
prj = Project(os.path.join("metadata", "config.yaml"))
# only used in older versions of looper
# prj.add_sample_sheet()

# get guide annotation
guide_annotation = pd.read_csv(os.path.join("metadata",
                                            "guide_annotation.csv"))

for sample in [s for s in prj.samples if hasattr(s, "replicate")]:
    output_dir = os.path.join(sample.paths.sample_root, "gRNA_assignment")

    # select gRNAs in respective sample library
    sel_guide_annotation = guide_annotation[guide_annotation['library'] ==
                                            sample.grna_library]
Example #20
def main():
    parser = ArgumentParser(prog="ngs_analysis_recipe",
                            description="NGS analysis recipe.")
    parser = add_args(parser)
    args = parser.parse_args()
    # args = parser.parse_args('-t ATAC-seq metadata/project_config.yaml'.split(" "))

    # Start project
    print(
        "Starting looper project with project configuration file: '{}'".format(
            args.config_file))
    prj = Project(args.config_file)
    print("Changing directory to project root directory: '{}'.".format(
        prj.metadata.output_dir))
    os.chdir(prj.metadata.output_dir)
    if args.pass_qc:
        print("Filtering out samples that didn't pass QC, as specified "
              "in the 'pass_qc' column of the sample annotation.")
        prj._samples = [
            s for s in prj._samples
            if s.pass_qc not in ['0', 0, 'False', False]
        ]
    print("Setting location of sample files dependent on sample types.")
    for sample in prj.samples:
        if hasattr(sample, "protocol"):
            sample.library = sample.protocol

        if sample.library in ["ATAC-seq", "ChIP-seq", "ChIPmentation"]:
            sample.mapped = os.path.join(sample.paths.sample_root, "mapped",
                                         sample.name + ".trimmed.bowtie2.bam")
            sample.filtered = os.path.join(
                sample.paths.sample_root, "mapped",
                sample.name + ".trimmed.bowtie2.filtered.bam")
            sample.peaks = os.path.join(sample.paths.sample_root, "peaks",
                                        sample.name + "_peaks.narrowPeak")
        elif sample.library == "RNA-seq":
            sample.bitseq_counts = os.path.join(
                sample.paths.sample_root,
                "bowtie1_{}".format(sample.transcriptome), "bitSeq",
                sample.name + ".counts")

    # ANALYSIS
    if args.data_type is None:
        print(
            "Type of analysis not specified. Will run independent analysis for all types of data in the sample annotation sheet."
        )
        data_types = sorted(list(set([s.library for s in prj.samples])))
        print("Sample data types: '{}'.".format(",".join(data_types)))
    else:
        print(
            "Type of analysis specified. Will run only analysis for samples of type '{}'."
            .format(args.data_type))
        data_types = [args.data_type]
        print("Sample data types: '{}'.".format(",".join(data_types)))
    if args.name is None:
        print(
            "Analysis name not specified, will use name in project configuration file: '{}'."
            .format(prj.project_name))
        args.name = prj.project_name

    for data_type in data_types:
        print("Starting analysis for samples of type: '{}'.".format(data_type))
        samples = [s for s in prj.samples if (s.library == data_type)]
        if len(samples) > 0:
            print("Samples under consideration: '{}'. "
                  "Total of {} samples.".format(
                      ",".join([s.name for s in samples]), len(samples)))
        else:
            raise ValueError(
                "There were no valid samples for this analysis type!")

        if data_type in ["ATAC-seq"]:
            print("Initializing ATAC-seq analysis")
            analysis = ATACSeqAnalysis(name=args.name + "_atacseq",
                                       prj=prj,
                                       samples=samples,
                                       results_dir=args.results_dir)
        elif data_type in ["ChIP-seq"]:
            print("Initializing ChIP-seq analysis")
            analysis = ChIPSeqAnalysis(name=args.name + "_chipseq",
                                       prj=prj,
                                       samples=samples,
                                       results_dir=args.results_dir)
        elif data_type in ["RNA-seq"]:
            print("Initializing RNA-seq analysis")
            analysis = RNASeqAnalysis(name=args.name + "_rnaseq",
                                      prj=prj,
                                      samples=samples,
                                      results_dir=args.results_dir)

        if hasattr(prj, "sample_attributes"):
            print(
                "Using sample attributes from project configuration file: '{}'."
                .format(",".join(prj.sample_attributes)))
            sample_attributes = prj.sample_attributes
        else:
            print(
                "Project configuration file does not contain a 'sample_attributes' section."
            )
            print("Sample annotation will be minimal!")
            sample_attributes = ['sample_name']
        if hasattr(prj, "group_attributes"):
            print(
                "Using group attributes from project configuration file: '{}'."
                .format(",".join(prj.group_attributes)))
            group_attributes = prj.group_attributes
        else:
            print(
                "Project configuration file does not contain a 'group_attributes' section."
            )
            print("Group-wise labeling of samples will not be possible!")
            group_attributes = ['sample_name']

        # Run the pipeline for this data type. A bare call (rather than an
        # early return) lets every data type in the sheet get its own analysis.
        main_analysis_pipeline(analysis,
                               data_type=data_type,
                               sample_attributes=sample_attributes,
                               plotting_attributes=group_attributes,
                               alpha=args.alpha,
                               abs_fold_change=args.abs_fold_change)
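The recipe presumably runs as a standalone script; a conventional entry-point guard, shown here as an assumption since the original's may differ:

import sys

if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        print("Program canceled by user!")
        sys.exit(1)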
    def test_minimal_configuration_doesnt_fail(
            self, minimal_project_conf_path, env_config_filepath):
        """ Project ctor requires minimal config and default environment. """
        Project(config_file=minimal_project_conf_path,
                default_compute=env_config_filepath)
    def test_minimal_configuration_output_dir(
            self, tmpdir, minimal_project_conf_path, env_config_filepath):
        """ Project infers output path from its configuration location. """
        project = Project(minimal_project_conf_path,
                          default_compute=env_config_filepath)
        assert tmpdir.strpath == project.output_dir
Example #23
    def test_no_samples(self, protocols, delimiter, path_empty_project):
        """ Lack of Samples is unproblematic for the sheet build. """
        # Regardless of protocol(s), the sheet should be empty.
        p = Project(path_empty_project)
        sheet = p.build_sheet(*protocols)
        assert sheet.empty
    def test_no_samples(self, path_empty_project):
        """ Lack of Samples is unproblematic. """
        p = Project(path_empty_project)
        assert 0 == p.num_samples
        assert [] == list(p.samples)
Example #25
    def test_empty_project(self, path_empty_project):
        """ It's unproblematic to create a Project that lacks samples. """
        Project(path_empty_project)
Example #26
    interactions_TF.to_csv(os.path.join(foots_dir, label + ".piq.TF-TF_interactions.tsv"), sep="\t", index=False)

    # Filter for TF-TF interactions stronger than 1
    interactions_TF_filtered = interactions_TF[interactions_TF['interaction_score'] >= 1]
    interactions_TF_filtered.to_csv(os.path.join(foots_dir, label + ".piq.TF-TF_interactions.filtered.tsv"), sep="\t", index=False)


# INIT
# Get path configuration
data_dir = os.path.join('.', "data")
scratch_dir = os.path.join("/scratch/users/arendeiro/piq")
results_dir = os.path.join('.', "results")
plots_dir = os.path.join(results_dir, "plots")

# Start project
prj = Project("metadata/project_config.yaml")
prj.add_sample_sheet()

# Annotate samples with a few more attributes:
prj.samples = annotate_samples(prj.samples, prj.sheet.df.columns.tolist())
samples = prj.samples

# MOTIFS FOR PIQ
motifs_file = "data/external/jaspar_human_motifs.txt"
n_motifs = 366

# prepare motifs for footprinting (done once)
cmds = piq_prepare_motifs(motifs_file, n_motifs)
for cmd in cmds:
    cmd2 = tk.slurmHeader("PIQ_preparemotifs", os.path.join("/home/arendeiro/", "piq_preparemotifs.slurm.log"), cpusPerTask=1, queue="shortq")