Python Metadata.Metadata Examples, basic_modules.metadata.Metadata.Metadata Python Examples

Example #1

0

Show file

File: test_fastq_splitter.py Project: markmcdowall/mg-process-fastq

def test_paired_splitter():
    """
    Split a paired-end FASTQ data set and check the reported output file.

    Both mate files from the ``data/`` directory next to this test are handed
    to ``fastq_splitter``. The test passes when the ``output`` entry of the
    first element of the returned value is an existing, non-empty file.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    mate_1 = data_dir + "bsSeeker.Mouse.SRR892982_1.fastq"
    mate_2 = data_dir + "bsSeeker.Mouse.SRR892982_2.fastq"

    splitter = fastq_splitter()

    input_files = {"fastq1": mate_1, "fastq2": mate_2}
    input_metadata = {
        "fastq1": Metadata("data_rnaseq", "fastq", [], None, {'assembly': 'test'}),
        "fastq2": Metadata("data_rnaseq", "fastq", [], None, {'assembly': 'test'}),
    }
    requested_output = {"output": mate_1 + ".tar.gz"}

    results = splitter.run(input_files, input_metadata, requested_output)

    print("WGBS - PAIRED RESULTS:", results)

    # The tool's own report decides which path is checked.
    archive = results[0]["output"]
    assert os.path.isfile(archive)
    assert os.path.getsize(archive) > 0

Example #2

0

Show file

File: test_pipeline_rnaseq.py Project: markmcdowall/mg-process-fastq

def test_rnaseq_pipeline():
    """
    Run the RNA-seq pipeline on the kallisto test data and verify its outputs.

    The equivalent command-line invocation of the pipeline is:

    .. code-block:: none

       runcompss                                                         \\
          --lang=python                                                  \\
          --library_path=${HOME}/bin                                     \\
          --pythonpath=/<pyenv_virtenv_dir>/lib/python2.7/site-packages/ \\
          --log_level=debug                                              \\
          process_rnaseq.py                                              \\
             --taxon_id 9606                                             \\
             --genome /<dataset_dir>/Human.GRCh38.fasta                  \\
             --assembly GRCh38                                           \\
             --file /<dataset_dir>/ERR030872_1.fastq                     \\
             --file2 /<dataset_dir>/ERR030872_2.fastq

    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    assembly_info = 'GCA_000001405.22'

    files = {
        'cdna': data_dir + 'kallisto.Human.GRCh38.fasta',
        'fastq1': data_dir + 'kallisto.Human.ERR030872_1.fastq',
        'fastq2': data_dir + 'kallisto.Human.ERR030872_2.fastq'
    }

    metadata = {
        "cdna": Metadata(
            "Assembly", "fasta", files['cdna'], None,
            {'assembly': assembly_info}),
        "fastq1": Metadata(
            "data_rna_seq", "fastq", files['fastq1'], None,
            {'assembly': assembly_info}),
        "fastq2": Metadata(
            "data_rna_seq", "fastq", files['fastq2'], None,
            {'assembly': assembly_info}),
    }

    expected_outputs = {
        "index": 'tests/data/kallisto.idx',
        "abundance_h5_file": 'tests/data/kallisto.abundance.h5',
        "abundance_tsv_file": 'tests/data/kallisto.abundance.tsv',
        "run_info_file": 'tests/data/kallisto.run_info.json'
    }

    pipeline = process_rnaseq()
    produced, produced_meta = pipeline.run(files, metadata, expected_outputs)  # pylint: disable=unused-variable

    # The pipeline must report exactly four files
    assert len(produced) == 4

    # Each reported file must sit at the requested location and hold data
    for key in produced:
        print("RNA SEQ RESULTS FILE:", key)
        location = produced[key]
        assert location == expected_outputs[key]
        assert os.path.isfile(location)
        assert os.path.getsize(location) > 0

Example #3

0

Show file

File: test_bed_functions.py Project: Multiscale-Genomics/mg-process-files

def test_bed_02_indexer():
    """
    Run ``bedIndexerTool`` on the sample BED file and check ``sample.bb``.

    The HDF5 index file is opened in append mode and closed again before the
    tool runs. Afterwards ``sample.bb`` must exist in the data directory and
    have a non-zero size.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    hdf5_location = resource_path + "file_index.hdf5"
    bigbed_location = resource_path + "sample.bb"

    # Open in append mode and close straight away, before the tool is run.
    h5py.File(hdf5_location, "a").close()

    input_files = {
        "bed": resource_path + "sample.sorted.bed",
        "chrom_file": resource_path + "chrom_GRCh38.size",
        "hdf5_file": hdf5_location
    }

    output_files = {"bb_file": bigbed_location}

    metadata = {
        "bed": Metadata(
            "data_rnaseq", "bed", "test_bed_location", [],
            {'assembly': 'test'}),
        "hdf5_file": Metadata(
            "data_file", "hdf5", "test_location", [], {})
    }

    indexer = bedIndexerTool({"bed_type": "bed6+4"})
    indexer.run(input_files, metadata, output_files)

    print(resource_path)
    assert os.path.isfile(bigbed_location)
    assert os.path.getsize(bigbed_location) > 0

Example #4

0

Show file

    def run(self, input_files, input_metadata, output_files):
        """
        Run the test_writer tool and describe the files it was asked to write.

        Parameters
        ----------
        input_files : dict
            Input file locations. The "matrix" and "features" entries are
            passed to ``test_writer``.
        input_metadata : dict
            Metadata objects keyed like ``input_files``. "matrix" and
            "features" provide the source paths, "matrix" the taxon id.
        output_files : dict
            Output locations keyed "output" and "output_tar". A falsy
            "output" value is replaced, in place, by a default file name
            inside ``self.configuration['execution']``.

        Returns
        -------
        output_files : dict
            The (possibly updated) output locations.
        output_metadata : dict
            One Metadata object per output key.

        Two empty dicts are returned when ``test_writer`` reports ``False``.
        """
        # This line used to be tab-indented, which Python 3 rejects (TabError).
        if not output_files["output"]:
            output_files["output"] = self.configuration['execution'] + '/dinamic_name.tsv'

        results = self.test_writer(
            input_files["matrix"],
            input_files["features"],
            output_files["output"],
            output_files["output_tar"]
        )
        results = compss_wait_on(results)

        if results is False:
            logger.fatal("Test Writer: run failed")
            return {}, {}

        sources = [
            input_metadata["matrix"].file_path,
            input_metadata["features"].file_path
        ]
        taxon_id = input_metadata["matrix"].taxon_id

        output_metadata = {
            "output": Metadata(
                file_path=output_files["output"],
                sources=list(sources),
                taxon_id=taxon_id,
                meta_data={
                    "tool": "ChAs"
                }
            ),
            "output_tar": Metadata(
                # Previously this pointed at output_files["output"], so the
                # archive's metadata described the wrong file.
                file_path=output_files["output_tar"],
                sources=list(sources),
                taxon_id=taxon_id,
                meta_data={
                    "tool": "ChAs"
                }
            )
        }

        return (output_files, output_metadata)

Example #5

0

Show file

File: test_pipeline_trimgalore.py Project: markmcdowall/mg-process-fastq

def test_trim_galore_pipeline_02():
    """
    Run the trimgalore pipeline on paired-end data and verify its outputs.

    The equivalent command-line invocation of the pipeline is:

    .. code-block:: none

       runcompss                                                         \\
          --lang=python                                                  \\
          --library_path=${HOME}/bin                                     \\
          --pythonpath=/<pyenv_virtenv_dir>/lib/python2.7/site-packages/ \\
          --log_level=debug                                              \\
          process_trim_galore.py                                         \\
             --taxon_id 9606                                             \\
             --fastq1 /<dataset_dir>/bsSeeker.Mouse.SRR892982_1.fastq.gz \\
             --fastq2 /<dataset_dir>/bsSeeker.Mouse.SRR892982_2.fastq.gz

    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    files = {
        'fastq1': data_dir + 'bsSeeker.Mouse.SRR892982_1.fastq.gz',
        'fastq2': data_dir + 'bsSeeker.Mouse.SRR892982_2.fastq.gz'
    }

    metadata = {
        "fastq1": Metadata("data_wgbs", "fastq", files['fastq1'], None),
        "fastq2": Metadata("data_wgbs", "fastq", files['fastq2'], None)
    }

    expected_outputs = {
        "fastq1_trimmed": 'tests/data/bsSeeker.Mouse.SRR892982_1.trimmed.fastq.gz',
        "fastq2_trimmed": 'tests/data/bsSeeker.Mouse.SRR892982_2.trimmed.fastq.gz',
        "fastq1_report": 'tests/data/bsSeeker.Mouse.SRR892982_1.trimmed.report.txt',
        "fastq2_report": 'tests/data/bsSeeker.Mouse.SRR892982_2.trimmed.report.txt'
    }

    pipeline = process_trim_galore()
    produced, produced_meta = pipeline.run(files, metadata, expected_outputs)

    # The pipeline must report exactly two files
    assert len(produced) == 2
    print(produced_meta)

    # Each reported file needs matching metadata, the requested location
    # and some content
    for key in produced:
        print("TRIM GALORE RESULTS FILE:", key)
        location = produced[key]
        assert location == expected_outputs[key]
        assert key in produced_meta
        assert os.path.isfile(location)
        assert os.path.getsize(location) > 0

Example #6

0

Show file

def test_idear_pipeline():
    """
    Test case to ensure that the iDEAR pipeline code works.

    Runs ``process_idear`` on the BSgenome archive and the four BAM files in
    the ``data/`` directory next to this test, then checks that every file
    reported by the pipeline exists and is non-empty.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")

    files = {
        'bsgenome':
        resource_path + "idear.Human.GCA_000001405.22.22.bsgenome.tar.gz",
        'bam_1': resource_path + 'idear.Human.SRR3714775.bam',
        'bam_2': resource_path + 'idear.Human.SRR3714776.bam',
        'bg_bam_1': resource_path + 'idear.Human.SRR3714777.bam',
        'bg_bam_2': resource_path + 'idear.Human.SRR3714778.bam',
    }

    output_files = {"bigwig": resource_path + "idear.Human.Nup98-GFP.bw"}

    metadata = {
        "bsgenome":
        Metadata("data_damid_seq", "bsgenome", [], None, {'assembly': 'test'},
                 9606),
        "bam_1":
        Metadata("data_damid_seq", "bam", [], None, {'assembly': 'test'},
                 9606),
        "bam_2":
        Metadata("data_damid_seq", "bam", [], None, {'assembly': 'test'},
                 9606),
        "bg_bam_1":
        Metadata("data_damid_seq", "bam", [], None, {'assembly': 'test'},
                 9606),
        "bg_bam_2":
        Metadata("data_damid_seq", "bam", [], None, {'assembly': 'test'},
                 9606),
    }

    # The species name had been mangled to "H**o sapiens" (a word filter hit
    # "Homo"); the proper binomial is restored here.
    config_param = {
        "idear_title": "Full genome sequences for Homo sapiens (GRCh38)",
        "idear_description": "Full genome sequences for Homo sapiens (GRCh38)",
        "idear_common_name": "Human",
        "idear_organism": "Homo sapiens",
        "idear_provider": "ENA",
        "idear_release_date": "2013",
        "idear_sample_param": "Nup98",
        "idear_background_param": "GFP",
    }

    damidseq_handle = process_idear(config_param)
    damidseq_files, damidseq_meta = damidseq_handle.run(
        files, metadata, output_files)  # pylint: disable=unused-variable

    print(damidseq_files)

    # Every file the pipeline reports must exist and contain data
    for f_out in damidseq_files:
        assert os.path.isfile(damidseq_files[f_out]) is True
        assert os.path.getsize(damidseq_files[f_out]) > 0

Example #7

0

Show file

def test_bwa_aligner_idamidseq():
    """
    Align each iDamID-seq FASTQ file with ``bwaAlignerMEMTool``.

    The gzipped FASTQ files are decompressed next to their archives first.
    Each one is then aligned against the test genome, and the matching
    ``.bam`` file must exist and be non-empty afterwards.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = resource_path + "idear.Human.GCA_000001405.22.fasta"
    fastq_files = [
        resource_path + "idear.Human.SRR3714775.fastq",
        resource_path + "idear.Human.SRR3714776.fastq",
        resource_path + "idear.Human.SRR3714777.fastq",
        resource_path + "idear.Human.SRR3714778.fastq"
    ]

    # Unzip the test data. gzip.open(..., 'rb') yields bytes, so the target
    # must be opened in binary mode too: text mode raises TypeError on
    # Python 3 and rewrites line endings on Windows.
    for fastq_file in fastq_files:
        with gzip.open(fastq_file + '.gz', 'rb') as fgz_in:
            with open(fastq_file, 'wb') as f_out:
                f_out.write(fgz_in.read())

        assert os.path.isfile(fastq_file) is True
        assert os.path.getsize(fastq_file) > 0

    # Run the aligner for each fastq file
    for fastq_file in fastq_files:
        bam_file = fastq_file.replace(".fastq", ".bam")

        input_files = {
            "genome": genome_fa,
            "index": genome_fa + ".bwa.tar.gz",
            "loc": fastq_file
        }

        output_files = {
            "output": bam_file
        }

        metadata = {
            "genome": Metadata(
                "Assembly", "fasta", genome_fa, None,
                {"assembly": "test"}),
            # NOTE(review): only four positional arguments are given here, so
            # the dict sits in the slot the other calls fill with None -
            # confirm this is intended.
            "index": Metadata(
                "index_bwa", "", [genome_fa],
                {
                    "assembly": "test",
                    "tool": "bwa_indexer"
                }
            ),
            "loc": Metadata(
                "data_damid_seq", "fastq", fastq_file, None,
                {"assembly": "test"}
            )
        }

        bwa_t = bwaAlignerMEMTool()
        bwa_t.run(input_files, metadata, output_files)

        assert os.path.isfile(bam_file) is True
        assert os.path.getsize(bam_file) > 0

Example #8

0

Show file

def test_bwa_aligner_mem_paired():
    """
    Align a paired-end FASTQ data set with ``bwaAlignerMEMTool``.

    The resulting ``*_mem.bam`` file must exist and be non-empty. It is
    removed again at the end; a failure to remove it is only printed.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = resource_path + "bsSeeker.Mouse.GRCm38.fasta"
    fastq_file_1 = resource_path + "bsSeeker.Mouse.SRR892982_1.fastq"
    fastq_file_2 = resource_path + "bsSeeker.Mouse.SRR892982_2.fastq"
    bam_file = resource_path + "bsSeeker.Mouse.SRR892982_1_mem.bam"

    input_files = {
        "genome": genome_fa,
        "index": genome_fa + ".bwa.tar.gz",
        "loc": fastq_file_1,
        "fastq2": fastq_file_2
    }

    output_files = {
        "output": fastq_file_1.replace(".fastq", "_mem.bam")
    }

    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None,
            {"assembly": "test"}),
        # NOTE(review): only four positional arguments are given here, so the
        # dict sits in the slot the other calls fill with None - confirm this
        # is intended.
        "index": Metadata(
            "index_bwa", "", [genome_fa],
            {
                "assembly": "test",
                "tool": "bwa_indexer"
            }
        ),
        "loc": Metadata(
            "data_wgbs", "fastq", fastq_file_1, None,
            {"assembly": "test"}
        ),
        "fastq2": Metadata(
            "data_wgbs", "fastq", fastq_file_2, None,
            {"assembly": "test"}
        )
    }

    bwa_t = bwaAlignerMEMTool()
    bwa_t.run(input_files, metadata, output_files)

    print(__file__)

    assert os.path.isfile(bam_file) is True
    assert os.path.getsize(bam_file) > 0

    # Best-effort cleanup. "except OSError as ose" is valid on Python 2.6+
    # and Python 3; the old comma form is a SyntaxError on Python 3.
    try:
        os.remove(bam_file)
    except OSError as ose:
        print("Error: %s - %s." % (ose.filename, ose.strerror))

Example #9

0

Show file

File: test_bs_seeker_methylation_caller.py Project: markmcdowall/mg-process-fastq

def test_bs_seeker_methylation_caller():
    """
    Run the BS-Seeker methylation caller tool and check its three outputs.

    The wig, cgmap and atcgmap files requested from the tool must all exist
    and be non-empty once it has run.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    home_dir = os.path.expanduser('~')
    sample_prefix = data_dir + "bsSeeker.Mouse.SRR892982_1"

    input_files = {
        "genome": data_dir + "bsSeeker.Mouse.GRCm38.fasta",
        "index": data_dir + "bsSeeker.Mouse.GRCm38.fasta.bt2.tar.gz",
        "bam": sample_prefix + ".filtered.bam",
        "bai": sample_prefix + ".filtered.bai",
    }

    output_files = {
        "wig_file": sample_prefix + ".wig",
        "cgmap_file": sample_prefix + ".cgmap",
        "atcgmap_file": sample_prefix + ".atcgmap"
    }

    test_assembly = {'assembly': 'test'}
    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", input_files["genome"], None,
            dict(test_assembly)),
        # The index metadata is given the genome path, not the index path.
        "index": Metadata(
            "index_bowtie", "index", input_files["genome"], None,
            dict(test_assembly)),
        "bam": Metadata(
            "data_wgbs", "bam", input_files["bam"], None,
            dict(test_assembly)),
        "bai": Metadata(
            "data_wgbs", "bai", input_files["bai"], None,
            dict(test_assembly)),
    }

    config_param = {
        "aligner": "bowtie2",
        "aligner_path": home_dir + "/lib/bowtie2-2.3.4-linux-x86_64",
        "bss_path": home_dir + "/lib/BSseeker2"
    }

    caller = bs_seeker_methylation_caller.bssMethylationCallerTool(config_param)
    caller.run(input_files, metadata, output_files)

    # Checked in the same order as the outputs were declared.
    for key in ("wig_file", "cgmap_file", "atcgmap_file"):
        assert os.path.isfile(output_files[key])
        assert os.path.getsize(output_files[key]) > 0

Example #10

0

Show file

def test_design():
    """
    Run ``makeDesignFilesTool`` and check the three CHiCAGO design files.

    After the run, ``test.nbpb``, ``test.npb`` and ``test.poe`` must exist in
    the ``test_run_chicago`` data directory and be non-empty.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    run_dir = path + "test_run_chicago"
    prefix = path + "test_run_chicago/test"

    config_file = {
        "makeDesignFiles_minFragLen": "150",
        "makeDesignFiles_maxFragLen": "40000",
        "makeDesignFiles_maxLBrownEst": "1500000",
        "makeDesignFiles_binsize": "20000",
        "makeDesignFiles_removeb2b": True,
        "makeDesignFiles_removeAdjacent": True,
        "makeDesignFiles_outfilePrefix": prefix,
        # "makeDesignFiles_designDir" is deliberately not set here.
        "makeDesignFiles_rmap": prefix + ".rmap",
        "makeDesignFiles_baitmap": prefix + ".baitmap"
    }

    input_files = {
        "RMAP": prefix + ".rmap",
        "BAITMAP": prefix + ".baitmap"
    }

    metadata = {
        "RMAP": Metadata(
            "data_chicago_input", ".rmap", run_dir, None, {}, 9606),
        "BAITMAP": Metadata(
            "data_chicago_input", ".baitmap", run_dir, None, {}, 9606)
    }

    output_files = {
        "nbpb": prefix + ".nbpb",
        "npb": prefix + ".npb",
        "poe": prefix + ".poe"
    }

    tool = makeDesignFilesTool(config_file)
    tool.run(input_files, metadata, output_files)

    # Paths are rebuilt from the prefix rather than read back from
    # output_files, one extension at a time.
    for extension in (".nbpb", ".npb", ".poe"):
        design_file = prefix + extension
        assert os.path.isfile(design_file)
        assert os.path.getsize(design_file) > 0

Example #11

0

Show file

File: test_idear.py Project: markmcdowall/mg-process-fastq

def test_idear():
    """
    Run ``idearTool`` on the test BAM files and check the bigWig output.

    The tool receives a BSgenome archive, two sample BAM files and two
    background BAM files. ``idear.Human.Nup98-GFP.bw`` must exist and be
    non-empty afterwards.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    bigwig_path = data_dir + "idear.Human.Nup98-GFP.bw"

    input_files = {
        "bsgenome": data_dir + "idear.Human.GCA_000001405.22.22.bsgenome.tar.gz",
        "bam_1": data_dir + "idear.Human.SRR3714775.bam",
        "bam_2": data_dir + "idear.Human.SRR3714776.bam",
        "bg_bam_1": data_dir + "idear.Human.SRR3714777.bam",
        "bg_bam_2": data_dir + "idear.Human.SRR3714778.bam",
    }

    output_files = {"bigwig": bigwig_path}

    # One Metadata object per input; only the file type differs.
    file_types = (
        ("bsgenome", "bsgenome"),
        ("bam_1", "bam"),
        ("bam_2", "bam"),
        ("bg_bam_1", "bam"),
        ("bg_bam_2", "bam"),
    )
    metadata = {}
    for key, file_type in file_types:
        metadata[key] = Metadata(
            "data_damid_seq", file_type, [], None,
            {'assembly': 'test'}, 9606)

    config = {
        "idear_common_name": "Human",
        "idear_sample_param": "Nup98",
        "idear_background_param": "GFP"
    }

    tool = idearTool(config)
    tool.run(input_files, metadata, output_files)

    assert os.path.isfile(bigwig_path)
    assert os.path.getsize(bigwig_path) > 0

Example #12

0

Show file

File: test_process_rmap.py Project: Multiscale-Genomics/CHi-C

def test_process_rmap():
    """
    Run the ``process_rmap`` pipeline on the chr21 test genome.

    The pipeline is asked for an .rmap file, two Rtree files and a
    chromosome handler file. Only the two Rtree files are checked, and each
    must have a non-zero size.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = path + "test_baitmap/chr21_hg19.fa"

    configuration = {"renzime": {"HindIII": 'A|AGCTT'}}

    input_files = {"genome_fa": genome_fa}

    # NOTE(review): 9606 and "" sit where other calls in this suite pass a
    # meta_data dict and a taxon id - confirm the argument order.
    metadata = {
        "genome_fa": Metadata("txt", "fasta", genome_fa, None, 9606, ""),
    }

    output_files = {
        "RMAP": path + "test_run_chicago/test.rmap",
        "Rtree_file_dat": path + "test_rmap/rtree_file.dat",
        "Rtree_file_idx": path + "test_rmap/rtree_file.idx",
        "chr_handler": path + "test_baitmap/chr_handler.txt"
    }

    pipeline = process_rmap(configuration)
    pipeline.run(input_files, metadata, output_files)

    for key in ("Rtree_file_dat", "Rtree_file_idx"):
        assert os.path.getsize(output_files[key])

Example #13

0

Show file

File: test_pipeline_test.py Project: inab/mg-process_H_randomizer

def test_test_pipeline():
    """
    Run the ``process_H_randomizer`` pipeline on the text test input.

    Running the test from the command line:

    .. code-block:: none

       pytest tests/test_pipeline_test.py
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    source_file = data_dir + "test_input.txt"

    input_files = {"input": source_file}

    metadata = {
        "input": Metadata(
            "text", "txt", source_file, None, {"assembly": "test"})
    }

    files_out = {"output": data_dir + 'test.txt'}

    pipeline = process_H_randomizer()
    produced, produced_meta = pipeline.run(input_files, metadata, files_out)

    # Every file the pipeline reports must exist and hold data
    for key in produced:
        print("GENOME RESULTS FILE:", key)
        location = produced[key]
        assert os.path.isfile(location)
        assert os.path.getsize(location) > 0

Example #14

0

Show file

File: test_bamqc.py Project: rehamFatima/Bam_QC

def test_bamqc():
    """
    Run ``bamQC`` on the macs2 test BAM file and check the HTML report.

    .. code-block:: none

       pytest tests/test_bamqc.py
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    bam_file = data_dir + "macs2.Human.bam"
    report_file = data_dir + "macs2.Human_bamqc.html"

    input_files = {"bam": bam_file}
    output_files = {"html": report_file}
    metadata = {
        "bam": Metadata(
            "data_chip_seq", "bam", bam_file, None, {"assembly": "test"})
    }

    tool = bamQC()
    tool.run(input_files, metadata, output_files)

    # Read the path back from output_files after the run, not from the local.
    assert os.path.isfile(output_files["html"])
    assert os.path.getsize(output_files["html"]) > 0

Example #15

0

Show file

def test_gem_indexer():
    """
    Decompress the test genome and build a GEM index from it.

    The gzipped FASTA file is written out uncompressed next to the archive
    before ``gemIndexerTool`` runs. The ``.gem.gz`` index must then exist and
    be non-empty.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = data_dir + "tb.Human.GCA_000001405.22.fasta"

    # Binary in, binary out: the archive content is copied unchanged.
    with gzip.open(genome_fa + '.gz', 'rb') as packed, open(genome_fa, 'wb') as unpacked:
        unpacked.write(packed.read())

    genome_gem_idx = data_dir + "tb.Human.GCA_000001405.22.fasta.gem.gz"

    input_files = {"genome": genome_fa}
    output_files = {"index": genome_gem_idx}
    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None, {'assembly': 'test'}),
    }

    print(input_files, output_files)

    indexer = gemIndexerTool()
    indexer.run(input_files, metadata, output_files)

    assert os.path.isfile(genome_gem_idx)
    assert os.path.getsize(genome_gem_idx) > 0

Example #16

0

Show file

File: test_bowtie_indexer.py Project: markmcdowall/mg-process-fastq

def test_bowtie_indexer_wgbs():
    """
    Run ``bowtieIndexerTool`` on the bsSeeker mouse test genome.

    The requested ``.bt2.tar.gz`` index archive must exist and be non-empty
    after the run.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = data_dir + "bsSeeker.Mouse.GRCm38.fasta"

    input_files = {"genome": genome_fa}
    output_files = {"index": genome_fa + ".bt2.tar.gz"}
    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None, {'assembly': 'test'}),
    }

    indexer = bowtie_indexer.bowtieIndexerTool()
    indexer.run(input_files, metadata, output_files)

    # Read the path back from output_files after the run.
    assert os.path.isfile(output_files["index"])
    assert os.path.getsize(output_files["index"]) > 0

Example #17

0

Show file

File: test_bwa_indexer.py Project: markmcdowall/mg-process-fastq

def test_bwa_indexer_idear():
    """
    Run ``bwaIndexerTool`` on the iDEAR human test genome.

    The ``.bwa.tar.gz`` index archive must exist in the data directory and
    be non-empty after the run.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = data_dir + "idear.Human.GCA_000001405.22.fasta"
    index_archive = genome_fa + ".bwa.tar.gz"

    input_files = {"genome": genome_fa}
    output_files = {"index": index_archive}
    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None, {'assembly': 'test'}),
    }

    print(input_files, output_files)

    indexer = bwaIndexerTool()
    indexer.run(input_files, metadata, output_files)

    assert os.path.isfile(index_archive)
    assert os.path.getsize(index_archive) > 0

Example #18

0

Show file

File: test_rmap_tool.py Project: Multiscale-Genomics/CHi-C

def test_rmap_tool():
    """
    Run ``makeRmapFile`` on the chr21 test genome.

    The tool is asked for an .rmap file, two Rtree files and a chromosome
    handler file. Only the two Rtree files are checked, and each must be
    non-empty.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = path + "test_baitmap/chr21_hg19.fa"

    configuration = {"chic_RE_name": "HindIII", "chic_RE_sequence": "A|AGCTT"}

    input_files = {"genome_fa": genome_fa}

    # NOTE(review): 9606 and "" sit where other calls in this suite pass a
    # meta_data dict and a taxon id - confirm the argument order.
    metadata = {
        "genome_fa": Metadata("txt", "fasta", genome_fa, None, 9606, ""),
    }

    output_files = {
        "RMAP": path + "test_run_chicago/test.rmap",
        "Rtree_file_dat": path + "test_rmap/rtree_file.dat",
        "Rtree_file_idx": path + "test_rmap/rtree_file.idx",
        "chr_handler": path + "test_baitmap/chr_handler.txt"
    }

    tool = makeRmapFile(configuration)
    tool.run(input_files, metadata, output_files)

    for key in ("Rtree_file_dat", "Rtree_file_idx"):
        assert os.path.getsize(output_files[key]) > 0

Example #19

0

Show file

def test_biobambam_chipseq():
    """
    Run the BioBamBam filter on the ChIP-seq test alignment.

    The ``_aln_filtered.bam`` file must exist in the data directory and be
    non-empty after the run.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    filtered_bam = data_dir + "macs2.Human.DRR000150.22_aln_filtered.bam"

    input_files = {"input": data_dir + "macs2.Human.DRR000150.22_aln.bam"}
    output_files = {"output": filtered_bam}
    metadata = {
        "input": Metadata(
            "data_chipseq", "fastq", [], None, {'assembly': 'test'}),
    }

    bam_filter = biobambam_filter.biobambam()
    bam_filter.run(input_files, metadata, output_files)

    assert os.path.isfile(filtered_bam)
    assert os.path.getsize(filtered_bam) > 0

Example #20

0

Show file

def test_biobambam_idamidseq():
    """
    Run the BioBamBam filter on each of the four iDamID-seq BAM files.

    Every input gets a fresh tool instance. The matching ``_filtered.bam``
    file must exist and be non-empty before the next input is processed.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    accessions = ("SRR3714775", "SRR3714776", "SRR3714777", "SRR3714778")
    bam_files = [
        data_dir + "idear.Human." + accession + ".bam"
        for accession in accessions
    ]

    for bam_file in bam_files:
        filtered_bam = bam_file.replace(".bam", "_filtered.bam")

        input_files = {"input": bam_file}
        output_files = {"output": filtered_bam}
        metadata = {
            "input": Metadata(
                "data_damid_seq", "bam", [], None, {'assembly': 'test'}),
        }

        bam_filter = biobambam_filter.biobambam()
        bam_filter.run(input_files, metadata, output_files)

        assert os.path.isfile(filtered_bam)
        assert os.path.getsize(filtered_bam) > 0

Example #21

0

Show file

    def run(self, input_files, metadata, output_files):
        """
        Unpack the HiCUP archive and pass its BAM file to bam2chicago.

        Parameters
        ----------
        input_files : dict
            Must hold "hicup_outdir_tar" (tar archive to extract), "RMAP"
            and "BAITMAP" (passed straight to ``bam2chicago``).
        metadata : dict
            Metadata objects for the same three keys; their ``file_path``
            values become the sources of the output metadata, and
            "hicup_outdir_tar" provides the taxon id.
        output_files : dict
            Must hold "chinput", the location handed to ``bam2chicago``.
            Its parent directory is created when it does not exist.

        Returns
        -------
        output_files : dict
            The dict that was passed in.
        output_metadata : dict
            A single "chinput" entry describing the produced file.
        """
        out_dir = os.path.dirname(output_files["chinput"])
        # makedirs also creates missing parents; an empty dirname means the
        # current directory, which needs no creation.
        if out_dir and not os.path.isdir(out_dir):
            logger.info("creating output directory")
            os.makedirs(out_dir)

        tar_dir, tar_name = os.path.split(input_files["hicup_outdir_tar"])
        # The archive is expected to unpack into a folder named after the
        # archive without its last extension (remaining dots are dropped).
        folder_name = tar_dir + "/" + "".join(tar_name.split(".")[:-1])

        # The context manager closes the archive even if extraction fails.
        # NOTE(review): extractall() trusts the member paths in the archive;
        # only use it on archives produced by this pipeline.
        with tarfile.open(input_files["hicup_outdir_tar"]) as tar:
            tar.extractall(path=tar_dir)

        # NOTE(review): this assumes exactly one .bam file in the folder.
        # Several names would be concatenated, none gives an empty name.
        bam_file = "".join([
            file_hdl for file_hdl in os.listdir(folder_name)
            if file_hdl.endswith(".bam")
        ])

        path_bam = folder_name + "/" + bam_file

        # The return value is not needed here, so it is not kept.
        self.bam2chicago(path_bam, input_files["RMAP"],
                         input_files["BAITMAP"],
                         output_files["chinput"])

        output_metadata = {
            "chinput":
            Metadata(data_type="CHiC_data",
                     file_type="tar",
                     file_path=output_files["chinput"],
                     sources=[
                         metadata["RMAP"].file_path,
                         metadata["BAITMAP"].file_path,
                         metadata["hicup_outdir_tar"].file_path
                     ],
                     taxon_id=metadata["hicup_outdir_tar"].taxon_id,
                     meta_data={"tool": "bam2chicago_tool"})
        }

        return output_files, output_metadata

Example #22

0

Show file

def test_bs_seeker_filter_02():
    """
    Run the BS-Seeker read filter on the second mate of the test data.

    The requested ``.filtered.fastq`` file must exist and be non-empty after
    the run.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    home_dir = os.path.expanduser('~')
    raw_fastq = data_dir + "bsSeeker.Mouse.SRR892982_2.fastq"

    input_files = {"fastq": raw_fastq}
    output_files = {
        "fastq_filtered": data_dir + "bsSeeker.Mouse.SRR892982_2.filtered.fastq"
    }
    metadata = {
        "fastq": Metadata(
            "data_wgbs", "fastq", raw_fastq, None, {'assembly': 'test'})
    }

    config_param = {
        "aligner": "bowtie2",
        "aligner_path": home_dir + "/lib/bowtie2-2.3.4-linux-x86_64",
        "bss_path": home_dir + "/lib/BSseeker2"
    }

    read_filter = bs_seeker_filter.filterReadsTool(config_param)
    read_filter.run(input_files, metadata, output_files)

    # Read the path back from output_files after the run.
    assert os.path.isfile(output_files["fastq_filtered"])
    assert os.path.getsize(output_files["fastq_filtered"]) > 0

Example #23

0

Show file

def test_testTool():
    """
    Check that testTool writes a non-empty output file.

    .. code-block:: none

       pytest tests/test_tool.py
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")

    input_files = {"input": data_dir + "test_input.txt"}
    output_files = {"output": data_dir + "test_output.txt"}
    metadata = {
        "input": Metadata(
            "text", "txt", input_files["input"], None, {"assembly": "test"}),
    }

    tool = testTool()
    tool.run(input_files, metadata, output_files)

    produced = output_files["output"]
    assert os.path.isfile(produced) is True
    assert os.path.getsize(produced) > 0

Example #24

0

Show file

def test_bwa_aligner_aln():
    """
    Test that the BWA aligner tool writes a non-empty BAM file.

    The tool is run on the ``macs2.Human`` test genome and FASTQ file. Once
    the output has been checked, the generated ``_aln.bam`` file is removed;
    a failure to remove it is reported but does not fail the test.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = resource_path + "macs2.Human.GCA_000001405.22.fasta"
    fastq_file = resource_path + "macs2.Human.DRR000150.22.fastq"
    # Single definition of the expected output path used by the checks and
    # the clean-up below.
    bam_file = resource_path + "macs2.Human.DRR000150.22_aln.bam"

    input_files = {
        "genome": genome_fa,
        "index": genome_fa + ".bwa.tar.gz",
        "loc": fastq_file
    }

    output_files = {
        "output": fastq_file.replace(".fastq", "_aln.bam")
    }

    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None,
            {"assembly": "test"}),
        # NOTE(review): unlike the other entries this call passes only four
        # positional arguments, so the dict lands in the 4th position rather
        # than the 5th - confirm against the Metadata signature.
        "index": Metadata(
            "index_bwa", "", [genome_fa],
            {
                "assembly": "test",
                "tool": "bwa_indexer"
            }
        ),
        "loc": Metadata(
            "data_chip_seq", "fastq", fastq_file, None,
            {"assembly": "test"}
        )
    }

    bwa_t = bwaAlignerTool()
    bwa_t.run(input_files, metadata, output_files)

    print(__file__)

    assert os.path.isfile(bam_file) is True
    assert os.path.getsize(bam_file) > 0

    try:
        os.remove(bam_file)
    except OSError as ose:
        # "except OSError, ose" is Python 2 only; the "as" form is valid on
        # both Python 2.6+ and Python 3.
        print("Error: %s - %s." % (ose.filename, ose.strerror))

Example #25

0

Show file

File: test_bs_seeker_aligner.py Project: markmcdowall/mg-process-fastq

def test_bs_seeker_aligner():
    """
    Run the BS-Seeker aligner on the filtered paired FASTQ test files and
    check that non-empty BAM and BAI files are written.
    """
    data_dir = os.path.join(os.path.dirname(__file__), "data/")
    user_home = os.path.expanduser('~')

    genome_fa = data_dir + "bsSeeker.Mouse.GRCm38.fasta"
    read_prefix = data_dir + "bsSeeker.Mouse.SRR892982_"

    input_files = {
        "genome": genome_fa,
        "index": genome_fa + ".bt2.tar.gz",
        "fastq1": read_prefix + "1.filtered.fastq",
        "fastq2": read_prefix + "2.filtered.fastq",
    }

    output_files = {
        "bam": read_prefix + "1.filtered.bam",
        "bai": read_prefix + "1.filtered.bai",
    }

    test_assembly = {'assembly': 'test'}
    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", input_files["genome"], None,
            dict(test_assembly)),
        # NOTE(review): the index metadata is given the genome path, not
        # input_files["index"] - confirm this is intended.
        "index": Metadata(
            "index_bowtie", "index", input_files["genome"], None,
            dict(test_assembly)),
        "fastq1": Metadata(
            "data_wgbs", "fastq", input_files["fastq1"], None,
            dict(test_assembly)),
        "fastq2": Metadata(
            "data_wgbs", "fastq", input_files["fastq2"], None,
            dict(test_assembly)),
    }

    # External tool locations are resolved relative to the home directory.
    config_param = {
        "aligner": "bowtie2",
        "aligner_path": user_home + "/lib/bowtie2-2.3.4-linux-x86_64",
        "bss_path": user_home + "/lib/BSseeker2",
    }

    aligner = bs_seeker_aligner.bssAlignerTool(config_param)
    aligner.run(input_files, metadata, output_files)

    for output_key in ("bam", "bai"):
        assert os.path.isfile(output_files[output_key]) is True
        assert os.path.getsize(output_files[output_key]) > 0

Example #26

0

Show file

def test_bam2chicago():
    """
    Run bam2chicagoTool on the test RMAP, BAITMAP and HiCUP tar archive and
    check that a non-empty chinput file is written.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    hicup_tar = path + "test_hicup/output.tar"
    baitmap_txt = path + "/h19_chr20and21.baitmap_4col_chr.txt"

    input_files = {
        "RMAP": path + "test_run_chicago/test.rmap",
        "BAITMAP": path + "test_run_chicago/test.baitmap",
        "hicup_outdir_tar": hicup_tar,
        "fastq1": path + "/test_truncater/SRR3535023_1_chr21_new.fastq",
        "fastq2": path + "/test_truncater/SRR3535023_2_chr21_new.fastq",
    }

    output_files = {
        "chinput": path + "test_bam2chicago_tool/output_chinput.chinput",
        "hicup_outdir_tar": hicup_tar,
    }

    # NOTE(review): the hicup metadata below passes five positional arguments
    # where the other entries pass six, so the dict and 9606 each land one
    # position earlier - confirm against the Metadata signature.
    hicup_meta = Metadata(
        "TAR", "CHiC_data", input_files["hicup_outdir_tar"],
        {
            "fastq1": "SRR3535023_1.fastq",
            "fastq2": "SRR3535023_2.fastq",
            "genome": "human_hg19",
        },
        9606)

    metadata = {
        "RMAP": Metadata(
            "TXT", ".rmap", path + "/h19_chr20and21_chr.rmap", None, {}, 9606),
        "BAITMAP": Metadata(
            "TXT", ".baitmap", baitmap_txt, None, {}, 9606),
        "hicup_outdir_tar": hicup_meta,
        "genome_fa": Metadata(
            "TXT", "RG", baitmap_txt, None, {}, 9606),
    }

    configuration = {"aligner": "tadbit", "execution": path + "test_baitmap"}

    tool = bam2chicagoTool(configuration)
    tool.run(input_files, metadata, output_files)

    chinput_file = output_files["chinput"]
    assert os.path.isfile(chinput_file) is True
    assert os.path.getsize(chinput_file) > 0

Example #27

0

Show file

File: inps.py Project: markmcdowall/mg-process-fastq

    def run(self, input_files, input_metadata, output_files):
        """
        Call peaks with iNPS on a BAM file and describe the resulting BED
        file.

        Parameters
        ----------
        input_files : dict
            Must contain "bam", the location of the BAM file to analyse.
        input_metadata : dict
            Must contain "bam", the Metadata object matching the BAM file.
        output_files : dict
            Must contain "bed", the location for the BED output file.

        Returns
        -------
        output_files : dict
            The same dict that was passed in.
        output_metadata : dict
            Metadata object for the "bed" output file.

        """
        # Configuration key -> iNPS command line flag. Only keys present in
        # the configuration are forwarded, in this order.
        optional_flags = (
            ("inps_sp_param", "--s_p"),
            ("inps_pe_max_param", "--pe_max"),
            ("inps_pe_min_param", "--pe_min"),
        )

        command_params = []
        for config_key, flag in optional_flags:
            if config_key in self.configuration:
                command_params.extend(
                    [flag, str(self.configuration[config_key])])

        results = self.inps_peak_calling(
            input_files["bam"], output_files["bed"], command_params)
        results = compss_wait_on(results)

        bam_meta = input_metadata["bam"]
        bed_meta = Metadata(
            data_type=bam_meta.data_type,
            file_type="BED",
            file_path=output_files["bed"],
            sources=[bam_meta.file_path],
            taxon_id=bam_meta.taxon_id,
            meta_data={
                "assembly": bam_meta.meta_data["assembly"],
                "tool": "inps",
            })

        output_metadata = {"bed": bed_meta}

        return (output_files, output_metadata)

Example #28

0

Show file

    def run(self, input_files, metadata, output_files):
        """
        Digest the genome, run the HiCUP alignment/filtering step and build
        the metadata for the resulting tar archive.

        Parameters
        ----------
        input_files: dict
            Must contain "genome_fa", "bowtie_gen_idx", "fastq1" and
            "fastq2".
        metadata: dict
            Must contain Metadata objects under "genome_fa", "fastq1" and
            "fastq2".
        output_files: dict
            Must contain "hicup_outdir_tar", the location of the output tar
            file.

        Returns
        -------
        output_files : dict
            The same dict that was passed in.
        output_metadata : dict
            Metadata object for the "hicup_outdir_tar" output file.
        """
        output_dir = os.path.split(output_files["hicup_outdir_tar"])[0]
        # An empty output_dir means the tar file has no directory component,
        # so there is nothing to create. makedirs also creates any missing
        # parent directories, which os.mkdir would fail on.
        if output_dir and not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        # Several restriction enzymes are passed on as one ":"-joined string.
        if isinstance(self.configuration["hicup_renzyme"], list) is True:
            re_enzyme = ":".join(self.configuration["hicup_renzyme"])
        else:
            re_enzyme = self.configuration["hicup_renzyme"]

        # "enzyme2" is the fallback when no second enzyme name is configured.
        if "renzyme_name2" in self.configuration:
            renzyme_name2 = self.configuration["renzyme_name2"]
        else:
            renzyme_name2 = "enzyme2"

        genome_d = self.digest_genome(self.configuration["genome_name"],
                                      re_enzyme, input_files["genome_fa"],
                                      renzyme_name2)

        parameters_hicup = self.get_hicup_params(self.configuration)

        variable = self.hicup_alig_filt(  # pylint: disable=too-many-locals,too-many-arguments
            parameters_hicup, genome_d, input_files["bowtie_gen_idx"],
            input_files["genome_fa"], input_files["fastq1"],
            input_files["fastq2"], output_files["hicup_outdir_tar"])

        # The digest produced above is removed once the HiCUP step returns.
        os.remove(genome_d)
        #variable = compss_wait_on(variable)

        output_metadata = {
            "hicup_outdir_tar":
            Metadata(data_type="data_CHiC",
                     file_type="TAR",
                     file_path=output_files["hicup_outdir_tar"],
                     # Both FASTQ inputs are sources of the archive; the
                     # second entry previously repeated fastq1.
                     sources=[
                         metadata["genome_fa"].file_path,
                         metadata["fastq1"].file_path,
                         metadata["fastq2"].file_path
                     ],
                     taxon_id=metadata["genome_fa"].taxon_id,
                     meta_data={"tool": "hicup_tool"})
        }

        return output_files, output_metadata

Example #29

0

Show file

File: test_process_chicago.py Project: Multiscale-Genomics/CHi-C

def test_process_chicago():
    """
    Run the CHiCAGO pipeline on the bundled test data and check that a
    non-empty output tar file is written.
    """
    path = os.path.join(os.path.dirname(__file__), "data/")
    chicago_data = path + "test_run_chicago/data_chicago/"
    design_prefix = chicago_data + "h19_chr20and21"

    input_files = {
        "chinput": chicago_data + "GM_rep1.chinput",
        "setting_file": chicago_data + "sGM12878.settingsFile",
        "rmap_chicago": design_prefix + ".rmap",
        "baitmap_chicago": design_prefix + ".baitmap",
        "nbpb_chicago": design_prefix + ".nbpb",
        "poe_chicago": design_prefix + ".poe",
    }

    output_files = {"output": chicago_data + "out_run_chicago.tar"}

    metadata = {
        "chinput": Metadata("data_chicago", "chinput", [], None, None, 9606),
    }

    # All option values are passed as strings; unset options use the literal
    # string "None".
    unset = "None"
    config = {
        "chicago_design_dir": path + "/test_run_chicago/data_chicago",
        "chicago_print_memory": unset,
        "chicago_out_prefix": "output_test",
        "chicago_cutoff": "5",
        "chicago_export_format": "washU_text",
        "chicago_export_order": unset,
        "chicago_rda": unset,
        "chicago_save_df_only": unset,
        "chicago_examples_prox_dist": "1e6",
        "chicago_examples_full_range": unset,
        "chicago_en_feat_files": unset,
        "chicago_en_min_dist": "0",
        "chicago_en_max_dist": "1e6",
        "chicago_en_full_cis_range": unset,
        "chicago_en_sample_no": "100",
        "chicago_en_trans": unset,
        "chicago_features_only": unset,
    }

    pipeline = process_run_chicago(config)
    pipeline.run(input_files, metadata, output_files)

    result_tar = output_files["output"]
    assert os.path.isfile(result_tar) is True
    assert os.path.getsize(result_tar) > 0

Example #30

0

Show file

def test_bwa_aligner_mnaseseq():
    """
    Test that the BWA aligner tool writes a non-empty BAM file for the
    ``inps.Mouse`` MNase-seq test data.
    """
    resource_path = os.path.join(os.path.dirname(__file__), "data/")
    genome_fa = resource_path + "inps.Mouse.GRCm38.fasta"
    fastq_file = resource_path + "inps.Mouse.DRR000386.fastq"

    input_files = {
        "genome": genome_fa,
        "index": genome_fa + ".bwa.tar.gz",
        "loc": fastq_file
    }

    # The BAM file is written next to the FASTQ file, with the extension
    # swapped.
    output_files = {
        "output": fastq_file.replace(".fastq", ".bam")
    }

    metadata = {
        "genome": Metadata(
            "Assembly", "fasta", genome_fa, None,
            {"assembly": "test"}),
        # NOTE(review): this call passes only four positional arguments, so
        # the dict lands in the 4th position rather than the 5th used by the
        # other entries - confirm against the Metadata signature.
        "index": Metadata(
            "index_bwa", "", [genome_fa],
            {
                "assembly": "test",
                "tool": "bwa_indexer"
            }
        ),
        "loc": Metadata(
            "data_chip_seq", "fastq", fastq_file, None,
            {"assembly": "test"}
        )
    }

    bwa_t = bwaAlignerTool()
    bwa_t.run(input_files, metadata, output_files)

    print(__file__)

    # Same path as output_files["output"], spelled out explicitly.
    assert os.path.isfile(resource_path + "inps.Mouse.DRR000386.bam") is True
    assert os.path.getsize(resource_path + "inps.Mouse.DRR000386.bam") > 0