Python reroot_pathの例、paleomix.common.fileutils.reroot_path Pythonの例

コード例 #1

0

ファイルを表示

ファイル: pipeline.py プロジェクト: MikkelSchubert/paleomix

def setup_example(config):
    """Extract the example project stored in the sample database.

    Every regular file located directly inside the 'examples' folder of
    the tar archive at config.tablefile is copied to the path returned by
    fileutils.reroot_path(root, member.name), where root is
    <config.destination>/zonkey_pipeline. Nothing is written if any of
    these destination paths already exists, or if the archive contains no
    example files.

    Returns 0 on success and 1 on failure.
    """
    root = os.path.join(config.destination, 'zonkey_pipeline')

    with tarfile.TarFile(config.tablefile) as tar_handle:
        example_files = []
        existing_files = []
        for member in tar_handle.getmembers():
            if os.path.dirname(member.name) == 'examples' and member.isfile():
                example_files.append(member)

                destination = fileutils.reroot_path(root, member.name)
                if os.path.exists(destination):
                    existing_files.append(destination)

        if existing_files:
            print_err("Output files already exist at destination:\n    - %s"
                      % ("\n    - ".join(map(repr, existing_files))))
            return 1
        elif not example_files:
            print_err("Sample database %r does not contain example data; "
                      "cannot proceed." % (config.tablefile,))
            return 1

        if not os.path.exists(root):
            fileutils.make_dirs(root)

        for member in example_files:
            destination = fileutils.reroot_path(root, member.name)
            src_handle = tar_handle.extractfile(member)
            try:
                # extractfile() yields raw bytes, so the destination must be
                # opened in binary mode; text mode fails on Python 3.
                with open(destination, 'wb') as out_handle:
                    shutil.copyfileobj(src_handle, out_handle)
            finally:
                src_handle.close()

    print_info("Sucessfully saved example data in %r" % (root,))

    return 0

コード例 #2

0

ファイルを表示

ファイル: formats.py プロジェクト: tmancill/paleomix

 def _teardown(self, _config, temp):
     """Move the '.phy' and '.partitions' files to their final paths.

     Each file is moved from the path reroot_path() gives for it under
     temp to self._out_prefix plus the extension.
     """
     for extension in (".phy", ".partitions"):
         final_path = self._out_prefix + extension
         move_file(reroot_path(temp, final_path), final_path)

コード例 #3

0

ファイルを表示

    def _run(self, _config, temp):
        """Read all input files into one coverage table and write it out.

        The table is first written to the rerooted path under temp and
        then moved to self._output_file.
        """
        merged = {}
        for input_path in self.input_files:
            coverage.read_table(merged, input_path)

        temp_output = reroot_path(temp, self._output_file)
        coverage.write_table(merged, temp_output)
        move_file(temp_output, self._output_file)

コード例 #4

0

ファイルを表示

def setup_example(config):
    """Copy the bundled example files out of the sample database.

    The tar archive at config.tablefile is scanned for regular files whose
    directory is exactly 'examples'. Each one is written to the path
    returned by fileutils.reroot_path(root, member.name), with root being
    <config.destination>/zonkey_pipeline. The function refuses to run if
    any destination already exists or if no example files were found.

    Returns 0 on success and 1 on failure.
    """
    root = os.path.join(config.destination, 'zonkey_pipeline')

    with tarfile.TarFile(config.tablefile) as tar_handle:
        example_files = []
        existing_files = []
        for member in tar_handle.getmembers():
            if os.path.dirname(member.name) == 'examples' and member.isfile():
                example_files.append(member)

                destination = fileutils.reroot_path(root, member.name)
                if os.path.exists(destination):
                    existing_files.append(destination)

        if existing_files:
            print_err("Output files already exist at destination:\n    - %s"
                      % ("\n    - ".join(map(repr, existing_files))))
            return 1
        elif not example_files:
            print_err("Sample database %r does not contain example data; "
                      "cannot proceed." % (config.tablefile,))
            return 1

        if not os.path.exists(root):
            fileutils.make_dirs(root)

        for member in example_files:
            destination = fileutils.reroot_path(root, member.name)
            src_handle = tar_handle.extractfile(member)
            try:
                # The archive member is read as bytes; writing it through a
                # text-mode handle raises TypeError on Python 3.
                with open(destination, 'wb') as out_handle:
                    shutil.copyfileobj(src_handle, out_handle)
            finally:
                src_handle.close()

    print_info("Sucessfully saved example data in %r" % (root,))

    return 0

コード例 #5

0

ファイルを表示

    def _teardown(self, config, temp):
        """Write the parameters file and an empty parameters-hash file.

        Both files are created at their rerooted paths under temp before
        the base-class teardown is invoked.
        """
        params_path = fileutils.reroot_path(temp, self._params_file)
        with open(params_path, "w") as handle:
            handle.write("k: %i\n" % (self._param_k,))
            handle.write("m: %i\n" % (self._param_m,))
            handle.write("outgroup: %r\n" % (list(self._param_outgroup),))

        # Create (or truncate) the hash file without writing anything to it
        hash_path = fileutils.reroot_path(temp, self._parameters_hash)
        with open(hash_path, "w"):
            pass

        CommandNode._teardown(self, config, temp)

コード例 #6

0

ファイルを表示

ファイル: nuclear.py プロジェクト: muslih14/paleomix

    def _teardown(self, config, temp):
        """Record k, m and the outgroup, then touch the parameters-hash file.

        The files are written to the paths fileutils.reroot_path() returns
        for them under temp; CommandNode._teardown runs afterwards.
        """
        temp_params = fileutils.reroot_path(temp, self._params_file)
        with open(temp_params, "w") as params_handle:
            params_handle.write("k: %i\n" % (self._param_k,))
            params_handle.write("m: %i\n" % (self._param_m,))
            outgroup = list(self._param_outgroup)
            params_handle.write("outgroup: %r\n" % (outgroup,))

        # Opening in "w" mode and closing at once leaves an empty file
        temp_hash = fileutils.reroot_path(temp, self._parameters_hash)
        hash_handle = open(temp_hash, "w")
        hash_handle.close()

        CommandNode._teardown(self, config, temp)

コード例 #7

0

ファイルを表示

    def _teardown(self, config, temp):
        """Advance the temporary alignment and partition files to their
        output names.

        For both "ALIGNMENT" and "PARTITION", the chain
        TEMP_IN -> TEMP_IN + ".reduced" -> OUT is walked inside temp: each
        file is copied to the next name unless that name already exists,
        after which the earlier file is removed.
        """
        for postfix in ("ALIGNMENT", "PARTITION"):
            temp_input = self._kwargs["TEMP_IN_" + postfix]
            names = (temp_input,
                     temp_input + ".reduced",
                     self._kwargs["OUT_" + postfix])
            steps = [fileutils.reroot_path(temp, name) for name in names]

            current = steps[0]
            for following in steps[1:]:
                if not os.path.exists(following):
                    fileutils.copy_file(current, following)
                os.remove(current)
                current = following

        CommandNode._teardown(self, config, temp)

コード例 #8

0

ファイルを表示

    def _check_output_files(cls, output_files):
        """Yield an error message for every output file that more than
        one node creates.

        Only the directory part of each path is resolved with
        os.path.realpath, so that the same file reached through different
        paths (relative vs. absolute, or via symbolic links) is detected.
        The files themselves are not compared, since output files are
        replaced rather than modified in place.
        """
        resolved_dirs = {}
        nodes_by_path = {}
        for (filename, nodes) in output_files.items():
            dirname = os.path.dirname(filename)
            try:
                real_dirname = resolved_dirs[dirname]
            except KeyError:
                real_dirname = os.path.realpath(dirname)
                resolved_dirs[dirname] = real_dirname

            real_path = reroot_path(real_dirname, filename)
            nodes_by_path.setdefault(real_path, []).extend(nodes)

        for (filename, nodes) in nodes_by_path.items():
            if len(nodes) <= 1:
                continue

            summary = "\n\t       ".join(_summarize_nodes(nodes))
            yield (
                "Multiple nodes create the same (clobber) output-file:\n"
                "\tFilename: %s\n\tNodes: %s" % (filename, summary))

コード例 #9

0

ファイルを表示

ファイル: nuclear.py プロジェクト: muslih14/paleomix

    def _setup(self, config, temp):
        """Link the input files into temp and, if supervised, write a
        '.pop' file.

        After the base-class setup, the input file and its '.bim' / '.fam'
        counterparts are symlinked into temp under their basenames. In
        supervised mode a '.pop' file is written to temp with one line per
        line of the '.fam' file, holding the sample's "Group(k)" value or
        "-" when none is recorded.
        """
        CommandNode._setup(self, config, temp)

        linked_files = [self._input_file]
        for extension in (".bim", ".fam"):
            linked_files.append(fileutils.swap_ext(self._input_file, extension))

        for source in linked_files:
            link_path = os.path.join(temp, os.path.basename(source))
            os.symlink(os.path.abspath(source), link_path)

        if not self._supervised:
            return

        fam_filename = fileutils.swap_ext(self._input_file, ".fam")
        pop_filename = fileutils.reroot_path(
            temp, fileutils.swap_ext(fam_filename, ".pop"))

        key = "Group(%i)" % (self._k_groups,)
        with open(fam_filename) as fam_handle:
            with open(pop_filename, "w") as pop_handle:
                for line in fam_handle:
                    # Only the first whitespace-separated field is needed
                    sample, _rest = line.split(None, 1)
                    label = self._samples.get(sample, {}).get(key, "-")

                    pop_handle.write("%s\n" % (label,))

コード例 #10

0

ファイルを表示

ファイル: samtools.py プロジェクト: muslih14/paleomix

    def _setup(self, config, temp):
        """Symlink the input file into temp, then run CommandNode._setup."""
        os.symlink(os.path.abspath(self._infile),
                   reroot_path(temp, self._infile))

        CommandNode._setup(self, config, temp)

コード例 #11

0

ファイルを表示

    def _setup(self, config, temp):
        """Prepare temp with symlinked inputs and an optional '.pop' file.

        CommandNode._setup runs first. The input file plus the files with
        its extension swapped for '.bim' and '.fam' are then symlinked into
        temp. When self._supervised is set, each line of the '.fam' file
        yields one line in a '.pop' file in temp: the sample's "Group(k)"
        entry from self._samples, or "-" if there is none.
        """
        CommandNode._setup(self, config, temp)

        fam_filename = fileutils.swap_ext(self._input_file, ".fam")
        sources = (
            self._input_file,
            fileutils.swap_ext(self._input_file, ".bim"),
            fam_filename,
        )

        for source in sources:
            target = os.path.abspath(source)
            os.symlink(target, os.path.join(temp, os.path.basename(source)))

        if self._supervised:
            pop_filename = fileutils.swap_ext(fam_filename, ".pop")
            pop_filename = fileutils.reroot_path(temp, pop_filename)
            group_key = "Group(%i)" % (self._k_groups, )

            with open(fam_filename) as fam_handle:
                with open(pop_filename, "w") as pop_handle:
                    for fam_line in fam_handle:
                        sample, _ = fam_line.split(None, 1)
                        sample_info = self._samples.get(sample, {})

                        pop_handle.write(
                            "%s\n" % (sample_info.get(group_key, "-"), ))

コード例 #12

0

ファイルを表示

ファイル: samtools.py プロジェクト: tmancill/paleomix

    def _setup(self, config, temp):
        """Create a symlink to the input file in temp before the base setup.

        The link is placed at reroot_path(temp, self._infile) and points at
        the absolute path of self._infile.
        """
        link_target = os.path.abspath(self._infile)
        link_path = reroot_path(temp, self._infile)
        os.symlink(link_target, link_path)

        CommandNode._setup(self, config, temp)

コード例 #13

0

ファイルを表示

ファイル: mafft.py プロジェクト: muslih14/paleomix

 def _teardown(self, config, temp):
     """Check that the output file in temp can be read as an MSA.

     Raises NodeError if MSA.from_file raises MSAError for that file.
     """
     # Validate output from MAFFT
     output_file = reroot_path(temp, self._output_file)
     try:
         MSA.from_file(output_file)
     except MSAError as error:
         # "except X as e" is valid on Python 2.6+ and Python 3, whereas
         # the old "except X, e" form is a syntax error on Python 3.
         raise NodeError("Invalid MSA produced by MAFFT:\n%s" % (error,))

コード例 #14

0

ファイルを表示

ファイル: nodegraph.py プロジェクト: MikkelSchubert/paleomix

    def _check_output_files(cls, output_files):
        """Checks dict of output files to nodes for cases where
        multiple nodes create the same output file.

        The directory component of paths are realized in order to
        detect cases where nodes create the same file, but via
        different paths (e.g. due to relative/absolute paths, or
        due to use of symbolic links). Since output files are
        replaced, not modified in place, it is not necessary to
        compare files themselves.

        Yields one error message (a string) for each such file."""
        dirpath_cache, real_output_files = {}, {}
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only
        for (filename, nodes) in output_files.items():
            dirpath = os.path.dirname(filename)
            if dirpath not in dirpath_cache:
                dirpath_cache[dirpath] = os.path.realpath(dirpath)

            real_output_file = reroot_path(dirpath_cache[dirpath], filename)
            real_output_files.setdefault(real_output_file, []).extend(nodes)

        for (filename, nodes) in real_output_files.items():
            if len(nodes) > 1:
                nodes = _summarize_nodes(nodes)
                yield "Multiple nodes create the same (clobber) output-file:" \
                      "\n\tFilename: %s\n\tNodes: %s" \
                      % (filename, "\n\t       ".join(nodes))

コード例 #15

0

ファイルを表示

 def _teardown(self, config, temp):
     """Validate the MSA file produced in temp.

     The file at reroot_path(temp, self._output_file) is parsed with
     MSA.from_file; an MSAError is re-raised as a NodeError.
     """
     # Validate output from MAFFT
     output_file = reroot_path(temp, self._output_file)
     try:
         MSA.from_file(output_file)
     except MSAError as error:
         # The comma form ("except MSAError, error") does not parse on
         # Python 3; the "as" form works on Python 2.6+ and 3 alike.
         raise NodeError("Invalid MSA produced by MAFFT:\n%s" % (error,))

コード例 #16

0

ファイルを表示

ファイル: examl.py プロジェクト: jelber2/paleomix

    def _teardown(self, config, temp):
        """Remove the RAxML info file and rename the parsimony tree.

        "RAxML_parsimonyTree.output.0" in temp is moved to the rerooted
        path of self._output_tree in temp before the base-class teardown.
        """
        os.remove(os.path.join(temp, "RAxML_info.output"))

        fileutils.move_file(
            os.path.join(temp, "RAxML_parsimonyTree.output.0"),
            fileutils.reroot_path(temp, self._output_tree),
        )

        CommandNode._teardown(self, config, temp)

コード例 #17

0

ファイルを表示

ファイル: examl.py プロジェクト: MikkelSchubert/paleomix

    def _teardown(self, config, temp):
        """Delete "RAxML_info.output" and give the tree file its output name.

        The tree is moved within temp, from "RAxML_parsimonyTree.output.0"
        to fileutils.reroot_path(temp, self._output_tree); the base-class
        teardown runs last.
        """
        info_file = os.path.join(temp, "RAxML_info.output")
        os.remove(info_file)

        tree_file = os.path.join(temp, "RAxML_parsimonyTree.output.0")
        renamed_tree = fileutils.reroot_path(temp, self._output_tree)
        fileutils.move_file(tree_file, renamed_tree)

        CommandNode._teardown(self, config, temp)

コード例 #18

0

ファイルを表示

ファイル: sequences.py プロジェクト: tmancill/paleomix

    def _run(self, _config, temp):
        """Apply filter_singletons for every entry of self._filter_by and
        save the resulting alignment.

        The alignment is written to the rerooted path in temp and then
        moved to self._output_file.
        """
        msa = MSA.from_file(self._input_file)
        for (to_filter, groups) in self._filter_by.items():
            msa = msa.filter_singletons(to_filter, groups)

        staged_path = fileutils.reroot_path(temp, self._output_file)
        with open(staged_path, "w") as out_handle:
            msa.to_file(out_handle)

        fileutils.move_file(staged_path, self._output_file)

コード例 #19

0

ファイルを表示

ファイル: sequences.py プロジェクト: MikkelSchubert/paleomix

    def _run(self, _config, temp):
        """Filter singletons from the input alignment and write the result.

        filter_singletons is applied once per (to_filter, groups) entry of
        self._filter_by. The filtered alignment is written to the rerooted
        path in temp and then moved to self._output_file.
        """
        alignment = MSA.from_file(self._input_file)
        # items() works on Python 2 and 3; iteritems() is Python 2 only
        for (to_filter, groups) in self._filter_by.items():
            alignment = alignment.filter_singletons(to_filter, groups)

        temp_filename = fileutils.reroot_path(temp, self._output_file)
        with open(temp_filename, "w") as handle:
            alignment.to_file(handle)
        fileutils.move_file(temp_filename, self._output_file)

コード例 #20

0

ファイルを表示

 def _teardown(self, config, temp):
     """Rename every bootstrap file inside temp, then run the base teardown.

     self._bootstraps() supplies (source, destination) name pairs; the
     source is looked up directly in temp and moved to the rerooted
     destination path in temp.
     """
     pairs = self._bootstraps(self._output_template,
                              self._bootstrap_num,
                              self._bootstrap_start)
     for (temp_name, final_name) in pairs:
         fileutils.move_file(os.path.join(temp, temp_name),
                             fileutils.reroot_path(temp, final_name))
     CommandNode._teardown(self, config, temp)

コード例 #21

0

ファイルを表示

ファイル: paml.py プロジェクト: muslih14/paleomix

 def _run(self, config, temp):
     """Run the command, adding detail to the error if it gave up.

     If CommandNode._run raises NodeError and the command's return codes
     are [1, None], the last line of "template.stdout" in temp is
     inspected. When it contains "Giving up.", the raised NodeError
     carries the original message followed by that line.
     """
     try:
         CommandNode._run(self, config, temp)
     except NodeError as error:
         # "except X as e" replaces the Python 2-only "except X, e" form
         if self._command.join() == [1, None]:
             with open(fileutils.reroot_path(temp, "template.stdout")) as handle:
                 lines = handle.readlines()
             if lines and ("Giving up." in lines[-1]):
                 error = NodeError("%s\n\n%s" % (error, lines[-1]))
         raise error

コード例 #22

0

ファイルを表示

ファイル: paml.py プロジェクト: jelber2/paleomix

 def _run(self, config, temp):
     """Run the command and enrich "Giving up." failures.

     A NodeError from CommandNode._run is always re-raised. If the
     command's return codes equal [1, None] and the final line of
     "template.stdout" in temp contains "Giving up.", that line is
     appended to the error message first.
     """
     try:
         CommandNode._run(self, config, temp)
     except NodeError as error:
         # The comma form of "except" is a syntax error on Python 3
         if self._command.join() == [1, None]:
             with open(fileutils.reroot_path(temp,
                                             "template.stdout")) as handle:
                 lines = handle.readlines()
             if lines and ("Giving up." in lines[-1]):
                 error = NodeError("%s\n\n%s" % (error, lines[-1]))
         raise error

コード例 #23

0

ファイルを表示

ファイル: formats.py プロジェクト: muslih14/paleomix

    def _run(self, _config, temp):
        """Write all input MSAs as sequential PHYLIP blocks to one file.

        Input files are read in sorted order and validated together with
        MSA.validate; the blocks are separated by a blank line in the
        output written under temp.
        """
        # Read and check that MSAs share groups
        msas = []
        for filename in sorted(self.input_files):
            msas.append(MSA.from_file(filename))
        MSA.validate(*msas)

        blocks = [sequential_phy(msa, add_flag=self._add_flag) for msa in msas]

        with open(reroot_path(temp, self._out_phy), "w") as output:
            output.write("\n\n".join(blocks))

コード例 #24

0

ファイルを表示

ファイル: pipeline.py プロジェクト: tmancill/paleomix

def setup_example(config):
    """Copy the example project from the sample database to disk.

    Regular files directly inside the 'examples' folder of the tar archive
    at config.database.filename are written to the paths returned by
    fileutils.reroot_path(root, member.name), where root is
    <config.destination>/zonkey_pipeline. Errors are logged and 1 is
    returned if any destination already exists or if the archive holds no
    example files; otherwise 0 is returned.
    """
    log = logging.getLogger(__name__)
    root = os.path.join(config.destination, "zonkey_pipeline")
    log.info("Copying example project to %r", root)

    with tarfile.TarFile(config.database.filename) as tar_handle:
        # Pairs of (archive member, destination path)
        targets = []
        for member in tar_handle.getmembers():
            if os.path.dirname(member.name) != "examples" or not member.isfile():
                continue

            targets.append((member, fileutils.reroot_path(root, member.name)))

        existing_files = [path for (_, path) in targets if os.path.exists(path)]
        if existing_files:
            log.error("Output files already exist at destination:")
            for filename in sorted(existing_files):
                log.error(" - %r", filename)
            return 1

        if not targets:
            log.error(
                "Sample database %r does not contain example data; cannot proceed.",
                config.database.filename,
            )
            return 1

        if not os.path.exists(root):
            fileutils.make_dirs(root)

        for (member, destination) in targets:
            src_handle = tar_handle.extractfile(member)
            with open(destination, "wb") as out_handle:
                shutil.copyfileobj(src_handle, out_handle)

    log.info("Sucessfully saved example data in %r", root)

    return 0

コード例 #25

0

ファイルを表示

ファイル: msa.py プロジェクト: tmancill/paleomix

def build_msa_nodes(options, settings, regions, filtering, dependencies):
    """Build the nodes that collect, optionally align and optionally
    filter the sequences of one set of regions.

    A CollectSequencesNode is always created. If settings["Enabled"] is
    set, one MAFFTNode is created per sequence, depending on it. If any
    value in filtering is true, a FilterSingletonsNode is created per
    FASTA file, writing to "<sequencedir>.filtered", and those nodes are
    returned; otherwise the collect / MAFFT nodes are returned.

    Raises RuntimeError if settings["Program"] is not "mafft"
    (case-insensitive).
    """
    if settings["Program"].lower() != "mafft":
        raise RuntimeError("Only MAFFT support has been implemented!")

    sequencedir = os.path.join(options.destination, "alignments",
                               regions["Name"])
    # Run on full set of sequences
    sequences = regions["Sequences"][None]

    collect_node = CollectSequencesNode(
        fasta_files=regions["Genotypes"],
        destination=sequencedir,
        sequences=sequences,
        dependencies=dependencies,
    )

    if not settings["Enabled"]:
        fasta_files = {
            filename: collect_node for filename in collect_node.output_files
        }
    else:
        fasta_files = {}
        mafft_options = settings["MAFFT"]
        algorithm = mafft_options["Algorithm"]
        for sequence in sequences:
            unaligned = os.path.join(sequencedir, sequence + ".fasta")
            aligned = os.path.join(sequencedir, sequence + ".afa")

            fasta_files[aligned] = MAFFTNode(
                input_file=unaligned,
                output_file=aligned,
                algorithm=algorithm,
                options=settings["MAFFT"],
                dependencies=collect_node,
            )

    if not any(filtering.values()):
        return list(fasta_files.values())

    destination = sequencedir + ".filtered"
    filtering = dict(filtering)

    return [
        FilterSingletonsNode(
            input_file=filename,
            output_file=fileutils.reroot_path(destination, filename),
            filter_by=filtering,
            dependencies=source_node,
        )
        for (filename, source_node) in fasta_files.items()
    ]

コード例 #26

0

ファイルを表示

    def _run(self, _config, temp):
        """Convert the input MSAs to sequential PHYLIP and write them to
        a single file in temp, separated by blank lines.

        The inputs are read in sorted filename order and checked with
        MSA.validate before anything is written.
        """
        # Read and check that MSAs share groups
        msas = list(map(MSA.from_file, sorted(self.input_files)))
        MSA.validate(*msas)

        blocks = [sequential_phy(msa, add_flag=self._add_flag) for msa in msas]
        contents = "\n\n".join(blocks)

        with open(reroot_path(temp, self._out_phy), "w") as output:
            output.write(contents)

コード例 #27

0

ファイルを表示

ファイル: formats.py プロジェクト: muslih14/paleomix

    def _run(self, _config, temp):
        """Merge the input MSAs per partition and write the '.phy' and
        '.partitions' files to temp.

        For each named entry of self._infiles (in sorted order), every
        input MSA is split by the entry's partitions and the parts are
        joined per partition key; parts under the key "X" are discarded.
        The joined MSAs are written as one interleaved PHYLIP file, and
        the partitions file lists the 1-based column range of each.
        """
        merged_msas = []
        # items() is used instead of the Python 2-only iteritems()
        for (name, files_dd) in sorted(self._infiles.items()):
            partitions = files_dd["partitions"]
            msas = dict((key, []) for key in partitions)
            for filename in files_dd["filenames"]:
                msa = MSA.from_file(filename)
                if self._excluded:
                    msa = msa.exclude(self._excluded)

                for (key, msa_part) in msa.split(partitions).items():
                    msas[key].append(msa_part)

            msas.pop("X", None)
            for (key, msa_parts) in sorted(msas.items()):
                merged_msa = MSA.join(*msa_parts)
                if self._reduce:
                    merged_msa = merged_msa.reduce()

                if merged_msa is not None:
                    merged_msas.append(("%s_%s" % (name, key),
                                        merged_msa))

        out_fname_phy = reroot_path(temp, self._out_prefix + ".phy")
        with open(out_fname_phy, "w") as output_phy:
            final_msa = MSA.join(*(msa for (_, msa) in merged_msas))
            output_phy.write(interleaved_phy(final_msa))

        partition_end = 0
        out_fname_parts = reroot_path(temp, self._out_prefix + ".partitions")
        with open(out_fname_parts, "w") as output_part:
            for (name, msa) in merged_msas:
                length = msa.seqlen()
                output_part.write("DNA, %s = %i-%i\n"
                                  % (name,
                                     partition_end + 1,
                                     partition_end + length))
                partition_end += length

コード例 #28

0

ファイルを表示

ファイル: formats.py プロジェクト: tmancill/paleomix

    def _run(self, _config, temp):
        """Join the input MSAs per partition and write the interleaved
        PHYLIP file plus its partitions file to temp.

        Entries of self._infiles are handled in sorted order. Each input
        MSA is split by the entry's partitions and the parts are joined
        per key, skipping the key "X". The partitions file records the
        1-based, inclusive column range each joined MSA occupies.
        """
        named_msas = []
        for (name, files_dd) in sorted(self._infiles.items()):
            partitions = files_dd["partitions"]
            parts_by_key = {key: [] for key in partitions}
            for filename in files_dd["filenames"]:
                msa = MSA.from_file(filename)
                if self._excluded:
                    msa = msa.exclude(self._excluded)

                for (key, msa_part) in msa.split(partitions).items():
                    parts_by_key[key].append(msa_part)

            parts_by_key.pop("X", None)
            for (key, msa_parts) in sorted(parts_by_key.items()):
                joined = MSA.join(*msa_parts)
                if self._reduce:
                    joined = joined.reduce()

                if joined is None:
                    continue

                named_msas.append(("%s_%s" % (name, key), joined))

        phy_path = reroot_path(temp, self._out_prefix + ".phy")
        with open(phy_path, "w") as output_phy:
            final_msa = MSA.join(*(msa for (_, msa) in named_msas))
            output_phy.write(interleaved_phy(final_msa))

        parts_path = reroot_path(temp, self._out_prefix + ".partitions")
        with open(parts_path, "w") as output_part:
            first = 1
            for (name, msa) in named_msas:
                last = first + msa.seqlen() - 1
                output_part.write("DNA, %s = %i-%i\n" % (name, first, last))
                first = last + 1

コード例 #29

0

ファイルを表示

ファイル: misc.py プロジェクト: muslih14/paleomix

    def __init__(self, description, destination, source_nodes):
        """Set up a node mapping the outputs of source_nodes to destination.

        The output files of all source nodes become this node's input
        files; each is paired with the path reroot_path() returns for it
        under destination, and the pairs are stored in self._files.
        """
        source_nodes = safe_coerce_to_tuple(source_nodes)

        input_files = []
        for source_node in source_nodes:
            input_files.extend(source_node.output_files)

        output_files = [reroot_path(destination, fpath) for fpath in input_files]
        # On Python 3, zip() returns a one-shot iterator that would be empty
        # after the first pass; store a list so the pairs can be re-read.
        self._files = list(zip(input_files, output_files))

        Node.__init__(self,
                      description="<Copy %s output to %r>" % (description, destination),
                      input_files=input_files,
                      output_files=output_files,
                      dependencies=source_nodes)

コード例 #30

0

ファイルを表示

ファイル: common.py プロジェクト: jelber2/paleomix

    def _run(self, config, temp):
        """Write a tab-separated Name / Group / Color table to temp.

        One row is written per sample, in sorted name order, using the
        sample's "Group(3)" value and the color paired with that group.
        A final row "Sample", "-", "#000000" is always appended.
        """
        output_file, = self.output_files
        samples = self._samples
        # values() / items() replace the Python 2-only itervalues() /
        # iteritems(), which do not exist on Python 3 dicts.
        # NOTE(review): groups is a set, so its iteration order decides
        # which color each group is paired with.
        groups = set(sample["Group(3)"] for sample in samples.values())
        colors = dict(zip(groups, _DEFAULT_COLORS))

        with open(fileutils.reroot_path(temp, output_file), "w") as handle:
            handle.write("Name\tGroup\tColor\n")

            for name, sample in sorted(samples.items()):
                group = sample["Group(3)"]
                color = colors[group]

                handle.write("%s\t%s\t%s\n" % (name, group, color))

            handle.write("Sample\t-\t#000000\n")

コード例 #31

0

ファイルを表示

ファイル: common.py プロジェクト: muslih14/paleomix

    def _run(self, config, temp):
        """Write the sample table (Name, Group, Color) to the output file
        in temp.

        Samples are listed in sorted order with their "Group(3)" value and
        the color assigned to that group, followed by a fixed row for
        "Sample" with group "-" and color "#000000".
        """
        output_file, = self.output_files
        samples = self._samples
        # dict.itervalues() / iteritems() were removed in Python 3;
        # values() / items() behave the same here on both versions.
        # NOTE(review): colors are paired with groups in set iteration order.
        groups = set(sample["Group(3)"] for sample in samples.values())
        colors = dict(zip(groups, _DEFAULT_COLORS))

        with open(fileutils.reroot_path(temp, output_file), "w") as handle:
            handle.write("Name\tGroup\tColor\n")

            for name, sample in sorted(samples.items()):
                group = sample["Group(3)"]
                color = colors[group]

                handle.write("%s\t%s\t%s\n" % (name, group, color))

            handle.write("Sample\t-\t#000000\n")

コード例 #32

0

ファイルを表示

ファイル: msa.py プロジェクト: MikkelSchubert/paleomix

def build_msa_nodes(options, settings, regions, filtering, dependencies):
    """Build nodes that collect, optionally align and optionally filter
    the sequences for one set of regions.

    A CollectSequencesNode is always created. If settings["Enabled"] is
    set, one customized MAFFT node is built per sequence. If any value in
    filtering is true, one FilterSingletonsNode is created per FASTA file,
    writing to "<sequencedir>.filtered".

    Returns a list of nodes: the filter nodes when filtering is requested,
    otherwise the collect / MAFFT nodes.

    Raises RuntimeError if settings["Program"] is not "mafft"
    (case-insensitive).
    """
    if settings["Program"].lower() != "mafft":
        raise RuntimeError("Only MAFFT support has been implemented!")

    sequencedir = os.path.join(options.destination, "alignments", regions["Name"])
    # Run on full set of sequences
    sequences = regions["Sequences"][None]

    node = CollectSequencesNode(fasta_files=regions["Genotypes"],
                                destination=sequencedir,
                                sequences=sequences,
                                dependencies=dependencies)

    if settings["Enabled"]:
        fasta_files = {}
        algorithm = settings["MAFFT"]["Algorithm"]
        for sequence in sequences:
            input_file = os.path.join(sequencedir, sequence + ".fasta")
            output_file = os.path.join(sequencedir, sequence + ".afa")

            mafft = MAFFTNode.customize(input_file=input_file,
                                        output_file=output_file,
                                        algorithm=algorithm,
                                        dependencies=node)
            apply_options(mafft.command, settings["MAFFT"])
            fasta_files[output_file] = mafft.build_node()
    else:
        fasta_files = dict((filename, node) for filename in node.output_files)

    # values() / items() are used instead of the Python 2-only
    # itervalues() / iteritems().
    if not any(filtering.values()):
        # Return a real list on Python 3 too, not a dict view
        return list(fasta_files.values())

    destination = sequencedir + ".filtered"
    filtering = dict(filtering)
    filtered_nodes = []

    for (filename, node) in fasta_files.items():
        output_filename = fileutils.reroot_path(destination, filename)
        filtered_node = FilterSingletonsNode(input_file=filename,
                                             output_file=output_filename,
                                             filter_by=filtering,
                                             dependencies=node)

        filtered_nodes.append(filtered_node)

    return filtered_nodes

コード例 #33

0

ファイルを表示

    def _run(self, config, temp):
        """Copy the fileset whose log gives the largest value to the
        output root.

        For each fileset the first file ending in ".log" is passed to
        self._read_admixture_log(); the fileset with the maximum
        (value, fileset) tuple is then copied file by file.

        Raises NodeError if a fileset contains no ".log" file.
        """
        likelihoods = []
        for fileset in self._files:
            log_file = next(
                (name for name in fileset if name.endswith(".log")), None)
            if log_file is None:
                raise NodeError("No log-file found in list of admixture "
                                "output-files: %r" % (fileset, ))

            likelihoods.append((self._read_admixture_log(log_file), fileset))

        _, best_fileset = max(likelihoods)
        for src_filename in best_fileset:
            fileutils.copy_file(
                src_filename,
                fileutils.reroot_path(self._output_root, src_filename))

コード例 #34

0

ファイルを表示

ファイル: nuclear.py プロジェクト: muslih14/paleomix

    def _run(self, config, temp):
        """Select the fileset with the maximum log value and copy it.

        Each fileset is scored by calling self._read_admixture_log() on its
        first ".log" file. The files of the fileset with the largest
        (score, fileset) tuple are copied to their rerooted paths under
        self._output_root.

        Raises NodeError if a fileset has no ".log" file.
        """
        def _first_log(fileset):
            # Return the first log file in the set, or fail loudly
            for filename in fileset:
                if filename.endswith(".log"):
                    return filename

            raise NodeError("No log-file found in list of admixture "
                            "output-files: %r" % (fileset,))

        scored = [
            (self._read_admixture_log(_first_log(fileset)), fileset)
            for fileset in self._files
        ]

        _, selected = max(scored)
        for src_filename in selected:
            dst_filename = fileutils.reroot_path(self._output_root,
                                                 src_filename)
            fileutils.copy_file(src_filename, dst_filename)

コード例 #35

0

ファイルを表示

    def _run(self, config, temp):
        """Write the summary table to the output file in temp.

        The file starts with a commented header naming the command line
        and the working directory, followed by the genome and
        area-of-interest listings. Each area of interest is then added to
        the genomes mapping under its label (with its size) before the
        tables themselves are written.
        """
        rois = self._stat_areas_of_interest(self._prefixes)
        genomes = self._stat_prefixes(self._prefixes)

        table_path = reroot_path(temp, self._output_file)
        with open(table_path, "w") as table:
            emit = table.write

            emit("# Command:\n")
            emit("#     %s\n" % (" ".join(sys.argv)))
            emit("#\n")
            emit("# Directory:\n")
            emit("#     %s\n" % (os.getcwd()))
            emit("#\n")
            self._write_genomes(table, genomes)
            emit("#\n")
            self._write_areas_of_interest(table, rois)
            emit("#\n#\n")

            for area in rois.values():
                genomes[area["Label"]] = {"Size": area["Size"]}
            self._write_tables(table, genomes)

コード例 #36

0

ファイルを表示

    def __init__(self, description, destination, source_nodes):
        """Create a node whose inputs are the outputs of source_nodes.

        Every output file of the source nodes is paired with the path
        returned by reroot_path(destination, file); the pairs are kept in
        self._files and the rerooted paths become this node's outputs.
        """
        source_nodes = safe_coerce_to_tuple(source_nodes)

        input_files = []
        for source_node in source_nodes:
            input_files.extend(source_node.output_files)

        output_files = [
            reroot_path(destination, fpath) for fpath in input_files
        ]
        # zip() is a single-use iterator on Python 3; keep a list so that
        # self._files can be iterated more than once.
        self._files = list(zip(input_files, output_files))

        Node.__init__(self,
                      description="<Copy %s output to %r>" %
                      (description, destination),
                      input_files=input_files,
                      output_files=output_files,
                      dependencies=source_nodes)

コード例 #37

0

ファイルを表示

    def _run(self, config, temp):
        """Write a tab-separated Name / Group / Color table to temp.

        Group membership is taken from the entry of self._groups with the
        largest key. Colors are paired with the sorted group names. One
        row is written per sample in sorted order, followed by a fixed
        row "Sample", "-", "#000000".
        """
        [output_file] = self.output_files

        membership = self._groups[max(self._groups)]
        group_names = sorted(set(membership.values()))
        palette = dict(zip(group_names, _DEFAULT_COLORS))

        with open(fileutils.reroot_path(temp, output_file), "w") as handle:
            handle.write("Name\tGroup\tColor\n")

            for sample_name in sorted(self._samples):
                group_name = membership[sample_name]
                row = (sample_name, group_name, palette[group_name])

                handle.write("%s\t%s\t%s\n" % row)

            handle.write("Sample\t-\t#000000\n")

コード例 #38

0

ファイルを表示

ファイル: formats.py プロジェクト: muslih14/paleomix

    def _run(self, _config, temp):
        """Write a partitions file covering all input MSAs to temp.

        The MSAs are laid end to end, each taking the 1-based, inclusive
        columns start..end. For every (group, offsets) pair returned by
        self._get_partition_by(), the group receives one "start+offset-end\\3"
        entry per offset, or the plain "start-end" range when there are
        exactly three offsets. Entries are collected per "<name>_<group>"
        and written as "DNA, <name> = <entries>" lines in sorted order.
        """
        end = 0
        partitions = collections.defaultdict(list)
        for (filename, msa) in _read_sequences(self._infiles):
            length = msa.seqlen()
            start, end = end + 1, end + length

            for (group, offsets) in self._get_partition_by(filename):
                if len(offsets) != 3:
                    parts = [("%i-%i\\3" % (start + offset, end)) for offset in offsets]
                else:
                    parts = ["%i-%i" % (start, end)]

                name = "%s_%s" % (self._infiles[filename]["name"], group)
                partitions[name].extend(parts)

        with open(reroot_path(temp, self._out_part), "w") as part_file:
            for (name, parts) in sorted(partitions.items()):
                # write(), not writelines(): the argument is one string, and
                # writelines() would emit it one character at a time.
                part_file.write("DNA, %s = %s\n" % (name, ", ".join(parts)))

コード例 #39

0

ファイルを表示

ファイル: phylip.py プロジェクト: jelber2/paleomix

    def _run(self, _config, temp):
        """Write a bootstrapped copy of the input PHYLIP file.

        The partitions and sequences are read from self._input_part and
        self._input_phy, resampled with self._bootstrap_sequences(), and
        written to the rerooted path in temp as the header followed by one
        "<name> <fragments>" line per sequence. The file is then moved to
        self._output_phy.
        """
        # Always create the generator: the original only bound rng when a
        # seed was given, so a seed of None raised UnboundLocalError below.
        # random.Random(None) picks its own seed.
        rng = random.Random(self._seed)
        partitions = _read_partitions(self._input_part)
        header, names, sequences = _read_sequences(self._input_phy)
        bootstraps = self._bootstrap_sequences(sequences, partitions, rng)

        temp_fpath = reroot_path(temp, self._output_phy)
        with open(temp_fpath, "w") as output_phy:
            output_phy.write(header)

            for (name, fragments) in zip(names, bootstraps):
                output_phy.write(name)
                output_phy.write(" ")
                for sequence in fragments:
                    output_phy.write(sequence)
                output_phy.write("\n")

        move_file(temp_fpath, self._output_phy)

コード例 #40

0

ファイルを表示

ファイル: bedtools.py プロジェクト: jelber2/paleomix

    def _run(self, config, temp):
        """Pad the records of a BED file and write them to the temp folder.

        Contig sizes are read from the first two tab-separated columns of
        ``self._fai_file``; a NodeError is raised if a contig name occurs
        more than once. Each record from ``self._infile`` is extended by
        ``self._amount`` in both directions, clamped to zero and to the size
        of its contig, and written to ``self._outfile`` rerooted into
        ``temp``.
        """
        contig_sizes = {}
        with open(self._fai_file) as fai_handle:
            for fai_line in fai_handle:
                contig_name, contig_size, _rest = fai_line.split('\t', 2)
                if contig_name in contig_sizes:
                    message = ('Reference genome contains multiple '
                               'identically named contigs (%r)!'
                               % (contig_name,))
                    raise NodeError(message)

                contig_sizes[contig_name] = int(contig_size)

        output_path = reroot_path(temp, self._outfile)
        with open(output_path, 'w') as out_handle:
            for bed in read_bed_file(self._infile, contigs=contig_sizes):
                upper_bound = contig_sizes[bed.contig]
                # Clamp the padded interval to [0, contig size]
                bed.start = max(0, bed.start - self._amount)
                bed.end = min(bed.end + self._amount, upper_bound)

                out_handle.write('%s\n' % (bed,))

コード例 #41

0

ファイルを表示

ファイル: phylip.py プロジェクト: muslih14/paleomix

    def _run(self, _config, temp):
        """Write a bootstrapped version of the input alignment.

        Partitions are read from ``self._input_part`` and the header, names
        and sequences from ``self._input_phy``. The fragments returned by
        ``self._bootstrap_sequences`` are written, one sequence per line
        ("<name> <fragments...>"), to ``self._output_phy`` rerooted into
        ``temp``; the finished file is then moved to ``self._output_phy``.
        """
        # Always create a generator: `random.Random(None)` seeds itself from
        # an OS / time based source. The original only bound `rng` when a
        # seed was set, so unseeded runs failed with an UnboundLocalError
        # when `rng` was passed to `_bootstrap_sequences`.
        rng = random.Random(self._seed)
        partitions = _read_partitions(self._input_part)
        header, names, sequences = _read_sequences(self._input_phy)
        bootstraps = self._bootstrap_sequences(sequences, partitions, rng)

        temp_fpath = reroot_path(temp, self._output_phy)
        with open(temp_fpath, "w") as output_phy:
            output_phy.write(header)

            for (name, fragments) in zip(names, bootstraps):
                output_phy.write(name)
                output_phy.write(" ")
                for sequence in fragments:
                    output_phy.write(sequence)
                output_phy.write("\n")

        # Only move the file into place once it has been fully written.
        move_file(temp_fpath, self._output_phy)

コード例 #42

0

ファイルを表示

ファイル: bedtools.py プロジェクト: MikkelSchubert/paleomix

    def _run(self, config, temp):
        """Pad the records of a BED file and write them to the temp folder.

        Contig sizes are taken from the first two tab-separated columns of
        ``self._fai_file``; duplicate contig names raise a NodeError. Every
        record read from ``self._infile`` is widened by ``self._amount`` on
        both sides, limited to the range from zero to the size of its
        contig, and written to ``self._outfile`` rerooted into ``temp``.
        """
        sizes_by_contig = {}
        with open(self._fai_file) as fai:
            for row in fai:
                contig, size, _unused = row.split('\t', 2)
                if contig in sizes_by_contig:
                    raise NodeError(
                        'Reference genome contains multiple '
                        'identically named contigs (%r)!' % (contig,)
                    )

                sizes_by_contig[contig] = int(size)

        temp_outfile = reroot_path(temp, self._outfile)
        with open(temp_outfile, 'w') as output:
            for entry in read_bed_file(self._infile, contigs=sizes_by_contig):
                contig_size = sizes_by_contig[entry.contig]
                # Padding must not extend past either end of the contig
                entry.start = max(0, entry.start - self._amount)
                entry.end = min(entry.end + self._amount, contig_size)

                output.write('%s\n' % (entry,))

コード例 #43

0

ファイルを表示

    def _run(self, config, temp):
        """Pad and merge the records of a BED file.

        Contig sizes are read from the first two tab-separated columns of
        ``self._fai_file``; a NodeError is raised on duplicate contig names.
        The records of ``self._infile`` are passed to ``pad_bed_records``
        with ``self._amount`` as padding and the contig sizes as limits, and
        the result of ``merge_bed_records`` is written, one record per line,
        to ``self._outfile`` rerooted into ``temp``.
        """
        contig_sizes = {}
        with open(self._fai_file) as fai_handle:
            for fai_line in fai_handle:
                contig_name, contig_size, _rest = fai_line.split("\t", 2)
                if contig_name in contig_sizes:
                    message = ("Reference genome contains multiple "
                               "identically named contigs (%r)!"
                               % (contig_name,))
                    raise NodeError(message)

                contig_sizes[contig_name] = int(contig_size)

        # The output file is created before the input is read, as in the
        # original implementation.
        output_path = reroot_path(temp, self._outfile)
        with open(output_path, "w") as out_handle:
            bed_records = list(read_bed_file(self._infile,
                                             contigs=contig_sizes))
            pad_bed_records(records=bed_records,
                            padding=self._amount,
                            max_sizes=contig_sizes)

            for merged in merge_bed_records(bed_records):
                out_handle.write("%s\n" % (merged,))

コード例 #44

0

ファイルを表示

    def _run(self, _config, temp):
        """Write the partitions file describing the concatenated alignments.

        Every alignment yielded by ``_read_sequences(self._infiles)`` is
        assigned a consecutive, 1-based, inclusive range of columns. For each
        ``(group, offsets)`` pair returned by ``self._get_partition_by``, the
        corresponding ranges are collected under the key "<name>_<group>".
        The partitions are then written, sorted by name and one per line, to
        ``self._out_part`` rerooted into the ``temp`` folder.
        """
        end = 0
        partitions = collections.defaultdict(list)
        for (filename, msa) in _read_sequences(self._infiles):
            length = msa.seqlen()
            # Ranges are 1-based and inclusive; each alignment starts right
            # after the end of the previous one.
            start, end = end + 1, end + length

            for (group, offsets) in self._get_partition_by(filename):
                if len(offsets) != 3:
                    # One range per offset, each suffixed with a literal "\3".
                    parts = [("%i-%i\\3" % (start + offset, end))
                             for offset in offsets]
                else:
                    # Exactly three offsets in the group: emit the full range.
                    parts = ["%i-%i" % (start, end)]

                name = "%s_%s" % (self._infiles[filename]["name"], group)
                partitions[name].extend(parts)

        with open(reroot_path(temp, self._out_part), "w") as part_file:
            for (name, parts) in sorted(partitions.items()):
                # `write` is the correct call for a single string;
                # `writelines` would iterate over it character by character.
                part_file.write("DNA, %s = %s\n" %
                                (name, ", ".join(parts)))
コード例 #45

0

ファイルを表示

ファイル: summary.py プロジェクト: MikkelSchubert/paleomix

    def _run(self, config, temp):
        """Write the summary table to the temporary folder.

        The table starts with a commented header listing the command-line
        (``sys.argv``), the current working directory, and the filename,
        SHA1 sum and mtime recorded in ``self._makefile``, followed by the
        genomes and areas of interest collected from ``self._prefixes``.
        The areas of interest are then added to the genomes (keyed by their
        label) before the tables themselves are written. Output goes to
        ``self._output_file`` rerooted into ``temp``.
        """
        rois = self._stat_areas_of_interest(self._prefixes)
        genomes = self._stat_prefixes(self._prefixes)
        with open(reroot_path(temp, self._output_file), "w") as table:
            table.write("# Command:\n")
            table.write("#     %s\n" % (" ".join(sys.argv),))
            table.write("#\n")
            table.write("# Directory:\n")
            table.write("#     %s\n" % (os.getcwd(),))
            table.write("#\n")
            table.write("# Makefile:\n")
            table.write("#     Filename: %s\n" % (self._makefile["Filename"],))
            table.write("#     SHA1Sum:  %s\n" % (self._makefile["Hash"],))
            table.write("#     MTime:    %s\n" % (self._makefile["MTime"],))
            table.write("#\n")
            self._write_genomes(table, genomes)
            table.write("#\n")
            self._write_areas_of_interest(table, rois)
            table.write("#\n#\n")

            # `values()` is available on both Python 2 and 3, whereas
            # `itervalues()` only exists on Python 2 dictionaries.
            for roi in rois.values():
                genomes[roi["Label"]] = {"Size": roi["Size"]}
            self._write_tables(table, genomes)

コード例 #46

0

ファイルを表示

ファイル: picard.py プロジェクト: muslih14/paleomix

 def _setup(self, _config, temp):
     """Create a symlink to the input reference inside the temp folder."""
     link_path = reroot_path(temp, self._in_reference)
     os.symlink(self._in_reference, link_path)

コード例 #47

0

ファイルを表示

ファイル: fileutils_test.py プロジェクト: MikkelSchubert/paleomix

def test_reroot_path__rel_rel__w_final_dash():
    # Relative root with a trailing slash, relative path: the filename is
    # placed directly under the root.
    expected = "etc/apt/sources.list"
    observed = reroot_path("etc/apt/", "tmp/sources.list")
    assert_equal(observed, expected)

コード例 #48

0

ファイルを表示

ファイル: remap.py プロジェクト: muslih14/paleomix

def main(argv):
    """Write a new makefile for each input makefile, limited to one prefix.

    For every makefile read via ``bam_pipeline.read_makefiles``, the pipeline
    is built and a new makefile is written to ``config.destination``. The new
    makefile lists each target (with ``config.postfix`` applied to its name)
    and, for the prefix named ``config.prefix``, its samples, libraries,
    lanes, and the filenames of the sinks collected while converting reads.

    Returns 1 if the options could not be parsed, if the output makefile
    would be the same file as the source makefile, or if ``config.prefix``
    is not among the prefixes of a makefile; returns 0 otherwise.
    """
    config, args = parse_options(argv)
    if config is None:
        return 1

    # Get default options for bam_pipeline
    bam_config, _ = bam_cfg.parse_config(args, "bam")
    makefiles = bam_pipeline.read_makefiles(bam_config, args)
    # Build .fai files for reference .fasta files
    bam_pipeline.index_references(bam_config, makefiles)

    for makefile in makefiles:
        mkfile_fname = makefile["Statistics"]["Filename"]
        # The pipeline is built relative to the folder of the source makefile
        bam_config.destination = os.path.dirname(mkfile_fname)
        tasks = bam_pipeline.build_pipeline_full(bam_config, makefile,
                                                 return_nodes=False)

        make_dirs(config.destination)
        makefile_name = add_postfix(makefile["Statistics"]["Filename"],
                                    config.postfix)
        makefile_path = reroot_path(config.destination, makefile_name)
        # Refuse to overwrite the makefile that is being processed
        if samefile(makefile["Statistics"]["Filename"], makefile_path):
            sys.stderr.write("ERROR: Would overwrite source makefile at %r\n" % (makefile_path,))
            sys.stderr.write("       Please set --destination and/or --output-name-postfix\n")
            sys.stderr.write("       before continuing.\n")
            return 1

        print("Writing makefile", makefile_path)

        # The requested prefix must be present in the makefile; every other
        # prefix is reported as skipped. This check runs before the output
        # makefile is opened for writing.
        found_prefix = False
        for prefix in makefile["Prefixes"]:
            if prefix != config.prefix:
                print("%sSkipping %s" % (_INDENTATION, prefix))
            else:
                found_prefix = True

        if not found_prefix:
            sys.stderr.write("\nERROR:\n")
            sys.stderr.write("Could not find prefix %r in %r! Aborting ...\n"
                             % (config.prefix, mkfile_fname))
            return 1

        with open(makefile_path, "w") as makefile_handle:
            # The new makefile starts with the template text, followed by
            # three blank lines and then the targets.
            template = bam_mkfile.build_makefile(add_sample_tmpl=False)
            makefile_handle.write(template)
            makefile_handle.write("\n" * 3)

            for target in tasks:
                target_name = add_postfix(target.name, config.postfix)
                print("%sTarget: %s -> %s" % (_INDENTATION,
                                              target.name,
                                              target_name))

                # Targets are written without indentation; each nested level
                # below adds one more _INDENTATION.
                makefile_handle.write('%s"%s":\n' % (_INDENTATION * 0,
                                                     target_name))
                for prefix in target.prefixes:
                    # Only the requested prefix is written to the makefile
                    if prefix.name != config.prefix:
                        continue

                    for sample in prefix.samples:
                        print("%sSample: %s" % (_INDENTATION * 2, sample.name))

                        makefile_handle.write('%s"%s":\n' % (_INDENTATION * 1,
                                                             sample.name))

                        for library in sample.libraries:
                            print("%sLibrary: %s" % (_INDENTATION * 3,
                                                     library.name))
                            makefile_handle.write('%s"%s":\n'
                                                  % (_INDENTATION * 2,
                                                     library.name))

                            # A fresh sink cache is used for every library.
                            sink_cache = {}
                            destination = os.path.join(target_name,
                                                       "reads",
                                                       sample.name,
                                                       library.name)

                            # NOTE(review): convert_reads presumably fills
                            # sink_cache with {lane name: {reads type: sink}},
                            # judging by how it is read below; confirm
                            # against its definition.
                            for lane in library.lanes:
                                convert_reads(config, destination, lane, sink_cache)
                            ReadSink.close_all_sinks()

                            # One entry per lane, listing the filename of
                            # each sink by reads type, in sorted order.
                            for lane_name in sorted(sink_cache):
                                makefile_handle.write('%s"%s":\n' % (_INDENTATION * 3, lane_name))
                                for (reads_type, sink) in sorted(sink_cache[lane_name].items()):
                                    makefile_handle.write('%s%s "%s"\n'
                                                          % (_INDENTATION * 4,
                                                             ("%s:" % (reads_type,)).ljust(20),
                                                             sink.filename))
                                makefile_handle.write("\n")
        print("\tDone ...")
        print()

    return 0

コード例 #49

0

ファイルを表示

 def _setup(self, _config, temp):
     """Create a symlink to the input reference inside the temp folder."""
     # Linking the reference into the temporary folder ensures that Picard
     # CreateSequenceDict cannot reuse any existing sequence dictionaries,
     # if the underlying files have changed.
     temp_reference = reroot_path(temp, self._in_reference)
     os.symlink(self._in_reference, temp_reference)

コード例 #50

0

ファイルを表示

 def _teardown(self, _config, temp):
     """Move the output file from the temp folder to its final location."""
     destination = self._output_file
     source = reroot_path(temp, destination)
     move_file(source, destination)

コード例 #51

0

ファイルを表示

ファイル: samtools.py プロジェクト: tmancill/paleomix

    def _teardown(self, config, temp):
        """See CommandNode._teardown.

        The copy of the input file located in the temporary folder is
        removed before the regular teardown is carried out.
        """
        temp_infile = reroot_path(temp, self._infile)
        os.remove(temp_infile)

        CommandNode._teardown(self, config, temp)

コード例 #52

0

ファイルを表示

ファイル: samtools.py プロジェクト: muslih14/paleomix

    def _teardown(self, config, temp):
        """See CommandNode._teardown.

        Deletes the temp-folder entry for the input file, then defers to the
        teardown implemented by CommandNode.
        """
        leftover = reroot_path(temp, self._infile)
        os.remove(leftover)

        CommandNode._teardown(self, config, temp)

コード例 #53

0

ファイルを表示

ファイル: formats.py プロジェクト: muslih14/paleomix

 def _teardown(self, _config, temp):
     """Move the PHYLIP file from the temp folder to its final location."""
     destination = self._out_phy
     source = reroot_path(temp, destination)
     move_file(source, destination)

コード例 #54

0

ファイルを表示

ファイル: picard.py プロジェクト: MikkelSchubert/paleomix

 def _setup(self, _config, temp):
     """Symlink the input reference into the temporary folder."""
     # Working on a link in the temp folder ensures that Picard
     # CreateSequenceDict cannot reuse any existing sequence dictionaries,
     # if the underlying files have changed.
     link_target = self._in_reference
     link_name = reroot_path(temp, link_target)
     os.symlink(link_target, link_name)

コード例 #55

0

ファイルを表示

ファイル: formats.py プロジェクト: muslih14/paleomix

    def _run(self, _config, temp):
        """Join the input alignments and write them as interleaved PHYLIP.

        The input files are read in sorted order, joined into a single MSA,
        and written to ``self._out_phy`` rerooted into ``temp``.
        """
        filenames = sorted(self.input_files)
        alignments = [MSA.from_file(filename) for filename in filenames]
        msa = MSA.join(*alignments)

        temp_phy = reroot_path(temp, self._out_phy)
        with open(temp_phy, "w") as output:
            text = interleaved_phy(msa, add_flag=self._add_flag)
            output.write(text)

コード例 #56

0

ファイルを表示

ファイル: sequences.py プロジェクト: MikkelSchubert/paleomix

 def _teardown(self, _config, temp):
     """Move every output file from the temp folder to its destination."""
     # Files are moved in sorted order of their destination paths
     targets = sorted(self._outfiles)
     for target in targets:
         fileutils.move_file(fileutils.reroot_path(temp, target), target)

コード例 #57

0

ファイルを表示

    def _teardown(self, config, temp):
        """Move the single output file from the temp folder into place."""
        # Unpacking fails unless there is exactly one output file
        [destination] = self.output_files

        fileutils.move_file(fileutils.reroot_path(temp, destination),
                            destination)

コード例 #58

0

ファイルを表示

ファイル: formats.py プロジェクト: muslih14/paleomix

 def _teardown(self, _config, temp):
     """Move the .phy and .partitions files from the temp folder into place."""
     for extension in (".phy", ".partitions"):
         destination = self._out_prefix + extension
         move_file(reroot_path(temp, destination), destination)