Beispiel #1
0
def main(argv):
    """Generate synthetic paired-end reads and write them as gzipped FASTQ.

    args[0] is handed to Specimen and args[1] is used as the prefix of every
    output filename. Returns 1 if parse_args yields no options; otherwise the
    function falls off the end (implicitly returning None) once all files
    have been written.
    """
    options, args = parse_args(argv)
    if not options:
        return 1

    print("Generating %i lane(s) of synthetic reads ...\nDISCLAIMER: For "
          "demonstration of PALEOMIX usage only; not for serious usage!"
          % (options.lanes_num,))

    # Each stage wraps the previous one: Specimen -> Sample -> Damage -> Library.
    library = Library(options,
                      Damage(options,
                             Sample(options,
                                    Specimen(options, args[0]))))

    for (lane_idx, lane) in enumerate(library.lanes, start=1):
        batches = fragment(options.lanes_per_file, lane.sequences)
        for (batch_idx, batch) in enumerate(batches, start=1):
            # "%%s" survives this substitution as "%s", leaving a slot that
            # is filled with the mate number (1 or 2) below.
            fields = (args[1], library.barcode, lane_idx, batch_idx)
            filename = "%s%s_L%i_R%%s_%02i.fastq.gz" % fields

            print("  Writing %s" % (filename % "{Pair}",))
            with gzip.open(filename % 1, "w") as mate_1, \
                    gzip.open(filename % 2, "w") as mate_2:
                for (name, seq_1, seq_2) in batch:
                    # Mate 1 gets constant quality "I", mate 2 constant "H".
                    mate_1.write("@%s%s/1\n%s\n" % (library.barcode, name, seq_1))
                    mate_1.write("+\n%s\n" % ("I" * len(seq_1),))
                    mate_2.write("@%s%s/2\n%s\n" % (library.barcode, name, seq_2))
                    mate_2.write("+\n%s\n" % ("H" * len(seq_2),))
Beispiel #2
0
    def _run(self, _config, temp):
        """Write the input alignment, minus excluded groups, as wrapped text.

        The output starts with a header line holding the number of sequences
        and the length of the first sequence. Each sequence (in sorted name
        order) then follows as a blank line, its name, and the upper-cased
        sequence split by fragment() into rows of 60 and space-separated
        groups of 3. The file is written below `temp` via
        fileutils.reroot_path and has no trailing newline.
        """
        alignment = read_msa(self._input_file)
        for group in self._excluded:
            alignment.pop(group)

        first_sequence = next(alignment.itervalues())
        rows = ["  %i %i" % (len(alignment), len(first_sequence))]
        for (taxon, sequence) in sorted(alignment.iteritems()):
            rows.extend(("", taxon))
            rows.extend(" ".join(fragment(3, block))
                        for block in fragment(60, sequence.upper()))

        destination = fileutils.reroot_path(temp, self._output_file)
        with open(destination, "w") as handle:
            handle.write("\n".join(rows))
def main(argv):
    """Generate synthetic paired-end reads and write them as gzipped FASTQ.

    args[0] is handed to Specimen and args[1] is used as the prefix of every
    output filename. Returns 1 if parse_args yields no options; otherwise the
    function falls off the end (implicitly returning None) once all files
    have been written.
    """
    options, args = parse_args(argv)
    if not options:
        return 1

    print("Generating %i lane(s) of synthetic reads ...\nDISCLAIMER: For "
          "demonstration of PALEOMIX usage only; not for serious usage!"
          % (options.lanes_num,))

    # Each stage wraps the previous one: Specimen -> Sample -> Damage -> Library.
    library = Library(options,
                      Damage(options,
                             Sample(options,
                                    Specimen(options, args[0]))))

    for (lane_idx, lane) in enumerate(library.lanes, start=1):
        batches = fragment(options.lanes_per_file, lane.sequences)
        for (batch_idx, batch) in enumerate(batches, start=1):
            # "%%s" survives this substitution as "%s", leaving a slot that
            # is filled with the mate number (1 or 2) below.
            fields = (args[1], library.barcode, lane_idx, batch_idx)
            filename = "%s%s_L%i_R%%s_%02i.fastq.gz" % fields

            print("  Writing %s" % (filename % "{Pair}",))
            with gzip.open(filename % 1, "w") as mate_1, \
                    gzip.open(filename % 2, "w") as mate_2:
                for (name, seq_1, seq_2) in batch:
                    # Mate 1 gets constant quality "I", mate 2 constant "H".
                    mate_1.write("@%s%s/1\n%s\n" % (library.barcode, name, seq_1))
                    mate_1.write("+\n%s\n" % ("I" * len(seq_1),))
                    mate_2.write("@%s%s/2\n%s\n" % (library.barcode, name, seq_2))
                    mate_2.write("+\n%s\n" % ("H" * len(seq_2),))
Beispiel #4
0
    def _run(self, _config, temp):
        """Write the input alignment, minus excluded groups, as wrapped text.

        The output starts with a header line holding the number of sequences
        and the length of the first sequence. Each sequence (in sorted name
        order) then follows as a blank line, its name, and the upper-cased
        sequence split by fragment() into rows of 60 and space-separated
        groups of 3. The file is written below `temp` via
        fileutils.reroot_path and has no trailing newline.
        """
        alignment = read_msa(self._input_file)
        for group in self._excluded:
            alignment.pop(group)

        first_sequence = next(alignment.itervalues())
        rows = ["  %i %i" % (len(alignment), len(first_sequence))]
        for (taxon, sequence) in sorted(alignment.iteritems()):
            rows.extend(("", taxon))
            rows.extend(" ".join(fragment(3, block))
                        for block in fragment(60, sequence.upper()))

        destination = fileutils.reroot_path(temp, self._output_file)
        with open(destination, "w") as handle:
            handle.write("\n".join(rows))
Beispiel #5
0
def flush_fasta(sequence):
    """Print every full-width line of a FASTA sequence; return the leftover.

    The sequence is split with utilities.fragment into pieces of
    _FASTA_COLUMNS characters. Pieces of full width are printed; the first
    piece shorter than that is returned unprinted. An empty string is
    returned when no short piece is encountered.

    """
    leftover = ""
    for piece in utilities.fragment(_FASTA_COLUMNS, sequence):
        if len(piece) >= _FASTA_COLUMNS:
            print(piece)
        else:
            # A short piece is the incomplete tail; hand it back to the caller.
            leftover = piece
            break
    return leftover
Beispiel #6
0
def flush_fasta(sequence):
    """Print every full-width line of a FASTA sequence; return the leftover.

    The sequence is split with utilities.fragment into pieces of
    _FASTA_COLUMNS characters. Pieces of full width are printed; the first
    piece shorter than that is returned unprinted. An empty string is
    returned when no short piece is encountered.

    """
    leftover = ""
    for piece in utilities.fragment(_FASTA_COLUMNS, sequence):
        if len(piece) >= _FASTA_COLUMNS:
            print(piece)
        else:
            # A short piece is the incomplete tail; hand it back to the caller.
            leftover = piece
            break
    return leftover
Beispiel #7
0
    def _run(self, _config, temp):
        """Write one FASTA file per sequence, with one record per input file.

        Every file in self._infiles (taxon name -> filename) is read with
        read_fasta into a {record name: sequence} table. For each entry of
        self._sequences (sequence name -> {taxon name: record label}), the
        file "<sequence name>.fasta" is written into `temp`, holding one
        record per taxon (in sorted taxon order) with the header
        ">taxon_name label" and the sequence wrapped by fragment(60, ...).

        Raises KeyError if a taxon lacks the requested sequence, or if the
        taxa map has no entry for a taxon.
        """
        fastas = {}
        for (taxon_name, filename) in self._infiles.iteritems():
            # The record name must not reuse the outer loop variable:
            # doing so keyed the table by the last record name in the file
            # instead of by the taxon name.
            fastas[taxon_name] = dict(
                (record_name, sequence)
                for ((record_name, _meta), sequence) in read_fasta(filename))
        fastas = sorted(fastas.items())

        for (sequence_name, taxa_map) in sorted(self._sequences.iteritems()):
            lines = []
            for (taxon_name, sequences) in fastas:
                fastaseq = "\n".join(fragment(60, sequences[sequence_name]))
                current_name = taxa_map[taxon_name]

                lines.append(">%s %s\n%s\n" % (taxon_name, current_name, fastaseq))

            filename = os.path.join(temp, sequence_name + ".fasta")
            with open(filename, "w") as fasta:
                fasta.write("".join(lines))
Beispiel #8
0
    def _run(self, _config, temp):
        """Write one FASTA file per sequence, with one record per input file.

        Every file in self._infiles (taxon name -> filename) is read with
        read_fasta into a {record name: sequence} table. For each entry of
        self._sequences (sequence name -> {taxon name: record label}), the
        file "<sequence name>.fasta" is written into `temp`, holding one
        record per taxon (in sorted taxon order) with the header
        ">taxon_name label" and the sequence wrapped by fragment(60, ...).

        Raises KeyError if a taxon lacks the requested sequence, or if the
        taxa map has no entry for a taxon.
        """
        fastas = {}
        for (taxon_name, filename) in self._infiles.iteritems():
            # The record name must not reuse the outer loop variable:
            # doing so keyed the table by the last record name in the file
            # instead of by the taxon name.
            fastas[taxon_name] = dict(
                (record_name, sequence)
                for ((record_name, _meta), sequence) in read_fasta(filename))
        fastas = sorted(fastas.items())

        for (sequence_name, taxa_map) in sorted(self._sequences.iteritems()):
            lines = []
            for (taxon_name, sequences) in fastas:
                fastaseq = "\n".join(fragment(60, sequences[sequence_name]))
                current_name = taxa_map[taxon_name]

                lines.append(">%s %s\n%s\n" %
                             (taxon_name, current_name, fastaseq))

            filename = os.path.join(temp, sequence_name + ".fasta")
            with open(filename, "w") as fasta:
                fasta.write("".join(lines))
Beispiel #9
0
def test_fragment__multiple_fragments_partial():
    """Eight items in fragments of 3 give two full fragments and a short one."""
    text = "abcdefgh"
    assert_equal(list(utils.fragment(3, text)), ["abc", "def", "gh"])

    # The same split must hold when the input is a list instead of a string.
    expected = [["a", "b", "c"], ["d", "e", "f"], ["g", "h"]]
    assert_equal(list(utils.fragment(3, list(text))), expected)
Beispiel #10
0
def test_fragment__single_fragment():
    """An input exactly one fragment long yields just that fragment."""
    from_string = list(utils.fragment(3, "abc"))
    assert_equal(from_string, ["abc"])

    from_list = list(utils.fragment(3, ["a", "b", "c"]))
    assert_equal(from_list, [["a", "b", "c"]])
Beispiel #11
0
def test_fragment__partial_fragment():
    """An input shorter than the fragment size is returned as one fragment."""
    from_string = list(utils.fragment(3, "ab"))
    assert_equal(from_string, ["ab"])

    from_list = list(utils.fragment(3, ["a", "b"]))
    assert_equal(from_list, [["a", "b"]])
Beispiel #12
0
def test_fragment__empty():
    """Empty inputs (string or list) produce no fragments at all."""
    for empty in ("", []):
        assert_equal(list(utils.fragment(5, empty)), [])
Beispiel #13
0
 def _format_int(self, value):
     """Return str(value) with "," inserted between groups of three digits.

     The digits are grouped from the right by reversing the text, splitting
     it with fragment(3, ...) and reversing the joined result, e.g.
     1234567 -> "1,234,567". A leading minus sign is split off before
     grouping; otherwise a negative value with a multiple of three digits
     would be rendered with a comma right after the sign (-123 -> "-,123").
     """
     text = str(value)
     sign = ""
     if text.startswith("-"):
         sign, text = "-", text[1:]
     return sign + (",".join(fragment(3, text[::-1])))[::-1]
Beispiel #14
0
def test_fragment__empty():
    """Empty inputs (string or list) produce no fragments at all."""
    for empty in ("", []):
        assert_equal(list(utils.fragment(5, empty)), [])
Beispiel #15
0
def test_fragment__partial_fragment():
    """An input shorter than the fragment size is returned as one fragment."""
    from_string = list(utils.fragment(3, "ab"))
    assert_equal(from_string, ["ab"])

    from_list = list(utils.fragment(3, ["a", "b"]))
    assert_equal(from_list, [["a", "b"]])
Beispiel #16
0
def test_fragment__single_fragment():
    """An input exactly one fragment long yields just that fragment."""
    from_string = list(utils.fragment(3, "abc"))
    assert_equal(from_string, ["abc"])

    from_list = list(utils.fragment(3, ["a", "b", "c"]))
    assert_equal(from_list, [["a", "b", "c"]])
Beispiel #17
0
def test_fragment__multiple_fragments_partial():
    """Eight items in fragments of 3 give two full fragments and a short one."""
    text = "abcdefgh"
    assert_equal(list(utils.fragment(3, text)), ["abc", "def", "gh"])

    # The same split must hold when the input is a list instead of a string.
    expected = [["a", "b", "c"], ["d", "e", "f"], ["g", "h"]]
    assert_equal(list(utils.fragment(3, list(text))), expected)
Beispiel #18
0
 def _format_int(self, value):
     """Return str(value) with "," inserted between groups of three digits.

     The digits are grouped from the right by reversing the text, splitting
     it with fragment(3, ...) and reversing the joined result, e.g.
     1234567 -> "1,234,567". A leading minus sign is split off before
     grouping; otherwise a negative value with a multiple of three digits
     would be rendered with a comma right after the sign (-123 -> "-,123").
     """
     text = str(value)
     sign = ""
     if text.startswith("-"):
         sign, text = "-", text[1:]
     return sign + (",".join(fragment(3, text[::-1])))[::-1]
Beispiel #19
0
 def __repr__(self):
     """Return this record as FASTA text, wrapping the sequence at 60 chars.

     The header line is ">name", or ">name meta" when self.meta is truthy;
     the result always ends with a newline.
     """
     header = self.name
     if self.meta:
         header = "%s %s" % (header, self.meta)
     wrapped = "\n".join(fragment(60, self.sequence))
     return ">%s\n%s\n" % (header, wrapped)
Beispiel #20
0
def test_fragment__iterable():
    """Run fragment over an xrange and consume every fragment it yields.

    Nothing is asserted about the result in this block.
    NOTE(review): if the original test carried an expected-exception
    decorator, it is not visible here -- confirm against the test module.
    """
    fragments = utils.fragment(3, xrange(6))
    list(fragments)
Beispiel #21
0
def wrap_fasta(name, sequence):
    """Return a FASTA record: a ">name" header plus the sequence wrapped at 60 chars."""
    wrapped = "\n".join(fragment(60, sequence))
    return ">%s\n%s\n" % (name, wrapped)
Beispiel #22
0
def test_fragment__iterable():
    """Run fragment over an xrange and consume every fragment it yields.

    Nothing is asserted about the result in this block.
    NOTE(review): if the original test carried an expected-exception
    decorator, it is not visible here -- confirm against the test module.
    """
    fragments = utils.fragment(3, xrange(6))
    list(fragments)
Beispiel #23
0
def test_fragment__set():
    """Run fragment over a set and consume every fragment it yields.

    Nothing is asserted about the result in this block.
    NOTE(review): if the original test carried an expected-exception
    decorator, it is not visible here -- confirm against the test module.
    """
    items = set(range(6))
    fragments = utils.fragment(3, items)
    list(fragments)
Beispiel #24
0
def test_fragment__set():
    """Run fragment over a set and consume every fragment it yields.

    Nothing is asserted about the result in this block.
    NOTE(review): if the original test carried an expected-exception
    decorator, it is not visible here -- confirm against the test module.
    """
    items = set(range(6))
    fragments = utils.fragment(3, items)
    list(fragments)
Beispiel #25
0
def wrap_fasta(name, sequence):
    """Return a FASTA record: a ">name" header plus the sequence wrapped at 60 chars."""
    wrapped = "\n".join(fragment(60, sequence))
    return ">%s\n%s\n" % (name, wrapped)