Python Forwarder.fromSequence Examples

Programming Language: Python

Namespace/Package Name: pyZipHMM

Class/Type: Forwarder

Method/Function: fromSequence

Examples at hotexamples.com: 2

Python Forwarder.fromSequence - 2 examples found. These are the top rated real world Python examples of pyZipHMM.Forwarder.fromSequence extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

fromDirectory(2)

fromSequence(1)

Frequently Used Methods

fromDirectory (2)

fromSequence (1)

Example #1

Show file

File: prepare-alignments.py Project: pombredanne/coalhmm

def main():
    usage = """%prog [options] <input> <input format> <output dir>

This program reads in an input sequence in any format supported by BioPython
and writes out a preprocessed file ready for use with zipHMM.
Also supports gzipped input files, if the name ends with `.gz`.

Assumption #1: Either the file is a pairwise alignment, or you have provided
exactly two names to the `--names` option.

Assumption #2: The file uses a simple ACGT format (and N/-). Anything else will
be interpreted as N and a warning will be given with all unknown symbols.

Warning: This program uses SeqIO.to_dict to read in the entire alignment, you
may want to split the alignment first if it's very large.
"""

    parser = OptionParser(usage=usage, version="%prog 1.0")

    parser.add_option(
        "--names",
        dest="names",
        type="string",
        default=None,
        help="A comma-separated list of names to use from the source file",
    )
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Print some stuff")

    (options, args) = parser.parse_args()

    if len(args) != 3:
        parser.error("Needs input file, input format and output file")
    in_filename = args.pop(0)
    in_format = args.pop(0)
    output_dirname = args.pop(0)

    assert os.path.exists(in_filename), "Must use an existing input file"
    if in_filename.endswith(".gz"):
        if options.verbose:
            print "Assuming '%s' is a gzipped file." % in_filename
        inf = gzip.open(in_filename)
    else:
        inf = open(in_filename)

    if options.verbose:
        print "Loading data...",
        sys.stdout.flush()
    alignments = SeqIO.to_dict(SeqIO.parse(inf, in_format))
    if options.verbose:
        print "done"

    if options.names:
        names = options.names.split(",")
    else:
        names = list(alignments.keys())
    assert len(names) == 2, "Must be a pairwise alignment."
    if options.verbose:
        print "Assuming pairwise alignment between '%s' and '%s'" % (names[0], names[1])
    srcs = [alignments[name].seq for name in names]

    clean = set("ACGT")
    A = srcs[0]
    B = srcs[1]
    assert len(A) == len(B)
    L = len(A)
    fd, foutname = tempfile.mkstemp()
    if options.verbose:
        print "Writing temp file readable by zipHMM to '%s'..." % (foutname),
        sys.stdout.flush()
    seen = set()
    with os.fdopen(fd, "w", 64 * 1024) as f:
        for i in xrange(L):
            s1, s2 = A[i].upper(), B[i].upper()
            seen.add(s1)
            seen.add(s2)
            if s1 not in clean or s2 not in clean:
                print >> f, 2,
            elif s1 == s2:
                print >> f, 0,
            else:
                print >> f, 1,
    if options.verbose:
        print "done"
    if len(seen - set("ACGTN-")) > 1:
        print >> sys.stderr, "I didn't understand the following symbols form the input sequence: %s" % (
            "".join(list(seen - set("ACGTN-")))
        )
    if options.verbose:
        print "zipHMM  is preprocessing...",
        sys.stdout.flush()
    f = Forwarder.fromSequence(seqFilename=foutname, alphabetSize=3, minNoEvals=500)
    if options.verbose:
        print "done"

    if options.verbose:
        print "Writing zipHMM data to '%s'..." % (output_dirname),
        sys.stdout.flush()
    if not os.path.exists(output_dirname):
        os.makedirs(output_dirname)
    f.writeToDirectory(output_dirname)
    os.rename(foutname, os.path.join(output_dirname, "original_sequence"))
    if options.verbose:
        print "done"

Example #2

Show file

File: prepare-alignments.py Project: tkchafin/coalhmm

def main():
    usage = """%prog [options] <input> <input format> <output dir>

This program reads in an input sequence in any format supported by BioPython
and writes out a preprocessed file ready for use with zipHMM.
Also supports gzipped input files, if the name ends with `.gz`.

Assumption #1: Either the file is a pairwise alignment, or you have provided
exactly two names to the `--names` option.

Assumption #2: The file uses a simple ACGT format (and N/-). Anything else will
be interpreted as N and a warning will be given with all unknown symbols.

Warning: This program uses SeqIO.to_dict to read in the entire alignment, you
may want to split the alignment first if it's very large.
"""

    parser = OptionParser(usage=usage, version="%prog 1.0")

    parser.add_option(
        "--names",
        dest="names",
        type="string",
        default=None,
        help="A comma-separated list of names to use from the source file")
    parser.add_option("-v",
                      "--verbose",
                      dest="verbose",
                      action="store_true",
                      default=False,
                      help="Print some stuff")

    (options, args) = parser.parse_args()

    if len(args) != 3:
        parser.error("Needs input file, input format and output file")
    in_filename = args.pop(0)
    in_format = args.pop(0)
    output_dirname = args.pop(0)

    assert os.path.exists(in_filename), "Must use an existing input file"
    if in_filename.endswith('.gz'):
        if options.verbose:
            print "Assuming '%s' is a gzipped file." % in_filename
        inf = gzip.open(in_filename)
    else:
        inf = open(in_filename)

    if options.verbose:
        print "Loading data...",
        sys.stdout.flush()
    alignments = SeqIO.to_dict(SeqIO.parse(inf, in_format))
    if options.verbose:
        print "done"

    if options.names:
        names = options.names.split(',')
    else:
        names = list(alignments.keys())
    assert len(names) == 2, "Must be a pairwise alignment."
    if options.verbose:
        print "Assuming pairwise alignment between '%s' and '%s'" % (names[0],
                                                                     names[1])
    srcs = [alignments[name].seq for name in names]

    clean = set('ACGT')
    A = srcs[0]
    B = srcs[1]
    assert len(A) == len(B)
    L = len(A)
    fd, foutname = tempfile.mkstemp()
    if options.verbose:
        print "Writing temp file readable by zipHMM to '%s'..." % (foutname),
        sys.stdout.flush()
    seen = set()
    with os.fdopen(fd, 'w', 64 * 1024) as f:
        for i in xrange(L):
            s1, s2 = A[i].upper(), B[i].upper()
            seen.add(s1)
            seen.add(s2)
            if s1 not in clean or s2 not in clean:
                print >> f, 2,
            elif s1 == s2:
                print >> f, 0,
            else:
                print >> f, 1,
    if options.verbose:
        print "done"
    if len(seen - set('ACGTN-')) > 1:
        print >> sys.stderr, "I didn't understand the following symbols form the input sequence: %s" % (
            ''.join(list(seen - set('ACGTN-'))))
    if options.verbose:
        print "zipHMM  is preprocessing...",
        sys.stdout.flush()
    f = Forwarder.fromSequence(seqFilename=foutname,
                               alphabetSize=3,
                               minNoEvals=500)
    if options.verbose:
        print "done"

    if options.verbose:
        print "Writing zipHMM data to '%s'..." % (output_dirname),
        sys.stdout.flush()
    if not os.path.exists(output_dirname):
        os.makedirs(output_dirname)
    f.writeToDirectory(output_dirname)
    os.rename(foutname, os.path.join(output_dirname, 'original_sequence'))
    if options.verbose:
        print "done"