コード例 #1
0
ファイル: sequences2mali.py プロジェクト: CGATOxford/Optic
def main(argv=None):
    """Add sequences read from stdin to an existing multiple alignment.

    Command line options are parsed through ``E.Start``. Work is only done
    when ``--method=add`` is selected:

    1. The first comma-separated entry of ``--parameters`` is opened and
       read as the existing multiple alignment (``--input-format``).
    2. Each record obtained from ``FastaIterator.iterate(sys.stdin)`` is
       aligned against a profile built from the current alignment
       (``--alignment-method``: ``sw`` uses ``py_makeAlignatorFullDP``,
       anything else ``py_makeAlignatorFullDPGlobal``) and appended as a
       new row.
    3. The resulting alignment is written to stdout (``--output-format``).

    Arguments
    ---------
    argv : list, optional
        Command line arguments; defaults to ``sys.argv``.
        NOTE(review): *argv* is not forwarded to ``E.Start`` in this
        function, so a caller-supplied list has no visible effect here --
        confirm against the signature of ``E.Start``.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $", usage=globals()["__doc__"])

    parser.add_option("-i", "--input-format", dest="input_format", type="choice",
                      choices=(
                          "plain", "fasta", "clustal", "stockholm", "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o", "--output-format", dest="output_format", type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("add",),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a", "--alignment-method", dest="alignment_method", type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    # gop/gep have no command line switch in this function; they are only
    # available through these defaults.
    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    # The parameter stack is given as a comma-separated string.
    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        mali = Mali.Mali()

        # The first entry of the parameter stack names the file holding the
        # existing alignment. Close the handle even if reading fails.
        infile = open(options.parameters[0], "r")
        try:
            mali.readFromFile(infile, format=options.input_format)
        finally:
            infile.close()
        del options.parameters[0]

        # Number of rows present before any sequence is added.
        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while True:
            # Accept both end-of-input conventions: a ``None`` record or
            # ``StopIteration`` raised by the iterator.
            try:
                cur_record = iterator.next()
            except StopIteration:
                break
            if cur_record is None:
                break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            # The profile is rebuilt on every iteration so that it includes
            # the rows added so far.
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            # add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            # NOTE(review): presumably releases Python-side ownership of the
            # wrapped object so that new_mali can keep it -- confirm.
            a.thisown = 0

            # The meaning of the five trailing flags is defined by
            # alignlib's addAlignatum and is not visible from here.
            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            identifier = cur_record.title
            mali.mIdentifiers.append(identifier)
            mali.mMali[identifier] = Mali.AlignedString(
                identifier, 0, len(cur_record.sequence),
                new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute: refresh the strings of the rows that were present
        # before from new_mali (presumably they changed while sequences
        # were being added).
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = new_mali.getRow(
                x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()
コード例 #2
0
ファイル: align_all_vs_all.py プロジェクト: santayana/cgat
def main(argv=None):
    """Align every pair of input sequences and print one line per pair.

    Sequences are read with ``FastaIterator.FastaIterator`` from the file
    given by ``--sequences`` or, if that option is not set, from stdin.
    Spaces are removed from each sequence before it is converted with
    ``alignlib_lite.py_makeSequence``. Every unordered pair ``(x, y)`` with
    ``x < y`` is aligned with ``py_makeAlignatorFullDP`` and a
    tab-separated line is written to ``options.stdout`` with the fields:

    title of x, title of y, score, row start, row end, compressed row
    alignment, column start, column end, compressed column alignment,
    score (again), 100 times the value of ``py_calculatePercentIdentity``,
    length of x, length of y.

    Numeric fields are formatted with ``%i`` and are therefore written as
    integers.

    Arguments
    ---------
    argv : list, optional
        Command line arguments; defaults to ``sys.argv``.
        NOTE(review): *argv* is not forwarded to ``E.Start`` in this
        function, so a caller-supplied list has no visible effect here --
        confirm against the signature of ``E.Start``.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: align_all_vs_all.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s",
                      "--sequences",
                      dest="filename_sequences",
                      type="string",
                      help="input file with sequences")

    # gop/gep have no command line switch in this function; they are only
    # available through these defaults.
    parser.set_defaults(
        filename_sequences=None,
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.filename_sequences:
        infile = open(options.filename_sequences, "r")
    else:
        infile = sys.stdin

    sequences = []
    try:
        # The iterator used to be bound to the name ``parser`` while the
        # loop read from ``iterator``, which raised a NameError.
        iterator = FastaIterator.FastaIterator(infile)

        while True:
            # Accept both end-of-input conventions: a ``None`` record or
            # ``StopIteration`` raised by the iterator.
            try:
                cur_record = iterator.next()
            except StopIteration:
                break
            if cur_record is None:
                break

            sequences.append(
                (cur_record.title,
                 alignlib_lite.py_makeSequence(
                     cur_record.sequence.replace(" ", ""))))
    finally:
        # Only close what was opened here; stdin is left alone.
        if options.filename_sequences:
            infile.close()

    alignator = alignlib_lite.py_makeAlignatorFullDP(options.gop, options.gep)
    # A single alignment object is reused for all pairs.
    map_a2b = alignlib_lite.py_makeAlignataVector()
    nsequences = len(sequences)

    for x in range(0, nsequences - 1):
        title_x, seq_x = sequences[x]
        for y in range(x + 1, nsequences):
            title_y, seq_y = sequences[y]

            alignator.Align(seq_x, seq_y, map_a2b)

            row_ali, col_ali = alignlib_lite.py_writeAlignataCompressed(
                map_a2b)

            options.stdout.write(
                "%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" %
                (title_x, title_y, map_a2b.getScore(),
                 map_a2b.getRowFrom(), map_a2b.getRowTo(), row_ali,
                 map_a2b.getColFrom(), map_a2b.getColTo(), col_ali,
                 map_a2b.getScore(),
                 100 * alignlib_lite.py_calculatePercentIdentity(
                     map_a2b, seq_x, seq_y),
                 seq_x.getLength(), seq_y.getLength()))

    E.Stop()
コード例 #3
0
def main(argv=None):
    """Add sequences read from stdin to an existing multiple alignment.

    Command line options are parsed through ``E.Start``. Work is only done
    when ``--method=add`` is selected:

    1. The first comma-separated entry of ``--parameters`` is opened and
       read as the existing multiple alignment (``--input-format``).
    2. Each record obtained from ``FastaIterator.iterate(sys.stdin)`` is
       aligned against a profile built from the current alignment
       (``--alignment-method``: ``sw`` uses ``py_makeAlignatorFullDP``,
       anything else ``py_makeAlignatorFullDPGlobal``) and appended as a
       new row.
    3. The resulting alignment is written to stdout (``--output-format``).

    Arguments
    ---------
    argv : list, optional
        Command line arguments; defaults to ``sys.argv``.
        NOTE(review): *argv* is not forwarded to ``E.Start`` in this
        function, so a caller-supplied list has no visible effect here --
        confirm against the signature of ``E.Start``.
    """

    # Identity comparison is the correct test for the None default.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: sequences2mali.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-i",
                      "--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("plain", "fasta", "clustal", "stockholm",
                               "phylip"),
                      help="input format of multiple alignment")

    parser.add_option("-o",
                      "--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("plain", "fasta", "stockholm", "phylip"),
                      help="output format of multiple alignment")

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("add", ),
                      help="""method to use to build multiple alignment.""")

    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="parameter stack for methods that require one.")

    parser.add_option("-a",
                      "--alignment-method",
                      dest="alignment_method",
                      type="choice",
                      choices=("sw", "nw"),
                      help="alignment_method [%default].")

    # gop/gep have no command line switch in this function; they are only
    # available through these defaults.
    parser.set_defaults(
        input_format="fasta",
        output_format="fasta",
        method=None,
        parameters="",
        gop=-10.0,
        gep=-1.0,
        alignment_method="sw",
    )

    (options, args) = E.Start(parser)

    # The parameter stack is given as a comma-separated string.
    options.parameters = options.parameters.split(",")

    iterator = FastaIterator.iterate(sys.stdin)

    if options.method == "add":

        mali = Mali.Mali()

        # The first entry of the parameter stack names the file holding the
        # existing alignment. Close the handle even if reading fails.
        infile = open(options.parameters[0], "r")
        try:
            mali.readFromFile(infile, format=options.input_format)
        finally:
            infile.close()
        del options.parameters[0]

        # Number of rows present before any sequence is added.
        old_length = mali.getLength()

        new_mali = convertMali2Mali(mali)

        if options.alignment_method == "sw":
            alignator = alignlib_lite.py_makeAlignatorFullDP(
                options.gop, options.gep)
        else:
            alignator = alignlib_lite.py_makeAlignatorFullDPGlobal(
                options.gop, options.gep)

        while True:
            # Accept both end-of-input conventions: a ``None`` record or
            # ``StopIteration`` raised by the iterator.
            try:
                cur_record = iterator.next()
            except StopIteration:
                break
            if cur_record is None:
                break

            map_mali2seq = alignlib_lite.py_makeAlignataVector()

            sequence = alignlib_lite.py_makeSequence(cur_record.sequence)
            # The profile is rebuilt on every iteration so that it includes
            # the rows added so far.
            profile = alignlib_lite.py_makeProfileFromMali(new_mali)

            if options.loglevel >= 4:
                options.stdlog.write(profile.Write())

            alignator.Align(profile, sequence, map_mali2seq)

            if options.loglevel >= 3:
                options.stdlog.write(map_mali2seq.Write())

            # add sequence to mali
            a = alignlib_lite.py_makeAlignatumFromString(cur_record.sequence)
            # NOTE(review): presumably releases Python-side ownership of the
            # wrapped object so that new_mali can keep it -- confirm.
            a.thisown = 0

            # The meaning of the five trailing flags is defined by
            # alignlib's addAlignatum and is not visible from here.
            new_mali.addAlignatum(a, map_mali2seq, 1, 1, 1, 1, 1)

            identifier = cur_record.title
            mali.mIdentifiers.append(identifier)
            mali.mMali[identifier] = Mali.AlignedString(
                identifier, 0, len(cur_record.sequence),
                new_mali.getRow(new_mali.getWidth() - 1).getString())

        # substitute: refresh the strings of the rows that were present
        # before from new_mali (presumably they changed while sequences
        # were being added).
        for x in range(old_length):
            mali.mMali[mali.mIdentifiers[x]].mString = new_mali.getRow(
                x).getString()

        mali.writeToFile(sys.stdout, format=options.output_format)

    E.Stop()
コード例 #4
0
def main(argv=None):
    """Align every pair of input sequences and print one line per pair.

    Sequences are read with ``FastaIterator.FastaIterator`` from the file
    given by ``--sequences`` or, if that option is not set, from stdin.
    Spaces are removed from each sequence before it is converted with
    ``alignlib_lite.py_makeSequence``. Every unordered pair ``(x, y)`` with
    ``x < y`` is aligned with ``py_makeAlignatorFullDP`` and a
    tab-separated line is written to ``options.stdout`` with the fields:

    title of x, title of y, score, row start, row end, compressed row
    alignment, column start, column end, compressed column alignment,
    score (again), 100 times the value of ``py_calculatePercentIdentity``,
    length of x, length of y.

    Numeric fields are formatted with ``%i`` and are therefore written as
    integers.

    Arguments
    ---------
    argv : list, optional
        Command line arguments; defaults to ``sys.argv``.
        NOTE(review): *argv* is not forwarded to ``E.Start`` in this
        function, so a caller-supplied list has no visible effect here --
        confirm against the signature of ``E.Start``.
    """

    # Identity comparison is the correct test for the None default.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: align_all_vs_all.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-s", "--sequences", dest="filename_sequences", type="string",
                      help="input file with sequences")

    # gop/gep have no command line switch in this function; they are only
    # available through these defaults.
    parser.set_defaults(
        filename_sequences=None,
        gop=-10.0,
        gep=-1.0,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    read_from_file = bool(options.filename_sequences)
    if read_from_file:
        infile = open(options.filename_sequences, "r")
    else:
        infile = sys.stdin

    sequences = []
    try:
        # The iterator used to be bound to the name ``parser`` while the
        # loop read from ``iterator``, which raised a NameError.
        iterator = FastaIterator.FastaIterator(infile)

        while True:
            # Accept both end-of-input conventions: a ``None`` record or
            # ``StopIteration`` raised by the iterator.
            try:
                cur_record = iterator.next()
            except StopIteration:
                break
            if cur_record is None:
                break

            cleaned = cur_record.sequence.replace(" ", "")
            sequences.append(
                (cur_record.title, alignlib_lite.py_makeSequence(cleaned)))
    finally:
        # Only close what was opened here; stdin is left alone.
        if read_from_file:
            infile.close()

    alignator = alignlib_lite.py_makeAlignatorFullDP(options.gop, options.gep)
    # A single alignment object is reused for all pairs.
    map_a2b = alignlib_lite.py_makeAlignataVector()
    nsequences = len(sequences)

    for x in range(0, nsequences - 1):
        title_x, seq_x = sequences[x]
        for y in range(x + 1, nsequences):
            title_y, seq_y = sequences[y]

            alignator.Align(seq_x, seq_y, map_a2b)

            row_ali, col_ali = alignlib_lite.py_writeAlignataCompressed(
                map_a2b)

            options.stdout.write(
                "%s\t%s\t%i\t%i\t%i\t%s\t%i\t%i\t%s\t%i\t%i\t%i\t%i\n" % (
                    title_x, title_y,
                    map_a2b.getScore(),
                    map_a2b.getRowFrom(),
                    map_a2b.getRowTo(),
                    row_ali,
                    map_a2b.getColFrom(),
                    map_a2b.getColTo(),
                    col_ali,
                    map_a2b.getScore(),
                    100 * alignlib_lite.py_calculatePercentIdentity(
                        map_a2b, seq_x, seq_y),
                    seq_x.getLength(),
                    seq_y.getLength()))

    E.Stop()