def main(args,maxMismatch=0,seedLen=36):
    for f in args:
        bowtieFile,bowtiePath = mystemp(suffix='.bowtie')
        fStub,fExt = os.path.splitext(f)
        oFile = safeOFW('.'.join((fStub,'Hg18rm',fExt[1:])),'w')
        print os.curdir
        try:
            subprocess.call(['bowtie', '-f','-l',str(seedLen),'-n',str(maxMismatch),
                             hg18db, f, bowtiePath])
            humanQrys =B.readTitleSet(bowtiePath)
            if len(humanQrys)==0:
                raise RuntimeError, "No Alignments found"

            for rec in fasta.generalIterator(f):

                if rec.title in humanQrys:
                    continue
                else:
                    print >> oFile, rec
            oFile.close()

        except BaseException:
            os.unlink(oFile.name)
            print '%s -failed' %f
            raise

        finally:
            os.unlink(bowtiePath)

    return 0
Example #2
0
def main(args):
    for f in args:
        mbrFile,mbrPath = mystemp(suffix='.mbr')
        fStub,fExt = os.path.splitext(f)
        oFile = safeOFW('.'.join((fStub,'Hg18rm',fExt[1:])),'w')
        print os.curdir
        try:
            subprocess.call(['megablast', '-m8', '-i',f,
                             '-d', hg18db,'-o', mbrPath])
            humanQrys =B.m8TitleSet(mbrPath,qrySet=True)
            for rec in fasta.generalIterator(f):
                if len(humanQrys)==0:
                    break
                if rec.title in humanQrys:
                    continue
                else:
                    print >> oFile, rec
            oFile.close()

        except BaseException:
            os.unlink(oFile.name)
            print '%s -failed' %f
            raise

        finally:
            os.unlink(mbrPath)

    return 0
Example #3
0
def main(sysargs=[]):

    # short usage message
    oneLineUsage = "Usage: %prog [options] <input files>"

    # set a long desccription for the help message
    description=("Add reverse complement fasta records to Fasta file(s). "
                 "Use '-' to read from STDIN.")

    op = optparse.OptionParser(
        oneLineUsage,description=description,
        version="%prog " + '.'.join([str(x) for x in __version__]))

    # OPTION DEFINITIONS
    op.add_option('-o','--outputfile',dest="outFileName",default='-',
                  help="name of output file. '-' means: STDOUT")
    op.add_option('-c','--clobber',action="store_true", default=False,
                  dest="clobber", help="allow output file to be overwritten.")


    # all options should be defined above here

    ### Don't Change In Box ###################
    #                                         #
    # OPTION PATCHING                         #
    # add defaults to help messages           # 
    #                                         #
    for o in op.option_list:                  #
        if o.type == None:                    #
            continue                          #
        if o.help == None:                    #
            o.help = "Default: %default"      #
        else:                                 #
            o.help+= " Default: %default"     #
                                              #
                                              #
    #                                         #
    # OPTION PARSING                          #
    (opts,args) = op.parse_args(sysargs)      #
    #                                         #
    # option values are available as attrs    #
    # of the opts object, e.g. opts.myClobber #
    #                                         #
    # args is a list of the balance of the    #
    # command line                            # 
    ###########################################


    # OPTION CHECKING
    try:
        # (check an opt) tileSizes = [int(x) for x in opts.tileSizesL.split(',') if len(x)>0]
        outFile = utils.safeOFW(opts.outFileName,clobber=opts.clobber) # open the output file
    except Exception, eData:
        print >> sys.stderr, ("\nOption Error: %s\n" %eData.message)
        print >> sys.stderr, op.format_help()
        return 1
Example #4
0
 def cluster(self,outFile,log2transform=True):
     """Write the sample table as tab-separated text.

     outFile       -- a file name (opened with safeOFW) or an already
                      open file-like object.
     log2transform -- passed through to self.sampleIdNameData.

     A header row of 'LOCUS', 'NAME' and self.sampleNames is written,
     followed by every row from self.sampleIdNameData that has at least
     one non-empty value after its first two columns.  The output file is
     closed before returning, including when the caller supplied it.
     """
     if isinstance(outFile, StringTypes):
         # The original passed the undefined name 'outFileName' here,
         # which raised NameError whenever a file name was given.
         oFile=safeOFW(outFile)
     else:
         oFile=outFile
     print >> oFile, '\t'.join(['LOCUS','NAME']+self.sampleNames)
     for row in self.sampleIdNameData(log2transform):
         values = row[2:]
         # skip rows whose data columns are all empty strings
         if values.count('') < len(values):
             print >> oFile, '\t'.join(row)
     oFile.close()
Example #5
0
def main(sysargs=[]):

    # short usage message
    oneLineUsage = "Usage: %prog [options] <input files>"

    description="""
Checks for binary zeros in files. Exit status is non zero if zeros
are detected. Checking a file is stopped as soon as a zero is found.
Gzip'ed files are accepted, and unzipped on the fly. '-' indicates
that standard input should be checked. 
"""

    op = optparse.OptionParser(
        oneLineUsage,description=description,
        version="%prog " + '.'.join([str(x) for x in __version__]))

    # OPTION DEFINITIONS
    op.add_option('-o','--outputfile',dest="outFileName",default='-',
                  help="name of output file. '-' means: STDOUT")
    op.add_option('-c','--clobber',action="store_true", default=False,
                  dest="clobber", help="allow output file to be overwritten.")
    op.add_option('-a', '--append', action="store_true", default=False,
                  dest="append", help="append to output file..")

    # all options should be defined above here
    # (you don't have to define version or help)

    # add defaults to help messages     
    for o in op.option_list:            
        if o.type == None:              
            continue                    
        if o.help == None:              
            o.help = "Default: %default"
        else:                           
            o.help+= " Default: %default"
                                        
    # OPTION PARSING                    
    (opts,args) = op.parse_args(sysargs)

    # OPTION CHECKING
    try:
        # (check an opt) 
        outFile = utils.safeOFW(opts.outFileName,
                                append=opts.append,clobber=opts.clobber) # open the output file
    except Exception, eData:
        print >> sys.stderr, ("\nOption Error: %s\n" %eData.message)
        print >> sys.stderr, op.format_help()
        return 1
Example #6
0
def main(sysargs=[]):

    # short usage message
    oneLineUsage = "Usage: %prog [options] <input files>"

    # set a long desccription for the help message
    description=None

    op = optparse.OptionParser(
        oneLineUsage,description=description,
        version="%prog " + '.'.join([str(x) for x in __version__]))

    # OPTION DEFINITIONS
    op.add_option('-d','--drop-dup', dest="dropDup",default=False,
                  action="store_true",
                  help="remove duplicate sequences from output")
    op.add_option('-o','--outputfile',dest="outFileName",default='-',
                  help="name of output file. '-' means: STDOUT")
    op.add_option('-c','--clobber',action="store_true", default=False,
                  dest="clobber", help="allow output file to be overwritten.")
    op.add_option('-a', '--append', action="store_true", default=False,
                  dest="append", help="append to output file..")

    # all options should be defined above here
    # (you don't have to define version or help)

    # add defaults to help messages     
    for o in op.option_list:            
        if o.type == None:              
            continue                    
        if o.help == None:              
            o.help = "Default: %default"
        else:                           
            o.help+= " Default: %default"
                                        
    # OPTION PARSING                    
    (opts,args) = op.parse_args(sysargs)

    # OPTION CHECKING
    try:
        # (check an opt) tileSizes = [int(x) for x in opts.tileSizesL.split(',') if len(x)>0]
        outFile = utils.safeOFW(opts.outFileName,
                                append=opts.append,clobber=opts.clobber) # open the output file
    except Exception, eData:
        print >> sys.stderr, ("\nOption Error: %s\n" %eData.message)
        print >> sys.stderr, op.format_help()
        return 1
Example #7
0
def main(sysargs):

    op = optparse.OptionParser("Usage: %prog [options] <fasta files>",
                         version=__version__)
    op.add_option('-t','--tilesizes',dest="tileSizesL", default="60",
                  help="comma seperated list of tile sizes to generate. e.g. '50,70'.")
    op.add_option('-s','--shift',default=25,type='int',dest="shift",
                  help="Start position offset" )
    op.add_option('-c','--clobber',action="store_true", default=False,
                  dest="clobber", help="allow output file to be overwritten.")
    op.add_option('-E','--endtile',action="store_false", default=True,
                  dest="endtile", help="Suppress generation of an tile that covers the"
                  " last base in each recrod regardless of the -s and -t settings.")
    op.add_option('-o','--outputfile',dest="outFileName",default='-',
                  help="name of output file. '-' means: STDOUT")
    op.add_option('-P','--permissiveTitles',action="store_true", default=False,
                  dest="nonGiTitles",
                  help="allow non-gi fasta titles when calculating tile titles")


    for o in op.option_list:
        if o.help == None:
            o.help = "Default: %default"
        else:
            o.help+= " Default: %default"

    
    (opts,args) = op.parse_args(sysargs)

    if opts.nonGiTitles:
        tileTitleCallback = fasta.generalTileTitle
    else:
        tileTitleCallback = fasta.giTileTitle

    try:
        tileSizes = [int(x) for x in opts.tileSizesL.split(',') if len(x)>0]
        outFile = utils.safeOFW(opts.outFileName,clobber=opts.clobber)
    except Exception, eData:
        print >> sys.stderr, ("\nOption Error: %s\n" %eData.message)
        print >> sys.stderr, op.format_help()
        return 1
def main(sysargs):

    usageStr = __doc__
    epilog = ""

    op = optparse.OptionParser(usageStr, version=__version__)

    #
    # OPTION DEFINITIONS
    #
    op.add_option("-t", "--tablesuffix", default="", help="Hsp table suffix")
    op.add_option("-c", "--clobber", action="store_true", default=False, help="allow output file to be overwritten.")
    op.add_option("-l", "--logfile", default="-", help="name of log file. '-' means: STDOUT")

    #
    # OPTION PATCHING
    #
    # add "defaults" to help messages
    for o in op.option_list:
        if o.type != None or o.action.startswith("store_"):
            if o.help == None:
                o.help = "Default: %default"
            else:
                o.help += " Default: %default"

    # call the parser
    (opts, m8files) = op.parse_args(sysargs)

    #
    # OPTION CHECKING
    #

    #   Example of option checking beyond what parse_args does:
    #   (Note that parse_args can do some basic type checking
    #   and conversion.)
    try:
        outFile = utils.safeOFW(opts.logfile, clobber=opts.clobber)
    except Exception, eData:
        print >>sys.stderr, ("\nOption Error: %s\n" % eData.message)
        print >>sys.stderr, op.format_help()
        return 1
def processFiles(files, clobber=False):
    """Write a '<file>.dG' table for every input file.

    files   -- iterable of input paths, each handed to B.xmlBlast2energy.
    clobber -- passed to safeOFW when opening each output file.

    For every item yielded by B.xmlBlast2energy its first, second and
    last elements are written as one tab-separated line.  Returns 0.
    """
    for f in files:
        outF = safeOFW(f + ".dG", clobber=clobber)
        try:
            for hsp in B.xmlBlast2energy(f):
                print >> outF, "\t".join([str(x) for x in (hsp[0], hsp[1], hsp[-1])])
        finally:
            # the original never closed the handle; close it even when
            # parsing fails so the output is flushed and released
            outF.close()
    return 0
Example #10
0
def main(sysargs=[]):

    # short usage message
    oneLineUsage = "Usage: %prog [options] <sequence files>"

    # set a long desccription for the help message
    description=(
        "Takes files containing reads (raw sequence with no title info)"
        " and combines them into a fasta file.  The fasta titles are"
        " derived from the file names."
        )

    op = optparse.OptionParser(
        oneLineUsage,description=description,
        version="%prog " + '.'.join([str(x) for x in __version__]))

    # OPTION DEFINITIONS
    op.add_option('-o','--outputfile',dest="outFileName",default=defaultOutName,
                  help="name of output file. '-' means: STDOUT")
    op.add_option('-c','--clobber',action="store_true", default=False,
                  dest="clobber", help="allow output file to be overwritten.")

    op.add_option('-w','--width',dest="fmtWidth", default=defaultFastaWidth,
                  help="formatted width of output fasta records.")

    op.add_option('-s','--spaceChar',dest="spaceChar",default=defaultSpaceChar,
                 help="replace title spaces with this character/string")

    op.add_option('-P','--useFullPaths',action="store_true",
                  dest="useFullPaths",default=False,
                  help="include pathnames in titles")

    # all options should be defined above here

    # OPTION PATCHING
    # add defaults to help messages
    for o in op.option_list:
        if o.type == None:
            continue
        if o.help == None:
            o.help = "Default: %default"
        else:
            o.help+= " Default: %default"

    # OPTION PARSING
    (opts,args) = op.parse_args(sysargs)  # call the parser

    # option values are available as attributes of the opts object
    # e.g. opts.myClobber
    # args is a list of the balance of the command line

    # OPTION CHECKING
    #   Example of option checking beyond what parse_args does:
    #   (Note that parse_args can do some basic type checking
    #   and conversion.)
    

    try:
        # (check an opt) tileSizes = [int(x) for x in opts.tileSizesL.split(',') if len(x)>0]
        outFile = utils.safeOFW(opts.outFileName,clobber=opts.clobber) # open the output file
    except Exception, eData:
        print >> sys.stderr, ("\nOption Error: %s\n" %eData.message)
        print >> sys.stderr, op.format_help()
        return 1
def main(sysargs=[]):
    """
    """
    oneLineUsage = "Usage: %prog [options] <input fasta file>"

    # set a long desccription for the help message
    description = """This program makes a multiprocess (RPC) screener to remove
redundant sequence from a fastafile.

The output file is named identityScreen_Rpyc.py.
"""

    op = optparse.OptionParser(
        oneLineUsage, description=description, version="%prog " + ".".join([str(x) for x in __version__])
    )

    # OPTION DEFINITIONS
    op.add_option(
        "-o",
        "--outputfile",
        dest="outFileName",
        default="identityScreen_Rpyc.py",
        help="name of output file. '-' means: STDOUT",
    )
    op.add_option(
        "-c",
        "--clobber",
        action="store_true",
        default=False,
        dest="clobber",
        help="allow output file to be overwritten.",
    )
    op.add_option("-n", "--slice-names", dest="faNameFile", help="Name of file that will hold the slice names")
    op.add_option(
        "-S",
        "--slice-size",
        dest="sliceSize",
        type="int",
        default=sliceSize,
        help="maximum size, in bytes, of each slice file",
    )
    op.add_option("-C", "--slice-count", dest="sliceCount", type="int", default=sliceSize, help="number of slices")

    # add defaults to help messages
    for o in op.option_list:
        if o.type == None:
            continue
        if o.help == None:
            o.help = "Default: %default"
        else:
            o.help += " Default: %default"

    # OPTION PARSING
    (opts, args) = op.parse_args(sysargs)

    try:
        if opts.sliceSize is None and opts.sliceCount is None:
            raise Exception, "sliceCount (-C) or sliceSize (-S) must be set."
        if len(args) != 1:
            raise Exception, "Input fasta file must me specified."

        outFile = utils.safeOFW(opts.outFileName, append=False, clobber=opts.clobber)  # open the output file
    except Exception, eData:
        # raise
        print >>sys.stderr, ("\nUsage Error: %s\n" % eData.message)
        print >>sys.stderr, op.format_help()
        return 1
Example #12
0
    ##         print >> sys.stderr, ("\nWrong number of files or arguments: "
    ##                               "%s found (expected max of %s)"
    ##                               % (len(args), maxArgs))
    ##         argProblem = True

    ##     elif len(args) < minArgs:
    ##         print >> sys.stderr, ("\nWrong number of files or arguments: "
    ##                               "%s found (expected min of %s)"
    ##                               % (len(args),minArgs))
    ##         argProblem = True

    ##     if argProblem:
    ##         print >> sys.stderr, op.format_help()
    ##         return 1

    oFile = utils.safeOFW(opts.outFileName, clobber=opts.clobber)

    somethingUsefull(args, oFile)

    return 0  # we did it!


def somethingUsefull(args, oFile):
    """Process every record of every input file in args.

    Records whose sequence passes fasta.isAllDRNA are printed to oFile
    unchanged.  For the other records, each sequence yielded by
    fasta.expandAmbiguousSequence gets a title made of the record title
    plus "|ua<n>".

    NOTE(review): in the lines visible here n is never incremented and
    uaTitle is never used -- the rest of the loop body appears to be
    missing; confirm against the full file.
    """
    for f in args:
        for rec in fasta.generalIterator(f):
            if fasta.isAllDRNA(rec.sequence):
                print >> oFile, rec
            else:
                n = 0
                for uaSeq in fasta.expandAmbiguousSequence(rec.sequence):
                    uaTitle = rec.title + "|ua%s" % n
Example #13
0
def main(args):
    """Write a genbank record and its single-region genes as fasta files.

    args -- command-line argument list handed to parser.parse_args.  After
            parsing, the second positional argument (args[1]) is used as
            the qualifier that names each gene; presumably args[0] is the
            program name -- confirm against the caller.

    The record comes from a file (-r) or is looked up by gi (-g).  Two
    files are written with safeOFW: '<out>.fasta' holding the whole
    record and '<out>.genes.fasta' holding one fasta record per 'gene'
    feature that has exactly one region and carries the qualifier.

    Exits with status 1 (sys.exit) when neither -r nor -g is given or the
    qualifier name is missing.
    """
    parser = OptionParser(('usage: %prog [options] '
                          '<qualifier to use as gene name>'))
    parser.add_option('-g', '--gi', dest='gi', type='int',
                      help='gi of the genbank record to analyze.')
    parser.add_option('-r', '--record', dest='record', type='string',
                      help='read genbank record from FILE',
                      metavar='FILE')
    parser.add_option('-o', '--output', dest="output",
                      help='name of output file.')
    parser.add_option('-c', '--clobber', dest="clobber",
                      help='Clobber the output files.',
                      action='store_true', default=False)
    (options, args) = parser.parse_args(args)

    if not options.gi and not options.record:
        print >> sys.stderr, 'Usage error: a record file (-r) or ' + \
              'a gi (-g) is required'
        print >> sys.stderr, parser.format_help()
        sys.exit(1)

    # output stub: explicit -o wins, then the record file name, then the gi
    if options.output:
        filenameOut = options.output
    else:
        if options.record:
            filenameOut = '%s' % options.record
        else:
            filenameOut = '%s' % options.gi

    if len(args) < 2:
        print >> sys.stderr, 'Qualifier name is required'
        print >> sys.stderr, parser.format_help()
        sys.exit(1)

    # The record file handle is kept open until the end of the function
    # (the record may read from it lazily) and is always closed on exit.
    recordFh = None
    try:
        if options.record:
            recordFh = file(options.record)
            record = genbank.Record(recordFh)
        else:
            record = giInfo.GiRecord(options.gi, True)

        qualifierName = args[1]

        fastaFh = safeOFW('%s.fasta' % filenameOut,
                          clobber=options.clobber)
        try:
            print >> fastaFh, record.fasta()
        finally:
            fastaFh.close()

        genesFh = safeOFW('%s.genes.fasta' % filenameOut,
                          clobber=options.clobber)
        try:
            for feature in record.features():
                # only single-region 'gene' features are exported
                if len(feature.location.regions) != 1:
                    continue

                if feature.type != 'gene':
                    continue

                region = feature.location.regions[0]
                # region.start is used as a 1-based inclusive start
                seq = record.sequence[region.start - 1:region.end]
                seq = seq.upper()
                if region.complement:
                    seq = genbank.reverseComplement(seq)

                if feature.qualifiers.has_key(qualifierName):
                    feat_name = feature.qualifiers[qualifierName]
                else:
                    print >> sys.stderr, ("Feature doesn't have qualifier "
                                          "'%s'.\nQualifiers: %s" %
                                          (qualifierName, feature.qualifiers))
                    continue

                print >> genesFh, fasta.Record(title=feat_name, sequence=seq)
                print >> genesFh, ''
        finally:
            genesFh.close()
    finally:
        if recordFh is not None:
            recordFh.close()
def main(sysargs=[]):

    # short usage message
    oneLineUsage = "Usage: %prog [options] <input file>"

    # set a long desccription for the help message
    description = (
        "Extract random records or subsequences from a input fasta/fastq file.\n"
        "Input file can be STDIN; specified as '-'.\n"
    )

    op = optparse.OptionParser(
        oneLineUsage, description=description, version="%prog " + ".".join([str(x) for x in __version__])
    )

    # Output Control
    op.add_option("-o", "--outputfile", dest="outFileName", default="-", help="name of output file. '-' means: STDOUT")
    op.add_option(
        "-c",
        "--clobber",
        action="store_true",
        default=False,
        dest="clobber",
        help="allow output file to be overwritten.",
    )
    op.add_option("-a", "--append", action="store_true", default=False, dest="append", help="append to output file..")
    # Subsequence control
    op.add_option(
        "-l",
        "--length",
        dest="ssLen",
        type="int",
        default=None,
        action="store",
        help="length of subsequences. " "If None, the complete input record is output.",
    )
    op.add_option(
        "-n",
        "--number",
        dest="recNumber",
        type="int",
        default=1,
        action="store",
        help="number of output records. " "Set to 0 for no limit",
    )
    op.add_option(
        "-e", "--excludePat", dest="exRE", default=None, action="store", help="exclude sequences that match regex"
    )

    # all options should be defined above here
    # (you don't have to define version or help)

    # add defaults to help messages
    for o in op.option_list:
        if o.type == None:
            continue
        if o.help == None:
            o.help = "Default: %default"
        else:
            o.help += " Default: %default"

    # OPTION PARSING
    (opts, args) = op.parse_args(sysargs)

    if opts.recNumber == 0:
        opts.recNumber = None

    # OPTION CHECKING
    try:
        # (check an opt) tileSizes = [int(x) for x in opts.tileSizesL.split(',') if len(x)>0]
        outFile = utils.safeOFW(opts.outFileName, append=opts.append, clobber=opts.clobber)  # open the output file
    except Exception, eData:
        print >>sys.stderr, ("\nOption Error: %s\n" % eData.message)
        print >>sys.stderr, op.format_help()
        return 1
Example #15
0
def main(sysargs=[]):

    # short usage message
    oneLineUsage = "Usage: %prog [options] <input files>"

    # set a long desccription for the help message
    description=None

    op = optparse.OptionParser(
        oneLineUsage,description=description,
        version="%prog " + '.'.join([str(x) for x in __version__]))

    # OPTION DEFINITIONS
    op.add_option('-o','--outputfile',dest="outFileName",default='-',
                  help="name of output file. '-' means: STDOUT")
    op.add_option('-c','--clobber',action="store_true", default=False,
                  dest="clobber", help="allow output file to be overwritten.")

    #  Below are some examples of setting up options
    #  Note the above line gives you -h, --help, --version
    #  and an error message if an unspecified option is given.
    #  for more see: http://docs.python.org/library/optparse.html#defining-options
## 
##     op.add_option('-t','--tilesizes',dest="tileSizesL", default="60",
##                   help="comma seperated list of tile sizes to generate. e.g. '50,70'.")
##     op.add_option('-s','--shift',default=25,type='int',dest="shift",
##                   help="Start position offset" )
##     op.add_option('-E','--endtile',action="store_false", default=False,
##                   dest="endtile", help="Suppress generation of an tile that covers the"
##                   " last base in each recrod regardless of the -s and -t settings.")
##

    # all options should be defined above here

    ### Don't Change In Box ###################
    #                                         #
    # OPTION PATCHING                         #
    # add defaults to help messages           # 
    #                                         #
    for o in op.option_list:                  #
        if o.type == None:                    #
            continue                          #
        if o.help == None:                    #
            o.help = "Default: %default"      #
        else:                                 #
            o.help+= " Default: %default"     #
                                              #
                                              #
    #                                         #
    # OPTION PARSING                          #
    (opts,args) = op.parse_args(sysargs)      #
    #                                         #
    # option values are available as attrs    #
    # of the opts object, e.g. opts.myClobber #
    #                                         #
    # args is a list of the balance of the    #
    # command line                            # 
    ###########################################


    # OPTION CHECKING
    #   Option checking beyond what parse_args does:
    #   (Note that parse_args can do some basic 
    #   type checking and conversion.)    
    try:
        # (check an opt) tileSizes = [int(x) for x in opts.tileSizesL.split(',') if len(x)>0]
        outFile = utils.safeOFW(opts.outFileName,clobber=opts.clobber) # open the output file
    except Exception, eData:
        print >> sys.stderr, ("\nOption Error: %s\n" %eData.message)
        print >> sys.stderr, op.format_help()
        return 1