Example #1
0
def main():
    """Parse command-line options and hand the input over to ``parse_vcf``.

    Options are read from ``sys.argv`` through ``optparse``.  When an
    annotation file (-g) is supplied, the genome FASTA (-f) is mandatory:
    the FASTA is loaded with ``genome2dict`` and the annotation with
    ``load_gff`` (file name ends with ".gff") or ``load_gtf`` (any other
    name).  Without -g the three lookup dictionaries stay empty.

    Raises:
        SystemExit: through ``parser.error`` (exit status 2) when -g is
            given without -f, or when the FASTA / annotation path is not
            an existing file.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage, version="%prog 1.0")  # allow_interspersed_args=True

    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta",
                      help="genome fasta [can be gzipped]")
    parser.add_option("-i", dest="fpath",
                      help="input file [stdin]")
    parser.add_option("-o", dest="outfn",
                      help="output fname [stdout]")
    parser.add_option("-d", dest="minDepth", default=10, type=int,
                      help="minimal depth [%default]")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    # The three flags below default to True; passing the flag switches the
    # corresponding behaviour OFF (action="store_false").
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels")
    parser.add_option("-b", dest="bothStrands", default=True, action="store_false",
                      help="report events confirmed by single strand algs")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    o, args = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o),))

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # annotation requested
        # The genome sequence has to accompany the annotation.
        # parser.error() prints the usage plus message and exits with
        # status 2, so nothing below runs after a failed check.
        if not o.fasta:
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # Load the genome annotation.
        if not os.path.isfile(o.gtf):
            parser.error("No such file: %s" % o.gtf)
        # The loader is picked by file extension only.
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n" % (len(id2gene), o.gtf))

    # parse pileup
    parse_vcf(o.fpath, o.outfn, ctg2cds, id2gene, ctg2seq,
              o.minDepth, o.minFreq, o.indels, o.bothStrands)
Example #2
0
def main():
    """Read the command line, optionally load annotation, run ``parse_vcf``.

    All options come from ``sys.argv``.  Supplying -g (annotation) makes
    -f (genome FASTA) compulsory; the FASTA goes through ``genome2dict``
    and the annotation through ``load_gff`` when its name ends with
    ".gff", otherwise through ``load_gtf``.  If -g is absent, empty
    dictionaries are passed on instead.

    Raises:
        SystemExit: ``parser.error`` terminates with exit status 2 when
            -g lacks -f, or when either given path is not a regular file.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage, version="%prog 1.0")  # allow_interspersed_args=True

    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta",
                      help="genome fasta [can be gzipped]")
    parser.add_option("-i", dest="fpath",
                      help="input file [stdin]")
    parser.add_option("-o", dest="outfn",
                      help="output fname [stdout]")
    parser.add_option("-d", dest="minDepth", default=10, type=int,
                      help="minimal depth [%default]")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    # store_false with default=True: giving -n / -b / -v turns the
    # respective setting off.
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels")
    parser.add_option("-b", dest="bothStrands", default=True, action="store_false",
                      help="report events confirmed by single strand algs")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    o, args = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o),))

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:
        # Annotation needs the genome; parser.error() exits (status 2),
        # so each check below is effectively a guard clause.
        if not o.fasta:
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        if not os.path.isfile(o.gtf):
            parser.error("No such file: %s" % o.gtf)
        # Choose the annotation loader from the file extension.
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n" % (len(id2gene), o.gtf))

    # parse pileup
    parse_vcf(o.fpath, o.outfn, ctg2cds, id2gene, ctg2seq,
              o.minDepth, o.minFreq, o.indels, o.bothStrands)
Example #3
0
def get_options():
    '''
    Capture and validate the command-line options.

    Options are read from ``sys.argv`` with ``optparse``; exactly one
    positional argument (the output file prefix) is required.

    Side effects:
      * Unless --debug is given, existing ``<prefix>.csv`` / ``<prefix>.pkl``
        files are deleted (a warning is printed first).
      * A summary of the selected search settings is printed to stdout.

    Post-processing applied to the parsed values:
      * ``opts.UserAuth`` becomes ``[user, password]`` or ``[]``.
      * ``opts.chnrank`` becomes a list split on commas.
      * Box search values are ordered (min <= max); an incomplete box is
        reset to None.  A complete box disables the radius search values.
      * Radius search values are ordered (minr <= maxr); an incomplete
        radius specification is reset to None.
      * Any date/time string that was given is converted with
        ``UTCDateTime``; a fixed range (--start/--end) clears the
        non-specific range options.

    ``exists`` and ``UTCDateTime`` are not imported here and are expected
    to be available at module scope.

    Returns:
        (opts, outpref): the optparse values object and the output prefix.

    Raises:
        SystemExit: via ``parser.error`` (exit status 2) when the positional
            argument count is not one or --User-Auth is not ``user:password``.
    '''
    from os import remove as rmfile
    from optparse import OptionParser, OptionGroup

    parser = OptionParser(
        usage="Usage: %prog [options] <station list filename>",
        description="Program to query a datacenter using the obspy fdsn client. "
        "All station returned in this query are saved into both a csv format 1sls "
        "file as well as a stationdb (stdb.StDbElement) pickled dictionary. The input "
        "argument, <station file name> is the prefix for the output file, which is by "
        "default <station file name>.csv and <station file name>.pkl.")

    # General Settings
    parser.add_option(
        "-D", "--debug", action="store_true", dest="debug", default=False,
        help="Debug mode. After the client query is complete (and successful), instead of "
        "parsing the inventory, it is instead pickled to <station file name>_query_debug.pkl "
        "which can be loaded in ipython to examine manually.")
    parser.add_option(
        "--long-keys", action="store_true", dest="lkey", default=False,
        help="Specify Key format. Default is Net.Stn. Long keys are Net.Stn.Chn")
    parser.add_option(
        "-a", "--ascii", action="store_false", dest="use_binary", default=True,
        help="Specify to write ascii Pickle files instead of binary. Ascii are larger file size, "
        "but more likely to be system independent.")

    # Server Settings
    ServerGroup = OptionGroup(
        parser, title="Server Settings",
        description="Settings associated with "
        "which datacenter to log into.")
    ServerGroup.add_option(
        "--Server", action="store", type=str, dest="Server", default="IRIS",
        help="Specify the server to connect to. Options include: BGR, ETH, GEONET, GFZ, INGV, IPGP, "
        "IRIS, KOERI, LMU, NCEDC, NEIP, NERIES, ODC, ORFEUS, RESIF, SCEDC, USGS, USP. [Default IRIS]")
    ServerGroup.add_option(
        "--User-Auth", action="store", type=str, dest="UserAuth", default="",
        help="Enter your IRIS Authentification Username and Password (--User-Auth='username:authpassword') "
        "to access and download restricted data. [Default no user and password]")

    # Selection Settings
    SelectGroup = OptionGroup(
        parser, title="Channel Priority/Selection Settings",
        description="Settings "
        "associated with selecting the channels to retain.")
    SelectGroup.add_option(
        "--channel-rank", action="store", type=str, dest="chnrank", default="HH,BH,LH",
        help="If requesting more than one type of channel, specify a comma separated list of the first two "
        "lettres of the desired components to retain. Default is HH > BH > LH : ['HH,BH,LH']")

    # Channel Settings
    ChannelGroup = OptionGroup(
        parser, title="Station-Channel Settings",
        description="Options to narrow down "
        "the specific channels based on network, station, etc")
    ChannelGroup.add_option(
        "-N", "--networks", action="store", type=str, dest="nets", default="*",
        help="Specify a comma separated list of network codes to search for [Default *]")
    ChannelGroup.add_option(
        "-S", "--stations", action="store", type=str, dest="stns", default="*",
        help="Specify a comma separated list of station names. If you want wildcards, enclose in quotes [Default *]")
    ChannelGroup.add_option(
        "-L", "--locations", action="store", type=str, dest="locs", default="*",
        help="Specify a comma separated list of location codes. If you want wildcards, enclose in quotes [Default *]")
    ChannelGroup.add_option(
        "-C", "--channels", action="store", type=str, dest="chns", default="HH*,BH*,LH*",
        help="Specify a comma separated, wildcarded list of channel names. [Default HH*,BH*,LH*]")

    # Geographic Settings
    BoxGroup = OptionGroup(
        parser, title="Geographic Lat/Lon Box Search",
        description="Define the coordinates "
        "of a lat/lon box in which to select stations. If filled out, takes precedence over values for "
        "Radius Search (below).")
    BoxGroup.add_option(
        "--minlat", "--min-latitude", action="store", type="float", dest="minlat", default=None,
        help="Specify minimum latitude to search (must specify all of minlat, maxlat, minlon, maxlon).")
    BoxGroup.add_option(
        "--maxlat", "--max-latitude", action="store", type="float", dest="maxlat", default=None,
        help="Specify maximum latitude to search (must specify all of minlat, maxlat, minlon, maxlon).")
    BoxGroup.add_option(
        "--minlon", "--min-longitude", action="store", type="float", dest="minlon", default=None,
        help="Specify minimum longitude to search (must specify all of minlat, maxlat, minlon, maxlon).")
    BoxGroup.add_option(
        "--maxlon", "--max-longitude", action="store", type="float", dest="maxlon", default=None,
        help="Specify maximum longitude to search (must specify all of minlat, maxlat, minlon, maxlon).")
    RadGroup = OptionGroup(
        parser, title="Geographic Radius Search",
        description="Central point and min/max "
        "radius search settings. Box Search Settings take precedence over radius search.")
    RadGroup.add_option(
        "--lat", "--latitude", action="store", type="float", dest="lat", default=None,
        help="Specify a Lat (if any of --lon --min-radius and --max-radius are empty, an error will prompt).")
    RadGroup.add_option(
        "--lon", "--longitude", action="store", type="float", dest="lon", default=None,
        help="Specify a Lon (if any of --lat --min-radius and --max-radius are empty, an error will prompt).")
    RadGroup.add_option(
        "--minr", "--min-radius", action="store", type="float", dest="minr", default=0.,
        help="Specify a minimum search radius (in degrees) around the point defined by --lat and --lon "
        "(if any of --lat --lon and --max-radius are empty, an error will prompt). [Default 0. degrees]")
    RadGroup.add_option(
        "--maxr", "--max-radius", action="store", type="float", dest="maxr", default=None,
        help="Specify a maximum search radius (in degrees) around the point defined by --lat and --lon "
        "(if any of --lat --lon and --min-radius are empty, an error will prompt).")

    # Temporal Settings
    FixedRangeGroup = OptionGroup(
        parser, title="Fixed Time Range Settings",
        description="Find all stations "
        "operating within the start and end date/time. If either are filled out, they take precedence over "
        "Non-Specific time range search (below)")
    FixedRangeGroup.add_option(
        "--start", "--start-date", action="store", type=None, dest="stdate", default=None,
        help="Specify the Start Date/Time in a UTCDateTime compatible String (ie, 2010-01-15 15:15:45.2). [Default Blank]")
    FixedRangeGroup.add_option(
        "--end", "--end-date", action="store", type=None, dest="enddate", default=None,
        help="Specify the End Date/Time in a UTCDateTime compatible String (ie, 2010-01-15 15:15:45.2). [Default Blank]")

    VarRangeGroup = OptionGroup(
        parser, title="Non-Specific Time Range Settings",
        description="Time settings "
        "with less specificity. Ensure that those you specify do not interfere with each other. If above Fixed "
        "Range values are set, they will take precedence over these values.")
    VarRangeGroup.add_option(
        "--start-before", action="store", type=None, dest="stbefore", default=None,
        help="Specify a Date/Time which stations must start before (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")
    VarRangeGroup.add_option(
        "--start-after", action="store", type=None, dest="stafter", default=None,
        help="Specify a Date/Time which stations must start after (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")
    VarRangeGroup.add_option(
        "--end-before", action="store", type=None, dest="endbefore", default=None,
        help="Specify a Date/Time which stations must end before (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")
    VarRangeGroup.add_option(
        "--end-after", action="store", type=None, dest="endafter", default=None,
        help="Specify a Date/Time which stations must end after (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")

    # Add All Groups
    parser.add_option_group(ServerGroup)
    parser.add_option_group(SelectGroup)
    parser.add_option_group(ChannelGroup)
    parser.add_option_group(BoxGroup)
    parser.add_option_group(RadGroup)
    parser.add_option_group(FixedRangeGroup)
    parser.add_option_group(VarRangeGroup)

    # Run Parser
    (opts, args) = parser.parse_args()

    # Check output file name
    if len(args) != 1:
        parser.error("Need station database file")
    outpref = args[0]
    if not opts.debug:
        if exists(outpref + ".csv") and exists(outpref + ".pkl"):
            print ("Warning: Output Files " + outpref + ".csv and " + outpref +
                ".pkl already exist. These will be overwritten...")
            rmfile(outpref + ".pkl")
            rmfile(outpref + ".csv")
        elif exists(outpref + ".csv"):
            print("Warning: Output File " + outpref +
                  ".csv already exists. It will be overwritten...")
            rmfile(outpref + ".csv")
        elif exists(outpref + ".pkl"):
            print("Warning: Output File " + outpref +
                  ".pkl already exists. It will be overwritten...")
            rmfile(outpref + ".pkl")

    # Parse User Authentification
    if opts.UserAuth:
        tt = opts.UserAuth.split(':')
        if len(tt) != 2:
            # parser.error() prints the message and exits with status 2.
            parser.error(
                "Error: Incorrect Username and Password Strings for User Authentification"
            )
        opts.UserAuth = tt
    else:
        opts.UserAuth = []

    # Parse Channel Rank to List
    opts.chnrank = opts.chnrank.split(',')

    # Check Geographic Settings
    box = (opts.minlat, opts.maxlat, opts.minlon, opts.maxlon)
    # NOTE(review): --minr defaults to 0., so the radius tuple always has a
    # non-None entry and the radius branch below is entered (and warns)
    # whenever no box value was given, even if no radius option was passed.
    # Behaviour kept as-is; confirm whether that is intended.
    radius = (opts.lat, opts.lon, opts.minr, opts.maxr)
    if any(value is not None for value in box):
        if any(value is None for value in box):
            # Not all value set
            opts.minlat = None
            opts.maxlat = None
            opts.minlon = None
            opts.maxlon = None
            print(
                "Warning: one of minlat,maxlat,minlon,maxlon were not set. All values reset to None. "
            )
            print("")
        else:
            # Ensure proper min/max set (right-hand side is evaluated
            # completely before either attribute is rebound)
            opts.minlat, opts.maxlat = (min(opts.minlat, opts.maxlat),
                                        max(opts.minlat, opts.maxlat))
            opts.minlon, opts.maxlon = (min(opts.minlon, opts.maxlon),
                                        max(opts.minlon, opts.maxlon))
            print("Performing Geographic Box Search:")
            print("    LL: {0:9.4f}, {1:8.4f}".format(opts.minlat,
                                                      opts.minlon))
            print("    UR: {0:9.4f}, {1:8.4f}".format(opts.maxlat,
                                                      opts.maxlon))
            print(" ")
            # a complete box disables the radius search parameters
            opts.minr = None
            opts.maxr = None
            opts.lat = None
            opts.lon = None

    elif any(value is not None for value in radius):
        if any(value is None for value in radius):
            opts.lat = None
            opts.lon = None
            opts.minr = None
            opts.maxr = None
            print(
                "Warning: one of lat,lon,minr,maxr were not set. All values reset to None. "
            )
            print(" ")
        else:
            # Order the radii.  Both results are computed from the ORIGINAL
            # values before assignment; assigning minr first and then taking
            # max() would lose the larger radius when they were swapped.
            opts.minr, opts.maxr = (min(opts.minr, opts.maxr),
                                    max(opts.minr, opts.maxr))
            print("Performing Geographic Radius Search: ")
            print("   Centre Point: {0:9.4f}, {1:8.4f}".format(
                opts.lon, opts.lat))
            print("   Radius: {0:6.2f} to {1:6.2f} degrees".format(
                opts.minr, opts.maxr))
            print(" ")

    # Check Time Settings
    if opts.stdate is not None or opts.enddate is not None:
        # Use Fixed Range, not other
        opts.stbefore = None
        opts.stafter = None
        opts.endbefore = None
        opts.endafter = None
        # Fix End Date
        if opts.enddate is None:
            opts.enddate = UTCDateTime("2599-12-31 23:59:59.9")
        else:
            opts.enddate = UTCDateTime(opts.enddate)
        # Assign stdate as UTCDateTime
        if opts.stdate is not None:
            opts.stdate = UTCDateTime(opts.stdate)
        print("Performing Fixed Time Range Search: ")
        if opts.stdate is not None:
            print("   Start: " + opts.stdate.strftime("%Y-%m-%d %H:%M:%S"))
        else:
            # Only --end was given: stdate stays None, so there is no
            # date to format (calling strftime on None would crash).
            print("   Start: (not set)")
        print("   End:   " + opts.enddate.strftime("%Y-%m-%d %H:%M:%S"))
        print(" ")
    else:
        # No Fixed Range Set. Are other values set?
        if opts.stbefore is not None or opts.stafter is not None or opts.endbefore is not None or opts.endafter is not None:
            print("Performing Non-Specific Time Search: ")
            if opts.stbefore is not None:
                opts.stbefore = UTCDateTime(opts.stbefore)
                print("   Start Before: " +
                      opts.stbefore.strftime("%Y-%m-%d %H:%M:%S"))
            if opts.stafter is not None:
                opts.stafter = UTCDateTime(opts.stafter)
                print("   Start After: " +
                      opts.stafter.strftime("%Y-%m-%d %H:%M:%S"))
            if opts.endbefore is not None:
                opts.endbefore = UTCDateTime(opts.endbefore)
                print("   End Before: " +
                      opts.endbefore.strftime("%Y-%m-%d %H:%M:%S"))
            if opts.endafter is not None:
                opts.endafter = UTCDateTime(opts.endafter)
                print("   End After: " +
                      opts.endafter.strftime("%Y-%m-%d %H:%M:%S"))
            print(" ")

        else:
            print("Warning: No Time Range Specified for Search")
            print(" ")

    # Station/Channel Search Parameters
    print("Station/Channel Search Parameters:")
    print("   Network:  {0:s}".format(opts.nets))
    print("   Stations: {0:s}".format(opts.stns))
    print("   Locations: {0:s}".format(opts.locs))
    print("   Channels: {0:s}".format(opts.chns))
    print("   Channel Rank: {0:s}".format(",".join(opts.chnrank)))
    print(" ")
    if opts.debug:
        print("Output Files: {0:s}_query_debug.pkl and {0:s}_query_debug.kcsv".
              format(outpref))
    else:
        print("Output Files: {0:s}.csv and {0:s}.pkl".format(outpref))
    print(" ")

    # Return Values
    return opts, outpref
Example #4
0
def main():
    """Parse command-line options and run ``check_snps`` on the given VCFs.

    Options and positional arguments are read from ``sys.argv``.  At least
    one positional VCF path is required and every path must be an existing
    file.  When an annotation (-g) is supplied, the genome FASTA (-f) is
    mandatory: the FASTA is loaded with ``genome2dict`` and the annotation
    with ``load_gff`` (name ends with ".gff") or ``load_gtf`` (otherwise).
    Candidate coordinates come from ``load_vcf`` and are then passed,
    together with both BAM paths, to ``check_snps``.

    Raises:
        SystemExit: through ``parser.error`` (exit status 2) when no VCF is
            given, a VCF / FASTA / annotation path is not a file, or -g is
            used without -f.
    """
    usage = "usage: %prog [options] *.vcf"
    parser = OptionParser(usage=usage, version="%prog 1.0")  # allow_interspersed_args=True

    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta",
                      help="genome fasta")
    parser.add_option("-1", dest="bam1",
                      help="sample bam")
    parser.add_option("-2", dest="bam2",
                      help="reference bam")
    parser.add_option("-o", dest="outfn",
                      help="output fname [stdout]")
    parser.add_option("-d", dest="minDepth", default=5, type=int,
                      help="minimal depth; note both samples need to have pass depth filtering [%default]")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    # The three flags below default to True; passing the flag switches the
    # corresponding behaviour OFF (action="store_false").
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels [%default]")
    parser.add_option("-b", dest="bothStrands", default=True, action="store_false",
                      help="report events confirmed by single strand algs")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    o, args = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o),))

    if not args:
        parser.error("At least one vcf file has to be specified!")

    for fn in args:
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # annotation requested
        # The genome sequence has to accompany the annotation.
        # parser.error() prints the usage plus message and exits with
        # status 2, so nothing below runs after a failed check.
        if not o.fasta:
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # Load the genome annotation.
        if not os.path.isfile(o.gtf):
            parser.error("No such file: %s" % o.gtf)
        # The loader is picked by file extension only.
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n" % (len(id2gene), o.gtf))

    # load possible SNPs coordinates
    coords = load_vcf(args, o.indels)

    # check with mpileup
    check_snps(coords, o.bam1, o.bam2, o.fasta, o.outfn, ctg2cds, id2gene,
               ctg2seq, o.minDepth, o.minFreq, o.indels, o.bothStrands)
Example #5
0
def main():
    """Read the command line, load optional annotation, run ``check_snps``.

    Everything is taken from ``sys.argv``.  One or more positional VCF
    paths are required, each of which must be an existing file.  Giving -g
    (annotation) makes -f (genome FASTA) compulsory; the FASTA goes through
    ``genome2dict`` and the annotation through ``load_gff`` when its name
    ends with ".gff", otherwise through ``load_gtf``.  ``load_vcf`` supplies
    the candidate coordinates that ``check_snps`` receives along with the
    two BAM paths.

    Raises:
        SystemExit: ``parser.error`` terminates with exit status 2 when no
            VCF is supplied, a given path is not a regular file, or -g
            lacks -f.
    """
    usage = "usage: %prog [options] *.vcf"
    parser = OptionParser(usage=usage,
                          version="%prog 1.0")  # allow_interspersed_args=True

    parser.add_option("-g",
                      dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta", help="genome fasta")
    parser.add_option("-1", dest="bam1", help="sample bam")
    parser.add_option("-2", dest="bam2", help="reference bam")
    parser.add_option("-o", dest="outfn", help="output fname [stdout]")
    parser.add_option(
        "-d",
        dest="minDepth",
        default=5,
        type=int,
        help="minimal depth; note both samples need to have pass depth filtering [%default]",
    )
    parser.add_option("-m",
                      dest="minFreq",
                      default=0.8,
                      type=float,
                      help="min frequency of alternative base [%default]")
    # store_false with default=True: giving -n / -b / -v turns the
    # respective setting off.
    parser.add_option("-n",
                      dest="indels",
                      default=True,
                      action="store_false",
                      help="ignore indels [%default]")
    parser.add_option("-b",
                      dest="bothStrands",
                      default=True,
                      action="store_false",
                      help="report events confirmed by single strand algs")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o), ))

    if not args:
        parser.error("At least one vcf file has to be specified!")

    for fn in args:
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # Annotation needs the genome; parser.error() exits (status 2),
        # so each check below is effectively a guard clause.
        if not o.fasta:  # fasta has to be provided
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # choose the annotation loader from the file extension
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n" %
                             (len(id2gene), o.gtf))

    # load possible SNPs coordinates
    coords = load_vcf(args, o.indels)

    # check with mpileup
    check_snps(coords, o.bam1, o.bam2, o.fasta, o.outfn, ctg2cds, id2gene,
               ctg2seq, o.minDepth, o.minFreq, o.indels, o.bothStrands)