Exemple #1
0
    def initArgParser(self):
        """Register the repeatable ``-k`` sort-key option.

        Runs TableTool.initArgParser first, then adds ``-k``. Each use of
        the option appends its raw string value to ``options.sortKeys``
        (an empty list when the option is never given).

        Indentation uses spaces only; the previous space/tab mix is
        rejected by Python 3 with a TabError.
        """
        TableTool.initArgParser(self)
        self.parser.add_option(
            "-k",
            dest="sortKeys",
            action="append",
            default=[],
            metavar="COL[:r]",
            help="Specifies column to sort on , with optional 'r' specifying "
                 "to reverse the sort order. Repeatible, for specifying multilevel sort.")
Exemple #2
0
 def initArgParser(self):
     """Add the repeatable -x/--expand option to the inherited options.

     Every use appends the raw COL[:PSS] string to ``options.xpSpecs``.
     """
     TableTool.initArgParser(self)
     expandHelp = ("Expand column COL. "
                   "Use PSS as prefix/sep/suffix (Optional. Default=','). ")
     self.parser.add_option(
         "-x",
         "--expand",
         dest="xpSpecs",
         action="append",
         default=[],
         metavar="COL[:PSS]",
         help=expandHelp)
Exemple #3
0
    def initArgParser(self):
        """Declare the file name, separator and comment-character options.

        The separator and comment defaults come from the module-level
        TAB and HASH constants.
        """
        TableTool.initArgParser(self)
        addOpt = self.parser.add_option

        # Input source; "-" is the default value.
        addOpt("-f", "--filename",
               dest="filename", default="-", metavar="FILE",
               help="Specifies file for input table. Default='-' (read from stdin).")

        # Column separator.
        addOpt("-s", "--separator",
               dest="sep", default=TAB, metavar="CHAR",
               help="Separator character (default=TAB).")

        # Comment marker.
        addOpt("-c", "--comment",
               dest="com1", default=HASH, metavar="CHAR",
               help="Comment character (default=HASH). Lines beginning with CHAR are skipped.")
Exemple #4
0
    def __init__(self, argv):
        """Create empty instance state, then run TableTool.__init__.

        The attributes are assigned before the base-class call, as in the
        original. The literal 1 is TableTool's second positional argument
        (presumably the number of input tables -- confirm against
        TableTool).

        Indentation uses spaces only; the previous space/tab mix is
        rejected by Python 3 with a TabError.
        """
        self.kcols1 = []
        self.kcols2 = []
        self.rows = []
        self.graph = None
        self.bucketFiles = {}
        TableTool.__init__(self, 1, argv)
Exemple #5
0
 def processOptions(self):
     """Check that options.tmplt and options.pcol are mutually consistent.

     After the base-class processing, reports through self.parser.error
     when no template is given, when the template contains "%s" but no
     partition column is set, or when a partition column is set but the
     template lacks "%s".
     """
     TableTool.processOptions(self)
     opts = self.options
     if opts.tmplt is None:
         self.parser.error("No filename template specified.")
     hasSlot = "%s" in opts.tmplt
     if hasSlot and opts.pcol is None:
         self.parser.error("No partition column specified.")
     elif opts.pcol is not None and not hasSlot:
         self.parser.error(
             "Partition column specified but template has no '%s'.")
Exemple #6
0
    def processOptions(self):
        """Parse the k1/k2 option values into key-column lists.

        A non-empty options.k1 (or k2) is converted with self.parseIntList
        and stored in self.kcols1 (or self.kcols2). The two lists must end
        up the same length, otherwise self.parser.error is called.
        """
        TableTool.processOptions(self)
        opts = self.options
        if len(opts.k1) > 0:
            self.kcols1 = self.parseIntList(opts.k1)
        if len(opts.k2) > 0:
            self.kcols2 = self.parseIntList(opts.k2)

        if len(self.kcols1) != len(self.kcols2):
            self.parser.error("Same number of key columns must "
                              "be specified for both IDs.")
Exemple #7
0
    def processOptions(self):
        """Register every group-by column and aggregation spec from the options.

        Entries are handed to addGroupByColumn / addAggregation in list
        order, with group-by columns processed first.
        """
        TableTool.processOptions(self)

        # Group-by columns.
        for gbSpec in self.options.groupByColumns:
            self.addGroupByColumn(gbSpec)

        # Aggregation operations.
        for aggSpec in self.options.aggSpecs:
            self.addAggregation(aggSpec)
Exemple #8
0
    def processOptions(self):
        """Convert the raw sort-key strings into (column, reverse) tuples.

        Each entry of options.sortKeys is an integer column index,
        optionally followed by ":r" or a bare "r" to request a reversed
        sort. After the base-class processing, options.sortKeys is replaced
        by a list of (int, bool) tuples in the same order.

        A key whose column part is not an integer is reported through
        self.parser.error (as other option checks do) rather than surfacing
        as an uncaught ValueError. Indentation uses spaces only; the
        previous tab/space mix is rejected by Python 3.
        """
        TableTool.processOptions(self)
        parsedKeys = []
        for skey in self.options.sortKeys:
            # Test ":r" before the bare "r" so the colon is stripped as well.
            if skey.endswith(":r"):
                reverse, colText = True, skey[:-2]
            elif skey.endswith("r"):
                reverse, colText = True, skey[:-1]
            else:
                reverse, colText = False, skey
            try:
                col = int(colText)
            except ValueError:
                # optparse's error() prints the usage message and exits.
                self.parser.error(
                    "Invalid sort key %r: column must be an integer." % skey)
            parsedKeys.append((col, reverse))
        self.options.sortKeys = parsedKeys
Exemple #9
0
    def initArgParser(self):
        """Add the --k1/--k2 ID-column options and -n/--null-string.

        --k1 and --k2 are repeatable; each use appends the raw value to
        options.k1 / options.k2 (empty lists by default). --null-string
        stores a single string in options.nullString (default "").

        Indentation uses spaces only; the previous space/tab mix is
        rejected by Python 3 with a TabError.
        """
        TableTool.initArgParser(self)
        self.parser.add_option(
            "--k1",
            dest="k1",
            action="append",
            default=[],
            metavar="COLUMN(S)",
            help="Specifies column(s) of first ID.")

        self.parser.add_option(
            "--k2",
            dest="k2",
            action="append",
            default=[],
            metavar="COLUMN(S)",
            help="Specifies column(s) of second ID.")

        self.parser.add_option(
            "-n",
            "--null-string",
            dest="nullString",
            action="store",
            default="",
            metavar="NULLSTR",
            help="Specifies string for null values. (Default: empty string)")
Exemple #10
0
    def initArgParser(self):
        """Add the integer-valued -p/--parent and -k/--kid column options.

        The values land in options.parent and options.child respectively.
        """
        TableTool.initArgParser(self)
        addOpt = self.parser.add_option

        addOpt("-p", "--parent",
               dest="parent", metavar="COL", type="int",
               help="Specify parent column")

        addOpt("-k", "--kid",
               dest="child", metavar="COL", type="int",
               help="Specify child column")
Exemple #11
0
    def initArgParser(self):
        """Add the --exec-file and --expr-file options.

        --exec-file stores one file name in options.execFile (default
        None). --expr-file is repeatable and appends each file name to
        options.exprFiles (default empty list).
        """
        TableTool.initArgParser(self)

        execHelp = ("Execs the code in FILE, e.g., for defining functions to "
                    "use in command line filters and generators.")
        exprHelp = ("Loads expressions (filters/generators) from FILE."
                    " Expression loaded from files are evaluated before "
                    "command line expressions.")

        self.parser.add_option(
            "--exec-file",
            dest="execFile",
            default=None,
            action="store",
            metavar="FILE",
            help=execHelp)

        self.parser.add_option(
            "--expr-file",
            dest="exprFiles",
            default=[],
            action="append",
            metavar="FILE",
            help=exprHelp)
Exemple #12
0
    def initArgParser(self):
        """Add the repeatable --k1 and --k2 key-column options.

        Each use appends the raw value to options.k1 / options.k2, both of
        which default to an empty list.
        """
        TableTool.initArgParser(self)
        addOpt = self.parser.add_option

        addOpt("--k1",
               dest="k1", action="append", default=[], metavar="COLUMN(S)",
               help="Specifies key column(s) for table T1.")

        addOpt("--k2",
               dest="k2", action="append", default=[], metavar="COLUMN(S)",
               help="Specifies key column(s) for table T2.")
Exemple #13
0
    def __init__(self, argv):
        """Initialise the aggregation bookkeeping, then run TableTool.__init__.

        All attributes are set before the base-class call. The literal 1 is
        TableTool's second positional argument (presumably the number of
        input tables -- confirm against TableTool).
        """
        # Line-tracking state.
        self.maxColIndex = 0
        self.currentLine, self.currentLineNum = None, 0

        # Group-by columns: list of integer column indexes.
        self.gbColumns = []

        # Three parallel lists describing the accumulators: the Accumulator
        # class, the column it accumulates, and the extra constructor arg.
        self.accumulatorClasses = []
        self.accumulatorColumns = []
        self.accumulatorXtraArg = []

        # Maps a column number to its Statistics accumulator.
        self.col2stats = {}
        self.outSpecifiers = []

        self.partitions = {}

        TableTool.__init__(self, 1, argv)
Exemple #14
0
    def __init__(self, argv):
        """Reset the join state to its defaults, then run TableTool.__init__.

        All attributes are assigned before the base-class call, as in the
        original. The literal 2 is TableTool's second positional argument
        (presumably the number of input tables -- confirm against
        TableTool).

        Indentation uses spaces only; the body previously mixed
        tab-indented and space-indented lines, which Python 3 rejects with
        a TabError.
        """
        # Join-key column lists for the two inputs.
        self.jcols1 = []
        self.jcols2 = []

        self.ncols1 = 0
        self.ncols2 = 0

        self.doLeftOuter = False
        self.doRightOuter = False

        self.swappedInputs = False
        self.selfJoin = False
        self.inner = None

        TableTool.__init__(self, 2, argv)
Exemple #15
0
    def initArgParser(self):
        TableTool.initArgParser(self)
        self.parser.add_option(
            "-p",
            dest="pcol",
            action="store",
            default=None,
            type="int",
            metavar="COLUMN",
            help=
            "Specifies column to partition on. Remember: column numbers start at 0!"
        )

        self.parser.add_option("-L",
                               "--limit",
                               dest="limit",
                               action="store",
                               default=100,
                               type="int",
                               metavar="LIMIT",
                               help='''
Limits the number of files created, as a safety feature.
Default is 100.
Setting it to -1 makes this number unlimited (be careful).
            ''')

        self.parser.add_option(
            "-t",
            dest="tmplt",
            action="store",
            default=None,
            metavar="FILE",
            help=
            '''Output file name template. The output file for a given row is determined by substituting
the value of that row's partition column for the string "%s" in the template. 
Example: partition a GFF3 file into files by chromosome. The template might be something
like "./mygffdata.chr%s.gff3".
''')
        self.parser.add_option("-T",
                               "--tee",
                               dest="tee",
                               action="store_true",
                               default=False,
                               help='''
If true, the partition operator passes all input rows to its output. This allows a pipeline to
continue after a partitioning operator.
''')
Exemple #16
0
    def initArgParser(self):
        """Add the -o/--output destination and -m/--mode options.

        The values are stored in options.output (default "-") and
        options.mode (default "w").
        """
        TableTool.initArgParser(self)
        addOpt = self.parser.add_option

        addOpt("-o", "--output",
               dest="output", default="-", metavar="DST",
               help="Specifies output destination. Default='-' (write to stdout)")

        addOpt("-m", "--mode",
               dest="mode", default="w", metavar="MODE",
               help="Specifies output mode. w=write, a=append. Default=w.")
Exemple #17
0
    def initArgParser(self):
        """Add the join options on top of TableTool's options.

        Options and their destinations:
          --k1 / --k2        repeatable; appended to options.j1 / options.j2
          -c / --columns     repeatable; appended to options.ocols
          --left-outer       flag; sets options.dlo
          --right-outer      flag; sets options.dro
          -n / --null-string string stored in options.nullString

        Indentation uses spaces only; the previous space/tab mix is
        rejected by Python 3 with a TabError. The help texts are unchanged.
        """
        TableTool.initArgParser(self)

        self.parser.add_option("--k1", dest="j1",
            action="append", default=[],
            metavar="COLUMN(S)",
            help='''Specifies T1 join key column(s). For a multipart key, you can use a comma 
            separated list of column numbers or simply repeat the --k1 option.
            (Remember, column numbers start at 0!)''')

        self.parser.add_option("--k2", dest="j2",
            action="append", default=[],
            metavar="COLUMN(S)",
            help='''Specifies T2 join key column(s). For a multipart key, you can use a comma 
            separated list of column numbers or simply repeat the --k2 option.
            (Remember, column numbers start at 0!)''')

        self.parser.add_option("-c", "--columns", dest="ocols",
            action="append", default=[],
            metavar="COLUMN(S)",
            help='''Specifies with columns of the input rows should be output, and in what order.
            The two input rows are named 'a' and 'b', and you specify which columns to output
            using Python array syntax. Example: -c "a[1] b a[0] b[3:6]" .
            This says, output col 1 of a, followed by all of b's columns, followed by a's 0-th column,
            followed by a's columns 3,4,5.
            ''')

        self.parser.add_option("--left-outer", dest="dlo",
            action="store_true", default=False,
            help='''Perform left-outer join (default: No). In a left-outer join, every row
            in T1 produces at least one output row: if row in T1 does not match anything in T2,
            the output row contains NULLs in the T2 columns.''')

        self.parser.add_option("--right-outer", dest="dro",
            action="store_true", default=False,
            help='''Performs right-outer join (default: No). In a right-outer join, every row
            in T2 produces at least one output row: if row in T2 does not match anything in T1,
            the output row contains NULLs in the T1 columns.''')

        self.parser.add_option("-n", "--null-string", dest="nullString",
            action="store", default="", metavar="NULLSTR",
            help="Specifies string to use for NULL values output but left/right outer joins. (Default: empty string)")
Exemple #18
0
    def __init__(self, argv):
        """Reset all reader state to its defaults, then run TableTool.__init__.

        The literal 0 is TableTool's second positional argument (presumably
        the number of input tables -- confirm against TableTool).
        """
        # Column count.
        self.ncols = 0

        # Separator and comment characters, from the module constants.
        self.separatorChar, self.commentChar = TAB, HASH

        # File name and file descriptor.
        self.fileName, self.fileDesc = None, None

        # Current raw line and its number.
        self.currentLine, self.currentLineNum = None, 0

        # Current row and its number.
        self.currentRow, self.currentRowNum = None, 0

        TableTool.__init__(self, 0, argv)
Exemple #19
0
    def processOptions(self):
        """Validate the join options and build the output-row function.

        Steps, after the base-class processing:
          1. Non-empty options.j1 / options.j2 are converted with
             self.parseIntList into self.jcols1 / self.jcols2; both lists
             must have the same length.
          2. The outer-join flags are copied from options.dlo / options.dro.
          3. Every whitespace-separated token in self.args must be a column
             spec over r1/r2 (e.g. r1[0], r2[1:4], r2, r1[:-1]). The specs
             are joined with "+" into a lambda stored in self.fun. With no
             specs the lambda is "r1+r2".

        Problems are reported through self.parser.error. The token "r1[]"
        (empty brackets) satisfies the pattern because every bracket
        component is optional; it is rejected explicitly instead of being
        passed to eval, where it raised SyntaxError. Indentation uses
        spaces only; the previous tab/space mix is rejected by Python 3.
        """
        TableTool.processOptions(self)
        if len(self.options.j1) > 0:
            self.jcols1 = self.parseIntList(self.options.j1)
        if len(self.options.j2) > 0:
            self.jcols2 = self.parseIntList(self.options.j2)

        if len(self.jcols1) != len(self.jcols2):
            self.parser.error("Same number of join columns must "
                              "be specified for both tables.")

        self.doLeftOuter = self.options.dlo
        self.doRightOuter = self.options.dro

        # Matches array access syntax for r1 and r2, e.g.,
        #  r1[0]
        #  r2[1:4]
        #  r2
        #  r1[:-1]
        rex = re.compile(r'^r[12](\[(-?\d+)?:?(-?\d+)?\])?$')
        parts = []
        for oc in self.args:
            for token in oc.split():
                m = rex.match(token)
                if not m or m.group(1) == "[]":
                    self.parser.error("Syntax error in column spec.")
                if "[" in token and ":" not in token:
                    # A single index yields one element, so wrap it in a
                    # list literal to let "+" join it with the other terms
                    # (assumes rows are lists -- confirm against callers).
                    parts.append("[%s]" % token)
                else:
                    parts.append(token)
        if not parts:
            parts = ['r1', 'r2']
        # NOTE(review): eval() on command-line text. Only tokens that passed
        # the whitelist regex above reach this point; keep that check in
        # place if the accepted syntax is ever extended.
        expr = "lambda r1, r2: " + "+".join(parts)
        self.fun = eval(expr)
Exemple #20
0
    def initArgParser(self):
        """Add the -g/--group-by, -a/--aggregate and --stream options.

        The help texts come from the module-level GBHELP, AGGHELP and
        STREAMHELP constants. The first two options are repeatable and
        append to options.groupByColumns / options.aggSpecs; --stream sets
        options.streamMode to True.
        """
        TableTool.initArgParser(self)
        addOpt = self.parser.add_option

        addOpt("-g", "--group-by",
               metavar="COLUMN(S)", action="append",
               dest="groupByColumns", default=[],
               help=GBHELP)

        addOpt("-a", "--aggregate",
               metavar="FCN:COLUMN", action="append",
               dest="aggSpecs", default=[],
               help=AGGHELP)

        addOpt("--stream",
               action="store_true", dest="streamMode", default=False,
               help=STREAMHELP)
Exemple #21
0
 def __init__(self, argv):
     """Create empty partition bookkeeping, then run TableTool.__init__.

     The literal 1 is TableTool's second positional argument (presumably
     the number of input tables -- confirm against TableTool).
     """
     self.pcols = []
     # Two lookup tables; judging by the names, file name -> output file
     # object and partition value -> file name (confirm against users).
     self.fname2ofd, self.pval2fname = {}, {}
     TableTool.__init__(self, 1, argv)
Exemple #22
0
 def __init__(self, argv):
     """Create empty key-column lists and key table, then run TableTool.__init__.

     The literal 2 is TableTool's second positional argument (presumably
     the number of input tables -- confirm against TableTool).
     """
     self.kcols1, self.kcols2 = [], []
     self.t2Keys = {}
     TableTool.__init__(self, 2, argv)
Exemple #23
0
 def __init__(self, argv):
     """Delegate construction to TableTool.

     Passes 1 as TableTool's second positional argument (presumably the
     number of input tables -- confirm against TableTool) followed by argv.
     """
     TableTool.__init__(self, 1, argv)
Exemple #24
0
 def processOptions(self):
     """Run the base option processing, then process each expansion spec.

     Every entry of options.xpSpecs is passed, in list order, to
     self.processXspec.
     """
     TableTool.processOptions(self)
     for xpSpec in self.options.xpSpecs:
         self.processXspec(xpSpec)
Exemple #25
0
 def __init__(self, argv):
     """Create the empty function registry, then run TableTool.__init__.

     The literal 1 is TableTool's second positional argument (presumably
     the number of input tables -- confirm against TableTool).
     """
     self.functionContext = {}
     self.functions, self.isFilter = [], []
     TableTool.__init__(self, 1, argv)
Exemple #26
0
 def __init__(self, argv):
     """Create the empty expansion-column list, then run TableTool.__init__.

     The literal 1 is TableTool's second positional argument (presumably
     the number of input tables -- confirm against TableTool).
     """
     # Entries are (col, pref, sep, suff) tuples.
     self.xpColumns = []
     TableTool.__init__(self, 1, argv)
Exemple #27
0
 def processOptions(self):
     """Run the base option processing, then call self.loadExprs()."""
     TableTool.processOptions(self)
     self.loadExprs()
Exemple #28
0
 def processOptions(self):
     """Require that both the parent and the child column were specified.

     After the base-class processing, a missing options.parent or
     options.child is reported through self.parser.error.
     """
     TableTool.processOptions(self)
     opts = self.options
     if opts.parent is None:
         self.parser.error("No parent index specified.")
     if opts.child is None:
         self.parser.error("No child index specified.")
Exemple #29
0
    def __init__(self, argv):
        """Create the empty row buffer, then run TableTool.__init__.

        The literal 1 is TableTool's second positional argument (presumably
        the number of input tables -- confirm against TableTool).

        Indentation uses spaces only; the previous space/tab mix is
        rejected by Python 3 with a TabError.
        """
        self.rows = []
        TableTool.__init__(self, 1, argv)