def __init__(self, condition, relvars, outers=(), columns=()):
    """Build a join from `relvars` (inner) and `outers` (outer) under `condition`.

    Nested joins are flattened via getInnerRVs()/getOuterRVs(), and each
    relvar's own condition is AND-ed into `condition`.  Relvar lists are
    sorted so that equal joins compare and hash equal.

    Raises ValueError if the same relvar object appears twice, and
    TypeError if no inner relvar at all is supplied.
    """
    seen = {}

    def claim(rv):
        # Register each relvar object exactly once, by identity.
        key = id(rv)
        if key in seen:
            raise ValueError("Relvar used more than once", rv)
        seen[key] = True
        return rv

    inner = []
    outerList = [claim(rv) for rv in outers]
    for rv in relvars:
        inner.extend([claim(r) for r in rv.getInnerRVs()])
        outerList.extend([claim(r) for r in rv.getOuterRVs()])
        condition = condition & rv.getCondition()

    if len(inner) < 1:
        raise TypeError("BasicJoin requires at least 1 relvar")

    # Canonical ordering -> stable hash/compare key below.
    inner.sort()
    outerList.sort()

    self.relvars = tuple(inner)
    self.outers = tuple(outerList)
    self.condition = condition
    self.columns = Graph(columns)
    self._hashAndCompare = (
        self.__class__.__name__, condition, self.relvars, self.outers,
        ImmutableSet(self.columns)
    )
def __call__(self, where=None, join=(), outer=(), rename=(), keep=None,
             calc=(), groupBy=()):
    """Derive a new relational expression from this one.

    where   -- extra condition (defaults to EMPTY, i.e. no restriction)
    join    -- additional relvars to inner-join with self
    outer   -- relvars to outer-join
    rename  -- (old, new) column-name pairs
    keep    -- when given, only these columns are retained
    calc    -- extra calculated columns, merged into the output columns
    groupBy -- when non-empty, the result is wrapped in a GroupBy

    NOTE(review): the column bookkeeping below relies entirely on the
    project's Graph operator algebra (*, +, -, ~, restrict); the comments
    describe the apparent intent — verify against the Graph class.
    """
    cols = Graph(self.columns)
    rename = ~Graph(rename)       # inverted: maps new name -> old name
    groupBy = Set(groupBy)
    join = [self] + list(join)    # self is always part of the join
    outer = list(outer)
    if where is None:
        where = EMPTY
    # Accumulate the attribute columns of every participating relvar.
    for rv in join + outer:
        cols += rv.attributes()
    if groupBy or keep is not None:
        # Restrict output to kept columns, rename targets, and group keys.
        cols = Graph.fromkeys(
            Set(keep or ()) | Set((~rename).keys()) | groupBy) * cols
    if rename:
        # Renamed columns composed via rename, plus untouched columns.
        cols = rename * cols + (cols - cols.restrict(~rename))
    rv = BasicJoin(where, join, outer, cols + calc)
    if groupBy:
        return GroupBy(rv, groupBy)
    return rv
class Cmp(_expr, HashAndCompare):
    """A binary comparison expression: `arg1 op arg2`.

    Supports logical inversion (~) by flipping the operator where a
    direct inverse exists, and renders itself as SQL via sqlCondition().
    """

    # Operator -> inverse operator, in both directions (e.g. '=' <-> '<>').
    invOps = Graph({'=': '<>', '<': '>=', '>': '<='}.items())
    invOps = invOps + ~invOps

    def __init__(self, arg1, op, arg2):
        self.arg1 = arg1
        self.arg2 = arg2
        self.op = op
        self._hashAndCompare = (op, arg1, arg2)

    def __invert__(self):
        try:
            flipped = self.invOps[self.op]
        except KeyError:
            # No direct inverse operator; fall back to a logical NOT.
            return Not(self)
        return self.__class__(self.arg1, flipped, self.arg2)

    def __repr__(self):
        return 'Cmp%r' % ((self.arg1, self.op, self.arg2), )

    def sqlCondition(self, writer):
        # Emit "arg1 op arg2" through the SQL writer.
        writer.writeExpr(self.arg1)
        writer.write(self.op)
        writer.writeExpr(self.arg2)
def __init__(self, rv, groupBy, cond=EMPTY):
    """Wrap relvar `rv` in a grouping over the columns named in `groupBy`.

    Every column of `rv` that is not a grouping key must be an aggregate;
    otherwise TypeError is raised.  Group keys are kept sorted so equal
    groupings compare equal.
    """
    keys = sorted(groupBy)
    self.condition = cond
    self.rv = rv
    self.groupBy = keys
    self.columns = Graph([(name, Column(name, self)) for name in rv.keys()])
    for name in rv.keys():
        if name in keys or rv[name].isAggregate():
            continue
        # A plain (non-aggregated) column that isn't a group key is invalid.
        raise TypeError("Non-aggregate column in groupBy", name, rv[name])
    self._hashAndCompare = (self.__class__.__name__, rv, tuple(keys))
def sqlSelect(self, writer):
    """Render this join as a complete SQL SELECT statement via `writer`.

    Tables whose full attribute set passes through unrenamed are emitted
    as "alias.*"; otherwise individual columns are listed, with "AS name"
    for renamed columns.  Columns not tied to any one table (e.g.
    calculated expressions) are emitted last, always with "AS name".
    Returns the accumulated SQL text from writer.data().
    """
    writer.assignAliasesFor(self)
    writer.write('SELECT ')
    sep = writer.separator(', ')
    remainingColumns = self.columns
    for tbl in self.relvars:
        tblCols = tbl.attributes()
        # Output columns that come from this table.
        outputSubset = remainingColumns * Graph.fromkeys(
            Set(tblCols.values()))
        remainingColumns = remainingColumns - outputSubset
        if outputSubset == tblCols:
            # All names in table are kept as-is, so just use '*'
            sep()
            writer.writeAlias(tbl)
            writer.write(".*")
            continue
        # For all output columns that are in this table...
        for name, col in outputSubset.items():
            sep()
            writer.writeExpr(col)
            # Fixed: was the deprecated '<>' operator (removed in Python 3).
            if name != col.name:
                writer.write(' AS ')
                writer.write(name)
    # Whatever is left is not a plain table column; always emit "AS name".
    for name, col in remainingColumns.items():
        sep()
        writer.writeExpr(col)
        writer.write(' AS ')
        writer.write(name)
    writer.write(' FROM ')
    sep = writer.separator(', ')
    for tbl in self.relvars:
        sep()
        writer.writeTable(tbl)
    # prepender ensures ' WHERE ' only appears if a condition is written.
    writer.prepender(' WHERE ').writeCond(self.condition)
    return writer.data()
def compare(self, rows, data, column=0):
    """Compare 'rows' against 'data' starting at 'column' -> (missing, extra).

    Returns '(missingRows, extraRecords)': the 'rows' with no matching
    record in 'data', and the records in 'data' matched by no row.

    Strategy: partition both sides on the current column's key and recurse
    column by column.  Recursion stops when one side is empty (everything
    on the other side is missing/extra) or both sides are down to a single
    item, which is then compared field-by-field via compareRow().
    """
    if not rows or not data:
        # One side empty: the other side is missing/extra by definition.
        return rows, data
    if len(rows) == 1 == len(data):
        # Exactly one candidate on each side: compare them directly.
        self.compareRow(rows[0], data[0])
        return [], []
    # Partition each side on the current column's key, then recurse on
    # the per-key subsets and accumulate the differences.
    mapper = self.mappers[column]
    recordMap = Graph([(mapper.extract(record), record) for record in data])
    rowMap = Graph([(mapper.parse(row.cells[column]), row) for row in rows])
    nextColumn = column + 1
    missing, extra = [], []
    for key in Set(rowMap.keys() + recordMap.keys()):
        subMissing, subExtra = self.compare(
            rowMap.neighbors(key), recordMap.neighbors(key), nextColumn
        )
        missing.extend(subMissing)
        extra.extend(subExtra)
    return missing, extra
class BasicJoin(AbstractRV, HashAndCompare):
    """An inner/outer join of one or more relvars under a condition.

    Nested joins flatten into their parent (see getInnerRVs), relvar lists
    are kept sorted so equal joins hash and compare equal, and the whole
    expression can render itself as SQL via sqlSelect().
    """

    def __init__(self, condition, relvars, outers=(), columns=()):
        """Flatten `relvars`/`outers` into this join, AND-ing conditions.

        Raises ValueError if the same relvar object is used twice, and
        TypeError if no inner relvar is supplied at all.
        """
        myrels = []
        relUsage = {}

        def checkUsage(rv):
            # Each relvar object may participate only once, by identity.
            r = id(rv)
            if r in relUsage:
                raise ValueError("Relvar used more than once", rv)
            relUsage[r] = True
            return rv

        # List comprehensions instead of map(): identical in Python 2,
        # and still a list (not an iterator) under Python 3.
        outers = [checkUsage(rv) for rv in outers]
        for rv in relvars:
            myrels.extend([checkUsage(r) for r in rv.getInnerRVs()])
            outers.extend([checkUsage(r) for r in rv.getOuterRVs()])
            condition = condition & rv.getCondition()

        if len(myrels) < 1:
            raise TypeError("BasicJoin requires at least 1 relvar")

        # Canonical ordering so equal joins produce equal hash/compare keys.
        myrels.sort()
        outers.sort()

        self.relvars = tuple(myrels)
        self.outers = tuple(outers)
        self.condition = condition
        self.columns = Graph(columns)
        self._hashAndCompare = (
            self.__class__.__name__, condition, self.relvars, self.outers,
            ImmutableSet(self.columns)
        )

    def __repr__(self):
        parms = (
            self.condition, list(self.relvars), list(self.outers),
            list(self.columns.items())
        )
        return '%s%r' % (self._hashAndCompare[0], parms)

    def getInnerRVs(self):
        # Joining a BasicJoin flattens it into the enclosing join.
        return self.relvars

    def getDB(self):
        """Return the database shared by all inner relvars, or None if mixed."""
        db = self.relvars[0].getDB()
        for rv in self.relvars[1:]:
            if rv.getDB() is not db:
                return None
        return db

    def sqlSelect(self, writer):
        """Render this join as a complete SQL SELECT statement via `writer`.

        Tables whose full attribute set passes through unrenamed are
        emitted as "alias.*"; otherwise individual columns are listed,
        with "AS name" for renamed columns.  Columns not tied to any one
        table (e.g. calculated expressions) are emitted last, always with
        "AS name".  Returns the accumulated SQL from writer.data().
        """
        writer.assignAliasesFor(self)
        writer.write('SELECT ')
        sep = writer.separator(', ')
        remainingColumns = self.columns
        for tbl in self.relvars:
            tblCols = tbl.attributes()
            # Output columns that come from this table.
            outputSubset = remainingColumns * Graph.fromkeys(
                Set(tblCols.values()))
            remainingColumns = remainingColumns - outputSubset
            if outputSubset == tblCols:
                # All names in table are kept as-is, so just use '*'
                sep()
                writer.writeAlias(tbl)
                writer.write(".*")
                continue
            # For all output columns that are in this table...
            for name, col in outputSubset.items():
                sep()
                writer.writeExpr(col)
                # Fixed: was the deprecated '<>' operator (removed in Py3).
                if name != col.name:
                    writer.write(' AS ')
                    writer.write(name)
        # Whatever remains is not a plain table column; always emit "AS name".
        for name, col in remainingColumns.items():
            sep()
            writer.writeExpr(col)
            writer.write(' AS ')
            writer.write(name)
        writer.write(' FROM ')
        sep = writer.separator(', ')
        for tbl in self.relvars:
            sep()
            writer.writeTable(tbl)
        # prepender ensures ' WHERE ' only appears if a condition is written.
        writer.prepender(' WHERE ').writeCond(self.condition)
        return writer.data()
def __init__(self, name, columns, db=None):
    """Create a named relvar with one Column object per name in `columns`.

    `db` identifies the owning database (may be None); identity for
    hashing/comparison is (class name, db, name).
    """
    self.name = name
    self.db = db
    self.columns = Graph([(col, Column(col, self)) for col in columns])
    self._hashAndCompare = (self.__class__.__name__, self.db, self.name)
def parseCluster(prefix, file):
    """Parse a cluster-definition file into a property dictionary.

    `file` is any iterable of lines (None means a single pseudo-host named
    after the local hostname).  Lines are either 'GROUP:name' / 'LUMP:name'
    headers or member names; blank lines and '#' comments are skipped.
    Members before any header fall into the '__orphans__' group; LUMP
    members are themselves treated as groups rather than hosts.

    Returns a dict of properties, all keyed under `prefix`: per-host group
    memberships, per-group host lists, and the overall group/host tuples,
    each ordered by first appearance in the file.
    """
    try:
        import socket
        hn = socket.gethostname()
    except Exception:
        # Fixed: was a bare 'except:' which also swallowed
        # KeyboardInterrupt/SystemExit.  Best-effort fallback name.
        hn = 'NO_NAME'

    props = {}
    props[prefix + 'hostname'] = hn
    props[prefix + 'shortname'] = hn.split('.', 1)[0]

    if file is None:
        file = [hn]

    membership = Graph()    # renamed from 'all' (shadowed the builtin)
    groups = Set()
    hosts = Set()
    order = {}              # name -> (first line number, name), for ordering
    gname = '__orphans__'   # members before any header land here
    inLump = False
    lineno = 0

    for line in file:
        lineno += 1
        line = line.strip()
        lumpline = line.startswith('LUMP:')
        if not line or line.startswith('#'):
            continue
        if lumpline or line.startswith('GROUP:'):
            inLump = lumpline
            gname = line.split(':', 1)[1]
            groups.add(gname)
            if gname not in order:
                order[gname] = lineno, gname
        else:
            membership.add(line, gname)
            if inLump:
                # LUMP members are groups themselves, not hosts.
                groups.add(line)
            else:
                hosts.add(line)
                # Fixed: was the deprecated dict.has_key().
                if line not in order:
                    order[line] = lineno, line

    def ordered_tuple(names):
        # Return `names` as a tuple ordered by first appearance in the file.
        values = (Graph.fromkeys(names) * order.items()).values()
        values.sort()
        return tuple([v for (k, v) in values])

    host_pre = prefix + 'hosts.'
    group_pre = prefix + 'groups.'
    for host in hosts:
        props[host_pre + host] = ordered_tuple(membership.reachable(host))

    g = ~membership    # reverse mapping from groups to hosts
    for group in list(groups) + ['__orphans__']:
        props[group_pre + group] = ordered_tuple(
            # don't include groups in groups' membership
            (g.reachable(group) - groups)
        )

    props[prefix + 'groups'] = ordered_tuple(groups)
    props[prefix + 'hosts'] = ordered_tuple(hosts)
    props[prefix + 'groups.__all__'] = ordered_tuple(hosts)
    return props
def ordered_tuple(set):
    """Return the names in `set` as a tuple, ordered by the closed-over
    `order` mapping (name -> (rank, name))."""
    ranked = (Graph.fromkeys(set) * order.items()).values()
    ranked.sort()
    return tuple([name for (rank, name) in ranked])