def to_python(self, value):
    """Convert a stored value into its Python representation.

    Dicts are returned as they are, and so is any falsy value (``None``,
    an empty string, ...). Every other value is base64-decoded,
    decompressed and passed to ``wbin.deserialize``.
    """
    # Nothing to decode: either already a dict or an empty/missing value.
    if isinstance(value, dict) or not value:
        return value

    payload = decompress(b64decode(value))
    return wbin.deserialize(payload)
def page_iter(lang='en', paginate=10000000, desired=None): events, conn = get_events_table() count_query = select([func.count(events.c.id)], events.c.lang == lang) s = select([events.c.title, events.c.data, events.c.talk, events.c.total_editors, events.c.bot_editors, events.c.anonymous_editors], events.c.lang == lang).order_by( events.c.title, events.c.talk).limit(paginate) ## searching only desired pages if desired: s = s.where(events.c.title.in_(desired)) count_query = count_query.where(events.c.title.in_(desired)) count = conn.execute(count_query).fetchall()[0][0] print 'PAGES:', count for offset in xrange(0, count, paginate): rs = conn.execute(s.offset(offset)) for row in rs: yield (row[0], deserialize(decompress(b64decode(row[1]))), row[2], row[3], row[4], row[5])
def user_iter(lang='en', paginate=10000000):
    """Yield one dict per contributions row for the given language.

    Arguments:
        lang: value matched against the table's ``lang`` column.
        paginate: LIMIT applied to each query; successive queries move the
            OFFSET forward by this amount until the row count is covered.

    Yields:
        A dict built from the row with the ``id`` and ``lang`` keys
        removed. ``namespace_edits`` is base64-decoded, decompressed and
        deserialized unless it is ``None``, in which case it stays ``None``.
    """
    contrib, conn = get_contributions_table()
    lang_col = contrib.c.lang

    count_query = select([func.count(contrib.c.id)], lang_col == lang)
    s = select([contrib], lang_col == lang)
    s = s.order_by(contrib.c.id)
    s = s.limit(paginate)

    count_rows = conn.execute(count_query).fetchall()
    count = count_rows[0][0]

    for start in xrange(0, count, paginate):
        for record in conn.execute(s.offset(start)):
            ## the row supports dict and list access; a plain dict is
            ## what csv.DictWriter needs
            v = dict(record)
            v.pop('id')
            v.pop('lang')

            packed = v['namespace_edits']
            if packed is not None:
                v['namespace_edits'] = deserialize(
                    decompress(b64decode(packed))
                )
            else:
                v['namespace_edits'] = None

            yield v
def page_iter(lang='en', paginate=10000000, desired=None): events, conn = get_events_table() count_query = select([func.count(events.c.id)], events.c.lang == lang) s = select([ events.c.title, events.c.data, events.c.talk, events.c.total_editors, events.c.bot_editors, events.c.anonymous_editors ], events.c.lang == lang).order_by(events.c.title, events.c.talk).limit(paginate) ## searching only desired pages if desired: s = s.where(events.c.title.in_(desired)) count_query = count_query.where(events.c.title.in_(desired)) count = conn.execute(count_query).fetchall()[0][0] print 'PAGES:', count for offset in xrange(0, count, paginate): rs = conn.execute(s.offset(offset)) for row in rs: yield (row[0], deserialize(decompress(b64decode(row[1]))), row[2], row[3], row[4], row[5])
def _deserialize(self, s):
    """Decode ``s`` through ``wbin.deserialize``.

    When ``self._des_func`` is ``None`` only ``s`` is passed. Otherwise
    the call also receives ``self._des_func``, the one-element tuple
    ``(s,)`` and ``self._des_size``, in that order.
    """
    des_func = self._des_func
    if des_func is None:
        return wbin.deserialize(s)
    return wbin.deserialize(s, des_func, (s,), self._des_size)