def user_iter(lang='en', paginate=10000000):
    """Yield the stored contribution rows for ``lang`` as plain dicts.

    The total number of matching rows is counted once up front; the rows
    are then fetched ordered by ``id`` in pages of at most ``paginate``
    rows, advancing the query offset by ``paginate`` for each page.

    Each yielded dict has the ``id`` and ``lang`` columns removed. A
    ``namespace_edits`` value that is not ``None`` is passed through
    ``b64decode``, ``decompress`` and ``deserialize`` (in that order)
    before the dict is yielded.
    """
    table, connection = get_contributions_table()

    total_query = select([func.count(table.c.id)], table.c.lang == lang)
    page_query = select([table], table.c.lang == lang)
    page_query = page_query.order_by(table.c.id).limit(paginate)

    counted = connection.execute(total_query).fetchall()
    total = counted[0][0]

    for start in xrange(0, total, paginate):
        for row in connection.execute(page_query.offset(start)):
            # Rows are RowProxy objects (dict- and list-like); a real dict
            # is handed out so callers can feed it to csv.DictWriter.
            record = dict(row)
            del record['id'], record['lang']

            packed = record['namespace_edits']
            if packed is not None:
                record['namespace_edits'] = deserialize(
                    decompress(b64decode(packed)))
            yield record
def __init__(self, namespaces):
    """Set up namespace lookups, comment regexes and the DB insert.

    ``namespaces`` is a sized iterable of 2-item entries whose second
    item is a UTF-8 encoded name (the first item is ignored here).

    Side effect: the module-level ``ATTR_LEN`` is set to
    ``len(namespaces)``.
    """
    global ATTR_LEN
    super(ContribDict, self).__init__()

    self._namespaces = namespaces
    ATTR_LEN = len(namespaces)

    # Decoded namespace name -> position of that entry in ``namespaces``.
    index_by_name = {}
    for position, (_, raw_name) in enumerate(namespaces):
        index_by_name[raw_name.decode('utf-8')] = position
    self._d_namespaces = index_by_name

    def compile_ignoring_case(pattern):
        # Every pattern below is compiled with the same re.I flag.
        return re2_compile_with_fallback(pattern, flags=re.I)

    self._re_welcome = compile_ignoring_case(r'well?come')
    self._re_npov = compile_ignoring_case(r'[ n]pov')
    self._re_please = compile_ignoring_case(r'pl(s|z|ease)')
    self._re_thanks = compile_ignoring_case(r'th(ank|anx|x)')
    self._re_revert = compile_ignoring_case(r'(revert| rev )')

    # Keep the connection and a ready-made insert construct for the
    # contributions table.
    table, connection = get_contributions_table()
    self.connection = connection
    self.insert = table.insert()
def __init__(self, namespaces):
    """Initialise namespace indexes, keyword regexes and DB handles.

    ``namespaces`` must support ``len()`` and iterate as pairs; the
    second element of each pair is a byte-string name that is decoded
    as UTF-8 (the first element is not used in this method).

    Note that this also rebinds the module global ``ATTR_LEN`` to the
    number of namespaces.
    """
    global ATTR_LEN
    super(ContribDict, self).__init__()

    self._namespaces = namespaces
    ATTR_LEN = len(namespaces)

    # Map each decoded namespace name to its index in ``namespaces``.
    self._d_namespaces = dict(
        (encoded.decode('utf-8'), index)
        for index, (_, encoded) in enumerate(namespaces)
    )

    # All keyword patterns are matched case-insensitively.
    ignore_case = re.I
    self._re_welcome = re2_compile_with_fallback(
        r'well?come', flags=ignore_case)
    self._re_npov = re2_compile_with_fallback(
        r'[ n]pov', flags=ignore_case)
    self._re_please = re2_compile_with_fallback(
        r'pl(s|z|ease)', flags=ignore_case)
    self._re_thanks = re2_compile_with_fallback(
        r'th(ank|anx|x)', flags=ignore_case)
    self._re_revert = re2_compile_with_fallback(
        r'(revert| rev )', flags=ignore_case)

    # Store the connection and an insert construct built from the
    # contributions table.
    contrib_table, db_connection = get_contributions_table()
    self.connection = db_connection
    self.insert = contrib_table.insert()