def process_page(elem, send):
    """Forward the revision texts of one page to ``send``.

    The children of ``elem`` are scanned in order.  A title child is turned
    into a user name with ``mwlib.username_from_utp``; every non-empty text
    element found inside a revision child is then sent as a
    ``(user_class, text)`` tuple, where ``user_class`` is looked up in the
    module-level ``user_classes`` mapping.

    Users missing from ``user_classes`` are sent with the class
    ``'anonymous'`` when ``mwlib.isip`` accepts their name; otherwise a
    warning is logged, ``count_missing`` is incremented and the text is not
    sent.

    Args:
        elem: iterable page element whose children expose ``tag`` and
            ``text`` attributes.
        send: a Pipe connection, write only (only ``send.send`` is used).

    Side effects:
        Updates the module-level counters ``count_utp`` and
        ``count_missing`` and writes ``count_utp`` to stderr every 500
        processed texts.
    """
    global count_utp, count_missing

    user = None

    for child in elem:
        if child.tag == tag['title'] and child.text:
            try:
                user = mwlib.username_from_utp(
                    child.text, (en_user_talk, lang_user_talk))
            except ValueError:
                # username_from_utp rejected this title: skip the whole page.
                return
        elif child.tag == tag['revision']:
            for rc in child:
                if rc.tag != tag['text']:
                    continue
                # Nothing to forward without both a text and a known user.
                if not (rc.text and user):
                    continue

                # Keep the encoded name in its own variable.  Rebinding
                # ``user`` here made the next text element re-encode an
                # already encoded byte string, which in Python 2 performs an
                # implicit ASCII decode and fails for non-ASCII names.
                user_key = user.encode('utf-8')

                # Only the lookup is guarded, so a KeyError coming from
                # send.send() is not mistaken for an unknown user.
                try:
                    user_class = user_classes[user_key]
                except KeyError:
                    # Fix for anonymous users not in the rich file.
                    if mwlib.isip(user_key):
                        send.send(('anonymous', rc.text))
                    else:
                        logging.warning("Exception with user %s", user_key)
                        count_missing += 1
                else:
                    send.send((user_class, rc.text))

                # NOTE(review): counted once per text element examined with
                # a known user; confirm this is the intended granularity.
                count_utp += 1

                if not count_utp % 500:
                    # Progress indicator on stderr.
                    sys.stderr.write("%s\n" % count_utp)
def process_page(elem, queue=None):
    """Put the revision texts of one user page on ``queue``.

    The children of ``elem`` are scanned in order.  The title child is cut
    at the first ``/`` and split on ``:``; when the first part is
    ``en_user`` or ``lang_user`` the second part is taken as the user name,
    otherwise the page is skipped.  Every non-empty text element found
    inside a revision child is then queued as a ``(user_class, text)``
    tuple, where ``user_class`` is looked up in the module-level
    ``user_classes`` mapping.

    Users missing from ``user_classes`` are queued with the class
    ``'anonymous'`` when ``mwlib.isip`` accepts their name; otherwise a
    warning is logged, ``count_missing`` is incremented and the text is not
    queued.

    Args:
        elem: iterable page element whose children expose ``tag`` and
            ``text`` attributes.
        queue: object with a ``put`` method receiving the tuples.  The
            default ``None`` is only a placeholder; a real queue is needed
            as soon as a text has to be forwarded.

    Side effects:
        Updates the module-level counters ``count`` and ``count_missing``
        and writes ``count`` to stderr every 500 processed texts.
    """
    # NOTE(review): count_missing is assumed to be initialised at module
    # level next to ``count``; without the global declaration the increment
    # below always raised UnboundLocalError.
    global count, count_missing

    user = None

    for child in elem:
        if child.tag == tag['title'] and child.text:
            # Drop any sub-page suffix, then separate namespace and name.
            title_parts = child.text.split('/')[0].split(':')

            if len(title_parts) > 1 and title_parts[0] in (en_user, lang_user):
                user = title_parts[1]
            else:
                # Title is not in one of the accepted namespaces.
                return
        elif child.tag == tag['revision']:
            for rc in child:
                if rc.tag != tag['text']:
                    continue
                # Nothing to forward without both a text and a known user.
                if not (rc.text and user):
                    continue

                # Keep the encoded name in its own variable.  Rebinding
                # ``user`` here made the next text element re-encode an
                # already encoded byte string, which in Python 2 performs an
                # implicit ASCII decode and fails for non-ASCII names.
                user_key = user.encode('utf-8')

                # Catch only the missing-user case instead of every
                # exception, and guard only the lookup itself.
                try:
                    user_class = user_classes[user_key]
                except KeyError:
                    # Fix for anonymous users not in the rich file.
                    if mwlib.isip(user_key):
                        # Use the queue: this variant has no ``send`` object.
                        queue.put(('anonymous', rc.text))
                    else:
                        logging.warning("Exception with user %s", user_key)
                        count_missing += 1
                else:
                    queue.put((user_class, rc.text))

                # NOTE(review): counted once per text element examined with
                # a known user; confirm this is the intended granularity.
                count += 1

                if not count % 500:
                    # Progress indicator on stderr.
                    sys.stderr.write("%s\n" % count)
def process_page(elem, queue=None):
    """Put the revision texts of one user page on ``queue``.

    The children of ``elem`` are scanned in order.  The title child is cut
    at the first ``/`` and split on ``:``; when the first part is
    ``en_user`` or ``lang_user`` the second part is taken as the user name,
    otherwise the page is skipped.  Every non-empty text element found
    inside a revision child is then queued as a ``(user_class, text)``
    tuple, where ``user_class`` is looked up in the module-level
    ``user_classes`` mapping.

    Users missing from ``user_classes`` are queued with the class
    ``'anonymous'`` when ``mwlib.isip`` accepts their name; otherwise a
    warning is logged, ``count_missing`` is incremented and the text is not
    queued.

    Args:
        elem: iterable page element whose children expose ``tag`` and
            ``text`` attributes.
        queue: object with a ``put`` method receiving the tuples.  The
            default ``None`` is only a placeholder; a real queue is needed
            as soon as a text has to be forwarded.

    Side effects:
        Updates the module-level counters ``count`` and ``count_missing``
        and writes ``count`` to stderr every 500 processed texts.
    """
    # NOTE(review): count_missing is assumed to be initialised at module
    # level next to ``count``; without the global declaration the increment
    # below always raised UnboundLocalError.
    global count, count_missing

    user = None

    for child in elem:
        if child.tag == tag['title'] and child.text:
            # Drop any sub-page suffix, then separate namespace and name.
            title_parts = child.text.split('/')[0].split(':')

            if len(title_parts) > 1 and title_parts[0] in (en_user, lang_user):
                user = title_parts[1]
            else:
                # Title is not in one of the accepted namespaces.
                return
        elif child.tag == tag['revision']:
            for rc in child:
                if rc.tag != tag['text']:
                    continue
                # Nothing to forward without both a text and a known user.
                if not (rc.text and user):
                    continue

                # Keep the encoded name in its own variable.  Rebinding
                # ``user`` here made the next text element re-encode an
                # already encoded byte string, which in Python 2 performs an
                # implicit ASCII decode and fails for non-ASCII names.
                user_key = user.encode('utf-8')

                # Catch only the missing-user case instead of every
                # exception, and guard only the lookup itself.
                try:
                    user_class = user_classes[user_key]
                except KeyError:
                    # Fix for anonymous users not in the rich file.
                    if mwlib.isip(user_key):
                        # Use the queue: this variant has no ``send`` object.
                        queue.put(('anonymous', rc.text))
                    else:
                        logging.warning("Exception with user %s", user_key)
                        count_missing += 1
                else:
                    queue.put((user_class, rc.text))

                # NOTE(review): counted once per text element examined with
                # a known user; confirm this is the intended granularity.
                count += 1

                if not count % 500:
                    # Progress indicator on stderr.
                    sys.stderr.write("%s\n" % count)