Ejemplo n.º 1
0
def process_page(elem, send):
    """
    Forward the revision texts of one page to a pipe, tagged by user class.

    The children of ``elem`` are scanned in order. A ``title`` child is
    resolved to a username with ``mwlib.username_from_utp``; if that call
    raises ``ValueError`` the whole page is abandoned. Every non-empty
    ``text`` child of a ``revision`` child seen afterwards is sent as
    ``(user_class, text)``, where ``user_class`` is looked up in
    ``user_classes`` by the UTF-8 encoded username. Users missing from
    ``user_classes`` are sent as ``'anonymous'`` when ``mwlib.isip`` accepts
    them, otherwise they are logged and counted in ``count_missing``.

    elem -- iterable of child elements exposing ``tag`` and ``text``
    send -- a Pipe connection, write only

    Side effects: increments the module-level ``count_utp`` for every text
    handled (including those of missing users) and writes its value to
    stderr every 500 texts.
    """
    global count_utp, count_missing

    user = None
    # UTF-8 encoded username, computed at most once per title. Rebinding
    # ``user`` to its encoded form inside the loop would re-encode an
    # already encoded byte string on the next revision, which fails for
    # non-ASCII names (implicit ASCII decode in Python 2).
    encoded_user = None

    for child in elem:
        if child.tag == tag['title'] and child.text:
            try:
                user = mwlib.username_from_utp(
                    child.text, (en_user_talk, lang_user_talk))
            except ValueError:
                return
            # A new title invalidates the cached encoded name.
            encoded_user = None
        elif child.tag == tag['revision']:
            for rc in child:
                if rc.tag != tag['text']:
                    continue

                # Skip empty texts and revisions seen before any usable title.
                if not (rc.text and user):
                    continue

                if encoded_user is None:
                    encoded_user = user.encode('utf-8')

                try:
                    user_class = user_classes[encoded_user]
                except KeyError:
                    # Fix for anonymous users not in the rich file.
                    if mwlib.isip(encoded_user):
                        send.send(('anonymous', rc.text))
                    else:
                        logging.warning("Exception with user %s",
                                        encoded_user)
                        count_missing += 1
                else:
                    send.send((user_class, rc.text))

                count_utp += 1

                # Progress indicator on stderr.
                if not count_utp % 500:
                    sys.stderr.write('%s\n' % count_utp)
Ejemplo n.º 2
0
def process_page(elem, send):
    """
    Forward the revision texts of one page to a pipe, tagged by user class.

    The children of ``elem`` are scanned in order. A ``title`` child is
    resolved to a username with ``mwlib.username_from_utp``; if that call
    raises ``ValueError`` the whole page is abandoned. Every non-empty
    ``text`` child of a ``revision`` child seen afterwards is sent as
    ``(user_class, text)``, where ``user_class`` is looked up in
    ``user_classes`` by the UTF-8 encoded username. Users missing from
    ``user_classes`` are sent as ``'anonymous'`` when ``mwlib.isip`` accepts
    them, otherwise they are logged and counted in ``count_missing``.

    elem -- iterable of child elements exposing ``tag`` and ``text``
    send -- a Pipe connection, write only

    Side effects: increments the module-level ``count_utp`` for every text
    handled (including those of missing users) and writes its value to
    stderr every 500 texts.
    """
    global count_utp, count_missing

    user = None
    # UTF-8 encoded username, computed at most once per title. Rebinding
    # ``user`` to its encoded form inside the loop would re-encode an
    # already encoded byte string on the next revision, which fails for
    # non-ASCII names (implicit ASCII decode in Python 2).
    encoded_user = None

    for child in elem:
        if child.tag == tag['title'] and child.text:
            try:
                user = mwlib.username_from_utp(
                    child.text, (en_user_talk, lang_user_talk))
            except ValueError:
                return
            # A new title invalidates the cached encoded name.
            encoded_user = None
        elif child.tag == tag['revision']:
            for rc in child:
                if rc.tag != tag['text']:
                    continue

                # Skip empty texts and revisions seen before any usable title.
                if not (rc.text and user):
                    continue

                if encoded_user is None:
                    encoded_user = user.encode('utf-8')

                try:
                    user_class = user_classes[encoded_user]
                except KeyError:
                    # Fix for anonymous users not in the rich file.
                    if mwlib.isip(encoded_user):
                        send.send(('anonymous', rc.text))
                    else:
                        logging.warning("Exception with user %s",
                                        encoded_user)
                        count_missing += 1
                else:
                    send.send((user_class, rc.text))

                count_utp += 1

                # Progress indicator on stderr.
                if not count_utp % 500:
                    sys.stderr.write('%s\n' % count_utp)
Ejemplo n.º 3
0
def process_page(elem, queue=None):
    """
    Put the revision texts of one user page on a queue, tagged by user class.

    The children of ``elem`` are scanned in order. For a ``title`` child, the
    part of the title before the first ``/`` is split on ``:``; the page is
    kept only when that yields more than one piece and the first piece is
    ``en_user`` or ``lang_user``, in which case the second piece is taken as
    the username. Any other title makes the function return immediately.
    Every non-empty ``text`` child of a ``revision`` child seen afterwards is
    queued as ``(user_class, text)``, where ``user_class`` is looked up in
    ``user_classes`` by the UTF-8 encoded username. Users missing from
    ``user_classes`` are queued as ``'anonymous'`` when ``mwlib.isip``
    accepts them, otherwise they are logged and counted in ``count_missing``.

    elem  -- iterable of child elements exposing ``tag`` and ``text``
    queue -- object whose ``put`` method receives the tuples; despite the
             ``None`` default a real queue is needed as soon as a text has
             to be emitted

    Side effects: increments the module-level ``count`` for every text
    handled and writes its value to stderr every 500 texts.
    """
    # ``count_missing`` is assigned below, so it must be declared global;
    # NOTE(review): assumes a module-level ``count_missing`` counter exists.
    global count, count_missing

    q = queue
    user = None
    # UTF-8 encoded username, computed at most once per title. Rebinding
    # ``user`` to its encoded form inside the loop would re-encode an
    # already encoded byte string on the next revision, which fails for
    # non-ASCII names (implicit ASCII decode in Python 2).
    encoded_user = None

    for child in elem:
        if child.tag == tag['title'] and child.text:
            a_title = child.text.split('/')[0].split(':')

            if len(a_title) > 1 and a_title[0] in (en_user, lang_user):
                user = a_title[1]
                # A new title invalidates the cached encoded name.
                encoded_user = None
            else:
                return
        elif child.tag == tag['revision']:
            for rc in child:
                if rc.tag != tag['text']:
                    continue

                # Skip empty texts and revisions seen before any usable title.
                if not (rc.text and user):
                    continue

                if encoded_user is None:
                    encoded_user = user.encode('utf-8')

                # Only the lookup can legitimately miss; keep queue errors
                # visible instead of swallowing them with a bare except.
                try:
                    user_class = user_classes[encoded_user]
                except KeyError:
                    # Fix for anonymous users not in the rich file.
                    if mwlib.isip(encoded_user):
                        q.put(('anonymous', rc.text))
                    else:
                        logging.warning("Exception with user %s",
                                        encoded_user)
                        count_missing += 1
                else:
                    q.put((user_class, rc.text))

                count += 1
                # Progress indicator on stderr.
                if not count % 500:
                    sys.stderr.write('%s\n' % count)
Ejemplo n.º 4
0
def process_page(elem, queue=None):
    """
    Put the revision texts of one user page on a queue, tagged by user class.

    The children of ``elem`` are scanned in order. For a ``title`` child, the
    part of the title before the first ``/`` is split on ``:``; the page is
    kept only when that yields more than one piece and the first piece is
    ``en_user`` or ``lang_user``, in which case the second piece is taken as
    the username. Any other title makes the function return immediately.
    Every non-empty ``text`` child of a ``revision`` child seen afterwards is
    queued as ``(user_class, text)``, where ``user_class`` is looked up in
    ``user_classes`` by the UTF-8 encoded username. Users missing from
    ``user_classes`` are queued as ``'anonymous'`` when ``mwlib.isip``
    accepts them, otherwise they are logged and counted in ``count_missing``.

    elem  -- iterable of child elements exposing ``tag`` and ``text``
    queue -- object whose ``put`` method receives the tuples; despite the
             ``None`` default a real queue is needed as soon as a text has
             to be emitted

    Side effects: increments the module-level ``count`` for every text
    handled and writes its value to stderr every 500 texts.
    """
    # ``count_missing`` is assigned below, so it must be declared global;
    # NOTE(review): assumes a module-level ``count_missing`` counter exists.
    global count, count_missing

    q = queue
    user = None
    # UTF-8 encoded username, computed at most once per title. Rebinding
    # ``user`` to its encoded form inside the loop would re-encode an
    # already encoded byte string on the next revision, which fails for
    # non-ASCII names (implicit ASCII decode in Python 2).
    encoded_user = None

    for child in elem:
        if child.tag == tag['title'] and child.text:
            a_title = child.text.split('/')[0].split(':')

            if len(a_title) > 1 and a_title[0] in (en_user, lang_user):
                user = a_title[1]
                # A new title invalidates the cached encoded name.
                encoded_user = None
            else:
                return
        elif child.tag == tag['revision']:
            for rc in child:
                if rc.tag != tag['text']:
                    continue

                # Skip empty texts and revisions seen before any usable title.
                if not (rc.text and user):
                    continue

                if encoded_user is None:
                    encoded_user = user.encode('utf-8')

                # Only the lookup can legitimately miss; keep queue errors
                # visible instead of swallowing them with a bare except.
                try:
                    user_class = user_classes[encoded_user]
                except KeyError:
                    # Fix for anonymous users not in the rich file.
                    if mwlib.isip(encoded_user):
                        q.put(('anonymous', rc.text))
                    else:
                        logging.warning("Exception with user %s",
                                        encoded_user)
                        count_missing += 1
                else:
                    q.put((user_class, rc.text))

                count += 1
                # Progress indicator on stderr.
                if not count % 500:
                    sys.stderr.write('%s\n' % count)