Python membertopersonの例、resolvemembernames.memberList.membertoperson Pythonの例

コード例 #1

0

ファイルを表示

ファイル: future-fetch.py プロジェクト: hzj123/theyworkforyou

    def __init__(self, entry):
        """Populate this event from one parsed calendar feed entry.

        Reads the children of ``entry.event`` plus ``entry.guid`` and
        ``entry.link`` and stores the derived values as attributes.

        Always set: ``id``, ``deleted``, ``link_calendar``,
        ``link_external``, ``chamber``, ``event_date``, ``time_start``,
        ``time_end``, ``people`` and ``witnesses``.

        Only set when the feed supplies the matching data (callers must
        not assume they exist): ``committee_name``, ``debate_type``,
        ``title``, ``witnesses_str`` and ``location``.
        """
        self.id = entry.event.attrib['id']
        self.deleted = 0
        self.link_calendar = entry.guid
        self.link_external = entry.link
        chamber = entry.event.chamber.text.strip()
        self.chamber = '%s: %s' % (entry.event.house.text.strip(), chamber)
        self.event_date = entry.event.date.text
        self.time_start = getattr(entry.event, 'startTime', None)
        self.time_end = getattr(entry.event, 'endTime', None)

        # NOTE(review): "comittee" is the spelling this code reads from the
        # entry; presumably it mirrors the feed's element name - confirm
        # before "correcting" it.
        committee_text = entry.event.comittee.text
        if committee_text:
            committee_text = committee_text.strip()
            if chamber in ('Select Committee', 'General Committee'):
                self.committee_name = committee_text
            elif committee_text != "Prime Minister's Question Time":
                self.debate_type = committee_text

        self.people = []

        title_text = entry.event.inquiry.text
        if title_text:
            # A trailing " - A / B" part is treated as a list of people.
            m = re.search(' - ([^-]*)$', title_text)
            if m:
                person_texts = [x.strip() for x in m.group(1).split('/')]

                for person_text in person_texts:
                    member_id, name, cons = memberList.matchfullnamecons(person_text, None, self.event_date)
                    if not member_id:
                        try:
                            member_id = lordsList.GetLordIDfname(person_text, None, self.event_date)
                        except Exception:
                            # Best effort: a failed Lords lookup just means
                            # this name stays unmatched.  Catching Exception
                            # (not a bare except) lets KeyboardInterrupt and
                            # SystemExit propagate.
                            pass
                    if member_id:
                        self.people.append(int(memberList.membertoperson(member_id).replace('uk.org.publicwhip/person/', '')))

                # Only drop the suffix from the title when every name in it
                # was resolved to a person.
                if len(self.people) == len(person_texts):
                    title_text = title_text.replace(' - ' + m.group(1), '')

            self.title = title_text.strip()
        elif committee_text == "Prime Minister's Question Time":
            self.title = committee_text

        self.witnesses = []
        witness_text = entry.event.witnesses.text
        if witness_text == 'This is a private meeting.':
            self.title = witness_text
        elif witness_text:
            self.witnesses_str = witness_text.strip()
            # str.replace() below rewrites every occurrence of a name at
            # once, so each distinct name must be handled only once;
            # otherwise a repeated name would be wrapped in a second,
            # nested link and its person id recorded twice.
            linked = set()
            for mp in re.findall(r'\b(\w+ \w+ MP)', self.witnesses_str):
                if mp in linked:
                    continue
                linked.add(mp)
                member_id, name, cons = memberList.matchfullnamecons(mp, None, self.event_date)
                if not member_id:
                    continue
                pid = int(memberList.membertoperson(member_id).replace('uk.org.publicwhip/person/', ''))
                mp_link = '<a href="/mp/?p=%d">%s</a>' % (pid, mp)
                self.witnesses.append(pid)
                self.witnesses_str = self.witnesses_str.replace(mp, mp_link)

        location_text = entry.event.location.text
        if location_text:
            self.location = location_text.strip()

コード例 #2

0

ファイルを表示

ファイル: expenses.py プロジェクト: samknight/parlparse

#    matcher = '<TD ALIGN="LEFT" VALIGN="TOP"><A HREF="(/weblink/html/member.html/.*)/log=\d+/pos=\d+" TARGET="_parent"><font face="arial,helvetica" size=2>(.*)/(.*)</A></TD>\s*<TD ALIGN="LEFT" VALIGN="TOP"><font face="arial,helvetica" size=2>(.*)</TD>'
#    matches = re.findall(matcher, content)

	for line in content:
		cols = line.split("\t")
		first = cols[0]
		last = cols[1]
		cons = cols[2]
		money = cols[3:]
		money = map(lambda x: re.sub("\xa3","", x), money)
		money = map(lambda x: re.sub(",","", x), money)
		id, name, cons =  memberList.matchfullnamecons(first + " " + last, cons, yeardate)
		if not id:
			raise Exception, "Failed to find MP %s %s" % (first, last)

		pid = memberList.membertoperson(id)
#		print >>sys.stderr, last, first, money
		if pid in expmembers:
			print >>sys.stderr, "Ignored repeated entry for " , pid
		else:
			fout.write('<personinfo id="%s" ' % pid)
			for i in [ 0,1,2,3,4,5,6,7,8,9 ]:
				if (year=='2004'):
					if (i==7):
						col = '7a'
					elif (i==8 or i==9):
						col = i
					else:
						col = i+1
				else:
					if (i<9):

コード例 #3

0

ファイルを表示

ファイル: edmmotions.py プロジェクト: piersb/theyworkforyou

    '03': '2003',
    '02': '2002',
    '01': '2001',
    '00': '2000',
    '99': '1999',
    '98': '1998',
    '97': '1997'
}

signers = {}
edms = {}
sigs = {}
primary = {}
session = sys.argv[1]
for memberurl in edmList.edmlookups:
    pid = memberList.membertoperson(edmList.lookup(memberurl))
    m = re.search('=(.*?)SlAsHcOdEsTrInG(.*)', memberurl)
    lastname = urllib.unquote(m.group(1))
    firstname = urllib.unquote(m.group(2))
    pnum = int(re.sub('uk.org.publicwhip/person/', '', pid))
    #	print >> sys.stderr, "Member:%s, ID:%s, session:%s" % (memberurl,pid,sessions[session])
    content = get_member(memberurl, pnum, session)
    if re.search('no EDMs', content):
        continue
    for fix in fixes:
        content = re.sub(fix[0], fix[1], content)
    m = re.search('ound (\d+) EDMs? signed', content)
    total = int(m.group(1))
    matches = re.findall(matcher, content)
    count = 0
    for (type, ref, url, title, num, day, month, year) in matches:

コード例 #4

0

ファイルを表示

ファイル: regmemfilter.py プロジェクト: nrhorner/parlparse

def RunRegmemFilters2010(fout, text, sdate, sdatever):
        # Convert the "2010-?" format Register of Members' Interests markup in
        # `text` into XML written to `fout`.
        #
        # fout     -- object with a write() method; receives the XML output.
        # text     -- source markup; parsed below as a series of <page>
        #             elements, one per member.
        # sdate    -- date string used for member matching and written into
        #             each <regmem> element.
        # sdatever -- not used inside this function.
        #
        # Raises ContextException when a page heading cannot be split into
        # name/constituency or the member cannot be matched.
        print "2010-? new register of members interests!  Check it is working properly (via mpinfoin.pl) - %s" % sdate

        WriteXMLHeader(fout)
	fout.write("<publicwhip>\n")
        
        # ids of every member for whom an entry was written
        memberset = set()
        # drop highlight <span> wrappers, keeping the text inside them
        text = re.sub('<span class="highlight">([^<]*?)</span>', r'\1', text)
        t = BeautifulStoneSoup(text)
        for page in t('page'):
                # the <h2> heading is expected to read "Lastname, Firstname (Constituency)"
                title = page.h2.renderContents()
                res = re.search("^([^,]*), ([^(]*) \((.*)\)\s*$", title)
                if not res:
                        raise ContextException, "Failed to break up into first/last/cons: %s" % title
                (lastname, firstname, constituency) = res.groups()
                firstname = memberList.striptitles(firstname)[0].decode('utf-8')
                lastname = memberList.lowercaselastname(lastname).decode('utf-8')
                constituency = constituency.decode('utf-8')
                lastname = lastname.replace(u'O\u2019brien', "O'Brien") # Hmm
                (id, remadename, remadecons) = memberList.matchfullnamecons(firstname + " " + lastname, constituency, sdate)
                if not id:
                        raise ContextException, "Failed to match name %s %s (%s) date %s\n" % (firstname, lastname, constituency, sdate)
                fout.write(('<regmem personid="%s" memberid="%s" membername="%s" date="%s">\n' % (memberList.membertoperson(id), id, remadename, sdate)).encode("latin-1"))
                memberset.add(id)
                # per-member state: the open category (if any) and the current (a)/(b) subcategory
                category = None
                categoryname = None
                subcategory = None
                for row in page.h2.findNextSiblings():
                        # NOTE: this rebinds the `text` parameter; the source markup
                        # was already parsed into `t` above.
                        # Characters that iso-8859-1 cannot encode become XML character references.
                        text = row.renderContents().decode('utf-8').encode('iso-8859-1', 'xmlcharrefreplace')
                        # skip rows that are empty or contain only a full stop
                        if not text or re.match('\s*\.\s*$', text): continue
                        if re.match('\s*Nil\.?\s*$', text):
                                fout.write('Nil.\n')
                                continue
                        if row.name == 'h3':
                                if re.match('\s*$', text): continue
                                # "<number>. <name>" headings start a new category,
                                # closing the previous one first
                                m = re.match("\s*(\d\d?)\.\s*(.*)$", text)
                                if m:
                                        if category:
                                                fout.write('\t</category>\n')
                                        category, categoryname = m.groups()
                                        subcategory = None
                                        fout.write('\t<category type="%s" name="%s">\n' % (category, categoryname))
                                        continue
                                # an <h3> without a leading number falls through and
                                # is written out as an ordinary item below
                        if row.get('class') == 'spacer': continue
                        # rows starting "(a)" or "(b)" switch the current subcategory
                        subcategorymatch = re.match("\s*\(([ab])\)\s*(.*)$", text)
                        if subcategorymatch:
                                subcategory = subcategorymatch.group(1)
                                fout.write('\t\t(%s)\n' % subcategory)
                                fout.write('\t\t<item subcategory="%s">%s</item>\n' % (subcategory, subcategorymatch.group(2)))
                                continue
                        if subcategory:
                                fout.write('\t\t<item subcategory="%s">%s</item>\n' % (subcategory, text))
                        else:
                                fout.write('\t\t<item>%s</item>\n' % text)
                if category:
                        fout.write('\t</category>\n')
                fout.write('</regmem>\n')                                

        # members returned by memberList.mpslistondate for this date
        membersetexpect = set(memberList.mpslistondate(sdate))
        
        # check for missing/extra entries
        missing = membersetexpect.difference(memberset)
        if len(missing) > 0:
                print "Missing %d MP entries:\n" % len(missing), missing
        extra = memberset.difference(membersetexpect)
        if len(extra) > 0:
                print "Extra %d MP entries:\n" % len(extra), extra

	fout.write("</publicwhip>\n")

コード例 #5

0

ファイルを表示

ファイル: regmemfilter.py プロジェクト: nrhorner/parlparse

def RunRegmemFilters(fout, text, sdate, sdatever):
        # Convert Register of Members' Interests HTML (`text`) into XML
        # written to `fout`.
        #
        # fout     -- object with a write() method; receives the XML output.
        # text     -- source HTML; the table-based layout is parsed row by row.
        # sdate    -- date string; dates from 2010-09-01 onwards are handed to
        #             RunRegmemFilters2010 instead.
        # sdatever -- only passed through to RunRegmemFilters2010.
        #
        # Raises ContextException for an unparseable member heading, an
        # unmatched member name, or a table row of unrecognised shape.
        if sdate >= '2010-09-01':
                return RunRegmemFilters2010(fout, text, sdate, sdatever)

        # message for cron so I check I'm using this
        print "New register of members interests!  Check it is working properly (via mpinfoin.pl) - %s" % sdate

	text = ApplyFixSubstitutions(text, sdate, fixsubs)

        WriteXMLHeader(fout)
	fout.write("<publicwhip>\n")

        text = re.sub('Rt Shaun', 'Shaun', text) # Always get his name wrong
        text = re.sub('&#128;', '&#163;', text) # Always get some pound signs wrong
        # No re.DOTALL here, so a row is only picked up when its <TR>...</TR>
        # sits on a single line.
        rows = re.findall("<TR>(.*)</TR>", text)
        # Strip presentational markup, highlighting and footnote anchors from each row.
        rows = [ re.sub("&nbsp;", " ", row) for row in rows ]
        rows = [ re.sub("<B>|</B>|<BR>|`", "", row) for row in rows ]
        rows = [ re.sub('<span style="background-color: #FFFF00">|</span>', '', row) for row in rows ]
        rows = [ re.sub('<IMG SRC="3lev.gif">', "", row) for row in rows ]
        rows = [ re.sub("&#173;", "-", row) for row in rows ]
        rows = [ re.sub('\[<A NAME="n\d+"><A HREF="\#note\d+">\d+</A>\]', '', row) for row in rows ]
        rows = [ re.sub('\[<A NAME="n\d+">\d+\]', '', row) for row in rows ]

        # Fix incorrect tabling of categories when highlighting is in play
        rows = [ re.sub('<TD COLSPAN=4>(\d\.) ([^<]*?)</TD>', r'<TD>\1</TD><TD COLSPAN=3>\2</TD>', row) for row in rows ]
        # split into cells within a row
        rows = [ re.findall("<TD.*?>\s*(.*?)\s*</TD>", row) for row in rows ]

        # ids of every member for whom an entry was written
        memberset = set()
        # True while a <regmem> element is open and still needs its end tag
        needmemberend = False
        category = None
        categoryname = None
        subcategory = None
        # Each row is now a list of cell strings; the branch taken below
        # depends on how many cells there are and which leading cells are
        # empty.  The order of the elif tests matters.
        for row in rows:
                striprow = re.sub('</?[^>]+>', '', "".join(row))
                #print row
                if striprow.strip() == "":
                        # There is no text on the row, just tags
                        pass
                elif len(row) == 1 and re.match("(?i)(<i>)? +(</i>)?", row[0]):
                        # <TR><TD COLSPAN=4>&nbsp;</TD></TR>
                        pass
                elif len(row) == 1:
                        # <TR><TD COLSPAN=4><B>JACKSON, Robert (Wantage)</B></TD></TR>
                        # Member heading: closes any open category/member and opens a new <regmem>.
                        res = re.search("^([^,]*), ([^(]*) \((.*)\)$", row[0])
                        if not res:
                                print row
                                raise ContextException, "Failed to break up into first/last/cons: %s" % row[0]
                        (lastname, firstname, constituency) = res.groups()
                        constituency = constituency.replace(')', '')
                        constituency = constituency.replace('(', '')
                        firstname = memberList.striptitles(firstname)[0]

                        # Register came out after they stood down
                        # NOTE(review): both special cases match against 2009-06-08,
                        # although the MARTIN test compares with 2009-06-22 - confirm intended.
                        if (firstname == 'Ian' and lastname == 'GIBSON' and sdate > '2009-06-08') \
                            or (firstname == 'Michael' and lastname == 'MARTIN' and sdate > '2009-06-22'):
                                check_date = '2009-06-08'
                        else:
                                check_date = sdate
                        (id, remadename, remadecons) = memberList.matchfullnamecons(firstname + " " + memberList.lowercaselastname(lastname), constituency, check_date)
                        if not id:
                                raise ContextException, "Failed to match name %s %s (%s) date %s" % (firstname, lastname, constituency, sdate)
                        if category:
                                fout.write('\t</category>\n')
                        if needmemberend:
                                fout.write('</regmem>\n')                                
                                needmemberend = False
                        fout.write(('<regmem personid="%s" memberid="%s" membername="%s" date="%s">\n' % (memberList.membertoperson(id), id, remadename, sdate)).encode("latin-1"))
                        memberset.add(id)
                        needmemberend = True
                        category = None
                        categoryname = None
                        subcategory = None
                elif len(row) == 2 and row[0] == '' and re.match('Nil\.\.?', row[1]):
                        # <TR><TD></TD><TD COLSPAN=3><B>Nil.</B></TD></TR> 
                        fout.write('Nil.\n')
                elif len(row) == 2 and row[0] != '':
                        # <TR><TD><B>1.</B></TD><TD COLSPAN=3><B>Remunerated directorships</B></TD></TR>
                        # Category heading: close the previous category and open the new one.
                        if category:
                                fout.write('\t</category>\n')
                        digits = row[0]
                        # NOTE(review): if the first cell is not "<number>." re.match returns
                        # None and this line raises AttributeError rather than ContextException.
                        category = re.match("\s*(\d\d?)\.$", digits).group(1)
                        categoryname = row[1]
                        subcategory = None
                        fout.write('\t<category type="%s" name="%s">\n' % (category, categoryname))
                elif len(row) == 2 and row[0] == '':
                        # <TR><TD></TD><TD COLSPAN=3><B>Donations to the Office of the Leader of the Liberal Democrats received from:</B></TD></TR>
                        if subcategory:
                                fout.write('\t\t<item subcategory="%s">%s</item>\n' % (subcategory, FixHTMLEntities(row[1])))
                        else:
                                fout.write('\t\t<item>%s</item>\n' % FixHTMLEntities(row[1]))
                elif len(row) == 3 and row[0] == '' and row[1] == '':
                        # <TR><TD></TD><TD></TD><TD COLSPAN=2>19 and 20 September 2002, two days fishing on the River Tay in Scotland as a guest of Scottish Coal. (Registered 3 October 2002)</TD></TR>
                        if subcategory:
                                fout.write('\t\t<item subcategory="%s">%s</item>\n' % (subcategory, FixHTMLEntities(row[2])))
                        else:
                                fout.write('\t\t<item>%s</item>\n' % FixHTMLEntities(row[2]))
                elif len(row) == 3 and row[0] == '':
                        # <TR><TD></TD><TD><B>(a)</B></TD><TD COLSPAN=2>Smithville Associates; training consultancy.</TD></TR>
                        # The second and third cells are joined into one item; the
                        # current subcategory is not changed here.
                        if subcategory:
                                fout.write('\t\t<item subcategory="%s">%s</item>\n' % (subcategory, FixHTMLEntities(row[1] + ' ' + row[2])))
                        else:
                                fout.write('\t\t<item>%s</item>\n' % FixHTMLEntities(row[1] + ' ' + row[2]))
                elif len(row) == 4 and row[0] == '' and (row[1] == '' or row[1] == '<IMG SRC="3lev.gif">'):
                        # <TR><TD></TD><TD></TD><TD>(b)</TD><TD>Great Portland Estates PLC</TD></TR>
                        # A third cell of exactly "(a)" or "(b)" starts a new subcategory;
                        # anything else is ordinary item text spread over two cells.
                        subcategorymatch = re.match("\(([ab])\)$", row[2])
                        if not subcategorymatch:
                                content = FixHTMLEntities(row[2] + " " + row[3])
                                if subcategory:
                                        fout.write('\t\t<item subcategory="%s">%s</item>\n' % (subcategory, content))
                                else:
                                        fout.write('\t\t<item>%s</item>\n' % content)
                        else:
                                subcategory = subcategorymatch.group(1)
                                fout.write('\t\t(%s)\n' % subcategory)
                                fout.write('\t\t<item subcategory="%s">%s</item>\n' % (subcategory, FixHTMLEntities(row[3])))
                else:
                        print row
                        raise ContextException, "Unknown row type match, length %d" % (len(row))
        # close whatever is still open after the last row
        if category:
                fout.write('\t</category>\n')
        if needmemberend:
                fout.write('</regmem>\n')                                
                needmemberend = False

        # members returned by memberList.mpslistondate for this date
        membersetexpect = set(memberList.mpslistondate(sdate))
        
        # check for missing/extra entries
        missing = membersetexpect.difference(memberset)
        if len(missing) > 0:
                print "Missing %d MP entries:\n" % len(missing), missing
        extra = memberset.difference(membersetexpect)
        if len(extra) > 0:
                print "Extra %d MP entries:\n" % len(extra), extra

	fout.write("</publicwhip>\n")

コード例 #6

0

ファイルを表示

ファイル: bbcconv.py プロジェクト: JonathanBowker/parlparse

        match = map(lambda x: re.sub("\xa0", "", x), match)
        match = map(lambda x: x.strip(), match)
        (url, cons, name) = match

        # Not in aliases file - see comment there (it's to
        # avoid ambiguity in debates parsing)
        if cons == 'Great Yarmouth' and name == 'Tony Wright':
            name = 'Anthony D Wright'

        id, canonname, canoncons =  memberList.matchfullnamecons(name, cons, date_today)
        if not id:
            print >>sys.stderr, "Failed to match %s %s %s" % (name, cons, date_today)
            continue
        url = urlparse.urljoin(bbc_index_url, url)

        pid = memberList.membertoperson(id)
        if pid in bbcmembers:
            print >>sys.stderr, "Ignored repeated entry for " , pid
        else:
            print '<personinfo id="%s" bbc_profile_url="%s" />' % (pid, url)

        bbcmembers.add(pid)

    sys.stdout.flush()

print '</publicwhip>'

# Check we have everybody
allmembers = sets.Set([ memberList.membertoperson(id) for id in memberList.currentmpslist() ])
symdiff = allmembers.symmetric_difference(bbcmembers)
if len(symdiff) > 0:

コード例 #7

0

ファイルを表示

    def __init__(self, entry):
        """Populate this event from one parsed calendar feed entry.

        Reads the children of ``entry.event`` plus ``entry.guid`` and
        ``entry.link`` and stores the derived values as attributes.

        Always set: ``id``, ``deleted``, ``link_calendar``,
        ``link_external``, ``chamber``, ``event_date``, ``time_start``,
        ``time_end``, ``people`` and ``witnesses``.

        Only set when the feed supplies the matching data (callers must
        not assume they exist): ``committee_name``, ``debate_type``,
        ``title``, ``witnesses_str`` and ``location``.
        """
        self.id = entry.event.attrib['id']
        self.deleted = 0
        self.link_calendar = entry.guid
        self.link_external = entry.link
        chamber = entry.event.chamber.text.strip()
        self.chamber = '%s: %s' % (entry.event.house.text.strip(), chamber)
        self.event_date = entry.event.date.text
        self.time_start = getattr(entry.event, 'startTime', None)
        self.time_end = getattr(entry.event, 'endTime', None)

        # NOTE(review): "comittee" is the spelling this code reads from the
        # entry; presumably it mirrors the feed's element name - confirm
        # before "correcting" it.
        committee_text = entry.event.comittee.text
        if committee_text:
            committee_text = committee_text.strip()
            if chamber in ('Select Committee', 'General Committee'):
                self.committee_name = committee_text
            elif committee_text != "Prime Minister's Question Time":
                self.debate_type = committee_text

        self.people = []

        title_text = entry.event.inquiry.text
        if title_text:
            # A trailing " - A / B" part is treated as a list of people.
            m = re.search(' - ([^-]*)$', title_text)
            if m:
                person_texts = [x.strip() for x in m.group(1).split('/')]

                for person_text in person_texts:
                    member_id, name, cons = memberList.matchfullnamecons(person_text, None, self.event_date)
                    if not member_id:
                        try:
                            member_id = lordsList.GetLordIDfname(person_text, None, self.event_date)
                        except Exception:
                            # Best effort: a failed Lords lookup just means
                            # this name stays unmatched.  Catching Exception
                            # (not a bare except) lets KeyboardInterrupt and
                            # SystemExit propagate.
                            pass
                    if member_id:
                        self.people.append(int(memberList.membertoperson(member_id).replace('uk.org.publicwhip/person/', '')))

                # Only drop the suffix from the title when every name in it
                # was resolved to a person.
                if len(self.people) == len(person_texts):
                    title_text = title_text.replace(' - ' + m.group(1), '')

            self.title = title_text.strip()
        elif committee_text == "Prime Minister's Question Time":
            self.title = committee_text

        self.witnesses = []
        witness_text = entry.event.witnesses.text
        if witness_text == 'This is a private meeting.':
            self.title = witness_text
        elif witness_text:
            self.witnesses_str = witness_text.strip()
            # str.replace() below rewrites every occurrence of a name at
            # once, so each distinct name must be handled only once;
            # otherwise a repeated name would be wrapped in a second,
            # nested link and its person id recorded twice.
            linked = set()
            for mp in re.findall(r'\b(\w+ \w+ MP)', self.witnesses_str):
                if mp in linked:
                    continue
                linked.add(mp)
                member_id, name, cons = memberList.matchfullnamecons(mp, None, self.event_date)
                if not member_id:
                    continue
                pid = int(memberList.membertoperson(member_id).replace('uk.org.publicwhip/person/', ''))
                mp_link = '<a href="/mp/?p=%d">%s</a>' % (pid, mp)
                self.witnesses.append(pid)
                self.witnesses_str = self.witnesses_str.replace(mp, mp_link)

        location_text = entry.event.location.text
        if location_text:
            self.location = location_text.strip()

コード例 #8

0

ファイルを表示

ファイル: journa-list-scrape.py プロジェクト: JonathanBowker/parlparse

    try:
        attr = memberList.getmember(member)
        fullname = attr["firstname"] + " " + attr["lastname"]

        # Load search page from journa-list
        params = {}
        params['name'] = fullname
        params = urllib.urlencode(params)
        ur = urllib.urlopen("http://www.journalisted.com/list", params)
        content = ur.read()
        ur.close()

        # Find match count
        match = re.search("""<p\>(\d+) Matches\<\/p\>""", content) 
        assert match, "%s\ndidn't find matches count %s" % (content, fullname)
        matches = match.groups()[0]
        matches = int(matches)

        if matches > 0:
            print fullname.encode('utf-8'), matches
            print memberList.membertoperson(member)

            links = re.findall("""\<li\>\<a href="([^"]+)">[^<]+\<\/a\>\<\/li\>""", content)
            assert links, "%s\ndidn't find links despite matches %s" % (content, fullname)
            print links
    except:
        print >>sys.stderr, "trouble with " + member

コード例 #9

0

ファイルを表示

ファイル: edmmotions.py プロジェクト: DanielKinsman/twfy

matcher += '<font face="arial,helvetica" size=2>(?:<[BI]>)?([^<]*?)</font></A>\s*'
matcher += '</TD>\s*<!-- \*\*\* Signatures -->.*?'
matcher += '(?:<font face="arial,helvetica" size=2>(?:<[BI]>)?(\d+) &nbsp;&nbsp;</font>\s*)?'
matcher += '</TD>\s*<!-- \*\*\* Motion date \*\*\* -->.*?'
matcher += '<font face="arial,helvetica" size=2>(?:<[BI]>)?(\d\d)\.(\d\d)\.(\d\d)</FONT>'
matcher += '(?s)'

sessions = {'05':'2005', '':'2004', '04':'2004', '03':'2003', '02':'2002', '01':'2001', '00':'2000', '99':'1999', '98':'1998', '97':'1997'}

signers = {}
edms = {}
sigs = {}
primary = {}
session = sys.argv[1]
for memberurl in edmList.edmlookups:
	pid = memberList.membertoperson(edmList.lookup(memberurl))
	m = re.search('=(.*?)SlAsHcOdEsTrInG(.*)', memberurl)
	lastname = urllib.unquote(m.group(1))
	firstname = urllib.unquote(m.group(2))
	pnum = int(re.sub('uk.org.publicwhip/person/','',pid))
#	print >> sys.stderr, "Member:%s, ID:%s, session:%s" % (memberurl,pid,sessions[session])
	content = get_member(memberurl, pnum, session)
	if re.search('no EDMs', content):
		continue;
	for fix in fixes:
		content = re.sub(fix[0], fix[1], content)
	m = re.search('ound (\d+) EDMs? signed', content)
	total = int(m.group(1))
	matches = re.findall(matcher, content)
	count = 0
	for (type, ref, url, title, num, day, month, year) in matches:

コード例 #10

0

ファイルを表示

    if constituency == "Trumpton":  # i didn't know james was religious
        continue
    if constituency == "Stefstown":  # i didn't know stef was knighted
        continue

    try:
        mp_id, name, cons = memberList.matchfullnamecons(
            mp_name, constituency, made_date)
    except Exception, e:
        print >> sys.stderr, "FaxYourMP name match failed", e
    else:
        if not mp_id:
            print >> sys.stderr, "FaxYourMP name match failed %s, %s" % (
                mp_name, constituency)
        else:
            id = memberList.membertoperson(mp_id)
            if vote.lower() == "no":
                nohash[id] = nohash.get(id, 0) + 1
            elif vote.lower() == "yes" or vote.lower() == "yes" + chr(160):
                yeshash[id] = yeshash.get(id, 0) + 1
            elif vote == "":
                # print >>sys.stderr, "Blank vote"
                # Ignore for now
                pass
            else:
                print >> sys.stderr, standee_name, made, made_date, vote, constituency, mp_name, "--", messagetype, id, name, cons
                print >> sys.stderr, "Strange vote %s" % vote

ih.close()

コード例 #11

0

ファイルを表示

ファイル: wikipedia-msp.py プロジェクト: nrhorner/parlparse

matches.append(('/wiki/Nicholas_Johnston', 'Nick Johnston'))

for (url, name) in matches:
    id_list = None
    #cons = cons.decode('utf-8')
    #cons = cons.replace('&amp;', '&')
    name = name.decode('utf-8')
    try:
        id_list = memberList.match_string_somehow(name, None, '', True)
    except Exception, e:
        print >>sys.stderr, e
    if not id_list:
        continue

    for id_to_add in id_list:
        pid = memberList.membertoperson(id_to_add)
        wikimembers[pid] = url

print '''<?xml version="1.0" encoding="ISO-8859-1"?>
<publicwhip>'''
k = wikimembers.keys()
k.sort()
for id in k:
    url = urlparse.urljoin(wiki_index_urls[0], wikimembers[id])
    print '<personinfo id="%s" wikipedia_url="%s" />' % (id, url)
print '</publicwhip>'

wikimembers = set(wikimembers.keys())
allmembers = set([ memberList.membertoperson(id) for id in memberList.list_all_dates() ])

symdiff = allmembers.symmetric_difference(wikimembers)

コード例 #12

0

ファイルを表示

        (url, cons, name) = match

        # Not in aliases file - see comment there (it's to
        # avoid ambiguity in debates parsing)
        if cons == 'Great Yarmouth' and name == 'Tony Wright':
            name = 'Anthony D Wright'

        id, canonname, canoncons = memberList.matchfullnamecons(
            name, cons, date_today)
        if not id:
            print >> sys.stderr, "Failed to match %s %s %s" % (name, cons,
                                                               date_today)
            continue
        url = urlparse.urljoin(bbc_index_url, url)

        pid = memberList.membertoperson(id)
        if pid in bbcmembers:
            print >> sys.stderr, "Ignored repeated entry for ", pid
        else:
            print '<personinfo id="%s" bbc_profile_url="%s" />' % (pid, url)

        bbcmembers.add(pid)

    sys.stdout.flush()

print '</publicwhip>'

# Check we have everybody
allmembers = sets.Set(
    [memberList.membertoperson(id) for id in memberList.currentmpslist()])
symdiff = allmembers.symmetric_difference(bbcmembers)

コード例 #13

0

ファイルを表示

ファイル: journa-list-scrape.py プロジェクト: samknight/parlparse

for member in allmembers:
    try:
        attr = memberList.getmember(member)
        fullname = attr["firstname"] + " " + attr["lastname"]

        # Load search page from journa-list
        params = {}
        params['name'] = fullname
        params = urllib.urlencode(params)
        ur = urllib.urlopen("http://www.journalisted.com/list", params)
        content = ur.read()
        ur.close()

        # Find match count
        match = re.search("""<p\>(\d+) Matches\<\/p\>""", content)
        assert match, "%s\ndidn't find matches count %s" % (content, fullname)
        matches = match.groups()[0]
        matches = int(matches)

        if matches > 0:
            print fullname.encode('utf-8'), matches
            print memberList.membertoperson(member)

            links = re.findall(
                """\<li\>\<a href="([^"]+)">[^<]+\<\/a\>\<\/li\>""", content)
            assert links, "%s\ndidn't find links despite matches %s" % (
                content, fullname)
            print links
    except:
        print >> sys.stderr, "trouble with " + member

コード例 #14

0

ファイルを表示

ファイル: faxyourmpfinaladdup.py プロジェクト: JonathanBowker/parlparse

    if constituency == "South Tomshire": # better keep rosa's membership of parliament secret
        continue
    if constituency == "Trumpton": # i didn't know james was religious
        continue
    if constituency == "Stefstown": # i didn't know stef was knighted
        continue
    
    try:
        mp_id, name, cons =  memberList.matchfullnamecons(mp_name, constituency, made_date)
    except Exception, e:
        print >>sys.stderr, "FaxYourMP name match failed", e
    else:
        if not mp_id:
            print >>sys.stderr, "FaxYourMP name match failed %s, %s" % (mp_name, constituency)
        else:
            id = memberList.membertoperson(mp_id)
            if vote.lower() == "no":
                nohash[id] = nohash.get(id, 0) + 1
            elif vote.lower() == "yes" or vote.lower() == "yes"+chr(160):
                yeshash[id] = yeshash.get(id, 0) + 1
            elif vote == "":
                # print >>sys.stderr, "Blank vote"
                # Ignore for now
                pass
            else:
                print >>sys.stderr, standee_name,made, made_date,vote,constituency,mp_name,"--",messagetype, id, name, cons
                print >>sys.stderr, "Strange vote %s" % vote

ih.close()

def responsiveness(id):