def create_feed(bezirk, gremium, gremium_name, outfile):
    """Build RSS feed items for the sessions (Sitzungen) of one committee.

    bezirk       -- district identifier used in scraper URLs
    gremium      -- committee id
    gremium_name -- human-readable committee name (not used in the visible part)
    outfile      -- target path for the generated feed
                    (NOTE(review): the chunk ends before feeditems is written out)
    """
    # fetch complete previous year: same month last year up to the current month
    year_current = datetime.datetime.now().year
    year_prev = year_current - 1
    month = datetime.datetime.now().month
    sitzungen = extractor.run_extractor(bvvfeeds.extractors['sitzungen'], (bezirk, gremium, year_prev, month, year_current, month))
    # newest sessions first
    sitzungen = sorted(sitzungen, key=lambda s : s['date'], reverse=True)
    feeditems = []
    for sitzung in sitzungen:
        # a session without date or name cannot become a feed item
        if (sitzung['date'] == None or sitzung['name'] == None):
            print "Skipping Sitzung:" + str(sitzung)
            continue
        if (sitzung['id'] == None):
            # no session id: link to the committee page, agenda unavailable
            title = sitzung['date'].strftime('%Y-%m-%d') + ': ' + sitzung['name']
            link = bvvfeeds.link_gremium % (bezirk,gremium)
            description = '(Tagesordnung nicht verfügbar.)'
        else:
            # session id known: link to the session itself and embed its agenda
            title = sitzung['date'].strftime('%Y-%m-%d') + ': ' + sitzung['name'] + " (" + str(sitzung['id']) + ")"
            link = bvvfeeds.link_sitzung % (bezirk,sitzung['id'])
            description = list_tops(bezirk, sitzung)
        try:
            # content-derived GUID: item is considered new whenever
            # title/link/description change
            guid = PyRSS2Gen.Guid(hashlib.md5(pickle.dumps((title,link,description))).hexdigest(), isPermaLink=False)
            item = PyRSS2Gen.RSSItem(title = title, link = link, description = description, guid = guid, pubDate = sitzung['date'])
            feeditems.append(item)
        except Exception, e:
            # best effort: report the broken session and keep building the feed
            print e
            print sitzung
            continue
def create_feed(bezirk, outfile): drucksachen = extractor.run_extractor(bvvfeeds.extractors['drucksachen'], (bezirk)) # replace empty date fields with current date date_current = datetime.datetime.today() for ds in drucksachen: if not ds['date']: ds['date'] = date_current else: ds['date'] = ds['date'] drucksachen = sorted(drucksachen, key=lambda s: s['date'], reverse=True) feeditems = [] for drucksache in drucksachen: title = drucksache['name'] link = "http://www.berlin.de/ba-%s/bvv-online/vo020.asp?VOLFDNR=%s" % ( bezirk, drucksache['id']) description = "Art: %s<br />Initiator: %s<br />Link: <a href=\"%s\">%s</a>" % ( drucksache['type'], drucksache['initiator'], link, link) try: guid = PyRSS2Gen.Guid(link, isPermaLink=True) item = PyRSS2Gen.RSSItem(title=title, link=link, description=description, guid=guid, pubDate=drucksache['date']) feeditems.append(item) except Exception, e: print e print drucksache continue
def create_feed(bezirk, outfile): drucksachen = extractor.run_extractor(bvvfeeds.extractors['drucksachen'], (bezirk)) # replace empty date fields with current date date_current = datetime.datetime.today() for ds in drucksachen: if not ds['date']: ds['date'] = date_current else: ds['date'] = ds['date'] drucksachen = sorted(drucksachen, key=lambda s : s['date'], reverse=True) feeditems = [] for drucksache in drucksachen: title = drucksache['name'] link = "http://www.berlin.de/ba-%s/bvv-online/vo020.asp?VOLFDNR=%s" % (bezirk, drucksache['id']) description = "Art: %s<br />Initiator: %s<br />Link: <a href=\"%s\">%s</a>" % (drucksache['type'], drucksache['initiator'], link, link); try: guid = PyRSS2Gen.Guid(link, isPermaLink=True) item = PyRSS2Gen.RSSItem(title = title, link = link, description = description, guid = guid, pubDate = drucksache['date']) feeditems.append(item) except Exception, e: print e print drucksache continue
def create_feed(bezirk, gremium, gremium_name, outfile):
    """Build RSS feed items for the sessions (Sitzungen) of one committee.

    bezirk       -- district identifier used in scraper URLs
    gremium      -- committee id
    gremium_name -- human-readable committee name (not used in the visible part)
    outfile      -- target path for the generated feed
                    (NOTE(review): the chunk ends before feeditems is written out)
    """
    # fetch complete previous year: same month last year up to the current month
    year_current = datetime.datetime.now().year
    year_prev = year_current - 1
    month = datetime.datetime.now().month
    sitzungen = extractor.run_extractor(
        bvvfeeds.extractors["sitzungen"], (bezirk, gremium, year_prev, month, year_current, month)
    )
    # newest sessions first
    sitzungen = sorted(sitzungen, key=lambda s: s["date"], reverse=True)
    feeditems = []
    for sitzung in sitzungen:
        # a session without date or name cannot become a feed item
        if sitzung["date"] == None or sitzung["name"] == None:
            print "Skipping Sitzung:" + str(sitzung)
            continue
        if sitzung["id"] == None:
            # no session id: link to the committee page, agenda unavailable
            title = sitzung["date"].strftime("%Y-%m-%d") + ": " + sitzung["name"]
            link = bvvfeeds.link_gremium % (bezirk, gremium)
            description = "(Tagesordnung nicht verfügbar.)"
        else:
            # session id known: link to the session itself and embed its agenda
            title = sitzung["date"].strftime("%Y-%m-%d") + ": " + sitzung["name"] + " (" + str(sitzung["id"]) + ")"
            link = bvvfeeds.link_sitzung % (bezirk, sitzung["id"])
            description = list_tops(bezirk, sitzung)
        try:
            # content-derived GUID: item is considered new whenever
            # title/link/description change
            guid = PyRSS2Gen.Guid(hashlib.md5(pickle.dumps((title, link, description))).hexdigest(), isPermaLink=False)
            item = PyRSS2Gen.RSSItem(
                title=title, link=link, description=description, guid=guid, pubDate=sitzung["date"]
            )
            feeditems.append(item)
        except Exception, e:
            # best effort: report the broken session and keep building the feed
            print e
            print sitzung
            continue
def list_tops(bezirk, sitzung):
    """Render the agenda (Tagesordnung) of one session as an HTML fragment.

    Accumulates one <ul>...</ul> block per agenda item (TOP) into *abstract*.
    NOTE(review): the chunk ends before a return statement is visible;
    presumably *abstract* is returned — confirm against the full file.
    """
    tops = extractor.run_extractor(bvvfeeds.extractors["tagesordnung"], (bezirk, sitzung["id"]))
    abstract = ""
    for top in tops:
        # items without a name carry no useful information
        if top["name"] == None:
            continue
        try:
            abstract += "<ul>" + format_top(bezirk, top) + "</ul>"
        except Exception, e:
            # best effort: report the failing item, keep the rest of the agenda
            print e
            print sitzung
            print top
            continue
def list_tops(bezirk, sitzung):
    """Render the agenda (Tagesordnung) of one session as an HTML fragment.

    Accumulates one <ul>...</ul> block per agenda item (TOP) into *abstract*.
    NOTE(review): the chunk ends before a return statement is visible;
    presumably *abstract* is returned — confirm against the full file.
    """
    tops = extractor.run_extractor(bvvfeeds.extractors['tagesordnung'], (bezirk, sitzung['id']))
    abstract = ''
    for top in tops:
        # items without a name carry no useful information
        if top['name'] == None:
            continue
        try:
            abstract += '<ul>' + format_top(bezirk, top) + '</ul>'
        except Exception, e:
            # best effort: report the failing item, keep the rest of the agenda
            print e
            print sitzung
            print top
            continue
def pretty_name(g):
    """Normalize a committee name to a short lower-case key.

    Applies the ordered pair replacements from pretty_replacements, then
    consults pretty_replacements_map for a whole-name override.
    """
    g = g.encode('utf-8')
    g = g.lower()
    for k, v in pretty_replacements:
        g = g.replace(k, v)
    if pretty_replacements_map.has_key(g):
        return pretty_replacements_map[g]
    return g

# Collect, per district, the active committees keyed by their pretty name.
gremien_collection = {}
for b in bvvfeeds.bezirke:
    print b
    gremien_collection[b] = {}
    gremien = extractor.run_extractor(bvvfeeds.extractors['gremien'], (b))
    for gremium in gremien:
        # skip committees flagged inactive or with an empty name
        if re.search("inaktiv", gremium['name']):
            continue
        if gremium['name'].strip() == "":
            continue
        # the detail page carries the authoritative committee name
        gremium_detail = extractor.run_extractor(
            bvvfeeds.extractors['gremium'], (b, gremium['id']))
        name = gremium_detail[0]['name']
        # collapse all BVV plenary name variants to the single label 'BVV'
        if re.search("^(BVV|Bezirksverordnetenversammlung).*", name):
            name = 'BVV'
        pname = pretty_name(name)
        if gremien_collection[b].has_key(pname):
            # a committee with the same name is already in the list
            if int(gremien_collection[b][pname][0]) < int(gremium['id']):
                # use the one with the higher ID
#!/usr/bin/python
## -*- coding: utf-8 -*-
# Scrape the list of Berlin districts (Bezirke) and dump the mapping
# short name -> normalized full name as UTF-8 JSON to data/bezirke.json.

import bvvfeeds
import extractor
import re
import json

bezirke_collection = {}
bezirke = extractor.run_extractor(bvvfeeds.extractors['bezirke'], ())

def fix_fullname(b):
    # Normalize spaced hyphens ("Foo - Bar") to plain hyphens ("Foo-Bar").
    return b.replace(" - ", "-")

for bezirk in bezirke:
    bezirke_collection[bezirk['name']] = fix_fullname(bezirk['fullname'])

# 'with' guarantees the file is closed even if dumps()/write() raises
# (the original leaked the handle on failure).
with open(bvvfeeds.application_root + '/data/bezirke.json', 'w') as f:
    f.write(json.dumps(bezirke_collection, indent=4, ensure_ascii=False).encode('utf-8'))
def test_extractor(name, arguments): entries = extractor.run_extractor(bvvfeeds.extractors[name], arguments) # entries = sorted(entries, key=lambda e: e['datum']) for entry in entries: print entry
#!/usr/bin/python
## -*- coding: utf-8 -*-
# Scrape the list of Berlin districts (Bezirke) and dump the mapping
# short name -> normalized full name as UTF-8 JSON to data/bezirke.json.

import bvvfeeds
import extractor
import re
import json

bezirke_collection = {}
bezirke = extractor.run_extractor(bvvfeeds.extractors['bezirke'], ())

def fix_fullname(b):
    # Normalize spaced hyphens ("Foo - Bar") to plain hyphens ("Foo-Bar").
    return b.replace(" - ", "-")

for bezirk in bezirke:
    bezirke_collection[bezirk['name']] = fix_fullname(bezirk['fullname'])

# 'with' guarantees the file is closed even if dumps()/write() raises
# (the original leaked the handle on failure).
with open(bvvfeeds.application_root + '/data/bezirke.json', 'w') as f:
    f.write(
        json.dumps(bezirke_collection, indent=4, ensure_ascii=False).encode('utf-8'))
def pretty_name(g):
    """Normalize a committee name to a short lower-case key.

    Applies the ordered pair replacements from pretty_replacements, then
    consults pretty_replacements_map for a whole-name override.
    """
    g = g.encode('utf-8')
    g = g.lower()
    for k, v in pretty_replacements:
        g = g.replace(k, v)
    if pretty_replacements_map.has_key(g):
        return pretty_replacements_map[g]
    return g

# Collect, per district, the active committees keyed by their pretty name.
gremien_collection = {}
for b in bvvfeeds.bezirke:
    print b
    gremien_collection[b] = {}
    gremien = extractor.run_extractor(bvvfeeds.extractors['gremien'], (b))
    for gremium in gremien:
        # skip committees flagged inactive or with an empty name
        if re.search("inaktiv", gremium['name']):
            continue
        if gremium['name'].strip() == "":
            continue
        # the detail page carries the authoritative committee name
        gremium_detail = extractor.run_extractor(bvvfeeds.extractors['gremium'], (b, gremium['id']))
        name = gremium_detail[0]['name']
        # collapse all BVV plenary name variants to the single label 'BVV'
        if re.search("^(BVV|Bezirksverordnetenversammlung).*", name):
            name = 'BVV'
        pname = pretty_name(name)
        if gremien_collection[b].has_key(pname):
            # a committee with the same name is already in the list
            if int(gremien_collection[b][pname][0]) < int(gremium['id']):
                # use the one with the higher ID
                gremien_collection[b][pname] = [gremium['id'], name]