Exemple #1
0
 def get_meeting(self, page):
     """Assemble the collected data for the meeting shown on ``page``.

     Returns a dict with two keys: ``items`` (the result of
     ``get_meeting_items``) and ``info`` (the result of
     ``get_meeting_info`` with ``id``, ``guid``, ``link`` and ``dept_id``
     filled in).
     """
     meeting_items = self.get_meeting_items(page)
     meeting_info = self.get_meeting_info(page)
     # FIXME: THIS DEPENDS ON USING collect()
     meeting_id, meeting_guid = legistar_id_guid(self.url)
     meeting_info['id'] = meeting_id
     meeting_info['guid'] = meeting_guid
     meeting_info['link'] = self.url
     # Only the id half of the department link is kept; its guid is dropped.
     dept_id, _dept_guid = legistar_id_guid(meeting_info['dept'])
     meeting_info['dept_id'] = dept_id
     return {'items': meeting_items, 'info': meeting_info}
Exemple #2
0
 def get_people_ids(self):
     """Return a dict mapping person id to the stripped anchor text.

     The anchors come from ``_get_people_anchors``; the id is the first
     value returned by ``legistar_id_guid`` for the anchor's ``href``.
     """
     people = {}
     for link in self._get_people_anchors():
         label = link.text.strip()
         person_id, _guid = legistar_id_guid(link['href'])
         people[person_id] = label
     return people
Exemple #3
0
    def add_collected_action(self, item_id, action):
        """Store a collected action together with its item link and votes.

        ``action`` is the collected action dict. It is modified in place:
        its ``'ftype'`` value is copied to ``'filetype'``. One ``Action``,
        one ``ItemAction`` and one ``ActionVote`` are added to the session.
        Returns None.
        """
        record = Action()
        action['filetype'] = action['ftype']
        is_roll_call = action['roll_call']

        # Roll calls only carry id/guid; other actions carry the full set.
        copied_fields = ['id', 'guid']
        if not is_roll_call:
            copied_fields.extend(
                ['action', 'action_text', 'result', 'filetype'])
        for field in copied_fields:
            setattr(record, field, action[field])

        if is_roll_call:
            record.action = 'Roll Call'
        else:
            # Blank notes are stored as None rather than as empty strings.
            for field in ('agenda_note', 'minutes_note'):
                note = action[field].strip()
                setattr(record, field, note if note else None)
            # A mover/seconder without a name leaves its *_id untouched.
            for role in ('mover', 'seconder'):
                person_name, person_link = action[role]
                if person_name:
                    person_id, _guid = legistar_id_guid(person_link)
                    setattr(record, '%s_id' % role, person_id)
            file_id, _file_link = action['file_id']
            record.file_id = file_id

        self.session.add(record)
        # flush here so the action can be referred by
        # foreign keys in 'item_action' and 'action_vote'
        self.session.flush()
        self.session.add(ItemAction(item_id, record.id))

        # Votes are numbered from 1 in the order they were collected and
        # written to the ward<N> / ward<N>_person_id attributes.
        ballot = {'action_id': record.id}
        for ward, (_name, link, vote) in enumerate(action['votes'], start=1):
            voter_id, _guid = legistar_id_guid(link)
            ballot['ward{}_person_id'.format(ward)] = voter_id
            ballot['ward{}'.format(ward)] = vote

        vote_row = ActionVote()
        for attribute, value in ballot.items():
            setattr(vote_row, attribute, value)
        self.session.add(vote_row)
Exemple #4
0
 def _get_depts(self, page):
     """Collect the department links found on ``page``.

     Asks ``page.find_all`` for ``'a'`` elements whose ``id`` matches
     ``.+_hypBody$`` and returns a list of ``(id, guid, name)`` tuples,
     where ``name`` is the stripped anchor text.
     """
     dept_link_id = re.compile('.+_hypBody$')
     found = []
     for link in page.find_all('a', id=dept_link_id):
         dept_id, dept_guid = legistar_id_guid(link['href'])
         found.append((dept_id, dept_guid, link.text.strip()))
     return found
Exemple #5
0
 def _get_depts(self, page):
     """Return ``(id, guid, name)`` tuples for the department anchors.

     The anchors are the ``'a'`` elements of ``page`` whose ``id`` matches
     ``.+_hypBody$``; ``name`` is the anchor text with surrounding
     whitespace removed.
     """
     links = page.find_all('a', id=re.compile('.+_hypBody$'))
     result = []
     for link in links:
         dept_id, dept_guid = legistar_id_guid(link['href'])
         label = link.text.strip()
         result.append((dept_id, dept_guid, label))
     return result
Exemple #6
0
 def _add_meeting_from_rss(self, entry):
     """Create a ``Meeting`` from an RSS ``entry`` and add it to the session.

     The work is done inside ``transaction.manager``. Returns the result
     of merging the new meeting back into the session afterwards.
     """
     with transaction.manager:
         meeting = Meeting()
         meeting.title = entry.title
         meeting.link = entry.link
         meeting.rss = entry
         meeting_id, meeting_guid = legistar_id_guid(entry.link)
         meeting.id = meeting_id
         meeting.guid = meeting_guid
         meeting.updated = datetime.now()
         self.session.add(meeting)
     return self.session.merge(meeting)
Exemple #7
0
 def add_meeting_from_rss(self, entry):
     """Create a ``Meeting`` from an RSS ``entry`` and commit it.

     The meeting's title and link come from the entry, the entry itself is
     stored in ``rss``, and the id/guid are extracted from the entry link
     by ``legistar_id_guid``. Returns None.

     Any exception raised while building or flushing the meeting
     propagates to the caller after the transaction has been aborted.
     """
     # ``with transaction.manager`` begins a transaction, commits it when
     # the body succeeds and aborts it when the body raises, so a failure
     # no longer leaves a half-finished transaction open.
     with transaction.manager:
         meeting = Meeting()
         meeting.title = entry.title
         meeting.link = entry.link
         meeting.rss = entry
         meeting.id, meeting.guid = legistar_id_guid(entry.link)
         meeting.updated = datetime.now()
         self.session.add(meeting)
         self.session.flush()
Exemple #8
0
 def add(self, item_id, actiondata):
     """Store an action, its item link and its votes; return the action.

     ``actiondata`` is the collected action dict. It is modified in place:
     its ``'ftype'`` value is copied to ``'filetype'``. Everything is
     added inside ``transaction.manager``; the return value is the new
     ``Action`` merged back into the session.
     """
     with transaction.manager:
         record = Action()
         actiondata['filetype'] = actiondata['ftype']
         is_roll_call = actiondata['roll_call']

         # Roll calls only carry id/guid; other actions carry the full set.
         copied_fields = ['id', 'guid']
         if not is_roll_call:
             copied_fields.extend(
                 ['action', 'action_text', 'result', 'filetype'])
         for field in copied_fields:
             setattr(record, field, actiondata[field])

         if is_roll_call:
             record.action = 'Roll Call'
         else:
             # Blank notes are stored as None rather than as empty strings.
             for field in ('agenda_note', 'minutes_note'):
                 note = actiondata[field].strip()
                 setattr(record, field, note if note else None)
             # A mover/seconder without a name leaves its *_id untouched.
             for role in ('mover', 'seconder'):
                 person_name, person_link = actiondata[role]
                 if person_name:
                     person_id, _guid = legistar_id_guid(person_link)
                     setattr(record, '%s_id' % role, person_id)
             file_id, _file_link = actiondata['file_id']
             record.file_id = file_id

         self.session.add(record)
         # flush here so the action can be referred by
         # foreign keys in 'item_action' and 'action_vote'
         self.session.flush()
         self.session.add(ItemAction(item_id, record.id))

         # One ActionVote row per collected vote.
         for _name, link, vote in actiondata['votes']:
             voter_id, _guid = legistar_id_guid(link)
             self.session.add(ActionVote(record.id, voter_id, vote))
     return self.session.merge(record)
Exemple #9
0
 def remote_legislation_item(self, link):
     """Fetch the legislation item at ``link`` and normalise it.

     Adds ``id`` and ``guid`` (from ``legistar_id_guid``), converts any
     present, non-empty ``introduced`` / ``on_agenda`` / ``passed`` value
     with ``make_true_date``, and sets ``acted_on`` to whether
     ``action_details`` is non-empty. Returns the item.
     """
     record = self._remote_legislation_item(link)
     # add id, guid to item
     item_id, item_guid = legistar_id_guid(link)
     record['id'] = item_id
     record['guid'] = item_guid
     for field in ('introduced', 'on_agenda', 'passed'):
         if field not in record:
             continue
         if record[field]:
             record[field] = make_true_date(record[field])
     record['acted_on'] = len(record['action_details']) > 0
     return record
Exemple #10
0
 def add_rss_meetings(self, url, rss=None):
     """Add every meeting in an RSS feed that the session does not have yet.

     ``rss`` is the feed to use; when it is None the feed is fetched with
     ``self.manager.get_rss(url)``. An ``IntegrityError`` raised while
     adding a meeting is handled by rolling the session back.
     """
     feed = self.manager.get_rss(url) if rss is None else rss
     for entry in feed.entries:
         meeting_id, _guid = legistar_id_guid(entry.link)
         known = self.session.query(Meeting).get(meeting_id)
         if known is not None:
             print("Meeting {} already present.".format(meeting_id))
             continue
         print("adding meeting {} from rss".format(meeting_id))
         try:
             self.manager.add_meeting_from_rss(entry)
             self.session.commit()
         except IntegrityError:
             self.session.rollback()
Exemple #11
0
 def make_cache_object(self, type, link=None):
     """Build a ``MainCache`` row from the pickle file cached on disk.

     ``type`` selects the cache entry; for ``'meeting'``, ``'item'`` and
     ``'action'`` the id is extracted from ``link`` with
     ``legistar_id_guid``, otherwise the id is None. The file name and
     database name come from ``self._filename`` and ``self._dbname``.

     Returns the new (not yet persisted) ``MainCache`` object with
     ``retrieved`` and ``updated`` both set to the current time.

     Raises RuntimeError when the cache file does not exist.
     """
     from hubby.database import MainCache
     object_id = None
     if type in ['meeting', 'item', 'action']:
         object_id, _guid = legistar_id_guid(link)
     filename = self._filename(type, object_id)
     dbname = self._dbname(type, object_id)
     if not os.path.isfile(filename):
         raise RuntimeError("No file present %s" % filename)
     # Use a context manager so the file handle is closed even when
     # unpickling fails (the handle was previously left open).
     with open(filename, 'rb') as cache_file:
         content = Pickle.load(cache_file, encoding='utf-8')
     now = datetime.now()
     mc = MainCache()
     mc.name = dbname
     mc.retrieved = now
     mc.updated = now
     mc.content = content
     return mc
Exemple #12
0
 def _get_person(self, page):
     """Extract one person's fields from ``page`` into a dict.

     For every key in ``DATA_IDENTIFIERS`` the page is searched for an
     element whose ``id`` ends with that key's marker, trying ``span``,
     then ``a``, then ``img``. ``photo_link`` takes the element's ``src``
     (or None when nothing matched); every other key takes the stripped
     text of the first match. ``id`` and ``guid`` are added from
     ``self.url``.

     Raises RuntimeError when a key other than ``photo_link`` has no
     matching element.
     """
     markers = DATA_IDENTIFIERS
     fields = list(markers.keys())
     person = dict.fromkeys(fields)
     for field in fields:
         is_photo = field == 'photo_link'
         pattern = re.compile('.+%s$' % markers[field])
         # Try each tag name in turn and stop at the first one that matches.
         for tag_name in ('span', 'a', 'img'):
             matches = page.find_all(tag_name, id=pattern)
             if matches:
                 break
         # Only the photo is allowed to be missing.
         if not matches and not is_photo:
             raise RuntimeError("no tags found for %s" % field)
         if len(matches) > 1:
             print("len(%s) == %d" % (field, len(matches)))
         if is_photo:
             person[field] = matches[0]['src'] if matches else None
             continue
         value = matches[0].text.strip()
         person[field] = value
         # The website value is stored like the others but not echoed.
         if field != 'website':
             print(field, value)
     person['id'], person['guid'] = legistar_id_guid(self.url)
     return person
Exemple #13
0
 def add(self, itemdata):
     """Store a legislation item and its attachments; return the item.

     Every key of ``itemdata`` except ``'attachments'`` is copied onto a
     new ``Item``. Each ``(name, link)`` pair in ``itemdata['attachments']``
     (when that is not None) becomes an ``Attachment`` tied to the item.
     Everything is added inside ``transaction.manager``.

     Raises RuntimeError when an attachment id is already in the database.
     """
     with transaction.manager:
         record = Item()
         for field in itemdata:
             if field != 'attachments':
                 setattr(record, field, itemdata[field])
         self.session.add(record)
         attachments = itemdata['attachments']
         if attachments is not None:
             for name, link in attachments:
                 att_id, att_guid = legistar_id_guid(link)
                 existing = self.session.query(Attachment).get(att_id)
                 if existing is not None:
                     raise RuntimeError("Duplicate attachment %d" % att_id)
                 attachment = Attachment()
                 attachment.id = att_id
                 attachment.guid = att_guid
                 attachment.name = name
                 attachment.link = link
                 attachment.item_id = record.id
                 self.session.add(attachment)
     return self.session.merge(record)
Exemple #14
0
 def collect(self, type, link=None):
     """Return the collected result for ``type``, using the on-disk cache.

     For ``'meeting'``, ``'item'`` and ``'action'`` the id is extracted
     from ``link`` with ``legistar_id_guid``; otherwise the id is None.
     When the cache file named by ``self._filename`` does not exist, the
     collector from ``self._collector(type)`` is run (pointed at ``link``
     when one is given, prefixed with ``collector.url_prefix`` if it does
     not start with ``http``) and its ``result`` and ``content`` are
     pickled to that file. The file is then loaded and its ``'result'``
     entry returned.
     """
     object_id = None
     if type in ['meeting', 'item', 'action']:
         object_id, _guid = legistar_id_guid(link)
     filename = self._filename(type, object_id)
     if not os.path.isfile(filename):
         print("Retrieving %s from legistar..." % filename)
         collector = self._collector(type)
         if link is not None:
             print('link is', link, type)
             if not link.startswith('http'):
                 link = collector.url_prefix + link
             print("Retrieving", link)
             collector.set_url(link)
         collector.collect()
         data = dict(result=collector.result, content=collector.content)
         # Close the file explicitly so the pickle is fully written before
         # it is read back below.
         with open(filename, 'wb') as cache_file:
             Pickle.dump(data, cache_file)
     try:
         with open(filename, 'rb') as cache_file:
             data = Pickle.load(cache_file)
     except UnicodeDecodeError:
         # NOTE(review): with encoding='bytes' the dict keys of a pickle
         # written by Python 2 may load as bytes, in which case
         # data['result'] below would raise KeyError -- confirm.
         with open(filename, 'rb') as cache_file:
             data = Pickle.load(cache_file, encoding='bytes')
     return data['result']
Exemple #15
0
 def collect(self):
     """Retrieve ``self.url`` and store the parsed legislation item.

     When the page content contains ``Invalid parameters!`` the result is
     a stub dict with an empty ``action_details`` list and the offending
     URL under ``bad_url``. Otherwise the item comes from ``_get_item``,
     its ``passed`` / ``introduced`` / ``on_agenda`` values are converted
     with ``make_true_date`` (empty or missing values become None),
     ``acted_on`` records whether ``action_details`` is non-empty, and
     ``id`` / ``guid`` are extracted from ``self.url``.

     The item is stored on both ``self.item`` and ``self.result``.
     Returns None.
     """
     self.retrieve_page(self.url)
     if b'Invalid parameters!' in self.content:
         # Was ``url``, an undefined name that raised NameError here.
         item = {'action_details': [], 'bad_url': self.url}
         self.item = item
         self.result = item
         print("Invalid parameters found", self.result)
         return
     self.item = self._get_item(self.soup)
     for key in ['passed', 'introduced', 'on_agenda']:
         # .get() so a missing key is stored as None rather than raising
         # KeyError, the same as an empty value.
         value = self.item.get(key)
         self.item[key] = make_true_date(value) if value else None
     self.item['acted_on'] = len(self.item['action_details']) > 0
     self.item['id'], self.item['guid'] = legistar_id_guid(self.url)
     self.result = self.item
Exemple #16
0
 def _merge_pickled_meeting_items(self, meeting_id, collected):
     """Merge the items of a collected meeting into ``MeetingItem`` rows.

     ``collected['items']`` is walked in order. For each item the matching
     ``MeetingItem`` (by ``meeting_id`` and item id) is loaded, or created
     when none exists, then given its agenda number, type/order (from
     ``convert_agenda_number``), 1-based position and version before being
     merged and flushed. Returns None.

     Any exception propagates to the caller after the transaction has
     been aborted.
     """
     # ``with transaction.manager`` begins a transaction, commits it when
     # the body succeeds and aborts it when the body raises, so a failure
     # part-way through no longer leaves the transaction open.
     with transaction.manager:
         for position, item in enumerate(collected['items'], start=1):
             item_id, _guid = legistar_id_guid(item['item_page'])
             query = self.session.query(MeetingItem)
             query = query.filter_by(meeting_id=meeting_id)
             query = query.filter_by(item_id=item_id)
             try:
                 dbitem = query.one()
             except NoResultFound:
                 dbitem = MeetingItem(meeting_id, item_id)
             agenda_num = item['agenda_num']
             # Work around irregular entries in the source data.
             if agenda_num == '2011-0229':
                 agenda_num = None
             if agenda_num == '1.':
                 agenda_num = '1'
             # first agenda item is missing from meeting details
             if meeting_id == 302621:
                 agenda_num = '2'
             if agenda_num is not None:
                 dbitem.agenda_num = agenda_num
                 dbitem.type, dbitem.order = convert_agenda_number(
                     agenda_num)
             dbitem.item_order = position
             dbitem.version = int(item['version'])
             self.session.merge(dbitem)
             self.session.flush()
Exemple #17
0
 def _add_collected_legislation_item(self, item):
     """Store a collected legislation item and its attachments.

     Every key of ``item`` except ``'attachments'`` is copied onto a new
     ``Item``. Each ``(name, link)`` pair in ``item['attachments']`` (when
     that is not None) becomes an ``Attachment`` tied to the item.
     Returns None.

     Raises RuntimeError when an attachment id is already in the
     database; the transaction is aborted before the error propagates.
     """
     # ``with transaction.manager`` begins a transaction, commits it when
     # the body succeeds and aborts it when the body raises. Previously the
     # duplicate-attachment error left the transaction open.
     with transaction.manager:
         dbitem = Item()
         for key in item:
             if key == 'attachments':
                 continue
             setattr(dbitem, key, item[key])
         self.session.add(dbitem)
         attachments = item['attachments']
         if attachments is not None:
             for name, link in attachments:
                 attachment_id, guid = legistar_id_guid(link)
                 existing = self.session.query(Attachment).get(attachment_id)
                 if existing is not None:
                     raise RuntimeError(
                         'Duplicate attachment %d' % attachment_id)
                 attachment = Attachment()
                 attachment.id, attachment.guid = attachment_id, guid
                 attachment.name = name
                 attachment.link = link
                 attachment.item_id = dbitem.id
                 self.session.add(attachment)
Exemple #18
0
 def collect(self):
     """Retrieve ``self.url`` and store the parsed action.

     The action comes from ``_get_action``; its ``id`` and ``guid`` are
     extracted from ``self.url``. It is stored on both ``self.action``
     and ``self.result``.
     """
     self.retrieve_page(self.url)
     parsed = self._get_action(self.soup)
     self.action = parsed
     action_id, action_guid = legistar_id_guid(self.url)
     parsed['id'] = action_id
     parsed['guid'] = action_guid
     self.result = parsed
Exemple #19
0
 def collect(self):
     """Fetch the page at ``self.url`` and keep the action parsed from it.

     ``_get_action`` builds the action from ``self.soup``; ``id`` and
     ``guid`` are then added from ``self.url``. The same object is kept
     as ``self.action`` and ``self.result``.
     """
     self.retrieve_page(self.url)
     action = self._get_action(self.soup)
     self.action = action
     ids = legistar_id_guid(self.url)
     action['id'], action['guid'] = ids
     self.result = action