Python Meeting.description Exemples

Langage de programmation: Python

Espace de nommage/Package : model.meeting

Class/Type: Meeting

Méthode/Fonction: description

Exemples sur hotexamples.com : 2

Python Meeting.description - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de model.meeting.Meeting.description extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

Meeting(4)

name(2)

agendaitem(2)

title(2)

description(2)

original_url(2)

identifier(2)

type(1)

start(1)

shortName(1)

room(1)

resultsProtocol(1)

originalUrl(1)

invitation(1)

organization_name(1)

address(1)

end(1)

document(1)

date_start(1)

date_end(1)

committee_name(1)

auxiliaryFile(1)

agendaItem(1)

verbatimProtocol(1)

Méthodes fréquemment utilisées

Meeting (4)

name (2)

agendaitem (2)

title (2)

description (2)

original_url (2)

identifier (2)

type (1)

start (1)

shortName (1)

Méthodes fréquemment utilisées

room (1)

resultsProtocol (1)

originalUrl (1)

invitation (1)

organization_name (1)

address (1)

end (1)

document (1)

date_start (1)

date_end (1)

committee_name (1)

auxiliaryFile (1)

agendaItem (1)

verbatimProtocol (1)

Méthodes fréquemment utilisées

committee_name (1)

auxiliaryFile (1)

agendaItem (1)

verbatimProtocol (1)

Associées

EntityManager

save_journalpost_for_soknad

break_even

step

exits

Command

valid_iso8601

FrozenEvent

find_roots

post_request

Related in langs

EventSubjectBase (PHP)

wp_gdsr_debug_clean (PHP)

DwmSize (C#)

IToysService (C#)

ProduceTree (C++)

IsLeafNode (C++)

LoadAll (Go)

ArrayContains (Go)

OrientationHelper (Java)

UserRepository (Java)

Exemple #1

0

Afficher le fichier

Fichier : scraperallris.py Projet : OpenRuhr/ris-scraper

def find_meeting(self, start_date=None, end_date=None):
    """
    Find meetings within a given time frame and add them to the meeting queue.

    Requests the meeting overview (``si010.asp``) for the period between
    ``start_date`` and ``end_date``. For every entry found, a ``Meeting`` is
    built from the entry's child elements, stored via
    ``self.db.save_meeting`` and its numeric ID is added to
    ``self.meeting_queue``.

    Args:
        start_date: First day of the period. Must provide ``strftime``
            (presumably a ``datetime.date`` or ``datetime`` -- confirm
            against callers).
        end_date: Last day of the period; same kind of object as
            ``start_date``.

    Returns:
        None. The method also returns early (without storing anything) when
        the overview cannot be fetched or does not have the expected layout.

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not given. Both
            default to ``None`` only for signature compatibility; the
            overview URL cannot be built without them.
        KeyError: If an overview entry lacks one of the expected child tags.
    """
    if start_date is None or end_date is None:
        raise ValueError('find_meeting requires both start_date and end_date')

    meeting_url = "%ssi010.asp?selfaction=ws&template=xyz&kaldatvon=%s&kaldatbis=%s" % (
        self.config.BASE_URL,
        start_date.strftime("%d.%m.%Y"),
        end_date.strftime("%d.%m.%Y"))
    logging.info("Getting meeting overview from %s", meeting_url)

    r = self.get_url(meeting_url)
    if not r:
        return

    # Non-ASCII characters are turned into XML character references so the
    # payload handed to the parser is plain ASCII.
    xml = r.text.encode('ascii', 'xmlcharrefreplace')
    # recover=True lets the parser work through broken XML.
    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(xml, parser=parser)

    # The meeting entries are the children of the root's second child.
    # A recovering parser may yield no tree at all, or a truncated one.
    if root is None or len(root) < 2:
        logging.warning("Meeting overview at %s has an unexpected structure", meeting_url)
        return

    for item in root[1].iterchildren():
        # Flatten the entry into a tag -> text mapping.
        raw_meeting = dict((e.tag, e.text) for e in item.iterchildren())

        numeric_id = int(raw_meeting['silfdnr'])
        meeting = Meeting(numeric_id=numeric_id, identifier=numeric_id)
        meeting.date_start = self.parse_date(raw_meeting['sisbvcs'])
        meeting.date_end = self.parse_date(raw_meeting['sisevcs'])
        # The identifier passed to the constructor is replaced by 'siname'.
        meeting.identifier = raw_meeting['siname']
        meeting.original_url = "%sto010.asp?SILFDNR=%s&options=4" % (
            self.config.BASE_URL, raw_meeting['silfdnr'])
        meeting.title = raw_meeting['sitext']
        meeting.committee_name = raw_meeting['grname']
        # The overview offers only one text field; it serves as both title
        # and description.
        meeting.description = raw_meeting['sitext']

        self.db.save_meeting(meeting)
        self.meeting_queue.add(meeting.numeric_id)

Exemple #2

0

Afficher le fichier

Fichier : scrapersessionnet.py Projet : Mic92/ris-scraper

def get_meeting(self, meeting_url=None, meeting_id=None):
    """
    Load meeting details for the given detail page URL or numeric ID.

    Fetches the meeting (session) detail page, extracts title, identifier,
    start/end, address, description, agenda items (with linked papers) and
    meeting-related documents, then stores the result via
    ``self.db.save_meeting``.

    Args:
        meeting_url: URL of the detail page. Only used when ``meeting_id``
            is not given; the ID is then parsed out of the URL using
            ``self.urls['SESSION_DETAIL_PARSE_PATTERN']``.
        meeting_id: Numeric meeting ID. When given, the URL is built from
            ``self.urls['SESSION_DETAIL_PRINT_PATTERN']`` and any
            ``meeting_url`` argument is ignored.
            NOTE(review): the case where neither argument is given is not
            handled specially here -- confirm callers always pass one.

    Returns:
        None. Returns early without storing anything when the page cannot be
        fetched or shows a known server/permission/"no data" error.

    Raises:
        TemplateError: If the title, the identifier table cells, the session
            identifier or the agenda rows cannot be located with the
            configured XPath expressions.
        ValueError: If an agenda item detail row carries an unknown label.
    """

    def extract_document_id(file_link):
        """Return the value of the ``id=`` parameter in file_link, or None."""
        if 'id=' not in file_link:
            return None
        return file_link.split('id=')[1].split('&')[0]

    def relation_for(title):
        """Classify a meeting document by keywords in its title."""
        if 'Einladung' in title:
            return 'invitation'
        if 'Niederschrift' in title:
            return 'results_protocol'
        return 'misc'

    # Derive whichever of meeting_id / meeting_url was not given from the other.
    if meeting_id is not None:
        meeting_url = self.urls['SESSION_DETAIL_PRINT_PATTERN'] % meeting_id
    elif meeting_url is not None:
        parsed = parse.search(self.urls['SESSION_DETAIL_PARSE_PATTERN'], meeting_url)
        meeting_id = parsed['meeting_id']

    logging.info("Getting meeting (session) %d from %s", meeting_id, meeting_url)

    meeting = Meeting(numeric_id=meeting_id)

    # Throttle requests by the configured wait time.
    time.sleep(self.config.WAIT_TIME)
    response = self.get_url(meeting_url)
    if not response:
        return

    # Forms are kept for the later document download.
    mechanize_forms = mechanize.ParseResponse(response, backwards_compat=False)
    # seek(0) is necessary to reset the response pointer after form parsing.
    response.seek(0)
    html = response.read()
    # Normalize non-breaking-space entities so that later strip() calls and
    # label comparisons see ordinary spaces. (In the extracted source this
    # literal had degraded to a space-for-space no-op.)
    html = html.replace('&nbsp;', ' ')
    parser = etree.HTMLParser()
    dom = etree.parse(StringIO(html), parser)

    # Check for page errors. Best effort: a missing heading, or one without
    # text, simply means there is no error to report.
    try:
        page_title = dom.xpath('//h1')[0].text
        if 'Fehlermeldung' in page_title:
            logging.info("Page %s cannot be accessed due to server error", meeting_url)
            return
        if 'Berechtigungsfehler' in page_title:
            logging.info("Page %s cannot be accessed due to permissions", meeting_url)
            return
    except (IndexError, TypeError, AttributeError):
        pass
    try:
        error_h3 = dom.xpath('//h3[@class="smc_h3"]')[0].text.strip()
        if 'Keine Daten gefunden' in error_h3:
            logging.info("Page %s does not contain any agenda items", meeting_url)
            return
        if 'Fehlercode: 1104' in error_h3:
            logging.info("Page %s cannot be accessed due to permissions", meeting_url)
            return
    except (IndexError, TypeError, AttributeError):
        pass

    meeting.original_url = meeting_url

    # Session title. Any lookup failure is reported as a template problem.
    try:
        meeting.title = dom.xpath(self.xpath['SESSION_DETAIL_TITLE'])[0].text
    except Exception:
        logging.critical('Cannot find session title element using XPath SESSION_DETAIL_TITLE')
        raise TemplateError('Cannot find session title element using XPath SESSION_DETAIL_TITLE')

    # NOTE: extraction of the committee link (XPath
    # SESSION_DETAIL_COMMITTEE_LINK) is disabled in this version.

    # Meeting identifier, date, address etc. The table cells alternate
    # between a label cell and the cell holding its value.
    tds = dom.xpath(self.xpath['SESSION_DETAIL_IDENTIFIER_TD'])
    if len(tds) == 0:
        logging.critical('Cannot find table fields using SESSION_DETAIL_IDENTIFIER_TD_XPATH at session ' + meeting_url)
        raise TemplateError('Cannot find table fields using SESSION_DETAIL_IDENTIFIER_TD_XPATH at session ' + meeting_url)
    for n in range(len(tds)):
        try:
            tdcontent = tds[n].text.strip()
            nextcontent = tds[n + 1].text.strip()
        except (AttributeError, IndexError):
            # Cell without text, or the last cell (no value cell follows).
            continue
        if tdcontent == 'Sitzung:':
            meeting.identifier = nextcontent
        # 'Gremium:' (committee name) is intentionally not read here; per
        # the original note it is scraped in the committee detail page(?).
        elif tdcontent == 'Datum:':
            start = nextcontent
            end = nextcontent
            # Guard the look-ahead: 'Datum:' may be close to the end of the
            # cell list, in which case there is no 'Zeit:' pair to read.
            if n + 3 < len(tds) and tds[n + 2].text == 'Zeit:':
                time_text = tds[n + 3].text
                if time_text is not None:
                    times = time_text.replace(' Uhr', '').split('-')
                    start = start + ' ' + times[0]
                    if len(times) > 1:
                        end = end + ' ' + times[1]
                    else:
                        end = start
            meeting.start = start
            meeting.end = end
        elif tdcontent == 'Raum:':
            meeting.address = " ".join(tds[n + 1].xpath('./text()'))
        elif tdcontent == 'Bezeichnung:':
            meeting.description = nextcontent

    if not hasattr(meeting, 'identifier'):
        logging.critical('Cannot find session identifier using XPath SESSION_DETAIL_IDENTIFIER_TD')
        raise TemplateError('Cannot find session identifier using XPath SESSION_DETAIL_IDENTIFIER_TD')

    # Agenda items
    found_documents = []
    rows = dom.xpath(self.xpath['SESSION_DETAIL_AGENDA_ROWS'])
    if len(rows) == 0:
        logging.critical('Cannot find agenda using XPath SESSION_DETAIL_AGENDA_ROWS')
        raise TemplateError('Cannot find agenda using XPath SESSION_DETAIL_AGENDA_ROWS')

    agendaitems = []
    public = True
    agendaitem = None
    for row in rows:
        row_id = row.get('id')
        # Rows without a class attribute are treated as having no classes.
        row_classes = (row.get('class') or '').split(' ')
        fields = row.xpath('td')

        # Sequence number: plain text of the first cell, or - when there is
        # an "updated" notice - the text of an additional span inside it.
        numbers = fields[0].xpath('./text()') or fields[0].xpath('.//span/text()')
        number = numbers[0] if numbers else None

        if row_id is not None:
            # Agenda item main row.
            # First: save the agenda item collected so far.
            if agendaitem is not None:
                agendaitems.append(agendaitem)
            # Create the new agenda item; its ID follows the last '_' in
            # the row id.
            agendaitem = Agendaitem(numeric_id=int(row_id.rsplit('_', 1)[1]))
            if number is not None:
                agendaitem.sequence_number = number
            # In some RIS the title is a link, sometimes not. Test both.
            link_texts = fields[1].xpath('./a/text()')
            plain_texts = fields[1].xpath('./text()')
            if len(link_texts):
                agendaitem.title = "; ".join(link_texts)
            elif len(plain_texts):
                agendaitem.title = "; ".join(plain_texts)
            # Ignore the "no agenda items" placeholder row.
            if agendaitem.title == 'keine Tagesordnungspunkte':
                agendaitem = None
                continue
            agendaitem.public = public

            # Paper links
            links = row.xpath(self.xpath['SESSION_DETAIL_AGENDA_ROWS_SUBMISSION_LINK'])
            papers = []
            for link in links:
                href = link.get('href')
                if href is None:
                    continue
                parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], href)
                if parsed is not None:
                    paper = Paper(numeric_id=int(parsed['paper_id']), identifier=link.text)
                    papers.append(paper)
                    # Add paper to paper queue
                    if hasattr(self, 'paper_queue'):
                        self.paper_queue.add(int(parsed['paper_id']))
            if len(papers):
                agendaitem.paper = papers

            # Note: we don't scrape agendaitem-related documents for now,
            # based on the assumption that they are all found via paper
            # detail pages. All we do here is get a list of document IDs
            # in found_documents.
            # Find links
            for link in row.xpath('.//a[contains(@href,"getfile.")]'):
                if not link.xpath('.//img'):
                    file_link = self.config.BASE_URL + link.get('href')
                    document_id = extract_document_id(file_link)
                    if document_id is not None:
                        found_documents.append(document_id)
            # Find forms
            for form in row.xpath('.//form'):
                for hidden_field in form.xpath('input'):
                    if hidden_field.get('name') != 'DT':
                        continue
                    found_documents.append(hidden_field.get('value'))

        # Alternative to smc_tophz because of version 4.3.5 bi (layout 3)
        elif ('smc_tophz' in row_classes) or (row.get('valign') == 'top' and row.get('debug') == '3'):
            # Additional (optional) detail row for the current agenda item.
            label = fields[1].text
            value = fields[2].text
            if label is not None and value is not None:
                label = label.strip()
                value = value.strip()
                if label in ['Ergebnis:', 'Beschluss:', 'Beratungsergebnis:']:
                    if value in self.config.RESULT_STRINGS:
                        result = self.config.RESULT_STRINGS[value]
                    else:
                        logging.warning("String '%s' not found in configured RESULT_STRINGS", value)
                        result = value
                    # There may be no current item (e.g. after the
                    # placeholder row); then there is nothing to annotate.
                    if agendaitem is not None:
                        agendaitem.result = result
                elif label in ['Bemerkung:', 'Abstimmung:']:
                    if agendaitem is not None:
                        agendaitem.result_details = value
                else:
                    logging.critical("Agendaitem info label '%s' is unknown", label)
                    raise ValueError('Agendaitem info label "%s" is unknown' % label)

        elif 'smcrowh' in row_classes:
            # Subheading (public / nonpublic part): all following items
            # belong to the non-public part.
            if fields[0].text is not None and "Nicht öffentlich" in fields[0].text.encode('utf-8'):
                public = False

    # Items are only flushed when the next main row starts, so the last one
    # is still pending here and must be appended explicitly.
    if agendaitem is not None:
        agendaitems.append(agendaitem)
    meeting.agendaitem = agendaitems

    # Meeting-related documents
    containers = dom.xpath(self.xpath['SESSION_DETAIL_ATTACHMENTS'])
    for container in containers:
        classes = container.get('class')
        if classes is None:
            continue
        classes = classes.split(' ')
        if self.xpath['SESSION_DETAIL_ATTACHMENTS_CONTAINER_CLASSNAME'] not in classes:
            continue
        documents = []
        for row in container.xpath('.//tr'):
            forms = row.xpath('.//form')
            if not forms:
                # Documents offered as plain download links.
                for link in row.xpath('.//a'):
                    # Ignore additional pdf icon links
                    if link.xpath('.//img'):
                        continue
                    href = link.get('href')
                    if href is None:
                        # Anchor without target; nothing to download.
                        continue
                    title = ' '.join(link.xpath('./text()')).strip()
                    file_link = self.config.BASE_URL + href
                    document_id = extract_document_id(file_link)
                    if document_id is None or document_id in found_documents:
                        continue
                    document = Document(
                        identifier=document_id,
                        numeric_id=document_id,
                        title=title,
                        original_url=file_link)
                    document = self.get_document_file(document=document, link=file_link)
                    documents.append({'relation': relation_for(title), 'document': document})
                    found_documents.append(document_id)
            else:
                # Documents offered through a form with a hidden 'DT' field.
                for form in forms:
                    title = " ".join(row.xpath('./td/text()')).strip()
                    for hidden_field in form.xpath('input'):
                        if hidden_field.get('name') != 'DT':
                            continue
                        document_id = hidden_field.get('value')
                        # Make sure to add only those which aren't
                        # agendaitem-related.
                        if document_id in found_documents:
                            continue
                        document = Document(
                            identifier=document_id,
                            numeric_id=document_id,
                            title=title)
                        # Traverse the whole mechanize response to find the
                        # matching form to submit.
                        for mform in mechanize_forms:
                            for control in mform.controls:
                                if control.name == 'DT' and control.value == document_id:
                                    document = self.get_document_file(document, mform)
                        documents.append({'relation': relation_for(title), 'document': document})
                        found_documents.append(document_id)
        if len(documents):
            meeting.document = documents

    oid = self.db.save_meeting(meeting)
    logging.info("Meeting %d stored with _id %s", meeting_id, oid)