def add_or_update(self):
    """
    Add current_bill to the database session, or update the record if it
    already exists, then clear current_bill for the next bill to be scraped.

    A bill whose ``status`` is ``"Draft"`` is matched on name and year; any
    other bill is matched on its ``code``. Each item in ``versions`` is stored
    as an ``Entry`` (matched on URL) and linked to the bill. The counters in
    ``self.stats`` are incremented along the way. Objects are only added to
    ``db.session``; no commit is issued here.

    Saving is best-effort: any exception is logged with its traceback, a
    summary line is appended to ``self.stats['errors']`` and nothing is
    re-raised.
    """
    bill_data = self.current_bill
    try:
        if bill_data.get('status') == "Draft":
            # Draft bills are matched on name + year.
            bill = Bill.query.filter(Bill.name == bill_data['bill_name']) \
                .filter(Bill.year == bill_data['year']).first()
            if bill is None:
                bill = Bill()
                bill.name = bill_data['bill_name']
                bill.year = bill_data['year']
                self.stats['new_drafts'] += 1
            bill.bill_type = "Draft"
            db.session.add(bill)
            self.stats['total_drafts'] += 1
        else:
            # All other bills are matched on their code.
            bill_code = bill_data["code"]
            bill = Bill.query.filter(Bill.code == bill_code).first()
            if bill is None:
                bill = Bill()
                bill.code = bill_code
                self.stats['new_bills'] += 1
            bill.name = bill_data['bill_name']
            bill.year = bill_data['year']
            bill.number = bill_data['number']
            db.session.add(bill)
            self.stats['total_bills'] += 1

        # Save the related bill versions, reusing entries already stored
        # under the same URL.
        for entry_data in bill_data['versions']:
            entry = Entry.query.filter(Entry.url == entry_data['url']).first()
            if entry is None:
                entry = Entry()
                self.stats['new_bill_versions'] += 1
            entry = scrapertools.populate_entry(entry, entry_data)
            entry.bills.append(bill)
            db.session.add(entry)
            self.stats['total_bill_versions'] += 1
    except Exception:
        # Build the summary with guarded lookups only: a missing key here
        # would raise from inside the handler and skip the reset below.
        error_msg = "Error saving bill: "
        if bill_data.get('bill_name'):
            error_msg += bill_data['bill_name']
        versions = bill_data.get('versions')
        if versions:
            title = versions[0].get('title')
            if title:
                error_msg += " - " + title
        # logger.exception logs at ERROR level and attaches the traceback,
        # which a plain logger.error call would drop.
        logger.exception(error_msg)
        self.stats['errors'].append(error_msg)
        # NOTE(review): the session is not rolled back here; if the failure
        # came from the database the session may need a rollback before it
        # can be reused -- confirm the caller takes care of that.

    logger.debug(
        json.dumps(bill_data, indent=4, default=scrapertools.handler))
    self.current_bill = {}
def add_or_update(self):
    """
    Store the hansard held in ``self.current_hansard``.

    An existing ``Entry`` of type "hansard" with the same title is reused and
    refreshed; otherwise a new ``Entry`` is created. The populated entry is
    added to the database session and the hansard counters in ``self.stats``
    are updated.
    """
    record = self.current_hansard
    record['entry_type'] = "hansard"

    # Fall back to an empty list when no bills were scraped.
    linked_bills = record.get('bills') or []

    # TODO: improve filtering
    hansard_entries = Entry.query.filter(Entry.type == "hansard")
    existing = hansard_entries.filter(Entry.title == record['title']).first()

    if existing is not None:
        target = existing
    else:
        target = Entry()
        self.stats["new_hansards"] += 1

    saved = scrapertools.populate_entry(target, record, linked_bills)
    db.session.add(saved)
    self.stats["total_hansards"] += 1
def add_or_update(self):
    """
    Persist ``self.current_hansard`` as an ``Entry``.

    Looks for an ``Entry`` of type "hansard" with a matching title and updates
    it; if none is found a fresh ``Entry`` is populated instead. The entry is
    added to the database session and ``self.stats`` is updated
    ("new_hansards" for created entries, "total_hansards" for every call).
    """
    scraped = self.current_hansard
    scraped['entry_type'] = "hansard"
    related_bills = scraped.get('bills') or []

    # TODO: improve filtering
    by_type = Entry.query.filter(Entry.type == "hansard")
    by_title = by_type.filter(Entry.title == scraped['title'])
    entry = by_title.first()

    is_new = entry is None
    if is_new:
        entry = Entry()
        self.stats["new_hansards"] += 1

    entry = scrapertools.populate_entry(entry, scraped, related_bills)
    db.session.add(entry)
    self.stats["total_hansards"] += 1
def add_or_update(self):
    """
    Store the report held in ``self.current_report``, then reset it.

    A non-deleted ``Entry`` with the same URL and the current committee's
    ``agent_id`` is reused and refreshed; otherwise a new ``Entry`` is
    created. The populated entry is added to the database session, the
    committee-report counters in ``self.stats`` are updated and
    ``self.current_report`` is replaced by an empty dict.
    """
    report_data = self.current_report

    candidates = Entry.query.filter_by(agent_id=self.current_committee.agent_id)
    candidates = candidates.filter_by(url=report_data['url'])
    entry = candidates.filter_by(is_deleted=False).first()

    if entry is None:
        entry = Entry()
        self.stats["new_committee_reports"] += 1

    # None (rather than an empty value) is passed on when no bills were found.
    linked_bills = report_data.get('bills') or None
    if linked_bills:
        logger.info(str(linked_bills))

    entry = scrapertools.populate_entry(entry, report_data, linked_bills)
    db.session.add(entry)
    self.stats["total_committee_reports"] += 1
    self.current_report = {}
def add_or_update(self):
    """
    Add current_bill to the database session, or update the record if it
    already exists, then clear current_bill for the next bill to be scraped.

    A bill whose ``status`` is ``"Draft"`` is matched on name and year; any
    other bill is matched on its ``code``. Each item in ``versions`` is stored
    as an ``Entry`` (matched on URL) and linked to the bill. The counters in
    ``self.stats`` are incremented along the way. Objects are only added to
    ``db.session``; no commit is issued here.

    Saving is best-effort: any exception is logged with its traceback, a
    summary line is appended to ``self.stats['errors']`` and nothing is
    re-raised.
    """
    bill_data = self.current_bill
    try:
        if bill_data.get('status') == "Draft":
            # Draft bills are matched on name + year.
            bill = Bill.query.filter(
                Bill.name == bill_data['bill_name']).filter(
                Bill.year == bill_data['year']).first()
            if bill is None:
                bill = Bill()
                bill.name = bill_data['bill_name']
                bill.year = bill_data['year']
                self.stats['new_drafts'] += 1
            bill.bill_type = "Draft"
            db.session.add(bill)
            self.stats['total_drafts'] += 1
        else:
            # All other bills are matched on their code.
            bill_code = bill_data["code"]
            bill = Bill.query.filter(Bill.code == bill_code).first()
            if bill is None:
                bill = Bill()
                bill.code = bill_code
                self.stats['new_bills'] += 1
            bill.name = bill_data['bill_name']
            bill.year = bill_data['year']
            bill.number = bill_data['number']
            db.session.add(bill)
            self.stats['total_bills'] += 1

        # Save the related bill versions, reusing entries already stored
        # under the same URL.
        for entry_data in bill_data['versions']:
            entry = Entry.query.filter(
                Entry.url == entry_data['url']).first()
            if entry is None:
                entry = Entry()
                self.stats['new_bill_versions'] += 1
            entry = scrapertools.populate_entry(entry, entry_data)
            entry.bills.append(bill)
            db.session.add(entry)
            self.stats['total_bill_versions'] += 1
    except Exception:
        # Build the summary with guarded lookups only: a missing key here
        # would raise from inside the handler and skip the reset below.
        error_msg = "Error saving bill: "
        if bill_data.get('bill_name'):
            error_msg += bill_data['bill_name']
        versions = bill_data.get('versions')
        if versions:
            title = versions[0].get('title')
            if title:
                error_msg += " - " + title
        # logger.exception logs at ERROR level and attaches the traceback,
        # which a plain logger.error call would drop.
        logger.exception(error_msg)
        self.stats['errors'].append(error_msg)
        # NOTE(review): the session is not rolled back here; if the failure
        # came from the database the session may need a rollback before it
        # can be reused -- confirm the caller takes care of that.

    logger.debug(
        json.dumps(bill_data, indent=4, default=scrapertools.handler))
    self.current_bill = {}