Exemple #1
0
    def add_or_update(self):
        """
        Add current_bill to database, or update the record if it already exists.
        Then clear the current_bill attribute to make it ready for the next bill to be scraped.

        A bill whose 'status' is "Draft" is looked up by name and year; any
        other bill is looked up by its 'code'. Each item in 'versions' is
        stored as an Entry (looked up by url) and linked to the bill.

        Exceptions raised while saving are not propagated: they are logged
        together with their traceback, and a summary message is appended to
        self.stats['errors'].
        """

        bill_data = self.current_bill

        try:
            if bill_data.get('status') == "Draft":
                # save scraped draft bill to database
                bill = Bill.query.filter(Bill.name == bill_data['bill_name']) \
                    .filter(Bill.year == bill_data['year']).first()
                if bill is None:
                    bill = Bill()
                    bill.name = bill_data['bill_name']
                    bill.year = bill_data['year']
                    self.stats['new_drafts'] += 1
                bill.bill_type = "Draft"
                db.session.add(bill)
                self.stats['total_drafts'] += 1

            else:
                # save scraped bills to database
                bill_code = bill_data["code"]
                bill = Bill.query.filter(Bill.code == bill_code).first()
                if bill is None:
                    bill = Bill()
                    bill.code = bill_code
                    self.stats['new_bills'] += 1
                bill.name = bill_data['bill_name']
                bill.year = bill_data['year']
                bill.number = bill_data['number']
                db.session.add(bill)
                self.stats['total_bills'] += 1

            # save related bill versions
            for entry_data in bill_data['versions']:
                entry = Entry.query.filter(Entry.url == entry_data['url']).first()  # Look for pre-existing entry.
                if entry is None:
                    entry = Entry()  # Create new entry.
                    self.stats['new_bill_versions'] += 1
                entry = scrapertools.populate_entry(entry, entry_data)
                # NOTE(review): the bill is appended unconditionally, also when
                # an existing entry is being updated - confirm re-linking is safe.
                entry.bills.append(bill)
                db.session.add(entry)
                self.stats['total_bill_versions'] += 1

        except Exception:
            # Build the summary defensively: the data that made the save fail
            # may be the same data this message is assembled from, and an
            # exception escaping this handler would skip the reset below.
            # NOTE(review): objects added to the session before the failure
            # are left pending; no rollback is performed here.
            error_msg = "Error saving bill: "
            if bill_data.get('bill_name'):
                error_msg += str(bill_data['bill_name'])
            try:
                first_title = bill_data['versions'][0]['title']
            except (KeyError, IndexError, TypeError):
                first_title = None
            if first_title is not None:
                error_msg += " - " + str(first_title)
            # logger.exception keeps the traceback next to the summary message
            logger.exception(error_msg)
            self.stats['errors'].append(error_msg)

        logger.debug(json.dumps(self.current_bill, indent=4, default=scrapertools.handler))
        self.current_bill = {}
Exemple #2
0
    def add_or_update(self):
        """
        Save current_hansard to the database.

        The scraped data is tagged with entry_type "hansard". An Entry of type
        "hansard" with the same title is reused when one exists; otherwise a
        new Entry is created and counted in stats["new_hansards"].
        """

        scraped = self.current_hansard
        scraped['entry_type'] = "hansard"

        # fall back to an empty list when no bills were scraped
        linked_bills = scraped.get('bills') or []

        # TODO: improve filtering
        hansard_query = Entry.query.filter(Entry.type == "hansard")
        record = hansard_query.filter(Entry.title == scraped['title']).first()
        if record is None:
            record = Entry()
            self.stats["new_hansards"] += 1

        record = scrapertools.populate_entry(record, scraped, linked_bills)
        db.session.add(record)
        self.stats["total_hansards"] += 1
    def add_or_update(self):
        """
        Insert current_hansard as a new Entry, or refresh the matching one.

        Matching is done on Entry.type == "hansard" plus the scraped title.
        Both the "new_hansards" (only on insert) and "total_hansards" counters
        in self.stats are updated.
        """

        self.current_hansard['entry_type'] = "hansard"

        if self.current_hansard.get('bills'):
            related_bills = self.current_hansard["bills"]
        else:
            related_bills = []

        # TODO: improve filtering
        existing = (
            Entry.query
            .filter(Entry.type == "hansard")
            .filter(Entry.title == self.current_hansard['title'])
            .first()
        )
        if existing is not None:
            target = existing
        else:
            target = Entry()
            self.stats["new_hansards"] += 1

        db.session.add(
            scrapertools.populate_entry(target, self.current_hansard,
                                        related_bills))
        self.stats["total_hansards"] += 1
    def add_or_update(self):
        """
        Save current_report to the database, then reset current_report.

        A non-deleted Entry with the current committee's agent_id and the
        report's url is reused when one exists; otherwise a new Entry is
        created and counted in stats["new_committee_reports"].
        """

        scraped = self.current_report

        existing = Entry.query.filter_by(
            agent_id=self.current_committee.agent_id,
            url=scraped['url'],
            is_deleted=False,
        ).first()
        if existing is None:
            existing = Entry()
            self.stats["new_committee_reports"] += 1

        # an empty or missing bill list is passed on as None
        linked_bills = scraped.get('bills') or None
        if linked_bills is not None:
            logger.info(str(linked_bills))

        saved = scrapertools.populate_entry(existing, scraped, linked_bills)
        db.session.add(saved)
        self.stats["total_committee_reports"] += 1
        self.current_report = {}
    def add_or_update(self):
        """
        Insert current_report as a new Entry, or refresh the matching one.

        The lookup narrows Entry by the committee's agent_id, the report url
        and is_deleted=False. Afterwards current_report is replaced with an
        empty dict.
        """

        # narrow the query one criterion at a time
        candidates = Entry.query.filter_by(
            agent_id=self.current_committee.agent_id)
        candidates = candidates.filter_by(url=self.current_report['url'])
        entry = candidates.filter_by(is_deleted=False).first()

        if entry is None:
            entry = Entry()
            self.stats["new_committee_reports"] += 1

        if self.current_report.get('bills'):
            report_bills = self.current_report['bills']
            logger.info(str(report_bills))
        else:
            report_bills = None

        entry = scrapertools.populate_entry(entry, self.current_report,
                                            report_bills)
        db.session.add(entry)
        self.stats["total_committee_reports"] += 1
        self.current_report = {}
Exemple #6
0
    def add_or_update(self):
        """
        Add current_bill to database, or update the record if it already exists.
        Then clear the current_bill attribute to make it ready for the next bill to be scraped.

        A bill whose 'status' is "Draft" is looked up by name and year; any
        other bill is looked up by its 'code'. Each item in 'versions' is
        stored as an Entry (looked up by url) and linked to the bill.

        Exceptions raised while saving are not propagated: they are logged
        together with their traceback, and a summary message is appended to
        self.stats['errors'].
        """

        bill_data = self.current_bill

        try:
            if bill_data.get('status') == "Draft":
                # save scraped draft bill to database
                bill = Bill.query.filter(
                    Bill.name == bill_data['bill_name']).filter(
                        Bill.year == bill_data['year']).first()
                if bill is None:
                    bill = Bill()
                    bill.name = bill_data['bill_name']
                    bill.year = bill_data['year']
                    self.stats['new_drafts'] += 1
                bill.bill_type = "Draft"
                db.session.add(bill)
                self.stats['total_drafts'] += 1

            else:
                # save scraped bills to database
                bill_code = bill_data["code"]
                bill = Bill.query.filter(Bill.code == bill_code).first()
                if bill is None:
                    bill = Bill()
                    bill.code = bill_code
                    self.stats['new_bills'] += 1
                bill.name = bill_data['bill_name']
                bill.year = bill_data['year']
                bill.number = bill_data['number']
                db.session.add(bill)
                self.stats['total_bills'] += 1

            # save related bill versions
            for entry_data in bill_data['versions']:
                entry = Entry.query.filter(
                    Entry.url ==
                    entry_data['url']).first()  # Look for pre-existing entry.
                if entry is None:
                    entry = Entry()  # Create new entry.
                    self.stats['new_bill_versions'] += 1
                entry = scrapertools.populate_entry(entry, entry_data)
                # NOTE(review): the bill is appended unconditionally, also when
                # an existing entry is being updated - confirm re-linking is safe.
                entry.bills.append(bill)
                db.session.add(entry)
                self.stats['total_bill_versions'] += 1

        except Exception:
            # Build the summary defensively: the data that made the save fail
            # may be the same data this message is assembled from, and an
            # exception escaping this handler would skip the reset below.
            # NOTE(review): objects added to the session before the failure
            # are left pending; no rollback is performed here.
            error_msg = "Error saving bill: "
            if bill_data.get('bill_name'):
                error_msg += str(bill_data['bill_name'])
            try:
                first_title = bill_data['versions'][0]['title']
            except (KeyError, IndexError, TypeError):
                first_title = None
            if first_title is not None:
                error_msg += " - " + str(first_title)
            # logger.exception keeps the traceback next to the summary message
            logger.exception(error_msg)
            self.stats['errors'].append(error_msg)

        logger.debug(
            json.dumps(self.current_bill,
                       indent=4,
                       default=scrapertools.handler))
        self.current_bill = {}