Example #1
0
def get_proposals(
        autoanalyze=False,
        no_commit=False,
        limit=None,
        ):
    """Re-parse stored proposal pages and sync proposals and sponsorships.

    For every proposal in the working set, the stored senate and cdep
    pages are unpickled and fed to a ``SingleProposalScraper``; the
    resulting record is pushed through ``proposal_patcher`` and the row's
    sponsorships are reconciled with the scraped ones.

    :param autoanalyze: when true, after a successful commit schedule
        ``ocr_proposal`` jobs for changed proposals that have a
        ``pdf_url`` and ``calculate_proposal`` jobs for every processed
        proposal whose ``policy_domain_id`` is ``None``.
    :param no_commit: when true, roll the session back and return right
        after processing; nothing is committed and no jobs are scheduled.
    :param limit: not used by the code visible in this function.
    :returns: ``None``.
    """
    import pickle
    from mptracker.scraper.proposals import SingleProposalScraper
    from mptracker.proposals import ocr_proposal
    from mptracker.policy import calculate_proposal

    # NOTE(review): `index` is never read and `dirty_proposal_set` is never
    # populated in the code visible here, so the loop below does not
    # execute as written -- presumably the code that fills them was
    # elided; confirm against the full source.
    index = {'pk_cdep': {}, 'pk_senate': {}}

    dirty_proposal_set = set()

    def cdep_id(mandate):
        """Return the ``(year, cdep_number)`` pair used in log messages."""
        return (mandate.year, mandate.cdep_number)

    # Each counter is incremented once per proposal, not once per sponsor.
    sp_updates = sp_added = sp_removed = 0

    changed = []  # rows the patcher reported as changed
    seen = []     # every row processed, changed or not

    with proposal_patcher.process(autoflush=1000) as add_proposal:
        for proposal in dirty_proposal_set:
            # Stored pages are looked up per chamber: 2 for cdep, 1 for
            # senate.
            page_cdep = (
                models.ScrapedProposalPage.query
                .filter_by(chamber=2, pk=proposal.cdeppk_cdep)
                .first()
            )
            page_senate = (
                models.ScrapedProposalPage.query
                .filter_by(chamber=1, pk=proposal.cdeppk_senate)
                .first()
            )

            single_scraper = SingleProposalScraper()

            # SECURITY NOTE(review): pickle.loads executes arbitrary code
            # if the stored blob is attacker-controlled; this presumes
            # `result` was written only by our own scraper -- confirm.
            # The senate page is fed to the scraper before the cdep page.
            if page_senate:
                single_scraper.scrape_page('senate',
                    pickle.loads(page_senate.result))
                page_senate.parsed = True

            if page_cdep:
                single_scraper.scrape_page('cdep',
                    pickle.loads(page_cdep.result))
                page_cdep.parsed = True

            prop = single_scraper.finalize()

            # Keep the existing identity; only new proposals get a new id.
            prop.id = proposal.id or models.random_uuid()
            prop.cdeppk_cdep = proposal.cdeppk_cdep
            prop.cdeppk_senate = proposal.cdeppk_senate

            record = prop.as_dict(['id', 'cdeppk_cdep', 'cdeppk_senate',
                'decision_chamber', 'url', 'title', 'date', 'number_bpi',
                'number_cdep', 'number_senate', 'proposal_type',
                'pdf_url', 'status', 'status_text', 'modification_date'])

            record['activity'] = flask.json.dumps([
                item.as_dict(['date', 'location', 'html'])
                for item in prop.activity
            ])

            # Replace the chamber slug with its `chamber_by_slug` entry;
            # a falsy slug is left as produced by `as_dict`.
            slug = prop.decision_chamber
            if slug:
                record['decision_chamber'] = chamber_by_slug[slug]

            result = add_proposal(record)
            row = result.row
            if result.is_changed:
                changed.append(row)
            seen.append(row)

            # Reconcile sponsorships: diff the scraped mandates against
            # the mandates already attached to the row.
            new_people = {by_cdep_id[ci] for ci in prop.sponsorships}
            existing_sponsorships = {sp.mandate: sp
                                     for sp in row.sponsorships}
            to_remove = set(existing_sponsorships) - new_people
            to_add = new_people - set(existing_sponsorships)
            if to_remove:
                logger.info("Removing sponsors %s: %r", row.id,
                            [cdep_id(m) for m in to_remove])
                sp_removed += 1
                for m in to_remove:
                    sp = existing_sponsorships[m]
                    models.db.session.delete(sp)
            if to_add:
                logger.info("Adding sponsors %s: %r", row.id,
                            [cdep_id(m) for m in to_add])
                sp_added += 1
                for m in to_add:
                    row.sponsorships.append(models.Sponsorship(mandate=m))

            if to_remove or to_add:
                sp_updates += 1

    if no_commit:
        # Dry run: discard everything done above and skip job scheduling.
        # `Logger.warn` is a deprecated alias; use `warning`.
        logger.warning("Rolling back the transaction")
        models.db.session.rollback()
        return

    models.db.session.commit()

    logger.info("Updated sponsorship for %d proposals (+%d, -%d)",
                sp_updates, sp_added, sp_removed)

    if autoanalyze:
        # Jobs are queued only after the commit above has succeeded.
        logger.info("Scheduling analysis jobs for %d proposals", len(changed))
        for proposal in changed:
            if proposal.pdf_url:
                ocr_proposal.delay(proposal.id, autoanalyze=True)

        logger.info("Scheduling policy jobs for %d proposals", len(seen))
        for proposal in seen:
            if proposal.policy_domain_id is None:
                calculate_proposal.delay(proposal.id)