def execute(trial=False):
        """
            Scrape electionstats to get data about each MA general state senate
            election (2000-2018) and insert the rows into a MongoDB collection.

            trial: when True, only the first 10 election rows are processed
                   (the data is still inserted into the collection).
            Returns a dict with the run's "start" and "end" timestamps.

            ex) {
                    "_id" : "131666",
                    "year" : 2018,
                    "district" : "1st Middlesex",
                    "candidates" :
                    [ {
                        "name" : "Edward J. Kennedy",
                        "party" : "Democratic",
                        "isWinner" : true
                    }, {
                        "name" : "John A. Macdonald",
                        "party" : "Republican",
                        "isWinner" : false
                    } ] }
        """
        startTime = datetime.datetime.now()

        # Set up the database connection.
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate(TEAM_NAME, TEAM_NAME)

        # Fetch the page and pull every election row (<tr class="election_item">).
        raw_html = scraper.simple_get(STATE_SENATE_GENERAL_2000_2018_URL)
        html = BeautifulSoup(raw_html, 'html.parser')

        # Parse the row list once; trial mode just truncates it.
        electionsList = html.findAll("tr", {"class": "election_item"})
        if trial:
            electionsList = electionsList[:10]

        electionsRows = []

        # Build a document for each election.
        for election in electionsList:
            electionData = {}
            # The numeric election id is the suffix of the tr's id attribute.
            electionData['_id'] = election['id'].split('-')[-1]
            electionData['year'] = int(
                election.findAll("td", {"class": "year"})[0].contents[0])
            # The district name sits in the second un-classed <td>.
            electionData['district'] = election.findAll(
                "td", {"class": ""})[1].contents[0]

            # Build the candidates sub-documents: name, party, and isWinner.
            candidates = []
            table = election.find("table", {"class": "candidates"})

            # The winning candidate's row carries class "is_winner".
            winner = table.find("tr", {
                "class": "is_winner"
            }).find("td", {"class": "candidate"})
            candidates.append(
                stateSenateElections.buildCandidateRow(winner, True))

            # Remaining candidate cells, in document order.
            # NOTE(review): assumes the winner's cell is always first in the
            # table, so [1:] skips exactly it — confirm against the page markup.
            for candidate in table.findAll("td", {"class": "candidate"})[1:]:
                candidates.append(
                    stateSenateElections.buildCandidateRow(candidate, False))

            # buildCandidateRow can yield None; drop those entries.
            electionData['candidates'] = [c for c in candidates if c is not None]

            electionsRows.append(electionData)

        # Insert rows into the collection.
        # NOTE(review): the literal "stateSenateElection" drop looks like a
        # leftover legacy collection name alongside STATE_SENATE_ELECTIONS;
        # kept for safety — confirm whether it can be removed.
        repo.dropCollection("stateSenateElection")
        repo.dropCollection(STATE_SENATE_ELECTIONS)
        repo.createCollection(STATE_SENATE_ELECTIONS)
        repo[STATE_SENATE_ELECTIONS_NAME].insert_many(electionsRows)
        repo[STATE_SENATE_ELECTIONS_NAME].metadata({'complete': True})
        print(repo[STATE_SENATE_ELECTIONS_NAME].metadata())

        repo.logout()

        endTime = datetime.datetime.now()

        return {"start": startTime, "end": endTime}
# --- Example 2 (second scraped snippet from the same source) ---
    def execute(trial=False):
        """
            Scrape electionstats to get data about each MA ballot question
            (2000-2018) and insert the rows into a MongoDB collection.

            trial: when True, returns immediately without scraping or writing.
            Returns a dict with the run's "start" and "end" timestamps.

            ex) {
                    "_id" : "7322",
                    "year" : 2018,
                    "number" : "4",
                    "description" : "SUMMARY Sections 3 to 7 of Chapter 44B of the General Laws
                        of Massachusetts, also known as the Community Preservation Act (the 'Act'),
                        establishes a dedicated funding source for the: acquisition, creation and
                        preservation of open space; acquisition, preservation, rehabilitation and
                        restoration of hi...",
                    "type" : "Local Question",
                    "location" : "Various cities/towns"
                }
        """
        startTime = datetime.datetime.now()

        # NOTE(review): trial mode here does nothing at all, whereas the
        # senate-election scraper processes a 10-row subset — confirm which
        # behavior is intended; preserved as-is.
        if trial:
            endTime = datetime.datetime.now()
            return {"start": startTime, "end": endTime}

        # Set up the database connection.
        client = dml.pymongo.MongoClient()
        repo = client.repo
        repo.authenticate(TEAM_NAME, TEAM_NAME)

        # Fetch the page and pull every ballot-question row.
        raw_html = scraper.simple_get(BALLOT_QUESTION_2000_2018_URL)
        html = BeautifulSoup(raw_html, 'html.parser')
        questionList = html.findAll("tr", {"class": "election_item"})

        ballotQuestionsRows = []

        # Build a document for each ballot question.
        for question in questionList:
            questionData = {}
            # The numeric question id is the suffix of the tr's id attribute.
            questionData['_id'] = question['id'].split('-')[-1]
            questionData['year'] = int(
                question.findAll("td", {"class": "year"})[0].contents[0])
            questionData['number'] = question.findAll(
                "td", {"class": "number"})[0].contents[0]
            questionData['description'] = question.findAll(
                "td", {"class": "display_question"})[0].contents[0]
            # Look up the type cell once instead of twice; it may be empty.
            typeCell = question.findAll("td", {"class": "bq_types"})[0]
            questionData['type'] = typeCell.contents[0] if typeCell.contents else ''
            questionData['location'] = question.findAll(
                "td", {"class": "bq_location"})[0].contents[0]
            ballotQuestionsRows.append(questionData)

        # Insert rows into the collection.
        repo.dropCollection(BALLOT_QUESTIONS)
        repo.createCollection(BALLOT_QUESTIONS)
        repo[BALLOT_QUESTIONS_NAME].insert_many(ballotQuestionsRows)
        repo[BALLOT_QUESTIONS_NAME].metadata({'complete': True})
        print(repo[BALLOT_QUESTIONS_NAME].metadata())

        repo.logout()

        endTime = datetime.datetime.now()

        return {"start": startTime, "end": endTime}