Ejemplo n.º 1
0
def load_from_democlub(csv_files, frozen_seats):
    """Synchronise RefinedIssue entities with DemocracyClub CSV dumps.

    csv_files: iterable of CSV file paths; each row is
        (democlub_id, question, reference_url, seat_name, created, updated,
         seat_slug[, short_name]) - short_name is absent in 7-column rows.
    frozen_seats: collection of seat key names whose issues must not be stored.

    Issues present remotely but missing from the CSVs are marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    to_be_marked_deleted = {}
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log("  Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Key-ordered paging: continue after the last key of the previous batch
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter('__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        # BUG FIX: original leaked the file handle; 'with' guarantees it is
        # closed even if a row fails to parse.
        with open(csv_file, "rb") as csv_handle:
            reader = csv.reader(csv_handle)
            for row in reader:
                # Older dumps lack the trailing short_name column
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name, created, updated, seat_slug, short_name) = row
                key_name = democlub_id

                # DemocracyClub has this constituency without its accent, YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(seat_name.decode('utf-8'), seat_slug.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log("  Frozen seat " + seat_name.decode('utf-8') + ", not storing issue")
                else:
                    refined_issue = RefinedIssue(
                        democlub_id = int(democlub_id),
                        question = question.decode('utf-8'),
                        reference_url = reference_url.decode('utf-8'),
                        short_name = short_name and short_name.decode('utf-8') or None,
                        national = (seat.name == 'National'),
                        seat = seat,
                        created = convdate(created),
                        updated = convdate(updated),
                        key_name = key_name
                    )
                    log("  Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue

                # record we still have this issue
                if key_name in to_be_marked_deleted:
                    del to_be_marked_deleted[key_name]
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log("  Marking deleted issue for " + refined_issue.seat.name + ":" + refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
Ejemplo n.º 2
0
def lookup_issues_by_id():
    """Fetch every RefinedIssue from the datastore.

    Returns a dict mapping str(entity key) -> RefinedIssue, built by
    paging through the datastore 100 entities at a time in key order.
    """
    log("Getting all issues")
    issues_by_id = {}
    count = 0
    batch = RefinedIssue.all().fetch(100)
    while batch:
        log("  getting batch from " + str(count))
        for issue in batch:
            count += 1
            issues_by_id[str(issue.key())] = issue
        # Continue paging from just past the last key seen in this batch
        batch = RefinedIssue.all().filter('__key__ >', batch[-1].key()).fetch(100)
    return issues_by_id
Ejemplo n.º 3
0
def load_from_democlub(csv_files, frozen_seats):
    """Synchronise RefinedIssue entities with DemocracyClub CSV dumps.

    csv_files: iterable of CSV file paths; each row is
        (democlub_id, question, reference_url, seat_name, created, updated).
    frozen_seats: collection of seat key names whose issues must not be stored.

    Issues present remotely but missing from the CSVs are marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    to_be_marked_deleted = {}
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log("  Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Key-ordered paging: continue after the last key of the previous batch
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter('__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        # BUG FIX: original leaked the file handle; 'with' guarantees it is
        # closed even if a row fails to parse.
        with open(csv_file, "rb") as csv_handle:
            reader = csv.reader(csv_handle)
            for row in reader:
                (democlub_id, question, reference_url, seat_name, created, updated) = row
                key_name = democlub_id
                seat = find_seat(seat_name.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log("  Frozen seat " + seat_name + ", not storing issue: " + question)
                else:
                    refined_issue = RefinedIssue(
                        democlub_id = int(democlub_id),
                        question = question.decode('utf-8'),
                        reference_url = reference_url.decode('utf-8'),
                        seat = seat,
                        created = convdate(created),
                        updated = convdate(updated),
                        key_name = key_name
                    )
                    log("  Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue

                # record we still have this issue
                if key_name in to_be_marked_deleted:
                    del to_be_marked_deleted[key_name]
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log("  Marking deleted issue for " + refined_issue.seat.name + ":" + refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
Ejemplo n.º 4
0
def task_average_response_by_party(request, party_key_name, refined_issue_key_name):
    """Task handler: incrementally compute average survey agreement for one
    party/issue pair.

    Walks candidacies in key-ordered chunks, accumulating running totals on
    an AverageResponseByParty entity, and re-queues itself until every
    candidacy has been processed; then stores the final average.
    """
    party = Party.get_by_key_name(party_key_name)
    refined_issue = RefinedIssue.get_by_key_name(refined_issue_key_name)

    arbp = db.Query(AverageResponseByParty).filter('party =', party).filter('refined_issue =', refined_issue).get()
    if not arbp:
        arbp = AverageResponseByParty(party = party, refined_issue = refined_issue,
                average_agreement = None, processing_running_total = 0, 
                processing_running_count = 0, processing_next_key = None)

    chunk = db.Query(Candidacy).filter('deleted = ', False).filter('survey_filled_in =', True)

    # carry on calculation where we left off
    if arbp.processing_last_candidacy == None:
        assert arbp.processing_running_total == 0
        assert arbp.processing_running_count == 0
    else:
        chunk = chunk.filter('__key__ >', arbp.processing_last_candidacy.key())

    # do 100 candidacies at a time, as too slow otherwise.
    # BUG FIX: the original called chunk.fetch(10) and discarded the result,
    # then iterated the *unbounded* query object, so chunking never happened.
    candidacies = chunk.fetch(100)

    candidacy = None
    for candidacy in candidacies:
        survey_response = db.Query(SurveyResponse).filter('candidacy =', candidacy).filter('refined_issue =', refined_issue).get()
        if survey_response:
            arbp.processing_running_total += survey_response.agreement
            arbp.processing_running_count += 1
    arbp.processing_last_candidacy = candidacy

    # if we've finished, work out average
    if candidacy == None:
        # guard against division by zero when no candidacy had a response
        if arbp.processing_running_count:
            arbp.average_agreement = float(arbp.processing_running_total) / float(arbp.processing_running_count)

    arbp.put()

    # calculate next chunk
    if candidacy == None:
        return HttpResponse("Calculation complete for " + party.name + " question: " + refined_issue.question)
    else:
        taskqueue.Queue('average-calc').add(taskqueue.Task(url='/task/average_response_by_party/' + party_key_name + "/" + refined_issue_key_name))
        # BUG FIX: original referenced undefined name 'question' (NameError)
        return HttpResponse("Done " + str(arbp.processing_running_count) + ", queued next chunk for " + party.name + " question: " + refined_issue.question)
Ejemplo n.º 5
0
def load_from_democlub(csv_files, frozen_seats):
    """Synchronise RefinedIssue entities with DemocracyClub CSV dumps.

    csv_files: iterable of CSV file paths; each row is
        (democlub_id, question, reference_url, seat_name, created, updated,
         seat_slug[, short_name]) - short_name is absent in 7-column rows.
    frozen_seats: collection of seat key names whose issues must not be stored.

    Issues present remotely but missing from the CSVs are marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    to_be_marked_deleted = {}
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log("  Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Key-ordered paging: continue after the last key of the previous batch
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter(
            '__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        # BUG FIX: original leaked the file handle; 'with' guarantees it is
        # closed even if a row fails to parse.
        with open(csv_file, "rb") as csv_handle:
            reader = csv.reader(csv_handle)
            for row in reader:
                # Older dumps lack the trailing short_name column
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name, created,
                 updated, seat_slug, short_name) = row
                key_name = democlub_id

                # DemocracyClub has this constituency without its accent, YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(
                    seat_name.decode('utf-8'), seat_slug.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log("  Frozen seat " + seat_name.decode('utf-8') +
                        ", not storing issue")
                else:
                    refined_issue = RefinedIssue(
                        democlub_id=int(democlub_id),
                        question=question.decode('utf-8'),
                        reference_url=reference_url.decode('utf-8'),
                        short_name=short_name and short_name.decode('utf-8')
                        or None,
                        national=(seat.name == 'National'),
                        seat=seat,
                        created=convdate(created),
                        updated=convdate(updated),
                        key_name=key_name)
                    log("  Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue

                # record we still have this issue
                if key_name in to_be_marked_deleted:
                    del to_be_marked_deleted[key_name]
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log("  Marking deleted issue for " + refined_issue.seat.name + ":" +
            refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())