예제 #1
0
def load_from_democlub(csv_files, frozen_seats):
    """Load refined issues from DemocracyClub CSV files into the datastore.

    Every RefinedIssue currently stored with deleted == False is first
    collected by key name.  Each CSV row is then turned into a RefinedIssue
    keyed by its DemocracyClub id, unless the row's seat is frozen.  Stored
    issues whose key never appears in any CSV row are flagged deleted = True
    and written back.

    Args:
        csv_files: iterable of paths to CSV files.  Each row has the columns
            democlub_id, question, reference_url, seat_name, created,
            updated, seat_slug and, optionally, short_name.
        frozen_seats: container of seat key names.  Issues for these seats
            are not stored, but they still count as present, so an existing
            issue for a frozen seat is not marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    to_be_marked_deleted = {}
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log("  Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Next page: only entities whose key is after the last one just seen.
        last_key = refined_issues[-1].key()
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter('__key__ >', last_key).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        csv_handle = open(csv_file, "rb")
        # try/finally so the handle is closed even when a row fails to parse.
        try:
            for row in csv.reader(csv_handle):
                # csv.reader yields an empty list for a blank line; skip it
                # rather than fail the tuple unpacking below.
                if not row:
                    continue

                # short_name is an optional trailing column.
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name, created, updated, seat_slug, short_name) = row
                key_name = democlub_id

                # DemocracyClub has this constituency without its accent, YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(seat_name.decode('utf-8'), seat_slug.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log("  Frozen seat " + seat_name.decode('utf-8') + ", not storing issue")
                else:
                    refined_issue = RefinedIssue(
                        democlub_id = int(democlub_id),
                        question = question.decode('utf-8'),
                        reference_url = reference_url.decode('utf-8'),
                        short_name = short_name.decode('utf-8') if short_name else None,
                        national = (seat.name == 'National'),
                        seat = seat,
                        created = convdate(created),
                        updated = convdate(updated),
                        key_name = key_name
                    )
                    log("  Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue

                # record we still have this issue
                to_be_marked_deleted.pop(key_name, None)
        finally:
            csv_handle.close()
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log("  Marking deleted issue for " + refined_issue.seat.name + ":" + refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
예제 #2
0
def lookup_issues_by_id():
    """Fetch every RefinedIssue and index it by the string form of its key.

    Entities are read in pages of 100; each later page is requested with a
    '__key__ >' filter on the last key of the previous page.

    Returns:
        dict mapping str(entity.key()) to the entity.
    """
    log("Getting all issues")
    issues_by_id = {}
    seen = 0
    batch = RefinedIssue.all().fetch(100)
    while batch:
        # The logged number is how many entities earlier pages contained.
        log("  getting batch from " + str(seen))
        for issue in batch:
            issues_by_id[str(issue.key())] = issue
        seen += len(batch)
        last_key = batch[-1].key()
        batch = RefinedIssue.all().filter('__key__ >', last_key).fetch(100)
    return issues_by_id
예제 #3
0
def load_from_democlub(csv_files, frozen_seats):
    """Load refined issues from DemocracyClub CSV files into the datastore.

    Every RefinedIssue currently stored with deleted == False is first
    collected by key name.  Each CSV row is then turned into a RefinedIssue
    keyed by its DemocracyClub id, unless the row's seat is frozen.  Stored
    issues whose key never appears in any CSV row are flagged deleted = True
    and written back.

    Args:
        csv_files: iterable of paths to CSV files.  Each row has the six
            columns democlub_id, question, reference_url, seat_name,
            created, updated.
        frozen_seats: container of seat key names.  Issues for these seats
            are not stored, but they still count as present, so an existing
            issue for a frozen seat is not marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    to_be_marked_deleted = {}
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log("  Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Next page: only entities whose key is after the last one just seen.
        last_key = refined_issues[-1].key()
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter('__key__ >', last_key).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        csv_handle = open(csv_file, "rb")
        # try/finally so the handle is closed even when a row fails to parse.
        try:
            for row in csv.reader(csv_handle):
                # csv.reader yields an empty list for a blank line; skip it
                # rather than fail the tuple unpacking below.
                if not row:
                    continue
                (democlub_id, question, reference_url, seat_name, created, updated) = row
                key_name = democlub_id
                seat = find_seat(seat_name.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log("  Frozen seat " + seat_name + ", not storing issue: " + question)
                else:
                    refined_issue = RefinedIssue(
                        democlub_id = int(democlub_id),
                        question = question.decode('utf-8'),
                        reference_url = reference_url.decode('utf-8'),
                        seat = seat,
                        created = convdate(created),
                        updated = convdate(updated),
                        key_name = key_name
                    )
                    log("  Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue

                # record we still have this issue
                to_be_marked_deleted.pop(key_name, None)
        finally:
            csv_handle.close()
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log("  Marking deleted issue for " + refined_issue.seat.name + ":" + refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
예제 #4
0
def load_from_democlub(csv_files, frozen_seats):
    """Load refined issues from DemocracyClub CSV files into the datastore.

    Every RefinedIssue currently stored with deleted == False is first
    collected by key name.  Each CSV row is then turned into a RefinedIssue
    keyed by its DemocracyClub id, unless the row's seat is frozen.  Stored
    issues whose key never appears in any CSV row are flagged deleted = True
    and written back.

    Args:
        csv_files: iterable of paths to CSV files.  Each row has the columns
            democlub_id, question, reference_url, seat_name, created,
            updated, seat_slug and, optionally, short_name.
        frozen_seats: container of seat key names.  Issues for these seats
            are not stored, but they still count as present, so an existing
            issue for a frozen seat is not marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    to_be_marked_deleted = {}
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log("  Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Next page: only entities whose key is after the last one just seen.
        last_key = refined_issues[-1].key()
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter(
            '__key__ >', last_key).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        csv_handle = open(csv_file, "rb")
        # try/finally so the handle is closed even when a row fails to parse.
        try:
            for row in csv.reader(csv_handle):
                # csv.reader yields an empty list for a blank line; skip it
                # rather than fail the tuple unpacking below.
                if not row:
                    continue

                # short_name is an optional trailing column.
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name, created,
                 updated, seat_slug, short_name) = row
                key_name = democlub_id

                # DemocracyClub has this constituency without its accent, YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(
                    seat_name.decode('utf-8'), seat_slug.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log("  Frozen seat " + seat_name.decode('utf-8') +
                        ", not storing issue")
                else:
                    if short_name:
                        decoded_short_name = short_name.decode('utf-8')
                    else:
                        decoded_short_name = None
                    refined_issue = RefinedIssue(
                        democlub_id=int(democlub_id),
                        question=question.decode('utf-8'),
                        reference_url=reference_url.decode('utf-8'),
                        short_name=decoded_short_name,
                        national=(seat.name == 'National'),
                        seat=seat,
                        created=convdate(created),
                        updated=convdate(updated),
                        key_name=key_name)
                    log("  Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue

                # record we still have this issue
                to_be_marked_deleted.pop(key_name, None)
        finally:
            csv_handle.close()
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log("  Marking deleted issue for " + refined_issue.seat.name + ":" +
            refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())