def load_from_democlub(csv_files, frozen_seats):
    """Sync refined issues from DemocracyClub CSV exports into the datastore.

    csv_files: paths to CSV files with 7 or 8 columns per row (the 8th,
        short_name, is optional and padded with None when absent).
    frozen_seats: collection of seat key names whose issues must not be stored.

    Issues present remotely but missing from the CSVs are marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    to_be_marked_deleted = {}
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log(" Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Page through results 100 at a time, using the last key as a cursor.
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter('__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        # FIX: use 'with' so the file handle is closed even on parse errors
        # (previously open() was called inline and never closed).
        with open(csv_file, "rb") as csv_handle:
            reader = csv.reader(csv_handle)
            for row in reader:
                # Older exports lack the short_name column; pad to 8 fields.
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name, created, updated, seat_slug, short_name) = row
                key_name = democlub_id
                # DemocracyClub has this constituency without its accent, YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(seat_name.decode('utf-8'), seat_slug.decode('utf-8'))
                if seat.key().name() in frozen_seats:
                    log(" Frozen seat " + seat_name.decode('utf-8') + ", not storing issue")
                else:
                    refined_issue = RefinedIssue(
                        democlub_id = int(democlub_id),
                        question = question.decode('utf-8'),
                        reference_url = reference_url.decode('utf-8'),
                        short_name = short_name and short_name.decode('utf-8') or None,
                        national = (seat.name == 'National'),
                        seat = seat,
                        created = convdate(created),
                        updated = convdate(updated),
                        key_name = key_name
                    )
                    log(" Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue
                    # record we still have this issue
                    if key_name in to_be_marked_deleted:
                        del to_be_marked_deleted[key_name]
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log(" Marking deleted issue for " + refined_issue.seat.name + ":" + refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
def lookup_issues_by_id():
    """Fetch every RefinedIssue and return a dict keyed by str(datastore key)."""
    log("Getting all issues")
    issues_by_id = {}
    count = 0
    batch = RefinedIssue.all().fetch(100)
    while batch:
        log(" getting batch from " + str(count))
        for issue in batch:
            count += 1
            issues_by_id[str(issue.key())] = issue
        # Page forward: the next batch starts just after the last key seen.
        batch = RefinedIssue.all().filter('__key__ >', batch[-1].key()).fetch(100)
    return issues_by_id
def load_from_democlub(csv_files, frozen_seats):
    """Sync refined issues from DemocracyClub CSV exports into the datastore.

    csv_files: paths to 6-column CSV files
        (democlub_id, question, reference_url, seat_name, created, updated).
    frozen_seats: collection of seat key names whose issues must not be stored.

    Issues present remotely but missing from the CSVs are marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    to_be_marked_deleted = {}
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log(" Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Page through results 100 at a time, using the last key as a cursor.
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter('__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        # FIX: use 'with' so the file handle is closed even on parse errors
        # (previously open() was called inline and never closed).
        with open(csv_file, "rb") as csv_handle:
            reader = csv.reader(csv_handle)
            for row in reader:
                (democlub_id, question, reference_url, seat_name, created, updated) = row
                key_name = democlub_id
                seat = find_seat(seat_name.decode('utf-8'))
                if seat.key().name() in frozen_seats:
                    log(" Frozen seat " + seat_name + ", not storing issue: " + question)
                else:
                    refined_issue = RefinedIssue(
                        democlub_id = int(democlub_id),
                        question = question.decode('utf-8'),
                        reference_url = reference_url.decode('utf-8'),
                        seat = seat,
                        created = convdate(created),
                        updated = convdate(updated),
                        key_name = key_name
                    )
                    log(" Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue
                    # record we still have this issue
                    if key_name in to_be_marked_deleted:
                        del to_be_marked_deleted[key_name]
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log(" Marking deleted issue for " + refined_issue.seat.name + ":" + refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
def task_average_response_by_party(request, party_key_name, refined_issue_key_name):
    """Task-queue handler: incrementally compute one party's average agreement
    with one refined issue.

    Processes candidacies in chunks, accumulating running totals on the
    AverageResponseByParty entity and re-queueing itself until every
    candidacy has been seen, at which point the average is finalised.
    """
    party = Party.get_by_key_name(party_key_name)
    refined_issue = RefinedIssue.get_by_key_name(refined_issue_key_name)

    arbp = db.Query(AverageResponseByParty).filter('party =', party).filter('refined_issue =', refined_issue).get()
    if not arbp:
        # NOTE(review): constructor sets processing_next_key but the rest of
        # the function uses processing_last_candidacy — looks like a leftover
        # from a field rename; confirm against the model definition.
        arbp = AverageResponseByParty(
            party = party,
            refined_issue = refined_issue,
            average_agreement = None,
            processing_running_total = 0,
            processing_running_count = 0,
            processing_next_key = None
        )

    chunk = db.Query(Candidacy).filter('deleted = ', False).filter('survey_filled_in =', True)
    # carry on calculation where we left off
    if arbp.processing_last_candidacy == None:
        assert arbp.processing_running_total == 0
        assert arbp.processing_running_count == 0
    else:
        chunk = chunk.filter('__key__ >', arbp.processing_last_candidacy.key())

    # do 100 candidacies at a time, as too slow otherwise
    # FIX: the fetch result was previously discarded (chunk.fetch(10) # XXX)
    # and the loop iterated the whole unbounded query; fetch a bounded batch
    # and iterate that instead, so the chunk/requeue mechanism actually works.
    candidacies = chunk.fetch(100)

    candidacy = None
    for candidacy in candidacies:
        survey_response = db.Query(SurveyResponse).filter('candidacy =', candidacy).filter('refined_issue =', refined_issue).get()
        if survey_response:
            arbp.processing_running_total += survey_response.agreement
            arbp.processing_running_count += 1
        # Always advance the cursor, even for candidacies with no response,
        # otherwise a responseless tail would stall progress forever.
        arbp.processing_last_candidacy = candidacy

    # if we've finished, work out average
    if candidacy == None:
        # FIX: guard against division by zero when no responses were counted;
        # average_agreement stays None in that case.
        if arbp.processing_running_count > 0:
            arbp.average_agreement = float(arbp.processing_running_total) / float(arbp.processing_running_count)
    arbp.put()

    # calculate next chunk
    if candidacy == None:
        return HttpResponse("Calculation complete for " + party.name + " question: " + refined_issue.question)
    else:
        taskqueue.Queue('average-calc').add(taskqueue.Task(url='/task/average_response_by_party/' + party_key_name + "/" + refined_issue_key_name))
        # FIX: 'question' was an undefined name here (NameError on every
        # non-final chunk); use refined_issue.question.
        return HttpResponse("Done " + str(arbp.processing_running_count) + ", queued next chunk for " + party.name + " question: " + refined_issue.question)
def load_from_democlub(csv_files, frozen_seats):
    """Sync refined issues from DemocracyClub CSV exports into the datastore.

    csv_files: paths to CSV files with 7 or 8 columns per row (the 8th,
        short_name, is optional and padded with None when absent).
    frozen_seats: collection of seat key names whose issues must not be stored.

    Issues present remotely but missing from the CSVs are marked deleted.
    """
    # Get list of existing refined issues in remote datastore, so can track what to delete
    log("Getting list of refined issues")
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    to_be_marked_deleted = {}
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log(" Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        # Page through 100 at a time, keyed past the last entity seen.
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter(
            '__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        # FIX: use 'with' so the file handle is closed even on parse errors
        # (previously open() was called inline and never closed).
        with open(csv_file, "rb") as csv_handle:
            reader = csv.reader(csv_handle)
            for row in reader:
                # Older exports lack the short_name column; pad to 8 fields.
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name, created, updated, seat_slug, short_name) = row
                key_name = democlub_id
                # DemocracyClub has this constituency without its accent, YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(
                    seat_name.decode('utf-8'), seat_slug.decode('utf-8'))
                if seat.key().name() in frozen_seats:
                    log(" Frozen seat " + seat_name.decode('utf-8') + ", not storing issue")
                else:
                    refined_issue = RefinedIssue(
                        democlub_id=int(democlub_id),
                        question=question.decode('utf-8'),
                        reference_url=reference_url.decode('utf-8'),
                        short_name=short_name and short_name.decode('utf-8') or None,
                        national=(seat.name == 'National'),
                        seat=seat,
                        created=convdate(created),
                        updated=convdate(updated),
                        key_name=key_name)
                    log(" Storing local issue for " + seat_name + ": " + question)
                    refined_issues_by_key[key_name] = refined_issue
                    # record we still have this issue
                    if key_name in to_be_marked_deleted:
                        del to_be_marked_deleted[key_name]
    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for key_name, refined_issue in to_be_marked_deleted.iteritems():
        log(" Marking deleted issue for " + refined_issue.seat.name + ":" + refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())