def load_from_democlub(csv_files, frozen_seats):
    """Synchronise stored RefinedIssue entities with DemocracyClub CSV files.

    Each CSV row is turned into a RefinedIssue whose key name is the
    DemocracyClub id and is put to the datastore, unless the row's seat is
    frozen.  Issues currently stored with ``deleted == False`` that appear in
    none of the CSV files are flagged ``deleted = True`` and put back.

    Args:
        csv_files: iterable of paths to CSV files.  Each row holds the
            columns democlub_id, question, reference_url, seat_name,
            created, updated, seat_slug and, optionally, short_name.
        frozen_seats: container of seat key names (tested with ``in``) for
            which no issue may be stored.
    """
    # Get list of existing refined issues in remote datastore, so can track
    # what to delete.  Read in pages of 100, resuming after the last key of
    # the previous page.
    log("Getting list of refined issues")
    to_be_marked_deleted = {}
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log(" Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter(
            '__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        csv_handle = open(csv_file, "rb")
        # try/finally so the handle is released even if a row fails to parse.
        try:
            for row in csv.reader(csv_handle):
                # short_name is an optional trailing column.
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name,
                 created, updated, seat_slug, short_name) = row
                key_name = democlub_id

                # DemocracyClub has this constituency without its accent,
                # YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(
                    seat_name.decode('utf-8'), seat_slug.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log(" Frozen seat " + seat_name.decode('utf-8') +
                        ", not storing issue")
                else:
                    refined_issue = RefinedIssue(
                        democlub_id=int(democlub_id),
                        question=question.decode('utf-8'),
                        reference_url=reference_url.decode('utf-8'),
                        short_name=(short_name.decode('utf-8')
                                    if short_name else None),
                        national=(seat.name == 'National'),
                        seat=seat,
                        created=convdate(created),
                        updated=convdate(updated),
                        key_name=key_name)
                    log(" Storing local issue for " + seat_name + ": " +
                        question)
                    refined_issues_by_key[key_name] = refined_issue

                # Record we still have this issue.
                # NOTE(review): done for every row, frozen seats included, so
                # that an issue belonging to a frozen seat is never flagged
                # as deleted merely because it was not re-stored.
                to_be_marked_deleted.pop(key_name, None)
        finally:
            csv_handle.close()

    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for refined_issue in to_be_marked_deleted.itervalues():
        log(" Marking deleted issue for " + refined_issue.seat.name + ":" +
            refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
def lookup_issues_by_id():
    """Return a dict of every RefinedIssue keyed by ``str(entity.key())``.

    Entities are read in pages of 100; each following page is requested with
    a ``__key__ >`` filter on the last key of the page just processed.
    """
    log("Getting all issues")
    issues_by_id = {}
    seen_count = 0
    batch = RefinedIssue.all().fetch(100)
    while batch:
        # The logged number is how many issues were read before this page.
        log(" getting batch from " + str(seen_count))
        for issue in batch:
            issues_by_id[str(issue.key())] = issue
        seen_count += len(batch)
        last_key = batch[-1].key()
        batch = RefinedIssue.all().filter('__key__ >', last_key).fetch(100)
    return issues_by_id
def load_from_democlub(csv_files, frozen_seats):
    """Synchronise stored RefinedIssue entities with DemocracyClub CSV files.

    Each CSV row is turned into a RefinedIssue whose key name is the
    DemocracyClub id and is put to the datastore, unless the row's seat is
    frozen.  Issues currently stored with ``deleted == False`` that appear in
    none of the CSV files are flagged ``deleted = True`` and put back.

    Args:
        csv_files: iterable of paths to CSV files.  Each row holds exactly
            the columns democlub_id, question, reference_url, seat_name,
            created and updated.
        frozen_seats: container of seat key names (tested with ``in``) for
            which no issue may be stored.
    """
    # Get list of existing refined issues in remote datastore, so can track
    # what to delete.  Read in pages of 100, resuming after the last key of
    # the previous page.
    log("Getting list of refined issues")
    to_be_marked_deleted = {}
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log(" Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter(
            '__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        csv_handle = open(csv_file, "rb")
        # try/finally so the handle is released even if a row fails to parse.
        try:
            for row in csv.reader(csv_handle):
                (democlub_id, question, reference_url, seat_name,
                 created, updated) = row
                key_name = democlub_id
                seat = find_seat(seat_name.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log(" Frozen seat " + seat_name +
                        ", not storing issue: " + question)
                else:
                    refined_issue = RefinedIssue(
                        democlub_id=int(democlub_id),
                        question=question.decode('utf-8'),
                        reference_url=reference_url.decode('utf-8'),
                        seat=seat,
                        created=convdate(created),
                        updated=convdate(updated),
                        key_name=key_name)
                    log(" Storing local issue for " + seat_name + ": " +
                        question)
                    refined_issues_by_key[key_name] = refined_issue

                # Record we still have this issue.
                # NOTE(review): done for every row, frozen seats included, so
                # that an issue belonging to a frozen seat is never flagged
                # as deleted merely because it was not re-stored.
                to_be_marked_deleted.pop(key_name, None)
        finally:
            csv_handle.close()

    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for refined_issue in to_be_marked_deleted.itervalues():
        log(" Marking deleted issue for " + refined_issue.seat.name + ":" +
            refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())
def load_from_democlub(csv_files, frozen_seats):
    """Bring the stored RefinedIssue set in line with DemocracyClub CSV data.

    Rows from ``csv_files`` become RefinedIssue entities keyed by their
    DemocracyClub id and are put in batches; rows whose seat key name is in
    ``frozen_seats`` are logged and not stored.  Afterwards every issue that
    was stored with ``deleted == False`` but was not seen in any CSV file is
    flagged ``deleted = True`` and put back.

    Args:
        csv_files: iterable of paths to CSV files.  Each row holds the
            columns democlub_id, question, reference_url, seat_name,
            created, updated, seat_slug and, optionally, short_name.
        frozen_seats: container of seat key names (tested with ``in``) for
            which no issue may be stored.
    """
    # Get list of existing refined issues in remote datastore, so can track
    # what to delete.  Read in pages of 100, resuming after the last key of
    # the previous page.
    log("Getting list of refined issues")
    to_be_marked_deleted = {}
    refined_issues = RefinedIssue.all().filter("deleted =", False).fetch(100)
    while refined_issues:
        for refined_issue in refined_issues:
            key_name = refined_issue.key().name()
            log(" Marking before have refined issue key " + key_name)
            to_be_marked_deleted[key_name] = refined_issue
        refined_issues = RefinedIssue.all().filter("deleted =", False).filter(
            '__key__ >', refined_issues[-1].key()).fetch(100)

    # Load in CSV file and create/update all the issues
    refined_issues_by_key = {}
    for csv_file in csv_files:
        log("Reading CSV file " + csv_file)
        csv_handle = open(csv_file, "rb")
        # try/finally so the handle is released even if a row fails to parse.
        try:
            for row in csv.reader(csv_handle):
                # short_name is an optional trailing column.
                if len(row) == 7:
                    row.append(None)
                (democlub_id, question, reference_url, seat_name,
                 created, updated, seat_slug, short_name) = row
                key_name = democlub_id

                # DemocracyClub has this constituency without its accent,
                # YourNextMP has it with it.
                seat_name = seat_name.replace("Ynys Mon", "Ynys Môn")
                seat = find_democracyclub_seat_in_yournextmp(
                    seat_name.decode('utf-8'), seat_slug.decode('utf-8'))

                if seat.key().name() in frozen_seats:
                    log(" Frozen seat " + seat_name.decode('utf-8') +
                        ", not storing issue")
                else:
                    refined_issue = RefinedIssue(
                        democlub_id=int(democlub_id),
                        question=question.decode('utf-8'),
                        reference_url=reference_url.decode('utf-8'),
                        short_name=(short_name.decode('utf-8')
                                    if short_name else None),
                        national=(seat.name == 'National'),
                        seat=seat,
                        created=convdate(created),
                        updated=convdate(updated),
                        key_name=key_name)
                    log(" Storing local issue for " + seat_name + ": " +
                        question)
                    refined_issues_by_key[key_name] = refined_issue

                # Record we still have this issue.
                # NOTE(review): done for every row, frozen seats included, so
                # that an issue belonging to a frozen seat is never flagged
                # as deleted merely because it was not re-stored.
                to_be_marked_deleted.pop(key_name, None)
        finally:
            csv_handle.close()

    log("Putting all refined issues")
    put_in_batches(refined_issues_by_key.values())

    # See which refined issues are left, i.e. are deleted
    for refined_issue in to_be_marked_deleted.itervalues():
        log(" Marking deleted issue for " + refined_issue.seat.name + ":" +
            refined_issue.question)
        refined_issue.deleted = True
    log("Putting marked deleted refined issues")
    put_in_batches(to_be_marked_deleted.values())