def get(self):
    max_results = min(self.params.max_results or 10, HARD_MAX_RESULTS)
    skip = min(self.params.skip or 0, MAX_SKIP)

    if self.params.omit_notes:  # Return only the person records.
        get_notes_for_person = lambda person: []
    else:
        def get_notes_for_person(person):
            notes = model.Note.get_by_person_record_id(
                person['person_record_id'])
            records = map(pfif.PFIF_1_2.note_to_dict, notes)
            utils.filter_sensitive_fields(records, self.request)
            return records

    if self.params.min_entry_date:  # Scan forward.
        query = model.Person.all().order('entry_date').filter(
            'entry_date >=', self.params.min_entry_date)
    else:  # Show recent entries, scanning backward.
        query = model.Person.all().order('-entry_date')
    persons = query.fetch(max_results, skip)

    updated = get_latest_entry_date(persons)
    self.response.headers['Content-Type'] = 'application/xml'
    records = map(pfif.PFIF_1_2.person_to_dict, persons)
    utils.filter_sensitive_fields(records, self.request)
    atom.ATOM_PFIF_1_2.write_person_feed(
        self.response.out, records, get_notes_for_person,
        self.request.url, self.domain, '', updated)
def test_filter_sensitive_fields_in_joined_record(self):
    """Test passing a joined record of a person and a note to
    utils.filter_sensitive_fields().
    """
    joined_record = {
        'person_record_id': 'person.1',
        'person_full_name': 'Taro Yamada',
        'person_date_of_birth': '2000-01-01',
        'person_author_email': '*****@*****.**',
        'person_author_phone': '01234567890',
        'note_record_id': 'note.1',
        'note_status': 'is_note_author',
        'note_text': 'I am safe',
        'note_author_email': '*****@*****.**',
        'note_author_phone': '01234567890',
    }
    utils.filter_sensitive_fields([joined_record])
    assert joined_record['person_record_id'] == 'person.1'
    assert joined_record['person_full_name'] == 'Taro Yamada'
    assert joined_record['person_date_of_birth'] == ''
    assert joined_record['person_author_email'] == ''
    assert joined_record['person_author_phone'] == ''
    assert joined_record['note_record_id'] == 'note.1'
    assert joined_record['note_status'] == 'is_note_author'
    assert joined_record['note_text'] == 'I am safe'
    assert joined_record['note_author_email'] == ''
    assert joined_record['note_author_phone'] == ''
def run_task_for_repo(self, repo):
    start_time = utils.get_utcnow()
    timestamp = self.params.timestamp or start_time
    is_first = not self.params.cursor
    query = model.Person.all_in_repo(repo).order('entry_date')
    if self.params.cursor:
        query.with_cursor(self.params.cursor)

    filtered_writer = record_writer.PersonWithNoteCsvWriter(
        StringIO.StringIO(), write_header=is_first)
    full_writer = record_writer.PersonWithNoteCsvWriter(
        StringIO.StringIO(), write_header=is_first)
    has_data = False
    scan_completed = False
    while True:
        persons = query.fetch(limit=FETCH_LIMIT)
        if persons:
            has_data = True
        else:
            scan_completed = True
            break
        full_records = self.get_person_records_with_notes(repo, persons)
        full_writer.write(full_records)
        filtered_records = copy.deepcopy(full_records)
        utils.filter_sensitive_fields(filtered_records)
        filtered_writer.write(filtered_records)
        if utils.get_utcnow() >= start_time + self.MAX_FETCH_TIME:
            break
        query.with_cursor(query.cursor())

    for kind, writer in [
            ('filtered', filtered_writer), ('full', full_writer)]:
        base_name = '%s-persons-%s-%s' % (
            repo, kind, timestamp.strftime('%Y-%m-%d-%H%M%S'))
        final_csv_name = '%s.csv' % base_name
        temp_csv_name = '%s.temp.csv' % base_name
        if is_first:
            self.storage.insert_object(
                final_csv_name, 'text/csv', writer.io.getvalue())
        elif has_data:
            # Create a temporary CSV file with the new records, and append
            # it to the final CSV file.
            self.storage.insert_object(
                temp_csv_name, 'text/csv', writer.io.getvalue())
            self.storage.compose_objects(
                [final_csv_name, temp_csv_name], final_csv_name, 'text/csv')
        if scan_completed:
            key = 'latest_%s_csv_object_name' % kind
            config.set_for_repo(repo, **{key: final_csv_name})

    if not scan_completed:
        self.schedule_next_task(query.cursor(), timestamp)
def run_task_for_repo(self, repo):
    start_time = utils.get_utcnow()
    timestamp = self.params.timestamp or start_time
    base_name = '%s-persons-%s' % (
        repo, timestamp.strftime('%Y-%m-%d-%H%M%S'))
    is_first = not self.params.cursor
    query = model.Person.all_in_repo(repo).order('entry_date')
    if self.params.cursor:
        query.with_cursor(self.params.cursor)

    csv_io = StringIO.StringIO()
    writer = record_writer.PersonWithNoteCsvWriter(
        csv_io, write_header=is_first)
    has_data = False
    scan_completed = False
    while True:
        persons = query.fetch(limit=FETCH_LIMIT)
        if persons:
            has_data = True
        else:
            scan_completed = True
            break
        records = self.get_person_records_with_notes(repo, persons)
        # So far this only supports dumping records without sensitive fields.
        utils.filter_sensitive_fields(records)
        writer.write(records)
        if utils.get_utcnow() >= start_time + self.MAX_FETCH_TIME:
            break
        query.with_cursor(query.cursor())

    final_csv_name = '%s.csv' % base_name
    temp_csv_name = '%s.temp.csv' % base_name
    if is_first:
        self.storage.insert_object(
            final_csv_name, 'text/csv', csv_io.getvalue())
    elif has_data:
        # Create a temporary CSV file with the new records, and append it
        # to the final CSV file.
        self.storage.insert_object(
            temp_csv_name, 'text/csv', csv_io.getvalue())
        self.storage.compose_objects(
            [final_csv_name, temp_csv_name], final_csv_name, 'text/csv')
    if scan_completed:
        config.set_for_repo(repo, latest_csv_object_name=final_csv_name)
    else:
        self.schedule_next_task(query.cursor(), timestamp)
def test_filter_sensitive_fields_in_person_record(self):
    """Test passing a person record to utils.filter_sensitive_fields().
    """
    person_record = {
        'person_record_id': 'person.1',
        'full_name': 'Taro Yamada',
        'date_of_birth': '2000-01-01',
        'author_email': '*****@*****.**',
        'author_phone': '01234567890',
    }
    utils.filter_sensitive_fields([person_record])
    assert person_record['person_record_id'] == 'person.1'
    assert person_record['full_name'] == 'Taro Yamada'
    assert person_record['date_of_birth'] == ''
    assert person_record['author_email'] == ''
    assert person_record['author_phone'] == ''
def test_filter_sensitive_fields_in_note_record(self):
    """Test passing a note record to utils.filter_sensitive_fields().
    """
    note_record = {
        'note_record_id': 'note.1',
        'person_record_id': 'person.1',
        'status': 'is_note_author',
        'text': 'I am safe',
        'author_email': '*****@*****.**',
        'author_phone': '01234567890',
    }
    utils.filter_sensitive_fields([note_record])
    assert note_record['note_record_id'] == 'note.1'
    assert note_record['person_record_id'] == 'person.1'
    assert note_record['status'] == 'is_note_author'
    assert note_record['text'] == 'I am safe'
    assert note_record['author_email'] == ''
    assert note_record['author_phone'] == ''
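# The three tests above pin down the observable behavior of
# utils.filter_sensitive_fields(): it mutates each record dict in place,
# blanking date-of-birth and author contact fields (including the
# person_-/note_-prefixed variants in joined records) while leaving
# record IDs, names, status, and text untouched. The sketch below is only
# an illustration consistent with those tests; the suffix-matching
# strategy and the optional `request` parameter (ignored here, but passed
# by the API and feed handlers) are assumptions, not the real utils.py code.
SENSITIVE_FIELD_SUFFIXES = ('date_of_birth', 'author_email', 'author_phone')

def filter_sensitive_fields(records, request=None):
    """Blanks out sensitive fields in each record dict, in place."""
    for record in records:
        for field in record:
            if field.endswith(SENSITIVE_FIELD_SUFFIXES):
                record[field] = ''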
def get(self):
    pfif_version = pfif.PFIF_VERSIONS.get(self.params.version or '1.1')

    # Note that self.request.get can handle multiple IDs at once; we
    # can consider adding support for multiple records later.
    record_id = self.request.get('id')
    if not record_id:
        return self.error(400, 'Missing id parameter')

    person = model.Person.get_by_person_record_id(record_id)
    if not person:
        return self.error(404, 'No person record with ID %s' % record_id)
    notes = model.Note.get_by_person_record_id(record_id, 200)

    self.response.headers['Content-Type'] = 'application/xml'
    records = [pfif_version.person_to_dict(person)]
    note_records = map(pfif_version.note_to_dict, notes)
    utils.filter_sensitive_fields(records, self.request)
    utils.filter_sensitive_fields(note_records, self.request)
    pfif_version.write_file(
        self.response.out, records, lambda p: note_records)
def get(self):
    max_results = min(self.params.max_results or 10, HARD_MAX_RESULTS)
    skip = min(self.params.skip or 0, MAX_SKIP)

    if self.params.min_entry_date:  # Scan forward.
        query = model.Note.all().order('entry_date').filter(
            'entry_date >=', self.params.min_entry_date)
    else:  # Show recent entries, scanning backward.
        query = model.Note.all().order('-entry_date')
    if self.params.person_record_id:  # Show notes for a specific person.
        query = query.filter(
            'person_record_id = ', self.params.person_record_id)
    notes = query.fetch(max_results, skip)

    updated = get_latest_entry_date(notes)
    self.response.headers['Content-Type'] = 'application/xml'
    records = map(pfif.PFIF_1_2.note_to_dict, notes)
    utils.filter_sensitive_fields(records, self.request)
    atom.ATOM_PFIF_1_2.write_note_feed(
        self.response.out, records, self.request.url,
        self.domain, '', updated)
def get_notes_for_person(person):
    notes = model.Note.get_by_person_record_id(
        person['person_record_id'])
    records = map(pfif.PFIF_1_2.note_to_dict, notes)
    utils.filter_sensitive_fields(records, self.request)
    return records