def pytest_configure(self, config):
    options = config.option
    url = options.server or 'localhost:%d' % options.port
    secure, host, port, path = remote_api.parse_url(url)
    if host == 'localhost':
        # We need to start up a clean new appserver for testing.
        self.threads.append(AppServerRunner(options.port, options.mailport))
        self.threads.append(MailThread(options.mailport))
    for thread in self.threads:
        thread.start()
    for thread in self.threads:
        thread.wait_until_ready()

    # Connect to the datastore.
    remote_api.connect(url, server_type='local')

    # Reset the datastore for the first test.
    reset_data()

    # Give the tests access to configuration information.
    config.hostport = '%s:%d' % (host, port)
    config.mail_server = MailThread

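# wait_until_ready() above suggests a readiness handshake between the test
# harness and its helper threads. A hypothetical sketch of that pattern
# (the event-based design is an assumption; AppServerRunner and MailThread
# may implement it differently):
import threading

class ReadyThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        # Subclasses set this once their server is accepting connections.
        self.ready = threading.Event()

    def wait_until_ready(self, timeout=30):
        if not self.ready.wait(timeout):
            raise RuntimeError('server thread not ready after %ds' % timeout)
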
def import_site_export(export_path, remote_api_host, app_id,
                       batch_size, store_all):
    # Log in, then use the pfif parser to parse the export file. Use the
    # importer methods to convert the dicts to entities, then add them as
    # in import.py, but less strictly, to ensure that all exported data is
    # available.
    remote_api.connect(remote_api_host, app_id)
    logging.info('%s: importing exported records from %s',
                 remote_api_host, export_path)
    if export_path.endswith('.zip'):
        export_fd = open_file_inside_zip(export_path)
    else:
        export_fd = open(export_path)
    persons, notes = pfif.parse_file(export_fd)
    logging.info('loaded %d persons, %d notes', len(persons), len(notes))
    if not store_all:
        persons = [d for d in persons if is_clone(d.get('person_record_id'))]
        notes = [d for d in notes if is_clone(d.get('note_record_id'))]
        logging.info('... down to %d persons, %d notes after excluding '
                     '%r records', len(persons), len(notes), HOME_DOMAIN)
    logging.info('... adding persons')
    add_entities(persons, create_person, batch_size, 'person', store_all)
    logging.info('... adding notes')
    add_entities(notes, create_note, batch_size, 'note', store_all)

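# open_file_inside_zip() is not defined in this excerpt. A minimal sketch,
# assuming the export zip contains exactly one file (the helper name and
# behavior are inferred from the call site above):
import zipfile

def open_file_inside_zip(zip_path):
    # Assumed behavior: return a file-like handle to the single file inside
    # the export zip, suitable for pfif.parse_file().
    archive = zipfile.ZipFile(zip_path)
    return archive.open(archive.namelist()[0])
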
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    parser = optparse.OptionParser(usage='%prog [options] <appserver_url>')
    parser.add_option('--host',
                      help='Host name. e.g. mypersonfinder.appspot.com:80')
    parser.add_option('--repo',
                      help='Name of the Person Finder repository.')
    parser.add_option('--email',
                      help=('Email address used to connect to AppEngine. '
                            'The user must be an admin of the application.'))
    parser.add_option('--password_path',
                      help=('A text file which contains the password of '
                            'the user specified by --email. If omitted, '
                            'the command prompts you for the password.'))
    parser.add_option('--action', type='choice',
                      choices=['expire', 'preview'],
                      help=('One of:\n'
                            'expire: Marks old entries as expired. They '
                            'disappear from the UI and the API, but are kept '
                            'in the data store. They are automatically '
                            'deleted later. Note that clone records are '
                            'deleted immediately.\n'
                            'preview: Does nothing, but dumps entries which '
                            'would be marked as expired.'))
    parser.add_option('--min_age_seconds', type='int', default=3600,
                      help=('Expires entries whose entry_date is more than '
                            'this many seconds old.'))
    parser.add_option('--id_whitelist',
                      help=('Comma-separated person IDs which are never '
                            'deleted. e.g. '
                            'example.personfinder.google.org/person.1234,'
                            'example.personfinder.google.org/person.5678'))
    parser.add_option('--dump_all', default='false',
                      help='Also dumps entries which are not deleted.')
    parser.add_option('--include_expired', default='false',
                      help='Includes expired entries when using --dump_all.')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('Exactly one argument must be given.')
    host = args[0]
    if not options.repo:
        parser.error('--repo is missing.')
    if not options.action:
        parser.error('--action is missing.')

    if options.password_path:
        with open(options.password_path) as f:
            password = f.read().rstrip('\n')
    else:
        password = None
    if options.id_whitelist:
        id_whitelist = options.id_whitelist.split(',')
    else:
        id_whitelist = []

    remote_api.connect(host, options.email, password)

    expired_entries = []
    max_entry_date = (
        datetime.datetime.utcnow() -
        datetime.timedelta(seconds=options.min_age_seconds))
    query = model.Person.all_in_repo(
        options.repo, filter_expired=options.include_expired != 'true')
    if options.dump_all != 'true':
        query.filter('entry_date <=', max_entry_date)
    while True:
        people = query.fetch(MAX_ENTITIES_PER_REQUEST)
        if not people:
            break
        for person in people:
            # Check entry_date again because the filter is not applied when
            # --dump_all=true.
            if (person.entry_date <= max_entry_date and
                    person.get_record_id() not in id_whitelist):
                logging.info('To be expired: %s' % person_to_text(person))
                expired_entries.append(person)
            elif options.dump_all == 'true':
                logging.info('To be kept: %s' % person_to_text(person))
        query = query.with_cursor(query.cursor())

    if options.action == 'expire':
        for person in expired_entries:
            expire_person(person)

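# person_to_text() is referenced above but not defined in this excerpt.
# A minimal hypothetical sketch, using only attributes that main() itself
# touches (the real helper likely prints more fields):
def person_to_text(person):
    return '%s (entry_date=%s)' % (person.get_record_id(), person.entry_date)
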
        self.data['http_statuses'].append(status)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    if len(sys.argv) != 3 or sys.argv[2] not in ('create', 'search'):
        sys.stderr.write(
            'Usage:\n'
            'tools/load_test config.json create\n'
            'tools/load_test config.json search\n')
        sys.exit(1)
    with open(sys.argv[1]) as f:
        conf = json.load(f)
    if not os.path.exists(conf['output_dir']):
        os.makedirs(conf['output_dir'])
    remote_api.connect(conf['base_url'])
    if sys.argv[2] == 'create':
        load_test = CreateRecordsLoadTest(conf)
    elif sys.argv[2] == 'search':
        load_test = SearchRecordsLoadTest(conf)
    else:
        raise Exception('Should not happen')
    load_test.execute_all()
    load_test.save_result()
    load_test.print_stats()

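# The excerpt reads only two config keys, conf['base_url'] and
# conf['output_dir']. A minimal config.json might look like the following
# (values are placeholders, and the load-test classes presumably read
# further keys not shown here):
#
#   {
#     "base_url": "https://myapp.appspot.com",
#     "output_dir": "/tmp/load_test_results"
#   }
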
import csv
import sys

import importer
import remote_api

SHOW_ERRORS = 5

def import_from_file(host, repo, kind, converter, filename):
    print('%s: importing %s records from %s' % (host, kind, filename))
    written, skipped, total = importer.import_records(
        repo, source_domain, converter,
        importer.utf8_decoder(csv.DictReader(open(filename))))
    for error, record in skipped[:SHOW_ERRORS]:
        print('  - %s: %r' % (error, record))
    if len(skipped) > SHOW_ERRORS:
        print('  (more errors not shown)')
    print('wrote %d of %d (skipped %d with errors)' % (
        written, total, len(skipped)))

if __name__ == '__main__':
    if len(sys.argv) != 6:
        raise SystemExit(
            'Usage: %s host repo source_domain person.csv note.csv'
            % sys.argv[0])
    host, repo, source_domain, person_file, note_file = sys.argv[1:]
    host = remote_api.connect(host)
    if person_file:
        import_from_file(
            host, repo, 'Person', importer.create_person, person_file)
    if note_file:
        import_from_file(
            host, repo, 'Note', importer.create_note, note_file)

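# importer.utf8_decoder wraps the csv.DictReader before rows reach the
# converter. A plausible sketch, assuming it exists to turn the byte
# strings that Python 2's csv module yields into unicode (the real
# implementation may differ):
def utf8_decoder(dict_reader):
    for row in dict_reader:
        # Decode each field from UTF-8; missing fields stay None.
        yield dict((key, value.decode('utf-8') if value is not None else None)
                   for key, value in row.items())
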
def connect(url, server_type=None):
    remote_api.connect(url, server_type=server_type)
    # ConfigureRemoteApi sets os.environ['APPLICATION_ID'].
    app_id = os.environ['APPLICATION_ID']
    sys.ps1 = app_id + '> '  # for the interactive console

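# For illustration, a hypothetical session (the app id 'myapp' and the
# query are invented; the 'myapp> ' prompt comes from the sys.ps1
# assignment above):
#
#   >>> connect('myapp.appspot.com')
#   myapp> from google.appengine.ext import db
#   myapp> db.GqlQuery("SELECT * FROM Person").count()
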
def main():
    parser = optparse.OptionParser(usage='''tools/batch_delete [options]

Deletes entities in the AppEngine datastore in batches. It can be used when
manual cleanup is needed. Use it with care.

Example:

This command line performs a *preview*, i.e., it shows sample entities to be
deleted without actually deleting them:

$ tools/batch_delete \\
    --url=xyz.appspot.com \\
    --gql-query="select * from Person where repo='test'" \\
    --mode=preview

Confirm the output and replace --mode=preview with --mode=delete to actually
delete them.
''')
    parser.add_option('--url', dest='url',
                      help='The AppEngine server URL to connect to. See the '
                           'comment in console.py for acceptable URL '
                           'formats.')
    parser.add_option('--server-type', dest='server_type',
                      help='"appengine": The server is AppEngine. '
                           '"local": The server is a local dev_appserver. '
                           'It is guessed automatically when omitted.')
    parser.add_option('--gql-query', dest='gql_query',
                      help='GQL query in the format "select * from ...". It '
                           'deletes all entities matching this query.')
    parser.add_option('--batch-size', dest='batch_size', type='int',
                      default=1000,
                      help='The number of entities in a single deletion '
                           'batch. It shouldn\'t be too large, to avoid '
                           'timeouts.')
    parser.add_option('--mode', dest='mode', type='choice',
                      choices=['preview', 'delete'], default='preview',
                      help='"preview": Shows sample entities to be deleted '
                           'without actually deleting them. '
                           '"delete": Actually deletes the entities matching '
                           'the query.')
    parser.add_option('--output-attr', dest='output_attr', default='__key__',
                      help='Outputs the attribute with this name for each '
                           'entity in the log. Outputs the entity key by '
                           'default.')
    options, args = parser.parse_args()

    remote_api.connect(options.url, server_type=options.server_type)

    query = db.GqlQuery(options.gql_query)
    while True:
        entities = query.fetch(limit=options.batch_size)
        if not entities:
            break
        message_prefix = (
            'Deleting' if options.mode == 'delete' else 'Will delete')
        for entity in entities:
            if options.output_attr == '__key__':
                attr_value = str(entity.key().id_or_name())
            else:
                attr_value = getattr(entity, options.output_attr)
            print('%s %s with %s = %r' % (
                message_prefix, type(entity).kind(), options.output_attr,
                attr_value))
        if options.mode == 'delete':
            db.delete(entities)
        else:
            break

    if options.mode == 'preview':
        print(
            '\nAbove are %d (or maybe fewer) sample entities matching the '
            'query. If you rerun the command with --mode=delete, it will '
            'delete ALL entities matching the query, including those not '
            'dumped above.' % options.batch_size)

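# When --output-attr is not needed, a keys-only query would make each batch
# cheaper, since the datastore then returns keys instead of full entities.
# A hypothetical sketch of that variant (not part of the tool above):
from google.appengine.ext import db

def batch_delete_by_keys(gql_query, batch_size=1000):
    # Assumes the query starts with 'select *', as in the --gql-query help.
    query = db.GqlQuery(gql_query.replace('select *', 'select __key__', 1))
    while True:
        keys = query.fetch(limit=batch_size)
        if not keys:
            break
        db.delete(keys)
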
        domain, id = person_record_id.split('/', 1)
        # e.g. 'example.org/person.123' -> 'example.org/note.batch-person.123'
        note_record_id = domain + '/note.batch-' + id
        note_dicts.append({
            'note_record_id': note_record_id,
            'person_record_id': person_record_id,
            'author_name': 'Google Person Finder',
            'source_date': date,
            'status': status,
            'text': text,
        })

    for i in range(0, len(note_dicts), batch_size):
        filename = basename + '.' + str(i) + '.xml'
        with open(filename, 'w') as file:
            atom.ATOM_PFIF_1_4.write_note_feed(
                file, note_dicts[i:i + batch_size], url='',
                title='Notes generated by notes_for_all_persons.py',
                subtitle='', updated=datetime.datetime.utcnow())
        logging.info('wrote ' + filename)

if __name__ == '__main__':
    remote_api.connect(sys.argv[1], sys.argv[2], None)
    with open(sys.argv[7]) as f:
        text = f.read().decode('utf-8')
    write_notes_for_all_persons(
        sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], text)

SHOW_ERRORS = 5

# Variant of the importer above that connects with an explicit app_id and
# takes a subdomain instead of a repo (Python 2 print statements).
def import_from_file(host, subdomain, kind, converter, filename):
    print '%s: importing %s records from %s' % (host, kind, filename)
    written, skipped, total = importer.import_records(
        subdomain, source_domain, converter,
        importer.utf8_decoder(csv.DictReader(open(filename))))
    for error, record in skipped[:SHOW_ERRORS]:
        print '  - %s: %r' % (error, record)
    if len(skipped) > SHOW_ERRORS:
        print '  (more errors not shown)'
    print 'wrote %d of %d (skipped %d with errors)' % (
        written, total, len(skipped))

if __name__ == '__main__':
    if len(sys.argv) != 7:
        raise SystemExit(
            'Usage: %s app_id host subdomain source_domain person.csv '
            'note.csv' % sys.argv[0])
    app_id, host, subdomain, source_domain, person_file, note_file = \
        sys.argv[1:]
    host = remote_api.connect(host, app_id)
    if person_file:
        import_from_file(
            host, subdomain, 'Person', importer.create_person, person_file)
    if note_file:
        import_from_file(
            host, subdomain, 'Note', importer.create_note, note_file)