def import_site_export(export_path, remote_api_host, app_id, batch_size, store_all):
    # Log in, then use the pfif parser to parse the export file.  Use the
    # importer methods to convert the dicts to entities then add them as in
    # import.py, but less strict, to ensure that all exported data is available.
    remote_api.connect(remote_api_host, app_id)
    logging.info("%s: importing exported records from %s", remote_api_host, export_path)
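    # The export may be a plain PFIF XML file or a .zip archive; for a .zip,
    # the file inside the archive is opened instead.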
    if not export_path.endswith(".zip"):
        export_fd = open(export_path)
    else:
        export_fd = open_file_inside_zip(export_path)
    persons, notes = pfif.parse_file(export_fd)
    logging.info("loaded %d persons, %d notes", len(persons), len(notes))
    if not store_all:
        persons = [d for d in persons if is_clone(d.get("person_record_id"))]
        notes = [d for d in notes if is_clone(d.get("note_record_id"))]
        logging.info(
            "... down to %d persons, %d notes after excluding %r records", len(persons), len(notes), HOME_DOMAIN
        )
    logging.info("... adding persons")
    add_entities(persons, create_person, batch_size, "person", store_all)
    logging.info("... adding notes")
    add_entities(notes, create_note, batch_size, "note", store_all)
Example #2
    def pytest_configure(self, config):
        options = config.option
        url = options.server or "localhost:%d" % options.port
        secure, host, port, path = remote_api.parse_url(url)
        if host == "localhost":
            # We need to start up a clean new appserver for testing.
            self.threads.append(AppServerRunner(options.port, options.mailport))
        self.threads.append(MailThread(options.mailport))
        for thread in self.threads:
            thread.start()
        for thread in self.threads:
            thread.wait_until_ready()

        # Connect to the datastore.
        remote_api.connect(url, server_type="local")

        # Reset the datastore for the first test.
        reset_data()

        # Give the tests access to configuration information.
        config.hostport = "%s:%d" % (host, port)
        config.mail_server = MailThread
Example #5
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)

    parser = optparse.OptionParser(usage='%prog [options] <appserver_url>')
    parser.add_option('--host',
                      help='Host name. e.g. mypersonfinder.appspot.com:80')
    parser.add_option('--repo',
                      help='Name of the Person Finder repository.')
    parser.add_option('--email',
                      help=('Email address used to connect to AppEngine. '
                            'The user must be admin of the application.'))
    parser.add_option('--password_path',
                      help=('A text file which contains the password of '
                            'the user specified by --email. If omitted, '
                            'the command prompts you to input password.'))
    parser.add_option('--action',
                      type='choice',
                      choices=['expire', 'preview'],
                      help=('Either of:\n'
                            'expire: Marks old entries as expired. They '
                            'disappear from the UI and the API, but are kept '
                            'in the data store. They are automatically '
                            'deleted later. Note that clone records are '
                            'deleted immediately.\n'
                            'preview: Does nothing, but dumps entries which '
                            'will be marked as expired.'))
    parser.add_option('--min_age_seconds',
                      type='int',
                      default=3600,
                      help=('Expires entries whose entry_date '
                            'is older than this value in seconds.'))
    parser.add_option('--id_whitelist',
                      help=('Comma-separated person IDs which are never '
                            'deleted. e.g. '
                            'example.personfinder.google.org/person.1234,'
                            'example.personfinder.google.org/person.5678'))
    parser.add_option('--dump_all',
                      default='false',
                      help='Also dumps entries which are not deleted.')
    parser.add_option('--include_expired',
                      default='false',
                      help='Includes expired entries on --dump_all.')
    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error('Just one argument must be given.')
    host = args[0]
    if not options.repo:
        parser.error('--repo is missing.')
    if not options.action:
        parser.error('--action is missing.')

    if options.password_path:
        f = open(options.password_path)
        password = f.read().rstrip('\n')
        f.close()
    else:
        password = None

    if options.id_whitelist:
        id_whitelist = options.id_whitelist.split(',')
    else:
        id_whitelist = []

    remote_api.connect(host, options.email, password)

    expired_entries = []
    max_entry_date = (
            datetime.datetime.utcnow() -
            datetime.timedelta(seconds=options.min_age_seconds))
    query = model.Person.all_in_repo(
            options.repo, filter_expired=options.include_expired != 'true')
    if options.dump_all != 'true':
        query.filter('entry_date <=', max_entry_date)

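    # Page through all matching Person entities; re-querying with the cursor
    # lets each fetch resume where the previous batch ended.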
    while True:
        people = query.fetch(MAX_ENTITIES_PER_REQUEST)
        if not people:
            break
        for person in people:
            # Checks entry_date again because the filter is not applied when
            # --dump_all=true.
            if (person.entry_date <= max_entry_date and
                person.get_record_id() not in id_whitelist):
                logging.info('To be expired: %s' % person_to_text(person))
                expired_entries.append(person)
            elif options.dump_all == 'true':
                logging.info('To be kept: %s' % person_to_text(person))
        query = query.with_cursor(query.cursor())

    if options.action == 'expire':
        for person in expired_entries:
            expire_person(person)
Example #6
            self.data['http_statuses'].append(status)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) != 3 or sys.argv[2] not in ('create', 'search'):
        sys.stderr.write(
            'Usage:\n'
            'tools/load_test config.json create\n'
            'tools/load_test config.json search\n')
        sys.exit(1)

    with open(sys.argv[1]) as f:
        conf = json.load(f)

    if not os.path.exists(conf['output_dir']):
        os.makedirs(conf['output_dir'])

    remote_api.connect(conf['base_url'])

    if len(sys.argv) == 3 and sys.argv[2] == 'create':
        load_test = CreateRecordsLoadTest(conf)
    elif sys.argv[2] == 'search':
        load_test = SearchRecordsLoadTest(conf)
    else:
        raise Exception('Should not happen')

    load_test.execute_all()
    load_test.save_result()
    load_test.print_stats()
Example #7
import sys

SHOW_ERRORS = 5

def import_from_file(host, repo, kind, converter, filename):
    print('%s: importing %s records from %s' % (host, kind, filename))
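    # importer.import_records() converts each CSV row into an entity and stores
    # it, returning counts plus the skipped rows as (error, record) pairs.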
    written, skipped, total = importer.import_records(
        repo, source_domain, converter,
        importer.utf8_decoder(csv.DictReader(open(filename))))
    for error, record in skipped[:SHOW_ERRORS]:
        print('    - %s: %r' % (error, record))
    if len(skipped) > SHOW_ERRORS:
        print('    (more errors not shown)')

    print('wrote %d of %d (skipped %d with errors)' % (
        written, total, len(skipped)))

if __name__ == '__main__':
    if len(sys.argv) < 6:
        raise SystemExit(
            'Usage: %s host repo source_domain person.csv note.csv'
            % sys.argv[0])
    host, repo, source_domain, person_file, note_file = sys.argv[1:]
    host = remote_api.connect(host)
    if person_file:
        import_from_file(
            host, repo, 'Person', importer.create_person, person_file)
    if note_file:
        import_from_file(
            host, repo, 'Note', importer.create_note, note_file)
Example #10
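# A Python 2 variant of the import script above: remote_api.connect() is called
# with an explicit app_id, and print is used as a statement.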

SHOW_ERRORS = 5

def import_from_file(host, subdomain, kind, converter, filename):
    print '%s: importing %s records from %s' % (host, kind, filename)
    written, skipped, total = importer.import_records(
        subdomain, source_domain, converter,
        importer.utf8_decoder(csv.DictReader(open(filename))))
    for error, record in skipped[:SHOW_ERRORS]:
        print '    - %s: %r' % (error, record)
    if len(skipped) > SHOW_ERRORS:
        print '    (more errors not shown)'

    print 'wrote %d of %d (skipped %d with errors)' % (written, total,
                                                       len(skipped))


if __name__ == '__main__':
    if len(sys.argv) < 6:
        raise SystemExit(
            'Usage: %s app_id host subdomain source_domain person.csv note.csv'
            % sys.argv[0])
    app_id, host, subdomain, source_domain, person_file, note_file = \
        sys.argv[1:]
    host = remote_api.connect(host, app_id)
    if person_file:
        import_from_file(host, subdomain, 'Person', importer.create_person,
                         person_file)
    if note_file:
        import_from_file(host, subdomain, 'Note', importer.create_note,
                         note_file)
Example #11
def connect(url, server_type=None):
    remote_api.connect(url, server_type=server_type)

    # ConfigureRemoteApi sets os.environ['APPLICATION_ID']
    app_id = os.environ['APPLICATION_ID']
    sys.ps1 = app_id + '> '  # for the interactive console
Example #12
def main():
    parser = optparse.OptionParser(usage='''tools/batch_delete [options]

Delete entities in AppEngine data store in batch.

It can be used in case any manual clean up is needed. Use it with care.

Example:

    This command line performs a *preview* i.e., it shows sample entities to be
    deleted without actually deleting them:

    $ tools/batch_delete \\
      --url=xyz.appspot.com \\
      --gql-query="select * from Person where repo='test'" \\
      --mode=preview

    Confirm the output and replace --mode=preview with --mode=delete to actually
    delete them.
''')
    parser.add_option('--url',
                      dest='url',
                      help='The AppEngine server URL to connect to. See the '
                           'comment in console.py for acceptable URL formats.')
    parser.add_option('--server-type',
                      dest='server_type',
                      help='"appengine": The server is AppEngine. '
                           '"local": The server is a local dev_appserver. '
                           'It is guessed automatically when omitted.')
    parser.add_option('--gql-query',
                      dest='gql_query',
                      help='GQL query in the format "select * from ...". It '
                           'deletes all entities matching this query.')
    parser.add_option('--batch-size',
                      dest='batch_size',
                      type='int',
                      default=1000,
                      help='The number of entities in a single deletion batch. '
                      'It shouldn\'t be too large to avoid timeout.')
    parser.add_option('--mode',
                      dest='mode',
                      type='choice',
                      choices=['preview', 'delete'],
                      default='preview',
                      help='"preview": Shows sample entities to be deleted '
                           'without actually deleting them. '
                           '"delete": Actually deletes the entities matching '
                           'the query.')
    parser.add_option('--output-attr',
                      dest='output_attr',
                      default='__key__',
                      help='It outputs the attribute with this name of each '
                           'entity in the log. It outputs the entity key by '
                           'default.')
    options, args = parser.parse_args()

    remote_api.connect(options.url, server_type=options.server_type)

    query = db.GqlQuery(options.gql_query)
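    # Fetch matching entities in batches; delete each batch in delete mode, or
    # print only the first batch and stop in preview mode.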
    while True:
        entities = query.fetch(limit=options.batch_size)
        if not entities: break

        message_prefix = (
                'Deleting' if options.mode == 'delete' else 'Will delete')
        for entity in entities:
            if options.output_attr == '__key__':
                attr_value = str(entity.key().id_or_name())
            else:
                attr_value = getattr(entity, options.output_attr)
            print('%s %s with %s = %r' % (
                    message_prefix,
                    type(entity).kind(),
                    options.output_attr,
                    attr_value))

        if options.mode == 'delete':
            db.delete(entities)
        else:
            break

    if options.mode == 'preview':
        print(
            '\nAbove are %d (or maybe less) sample entities matching the '
            'query. If you rerun the command with --mode=delete, it will '
            'delete ALL entities matching the query, including those not '
            'dumped above.'
            % options.batch_size)
Example #13
        domain, id = person_record_id.split('/', 1)
        note_record_id = domain + '/note.batch-' + id
        note_dicts.append({
            'note_record_id': note_record_id,
            'person_record_id': person_record_id,
            'author_name': 'Google Person Finder',
            'source_date': date,
            'status': status,
            'text': text
        })
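    # Write the generated notes as PFIF 1.4 Atom note feeds, batch_size notes
    # per output file.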
    for i in range(0, len(note_dicts), batch_size):
        filename = basename + '.' + str(i) + '.xml'
        with open(filename, 'w') as file:
            atom.ATOM_PFIF_1_4.write_note_feed(
                file,
                note_dicts[i:i + batch_size],
                url='',
                title='Notes generated by notes_for_all_persons.py',
                subtitle='',
                updated=datetime.datetime.utcnow()
            )
        logging.info('wrote ' + filename)

if __name__ == '__main__':
    remote_api.connect(sys.argv[1], sys.argv[2], None)
    text = None
    with open(sys.argv[7]) as f:
        text = f.read().decode('utf-8')
    write_notes_for_all_persons(
            sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], text)