def test_a_initialize(self):
    """Set up the database state needed by the /rememberedforms tests.

    Creates the database, empties all models, stores the default
    administrator, contributor and viewer, generates the test data and
    finally saves an application settings object in which the contributor
    is an unrestricted user.
    """
    session = self.dbsession
    db_utils = DBUtils(session, self.settings)
    self.create_db()
    db_utils.clear_all_models()

    # Default users, generated in the order administrator, contributor,
    # viewer and persisted together.
    default_users = [
        omb.generate_default_administrator(settings=self.settings),
        omb.generate_default_contributor(settings=self.settings),
        omb.generate_default_viewer(settings=self.settings),
    ]
    session.add_all(default_users)
    session.commit()

    _create_test_data(session, db_utils, self.n)
    add_SEARCH_to_web_test_valid_methods()

    # Save an application settings in which the contributor is unrestricted.
    # Only the contributor returned by ``get_users`` is needed here.
    _, contributor, _ = get_users(db_utils)
    app_settings = omb.generate_default_application_settings()
    app_settings.unrestricted_users = [contributor]
    session.add(app_settings)
    session.commit()
def test_e_cleanup(self):
    """Reset the database once the /rememberedforms tests have run.

    Empties all models and then stores a fresh default administrator,
    contributor and viewer.
    """
    session = self.dbsession
    db_utils = DBUtils(session, self.settings)
    db_utils.clear_all_models()

    # Re-create the default users in the order administrator, contributor,
    # viewer and persist them in a single commit.
    default_users = [
        omb.generate_default_administrator(settings=self.settings),
        omb.generate_default_contributor(settings=self.settings),
        omb.generate_default_viewer(settings=self.settings),
    ]
    session.add_all(default_users)
    session.commit()
def test_aaa_initialize(self):
    """Initialize the database using pseudo-data generated from random
    lorem ipsum sentences.

    These are located in ``old/tests/data/corpora``. The data contain
    morphologically analyzed sentences, their component morphemes, and
    syntactic categories. The sentences have phrase structure trees in
    bracket notation.

    The test will try to load the lorem ipsum dataset from a MySQL/SQLite
    dump file in ``onlinelinguisticdatabase/tests/data/corpora``. If the
    dump file corresponding to ``loremipsum_path`` does not exist, it will
    import the lorem ipsum data directly from the text files and create
    the dump file so that future tests can run more speedily. (Loading an
    existing *MySQL* dump is currently disabled in the code below; for
    MySQL the data are always re-imported and re-dumped.) When the RDBMS is
    neither MySQL nor SQLite, or the ``sqlite3`` program is not installed,
    the data are imported from the text file and no dump is written.

    The ``loremipsum100_path``, ``loremipsum1000_path``,
    ``loremipsum10000_path`` and ``loremipsum30000_path`` files are
    available and contain 100, 1,000, 10,000 and (judging by the name)
    30,000 sentences, respectively.

    Setting the ``via_request`` variable to ``True`` will cause all of the
    forms to be created via request, i.e., via
    ``self.app.post('/<old_name>/forms', ...)``. This is much slower but
    may be desirable since values for the morphological analysis
    attributes will be generated.

    .. note::

        In order to run ``mysqldump`` the MySQL user must have permission
        to lock and update tables (alter and file privileges may also be
        required ...)::

            mysql -u root -p<root_password>
            grant lock tables, update on old_test.* to 'old'@'localhost';

    .. warning::

        Loading the .txt or .sql files with the ``via_request`` option set
        to ``True`` will take a very long time. This might be an argument
        for separating the interface and logic components of the
        controllers so that a "core" HTTP-less OLD application could be
        exposed. This would facilitate the creation of models with
        system-generated data and validation but without the HTTP
        overhead...
    """
    self.create_db()
    dbsession = self.dbsession
    db = DBUtils(dbsession, self.settings)

    ###################################################################
    # Configure lorem ipsum data set import
    ###################################################################

    # Set ``loremipsum_path`` to ``self.loremipsum100_path``,
    # ``self.loremipsum1000_path`` or ``self.loremipsum10000_path``.
    # WARNING: the larger ones will take a long time. The 100-sentence
    # data set is configured here; switch to the 10,000-sentence one to
    # check that very large corpora are handled correctly.
    loremipsum_path = self.loremipsum100_path

    # Set ``via_request`` to ``True`` to create all forms via HTTP requests.
    via_request = True

    add_SEARCH_to_web_test_valid_methods()

    # Add an application settings so that morpheme references will work
    # out right.
    application_settings = omb.generate_default_application_settings()
    dbsession.add(application_settings)
    dbsession.commit()

    def create_model(line, categories, via_request=False):
        """Create a model (form or syncat) using the string in ``line``.

        ``line`` is split on tabs and empty fields are dropped. Six fields
        describe a form with a syntax tree, five fields a form without
        one, and two fields a syntactic category (name, type). Any other
        field count is ignored. ``categories`` maps syntactic category
        names to ids; it is updated in place and returned.
        """
        fields = [field for field in str(line).split('\t') if field]
        if len(fields) == 6:
            model = 'Form'
            ol, mb, mg, ml, sc, sx = fields
        elif len(fields) == 5:
            model = 'Form'
            ol, mb, mg, ml, sc = fields
            sx = ''
        elif len(fields) == 2:
            model = 'SyntacticCategory'
            n, t = fields
        else:
            return categories

        if via_request:
            if model == 'SyntacticCategory':
                params = self.syntactic_category_create_params.copy()
                params.update({'name': n, 'type': t})
                params = json.dumps(params)
                response = self.app.post(
                    '/{}/syntacticcategories'.format(self.old_name),
                    params, self.json_headers, self.extra_environ_admin)
                categories[n] = response.json_body['id']
            else:
                params = self.form_create_params.copy()
                params.update({
                    'transcription': ol,
                    'morpheme_break': mb,
                    'morpheme_gloss': mg,
                    'translations': [{
                        'transcription': ml,
                        'grammaticality': ''
                    }],
                    'syntax': sx,
                    'syntactic_category': categories.get(sc, '')
                })
                params = json.dumps(params)
                self.app.post(
                    '/{}/forms'.format(self.old_name), params,
                    self.json_headers, self.extra_environ_admin)
        else:
            if model == 'SyntacticCategory':
                syntactic_category = old_models.SyntacticCategory()
                syntactic_category.name = n
                syntactic_category.type = t
                dbsession.add(syntactic_category)
                # The primary key is only assigned once the pending object
                # has been flushed; without this the stored id would be
                # ``None`` and no form could reference its category.
                dbsession.flush()
                categories[n] = syntactic_category.id
            else:
                form = old_models.Form()
                form.transcription = ol
                form.morpheme_break = mb
                form.morpheme_gloss = mg
                translation = old_models.Translation()
                translation.transcription = ml
                form.translations.append(translation)
                form.syntax = sx
                form.syntacticcategory_id = categories.get(sc, None)
                dbsession.add(form)
        return categories

    def add_loremipsum_to_db(loremipsum_path, via_request=False):
        """Add the contents of the file at ``loremipsum_path`` to the
        database, one model per line.
        """
        categories = {}
        with open(loremipsum_path, 'r') as loremipsum_file:
            for index, line in enumerate(loremipsum_file):
                if index % 100 == 0:
                    # Commit in batches of 100 lines when writing models
                    # directly through the session.
                    if not via_request:
                        dbsession.commit()
                    LOGGER.debug('%d lines processed', index)
                categories = create_model(
                    line.replace('\n', ''), categories, via_request)
            dbsession.commit()

    def run_shell_script(script_path, shell_script):
        """Write ``shell_script`` to ``script_path``, execute it with its
        output discarded, and delete the script file afterwards.
        """
        with open(script_path, 'w') as script_file:
            script_file.write(shell_script)
        try:
            os.chmod(script_path, 0o744)
            with open(os.devnull, 'w') as devnull:
                call([script_path], stdout=devnull, stderr=devnull)
        finally:
            # Always remove the script: the MySQL variants contain the
            # database password in plain text.
            os.remove(script_path)

    loremipsum_path_no_ext = os.path.splitext(loremipsum_path)[0]
    sqlalchemy_URL = self.settings['sqlalchemy.url']
    sqlalchemy_URL_list = sqlalchemy_URL.split(':')
    olddump_script_path = os.path.join(
        self.test_scripts_path, 'olddump.sh')
    oldload_script_path = os.path.join(
        self.test_scripts_path, 'oldload.sh')
    RDBMS = sqlalchemy_URL_list[0]
    if RDBMS.startswith('mysql'):
        RDBMS = 'mysql'

    if RDBMS == 'mysql':
        mysql_dump_path = '%s_mysql.sql' % loremipsum_path_no_ext
        # The indexing below assumes a URL of the form
        # ``mysql...://user:password@host:port/dbname``.
        username = sqlalchemy_URL_list[1][2:]
        password = sqlalchemy_URL_list[2].split('@')[0]
        dbname = sqlalchemy_URL_list[3].split('/')[1]
        # This is not an option anymore: too frustrated trying to load
        # the dump file.
        if False and os.path.exists(mysql_dump_path):
            LOGGER.debug(
                'The lorem ipsum MySQL dump file exists. Loading it...')
            # Clear the current DB completely
            db.clear_all_models(retain=[])
            # Load the dump file to the DB
            shell_script = '#!/bin/sh\nmysql -u %s -p%s %s < %s' % (
                username, password, dbname, mysql_dump_path)
            run_shell_script(oldload_script_path, shell_script)
            LOGGER.debug('Loaded.')
        else:
            LOGGER.debug(
                'Have to import the lorem ipsum dataset from the text file and create the MySQL dump file.'
            )
            # Populate the database from the loremipusm text file and dump it
            add_loremipsum_to_db(loremipsum_path, via_request=via_request)
            # Write the DB dump shell script
            # Note: the --single-transaction option seems to be required (on Mac MySQL 5.6 using InnoDB tables ...)
            # see http://forums.mysql.com/read.php?10,108835,112951#msg-112951
            shell_script = '#!/bin/sh\nmysqldump -u %s -p%s --single-transaction --no-create-info --result-file=%s %s' % (
                username, password, mysql_dump_path, dbname)
            run_shell_script(olddump_script_path, shell_script)
            LOGGER.debug('Imported and dumped.')
    elif RDBMS == 'sqlite' and h.command_line_program_installed('sqlite3'):
        sqlite_dump_path = '%s_sqlite.sql' % loremipsum_path_no_ext
        sqlite_db = sqlalchemy_URL.split('/')[-1]
        dbpath = os.path.join(self.here, sqlite_db)
        if os.path.exists(sqlite_dump_path):
            LOGGER.debug(
                'The lorem ipsum SQLite dump file exists. Loading it...')
            print(
                'The lorem ipsum SQLite dump file exists. Loading it...')
            # Destroy the sqlite db file
            os.remove(dbpath)
            # Load the dump file to the DB
            shell_script = '#!/bin/sh\nsqlite3 %s < %s' % (
                dbpath, sqlite_dump_path)
            run_shell_script(oldload_script_path, shell_script)
            LOGGER.debug('Loaded.')
        else:
            LOGGER.debug('Have to import the lorem ipsum dataset from'
                         ' the text file and create the SQLite dump'
                         ' file.')
            # Populate the database from the loremipusm text file and
            # dump it
            add_loremipsum_to_db(loremipsum_path, via_request=via_request)
            # A full dump (schema included) is written because the load
            # path above deletes the database file and rebuilds it from
            # the dump alone.
            shell_script = (
                '#!/bin/sh\n'
                'sqlite3 {dbpath} .dump > {dump_path}\n'.format(
                    dbpath=dbpath, dump_path=sqlite_dump_path))
            run_shell_script(olddump_script_path, shell_script)
            LOGGER.debug('Imported and dumped.')
    else:
        # No usable dump tooling for this RDBMS: still import the data so
        # that the rest of the test has forms to work with.
        LOGGER.debug('No dump support for this RDBMS; importing the lorem'
                     ' ipsum dataset from the text file without dumping.')
        add_loremipsum_to_db(loremipsum_path, via_request=via_request)

    forms = db.get_forms()
    LOGGER.debug(
        'Lorem ipsum data loaded. There are now %d forms in the db.' %
        len(forms))
    print('Lorem ipsum data loaded. There are now %d forms in the db.' %
          len(forms))

    # Restrict one sentential form in the db.
    restricted_tag = omb.generate_restricted_tag()
    dbsession.add(restricted_tag)
    dbsession.commit()
    a_form = dbsession.query(old_models.Form).filter(
        old_models.Form.syntactic_category.has(
            old_models.SyntacticCategory.name == 'S')).first()
    assert a_form is not None, (
        'No form with syntactic category "S" was found; the lorem ipsum'
        ' data were not loaded as expected.')
    a_form_id = a_form.id
    a_form.tags.append(restricted_tag)
    dbsession.commit()
    restricted_form = dbsession.query(old_models.Form).filter(
        old_models.Form.tags.any(
            old_models.Tag.name == 'restricted')).first()
    assert a_form_id == restricted_form.id