Ejemplo n.º 1
0
    def test_aaa_initialize(self):
        """Initialize the database using pseudo-data generated from random
        lorem ipsum sentences.

        These are located in ``old/tests/data/corpora``.
        The data contain morphologically analyzed sentences, their component
        morphemes, and syntactic categories.  The sentences have phrase
        structure trees in bracket notation.

        For SQLite, the test tries to load the lorem ipsum dataset from a dump
        file next to the text file selected by ``loremipsum_path``.  If that
        dump file does not exist, the data are imported directly from the text
        file and the dump file is created so that future tests can run more
        speedily.  For MySQL, loading from the dump file is currently disabled,
        so the data are always imported from the text file (and then dumped).
        For any other configuration the data are imported from the text file
        and no dump is written.

        The ``loremipsum100_path``, ``loremipsum1000_path`` and
        ``loremipsum10000_path`` files contain 100, 1000 and 10,000 sentences,
        respectively (a ``loremipsum30000_path`` file presumably contains
        30,000 sentences).

        Setting the ``via_request`` variable to ``True`` will cause all of the
        forms to be created via request, i.e., via
        ``self.app.post(url('forms'))...``.  This is much slower but may be
        desirable since values for the morphological analysis attributes
        will be generated.

        Finally, one form of syntactic category ``S`` is tagged as restricted.

        .. note::

            In order to run ``mysqldump`` the MySQL user must have permission
            to lock and update tables (alter and file privileges may also be
            required ...)::

                mysql -u root -p<root_password>
                grant lock tables, update on old_test.* to 'old'@'localhost';

        .. warning::

            Loading the .txt or .sql files with the ``via_request`` option set to
            ``True`` will take a very long time.  This might be an argument for
            separating the interface and logic components of the controllers so
            that a "core" HTTP-less OLD application could be exposed.  This
            would facilitate the creation of models with system-generated data
            and validation but without the HTTP overhead...

        """
        self.create_db()

        dbsession = self.dbsession
        db = DBUtils(dbsession, self.settings)

        ###################################################################
        # Configure lorem ipsum data set import
        ###################################################################

        # Set ``loremipsum_path`` to ``self.loremipsum100_path``,
        # ``self.loremipsum1000_path`` or ``self.loremipsum10000_path``.
        # WARNING: the larger ones will take a long time.
        # Use the 10,000-sentence lorem ipsum dataset to ensure that
        # very large corpora are handled correctly.
        loremipsum_path = self.loremipsum100_path

        # Set ``via_request`` to ``True`` to create all forms via HTTP requests.
        via_request = True

        add_SEARCH_to_web_test_valid_methods()

        # Add an application settings so that morpheme references will work
        # out right.
        application_settings = omb.generate_default_application_settings()
        dbsession.add(application_settings)
        dbsession.commit()

        def create_model(line, categories, via_request=False):
            """Create a model (form or syncat) using the string in ``line``.

            ``line`` is split on tabs and empty fields are discarded.  Six
            fields (transcription, morpheme break, morpheme gloss,
            translation, category name, syntax) or five fields (same, without
            syntax) describe a form; two fields (name, type) describe a
            syntactic category.  Any other field count is ignored.

            :param line: one tab-delimited line of the lorem ipsum data file.
            :param categories: dict from syntactic category name to id; it is
                updated in place when a category is created.
            :param via_request: create the model via an HTTP request when
                true, otherwise directly through the database session.
            :returns: the ``categories`` dict.
            """
            model = 'Form'
            fields = [field for field in str(line).split('\t') if field]
            if len(fields) == 6:
                ol, mb, mg, ml, sc, sx = fields
            elif len(fields) == 5:
                ol, mb, mg, ml, sc = fields
                sx = ''
            elif len(fields) == 2:
                model = 'SyntacticCategory'
                n, t = fields
            else:
                return categories
            if via_request:
                if model == 'SyntacticCategory':
                    params = self.syntactic_category_create_params.copy()
                    params.update({'name': n, 'type': t})
                    params = json.dumps(params)
                    response = self.app.post(
                        '/{}/syntacticcategories'.format(self.old_name),
                        params, self.json_headers, self.extra_environ_admin)
                    categories[n] = response.json_body['id']
                else:
                    params = self.form_create_params.copy()
                    params.update({
                        'transcription': ol,
                        'morpheme_break': mb,
                        'morpheme_gloss': mg,
                        'translations': [{
                            'transcription': ml,
                            'grammaticality': ''
                        }],
                        'syntax': sx,
                        'syntactic_category': categories.get(sc, '')
                    })
                    params = json.dumps(params)
                    self.app.post('/{}/forms'.format(self.old_name), params,
                                  self.json_headers, self.extra_environ_admin)
            else:
                if model == 'SyntacticCategory':
                    syntactic_category = old_models.SyntacticCategory()
                    syntactic_category.name = n
                    syntactic_category.type = t
                    dbsession.add(syntactic_category)
                    # Flush so that the database assigns the primary key;
                    # without this ``id`` is still ``None`` here and every
                    # form created below would lose its syntactic category.
                    dbsession.flush()
                    categories[n] = syntactic_category.id
                else:
                    form = old_models.Form()
                    form.transcription = ol
                    form.morpheme_break = mb
                    form.morpheme_gloss = mg
                    translation = old_models.Translation()
                    translation.transcription = ml
                    form.translations.append(translation)
                    form.syntax = sx
                    form.syntacticcategory_id = categories.get(sc, None)
                    dbsession.add(form)
            return categories

        def add_loremipsum_to_db(loremipsum_path, via_request=False):
            """Add the contents of the file at ``loremipsum_path`` to the database.

            Each line is handed to ``create_model``.  When models are created
            directly (not via request) the session is committed every 100
            lines; a final commit is always issued.
            """
            categories = {}
            with open(loremipsum_path, 'r') as f:
                for i, line in enumerate(f):
                    if i % 100 == 0:
                        if not via_request:
                            dbsession.commit()
                        LOGGER.debug('%d lines processed', i)
                    categories = create_model(line.replace('\n', ''),
                                              categories, via_request)
            dbsession.commit()

        def run_shell_script(script_path, script):
            """Write ``script`` to ``script_path``, run it with all output
            discarded, and always delete the script file afterwards.

            The script may embed database credentials, so it must not be left
            on disk even if running it fails.
            """
            with open(script_path, 'w') as f:
                f.write(script)
            try:
                os.chmod(script_path, 0o744)
                with open(os.devnull, 'w') as devnull:
                    call([script_path], stdout=devnull, stderr=devnull)
            finally:
                os.remove(script_path)

        loremipsum_path_no_ext = os.path.splitext(loremipsum_path)[0]
        sqlalchemy_URL = self.settings['sqlalchemy.url']
        sqlalchemy_URL_list = sqlalchemy_URL.split(':')
        olddump_script_path = os.path.join(self.test_scripts_path,
                                           'olddump.sh')
        oldload_script_path = os.path.join(self.test_scripts_path,
                                           'oldload.sh')
        # Normalize dialect+driver prefixes (anything starting with "mysql")
        # to plain "mysql".
        RDBMS = sqlalchemy_URL_list[0]
        if RDBMS.startswith('mysql'):
            RDBMS = 'mysql'

        if RDBMS == 'mysql':
            mysql_dump_path = '%s_mysql.sql' % loremipsum_path_no_ext
            # NOTE(review): this parsing assumes a URL shaped like
            # ``mysql://user:password@host:port/dbname`` (explicit port) --
            # confirm against the test settings.
            username = sqlalchemy_URL_list[1][2:]
            password = sqlalchemy_URL_list[2].split('@')[0]
            dbname = sqlalchemy_URL_list[3].split('/')[1]
            # This is not an option anymore: too frustrated trying to load
            # the dump file.
            load_from_mysql_dump = False
            if load_from_mysql_dump and os.path.exists(mysql_dump_path):
                LOGGER.debug(
                    'The lorem ipsum MySQL dump file exists.  Loading it...')
                # Clear the current DB completely
                db.clear_all_models(retain=[])
                # Load the dump file to the DB
                run_shell_script(
                    oldload_script_path,
                    '#!/bin/sh\nmysql -u %s -p%s %s < %s' % (
                        username, password, dbname, mysql_dump_path))
                LOGGER.debug('Loaded.')
            else:
                LOGGER.debug(
                    'Have to import the lorem ipsum dataset from the text file and create the MySQL dump file.'
                )
                # Populate the database from the lorem ipsum text file and dump it
                add_loremipsum_to_db(loremipsum_path, via_request=via_request)
                # Note: the --single-transaction option seems to be required (on Mac MySQL 5.6 using InnoDB tables ...)
                # see http://forums.mysql.com/read.php?10,108835,112951#msg-112951
                run_shell_script(
                    olddump_script_path,
                    '#!/bin/sh\nmysqldump -u %s -p%s --single-transaction --no-create-info --result-file=%s %s' % (
                        username, password, mysql_dump_path, dbname))
                LOGGER.debug('Imported and dumped.')
        elif RDBMS == 'sqlite' and h.command_line_program_installed('sqlite3'):
            sqlite_dump_path = '%s_sqlite.sql' % loremipsum_path_no_ext
            sqlite_db = sqlalchemy_URL.split('/')[-1]
            dbpath = os.path.join(self.here, sqlite_db)
            if os.path.exists(sqlite_dump_path):
                LOGGER.debug(
                    'The lorem ipsum SQLite dump file exists.  Loading it...')
                print(
                    'The lorem ipsum SQLite dump file exists.  Loading it...')
                # Destroy the sqlite db file; the dump is replayed into a
                # fresh database file at the same path.
                os.remove(dbpath)
                # Load the dump file to the DB
                run_shell_script(
                    oldload_script_path,
                    '#!/bin/sh\nsqlite3 %s < %s' % (dbpath, sqlite_dump_path))
                LOGGER.debug('Loaded.')
            else:
                LOGGER.debug('Have to import the lorem ipsum dataset from'
                             ' the text file and create the SQLite dump'
                             ' file.')
                # Populate the database from the lorem ipsum text file and
                # dump it
                add_loremipsum_to_db(loremipsum_path, via_request=via_request)
                # Dump the whole database with the sqlite3 ``.dump`` command.
                run_shell_script(
                    olddump_script_path,
                    '#!/bin/sh\n'
                    'sqlite3 {dbpath} .dump > {dump_path}\n'.format(
                        dbpath=dbpath, dump_path=sqlite_dump_path))
                LOGGER.debug('Imported and dumped.')
        else:
            # Neither MySQL nor SQLite-with-CLI: no dump can be made, but the
            # data must still be imported or the steps below find no forms.
            LOGGER.debug('Have to import the lorem ipsum dataset from the'
                         ' text file; no dump file will be created.')
            add_loremipsum_to_db(loremipsum_path, via_request=via_request)

        forms = db.get_forms()
        LOGGER.debug(
            'Lorem ipsum data loaded. There are now %d forms in the db.',
            len(forms))
        print('Lorem ipsum data loaded. There are now %d forms in the db.' %
              len(forms))

        # Restrict one sentential form in the db.
        restricted_tag = omb.generate_restricted_tag()
        dbsession.add(restricted_tag)
        dbsession.commit()
        a_form = dbsession.query(old_models.Form).filter(
            old_models.Form.syntactic_category.has(
                old_models.SyntacticCategory.name == 'S')).first()
        # Fail with a clear message rather than an AttributeError on None.
        assert a_form is not None, (
            'No form with syntactic category S was found after loading the'
            ' lorem ipsum data.')
        a_form_id = a_form.id
        a_form.tags.append(restricted_tag)
        dbsession.commit()
        restricted_form = dbsession.query(old_models.Form).filter(
            old_models.Form.tags.any(
                old_models.Tag.name == 'restricted')).first()
        assert a_form_id == restricted_form.id
Ejemplo n.º 2
0
    def test_writetofile_all_sentences(self):
        """Tests file writing/retrieval of a corpus containing all sentences.

        That is, that ``PUT /corpora/id/writetofile`` and
        ``GET /corpora/id/servefile`` both work with a corpus defined by a form
        search model that returns all sentences.

        The test proceeds in stages:

        1. Create a form search for all forms of category ``S`` and a corpus
           based on it; check that a TGrep2 search fails before the corpus
           has been written to file.
        2. Write the corpus to file as a treebank and check the files on
           disk; check that a contributor is refused the file (403) while an
           administrator receives gzipped content matching the file on disk.
        3. Point the corpus at a narrower form search, re-write it and repeat
           the checks; then re-write it once more without changes.
        4. Run several TGrep2 searches against the written treebank (or
           check the "not installed" error when ``tgrep2`` is unavailable).

        The test requires that forms already exist in the database and that
        one of them carries the ``restricted`` tag.

        """

        dbsession = self.dbsession
        db = DBUtils(dbsession, self.settings)
        forms = db.get_forms()
        assert len(forms) > 0

        # Id of the (first) form tagged "restricted"; used further below to
        # force that form into the second corpus.
        restricted_form_id = dbsession.query(old_models.Form)\
            .filter(old_models.Form.tags.any(
                old_models.Tag.name=='restricted')).first().id
        tgrep2_installed = h.command_line_program_installed('tgrep2')

        # Create a form search model that retrieves all sentences
        query = {'filter': ['Form', 'syntactic_category', 'name', '=', 'S']}
        params = json.dumps({
            'name': 'Get all sentences',
            'description': 'Query to return all sentences in the database.',
            'search': query
        })
        response = self.app.post(
            '/{old_name}/formsearches'.format(old_name=self.old_name), params,
            self.json_headers, self.extra_environ_admin)
        resp = response.json_body
        form_search_id = resp['id']

        # Perform the search to get the resulting forms.  Only the total
        # count reported by the paginator is used, so one item per page is
        # requested.
        params = json.dumps({
            'query': query,
            'paginator': {
                'page': 1,
                'items_per_page': 1
            }
        })
        response = self.app.post(
            '/{old_name}/forms/search'.format(old_name=self.old_name), params,
            self.json_headers, self.extra_environ_admin)
        resp = response.json_body
        sentence_count = resp['paginator']['count']

        # Generate some valid corpus creation input parameters.
        params = self.corpus_create_params.copy()
        params.update({
            'name': 'Corpus of sentences',
            'description': 'No ordering, no duplicates.',
            'form_search': form_search_id
        })
        params = json.dumps(params)

        # Create the corpus
        #assert os.listdir(self.corpora_path) == []
        original_corpus_count = dbsession.query(Corpus).count()
        response = self.app.post(url('create'), params, self.json_headers,
                                 self.extra_environ_admin)
        resp = response.json_body
        corpus_id = resp['id']
        new_corpus_count = dbsession.query(Corpus).count()
        corpus = dbsession.query(Corpus).get(corpus_id)
        corpus_dir = os.path.join(self.corpora_path, 'corpus_%d' % corpus_id)
        corpus_dir_contents = os.listdir(corpus_dir)
        assert new_corpus_count == original_corpus_count + 1
        assert resp['name'] == 'Corpus of sentences'
        assert resp['description'] == 'No ordering, no duplicates.'
        # The corpus directory exists but nothing has been written to it yet.
        assert corpus_dir_contents == []
        assert response.content_type == 'application/json'
        assert resp['content'] == ''
        assert len(corpus.forms) == sentence_count
        assert resp['form_search']['id'] == form_search_id

        # Try to TGrep2-search the corpus without first writing it to file
        # and expect to fail.
        tgrep2pattern = json.dumps({'tgrep2pattern': 'S < NP-SBJ'})
        if h.command_line_program_installed('tgrep2'):
            # Expect a 400: the corpus exists but has not yet been written
            # to file as a treebank.
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=tgrep2pattern.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin,
                                        status=400)
            tgrep2resp = response.json_body
            assert tgrep2resp['error'] == (
                'Corpus %d has not been written to file as a treebank.' %
                (corpus_id, ))

        # Write the corpus to file
        # (the pause is presumably there so that ``datetime_modified`` is
        # strictly later than at creation, as asserted below)
        sleep(1)
        params = json.dumps({'format': 'treebank'})
        response = self.app.put('/%s/corpora/%d/writetofile' %
                                (self.old_name, corpus_id),
                                params,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        resp2 = response.json_body
        corpus_dir_contents = os.listdir(corpus_dir)
        corpus_tbk_path = os.path.join(corpus_dir, 'corpus_%d.tbk' % corpus_id)
        corpus_tbk_mod_time = h.get_file_modification_time(corpus_tbk_path)
        corpus_tbk_gzipped_path = '%s.gz' % corpus_tbk_path
        corpus_tbk_file_length = h.get_file_length(corpus_tbk_path)
        corpus_tbk_t2c_path = os.path.join(corpus_dir,
                                           'corpus_%d.tbk.t2c' % corpus_id)
        corpus_file_id = resp2['files'][0]['id']
        assert resp['id'] == resp2['id']
        assert resp['name'] == resp2['name']
        assert resp2['datetime_modified'] > resp['datetime_modified']
        assert os.path.exists(corpus_tbk_path)
        # The ``.t2c`` file is expected only when tgrep2 is installed.
        if tgrep2_installed:
            assert os.path.exists(corpus_tbk_t2c_path)
        else:
            assert not os.path.exists(corpus_tbk_t2c_path)
        assert os.path.exists(corpus_tbk_gzipped_path)
        assert get_file_size(corpus_tbk_path) > get_file_size(
            corpus_tbk_gzipped_path)
        # The treebank file is expected to have one line per sentence.
        assert sentence_count == corpus_tbk_file_length

        # Retrieve the corpus file directly from the filesystem.
        with open(corpus_tbk_path, 'rb') as filei:
            corpus_file_object = filei
            corpus_file_content = corpus_file_object.read()

        # Attempt to retrieve the gzipped corpus file via request as a restricted
        # user and expect to fail.  This is because there is one restricted
        # sentential form in the db, cf. the ``initialize`` "test".
        # NOTE(review): ``params`` (the JSON string from the write request) is
        # passed positionally to ``get`` here -- confirm this is intended.
        response = self.app.get('/%s/corpora/%d/servefile/%d' %
                                (self.old_name, corpus_id, corpus_file_id),
                                params,
                                status=403,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_contrib)
        resp = response.json_body
        assert resp == UNAUTHORIZED_MSG

        # Retrieve the gzipped corpus file via request.
        response = self.app.get('/%s/corpora/%d/servefile/%d' %
                                (self.old_name, corpus_id, corpus_file_id),
                                params,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        # The served (gzipped) content must decompress to exactly what is in
        # the uncompressed treebank file on disk.
        unzipped_corpus_file_content = decompress_gzip_string(response.body)
        assert unzipped_corpus_file_content == corpus_file_content
        assert response.content_type == 'application/x-gzip'

        # Now update the corpus by changing the form search, re-write-to-file
        # and make sure everything works.

        # Create a form search model that retrieves all sentences whose id
        # ends in an even digit, plus the restricted form.
        query = {
            'filter': [
                'and',
                [['Form', 'syntactic_category', 'name', '=', 'S'],
                 [
                     'or',
                     [['Form', 'id', '=', restricted_form_id],
                      ['Form', 'id', 'regex', '[02468]$']]
                 ]]
            ]
        }
        params = json.dumps({
            'name': 'Get even-numbered or restricted sentences',
            'description':
            'Query to return all sentences in the database that have even-numbered ids or are restricted.',
            'search': query
        })
        response = self.app.post(
            '/{old_name}/formsearches'.format(old_name=self.old_name), params,
            self.json_headers, self.extra_environ_admin)
        resp = response.json_body
        form_search_id = resp['id']

        # Perform the search to get the resulting forms (again only the
        # total count is used).
        params = json.dumps({
            'query': query,
            'paginator': {
                'page': 1,
                'items_per_page': 1
            }
        })
        response = self.app.post('/%s/forms/search' % self.old_name, params,
                                 self.json_headers, self.extra_environ_admin)
        resp = response.json_body
        sentence_count = resp['paginator']['count']

        # Update the above-created corpus.  The in-session ``corpus`` object
        # is expired first so that stale attribute values are not reused.
        dbsession.expire(corpus)
        params = self.corpus_create_params.copy()
        params.update({
            'name': 'Corpus of even-numbered sentences',
            'description': 'No ordering, no duplicates.',
            'form_search': form_search_id
        })
        params = json.dumps(params)
        original_corpus_count = dbsession.query(Corpus).count()
        response = self.app.put(url('update', id=corpus_id), params,
                                self.json_headers, self.extra_environ_admin)
        resp = response.json_body
        new_corpus_count = dbsession.query(Corpus).count()
        corpus = dbsession.query(Corpus).get(corpus_id)
        corpus_dir = os.path.join(self.corpora_path, 'corpus_%d' % corpus_id)
        corpus_dir_contents = os.listdir(corpus_dir)
        # An update must not create a new corpus.
        assert new_corpus_count == original_corpus_count
        assert resp['name'] == 'Corpus of even-numbered sentences'
        assert resp['description'] == 'No ordering, no duplicates.'
        assert corpus_dir_contents != [
        ]  # Already a previously written corpus file there
        assert response.content_type == 'application/json'
        assert resp['content'] == ''
        assert len(corpus.forms) == sentence_count
        assert resp['form_search']['id'] == form_search_id

        # Write the corpus to file
        sleep(1)
        params = json.dumps({'format': 'treebank'})
        response = self.app.put('/%s/corpora/%d/writetofile' %
                                (self.old_name, corpus_id),
                                params,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        resp2 = response.json_body  # Response is a JSON repr. of the corpus
        corpus_dir_contents = os.listdir(corpus_dir)
        corpus_tbk_path = os.path.join(corpus_dir, 'corpus_%d.tbk' % corpus_id)
        # Keep the previous modification time to verify the file was rewritten.
        old_corpus_tbk_mod_time = corpus_tbk_mod_time
        corpus_tbk_mod_time = h.get_file_modification_time(corpus_tbk_path)
        corpus_tbk_gzipped_path = '%s.gz' % corpus_tbk_path
        corpus_tbk_file_length = h.get_file_length(
            corpus_tbk_path)  # no. of lines
        corpus_tbk_t2c_path = os.path.join(corpus_dir,
                                           'corpus_%d.tbk.t2c' % corpus_id)
        corpus_file_id = resp2['files'][0]['id']
        assert old_corpus_tbk_mod_time < corpus_tbk_mod_time
        # Re-writing must not add a second file to the corpus.
        assert len(resp2['files']) == 1
        assert resp['id'] == resp2['id']
        assert resp['name'] == resp2['name']
        assert resp2['datetime_modified'] > resp['datetime_modified']
        assert os.path.exists(corpus_tbk_path)
        assert os.path.exists(corpus_tbk_gzipped_path)
        if tgrep2_installed:
            assert os.path.exists(corpus_tbk_t2c_path)
        else:
            assert not os.path.exists(corpus_tbk_t2c_path)
        assert get_file_size(corpus_tbk_path) > get_file_size(
            corpus_tbk_gzipped_path)
        assert sentence_count == corpus_tbk_file_length

        # Retrieve the corpus file directly from the filesystem.
        with open(corpus_tbk_path, 'rb') as filei:
            corpus_file_object = filei
            corpus_file_content = corpus_file_object.read()

        # Attempt to retrieve the gzipped corpus file via request as a restricted
        # user and expect to fail.  This is because the one restricted sentential
        # form in the db is in the corpus.
        response = self.app.get('/%s/corpora/%d/servefile/%d' %
                                (self.old_name, corpus_id, corpus_file_id),
                                params,
                                status=403,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_contrib)
        resp = response.json_body
        assert resp == UNAUTHORIZED_MSG

        # Retrieve the gzipped corpus file via request.
        response = self.app.get('/%s/corpora/%d/servefile/%d' %
                                (self.old_name, corpus_id, corpus_file_id),
                                params,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        unzipped_corpus_file_content = decompress_gzip_string(response.body)
        assert unzipped_corpus_file_content == corpus_file_content

        # Write the corpus to file again without any changes and expect a vacuous recreation
        sleep(1)
        params = json.dumps({'format': 'treebank'})
        response = self.app.put('/%s/corpora/%d/writetofile' %
                                (self.old_name, corpus_id),
                                params,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        old_resp2 = resp2
        resp2 = response.json_body  # Response is a JSON repr. of the corpus
        corpus_tbk_path = os.path.join(corpus_dir, 'corpus_%d.tbk' % corpus_id)
        old_corpus_tbk_mod_time = corpus_tbk_mod_time
        corpus_tbk_mod_time = h.get_file_modification_time(corpus_tbk_path)
        # Even with no changes the file is rewritten and the corpus's
        # modification time advances.
        assert old_corpus_tbk_mod_time < corpus_tbk_mod_time
        assert len(resp2['files']) == 1
        assert resp2['datetime_modified'] > old_resp2['datetime_modified']
        assert os.path.exists(corpus_tbk_path)

        # TGrep2-search the corpus-as-treebank
        # {'order_by': {'order_by_model': '', 'order_by_attribute': '', 'order_by_direction': ''}}
        # {'paginator': {'page': 0, 'items_per_page': 0}}
        tgrep2pattern = 'S < NP-SBJ'
        query = {
            'paginator': {
                'page': 1,
                'items_per_page': 10
            },
            'tgrep2pattern': tgrep2pattern
        }
        json_query = json.dumps(query)
        if not h.command_line_program_installed('tgrep2'):
            # Without tgrep2 the search endpoint must respond with a 400.
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=json_query.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin,
                                        status=400)
            resp = response.json_body
            assert resp["error"] == "TGrep2 is not installed."
        else:
            # TGrep2-search the corpus-as-treebank
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=json_query.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin)
            resp = response.json_body
            # Every returned form's bracketed tree must contain the node
            # labels named in the pattern.
            for f in resp['items']:
                assert '(S ' in f['syntax'] and '(NP-SBJ ' in f['syntax']

            # A slightly more complex TGrep2 search
            tgrep2pattern = 'S < NP-SBJ << DT'
            query['tgrep2pattern'] = tgrep2pattern
            json_query = json.dumps(query)
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=json_query.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin)
            resp = response.json_body
            for f in resp['items']:
                assert ('(S ' in f['syntax'] and '(NP-SBJ ' in f['syntax']
                        and '(DT ' in f['syntax'])

            # Another TGrep2 search
            tgrep2pattern = 'NP-SBJ < DT . VP'
            query['tgrep2pattern'] = tgrep2pattern
            json_query = json.dumps(query)
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=json_query.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin)
            resp = response.json_body
            # Remember the administrator's match count for comparison with
            # the restricted user's count below.
            match_count = resp['paginator']['count']
            for f in resp['items']:
                assert ('(NP-SBJ ' in f['syntax'] and '(DT ' in f['syntax']
                        and '(VP ' in f['syntax'])

            # Failed tgrep2 search with invalid corpus id.
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, 123456789),
                                        method='SEARCH',
                                        body=json_query.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin,
                                        status=404)
            resp = response.json_body
            assert resp['error'] == 'There is no corpus with id 123456789'

            # Restricted user will not get all of the results.
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=json_query.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_view)
            resp = response.json_body
            restricted_match_count = resp['paginator']['count']
            assert isinstance(restricted_match_count,
                              int) and restricted_match_count < match_count

            # Failed TGrep2 search: bad JSON in request body
            # (dropping the final character leaves the JSON unterminated)
            json_query = json_query[:-1]
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=json_query.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin,
                                        status=400)
            resp = response.json_body
            assert resp == JSONDecodeErrorResponse

            # Failed TGrep2 search: malformed params
            # (the key is miscapitalized: 'TGrep2pattern', not 'tgrep2pattern')
            tgrep2pattern = json.dumps({'TGrep2pattern': 'NP-SBJ < DT . VP'})
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=tgrep2pattern.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin,
                                        status=400)
            resp = response.json_body
            assert resp['errors']['tgrep2pattern'] == \
                    "A tgrep2pattern attribute must be supplied and must have a string value"

            # Empty string TGrep2 pattern results in no forms being returned.
            tgrep2pattern = json.dumps({'tgrep2pattern': ''})
            response = self.app.request('/{}/corpora/{}/tgrep2'.format(
                self.old_name, corpus_id),
                                        method='SEARCH',
                                        body=tgrep2pattern.encode('utf8'),
                                        headers=self.json_headers,
                                        environ=self.extra_environ_admin)
            resp = response.json_body
            assert resp == []
    def test_d_search(self):
        """Tests that SEARCH /rememberedforms/id returns an array of the forms remembered by the user with id=id that match the search criteria.

        Here we show the somewhat complex interplay of the unrestricted users, the
        restricted tag and the remembered_forms relation between users and forms.

        Both search entry points are exercised: ``POST
        /rememberedforms/id/search`` and ``SEARCH /rememberedforms/id``.  The
        expected results are recomputed in Python from the forms currently in
        the database and compared, by id and in order, with each response.
        """

        db = DBUtils(self.dbsession, self.settings)
        forms = json.loads(
            json.dumps([self.fix_form(f.get_dict()) for f in db.get_forms()]))
        viewer, contributor, administrator = get_users(db)
        viewer_id = viewer.id
        contributor_id = contributor.id
        administrator_id = administrator.id

        def is_unrestricted(form):
            """Return True if the form is not tagged 'restricted'."""
            return 'restricted' not in [t['name'] for t in form['tags']]

        def ids(items):
            """Return the ids of the given form dicts, preserving order."""
            return [f['id'] for f in items]

        def build_query(like_pattern):
            """Return the JSON search query used throughout this test.

            Only the LIKE pattern applied to the translations varies between
            the two queries issued below.
            """
            return json.dumps({
                'query': {
                    'filter': [
                        'and',
                        [['Translation', 'transcription', 'like',
                          like_pattern],
                         ['not',
                          ['Form', 'morpheme_break', 'regex', '[18][5-7]']],
                         [
                             'or',
                             [[
                                 'Form', 'datetime_modified', '=',
                                 today_timestamp.isoformat()
                             ], ['Form', 'date_elicited', '=',
                                 jan1.isoformat()]]
                         ]]
                    ]
                }
            })

        def matches(form, digit):
            """Python-side equivalent of ``build_query('%<digit>%')``."""
            translations = ' '.join(
                [g['transcription'] for g in form['translations']])
            return (
                digit in translations
                and not re.search('[18][5-7]', form['morpheme_break'])
                and (today_timestamp.isoformat() == form['datetime_modified']
                     or (form['date_elicited']
                         and jan1.isoformat() == form['date_elicited'])))

        def post_search(user_id, query, environ):
            """Search via POST /rememberedforms/<user_id>/search."""
            return self.app.post(
                '/{old_name}/rememberedforms/{id}/search'.format(
                    old_name=self.old_name, id=user_id),
                query, self.json_headers, environ)

        def request_search(user_id, query, environ):
            """Search via the SEARCH method on /rememberedforms/<user_id>."""
            return self.app.request(
                '/{old_name}/rememberedforms/{id}'.format(
                    old_name=self.old_name, id=user_id),
                method='SEARCH',
                body=query.encode('utf8'),
                headers=self.json_headers,
                environ=environ)

        # The remembered forms each user is expected to have at this point.
        # These mirror the state that ``test_b_update`` leaves behind.
        viewer_remembered_forms = [f for f in forms if is_unrestricted(f)]
        contributor_remembered_forms = [f for f in forms if f['id'] % 2 != 0]
        administrator_remembered_forms = [
            f for f in forms if f['id'] % 2 == 0 and f['id'] > 25
        ]

        # The query we will use over and over again
        json_query = build_query('%1%')

        # A slight variation on the above query so that searches on the admin's
        # remembered forms will return some values
        json_query_admin = build_query('%8%')

        # The expected output of the above queries on each user's remembered
        # forms list
        result_set_viewer = [
            f for f in viewer_remembered_forms if matches(f, '1')
        ]
        result_set_contributor = [
            f for f in contributor_remembered_forms if matches(f, '1')
        ]
        result_set_administrator = [
            f for f in administrator_remembered_forms if matches(f, '8')
        ]

        # Search the viewer's remembered forms.
        # NOTE(review): the request is sent with the *administrator's* environ
        # although the original comment said "as the viewer" -- confirm which
        # is intended.
        response = post_search(viewer_id, json_query,
                               self.extra_environ_admin_appset)
        resp = response.json_body
        assert ids(result_set_viewer) == ids(resp)
        assert response.content_type == 'application/json'
        assert resp

        # Perform the same search as above on the contributor's remembered forms,
        # as the contributor.
        response = request_search(contributor_id, json_query,
                                  self.extra_environ_contrib_appset)
        resp = response.json_body
        assert ids(result_set_contributor) == ids(resp)
        assert response.content_type == 'application/json'
        assert resp

        # Perform the same search as above on the contributor's remembered forms,
        # but search as the viewer and expect not to see the restricted forms
        # (per the original author's note, those with ids > 50).
        response = post_search(contributor_id, json_query,
                               self.extra_environ_view_appset)
        resp = response.json_body
        result_set = [f for f in result_set_contributor if is_unrestricted(f)]
        assert ids(result_set) == ids(resp)
        assert response.content_type == 'application/json'
        assert resp

        # Perform the search on the administrator's remembered forms as the viewer.
        # NOTE(review): this sends ``json_query`` ('%1%') while the expectation
        # is derived from ``result_set_administrator`` ('%8%'), and the response
        # is not asserted to be non-empty -- confirm whether
        # ``json_query_admin`` was intended here.
        response = request_search(administrator_id, json_query,
                                  self.extra_environ_view_appset)
        resp = response.json_body
        result_set = [
            f for f in result_set_administrator if is_unrestricted(f)
        ]
        assert ids(result_set) == ids(resp)
        assert response.content_type == 'application/json'

        # Perform the search on the administrator's remembered forms as the
        # contributor.
        response = post_search(administrator_id, json_query_admin,
                               self.extra_environ_contrib_appset)
        resp = response.json_body
        assert ids(result_set_administrator) == ids(resp)
        assert response.content_type == 'application/json'
        assert resp

        # Perform the search on the administrator's remembered forms as the
        # administrator.
        response = request_search(administrator_id, json_query_admin,
                                  self.extra_environ_admin_appset)
        resp = response.json_body
        assert ids(result_set_administrator) == ids(resp)
        assert response.content_type == 'application/json'
        assert resp
    def test_c_show(self):
        """Tests that GET /rememberedforms/id returns an array of the forms remembered by the user with id=id.

        Covers plain retrieval for the viewer, contributor and administrator,
        the paginator and order_by GET parameters (valid and invalid), and the
        404 response for a nonexistent user id.
        """

        db = DBUtils(self.dbsession, self.settings)
        all_forms = json.loads(
            json.dumps([self.fix_form(f.get_dict()) for f in db.get_forms()]))
        viewer, contributor, administrator = get_users(db)
        viewer_id = viewer.id
        contributor_id = contributor.id
        administrator_id = administrator.id

        as_contributor = self.extra_environ_contrib_appset
        as_administrator = self.extra_environ_admin_appset

        def fetch(user_id, environ, *query, **options):
            """GET /rememberedforms/<user_id> with the JSON headers.

            ``query`` optionally carries the GET parameter dict and
            ``options`` extra keyword arguments such as ``status``.
            """
            target = '/{old_name}/rememberedforms/{id}'.format(
                old_name=self.old_name, id=user_id)
            return self.app.get(target,
                                *query,
                                headers=self.json_headers,
                                extra_environ=environ,
                                **options)

        def id_set(items):
            """Return the set of ids of the given form dicts."""
            return {item['id'] for item in items}

        ########################################################################
        # Viewer
        ########################################################################

        # Get the viewer's remembered forms (show that a contributor can do this)
        response = fetch(viewer_id, as_contributor)
        body = response.json_body
        unrestricted = [
            form for form in all_forms
            if 'restricted' not in [tag['name'] for tag in form['tags']]
        ]
        assert response.content_type == 'application/json'
        assert id_set(unrestricted) == id_set(body)

        # Test the paginator GET params: page 3 at 7 items per page starts at
        # the 15th item (index 14).
        page_three = {'items_per_page': 7, 'page': 3}
        response = fetch(viewer_id, as_contributor, page_three)
        body = response.json_body
        assert response.content_type == 'application/json'
        assert len(body['items']) == 7
        first_item = body['items'][0]
        assert first_item['transcription'] == unrestricted[14]['transcription']

        # Test the order_by GET params.
        descending = {
            'order_by_model': 'Form',
            'order_by_attribute': 'transcription',
            'order_by_direction': 'desc'
        }
        response = fetch(viewer_id, as_contributor, descending)
        body = response.json_body
        unrestricted_desc = sorted(unrestricted,
                                   key=lambda form: form['transcription'],
                                   reverse=True)
        assert response.content_type == 'application/json'
        assert unrestricted_desc == body

        # Test the order_by *with* paginator.
        response = fetch(viewer_id, as_contributor,
                         dict(descending, **page_three))
        body = response.json_body
        assert len(body['items']) == 7
        first_item = body['items'][0]
        assert unrestricted_desc[14]['transcription'] == first_item[
            'transcription']

        # Expect a 400 error when the order_by_direction param is invalid
        bad_direction = dict(descending, order_by_direction='descending')
        response = fetch(viewer_id, as_contributor, bad_direction, status=400)
        body = response.json_body
        assert response.content_type == 'application/json'
        direction_error = body['errors']['order_by_direction']
        assert direction_error == "Value must be one of: asc; desc (not 'descending')"

        # When the order_by model/attribute params are invalid the request
        # still succeeds and the first item returned is the first form of the
        # collection (the original author describes this as the default
        # id-ascending ordering).
        bad_model = {
            'order_by_model': 'Formosa',
            'order_by_attribute': 'transcrumption',
            'order_by_direction': 'desc'
        }
        response = fetch(viewer_id, as_contributor, bad_model)
        body = response.json_body
        assert body[0]['id'] == all_forms[0]['id']

        # Expect a 400 error when the paginator GET params are empty, not
        # integers, or integers that are less than 1.
        response = fetch(viewer_id,
                         as_contributor, {'items_per_page': 'a', 'page': ''},
                         status=400)
        errors = response.json_body['errors']
        assert errors['items_per_page'] == 'Please enter an integer value'
        assert errors['page'] == 'Please enter a value'

        response = fetch(viewer_id,
                         as_contributor, {'items_per_page': 0, 'page': -1},
                         status=400)
        errors = response.json_body['errors']
        too_small = 'Please enter a number that is 1 or greater'
        assert errors['items_per_page'] == too_small
        assert errors['page'] == too_small

        ########################################################################
        # Contributor
        ########################################################################

        # Get the contributor's remembered forms: those with odd ids.
        response = fetch(contributor_id, as_contributor)
        body = response.json_body
        odd_forms = [form for form in all_forms if form['id'] % 2 != 0]
        assert response.content_type == 'application/json'
        assert id_set(odd_forms) == id_set(body)

        # Invalid user id returns a 404 error
        response = fetch(200987654, as_contributor, status=404)
        body = response.json_body
        assert response.content_type == 'application/json'
        assert body['error'] == 'There is no user with id 200987654'

        ########################################################################
        # Administrator
        ########################################################################

        # Get the administrator's remembered forms: even ids greater than 25.
        response = fetch(administrator_id, as_administrator)
        body = response.json_body
        even_high_forms = [
            form for form in all_forms
            if form['id'] % 2 == 0 and form['id'] > 25
        ]
        assert response.content_type == 'application/json'
        assert id_set(even_high_forms) == id_set(body)
    def test_b_update(self):
        """Tests that PUT /rememberedforms/id correctly updates the set of forms remembered by the user with id=id.

        The test leaves the viewer remembering every unrestricted form, the
        contributor remembering the odd-id forms and the administrator
        remembering the even-id forms with ids greater than 25.
        """

        dbsession = self.dbsession
        db = DBUtils(dbsession, self.settings)

        unsorted_forms = json.loads(
            json.dumps([self.fix_form(f.get_dict()) for f in db.get_forms()]))
        forms = sorted(unsorted_forms, key=lambda form: form['id'])
        viewer, contributor, administrator = get_users(db)
        viewer_id = viewer.id
        viewer_datetime_modified = viewer.datetime_modified
        contributor_id = contributor.id
        administrator_id = administrator.id

        as_viewer = self.extra_environ_view_appset
        as_contributor = self.extra_environ_contrib_appset
        as_administrator = self.extra_environ_admin_appset

        def put_forms(user_id, payload, *environ, **options):
            """PUT ``payload`` to /rememberedforms/<user_id>.

            ``environ`` optionally carries the extra environ (omitted for the
            unauthenticated request) and ``options`` keyword arguments such as
            ``status``.
            """
            target = '/{old_name}/rememberedforms/{id}'.format(
                old_name=self.old_name, id=user_id)
            return self.app.put(target, payload, self.json_headers, *environ,
                                **options)

        def id_set(items):
            """Return the set of ids of the given form dicts."""
            return {item['id'] for item in items}

        def unrestricted_only(items):
            """Return the forms that are not tagged 'restricted'."""
            return [
                item for item in items
                if 'restricted' not in [tag['name'] for tag in item['tags']]
            ]

        def assert_json_error(response, message):
            """Check that the response is JSON carrying the given error."""
            body = response.json_body
            assert response.content_type == 'application/json'
            assert body['error'] == message

        all_forms_payload = json.dumps(
            {'forms': [form['id'] for form in forms]})
        no_forms_payload = json.dumps({'forms': []})
        not_authorized = 'You are not authorized to access this resource.'

        ########################################################################
        # Viewer -- play with the viewer's remembered forms
        ########################################################################

        # Try to add every form in the database to the viewer's remembered
        # forms.  The request is expected to succeed, but the response should
        # contain only the forms that are not tagged 'restricted'.  The sleep
        # precedes the check that the viewer's datetime_modified has changed.
        sleep(1)
        response = put_forms(viewer_id, all_forms_payload, as_viewer)
        body = response.json_body
        viewer_remembered_forms = sorted(body, key=lambda form: form['id'])
        expected = unrestricted_only(forms)
        dbsession.expire(viewer)
        viewer, _contributor, _administrator = get_users(db)
        assert viewer.datetime_modified != viewer_datetime_modified
        assert id_set(expected) == id_set(body)
        assert response.content_type == 'application/json'

        # Try to clear the viewer's remembered forms as the contributor and
        # expect the request to be denied.
        response = put_forms(viewer_id,
                             no_forms_payload,
                             as_contributor,
                             status=403)
        assert_json_error(response, not_authorized)

        # Get the list of ids from the userforms relational table.  This is used
        # to show that resetting a user's remembered_forms attribute via SQLAlchemy
        # does not wastefully recreate all relations.  See below
        last_remembered_id = viewer_remembered_forms[-1]['id']
        relations = dbsession.query(UserForm).filter(
            UserForm.user_id == viewer_id).all()
        surviving_relation_ids = {
            relation.id
            for relation in relations
            if relation.form_id != last_remembered_id
        }

        # Remove the last of the viewer's remembered forms as the administrator.
        all_but_last = [form['id'] for form in viewer_remembered_forms][:-1]
        response = put_forms(viewer_id, json.dumps({'forms': all_but_last}),
                             as_administrator)
        body = response.json_body
        expected = expected[:-1]
        assert id_set(expected) == id_set(body)
        assert response.content_type == 'application/json'

        # The relation rows left in the userforms table must be exactly the
        # pre-existing ones minus the removed form's row, i.e., the relations
        # were not destroyed and recreated.
        relations = dbsession.query(UserForm).filter(
            UserForm.user_id == viewer_id).all()
        assert surviving_relation_ids == {
            relation.id for relation in relations
        }

        # Attempted update fails: bad user id
        response = put_forms(100896,
                             no_forms_payload,
                             as_administrator,
                             status=404)
        assert_json_error(response, 'There is no user with id 100896')

        # Attempted update fails: invalid array of form ids
        response = put_forms(viewer_id,
                             json.dumps({'forms': ['a', 1000000087654]}),
                             as_administrator,
                             status=400)
        body = response.json_body
        assert response.content_type == 'application/json'
        assert body['errors']['forms'] == [
            u'Please enter an integer value',
            'There is no form with id 1000000087654.'
        ]

        # Attempted update fails: the request body is bad JSON (the closing
        # brace is chopped off).
        response = put_forms(viewer_id,
                             no_forms_payload[:-1],
                             as_administrator,
                             status=400)
        assert_json_error(
            response,
            'JSON decode error: the parameters provided were not valid JSON.')

        # Clear the forms
        response = put_forms(viewer_id, no_forms_payload, as_administrator)
        body = response.json_body
        viewer = dbsession.query(
            old_models.User).filter(old_models.User.role == 'viewer').first()
        assert response.content_type == 'application/json'
        assert viewer.remembered_forms == []
        assert body == []

        # Attempt to clear the forms again and fail because the submitted data are not new.
        response = put_forms(viewer_id,
                             no_forms_payload,
                             as_viewer,
                             status=400)
        assert_json_error(
            response,
            'The update request failed because the submitted data were not new.'
        )

        # Attempt to add all forms to the viewer's remembered forms without
        # any environ.  Fail because unauthenticated.
        response = put_forms(viewer_id, all_forms_payload, status=401)
        assert_json_error(
            response, 'Authentication is required to access this resource.')

        # Finally for the viewer, re-add all unrestricted forms to the viewer's
        # remembered forms for subsequent searches and GETs.
        response = put_forms(viewer_id, all_forms_payload, as_viewer)
        body = response.json_body
        assert response.content_type == 'application/json'
        assert id_set(unrestricted_only(forms)) == id_set(body)

        ########################################################################
        # Contributor -- play with the contributor's remembered forms
        ########################################################################

        # The contributor is unrestricted.  Add all forms to this user's
        # remembered forms.
        response = put_forms(contributor_id, all_forms_payload,
                             as_contributor)
        body = response.json_body
        assert response.content_type == 'application/json'
        assert id_set(forms) == id_set(body)

        # Change the contributor's remembered forms to contain only the forms
        # with odd numbered ids.
        odd_ids = [form['id'] for form in forms if form['id'] % 2 != 0]
        response = put_forms(contributor_id, json.dumps({'forms': odd_ids}),
                             as_contributor)
        body = response.json_body
        assert response.content_type == 'application/json'
        assert set(odd_ids) == id_set(body)

        ########################################################################
        # Administrator -- play with the administrator's remembered forms
        ########################################################################

        # Make sure even an unrestricted contributor cannot update another user's
        # remembered forms.
        odd_high_ids = [
            form['id'] for form in forms
            if form['id'] % 2 != 0 and form['id'] > 25
        ]
        response = put_forms(administrator_id,
                             json.dumps({'forms': odd_high_ids}),
                             as_contributor,
                             status=403)
        assert_json_error(response, not_authorized)

        # The administrator's remembered forms are all the evenly id-ed ones with
        # ids greater than 25.
        even_high_ids = [
            form['id'] for form in forms
            if form['id'] % 2 == 0 and form['id'] > 25
        ]
        response = put_forms(administrator_id,
                             json.dumps({'forms': even_high_ids}),
                             as_administrator)
        body = response.json_body
        assert response.content_type == 'application/json'
        assert set(even_high_ids) == id_set(body)
Ejemplo n.º 6
0
    def test_index(self):
        """Tests that GET & SEARCH /corpusbackups behave correctly.

        The test proceeds in these stages:

        1. Create a tag, a set of forms and a form search, then create a
           corpus (as the admin) whose content references all of the forms.
        2. Update the corpus twice (first as the contributor, then as the
           admin) so that two corpus backups exist.
        3. Exercise the corpus backup ``index`` action as different users,
           with paginator and ``order_by`` GET parameters.
        4. Exercise the ``show`` action with an existing and a nonexistent id.
        5. Exercise searching via ``POST .../search`` and via the ``SEARCH``
           HTTP method.
        6. Confirm that edit/new/create/update/delete requests return 404
           with a "read-only" error message.
        """

        dbsession = self.dbsession
        db = DBUtils(dbsession, self.settings)

        # Create a tag; its id is used later in the second corpus update.
        tag = old_models.Tag()
        tag.name = 'random tag name'
        dbsession.add(tag)
        dbsession.flush()
        tag_id = tag.id
        dbsession.commit()

        # Add 9 forms (indices 1 through 9) and use them to generate a valid
        # value for ``test_corpus_content``
        def create_form_from_index(index):
            """Return a new, not-yet-persisted Form whose transcription and
            translation transcription both embed ``index``.
            """
            form = old_models.Form()
            form.transcription = 'Form %d' % index
            translation = old_models.Translation()
            translation.transcription = 'Translation %d' % index
            # NOTE(review): this assigns to ``form.translation`` (singular);
            # confirm that the Form model actually defines this attribute.
            form.translation = translation
            return form

        forms = [create_form_from_index(i) for i in range(1, 10)]
        dbsession.add_all(forms)
        dbsession.commit()
        # Re-read the forms through ``db.get_forms()``; ``half_forms`` is the
        # first five of whatever that returns.
        forms = db.get_forms()
        half_forms = forms[:5]
        form_ids = [form.id for form in forms]
        half_form_ids = [form.id for form in half_forms]
        # Corpus content is expressed here as a comma-delimited string of
        # form ids.
        test_corpus_content = ','.join(map(str, form_ids))
        test_corpus_half_content = ','.join(map(str, half_form_ids))

        # Create a form search model
        query = {'filter': ['Form', 'transcription', 'regex', '[a-zA-Z]{3,}']}
        params = json.dumps({
            'name': 'form search',
            'description': 'This one\'s worth saving!',
            'search': query
        })
        response = self.app.post(fs_url('create'), params, self.json_headers,
                                 self.extra_environ_admin)
        resp = response.json_body
        # NOTE(review): ``form_search_id`` is not referenced again in the
        # visible part of this test.
        form_search_id = resp['id']

        # Generate some valid corpus creation input parameters.
        params = self.corpus_create_params.copy()
        params.update({
            'name': 'Corpus',
            'description': 'Covers a lot of the data.',
            'content': test_corpus_content
        })
        params = json.dumps(params)

        # Attempt to create a corpus as a viewer and expect to fail
        response = self.app.post(crps_url('create'),
                                 params,
                                 self.json_headers,
                                 self.extra_environ_view,
                                 status=403)
        resp = response.json_body
        assert resp[
            'error'] == 'You are not authorized to access this resource.'
        assert response.content_type == 'application/json'

        # Successfully create a corpus as the admin
        # The corpora directory is expected to be empty before creation.
        assert os.listdir(self.corpora_path) == []
        original_corpus_count = dbsession.query(Corpus).count()
        response = self.app.post(crps_url('create'), params, self.json_headers,
                                 self.extra_environ_admin)
        resp = response.json_body
        corpus_id = resp['id']
        new_corpus_count = dbsession.query(Corpus).count()
        corpus = dbsession.query(Corpus).get(corpus_id)
        corpus_form_ids = sorted([f.id for f in corpus.forms])
        # ``os.listdir`` below also implicitly checks that a per-corpus
        # directory named ``corpus_<id>`` exists (it would raise otherwise).
        corpus_dir = os.path.join(self.corpora_path, 'corpus_%d' % corpus_id)
        corpus_dir_contents = os.listdir(corpus_dir)
        assert new_corpus_count == original_corpus_count + 1
        assert resp['name'] == 'Corpus'
        assert resp['description'] == 'Covers a lot of the data.'
        assert corpus_dir_contents == []
        assert response.content_type == 'application/json'
        assert resp['content'] == test_corpus_content
        assert corpus_form_ids == sorted(form_ids)

        # Update the corpus as the contributor -- now we should have one backup
        # Expire the loaded corpus so that its attributes are re-read from the
        # database after the update request.
        dbsession.expire(corpus)
        params = self.corpus_create_params.copy()
        params.update({
            'name': 'Corpus',
            'description': 'Covers a little less data.',
            'content': test_corpus_half_content
        })
        params = json.dumps(params)
        response = self.app.put(crps_url('update', id=corpus_id), params,
                                self.json_headers, self.extra_environ_contrib)
        resp = response.json_body
        corpus_count = new_corpus_count
        new_corpus_count = dbsession.query(Corpus).count()
        corpus = dbsession.query(Corpus).get(corpus_id)
        corpus_form_ids = sorted([f.id for f in corpus.forms])
        # An update must not change the number of corpora.
        assert new_corpus_count == corpus_count
        assert resp['name'] == 'Corpus'
        assert resp['description'] == 'Covers a little less data.'
        assert response.content_type == 'application/json'
        assert resp['content'] == test_corpus_half_content
        assert corpus_form_ids == sorted(half_form_ids)

        # Update the corpus again -- now we should have two backups
        # NOTE(review): the one-second pause presumably ensures the second
        # update gets a later timestamp than the first -- TODO confirm.
        sleep(1)
        # Only the tags differ from the previous update.
        params = self.corpus_create_params.copy()
        params.update({
            'name': 'Corpus',
            'description': 'Covers a little less data.',
            'content': test_corpus_half_content,
            'tags': [tag_id]
        })
        params = json.dumps(params)
        response = self.app.put(crps_url('update', id=corpus_id), params,
                                self.json_headers, self.extra_environ_admin)
        resp = response.json_body
        corpus_count = new_corpus_count
        new_corpus_count = dbsession.query(Corpus).count()
        corpus = dbsession.query(Corpus).get(corpus_id)
        corpus_form_ids = sorted([f.id for f in corpus.forms])
        assert new_corpus_count == corpus_count
        assert resp['name'] == 'Corpus'
        assert resp['description'] == 'Covers a little less data.'
        assert response.content_type == 'application/json'
        assert resp['content'] == test_corpus_half_content
        assert corpus_form_ids == sorted(half_form_ids)

        # Collect the backups straight from the database (ordered by id) to
        # serve as the expected values for the requests below.
        all_corpus_backups = dbsession.query(CorpusBackup).order_by(
            CorpusBackup.id).all()
        all_corpus_backup_ids = [cb.id for cb in all_corpus_backups]
        all_corpus_backup_descriptions = [
            cb.description for cb in all_corpus_backups
        ]

        # Now request the corpus backups as the contributor and expect to get
        # them all. The first backup is expected to list an administrator as
        # its modifier and the second a contributor.
        response = self.app.get(url('index'),
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_contrib)
        resp = response.json_body
        assert len(resp) == 2
        assert response.content_type == 'application/json'
        assert resp[0]['modifier']['role'] == 'administrator'
        assert resp[1]['modifier']['role'] == 'contributor'

        # The viewer should get them all too.
        response = self.app.get(url('index'),
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_view)
        resp = response.json_body
        assert len(resp) == 2
        assert [cb['id'] for cb in resp] == all_corpus_backup_ids

        # Test the paginator GET params.
        # With one item per page, page 2 should hold the second backup.
        paginator = {'items_per_page': 1, 'page': 2}
        response = self.app.get(url('index'),
                                paginator,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        resp = response.json_body
        assert len(resp['items']) == 1
        assert resp['paginator']['count'] == 2
        assert response.content_type == 'application/json'
        assert resp['items'][0]['id'] == all_corpus_backup_ids[1]

        # Test the order_by GET params.
        order_by_params = {
            'order_by_model': 'CorpusBackup',
            'order_by_attribute': 'id',
            'order_by_direction': 'desc'
        }
        response = self.app.get(url('index'),
                                order_by_params,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        resp = response.json_body
        # Descending id order is the reverse of the id-ordered database query.
        result_set = list(reversed(all_corpus_backup_ids))
        assert [cb['id'] for cb in resp] == result_set

        # Test the order_by *with* paginator.
        params = {
            'order_by_model': 'CorpusBackup',
            'order_by_attribute': 'id',
            'order_by_direction': 'desc',
            'items_per_page': 1,
            'page': 1
        }
        response = self.app.get(url('index'),
                                params,
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        resp = response.json_body
        assert result_set[0] == resp['items'][0]['id']

        # Now test the show action:

        # Get a specific corpus backup.
        # The earliest backup is expected to hold the corpus as it was
        # originally created (original description and full content).
        response = self.app.get(url('show', id=all_corpus_backup_ids[0]),
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_admin)
        resp = response.json_body
        assert resp['description'] == 'Covers a lot of the data.'
        assert resp['content'] == test_corpus_content
        assert response.content_type == 'application/json'

        # A nonexistent cb id will return a 404 error
        response = self.app.get(url('show', id=100987),
                                headers=self.json_headers,
                                extra_environ=self.extra_environ_view,
                                status=404)
        resp = response.json_body
        assert resp['error'] == 'There is no corpus backup with id 100987'
        assert response.content_type == 'application/json'

        # Test the search action
        add_SEARCH_to_web_test_valid_methods()

        # A search on corpus backup descriptions using POST
        # /corpusbackups/search
        json_query = json.dumps({
            'query': {
                'filter': ['CorpusBackup', 'description', 'like', '%less%']
            }
        })
        response = self.app.post(url('search_post'),
                                 json_query,
                                 self.json_headers,
                                 extra_environ=self.extra_environ_admin)
        resp = response.json_body
        # Compute the expected matches from the descriptions read directly
        # from the database; exactly one backup is expected to match.
        result_set = [
            name for name in all_corpus_backup_descriptions if 'less' in name
        ]
        assert len(resp) == len(result_set) == 1
        assert resp[0]['description'] == result_set[0]
        assert response.content_type == 'application/json'

        # A search on corpus backup descriptions using SEARCH /corpusbackups
        json_query = json.dumps({
            'query': {
                'filter': ['CorpusBackup', 'description', 'like', '%less%']
            }
        })
        response = self.app.request(url('search'),
                                    method='SEARCH',
                                    body=json_query.encode('utf8'),
                                    headers=self.json_headers,
                                    environ=self.extra_environ_admin)
        resp = response.json_body
        assert len(resp) == len(result_set) == 1
        assert resp[0]['description'] == result_set[0]
        assert response.content_type == 'application/json'

        # Attempting to call edit/new/create/delete/update on a read-only resource
        # will return a 404 response
        response = self.app.get(url('edit', id=2232),
                                status=404,
                                extra_environ=self.extra_environ_admin)
        assert response.json_body['error'] == 'This resource is read-only.'
        response = self.app.get(url('new', id=2232),
                                status=404,
                                extra_environ=self.extra_environ_admin)
        assert response.json_body['error'] == 'This resource is read-only.'
        response = self.app.post(url('create'),
                                 status=404,
                                 extra_environ=self.extra_environ_admin)
        assert response.json_body['error'] == 'This resource is read-only.'
        response = self.app.put(url('update', id=2232),
                                status=404,
                                extra_environ=self.extra_environ_admin)
        assert response.json_body['error'] == 'This resource is read-only.'
        response = self.app.delete(url('delete', id=2232),
                                   status=404,
                                   extra_environ=self.extra_environ_admin)
        assert response.json_body['error'] == 'This resource is read-only.'
        # NOTE(review): only the final (DELETE) response's content type is
        # checked here; the four preceding read-only responses are not.
        assert response.content_type == 'application/json'