Example #1
0
def create_new_morpheme_language_model(data):
    """Construct a morpheme language model from ``data``.

    :param dict data: input values; the keys ``name``, ``description``,
        ``vocabulary_morphology``, ``corpus``, ``toolkit``, ``order``,
        ``smoothing`` and ``categorial`` are read.
    :returns: an SQLAlchemy model object representing the morpheme language
        model.

    The enterer and the modifier are both set to ``session['user']``.  The
    object is only constructed here; this function does not add it to a
    database session or commit it.

    """
    attributes = {
        # Values derived from helpers and application configuration.
        'parent_directory': h.get_OLD_directory_path(
            'morphemelanguagemodels', config=config),
        'rare_delimiter': h.rare_delimiter,
        'start_symbol': h.lm_start,
        'end_symbol': h.lm_end,
        'morpheme_delimiters': h.get_morpheme_delimiters(type_=u'unicode'),
        'UUID': unicode(uuid4()),
        # Free-text values are passed through ``h.normalize``.
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        # Provenance metadata.
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
        # Values copied from ``data`` unchanged.
        'vocabulary_morphology': data['vocabulary_morphology'],
        'corpus': data['corpus'],
        'toolkit': data['toolkit'],
        'order': data['order'],
        'smoothing': data['smoothing'],
        'categorial': data['categorial'],
    }
    return MorphemeLanguageModel(**attributes)
Example #2
0
def serve_file(id, reduced=False):
    """Serve the content (binary data) of a file.

    :param str id: the ``id`` value of the file whose file data will be served.
    :param bool reduced: when true, serve the copy stored under
        ``reduced_files`` (named by ``lossy_filename``) instead of the file
        named by ``filename``.
    :returns: the result of forwarding to a ``FileApp`` for the file path on
        success; otherwise a JSON string describing the error, with
        ``response.status_int`` set to 400, 403 or 404.

    If the requested file has a ``parent_file``, the parent is the file that
    is authorized and served.

    """
    requested = Session.query(File).options(
        subqueryload(File.parent_file)).get(id)

    parent = getattr(requested, 'parent_file', None)
    if parent:
        target = parent
    else:
        external_url = getattr(requested, 'url', None)
        if external_url:
            # Externally hosted content cannot be served from local storage.
            response.status_int = 400
            return json.dumps({'error': u'The content of file %s is stored elsewhere at %s' % (id, external_url)})
        target = requested

    if not target:
        response.status_int = 404
        return json.dumps({'error': 'There is no file with id %s' % id})

    files_dir = h.get_OLD_directory_path('files', config=config)
    if reduced:
        lossy_name = getattr(target, 'lossy_filename', None)
        if not lossy_name:
            response.status_int = 404
            return json.dumps({'error': u'There is no size-reduced copy of file %s' % id})
        file_path = os.path.join(files_dir, 'reduced_files', lossy_name)
    else:
        file_path = os.path.join(files_dir, target.filename)

    unrestricted_users = h.get_unrestricted_users()
    authorized = h.user_is_authorized_to_access_model(
        session['user'], target, unrestricted_users)
    if not authorized:
        response.status_int = 403
        return json.dumps(h.unauthorized_msg)
    return forward(FileApp(file_path))
Example #3
0
def create_new_morphology(data):
    """Construct a morphology from ``data``.

    :param dict data: input values; ``name`` and ``description`` are passed
        through ``h.normalize``, the remaining keys listed below are copied
        unchanged.
    :returns: an SQLAlchemy model object representing the morphology.

    The enterer and the modifier are both set to ``session['user']``.  The
    object is only constructed here; this function does not add it to a
    database session or commit it.

    """
    attributes = {
        'parent_directory': h.get_OLD_directory_path(
            'morphologies', config=config),
        'word_boundary_symbol': h.word_boundary_symbol,
        'morpheme_delimiters': h.get_morpheme_delimiters(type_=u'unicode'),
        'rare_delimiter': h.rare_delimiter,
        'UUID': unicode(uuid4()),
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
    }
    # The remaining attributes are taken from ``data`` as they are.
    passthrough_keys = (
        'lexicon_corpus',
        'rules_corpus',
        'script_type',
        'extract_morphemes_from_rules_corpus',
        'rules',
        'rich_upper',
        'rich_lower',
        'include_unknowns',
    )
    for key in passthrough_keys:
        attributes[key] = data[key]
    return Morphology(**attributes)
Example #4
0
def create_new_morpheme_language_model(data):
    """Construct a morpheme language model from ``data``.

    :param dict data: input values; ``name`` and ``description`` are passed
        through ``h.normalize``, while ``vocabulary_morphology``, ``corpus``,
        ``toolkit``, ``order``, ``smoothing`` and ``categorial`` are copied
        unchanged.
    :returns: an SQLAlchemy model object representing the morpheme language
        model.

    The enterer and the modifier are both set to ``session['user']``.  The
    object is only constructed here; this function does not add it to a
    database session or commit it.

    """
    attributes = {
        'parent_directory': h.get_OLD_directory_path(
            'morphemelanguagemodels', config=config),
        'rare_delimiter': h.rare_delimiter,
        'start_symbol': h.lm_start,
        'end_symbol': h.lm_end,
        'morpheme_delimiters': h.get_morpheme_delimiters(type_=u'unicode'),
        'UUID': unicode(uuid4()),
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
    }
    # The remaining attributes are taken from ``data`` as they are.
    for key in ('vocabulary_morphology', 'corpus', 'toolkit', 'order',
                'smoothing', 'categorial'):
        attributes[key] = data[key]
    return MorphemeLanguageModel(**attributes)
Example #5
0
    def __setattrs__(self):
        """Set the environment dicts, headers, settings and paths on ``self``.

        The configuration is loaded from ``test.ini`` (relative to the current
        directory) and stored as ``self.config``.  Storage paths come from
        ``h.get_OLD_directory_path``; test-data paths are built beneath
        ``<here>/onlinelinguisticdatabase/tests``.

        """
        roles = (
            ('view', u'viewer'),
            ('contrib', u'contributor'),
            ('admin', u'administrator'),
        )
        # One environment dict per role ...
        for suffix, role in roles:
            setattr(self, 'extra_environ_' + suffix,
                    {'test.authentication.role': role})
        # ... and one per role that also sets 'test.application_settings'.
        for suffix, role in roles:
            setattr(self, 'extra_environ_%s_appset' % suffix,
                    {'test.authentication.role': role,
                     'test.application_settings': True})

        self.json_headers = {'Content-Type': 'application/json'}

        settings = self.config = appconfig('config:test.ini', relative_to='.')
        self.here = settings['here']

        def old_directory(name):
            # Resolve an OLD directory using the test configuration.
            return h.get_OLD_directory_path(name, config=settings)

        self.files_path = old_directory('files')
        self.reduced_files_path = old_directory('reduced_files')

        tests_root = os.path.join(self.here, 'onlinelinguisticdatabase', 'tests')
        test_data_root = os.path.join(tests_root, 'data')

        self.test_files_path = os.path.join(test_data_root, 'files')
        reduced_copies_setting = settings.get(
            'create_reduced_size_file_copies', False)
        self.create_reduced_size_file_copies = asbool(reduced_copies_setting)
        self.preferred_lossy_audio_format = settings.get(
            'preferred_lossy_audio_format', 'ogg')
        self.corpora_path = old_directory('corpora')
        self.test_datasets_path = os.path.join(test_data_root, 'datasets')
        self.test_scripts_path = os.path.join(tests_root, 'scripts')

        for attribute, dataset_name in (
                ('loremipsum100_path', 'loremipsum_100.txt'),
                ('loremipsum1000_path', 'loremipsum_1000.txt'),
                ('loremipsum10000_path', 'loremipsum_10000.txt')):
            setattr(self, attribute,
                    os.path.join(self.test_datasets_path, dataset_name))

        for attribute, directory_name in (
                ('users_path', 'users'),
                ('morphologies_path', 'morphologies'),
                ('morphological_parsers_path', 'morphological_parsers'),
                ('phonologies_path', 'phonologies'),
                ('morpheme_language_models_path', 'morpheme_language_models')):
            setattr(self, attribute, old_directory(directory_name))

        self.test_phonologies_path = os.path.join(test_data_root, 'phonologies')
        # NOTE(review): ``test_phonology_no_phonology_script_path`` points at
        # the same file as ``test_malformed_phonology_script_path``; this may
        # be a copy-paste slip -- confirm against the test data directory.
        for attribute, script_name in (
                ('test_phonology_script_path',
                 'test_phonology.script'),
                ('test_malformed_phonology_script_path',
                 'test_phonology_malformed.script'),
                ('test_phonology_no_phonology_script_path',
                 'test_phonology_malformed.script'),
                ('test_medium_phonology_script_path',
                 'test_phonology_medium.script'),
                ('test_large_phonology_script_path',
                 'test_phonology_large.script'),
                ('test_phonology_testless_script_path',
                 'test_phonology_no_tests.script')):
            setattr(self, attribute,
                    os.path.join(self.test_phonologies_path, script_name))

        self.test_morphologies_path = os.path.join(
            test_data_root, 'morphologies')
        self.test_morphophonologies_path = os.path.join(
            test_data_root, 'morphophonologies')
Example #6
0
def delete_file(file):
    """Delete a file model.

    :param file: a file model object to delete.
    :returns: ``None``.
    :raises OSError: if a stored binary exists but cannot be removed (any
        error other than "no such file or directory").

    This deletes the file model object from the database as well as any binary
    files associated with it that are stored on the filesystem.

    A binary that is already missing from disk is skipped rather than treated
    as an error.  Otherwise a deletion that failed part-way (or a file removed
    out of band) would make every later attempt fail on the missing file,
    leaving a database record that can never be deleted.

    """
    import errno  # local import: only this function needs it

    stored_binaries = (
        ('files', getattr(file, 'filename', None)),
        ('reduced_files', getattr(file, 'lossy_filename', None)),
    )
    for directory_name, stored_name in stored_binaries:
        if not stored_name:
            continue
        file_path = os.path.join(
            h.get_OLD_directory_path(directory_name, config=config),
            stored_name)
        try:
            os.remove(file_path)
        except OSError as error:
            # Only "file not found" is tolerated; permission problems and
            # other failures still propagate to the caller.
            if error.errno != errno.ENOENT:
                raise
    Session.delete(file)
    Session.commit()
Example #7
0
def create_plain_file():
    """Create a local file using data from a ``Content-Type: multipart/form-data`` request.

    :param request.POST['filedata']: a ``cgi.FieldStorage`` object containing
        the file data.
    :param str request.POST['filename']: the name of the binary file.
    :returns: an SQLAlchemy model object representing the file.
    :raises InvalidFieldStorageObjectError: if ``request.POST['filedata']``
        has no ``file`` attribute.

    .. note::

        The validator expects ``request.POST`` to encode list input via the
        ``formencode.variabledecode.NestedVariables`` format.  E.g., a list of
        form ``id`` values would be provided as values to keys with names like
        ``'forms-0'``, ``'forms-1'``, ``'forms-2'``, etc.

    """
    values = dict(request.params)
    filedata = request.POST.get('filedata')
    if not hasattr(filedata, 'file'):
        raise InvalidFieldStorageObjectError
    if not values.get('filename'):
        # Fall back to the base name of the uploaded file.
        values['filename'] = os.path.split(filedata.filename)[-1]
    # The schema is given the first KB of the upload along with the other
    # values (presumably for MIME type detection -- confirm in the schema).
    values['filedata_first_KB'] = filedata.value[:1024]
    schema = FileCreateWithFiledataSchema()
    data = schema.to_python(values)

    file = File()
    file.filename = h.normalize(data['filename'])
    file.MIME_type = data['MIME_type']

    files_path = h.get_OLD_directory_path('files', config=config)
    file_path = os.path.join(files_path, file.filename)
    # ``get_unique_file_path`` may return a path different from the one
    # requested, so the model's filename is re-derived from the result.
    file_object, file_path = get_unique_file_path(file_path)
    file.filename = os.path.split(file_path)[-1]
    file.name = file.filename
    try:
        shutil.copyfileobj(filedata.file, file_object)
    finally:
        # Release both handles even if the copy fails part-way.
        filedata.file.close()
        file_object.close()
    file.size = os.path.getsize(file_path)

    file = add_standard_metadata(file, data)

    return file
Example #8
0
def create_new_phonology(data):
    """Construct a phonology from ``data``.

    :param dict data: input values; the keys ``name``, ``description`` and
        ``script`` are read.
    :returns: an SQLAlchemy model object representing the phonology.

    All three values are passed through ``h.normalize``; carriage returns are
    additionally stripped from the script.  The enterer and the modifier are
    both set to ``session['user']``.  The object is only constructed here;
    this function does not add it to a database session or commit it.

    """
    attributes = {
        'parent_directory': h.get_OLD_directory_path(
            'phonologies', config=config),
        'word_boundary_symbol': h.word_boundary_symbol,
        'UUID': unicode(uuid4()),
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        # Open question carried over from the original: normalize or not?
        'script': h.normalize(data['script']).replace(u'\r', u''),
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
    }
    return Phonology(**attributes)
Example #9
0
def create_new_morphological_parser(data):
    """Construct a morphological parser from ``data``.

    :param dict data: input values; the keys ``name``, ``description``,
        ``phonology``, ``morphology`` and ``language_model`` are read.
    :returns: an SQLAlchemy model object representing the morphological
        parser.

    The enterer and the modifier are both set to ``session['user']``.  The
    object is only constructed here; this function does not add it to a
    database session or commit it.

    """
    attributes = {
        'parent_directory': h.get_OLD_directory_path(
            'morphologicalparsers', config=config),
        'UUID': unicode(uuid4()),
        # Free-text values are passed through ``h.normalize``.
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        # Provenance metadata.
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
        # Values copied from ``data`` unchanged.
        'phonology': data['phonology'],
        'morphology': data['morphology'],
        'language_model': data['language_model'],
    }
    return MorphologicalParser(**attributes)
Example #10
0
def create_new_morphological_parser(data):
    """Construct a morphological parser from ``data``.

    :param dict data: input values; ``name`` and ``description`` are passed
        through ``h.normalize``, while ``phonology``, ``morphology`` and
        ``language_model`` are copied unchanged.
    :returns: an SQLAlchemy model object representing the morphological
        parser.

    The enterer and the modifier are both set to ``session['user']``.  The
    object is only constructed here; this function does not add it to a
    database session or commit it.

    """
    attributes = {
        'parent_directory': h.get_OLD_directory_path(
            'morphologicalparsers', config=config),
        'UUID': unicode(uuid4()),
        'name': h.normalize(data['name']),
        'description': h.normalize(data['description']),
        'enterer': session['user'],
        'modifier': session['user'],
        'datetime_modified': h.now(),
        'datetime_entered': h.now(),
    }
    # The component references are taken from ``data`` as they are.
    for key in ('phonology', 'morphology', 'language_model'):
        attributes[key] = data[key]
    return MorphologicalParser(**attributes)
Example #11
0
def create_base64_file(data):
    """Create a local file using data from a ``Content-Type: application/json`` request.

    :param dict data: the data to create the file model.  Its ``MIME_type``
        key is overwritten in place before validation.
    :param str data['base64_encoded_file']: Base64-encoded file data.
    :returns: an SQLAlchemy model object representing the file.

    """

    # During validation, the schema will set a proper value based on the
    # base64_encoded_file or filename attribute.
    data['MIME_type'] = u''
    schema = FileCreateWithBase64EncodedFiledataSchema()
    state = h.State()
    state.full_dict = data
    state.user = session['user']
    data = schema.to_python(data, state)

    file = File()
    file.MIME_type = data['MIME_type']
    file.filename = h.normalize(data['filename'])

    file = add_standard_metadata(file, data)

    # Write the file to disk (making sure it's unique and thereby potentially
    # modifying file.filename); and calculate file.size.
    file_data = data['base64_encoded_file']     # base64-decoded during validation
    files_path = h.get_OLD_directory_path('files', config=config)
    file_path = os.path.join(files_path, file.filename)
    file_object, file_path = get_unique_file_path(file_path)
    file.filename = os.path.split(file_path)[-1]
    file.name = file.filename
    try:
        file_object.write(file_data)
    finally:
        # Close the handle even if the write fails (e.g. disk full).
        file_object.close()
    file.size = os.path.getsize(file_path)

    file = restrict_file_by_forms(file)
    return file
Example #12
0
def get_corpus_dir_path(corpus):
    """Return the path of the directory belonging to ``corpus``.

    :param corpus: an object whose ``id`` attribute is formatted with ``%d``.
    :returns: the OLD ``corpora`` directory joined with ``corpus_<id>``.

    """
    corpora_root = h.get_OLD_directory_path('corpora', config=config)
    directory_name = 'corpus_%d' % corpus.id
    return os.path.join(corpora_root, directory_name)
Example #13
0
def get_corpus_dir_path(corpus):
    """Build the filesystem path of the directory for ``corpus``.

    :param corpus: an object whose ``id`` attribute is formatted with ``%d``.
    :returns: ``<corpora directory>/corpus_<id>``, where the corpora directory
        is obtained from ``h.get_OLD_directory_path``.

    """
    path_parts = (
        h.get_OLD_directory_path('corpora', config=config),
        'corpus_%d' % corpus.id,
    )
    return os.path.join(*path_parts)
Example #14
0
    def __setattrs__(self):
        """Set the environment dicts, headers, settings and paths on ``self``.

        The configuration is loaded from ``test.ini`` (relative to the current
        directory) and stored as ``self.config``.  Storage paths come from
        ``h.get_OLD_directory_path``; test-data paths are built beneath
        ``<here>/onlinelinguisticdatabase/tests``.

        """

        def environ_for(role, with_app_settings=False):
            # Build the environment dict for one authentication role.
            environ = {'test.authentication.role': role}
            if with_app_settings:
                environ['test.application_settings'] = True
            return environ

        self.extra_environ_view = environ_for(u'viewer')
        self.extra_environ_contrib = environ_for(u'contributor')
        self.extra_environ_admin = environ_for(u'administrator')
        self.extra_environ_view_appset = environ_for(u'viewer', True)
        self.extra_environ_contrib_appset = environ_for(u'contributor', True)
        self.extra_environ_admin_appset = environ_for(u'administrator', True)

        self.json_headers = {'Content-Type': 'application/json'}

        settings = appconfig('config:test.ini', relative_to='.')
        self.config = settings
        self.here = settings['here']

        join = os.path.join

        def old_path(name):
            # Resolve an OLD directory using the test configuration.
            return h.get_OLD_directory_path(name, config=settings)

        self.files_path = old_path('files')
        self.reduced_files_path = old_path('reduced_files')

        tests_dir = join(self.here, 'onlinelinguisticdatabase', 'tests')
        data_dir = join(tests_dir, 'data')

        self.test_files_path = join(data_dir, 'files')
        reduced_copies_setting = settings.get(
            'create_reduced_size_file_copies', False)
        self.create_reduced_size_file_copies = asbool(reduced_copies_setting)
        self.preferred_lossy_audio_format = settings.get(
            'preferred_lossy_audio_format', 'ogg')
        self.corpora_path = old_path('corpora')
        self.test_datasets_path = join(data_dir, 'datasets')
        self.test_scripts_path = join(tests_dir, 'scripts')

        datasets = self.test_datasets_path
        self.loremipsum100_path = join(datasets, 'loremipsum_100.txt')
        self.loremipsum1000_path = join(datasets, 'loremipsum_1000.txt')
        self.loremipsum10000_path = join(datasets, 'loremipsum_10000.txt')

        self.users_path = old_path('users')
        self.morphologies_path = old_path('morphologies')
        self.morphological_parsers_path = old_path('morphological_parsers')
        self.phonologies_path = old_path('phonologies')
        self.morpheme_language_models_path = old_path(
            'morpheme_language_models')

        self.test_phonologies_path = join(data_dir, 'phonologies')
        scripts = self.test_phonologies_path
        self.test_phonology_script_path = join(
            scripts, 'test_phonology.script')
        self.test_malformed_phonology_script_path = join(
            scripts, 'test_phonology_malformed.script')
        # NOTE(review): same file as the malformed script above; this may be
        # a copy-paste slip -- confirm against the test data directory.
        self.test_phonology_no_phonology_script_path = join(
            scripts, 'test_phonology_malformed.script')
        self.test_medium_phonology_script_path = join(
            scripts, 'test_phonology_medium.script')
        self.test_large_phonology_script_path = join(
            scripts, 'test_phonology_large.script')
        self.test_phonology_testless_script_path = join(
            scripts, 'test_phonology_no_tests.script')

        self.test_morphologies_path = join(data_dir, 'morphologies')
        self.test_morphophonologies_path = join(data_dir, 'morphophonologies')