Example #1
0
def create_new_morpheme_language_model(data):
    """Create a new morpheme language model.

    :param dict data: the data for the morpheme language model to be created;
        the keys ``name``, ``description``, ``vocabulary_morphology``,
        ``corpus``, ``toolkit``, ``order``, ``smoothing`` and ``categorial``
        are read.
    :returns: an SQLAlchemy model object representing the morpheme language model.

    The model is only constructed and returned here; this function does not
    add it to a database session or commit it.

    """
    # Take a single timestamp so that ``datetime_entered`` and
    # ``datetime_modified`` are identical on a freshly created model (the
    # other creators in this code base also use one ``h.now()`` value).
    now = h.now()
    morpheme_language_model = MorphemeLanguageModel(
        parent_directory=h.get_OLD_directory_path('morphemelanguagemodels',
                                                  config=config),
        # Values taken from ``h`` rather than from the submitted data.
        rare_delimiter=h.rare_delimiter,
        start_symbol=h.lm_start,
        end_symbol=h.lm_end,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        UUID=unicode(uuid4()),
        # Free-text fields are passed through ``h.normalize``.
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now,
        # The remaining values are taken from ``data`` as-is.
        vocabulary_morphology=data['vocabulary_morphology'],
        corpus=data['corpus'],
        toolkit=data['toolkit'],
        order=data['order'],
        smoothing=data['smoothing'],
        categorial=data['categorial'],
    )
    return morpheme_language_model
Example #2
0
def create_new_morphology(data):
    """Create a new morphology.

    :param dict data: the data for the morphology to be created; the keys
        ``name``, ``description``, ``lexicon_corpus``, ``rules_corpus``,
        ``script_type``, ``extract_morphemes_from_rules_corpus``, ``rules``,
        ``rich_upper``, ``rich_lower`` and ``include_unknowns`` are read.
    :returns: an SQLAlchemy model object representing the morphology.

    The model is only constructed and returned here; this function does not
    add it to a database session or commit it.

    """
    # One timestamp for both fields: calling ``h.now()`` twice could leave a
    # new morphology with ``datetime_entered != datetime_modified``.
    now = h.now()
    morphology = Morphology(
        parent_directory=h.get_OLD_directory_path('morphologies', config=config),
        # Values taken from ``h`` rather than from the submitted data.
        word_boundary_symbol=h.word_boundary_symbol,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        rare_delimiter=h.rare_delimiter,
        UUID=unicode(uuid4()),
        # Free-text fields are passed through ``h.normalize``.
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now,
        # The remaining values are taken from ``data`` as-is.
        lexicon_corpus=data['lexicon_corpus'],
        rules_corpus=data['rules_corpus'],
        script_type=data['script_type'],
        extract_morphemes_from_rules_corpus=data['extract_morphemes_from_rules_corpus'],
        rules=data['rules'],
        rich_upper=data['rich_upper'],
        rich_lower=data['rich_lower'],
        include_unknowns=data['include_unknowns'],
    )
    return morphology
Example #3
0
def create_new_morpheme_language_model(data):
    """Create a new morpheme language model.

    :param dict data: the data for the morpheme language model to be created.
        Read keys: ``name``, ``description``, ``vocabulary_morphology``,
        ``corpus``, ``toolkit``, ``order``, ``smoothing``, ``categorial``.
    :returns: an SQLAlchemy model object representing the morpheme language model.

    Nothing is persisted here: the object is built and handed back to the
    caller.

    """
    # A single ``h.now()`` call guarantees that the entered and modified
    # timestamps of the new model are exactly equal; two separate calls
    # could return different instants.
    now = h.now()
    morpheme_language_model = MorphemeLanguageModel(
        parent_directory=h.get_OLD_directory_path('morphemelanguagemodels',
                                                  config=config),
        rare_delimiter=h.rare_delimiter,
        start_symbol=h.lm_start,
        end_symbol=h.lm_end,
        morpheme_delimiters=h.get_morpheme_delimiters(type_=u'unicode'),
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now,
        vocabulary_morphology=data['vocabulary_morphology'],
        corpus=data['corpus'],
        toolkit=data['toolkit'],
        order=data['order'],
        smoothing=data['smoothing'],
        categorial=data['categorial'])
    return morpheme_language_model
Example #4
0
def update_morphology(morphology, data):
    """Apply the values in ``data`` to an existing morphology.

    :param morphology: the morphology model to be updated.
    :param dict data: representation of the updated morphology.
    :returns: the morphology model when ``changed`` ended up truthy;
        otherwise the ``changed`` flag itself (initially ``False``).

    Every candidate value is handed to ``morphology.set_attr`` together with
    the running ``changed`` flag, and the flag is replaced by whatever
    ``set_attr`` returns.

    """
    changed = False
    # Free-text fields go through ``h.normalize`` before ``set_attr``.
    for key in ('name', 'description'):
        changed = morphology.set_attr(key, h.normalize(data[key]), changed)
    # These fields are taken from ``data`` verbatim.
    for key in ('lexicon_corpus', 'rules_corpus', 'script_type',
                'extract_morphemes_from_rules_corpus', 'rules', 'rich_upper',
                'rich_lower', 'include_unknowns'):
        changed = morphology.set_attr(key, data[key], changed)
    # These two values come from ``h``, not from the submitted data.
    changed = morphology.set_attr('rare_delimiter', h.rare_delimiter, changed)
    changed = morphology.set_attr('word_boundary_symbol',
                                  h.word_boundary_symbol, changed)
    if not changed:
        return changed
    # Something changed: record who modified the morphology and when.
    session['user'] = Session.merge(session['user'])
    morphology.modifier = session['user']
    morphology.datetime_modified = h.now()
    return morphology
Example #5
0
def update_corpus(corpus, data):
    """Apply the values in ``data`` to an existing corpus.

    :param corpus: the corpus model to be updated.
    :param dict data: representation of the updated corpus.
    :returns: the corpus model when ``changed`` ended up truthy; otherwise
        the ``changed`` flag itself (initially ``False``).

    """
    changed = False
    # Free-text fields go through ``h.normalize`` before ``set_attr``.
    for key in ('name', 'description'):
        changed = corpus.set_attr(key, h.normalize(data[key]), changed)
    for key in ('content', 'form_search'):
        changed = corpus.set_attr(key, data[key], changed)

    # Falsy entries are dropped from both submitted collections before either
    # one is compared with the corpus.
    new_tags = [tag for tag in data['tags'] if tag]
    new_forms = [form for form in data['forms'] if form]
    # A collection is replaced only when it differs, as a set, from the
    # current one.
    for attr, replacement in (('tags', new_tags), ('forms', new_forms)):
        if set(replacement) != set(getattr(corpus, attr)):
            setattr(corpus, attr, replacement)
            changed = True

    if not changed:
        return changed
    # Something changed: record who modified the corpus and when.
    session['user'] = Session.merge(session['user'])
    corpus.modifier = session['user']
    corpus.datetime_modified = h.now()
    return corpus
Example #6
0
def update_morpheme_language_model(morpheme_language_model, data):
    """Apply the values in ``data`` to an existing morpheme language model.

    :param morpheme_language_model: the morpheme language model to be updated.
    :param dict data: representation of the updated morpheme language model.
    :returns: the morpheme language model when ``changed`` ended up truthy;
        otherwise the ``changed`` flag itself (initially ``False``).

    """
    lm = morpheme_language_model
    changed = False
    # Free-text fields go through ``h.normalize`` before ``set_attr``.
    for key in ('name', 'description'):
        changed = lm.set_attr(key, h.normalize(data[key]), changed)
    # These fields are taken from ``data`` verbatim.
    for key in ('vocabulary_morphology', 'corpus', 'toolkit', 'order',
                'smoothing', 'categorial'):
        changed = lm.set_attr(key, data[key], changed)
    # These values come from ``h``, not from the submitted data.
    changed = lm.set_attr('rare_delimiter', h.rare_delimiter, changed)
    changed = lm.set_attr('start_symbol', h.lm_start, changed)
    changed = lm.set_attr('end_symbol', h.lm_end, changed)
    if not changed:
        return changed
    # Something changed: record who modified the model and when.
    session['user'] = Session.merge(session['user'])
    lm.modifier = session['user']
    lm.datetime_modified = h.now()
    return lm
Example #7
0
def update_morphological_parser(morphological_parser, data):
    """Apply the values in ``data`` to an existing morphological parser.

    :param morphological_parser: the morphological parser model to be updated.
    :param dict data: representation of the updated morphological parser.
    :returns: the morphological parser model when ``changed`` ended up
        truthy; otherwise the ``changed`` flag itself (initially ``False``).

    """
    parser = morphological_parser
    changed = False
    # Free-text fields go through ``h.normalize`` before ``set_attr``.
    for key in ('name', 'description'):
        changed = parser.set_attr(key, h.normalize(data[key]), changed)
    # The component references are taken from ``data`` verbatim.
    for key in ('phonology', 'morphology', 'language_model'):
        changed = parser.set_attr(key, data[key], changed)
    if not changed:
        return changed
    # Something changed: record who modified the parser and when.
    session['user'] = Session.merge(session['user'])
    parser.modifier = session['user']
    parser.datetime_modified = h.now()
    return parser
Example #8
0
def update_corpus(corpus, data):
    """Update a corpus model from a dictionary of new values.

    :param corpus: the corpus model to be updated.
    :param dict data: representation of the updated corpus.
    :returns: the updated corpus model, or the falsy ``changed`` flag
        (initially ``False``) when nothing was reported as changed.

    """
    def _scalar_updates():
        # Yield (attribute, value) pairs lazily so that each value is only
        # computed immediately before it is handed to ``set_attr``.
        yield 'name', h.normalize(data['name'])
        yield 'description', h.normalize(data['description'])
        yield 'content', data['content']
        yield 'form_search', data['form_search']

    changed = False
    for attribute, value in _scalar_updates():
        changed = corpus.set_attr(attribute, value, changed)

    # Only truthy members of the submitted collections are kept.
    submitted_tags = [item for item in data['tags'] if item]
    submitted_forms = [item for item in data['forms'] if item]
    tags_differ = set(submitted_tags) != set(corpus.tags)
    if tags_differ:
        corpus.tags = submitted_tags
        changed = True
    forms_differ = set(submitted_forms) != set(corpus.forms)
    if forms_differ:
        corpus.forms = submitted_forms
        changed = True

    if not changed:
        return changed
    # Record the modifier and the modification time.
    session['user'] = Session.merge(session['user'])
    corpus.modifier = session['user']
    corpus.datetime_modified = h.now()
    return corpus
Example #9
0
def add_standard_metadata(file, data):
    """Copy the standard metadata values from ``data`` onto the file model.

    :param file: file model object
    :param dict data: dictionary containing file attribute values.
    :returns: the updated file model object.

    """
    file.description = h.normalize(data['description'])
    for key in ('utterance_type', 'date_elicited'):
        setattr(file, key, data[key])
    # Elicitor and speaker are only assigned when a truthy value was given.
    for key in ('elicitor', 'speaker'):
        person = data[key]
        if person:
            setattr(file, key, person)
    # Falsy entries are dropped from the tag and form collections.
    file.tags = [tag for tag in data['tags'] if tag]
    file.forms = [form for form in data['forms'] if form]
    # Entered and modified share one timestamp.
    file.datetime_entered = file.datetime_modified = h.now()
    # Because of SQLAlchemy's uniqueness constraints, we may need to set the
    # enterer to the elicitor.
    elicitor = data['elicitor']
    current_user = session['user']
    elicitor_is_user = bool(elicitor) and elicitor.id == current_user.id
    file.enterer = elicitor if elicitor_is_user else current_user
    return file
Example #10
0
def create_new_corpus(data):
    """Create a new corpus.

    :param dict data: the data for the corpus to be created; the keys
        ``name``, ``description``, ``content``, ``form_search``, ``forms``
        and ``tags`` are read.
    :returns: an SQLAlchemy model object representing the corpus.

    .. note::

        I have opted not to complicate corpora by giving meaning to the
        "restricted" tag where they are concerned.  Given that a corpus' forms
        can be determined by a form search model and are therefore variable, it
        does not seem practical to toggle restricted status based on the status
        of any number of forms.  The corpus files that may be associated to a
        corpus by requesting ``PUT /corpora/id/writetofile`` may, however, be
        restricted if a restricted form is written to file.

    """
    corpus = Corpus()
    corpus.UUID = unicode(uuid4())
    # Free-text fields are passed through ``h.normalize``.
    corpus.name = h.normalize(data['name'])
    corpus.description = h.normalize(data['description'])
    corpus.content = data['content']
    corpus.form_search = data['form_search']
    # Drop falsy entries (e.g. ``None`` placeholders) before assigning the
    # collections, exactly as ``update_corpus`` does, so that creating and
    # updating a corpus treat the submitted lists the same way.
    corpus.forms = [f for f in data['forms'] if f]
    corpus.tags = [t for t in data['tags'] if t]
    # Creator and modifier, and entered and modified times, start out equal.
    corpus.enterer = corpus.modifier = session['user']
    corpus.datetime_modified = corpus.datetime_entered = h.now()
    return corpus
Example #11
0
def create_new_corpus(data):
    """Create a new corpus.

    :param dict data: the data for the corpus to be created.  Read keys:
        ``name``, ``description``, ``content``, ``form_search``, ``forms``,
        ``tags``.
    :returns: an SQLAlchemy model object representing the corpus.

    .. note::

        I have opted not to complicate corpora by giving meaning to the
        "restricted" tag where they are concerned.  Given that a corpus' forms
        can be determined by a form search model and are therefore variable, it
        does not seem practical to toggle restricted status based on the status
        of any number of forms.  The corpus files that may be associated to a
        corpus by requesting ``PUT /corpora/id/writetofile`` may, however, be
        restricted if a restricted form is written to file.

    """
    corpus = Corpus()
    corpus.UUID = unicode(uuid4())
    corpus.name = h.normalize(data['name'])
    corpus.description = h.normalize(data['description'])
    corpus.content = data['content']
    corpus.form_search = data['form_search']
    # The submitted lists may carry falsy placeholders; ``update_corpus``
    # filters them out, so do the same on creation instead of assigning the
    # raw lists to the model's collections.
    corpus.forms = [form for form in data['forms'] if form]
    corpus.tags = [tag for tag in data['tags'] if tag]
    corpus.enterer = corpus.modifier = session['user']
    corpus.datetime_modified = corpus.datetime_entered = h.now()
    return corpus
Example #12
0
def compile_phonology(**kwargs):
    """Compile a phonology and commit the result to the database.

    :param kwargs['phonology_id']: ``id`` used to fetch the phonology.
    :param kwargs['timeout']: passed straight to ``phonology.compile``.
    :param kwargs['user_id']: stored as the phonology's ``modifier_id``.
    :returns: ``None``; the phonology is modified and the session committed.

    """
    phonology_query = Session.query(model.Phonology)
    phonology = phonology_query.get(kwargs['phonology_id'])
    phonology.compile(kwargs['timeout'])
    # Stamp the modification time and modifier before committing.
    phonology.datetime_modified = h.now()
    phonology.modifier_id = kwargs['user_id']
    Session.commit()
Example #13
0
def create_new_phonology(data):
    """Create a new phonology.

    :param dict data: the data for the phonology to be created; the keys
        ``name``, ``description`` and ``script`` are read.
    :returns: an SQLAlchemy model object representing the phonology.

    The model is only constructed and returned here; this function does not
    add it to a database session or commit it.

    """
    # One timestamp for both fields: calling ``h.now()`` twice could leave a
    # new phonology with ``datetime_entered != datetime_modified``.
    now = h.now()
    phonology = Phonology(
        parent_directory=h.get_OLD_directory_path('phonologies', config=config),
        word_boundary_symbol=h.word_boundary_symbol,
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        # Carriage returns are removed from the normalized script.
        script=h.normalize(data['script']).replace(u'\r', u''),  # normalize or not?
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now,
    )
    return phonology
Example #14
0
def create_new_morphological_parser(data):
    """Create a new morphological parser.

    :param dict data: the data for the morphological parser to be created;
        the keys ``name``, ``description``, ``phonology``, ``morphology`` and
        ``language_model`` are read.
    :returns: an SQLAlchemy model object representing the morphological parser.

    The model is only constructed and returned here; this function does not
    add it to a database session or commit it.

    """
    # Take a single timestamp so that the entered and modified times of the
    # new parser are identical; two ``h.now()`` calls could differ.
    now = h.now()
    morphological_parser = MorphologicalParser(
        parent_directory=h.get_OLD_directory_path('morphologicalparsers',
                                                  config=config),
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now,
        # The parser's components are taken from ``data`` as-is.
        phonology=data['phonology'],
        morphology=data['morphology'],
        language_model=data['language_model'])
    return morphological_parser
Example #15
0
def create_new_morphological_parser(data):
    """Create a new morphological parser.

    :param dict data: the data for the morphological parser to be created.
        Read keys: ``name``, ``description``, ``phonology``, ``morphology``,
        ``language_model``.
    :returns: an SQLAlchemy model object representing the morphological parser.

    Nothing is persisted here: the object is built and handed back to the
    caller.

    """
    # ``h.now()`` is evaluated once so that ``datetime_entered`` and
    # ``datetime_modified`` cannot drift apart on a brand-new parser.
    now = h.now()
    morphological_parser = MorphologicalParser(
        parent_directory=h.get_OLD_directory_path('morphologicalparsers', config=config),
        UUID=unicode(uuid4()),
        name=h.normalize(data['name']),
        description=h.normalize(data['description']),
        enterer=session['user'],
        modifier=session['user'],
        datetime_modified=now,
        datetime_entered=now,
        phonology=data['phonology'],
        morphology=data['morphology'],
        language_model=data['language_model'],
    )
    return morphological_parser
Example #16
0
def generate_and_compile_parser(**kwargs):
    """Write a morphological parser and, unless disabled, compile it.

    :param kwargs['morphological_parser_id']: ``id`` used to fetch the parser.
    :param kwargs['compile']: optional; compilation is skipped only when this
        is present and falsy (it defaults to ``True``).
    :param kwargs['timeout']: passed to ``parser.compile``; only read when
        compiling.
    :param kwargs['user_id']: stored as the parser's ``modifier_id``.
    :returns: ``None``; the parser is modified and the session committed.

    """
    parser_query = Session.query(model.MorphologicalParser)
    parser = parser_query.get(kwargs['morphological_parser_id'])
    # Reset the flag before writing; it is consulted again further down.
    parser.changed = False
    parser.write()
    should_compile = kwargs.get('compile', True)
    if should_compile:
        parser.compile(kwargs['timeout'])
    parser.modifier_id = kwargs['user_id']
    parser.datetime_modified = h.now()
    # Clear the parser's cache when the flag was set in the meantime.
    if parser.changed:
        parser.cache.clear(persist=True)
    Session.commit()
Example #17
0
    def update(self, id):
        """Replace a user's remembered forms and return the new collection.

        :URL: ``PUT /rememberedforms/id``
        :Request body: JSON object of the form ``{"forms": [...]}`` where the
            array contains the form ``id`` values that will constitute the
            user's ``remembered_forms`` collection after update.
        :param str id: the ``id`` value of the user model whose
            ``remembered_forms`` attribute is to be updated.
        :returns: the list of remembered forms of the user; on failure a
            dictionary describing the error, with the response status set to
            400.

        .. note::

            Administrators can update any user's remembered forms;
            non-administrators can only update their own.

        """
        user_query = Session.query(User).options(
            subqueryload(User.remembered_forms))
        user = user_query.get(id)
        if user:
            try:
                payload = json.loads(unicode(request.body, request.charset))
                validated = FormIdsSchemaNullable.to_python(payload)
                # Falsy entries are discarded first ...
                submitted_forms = [form for form in validated['forms'] if form]
                unrestricted_users = h.get_unrestricted_users()
                # ... then only the forms this user may access are kept.
                permitted_forms = [
                    form for form in submitted_forms
                    if h.user_is_authorized_to_access_model(
                        user, form, unrestricted_users)]
                if set(user.remembered_forms) == set(permitted_forms):
                    # Nothing would change: reject the request.
                    response.status_int = 400
                    return {
                        'error':
                        u'The update request failed because the submitted data were not new.'
                    }
                user.remembered_forms = permitted_forms
                user.datetime_modified = h.now()
                Session.commit()
                return user.remembered_forms
            except h.JSONDecodeError:
                response.status_int = 400
                return h.JSONDecodeErrorResponse
            except Invalid as e:
                response.status_int = 400
                return {'errors': e.unpack_errors()}
Example #18
0
def create_new_keyboard(data):
    """Build a keyboard model from the submitted data.

    :param dict data: the data for the keyboard to be created; the keys
        ``name``, ``description`` and ``keyboard`` are read.
    :returns: an SQLAlchemy model object representing the keyboard.

    """
    keyboard = Keyboard()
    # All three user-supplied fields are passed through ``h.normalize``.
    for field in ('name', 'description', 'keyboard'):
        setattr(keyboard, field, h.normalize(data[field]))

    # OLD-generated Data
    now = h.now()
    keyboard.datetime_entered = now
    keyboard.datetime_modified = now
    user = session['user']
    keyboard.enterer = user
    keyboard.modifier = user

    return keyboard
Example #19
0
def create_new_keyboard(data):
    """Create a keyboard model populated from ``data``.

    :param dict data: the data for the keyboard to be created.  Read keys:
        ``name``, ``description``, ``keyboard``.
    :returns: an SQLAlchemy model object representing the keyboard.

    """
    new_keyboard = Keyboard()

    # User-supplied values, each passed through ``h.normalize``.
    normalized_name = h.normalize(data['name'])
    new_keyboard.name = normalized_name
    normalized_description = h.normalize(data['description'])
    new_keyboard.description = normalized_description
    normalized_layout = h.normalize(data['keyboard'])
    new_keyboard.keyboard = normalized_layout

    # OLD-generated Data: one timestamp and one user for both pairs of fields.
    created_at = h.now()
    new_keyboard.datetime_entered = created_at
    new_keyboard.datetime_modified = created_at
    creator = session['user']
    new_keyboard.enterer = creator
    new_keyboard.modifier = creator

    return new_keyboard
Example #20
0
def compute_perplexity(**kwargs):
    """Try to compute a morpheme LM's perplexity and record the outcome.

    :param kwargs['morpheme_language_model_id']: ``id`` used to fetch the LM.
    :param kwargs['timeout']: passed to ``lm.compute_perplexity``.
    :param kwargs['user_id']: stored as the LM's ``modifier_id``.
    :returns: ``None``; the LM's attributes are changed and committed.

    Any exception raised while computing the perplexity is swallowed and
    recorded as a ``None`` perplexity.

    """
    lm_query = Session.query(model.MorphemeLanguageModel)
    lm = lm_query.get(kwargs['morpheme_language_model_id'])
    timeout = kwargs['timeout']
    n_iterations = 5
    try:
        lm.perplexity = lm.compute_perplexity(timeout, n_iterations)
    except Exception:
        lm.perplexity = None
    # The flag mirrors whether a perplexity value is now present.
    lm.perplexity_computed = lm.perplexity is not None
    # A fresh UUID is stored for every attempt, successful or not.
    lm.perplexity_attempt = unicode(uuid4())
    lm.modifier_id = kwargs['user_id']
    lm.datetime_modified = h.now()
    Session.commit()
Example #21
0
    def update(self, id):
        """Update a user's remembered forms and return them.

        :URL: ``PUT /rememberedforms/id``
        :Request body: JSON object of the form ``{"forms": [...]}`` where the
            array contains the form ``id`` values that will constitute the
            user's ``remembered_forms`` collection after update.
        :param str id: the ``id`` value of the user model whose
            ``remembered_forms`` attribute is to be updated.
        :returns: the list of remembered forms of the user, or an error
            dictionary (with response status 400) when the body is not valid
            JSON, fails validation, or would not change anything.

        .. note::

            Administrators can update any user's remembered forms;
            non-administrators can only update their own.

        """
        user = (Session.query(User)
                .options(subqueryload(User.remembered_forms))
                .get(id))
        if user:
            try:
                raw_body = unicode(request.body, request.charset)
                cleaned = FormIdsSchemaNullable.to_python(json.loads(raw_body))
                candidate_forms = [item for item in cleaned['forms'] if item]
                may_access = h.user_is_authorized_to_access_model
                unrestricted_users = h.get_unrestricted_users()
                # Keep only the forms that the target user is allowed to see.
                allowed_forms = []
                for item in candidate_forms:
                    if may_access(user, item, unrestricted_users):
                        allowed_forms.append(item)
                is_new = set(user.remembered_forms) != set(allowed_forms)
                if not is_new:
                    response.status_int = 400
                    return {'error':
                        u'The update request failed because the submitted data were not new.'}
                user.remembered_forms = allowed_forms
                user.datetime_modified = h.now()
                Session.commit()
                return user.remembered_forms
            except h.JSONDecodeError:
                response.status_int = 400
                return h.JSONDecodeErrorResponse
            except Invalid as e:
                response.status_int = 400
                return {'errors': e.unpack_errors()}
Example #22
0
def update_morphological_parser(morphological_parser, data):
    """Update a morphological parser model from a dictionary of new values.

    :param morphological_parser: the morphological parser model to be updated.
    :param dict data: representation of the updated morphological parser.
    :returns: the updated morphological parser model, or the falsy
        ``changed`` flag (initially ``False``) when nothing was reported as
        changed.

    """
    def _updates():
        # Yield (attribute, value) pairs lazily so that each value is only
        # computed immediately before it is handed to ``set_attr``.
        yield 'name', h.normalize(data['name'])
        yield 'description', h.normalize(data['description'])
        yield 'phonology', data['phonology']
        yield 'morphology', data['morphology']
        yield 'language_model', data['language_model']

    changed = False
    for attribute, value in _updates():
        changed = morphological_parser.set_attr(attribute, value, changed)
    if not changed:
        return changed
    # Record the modifier and the modification time.
    session['user'] = Session.merge(session['user'])
    morphological_parser.modifier = session['user']
    morphological_parser.datetime_modified = h.now()
    return morphological_parser
Example #23
0
def update_keyboard(keyboard, data):
    """Apply the values in ``data`` to an existing keyboard.

    :param keyboard: the keyboard model to be updated.
    :param dict data: representation of the updated keyboard.
    :returns: the keyboard model when ``changed`` ended up truthy; otherwise
        the ``changed`` flag itself (initially ``False``).

    """
    changed = False
    # All three fields go through ``h.normalize`` before ``set_attr``.
    for key in ('name', 'description', 'keyboard'):
        changed = keyboard.set_attr(key, h.normalize(data[key]), changed)
    if not changed:
        return changed
    # Something changed: record when and by whom.
    keyboard.datetime_modified = h.now()
    session['user'] = Session.merge(session['user'])
    keyboard.modifier = session['user']
    return keyboard
Example #24
0
def update_keyboard(keyboard, data):
    """Update a keyboard model from a dictionary of new values.

    :param keyboard: the keyboard model to be updated.
    :param dict data: representation of the updated keyboard.
    :returns: the updated keyboard model, or the falsy ``changed`` flag
        (initially ``False``) when nothing was reported as changed.

    """
    def _normalized_updates():
        # Yield (attribute, value) pairs lazily so that each value is only
        # normalized immediately before it is handed to ``set_attr``.
        yield 'name', h.normalize(data['name'])
        yield 'description', h.normalize(data['description'])
        yield 'keyboard', h.normalize(data['keyboard'])

    changed = False
    for attribute, value in _normalized_updates():
        changed = keyboard.set_attr(attribute, value, changed)
    if not changed:
        return changed
    # Stamp the modification time first, then the (merged) modifying user.
    keyboard.datetime_modified = h.now()
    session['user'] = Session.merge(session['user'])
    keyboard.modifier = session['user']
    return keyboard
Example #25
0
File: files.py Project: FieldDB/old
def add_standard_metadata(file, data):
    """Copy the standard metadata values from ``data`` onto the file model.

    :param file: file model object
    :param dict data: dictionary containing file attribute values.
    :returns: the updated file model object.

    """
    file.description = h.normalize(data['description'])
    for key in ('utterance_type', 'date_elicited'):
        setattr(file, key, data[key])
    # Elicitor and speaker are only assigned when a truthy value was given.
    for key in ('elicitor', 'speaker'):
        person = data[key]
        if person:
            setattr(file, key, person)
    # Falsy entries are dropped from the tag and form collections.
    file.tags = [tag for tag in data['tags'] if tag]
    file.forms = [form for form in data['forms'] if form]
    # Entered and modified share one timestamp.
    file.datetime_entered = file.datetime_modified = h.now()
    file.enterer = session['user']
    return file
Example #26
0
def update_phonology(phonology, data):
    """Update a phonology.

    :param phonology: the phonology model to be updated.
    :param dict data: representation of the updated phonology; the keys
        ``name``, ``description`` and ``script`` are read.
    :returns: the updated phonology model or, if ``changed`` has not been set
        to ``True``, ``False``.

    """
    changed = False
    # Unicode Data
    changed = phonology.set_attr('name', h.normalize(data['name']), changed)
    changed = phonology.set_attr('description', h.normalize(data['description']), changed)
    # Strip carriage returns exactly as ``create_new_phonology`` does, so that
    # resubmitting an unchanged script with CRLF line endings is not reported
    # as a change and the stored script keeps a single form.
    changed = phonology.set_attr(
        'script', h.normalize(data['script']).replace(u'\r', u''), changed)
    # This value comes from ``h``, not from the submitted data.
    changed = phonology.set_attr('word_boundary_symbol', h.word_boundary_symbol, changed)

    if changed:
        # Something changed: record who modified the phonology and when.
        session['user'] = Session.merge(session['user'])
        phonology.modifier = session['user']
        phonology.datetime_modified = h.now()
        return phonology
    return changed
Example #27
0
def update_morpheme_language_model(morpheme_language_model, data):
    """Update a morpheme language model from a dictionary of new values.

    :param morpheme_language_model: the morpheme language model to be updated.
    :param dict data: representation of the updated morpheme language model.
    :returns: the updated morpheme language model, or the falsy ``changed``
        flag (initially ``False``) when nothing was reported as changed.

    """
    def _updates():
        # Yield (attribute, value) pairs lazily so that each value is only
        # computed immediately before it is handed to ``set_attr``.
        yield 'name', h.normalize(data['name'])
        yield 'description', h.normalize(data['description'])
        yield 'vocabulary_morphology', data['vocabulary_morphology']
        yield 'corpus', data['corpus']
        yield 'toolkit', data['toolkit']
        yield 'order', data['order']
        yield 'smoothing', data['smoothing']
        yield 'categorial', data['categorial']
        # The last three values come from ``h``, not from ``data``.
        yield 'rare_delimiter', h.rare_delimiter
        yield 'start_symbol', h.lm_start
        yield 'end_symbol', h.lm_end

    changed = False
    for attribute, value in _updates():
        changed = morpheme_language_model.set_attr(attribute, value, changed)
    if not changed:
        return changed
    # Record the modifier and the modification time.
    session['user'] = Session.merge(session['user'])
    morpheme_language_model.modifier = session['user']
    morpheme_language_model.datetime_modified = h.now()
    return morpheme_language_model
Example #28
0
            with codecs.open(corpus_file_path, 'w', 'utf8') as f:
                for id in form_references:
                    form = forms[id]
                    if not restricted and "restricted" in [t.name for t in form.tags]:
                        restricted = True
                    f.write(writer(form))
        gzipped_corpus_file_path = h.compress_file(corpus_file_path)
        create_tgrep2_corpus_file(gzipped_corpus_file_path, format_)
    except Exception, e:
        destroy_file(corpus_file_path)
        response.status_int = 400
        return error_msg(e)

    # Update/create the corpus_file object
    try:
        now = h.now()
        session['user'] = Session.merge(session['user'])
        user = session['user']
        corpus_filename = os.path.split(corpus_file_path)[1]
        if update:
            try:
                update_corpus_file(corpus, corpus_filename, user, now, restricted)
            except Exception:
                generate_new_corpus_file(corpus, corpus_filename, format_, user,
                                      now, restricted)
        else:
            generate_new_corpus_file(corpus, corpus_filename, format_, user, now,
                                  restricted)
    except Exception, e:
        destroy_file(corpus_file_path)
        response.status_int = 400
Example #29
0
    morphology = Session.query(model.Morphology).get(kwargs['morphology_id'])
    unknown_category = h.unknown_category
    try:
        morphology.write(unknown_category)
    except Exception, e:
        log.warn(e)
        pass
    if kwargs.get('compile', True):
        try:
            morphology.compile(kwargs['timeout'])
        except Exception, e:
            log.warn(e)
            pass
    morphology.generate_attempt = unicode(uuid4())
    morphology.modifier_id = kwargs['user_id']
    morphology.datetime_modified = h.now()
    Session.commit()

################################################################################
# MORPHEME LANGUAGE MODEL
################################################################################

def generate_language_model(**kwargs):
    """Write the requisite files (corpus, vocab, ARPA, LMTrie) of a morpheme LM to disk.

    :param str kwargs['morpheme_language_model_id']: ``id`` value of a morpheme LM.
    :param int/float kwargs['timeout']: seconds to allow for ARPA file creation.
    :param str kwargs['user_id']: ``id`` value of an OLD user.
    :returns: ``None``; side-effect is to change relevant attributes of LM object.

    """
Example #30
0
def update_collections_that_reference_this_collection(collection,
                                                      query_builder, **kwargs):
    """Propagate a change in ``collection`` to the collections referencing it.

    :param collection: a collection model.
    :param query_builder: an :class:`SQLAQueryBuilder` instance; it is passed
        through to ``get_collections_referencing_this_collection``.
    :param bool kwargs['contents_changed']: indicates whether the input
        collection's ``contents`` value has changed.
    :param bool kwargs['deleted']: indicates whether the input collection has
        just been deleted.
    :param bool kwargs['restricted']: indicates whether the input collection
        has just been tagged as restricted.
    :returns: ``None``

    All three flags default to ``False``.  If none of them is truthy the
    function does nothing at all (no query, no commit).  Otherwise the
    collections to update are whatever
    ``get_collections_referencing_this_collection`` returns for
    ``collection``, and for each of them:

    - if ``restricted``, the restricted tag is appended to its ``tags``;
    - if ``contents_changed``, its ``contents_unpacked``, ``html`` and
      ``forms`` attributes are regenerated from its ``contents``;
    - if ``deleted``, references to ``collection`` are first removed from its
      ``contents`` and then ``contents_unpacked``, ``html`` and ``forms`` are
      regenerated;
    - its ``datetime_modified`` and ``modifier`` values are updated;
    - a backup is made from the full dict that was captured *before* any of
      the above modifications were applied.

    The modified collections are then added to the session and committed.
    This function is called upon successful update and delete requests.

    """

    def update_contents_unpacked_etc(referrer, collection_id=None,
                                     deleted=False):
        """Regenerate the attributes of ``referrer`` derived from its contents.

        When ``deleted`` is true, references to the collection with id
        ``collection_id`` are stripped from ``referrer.contents`` first.
        """
        if deleted:
            referrer.contents = remove_references_to_this_collection(
                referrer.contents, collection_id)
        collections_referenced = get_collections_referenced(referrer.contents)
        referrer.contents_unpacked = generate_contents_unpacked(
            referrer.contents, collections_referenced)
        referrer.html = h.get_HTML_from_contents(
            referrer.contents_unpacked, referrer.markup_language)
        # ``form_id`` rather than ``id`` so the builtin is not shadowed.
        referrer.forms = [
            Session.query(Form).get(int(form_id)) for form_id in
            h.form_reference_pattern.findall(referrer.contents_unpacked)
        ]

    def update_modification_values(referrer, now):
        """Stamp ``referrer`` with the modification time and current user."""
        referrer.datetime_modified = now
        # Re-attach the session user to the SQLAlchemy session before using it
        # as a relationship value.
        session['user'] = Session.merge(session['user'])
        referrer.modifier = session['user']

    restricted = kwargs.get('restricted', False)
    contents_changed = kwargs.get('contents_changed', False)
    deleted = kwargs.get('deleted', False)
    if not (restricted or contents_changed or deleted):
        return

    referrers = get_collections_referencing_this_collection(
        collection, query_builder)
    # Snapshot the pre-modification state; these dicts feed the backups below.
    referrer_dicts = [c.get_full_dict() for c in referrers]
    now = h.now()

    # Plain loops (not list comprehensions) because these steps are executed
    # purely for their side effects.  The steps are kept as separate passes so
    # that the order of operations across collections is unchanged.
    if restricted:
        restricted_tag = h.get_restricted_tag()
        for c in referrers:
            c.tags.append(restricted_tag)
    if contents_changed:
        for c in referrers:
            update_contents_unpacked_etc(c)
    if deleted:
        for c in referrers:
            update_contents_unpacked_etc(c,
                                         collection_id=collection.id,
                                         deleted=True)
    for c in referrers:
        update_modification_values(c, now)
    for referrer_dict in referrer_dicts:
        backup_collection(referrer_dict)
    Session.add_all(referrers)
    Session.commit()
Example #31
0
def update_collections_that_reference_this_collection(collection, query_builder, **kwargs):
    """Update all collections that reference the input collection.

    :param collection: a collection model.
    :param query_builder: an :class:`SQLAQueryBuilder` instance; handed on to
        ``get_collections_referencing_this_collection``.
    :param bool kwargs['contents_changed']: indicates whether the input
        collection's ``contents`` value has changed.
    :param bool kwargs['deleted']: indicates whether the input collection has
        just been deleted.
    :param bool kwargs['restricted']: indicates whether the input collection
        has just been tagged as restricted.
    :returns: ``None``

    This function is called upon successful update and delete requests.  Each
    flag defaults to ``False``; when none is truthy nothing is queried,
    modified or committed.

    When at least one flag is truthy, the collections returned by
    ``get_collections_referencing_this_collection`` are processed as follows:

    If ``kwargs['restricted']`` is truthy, the restricted tag is appended to
    the ``tags`` of each referencing collection.

    If ``kwargs['contents_changed']`` is truthy, the ``contents_unpacked``,
    ``html`` and ``forms`` attributes of each referencing collection are
    regenerated.

    If ``kwargs['deleted']`` is truthy, references to ``collection`` are
    removed from the ``contents`` of each referencing collection before its
    ``contents_unpacked``, ``html`` and ``forms`` attributes are regenerated.

    Whenever any flag is truthy, the ``datetime_modified`` and ``modifier``
    values of every referencing collection are updated, each referencing
    collection is backed up using its state from before these modifications,
    and the changes are committed.

    """

    def update_contents_unpacked_etc(target, collection_id=None, deleted=False):
        # Rebuild everything that is derived from ``target.contents``; on
        # deletion, first strip references to the deleted collection.
        if deleted:
            target.contents = remove_references_to_this_collection(
                target.contents, collection_id)
        collections_referenced = get_collections_referenced(target.contents)
        target.contents_unpacked = generate_contents_unpacked(
            target.contents, collections_referenced)
        target.html = h.get_HTML_from_contents(target.contents_unpacked,
                                               target.markup_language)
        form_ids = h.form_reference_pattern.findall(target.contents_unpacked)
        # ``form_id`` (not ``id``) avoids shadowing the builtin.
        target.forms = [Session.query(Form).get(int(form_id))
                        for form_id in form_ids]

    def update_modification_values(target, now):
        # Record when and by whom ``target`` was modified; the session user is
        # merged into the SQLAlchemy session before being assigned.
        target.datetime_modified = now
        session['user'] = Session.merge(session['user'])
        target.modifier = session['user']

    restricted = kwargs.get('restricted', False)
    contents_changed = kwargs.get('contents_changed', False)
    deleted = kwargs.get('deleted', False)
    if restricted or contents_changed or deleted:
        collections_referencing_this_collection = get_collections_referencing_this_collection(
            collection, query_builder)
        # Captured before any modification so the backups reflect prior state.
        collections_referencing_this_collection_dicts = [
            c.get_full_dict() for c in collections_referencing_this_collection]
        now = h.now()
        # The steps below run only for their side effects, so they are written
        # as ``for`` loops instead of building throwaway lists of ``None``.
        if restricted:
            restricted_tag = h.get_restricted_tag()
            for c in collections_referencing_this_collection:
                c.tags.append(restricted_tag)
        if contents_changed:
            for c in collections_referencing_this_collection:
                update_contents_unpacked_etc(c)
        if deleted:
            for c in collections_referencing_this_collection:
                update_contents_unpacked_etc(c, collection_id=collection.id,
                                             deleted=True)
        for c in collections_referencing_this_collection:
            update_modification_values(c, now)
        for cd in collections_referencing_this_collection_dicts:
            backup_collection(cd)
        Session.add_all(collections_referencing_this_collection)
        Session.commit()
Example #32
0
                    form = forms[id]
                    if not restricted and "restricted" in [
                            t.name for t in form.tags
                    ]:
                        restricted = True
                    f.write(writer(form))
        gzipped_corpus_file_path = h.compress_file(corpus_file_path)
        create_tgrep2_corpus_file(gzipped_corpus_file_path, format_)
    except Exception, e:
        destroy_file(corpus_file_path)
        response.status_int = 400
        return error_msg(e)

    # Update/create the corpus_file object
    try:
        now = h.now()
        session['user'] = Session.merge(session['user'])
        user = session['user']
        corpus_filename = os.path.split(corpus_file_path)[1]
        if update:
            try:
                update_corpus_file(corpus, corpus_filename, user, now,
                                   restricted)
            except Exception:
                generate_new_corpus_file(corpus, corpus_filename, format_,
                                         user, now, restricted)
        else:
            generate_new_corpus_file(corpus, corpus_filename, format_, user,
                                     now, restricted)
    except Exception, e:
        destroy_file(corpus_file_path)