Beispiel #1
0
def _build_vflist(elements):
    """Build the list of variant forms described by a sequence of elements.

    The elements are walked in order while a StateMachine records the
    context that governs each <vf>:

      * the most recent date range (set by <vd>);
      * the most recent grammar information (set by <gr>/text);
      * the most recent label (set by <la>);
      * whether we are currently 'inside' or 'outside' parentheses.

    For every <vf> a VariantFormFromParser object is created from the
    current context. Each value is first read with the default
    ``state.read(...)`` call; if that yields nothing, the value stored
    for the 'outside' paren state is used instead.
    (NOTE(review): the default read presumably targets whichever paren
    state is current -- confirm against StateMachine.)

    Context resets:
      * 'hardBreak' / 'p'  -- the whole state is cleared;
      * 'softBreak'        -- back to 'outside', label blanked;
      * 'openParen'        -- unless directly followed by a vf, switch to
                              'inside' and blank grammar, label and
                              date range;
      * 'closeParen'       -- back to 'outside';
      * 'newStart'         -- discard everything collected so far and
                              clear the state.

    Args:
        elements: iterable of parsed elements; each is passed through
            _adjust_element() before being inspected.

    Returns:
        List of variant-form objects, in document order.
    """
    state = StateMachine()

    def _inside_else_outside(key):
        # Default read first; fall back to the 'outside' value when empty.
        return state.read(key) or state.read(key, paren_state='outside')

    variants = []
    for raw in elements:
        item = _adjust_element(raw, state)
        tag = item.tag

        if tag == 'vf':
            # Only the start date decides whether to fall back.
            start, end = state.read('date_range')
            if not start:
                start, end = state.read('date_range', paren_state='outside')
            grammar = _inside_else_outside('grammar')
            label = _inside_else_outside('label')

            form = VariantFormFromParser(item.node, start, end)
            form.set_grammatical_information(grammar)
            form.label = label
            variants.append(form)
            continue

        if tag == 'vd':
            state.set('date_range', utilities.find_range(item.text))
            # The label is kept only when item.previous is 'la'.
            if item.previous != 'la':
                state.set('label', '')
            continue

        if tag in ('hardBreak', 'p'):
            state.clear()
            continue

        if tag == 'softBreak':
            state.set('parens', 'outside')
            state.set('label', '')
            continue

        if tag == 'openParen' and item.next != 'vf':
            # Switch to 'inside' first, then null out grammar, label and
            # date range so nothing stale is picked up inside the parens.
            state.set('parens', 'inside')
            state.set('grammar', '')
            state.set('label', '')
            state.set('date_range', (0, 0))
            continue

        if tag == 'closeParen':
            state.set('parens', 'outside')
            continue

        if tag in ('gr', 'text'):
            text = item.text
            if text and not IGNORABLE_GRAMMAR_PATTERN.search(text):
                state.set('grammar', text)
            continue

        if tag == 'la':
            state.set('label', item.text)
            continue

        if tag == 'newStart':
            # Start over: forget every variant form gathered so far.
            variants = []
            state.clear()

    return variants