def data(self, data):
    """Record a lower-cased, stripped piece of text as a question or an answer.

    Nothing is recorded unless `self.is_question` is set. While
    `self.is_subject` is set, the text is normalized into a key; a key longer
    than three characters is registered in `self.questions` (together with the
    raw wording) and becomes the current question. While
    `self.is_best_answer` is set, the normalized text is added to the answers
    of the current question, provided that question is registered.

    Args:
      data: text to record; presumably the character data handed over by the
        XML parser -- verify against the caller.
    """
    text = data.lower().strip()
    if not self.is_question:
      return

    if self.is_subject:
      question_key = normalizeWithStopWordRemoval(text)
      # Keys of three characters or fewer are not registered.
      # NOTE(review): in that case curr_clean_question keeps its previous
      # value, so a following best answer is filed under the earlier
      # question -- confirm this is intended.
      if len(question_key) > 3:
        # First sighting creates the entry; later sightings reuse it.
        entry = self.questions.setdefault(
            question_key, {ANSWER: set(), QUESTION: set()})
        # Each distinct raw wording that maps to this key is kept.
        entry[QUESTION].add(text)

        self.curr_clean_question = question_key
        self.curr_question = text

    if self.is_best_answer:
      answer_key = normalizeWithStopWordRemoval(text)
      # Answers are only attached to a question that has been registered.
      if self.curr_clean_question in self.questions:
        self.questions[self.curr_clean_question][ANSWER].add(answer_key)
  def test_parse_one_question(self):
    """Parse a one-document fixture and compare the resulting question map.

    Writes the fixture to `parser_test.xml` in the current directory, parses
    it with `etree.parse` using `self.parser`, and asserts that the result
    equals a single entry holding the lower-cased subject and the normalized
    form of `u'body when'` as its only answer.
    """
    question = u'Why are yawns contagious?'.lower()
    expected = {
        normalizeWithStopWordRemoval(question): {
            ANSWER: {normalizeWithStopWordRemoval(u'body when'.lower())},
            QUESTION: {question},
        },
    }

    # The four spaces opening the second and third pieces are part of the
    # payload: they are the whitespace that separates the elements.
    fixture = (
        '<document><uri>432470</uri> <subject>Why are yawns contagious?</subject>'
        '    <content> When people yawn, you see that other people in the room yawn, too. Why is that?</content>'
        '    <bestanswer>When your body </bestanswer></document>')

    # The context manager closes the file even if the write fails.
    with open('parser_test.xml', 'w') as ofile:
      ofile.write(fixture)

    self.assertDictEqual(etree.parse('parser_test.xml', self.parser), expected)
 def test_parse_multiple_questions(self):
   expected = {
       normalizeWithStopWordRemoval(u'Why are yawns contagious?'.lower()): {
           ANSWER: set([normalizeWithStopWordRemoval(u'body when'.lower())]),
           QUESTION: set([u'Why are yawns contagious?'.lower()])
       },
       normalizeWithStopWordRemoval(
           u'What\'s the best way to heat up a cold hamburger '.lower()): {
               ANSWER: set([normalizeWithStopWordRemoval(
                   u'If you must eat a heated In & Out hamburger'.lower())]),
               QUESTION: set([
                   u'What\'s the best way to heat up a cold hamburger '.lower()
               ])
           }
   }
   ofile = open('parser_test.xml', 'w')
   ofile.write(
       '<document><uri>432470</uri><subject>Why are yawns contagious?</subject>\
                                <content> When people yawn, you see that other people in the room yawn, too. Why is that?</content>\
                                <bestanswer>When your body </bestanswer></document><vespaadd><document type="wisdom"><uri>800062</uri>\
                                <subject> What\'s the best way to heat up a cold hamburger (In & Out)?</subject>\
                                <content> What\'s the best way to heat up a cold hamburger (In & Out)? </content>\
                                <bestanswer> If you must eat a heated In & Out hamburger then</bestanswer></vespaadd></document>')
   ofile.close()