Example #1
0
def create_main_feature(qid, word, token, text, entities, tags):
  """Build the bag-of-features dict describing a single word.

  The dict carries the query id, the results of the shape helpers
  (has_root, full_upper, full_lower, is_capitalized, word_len) applied to
  ``word``, one indicator keyed by the category returned by
  ``sin_cat(word, tags)``, one indicator keyed by the word itself, and the
  ``token`` value stored as-is under 'ES_TOKEN'.

  ``text`` and ``entities`` are accepted but not used by this function.
  """
  # Helpers are evaluated up front, in the order the features are emitted.
  root_value = has_root(word)
  upper_value = full_upper(word)
  lower_value = full_lower(word)
  capitalized_value = is_capitalized(word)
  category_key = 'CAT_SINTACTICA_' + sin_cat(word, tags)
  word_key = 'PALABRA_' + word
  length_value = word_len(word)

  return {
    'qid': qid,
    'TIENE_RAIZ': root_value,
    'FULL_MAYUSCULAS': upper_value,
    'FULL_MINUSCULAS': lower_value,
    'INICIO_MAYUSCULAS_RESTO_MINUSCULAS': capitalized_value,
    category_key: 1,
    word_key: 1,
    'PALABRA_LARGO': length_value,
    'ES_TOKEN': token,
  }
Example #2
0
def create_side(words, side, tmp_bow, text):
  """Add context-word features for one side of a word to ``tmp_bow``.

  For every non-empty entry of ``words`` two counters are updated in place:
  ``<PREFIX><word>`` and ``<PREFIX>CAT_<category>``. The prefix is
  'IZQUIERDA_' when ``side == 'left'`` and 'DERECHA_' when
  ``side == 'right'``; the category is the value returned by
  ``sin_cat(word, text)``. A feature seen for the first time gets the
  value 1 and each repetition increments it.

  Args:
    words: iterable of context words; empty strings are skipped.
    side: either 'left' or 'right'.
    tmp_bow: feature dict, modified in place.
    text: forwarded unchanged as the second argument of ``sin_cat``.

  Returns:
    The same ``tmp_bow`` object that was passed in.

  Raises:
    ValueError: if ``side`` is neither 'left' nor 'right'.
  """
  if side == 'left':
    word_prefix = 'IZQUIERDA_'
  elif side == 'right':
    word_prefix = 'DERECHA_'
  else:
    # Fail with a clear message instead of an UnboundLocalError in the loop.
    raise ValueError("side must be 'left' or 'right', got %r" % (side,))
  cat_prefix = word_prefix + 'CAT_'

  for w in words:
    if w == '':
      continue
    # Build each key from the fixed prefix; the prefix must not accumulate
    # previously seen words across iterations.
    word_key = word_prefix + str(w)
    cat_key = cat_prefix + sin_cat(w, text)
    tmp_bow[word_key] = tmp_bow.get(word_key, 0) + 1
    tmp_bow[cat_key] = tmp_bow.get(cat_key, 0) + 1
  return tmp_bow
 def test_sin_cat_root(self):
     # The sentence below contains the word under test ("was"); sin_cat is
     # expected to return the tag 'VBD' for it.
     sentence = "I think you have bought the cover story this is propaganda , absolutely opposite the truth , the man was trying to rebuild the western alliance in the threat of rising german aggression ! ."
     entity_labels = ['/PERSON', '/LOCATION', '/ORGANIZATION']
     self.assertEqual('VBD', sin_cat("was", sentence, entity_labels))