Ejemplo n.º 1
0
def tagBigboss():
  """Load the Bigboss sentence CSV and run it through tagTaggedSentences.

  The sentences are read with Reader.readTaggsedSentencesCSV, passed
  through map2UniWithLtag, and then handed to tagTaggedSentences.
  Returns None; any output is produced by tagTaggedSentences.
  """
  # Raw string: the path is full of backslashes, and sequences such as
  # "\U" are a syntax error in non-raw literals on Python 3. The resulting
  # value is identical to the original literal.
  unsegBigBoss = r'C:\Users\t-phgad\Documents\Project\Data\Bigboss\FromDocs\CSSentsWithIDs.csv'
  # NOTE(review): 2 and 5 are presumably column indices in the CSV --
  # confirm against Reader.readTaggsedSentencesCSV.
  unsegSentences = Reader.readTaggsedSentencesCSV(unsegBigBoss, 2, 5)
  unsegSentences = map2UniWithLtag(unsegSentences)
  tagTaggedSentences(unsegSentences)
Ejemplo n.º 2
0
  
  assert len(enTestSents) == len(enTagger.outputs)
  for sentIndex in range(len(enTagger.outputs)):
    output = enTagger.outputs[sentIndex]
    taggedSentneces[enTestSents[sentIndex][1]].append((output, enTestSents[sentIndex][2]))
  
  assert len(hiTestSents) == len(hiTagger.outputs)
  for sentIndex in range(len(hiTagger.outputs)):
    output = hiTagger.outputs[sentIndex]
    taggedSentneces[hiTestSents[sentIndex][1]].append((output, hiTestSents[sentIndex][2]))
  
  for sentId in taggedSentneces:
    sent = []
    taggedChunks = sorted(taggedSentneces[sentId], cmp=lambda x, y:int(x[1]) - int(y[1]))
    for chunk in taggedChunks:
      sent.extend(chunk[0])
    taggedSentneces[sentId] = sent
  taggedFB = 'C:\Users\\t-phgad\Documents\Project\Data\\Bigboss\\longPosAutoTags.csv'
  writer = UnicodeWriter(open(taggedFB, 'w'), lineterminator='\n')
  for sent in taggedSentneces.itervalues():
    for line in sent:
      writer.writerow(line)
    writer.writerow(["", ""])

if __name__ == '__main__':
  # Script entry point: load the Bigboss sentences, convert them with
  # map2UniWithLtag, and tag them.
  # Raw string: the path is full of backslashes, and sequences such as
  # "\U" are a syntax error in non-raw literals on Python 3. The resulting
  # value is identical to the original literal.
  unsegBigBoss = r'C:\Users\t-phgad\Documents\Project\Data\Bigboss\FromDocs\CSSentsWithIDs.csv'
  # NOTE(review): 2 and 5 are presumably column indices in the CSV --
  # confirm against Reader.readTaggsedSentencesCSV.
  unsegSentences = Reader.readTaggsedSentencesCSV(unsegBigBoss, 2, 5)
  unsegSentences = map2UniWithLtag(unsegSentences)
  # Disabled step kept from the original script:
  # prepareTestDataUni(bigBossData, uniTestData)
  tagBigboss(unsegSentences)
  
Ejemplo n.º 3
0
def tagFB():
  """Load the FB annotated CSV and run it through tagTaggedSentences.

  Returns None; any output is produced by tagTaggedSentences.
  """
  # Raw string: the path is full of backslashes, and sequences such as
  # "\U" are a syntax error in non-raw literals on Python 3. The resulting
  # value is identical to the original literal.
  exractedFB = r'C:\Users\t-phgad\Documents\Project\Data\FB\FBPOSAnnotated.csv'
  # NOTE(review): 0 and 3 are presumably column indices in the CSV --
  # confirm against Reader.readTaggsedSentencesCSV.
  unsegSentences = Reader.readTaggsedSentencesCSV(exractedFB, 0, 3)
  # Alternative entry point, currently disabled:
  # tagUntaggedSentences(unsegSentences)
  tagTaggedSentences(unsegSentences)
Ejemplo n.º 4
0
def tagFB():
  """Load the FB annotated CSV and run it through tagWithManualTaggerTags.

  Returns None; any output is produced by tagWithManualTaggerTags.
  """
  # Raw string: the path is full of backslashes, and sequences such as
  # "\U" are a syntax error in non-raw literals on Python 3. The resulting
  # value is identical to the original literal.
  exractedFB = r'C:\Users\t-phgad\Documents\Project\Data\FB\FBPOSAnnotated.csv'
  # NOTE(review): 0 and 3 are presumably column indices in the CSV --
  # confirm against Reader.readTaggsedSentencesCSV.
  unsegSentences = Reader.readTaggsedSentencesCSV(exractedFB, 0, 3)
  # Alternative entry point, currently disabled:
  # tagWithBigbossTransitions(unsegSentences)
  tagWithManualTaggerTags(unsegSentences)