Ejemplo n.º 1
0
def tagBigboss():
  """Load the unsegmented Bigboss sentences, map them and tag them.

  Reads the hard-coded ``CSSentsWithIDs.csv`` file through
  ``Reader.readTaggsedSentencesCSV`` (called with the arguments 2 and 5),
  passes the result through ``map2UniWithLtag`` and hands the mapped
  sentences to ``tagTaggedSentences``.  Nothing is returned.
  """
  # Raw string literal: the Windows path is full of backslashes.  The former
  # non-raw literal only worked because unrecognised escapes were kept
  # verbatim on Python 2; on Python 3 the "backslash-U" sequence is a
  # SyntaxError.  The resulting path is character-for-character the same.
  unsegBigBoss = r'C:\Users\t-phgad\Documents\Project\Data\Bigboss\FromDocs\CSSentsWithIDs.csv'
  unsegSentences = Reader.readTaggsedSentencesCSV(unsegBigBoss, 2, 5)
  unsegSentences = map2UniWithLtag(unsegSentences)
  tagTaggedSentences(unsegSentences)
Ejemplo n.º 2
0
def prepareTestDataUni(bigBossData, uniTestData):
  """Convert a Bigboss CSV file into a TSV test file.

  The first row of ``bigBossData`` is skipped as a header.  Every following
  row contributes a ``(line[2], line[3], line[4])`` tuple to the current
  sentence; a row whose first column is empty closes the current sentence.
  The collected sentences are passed through ``map2Uni.map2UniWithLtag`` and
  the result is written with ``Writer.writePOSSentencesTSV``.

  Args:
    bigBossData: path of the CSV file to read.
    uniTestData: destination handed to ``Writer.writePOSSentencesTSV``.
  """
  sentences = []
  sentence = []
  # ``with`` guarantees the input file is closed, even if a row is malformed
  # (the handle used to be leaked).
  with open(bigBossData) as csvFile:
    reader = csv.reader(csvFile)
    # Skip the header row.  The ``next`` builtin works on Python 2.6+ and 3,
    # and the default keeps an empty file from raising StopIteration.
    next(reader, None)
    for line in reader:
      # A row with an empty first column separates sentences.  A completely
      # blank row (``[]``) is treated the same way instead of raising
      # IndexError -- NOTE(review): confirm blank rows are meant as separators.
      if not line or line[0] == '':
        sentences.append(sentence)
        sentence = []
        continue
      sentence.append((line[2], line[3], line[4]))
  # Flush the last sentence when the file does not end with a separator row.
  if sentence:
    sentences.append(sentence)
  uniSentences = map2Uni.map2UniWithLtag(sentences)
  Writer.writePOSSentencesTSV(uniSentences, uniTestData)
Ejemplo n.º 3
0
  
  assert len(enTestSents) == len(enTagger.outputs)
  for sentIndex in range(len(enTagger.outputs)):
    output = enTagger.outputs[sentIndex]
    taggedSentneces[enTestSents[sentIndex][1]].append((output, enTestSents[sentIndex][2]))
  
  assert len(hiTestSents) == len(hiTagger.outputs)
  for sentIndex in range(len(hiTagger.outputs)):
    output = hiTagger.outputs[sentIndex]
    taggedSentneces[hiTestSents[sentIndex][1]].append((output, hiTestSents[sentIndex][2]))
  
  for sentId in taggedSentneces:
    sent = []
    taggedChunks = sorted(taggedSentneces[sentId], cmp=lambda x, y:int(x[1]) - int(y[1]))
    for chunk in taggedChunks:
      sent.extend(chunk[0])
    taggedSentneces[sentId] = sent
  taggedFB = 'C:\Users\\t-phgad\Documents\Project\Data\\Bigboss\\longPosAutoTags.csv'
  writer = UnicodeWriter(open(taggedFB, 'w'), lineterminator='\n')
  for sent in taggedSentneces.itervalues():
    for line in sent:
      writer.writerow(line)
    writer.writerow(["", ""])

if __name__ == '__main__':
  # Script entry point: read the unsegmented Bigboss sentences, map them with
  # map2UniWithLtag and pass the mapped sentences to tagBigboss.
  #
  # Raw string literal: the former non-raw literal relied on unrecognised
  # backslash escapes being kept verbatim, which is a SyntaxError on Python 3.
  # The resulting path is character-for-character the same.
  unsegBigBoss = r'C:\Users\t-phgad\Documents\Project\Data\Bigboss\FromDocs\CSSentsWithIDs.csv'
  unsegSentences = Reader.readTaggsedSentencesCSV(unsegBigBoss, 2, 5)
  unsegSentences = map2UniWithLtag(unsegSentences)
  # NOTE(review): tagBigboss is called with one argument here -- confirm this
  # matches the signature defined in this file.
  tagBigboss(unsegSentences)