def get_tagged_sentences(self, file_name):
    """Parse a tab-separated CoNLL-style file into a list of Sentence objects.

    Each token line is expected to have tab-separated columns where
    column 1 is the surface form, column 3 the POS tag, column 6 the
    integer head index, and column 7 the head label (0-based after
    split; presumably CoNLL-X columns — confirm against the data files).
    Sentences are separated by lines with fewer than two columns
    (e.g. blank lines). Every sentence is wrapped with a leading
    '<start>' token and a trailing 'ROOT' token.

    :param file_name: path to the UTF-8 encoded CoNLL file
    :return: list of Sentence objects, e.g. [Sentence_obj, Sentence_obj]
    """
    sentences_w_tags = []
    sentence_obj = Sentence()
    sentence_obj.add_token(Token(orig_token='<start>'))
    on_sentence = False
    # 'with' ensures the file handle is closed (the original iterated an
    # unclosed codecs.open handle).
    with codecs.open(file_name, 'r', encoding="utf-8") as conll_file:
        for line in conll_file:
            vals = line.split('\t')
            if len(vals) > 1:
                # Token line: copy the columns we care about.
                on_sentence = True
                tok = Token()
                tok.orig = vals[1]
                tok.pos_tag = vals[3]
                tok.head = int(vals[6])
                tok.head_label = vals[7]
                sentence_obj.add_token(tok)
            elif on_sentence:
                # Separator line: close out the current sentence and
                # start a fresh one with its '<start>' pad.
                on_sentence = False
                sentence_obj.add_token(Token(orig_token='ROOT'))
                sentences_w_tags.append(sentence_obj)
                sentence_obj = Sentence()
                sentence_obj.add_token(Token(orig_token='<start>'))
    # Bug fix: if the file ends without a trailing separator line, the
    # last sentence was silently dropped by the original code. Flush it.
    if on_sentence:
        sentence_obj.add_token(Token(orig_token='ROOT'))
        sentences_w_tags.append(sentence_obj)
    return sentences_w_tags
def read_conll(loc):
    """Yield a Sentence object per blank-line-separated block of a CoNLL file.

    Each non-blank line must have exactly 10 whitespace-separated fields
    (index, word, lemma, pos, -, -, head, label, -, -); the unpacking
    below raises ValueError otherwise. Words and tags are interned to
    share storage across duplicate strings. A head value of '0' (the
    artificial root) is remapped to ``len(lines)`` — presumably the
    position of the ROOT pad token appended by pad_tokens; confirm
    against Sentence's indexing convention.

    :param loc: path to the CoNLL file
    :yields: Sentence objects with words, POS tags, heads, and labels set
    """
    # Read the whole file up front inside a 'with' so the handle is
    # closed deterministically (the original leaked the open file).
    with open(loc) as conll_file:
        content = conll_file.read()
    for sent_str in content.strip().split('\n\n'):
        lines = [line.split() for line in sent_str.split('\n')]
        words = DefaultList(''); tags = DefaultList('')
        # Index 0 is reserved for the '<start>' pad, hence the None seed.
        heads = [None]; labels = [None]
        for index, word, lem, pos, something, s1, head, label, s2, s3 in lines:
            words.append(intern(word))
            tags.append(intern(pos))
            heads.append(int(head) if head != '0' else len(lines))
            labels.append(label)
        pad_tokens(words); pad_tokens(tags)

        sent_obj = Sentence()
        sent_obj.add_words(words)
        sent_obj.set_pos_tags(tags)
        sent_obj.set_heads(heads)
        sent_obj.set_head_labels(labels)

        yield sent_obj