Example #1
0
 def test_write_srl_labels(self):
     infilenames = [config.corpus_path+'srl_iob.train', config.corpus_path+'srl_iob.dev', config.corpus_path+'srl_iob.test']
     outfilenames = [config.corpus_path+'srl_labels.train', config.corpus_path+'srl_labels.dev', config.corpus_path+'srl_labels.test'] 
     
     #create categories file, remove infrequent categories
     categories = set() 
     cat_count = dict()     
     for filename in infilenames:  
         sentences_tags_verbs = read_srl(filename)        
         for stv in sentences_tags_verbs:
             _,tagslist,_ = stv
             for tags in tagslist:
                 for tag in tags:
                     tag = tag.strip()
                     categories.add(tag)
                     if(cat_count.has_key(tag)):
                         cat_count[tag] += 1
                     else:
                         cat_count[tag] = 1
     sorted_cat_dict = sorted(cat_count, key=cat_count.get, reverse=True)
     
         
     with open(config.corpus_path+"srl_Freqwithcount.categories", 'w') as wf:
         for i,w in enumerate(sorted_cat_dict):
             print w, cat_count.get(w)
             wf.write(w+" "+str(cat_count.get(w))+'\n')
Example #2
0
def get_features(srl_iob_file, chunks_tags_file):
    """Write (phrase, SRL label) training pairs to srl_phrases_labels.train.

    For every verb of every sentence, walks the chunk-reduced sentence from
    get_sent_phrases_srl and pairs each word/phrase with the SRL tag at the
    phrase's starting word offset. The vector-feature output (outfile) is
    currently disabled — see the commented-out get_phrase_vector lines.

    Fixes: both output files are now closed via context managers
    (`phnlabels` was previously never closed at all, and `outfile` leaked
    on any exception); the bare ``except:`` is narrowed to ``Exception``
    so KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    # Word-embedding matrix and vocabulary: word -> row index into We_orig.
    mats = sio.loadmat(config.corpus_path+'vars.normalized.100.mat')
    We_orig = mats.get('We')
    words = mats.get('words').flatten()
    keys = [str(words[i][0]).strip() for i in range(len(words))]
    word_dict = dict(zip(keys, range(len(words))))

    # RAE parameters — only needed by the commented-out vectorization below.
    params = sio.loadmat(config.corpus_path+'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    b1 = params.get('b1')

    new_sents, _ = get_sent_phrases_srl(srl_iob_file, chunks_tags_file)
    sentences_tags_verbs = read_srl(srl_iob_file)

    # Context managers guarantee both files are closed even on error.
    with open(config.corpus_path+"srl_vec_features.train", 'w') as outfile, \
         open(config.corpus_path+"srl_phrases_labels.train", 'w') as phnlabels:
        for new_sent, sentence_tags_verbs in zip(new_sents, sentences_tags_verbs):
            sent, taglists, verbIds = sentence_tags_verbs
            for i, verbId in enumerate(verbIds):
                tags = taglists[i]
                offset = 0  # word offset of the current phrase in the original sentence
                for wordOrPhrase in new_sent:
                    try:
#                        wpvec = get_phrase_vector(wordOrPhrase, W1, W2, b1, We_orig, word_dict)
#                        verbVec = get_phrase_vector(sent[verbId], W1, W2, b1, We_orig, word_dict)
                        label = tags[offset]
                        offset += len(wordOrPhrase)
#                        row = " ".join([str(x) for x in wpvec]) + "\t" + " ".join([str(x) for x in verbVec]) \
#                                + "\t"+ label + '\n'
#                        outfile.write(row)
                        phnlabels.write(" ".join(wordOrPhrase)+ "\t" + label + "\n")
                    except Exception:
                        # best-effort: skip phrases whose tag lookup fails
                        print
Example #3
0
 def test_read_srl(self):
     """Smoke-test read_srl on the dev split of the SRL corpus."""
     dev_path = '/home/bhanu/workspace/MVRNN/data/corpus/srl_iob.dev'
     sentences_tags_verbs_ori = read_srl(dev_path)
     # NOTE(review): called with no argument here — presumably read_srl has
     # a default path for predictions; confirm against its definition.
     sentences_tags_verbs_pred = read_srl()
Example #4
0
def get_sent_phrases_srl(srl_iob_file, chunk_tags_file):
    '''Group each sentence's tokens into chunk phrases and remap SRL
    annotations onto the reduced token sequence.

    Returns two parallel lists (one entry per sentence):
      - new_sent_tags_verbs: (new_sent, new_tagsList, new_verbIds) where
        new_sent is a list of phrases (each phrase a list of words, possibly
        a single word), new_tagsList has one SRL tag per phrase for each
        verb, and new_verbIds re-indexes each verb position into new_sent.
      - new_sent_pos_chk: (new_posTags, new_chkTags) — the POS/chunk tag of
        the LAST word of each phrase.
    '''

    postags, chktags = read_sent_chunktags(chunk_tags_file)
    sentences_tags_verbs = read_srl(srl_iob_file)
    new_sent_tags_verbs = []
    new_sent_pos_chk = []
    for s, stv in enumerate(sentences_tags_verbs):
        sent, tagsList, verbIds = stv
        chktag = chktags[s]
        postag = postags[s]
        new_sent = [] 
        
        # Build the reduced sentence: a B-xx token immediately followed by
        # I-xx tokens is merged into one phrase; B-VP chunks are never
        # merged, so verbs stay single tokens.
        i = 0
        while( i < len(chktag) - 1):
            thistag = chktag[i]
            nexttag = chktag[i+1]
            if(thistag.startswith('B-') and  nexttag.startswith('I-')
               and thistag != 'B-VP'):
                phrase = [sent[i]] 
                j = i+1
                # absorb the entire I- continuation into this phrase
                while(j < len(chktag) and  chktag[j].startswith('I-')):
                    phrase.append(sent[j])
                    j += 1
                new_sent.append(phrase)                    
                i = j
                
            else:
                new_sent.append([sent[i]])
                i += 1
                    
            # The outer loop stops one token short, so the final token is
            # appended here as its own phrase (unless it was absorbed into
            # the preceding phrase, in which case i == len(chktag)).
            # NOTE(review): a one-token sentence (len(chktag) == 1) never
            # enters this loop and would produce an empty new_sent —
            # confirm the corpus cannot contain single-word sentences.
            if(i == len(chktag) -1):
                new_sent.append([sent[i]])
        
        # POS/chunk tag of each phrase = tag of that phrase's last word.
        offset = 0
        new_posTags = []
        new_chkTags = []
        for i, phrase in enumerate(new_sent):
            offset+=len(phrase)
            new_posTags.append(postag[offset-1]) 
            new_chkTags.append(chktag[offset-1])        
        # Re-index each verb from a word offset to a phrase index; only
        # matches when the verb starts a phrase (verbs are unmerged above).
        new_verbIds = []
        for verbId in verbIds:
            offset = 0
            for i, phrase in enumerate(new_sent):
                if(offset == verbId):
                    new_verbIds.append(i)
                    break
                offset += len(phrase)
        # One SRL tag per phrase, taken from the phrase's FIRST word.
        new_tagsList =[]
        for nv, verbId in enumerate(verbIds):
            new_tags = []
            tags = tagsList[nv]
            offset = 0
            for i, phrase in enumerate(new_sent):
                new_tags.append(tags[offset])
                offset += len(phrase)
            new_tagsList.append(new_tags)
        
        new_sent_tags_verbs.append((new_sent, new_tagsList, new_verbIds))   
        new_sent_pos_chk.append((new_posTags, new_chkTags))     
#        all_new_sents.append(new_sent)  
#        all_new_verbIds.append(new_verbIds)
#        all_new_tags.append(new_tagsList)      
    
    return new_sent_tags_verbs, new_sent_pos_chk