コード例 #1
0
 def update_bigram_hash(self, word_array, bigrams_hash):
     """Add the bigrams of ``word_array`` to the counts in ``bigrams_hash``.

     Args:
         word_array: Token sequence passed to ``utils.generate_bigrams``.
         bigrams_hash: Dict mapping bigram -> occurrence count. It is
             updated in place; a bigram seen for the first time starts at 1.

     Returns:
         None. The result is the mutation of ``bigrams_hash``.
     """
     for key in utils.generate_bigrams(word_array):
         # dict.get is a single hash lookup; testing against ``.keys()``
         # would rebuild the key list on every iteration under Python 2.
         bigrams_hash[key] = bigrams_hash.get(key, 0) + 1
コード例 #2
0
    def identify_interaction(self, path_to_test_file):
        """Label every sentence of an XML file using the scored bigrams.

        Each ``sentence`` element's ``text`` attribute is tokenized with
        ``utils.tokenize_string_without_punctuations`` and its bigrams are
        checked, in order, against ``self.bigrams_based_on_score``. The
        first bigram found there decides the label for that sentence.

        Args:
            path_to_test_file: Path of the XML document to classify.

        Returns:
            Dict mapping each sentence's ``id`` attribute to the value
            stored in ``self.bigrams_based_on_score`` for the first matching
            bigram, or to the string ``"false"`` when no bigram matches.

        Raises:
            KeyError: If a sentence element has no ``text`` or ``id``
                attribute.
        """
        result = {}
        # ``with`` guarantees the file is closed even when parsing or
        # tokenizing raises part-way through.
        with open(path_to_test_file, 'rb') as document_data:
            xml_data = parse(document_data)
            for sentence in xml_data.getElementsByTagName("sentence"):
                sentence_attrs = dict(sentence.attributes.items())
                sentence_text = sentence_attrs["text"]
                sentence_id = sentence_attrs["id"]
                words = utils.tokenize_string_without_punctuations(sentence_text)

                for bigram in utils.generate_bigrams(words):
                    # Membership on the dict itself is a hash lookup.
                    if bigram in self.bigrams_based_on_score:
                        result[sentence_id] = self.bigrams_based_on_score[bigram]
                        # Single-argument parenthesized form prints the same
                        # text under both Python 2 and Python 3.
                        print("bigram as a result of which classification is done:"+bigram)
                        break
                else:
                    # Loop finished without a break: no known bigram occurred.
                    result[sentence_id] = "false"
        return result