Example #1
def compress_twt_v2(self):
    global Pos_tag
    global Cont
    self.compressed_twt = []
    contraction_keys = Cont.keys()
    for twt in self.reduced_twt:
        word = twt["word"]
        time = twt["time"]
        if word in contraction_keys:
            # split the expansion into individual words, each keeping
            # the original timestamp
            new_words = Cont[word][0].split(" ")
            for new_word in new_words:
                self.compressed_twt.append({
                    "word": new_word,
                    "original_word": word,
                    "time": time
                })
        else:
            tag = Pos_tag(
                [word])[0][1]  #-> Pos_tag(["geese"]) -> [("geese", "NN")]
            pos_type = "n"  # default: lemmatize as a noun
            if tag.find("VB") != -1:  # it's a verb
                pos_type = "v"
            new_word = self.Lemma.lemmatize(word, pos=pos_type)
            self.compressed_twt.append({
                "word": new_word,
                "original_word": word,
                "time": time
            })
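
The globals this method reads are defined elsewhere; a minimal setup sketch, assuming NLTK (which the Pos_tag(["geese"]) comment suggests) and a hypothetical stand-in contraction table:

# Sketch of the environment compress_twt_v2 expects; Cont and the sample
# values are hypothetical stand-ins, not the original data.
from nltk import pos_tag as Pos_tag        # needs: nltk.download("averaged_perceptron_tagger")
from nltk.stem import WordNetLemmatizer    # needs: nltk.download("wordnet")

Cont = {"don't": ["do not"]}               # contraction -> list of expansions
Lemma = WordNetLemmatizer()

print(Pos_tag(["geese"]))                  # something like [('geese', 'NNS')]
print(Lemma.lemmatize("geese", pos="n"))   # 'goose'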
Example #2
import re


def expand_contractions(text):
    """expand shortened words, e.g. don't to do not"""
    # `contractions` is assumed to be a module-level dict mapping each
    # contraction to its expansion, e.g. {"don't": "do not"}
    contractions_re = re.compile('(%s)' % '|'.join(contractions.keys()))

    def replace(match):
        return contractions[match.group(0)]

    return contractions_re.sub(replace, text)
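
A quick usage check, assuming a module-level `contractions` dict; the sample table below is a hypothetical stand-in:

contractions = {"don't": "do not", "won't": "will not"}  # hypothetical sample
print(expand_contractions("they don't know what they won't miss"))
# -> "they do not know what they will not miss"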
Example #3
def expand_contractions(tweet):
    import re
    from contractions import contractions
    tweet = tweet.lower()
    #convert U+2019 to U+0027 (apostrophe)
    tweet = tweet.replace(u"\u2019", u"\u0027")
    contractions_re = re.compile('(%s)' % '|'.join(contractions.keys()))

    def replace(match):
        # expand to the most likely alternative, stored first in the list: [0]
        return contractions[match.group(0)][0]

    return contractions_re.sub(replace, tweet)
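
The U+2019 replacement matters because curly "smart quote" apostrophes would otherwise never match the ASCII-keyed contraction table; a standalone check of that step:

s = u"it\u2019s"                        # "it’s" with a right single quotation mark
print(s.replace(u"\u2019", u"\u0027"))  # -> "it's" (plain ASCII apostrophe)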
Example #4
def eliminate_contraction(self):
    '''
    Expand contractions into full words, e.g. there's -> there is.
    '''
    global Cont
    global Pos_tag

    self.modified_transcript = ""
    reg = re.compile(r"\s+|\.")  # split on whitespace and periods
    keys = Cont.keys()
    for word in reg.split(self.full_transcript):
        if word != "" and word != "\n":
            if word in keys:
                word = Cont[word][0]  # take the most likely expansion
            else:
                tag = Pos_tag([word])[0][1]
                pos_type = "n"
                if tag.startswith("VB"):  # it's a verb
                    pos_type = "v"
                word = self.Lemma.lemmatize(word, pos=pos_type)
            self.modified_transcript += (" " + word)
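
The empty-string guard in the loop exists because consecutive delimiters make re.split emit empty tokens; a standalone check:

import re

reg = re.compile(r"\s+|\.")
print(reg.split("there's a cat. dogs ran"))
# -> ["there's", 'a', 'cat', '', 'dogs', 'ran']  (empty token after "cat")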
Example #5
import json
from collections import Counter


def unfrequent_nouns(f):
    """
    - take a recognition result (raw text) and filter out the top x most
      common words, plus variants of those like plurals (x = 5000 right now)
    - remove short words (<3 chars) and words with "'" in them
    - export those words and their counts as JSON (stdout)
    """

    # extend the frequency list with common inflected variants
    top5000words_with_variants = set(top5000words)

    for w in top5000words:
        if len(w) >= 3:
            top5000words_with_variants |= {w + 's'}
            top5000words_with_variants |= {w + 'ing'}
            top5000words_with_variants |= {w + 'ting'}
            top5000words_with_variants |= {w + 'ed'}
            top5000words_with_variants |= {w + 'ped'}
            top5000words_with_variants |= {w + 'd'}
            top5000words_with_variants |= {w + "'s"}
            top5000words_with_variants |= {w + "'ll"}

    contractions = set(contractions_.keys())
    top5000words_with_variants |= contractions

    hyp = [w.lower() for w in open(f, encoding='utf-8').read().split()]
    # hyp_nouns = (w for w, pos in pos_tag(hyp) if
    #              pos in ['NNP', 'NN', '-NONE-', 'NNS'])
    hyp_lemmatized = (lemmatize(w) for w in hyp)

    # remove short words and words with '
    hyp_lemmatized = (
        w for w in hyp_lemmatized
        if len(w) >= 3 and "'" not in w)

    # keep only words outside the extended frequency list
    hyp_special = (w for w in hyp_lemmatized
                   if w not in top5000words_with_variants)
    bag = Counter(hyp_special)

    json_ = json.dumps(
        {word: count for word, count in bag.most_common()}, indent=2)
    print(json_)
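
A minimal harness sketch; every name below is an assumption mirroring the globals the function reads (the real lists are presumably loaded elsewhere), and "hypothesis.txt" is a placeholder path:

from nltk.stem import WordNetLemmatizer   # needs: nltk.download("wordnet")

top5000words = ["the", "and", "run"]      # hypothetical frequency list
contractions_ = {"don't": "do not"}       # hypothetical contraction table
lemmatize = WordNetLemmatizer().lemmatize

unfrequent_nouns("hypothesis.txt")        # prints {"word": count, ...} as JSON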
Example #6
def compress_twt(self):
    global Pos_tag
    global Cont
    self.compressed_twt = []
    self.uncompressed_twt = []
    compressed_twt = {}
    contraction_keys = Cont.keys()
    for twt in self.reduced_twt:
        word = twt["word"]
        time = twt["time"]
        if word in contraction_keys:
            self.uncompressed_twt.append({
                "word": word,
                "original_word": word,
                "time": time
            })
        else:
            tag = Pos_tag(
                [word])[0][1]  #-> Pos_tag(["geese"]) -> [("geese", "NN")]
            pos_type = "n"  # default: lemmatize as a noun
            if tag.find("VB") != -1:  # it's a verb
                pos_type = "v"
            new_word = self.Lemma.lemmatize(word, pos=pos_type)
            if word not in Stop_words:
                # group every timestamp at which this lemma occurs
                compressed_twt.setdefault(new_word, []).append(time)
            self.uncompressed_twt.append({
                "word": new_word,
                "original_word": word,
                "time": time
            })
    for key, times in compressed_twt.items():
        self.compressed_twt.append({
            "word": key,
            "time": times
        })
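
Stop_words is another global; NLTK's English stop list is a plausible source (an assumption). The two output structures then differ as sketched below:

from nltk.corpus import stopwords          # needs: nltk.download("stopwords")
Stop_words = set(stopwords.words("english"))

# After compress_twt():
#   self.compressed_twt   -> [{"word": "goose", "time": [1.2, 7.5]}, ...]
#                            one entry per lemma, all timestamps grouped
#   self.uncompressed_twt -> [{"word": "goose", "original_word": "geese",
#                              "time": 1.2}, ...]
#                            one entry per token, in original order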
Example #7
def remove_contractions(text, contractions):
    # despite the name, this expands contractions via plain substring
    # replacement, e.g. "don't" -> "do not"
    for word in contractions.keys():
        if word in text:
            text = text.replace(word, contractions[word])
    return text
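
A quick check with a hypothetical table; note that plain str.replace also fires inside longer words, which the regex-based variants above only avoid if their keys are anchored with word boundaries:

contractions = {"he'll": "he will"}        # hypothetical sample
print(remove_contractions("she'll wait", contractions))
# -> "she will wait": "he'll" matched inside "she'll"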