Esempio n. 1
0
    def Query(self, handles, status, abort):
        """Look up lyrics for every handle and return them in handle order.

        For each handle the title and artist are read with ``handle.Format``,
        candidates are fetched with ``get_lyric_list`` and the candidate whose
        artist and title have the smallest combined Levenshtein distance to
        the handle's values is downloaded with ``get_lyric``.

        Args:
            handles: iterable of objects exposing ``Format(pattern)``.
            status: progress object; ``Advance()`` is called once per handle.
            abort: cancellation object; as soon as ``Aborting()`` is true the
                results gathered so far are returned.

        Returns:
            A list with one entry per processed handle: the lyric text, or
            ``''`` when no candidate was found or the lookup raised.
        """
        result = []

        for handle in handles:
            status.Advance()

            if abort.Aborting():
                return result

            title = handle.Format("[%title%]")
            artist = handle.Format("[%artist%]")

            try:
                best = None
                smallest = None

                for url, cand_title, cand_artist in get_lyric_list(title,
                                                                   artist):
                    distance = (LevenshteinDistance(artist, cand_artist) +
                                LevenshteinDistance(title, cand_title))
                    # Strict comparison keeps the first of several equally
                    # close candidates.
                    if smallest is None or distance < smallest:
                        smallest = distance
                        best = url

                if best is None:
                    result.append('')
                else:
                    result.append(get_lyric(best))
            except Exception:
                # Best effort: a failure for one track is reported on stdout
                # and must not stop the lookup of the remaining tracks.
                traceback.print_exc(file=sys.stdout)
                result.append('')

        # Return the collected lyrics on normal completion too, not only
        # when the query is aborted.
        return result
Esempio n. 2
0
    def Query(self, handles, status, abort):
        """Fetch lyrics for every handle from the QianQian lyric server.

        For each handle a search request is sent with the artist and title
        encoded by ``self.ToQianQianHexString``.  Among the returned ``lrc``
        elements, the one whose artist and title have the smallest combined
        Levenshtein distance to the handle's values is downloaded, using the
        code produced by ``self.CreateQianQianCode``.

        Args:
            handles: iterable of objects exposing ``Format(pattern)``.
            status: progress object; ``Advance()`` is called once per handle.
            abort: cancellation object; as soon as ``Aborting()`` is true the
                results gathered so far are returned.

        Returns:
            A list with one entry per processed handle: the downloaded lyric,
            or ``''`` when nothing matched, the server answered with
            "Search ID or Code error!", or the lookup raised.
        """
        result = []

        for handle in handles:
            status.Advance()

            if abort.Aborting():
                return result

            try:
                artist = handle.Format("[%artist%]")
                title = handle.Format("[%title%]")
                # Per the original author's note, the response body is
                # UTF-8 encoded.
                s = urllib.urlopen(
                    "http://ttlrcct2.qianqian.com/dll/lyricsvr.dll?sh?Artist=%s&Title=%s&Flags=0"
                    % (self.ToQianQianHexString(artist),
                       self.ToQianQianHexString(title))).read()
                doc = minidom.parseString(s)
                best = None
                smallest = None

                for node in doc.getElementsByTagName("lrc"):
                    # The attributes are encoded to UTF-8 before comparing;
                    # the original author noted that comparing the raw
                    # attribute values compared texts in different encodings.
                    distance = (
                        LevenshteinDistance(
                            artist,
                            node.getAttribute("artist").encode("utf-8")) +
                        LevenshteinDistance(
                            title,
                            node.getAttribute("title").encode("utf-8")))
                    # Strict comparison keeps the first of several equally
                    # close candidates.
                    if smallest is None or distance < smallest:
                        smallest = distance
                        best = (node.getAttribute("id"),
                                node.getAttribute("artist"),
                                node.getAttribute("title"))

                if best is None:
                    result.append('')
                    continue

                Id, artist, title = best
                code = self.CreateQianQianCode(Id, artist, title)
                txheaders = {
                    'User-agent':
                    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
                }
                req = urllib2.Request(
                    "http://ttlrcct2.qianqian.com/dll/lyricsvr.dll?dl?Id=%s&Code=%d"
                    % (Id, code), None, txheaders)
                lyric = urllib2.urlopen(req).read()

                # The server reports a rejected id/code pair inside the body.
                if lyric.find("Search ID or Code error!") >= 0:
                    result.append('')
                else:
                    result.append(lyric)
            except Exception:
                # Best effort: a failure for one track is reported on stdout
                # and must not stop the lookup of the remaining tracks.
                traceback.print_exc(file=sys.stdout)
                result.append('')

        # Return the collected lyrics on normal completion too, not only
        # when the query is aborted.
        return result
Esempio n. 3
0
 def sort_by_weighted_edit_dist(self):
     """
     Order the sequences by their weighted edit distance to scTIM.

     clear_weight() is called first; every entry then gets the value of
     LeveDist(5, 5, 3).computeDistance(entry.seq, scTIM.seq) as its weight
     and the list is sorted in place, largest weight first.
     (The original notes credit John for the approach.)
     """
     self.clear_weight()
     # NOTE(review): the three arguments are presumably per-operation edit
     # costs -- confirm against LeveDist.
     scorer = LeveDist(5, 5, 3)
     for entry in self.__sequence:
         entry.weight = scorer.computeDistance(entry.seq, self.__scTIM.seq)

     def by_weight(entry):
         return entry.weight

     self.__sequence.sort(key=by_weight, reverse=True)
Esempio n. 4
0
 def sort_by_edit_dist(self):
     """
     Order the sequences by their edit distance to scTIM.

     clear_weight() is called first; every entry then gets the value of
     LeveDist(1, 1, 1).computeDistance(entry.seq, scTIM.seq) as its weight
     and the list is sorted in place, largest weight first.
     (Ported from John Wenskovitch's original Java version.)
     """
     self.clear_weight()
     scorer = LeveDist(1, 1, 1)
     for entry in self.__sequence:
         entry.weight = scorer.computeDistance(entry.seq, self.__scTIM.seq)

     def by_weight(entry):
         return entry.weight

     self.__sequence.sort(key=by_weight, reverse=True)
Esempio n. 5
0
 def sort_by_edit_dist(self):
     """
     Rank the sequences by plain edit distance from scTIM.

     After clear_weight(), each entry's weight becomes the distance that
     LeveDist(1, 1, 1) computes between the entry's seq and scTIM's seq.
     The list is then sorted in place in descending weight order.
     (Ported from John Wenskovitch's original Java version.)
     """
     self.clear_weight()
     distance_calc = LeveDist(1, 1, 1)
     for item in self.__sequence:
         item.weight = distance_calc.computeDistance(item.seq,
                                                     self.__scTIM.seq)

     def weight_of(item):
         return item.weight

     self.__sequence.sort(key=weight_of, reverse=True)
Esempio n. 6
0
class SpellChecker:
    """Suggest known keywords that closely resemble the words of a text.

    Similarity between a word and a keyword is
    ``1 - distance / max(len(word), len(keyword))``, where ``distance``
    comes from ``self.distance_counter.getDistance``.
    """

    # Minimum similarity (inclusive) a keyword needs in order to be suggested.
    sim_min = 0.75

    def __init__(self):
        # NOTE(review): getKeyword() is assumed to return an iterable of
        # hashable keyword strings -- confirm against ContextIdentifier.
        self.keyword = ContextIdentifier().getKeyword()
        self.distance_counter = LevenshteinDistance()

    def getWordSuggestion(self, text):
        """Return the keywords that are near-misses of words in ``text``.

        Args:
            text: input string; it is split on any whitespace and every
                word is lower-cased before comparison.

        Returns:
            A list (in no particular order) of the keywords whose similarity
            to at least one word is ``>= sim_min``.  A keyword at distance 0
            from a word is not suggested for that word.
        """
        remaining = set(self.keyword)
        suggestions = set()
        # split() without an argument also separates on tabs and newlines
        # and never yields empty strings.
        words = {word.lower() for word in text.split()}

        for word in words:
            matched = {
                candidate for candidate in remaining
                if self._is_near_miss(word, candidate)
            }
            suggestions |= matched
            # A keyword that has been suggested once needs no further checks.
            remaining -= matched

        return list(suggestions)

    def _is_near_miss(self, word, candidate):
        """Tell whether ``candidate`` is similar enough to ``word``."""
        distance = self.distance_counter.getDistance(word, candidate)
        if distance == 0:
            # Identical strings: nothing to suggest for this word.
            return False
        similarity = 1 - distance / max(len(word), len(candidate))
        # ``>=`` because sim_min is documented as an inclusive bound.
        return similarity >= self.sim_min
Esempio n. 7
0
 def sort_by_weighted_edit_dist(self):
     """
     Order the sequences by weighted edit distance to scTIM, keeping the
     first and last entries pinned to the top.

     clear_weight() is called first; every entry then gets the value of
     LeveDist(5, 5, 3).computeDistance(entry.seq, scTIM.seq) as its weight.
     The first entry's weight is raised by 9999999999, the last entry's
     weight is set to one less than that, and the list is sorted in place,
     largest weight first.
     (The original notes credit John for the approach.)
     """
     self.clear_weight()
     scorer = LeveDist(5, 5, 3)
     for entry in self.__sequence:
         entry.weight = scorer.computeDistance(entry.seq, self.__scTIM.seq)

     # Per the original author's note the first entry is dTIM_core; the
     # large offset is meant to make it always come out on top.
     head = self.__sequence[0]
     head.weight += 9999999999
     # The last entry gets the next-highest weight.
     # NOTE(review): this assumes no computed distance comes close to the
     # offset -- confirm.
     tail = self.__sequence[-1]
     tail.weight = head.weight - 1

     def by_weight(entry):
         return entry.weight

     self.__sequence.sort(key=by_weight, reverse=True)
Esempio n. 8
0
 def __init__(self):
     """Load the keywords and create the distance helper used for lookups."""
     identifier = ContextIdentifier()
     self.keyword = identifier.getKeyword()
     self.distance_counter = LevenshteinDistance()