def solveOne(self, A, B):
    """Print the len(A) largest pairwise sums of elements from A and B.

    A, B: sequences of numbers.  Prints a descending list of the top
    len(A) sums over the cartesian product A x B; returns None.
    """
    # Sum each (a, b) pair from the cross product.
    sums = [a + b for a, b in product(A, B)]
    # heapq.nlargest accepts any iterable, so the original explicit
    # _heapify_max was unnecessary; use the public API, not the private
    # _heapify_max/_nlargest helpers (removed/renamed across versions).
    print(heapq.nlargest(len(A), sums))
def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """
    # Fast path: without a key function, defer straight to the helper.
    if key is None:
        return _nlargest(n, iterable)
    # Decorate-sort-undecorate: pair every item with its key and a
    # serial number so that equal keys never compare the items themselves.
    keyed, raw = tee(iterable)
    decorated = izip(imap(key, keyed), count(), raw)
    best = _nlargest(n, decorated)
    # Strip the (key, serial) decoration, keeping only the items.
    return map(itemgetter(2), best)
def get_close_matches_indexes(word, possibilities, n=3, cutoff=0.6):
    """Return the indexes of the best "good enough" matches for *word*.

    word is a sequence for which close matches are desired (typically a
    string).  possibilities is a list of sequences against which to match
    word (typically a list of strings).  Optional arg n (default 3) is the
    maximum number of close matches to return; n must be > 0.  Optional arg
    cutoff (default 0.6) is a float in [0, 1]; possibilities that don't
    score at least that similar to word are ignored.
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
    matcher = SequenceMatcher()
    matcher.set_seq2(word)
    scored = []
    for index, candidate in enumerate(possibilities):
        matcher.set_seq1(candidate)
        # Cheap-to-expensive ratio cascade: bail out early on poor fits.
        if (matcher.real_quick_ratio() >= cutoff
                and matcher.quick_ratio() >= cutoff
                and matcher.ratio() >= cutoff):
            scored.append((matcher.ratio(), index))
    # Keep the n best scorers, then drop the scores themselves.
    return [index for score, index in _nlargest(n, scored)]
def run(self):
    # Thread body (Python 2): receive one JSON message of readings over
    # the module-level socket `conn`, keep the two largest values from
    # it, and forward a batch to the cloud once `size_list_devices`
    # devices have reported.
    # NOTE(review): `conn`, `json`, `heapq`, and the two globals are
    # defined elsewhere in the file — not visible from this block.
    while True:
        global size_list_devices
        global list_devices
        data = conn.recv(2048)
        # assumes the payload is a JSON object whose values are numeric
        # readings — TODO confirm against the sending client
        loaded_r = json.loads(data)
        print "LISTA"
        print loaded_r
        print "END LISTA"
        # Two largest readings of this message (private heapq helper).
        kvalues = heapq._nlargest(2, loaded_r.values())
        print "Server received data:", kvalues
        if len(list_devices.values()) == size_list_devices:
            # transform to JSON for sending
            data = json.dumps(list_devices)
            print "enviadooooooooooooooooooooooooo" + data
            self.send_cloud(data)
            # Reset the batch after it has been sent.
            list_devices = {}
        else:
            # add into the dictionary, keyed "g0", "g1", ... in arrival order
            name = "g" + str(len(list_devices.values()))
            list_devices[name] = kvalues
        # Blocks until the operator answers; 'exit' ends this thread.
        MESSAGE = raw_input(
            "Multithreaded Python server : Enter Response from Server/Enter exit:"
        )
        if MESSAGE == 'exit':
            break
def get_best_fit_index(self, word, possibilities, n=1, cutoff=0):
    """Return the indexes of the best "good enough" matches for *word*.

    Adapted from:
    https://stackoverflow.com/questions/50861237/is-there-an-alternative-to-difflib-get-close-matches-that-returns-indexes-l

    word is a sequence for which close matches are desired (typically a
    string).  possibilities is a list of sequences against which to match
    word (typically a list of strings).  Optional arg n (default 1) is the
    maximum number of close matches to return; n must be > 0.  Optional arg
    cutoff (default 0, i.e. every candidate is considered) is a float in
    [0, 1]; possibilities that don't score at least that similar to word
    are ignored.

    Raises ValueError if n <= 0 or cutoff is outside [0.0, 1.0].
    """
    # NOTE: the docstring previously claimed defaults n=3 / cutoff=0.6
    # (copied from the upstream recipe) — the actual defaults are 1 / 0.
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n, ))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff, ))
    result = []
    s = SequenceMatcher()
    s.set_seq2(word)
    for idx, x in enumerate(possibilities):
        s.set_seq1(x)
        # Cheap upper-bound ratios first; full ratio only when they pass.
        if s.real_quick_ratio() >= cutoff and \
           s.quick_ratio() >= cutoff and \
           s.ratio() >= cutoff:
            result.append((s.ratio(), idx))
    # Move the best scorers to head of list
    result = _nlargest(n, result)
    # Strip scores for the best n matches
    return [x for score, x in result]
def get_close_matches_indexes(word, n=3, cutoff=0.6):
    """Match *word* against the 'title' column of scrap.csv and return the
    'response' value of the single best-matching row.

    word: query string compared against each title.
    n: maximum number of candidate matches considered; must be > 0.
    cutoff: float in [0, 1]; titles scoring below it are ignored.

    Prints the 'response' values of all n candidates as a side effect.
    Raises ValueError on invalid n/cutoff, and IndexError when no title
    scores at least `cutoff`.
    """
    data = pd.read_csv('scrap.csv')
    # (The original also built an identical, unused `pattern` list.)
    possibilities = list(data['title'])
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n, ))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff, ))
    result = []
    s = SequenceMatcher()
    s.set_seq2(word)
    for idx, x in enumerate(possibilities):
        s.set_seq1(x)
        # Cheap upper-bound ratios first; full ratio only when they pass.
        if s.real_quick_ratio() >= cutoff and \
           s.quick_ratio() >= cutoff and \
           s.ratio() >= cutoff:
            result.append((s.ratio(), idx))
    # Row indexes of the n best-scoring titles, best first.
    best = [x for score, x in _nlargest(n, result)]
    print(data.loc[best]['response'])
    return data.loc[best[0]]['response']
def run(self):
    # Thread body (Python 2): receive a JSON payload of three groups of
    # temperatures ('g0'..'g2'), find the 3 largest values overall, and
    # write the name of each value's group to larger.txt.
    # NOTE(review): `conn`, `json`, `heapq` are defined elsewhere in the
    # file — not visible from this block.
    while True:
        data = conn.recv(2048)
        print "Server received data:", data
        loaded_r = json.loads(data)
        # Flatten all three groups into one list of readings.
        lista_max = []
        for i in range(0, 3):
            name = 'g' + str(i)
            l = list(loaded_r[name])
            lista_max = lista_max + l
        print " 3 maiores temperaturas"
        # Three largest temperatures overall (private heapq helper).
        list_larger = heapq._nlargest(3, lista_max)
        # Overwritten on every message; records which group each of the
        # top readings belongs to.
        F = open('larger.txt', 'w')
        for i in range(0, 3):
            for j in range(0, len(list_larger)):
                name = 'g' + str(i)
                if list_larger[j] in loaded_r[name]:
                    F.write(name)
                    print name
        F.close()
        # Blocks until the operator answers; 'exit' ends this thread.
        MESSAGE = raw_input(
            "Multithreaded Python server : Enter Response from Server/Enter exit:"
        )
        if MESSAGE == 'exit':
            break
def case_insensitive_close_matches(word, possibilities, n=3, cutoff=0.6,
                                   excpt=None):
    """Use SequenceMatcher to return list of the best "good enough" matches.

    word is a sequence for which close matches are desired (typically a
    string).  possibilities is a list of sequences against which to match
    word (typically a list of strings).
    Optional arg n (default 3) is the maximum number of close matches to
    return.  n must be > 0.
    Optional arg cutoff (default 0.6) is a float in [0, 1].  Possibilities
    that don't score at least that similar to word are ignored.
    Optional arg excpt: an exception type; when given, it is always raised
    with a "did you mean ...?" message built from the matches.

    The best (no more than n) matches among the possibilities are returned
    in a list, sorted by similarity score, most similar first.

    >>> case_insensitive_close_matches("appel", ["ape", "apple", "peach", "puppy"])
    ['apple', 'ape']
    >>> import keyword as _keyword
    >>> case_insensitive_close_matches("wheel", _keyword.kwlist)
    ['while']
    >>> case_insensitive_close_matches("Accept", _keyword.kwlist)
    ['except']
    """
    # (The doctests above previously called get_close_matches, the
    # case-sensitive stdlib function this one is derived from.)
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n, ))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff, ))
    result = []
    s = _SequenceMatcher()
    # Casefold both sides so comparison is case-insensitive.
    s.set_seq2(supercasefold(word))
    for x in possibilities:
        s.set_seq1(supercasefold(x))
        # Cheap upper-bound ratios first; full ratio only when they pass.
        if s.real_quick_ratio() >= cutoff and \
           s.quick_ratio() >= cutoff:
            ratio = s.ratio()  # compute once; was computed twice before
            if ratio >= cutoff:
                # Score on the folded text but keep the original spelling.
                result.append((ratio, x))
    # Move the best scorers to head of list
    result = _nlargest(n, result)
    # Strip scores for the best n matches
    ret = [x for score, x in result]
    if excpt is not None:  # PEP 8 idiom (was: `not excpt is None`)
        did_you_mean = "'{}' not found, did you mean {}?".format(
            word, " or ".join("'{}'".format(s) for s in ret))
        raise excpt(did_you_mean)
    return ret
def case_insensitive_close_matches(word, possibilities, n=3, cutoff=0.6,
                                   excpt=None):
    """Get a list of the best "good enough" matches.

    Parameters
    ----------
    word : str
        A base string for which close matches are desired
    possibilities : Collection[str]
        Word list against which to match word.
    n : int, default 3
        Maximum number of close matches to return, must be > 0.
    cutoff : float, default 0.6
        A float in the range [0, 1]. Possibilities that don't score at
        least that similar to `word` are ignored.
    excpt : type[Exception], optional
        When given, it is always raised with a "did you mean ...?"
        message built from the matches.

    The best (no more than n) matches among the possibilities are returned
    in a list, sorted by similarity score, most similar first.

    Examples
    --------
    >>> case_insensitive_close_matches("appel", ["ape", "apple", "peach", "puppy"])
    ['apple', 'ape']
    >>> import keyword
    >>> case_insensitive_close_matches("wheel", keyword.kwlist)
    ['while']
    >>> case_insensitive_close_matches("apples", keyword.kwlist)
    []
    >>> case_insensitive_close_matches("Accept", keyword.kwlist)
    ['except']
    >>> case_insensitive_close_matches("NonLocal", keyword.kwlist)
    ['nonlocal']
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
    result = []
    s = _SequenceMatcher()
    # Casefold both sides so comparison is case-insensitive.
    s.set_seq2(supercasefold(word))
    for x in possibilities:
        s.set_seq1(supercasefold(x))
        # Cheap upper-bound ratios first; full ratio only when they pass.
        if s.real_quick_ratio() >= cutoff and \
           s.quick_ratio() >= cutoff:
            ratio = s.ratio()  # compute once; was computed twice before
            if ratio >= cutoff:
                # Score on the folded text but keep the original spelling.
                result.append((ratio, x))
    # Move the best scorers to head of list
    result = _nlargest(n, result)
    # Strip scores for the best n matches
    ret = [x for score, x in result]
    if excpt is not None:  # PEP 8 idiom (was: `not excpt is None`)
        did_you_mean = "'{}' not found, did you mean {}?".format(
            word, " or ".join("'{}'".format(s) for s in ret))
        raise excpt(did_you_mean)
    return ret
def solve(self, A, B):
    """Return the len(A) largest pairwise sums a + b over A x B,
    in descending order.

    A, B: sequences of numbers.  Returns [] when A is empty.
    """
    # Sum each (a, b) pair from the cross product.  The original kept
    # unused locals (cnt, d), commented-out sorts, and a pointless
    # _heapify_max: heapq.nlargest works on any iterable directly, and
    # the private _heapify_max/_nlargest helpers should not be used.
    sums = [a + b for a, b in product(A, B)]
    return heapq.nlargest(len(A), sums)
def case_insensitive_close_matches(word, possibilities, n=3, cutoff=0.6,
                                   excpt=None):
    """Use SequenceMatcher to return list of the best "good enough" matches.

    word is a sequence for which close matches are desired (typically a
    string).  possibilities is a list of sequences against which to match
    word (typically a list of strings).
    Optional arg n (default 3) is the maximum number of close matches to
    return.  n must be > 0.
    Optional arg cutoff (default 0.6) is a float in [0, 1].  Possibilities
    that don't score at least that similar to word are ignored.
    Optional arg excpt: an exception type; when given, it is always raised
    with a "did you mean ...?" message built from the matches.

    The best (no more than n) matches among the possibilities are returned
    in a list, sorted by similarity score, most similar first.

    >>> case_insensitive_close_matches("appel", ["ape", "apple", "peach", "puppy"])
    ['apple', 'ape']
    >>> import keyword as _keyword
    >>> case_insensitive_close_matches("wheel", _keyword.kwlist)
    ['while']
    >>> case_insensitive_close_matches("Accept", _keyword.kwlist)
    ['except']
    """
    # (The doctests above previously called get_close_matches, the
    # case-sensitive stdlib function this one is derived from.)
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
    result = []
    s = _SequenceMatcher()
    # Casefold both sides so comparison is case-insensitive.
    s.set_seq2(supercasefold(word))
    for x in possibilities:
        s.set_seq1(supercasefold(x))
        # Cheap upper-bound ratios first; full ratio only when they pass.
        if s.real_quick_ratio() >= cutoff and \
           s.quick_ratio() >= cutoff:
            ratio = s.ratio()  # compute once; was computed twice before
            if ratio >= cutoff:
                # Score on the folded text but keep the original spelling.
                result.append((ratio, x))
    # Move the best scorers to head of list
    result = _nlargest(n, result)
    # Strip scores for the best n matches
    ret = [x for score, x in result]
    if excpt is not None:  # PEP 8 idiom (was: `not excpt is None`)
        did_you_mean = "'{}' not found, did you mean {}?".format(
            word, " or ".join("'{}'".format(s) for s in ret))
        raise excpt(did_you_mean)
    return ret
def get_close_matches_indexes(word, possibilities, n=3, cutoff=0.6):
    """Return a list of the indexes of the best "good enough" matches.

    A simple example: word = "hello",
    possibilities = ["hello", "i", "am", "hulk"] returns [0].

    Mostly taken from:
    https://stackoverflow.com/questions/50861237/is-there-an-alternative-to-
    difflib-get-close-matches-that-returns-indexes-l

    :param word: sequence for which close matches are desired (typically
        a string).
    :param possibilities: list of sequences against which to match word
        (typically a list of strings).
    :param n: (default 3) maximum number of close matches to return;
        must be > 0.
    :param cutoff: (default 0.6) float in [0, 1]; possibilities that
        don't score at least that similar to word are ignored.
    :return: a list with indices of the close matches
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n, ))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff, ))
    matcher = SequenceMatcher()
    # The matcher has two slots; the fixed word goes into slot 2.
    matcher.set_seq2(word)
    scored = []
    for index, candidate in enumerate(possibilities):
        # Each candidate rotates through slot 1.
        matcher.set_seq1(candidate)
        # Increasingly expensive similarity bounds, cheapest first.
        if (matcher.real_quick_ratio() >= cutoff
                and matcher.quick_ratio() >= cutoff
                and matcher.ratio() >= cutoff):
            scored.append((matcher.ratio(), index))
    # Pick the n highest-scoring candidates, then discard the scores.
    return [index for score, index in _nlargest(n, scored)]
def get_close_matches_indexes(word, possibilities, n=3, cutoff=0.6):
    """Return the indexes of the up-to-n possibilities most similar to
    *word*, best match first; candidates scoring below *cutoff* are
    ignored.
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n, ))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff, ))
    matcher = SequenceMatcher()
    matcher.set_seq2(word)
    candidates = []
    for position, text in enumerate(possibilities):
        matcher.set_seq1(text)
        # Guard clauses: reject on the cheap bounds before the full ratio.
        if matcher.real_quick_ratio() < cutoff:
            continue
        if matcher.quick_ratio() < cutoff:
            continue
        score = matcher.ratio()
        if score >= cutoff:
            candidates.append((score, position))
    # n best scorers, scores stripped.
    return [position for score, position in _nlargest(n, candidates)]
def main(list, file_path): """ :param list: list with top k documents names :param file_path: path to stemmed corpus :return: temp2: list with most cooccured terms """ print len(list) temp = [] ngram = 1 for x in range(len(list)): with open(str(file_path) + str(list[x]), 'r') as f: for line in f: list1 = [] list2 = [] for word in line.split(): temp.append(word) a = find_ngrams(temp, ngram) for y in range(len(a)): if list1.__contains__(a[y]): list2[list1.index(a[y])] += 1 else: list1.append(a[y]) list2.append(1) outF = open("term_frequency_table_" + str(ngram) + "_ngram.txt", "w") a = heapq._nlargest(len(list2), zip(list2, list1)) b = heapq._nsmallest(len(list2), zip(list2, list1)) for l in a: outF.write(str(l)) outF.write("\n") outF.close() temp2 = [] for x in range(0, len(b)): temp2.append(b.pop()[1]) return temp2