def intersect_count(a: collections.Counter, b: collections.Counter) -> collections.Counter:
    """Summarise the overlap between two multisets as match/miss totals.

    Args:
        a: Counts for the first collection (its surplus is reported as
            ``'false_positive'``).
        b: Counts for the second collection (its surplus is reported as
            ``'false_negative'``).

    Returns:
        A Counter with exactly three keys:
        ``'true_positive'``  - total size of the multiset intersection ``a & b``,
        ``'false_positive'`` - total of ``a`` minus the true positives,
        ``'false_negative'`` - total of ``b`` minus the true positives.
    """
    # ``&`` keeps, per key, the minimum of the two counts.
    matched = sum((a & b).values())
    return collections.Counter(
        {
            'true_positive': matched,
            'false_positive': sum(a.values()) - matched,
            'false_negative': sum(b.values()) - matched,
        }
    )
def reorganizeString(self, S):
    """Rearrange ``S`` so that no two adjacent characters are equal.

    :type S: str
    :rtype: str

    Feasibility: a valid arrangement exists exactly when the most frequent
    character fits into every other position, i.e. when its count does not
    exceed ``(len(S) + 1) // 2``.  Example: "aaab" has count 3 > 2, so it is
    impossible; "aab" has count 2 <= 2 and can become "aba".

    Returns a valid rearrangement (a permutation of ``S``), or ``""`` when
    none exists or when ``S`` is empty.
    """
    from collections import Counter

    if not S:
        return ""

    counts = Counter(S)
    if max(counts.values()) > (len(S) + 1) // 2:
        return ""

    # Fill the even positions first, then wrap around to the odd ones.
    # Starting with the most frequent character guarantees that a single
    # character never straddles the wrap in a way that makes it adjacent to
    # itself.
    slots = [""] * len(S)
    position = 0
    for char, count in counts.most_common():
        for _ in range(count):
            slots[position] = char
            position += 2
            if position >= len(S):
                position = 1
    return "".join(slots)
def _score_ngrams(ngrams: collections.Counter) -> float: """Compute n-gram based repeitition scores. Args: ngrams: A Counter object mapping each ngram to number of occurrences for the text. Returns: A Score float. """ num_total = sum(ngrams.values()) num_repeated = sum([c for c in ngrams.values() if c > 1]) return num_repeated / num_total if num_total > 0 else 0
def dedup_percentile(Counter):
    """Count the UMIs whose count is strictly above 1% of the mean count.

    ``Counter`` maps each UMI to its count.  The threshold is
    ``mean(counts) / 100``; UMIs at or below it are not counted.

    Returns the number of UMIs that pass the threshold.
    """
    cutoff = np.mean(list(Counter.values())) / 100
    kept = 0
    for umi in Counter.keys():
        if Counter[umi] > cutoff:
            kept += 1
    return kept
def create_freq_wordlist(cnt: collections.Counter, threshold=0.95) -> list:
    """Collect leading word-stat entries until they cover ``threshold`` of all counts.

    Entries are taken in the order produced by ``create_wordstats(cnt)``
    (presumably most frequent first - confirm against that helper).  Each
    entry's element at index 1 is added to a running total; collection stops
    right after the entry that pushes the total above
    ``int(sum(cnt.values()) * threshold)``.

    Args:
        cnt: Counter of word occurrences.
        threshold: Fraction of the total count that the result should exceed.

    Returns:
        The list of collected entries, including the one that crossed the
        cutoff (or all entries if the cutoff is never crossed).
    """
    cutoff = int(float(sum(cnt.values())) * threshold)
    selected = []
    covered = 0
    for entry in create_wordstats(cnt):
        selected.append(entry)
        covered += entry[1]
        if covered > cutoff:
            return selected
    return selected
def Symm(
    smiles: str,
    num_attached_atoms: int,
    num_attached_types: int,
    center_hybrid: Chem.rdchem.HybridizationType,
    count_rankings: collections.Counter,
) -> int:
    """Molecular symmetry.

    Looks up the symmetry number in ``symm_rules`` indexed by
    ``[num_attached_atoms][num_attached_types][center_hybrid]``.

    Args:
        smiles: SMILES string, used only in the warning message.
        num_attached_atoms: First index into ``symm_rules``.
        num_attached_types: Second index into ``symm_rules``.
        center_hybrid: Third index into ``symm_rules``.
        count_rankings: Counter whose values disambiguate the special case
            where the looked-up value is ``0``.

    Returns:
        The symmetry number.  If the lookup fails, a warning is logged and
        ``np.nan`` is returned (a float, despite the ``int`` annotation).
    """
    try:
        symm = symm_rules[num_attached_atoms][num_attached_types][center_hybrid]
    except Exception:
        # Best-effort lookup: any failure is logged and reported as NaN.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still propagate.
        logging.warning("symmetry exception: {}".format(smiles))
        symm = np.nan

    # Special case: a rule value of 0 is resolved from the ranking counts.
    # A 1/3 split (in either order) gives 3, anything else gives 2.
    if symm == 0:
        symm = 3 if sorted(count_rankings.values()) == [1, 3] else 2
    return symm
def plot_data(d: collections.Counter, figure_path: pathlib.Path) -> None:
    """
    Draw the Counter as a line plot, save it to disk and display it.

    The Counter's keys go on the x axis (labelled "Date") and its values on
    the y axis (labelled "Tweets received").  X tick labels are formatted as
    ``YYYY-MM-DD HH:MM``.

    :param d: Counter object of timestamps to plot
    :param figure_path: Path of file to save the plot in.
    """
    timestamps = []
    tweet_counts = []
    for stamp, received in d.items():
        timestamps.append(stamp)
        tweet_counts.append(received)

    plt.plot(timestamps, tweet_counts)
    plt.xlabel("Date")
    plt.ylabel("Tweets received")

    x_axis = plt.gca().xaxis
    x_axis.set_major_formatter(md.DateFormatter("%Y-%m-%d %H:%M"))

    # Save before show().
    plt.savefig(figure_path)
    plt.show()
def entropy(self, word):
    """Return the entropy of the cluster-label distribution for ``word``.

    Every text in ``self.Snap['TEXT']`` that contains ``word`` (substring
    match) contributes its label from ``self.labels``.  With ``p_l`` the
    fraction of those texts carrying label ``l``, the result is
    ``sum(-p_l * log(p_l))`` using the ``log`` function in scope.

    The label distribution is also printed, as in the original code.

    Returns 0 when no text contains ``word``.

    Raises:
        ValueError: if ``self.labels`` is None (clusters not built yet).
    """
    # ``is None`` rather than ``== None``: the latter is ambiguous for
    # array-like labels.
    if self.labels is None:
        raise ValueError('Please execute self.build_clusters() before calculating entropy(word)')

    word_labels = [
        self.labels[i]
        for i, text in enumerate(self.Snap['TEXT'])
        if word in text
    ]
    label_distribution = Counter(word_labels)
    print(label_distribution)

    # Float total so the ratios are true fractions on Python 2 as well.
    text_count = float(sum(label_distribution.values()))
    return sum(
        -(label_count / text_count) * log(label_count / text_count)
        for label_count in label_distribution.values()
    )
# Arguments of np.random.normal: mean, standard deviation, number of entries.
incomes = np.random.normal(100.0, 20.0, 10000)
# Append a single extreme value to the sample.
incomes = np.append(incomes, 1000000)
mean = np.mean(incomes)
median = np.median(incomes)
plt.hist(incomes, 1)
plt.show()

# Create random array
array = np.random.randint(5, 15, 40)

# Most frequent value using numpy: count how often each unique value occurs
# and pick the value with the highest count (the smallest value wins ties).
u, indices = np.unique(array, return_inverse=True)
most_frequent_np = u[np.argmax(np.bincount(indices))]

# Most frequent value using Counter
from collections import Counter

freq = Counter(array)
# most_common(1) yields the (value, count) pair with the highest count; the
# first value encountered wins ties.  ``max1`` is the most frequent value and
# ``k`` is how many times it occurs.
max1, k = freq.most_common(1)[0]
print(max1)

# Normal distributed
array = np.random.normal(150, 20, 1000)
standard_deviation = np.std(array)
variance = np.var(array)
plt.hist(array, 100, color='r')
plt.show()
# Фунция ввода прибыли за 4 квартала, на входе принимает пустой массив, на выходе заполненный массив def profit_company(array): i = 1 while i < 5: profit_company = int(input(f"Введите прибыль за {i}й квартал: ")) array[i] = profit_company i += 1 return array # Заполняем первую компанию company_1 = input("Введите название первой компании: ") prof_comp_1 = {} a = profit_company(prof_comp_1) sum_company_1 = sum(Counter.values(a)) # Заполняем вторую компанию company_2 = input("Введите название второй компании: ") prof_comp_2 = {} b = profit_company(prof_comp_2) sum_company_2 = sum(Counter.values(b)) print("*" * 50) # Считаем среднюю прибыль по двум компаниям за год avarage = (sum_company_1 + sum_company_2) / 2 print(f"Средняя прибыль за год двух компаний: {avarage}") # Выводим наименование убыточного и прибыльного предприятия if sum_company_1 < sum_company_2:
def dedup_none(Counter):
    """
    Treat every UMI observation as unique: return the total of all counts.
    """
    total = 0
    for count in Counter.values():
        total += count
    return total
def _get_percentage(counts: collections.Counter) -> List[Tuple[int, float]]: """Convert to percentage.""" total_count = float(sum(counts.values())) return [(k, v / total_count) for k, v in sorted(counts.items())]
__author__ = 'Крымов Иван'

# The user enters the number of companies, their names and the profit for each
# of the 4 quarters (i.e. 4 numbers) per company. The program determines the
# average annual profit over all companies and prints, for each company,
# whether its profit is below the average or not.

# Import from the original script; the code below no longer needs it, but it
# is kept in case other parts of the file rely on it.
from collections import Counter

orgs = {}
n = int(input("Введите количество организаций: "))
for i in range(n):
    org_name = input(f"Введите название организации номер {i + 1}: ")
    t_profit = 0
    for j in range(1, 5):
        profit = int(input(f"Введите прибыль за квартал {j}: "))
        t_profit += profit
    # Entering the same name twice overwrites the earlier entry.
    orgs[org_name] = t_profit
print(orgs)

# Average over the companies actually stored, not over ``n``: this avoids a
# division by zero when no companies were entered and stays correct when a
# duplicate name collapsed two entries into one.
if orgs:
    avg = sum(orgs.values()) / len(orgs)
    for el, total in orgs.items():
        if total < avg:
            print("Прибыль организации {} меньше среднего {} по группе компаний".format(el, avg))
        else:
            # Also reached when the profit equals the average.
            print("Прибыль организации {} выше среднего {} по группе компаний".format(el, avg))