def index_jobs(jobs):
    """Build a lookup from title/company keys to the ids of matching jobs.

    For every job, the lower-cased 'title' and 'company' values are passed to
    ``combinations`` and the job's 'id' is recorded under each key it yields.

    :param jobs: iterable of mappings with 'title', 'company' and 'id' keys
    :return: dict with the entries 'title' and 'company', each a
        ``defaultdict(list)`` mapping a key to a list of job ids
    """
    by_title = defaultdict(list)
    by_company = defaultdict(list)
    for job in jobs:
        # Both key collections are requested before either one is consumed.
        title_keys = combinations(job['title'].lower())
        company_keys = combinations(job['company'].lower())
        for key in title_keys:
            by_title[key].append(job['id'])
        for key in company_keys:
            by_company[key].append(job['id'])
    return {'title': by_title, 'company': by_company}
def test_combinations():
    """Check combinations(s, r) against the expected r-element selections."""
    letters = ["A", "B", "C", "D"]
    # Expected result for each selection size, in the order it must be produced.
    expected_by_size = [
        (1, [["A"], ["B"], ["C"], ["D"]]),
        (2, [['A', 'B'], ['A', 'C'], ['A', 'D'],
             ['B', 'C'], ['B', 'D'], ['C', 'D']]),
        (3, [['A', 'B', 'C'], ['A', 'B', 'D'],
             ['A', 'C', 'D'], ['B', 'C', 'D']]),
        (4, [['A', 'B', 'C', 'D']]),
    ]
    for size, expected in expected_by_size:
        assert expected == combinations(letters, size)
def pair_prob(self, rel_label, irr_label):
    """Return the pair value for two labels based on their top-k membership.

    The result depends only on how many of the two labels appear in
    ``self.topkYhat`` (both, exactly one, or none); which of the two is the
    relevant one does not matter.

    :param rel_label: first label of the pair
    :param irr_label: second label of the pair
    :return: a number built from ``self.rho`` and ratios of
        ``utils.combinations`` values over ``self.k``, ``self.d`` and
        ``self.num_classes``
    """
    n_classes = self.num_classes
    k = self.k
    d = self.d
    rho = self.rho
    top_k = self.topkYhat

    rel_in_top = rel_label in top_k
    irr_in_top = irr_label in top_k

    # Case 1: both labels are among the top-k.
    if rel_in_top and irr_in_top:
        if d > k - 2:
            return 1 - rho
        assert d <= k - 2
        chosen = utils.combinations(k - 2, d)
        total = utils.combinations(k, d)
        return 1 - rho + rho * chosen / total

    # Case 2: exactly one label is among the top-k.
    if rel_in_top or irr_in_top:
        if d == k:
            return rho
        assert d < k
        inside_ratio = (utils.combinations(k - 1, d)
                        / utils.combinations(k, d))
        outside_ratio = (utils.combinations(n_classes - k, d - 1)
                         / utils.combinations(n_classes - k, d))
        return rho * (inside_ratio * outside_ratio)

    # Case 3: neither label is among the top-k.
    chosen = utils.combinations(n_classes - k, d - 2)
    total = utils.combinations(n_classes - k, d)
    return rho * chosen / total
def screen_by_t(a_base, B, t, equal_list):
    """Collect (t-1)-prime groups per signature whose screened product is below B.

    The output file ``lib/{B}/{t}/t_signs_{t}_{B}.txt`` is cleared first; at
    the end every kept entry is written to it as one line, followed by the
    elapsed time.

    :param a_base: sequence of bases; only its last element is compared here
    :param B: upper bound for ``prod(primes) * largest_prime``
    :param t: group size parameter; groups of ``t - 1`` primes are formed
    :param equal_list: items exposing ``primes`` and ``sign`` attributes
    :return: list of ``Signature`` objects that passed the bound
    """
    out_path = f"lib/{B}/{t}/t_signs_{t}_{B}.txt"
    clearfile(out_path)

    # Measure the running time of the screening itself.
    started = time.time()

    kept = []
    for entry in equal_list:  # entry: primes sharing the same signature
        enough_primes = len(entry.primes) >= t - 1
        if not (enough_primes and entry.primes[0] > a_base[-1]):
            continue
        # Take more than strictly needed, because groups are later checked
        # as p1*p2*...*p(t-1)^2 < B.
        for group in combinations(entry.primes, t - 1):  # ascending order
            bound_value = np.prod(group) * group[-1]
            if bound_value < B:
                kept.append(Signature(entry.sign, group))

    total_time = "--- %s seconds ---\n" % (time.time() - started)

    # Write the results to the file, one entry per line.
    for position, signature in enumerate(kept):
        line = f"{position} {signature.sign} {signature.primes}\n"
        writefile(out_path, line)
    writefile(out_path, total_time)
    return kept
def smart_brute_force(slice_numbers, max_slices):
    """Try every selection size and keep the best result (too slow in practice).

    For each size from 1 to ``len(slice_numbers)`` the project helper
    ``combinations(slice_numbers, size, max_slices)`` is asked for a
    ``(value, indices)`` pair; the pair with the strictly largest value wins.

    :param slice_numbers: sequence of slice counts to choose from
    :param max_slices: limit forwarded to ``combinations``
    :return: tuple ``(best_indices, best_value)``; ``([], 0)`` if nothing
        beats a value of 0
    """
    best_value = 0
    best_indices = []
    for size in range(1, len(slice_numbers) + 1):
        value, indices = combinations(slice_numbers, size, max_slices)
        if value > best_value:
            best_value, best_indices = value, indices
    return best_indices, best_value
def generateChords(noteRange=(3, 4), maxWidth=1.999):
    """Yield chords as tuples of harmonic numbers whose members are coprime.

    For every value ``top`` produced by ``utils.integers(1)`` the remaining
    harmonics are chosen from ``ceil(top / maxWidth)`` up to ``top - 1``, and
    ``top`` itself is always appended as the last element of the chord.

    :param noteRange: ``(lo, hi)``; chords get ``lo - 1`` to ``hi - 1``
        harmonics below ``top``, e.g. (3, 4) -> 2..3
    :param maxWidth: divisor that sets the lowest admissible harmonic
    :return: generator of tuples accepted by ``coprimes(*chord)``
    """
    for top in utils.integers(1):  # highest harmonic present in the chord
        lowest = int(math.ceil(top / maxWidth))
        candidates = range(lowest, top)  # harmonics available below `top`
        if not len(candidates):
            continue
        for below in range(noteRange[0] - 1, noteRange[1]):
            # Only sizes that can be filled from the candidates are tried.
            if below <= len(candidates):
                for picked in utils.combinations(candidates, below):
                    chord = picked + (top, )
                    if coprimes(*chord):
                        yield chord
def __init__(self, dataset, n=-1, d=-1):
    """
    Load the data and prepare all possible preference pairs (pairs_index).

    The value -1 for n or d means "use the full size reported by the data".

    :param dataset: string, name of the data set
    :param n: int, number of instances to consider
    :param d: int, number of attributes to consider
    """
    self.X = utils.read_data_IL(dataset, n, d)
    self.nmax, self.dmax = self.X.shape

    # Fall back to the full extent when the caller passed the -1 sentinel.
    if n == -1:
        self.n = self.nmax
    else:
        self.n = n
    if d == -1:
        self.d = self.dmax
    else:
        self.d = d

    self.pairs_index = utils.combinations(self.n)
def test_generator(self, m):
    """
    Draw testing pairs (tuples of indices) at random.

    The candidate pairs come from utils.combinations(self.nmax, start), where
    start is 0 when all instances are in use and self.n + 1 otherwise.
    Sampling is done with replacement only when m exceeds the number of
    candidate pairs.

    :param m: int, number of preferences
    :return: tuple of tuples, testing pairs which are different from
        training pairs
    """
    if self.n == self.nmax:
        start = 0
    else:
        start = self.n + 1

    candidate_pairs = [(i, j) for (i, j) in utils.combinations(self.nmax, start)]
    self.pairs_index = np.array(candidate_pairs)

    n_candidates = len(self.pairs_index)
    with_replacement = bool(m > n_candidates)
    chosen = np.random.choice(n_candidates, m, replace=with_replacement)

    self.testing_pairs = tuple(tuple(pair) for pair in self.pairs_index[chosen])
    return self.testing_pairs
def t_more_3(a_base, B, t, primes_list):
    """Search for t-prime products below B that pass psp() and check_signs().

    For every signature group read from ``lib/equal/{a_base}/equal_signs.txt``
    the function forms groups of ``t - 1`` primes, and for each group looks in
    ``primes_list`` for one more, larger prime ``pt`` in a fixed residue class
    modulo ``mu``. Every hit is written to ``res/jae/{t}/{a_base}/spsp_{B}.txt``
    (cleared at the start) and collected; the elapsed time is appended to the
    same file at the end.

    :param a_base: sequence of bases; ``a_base[0]`` feeds ``Lambda_list`` and
        ``a_base[-1]`` is the lower bound for the smallest prime of a group
    :param B: upper bound for the products that are considered
    :param t: number of prime factors of the numbers searched for
    :param primes_list: candidate primes for the last factor
        (presumably sorted ascending -- TODO confirm)
    :return: list of ``Signature`` objects for the numbers that were found
    """
    clearfile(f"res/jae/{t}/{a_base}/spsp_{B}.txt")
    spsp = []
    ### Measure the running time
    start_time = time.time()
    ###
    i = 1  # running number of the result lines written to the file
    equal_list = parsefile(f"lib/equal/{a_base}/equal_signs.txt")
    for item in equal_list:  # item - primes sharing the same signature
        if len(item.primes) >= t - 1 and item.primes[0] > a_base[-1]:
            # take more, since later we check by groups p1*p2*...*p(t-1)^2<B
            combine = combinations(item.primes, t - 1)  # in ascending order
            for prms in combine:
                prod = np.prod(prms)
                if prod * prms[-1] < B:
                    a = a_base[0]
                    mu = Lambda_list([a], prms)
                    if gcd(mu, prod) > 1:
                        # prod has no inverse modulo mu, so no residue class
                        # can be derived for the last prime
                        continue
                    else:
                        import gmpy2
                        # c is the inverse of prod modulo mu; the last prime
                        # must satisfy pt % mu == c, i.e. pt * prod == 1 (mod mu)
                        c = gmpy2.powmod(prod, -1, mu)
                        for pt in primes_list:
                            if pt > prms[-1] and pt <= B / prod and pt % mu == c:
                                if psp(a_base, pt * prod) and check_signs(
                                        a_base, [pt, prms[-1]]):
                                    # NOTE(review): this rebinds the outer loop
                                    # variable `item`; `combine` was already built
                                    # from the old value, so iteration still works.
                                    # NOTE(review): `prms + [pt]` needs prms to be a
                                    # list; itertools.combinations yields tuples --
                                    # confirm which `combinations` is in scope.
                                    item = Signature(Sign(a_base, pt), prms + [pt])
                                    s = f"{i} {np.prod(item.primes)} {item.primes} {item.sign}\n"
                                    writefile(
                                        f"res/jae/{t}/{a_base}/spsp_{B}.txt", s)
                                    i += 1
                                    spsp.append(item)
                else:
                    break  # move on to the next item, since combine is ordered both vertically and horizontally
    ###
    total_time = "--- %s seconds ---\n" % (time.time() - start_time)
    ###
    writefile(f"res/jae/{t}/{a_base}/spsp_{B}.txt", total_time)
    return spsp
def original_solution():
    """
    Find the four-digit set whose arithmetic results score highest.

    Every 4-element subset of 1..9 that contains 1 is combined with every
    operator triple and parenthesization; the positive integer results are
    scored with largest_continuous_range and the best digit string is returned.

    original_solution took 584.394 ms
    Timing history:
        584.394 ms (write is_int inline instead of as a function call)
        741.234 ms (build a table of funcs instead of eval inline)
      13419.094 ms (intuition: solution won't have a 0 in it (useless!))
      20296.724 ms (intuition: solution needs to have 1 in it)
      50730.742 ms (save list of all operator combos instead of dynamic generation)
      51467.405 ms (format instead of 3 string replaces)
      53080.543 ms (essential set of combos)
      91008.076 ms (initial)
    The answer (original) is: 1258
    """
    operators = ['+', '-', '*', '/']
    # the five ways to parenthesize a four-operand expression
    templates = [
        '(a %c (b %c c)) %c d',
        '((a %c b) %c c) %c d',
        'a %c (b %c (c %c d))',
        'a %c ((b %c c) %c d)',
        '(a %c b) %c (c %c d)',
    ]
    # one compiled function per (parenthesization, operator triple);
    # eval only ever sees the constant strings built above
    funcs = []
    for template in templates:
        for ops in product(operators, repeat=3):
            funcs.append(eval('lambda a,b,c,d : %s' % (template % ops)))

    m, answer = 0, ''
    for numbers in combinations(xrange(1, 10), 4):
        if 1 not in numbers:
            continue  # intuition about requirements for solution
        outcomes = set()
        for a, b, c, d in permutations(numbers):
            for f in funcs:
                try:
                    n = f(a, b, c, d)
                except ZeroDivisionError:
                    continue
                # keep only positive whole-number results
                if 0 < n and int(n) == n:
                    outcomes.add(n)
        lcr = largest_continuous_range(sorted(outcomes))
        if m < lcr:
            m = lcr
            answer = ''.join(map(str, numbers))
            print('new max: %d from %s' % (m, answer))
    return answer
def get_phonemes(word):
    u"""
    Look up the phoneme sequences of a word in stress_dict.

    Hyphenated words are resolved part by part and the parts' results are
    merged through utils.combinations and utils.flatten. For a word ending
    in "'s" the base word is looked up and u'S' is appended to every sequence.

    >>> get_phonemes("hello")
    [[u'HH', u'AH0', u'L', u'OW1'], [u'HH', u'EH0', u'L', u'OW1']]
    >>> get_phonemes("world")
    [[u'W', u'ER1', u'L', u'D']]
    >>> get_phonemes("Capitalized") == []
    True
    >>> get_phonemes("Anneleen") == []
    True
    """
    if '-' in word:
        per_part = [get_phonemes(part) for part in word.split(u'-')]
        return map(utils.flatten, utils.combinations(*per_part))

    # Plain word: a direct dictionary lookup is all that is needed.
    if not word.endswith(u"'s"):
        return stress_dict.get(word, [])

    # Possessive: look up the base word and add the trailing S sound.
    base = word[:-2]
    stresses = stress_dict.get(base, [])
    if stresses is None:
        return stresses

    def _with_possessive(phonemes):
        return phonemes + [u'S']

    return map(_with_possessive, stresses)
def original_solution():
    """
    Find the four-digit set whose arithmetic results score highest.

    Every 4-element subset of 1..9 that contains 1 is combined with every
    operator triple and parenthesization; the positive integer results are
    scored with largest_continuous_range and the best digit string is returned.

    original_solution took 584.394 ms
    Timing history:
        584.394 ms (write is_int inline instead of as a function call)
        741.234 ms (build a table of funcs instead of eval inline)
      13419.094 ms (intuition: solution won't have a 0 in it (useless!))
      20296.724 ms (intuition: solution needs to have 1 in it)
      50730.742 ms (save list of all operator combos instead of dynamic generation)
      51467.405 ms (format instead of 3 string replaces)
      53080.543 ms (essential set of combos)
      91008.076 ms (initial)
    The answer (original) is: 1258
    """
    operators = ['+', '-', '*', '/']
    # the five ways to parenthesize a four-operand expression
    templates = [
        '(a %c (b %c c)) %c d',
        '((a %c b) %c c) %c d',
        'a %c (b %c (c %c d))',
        'a %c ((b %c c) %c d)',
        '(a %c b) %c (c %c d)',
    ]
    # one compiled function per (parenthesization, operator triple);
    # eval only ever sees the constant strings built above
    funcs = []
    for template in templates:
        for ops in product(operators, repeat=3):
            funcs.append(eval('lambda a,b,c,d : %s' % (template % ops)))

    m, answer = 0, ''
    for numbers in combinations(xrange(1, 10), 4):
        if 1 not in numbers:
            continue  # intuition about requirements for solution
        outcomes = set()
        for a, b, c, d in permutations(numbers):
            for f in funcs:
                try:
                    n = f(a, b, c, d)
                except ZeroDivisionError:
                    continue
                # keep only positive whole-number results
                if 0 < n and int(n) == n:
                    outcomes.add(n)
        lcr = largest_continuous_range(sorted(outcomes))
        if m < lcr:
            m = lcr
            answer = ''.join(map(str, numbers))
            print('new max: %d from %s' % (m, answer))
    return answer
def test_3_tokens(self):
    """A three-word string must produce exactly its three leading phrases."""
    produced = list(combinations('New York City'))
    expected = ('New', 'New York', 'New York City')

    self.assertEqual(len(produced), 3)
    for position, phrase in enumerate(expected):
        self.assertEqual(produced[position], phrase)
def e15():
    """Return the sum of the squared entries combinations(20, i), i = 0..20."""
    row = 20
    total = 0
    for i in xrange(row + 1):
        entry = combinations(row, i)
        total += entry * entry
    return total
def brute_force_abd(datacenters, group, params):
    """Find the cheapest ABD placement by exhaustive search.

    For every ``(m, q1, q2)`` in ``params`` each m-sized subset of datacenters
    is tried, and for every client datacenter each pair of quorums (sizes q1
    and q2) drawn from that subset. A full assignment is feasible when its
    summed, client-weighted latency is below both ``group.slo_read`` and
    ``group.slo_write``; the feasible assignment with the lowest get + put
    cost wins.

    :param datacenters: sequence of objects with ``id``, ``latencies``,
        ``network_cost`` and ``details`` (``"storage_cost"``, ``"price"``);
        ids are used as indices into ``datacenters`` and the result matrices
    :param group: object with ``client_dist``, ``metadata_size``,
        ``object_size``, ``slo_read``, ``slo_write``, ``read_ratio``,
        ``write_ratio``, ``arrival_rate`` and ``num_objects``
    :param params: iterable of ``(m, q1, q2)`` tuples
    :return: ``(m, selected_dcs, iq1, iq2, read_lat, write_lat, get_cost,
        put_cost, storage_cost, vm_cost)``, or ``None`` when no assignment
        satisfies the latency limits
    """
    dc_ids = [int(dc.id) for dc in datacenters]
    mincost = 999999
    min_get_cost = 0
    min_put_cost = 0
    read_lat = 0
    write_lat = 0
    selected_placement = None
    m_g = 0
    for m, q1, q2 in params:
        # Get possible combination of DCs (of size m) from the set of DCs
        for dcs in combinations(dc_ids, m):
            # Possible quorums (of size q1 and q2) within the m-sized set.
            # They are materialized as lists because they are walked once per
            # datacenter and inside nested loops; a bare combinations()
            # iterator would be exhausted after the first pass.
            quorums_q1 = list(combinations(dcs, q1))
            quorums_q2 = list(combinations(dcs, q2))
            # d[x] holds every candidate (latency, dcs, iq1, iq2) of client x
            d = []
            for dc in datacenters:
                col = []
                for _iq1 in quorums_q1:
                    # _iq1 stores indices of non-zero iq1 variables
                    for _iq2 in quorums_q2:
                        lat = group.client_dist[int(dc.id)] * \
                            (max([dc.latencies[j] for j in _iq1]) +
                             max([dc.latencies[k] for k in _iq2]))
                        col.append((lat, dcs, _iq1, _iq2))
                d.append(col)
            print(len(d), len(d[0]))
            for comb in product(*d):
                lat = 0
                _get_cost = 0
                _put_cost = 0
                for i, val in enumerate(comb):
                    lat += val[0]
                    _iq1 = val[2]
                    _iq2 = val[3]
                    # NOTE(review): the q1 terms use datacenters[i] (the
                    # client) for every quorum member rather than
                    # datacenters[j]; kept as is -- confirm the cost model.
                    _get_cost += group.client_dist[i] * \
                        (sum([datacenters[i].network_cost for j in _iq1]) +
                         sum([datacenters[k].network_cost for k in _iq2]))
                    _put_cost += group.client_dist[i] * \
                        (group.metadata_size*sum([datacenters[i].network_cost for j in _iq1]) +
                         group.object_size*sum([datacenters[k].network_cost for k in _iq2]))
                # Check if the selection meets latency constraints
                if lat < group.slo_read and lat < group.slo_write:
                    # Calculate cost
                    get_cost = group.read_ratio * group.arrival_rate * group.object_size * _get_cost
                    # Writes are weighted by the write ratio, matching the
                    # min_latency_* heuristics.
                    put_cost = group.write_ratio * group.arrival_rate * _put_cost
                    if (get_cost + put_cost) < mincost:
                        mincost = get_cost + put_cost
                        min_get_cost = get_cost
                        min_put_cost = put_cost
                        selected_placement = comb
                        read_lat = lat
                        write_lat = lat
                        m_g = m
    # No feasible assignment: report it the same way the heuristics do.
    if selected_placement is None:
        return None
    # Calculate other costs
    selected_dcs = selected_placement[0][1]
    storage_cost = group.num_objects*sum([datacenters[i].details["storage_cost"]
                                          for i in selected_dcs])*group.object_size
    vm_cost = sum([datacenters[i].details["price"] for i in selected_dcs])
    iq1 = [[0] * len(dc_ids) for _ in range(len(dc_ids))]
    iq2 = [[0] * len(dc_ids) for _ in range(len(dc_ids))]
    # Generate iq1, iq2: row = client index, column = chosen quorum member
    for i, val in enumerate(selected_placement):
        for j in val[2]:
            iq1[i][j] = 1
        for j in val[3]:
            iq2[i][j] = 1
    return (m_g, selected_dcs, iq1, iq2, read_lat, write_lat,
            min_get_cost, min_put_cost, storage_cost, vm_cost)
def brute_force_cas(datacenters, group, params):
    """Find the cheapest CAS placement by exhaustive search.

    Every entry of ``params`` is ``(m, k, q1, q2, q3, q4)``. Each m-sized
    subset of datacenters is tried, and for every client datacenter each
    combination of four quorums (sizes q1..q4) drawn from that subset. An
    assignment is feasible when the summed, client-weighted get latency is
    below ``group.slo_read`` and the put latency below ``group.slo_write``;
    the feasible assignment with the lowest get + put cost wins.

    :param datacenters: sequence of objects with ``id``, ``latencies``,
        ``network_cost`` and ``details`` (``"storage_cost"``, ``"price"``);
        ids are used as indices into ``datacenters`` and the result matrices
    :param group: object with ``client_dist``, ``metadata_size``,
        ``object_size``, ``slo_read``, ``slo_write``, ``read_ratio``,
        ``write_ratio``, ``arrival_rate`` and ``num_objects``
    :param params: iterable of sequences ``(m, k, q1, q2, q3, q4)``
    :return: ``(selected_dcs, iq1, iq2, iq3, iq4, M, K, read_lat, write_lat,
        get_cost, put_cost, storage_cost, vm_cost)``, or ``None`` when no
        assignment satisfies the latency limits
    """
    dc_ids = [int(dc.id) for dc in datacenters]
    mincost = 999999
    min_get_cost = 0
    min_put_cost = 0
    read_lat = 0
    write_lat = 0
    selected_placement = None
    M, K = 0, 0
    for param in params:
        m_g = param[0]
        k_g = param[1]
        q_sizes = param[2:]
        # Get possible combination of DCs (of size m) from the set of DCs
        for dcs in combinations(dc_ids, m_g):
            # Possible quorums for each quorum size within the m-sized set.
            # They are materialized as lists because they are walked once per
            # datacenter and inside nested loops; a bare combinations()
            # iterator would be exhausted after the first pass.
            possible_quorum_dcs = [list(combinations(dcs, size))
                                   for size in q_sizes]
            # d[x] holds every candidate tuple of client x
            d = []
            for dc in datacenters:
                col = []
                # _iqN stores indices of non-zero iqN variables
                for _iq1 in possible_quorum_dcs[0]:
                    for _iq2 in possible_quorum_dcs[1]:
                        for _iq3 in possible_quorum_dcs[2]:
                            for _iq4 in possible_quorum_dcs[3]:
                                get_lat = group.client_dist[int(dc.id)] * \
                                    (max([dc.latencies[j] for j in _iq1]) +
                                     max([dc.latencies[k] for k in _iq4]))
                                put_lat = group.client_dist[int(dc.id)] * \
                                    (max([dc.latencies[j] for j in _iq1]) +
                                     max([dc.latencies[k] for k in _iq2]) +
                                     max([dc.latencies[m] for m in _iq3]))
                                col.append((get_lat, put_lat, dcs,
                                            _iq1, _iq2, _iq3, _iq4))
                d.append(col)
            print(len(d), len(d[0]))
            for comb in product(*d):
                get_lat = 0
                put_lat = 0
                _get_cost = 0
                _put_cost = 0
                for i, val in enumerate(comb):
                    get_lat += val[0]
                    put_lat += val[1]
                    _iq1 = val[3]
                    _iq2 = val[4]
                    _iq3 = val[5]
                    _iq4 = val[6]
                    # NOTE(review): the q1/q3 terms use datacenters[i] (the
                    # client) for every quorum member rather than the member
                    # itself; kept as is -- confirm the cost model.
                    _get_cost += group.client_dist[i] * \
                        (group.metadata_size*sum([datacenters[i].network_cost for j in _iq1]) +
                         (group.object_size/k_g)*sum([datacenters[k].network_cost for k in _iq4]))
                    _put_cost += group.client_dist[i] * \
                        (group.metadata_size*(sum([datacenters[i].network_cost for j in _iq1]) +
                                              sum([datacenters[i].network_cost for k in _iq3])) +
                         (group.object_size/k_g)*sum([datacenters[m].network_cost for m in _iq2]))
                # Check if the selection meets latency constraints
                if get_lat < group.slo_read and put_lat < group.slo_write:
                    # Calculate cost
                    get_cost = group.read_ratio * group.arrival_rate * _get_cost
                    # Writes are weighted by the write ratio, matching the
                    # min_latency_* heuristics.
                    put_cost = group.write_ratio * group.arrival_rate * _put_cost
                    if (get_cost + put_cost) < mincost:
                        mincost = get_cost + put_cost
                        min_get_cost, min_put_cost = get_cost, put_cost
                        read_lat, write_lat = get_lat, put_lat
                        selected_placement = comb
                        M, K = m_g, k_g
    # No feasible assignment: report it the same way the heuristics do.
    if selected_placement is None:
        return None
    # Calculate other costs
    selected_dcs = selected_placement[0][2]
    storage_cost = group.num_objects*sum([datacenters[i].details["storage_cost"]
                                          for i in selected_dcs])*group.object_size/K
    vm_cost = sum([datacenters[i].details["price"] for i in selected_dcs])
    iq1 = [[0] * len(dc_ids) for _ in range(len(dc_ids))]
    iq2 = [[0] * len(dc_ids) for _ in range(len(dc_ids))]
    iq3 = [[0] * len(dc_ids) for _ in range(len(dc_ids))]
    iq4 = [[0] * len(dc_ids) for _ in range(len(dc_ids))]
    # Generate iq1..iq4: row = client index, column = chosen quorum member
    for i, val in enumerate(selected_placement):
        for j in val[3]:
            iq1[i][j] = 1
        for j in val[4]:
            iq2[i][j] = 1
        for j in val[5]:
            iq3[i][j] = 1
        for j in val[6]:
            iq4[i][j] = 1
    return (selected_dcs, iq1, iq2, iq3, iq4, M, K, read_lat, write_lat,
            min_get_cost, min_put_cost, storage_cost, vm_cost)
def min_latency_cas(datacenters, group, params):
    """Latency-based heuristic for CAS placement.

    For each ``(m, k, q1, q2, q3, q4)`` in ``params`` and each m-sized subset
    of datacenters, every client simply takes its q nearest members of the
    subset as quorums. A subset is feasible when the worst client get latency
    is below ``group.slo_read`` and the worst put latency is below
    ``group.slo_write``; the feasible subset with the lowest total of get,
    put, storage and VM cost is returned.

    :param datacenters: sequence of objects with ``id``, ``latencies``,
        ``network_cost`` and ``details`` (``"storage_cost"``, ``"price"``)
    :param group: object with ``client_dist``, ``metadata_size``,
        ``object_size``, ``slo_read``, ``slo_write``, ``read_ratio``,
        ``write_ratio``, ``arrival_rate`` and ``num_objects``
    :param params: iterable of ``(m, k, q1, q2, q3, q4)`` tuples
    :return: ``(M, K, selected_dcs, iq1, iq2, iq3, iq4, read_lat, write_lat,
        get_cost, put_cost, storage_cost, vm_cost)`` or ``None`` when no
        subset is feasible
    """
    dc_ids = [int(dc.id) for dc in datacenters]
    best_total = 99999999999
    best_get_cost = 0
    best_put_cost = 0
    read_lat = 0
    write_lat = 0
    selected_placement = None
    M, K = 0, 0
    storage_cost, vm_cost = 0, 0

    for m_g, k_g, q1, q2, q3, q4 in params:
        for dcs in combinations(dc_ids, m_g):
            get_latencies = []
            put_latencies = []
            get_sum = 0
            put_sum = 0
            placement = []
            for client in datacenters:
                # Members of the subset, nearest to this client first.
                nearest = sorted(dcs, key=lambda d: client.latencies[d])
                near1 = nearest[:q1]
                near2 = nearest[:q2]
                near3 = nearest[:q3]
                near4 = nearest[:q4]

                i = int(client.id)
                get_latencies.append(
                    max(client.latencies[j] for j in near1)
                    + max(client.latencies[k] for k in near4))
                put_latencies.append(
                    max(client.latencies[j] for j in near1)
                    + max(client.latencies[k] for k in near2)
                    + max(client.latencies[m] for m in near3))

                get_sum += group.client_dist[i] * (
                    group.metadata_size
                    * sum(datacenters[j].network_cost for j in near1)
                    + (group.object_size / k_g)
                    * sum(datacenters[k].network_cost for k in near4))
                put_sum += group.client_dist[i] * (
                    group.metadata_size
                    * (sum(datacenters[j].network_cost for j in near1)
                       + sum(datacenters[i].network_cost for _ in near3))
                    + (group.object_size / k_g)
                    * sum(datacenters[i].network_cost for _ in near2))
                placement.append([dcs, near1, near2, near3, near4])

            worst_get = max(get_latencies)
            worst_put = max(put_latencies)
            # Skip subsets that break either latency limit.
            if not (worst_get < group.slo_read and worst_put < group.slo_write):
                continue

            get_cost = group.read_ratio * group.arrival_rate * get_sum
            put_cost = group.write_ratio * group.arrival_rate * put_sum
            subset_storage = (
                group.num_objects
                * sum(datacenters[x].details["storage_cost"] / 730 for x in dcs)
                * (group.object_size / k_g))
            subset_vm = sum(datacenters[x].details["price"] for x in dcs)
            total = get_cost + put_cost + subset_storage + subset_vm
            if total < best_total:
                best_total = total
                best_get_cost, best_put_cost = get_cost, put_cost
                storage_cost, vm_cost = subset_storage, subset_vm
                read_lat, write_lat = worst_get, worst_put
                selected_placement = placement
                M, K = m_g, k_g

    if selected_placement is None:
        return None

    selected_dcs = selected_placement[0][0]
    size = len(dc_ids)

    def indicator(slot):
        # 0/1 matrix: row = client position, column = chosen quorum member.
        matrix = [[0] * size for _ in range(size)]
        for row, entry in enumerate(selected_placement):
            for col in entry[slot]:
                matrix[row][col] = 1
        return matrix

    iq1 = indicator(1)
    iq2 = indicator(2)
    iq3 = indicator(3)
    iq4 = indicator(4)
    return (M, K, selected_dcs, iq1, iq2, iq3, iq4, read_lat, write_lat,
            best_get_cost, best_put_cost, storage_cost, vm_cost)
def min_latency_abd(datacenters, group, params):
    """Latency-based greedy heuristic for ABD placement.

    For each ``(m, q1, q2)`` in ``params`` and each m-sized subset of
    datacenters, every client takes its q1 and q2 nearest members of the
    subset as quorums. A subset is feasible when the worst client latency is
    below both ``group.slo_read`` and ``group.slo_write``; the feasible subset
    with the lowest total of get, put, storage and VM cost is returned.

    :param datacenters: sequence of objects with ``id``, ``latencies``,
        ``network_cost`` and ``details`` (``"storage_cost"``, ``"price"``)
    :param group: object with ``client_dist``, ``metadata_size``,
        ``object_size``, ``slo_read``, ``slo_write``, ``read_ratio``,
        ``write_ratio``, ``arrival_rate`` and ``num_objects``
    :param params: iterable of ``(m, q1, q2)`` tuples
    :return: ``(m, selected_dcs, iq1, iq2, read_lat, write_lat, get_cost,
        put_cost, storage_cost, vm_cost)`` or ``None`` when no subset is
        feasible
    """
    dc_ids = [int(dc.id) for dc in datacenters]
    best_total = 99999999999
    best_get_cost = 0
    best_put_cost = 0
    read_lat = 0
    write_lat = 0
    selected_placement = None
    m_g = 0
    storage_cost, vm_cost = 0, 0

    for m, q1, q2 in params:
        # Every m-sized subset of the datacenters is a candidate placement.
        for dcs in combinations(dc_ids, m):
            client_latencies = []
            get_sum = 0
            put_sum = 0
            placement = []
            for client in datacenters:
                # Members of the subset, nearest to this client first.
                nearest = sorted(dcs, key=lambda d: client.latencies[d])
                near1 = nearest[:q1]
                near2 = nearest[:q2]

                i = int(client.id)
                client_latencies.append(
                    max(client.latencies[j] for j in near1)
                    + max(client.latencies[k] for k in near2))

                get_sum += group.client_dist[i] * (
                    sum(datacenters[j].network_cost for j in near1)
                    + sum(datacenters[i].network_cost for _ in near2))
                put_sum += group.client_dist[i] * (
                    group.metadata_size
                    * sum(datacenters[j].network_cost for j in near1)
                    + group.object_size
                    * sum(datacenters[i].network_cost for _ in near2))
                placement.append([dcs, near1, near2])

            worst = max(client_latencies)
            # Skip subsets that break either latency limit.
            if not (worst < group.slo_read and worst < group.slo_write):
                continue

            get_cost = (group.read_ratio * group.arrival_rate
                        * group.object_size * get_sum)
            put_cost = group.write_ratio * group.arrival_rate * put_sum
            subset_storage = (
                group.num_objects
                * sum(datacenters[x].details["storage_cost"] / 730 for x in dcs)
                * group.object_size)
            subset_vm = sum(datacenters[x].details["price"] for x in dcs)
            total = get_cost + put_cost + subset_storage + subset_vm
            if total < best_total:
                best_total = total
                storage_cost, vm_cost = subset_storage, subset_vm
                best_get_cost, best_put_cost = get_cost, put_cost
                selected_placement = placement
                read_lat, write_lat = worst, worst
                m_g = m

    if selected_placement is None:
        return None

    selected_dcs = selected_placement[0][0]
    size = len(dc_ids)

    def indicator(slot):
        # 0/1 matrix: row = client position, column = chosen quorum member.
        matrix = [[0] * size for _ in range(size)]
        for row, entry in enumerate(selected_placement):
            for col in entry[slot]:
                matrix[row][col] = 1
        return matrix

    iq1 = indicator(1)
    iq2 = indicator(2)
    return (m_g, selected_dcs, iq1, iq2, read_lat, write_lat,
            best_get_cost, best_put_cost, storage_cost, vm_cost)