def calculate_inverted_index(n_gram, dgaps, filtered_file_terms, filtered_file):
    """Add one file's terms to the global positional inverted index.

    Only ``n_gram == '1'`` is handled; any other value leaves the index
    untouched.  For every distinct term of the file a posting
    ``[filtered_file, count, positions]`` is stored under
    ``inverted_index_with_term_positions[term]`` unless a posting for
    ``filtered_file`` is already present for that term.

    Args:
        n_gram: n-gram size as a string; only ``'1'`` triggers indexing.
        dgaps: truthy to pass the positions through ``dgaps_encode``.
        filtered_file_terms: ordered, hashable terms of the file.
        filtered_file: identifier stored as the first posting element.
    """
    if n_gram != '1':
        return

    # Group positions in a single pass instead of rescanning the whole term
    # list for every occurrence of every term (which was quadratic).
    positions_by_term = {}
    for position, term in enumerate(filtered_file_terms):
        positions_by_term.setdefault(term, []).append(position)

    for term, positions in positions_by_term.items():
        if dgaps:
            # Hand the encoder its own copy so the raw list stays intact.
            term_positions = dgaps_encode(term, list(positions))
        else:
            term_positions = positions

        if term not in inverted_index_with_term_positions:
            # NOTE(review): a term seen for the first time has always been
            # stored with its raw positions, even when dgaps is enabled;
            # kept as-is -- confirm whether the encoded form was intended.
            inverted_index_with_term_positions[term] = [
                [filtered_file, len(positions), positions]
            ]
            continue

        postings = inverted_index_with_term_positions[term]
        if not any(posting[0] == filtered_file for posting in postings):
            postings.append(
                [filtered_file, len(term_positions), term_positions])
def parse_output(out):
    """Split captured output into its result line and marker line indices.

    Returns a dict with ``'result'`` (the last line of *out*), ``'compile'``
    (indices of the lines containing ``COMPILE_STRING``) and ``'attach'``
    (indices of the lines containing ``ATTACH_STRING``).
    """
    lines = out.splitlines()
    compile_hits = [
        number for number, text in enumerate(lines) if COMPILE_STRING in text
    ]
    attach_hits = [
        number for number, text in enumerate(lines) if ATTACH_STRING in text
    ]
    return {
        'result': lines[-1],
        'compile': compile_hits,
        'attach': attach_hits,
    }
def process_char2word(doc, ent_seqs, rel_seqs):
    """Convert character-level entity/relation tags to word level.

    Args:
        doc: text whose words are separated by spaces and newlines.
        ent_seqs: per-character entity tags, indexable by character offset.
        rel_seqs: per-character relation tags, indexable by character offset.

    Returns:
        A tuple ``(sentences, ent_lists, rel_lists)``: the words of every
        non-empty line, the entity tag found at the first character of each
        word, and the relation tag at that character when the entity tag
        starts with ``'L'`` or ``'U'`` (``''`` otherwise).
    """
    # Words grouped by line; blank lines are skipped entirely.
    remove_space_list = []
    for line in doc.split('\n'):
        pieces = line.strip().split(' ')
        if pieces == ['']:
            continue
        words = []
        for piece in pieces:
            if piece == '':
                continue
            parts = piece.split()
            # A piece holding other whitespace keeps only its first chunk.
            words.append(parts[0] if len(parts) > 1 else piece)
        remove_space_list.append(words)

    # Record where each word starts.  Noting the start as soon as a word
    # begins also covers a final word that is not followed by whitespace,
    # which used to be dropped from the tag lists.
    word_starts = []
    in_word = False
    for offset, char in enumerate(doc):
        is_separator = char == "\n" or char == " "
        if not is_separator and not in_word:
            word_starts.append(offset)
            in_word = True
        elif is_separator and in_word:
            in_word = False

    ent_lists = []
    rel_lists = []
    for start in word_starts:
        tag = ent_seqs[start]
        ent_lists.append(tag)
        if tag != '' and (tag[0] == 'L' or tag[0] == 'U'):
            rel_lists.append(rel_seqs[start])
        else:
            rel_lists.append('')
    return remove_space_list, ent_lists, rel_lists
def findAllIndexesOfValue(arr_list, w):
    """Return the list of every index of *arr_list* whose item equals *w*."""
    return [position for position, item in enumerate(arr_list) if item == w]
def sort_realignments(realignments, sequences):
    """Return the realigned sequences in the order of the input sequences.

    Each realignment is matched to the first not-yet-claimed input sequence
    that equals it once the ``'-'`` gap characters are removed.

    Parameters
    ----------
    realignments : list
        aligned multiple sequences (iterables of string pieces)
    sequences : list
        list of sequences with fixed order (iterables of string pieces)

    Returns
    -------
    realignments_sort : list
        realigned sequences (joined to strings) in the same order as the
        input sequences
    """
    from collections import defaultdict, deque

    sequences = [''.join(acts) for acts in sequences]
    realignments = [''.join(acts) for acts in realignments]

    # Queue the positions of every distinct sequence once, instead of
    # rescanning all sequences for each realignment.
    free_positions = defaultdict(deque)
    for position, sequence in enumerate(sequences):
        free_positions[sequence].append(position)

    indexes = []
    for aligned in realignments:
        queue = free_positions.get(aligned.replace('-', ''))
        if queue:
            # Duplicated sequences are claimed in input order.
            indexes.append(queue.popleft())

    return [aligned for _, aligned in sorted(zip(indexes, realignments))]
def find_edges(tiles):
    """Annotate every tile with its unmatched edges and candidate neighbours.

    An edge counts as unmatched when it (or its ``reverse``) occurs exactly
    once among the edges of all tiles.  For each tile the unmatched edges
    are appended to ``tile.unique_sides``, ``tile.is_side`` / ``tile.is_edge``
    are set for one / two unmatched edges, and ``tile.neighbours`` receives
    the indices of the other tiles sharing an edge.  Progress is printed.
    """
    all_edges = [edge for tile in tiles for edge in tile.edges]
    print(all_edges)

    for tile_no, tile in enumerate(tiles):
        print("~~~~~ {} ~~~~~ {} ~~~~~".format(tile.id, tile_no))
        unique_count = 0
        candidates = []
        for edge in tile.edges:
            matches = [
                position for position, other in enumerate(all_edges)
                if other == edge or other == reverse(edge)
            ]
            if len(matches) == 1:
                unique_count += 1
                tile.unique_sides.append(edge)
                tile.unique_sides.append(reverse(edge))
            # position // 4 maps an edge back to its tile; this assumes
            # every tile contributes exactly four edges.
            for position in matches:
                owner = position // 4
                if owner != tile_no:
                    candidates.append(owner)

        print("Has {} uniqe edges.".format(unique_count))
        if unique_count == 1:
            tile.is_side = True
        if unique_count == 2:
            tile.is_edge = True
        print("Possible Neighbours: {}".format(candidates))
        tile.neighbours = candidates.copy()
def select(self, k: int) -> int:
    """
    Return k-th integer stored in this Elias-Fano structure.

    :param k: index of integer to be reconstructed.
    :return: k-th stored integer
    """
    # Last level: there is no lvl-2 index, so lvl-1 answers directly.
    if not self._level_2:
        return self._level_1.select(k)

    # 'h' is the list index (NOT the prefix label) of the first lvl-2
    # bucket whose running element count exceeds k.
    running_counts = accumulate(map(len, self._level_2.values()))
    h = first(locate(running_counts, lambda total: total > k))

    # Number of elements held by the lvl-2 buckets [0..h).
    preceding = sum(map(len, islice(self._level_2.values(), 0, h)))

    # Lower half: the (k - preceding)-th element of the h-th bucket.
    bucket_label = nth(self._level_2.keys(), h)
    inf = self._level_2[bucket_label].select(k - preceding)

    # Upper half: taken from lvl-1 and shifted into place.
    upper = self._level_1.select(h)
    sup = upper << (int(math.log2(self._u)) - self._b)

    return sup + inf
def fishers_score(data, labels):
    """Compute the Fisher score of every column of *data*.

    For each column the score is the proportion-weighted squared distance
    of the class means from the column mean, divided by the
    proportion-weighted class variances; it is 0 when that denominator is 0.

    Args:
        data: pandas DataFrame with one row per sample.
        labels: iterable of class labels, one per row of *data*, in row
            order.

    Returns:
        A one-row DataFrame (index 0) with the same columns as *data*.
    """
    data_length = len(data)

    # Distinct labels in first-seen order.
    list_of_classes = []
    for label in labels:
        if label not in list_of_classes:
            list_of_classes.append(label)
    print('Data contains: ', len(list_of_classes), ' classes.')

    # Row positions of each class; computed once rather than once per column.
    class_rows = [
        [position for position, value in enumerate(labels) if value == label]
        for label in list_of_classes
    ]

    columns = data.columns
    fishers_score_frame = pd.DataFrame(columns=columns)
    for column in columns:
        column_values = data.loc[:, column]
        column_mean = np.mean(column_values)
        numerator = 0
        denominator = 0
        for rows in class_rows:
            # The rows are positions, so they must go through .iloc; the
            # label-based .loc only worked for a default 0..n-1 index.
            class_in_data = column_values.iloc[rows]
            class_mean = np.mean(class_in_data)
            class_std = np.std(class_in_data)
            class_proportion = len(rows) / data_length
            numerator += class_proportion * (class_mean - column_mean) ** 2
            denominator += class_proportion * class_std ** 2
        if denominator != 0:
            fishers_score_frame.loc[0, column] = numerator / denominator
        else:
            fishers_score_frame.loc[0, column] = 0

    print("Fisher's score(s) has/have been computed.")
    return fishers_score_frame
def match(self, value, ignore):
    """Return an iterator over the stored integers matching *value* under *ignore*.

    Both arguments are split into upper and lower halves with
    ``self._split``.  A stored integer matches when each of its halves,
    AND-ed with the corresponding half of *ignore*, equals the same half of
    *value* AND-ed with that mask.
    """
    sup_value, inf_value = self._split(value)
    sup_ignore, inf_ignore = self._split(ignore)

    def lower_half_matches(inf):
        return (inf & inf_ignore) == (inf_value & inf_ignore)

    # No upper bits: every stored value is just a lower half.
    if not self._upper_bits > 0:
        return filter(lower_half_matches, self._inferiors)

    # Upper halves that are present (count > 0) and match the masked value.
    occupied = locate(self._superiors, pred=lambda cnt: cnt > 0)
    sup_matches = [
        idx for idx in occupied
        if (idx & sup_ignore) == (sup_value & sup_ignore)
    ]

    # Pair each matching upper half with its matching lower halves; the
    # lower halves of upper half `s` occupy the slice
    # [prefixSums[s] - superiors[s], prefixSums[s]) of self._inferiors.
    matching_elements = [
        (sup_match,
         filter(lower_half_matches,
                islice(self._inferiors,
                       self._superiors_prefixSums[sup_match]
                       - self._superiors[sup_match],
                       self._superiors_prefixSums[sup_match])))
        for sup_match in sup_matches
    ]

    # Reconstruct the stored integers lazily.
    return chain.from_iterable(
        ((sup_match << self._lower_bits) + inf for inf in lows)
        for sup_match, lows in matching_elements
    )
def scatter_wrapper_3D(large_data_sample, large_labels_set, first_three_columns,
                       path_to_results="/Users/sven/data/IoT_Israeli_processed/"):
    """Draw, show and save a 3D scatter plot of three features, coloured by class.

    Args:
        large_data_sample: pandas DataFrame holding the samples.
        large_labels_set: class labels, one per row of the sample, in row
            order.
        first_three_columns: the three column names used for the x, y and z
            axes.
        path_to_results: prefix the figure file name is appended to
            (plain string concatenation, so keep the trailing separator).
            Defaults to the previously hard-coded location.
    """
    # Distinct labels in first-seen order.
    list_of_classes = []
    for label in large_labels_set:
        if label not in list_of_classes:
            list_of_classes.append(label)

    label_encoder = LabelEncoder()
    label_encoder.fit(list_of_classes)
    large_labels_set_encoded = label_encoder.transform(large_labels_set)
    large_label_set_colored = mpl.cm.Set1(large_labels_set_encoded)

    figure_1 = plt.figure()
    ax = figure_1.add_subplot(111, projection='3d')
    for class_label in list_of_classes:
        indexes = [
            position for position, label in enumerate(large_labels_set)
            if label == class_label
        ]
        # The indexes are row positions, hence .iloc; the label-based .loc
        # only worked when the frame carried a default 0..n-1 index.
        class_rows = large_data_sample.iloc[indexes]
        ax.scatter(class_rows[first_three_columns[0]],
                   class_rows[first_three_columns[1]],
                   class_rows[first_three_columns[2]],
                   marker='o',
                   s=15,
                   c=large_label_set_colored[indexes],
                   edgecolors=large_label_set_colored[indexes],
                   label=class_label,
                   alpha=0.99)

    ax.legend(loc='upper right', bbox_to_anchor=(1.1, 1.05))
    figure_1.suptitle('Benign traffic: three "best" features')
    ax.set_xlabel(first_three_columns[0])
    ax.set_ylabel(first_three_columns[1])
    ax.set_zlabel(first_three_columns[2])
    plt.show()

    ffname = path_to_results + list_of_classes[0] + 'against_complement.png'
    figure_1.savefig(ffname)
    print("function: scatter_wrapper_3D: That's all folks!!!")
def check_completed_dev(self, dev):
    """Return the ``"completed"`` value of the status entry whose ``"sn"`` is *dev*.

    The value is ``None`` when nothing has completed (slot is free).
    Raises ``IndexError`` when no entry carries that serial number.
    """
    matches = [
        position for position, entry in enumerate(self.status_handle)
        if entry["sn"] == dev
    ]
    return self.status_handle[matches[0]]["completed"]
def update(frame):
    """Redraw graph ``G`` with the current ``DFS_path`` node shown in red.

    Advances the global ``run_color`` cursor by one and wraps it back to 0
    once it reaches ``lenght_G``.  *frame* is not used by the body.
    """
    global run_color
    ax.clear()

    node_colors = ['green'] * lenght_G
    hits = [
        slot for slot, node in enumerate(list_G)
        if node == DFS_path[run_color]
    ]
    node_colors[hits[0]] = 'red'
    nx.draw(G, node_color=node_colors, pos=pos, with_labels=True)

    run_color += 1
    if run_color == lenght_G:
        run_color = 0
def get_subsequence_from_property(self, property_key, property_value, condition, return_resnums=False, copy_letter_annotations=True):
    """Get a subsequence as a new SeqProp object given a certain property you want to find in this chain's
    letter_annotation

    See documentation for :func:`ssbio.protein.sequence.seqprop.SeqProp.get_subsequence_from_property`

    Args:
        property_key (str): Property key in the ``letter_annotations`` attribute that you want to filter using
        property_value (str): Property value that you want to filter by
        condition (str): ``<``, ``=``, ``>``, ``>=``, or ``<=`` to filter the values by, or ``in`` to keep
            the positions whose annotation is contained in ``property_value``
        return_resnums (bool): If resnums should be returned as well
        copy_letter_annotations (bool): Passed through to ``get_subsequence``

    Returns:
        SeqProp: New SeqProp object that you can run computations on or just extract its properties.
        ``None`` (after logging an error) if ``property_key`` is not a letter annotation.

    """
    if not self.seq_record:
        raise ValueError('No chain sequence stored')

    if property_key not in self.seq_record.letter_annotations:
        log.error(KeyError('{}: {} not contained in the letter annotations'.format(self.seq_record.id, property_key)))
        return

    if condition == 'in':
        def keep(x):
            return x in property_value
    else:
        def keep(x):
            return ssbio.utils.check_condition(x, condition, property_value)

    # locate() yields 0-based positions; residue numbers are 1-based.
    subfeat_resnums = [
        idx + 1
        for idx in locate(self.seq_record.letter_annotations[property_key], keep)
    ]

    # NOTE(review): the template has four placeholders but five arguments,
    # so property_value never reaches the id -- confirm whether intended.
    new_id = '{}_{}_{}_{}_extracted'.format(self.pdb_parent, self.id, property_key, condition, property_value)
    new_sp = self.get_subsequence(resnums=subfeat_resnums,
                                  new_id=new_id,
                                  copy_letter_annotations=copy_letter_annotations)

    if return_resnums:
        return new_sp, subfeat_resnums
    return new_sp
def insert_space_around_brackets(sentence):
    '''
    Pad the inside of round and square brackets with a space.

    (pulmonary edema) -> ( pulmonary edema )

    A space is inserted after every "(" and "[" and before every ")" and
    "]".  Done in a single pass with a translation table rather than four
    passes that re-slice the string once per bracket.
    '''
    padding = str.maketrans({
        "(": "( ",
        "[": "[ ",
        ")": " )",
        "]": " ]",
    })
    return sentence.translate(padding)
def update_completed_dev(self, dev, task=None):
    """Record a finished task on the status entry whose ``"sn"`` is *dev*.

    Appends 0, 1 or 2 to the entry's ``"completed"`` list for the tasks
    ``"wifi"``, ``"ram"`` and ``"lan"``; any other task appends nothing.
    Returns ``self.status_handle``.  Raises ``IndexError`` when no entry
    carries that serial number.
    """
    index = [
        position for position, entry in enumerate(self.status_handle)
        if entry["sn"] == dev
    ][0]
    for task_name, task_code in (("wifi", 0), ("ram", 1), ("lan", 2)):
        if task == task_name:
            self.status_handle[index]["completed"].append(task_code)
    return self.status_handle
def check_validity2(_input) -> bool:
    """
    Check a password against its letter + position requirement.

    :param _input: of the form "x0-x1 l: <password>" where l is the required
        letter, x0 and x1 are the two positions to inspect and <password> is
        the password to check; the fields are parsed by ``extract_vars_pt2``
    :return: True when exactly one of the two positions holds the letter
    """
    x0, x1, l, p = extract_vars_pt2(_input)
    letter_positions = [position for position, char in enumerate(p) if char == l]
    first_hit = x0 in letter_positions
    second_hit = x1 in letter_positions
    return first_hit != second_hit
def __iter__(self) -> Iterator[int]:
    """ Iterate over the stored integers (support for __iter__ and next). """
    if not self._upper_bits:
        if not self._lower_bits:
            raise ValueError("Empty index!")
        # Only lower halves exist: they are the stored values themselves.
        return iter(self._inferiors)

    if not self._lower_bits:
        return map(lambda sup: (sup << self._lower_bits),
                   locate(self._superiors, pred=lambda cnt: cnt > 0))

    # _superiors[k] is the number of elements to fetch from _inferiors and
    # combine with k as their upper half.  One shared iterator is consumed
    # across all upper halves, so the groups are taken strictly in order.
    lower_halves = iter(self._inferiors)
    occupied = locate(self._superiors, pred=lambda cnt: cnt > 0)
    return chain.from_iterable(
        [(idx << self._lower_bits) + inf
         for inf in islice(lower_halves, self._superiors[idx])]
        for idx in occupied
    )
def mark_winning_lines(self):
    """Replace the tiles of every line accepted by ``_is_full`` with the winner's emoji.

    Lines are read from the board one at a time, so a line is evaluated
    only after the earlier winning lines have already been rewritten.
    """
    board = self._board
    for indices in _default_indices:
        line = tuple(board[c][r] for c, r in indices)
        if not _is_full(line):
            continue
        first_c, first_r = indices[0][0], indices[0][1]
        emoji = _winning_tiles[board[first_c][first_r]]
        for c, r in indices:
            # TODO: Custom emojis for tiles?
            board[c][r] = emoji
def move_row(self, row):
    '''
    Moves one row to the left according to the rules of the game 2048
    and adds the value of every merged tile to ``self.move_score``.

    example [0, 2, 0, 2] => [4, 0, 0, 0]
            [2, 2, 2, 2] => [4, 4, 0, 0]
            [4, 2, 2, 0] => [4, 4, 0, 0]

    The result is padded with zeros to the length of *row* (at least 4, the
    width that used to be hard-coded), so rows longer than four cells are
    no longer shortened.
    '''
    # Slide: keep the non-zero tiles in order.
    tiles = [value for value in row if value]

    # Merge equal neighbours left to right; the absorbed tile becomes 0 and
    # therefore cannot take part in a second merge.
    for i in range(len(tiles) - 1):
        if tiles[i] == tiles[i + 1]:
            tiles[i] *= 2
            self.move_score += tiles[i]
            tiles[i + 1] = 0

    # Slide again to close the gaps left by merges, then pad with zeros.
    merged = [value for value in tiles if value]
    width = max(len(row), 4)
    return merged + [0] * (width - len(merged))
def get_2020_number(in_list, stop = 2020):
    """Extend *in_list* to *stop* numbers and return the last one.

    Each new number is 0 when the previous number had not occurred before,
    otherwise the distance between its two most recent occurrences.

    *in_list* is extended in place, and its last five numbers are printed.
    The most recent position of every number is tracked in a dict, so each
    turn costs O(1) instead of a rescan of the whole history.

    Args:
        in_list: non-empty list of starting numbers; mutated.
        stop: total count of numbers to generate.

    Returns:
        The last number of the extended list.
    """
    # Last position of every number, excluding the final starting number,
    # whose "previous" occurrence is what the first turn asks about.
    last_seen = {value: position for position, value in enumerate(in_list[:-1])}

    for turn in range(len(in_list) - 1, stop - 1):
        current = in_list[turn]
        previous = last_seen.get(current)
        in_list.append(0 if previous is None else turn - previous)
        last_seen[current] = turn

    print(in_list[-5:])
    return in_list[-1]
def get_fit(population, knapsack):
    """Return the ``(weight, profit)`` of every chromosome, repairing overweight ones.

    While a chromosome weighs more than ``knapsack['capacity'][0]``, a
    random selection (with repetition) of its genes equal to 1 is reset to
    0 in place and the chromosome is evaluated again with
    ``tools.compute_knapsack``.
    """
    fit_lst = []
    for chrom in population:
        weight, profit = tools.compute_knapsack(knapsack, chrom)
        while weight > knapsack['capacity'][0]:
            packed = [gene for gene, bit in enumerate(chrom) if bit == 1]
            dropped = random.choices(packed, k=int(len(chrom) * 0.3) + 1)
            for gene in dropped:
                chrom[gene] = 0
            weight, profit = tools.compute_knapsack(knapsack, chrom)
        fit_lst.append((weight, profit))
    return fit_lst
def tagger(value, word):
    """Return the positions in ``paras[value]`` of the words most similar to *word*.

    The similar words come from ``model_wv.wv.most_similar(word)``; at most
    the first ten are used.  Positions are grouped by similar word, in the
    order the model returned them.  Returns ``[]`` when the lookup raises
    ``KeyError``.
    """
    # Search for the most similar word2vec words.
    try:
        similar_words = [entry[0] for entry in model_wv.wv.most_similar(word)]
    except KeyError:
        return []

    paragraph = paras[value]
    positions = []
    # Slice instead of indexing 0..9 so that a model returning fewer than
    # ten neighbours no longer raises IndexError.
    for candidate in similar_words[:10]:
        positions.extend(
            position for position, token in enumerate(paragraph)
            if token == candidate
        )
    return positions
def crossover(X, Y):
    """Cross two routes over at two randomly chosen common stations.

    Two stations are sampled from ``intersection(X, Y)``; the last position
    of each is looked up in *X* and in *Y*, and copies of both routes are
    passed to ``swap`` together with the two sorted position pairs.

    Returns:
        The tuple ``(CX, CY)`` produced by ``swap``.
    """
    common_stations = intersection(X, Y)
    nodes = list(random.sample(common_stations, 2))

    def last_positions(route):
        # Last occurrence of each sampled station in `route`, ascending.
        return sorted(
            max(position for position, station in enumerate(route)
                if station == node)
            for node in nodes
        )

    index_1 = last_positions(X)
    # Fixed: the cut points for Y were previously looked up in X as well,
    # so index_2 was always just a copy of index_1.
    index_2 = last_positions(Y)

    CX, CY = swap(X.copy(), Y.copy(), index_1, index_2)
    return (CX, CY)
def build_inverted_index_with_term_location(terms_in_clean_file, file):
    """Add one file's terms to the global index of term locations.

    For every distinct term of the file a posting ``[file, dgap]`` is
    stored under ``index_having_term_locations[term]``, where ``dgap`` is
    ``dgaps(positions)`` for the term's positions in the file.  A term that
    already has a posting for *file* is left untouched.

    Args:
        terms_in_clean_file: ordered, hashable terms of the file.
        file: identifier stored as the first posting element.
    """
    # Group positions in a single pass instead of rescanning the whole term
    # list for every occurrence of every term (which was quadratic).
    positions_by_term = {}
    for position, term in enumerate(terms_in_clean_file):
        positions_by_term.setdefault(term, []).append(position)

    for term, positions in positions_by_term.items():
        dgap = dgaps(positions)
        if term not in index_having_term_locations:
            index_having_term_locations[term] = [[file, dgap]]
            continue
        postings = index_having_term_locations[term]
        if not any(posting[0] == file for posting in postings):
            postings.append([file, dgap])
def get_subsequence_from_property(self, property_key, property_value, condition, return_resnums=False):
    """Get a subsequence as a new SeqProp object given a certain property you want to find in this chain's
    letter_annotation

    See documentation for :func:`ssbio.protein.sequence.seqprop.SeqProp.get_subsequence_from_property`

    Args:
        property_key (str): Property key in the ``letter_annotations`` attribute that you want to filter using
        property_value (str): Property value that you want to filter by
        condition (str): ``<``, ``=``, ``>``, ``>=``, or ``<=`` to filter the values by
        return_resnums (bool): If resnums should be returned as well

    Returns:
        SeqProp: New SeqProp object that you can run computations on or just extract its properties

    Raises:
        ValueError: If no chain sequence is stored
        KeyError: If ``property_key`` is not a letter annotation

    """
    if not self.seq_record:
        raise ValueError('No chain sequence stored')

    if property_key not in self.seq_record.letter_annotations:
        raise KeyError(
            '{}: {} not contained in the letter annotations'.format(
                self.seq_record.id, property_key))

    subfeat_indices = list(
        locate(
            self.seq_record.letter_annotations[property_key],
            lambda x: ssbio.utils.check_condition(x, condition, property_value)))

    # One single-residue location per matching position.
    biop_compound_list = [FeatureLocation(idx, idx + 1) for idx in subfeat_indices]

    # Biopython's CompoundLocation rejects fewer than two parts, so a single
    # match is extracted through its plain FeatureLocation.  With no match
    # the CompoundLocation constructor still raises, as it did before.
    if len(biop_compound_list) == 1:
        sub_feature_location = biop_compound_list[0]
    else:
        sub_feature_location = CompoundLocation(biop_compound_list)

    sub_feature = sub_feature_location.extract(self.seq_record)

    new_sp = SeqProp(id='{}-{}_{}_{}_{}_extracted'.format(
        self.pdb_parent, self.id, property_key, condition, property_value),
                     seq=sub_feature)
    new_sp.letter_annotations = sub_feature.letter_annotations

    if return_resnums:
        # Residue numbers are 1-based.
        return new_sp, [x + 1 for x in subfeat_indices]
    return new_sp
def _get_candidates(
        self, util_info: BagValDict[ICaseString, InstrState]) -> Iterable[HostedInstr]:
    """Find candidate instructions in the donors of this sink.

    `self` is this instruction sink.
    `util_info` is the unit utilization information.

    For each donor, the positions of its entries accepted by
    `_valid_candid` are handed to `_get_new_guests`; the chained results are
    then filtered by `_pick_guests`.  The per-donor work is evaluated
    lazily.

    """
    def guests_of(donor):
        valid_positions = more_itertools.locate(util_info[donor],
                                                self._valid_candid)
        return self._get_new_guests(donor, valid_positions)

    per_donor = map(guests_of, self._donors)
    return self._pick_guests(itertools.chain.from_iterable(per_donor),
                             util_info)
def sort_arrivals(arrivals):
    """Merge arrivals of the same line into one entry per line.

    Every arrival's ``"estimateArrive"`` is replaced (in place) by
    ``process_time_left`` of it.  The first arrival of a line is kept;
    the estimates of later arrivals of that line are appended to its
    ``"estimateArrive"``, separated by a space.  Entries keep the order in
    which their line first appeared.
    """
    merged = []
    for arrival in arrivals:
        same_line = [
            slot for slot, seen in enumerate(merged)
            if seen["line"] == arrival["line"]
        ]
        arrival["estimateArrive"] = process_time_left(
            arrival["estimateArrive"])
        if not same_line:
            merged.append(arrival)
            continue
        merged[same_line[0]]["estimateArrive"] += " " + arrival["estimateArrive"]
    return merged
def find_index(sentence, entitys):
    """Return the indices in *sentence* of the first contiguous run equal to *entitys*.

    Args:
        sentence: sequence of tokens to search.
        entitys: non-empty sequence of tokens that must occur consecutively.

    Returns:
        ``[start, start + 1, ...]`` with one index per entry of *entitys*,
        where ``start`` is the earliest position at which the run occurs
        (previously an arbitrary occurrence was picked via ``set.pop()``).

    Raises:
        KeyError: if the run does not occur in *sentence*.
        TypeError: if *entitys* is empty (raised by ``reduce``).
    """
    # For the k-th entity, the set of start positions that would put it at
    # offset k; a start common to all sets is an occurrence of the run.
    aligned_starts = [
        {position - offset
         for position, token in enumerate(sentence) if token == entity}
        for offset, entity in enumerate(entitys)
    ]
    common_starts = reduce(set.intersection, aligned_starts)
    if not common_starts:
        raise KeyError('entity sequence not found in sentence')

    start = min(common_starts)
    return [start + offset for offset in range(len(entitys))]
def fitness_calculator():
    """Recompute weight, value and fitness for every chromosome in ``population``.

    The module-level lists ``sum_w_list``, ``sum_v_list`` and
    ``sum_fitness_list`` are emptied and refilled, one entry per
    chromosome.  Fitness is the total value of the genes equal to 1, or 0
    when their total weight exceeds ``allowed_weight``.
    """
    sum_w_list.clear()
    sum_v_list.clear()
    sum_fitness_list.clear()

    for ch in population:
        chosen = [gene for gene, bit in enumerate(ch) if bit == 1]
        total_weight = sum(weights[gene] for gene in chosen)
        total_value = sum(values[gene] for gene in chosen)
        fitness = 0 if total_weight > allowed_weight else total_value

        sum_w_list.append(total_weight)
        sum_v_list.append(total_value)
        sum_fitness_list.append(fitness)
def checkNupdate(devfound, updatedict):
    """Check status and update result for each device.

    For every serial number in *devfound*, the matching ``"scanInfo"`` entry
    gets ``"result"`` set to ``'0'`` when any of its ``"macInfo"`` items has
    ``"scanstatus" == '0'``, otherwise to ``'1'`` (and the serial number is
    appended to ``devsucess``).  Any exception is logged as a warning, in
    which case nothing is returned.
    """
    try:
        if updatedict["scanInfo"]:
            snapshot = updatedict["scanInfo"].copy()
            for serial in devfound:
                slot = [
                    position for position, entry in enumerate(snapshot)
                    if entry["sn"] == serial
                ][0]
                any_failed = any(
                    mac["scanstatus"] == '0'
                    for mac in snapshot[slot]["macInfo"])
                if any_failed:
                    updatedict["scanInfo"][slot]["result"] = '0'
                else:
                    # no failed scan: mark success and remember the device
                    updatedict["scanInfo"][slot]["result"] = '1'
                    devsucess.append(serial)
        # NOTE(review): the flattened source does not show whether this
        # return belongs inside the `if` above; it is placed at try level.
        return updatedict
    except Exception as e:
        logging.warning("Device scan result update failed" + str(e))