def FindTriplicate(input_value, check_list):
    # A value occurring three times appears twice among the duplicates,
    # so applying duplicates() twice detects triplicates.
    duplicates_list = list(duplicates(input_value))
    triplicate_list = list(duplicates(duplicates_list))
    if triplicate_list:
        check_list[1] += 1
    return check_list
def validate_synapses(self):
    db = self.__get_db()
    synapse_collection = db["synapses"]
    all_synapses = [s for s in synapse_collection.find({})]

    # Check for duplicates:
    synapse_ids = [s["synapse_id"] for s in all_synapses]
    synapse_locs = [(s["x"], s["y"], s["z"]) for s in all_synapses]
    duplicate_synapse_ids = list(duplicates(synapse_ids))
    duplicate_synapse_locs = list(duplicates(synapse_locs))

    # Check that skeleton exists:
    skeleton_collection = db["skeletons"]
    unmatched_synapses = []
    for synapse in all_synapses:
        if skeleton_collection.count_documents(
                {"skeleton_id": synapse["skeleton_id"]}) == 0:
            unmatched_synapses.append(synapse["synapse_id"])

    return {
        "id_duplicates": duplicate_synapse_ids,
        "loc_duplicates": duplicate_synapse_locs,
        "no_skid_match": unmatched_synapses
    }
def mvs_north(mvs):
    """
    Get multipole vectors on the northern hemisphere.

    Args:
        mvs (float array): array containing multipole vectors [theta, phi].

    Returns:
        Float array [theta, phi] in radians.
    """
    # First ordinary test:
    mvs_n = mvs[mvs[:, 0] < np.pi / 2]
    l = int(len(mvs) / 2)
    if len(mvs_n) == l:
        return mvs_n
    if len(mvs_n) == l + 1:
        # Find duplicates (materialize the iterator so len() works):
        duplicate_thetas = list(duplicates(mvs_n[:, 0]))
        if len(duplicate_thetas) == 1:
            # Randomly delete one of the entries sharing the duplicated theta:
            result = np.delete(
                mvs_n,
                np.random.choice(
                    np.where(mvs_n[:, 0] == duplicate_thetas[0])[0],
                    size=1)[0],
                axis=0,
            )
            if len(result[:, 0]) == l:
                return result
        # Delete highest value:
        else:
            result = np.delete(
                mvs_n,
                np.where(mvs_n[:, 0] == np.max(mvs_n[:, 0]))[0],
                axis=0,
            )
            if len(result[:, 0]) == l:
                return result
    if len(mvs_n) == l - 1:
        result = mvs[mvs[:, 0] >= np.pi / 2]
        sorted_result = result[result[:, 0].argsort()][0:1 + 1]
        duplicate_thetas = list(duplicates(sorted_result[:, 0]))
        if len(duplicate_thetas) == 1:
            rand_theta = np.random.choice([0, 1])
            result1 = np.vstack((result, sorted_result[rand_theta]))
            if len(result1[:, 0]) == l:
                return result1
        else:
            result2 = np.vstack((result, sorted_result[0]))
            if len(result2[:, 0]) == l:
                return result2
    # The original "or" here was always true; "and" matches the intent of
    # flagging an unexpected hemisphere split:
    if (len(mvs_n) != l + 1) and (len(mvs_n) != l - 1):
        raise ValueError("Found a bug!")
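# A minimal, hypothetical usage sketch for mvs_north (not from the source).
# Multipole vectors come in antipodal pairs (theta, phi) / (pi - theta,
# phi + pi), so for a generic set exactly half of the 2*l vectors lies on
# the northern hemisphere and mvs_north() should return exactly l of them.
import numpy as np

rng = np.random.default_rng(0)
l = 5
north = np.column_stack([rng.uniform(0, np.pi / 2, l),   # theta < pi/2
                         rng.uniform(0, 2 * np.pi, l)])  # phi
south = np.column_stack([np.pi - north[:, 0],
                         (north[:, 1] + np.pi) % (2 * np.pi)])
mvs = np.vstack([north, south])
assert len(mvs_north(mvs)) == l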
def FindDuplicates(input_value, check_list):
    duplicates_list = list(duplicates(input_value))
    triplicate_list = list(duplicates(duplicates_list))
    # Keep values that are duplicates but not triplicates (symmetric difference):
    duplicates_list = [
        value for value in duplicates_list + triplicate_list
        if value not in duplicates_list or value not in triplicate_list
    ]
    if duplicates_list:
        check_list[0] += 1
    return check_list
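# Hedged usage sketch (the two-element check_list is an assumption): the pair
# of counters mirrors the classic "count inputs with an exactly-twice and an
# at-least-three-times repeated element" checksum, with FindDuplicates
# bumping check_list[0] and FindTriplicate bumping check_list[1].
check_list = [0, 0]
check_list = FindDuplicates(list('abab'), check_list)  # 'a', 'b' twice -> [1, 0]
check_list = FindTriplicate(list('aaab'), check_list)  # 'a' thrice     -> [1, 1]
assert check_list == [1, 1]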
def step(self, actions):
    self.players = [
        limit_to_size(move(player, action, self.prob_right_direction),
                      self.grid_size)
        for player, action in zip(self.players, actions)
    ]
    states = self.__get_state()
    is_at_goal = [
        player == goal for player, goal in zip(self.players, self.goals)
    ]
    reward_is_at_goal = [20 if at_goal else -1 for at_goal in is_at_goal]
    # detect a crash
    dup = list(unique_everseen(duplicates(self.players)))
    # if a player's position appears twice, add -20 to the current reward
    reward_is_crash = [
        -19 if player in dup else 0 for player in self.players
    ]
    rewards = [a + b for a, b in zip(reward_is_at_goal, reward_is_crash)]
    done = any(is_at_goal)
    if self.render_board:
        print("{}".format(self.render()))
    return (states, rewards, done)
def test_traverse():
    """To test the traverse implementation we call gc.collect() while
    instances of all the C objects are still valid."""
    acc = iteration_utilities.accumulate([])
    app = iteration_utilities.applyfunc(lambda x: x, 1)
    cha = iteration_utilities.chained(int, float)
    cla = iteration_utilities.clamp([], 0, 1)
    com = iteration_utilities.complement(int)
    con = iteration_utilities.constant(1)
    dee = iteration_utilities.deepflatten([])
    dup = iteration_utilities.duplicates([])
    fli = iteration_utilities.flip(int)
    gro = iteration_utilities.grouper([], 2)
    ine = iteration_utilities.intersperse([], 1)
    iik = iteration_utilities.ItemIdxKey(10, 2)
    ite = iteration_utilities.iter_except(int, TypeError)
    mer = iteration_utilities.merge([])
    nth = iteration_utilities.nth(1)
    pac = iteration_utilities.packed(int)
    par = iteration_utilities.partial(int, 10)
    rep = iteration_utilities.replicate([], 3)
    rou = iteration_utilities.roundrobin([])
    see = iteration_utilities.Seen()
    sid = iteration_utilities.sideeffects([], lambda x: x)
    spl = iteration_utilities.split([], lambda x: True)
    sta = iteration_utilities.starfilter(lambda x: True, [])
    suc = iteration_utilities.successive([])
    tab = iteration_utilities.tabulate(int)
    une = iteration_utilities.unique_everseen([])
    unj = iteration_utilities.unique_justseen([])
    gc.collect()
def check_duplicate_selected_match(self, annotation_name):
    '''
    Check that no annotator has two or more system matches selected for the
    same gold annotation.

    :return: tuple (duplicates_found, duplicate_entries)
    '''
    # check if there are double matches: there cannot be two or more system
    # matches per annotation for one annotator
    all_gold_annotations = self.collect_annotations_by_annotator(annotation_name)
    flat_matches = {}
    for gold_anno_id, gold_annotations in all_gold_annotations.items():
        flat_matches[gold_anno_id] = {}
        for gold_ann in gold_annotations:
            for sys_anno_id, sys_match in gold_ann.selected_match.items():
                if sys_match is not None:
                    sys_match = [self._get_annotation_from_match(m) for m in sys_match]
                    flat_matches[gold_anno_id].setdefault(sys_anno_id, []).extend(sys_match)
    duplicate_entries = {}
    duplicates_found = False
    for gold_anno_id, sys_matches in flat_matches.items():
        duplicate_entries[gold_anno_id] = {}
        for sys_anno_id, matches in sys_matches.items():
            dupes = list(duplicates(matches))
            if dupes:
                duplicates_found = True
                duplicate_entries[gold_anno_id][sys_anno_id] = [
                    match for match in matches if match in dupes
                ]
    return duplicates_found, duplicate_entries
def filterLatestVersionOnly(billFiles: List[str]):
    # For bills that are not unique, get only the latest one
    # Filter to get just the path before /text-versions
    billPaths = list(map(lambda f: f.split('/text')[0], billFiles))
    print('Number of bills: ' + str(len(billPaths)))
    billPathsDupes = list(duplicates(billPaths))
    print('Number of bills with multiple versions: ' +
          str(len(billPathsDupes)))
    billPathsUnique = list(filter(lambda f: f not in billPathsDupes,
                                  billPaths))
    billFilesUnique = list(
        filter(lambda f: f.split('/text')[0] in billPathsUnique, billFiles))
    billNumbersDupes = list(
        dict.fromkeys(
            filter(
                None,
                map(getBillNumberFromCongressScraperBillPath,
                    billPathsDupes))))
    latestBillVersions = list(map(getLatestBillVersion, billNumbersDupes))
    print('Number of latestBillVersions: ' + str(len(latestBillVersions)))
    billFilesDupes = [
        os.path.join(getBillPath(version), 'text-versions',
                     re.sub(r'[0-9]+[a-z]+[0-9]+', '', version),
                     'document.xml')
        for version in latestBillVersions
    ]
    billFilesFiltered = billFilesUnique + billFilesDupes
    print('Number of bills (latest versions): ' + str(len(billFilesFiltered)))
    return billFilesFiltered
def add_options_to_parser(parser: Cmd2ArgumentParser):
    opts = get_all_options()
    shortcuts = [opt['shortcut'] for opt in opts.values() if 'shortcut' in opt]
    duplicate_shortcuts = set(duplicates(shortcuts))
    assert not duplicate_shortcuts, \
        f'the following shortcut commands are duplicates {",".join(duplicate_shortcuts)}'
    for opt_name, opt in opts.items():
        if opt.get('type') is None:
            continue  # internal option
        if 'shortcut' in opt:
            names = [f'-{opt["shortcut"]}']
        else:
            names = []
        if opt.get('type') == bool:
            parse_options = {'help': opt.get('help'), 'action': 'store_true'}
            if opt.get('default'):
                names = [f'--no_{opt_name}']
            else:
                names.append(f'--{opt_name}')
            parser.add_argument(*names, **parse_options)
        else:
            parse_options = {
                k: opt.get(k)
                for k in ['type', 'choices', 'help']
            }
            names.append(f'--{opt_name}')
            parser.add_argument(*names, **parse_options)
def validate_hemi_lineages(self):
    db = self.__get_db()
    hemi_lineage_collection = db["hemi_lineages"]
    all_hemi_lineages = [h for h in hemi_lineage_collection.find({})]

    # Check for duplicates:
    hemi_lineage_ids = [h["hemi_lineage_id"] for h in all_hemi_lineages]
    hemi_lineage_names = [
        h["hemi_lineage_name"] for h in all_hemi_lineages
    ]
    duplicate_hemi_lineage_ids = list(duplicates(hemi_lineage_ids))
    duplicate_hemi_lineage_names = list(duplicates(hemi_lineage_names))

    return {
        "id_duplicates": duplicate_hemi_lineage_ids,
        "name_duplicates": duplicate_hemi_lineage_names
    }
def test_duplicates_getter1():
    t = duplicates([T(1), T([0, 0]), T(3), T(1)])
    assert not t.seen
    assert t.key is None
    assert next(t) == T(1)
    assert T(1) in t.seen
    assert T(3) in t.seen
    assert T([0, 0]) in t.seen
    assert t.key is None
def test_duplicates_unhashable1():
    assert list(duplicates(
        [{T(1): T(1)}, {T(2): T(2)}, {T(1): T(1)}])) == [{T(1): T(1)}]
def part1(text):
    def process(line):
        x, y, width, height = map(int, r.match(line).groups())
        return starmap(complex,
                       product(range(x, x + width), range(y, y + height)))

    r = re.compile(r"#\d+ @ (\d+),(\d+): (\d+)x(\d+)")
    return len(
        set(duplicates(chain.from_iterable(map(process, text.splitlines())))))
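# Sketch of the counting idiom above (hypothetical data): duplicates()
# re-yields a cell on every claim after the first, and set() collapses the
# repeats, so the length is the number of cells claimed at least twice.
from iteration_utilities import duplicates

cells = [1 + 1j, 1 + 1j, 2 + 2j, 1 + 1j]
assert list(duplicates(cells)) == [1 + 1j, 1 + 1j]  # 2nd and 3rd occurrence
assert len(set(duplicates(cells))) == 1             # one overlapping cell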
def _check_no_duplicate_object_ids(self):
    """
    Checks if all object IDs are unique.

    Raises:
        PVConfigurationException: if duplicate IDs were found
    """
    duplicate_obj_ids = list(unique_everseen(duplicates(self.object_ids)))
    if duplicate_obj_ids:
        raise PVConfigurationException(
            f'User configuration entries contain duplicate '
            f'object IDs: {duplicate_obj_ids}')
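# Sketch of the report-each-duplicate-once idiom used above (the IDs are
# made up): duplicates() yields every repeat occurrence, so wrapping it in
# unique_everseen() names each offending ID exactly once.
from iteration_utilities import duplicates, unique_everseen

object_ids = ['PV1', 'PV2', 'PV1', 'PV3', 'PV1']
assert list(duplicates(object_ids)) == ['PV1', 'PV1']
assert list(unique_everseen(duplicates(object_ids))) == ['PV1']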
def limite_z(self, position_coeur, position_sein):
    # Collect the z coordinates of the heart (coeur) and breast (sein) voxels:
    List_z_coeur = []
    List_z_Sein = []
    List_tot_z = []
    for i in position_coeur:
        List_z_coeur.append(i[2])
    for j in position_sein:
        List_z_Sein.append(j[2])
    # Keep only z values that occur more than once, each reported once:
    List_z_coeur = list(unique_everseen(duplicates(List_z_coeur)))
    List_z_Sein = list(unique_everseen(duplicates(List_z_Sein)))
    slice_barycenter = int(
        np.ceil((np.min(List_z_Sein) + np.max(List_z_Sein)) / 2))
    List_tot_z = List_z_Sein + List_z_coeur
    mini = np.min(List_tot_z)
    maxi = np.max(List_tot_z)
    return mini, maxi, slice_barycenter
def exercise159(filename):
    end_of_sentence = ".?!"
    line_counter = 0
    prev_line = ""
    with open(filename, 'r') as f:
        for line in f.readlines():
            line = line.rstrip()
            if line and line[-1] in end_of_sentence:
                # The sentence is complete; include the lines carried over:
                sentence = (prev_line + " " + line).split()
                prev_line = ""
                if list(duplicates(sentence)):
                    print('Duplicate word in line %d' % line_counter)
            else:
                prev_line += " " + line
            line_counter += 1
def find_repeated_info(df):
    global merged
    global info_with_multiple_userid_ls
    # find duplicate entries with many userid
    df_column1_ls = df[df.columns[1]].to_list()
    info_with_multiple_userid_ls = list(
        unique_everseen(duplicates(df_column1_ls)))
    # construct new df, to cut down on the number of rows
    info_with_multiple_userid = pd.DataFrame(info_with_multiple_userid_ls,
                                             columns=["info"])
    merged = pd.merge(info_with_multiple_userid,
                      df,
                      how="left",
                      left_on="info",
                      right_on=df.columns[1]).drop(df.columns[1], axis=1)
def validate_skeletons(self):
    db = self.__get_db()
    skeleton_collection = db["skeletons"]
    all_skeletons = [s for s in skeleton_collection.find({})]

    # Check for duplicates:
    skeleton_ids = [s["skeleton_id"] for s in all_skeletons]
    duplicate_skeleton_ids = list(duplicates(skeleton_ids))

    # Check that hemi_lineage exists:
    hemi_lineage_collection = db["hemi_lineages"]
    unmatched_skeletons = []
    for skeleton in all_skeletons:
        if hemi_lineage_collection.count_documents(
                {"hemi_lineage_id": skeleton["hemi_lineage_id"]}) == 0:
            unmatched_skeletons.append(skeleton["skeleton_id"])

    return {
        "id_duplicates": duplicate_skeleton_ids,
        "no_hlid_match": unmatched_skeletons
    }
def printOverallNetworkStatistic():
    # Global Variables:
    global tbs
    global mfb
    # Compute the total similarity average of all social troll bot networks
    # used in the 2016 elections:
    tsa = sum(tbs) / len(tbs)
    # Obtain the list of the most frequently used social troll bots used in
    # the 2016 elections:
    mfb = list(unique_everseen(duplicates(mfb)))
    # Print Statements:
    print("The average % of similarity of all political social troll bot "
          "networks used in the 2016 elections = " + str(tsa) + "%")
    print("The list of the most commonly used social troll bots used in "
          "the 2016 elections: " + str(mfb))
    print("The no. of most commonly used social troll bots used in the "
          "2016 elections: " + str(len(mfb)))
    return True
def query(self, vector, radius=1, top_k=5):
    res_indices = []
    ## Need to improve index calculations
    indices = vector.dot(self.base_vector.T).reshape(self.num_tables, -1) > 0
    if radius == 0:
        # Exact bucket per table: interpret the sign bits as an integer,
        # offset by the table index.
        res_indices = (indices.dot(2**np.arange(self.n_vectors)) +
                       np.arange(self.num_tables) * 2**self.n_vectors)
    elif radius == 1:
        # Multi-probe: also visit every bucket at Hamming distance 1 by
        # flipping each sign bit once.
        clone_indices = indices.repeat(axis=0, repeats=self.n_vectors)
        rel_indices = (np.arange(self.num_tables) *
                       2**self.n_vectors).repeat(axis=0,
                                                 repeats=self.n_vectors)
        translate = np.tile(np.eye(self.n_vectors), (self.num_tables, 1))
        res_indices = (np.abs(clone_indices - translate).dot(
            2**np.arange(self.n_vectors)) + rel_indices).astype(int)
        res_indices = np.concatenate([
            res_indices,
            indices.dot(2**np.arange(self.n_vectors)) +
            np.arange(self.num_tables) * 2**self.n_vectors
        ])
    start = time.time()
    lst = self.hash_table[res_indices].tolist()
    self.lookup_index_times.append(time.time() - start)
    start = time.time()
    # Keep only candidates that landed in more than one probed bucket:
    res = list(unique_everseen(duplicates(flatten(lst))))
    sim_scores = vector.dot(self.vectors[res].T)
    max_sim_indices = sim_scores.argsort()[-top_k:][::-1]
    max_sim_scores = sim_scores[max_sim_indices]
    return [(self.names[res[i]], score)
            for i, score in zip(max_sim_indices, max_sim_scores)]
def test_duplicates_failure6():
    # Failure (no TypeError) when trying to hash the value
    with pytest.raises(_hf.FailHash.EXC_TYP, match=_hf.FailHash.EXC_MSG):
        list(duplicates([T(1), _hf.FailHash()]))
def test_duplicates_failure5():
    # Failure when comparing the object to the objects in the list
    with pytest.raises(_hf.FailEqNoHash.EXC_TYP,
                       match=_hf.FailEqNoHash.EXC_MSG):
        list(duplicates([[T(1)], _hf.FailEqNoHash()]))
def test_duplicates_failure4():
    # Too few arguments
    with pytest.raises(TypeError):
        duplicates()
def test_duplicates_failure3():
    # Test that a failing iterator doesn't raise a SystemError
    with pytest.raises(_hf.FailNext.EXC_TYP, match=_hf.FailNext.EXC_MSG):
        next(duplicates(_hf.FailNext()))
def test_duplicates_failure2():
    with pytest.raises(TypeError):
        list(duplicates([T(1), T(2), T(3), T('a')], abs))
def test_duplicates_failure1():
    with pytest.raises(_hf.FailIter.EXC_TYP, match=_hf.FailIter.EXC_MSG):
        duplicates(_hf.FailIter())
def test_duplicates_getter2():
    t = duplicates([T(1), T([0, 0]), T(3), T(1)],
                   key=iteration_utilities.return_identity)
    assert t.key is iteration_utilities.return_identity
def test_duplicates_unhashable3():
    assert list(
        duplicates([[T(1), T(1)], [T(1), T(2)], [T(1), T(3)]],
                   operator.itemgetter(0))) == [[T(1), T(2)], [T(1), T(3)]]
def test_duplicates_unhashable2():
    assert list(duplicates([[T(1)], [T(2)], [T(1)]])) == [[T(1)]]
def test_duplicates_key2():
    assert list(duplicates([T(1), T(1), T(-1)], abs)) == toT([1, -1])
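# Hedged summary of the duplicates() semantics the tests above exercise: it
# yields the second and later occurrences, honors an optional key function,
# and falls back to equality comparisons for unhashable items.
from iteration_utilities import duplicates

assert list(duplicates([1, 2, 1, 2, 1])) == [1, 2, 1]
assert list(duplicates([1, -1, 2], key=abs)) == [-1]
assert list(duplicates([[1], [2], [1]])) == [[1]]  # unhashable fallback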