def beautify_similar_files(self, similar_files, fo_list, similarity):
    """Turn groups of similar files into a dict keyed by match id.

    For every group in ``similar_files`` a dict is built that

    * starts with one ``None`` entry per ``fo.uid`` of ``fo_list``,
    * maps the part before the ``':'`` of each group member to the part
      after it (each member must contain exactly one ``':'``, otherwise the
      unpacking raises ``ValueError``),
    * stores under ``'similarity'`` the value found in ``similarity`` for the
      group's compare id, or ``''`` when there is none.

    :param similar_files: iterable of groups; each group is an iterable of
        ``'<first>:<second>'`` strings.
    :param fo_list: objects exposing a ``uid`` attribute.
    :param similarity: mapping from compare id to a similarity value.
    :return: dict mapping ``self._match_id(group)`` to the dict described above.
    """
    beautified = {}
    for group in similar_files:
        # Pre-seed every known uid so that each entry has the same key set.
        entry = dict.fromkeys(fo.uid for fo in fo_list)
        for member in group:
            first_part, second_part = member.split(':')
            entry[first_part] = second_part
        group_compare_id = convert_uid_list_to_compare_id(group)
        entry['similarity'] = similarity.get(group_compare_id, '')
        beautified[self._match_id(group)] = entry
    return beautified
def _calculate_compare_result_id(compare_result):
    """Derive the compare id belonging to a compare result.

    All keys of the nested dicts stored under ``compare_result['general']``
    are collected into one set; that set (as a list) is passed to
    ``convert_uid_list_to_compare_id``.

    :param compare_result: dict with a ``'general'`` entry whose values are
        themselves dicts (presumably keyed by uid -- verify against caller).
    :return: whatever ``convert_uid_list_to_compare_id`` returns for the
        collected keys.
    """
    collected_uids = set()
    for nested in compare_result['general'].values():
        collected_uids.update(nested.keys())
    return convert_uid_list_to_compare_id(list(collected_uids))
def _app_show_start_compare(self):
    """Start a comparison for the UIDs stored in the session.

    Flow:

    1. ``session['uids_for_comparison']`` must be a list with at least two
       entries, otherwise the error template is rendered.
    2. The compare id is derived from that list and the session entry is
       reset to ``None``.
    3. If the result is already in the database and the request does not
       carry a truthy ``force_recompare`` argument, redirect to the result.
    4. Otherwise ``check_objects_exist`` is called; a ``FactCompareException``
       is rendered as an error page.
    5. Finally a compare task is added and the wait template is rendered.

    :return: the rendered template or redirect response.
    """
    selected_uids = session.get('uids_for_comparison')
    if not isinstance(selected_uids, list) or len(selected_uids) < 2:
        return render_template('compare/error.html', error='No UIDs found for comparison')

    compare_id = convert_uid_list_to_compare_id(selected_uids)
    session['uids_for_comparison'] = None

    # ``redo`` is deliberately True/None (not True/False); it is forwarded
    # unchanged as ``force`` below.
    redo = True if request.args.get('force_recompare') else None

    with ConnectTo(CompareDbInterface, self._config) as compare_db:
        already_compared = compare_db.compare_result_is_in_db(compare_id)
    if already_compared and not redo:
        return redirect(url_for('/compare/<compare_id>', compare_id=compare_id))

    try:
        with ConnectTo(CompareDbInterface, self._config) as compare_db:
            compare_db.check_objects_exist(compare_id)
    except FactCompareException as exception:
        return render_template('compare/error.html', error=exception.get_message())

    with ConnectTo(InterComFrontEndBinding, self._config) as intercom:
        intercom.add_compare_task(compare_id, force=redo)
    return render_template('compare/wait.html', compare_id=compare_id)
def _get_similar_files(self, fo_list: List[FileObject], exclusive_files: Dict[str, List[str]]) -> Tuple[List[list], dict]:
    """Collect similar file pairs across every unordered pair of parents.

    For each 2-combination ``(first, second)`` of ``fo_list`` and each file
    listed in ``exclusive_files[first.uid]``, the ``(pair, value)`` results of
    ``self._find_similar_file_for`` are gathered.

    :param fo_list: parent objects; only their ``uid`` attribute is read here.
    :param exclusive_files: mapping from parent uid to the files to look up.
    :return: tuple ``(similarity_sets, similarity)`` where ``similarity_sets``
        is ``generate_similarity_sets`` applied to the de-duplicated pairs and
        ``similarity`` maps the compare id of each pair to its value (a later
        pair with the same id overwrites an earlier one).
    """
    found_pairs = []
    similarity = {}
    for first_parent, second_parent in combinations(fo_list, 2):
        first_uid = first_parent.uid
        for exclusive_file in exclusive_files[first_uid]:
            matches = self._find_similar_file_for(exclusive_file, first_uid, second_parent)
            for pair, pair_value in matches:
                found_pairs.append(pair)
                similarity[convert_uid_list_to_compare_id(pair)] = pair_value
    unique_pairs = remove_duplicates_from_unhashable(found_pairs)
    return generate_similarity_sets(unique_pairs), similarity
def _get_similarity_value(group_of_similar_files: List[str], similarity_dict: Dict[str, str]) -> str: similarities_list = [] for id_tuple in combinations(group_of_similar_files, 2): similar_file_pair_id = convert_uid_list_to_compare_id(id_tuple) if similar_file_pair_id in similarity_dict: similarities_list.append(similarity_dict[similar_file_pair_id]) if not similarities_list: return '' if len(similarities_list) == 1: return similarities_list.pop() similarities_list = [int(v) for v in similarities_list] return '{} ‒ {}'.format(min(similarities_list), max(similarities_list))
def _get_similar_files(self, fo_list, exclusive_files):
    """Collect similar files for every ordered pair of distinct parents.

    Each element of ``fo_list`` takes the role of the first parent once and
    is compared against all remaining elements.  For every file in
    ``exclusive_files[first.uid]`` the ``(item, value)`` results of
    ``self._find_similar_file_for`` are gathered.

    :param fo_list: list of parent objects exposing a ``uid`` attribute.
    :param exclusive_files: mapping from parent uid to the files to look up.
    :return: tuple of (de-duplicated list of lists built from the similarity
        sets, dict mapping the compare id of each item to its value).
    """
    collected = []
    similarity = {}
    for position, _ in enumerate(fo_list):
        # NOTE(review): the whole list is deep-copied on every iteration, so
        # the helper below works on copies -- confirm whether that is needed.
        others = deepcopy(fo_list)
        first_parent = others.pop(position)
        for second_parent in others:
            for exclusive_file in exclusive_files[first_parent.uid]:
                matches = self._find_similar_file_for(
                    file=exclusive_file,
                    parent_id=first_parent.uid,
                    potential_matches=second_parent,
                )
                for match, match_value in matches:
                    collected.append(match)
                    similarity[convert_uid_list_to_compare_id(match)] = match_value

    similarity_sets = self.produce_similarity_sets(remove_duplicates_from_list_of_lists(collected))
    # The return value is ignored, so this presumably prunes in place -- TODO confirm.
    remove_subsets_from_list_of_sets(similarity_sets)
    set_lists = list_of_sets_to_list_of_lists(similarity_sets)
    return remove_duplicates_from_list_of_lists(set_lists), similarity