def generic_search(self, search_dict, skip=0, limit=0, only_fo_parent_firmware=False):
    """
    Look up entries matching ``search_dict`` and return their ``_id`` values.

    The firmware collection is queried first (sorted by ``vendor``). If that
    query did not already fill the requested page (or ``limit`` is 0), the
    search continues in the file object collection, with ``skip``/``limit``
    reduced by what the firmware query already accounted for.

    :param search_dict: the query, either as a dict or as a JSON string
        (strings are decoded with ``json.loads``).
    :param skip: number of leading results to skip.
    :param limit: maximum number of results; with 0 the file object search
        is always performed.
    :param only_fo_parent_firmware: if true, file objects are not returned
        themselves. Instead, the keys of their ``virtual_file_path`` entries
        are used to look up firmware entries that do not match
        ``search_dict`` directly, and those are merged into the result.
    :return: a list of ``_id`` values, or -- if any exception was raised --
        the error message string that was also logged as a warning.
    """
    try:
        if isinstance(search_dict, str):
            search_dict = json.loads(search_dict)

        firmware_cursor = self.firmwares.find(search_dict, {'_id': 1}, skip=skip, limit=limit, sort=[('vendor', 1)])
        result = [entry['_id'] for entry in firmware_cursor]

        page_not_filled = len(result) < limit or limit == 0
        if not page_not_filled:
            return result

        # Shift the paging window by what the firmware query already covered.
        firmware_total = self.get_firmware_number(query=search_dict)
        skip_fo = 0
        if skip > firmware_total:
            skip_fo = skip - firmware_total
        limit_fo = 0
        if limit > 0:
            limit_fo = limit - len(result)

        if only_fo_parent_firmware:
            # only searching for parents of matching file objects
            fo_cursor = self.file_objects.find(search_dict, {'virtual_file_path': 1})
            parent_uids = set()
            for entry in fo_cursor:
                parent_uids.update(entry['virtual_file_path'].keys())
            # '$nor' of ('_id' not in parents) and search_dict: keeps entries
            # whose '_id' is a parent uid and that do not match search_dict.
            query_filter = {'$nor': [{'_id': {'$nin': list(parent_uids)}}, search_dict]}
            parent_cursor = self.firmwares.find(
                query_filter, {'_id': 1}, skip=skip_fo, limit=limit_fo, sort=[('file_name', 1)]
            )
            parents = [entry['_id'] for entry in parent_cursor]
            return remove_duplicates_from_list(result + parents)

        fo_cursor = self.file_objects.find(
            search_dict, {'_id': 1}, skip=skip_fo, limit=limit_fo, sort=[('file_name', 1)]
        )
        result.extend(entry['_id'] for entry in fo_cursor)
        return result
    except Exception as exception:
        error_message = 'could not process search request: {} {}'.format(type(exception).__name__, exception)
        logging.warning(error_message)
        return error_message
def _get_similar_files(self, fo_list: List[FileObject], exclusive_files: Dict[str, List[str]]) -> Tuple[List[list], dict]:
    """
    Collect similar files between every pair of objects in ``fo_list``.

    For each pair ``(first, second)`` produced by ``combinations(fo_list, 2)``,
    every file listed in ``exclusive_files[first.uid]`` is handed to
    ``self._find_similar_file_for`` together with ``second``; each
    ``(pair, value)`` it yields is recorded.

    :param fo_list: the objects to compare pairwise.
    :param exclusive_files: maps an object's ``uid`` to the files that are
        checked for that object.
    :return: a tuple of (the result of ``generate_similarity_sets`` on the
        de-duplicated pairs, a dict mapping the compare id of each pair to
        its value).
    """
    matched_pairs = []
    scores = {}
    for first, second in combinations(fo_list, 2):
        for candidate in exclusive_files[first.uid]:
            matches = self._find_similar_file_for(candidate, first.uid, second)
            for pair, score in matches:
                matched_pairs.append(pair)
                compare_id = convert_uid_list_to_compare_id(pair)
                scores[compare_id] = score
    unique_pairs = remove_duplicates_from_list(matched_pairs)
    return generate_similarity_sets(unique_pairs), scores
def get_all_uids_in_string(string):
    """
    Return the sorted, de-duplicated list of uid-like substrings in ``string``.

    A match consists of 64 characters from ``a-f0-9``, an underscore, and
    one or more decimal digits.

    :param string: the text to scan.
    :return: the matches with duplicates removed, in ascending sorted order.
    """
    uid_pattern = r'[a-f0-9]{64}_[0-9]+'
    unique_uids = remove_duplicates_from_list(re.findall(uid_pattern, string))
    return sorted(unique_uids)