def DiscardBlankPerceptualHashes(perceptual_hashes):
    
    # keep only the hashes that are more than 4 bits away from the blank (flat-colour) perceptual hash
    perceptual_hashes = { perceptual_hash for perceptual_hash in perceptual_hashes if HydrusData.Get64BitHammingDistance(perceptual_hash, CC.BLANK_PERCEPTUAL_HASH) > 4 }
    
    return perceptual_hashes

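# The helper below is a minimal, self-contained sketch of what a 64-bit Hamming distance
# such as HydrusData.Get64BitHammingDistance might compute, assuming each perceptual hash
# is represented as a 64-bit integer. The function name and the integer representation are
# assumptions for illustration, not the library's actual signature.
def hamming_distance_64_sketch(phash_a, phash_b):
    
    # XOR leaves a 1 bit wherever the two hashes differ; count those set bits
    return bin((phash_a ^ phash_b) & 0xFFFFFFFFFFFFFFFF).count('1')

# usage: a hash within Hamming distance 4 of the blank hash would be discarded above
# hamming_distance_64_sketch(0b1011100, 0b0000000) -> 4
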
def Search(self, hash_id, max_hamming_distance):
    
    if max_hamming_distance == 0:
        
        similar_hash_ids = self._STL(self._c.execute('SELECT hash_id FROM shape_perceptual_hash_map WHERE phash_id IN ( SELECT phash_id FROM shape_perceptual_hash_map WHERE hash_id = ? );', (hash_id,)))
        
        similar_hash_ids_and_distances = [(similar_hash_id, 0) for similar_hash_id in similar_hash_ids]
        
    else:
        
        search_radius = max_hamming_distance
        
        top_node_result = self._c.execute('SELECT phash_id FROM shape_vptree WHERE parent_id IS NULL;').fetchone()
        
        if top_node_result is None:
            
            return []
            
        (root_node_phash_id,) = top_node_result
        
        search = self._STL(self._c.execute('SELECT phash FROM shape_perceptual_hashes NATURAL JOIN shape_perceptual_hash_map WHERE hash_id = ?;', (hash_id,)))
        
        if len(search) == 0:
            
            return []
            
        similar_phash_ids_to_distances = {}
        
        num_cycles = 0
        total_nodes_searched = 0
        
        for search_phash in search:
            
            next_potentials = [root_node_phash_id]
            
            while len(next_potentials) > 0:
                
                current_potentials = next_potentials
                next_potentials = []
                
                num_cycles += 1
                total_nodes_searched += len(current_potentials)
                
                for group_of_current_potentials in HydrusData.SplitListIntoChunks(current_potentials, 10000):
                    
                    # this is split into fixed lists of results of subgroups because as an iterable it was causing crashes on linux!!
                    # after investigation, it seemed to be SQLite having a problem with part of Get64BitHammingDistance touching phashes it presumably was still hanging on to
                    # the crash was in sqlite code, again presumably on subsequent fetch
                    # adding a delay in seemed to fix it as well. guess it was some memory maintenance buffer/bytes thing
                    # anyway, we now just get the whole lot of results first and then work on the whole lot
                    '''
                    #old method
                    select_statement = 'SELECT phash_id, phash, radius, inner_id, outer_id FROM shape_perceptual_hashes NATURAL JOIN shape_vptree WHERE phash_id = ?;'
                    
                    results = list( self._ExecuteManySelectSingleParam( select_statement, group_of_current_potentials ) )
                    '''
                    
                    with HydrusDB.TemporaryIntegerTable(self._c, group_of_current_potentials, 'phash_id') as temp_table_name:
                        
                        # temp phash_ids to actual phashes and tree info
                        results = self._c.execute('SELECT phash_id, phash, radius, inner_id, outer_id FROM {} CROSS JOIN shape_perceptual_hashes USING ( phash_id ) CROSS JOIN shape_vptree USING ( phash_id );'.format(temp_table_name)).fetchall()
                        
                    for (node_phash_id, node_phash, node_radius, inner_phash_id, outer_phash_id) in results:
                        
                        # first check the node itself--is it similar?
                        
                        node_hamming_distance = HydrusData.Get64BitHammingDistance(search_phash, node_phash)
                        
                        if node_hamming_distance <= search_radius:
                            
                            if node_phash_id in similar_phash_ids_to_distances:
                                
                                current_distance = similar_phash_ids_to_distances[node_phash_id]
                                
                                similar_phash_ids_to_distances[node_phash_id] = min(node_hamming_distance, current_distance)
                                
                            else:
                                
                                similar_phash_ids_to_distances[node_phash_id] = node_hamming_distance
                                
                        # now how about its children?
                        
                        if node_radius is not None:
                            
                            # we have two spheres--node and search--their centers separated by node_hamming_distance
                            # we want to search inside/outside the node_sphere if the search_sphere intersects with those spaces
                            # there are four possibles:
                            # (----N----)-(--S--) intersects with outer only - distance between N and S > their radii
                            # (----N---(-)-S--) intersects with both
                            # (----N-(--S-)-) intersects with both
                            # (---(-N-S--)-) intersects with inner only - distance between N and S + radius_S does not exceed radius_N
                            
                            if inner_phash_id is not None:
                                
                                spheres_disjoint = node_hamming_distance > (node_radius + search_radius)
                                
                                if not spheres_disjoint: # i.e. they intersect at some point
                                    
                                    next_potentials.append(inner_phash_id)
                                    
                            if outer_phash_id is not None:
                                
                                search_sphere_subset_of_node_sphere = (node_hamming_distance + search_radius) <= node_radius
                                
                                if not search_sphere_subset_of_node_sphere: # i.e. search sphere intersects with non-node sphere space at some point
                                    
                                    next_potentials.append(outer_phash_id)
                                    
        if HG.db_report_mode:
            
            HydrusData.ShowText('Similar file search touched {} nodes over {} cycles.'.format(HydrusData.ToHumanInt(total_nodes_searched), HydrusData.ToHumanInt(num_cycles)))
            
        # so, now we have phash_ids and distances. let's map that to actual files.
        # files can have multiple phashes, and phashes can refer to multiple files, so let's make sure we are setting the smallest distance we found
        
        similar_phash_ids = list(similar_phash_ids_to_distances.keys())
        
        with HydrusDB.TemporaryIntegerTable(self._c, similar_phash_ids, 'phash_id') as temp_table_name:
            
            # temp phashes to hash map
            similar_phash_ids_to_hash_ids = HydrusData.BuildKeyToListDict(self._c.execute('SELECT phash_id, hash_id FROM {} CROSS JOIN shape_perceptual_hash_map USING ( phash_id );'.format(temp_table_name)))
            
        similar_hash_ids_to_distances = {}
        
        for (phash_id, hash_ids) in similar_phash_ids_to_hash_ids.items():
            
            distance = similar_phash_ids_to_distances[phash_id]
            
            for hash_id in hash_ids:
                
                if hash_id not in similar_hash_ids_to_distances:
                    
                    similar_hash_ids_to_distances[hash_id] = distance
                    
                else:
                    
                    current_distance = similar_hash_ids_to_distances[hash_id]
                    
                    if distance < current_distance:
                        
                        similar_hash_ids_to_distances[hash_id] = distance
                        
        similar_hash_ids_and_distances = list(similar_hash_ids_to_distances.items())
        
    return similar_hash_ids_and_distances

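# A small standalone sketch of the branch-pruning rule used in the search above: given the
# distance from the query hash to a node, the node's radius and the search radius, decide
# which child branches of a vp-tree node can still contain matches. The function and branch
# names here are illustrative only, not part of the actual class.
def vptree_branches_to_search_sketch(distance_to_node, node_radius, search_radius):
    
    branches = []
    
    # the inner branch can be skipped only if the search sphere and the node sphere are disjoint
    if distance_to_node <= node_radius + search_radius:
        
        branches.append('inner')
        
    # the outer branch can be skipped only if the search sphere lies entirely inside the node sphere
    if distance_to_node + search_radius > node_radius:
        
        branches.append('outer')
        
    return branches

# usage: a query far outside the node sphere only needs the outer branch
# vptree_branches_to_search_sketch(distance_to_node = 20, node_radius = 5, search_radius = 4) -> ['outer']
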
def _PopBestRootNode(self, node_rows):
    
    if len(node_rows) == 1:
        
        root_row = node_rows.pop()
        
        return root_row
        
    MAX_VIEWPOINTS = 256
    MAX_SAMPLE = 64
    
    if len(node_rows) > MAX_VIEWPOINTS:
        
        viewpoints = random.sample(node_rows, MAX_VIEWPOINTS)
        
    else:
        
        viewpoints = node_rows
        
    if len(node_rows) > MAX_SAMPLE:
        
        sample = random.sample(node_rows, MAX_SAMPLE)
        
    else:
        
        sample = node_rows
        
    final_scores = []
    
    for (v_id, v_phash) in viewpoints:
        
        views = sorted((HydrusData.Get64BitHammingDistance(v_phash, s_phash) for (s_id, s_phash) in sample if v_id != s_id))
        
        # let's figure out the ratio of left_children to right_children, preferring 1:1, and convert it to a discrete integer score
        
        median_index = len(views) // 2
        
        radius = views[median_index]
        
        num_left = len([1 for view in views if view < radius])
        num_radius = len([1 for view in views if view == radius])
        num_right = len([1 for view in views if view > radius])
        
        if num_left <= num_right:
            
            num_left += num_radius
            
        else:
            
            num_right += num_radius
            
        smaller = min(num_left, num_right)
        larger = max(num_left, num_right)
        
        ratio = smaller / larger
        
        ratio_score = int(ratio * MAX_SAMPLE / 2)
        
        # now let's calc the standard deviation--larger sd tends to mean less sphere overlap when searching
        
        mean_view = sum(views) / len(views)
        squared_diffs = [(view - mean_view) ** 2 for view in views]
        sd = (sum(squared_diffs) / len(squared_diffs)) ** 0.5
        
        final_scores.append((ratio_score, sd, v_id))
        
    final_scores.sort()
    
    # we now have a list like [ ( 11, 4.0, [id] ), ( 15, 3.7, [id] ), ( 15, 4.3, [id] ) ]
    
    (ratio_gumpf, sd_gumpf, root_id) = final_scores.pop()
    
    for (i, (v_id, v_phash)) in enumerate(node_rows):
        
        if v_id == root_id:
            
            root_row = node_rows.pop(i)
            
            return root_row

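# A hedged sketch of the scoring idea above, operating on a plain list of distances ('views')
# from one candidate vantage point to the sampled nodes. It returns the same
# (balance score, standard deviation) pair the method sorts on; the function name and
# standalone form are illustrative assumptions, not part of the actual class.
def score_vantage_point_sketch(views, max_sample = 64):
    
    views = sorted(views)
    
    radius = views[len(views) // 2]
    
    num_left = sum(1 for view in views if view < radius)
    num_radius = sum(1 for view in views if view == radius)
    num_right = sum(1 for view in views if view > radius)
    
    # distances landing exactly on the radius count for whichever side is currently smaller
    if num_left <= num_right:
        
        num_left += num_radius
        
    else:
        
        num_right += num_radius
        
    ratio_score = int(min(num_left, num_right) / max(num_left, num_right) * max_sample / 2)
    
    mean_view = sum(views) / len(views)
    sd = (sum((view - mean_view) ** 2 for view in views) / len(views)) ** 0.5
    
    return (ratio_score, sd)

# usage: a perfectly balanced split earns the maximum score of max_sample / 2
# score_vantage_point_sketch([1, 2, 3, 4], max_sample = 64) -> (32, 1.118...)
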
def _AddLeaf(self, phash_id, phash):
    
    result = self._c.execute('SELECT phash_id FROM shape_vptree WHERE parent_id IS NULL;').fetchone()
    
    if result is None:
        
        parent_id = None
        
    else:
        
        (root_node_phash_id,) = result
        
        ancestors_we_are_inside = []
        ancestors_we_are_outside = []
        
        an_ancestor_is_unbalanced = False
        
        next_ancestor_id = root_node_phash_id
        
        while next_ancestor_id is not None:
            
            ancestor_id = next_ancestor_id
            
            (ancestor_phash, ancestor_radius, ancestor_inner_id, ancestor_inner_population, ancestor_outer_id, ancestor_outer_population) = self._c.execute('SELECT phash, radius, inner_id, inner_population, outer_id, outer_population FROM shape_perceptual_hashes NATURAL JOIN shape_vptree WHERE phash_id = ?;', (ancestor_id,)).fetchone()
            
            distance_to_ancestor = HydrusData.Get64BitHammingDistance(phash, ancestor_phash)
            
            if ancestor_radius is None or distance_to_ancestor <= ancestor_radius:
                
                ancestors_we_are_inside.append(ancestor_id)
                ancestor_inner_population += 1
                next_ancestor_id = ancestor_inner_id
                
                if ancestor_inner_id is None:
                    
                    self._c.execute('UPDATE shape_vptree SET inner_id = ?, radius = ? WHERE phash_id = ?;', (phash_id, distance_to_ancestor, ancestor_id))
                    
                    parent_id = ancestor_id
                    
            else:
                
                ancestors_we_are_outside.append(ancestor_id)
                ancestor_outer_population += 1
                next_ancestor_id = ancestor_outer_id
                
                if ancestor_outer_id is None:
                    
                    self._c.execute('UPDATE shape_vptree SET outer_id = ? WHERE phash_id = ?;', (phash_id, ancestor_id))
                    
                    parent_id = ancestor_id
                    
            if not an_ancestor_is_unbalanced and ancestor_inner_population + ancestor_outer_population > 16:
                
                larger = max(ancestor_inner_population, ancestor_outer_population)
                smaller = min(ancestor_inner_population, ancestor_outer_population)
                
                if smaller / larger < 0.5:
                    
                    self._c.execute('INSERT OR IGNORE INTO shape_maintenance_branch_regen ( phash_id ) VALUES ( ? );', (ancestor_id,))
                    
                    # we only do this for the eldest ancestor, as the eventual rebalancing will affect all children
                    
                    an_ancestor_is_unbalanced = True
                    
        self._c.executemany('UPDATE shape_vptree SET inner_population = inner_population + 1 WHERE phash_id = ?;', ((ancestor_id,) for ancestor_id in ancestors_we_are_inside))
        self._c.executemany('UPDATE shape_vptree SET outer_population = outer_population + 1 WHERE phash_id = ?;', ((ancestor_id,) for ancestor_id in ancestors_we_are_outside))
        
    radius = None
    
    inner_id = None
    inner_population = 0
    
    outer_id = None
    outer_population = 0
    
    self._c.execute('INSERT OR REPLACE INTO shape_vptree ( phash_id, parent_id, radius, inner_id, inner_population, outer_id, outer_population ) VALUES ( ?, ?, ?, ?, ?, ?, ? );', (phash_id, parent_id, radius, inner_id, inner_population, outer_id, outer_population))

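# A minimal sketch of the rebalance trigger used above: a branch is queued for regeneration
# once it holds more than 16 descendants and one side has less than half the population of
# the other. The standalone function name is illustrative only.
def branch_is_unbalanced_sketch(inner_population, outer_population):
    
    if inner_population + outer_population <= 16:
        
        return False
        
    larger = max(inner_population, outer_population)
    smaller = min(inner_population, outer_population)
    
    return smaller / larger < 0.5

# usage:
# branch_is_unbalanced_sketch(20, 5) -> True
# branch_is_unbalanced_sketch(10, 9) -> False
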
def _GenerateBranch(self, job_key, parent_id, phash_id, phash, children):
    
    process_queue = collections.deque()
    
    process_queue.append((parent_id, phash_id, phash, children))
    
    insert_rows = []
    
    num_done = 0
    num_to_do = len(children) + 1
    
    while len(process_queue) > 0:
        
        job_key.SetVariable('popup_text_2', 'generating new branch -- ' + HydrusData.ConvertValueRangeToPrettyString(num_done, num_to_do))
        
        (parent_id, phash_id, phash, children) = process_queue.popleft()
        
        if len(children) == 0:
            
            inner_id = None
            inner_population = 0
            
            outer_id = None
            outer_population = 0
            
            radius = None
            
        else:
            
            children = sorted(((HydrusData.Get64BitHammingDistance(phash, child_phash), child_id, child_phash) for (child_id, child_phash) in children))
            
            median_index = len(children) // 2
            
            median_radius = children[median_index][0]
            
            inner_children = [(child_id, child_phash) for (distance, child_id, child_phash) in children if distance < median_radius]
            radius_children = [(child_id, child_phash) for (distance, child_id, child_phash) in children if distance == median_radius]
            outer_children = [(child_id, child_phash) for (distance, child_id, child_phash) in children if distance > median_radius]
            
            if len(inner_children) <= len(outer_children):
                
                radius = median_radius
                
                inner_children.extend(radius_children)
                
            else:
                
                radius = median_radius - 1
                
                outer_children.extend(radius_children)
                
            inner_population = len(inner_children)
            outer_population = len(outer_children)
            
            (inner_id, inner_phash) = self._PopBestRootNode(inner_children) #HydrusData.MedianPop( inner_children )
            
            if len(outer_children) == 0:
                
                outer_id = None
                
            else:
                
                (outer_id, outer_phash) = self._PopBestRootNode(outer_children) #HydrusData.MedianPop( outer_children )
                
        insert_rows.append((phash_id, parent_id, radius, inner_id, inner_population, outer_id, outer_population))
        
        if inner_id is not None:
            
            process_queue.append((phash_id, inner_id, inner_phash, inner_children))
            
        if outer_id is not None:
            
            process_queue.append((phash_id, outer_id, outer_phash, outer_children))
            
        num_done += 1
        
    job_key.SetVariable('popup_text_2', 'branch constructed, now committing')
    
    self._c.executemany('INSERT OR REPLACE INTO shape_vptree ( phash_id, parent_id, radius, inner_id, inner_population, outer_id, outer_population ) VALUES ( ?, ?, ?, ?, ?, ?, ? );', insert_rows)

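# A self-contained sketch of the median-radius split performed above, working on plain
# (distance, child) pairs instead of db rows. It shows how the radius is chosen so that the
# children sitting exactly on the median distance go to whichever side is currently smaller.
# The function name and return shape are assumptions for illustration.
def split_children_by_median_sketch(distances_and_children):
    
    distances_and_children = sorted(distances_and_children)
    
    median_radius = distances_and_children[len(distances_and_children) // 2][0]
    
    inner = [child for (distance, child) in distances_and_children if distance < median_radius]
    on_radius = [child for (distance, child) in distances_and_children if distance == median_radius]
    outer = [child for (distance, child) in distances_and_children if distance > median_radius]
    
    if len(inner) <= len(outer):
        
        radius = median_radius
        
        inner.extend(on_radius)
        
    else:
        
        # shrink the radius by one so the on-radius children fall outside it
        radius = median_radius - 1
        
        outer.extend(on_radius)
        
    return (radius, inner, outer)

# usage:
# split_children_by_median_sketch([(2, 'a'), (5, 'b'), (5, 'c'), (9, 'd')]) -> (5, ['a', 'b', 'c'], ['d'])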