def __init__(self, deduplicator):
    """
    Initialize the instance for the given deduplicator.

    :param deduplicator: object stored on ``self.deduplicator``
    """
    self.config = DeduplicatorConfig()

    # The parent initializer receives the configured processing timeout
    # converted to seconds.
    seconds = self.config.DAEMON_PROCESSING_TIMEOUT.value.total_seconds()
    super().__init__(seconds)

    self.progress_manager = ProgressManager()
    self.deduplicator = deduplicator
    # The event handler gets a reference back to this instance.
    self.event_handler = EventHandler(self)
    self.observers = []
Esempio n. 2
0
def c_deduplicate(skip_analyse_phase: bool, dry_run: bool):
    """
    Run ``deduplicate_all`` on an interactive deduplicator and print the result.

    :param skip_analyse_phase: forwarded to ``deduplicate_all`` as
        ``skip_analyze_phase``
    :param dry_run: when not ``None``, written to the ``DRY_RUN`` config value
    """
    settings = DeduplicatorConfig()
    if dry_run is not None:
        settings.DRY_RUN.value = dry_run

    outcome = ImageMatchDeduplicator(interactive=True).deduplicate_all(
        skip_analyze_phase=skip_analyse_phase)

    # Empty echo before the result is printed.
    echo()
    outcome.print_to_console()
Esempio n. 3
0
    def __init__(self, processing_manager):
        """
        Initialize the handler and pre-compile its path filters.

        :param processing_manager: object stored on ``self.processing_manager``
        """
        super().__init__()
        self.processing_manager = processing_manager

        self.config = DeduplicatorConfig()

        # Escape the configured values so they are matched literally:
        # unescaped, a "." would match any character and a backslash in a
        # path (e.g. on Windows) could make re.compile raise.
        directories = "|".join(
            re.escape(str(directory))
            for directory in self.config.SOURCE_DIRECTORIES.value)
        extensions = "|".join(
            re.escape(extension)
            for extension in self.config.FILE_EXTENSION_FILTER.value)

        # Matches strings that start with one of the source directories.
        self.directory_regex = re.compile(rf"^({directories}).*$")
        # Matches strings that end with one of the extensions, ignoring case.
        self.file_regex = re.compile(rf"^.*({extensions})$", re.IGNORECASE)
    def __init__(self, interactive: bool):
        """
        Create the progress manager, the config and the signature store.

        :param interactive: whether cli output should be interactive or not
        """
        self.interactive = interactive
        self._progress_manager = ProgressManager()

        cfg = DeduplicatorConfig()
        self._config = cfg

        # All store backend settings are read from the config object.
        backend_options = {
            "host": cfg.ELASTICSEARCH_HOST.value,
            "port": cfg.ELASTICSEARCH_PORT.value,
            "el_index": cfg.ELASTICSEARCH_INDEX.value,
            "use_exif_data": cfg.ANALYSIS_USE_EXIF_DATA.value,
            "max_dist": cfg.ELASTICSEARCH_MAX_DISTANCE.value,
            "setup_database": cfg.ELASTICSEARCH_AUTO_CREATE_INDEX.value,
        }
        self._persistence: ImageSignatureStore = ElasticSearchStoreBackend(
            **backend_options)
Esempio n. 5
0
def c_daemon(dry_run: bool):
    """
    Run one deduplication pass, start the processing manager and block until
    interrupted from the keyboard.

    :param dry_run: when not ``None``, written to the ``DRY_RUN`` config value
    """
    echo("Starting daemon...")

    settings: DeduplicatorConfig = DeduplicatorConfig()
    if dry_run is not None:
        settings.DRY_RUN.value = dry_run

    if settings.STATS_ENABLED.value:
        # prometheus_client is only imported when stats are enabled.
        from prometheus_client import start_http_server
        echo("Starting prometheus reporter...")
        start_http_server(settings.STATS_PORT.value)

    dedup = ImageMatchDeduplicator(interactive=False)
    manager = ProcessingManager(dedup)

    # A full pass runs before the processing manager is started.
    dedup.deduplicate_all()
    manager.start()

    # Keep this thread alive; Ctrl+C stops the processing manager.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        manager.stop()
Esempio n. 6
0
 def setUp(self):
     """Create the config and the non-interactive deduplicator under test."""
     self.config = DeduplicatorConfig()
     # NOTE(review): the import is deferred until after the config object
     # exists - presumably deliberate; confirm before moving it to module level.
     from py_image_dedup.library.deduplicator import ImageMatchDeduplicator
     self.under_test = ImageMatchDeduplicator(interactive=False)
Esempio n. 7
0
from py_image_dedup.config import DeduplicatorConfig
from py_image_dedup.library.deduplicator import ImageMatchDeduplicator

# Example script: override config values in code, run one deduplication pass
# and print the result to the console.
config = DeduplicatorConfig()
config.DRY_RUN.value = True
# config.ELASTICSEARCH_HOST.value = "192.168.2.24"
config.SOURCE_DIRECTORIES.value = [
    # Alternative source directories, kept for reference:
    # r'/home/markus/py-image-dedup/dir1/',
    # r'/home/markus/py-image-dedup/dir2/'
    # r'/mnt/data/py-dedup-test/Syncthing/',
    # r'/mnt/sdb2/Sample/',
    r'./tests/images/'
]
config.SEARCH_ACROSS_ROOT_DIRS.value = True

config.ANALYSIS_THREADS.value = 8
config.ANALYSIS_USE_EXIF_DATA.value = False

config.ELASTICSEARCH_MAX_DISTANCE.value = 0.30
# config.MAX_FILE_MODIFICATION_TIME_DELTA.value = timedelta(minutes=5)
config.DEDUPLICATOR_DUPLICATES_TARGET_DIRECTORY.value = "./duplicates/"
config.REMOVE_EMPTY_FOLDERS.value = True

# ImageMatchDeduplicator.__init__ requires the ``interactive`` argument;
# calling it without arguments raises TypeError. The result is printed to
# the console below, so run interactively.
deduplicator = ImageMatchDeduplicator(interactive=True)

# max_file_modification_time_diff=1 * 1000 * 60 * 5,

result = deduplicator.deduplicate_all(skip_analyze_phase=False)

result.print_to_console()