Exemple #1
0
def full_dedup(limit=1000):
    """Page through every indexed file and run ``from_path`` on each one.

    The index is walked in ``_id`` order using ``search_after`` pagination.
    For each hit that has both a name and a path,
    ``IndexedFile.from_path(system, path)`` is called; that call is relied on
    to collapse documents sharing the same system and path (its
    implementation is not visible here).

    Failures for a single document are printed and skipped so one bad record
    does not abort the whole sweep.

    Args:
        limit: Number of documents requested per page.
    """
    from designsafe.apps.data.models.elasticsearch import IndexedFile

    search_after = None
    while True:
        # Build the page request in one place; the first page has no cursor.
        page_args = {'size': limit}
        if search_after is not None:
            page_args['search_after'] = search_after
        res = IndexedFile.search().sort('_id').extra(**page_args).execute()

        if not res.hits:
            break

        for hit in res.hits:
            # Documents without a name or path cannot be looked up by path.
            if hit.name is None or hit.path is None:
                continue

            print(hit.meta.id)
            try:
                IndexedFile.from_path(hit.system, hit.path)
            except Exception as e:
                # Deliberate best-effort: report and move on to the next hit.
                print(e)

        # The sort values of the last raw hit are the cursor for the next page.
        search_after = res.hits.hits[-1]['sort']
        logger.debug(search_after)
Exemple #2
0
    def test_from_path_multiple_hits(self, mock_refresh, mock_get, mock_search,
                                     mock_delete):
        """
        When there are multiple files sharing a system and path, ensure we delete
        all but one and return the remaining document.
        """
        search_res = IndexedFile(
            name='res1', system='test.system', path='/path/to/res1')

        # The mocked search reports three matching documents, and the mocked
        # get returns the document that should survive.
        mock_res = MagicMock()
        mock_res.hits.total.value = 3
        mock_search().filter().execute.return_value = mock_res
        mock_get.return_value = search_res

        doc_from_path = IndexedFile.from_path('test.system', '/path/to/res1')

        # delete() on the filtered search must have been invoked exactly once.
        self.assertEqual(mock_search().filter().delete.call_count, 1)

        self.assertEqual(doc_from_path, search_res)
Exemple #3
0
    def test_from_path_multiple_hits(self, mock_search, mock_delete):
        """
        With several documents sharing one system and path, from_path should
        delete the extras and return the single remaining document.
        """
        expected_doc = IndexedFile(
            name='res1', system='test.system', path='/path/to/res1')

        def fake_getitem(key):
            # A slice yields the two surplus documents; an index yields one.
            if isinstance(key, slice):
                return [expected_doc, expected_doc]
            return expected_doc

        # Search result reporting 3 hits that supports indexing and slicing.
        three_hits = MagicMock()
        three_hits.hits.total = 3
        three_hits.__getitem__.side_effect = fake_getitem
        mock_search().filter().filter().execute.return_value = three_hits

        returned_doc = IndexedFile.from_path('test.system', '/path/to/res1')

        # The lookup filters first on the exact system, then on the exact path.
        system_term = {'system._exact': 'test.system'}
        path_term = {'path._exact': '/path/to/res1'}
        mock_search().filter.assert_called_with('term', **system_term)
        mock_search().filter().filter.assert_called_with('term', **path_term)

        self.assertEqual(mock_delete.call_count, 2)
        self.assertEqual(returned_doc, expected_doc)
Exemple #4
0
def repair_paths(limit=1000):
    """Rewrite the stored path of every indexed file, one page at a time.

    The index is walked in ``_uid`` order using ``search_after`` pagination.
    For each hit, ``repair_path(name, path)`` supplies the new path, and the
    document's ``path`` and ``basePath`` (the new path's directory name) are
    updated. ``IndexedFile.from_path`` is then called to remove any
    duplicates.

    Args:
        limit: Number of documents requested per page.
    """
    from designsafe.apps.data.models.elasticsearch import IndexedFile
    file_search = IndexedFile.search().sort('_uid').extra(size=limit)
    res = file_search.execute()

    while res.hits:
        for hit in res.hits:
            # %-formatting prints "name path" identically on Python 2 and 3;
            # the former print statement was a SyntaxError on Python 3.
            print("%s %s" % (hit.name, hit.path))
            new_path = repair_path(hit.name, hit.path)
            # Send both fields in a single partial update.
            hit.update(path=new_path, basePath=os.path.dirname(new_path))

            # use from_path to remove any duplicates.
            IndexedFile.from_path(hit.system, hit.path)

        # The sort values of the last raw hit are the cursor for the next page.
        search_after = res.hits.hits[-1]['sort']
        logger.debug(search_after)
        file_search = IndexedFile.search().sort('_uid').extra(
            size=limit, search_after=search_after)
        res = file_search.execute()
Exemple #5
0
    def test_from_path_1_hit(self, mock_search):
        """
        With exactly one matching document, from_path should return that
        document after filtering on the exact system and path.
        """
        expected_doc = IndexedFile(
            name='res1', system='test.system', path='/path/to/res1')

        # Search result reporting a single hit; any indexing returns the doc.
        single_hit = MagicMock()
        single_hit.hits.total = 1
        single_hit.__getitem__.return_value = expected_doc
        mock_search().filter().filter().execute.return_value = single_hit

        returned_doc = IndexedFile.from_path('test.system', '/path/to/res1')

        system_term = {'system._exact': 'test.system'}
        path_term = {'path._exact': '/path/to/res1'}
        mock_search().filter.assert_called_with('term', **system_term)
        mock_search().filter().filter.assert_called_with('term', **path_term)

        self.assertEqual(returned_doc, expected_doc)
Exemple #6
0
 def test_from_path_raises_when_no_hits(self, mock_refresh, mock_search):
     """from_path raises DocumentNotFound when the search reports zero hits."""
     empty_result = mock_search().filter().execute.return_value
     empty_result.hits.total.value = 0
     self.assertRaises(
         DocumentNotFound, IndexedFile.from_path, 'test.system', '/')
Exemple #7
0
 def test_from_path_with_404(self, mock_refresh, mock_search):
     """A TransportError(404) raised by the search propagates out of from_path."""
     failing_execute = mock_search().filter().execute
     failing_execute.side_effect = TransportError(404)
     self.assertRaises(
         TransportError, IndexedFile.from_path, 'test.system', '/')