Exemple #1
0
    def upsert_document(self, index_task):
        """Write the indexed fields of ``index_task`` to its IndexDocument.

        The document is selected by ``index_task['url']`` and updated via
        ``update_one`` with ``upsert=True``.  The values written are the
        url, its ``hash_url`` value, the hostname returned by
        ``crawlmanager.extract_hostname`` and the ``'meta_data'`` and
        ``'features'`` entries of ``index_task['document']``.
        """
        url = index_task['url']

        # Keyword names use the "set__<field>" form expected by update_one.
        fields = dict(
            set__url=url,
            set__url_hash=hash_url(url),
            set__host=crawlmanager.extract_hostname(url),
            set__meta_data=index_task['document']['meta_data'],
            set__features=index_task['document']['features'],
        )

        matching = IndexDocument.objects(url=url)
        matching.update_one(upsert=True, **fields)
Exemple #2
0
    def test_index_document(self):
        """A saved IndexDocument's url_hash must equal hash_url(url)."""
        doc = IndexDocument(url="http://example.com", host="example.com")
        doc.save()

        stored_hash = doc.url_hash
        expected_hash = hash_url(doc.url)
        self.assertEqual(stored_hash, expected_hash)
Exemple #3
0
    def delete_document(self, index_task):
        """Delete the IndexDocument entries matching the task's url.

        The url is read from ``index_task['url']``.
        """
        target_url = index_task['url']
        matching = IndexDocument.objects(url=target_url)
        matching.delete()