def test_search_datasets_tags(self):
        # Purpose: insert the provider and dataset fixtures, rebuild the
        # dataset tags, check the exact tag list stored on the dataset, then
        # search datasets by tags with and without a provider filter.
        #
        # Run only this test with:
        # nosetests -s -v widukind_common.tests.test_tags:SearchTagsTestCase.test_search_datasets_tags

        self.db[constants.COL_PROVIDERS].insert(self.doc_provider)
        self.assertEqual(self.db[constants.COL_PROVIDERS].count(), 1)

        self.db[constants.COL_DATASETS].insert(self.doc_dataset)
        self.assertEqual(self.db[constants.COL_DATASETS].count(), 1)

        tags_utils.update_tags_datasets(self.db, self.doc_provider["name"],
                                        self.doc_dataset["dataset_code"])

        query = {
            "provider_name": self.doc_dataset["provider_name"],
            "dataset_code": self.doc_dataset["dataset_code"]
        }
        dataset_doc = self.db[constants.COL_DATASETS].find_one(query)
        # Report a missing document as a test failure rather than as a
        # TypeError raised by the subscript on the next statement.
        self.assertIsNotNone(dataset_doc)
        self.assertEqual(dataset_doc["tags"], [
            'country', 'd1', 'daily', 'dataset', 'estimate', 'france',
            'frequency', 'mars', 'monthly', 'observation', 'p1', 'provider',
            'status', 'test'
        ])

        # The search terms are mixed case while the stored tags above are
        # lower case. The second element of the returned pair is not used.
        cursor, _ = tags_utils.search_datasets_tags(
            self.db, search_tags="France MARS Daily")
        self.assertEqual(cursor.count(), 1)

        # Same search restricted to a provider name expected to match nothing.
        cursor, _ = tags_utils.search_datasets_tags(
            self.db, search_tags="France MARS Daily", provider_name="UNKNOW")

        self.assertEqual(cursor.count(), 0)
    def test_update_tags_datasets(self):
        # Purpose: insert one provider and one hand-built dataset, run
        # tags_utils.update_tags_datasets twice (a plain run, then a run with
        # update_only=True) and check the reported counters as well as the
        # tag list finally stored on the dataset document.
        #
        # nosetests -s -v widukind_common.tests.test_tags:UpdateTagsTestCase.test_update_tags_datasets

        providers = self.db[constants.COL_PROVIDERS]
        providers.insert(self.doc_provider)
        self.assertEqual(providers.count(), 1)

        provider_name = self.doc_provider["name"]
        concepts = {
            "FREQ": "Frequency",
            "OBS_STATUS": "Observation Status",
        }
        codelists = {
            "FREQ": {"D": "Daily"},
            "OBS_STATUS": {"E": "Estimate"},
        }
        new_dataset = {
            "enable": True,
            "provider_name": provider_name,
            "dataset_code": "d1",
            "name": "dataset 1",
            "slug": "%s-d1" % self.doc_provider["slug"],
            "concepts": concepts,
            "codelists": codelists,
            "dimension_keys": ["FREQ"],
            "attribute_keys": ["OBS_STATUS"],
        }

        datasets = self.db[constants.COL_DATASETS]
        datasets.insert(new_dataset)
        self.assertEqual(datasets.count(), 1)

        # First run: exactly one document matched and modified, no errors.
        first_pass = tags_utils.update_tags_datasets(
            self.db, provider_name, new_dataset["dataset_code"])
        self.assertEqual(len(first_pass["writeErrors"]), 0)
        self.assertEqual(first_pass["nMatched"], 1)
        self.assertEqual(first_pass["nModified"], 1)

        # Second run with update_only=True: nothing matched, nothing modified.
        second_pass = tags_utils.update_tags_datasets(
            self.db, provider_name, new_dataset["dataset_code"],
            update_only=True)
        self.assertEqual(second_pass["nMatched"], 0)
        self.assertEqual(second_pass["nModified"], 0)

        stored = datasets.find_one({
            "provider_name": new_dataset["provider_name"],
            "dataset_code": new_dataset["dataset_code"],
        })
        self.assertIsNotNone(stored)

        expected_tags = [
            'd1', 'daily', 'dataset', 'estimate', 'frequency', 'mars',
            'observation', 'p1', 'provider', 'status', 'test'
        ]
        self.assertEqual(stored["tags"], expected_tags)
    def test_update_tags_datasets(self):
        # Purpose: store one provider and one dataset built in this test,
        # retag the dataset twice (default options, then update_only=True)
        # and verify both the counters returned by each run and the tags
        # persisted on the dataset document.
        #
        # nosetests -s -v widukind_common.tests.test_tags:UpdateTagsTestCase.test_update_tags_datasets

        self.db[constants.COL_PROVIDERS].insert(self.doc_provider)
        self.assertEqual(self.db[constants.COL_PROVIDERS].count(), 1)

        # Assemble the dataset document key by key (same keys, same order).
        doc = {"enable": True}
        doc["provider_name"] = self.doc_provider["name"]
        doc["dataset_code"] = "d1"
        doc["name"] = "dataset 1"
        doc["slug"] = "%s-d1" % self.doc_provider["slug"]
        doc["concepts"] = {
            "FREQ": "Frequency",
            "OBS_STATUS": "Observation Status",
        }
        doc["codelists"] = {
            "FREQ": {"D": "Daily"},
            "OBS_STATUS": {"E": "Estimate"},
        }
        doc["dimension_keys"] = ["FREQ"]
        doc["attribute_keys"] = ["OBS_STATUS"]

        self.db[constants.COL_DATASETS].insert(doc)
        self.assertEqual(self.db[constants.COL_DATASETS].count(), 1)

        def retag(**options):
            # Both runs share the positional arguments; only options differ.
            return tags_utils.update_tags_datasets(self.db,
                                                   self.doc_provider["name"],
                                                   doc["dataset_code"],
                                                   **options)

        outcome = retag()
        self.assertEqual(len(outcome["writeErrors"]), 0)
        self.assertEqual(outcome["nMatched"], 1)
        self.assertEqual(outcome["nModified"], 1)

        outcome = retag(update_only=True)
        self.assertEqual(outcome["nMatched"], 0)
        self.assertEqual(outcome["nModified"], 0)

        lookup = {"provider_name": doc["provider_name"],
                  "dataset_code": doc["dataset_code"]}
        persisted = self.db[constants.COL_DATASETS].find_one(lookup)
        self.assertIsNotNone(persisted)
        self.assertEqual(persisted["tags"],
                         ['d1', 'daily', 'dataset', 'estimate', 'frequency',
                          'mars', 'observation', 'p1', 'provider', 'status',
                          'test'])
    def test_search_series_tags(self):
        # Purpose: insert the provider, dataset and series fixtures (checking
        # after each insert that the collection holds exactly one document)
        # and rebuild the dataset tags, then the series tags.
        # No search call or search assertion is visible in this block.
        #
        # nosetests -s -v widukind_common.tests.test_tags:SearchTagsTestCase.test_search_series_tags

        providers = self.db[constants.COL_PROVIDERS]
        providers.insert(self.doc_provider)
        self.assertEqual(providers.count(), 1)

        datasets = self.db[constants.COL_DATASETS]
        datasets.insert(self.doc_dataset)
        self.assertEqual(datasets.count(), 1)

        # The two tag updates receive the same positional arguments.
        tag_args = (self.db,
                    self.doc_provider["name"],
                    self.doc_dataset["dataset_code"])
        tags_utils.update_tags_datasets(*tag_args)

        series = self.db[constants.COL_SERIES]
        series.insert(self.doc_series)
        self.assertEqual(series.count(), 1)
        tags_utils.update_tags_series(*tag_args)
Exemple #6
0
def _update_tags(ctx, db, provider_name, dataset=None, max_bulk=100, update_only=False, dry_mode=False, async_mode=None):
    """Refresh the Datasets tags, then the Series tags, of one provider.

    Each step is best effort: any exception it raises is reported through
    ``ctx.log_error`` and execution continues. Nothing is returned.

    Args:
        ctx: object providing ``log``, ``log_warn`` and ``log_error``.
        db: database handle handed unchanged to the ``tags`` helpers.
        provider_name: provider whose documents are retagged.
        dataset: forwarded to both helpers as ``dataset_code``.
        max_bulk: forwarded unchanged to both helpers.
        update_only: forwarded unchanged to both helpers.
        dry_mode: forwarded unchanged to both helpers.
        async_mode: forwarded to the series helper only; when truthy the
            series success line is not logged.
    """
    started_at = time.time()

    ctx.log("START update tags for [%s]" % provider_name)

    # Step 1: datasets.
    ctx.log("Update provider[%s] Datasets tags..." % provider_name)
    try:
        outcome = tags.update_tags_datasets(
            db,
            provider_name=provider_name,
            dataset_code=dataset,
            max_bulk=max_bulk,
            update_only=update_only,
            dry_mode=dry_mode,
        )
        # Read inside the try block so a missing key is reported as a failure.
        modified = outcome["nModified"]
        ctx.log_warn("Update provider[%s] Datasets tags Success. Docs Updated[%s]" % (provider_name, modified))
    except Exception as exc:
        failure = (provider_name, str(exc))
        ctx.log_error("Update Datasets tags Fail - provider[%s] - [%s]" % failure)

    # Step 2: series.
    ctx.log("Update provider[%s] Series tags..." % provider_name)
    try:
        outcome = tags.update_tags_series(
            db,
            provider_name=provider_name,
            dataset_code=dataset,
            max_bulk=max_bulk,
            update_only=update_only,
            async_mode=async_mode,
            dry_mode=dry_mode,
        )
        if not async_mode:
            modified = outcome["nModified"]
            ctx.log_warn("Update provider[%s] Series tags Success. Docs Updated[%s]" % (provider_name, modified))
    except Exception:
        failure = (provider_name, last_error())
        ctx.log_error("Update Series tags Fail - provider[%s]: %s" % failure)

    elapsed = time.time() - started_at

    ctx.log("update tags END: provider[%s] - time[%.3f]" % (provider_name, elapsed))
    def test_search_series_tags(self):
        # Purpose: load the provider, dataset and series fixtures, verifying
        # after each insert that its collection contains a single document,
        # and regenerate the dataset tags followed by the series tags.
        # No search call or search assertion is visible in this block.
        #
        # nosetests -s -v widukind_common.tests.test_tags:SearchTagsTestCase.test_search_series_tags

        def load(collection_name, document):
            # Insert one fixture and check it is the only document stored.
            self.db[collection_name].insert(document)
            self.assertEqual(self.db[collection_name].count(), 1)

        load(constants.COL_PROVIDERS, self.doc_provider)

        load(constants.COL_DATASETS, self.doc_dataset)
        tags_utils.update_tags_datasets(
            self.db,
            self.doc_provider["name"],
            self.doc_dataset["dataset_code"],
        )

        load(constants.COL_SERIES, self.doc_series)
        tags_utils.update_tags_series(
            self.db,
            self.doc_provider["name"],
            self.doc_dataset["dataset_code"],
        )
def _update_tags(ctx,
                 db,
                 provider_name,
                 dataset=None,
                 max_bulk=100,
                 update_only=False,
                 dry_mode=False,
                 async_mode=None):
    """Update the Datasets tags and then the Series tags for a provider.

    Failures in either step are caught, logged with ``ctx.log_error`` and do
    not stop the function. The elapsed time is logged at the end. Returns
    ``None``.

    Args:
        ctx: logging context exposing ``log``, ``log_warn``, ``log_error``.
        db: database handle passed as-is to the ``tags`` functions.
        provider_name: name of the provider to process.
        dataset: passed to the ``tags`` functions as ``dataset_code``.
        max_bulk: passed through to both ``tags`` functions.
        update_only: passed through to both ``tags`` functions.
        dry_mode: passed through to both ``tags`` functions.
        async_mode: passed to the series update only; a truthy value
            suppresses the series success message.
    """
    t0 = time.time()

    ctx.log("START update tags for [%s]" % provider_name)

    # Keyword arguments common to the datasets and the series update.
    options = dict(provider_name=provider_name,
                   dataset_code=dataset,
                   max_bulk=max_bulk,
                   update_only=update_only,
                   dry_mode=dry_mode)

    ctx.log("Update provider[%s] Datasets tags..." % provider_name)
    try:
        report = tags.update_tags_datasets(db, **options)
        success = (provider_name, report["nModified"])
        ctx.log_warn(
            "Update provider[%s] Datasets tags Success. Docs Updated[%s]" %
            success)
    except Exception as exc:
        ctx.log_error("Update Datasets tags Fail - provider[%s] - [%s]" %
                      (provider_name, str(exc)))

    ctx.log("Update provider[%s] Series tags..." % provider_name)
    try:
        report = tags.update_tags_series(db, async_mode=async_mode, **options)
        if not async_mode:
            success = (provider_name, report["nModified"])
            ctx.log_warn(
                "Update provider[%s] Series tags Success. Docs Updated[%s]" %
                success)
    except Exception:
        ctx.log_error("Update Series tags Fail - provider[%s]: %s" %
                      (provider_name, last_error()))

    duration = time.time() - t0

    ctx.log("update tags END: provider[%s] - time[%.3f]" %
            (provider_name, duration))
def cmd_update_tags(fetcher=None, dataset=None, max_bulk=100,
                    update_only=False, async_mode=None,
                    dry_mode=False, **kwargs):
    """Create or Update field tags"""

    # NOTE(review): the docstring is kept to its original single line because
    # it is presumably surfaced as CLI help text - confirm before extending.
    #
    # Examples:
    #
    # dlstats fetchers tag -f BIS -d CNFS -S
    # dlstats fetchers tag -f BEA -d "10101 Ann" -S
    # dlstats fetchers tag -f BEA -d "10101 Ann" -S
    # dlstats fetchers tag -f Eurostat -d nama_10_a10 -S
    # dlstats fetchers tag -f OECD -d MEI -S
    #
    # Parameters:
    #   fetcher: single provider to process; when falsy, every key of
    #       FETCHERS is processed.
    #   dataset: forwarded to the tags helpers as ``dataset_code``.
    #   max_bulk, update_only, dry_mode: forwarded to both tags helpers.
    #   async_mode: forwarded to the series helper only; when truthy the
    #       series success line is not logged.
    #   **kwargs: forwarded to ``client.Context``.
    #
    # Each update step is best effort: exceptions are logged through
    # ``ctx.log_error`` and the loop moves on. Nothing is returned.

    ctx = client.Context(**kwargs)

    # Nothing happens unless the context is silent or the user confirms.
    # click.confirm is called with abort=True; see the click documentation
    # for what a negative answer does in that mode.
    if not (ctx.silent or click.confirm('Do you want to continue?', abort=True)):
        return

    db = ctx.mongo_database()

    fetchers = [fetcher] if fetcher else FETCHERS.keys()

    for provider_name in fetchers:

        # Restart the clock for every provider so that the END line reports
        # this provider's own duration instead of the time accumulated since
        # the first provider started.
        start = time.time()

        ctx.log("START update tags for [%s]" % provider_name)

        # NOTE: a "Categories tags" step (tags.update_tags_categories) used
        # to sit here and is currently disabled.

        ctx.log("Update provider[%s] Datasets tags..." % provider_name)
        try:
            result = tags.update_tags_datasets(db,
                                               provider_name=provider_name,
                                               dataset_code=dataset,
                                               max_bulk=max_bulk,
                                               update_only=update_only,
                                               dry_mode=dry_mode)
            ctx.log_ok("Update provider[%s] Datasets tags Success. Docs Updated[%s]" % (provider_name, result["nModified"]))
        except Exception as err:
            ctx.log_error("Update Datasets tags Fail - provider[%s] - [%s]" % (provider_name, str(err)))

        ctx.log("Update provider[%s] Series tags..." % provider_name)
        try:
            result = tags.update_tags_series(db,
                                             provider_name=provider_name,
                                             dataset_code=dataset,
                                             max_bulk=max_bulk,
                                             update_only=update_only,
                                             async_mode=async_mode,
                                             dry_mode=dry_mode)
            if not async_mode:
                ctx.log_ok("Update provider[%s] Series tags Success. Docs Updated[%s]" % (provider_name, result["nModified"]))
        except Exception:
            ctx.log_error("Update Series tags Fail - provider[%s]: %s" % (provider_name, last_error()))

        end = time.time() - start

        ctx.log("END update tags for [%s] - time[%.3f]" % (provider_name, end))