コード例 #1
0
def _conf_db(app, db=None):
    """Attach a MongoDB database handle to *app* as ``app.widukind_db``.

    Uses the supplied *db* when given; otherwise builds a lazy connection
    from the app's MONGODB_URL setting (stray quotes stripped).
    """
    from widukind_common.utils import get_mongo_db

    if db:
        app.widukind_db = db
    else:
        url = app.config.get("MONGODB_URL").strip('"')
        app.widukind_db = get_mongo_db(url, connect=False)
コード例 #2
0
def export_file_csv_dataset_unit(doc=None,
                                 provider=None, dataset_code=None,
                                 slug=None):
    """Create a CSV file for one Dataset and record it in MongoDB GridFS.

    The dataset document may be passed directly (*doc*), looked up by
    *slug*, or looked up by the (*provider*, *dataset_code*) pair.
    """
    db = get_mongo_db()

    if not doc:
        if slug:
            doc = db[constants.COL_DATASETS].find_one({"slug": slug})
        else:
            if not provider:
                raise ValueError("provider is required")
            if not dataset_code:
                raise ValueError("dataset_code is required")

            selector = {"provider_name": provider,
                        "dataset_code": dataset_code}
            # Exclude the (potentially large) revisions field.
            doc = db[constants.COL_DATASETS].find_one(selector,
                                                      {'revisions': 0})

    if not doc:
        msg = "Dataset not found for provider[%s] - dataset[%s] - slug[%s]"
        raise Exception(msg % (provider, dataset_code, slug))

    return record_csv_file(db, export_dataset(db, doc),
                           provider_name=doc['provider_name'],
                           dataset_code=doc["dataset_code"],
                           slug=doc["slug"],
                           prefix="dataset")
コード例 #3
0
ファイル: wsgi.py プロジェクト: srault95/widukind-web
def _conf_db(app, db=None):
    """Configure MongoDB access for *app*.

    Sets ``app.widukind_db`` (the supplied *db*, or a lazy connection built
    from MONGODB_URL), attaches a GridFS handle as ``app.widukind_fs`` and
    ensures the collection indexes exist.
    """
    import gridfs
    from widukind_common.utils import get_mongo_db
    from widukind_web.utils import create_or_update_indexes

    if db:
        app.widukind_db = db
    else:
        url = app.config.get("MONGODB_URL").strip('"')
        app.widukind_db = get_mongo_db(url, connect=False)

    app.widukind_fs = gridfs.GridFS(app.widukind_db)
    create_or_update_indexes(app.widukind_db)
コード例 #4
0
ファイル: base.py プロジェクト: MichelJuillard/dlstats
    def setUp(self):
        """Prepare a clean ``widukind_test`` database for each test."""
        BaseTestCase.setUp(self)

        client = get_mongo_db().client
        self.db = client["widukind_test"]
        self.assertEqual(self.db.name, "widukind_test")

        # Start from an empty database with fresh indexes.
        utils.clean_mongodb(self.db)
        create_or_update_indexes(self.db, force_mode=True)
コード例 #5
0
def _conf_db(app, db=None):
    """Wire MongoDB and GridFS onto the application object.

    ``app.widukind_db`` is the supplied *db* or a lazy connection from the
    MONGODB_URL setting; ``app.widukind_fs`` is a GridFS over that database.
    Collection indexes are (re)created on startup.
    """
    import gridfs
    from widukind_common.utils import get_mongo_db
    from widukind_web.utils import create_or_update_indexes

    if db:
        app.widukind_db = db
    else:
        mongo_url = app.config.get("MONGODB_URL").strip('"')
        app.widukind_db = get_mongo_db(mongo_url, connect=False)

    app.widukind_fs = gridfs.GridFS(app.widukind_db)
    create_or_update_indexes(app.widukind_db)
コード例 #6
0
def consolidate_all_dataset(provider_name=None, db=None, max_bulk=20):
    """Consolidate every dataset of *provider_name* using bulk updates.

    :param str provider_name: provider whose datasets are consolidated
    :param pymongo.database.Database db: database (defaults to utils.get_mongo_db())
    :param int max_bulk: maximum number of update operations per bulk flush
    :return: dict with the accumulated ``matched_count`` and ``modified_count``
    """
    db = db or utils.get_mongo_db()

    query = {"provider_name": provider_name}
    projection = {"_id": True, "dataset_code": True}

    cursor = db[constants.COL_DATASETS].find(query, projection)
    dataset_codes = [doc["dataset_code"] for doc in cursor]

    bulk_requests = db[constants.COL_DATASETS].initialize_unordered_bulk_op()
    bulk_size = 0
    results = []

    for dataset_code in dataset_codes:

        query, query_modify = consolidate_dataset(provider_name,
                                                  dataset_code,
                                                  db=db,
                                                  execute=False)

        # consolidate_dataset returns (None, None) when nothing changed.
        if not query:
            logger.warning("bypass dataset [%s]" % dataset_code)
            continue

        bulk_size += 1
        bulk_requests.find(query).update_one(query_modify)

        # BUGFIX: flush at exactly max_bulk operations. The previous
        # "> max_bulk" test let batches grow to max_bulk + 1.
        if bulk_size >= max_bulk:
            result = _run_bulk(db, bulk_requests)
            if result:
                results.append(result)
            bulk_requests = db[
                constants.COL_DATASETS].initialize_unordered_bulk_op()
            bulk_size = 0

    # Flush the remaining partial batch.
    if bulk_size > 0:
        result = _run_bulk(db, bulk_requests)
        if result:
            results.append(result)

    results_details = {
        "matched_count": 0,
        "modified_count": 0,
    }
    for r in results:
        results_details["matched_count"] += r["nMatched"]
        results_details["modified_count"] += r["nModified"]

    return results_details
コード例 #7
0
ファイル: _commons.py プロジェクト: gitter-badger/dlstats
    def __init__(self,
                 provider_name=None,
                 db=None,
                 is_indexes=True,
                 version=0,
                 max_errors=5,
                 use_existing_file=False,
                 not_remove_files=False,
                 async_mode=False,
                 async_framework="gevent",
                 **kwargs):
        """
        :param str provider_name: Provider Name
        :param pymongo.database.Database db: MongoDB Database instance
        :param bool is_indexes: Bypass create_or_update_indexes() if False

        :raises ValueError: if provider_name is None
        """
        if not provider_name:
            raise ValueError("provider_name is required")

        self.provider_name = provider_name
        self.db = db or get_mongo_db()
        self.version = version
        self.max_errors = max_errors
        self.use_existing_file = use_existing_file
        self.not_remove_files = not_remove_files
        self.async_mode = async_mode
        self.async_framework = async_framework

        logger.info("ASYNC MODE ENABLE" if self.async_mode
                    else "ASYNC MODE DISABLE")

        self.provider = None
        self.errors = 0

        # Optional filters: lists of category_code / dataset_code values.
        self.categories_filter = []
        self.datasets_filter = []
        self.selected_datasets = {}

        # Per-provider scratch directory for downloaded files.
        self.store_path = os.path.abspath(
            os.path.join(tempfile.gettempdir(), self.provider_name))
        self.for_delete = []

        if IS_SCHEMAS_VALIDATION_DISABLE:
            logger.warning("schemas validation is disable")
コード例 #8
0
ファイル: base.py プロジェクト: ThomasRoca/dlstats
    def setUp(self):
        """Reset the ``widukind_test`` database before each test."""
        BaseTestCase.setUp(self)

        from widukind_common.utils import get_mongo_db, create_or_update_indexes
        from widukind_common import tests_tools as utils

        client = get_mongo_db().client
        self.db = client["widukind_test"]
        self.assertEqual(self.db.name, "widukind_test")

        # Drop/recreate collections, rebuild indexes, then verify emptiness.
        utils.clean_mongodb(self.db)
        create_or_update_indexes(self.db, force_mode=True)
        self._collections_is_empty()
コード例 #9
0
def consolidate_all_dataset(provider_name=None, db=None, max_bulk=20):
    """Consolidate every dataset of *provider_name*, batching the updates.

    Returns a dict with the accumulated ``matched_count`` and
    ``modified_count`` of all executed bulk operations.
    """
    db = db or utils.get_mongo_db()

    datasets_col = db[constants.COL_DATASETS]
    cursor = datasets_col.find({"provider_name": provider_name},
                               {"_id": True, "dataset_code": True})
    dataset_codes = [doc["dataset_code"] for doc in cursor]

    bulk_requests = datasets_col.initialize_unordered_bulk_op()
    bulk_size = 0
    results = []

    for dataset_code in dataset_codes:

        query, query_modify = consolidate_dataset(provider_name, dataset_code,
                                                  db=db, execute=False)

        # Nothing to update for this dataset.
        if not query:
            logger.warning("bypass dataset [%s]" % dataset_code)
            continue

        bulk_requests.find(query).update_one(query_modify)
        bulk_size += 1

        if bulk_size > max_bulk:
            result = _run_bulk(db, bulk_requests)
            if result:
                results.append(result)
            bulk_requests = datasets_col.initialize_unordered_bulk_op()
            bulk_size = 0

    # Flush the trailing partial batch.
    if bulk_size > 0:
        result = _run_bulk(db, bulk_requests)
        if result:
            results.append(result)

    totals = {
        "matched_count": 0,
        "modified_count": 0,
    }
    for r in results:
        totals["matched_count"] += r["nMatched"]
        totals["modified_count"] += r["nModified"]

    return totals
コード例 #10
0
def export_file_csv_dataset(provider=None, dataset_code=None, slug=None):
    """Create CSV File from one or more Dataset and record in MongoDB GridFS
    """
    db = get_mongo_db()

    if slug:
        query = {"slug": slug}
    else:
        query = {"provider_name": provider, "dataset_code": dataset_code}

    # concepts/codelists are not needed for the unit export.
    projection = {'concepts': False, "codelists": False}
    cursor = db[constants.COL_DATASETS].find(query, projection)

    return [export_file_csv_dataset_unit(doc=doc) for doc in cursor]
コード例 #11
0
def clean_mongodb(collection_list=None, db=None):
    """Drop all collections used by dlstats

    Every collection in *collection_list* (default: ``constants.COL_ALL``)
    is dropped then re-created, and the GridFS buckets are dropped.  Each
    drop/create is best-effort so a missing collection does not abort the
    cleanup.
    """
    db = db or get_mongo_db()
    collection_list = collection_list or constants.COL_ALL

    for col in collection_list:
        # BUGFIX: narrowed the bare "except:" so KeyboardInterrupt/SystemExit
        # still propagate while keeping the best-effort behaviour.
        try:
            db.drop_collection(col)
        except Exception:
            pass

    drop_gridfs(db)

    for col in collection_list:
        try:
            db.create_collection(col)
        except Exception:
            pass
コード例 #12
0
def export_file_csv_dataset(provider=None, dataset_code=None, slug=None):
    """Create CSV File from one or more Dataset and record in MongoDB GridFS
    """
    db = get_mongo_db()

    # Select by slug when given, otherwise by provider/dataset pair.
    query = {"slug": slug} if slug else {"provider_name": provider,
                                         "dataset_code": dataset_code}
    projection = {'concepts': False, "codelists": False}

    return [export_file_csv_dataset_unit(doc=doc)
            for doc in db[constants.COL_DATASETS].find(query, projection)]
コード例 #13
0
ファイル: _commons.py プロジェクト: MichelJuillard/dlstats
    def __init__(self,
                 provider_name=None,
                 db=None,
                 is_indexes=True):
        """
        :param str provider_name: Provider Name
        :param pymongo.database.Database db: MongoDB Database instance
        :param bool is_indexes: Bypass create_or_update_indexes() if False

        :raises ValueError: if provider_name is None
        """
        if not provider_name:
            raise ValueError("provider_name is required")

        self.provider_name = provider_name
        self.db = db if db else get_mongo_db()
        self.provider = self.load_provider_from_db()

        if is_indexes:
            create_or_update_indexes(self.db)
コード例 #14
0
ファイル: forms.py プロジェクト: srault95/widukind-web
 def __init__(self, label='', 
              colname=None,
              id_attr='_id', label_attr='', 
              query=None,
              validators=None, 
              sort=None,
              allow_blank=False, blank_text='---', **kwargs):
     """Select field whose choices come from a MongoDB collection.

     :param str colname: name of the collection queried for choices
     :param str id_attr: document attribute used as the option value
     :param str label_attr: document attribute used as the option label
     :param dict query: filter passed to ``find()`` (default: all documents)
     :param tuple sort: optional ``(field, direction)`` pair applied to the
         queryset
     """
     super().__init__(label, validators, **kwargs)
     self.id_attr = id_attr
     self.label_attr = label_attr
     self.allow_blank = allow_blank
     self.blank_text = blank_text
     self.colname = colname
     # BUGFIX: the default used to be a shared mutable dict (query={});
     # use None and normalize here instead.
     self.query = query or {}
     self.sort = sort
     self.db = get_mongo_db()
     self.col = self.db[self.colname]
     self.queryset = self.col.find(self.query)
     if self.sort and isinstance(self.sort, tuple) and len(self.sort) == 2:
         self.queryset = self.queryset.sort(*self.sort)
コード例 #15
0
def export_file_csv_series_unit(doc=None,
                                provider=None,
                                dataset_code=None,
                                key=None,
                                slug=None):
    """Create a CSV file for one series and record it in MongoDB GridFS.

    The series document may be passed directly (*doc*), looked up by
    *slug*, or looked up by the (*provider*, *dataset_code*, *key*) triple.
    """
    db = get_mongo_db()

    if not doc:
        if slug:
            doc = db[constants.COL_SERIES].find_one({"slug": slug})
        else:
            # All three identifiers are needed for the lookup.
            for name, value in (("provider", provider),
                                ("dataset_code", dataset_code),
                                ("key", key)):
                if not value:
                    raise ValueError("%s is required" % name)

            selector = {"provider_name": provider,
                        "dataset_code": dataset_code,
                        "key": key}
            doc = db[constants.COL_SERIES].find_one(selector)

    if not doc:
        msg = "Series not found for provider[%s] - dataset[%s] - key[%s] - slug[%s]"
        raise Exception(msg % (provider, dataset_code, key, slug))

    return record_csv_file(db,
                           export_series(doc),
                           provider_name=doc['provider_name'],
                           dataset_code=doc["dataset_code"],
                           key=doc["key"],
                           slug=doc["slug"],
                           prefix="series")
コード例 #16
0
def export_file_csv_dataset_unit(doc=None,
                                 provider=None,
                                 dataset_code=None,
                                 slug=None):
    """Create a CSV file for one Dataset and store it in MongoDB GridFS."""
    db = get_mongo_db()

    if not doc:
        if slug:
            selector = {"slug": slug}
            doc = db[constants.COL_DATASETS].find_one(selector)
        else:
            if not provider:
                raise ValueError("provider is required")
            if not dataset_code:
                raise ValueError("dataset_code is required")

            selector = {"provider_name": provider,
                        "dataset_code": dataset_code}
            # Exclude the (potentially large) revisions field.
            doc = db[constants.COL_DATASETS].find_one(selector,
                                                      {'revisions': 0})

    if not doc:
        raise Exception(
            "Dataset not found for provider[%s] - dataset[%s] - slug[%s]" %
            (provider, dataset_code, slug))

    return record_csv_file(db,
                           export_dataset(db, doc),
                           provider_name=doc['provider_name'],
                           dataset_code=doc["dataset_code"],
                           slug=doc["slug"],
                           prefix="dataset")
コード例 #17
0
ファイル: forms.py プロジェクト: Widukind/widukind-web
 def __init__(self,
              label='',
              colname=None,
              id_attr='_id',
              label_attr='',
              query=None,
              validators=None,
              sort=None,
              allow_blank=False,
              blank_text='---',
              **kwargs):
     """Select field whose choices come from a MongoDB collection.

     :param str colname: name of the collection queried for choices
     :param str id_attr: document attribute used as the option value
     :param str label_attr: document attribute used as the option label
     :param dict query: filter passed to ``find()`` (default: all documents)
     :param tuple sort: optional ``(field, direction)`` pair applied to the
         queryset
     """
     super().__init__(label, validators, **kwargs)
     self.id_attr = id_attr
     self.label_attr = label_attr
     self.allow_blank = allow_blank
     self.blank_text = blank_text
     self.colname = colname
     # BUGFIX: the default used to be a shared mutable dict (query={});
     # use None and normalize here instead.
     self.query = query or {}
     self.sort = sort
     self.db = get_mongo_db()
     self.col = self.db[self.colname]
     self.queryset = self.col.find(self.query)
     if self.sort and isinstance(self.sort, tuple) and len(self.sort) == 2:
         self.queryset = self.queryset.sort(*self.sort)
コード例 #18
0
def export_file_csv_series_unit(doc=None, 
                                provider=None, dataset_code=None, key=None, 
                                slug=None):
    """Create a CSV file for one series and store it in MongoDB GridFS."""
    db = get_mongo_db()

    if not doc:
        if slug:
            doc = db[constants.COL_SERIES].find_one({"slug": slug})
        else:
            if not provider:
                raise ValueError("provider is required")
            if not dataset_code:
                raise ValueError("dataset_code is required")
            if not key:
                raise ValueError("key is required")

            selector = {"provider_name": provider,
                        "dataset_code": dataset_code,
                        "key": key}
            doc = db[constants.COL_SERIES].find_one(selector)

    if not doc:
        raise Exception(
            "Series not found for provider[%s] - dataset[%s] - key[%s] - slug[%s]"
            % (provider, dataset_code, key, slug))

    return record_csv_file(db, export_series(doc),
                           provider_name=doc['provider_name'],
                           dataset_code=doc["dataset_code"],
                           key=doc["key"],
                           slug=doc["slug"],
                           prefix="series")
コード例 #19
0
ファイル: _commons.py プロジェクト: ThomasRoca/dlstats
 def remove_all(cls, provider_name, db=None):
     """Remove every category document belonging to *provider_name*."""
     db = db or get_mongo_db()
     logger.info("remove all categories for [%s]" % provider_name)
     return db[constants.COL_CATEGORIES].remove(
         {"provider_name": provider_name})
コード例 #20
0
ファイル: wsgi.py プロジェクト: mmalter/widukind-api
def _conf_db(app):
    """Attach the MongoDB handle configured by MONGODB_URL to *app*."""
    from widukind_common.utils import get_mongo_db
    url = app.config.get("MONGODB_URL")
    app.widukind_db = get_mongo_db(url)
コード例 #21
0
def _conf_db(app):
    """Expose the MongoDB database on the application as ``widukind_db``."""
    from widukind_common.utils import get_mongo_db
    mongo_url = app.config.get("MONGODB_URL")
    app.widukind_db = get_mongo_db(mongo_url)
コード例 #22
0
ファイル: wsgi.py プロジェクト: srault95/widukind-api
def _conf_db(app, db=None):
    """Set ``app.widukind_db`` from *db* or from the MONGODB_URL setting."""
    from widukind_common.utils import get_mongo_db

    if db:
        app.widukind_db = db
    else:
        mongo_url = app.config.get("MONGODB_URL").strip('"')
        app.widukind_db = get_mongo_db(mongo_url, connect=False)
コード例 #23
0
ファイル: _commons.py プロジェクト: ThomasRoca/dlstats
 def categories(cls, provider_name, db=None, **query):
     """Return provider categories keyed by ``category_code``.

     Extra keyword arguments become additional query filters; a
     ``provider_name`` passed in *query* takes precedence over the
     positional argument.
     """
     db = db or get_mongo_db()
     # Idiom fix: setdefault replaces `if not "x" in query: query["x"] = ...`
     query.setdefault("provider_name", provider_name)
     cursor = db[constants.COL_CATEGORIES].find(query)
     # Dict comprehension instead of dict([...]) over a list of pairs.
     return {doc["category_code"]: doc for doc in cursor}
コード例 #24
0
ファイル: _commons.py プロジェクト: ThomasRoca/dlstats
 def count(cls, provider_name, db=None):
     """Return the number of categories stored for *provider_name*."""
     db = db or get_mongo_db()
     return db[constants.COL_CATEGORIES].count(
         {"provider_name": provider_name})
コード例 #25
0
def consolidate_dataset(provider_name=None, dataset_code=None, db=None, execute=True):
    """Shrink a dataset's codelists/concepts to the values actually in use.

    Scans every series of the dataset, collects the dimension and attribute
    values that are really used, and rebuilds the dataset's ``codelists``,
    ``concepts``, ``dimension_keys`` and ``attribute_keys`` accordingly.

    :param str provider_name: provider of the dataset
    :param str dataset_code: dataset to consolidate
    :param pymongo.database.Database db: database (defaults to utils.get_mongo_db())
    :param bool execute: apply the update when True; otherwise return the
        ``(query, query_modify)`` pair for a later bulk update
    :return: modified document count when *execute*, else the query pair;
        ``None`` (or ``(None, None)``) when there is nothing to do
    """
    db = db or utils.get_mongo_db()
    
    logger.info("START consolidate provider[%s] - dataset[%s]" % (provider_name, dataset_code))
    
    query = {"provider_name": provider_name, "dataset_code": dataset_code}
    projection = {"_id": False, "dimensions": True, "attributes": True, "values.attributes": True}
    
    cursor = db[constants.COL_SERIES].find(query, projection)

    projection = {"_id": True, "concepts": True, "codelists": True, "dimension_keys": True, "attribute_keys": True}
    dataset = db[constants.COL_DATASETS].find_one(query, projection)

    # BUGFIX: guard against a missing dataset document (find_one -> None),
    # which previously raised TypeError on dataset["codelists"].
    if dataset is None:
        if execute:
            return None
        else:
            return None, None

    # BUGFIX: normalize possibly-missing/None fields so the loops below
    # cannot raise (e.g. dataset.get("attribute_keys") returning None).
    old_codelists = dataset.get("codelists") or {}
    old_concepts = dataset.get("concepts") or {}
    old_dimension_keys = dataset.get("dimension_keys") or []
    old_attribute_keys = dataset.get("attribute_keys") or []

    codelists = {}
    
    # Collect every dimension/attribute value actually used by the series.
    for series in cursor:
        for k, v in series.get("dimensions").items():
            if not k in codelists: codelists[k] = []
            if not v in codelists[k]: codelists[k].append(v)
        
        if series.get("attributes"):
            for k, v in series.get("attributes").items():
                if not k in codelists: codelists[k] = []
                if not v in codelists[k]: codelists[k].append(v)
            
        for v in series.get("values"):
            if v.get("attributes"):
                for k1, v1 in v.get("attributes").items():
                    if not k1 in old_codelists: continue
                    if not k1 in codelists: codelists[k1] = []
                    if not v1 in codelists[k1]: codelists[k1].append(v1)
    
    if logger.isEnabledFor(logging.DEBUG):
        for k, v in old_codelists.items():
            logger.debug("BEFORE - codelist[%s]: %s" % (k, len(v)))
        logger.debug("BEFORE - concepts[%s]" % list(old_concepts.keys()))
        logger.debug("BEFORE - dimension_keys[%s]" % old_dimension_keys)
        logger.debug("BEFORE - attribute_keys[%s]" % old_attribute_keys)
    
    new_codelists = {}
    new_concepts = {}
    new_dimension_keys = []
    new_attribute_keys = []
    
    for k, values in old_codelists.items():
        # keep only entries whose key appears in the series
        if k in codelists:
            new_values = {}
            for v1 in codelists[k]:
                # keep only values known to the dataset codelist
                if v1 in values:
                    new_values[v1] = values[v1]
            
            new_codelists[k] = new_values
            new_concepts[k] = old_concepts.get(k)
            
            if k in old_dimension_keys:
                new_dimension_keys.append(k)
            elif k in old_attribute_keys:
                new_attribute_keys.append(k)
    
    # Restore the dataset's original key ordering for the surviving keys.
    dimension_keys = [k for k in old_dimension_keys if k in new_dimension_keys]
    attribute_keys = [k for k in old_attribute_keys if k in new_attribute_keys]

    if logger.isEnabledFor(logging.DEBUG):
        for k, v in new_codelists.items():
            logger.debug("AFTER - codelist[%s]: %s" % (k, len(v)))
        logger.debug("AFTER - concepts[%s]" % list(new_concepts.keys()))
        logger.debug("AFTER - dimension_keys[%s]" % dimension_keys)
        logger.debug("AFTER - attribute_keys[%s]" % attribute_keys)

    # A change in codelists or concepts means the dataset must be updated.
    is_modify = new_codelists != old_codelists
    if is_modify is False and new_concepts != old_concepts:
        is_modify = True
    
    if is_modify is False:
        if execute:
            return None
        else:
            return None, None

    query = {"_id": dataset["_id"]}
    query_modify = {"$set": {
        "codelists": new_codelists, 
        "concepts": new_concepts,
        "dimension_keys": dimension_keys,
        "attribute_keys": attribute_keys
    }}
    
    if execute:
        return db[constants.COL_DATASETS].update_one(query, query_modify).modified_count
    else:
        return query, query_modify
コード例 #26
0
ファイル: _commons.py プロジェクト: ThomasRoca/dlstats
 def search_category_for_dataset(cls, provider_name, dataset_code, db=None):
     """Find the category that references *dataset_code* for a provider."""
     db = db or get_mongo_db()
     selector = {
         "provider_name": provider_name,
         # only categories that actually hold at least one dataset
         "datasets.0": {"$exists": True},
         "datasets.dataset_code": dataset_code,
     }
     return db[constants.COL_CATEGORIES].find_one(selector)
コード例 #27
0
def consolidate_dataset(provider_name=None,
                        dataset_code=None,
                        db=None,
                        execute=True):
    """Shrink a dataset's codelists/concepts to the values its series use.

    Scans every series of the dataset, collects the dimension/attribute
    values actually in use, and rebuilds the dataset's ``codelists``,
    ``concepts``, ``dimension_keys`` and ``attribute_keys`` accordingly.

    :param str provider_name: provider of the dataset
    :param str dataset_code: dataset to consolidate
    :param pymongo.database.Database db: database (defaults to utils.get_mongo_db())
    :param bool execute: apply the update when True; otherwise return the
        ``(query, query_modify)`` pair for a later bulk update
    :return: modified document count when *execute*, else the query pair;
        ``None`` (or ``(None, None)``) when there is nothing to do
    """
    db = db or utils.get_mongo_db()

    logger.info("START consolidate provider[%s] - dataset[%s]" %
                (provider_name, dataset_code))

    query = {"provider_name": provider_name, "dataset_code": dataset_code}
    projection = {
        "_id": False,
        "dimensions": True,
        "attributes": True,
        "values.attributes": True
    }

    cursor = db[constants.COL_SERIES].find(query, projection)

    projection = {
        "_id": True,
        "concepts": True,
        "codelists": True,
        "dimension_keys": True,
        "attribute_keys": True
    }
    dataset = db[constants.COL_DATASETS].find_one(query, projection)
    # No dataset document: nothing to consolidate.
    if dataset is None:
        if execute:
            return None
        else:
            return None, None

    # Normalize possibly-missing/None fields so the loops below are safe.
    old_codelists = dataset.get("codelists") or {}
    old_concepts = dataset.get("concepts") or {}
    old_dimension_keys = dataset.get("dimension_keys") or []
    old_attribute_keys = dataset.get("attribute_keys") or []

    codelists = {}

    # Collect every dimension/attribute value actually used by the series.
    for series in cursor:
        for k, v in series.get("dimensions").items():
            if not k in codelists: codelists[k] = []
            if not v in codelists[k]: codelists[k].append(v)

        if series.get("attributes"):
            for k, v in series.get("attributes").items():
                if not k in codelists: codelists[k] = []
                if not v in codelists[k]: codelists[k].append(v)

        # Per-observation attributes only count if their key is a known codelist.
        for v in series.get("values"):
            if v.get("attributes"):
                for k1, v1 in v.get("attributes").items():
                    if not k1 in old_codelists: continue
                    if not k1 in codelists: codelists[k1] = []
                    if not v1 in codelists[k1]: codelists[k1].append(v1)

    if logger.isEnabledFor(logging.DEBUG):
        for k, v in old_codelists.items():
            logger.debug("BEFORE - codelist[%s]: %s" % (k, len(v)))
        logger.debug("BEFORE - concepts[%s]" % list(old_concepts.keys()))
        logger.debug("BEFORE - dimension_keys[%s]" % old_dimension_keys)
        logger.debug("BEFORE - attribute_keys[%s]" % old_attribute_keys)

    new_codelists = {}
    new_concepts = {}
    new_dimension_keys = []
    new_attribute_keys = []

    # Keep only codelist entries (and their values) observed in the series.
    for k, values in old_codelists.items():
        '''if entry in codelists from series'''
        if k in codelists:
            new_values = {}
            for v1 in codelists[k]:
                '''if codelist value in codelists from dataset'''
                if v1 in values:
                    new_values[v1] = values[v1]

            new_codelists[k] = new_values
            new_concepts[k] = old_concepts.get(k)

            if k in old_dimension_keys:
                '''unordered dimension_keys'''
                new_dimension_keys.append(k)
            elif k in old_attribute_keys:
                '''unordered attribute_keys'''
                new_attribute_keys.append(k)
    '''original ordered for dimension_keys'''
    dimension_keys = [k for k in old_dimension_keys if k in new_dimension_keys]
    '''original ordered for attribute_keys'''
    attribute_keys = [k for k in old_attribute_keys if k in new_attribute_keys]

    if logger.isEnabledFor(logging.DEBUG):
        for k, v in new_codelists.items():
            logger.debug("AFTER - codelist[%s]: %s" % (k, len(v)))
        logger.debug("AFTER - concepts[%s]" % list(new_concepts.keys()))
        logger.debug("AFTER - dimension_keys[%s]" % dimension_keys)
        logger.debug("AFTER - attribute_keys[%s]" % attribute_keys)
    '''verify change in codelists'''
    #is_modify = hash_dict(new_codelists) == hash_dict(old_codelists)
    is_modify = new_codelists != old_codelists
    '''verify change in concepts'''
    #if not is_modify and hash_dict(new_concepts) != hash_dict(old_concepts):
    if is_modify is False and new_concepts != old_concepts:
        is_modify = True

    # Nothing changed: skip the write entirely.
    if is_modify is False:
        if execute:
            return None
        else:
            return None, None

    query = {"_id": dataset["_id"]}
    # Empty results are stored as None rather than empty containers.
    query_modify = {
        "$set": {
            "codelists": new_codelists or None,
            "concepts": new_concepts or None,
            "dimension_keys": dimension_keys or None,
            "attribute_keys": attribute_keys or None,
        }
    }

    if execute:
        return db[constants.COL_DATASETS].update_one(
            query, query_modify).modified_count
    else:
        return query, query_modify
コード例 #28
0
ファイル: _commons.py プロジェクト: ThomasRoca/dlstats
 def root_categories(cls, provider_name, db=None):
     """Return the provider's top-level categories, sorted for display."""
     db = db or get_mongo_db()
     cursor = db[constants.COL_CATEGORIES].find(
         {"provider_name": provider_name, "parent": None})
     return cursor.sort([("position", 1), ("category_code", 1)])