Beispiel #1
0
    def test_db(self):
        '''
            Walk the repository through a complete create / read / update /
            delete cycle, delegating each step to the matching helper method.
        '''
        # Field values shared by every object inserted during this run
        sample_fields = dict(
            title="Wordpress website for Freelancers",
            description="Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc molestie. ",
            price=250,
            assigned_to="John Doe",
        )

        # Show what is already stored before anything is changed
        self.load_all_items_from_database()

        # Create: insert a fresh object, then read it back
        new_object = DB_Object.build_from_dict(sample_fields)
        self.test_create(new_object)

        # Update: change one field, then read it back
        new_object.price = 350
        self.test_update(new_object)

        # Delete: remove the object, then try to read it back
        self.test_delete(new_object)

        # Delete-all: seed three more records so there is something to wipe
        for _ in range(3):
            seeded = DB_Object.build_from_dict(sample_fields)
            self.repository.create(seeded)
        self.test_delete_all()
Beispiel #2
0
def get_ingredients_as_list(p_list_or_i):
    '''
    Tokenizer that resolves ObjectIds into a list of ingredient names.

    The module-level 'T_TYPE' setting selects how the input is interpreted:
      * 'product'  - the input refers to products. Their ingredient lists are
                     read from the products DB and the referenced ingredients
                     are then looked up in the ingredients DB. As a side
                     effect the 'comodegenic' value of every product read is
                     appended to the global PROD_COMO list.
      * 'OCR_list' - the input is handed unchanged to get_db_ingredients().
      * otherwise  - the input refers to ingredients, which are looked up
                     directly in the ingredients DB.

    The input may be a single id (str or ObjectId) or a collection of ids.
    Empty input returns an empty list without touching any database.
    Note: each DB is queried once using all ids simultaneously, so this
    function performs no more than 2 queries when run.
    '''
    global PROD_COMO

    if not p_list_or_i:
        return []

    if T_TYPE == 'OCR_list':
        return get_db_ingredients(p_list_or_i)

    if isinstance(p_list_or_i, (str, ObjectId)):
        # Query a single ObjectId
        id_fltr = {'_id': p_list_or_i}
    else:
        # Match any of the given ObjectIds
        id_fltr = {'_id': {'$in': list(p_list_or_i)}}

    ing_prjctn = {'_id': False, 'ingredient_name': True}

    if T_TYPE == 'product':
        prod_prjctn = {
            '_id': False,
            'ingredient_list': True,
            'comodegenic': True}
        db_objects = PRODUCTS_DB.read(id_fltr, projection=prod_prjctn)

        # Collect the ObjectIds of all product ingredients
        ing_ids = set()  # Using set eliminates duplicate values
        for doc in db_objects:
            product = DB_Object.build_from_dict(doc)
            ing_ids.update(product.get('ingredient_list', ''))
            # Create column of comodegenic scores
            PROD_COMO.append(product.get('comodegenic', 0))

        ing_fltr = {'_id': {'$in': list(ing_ids)}}
    else:
        # The ids already refer to ingredients. Reuse the id filter so that a
        # list of ids is matched with $in rather than compared to '_id' as a
        # literal array (which never matches).
        ing_fltr = id_fltr

    db_objects = INGREDIENTS_DB.read(ing_fltr, projection=ing_prjctn)
    return [DB_Object.build_from_dict(i).get('ingredient_name', '') for i in db_objects]
Beispiel #3
0
def build_product_model(host, port, **kwargs):
    '''
    Vectorize the ingredient lists of all products that carry an integer
    'comodegenic' score and store the result.

    The resulting dict {'X': tf-idf matrix, 'y': comodegenic scores} is
    pickled into MODEL_DB under the filename "ml_product_data" and also
    written to 'prod_model_data.pickle' in the working directory.

    host, port and kwargs are accepted but not used by this function.
    '''
    pickle_path = 'prod_model_data.pickle'

    def get_prod_ings_as_list(product):
        '''
        Tokenizer for product ingredient lists: look up every ingredient id
        of the product with one ingredients DB query and return the
        ingredient names as a list of strings.
        '''
        wanted_ids = product.get('ingredient_list', [])
        name_only = {'_id': False, 'ingredient_name': True}
        names = []
        for doc in INGREDIENTS_DB.read({'_id': {'$in': wanted_ids}},
                                       projection=name_only):
            names.append(DB_Object.build_from_dict(doc).get('ingredient_name', ''))
        return names

    print("Loading products from database:")
    # Only entries with an integer comodegenic score are usable as training data
    scored_only = {'comodegenic': {'$type': 'int'}}
    wanted_fields = {'ingredient_list': True, 'comodegenic': True}
    products = []
    for doc in PRODUCTS_DB.read(scored_only, projection=wanted_fields):
        products.append(DB_Object.build_from_dict(doc))

    # Ingredient names the vectorizer should ignore
    stop_words = [
        '', 'water', 'glycerin', 'titanium dioxide', 'iron oxides', 'beeswax',
        'methylparaben', 'propylparaben', 'propylene glycol', 'panthenol',
        'mica']

    print('Vectorizing product ingredient lists')
    tfidf_vect = TfidfVectorizer(
        tokenizer=get_prod_ings_as_list,
        lowercase=False,
        stop_words=stop_words)
    X = tfidf_vect.fit_transform(products)
    y = []
    for product in products:
        y.append(product['comodegenic'])

    print('Storing vectorized data and training labels')
    model = dict(X=X, y=y)

    print("Saving model data to disk for next time")
    # Keep one copy in the model database ...
    serialized = pdumps(model, protocol=2)
    MODEL_DB.create_file(serialized, filename="ml_product_data")
    # ... and one copy on local disk
    with open(pickle_path, "wb") as pickle_out:
        pdump(model, pickle_out)
    print('[SUCCESS] Product model data post-processed and stored')
Beispiel #4
0
 def check_authentication(s, auth_str):
     """Check the given credentials against the people DB.

     auth_str is the raw authorization value, optionally prefixed with the
     'Basic ' scheme. The remaining token is compared with the 'auth' field
     of the stored people. On a match the person's record is stored in
     s.person_data and True is returned, otherwise False.
     """
     scheme = 'Basic '
     in_auth = auth_str.strip()
     # Drop the scheme as a prefix. str.strip('Basic ') would instead remove
     # any of the characters B/a/s/i/c/space from BOTH ends of the token and
     # corrupt credentials that happen to start or end with one of them.
     if in_auth.startswith(scheme):
         in_auth = in_auth[len(scheme):]
     in_auth = in_auth.strip()

     query = PEOPLE_DB.read({'auth': in_auth}, limit=1)
     if query.count() == 1:
         s.person_data = DB_Object.build_from_dict(query[0])
         return True
     return False
Beispiel #5
0
 def test_update(self, new_object):
     """Write new_object back to the repository, then reload and print it."""
     print("\n\nUpdating new_object in database")
     repo = self.repository
     repo.update(new_object)
     print("new_object updated in database")
     print("Reloading new_object from database")
     lookup = {'_id': new_object._id}
     for record in repo.read(lookup):
         reloaded = DB_Object.build_from_dict(record)
         print("new_object = {}".format(reloaded.get_as_dict()))
Beispiel #6
0
 def get_prod_ings_as_list(product):
     '''
     Return the names of a product's ingredients as a list of strings.

     All ingredient ids found under the product's 'ingredient_list' key are
     resolved with a single query against the ingredients DB. Entries
     without an 'ingredient_name' contribute an empty string.
     '''
     wanted_ids = product.get('ingredient_list', [])
     name_only = {'_id': False, 'ingredient_name': True}
     names = []
     for doc in INGREDIENTS_DB.read({'_id': {'$in': wanted_ids}},
                                    projection=name_only):
         names.append(DB_Object.build_from_dict(doc).get('ingredient_name', ''))
     return names
Beispiel #7
0
 def load_all_items_from_database(self):
     """Print id, title and price of every item the repository returns."""
     print("Loading all items from database:")
     shown = 0
     for record in self.repository.read():
         item = DB_Object.build_from_dict(record)
         summary = "ID = {} | Title = {} | Price = {}".format(
             item._id, item.title, item.price)
         print(summary)
         shown += 1
     # Tell the user explicitly when the repository returned nothing
     if shown == 0:
         print("No items in the database")
Beispiel #8
0
def dump_db_to_json(host, port, dump_db):
    '''
    Dump one database, or all of them, to 'db_dump_<dump_db>.json'.

    dump_db must be one of "people", "products", "ingredients", "testing",
    "comodegenic" or "all"; any other value (including None and "") makes
    the function return without doing anything. The output file holds a
    JSON object mapping each dumped repository's collection name to the list
    of its items. host and port are accepted but not used by this function.
    '''
    # The first five names line up index-for-index with the `repos` list below
    valid = (
        "people", "products", "ingredients", "testing", "comodegenic", "all"
    )

    # Input validation, done before any repository is touched
    if not dump_db or dump_db not in valid:
        return

    repos = [PEOPLE_DB, PRODUCTS_DB, INGREDIENTS_DB, TEST_DB, COMODEGENIC_DB]
    if dump_db == 'all':
        selected = repos
    else:
        selected = [repos[valid.index(dump_db)]]

    # Dump the specified DB
    print("Dumping database/s: '" + dump_db + "'")
    out_list = {}
    for repo in selected:
        # Items are always stored as plain dicts, for 'all' as well as for a
        # single database, so both kinds of dump serialize the same way.
        items = [
            DB_Object.build_from_dict(p).get_as_dict() for p in repo.read()
        ]
        if not items:
            print("No items in ", repo.collection, " database")
        out_list[repo.collection] = items

    with open('db_dump_%s.json' % dump_db, 'w') as f:
        json.dump(out_list, f, cls=JSONEncoder)
Beispiel #9
0
 def test_create(self, new_object):
     """Insert new_object into the repository, then reload and print it.

     On an acknowledged insert the generated id is stored on new_object
     under '_id'. If the insert is not acknowledged a failure message is
     printed and the read-back is skipped.
     """
     print("\n\nSaving new_object to database")
     result = self.repository.create(new_object)
     if not result.acknowledged:
         # Stop here: reporting the object as saved would be wrong, and the
         # read-back below needs an '_id' that was never assigned.
         print("[FAILED] Could not save object")
         return
     new_object['_id'] = result.inserted_id
     print("new_object saved to database")
     print("Loading new_object from database")
     db_objects = self.repository.read({'_id': new_object._id})
     for p in db_objects:
         project_from_db = DB_Object.build_from_dict(p)
         print("new_object = {}".format(project_from_db.get_as_dict()))
Beispiel #10
0
    def test_delete(self, new_object):
        """Delete new_object from the repository and show that it is gone.

        Anything still returned for the object's id is printed; when nothing
        comes back a "not found" message is printed instead.
        """
        print("\n\nDeleting new_object from database")
        repo = self.repository
        repo.delete(new_object)
        print("new_object deleted from database")
        print("Trying to reload new_object from database")

        leftovers = 0
        for record in repo.read({'_id': new_object._id}):
            leftovers += 1
            still_there = DB_Object.build_from_dict(record)
            print("new_object = {}".format(still_there.get_as_dict()))

        if leftovers == 0:
            message = "Item with id = {} was not found in the database"
            print(message.format(new_object._id))
Beispiel #11
0
def get_ingredient_vocabulary(host, port, **kwargs):
    ''' Returns the set of all unique ingredient names including synonyms

    Every entry of the ingredients DB contributes its 'ingredient_name'
    (an empty string when the field is missing) and each item of its
    'synonym_list'. host, port and kwargs are accepted but not used.
    '''
    ing_fltr = {}  # Get all ingredients
    ing_prjctn = {
        '_id': False,
        'ingredient_name': True,
        'synonym_list': True}

    vocabulary = set()
    for doc in INGREDIENTS_DB.read(ing_fltr, projection=ing_prjctn):
        ingredient = DB_Object.build_from_dict(doc)
        vocabulary.add(ingredient.get('ingredient_name', ''))
        # Add the synonyms themselves; re-adding the ingredient name once per
        # synonym would leave the synonyms out of the vocabulary.
        vocabulary.update(ingredient.get('synonym_list', []) or [])
    return vocabulary
Beispiel #12
0
    def get_suggestions(s, search_str, col='ingredient'):
        ''' Text-search the ingredients DB (col == 'ingredient') or otherwise
        the products DB for search_str.

        search_str is URL-decoded with unquote_plus before it is used as the
        $text search term. At most 100 hits are requested, ordered by text
        score, and each hit is returned wrapped in a DB_Object. An empty
        search string returns an empty list without querying.
        '''
        if not search_str:
            return []

        # Only the collection and two field names differ between the searches
        if col == 'ingredient':
            collection = INGREDIENTS_DB
            name_field, score_field = 'ingredient_name', 'ingredient_score'
        else:
            collection = PRODUCTS_DB
            name_field, score_field = 'product_name', 'product_score'

        text_score = {'$meta': 'textScore'}
        prjctn = {
            name_field: True,
            'cancer_score': True,
            'allergy_imm_tox_score': True,
            score_field: True,
            'dev_reprod_tox_score': True,
            'score': text_score,
        }

        search_filter = {'$text': {'$search': unquote_plus(search_str)}}
        hits = collection.read(search_filter, limit=100, projection=prjctn)
        ranked = hits.sort([('score', {'$meta': 'textScore'})])

        suggestions = []
        for item in ranked:
            suggestions.append(DB_Object.build_from_dict(item))
        return suggestions
Beispiel #13
0
def build_people_model(host, port, **kwargs):
    '''
    Build the people training data set and store it.

    People are read from the people DB (demographic fields and
    'acne_products' only, no PII) and processed in batches of
    kwargs['batch_size'] (default 10000). Every (person, product) pair
    becomes one row: the person's demographic fields vectorized with a
    DictVectorizer, stacked next to the tf-idf vector of the product's
    ingredients. Labels are derived from PROD_COMO (filled by the
    get_ingredients_as_list tokenizer while it reads products) multiplied
    by the person's demographic multiplier, bucketed into 0 / 1 / 2.

    The resulting dict is pickled into MODEL_DB under "ml_people_data" and
    also written to 'ppl_model_data.pickle' in the working directory.
    '''
    global PROD_COMO
    from itertools import islice

    ppl_model_data = 'ppl_model_data.pickle'
    batch_size = kwargs.get('batch_size', 10000)
    vocabulary = get_ingredient_vocabulary(host, port)

    # The tfidf_vect will ignore the following words
    stop_words = [
        '',
        'water',
        'glycerin',
        'titanium dioxide',
        'iron oxides',
        'beeswax',
        'methylparaben',
        'propylparaben',
        'propylene glycol',
        'panthenol',
        'mica']

    # Create vectorizers
    d_vect = DictVectorizer(sparse=False)
    tfidf_vect = TfidfVectorizer(
        tokenizer=get_ingredients_as_list,
        lowercase=False,
        stop_words=stop_words,
        vocabulary=vocabulary)

    print("Loading people from database, batch_size:", str(batch_size))
    ppl_filt = {}
    ppl_prjctn = {
        '_id': False,
        'race': True,
        'birth_sex': True,
        'age': True,
        'acne': True,
        'skin': True,
        'acne_products': True}  # Don't include any PII
    db_objects = PEOPLE_DB.read(ppl_filt, projection=ppl_prjctn)
    people_iter = iter(db_objects)

    y, demo_mult = [], []
    batch_num = 0
    X = None

    # Work in batches to build dataset
    while True:
        # islice simply stops at the end of the data, so a final batch that
        # is only partially filled is still processed instead of being
        # thrown away when the source runs dry mid-batch.
        people = [
            DB_Object.build_from_dict(doc)
            for doc in islice(people_iter, batch_size)
        ]
        if not people:
            # End of available data
            break

        print('Parsing batch:', batch_num)
        X_demo_lst, X_prod_lst = [], []

        # Extract features
        for person in people:
            # Create new entry for each product
            # Note: Model is only applicable to entries with products
            for product_id in person.pop('acne_products'):
                # Pull product ingredients info
                X_prod_lst.append([product_id])

                # Pull demographic info
                X_demo_lst.append(person)

                # Generate demographic multiplier
                demo_mult.append(get_multiplier(person))

        if not X_prod_lst:
            # Nobody in this batch listed a product: nothing to vectorize
            batch_num += 1
            continue

        # Vectorize data
        X_demo = d_vect.fit_transform(X_demo_lst)  # X_demo is now a numpy array
        X_prod = tfidf_vect.fit_transform(X_prod_lst)  # X_prod is now a CSR sparse matrix
        X_t = hstack([csr_matrix(X_demo), X_prod], format="csr")

        # Add batch result to output matrix
        if X is None:
            # Initialize X
            X = X_t
        else:
            try:
                X = vstack([X, X_t], format="csr")
            except ValueError:
                # The batch could not be stacked onto the previous rows.
                # NOTE(review): demo_mult already contains this batch's
                # entries, so y can end up longer than X - confirm intent.
                print('[WARNING] Could not stack batch', batch_num,
                      '- stopping early')
                break

        batch_num += 1

    # Bucket (comodegenic score * demographic multiplier) into three classes
    for como, mult in zip(PROD_COMO, demo_mult):
        val = como * mult
        if val < 6:
            y.append(0)
        elif val < 12:
            y.append(1)
        else:
            y.append(2)

    print('Storing vectorized data and training labels')
    model = {
        'X': X,
        'y': y,
        'd_vect': d_vect,
        'tfidf_vect': tfidf_vect,
        'vocabulary': vocabulary
    }

    print("Saving model data to disk for next time")
    # Insert the model into the model database
    MODEL_DB.create_file(pdumps(model, protocol=2), filename="ml_people_data")
    # Save model data to disk
    with open(ppl_model_data, "wb") as pickle_out:
        pdump(model, pickle_out)
    print('[SUCCESS] People model data post-processed and stored')
Beispiel #14
0
    def create_new_user(s, recv_data):
        ''' Wrap recv_data in a DB_Object, insert it into the people DB and
        return the result of that insert. '''
        new_user = DB_Object.build_from_dict(recv_data)
        return PEOPLE_DB.create(new_user)
Beispiel #15
0
def generate_people(host, port, num_generate_people=10000):
    '''
    Replace the people database with randomly generated people.

    After the user confirms, the number of people is read from stdin
    (falling back to num_generate_people on invalid input), the people
    collection is nuked and re-indexed on 'user_name', and one record per
    person is inserted. Each record holds name, race, birth_sex, age, acne,
    skin, auth (base64 of "<user_name>:1234"), user_name and a randomly
    drawn 'acne_products' list of product ids.
    '''
    # Connect to the required databases
    products_db = DB_CRUD(host, port, db='capstone', col='products')
    people_db = DB_CRUD(host, port, db='capstone', col='people')

    # Variables
    races = [
        'American Indian', 'Asian', 'Black', 'Pacific Islander', 'White',
        'mixed_other'
    ]
    birth_sexes = ['female', 'male']
    skin_types = ['normal', 'oily', 'dry']

    # Probabilities
    race_probs = [0.009, 0.048, 0.126, 0.002, 0.724, 0.091]
    sex_probs = [0.508, 0.492]
    skin_probs = [1.0 / 3, 1.0 / 3, 1.0 / 3]

    # Make sure user wants to destroy existing DB
    ppl_qstn = '[WARNING] This will erase the people database. Continue?'
    if not query_yes_no(ppl_qstn, default='no'):
        print("No actions taken")
        return

    # Get number of people to generate
    try:
        num_generate_people = int(input("# people to generate: "))
    except ValueError:
        print("Invalid input, using default value", num_generate_people)

    print("Nuking people database")
    people_db.nuke()

    print("Creating search indexes")
    people_db.createIndex([('user_name', ASCENDING)],
                          unique=True,
                          default_language='english')

    # Generate random people data
    print("Generating race data")
    ppl_race = np.random.choice(races, num_generate_people, p=race_probs)
    print("Generating sex data")
    ppl_sex = np.random.choice(birth_sexes, num_generate_people, p=sex_probs)
    print("Generating age and acne data")
    ppl_ages, ppl_acne = generate_age_acne_lists(num_generate_people)
    print("Generating skin data")
    ppl_skins = np.random.choice(skin_types, num_generate_people, p=skin_probs)
    print("Generating names")
    ppl_names = [get_sex_name(s) for s in ppl_sex]
    print("Generating usernames")
    ppl_unames = [get_unique_username(full_name) for full_name in ppl_names]
    print("Generating user authentications")
    ppl_auths = [
        base64.b64encode(str(u_name + ":1234").encode()).decode()
        for u_name in ppl_unames
    ]

    # Generate dict of people
    print("Creating list of people dicts")
    fields = [
        'name', 'race', 'birth_sex', 'age', 'acne', 'skin', 'auth', 'user_name'
    ]
    p_data = zip(ppl_names, ppl_race, ppl_sex, ppl_ages, ppl_acne, ppl_skins,
                 ppl_auths, ppl_unames)
    p_list = [dict(zip(fields, d)) for d in p_data]

    # Get comodegenic products
    print("Getting list of comodegenic products")
    # 0 value comodegeinc scores are null data
    db_objects = products_db.read({'comodegenic': {"$gt": 0}})
    products = [DB_Object.build_from_dict(p) for p in db_objects]

    # Set scaling for comodogenic-ness of products
    # The scale value is 1 divided by the maximum comodegenic score
    # in the products database which works regardless of the scoring
    # method used when building the db.
    prod_filt = {'comodegenic': {'$type': 'int'}}
    prod_prjctn = {'comodegenic': True}
    db_objects = products_db.read(prod_filt,
                                  projection=prod_prjctn,
                                  sort=[("comodegenic", DESCENDING)],
                                  limit=1)
    como_scale = 1.0 / DB_Object.build_from_dict(db_objects[0])['comodegenic']

    print("Adding people to database")
    # Populate acne causing products for each person
    for person in p_list:
        p_products = []
        # People with acne keep a drawn product with probability
        # proportional to its comodegenic score; everyone else keeps it with
        # the complementary probability.
        outcomes = [True, False] if person['acne'] else [False, True]
        # Up to 9 candidate products are drawn per person
        for _ in range(np.random.choice(10)):
            # Uniform draw over ALL products. abs(choice(n) - 1) would never
            # pick the last product and would pick index 1 twice as often.
            product = products[np.random.choice(len(products))]
            keep_prob = como_scale * product['comodegenic']
            probs = [keep_prob, 1 - keep_prob]
            if np.random.choice(outcomes, p=probs):
                p_products.append(product['_id'])
        person['acne_products'] = p_products

        # Add person to data base
        new_person = DB_Object.build_from_dict(person)
        people_db.create(new_person)

    print("[SUCCESS] people database is populated")
Beispiel #16
0
def _find_comodegenic_level(comodegenic_db, names):
    '''Return the integer 'level' of the first name that produces a phrase
    text-search hit in comodegenic_db, or None when no name matches.'''
    for name in names:
        search_term = '"' + name + '"'
        db_objects = comodegenic_db.read({'$text': {"$search": search_term}})
        entries = [DB_Object.build_from_dict(entry) for entry in db_objects]
        if entries:
            return int(entries[0]['level'])
    return None


def build_db(host, port, **kwargs):
    '''
    Rebuild the products, ingredients and comodegenic databases from JSON.

    Keyword arguments:
      i_path    - path of the ingredients JSON file
      p_path    - path of the products JSON file
      c_path    - path of the comodegenic JSON file
      score_max - when true a product's comodegenic score is the maximum of
                  its ingredients' scores, otherwise it is their sum

    After the user confirms, the three JSON files are loaded, the existing
    collections are nuked and then repopulated. The files are loaded BEFORE
    anything is deleted so that a bad path or malformed file cannot leave
    the databases empty. If a file cannot be opened the error is printed and
    the program exits.

    Raises Exception when an ingredient insert is not acknowledged or the
    number of stored items does not match the number of loaded items, and
    KeyError when a product references an unknown ingredient id.
    '''
    # Get required file paths
    i_path = kwargs.get('i_path', '')
    p_path = kwargs.get('p_path', '')
    c_path = kwargs.get('c_path', '')
    score_max = kwargs.get('score_max', False)

    # Connect to the required databases
    products_db = DB_CRUD(host, port, db='capstone', col='products')
    ingredients_db = DB_CRUD(host, port, db='capstone', col='ingredients')
    comodegenic_db = DB_CRUD(host, port, db='capstone', col='comodegenic')

    # Make sure user wants to destroy existing DB
    db_qstn = ('[WARNING] This will erase the products, ingredients, '
               'and comodegenic items databases. Continue?')
    if not query_yes_no(db_qstn, default='no'):
        print("No actions taken")
        return

    # Open files and load JSON data, exit if unsuccessful
    print("Attempting to open .json files.")
    try:
        with open(i_path, 'rb') as i_f:
            ingredients_dict = json.load(i_f)
        with open(p_path, 'rb') as p_f:
            products_dict = json.load(p_f)
        with open(c_path, 'rb') as c_f:
            cmdgnc_list = json.load(c_f)
    except IOError as e:
        print(e)
        exit()
    ing_ins_len = len(ingredients_dict)
    prod_ins_len = len(products_dict)

    # Drop databases
    print("Deleting products database")
    products_db.nuke()
    print("Deleting ingredients database")
    ingredients_db.nuke()
    print("Deleting comodegenic database")
    comodegenic_db.nuke()

    print("Populating comodegenic information")
    for entry in cmdgnc_list:
        comodegenic_db.create(DB_Object.build_from_dict(entry))
    comodegenic_db.createIndex([('ingredient', TEXT)])

    # Clean and load ingredients into ingredient database
    print("Populating ingredients")
    for ingredient in ingredients_dict.values():
        # Remove the old id entry from the ingredient
        # This is to avoid storing redundant info in the DB, ingredient entries will still
        # be accessible using the ingredient_id when the product entries are added
        del ingredient['ingredient_id']

        # Try to find ingredient in comodegenic DB, fall back to synonyms if necessary
        candidates = [ingredient.get('ingredient_name', '')]
        candidates.extend(ingredient.get('synonym_list', []))
        level = _find_comodegenic_level(comodegenic_db, candidates)
        if level is not None:
            ingredient['comodegenic'] = level
        elif 'comodegenic' not in ingredient:
            # Set null value for ingredients without comodegenic score information
            ingredient['comodegenic'] = None

        # Normalize text fields
        ingredient['ingredient_name'] = ingredient.get('ingredient_name',
                                                       '').strip().lower()
        synonym_list = ingredient.get('synonym_list', [])
        if synonym_list:
            # Store the normalized synonyms, not the raw list
            ingredient['synonym_list'] = [
                synonym.strip().lower() for synonym in synonym_list
            ]

        # Create DB object from ingredient
        new_ingredient = DB_Object.build_from_dict(ingredient)

        # Insert the ingredient into the database
        db_op_res = ingredients_db.create(new_ingredient)

        # Add the new mongoDB id to the existing ingredients dictionary
        # if the insertion was successful
        if not db_op_res.acknowledged:
            err_msg = ("[FAIL] Database insertion for " + str(new_ingredient) +
                       " was unsuccessful")
            raise Exception(err_msg)
        ingredient['_id'] = db_op_res.inserted_id

    print("Populating products")
    for product in products_dict.values():
        # Convert ingredient list IDs to Mongo DB object IDs
        new_ing_ids = []
        for ingredient_id in product.get('ingredient_list', []):
            new_ing_id = ingredients_dict.get(ingredient_id,
                                              {}).get('_id', None)
            if not new_ing_id:
                raise KeyError(
                    "Check scraper, key should exist in ingredients JSON!\nKey: '{}'"
                    .format(ingredient_id))
            new_ing_ids.append(new_ing_id)

            # Set product comodegenic score
            # Ingredients without score information store None; count them
            # as 0 so that neither max() nor the sum fails on them.
            ing_como = ingredients_dict[ingredient_id].get('comodegenic') or 0
            prod_como = product.get('comodegenic', 0) or 0

            # Determine whether comodegenic scores are calculated using
            # ingredient max comodegenic score or sum of ingredient comodegenic scores
            if score_max:
                product['comodegenic'] = max(prod_como, ing_como)
            else:
                product['comodegenic'] = prod_como + ing_como

        if new_ing_ids:
            product['ingredient_list'] = new_ing_ids
        # Set null value for products without comodegenic score information
        if 'comodegenic' not in product:
            product['comodegenic'] = None
        # Remove old style product id
        del product['product_id']
        # Create DB object from product and insert it into the database
        products_db.create(DB_Object.build_from_dict(product))

    # Test the build
    print("Testing data integrity")
    ing_read_len = ingredients_db.read().count()
    prod_read_len = products_db.read().count()

    print("Ingredients inserted: {}  Ingredients read: {}".format(
        ing_ins_len, ing_read_len))
    print("Products inserted: {}  Products read: {}".format(
        prod_ins_len, prod_read_len))

    if ing_read_len != ing_ins_len or prod_read_len != prod_ins_len:
        raise Exception("[FAIL] The number of inserted items does not match!")

    print("Creating search indexes")
    ingredients_db.createIndex([('ingredient_name', TEXT),
                                ('synonym_list', TEXT)],
                               weights={'ingredient_name': 10},
                               default_language='english')
    products_db.createIndex([('product_name', TEXT)],
                            default_language='english')
    products_db.createIndex([('comodegenic', DESCENDING)],
                            default_language='english')

    print("[SUCCESS] Database is populated")