def initialize_connections(host, port):
    # Connect to database
    global PEOPLE_DB
    global PRODUCTS_DB
    global INGREDIENTS_DB
    global TEST_DB
    global COMODEGENIC_DB
    global MODEL_DB
    PEOPLE_DB = DB_CRUD(host, port, db='capstone', col='people')
    PRODUCTS_DB = DB_CRUD(host, port, db='capstone', col='products')
    INGREDIENTS_DB = DB_CRUD(host, port, db='capstone', col='ingredients')
    TEST_DB = DB_CRUD(host, port, db='capstone', col='testing')
    COMODEGENIC_DB = DB_CRUD(host, port, db='capstone', col='comodegenic')
    MODEL_DB = DB_CRUD(host, port, db='capstone', col='model')
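# NOTE: DB_CRUD is defined elsewhere in the project; the sketch below only
# illustrates the interface relied on in this file (create/read/update/delete/
# nuke/createIndex plus a `collection` name attribute). It is a minimal,
# hypothetical implementation over pymongo, not the project's actual class.
from pymongo import MongoClient


class DB_CRUD(object):
    ''' Minimal sketch of a CRUD wrapper around one MongoDB collection '''

    def __init__(self, host='localhost', port=27017, db=None, col=None):
        self.client = MongoClient(host, int(port))
        self.collection = col                 # collection name (string)
        self.coll = self.client[db][col]      # pymongo collection handle

    @staticmethod
    def _as_dict(document):
        # Accept plain dicts or objects exposing get_as_dict() (e.g. DB_Object)
        return document.get_as_dict() if hasattr(document, 'get_as_dict') else dict(document)

    def create(self, document):
        # Returns a pymongo InsertOneResult (.acknowledged / .inserted_id)
        return self.coll.insert_one(self._as_dict(document))

    def read(self, filter=None, projection=None, sort=None, limit=0):
        cursor = self.coll.find(filter or {}, projection)
        if sort:
            cursor = cursor.sort(sort)
        return cursor.limit(limit) if limit else cursor

    def update(self, document):
        doc = self._as_dict(document)
        return self.coll.replace_one({'_id': doc['_id']}, doc)

    def delete(self, document):
        return self.coll.delete_one({'_id': document['_id']})

    def nuke(self):
        # Remove every document in the collection
        return self.coll.delete_many({})

    def createIndex(self, keys, **kwargs):
        # Forward index options (unique, weights, default_language, ...) to pymongo
        return self.coll.create_index(keys, **kwargs)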
    default=SV_PORT_NUMBER)
parser.add_argument('-m', '--db_host', help='Database hostname', default=DB_HOST_NAME)
parser.add_argument('-n', '--db_port', help='Database port', default=DB_PORT_NUMBER)
args = parser.parse_args()

# App databases
stats_init(args.db_host, args.db_port)
model_ops_init(args.db_host, args.db_port)
PEOPLE_DB = DB_CRUD(args.db_host, args.db_port, db='capstone', col='people')
PRODUCTS_DB = DB_CRUD(args.db_host, args.db_port, db='capstone', col='products')
INGREDIENTS_DB = DB_CRUD(args.db_host, args.db_port, db='capstone', col='ingredients')
COMODEGENIC_DB = DB_CRUD(args.db_host, args.db_port, db='capstone', col='comodegenic')

# Load people model
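# Example invocation of this entry point (the script name "server.py" is a
# placeholder; only the --db_host/--db_port flags shown above are known here):
#
#   python server.py --db_host localhost --db_port 27017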
class database_test(object):
    ''' Test operations for mongo db '''

    def __init__(self, host='localhost', port=27017, db=None, col=None):
        # Initializing the MongoClient, this helps to
        # access the MongoDB databases and collections
        self.repository = DB_CRUD(host, port, db=db, col=col)

    def load_all_items_from_database(self):
        print("Loading all items from database:")
        db_objects = self.repository.read()
        at_least_one_item = False
        for p in db_objects:
            at_least_one_item = True
            tmp_project = DB_Object.build_from_dict(p)
            print("ID = {} | Title = {} | Price = {}".format(
                tmp_project._id, tmp_project.title, tmp_project.price))
        if not at_least_one_item:
            print("No items in the database")

    def test_create(self, new_object):
        print("\n\nSaving new_object to database")
        result = self.repository.create(new_object)
        if result.acknowledged:
            new_object['_id'] = result.inserted_id
        else:
            print("[FAILED] Could not save object")
        print("new_object saved to database")
        print("Loading new_object from database")
        db_objects = self.repository.read({'_id': new_object._id})
        for p in db_objects:
            project_from_db = DB_Object.build_from_dict(p)
            print("new_object = {}".format(project_from_db.get_as_dict()))

    def test_update(self, new_object):
        print("\n\nUpdating new_object in database")
        self.repository.update(new_object)
        print("new_object updated in database")
        print("Reloading new_object from database")
        db_objects = self.repository.read({'_id': new_object._id})
        for p in db_objects:
            project_from_db = DB_Object.build_from_dict(p)
            print("new_object = {}".format(project_from_db.get_as_dict()))

    def test_delete(self, new_object):
        print("\n\nDeleting new_object from database")
        self.repository.delete(new_object)
        print("new_object deleted from database")
        print("Trying to reload new_object from database")
        db_objects = self.repository.read({'_id': new_object._id})
        found = False
        for p in db_objects:
            found = True
            project_from_db = DB_Object.build_from_dict(p)
            print("new_object = {}".format(project_from_db.get_as_dict()))
        if not found:
            print("Item with id = {} was not found in the database".format(
                new_object._id))

    def test_delete_all(self):
        print("\n\nDeleting EVERYTHING from database")
        self.repository.nuke()
        print("NUKED database")
        print("Trying to reload new_object from database")
        db_objects = self.repository.read()
        at_least_one_item = False
        for p in db_objects:
            at_least_one_item = True
        if at_least_one_item:
            print("[FAILED] Items still in " + self.repository.collection + " database")
        else:
            print("[SUCCESS] No items in " + self.repository.collection + " database")

    def test_db(self):
        ''' Test database CRUD ops '''
        # Display all items from DB
        self.load_all_items_from_database()

        # Create new_object and read back from database
        json_data = {
            "title": "Wordpress website for Freelancers",
            "description": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc molestie. ",
            "price": 250,
            "assigned_to": "John Doe"
        }
        new_object = DB_Object.build_from_dict(json_data)
        self.test_create(new_object)

        # Update new_object and read back from database
        new_object.price = 350
        self.test_update(new_object)

        # Delete new_object and try to read back from database
        self.test_delete(new_object)

        # Test nuking and reading anything back from database
        for i in range(3):
            self.repository.create(DB_Object.build_from_dict(json_data))
        self.test_delete_all()
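# NOTE: DB_Object is defined elsewhere in the project; the sketch below only
# illustrates the behaviour the test code above depends on (build_from_dict,
# get_as_dict, and both attribute- and dict-style field access). It is a
# hypothetical stand-in, not the project's actual implementation.
class DB_Object(object):
    ''' Minimal sketch of a document wrapper with attribute/dict access '''

    def __init__(self, **fields):
        self.__dict__.update(fields)

    @classmethod
    def build_from_dict(cls, data):
        # Wrap a MongoDB document (or any plain dict) in a DB_Object
        return cls(**data)

    def get_as_dict(self):
        # Return a plain-dict view suitable for insertion into MongoDB
        return dict(self.__dict__)

    def __getitem__(self, key):
        return self.__dict__[key]

    def __setitem__(self, key, value):
        self.__dict__[key] = value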
def generate_people(host, port, num_generate_people=10000):
    # Connect to the required databases
    products_db = DB_CRUD(host, port, db='capstone', col='products')
    people_db = DB_CRUD(host, port, db='capstone', col='people')

    # Variables
    races = [
        'American Indian', 'Asian', 'Black', 'Pacific Islander', 'White',
        'mixed_other'
    ]
    birth_sexes = ['female', 'male']
    skin_types = ['normal', 'oily', 'dry']

    # Probabilities
    race_probs = [0.009, 0.048, 0.126, 0.002, 0.724, 0.091]
    sex_probs = [0.508, 0.492]
    skin_probs = [1.0 / 3, 1.0 / 3, 1.0 / 3]

    # Make sure the user wants to destroy the existing DB
    ppl_qstn = '[WARNING] This will erase the people database. Continue?'
    if not query_yes_no(ppl_qstn, default='no'):
        print("No actions taken")
        return

    # Get number of people to generate
    try:
        usr_input = int(input("# people to generate: "))
        num_generate_people = usr_input
    except ValueError:
        print("Invalid input, using default value", num_generate_people)

    print("Nuking people database")
    people_db.nuke()

    print("Creating search indexes")
    people_db.createIndex([('user_name', ASCENDING)],
                          unique=True,
                          default_language='english')

    # Generate random people data
    print("Generating race data")
    ppl_race = np.random.choice(races, num_generate_people, p=race_probs)
    print("Generating sex data")
    ppl_sex = np.random.choice(birth_sexes, num_generate_people, p=sex_probs)
    print("Generating age and acne data")
    ppl_ages, ppl_acne = generate_age_acne_lists(num_generate_people)
    print("Generating skin data")
    ppl_skins = np.random.choice(skin_types, num_generate_people, p=skin_probs)
    print("Generating names")
    ppl_names = [get_sex_name(s) for s in ppl_sex]
    print("Generating usernames")
    ppl_unames = [get_unique_username(full_name) for full_name in ppl_names]
    print("Generating user authentications")
    ppl_auths = [
        base64.b64encode(str(u_name + ":1234").encode()).decode()
        for u_name in ppl_unames
    ]

    # Generate dict of people
    print("Creating list of people dicts")
    fields = [
        'name', 'race', 'birth_sex', 'age', 'acne', 'skin', 'auth', 'user_name'
    ]
    p_data = zip(ppl_names, ppl_race, ppl_sex, ppl_ages, ppl_acne, ppl_skins,
                 ppl_auths, ppl_unames)
    p_list = [dict(zip(fields, d)) for d in p_data]

    # Get comodegenic products
    print("Getting list of comodegenic products")
    # Zero-valued comodegenic scores are null data
    db_objects = products_db.read({'comodegenic': {"$gt": 0}})
    products = [DB_Object.build_from_dict(p) for p in db_objects]

    # Set scaling for the comodegenic-ness of products.
    # The scale value is 1 divided by the maximum comodegenic score
    # in the products database, which works regardless of the scoring
    # method used when building the DB.
    prod_filt = {'comodegenic': {'$type': 'int'}}
    prod_prjctn = {'comodegenic': True}
    db_objects = products_db.read(prod_filt,
                                  projection=prod_prjctn,
                                  sort=[("comodegenic", DESCENDING)],
                                  limit=1)
    como_scale = 1.0 / DB_Object.build_from_dict(db_objects[0])['comodegenic']

    print("Adding people to database")
    # Populate acne-causing products for each person
    for person in p_list:
        p_products = []
        for i in range(np.random.choice(10)):
            rand_idx = np.abs(np.random.choice(len(products)) - 1)
            prod_como = products[rand_idx]['comodegenic']
            probs = [como_scale * prod_como, 1 - (como_scale * prod_como)]
            if person['acne']:
                # If a person has acne, probabilistically add known comodegenic
                # products; otherwise probabilistically add non-comodegenic products
                if np.random.choice([True, False], p=probs):
                    p_products.append(products[rand_idx]['_id'])
            else:
                if np.random.choice([False, True], p=probs):
                    p_products.append(products[rand_idx]['_id'])
        person['acne_products'] = p_products

        # Add person to database
        new_person = DB_Object.build_from_dict(person)
        people_db.create(new_person)
    print("[SUCCESS] people database is populated")
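# Example usage (a sketch): populate the people collection against a local
# MongoDB instance. This assumes the products collection has already been
# built (e.g. by build_db below), since comodegenic scores are sampled from it.
#
#   generate_people('localhost', 27017, num_generate_people=5000)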
def build_db(host, port, **kwargs):
    # Get required file paths
    i_path = kwargs.get('i_path', '')
    p_path = kwargs.get('p_path', '')
    c_path = kwargs.get('c_path', '')
    score_max = kwargs.get('score_max', False)

    # Connect to the required databases
    products_db = DB_CRUD(host, port, db='capstone', col='products')
    ingredients_db = DB_CRUD(host, port, db='capstone', col='ingredients')
    comodegenic_db = DB_CRUD(host, port, db='capstone', col='comodegenic')

    # Make sure the user wants to destroy the existing DB
    db_qstn = ('[WARNING] This will erase the products, ingredients, '
               'and comodegenic items databases. Continue?')
    if not query_yes_no(db_qstn, default='no'):
        print("No actions taken")
        return

    # Drop databases
    print("Deleting products database")
    products_db.nuke()
    print("Deleting ingredients database")
    ingredients_db.nuke()
    print("Deleting comodegenic database")
    comodegenic_db.nuke()

    # Open files and load JSON data, exit if unsuccessful
    print("Attempting to open .json files.")
    try:
        i_f = open(i_path, 'rb')
        p_f = open(p_path, 'rb')
        c_f = open(c_path, 'rb')
    except IOError as e:
        print(e)
        exit()
    with i_f:
        ingredients_dict = json.load(i_f)
        ing_ins_len = len(ingredients_dict)
    with p_f:
        products_dict = json.load(p_f)
        prod_ins_len = len(products_dict)
    with c_f:
        cmdgnc_list = json.load(c_f)

    print("Populating comodegenic information")
    for entry in cmdgnc_list:
        # Create DB object from the comodegenic entry
        new_entry = DB_Object.build_from_dict(entry)
        # Insert the entry into the database
        comodegenic_db.create(new_entry)
    comodegenic_db.createIndex([('ingredient', TEXT)])

    # Clean and load ingredients into the ingredient database
    print("Populating ingredients")
    for ingredient_id in list(ingredients_dict.keys()):
        ingredient = ingredients_dict[ingredient_id]

        # Remove the old id entry from ingredients_dict.
        # This avoids storing redundant info in the DB; ingredient entries will
        # still be accessible via ingredient_id when the product entries are added
        del ingredient['ingredient_id']

        # Get comodegenic info
        search_term = '"' + ingredient.get('ingredient_name', '') + '"'
        db_objects = comodegenic_db.read({'$text': {"$search": search_term}})
        entries = [DB_Object.build_from_dict(entry) for entry in db_objects]

        # Try to find the ingredient in the comodegenic DB, fall back to
        # synonyms if necessary
        if entries:
            ingredient['comodegenic'] = int(entries[0]['level'])
        else:
            for synonym in ingredient.get('synonym_list', []):
                search_term = '"' + synonym + '"'
                db_objects = comodegenic_db.read({'$text': {"$search": search_term}})
                entries = [DB_Object.build_from_dict(entry) for entry in db_objects]
                if entries:
                    ingredient['comodegenic'] = int(entries[0]['level'])
                    break

        # Set null value for ingredients without comodegenic score information
        if 'comodegenic' not in ingredient:
            ingredient['comodegenic'] = None

        # Normalize text fields
        ingredient['ingredient_name'] = ingredient.get('ingredient_name', '').strip().lower()
        norm_synonyms = []
        synonym_list = ingredient.get('synonym_list', [])
        for synonym in synonym_list:
            norm_synonyms.append(synonym.strip().lower())
        if synonym_list:
            ingredient['synonym_list'] = norm_synonyms

        # Create DB object from ingredient
        new_ingredient = DB_Object.build_from_dict(ingredient)

        # Insert the ingredient into the database
        db_op_res = ingredients_db.create(new_ingredient)

        # Add the new MongoDB id to the existing ingredients dictionary
        # if the insertion was successful
        if db_op_res.acknowledged:
            ingredient['_id'] = db_op_res.inserted_id
        else:
            err_msg = ("[FAIL] Database insertion for " + str(new_ingredient) +
                       " was unsuccessful")
            raise Exception(err_msg)

    print("Populating products")
    for product_id in list(products_dict.keys()):
        # Convert ingredient list IDs to MongoDB object IDs
        new_ing_ids = []
        product = products_dict[product_id]
        for ingredient_id in product.get('ingredient_list', []):
            new_ing_id = ingredients_dict.get(ingredient_id, {}).get('_id', None)
            if new_ing_id:
                new_ing_ids.append(new_ing_id)

                # Set product comodegenic score.
                # Determine whether comodegenic scores are calculated using the
                # ingredient max comodegenic score or the sum of ingredient
                # comodegenic scores (None scores are treated as 0)
                ing_como = ingredients_dict[ingredient_id].get('comodegenic', 0) or 0
                prod_como = product.get('comodegenic', 0)
                if score_max:
                    product['comodegenic'] = max(prod_como, ing_como)
                else:
                    product['comodegenic'] = prod_como + ing_como if ing_como else prod_como
            else:
                raise KeyError(
                    "Check scraper, key should exist in ingredients JSON!\nKey: '{}'"
                    .format(ingredient_id))
        if new_ing_ids:
            product['ingredient_list'] = new_ing_ids

        # Set null value for products without comodegenic score information
        if 'comodegenic' not in product:
            product['comodegenic'] = None

        # Remove the old-style product id
        del product['product_id']

        # Create DB object from product
        new_product = DB_Object.build_from_dict(product)

        # Insert the product into the database
        products_db.create(new_product)

    # Test the build
    print("Testing data integrity")
    ing_read_len = ingredients_db.read().count()
    prod_read_len = products_db.read().count()
    print("Ingredients inserted: {} Ingredients read: {}".format(
        ing_ins_len, ing_read_len))
    print("Products inserted: {} Products read: {}".format(
        prod_ins_len, prod_read_len))
    if ing_read_len != ing_ins_len or prod_read_len != prod_ins_len:
        raise Exception("[FAIL] The number of inserted items does not match!")

    print("Creating search indexes")
    ingredients_db.createIndex([('ingredient_name', TEXT), ('synonym_list', TEXT)],
                               weights={'ingredient_name': 10},
                               default_language='english')
    products_db.createIndex([('product_name', TEXT)],
                            default_language='english')
    products_db.createIndex([('comodegenic', DESCENDING)],
                            default_language='english')
    print("[SUCCESS] Database is populated")
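# Example usage (a sketch): the JSON paths below are placeholders for the
# scraper output files expected by build_db; score_max toggles between
# max-of-ingredients and sum-of-ingredients comodegenic scoring.
#
#   build_db(
#       'localhost', 27017,
#       i_path='ingredients.json',
#       p_path='products.json',
#       c_path='comodegenic.json',
#       score_max=True)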