def add_food():
    food_dish_name = str(input("Enter Food dish name: "))
    food_data = Food.fetch_data()
    if not any(food['food_dish'].lower() == food_dish_name.lower() for food in food_data):
        food_price = int(input("Enter Price: Rs."))
        DataBase.add_food_item(food_dish_name, food_price)
def top10_users(main_user):
    database = DataBase()
    like_users = main_user.like_users_get()
    like_users_dict = dict()
    like_users_photos = dict()
    # Score each candidate user by similarity to the main user.
    for user in like_users:
        if database.check(user):
            other_user = User(str(user))
            other_user.data_user_get()
            other_user.groups_get()
            other_user.friends_get()
            like_users_photos[user] = other_user.photos_get()
            like_users_dict[user] = main_user.compare_user_with(other_user)
            print('...')
        else:
            continue
    # Sort by similarity score, highest first.
    like_users_dict = sorted(like_users_dict.items(), key=lambda x: x[1], reverse=True)
    data_to_db = list()
    # Keep the ten best matches and collect their profile links and photos.
    for user in like_users_dict[0:10]:
        top10_users_dict = dict()
        user_photos = like_users_photos[user[0]]
        photos_list = list()
        for photo in user_photos:
            photos_list.append(photo[0])
        top10_users_dict['user_id'] = user[0]
        top10_users_dict['user_page'] = 'https://vk.com/id' + str(user[0])
        top10_users_dict['photos'] = photos_list
        data_to_db.append(top10_users_dict)
    database.add(data_to_db)
    return data_to_db
def routine(channelid):
    db = DataBase()
    video_list = db.get_video_list(channelid)
    video_list = np.array(video_list)  # video_list[:, 0]
    yt = YouTubeComment(db, channelid, DEVELOPER_KEY_LIST)
    for videoid in video_list[:, 0]:
        print(videoid, end='|', flush=True)
        yt.get_comments(videoid)
def fetch_data():
    food_data = []
    data = DataBase.get_food_data()
    for food in data:
        food_data.append({
            "id": food[0],
            "food_dish": food[1],
            "price": food[2]
        })
    return food_data
def printWithWellProdDate(self, wellID, prodDate, product):
    db = DataBase(config.get_file_dir() + 'database.xlsx')
    md = db.getMonthlyDataByWellProdMonthProduct(wellID, prodDate, product)
    well = db.getWell(wellID)
    royalty = db.getRoyaltyMaster(well.lease)
    lease = db.getLease(well.lease)
    calc = db.getCalcDataByWellProdMonthProduct(wellID, prodDate, product)
    ws = RoyaltyWorksheet()
    ws.printSaskOilRoyaltyRate(md, well, royalty, lease, calc)
def report_it(e):
    logger.error("An unexpected error occurred")
    logger.error("Error details:")
    error = form_report(e)
    logger.error(error['string'])
    logger.error('File %s' % error['file'])
    logger.error('Line %s' % error['line'])
    logger.info('Trying to report the error...')
    logger.info('Trying to send a bug report email...')
    send_report(error)
    logger.info('Bug report email sent successfully')
    try:
        logger.info('Trying to push the bug to the remote database...')
        from database.database import DataBase
        _db = DataBase()
        _db.error_report(error)
    except Exception as e2:
        logger.error('Failed to push the bug to the remote database')
        send_report(e2)
    logger.info('Bug report finished.')
    sys.exit(1)
async def app_factory(self):
    try:
        pool = await aiomysql.create_pool(
            host=db_config["host"],
            port=int(db_config["port"]),
            user=db_config["user"],
            password=db_config["password"],
            db=db_config["db"],
            minsize=int(db_config["minsize"]),
            maxsize=int(db_config["maxsize"]),
            loop=self.loop,
            autocommit=True,
            pool_recycle=7 * 3600)
        self.db = DataBase(pool)
    except Exception as e:
        logging.error(e)
def save_data_to_local_computer(field_names, path_directory=DEFAULT_DIRECTORY_FOR_SAVE):
    data = DataBase.get_all_data(field_names)
    data_dict = dict()
    for field_name in field_names:
        data_dict[field_name] = []
    for e in data:
        for field_name in field_names:
            if field_name == '_id':
                data_dict['_id'].append(str(e['_id']))
            else:
                data_dict[field_name].append(e[field_name])
    df = pd.DataFrame(data_dict)
    df = df.set_index('_id')
    print()
    df.to_csv(os.path.join(CONFIG.ROOT_DIR, 'ai', 'data', "mini-data-clean.csv"))
    print('Number:', len(df))
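A hypothetical call of the export helper above, as a sketch only: '_id' must be among the requested fields because the resulting DataFrame is indexed by it, while the other field names are illustrative and not confirmed by the source.

# Hypothetical usage, assuming DataBase.get_all_data returns documents that
# contain every requested field ('title', 'price', 'district' are illustrative).
save_data_to_local_computer(['_id', 'title', 'price', 'district'])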
def top10(main_user):
    database = DataBase()
    data_from_db = database.data_record()
    for user in data_from_db:
        print(user['users_list'])
    select_users = main_user.select_users_search()
    select_users_dict = dict()
    select_users_photos = dict()
    # Score each candidate user by similarity to the main user.
    for user in select_users:
        if database.check(user):
            other_user = VKuser(str(user))
            other_user.select_user_info()
            other_user.groups()
            other_user.friends()
            select_users_photos[user] = other_user.photos()
            select_users_dict[user] = main_user.compare_user_with(other_user)
            print('...')
        else:
            continue
    # Sort by similarity score, highest first.
    select_users_dict = sorted(select_users_dict.items(), key=lambda x: x[1], reverse=True)
    data_to_db = list()
    # Keep the ten best matches and collect their profile links and photos.
    for user in select_users_dict[0:10]:
        top10_users_dict = dict()
        user_photos = select_users_photos[user[0]]
        photos_list = list()
        for photo in user_photos:
            photos_list.append(photo[0])
        top10_users_dict['user_id'] = user[0]
        top10_users_dict['user_page'] = 'https://vk.com/id' + str(user[0])
        top10_users_dict['photos'] = photos_list
        data_to_db.append(top10_users_dict)
    database.add(data_to_db)
    return data_to_db
def __init__(self, **kwargs):
    super(VectorLogicAdapter, self).__init__(**kwargs)
    self.db = DataBase()
    self.stemmer = Stemmer()
def __init__(self, sim_threshold=0.3, user=None, post=None, num_of_recommend_post=None):
    self.database = DataBase()
    self.user = user
    self.post = post
    self.num_of_recommend_post = num_of_recommend_post
    self.SIM_THRESHOLD = sim_threshold
def register_user():
    database = DataBase()
    database.register_adherent(request.form)
    return redirect(url_for('register'))
def register():
    database = DataBase()
    adherent_inputs = database.get_all_columns_of_table("adherent")
    types_of_adherent = database.find_all("type_of_adherent")
    return render_template('register.html',
                           adherent_inputs=adherent_inputs,
                           types_of_adherent=types_of_adherent)
def index():
    database = DataBase()
    return render_template('index.html',
                           users=database.find_all("adherent"),
                           columns=database.get_all_columns_of_table("adherent"))
def test_getMonthlyDataByWellProdMonthProduct(self):
    db = DataBase(config.get_file_dir() + 'database new.xlsx')
    md = db.getMonthlyDataByWellProdMonthProduct(6, 201501, 'Oil')
    self.assertRaises(AppError, db.getMonthlyDataByWellProdMonthProduct, 99999, 201512, 'Oil')
class SimpleRecommendSystem(object):
    # Find posts based on their similarity to one another.
    # Rule: prioritize posts in the same ward and district.
    PATH_DATA_DIRECTORY = '../ai/data/'

    def __init__(self, sim_threshold=0.3, user=None, post=None, num_of_recommend_post=None):
        self.database = DataBase()
        self.user = user
        self.post = post
        self.num_of_recommend_post = num_of_recommend_post
        self.SIM_THRESHOLD = sim_threshold

    def simple_find_recommend_posts(self, online=False):
        # find candidate which is same district, wards
        recommend_docs = []
        if online:
            candidate_docs = self.find_candidate_docs_online()
        else:
            candidate_docs = self.find_candidate_docs_offline()
        if len(candidate_docs) >= self.num_of_recommend_post:
            return candidate_docs[0:self.num_of_recommend_post]
        for doc in candidate_docs:
            recommend_docs.append(doc)
        # Fall back to the other source if there are not enough candidates.
        if online:
            candidate_docs = self.find_candidate_docs_offline()
        else:
            candidate_docs = self.find_candidate_docs_online()
        if candidate_docs is not None and len(candidate_docs) != 0:
            for doc in candidate_docs[0:(self.num_of_recommend_post - len(recommend_docs))]:
                recommend_docs.append(doc)
        # Remove the source post itself from the recommendations.
        delete_index = -1
        for index, doc in enumerate(recommend_docs):
            if str(doc['_id']) == str(self.post['_id']):
                delete_index = index
        print(delete_index)
        if delete_index != -1:
            del recommend_docs[delete_index]
        return recommend_docs

    def find_recommend_posts(self, online=False):
        # find candidate which is same district, wards
        recommend_docs = []
        start = time.time()
        if online:
            candidate_docs = self.find_candidate_docs_online()
        else:
            candidate_docs = self.find_candidate_docs_offline()
        end = time.time()
        if len(candidate_docs) == 0:
            print('Cannot find candidate doc')
            return []
        for doc in self.find_recommend_docs(source_doc=self.post,
                                            docs=candidate_docs,
                                            num_doc=self.num_of_recommend_post,
                                            sim_threshold=self.SIM_THRESHOLD):
            doc['_id'] = str(doc['_id'])
            recommend_docs.append(doc)
        if len(recommend_docs) < self.num_of_recommend_post:
            if online:
                candidate_docs = self.find_candidate_docs_online()
            else:
                candidate_docs = self.find_candidate_docs_offline()
            print(len(candidate_docs))
            for doc in self.find_recommend_docs(source_doc=self.post,
                                                docs=candidate_docs,
                                                num_doc=self.num_of_recommend_post - len(recommend_docs),
                                                sim_threshold=self.SIM_THRESHOLD):
                doc['_id'] = str(doc['_id'])
                recommend_docs.append(doc)
        # Remove the source post itself from the recommendations.
        delete_index = -1
        for index, doc in enumerate(recommend_docs):
            if str(doc['_id']) == str(self.post['_id']):
                delete_index = index
        print(delete_index)
        if delete_index != -1:
            del recommend_docs[delete_index]
        return recommend_docs

    def find_candidate_docs_online(self):
        filter_district = {"$eq": self.post['district']}
        filter_ = {"district": filter_district}
        return self.database.find_real_estate_document(filter_)

    def find_candidate_docs_offline(self):
        df = pd.read_csv(os.path.join(CONFIG.ROOT_DIR, 'ai', 'data', 'mini-data-clean.csv'), index_col=0)
        df_candi = df[df['district'] == self.post['district']]
        try:
            df_candi.drop(index=[str(self.post['_id'])], inplace=True)
        except Exception:
            pass
        ids = df_candi.index.to_numpy()
        for index, id in enumerate(ids):
            ids[index] = ObjectId(id)
        filter_ = {"_id": {"$in": ids.tolist()}}
        projection = ['title', 'description', 'price', 'square', 'image']
        start = time.time()
        candidates = self.database.find_real_estate_document(filter_, projection)
        end = time.time()
        return candidates

    @staticmethod
    def find_recommend_docs(source_doc, docs, num_doc, sim_threshold):
        result = []
        count = 0
        with open(os.path.join(CONFIG.ROOT_DIR, 'ai', 'data', 'tf_idf_vec_description.pk'), 'rb') as f:
            tf_idf_vec_description = pickle.load(f)
        with open(os.path.join(CONFIG.ROOT_DIR, 'ai', 'data', 'tf_idf_vec_title.pk'), 'rb') as f:
            tf_idf_vec_title = pickle.load(f)
        titles = [doc['title'] for doc in docs]
        descriptions = [doc['description'] for doc in docs]
        sim_titles = Similarity.tf_idf_similarities(source_doc['title'], titles, tf_idf_vec_title)
        sim_descriptions = Similarity.tf_idf_similarities(source_doc['description'], descriptions, tf_idf_vec_description)
        for index, doc in enumerate(docs):
            sim_price = Similarity.similarity_for_price(source_doc['price'], doc['price'])
            sim_square = Similarity.similarity_for_square(source_doc['square'], doc['square'])
            sim_title = sim_titles[index]
            sim_description = sim_descriptions[index]
            # Weighted combination of the four similarity components.
            a = 0.1
            b = 0.1
            c = 0.4
            d = 0.3
            if a * sim_square + b * sim_price + c * sim_title + d * sim_description > sim_threshold:
                result.append(doc)
                count += 1
                if count >= num_doc:
                    break
        return result
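A hypothetical usage sketch for SimpleRecommendSystem: the post fields mirror what the class reads ('_id', 'district', 'title', 'description', 'price', 'square'), while the lookup filter and the district value are assumptions, not something the source confirms.

# Hypothetical usage: fetch one post and ask for five similar ones.
# The filter value "Cau Giay" is illustrative only.
database = DataBase()
post = database.find_real_estate_document({"district": "Cau Giay"})[0]
recommender = SimpleRecommendSystem(sim_threshold=0.3, post=post, num_of_recommend_post=5)
recommendations = recommender.find_recommend_posts(online=True)
for doc in recommendations:
    print(doc['_id'], doc['title'])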
class VectorLogicAdapter(LogicAdapter):
    def __init__(self, **kwargs):
        super(VectorLogicAdapter, self).__init__(**kwargs)
        self.db = DataBase()
        self.stemmer = Stemmer()

    def getVector(self, tokens, info):
        vec = [0 for x in range(200)]
        for t in tokens:
            v = self.db.getVector(t)
            if v is None:
                v = self.db.getVector(self.stemmer.stem(t))
            if v is not None:
                vec = [x + y for x, y in zip(vec, v)]
            elif info:
                pass
                # print("No vector for word", t, "or", self.stemmer.stem(t))
        return vec

    def cosine_similarity(self, vec1, vec2):
        dot = sum(x * y for x, y in zip(vec1, vec2))
        mag1 = sum(x * x for x in vec1) ** 0.5
        mag2 = sum(y * y for y in vec2) ** 0.5
        div = mag1 * mag2
        if div == 0:
            return 0
        return dot / div

    def getStatementVec(self, statement):
        vec = self.db.getSentenceVector(statement.text)
        if vec is None:
            t = self.__tokenize(statement.text)
            vec = self.getVector(t, False)
            self.db.cacheSentence(statement.text, vec)
        return vec

    def get(self, input_statement):
        statement_list = self.chatbot.storage.get_response_statements()
        if not statement_list:
            if self.chatbot.storage.count():
                # Use a randomly picked statement
                self.logger.info('No statements have known responses. ' +
                                 'Choosing a random response to return.')
                random_response = self.chatbot.storage.get_random()
                random_response.confidence = 0
                return random_response
            else:
                raise self.EmptyDatasetException()

        closest_match = input_statement
        closest_match.confidence = 0
        closest_match.lev = 0
        closest_match.cos = 0

        # Find the closest chatbot known statement
        questionVector = self.getVector(input_statement.tokens, True)
        if not all(x == 0 for x in questionVector):
            # If there is a vector for the statement compare it with all other
            # statements in the database
            for statement in statement_list:
                vec = self.getStatementVec(statement)
                lev_similarity = comparisons.levenshtein_distance(
                    Statement(input_statement.text), Statement(statement.text))
                cosine_similarity = self.cosine_similarity(questionVector, vec)
                # normalize
                cosine_similarity = (cosine_similarity + 1) / 2
                if all(x == 0 for x in vec):
                    # There is no vector for the statement so the comparison is meaningless
                    lev_similarity = 0
                    cosine_similarity = 0
                # print(statement.text, cosine_similarity, lev_similarity)
                if cosine_similarity > closest_match.cos:
                    closest_match = statement
                    closest_match.lev = lev_similarity
                    closest_match.cos = cosine_similarity
                elif abs(cosine_similarity - closest_match.cos) < 0.01 and lev_similarity > closest_match.lev:
                    closest_match = statement
                    closest_match.lev = lev_similarity
                    closest_match.cos = cosine_similarity

        closest_match.confidence = closest_match.cos
        print("Closest Match:", closest_match,
              "Lev:", closest_match.lev,
              "Cos:", closest_match.cos,
              "Confidence:", closest_match.confidence)
        return closest_match

    def process(self, input_statement):
        input_statement.tokens = self.__tokenize(input_statement.text)
        # Select the closest match to the input statement
        closest_match = self.get(input_statement)
        self.logger.info('Using "{}" as a close match to "{}"'.format(
            input_statement.text, closest_match.text))

        # Get all statements that are in response to the closest match
        response_list = self.chatbot.storage.filter(
            in_response_to__contains=closest_match.text)
        if response_list:
            self.logger.info('Selecting response from {} optimal responses.'.format(
                len(response_list)))
            response = self.select_response(input_statement, response_list)
            response.confidence = closest_match.confidence
            self.logger.info('Response selected. Using "{}"'.format(response.text))
        else:
            response = self.chatbot.storage.get_random()
            self.logger.info('No response to "{}" found. Selecting a random response.'.format(
                closest_match.text))
            # Set confidence to zero because a random response is selected
            response.confidence = 0
        print("Confidence", response, response.confidence)
        return response.confidence, response

    def __tokenize(self, text):
        import re
        punctuations = (r"\.", r":", r";", r"\?", ",", "!")
        tokens = []
        text = text.lower().strip()
        # processed = re.sub(r'<.*>', '', text)
        for p in punctuations:
            text = re.sub(p, " " + p.replace("\\", ""), text)
        tokens += [t for t in re.split(" +", text) if t != ""]
        return tokens
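A hypothetical wiring sketch for VectorLogicAdapter, assuming an older ChatterBot release whose LogicAdapter and storage APIs match the methods used above; the module path is illustrative.

# Hypothetical: register the adapter by its dotted import path when building the bot.
from chatterbot import ChatBot

bot = ChatBot(
    'VectorBot',
    logic_adapters=['adapters.vector_logic_adapter.VectorLogicAdapter']  # illustrative path
)
print(bot.get_response('hello there'))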
def create_index_search(field_name):
    db = DataBase()
    db.create_indexes_search(field_name)
global args
args = parser.parse_args()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__file__)
logger.info(pformat(args))

# load model and tokenizer
global model
global tokenizer
model, tokenizer = load_model_tokenizer(args)

# sample personality
global personality
personality = sample_personality(tokenizer, args)
logger.info("Selected personality: %s", tokenizer.decode(chain(*personality)))

# instantiate db connection
global db
db = DataBase()
personality_decoded = [tokenizer.decode(x) for x in personality]
db.push_personality(personality_decoded)

# clear history collection in db
db.clear_history()

# generate_from_seed(args, model=model, tokenizer=tokenizer, personality=personality, db=db)

# launch app
run_with_ngrok(app)
app.run()
from database.database import DataBase

database = DataBase()
database.create_tables()