def __init__(self):
    (self.category, self.difficulty, self.question,
     self.correct_answer, self.answers, self.typ) = webscrap_trivia()
    self.letter = ANSWERS_TRIVIA[self.answers.index(self.correct_answer)]
    self.losers = []
    self.awaiting_answer = False
    # Decode HTML entities (e.g. &quot;) in the question and every answer.
    self.question = html.unescape(self.question)
    for i in range(len(self.answers)):
        self.answers[i] = html.unescape(self.answers[i])
    self.correct_answer = html.unescape(self.correct_answer)
    self.mongoDB = MongoDB()
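# webscrap_trivia() is assumed above but never shown. A minimal sketch,
# assuming the Open Trivia Database (https://opentdb.com) as the source --
# its response fields match the names unpacked above. The real helper may
# fetch questions elsewhere; ANSWERS_TRIVIA is also an assumed constant.
import random
import requests

ANSWERS_TRIVIA = ['A', 'B', 'C', 'D']

def webscrap_trivia():
    """Fetch one random trivia question; returns the tuple unpacked above."""
    data = requests.get('https://opentdb.com/api.php?amount=1', timeout=10).json()
    result = data['results'][0]
    answers = result['incorrect_answers'] + [result['correct_answer']]
    random.shuffle(answers)
    return (result['category'], result['difficulty'], result['question'],
            result['correct_answer'], answers, result['type'])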
import discord
from discord.ext import commands

from gw2ApiKey import GW2Api
from mongoDB import MongoDB
from utils import Utils, UtilsCommand, UtilsDiscordRoles, ErrorMessages
from validation import Validation

utils = Utils()
description = utils.BOT_DESCRIPTION
askForAPIKey = utils.API_QUESTION
command_description = UtilsCommand()
server_roles = UtilsDiscordRoles()
error_messages = ErrorMessages()
validation = Validation()
mongoDb = MongoDB()


class DiscordBot(commands.Bot):
    # Old approach, but fine while the bot runs on only one server:
    # current_server_joined = None

    def __init__(self):
        super().__init__(command_prefix="!", description=description,
                         pm_help=True, has_permission=8,
                         status=discord.Status.idle)
        # be_rude and reg are commands defined on the class (not shown here).
        self.add_command(self.be_rude)
        self.add_command(self.reg)
def checkUserExist(self, discord_id):
    # True when the stored record matches; getUser is expected to return the
    # saved discord_id (or a falsy value for unknown users). The original
    # called getUser on the class itself, which would misbind discord_id
    # as self, so an instance is created here.
    return discord_id == MongoDB().getUser(discord_id)
# -*- coding: utf-8 -*-
# Python 2 code: it relies on urllib2 and ur"" string literals.
import codecs
import datetime
import os
import re
import time
import urllib2

from bs4 import BeautifulSoup

from mongoDB import MongoDB

# PATH (the crawler's root directory) is defined elsewhere in the project.


class HotWordCrawler:

    def __init__(self):
        self.sina_url = 'http://top.weibo.com'
        self.hot_word_log()
        try:
            self.mongo = MongoDB()
        except:
            self.logObj.write('mongoDB connect failed...')

    def hot_word_log(self):
        '''Open the hot-word error log.'''
        log_filename = os.path.join(PATH, 'log', 'hot_word_log.txt')
        self.logObj = codecs.open(log_filename, 'ab', encoding='utf-8')

    def baidu_crawler(self):
        '''Crawl Baidu hot words.'''
        baidu_url = 'http://top.baidu.com'
        topic_url_filename = os.path.join(PATH, 'src', 'baidu_whole_topic_url.txt')
        # Matches any non-Chinese character; used to skip mixed keywords.
        no_num_pattern = re.compile(ur"([^\u4E00-\u9FA5]+)", re.U)
        with codecs.open(topic_url_filename, encoding='utf-8') as f:
            for line in f.readlines():
                split_line = line.split('#')
                topic_url = split_line[0]
                topic_title = split_line[-1].strip()
                timestamp = time.strftime('%Y_%m_%d_%H:%M:%S')
                # Fetch the page, retrying twice with a two-minute back-off.
                try:
                    html = urllib2.urlopen(topic_url, timeout=60).read()
                except:
                    time.sleep(120)
                    try:
                        html = urllib2.urlopen(topic_url, timeout=60).read()
                    except:
                        time.sleep(120)
                        try:
                            html = urllib2.urlopen(topic_url, timeout=60).read()
                        except:
                            self.logObj.write('baidu_spider %(timestamp)s timed out in url;%(topic_url)s\n'
                                              % {'timestamp': timestamp, 'topic_url': topic_url})
                            continue
                time.sleep(8)
                soup = BeautifulSoup(html)
                try:
                    keyword_list = [keyword_str.find('a').text.strip()
                                    for keyword_str in soup.find_all('td', class_='keyword')]
                    search_count_list = []
                    for search_count_str in soup.find_all('td', class_='last'):
                        search_count = search_count_str.text.strip()
                        if search_count:
                            search_count_list.append(search_count)
                        else:
                            search_count_list.append('1')
                    except BaseException:
                    self.logObj.write('baidu_spider %(timestamp)s div pattern changed in url;%(topic_url)s\n'
                                      % {'timestamp': timestamp, 'topic_url': topic_url})
                    # Without this, keyword_list may be unbound below.
                    continue
                try:
                    assert len(keyword_list) == len(search_count_list)
                except AssertionError:
                    self.logObj.write('baidu_spider %(timestamp)s length of keyword_list and search_count_list do not equal in url;%(topic_url)s\n'
                                      % {'timestamp': timestamp, 'topic_url': topic_url})
                for keyword, search_count in zip(keyword_list, search_count_list):
                    if no_num_pattern.search(keyword):
                        continue
                    now = datetime.datetime.now()
                    self.mongo.update_word(keyword, baidu_url, topic_title,
                                           {'f': int(search_count), 'd': now})

    def sina_topicUrl_topicTitle(self):
        '''Fetch the list of (topic_url, topic_title) tuples from Sina.'''
        sina_base_url = 'http://top.weibo.com/newtop/keyword'
        timestamp = time.strftime('%Y_%m_%d_%H:%M:%S')
        try:
            html = urllib2.urlopen(sina_base_url, timeout=20).read()
        except BaseException:
            time.sleep(120)
            try:
                html = urllib2.urlopen(sina_base_url, timeout=20).read()
            except BaseException:
                time.sleep(120)
                try:
                    html = urllib2.urlopen(sina_base_url, timeout=20).read()
                except BaseException:
                    self.logObj.write('sina base_url timed out at: %s' % timestamp)
                    return []
        soup = BeautifulSoup(html)
        div_level_str = soup.find('div', id='pl_index_toplist')
        dl_level_str_list = div_level_str.find_all('dl', class_='bd_list clearfix')
        whole_dd_level_list = []
        for dl_level_str in dl_level_str_list:
            whole_dd_level_list.extend(dl_level_str.find_all('dd'))
        topicUrl_topicTitle_tuple_list = [(self.sina_url + item.find('a')['href'], item.text)
                                          for item in whole_dd_level_list]
        return topicUrl_topicTitle_tuple_list

    def sina_crawler(self):
        '''Crawl keywords and search counts, and insert them into the database.'''
        topicUrl_topicTitle_tuple_list = self.sina_topicUrl_topicTitle()
        no_num_pattern = re.compile(ur"([^\u4E00-\u9FA5]+)", re.U)
        timestamp = time.strftime('%Y_%m_%d_%H:%M:%S')
        for topic_url, topic_title in topicUrl_topicTitle_tuple_list:
            try:
                html = urllib2.urlopen(topic_url, timeout=60).read()
            except BaseException:
                time.sleep(120)
                try:
                    html = urllib2.urlopen(topic_url, timeout=60).read()
                except BaseException:
                    time.sleep(120)
                    try:
                        html = urllib2.urlopen(topic_url, timeout=60).read()
                    except BaseException:
                        self.logObj.write('sina_spider %(timestamp)s timed out in url;%(topic_url)s\n'
                                          % {'timestamp': timestamp, 'topic_url': topic_url})
                        continue
            time.sleep(10)
            soup = BeautifulSoup(html)
            try:
                div_level_str = soup.find('div', class_='influ_frame_con')
                dt_level_str_list = div_level_str.find_all('dt')
            except BaseException:
                self.logObj.write('sina_spider %(timestamp)s div pattern changed in url;%(topic_url)s\n'
                                  % {'timestamp': timestamp, 'topic_url': topic_url})
                continue
            for dt_level_str in dt_level_str_list:
                try:
                    keyword = dt_level_str.find('span', class_='key').text
                    search_count = dt_level_str.find('span', class_='num').text
                except BaseException:
                    self.logObj.write('sina_spider %(timestamp)s div pattern changed in url;%(topic_url)s\n'
                                      % {'timestamp': timestamp, 'topic_url': topic_url})
                    continue
                if no_num_pattern.search(keyword):
                    continue
                now = datetime.datetime.now()
                try:
                    self.mongo.update_word(keyword, self.sina_url, topic_title,
                                           {'f': int(search_count), 'd': now})
                except:
                    self.logObj.write('db update error in url;%s' % topic_url)

    def main(self):
        self.baidu_crawler()
        self.sina_crawler()
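# The triple-nested retry blocks above could be factored into one helper. A
# minimal sketch, not part of the original crawler (the name _fetch_with_retry
# and its defaults are invented for illustration):
def _fetch_with_retry(url, timeout=60, retries=3, backoff=120):
    """Return the page body, or None after `retries` failed attempts."""
    for attempt in range(retries):
        try:
            return urllib2.urlopen(url, timeout=timeout).read()
        except BaseException:
            # Sleep before the next attempt, but not after the last one.
            if attempt < retries - 1:
                time.sleep(backoff)
    return None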
def __init__(self):
    self.clientObj = MongoDB()
    self.clientDB = self.clientObj.stockDB
def __init__(self, client):
    self.client = client
    self.question = Question()
    self.mongo_client = MongoDB()
    # Medal emoji for the top three leaderboard places.
    self.dict = {0: '🏅', 1: '🥈', 2: '🥉'}
import asyncio
import datetime
import os
from datetime import time as t
from datetime import timedelta
from itertools import cycle

import discord
from discord.ext import commands, tasks

from reddit import *
from Question import *
from mongoDB import MongoDB
from cogs.news import top_news_from_world

# Initiate the bot with the '.' command prefix.
client = commands.Bot(command_prefix='.')
mongoDB = MongoDB()

"""
Lists of statuses (activities) that the bot cycles through, depending on the
time of day (all times in UTC, 24h format).
ACTIVITY_LIST_MORNING - 5 to 11
ACTIVITY_LIST_GENERAL - 11 to 19
ACTIVITY_LIST_EVENING - 19 to 24
ACTIVITY_LIST_NIGHT   - 24 to 5
"""
ACTIVITY_LIST_GENERAL = [
    'Smile often!', 'Az is dead!', 'Drink water!', 'Milica is a midget.',
    'Spread love!', 'Stay positive!', 'Cenelia is handsome!',
    'You are beautiful!', 'Believe in yourself!', 'Segment is a boomer!',
    'Everything will be fine!', 'You can do it!', 'Be good to others!',
    'Be good to yourself!'
]
activity_list_general_cycle = cycle(ACTIVITY_LIST_GENERAL)
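# The loop that actually applies these cycled statuses is not shown above. A
# minimal sketch using discord.ext.tasks; the task name and 30-minute interval
# are assumptions, and the real bot would also switch lists by time of day:
@tasks.loop(minutes=30)
async def rotate_status():
    # change_presence updates the bot's displayed activity.
    await client.change_presence(
        activity=discord.Game(next(activity_list_general_cycle)))

# The task would be started once the bot is ready, e.g. rotate_status.start()
# inside an on_ready handler.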
import json

from bson.json_util import dumps

from mongoDB import MongoDB
from utils import Utils


class SearchService(MongoDB, Utils):

    def __init__(self):
        self._mongo = MongoDB()
        self._utils = Utils()

    def getProductsByCode(self, iCode):
        try:
            lProducts = self._mongo.getProducts()
            return self.search_by_code(lProducts, iCode)
        except Exception as error:
            raise error

    def getProductsByDescr(self, sDescr):
        try:
            lProducts = self._mongo.getProducts()
            return self.search_by_description(lProducts, sDescr)
        except Exception as error:
            raise error

    def search_by_description(self, lProducts, sDescription):
        """Search by description or brand.

        Applies a 50% discount when the search term is a palindrome.
        in: bson lProducts, str sDescription
        out: list of matching products
        """
        qSearch = ".*{}.*".format(sDescription)
        productos = lProducts.find({
            "$or": [{"brand": {"$regex": qSearch}},
                    {"description": {"$regex": qSearch}}]
        })
        lista = []
        if productos is not None:
            for row in productos:
                self.initialize_row(row)
                if self._utils.isPalindrom(sDescription) is True:
                    row = self.apply_discount(self.decode_bson(row), 0.5)
                del row['_id']
                lista.append(row)
        return lista

    def initialize_row(self, row):
        try:
            row['newprice'] = row['price']
            row['discount'] = 0
            row['image'] = self._utils.setUrlImage(row['image'])
        except Exception as error:
            raise Exception("problem initializing row, {}".format(error))

    def search_by_code(self, lProducts, iCode):
        """Search by product code.

        Applies a 50% discount when the code is a palindrome.
        in: bson lProducts, int iCode
        out: list with the matching product, possibly modified
        """
        lista = []
        row = lProducts.find_one({"id": iCode})
        if row is not None:
            self.initialize_row(row)
            if self._utils.isPalindrom(str(iCode)) is True:
                row = self.apply_discount(self.decode_bson(row), 0.5)
            del row['_id']
            lista.append(row)
        return lista

    def decode_bson(self, row):
        """Decode a BSON document into a plain JSON dict."""
        return json.loads(dumps(row))

    def apply_discount(self, product, discount):
        """Apply the discount and record it as a percentage."""
        product['newprice'] = product['newprice'] * discount
        product['discount'] = discount * 100
        return product
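# Usage sketch (the search values are made up): 'anana' is a palindrome, so
# matching products come back with newprice halved and discount == 50.0;
# code 123 is not, so that product keeps its original price.
service = SearchService()
discounted = service.getProductsByDescr('anana')
regular = service.getProductsByCode(123)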
def getStockDB():
    client = MongoDB()
    return client.stockDB
def __init__(self, client):
    self.client = client
    self.mongoDB = MongoDB()
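# Every snippet above assumes a project-local MongoDB wrapper (mongoDB.py)
# whose implementation is never shown. A minimal pymongo-backed sketch of the
# interface the snippets actually use -- getUser, update_word, getProducts,
# and the stockDB attribute; the URI, database, and collection names here are
# all assumptions, not the original project's values:
from pymongo import MongoClient


class MongoDB:

    def __init__(self, uri='mongodb://localhost:27017'):
        self._client = MongoClient(uri)
        self._db = self._client['appDB']        # assumed database name
        self.stockDB = self._client['stockDB']  # stock database handle

    def getUser(self, discord_id):
        """Return the stored discord_id, or None if the user is unknown."""
        doc = self._db.users.find_one({'discord_id': discord_id})
        return doc['discord_id'] if doc else None

    def getProducts(self):
        """Return the products collection; callers run find/find_one on it."""
        return self._db.products

    def update_word(self, keyword, source_url, topic_title, stat):
        """Upsert a hot word, appending a {'f': count, 'd': datetime} record."""
        self._db.words.update_one(
            {'keyword': keyword, 'source': source_url, 'topic': topic_title},
            {'$push': {'stats': stat}},
            upsert=True)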