def __init__(self):
    self.logger = Logger()
    self.mongo = mongo.Mongo(self.logger)
    # Credentials and port for the local JSON-RPC node are read from the environment.
    self.rpc_connection = Proxy(
        f"http://{os.environ['CRYPTO_USER']}:{os.environ['CRYPTO_PASS']}@"
        f"127.0.0.1:{os.environ['CRYPTO_PORT']}")
    self.tx_cache = TxCache()
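# Every snippet in this section constructs a project-local `mongo.Mongo` wrapper whose
# source is not included here, and whose constructor signature visibly differs between
# projects. For orientation only, the sketch below is a hypothetical minimal wrapper
# over pymongo, inferred from the call sites; it is not any of these projects' actual code.
from pymongo import MongoClient


class Mongo:
    def __init__(self, logger=None, host="127.0.0.1", port=27017, db_name="app"):
        self.logger = logger
        self.client = MongoClient(host, port)  # standard pymongo client
        self.db = self.client[db_name]

    def insert(self, documents, collection="default"):
        # Accept either one document or a list of documents.
        if isinstance(documents, list):
            return self.db[collection].insert_many(documents)
        return self.db[collection].insert_one(documents)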
def __init__(self):
    log.debug("[Startup]: Initializing YouTube Module . . .")
    self.mongo = mongo.Mongo()
    self.queue = Queue()
    # Cached results expire after 3 hours; at most 1000 entries are kept.
    self.cache = ExpiringDict(max_age_seconds=10800, max_len=1000)
    self.search_cache = dict()
    self.session = aiohttp.ClientSession()
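# For reference, `ExpiringDict` (from the third-party `expiringdict` package) behaves
# like a dict whose entries are evicted after `max_age_seconds` or once `max_len` is
# exceeded; a minimal usage sketch with an illustrative key:
from expiringdict import ExpiringDict

cache = ExpiringDict(max_len=1000, max_age_seconds=10800)  # entries live for 3 hours
cache["video_id"] = {"title": "example"}  # stored like a normal dict entry
print(cache.get("video_id"))  # the stored dict, or None once the entry has expired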
def run(self):
    for model in self.Models:
        contents = model.run()
        mongo.Mongo(model.target_save['ip'],
                    model.target_save['port'],
                    model.target_save['db_name'],
                    model.target_save['db_table']).insert(contents)
        print(f"{len(contents)} records inserted")
def __init__(self, metier, localisation, email):
    self.email = email
    self.metier = metier
    self.localisation = localisation
    self.counter = 0
    self.preprocess = pp.preprocessing()
    self.db = mongo.Mongo()
def mongo(self):
    m = mongo.Mongo(self.client_name, self.db_name)
    m.upload_train_data(self.origin_data, self.feature_names, 'winequality-white_origin')
    m.upload_train_data(self.train_data, self.feature_names, 'winequality-white_train')
    m.upload_train_data(self.validation_data, self.feature_names, 'winequality-white_validation')
    m.upload_choice_matrix(self.choice_matrix)
def submitPurchase(request):
    connection = mongo.Mongo()
    context = {}
    connection.updateInstrument(1, 99)
    print("PURCHASE")
    test.delay()  # fire the Celery task asynchronously
    return JsonResponse(context)
def index_sub():
    import mongo
    email = request.form['mail']
    db = mongo.Mongo()  # avoid rebinding the module name
    if db.check_mail(email):
        db.add_email(email)
        message = 'Your email has been added to the list!'
    else:
        message = 'Please enter a valid email'
    return render_template("scripts.html", message=message)
def getCurrent(request):
    connection = mongo.Mongo()
    r_server = MyRedis.getServer(self=MyRedis)
    r_server.incr('getCurrent_counter')
    received = json.loads(request.GET.get('send'))
    json_items = json.dumps(connection.getInstrumentById(received[u'currentId']),
                            default=decimal_default)
    context = {'content': json_items, 'information': 'INFOOOO'}
    print('getCurrent total count', r_server.get('getCurrent_counter'))
    return JsonResponse(context)
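# A note on the Redis counters used in these views: with redis-py, `incr` returns an
# int but `get` returns bytes by default, so concatenating the result into a str (as
# the original prints did) fails on Python 3 unless the client is created with
# decode_responses=True. A minimal self-contained sketch of that pattern:
import redis

r_server = redis.Redis(host="localhost", port=6379, decode_responses=True)
r_server.incr("getCurrent_counter")  # atomic increment, returns an int
print("getCurrent total count " + r_server.get("getCurrent_counter"))  # str, safe to concatenate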
def register():
    if request.method == 'POST':
        users = mongo.db.users
        existing_user = users.find_one({'user_email': request.form['email']})
        if existing_user is None:
            DB = mongo.Mongo()
            hashpass = bcrypt.hashpw(request.form['pass'].encode('utf-8'), bcrypt.gensalt())
            DB.add_user(request.form['email'])
            users.insert_one({'name': request.form['username'], 'password': hashpass})
            session['username'] = request.form['username']
            return redirect(url_for('index'))
        return 'That username already exists!'
    return render_template('register.html')
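# The register view stores a bcrypt hash; the matching login check would use
# bcrypt.checkpw. A minimal sketch under the same Flask app context -- the route name
# and form fields are assumptions mirroring the register form, not the project's code:
@app.route('/login', methods=['POST'])
def login():
    users = mongo.db.users
    user = users.find_one({'name': request.form['username']})
    # checkpw compares the candidate password against the stored bcrypt hash.
    if user and bcrypt.checkpw(request.form['pass'].encode('utf-8'), user['password']):
        session['username'] = request.form['username']
        return redirect(url_for('index'))
    return 'Invalid username or password'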
def __init__(self, bot):
    self.dictionary = {}
    self.bot = bot
    self.log = logging_manager.LoggingManager()
    self.spotify = spotify.Spotify()
    self.youtube = youtube.Youtube()
    self.lastfm = lastfm.LastFM()
    self.mongo = mongo.Mongo()
    bot.remove_command("help")
    self.log.debug("[Startup]: Initializing Music Module . . .")

    def generate_key(length):
        # Build a random alphanumeric key of the requested length.
        letters = string.ascii_letters
        return "".join(random.choice(letters) for _ in range(length))

    restart_key = generate_key(64)
    # Persist the key from the bot's event loop, since __init__ is not a coroutine.
    asyncio.run_coroutine_threadsafe(self.mongo.set_restart_key(restart_key), self.bot.loop)
    if not discord.opus.is_loaded():
        discord.opus.load_opus("/usr/lib/libopus.so")
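# generate_key above rolls its own random string with the `random` module, which is not
# cryptographically secure. If the restart key gates anything sensitive, the standard
# library's `secrets` module is the safer choice; a one-line sketch:
import secrets

restart_key = secrets.token_urlsafe(48)  # 64 URL-safe characters from a CSPRNG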
def itemList(request):
    connection = mongo.Mongo()
    r_server = MyRedis.getServer(self=MyRedis)
    r_server.incr('itemList_counter')
    received = json.loads(request.GET.get('send'))
    print("Received:", received)
    if received[u'searchParams'] == {}:
        # No search filters: list every instrument of the requested type.
        json_items = json.dumps(connection.getInstrumentsByType(str(received[u'typeId'])))
    else:
        search = received[u'searchParams']
        searchType = search[u'searchType']
        searchValue = search[u'searchValue']
        if searchType == u'By manufacturer':
            json_items = json.dumps(connection.getInstrumentByManufacturer(str(searchValue)))
        elif searchType == u'By model':
            json_items = json.dumps(connection.getInstrumentByModel(str(searchValue)))
        elif searchType == u'By cost':
            json_items = json.dumps(connection.getInstrumentByCost(str(searchValue)))
        else:
            json_items = json.dumps([])  # unknown search type: return an empty result
    context = {'content': json_items, 'pageCount': 1}
    print('itemList total count', r_server.get('itemList_counter'))
    return JsonResponse(context)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 9 10:12:53 2019

@author: zanea
"""
import re

import mongo
import preProcessing

db = mongo.Mongo()  # avoid shadowing the module with the instance
df = db.get_df()


def process_location(location):
    """
    Extract the postal code (for Île-de-France) or the city (for other cities),
    then classify it into the matching 'Bassin_emploi' (employment area) category.
    """
    import villes_csv
    location = location.lower()
    # Extract the city and the postal code.
    localisation = re.findall(r'(.*) \(?', location)[0]
    cp = int(re.findall(r'.* \(([0-9]*).*', location)[0])
    bassin_emploi = villes_csv.get_circo(cp)
    return bassin_emploi, localisation
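# A worked example of the two extraction regexes above, on a hypothetical Indeed-style
# location string (the input value is illustrative only, not taken from the dataset):
import re

location = "paris 10e (75010)"
ville = re.findall(r'(.*) \(?', location)[0]           # text before the last space -> 'paris 10e'
cp = int(re.findall(r'.* \(([0-9]*).*', location)[0])  # digits inside the parentheses -> 75010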
def run(self):
    for model in self.Models:
        contents = model.run()  # do not rebind the loop variable
        mongo.Mongo('erobloadPortal', 'contents').insert(contents)
        print(f"{len(contents)} records inserted")
def run_models(self):
    import mongo
    # Load the database into a dataframe.
    db = mongo.Mongo()
    df = db.get_df()
    cols = ['Bassin_emploi', 'Compagny', 'Contrat', 'Date', 'Date_scrap',
            'Location', 'Poste', 'Resume', 'Salary', '_id', 'Forest', 'RBF']
    df = pd.DataFrame(df, columns=cols)
    # Keep only the rows that include a salary.
    data = df[['Bassin_emploi', 'Contrat', 'Poste', 'Salary']][df['Salary'] != '']
    # One-hot encode the categorical columns.
    data = pd.get_dummies(data=data, columns=['Poste', 'Bassin_emploi', 'Contrat'],
                          drop_first=True)
    # Split features and target.
    x = data.iloc[:, 1:]
    y = data['Salary'].astype('int64')
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    # Fit an RBF-kernel SVM with a grid search.
    rbf = SVC()
    parameters = {'kernel': ['rbf'],
                  'gamma': [1e-3, 1e-4],
                  'C': [1, 10, 100, 1000],
                  'shrinking': [True, False]}
    clf_rbf = GridSearchCV(rbf, parameters, cv=5)
    clf_rbf.fit(x_train, y_train)
    y_pred_rbf = clf_rbf.predict(x_test)
    # Fit a random forest with a grid search.
    rf = RandomForestClassifier()
    parameters = {'n_estimators': [100, 300, 500, 800, 1000],
                  'criterion': ['gini', 'entropy'],
                  'bootstrap': [True, False]}
    clf_rf = GridSearchCV(rf, parameters, cv=5)
    clf_rf.fit(x_train, y_train)
    y_pred_rf = clf_rf.predict(x_test)
    # (Model scoring and train/test comparison plots were used only during development.)
    # Predict the missing salaries and write them back to the Mongo DB.
    data_to_pred = df[['Bassin_emploi', 'Contrat', 'Poste', '_id']][df['Salary'] == '']
    data_to_pred = pd.get_dummies(data=data_to_pred,
                                  columns=['Poste', 'Bassin_emploi', 'Contrat'],
                                  drop_first=True)
    data_to_pred['Salaires_RBF'] = clf_rbf.predict(data_to_pred.iloc[:, 1:])
    # Exclude _id (column 0) and the freshly added RBF column (last) for the forest.
    data_to_pred['Salaires_Random_Forest'] = clf_rf.predict(data_to_pred.iloc[:, 1:-1])
    final_data = data_to_pred[['_id', 'Salaires_RBF', 'Salaires_Random_Forest']]
    for i in final_data.index:
        df.loc[i, 'Forest'] = final_data.loc[i, 'Salaires_Random_Forest']
        df.loc[i, 'RBF'] = final_data.loc[i, 'Salaires_RBF']
    if db.final_df(df):
        print('DB updated')
        return True
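# A caveat for run_models above: pd.get_dummies is applied separately to the labeled
# rows and to data_to_pred, so the two frames only end up with identical columns if both
# subsets contain the same category values. A common guard is to reindex the prediction
# features against the training columns; a minimal self-contained sketch of the pattern
# (illustrative data, not part of the original):
import pandas as pd

train = pd.get_dummies(pd.DataFrame({'Contrat': ['CDI', 'CDD', 'Stage']}))
pred = pd.get_dummies(pd.DataFrame({'Contrat': ['CDI']}))
# Align the prediction frame to the training columns, filling unseen dummies with 0.
pred = pred.reindex(columns=train.columns, fill_value=0)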
# -*- coding: utf-8 -*-
"""
Spyder editor

This is a temporary script.
"""
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import pandas as pd

import mongo

db = mongo.Mongo()
df = db.get_df()

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div(children=[
    html.H1(children="DB"),
    dash_table.DataTable(
        id='table',
        columns=[{"name": i, "id": i} for i in df.columns],
        data=df.to_dict("records"),  # 'rows' is deprecated in newer pandas/dash
        style_table={
            'maxHeight': '300',
            'overflowY': 'scroll'
        },
    ),
])
class Spider(object):
    base_url = "http://weixin.sogou.com/weixin"  # base URL
    key_word = 'AI'  # search keyword
    # The in-memory url/item buffers (list_url, list_item) and the requests.Session
    # header setup from earlier revisions were replaced by the Redis queue below.

    cookies = {
        'sw_uuid': '3334394017',
        'sg_uuid': '2065896719',
        'dt_ssuid': '853760490',
        'pex': 'C864C03270DED3DD8A06887A372DA219231FFAC25A9D64AE09E82AED12E416AC',
        'ssuid': '8258541632',
        'CXID': '33A866F87888D6C8D1A553B76F2BADCA',
        'SUV': '00C02729B46B344C5B72F4ADF43D0798',
        'ad': 'Vyllllllll2bt0CzlllllVHCuHYlllllWWn@vlllll9lllll9Vxlw@@@@@@@@@@@',
        'SUID': '53DA31773765860A5B11413D000B34E8',
        'pgv_pvi': '431889408',
        'ABTEST': '0|1534575838|v1',
        'weixinIndexVisited': '1',
        'ld': 'cyllllllll2bNDvxlllllVHThntlllllGUlvKyllllGlllll9klll5@@@@@@@@@@',
        'LSTMV': '385%2C26',
        'LCLKINT': '3828',
        'SNUID': '9A8F42F88286F68589E463E1837385F0',
        'IPLOC': 'CN2200',
        'pgv_si': 's3126373376',
        'sct': '17',
        'JSESSIONID': 'aaaYnzdJQDdUUguOHzBvw',
        'ppinf': '5|1535195192|1536404792|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyOkJDfGNydDoxMDoxNTM1MTk1MTkyfHJlZm5pY2s6MjpCQ3x1c2VyaWQ6NDQ6bzl0Mmx1RkZDdlpaY1dsTmdsalgwYzd4dE1iY0B3ZWl4aW4uc29odS5jb218',
        'pprdig': 'eUprT1c-kM2bGGGiLWMW1FXo7TtbFMefITQzQeMOrxkE4dDJgEM15cuAXV1rcjDAXQR4-eqOc7Ycf8F7GwrWUylY1QiEjvrz-cMiEyjtWWMWAf8fkG4G5ZHbMpk0HR14pjbMQZGjZlrS57ZDsIiv3l_uGA5SpI7dIflpnoMu-ok',
        'sgid': '16-36640129-AVuBODg3a8UkW2FdLzwf2W4',
        'ppmdig': '1535195193000000ecfc6e6a5c33bb04d8a46936b64d2333'
    }

    csvfile = open("url.csv", "a", encoding="utf-8", newline='')
    writer = csv.writer(csvfile)
    browser = webdriver.Firefox()  # launch the browser
    browser.get(base_url)
    # Install the logged-in session cookies into the browser.
    for key, value in cookies.items():
        browser.add_cookie({'name': key, 'value': value})
    mongo = mongo.Mongo()  # Mongo wrapper instance
    redis = db.Redis()  # Redis wrapper instance

    def start(self):
        """Seed the queue with the first search-results page."""
        url = self.base_url + "?" + parse.urlencode({
            'type': '2',
            "query": self.key_word,
        })
        self.redis.push([url, self.parse_index])

    def parse_index(self, html):
        """Parse a results page; yield [url, callback] items for each article link
        and for the next results page."""
        print("results page")
        try:
            doc = pq(html)
            for item in doc('.news-list h3 a').items():
                url = item.attr['href'].replace("amp:", "")
                self.writer.writerow([url])  # also record the URL in the CSV
                yield [url, self.parse_detail]
        except Exception:
            print("failed to extract article URLs")
        # Queue the next results page, if there is one.
        try:
            doc = pq(html)
            next_page = doc("#sogou_next").attr['href']
            if next_page:
                yield [self.base_url + str(next_page), self.parse_index]
        except Exception:
            print("failed to extract the next-page link")

    def parse_detail(self, html):
        """Parse an article page; yield a dict with its title and body text."""
        print("detail page")
        try:
            doc = pq(html)
            data = {
                'title': doc('#activity-name').text(),
                'content': doc(".rich_media_content").text()
            }
        except Exception:
            print("failed to extract the article content")
        else:
            yield data

    def request(self, url):
        """Load a URL in the browser, scroll to trigger lazy loading,
        and return the page source (or None on failure)."""
        try:
            time.sleep(3)
            self.browser.get(url)
            time.sleep(5)
            for _ in range(5):
                self.browser.execute_script('window.scrollBy(0, 1000)')
                time.sleep(1)
            html = self.browser.page_source
        except Exception:
            print("failed to fetch the page source")
            return None
        if html:
            return html
        print("page source was empty")
        return None

    def scheduler(self):
        """Drive the crawl: pop [url, callback] items from Redis, fetch each page,
        and route whatever the callback yields."""
        self.redis.delete()  # clear the queue to restart from scratch
        if not self.redis.llen():  # empty queue: seed it with the first page
            self.start()
        while True:
            if not self.redis.llen():
                print("the URL queue is empty!")
                break
            url, callback = self.redis.pop()
            print("scheduling url:", url)
            html = self.request(url)
            for item in callback(html):
                if isinstance(item, dict):
                    self.mongo.save(item)  # article data goes to Mongo
                elif isinstance(item, list):
                    self.redis.push(item)  # new [url, callback] items go back on the queue

    def run(self):
        self.scheduler()
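# The spider above depends on a `db.Redis` wrapper whose source is not shown. Judging
# from the call sites (push/pop/llen/delete used as a work queue), it is a thin wrapper
# over a Redis list. The sketch below is a hypothetical reconstruction on top of
# redis-py, not the project's code: bound-method callbacks cannot be stored in Redis
# directly, so this version stores the callback's *name*, and a scheduler using it
# would resolve the name back to a method with getattr(spider, name).
import json
import redis


class Redis:
    def __init__(self, key="spider:urls"):
        self.key = key
        self.client = redis.Redis(decode_responses=True)

    def push(self, url_item):
        url, callback = url_item
        # Store [url, callback_name] as JSON at the tail of the list.
        self.client.rpush(self.key, json.dumps([url, callback.__name__]))

    def pop(self):
        # Pop from the head of the list; returns [url, callback_name].
        return json.loads(self.client.lpop(self.key))

    def llen(self):
        return self.client.llen(self.key)

    def delete(self):
        self.client.delete(self.key)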