Example #1
 def __init__(self):
     self.logger = Logger()
     self.mongo = mongo.Mongo(self.logger)
     self.rpc_connection = Proxy(
         f"http://{os.environ['CRYPTO_USER']}:{os.environ['CRYPTO_PASS']}@"
         f"127.0.0.1:{os.environ['CRYPTO_PORT']}")
     self.tx_cache = TxCache()
Example #2
 def __init__(self):
     log.debug("[Startup]: Initializing YouTube Module . . .")
     self.mongo = mongo.Mongo()
     self.queue = Queue()
     self.cache = ExpiringDict(max_age_seconds=10800, max_len=1000)
     self.search_cache = dict()
     self.session = aiohttp.ClientSession()
Example #3
 def run(self):
     for model in self.Models:
         contents = model.run()
         mongo.Mongo(model.target_save['ip'], model.target_save['port'],
                     model.target_save['db_name'],
                     model.target_save['db_table']).insert(contents)
         print(str(len(contents)) + ' records were inserted')
Example #4
 def __init__(self, metier, localisation, email):
     self.email = email
     self.metier = metier
     self.localisation = localisation
     self.counter = 0
     self.preprocess = pp.preprocessing()
     self.db = mongo.Mongo()
Example #5
 def mongo(self):
     m = mongo.Mongo(self.client_name, self.db_name)
     m.upload_train_data(self.origin_data, self.feature_names,
                         'winequality-white_origin')
     m.upload_train_data(self.train_data, self.feature_names,
                         'winequality-white_train')
     m.upload_train_data(self.validation_data, self.feature_names,
                         'winequality-white_validation')
     m.upload_choice_matrix(self.choice_matrix)
Example #6
def submitPurchase(request):
    connection = mongo.Mongo()
    context = {}
    # recieve = request.GET.get('send')
    # print(recieve[u'typeId'])
    connection.updateInstrument(1, 99)
    print("PURCHASE")
    # test()
    # send_mail()
    test.delay()
    return JsonResponse(context)
Example #7
def index_sub():
    import mongo
    email = request.form['mail']
    mongo = mongo.Mongo()
    cond = mongo.check_mail(email)
    if cond:
        mongo.add_email(email)
        message = 'Your email has been added to the list!'
    else:
        message = 'Please enter a valid email address'
    return render_template("scripts.html", message=message)
Example #8
def getCurrent(request):
    connection = mongo.Mongo()
    r_server = MyRedis.getServer(self=MyRedis)
    r_server.incr('getCurrent_counter')

    receive = json.loads(request.GET.get('send'))

    # items = Instrument.objects.filter(type="1", id=recieve[u'currentId'])\
    #     .values('id', 'manufacturer__name', 'model', 'type', 'coast')
    # json_items = json.dumps(list(items), default=decimal_default)
    json_items = json.dumps(
        connection.getInstrumentById(receive[u'currentId']),
        default=decimal_default)
    context = {'content': json_items, 'information': 'INFOOOO'}

    print('getCurrent total count ' + str(r_server.get('getCurrent_counter')))
    return JsonResponse(context)
Example #9
def register():
    if request.method == 'POST':
        users = mongo.db.users
        existing_user = users.find_one({'user_email' : request.form['email']})

        if existing_user is None:
            DB = mongo.Mongo()
            hashpass = bcrypt.hashpw(request.form['pass'].encode('utf-8'), bcrypt.gensalt())
            DB.add_user(request.form['email'])
            
            users.insert_one({'name':request.form['username'], 'password': hashpass})
            session['username'] =  request.form['username']
            return redirect(url_for('index'))

        return 'That username already exists!'

    return render_template('register.html')
Example #10
    def __init__(self, bot):
        self.dictionary = {}
        self.bot = bot
        self.log = logging_manager.LoggingManager()
        self.spotify = spotify.Spotify()
        self.youtube = youtube.Youtube()
        self.lastfm = lastfm.LastFM()
        self.mongo = mongo.Mongo()
        bot.remove_command("help")
        self.log.debug("[Startup]: Initializing Music Module . . .")

        def generate_key(length):
            letters = string.ascii_letters
            response = ""
            for a in range(0, length):
                response += random.choice(letters)
            return response

        restart_key = generate_key(64)
        asyncio.run_coroutine_threadsafe(self.mongo.set_restart_key(restart_key), self.bot.loop)

        if not discord.opus.is_loaded():
            discord.opus.load_opus("/usr/lib/libopus.so")
Example #11
def itemList(request):
    connection = mongo.Mongo()

    r_server = MyRedis.getServer(self=MyRedis)
    r_server.incr('itemList_counter')

    receive = json.loads(request.GET.get('send'))
    print("Receive: ")
    print(receive)

    if receive[u'searchParams'] == {}:
        # items = Instrument.objects.filter(type=str(recieve[u'typeId'])) \
        #     .values('id', 'manufacturer__name', 'model', 'type', 'coast')
        json_items = json.dumps(
            connection.getInstrumentsByType(str(receive[u'typeId'])))  # mongo
        # items = connection.getInstrumentsByType(str(recieve[u'typeId']))  # mongo
    else:
        search = receive[u'searchParams']
        searchType = search[u'searchType']
        searchValue = search[u'searchValue']

        if (searchType == u'By manufacturer'):
            # items = Instrument.objects.filter(type=str(recieve[u'typeId']), manufacturer__name=str(searchValue))\
            #     .values('id', 'manufacturer__name', 'model', 'type', 'coast')
            json_items = json.dumps(
                connection.getInstrumentByManufacturer(str(searchValue)))
        if (searchType == u'By model'):
            # items = Instrument.objects.filter(type=str(recieve[u'typeId']), model__search=str(searchValue))\
            #                     .values('id', 'manufacturer__name', 'model', 'type', 'coast')
            json_items = json.dumps(
                connection.getInstrumentByModel(str(searchValue)))
        if (searchType == u'By cost'):
            # items = Instrument.objects.filter(type=str(recieve[u'typeId']), coast=searchValue)\
            #                     .values('id', 'manufacturer__name', 'model', 'type', 'coast')
            json_items = json.dumps(
                connection.getInstrumentByCost(str(searchValue)))

    # itemsLength = len(items)
    # print("SPLIT")
    # print(splitQuery(items))
    # print(len(splitQuery(items)))
    # splitted = splitQuery(items)

    # items = searchCase[searchType]

    # print("Items: ")
    # print(items)

    # json_items = json.dumps(list(splitted[int(recieve[u'pageId']) - 1]), default=decimal_default)
    # print("JSON Items: ")
    # print(json_items)
    # print "HERE I AM"
    # for document in items:
    #     print document
    # context = {'content': items,
    #            'pageCount': 1}
    # connection.insertInstrument()

    # print "HERE I AM"
    # print json_items
    # for document in json_items:
    #     print document
    context = {'content': json_items, 'pageCount': 1}

    print('itemList total count ' + str(r_server.get('itemList_counter')))
    return JsonResponse(context)
Example #12
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr  9 10:12:53 2019

@author: zanea
"""

import mongo
import preProcessing
import re

mongo = mongo.Mongo()

df = mongo.get_df()


def process_location(location):
    """ Extracts the postal code (for the Île-de-France region) or the city (for other cities),
        then classifies it into the matching 'Bassin_emploi' (employment area) category. """
    import villes_csv

    location = location.lower()
    # Extract the city and the postal code
    localisation = re.findall(r'(.*) \(?', location)[0]
    cp = int(re.findall(r'.* \(([0-9]*).*', location)[0])
    bassin_emploi = villes_csv.get_circo(cp)

    return bassin_emploi, localisation
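
A minimal usage sketch for process_location, assuming villes_csv.get_circo maps a postal code such as 75011 to an employment-area label (the input string below is purely illustrative):

bassin_emploi, ville = process_location("Paris (75011)")
print(bassin_emploi, ville)  # ville is 'paris'; bassin_emploi is whatever get_circo(75011) returns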

Example #13
 def run(self):
     for model in self.Models:
         model = model.run()
         mongo.Mongo('erobloadPortal', 'contents').insert(model)
         print(len(model))
         print('records were inserted')
Example #14
    def run_models(self):

        import mongo
        # Import the database as a DataFrame
        mongo = mongo.Mongo()
        df = mongo.get_df()
        cols = ['Bassin_emploi', 'Compagny', 'Contrat', 'Date', 'Date_scrap', 'Location', 'Poste', 'Resume', 'Salary', '_id','Forest','RBF']
        df = pd.DataFrame(df,columns=cols)

        # Select the rows that include a salary
        data = df[['Bassin_emploi', 'Contrat', 'Poste', 'Salary']][df['Salary'] != '']

        # Encode the data as numeric dummy variables
        data = pd.get_dummies(data=data, columns=['Poste', 'Bassin_emploi', 'Contrat'], drop_first=True)

        # Split the independent variables from the target
        x = data.iloc[:, 1:]
        y = data['Salary'].astype('int64')
        x_train, x_test, y_train, y_test = train_test_split(x, y)

        # Fit the RBF-kernel SVM
        rbf = SVC()
        parameters = {'kernel': ['rbf'],
        			  'gamma': [1e-3, 1e-4],
        			  'C': [1, 10, 100, 1000],
        			  'shrinking' : [True, False]}
        clf_rbf = GridSearchCV(rbf, parameters, cv=5)
        clf_rbf.fit(x_train, y_train)
        clf_rbf.best_params_
        y_pred_rbf = clf_rbf.predict(x_test)

#        # Compare test vs. training data > for testing only
#        plt.scatter(range(len(y_test)), y_test, color = 'blue')
#        plt.scatter(range(len(y_pred_rbf)), y_pred_rbf, color = 'red')
#        plt.legend(('Training set', 'Test set'))
#        plt.title('Comparison of the results with the RBF kernel model')

        # Fit the Random Forest classifier
        rf = RandomForestClassifier()
        parameters = {'n_estimators': [100, 300, 500, 800, 1000],
                      'criterion': ['gini', 'entropy'],
                      'bootstrap': [True, False]}
        clf_rf = GridSearchCV(rf, parameters, cv=5)
        clf_rf.fit(x_train, y_train)
        clf_rf.best_params_
        y_pred_rf = clf_rf.predict(x_test)

#        # Model score
#        accuracy_score(y_test, y_pred_rf)
#        f1_score(y_test, y_pred_rf, average='micro')
#        all_accuracies = cross_val_score(estimator=clf_rf, X=x_train, y=y_train, cv=5)

#        # Compare test vs. training data > for testing only
#        plt.scatter(range(len(y_test)), y_test, color = 'blue')
#        plt.scatter(range(len(y_pred_rf)), y_pred_rf, color = 'red')
#        plt.legend(('Training set', 'Test set'))
#        plt.title('Comparison of the results with the Random Forest model')

        # Prediction: write the results back to the Mongo DB
        data_to_pred = df[['Bassin_emploi', 'Contrat', 'Poste', '_id']][df['Salary'] == '']
        data_to_pred = pd.get_dummies(data=data_to_pred, columns=['Poste', 'Bassin_emploi', 'Contrat'], drop_first=True)

        data_to_pred['Salaires_RBF'] = clf_rbf.predict(data_to_pred.iloc[:, 1:])
        data_to_pred['Salaires_Random_Forest'] = clf_rf.predict(data_to_pred.iloc[:, 1:-1])
        final_data = data_to_pred[['_id', 'Salaires_RBF', 'Salaires_Random_Forest']]

        indexes = final_data.index

        for i in indexes:
            forest = final_data.loc[i,'Salaires_Random_Forest']
            rbf = final_data.loc[i,'Salaires_RBF']
            df.loc[i,'Forest'] = forest
            df.loc[i,'RBF'] = rbf

        if mongo.final_df(df):
            print('DB updated')

        return True
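
A small side sketch of the dummy-encoding step with hypothetical two-column frames (not the author's exact data): pd.get_dummies creates one indicator column per category, and a frame encoded later for prediction is usually reindexed to the training columns so both matrices line up before calling predict.

import pandas as pd

train = pd.DataFrame({'Contrat': ['CDI', 'CDD', 'CDI'], 'Salary': [35000, 28000, 40000]})
to_pred = pd.DataFrame({'Contrat': ['CDD']})

# One indicator column per category, dropping the first level as run_models does.
x_train = pd.get_dummies(train[['Contrat']], columns=['Contrat'], drop_first=True)

# Align the prediction frame with the columns seen at training time (assumption:
# illustrative frames only; run_models would feed the result to its own classifiers).
x_pred = pd.get_dummies(to_pred, columns=['Contrat'], drop_first=True).reindex(
    columns=x_train.columns, fill_value=0)
print(x_train.columns.tolist(), x_pred.values.tolist())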
Example #15
# -*- coding: utf-8 -*-
"""
Spyder editor

This is a temporary script.
"""
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import pandas as pd
import mongo

db = mongo.Mongo()

df = db.get_df()

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div(children=[
    html.H1(children="DB"),
    dash_table.DataTable(
    id='table',
    columns=[{"name": i, "id": i} for i in df.columns],
    data=df.to_dict("rows"),
    style_table={
        'maxHeight': '300',
        'overflowY': 'scroll'
    },
Example #16
class Spider(object):

    base_url = "http://weixin.sogou.com/weixin"  # base URL
    key_word = 'AI'  # search keyword
    #list_url = []   # stores [url, callback] items
    #list_item = []   # temporary store for a [url, callback] pair; cleared right after it is appended to list_url
    #session = requests.Session()   # initial session setup
    """
    headers = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding':'gzip, deflate, sdch',
        'Accept-Language':'zh-CN,zh;q=0.8',
        'Cache-Control':'max-age=0',
        'Connection':'keep-alive',
        'Cookie':'sw_uuid=3334394017; sg_uuid=2065896719; dt_ssuid=853760490; pex=C864C03270DED3DD8A06887A372DA219231FFAC25A9D64AE09E82AED12E416AC; ssuid=8258541632; CXID=33A866F87888D6C8D1A553B76F2BADCA; SUV=00C02729B46B344C5B72F4ADF43D0798; ad=Vyllllllll2bt0CzlllllVHCuHYlllllWWn@vlllll9lllll9Vxlw@@@@@@@@@@@; SUID=53DA31773765860A5B11413D000B34E8; pgv_pvi=431889408; ABTEST=0|1534575838|v1; weixinIndexVisited=1; SUIR=CBA7F587B6B0C605F48511D2B6DE9810; ld=cyllllllll2bNDvxlllllVHThntlllllGUlvKyllllGlllll9klll5@@@@@@@@@@; LSTMV=385%2C26; LCLKINT=3828; ppinf=5|1534577030|1535786630|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyOkJDfGNydDoxMDoxNTM0NTc3MDMwfHJlZm5pY2s6MjpCQ3x1c2VyaWQ6NDQ6bzl0Mmx1RkZDdlpaY1dsTmdsalgwYzd4dE1iY0B3ZWl4aW4uc29odS5jb218; pprdig=R6R352FfiGDW1H3tvcqyhorgCkT3odPRYTPZ6thnUHaWCcL8UwwFEC0W9gzyUhzku8ScAL6CKkabRXXTfE-0dh1--l0JtsESkg17NAfPWozGHDP-9Cvpu2Ptq3VSXL_WM0U0R_tAFMHYEKwu3nrfiziia6XaFgqf5RrLXJuDUa0; sgid=16-36640129-AVt3yYYDq8t3XJ2MTXx5PMc; SNUID=9A8F42F88286F68589E463E1837385F0; IPLOC=CN2200; JSESSIONID=aaafIZVUumv_l7fFtEBvw; pgv_si=s8715192320; ppmdig=1535095739000000e7ea71c241fae9486d10ed0a4333c9a9; sct=14',
        'DNT':'1',
        'Host':'weixin.sogou.com',
        'Upgrade-Insecure-Requests':'1',
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        }
    """
    cookies = {
        'sw_uuid': '3334394017',
        'sg_uuid': '2065896719',
        'dt_ssuid': '853760490',
        'pex':
        'C864C03270DED3DD8A06887A372DA219231FFAC25A9D64AE09E82AED12E416AC',
        'ssuid': '8258541632',
        'CXID': '33A866F87888D6C8D1A553B76F2BADCA',
        'SUV': '00C02729B46B344C5B72F4ADF43D0798',
        'ad':
        'Vyllllllll2bt0CzlllllVHCuHYlllllWWn@vlllll9lllll9Vxlw@@@@@@@@@@@',
        'SUID': '53DA31773765860A5B11413D000B34E8',
        'pgv_pvi': '431889408',
        'ABTEST': '0|1534575838|v1',
        'weixinIndexVisited': '1',
        'ld':
        'cyllllllll2bNDvxlllllVHThntlllllGUlvKyllllGlllll9klll5@@@@@@@@@@',
        'LSTMV': '385%2C26',
        'LCLKINT': '3828',
        'SNUID': '9A8F42F88286F68589E463E1837385F0',
        'IPLOC': 'CN2200',
        'pgv_si': 's3126373376',
        'sct': '17',
        'JSESSIONID': 'aaaYnzdJQDdUUguOHzBvw',
        'ppinf':
        '5|1535195192|1536404792|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyOkJDfGNydDoxMDoxNTM1MTk1MTkyfHJlZm5pY2s6MjpCQ3x1c2VyaWQ6NDQ6bzl0Mmx1RkZDdlpaY1dsTmdsalgwYzd4dE1iY0B3ZWl4aW4uc29odS5jb218',
        'pprdig':
        'eUprT1c-kM2bGGGiLWMW1FXo7TtbFMefITQzQeMOrxkE4dDJgEM15cuAXV1rcjDAXQR4-eqOc7Ycf8F7GwrWUylY1QiEjvrz-cMiEyjtWWMWAf8fkG4G5ZHbMpk0HR14pjbMQZGjZlrS57ZDsIiv3l_uGA5SpI7dIflpnoMu-ok',
        'sgid': '16-36640129-AVuBODg3a8UkW2FdLzwf2W4',
        'ppmdig': '1535195193000000ecfc6e6a5c33bb04d8a46936b64d2333'
    }

    csvfile = open("url.csv", "a", encoding="utf-8", newline='')

    writer = csv.writer(csvfile)

    browser = webdriver.Firefox()  # launch the browser
    browser.get(base_url)

    # add the cookies to the browser session
    for name, value in cookies.items():
        browser.add_cookie({'name': name, 'value': value})

    mongo = mongo.Mongo()  # instantiate the Mongo wrapper
    redis = db.Redis()  # instantiate the Redis wrapper

    def start(self):
        """
            Initialization: builds the start URL and pushes it onto the queue.
        """
        #self.session.headers.update(self.headers)
        url = self.base_url + "?" + parse.urlencode({
            'type': '2',
            "query": self.key_word,
        })
        url_item = [url, self.parse_index]
        self.redis.push(url_item)

        #self.list_url.append(url_item)

    def parse_index(self, html):
        """
            Parses the article links on a listing page; yields [url, callback] lists.
        """
        print("listing page")

        try:
            doc = pq(html)
            items = doc('.news-list h3 a').items()
            for item in items:
                url = item.attr['href'].replace("amp:", "")
                self.writer.writerow([url])  # write the URL to the CSV file
                url_item = [url, self.parse_detail]
                yield url_item

        except:
            print("failed to extract URLs")

        doc = pq(html)
        next_page = doc("#sogou_next").attr['href']
        url = self.base_url + str(next_page)
        url_item = [url, self.parse_index]
        yield url_item

    def parse_detail(self, html):
        """
            Parses the article details on a page; yields the data as a dict.
        """
        print("detail page")
        try:
            doc = pq(html)
            data = {
                'title': doc('#activity-name').text(),
                'content': doc(".rich_media_content").text()
            }
        except:
            print("failed to extract the article content")
        else:
            yield data

    def request(self, url):
        try:
            time.sleep(3)
            self.browser.get(url)
            time.sleep(5)
            scroll_cnt = 0
            while True:
                if scroll_cnt < 5:
                    self.browser.execute_script('window.scrollBy(0, 1000)')
                    time.sleep(1)
                    scroll_cnt += 1
                else:
                    break

            html = self.browser.page_source  # grab the page source
        except:
            print("failed to get the page source")

        else:
            if len(html) != 0:
                return html
            else:
                print("failed to get the page source")
                return html

    def scheduler(self):
        """
            Drives the scheduling loop: pops [url, callback] items and dispatches them.
        """
        self.redis.delete()  # clear the queue to start over from scratch
        if not self.redis.llen():  # if the queue is empty, seed it with the start URL
            self.start()
        while True:
            if self.redis.llen():
                url_item = self.redis.pop()
                url = url_item[0]
                callback = url_item[1]
                print("scheduling url:", url)
                html = self.request(url)
                for item in callback(html):
                    if isinstance(item, dict):
                        self.mongo.save(item)
                    elif isinstance(item, list):
                        self.redis.push(item)  # push follow-up URLs back to Redis
            else:
                print("the url queue is empty!")
                break
                break

    def run(self):
        self.scheduler()
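
A minimal usage sketch, assuming the module-level setup above (the mongo wrapper, the db.Redis wrapper and the Firefox webdriver) has completed:

if __name__ == "__main__":
    # start() pushes the first search URL onto the Redis queue; scheduler() then drains it,
    # fetching each page with Selenium and saving parsed articles to MongoDB.
    Spider().run()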