import pickle

from mongo_dao import MongoDAO
# Security and send_to_client are provided elsewhere in the application


def handler(msg_head):
    decrypt = Security()
    handle = MongoDAO()
    rec_packet = pickle.loads(msg_head)
    metadata = decrypt.app_decrypt(rec_packet[1])

    if rec_packet[2] == 'key':
        # Look up the requested user's public key and send it back
        pub_key = handle.get_publicKey(metadata[1][0])
        li = [pub_key, 'key']
        send_to_client(pickle.dumps(li))
    elif rec_packet[2] == 'msg':
        # Store the message for later delivery; forward it only if the recipient is active
        active = False
        record = {"drop": msg_head, "status": "Not delivered"}
        handle.insert(metadata[1][0], record)
        if active:
            send_to_client(msg_head)
    elif rec_packet[2] == 'signup':
        # Assumes if_user_exist() is truthy when the ID is already registered
        exist = handle.if_user_exist(metadata[1][0])
        if exist:
            error = ["User ID already exists - ERROR"]
            send_to_client(pickle.dumps(error))
        else:
            unique = ["User ID is Unique"]
            send_to_client(pickle.dumps(unique))
import MeCab
from mongo_dao import MongoDAO
import word_cloud
from wordcloud import WordCloud

target = "tenkou"

# Set up MeCab
tagger = MeCab.Tagger("-Ochasen")

# Fetch the data for the target account from MongoDB
mongo = MongoDAO("db", target)
target_results = mongo.find()

# Containers for the analysis results
positive_words = []
negative_words = []
neutral_words = []
tweet_score = 0

# Switch the DB connection to the word dictionary collection
mongo = MongoDAO("db", "noun")

for target_result in target_results:
    text = target_result['text']
    mecab_results = tagger.parse(text)
    for result in mecab_results.split('\n'):
        word = result.split('\t')[0]
import json
import networkx as nx
import matplotlib.pyplot as plt
from requests_oauthlib import OAuth1Session
from mongo_dao import MongoDAO

mongo = MongoDAO("db", "followers_info")

# Output and visualize the social graph of this user's followers who follow each other back
# (change this to another account as needed)
start_screen_name = 'yurinaNECOPLA'

# Create a new graph
G = nx.Graph()
# Add the starting node
G.add_node(start_screen_name)

depth = 3
processed_list = []

def get_followers_list(screen_name):
    # Read the stored follower list for the given account from MongoDB
    result = mongo.find(filter={"screen_name": screen_name})
    followers_list = []
    try:
        doc = next(result)
        if doc is not None:
            for user in doc['followers_info']:
                followers_list.append(user['screen_name'])
        return followers_list
    except StopIteration:
        return followers_list
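# The traversal and plotting steps that use the pieces above are not included in this
# snippet. The sketch below is one assumed way to combine them, following the intent in
# the comment (walk followers up to `depth` levels, keep only mutual follows, then draw
# the graph); the function name and drawing options are illustrative, not the original code.
def build_mutual_follow_graph(screen_name, current_depth=0):
    if current_depth >= depth or screen_name in processed_list:
        return
    processed_list.append(screen_name)
    for follower in get_followers_list(screen_name):
        # Mutual follow: the starting account also appears in the follower's own follower list
        if screen_name in get_followers_list(follower):
            G.add_node(follower)
            G.add_edge(screen_name, follower)
            build_mutual_follow_graph(follower, current_depth + 1)

build_mutual_follow_graph(start_screen_name)
nx.draw_networkx(G, with_labels=True, node_color='lightblue', font_size=8)
plt.axis('off')
plt.show()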
from mongo_dao import MongoDAO
import pandas as pd

db_url = 'mongodb://localhost:27017'
db_name = 'house_price_predict'

dao = MongoDAO(conn_url=db_url, dbname=db_name)

# Load every collection into its own DataFrame
all_collections = dict()
for coll_name in dao.get_all_collection_names():
    print(coll_name)
    all_collections[coll_name] = pd.DataFrame(dao.get_all_documents(coll_name))
    # print(all_collections[coll_name])

# Basic price statistics for the last collection read
df = all_collections[coll_name]
print(df['price'].mean())
print(df['price'].std())
print(df['price'].count())
print(df['price'].min())
print(df['price'].max())
print(df[df['price'] < 8000000])
from mongo_dao import MongoDAO
import codecs

mongo = MongoDAO("db", "noun")

dict_path = './dict/noun_dict.trim'

# Load the polarity dictionary and store each entry with a numeric score
with codecs.open(dict_path, "r", "utf-8") as f:
    for line in f:
        d = line[:-1].split('\t')
        print(d)
        if d[1] == 'n':      # negative word
            d.append(-1)
        elif d[1] == 'p':    # positive word
            d.append(1)
        else:                # neutral / other
            d.append(0)
        mongo.insert_one({"word": d[0], "np": d[1], "evaluation": d[2], "score": d[3]})
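# The scripts in this section only import MongoDAO; its implementation is not shown here.
# The calls they make (MongoDAO("db", <collection>), find(), insert_one()) are consistent
# with a thin wrapper over pymongo along the lines of this minimal sketch. The constructor
# parameters and the hard-coded connection URL are assumptions, not the actual class.
from pymongo import MongoClient

class MongoDAO:
    def __init__(self, db_name, collection_name, url='mongodb://localhost:27017'):
        self.client = MongoClient(url)
        self.collection = self.client[db_name][collection_name]

    def find(self, filter=None, projection=None):
        # Returns a pymongo cursor; callers iterate it or pull one document with next()
        return self.collection.find(filter, projection)

    def insert_one(self, document):
        return self.collection.insert_one(document)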
from ingatlan_com_crawler import IngatlanComCrawler
from mongo_dao import MongoDAO

db_url = 'mongodb://localhost:27017'
db_name = 'house_price_predict'

crawler = IngatlanComCrawler(
    starter_url='https://ingatlan.com/szukites/elado+lakas+baja',
    http_headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    })

crawled_data = crawler.crawl()

# Store the crawled listings, then read them back to verify
dao = MongoDAO(conn_url=db_url, dbname=db_name)
dao.insert_documents(crawled_data)

collection_name = dao.collname
print('Reading back from db ' + collection_name)
print(len(dao.get_all_documents(collection_name)))
print(dao.get_all_collection_names())
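# This script and the price-statistics one call a differently shaped MongoDAO
# (keyword constructor arguments plus collection-level helpers). Inferred purely from
# those calls, a plausible pymongo-based sketch is shown below; the per-run collection
# naming scheme is a guess, not the original implementation.
from datetime import datetime
from pymongo import MongoClient

class MongoDAO:
    def __init__(self, conn_url, dbname):
        self.db = MongoClient(conn_url)[dbname]
        # One collection per crawl run (naming scheme assumed)
        self.collname = 'crawl_' + datetime.now().strftime('%Y%m%d_%H%M%S')

    def insert_documents(self, documents):
        return self.db[self.collname].insert_many(documents)

    def get_all_documents(self, collection_name):
        return list(self.db[collection_name].find())

    def get_all_collection_names(self):
        return self.db.list_collection_names()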
from collections import defaultdict
from janome.tokenizer import Tokenizer
from mongo_dao import MongoDAO

t = Tokenizer()

def get_words(texts):
    # The function name, imports and tokenizer setup above are reconstructed;
    # only the body below appears in the original snippet.
    words_count = defaultdict(int)
    words = []
    for text in texts:
        tokens = t.tokenize(text)
        for token in tokens:
            # Keep only adjectives (形容詞) and verbs (動詞) by part of speech
            pos = token.part_of_speech.split(',')[0]
            if pos in ['形容詞', '動詞']:
                # Skip unneeded words (listed after inspecting the actual results)
                if token.base_form not in ["こと", "よう", "そう", "これ", "それ"]:
                    words_count[token.base_form] += 1
                    words.append(token.base_form)
    return words_count, words

target = 'tenkou'
mongo = MongoDAO("db", target)

results = mongo.find(projection={"text": 1})

texts = []
for result in results:
    # Drop line breaks and anything after an embedded URL
    s = result['text'].replace('\n', '')
    text = s.split('http')
    texts.append(text[0])

# Alternative: read the tweets from a TSV file instead of MongoDB
# with codecs.open('./output/tweet_data', 'r', 'utf-8') as f:
#     reader = csv.reader(f, delimiter='\t')
#     texts = []
#     for row in reader:
#         if(len(row) > 0):
#             text = row[0].split('http')
#             texts.append(text[0])
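# The snippet above prepares the tweet texts and a counting helper but stops before the
# word cloud itself is generated. Assuming the reconstructed get_words() helper and a
# Japanese-capable font at the placeholder path below, generation with the wordcloud
# package could look roughly like this.
from wordcloud import WordCloud

words_count, words = get_words(texts)

wc = WordCloud(font_path='./fonts/ipaexg.ttf',  # placeholder; any Japanese font works
               background_color='white', width=800, height=600)
wc.generate_from_frequencies(words_count)
wc.to_file('./output/wordcloud.png')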