def index():
    """Render 'index.html' with summary counts from the Mongo collections.

    Passes three values to the template: the number of distinct 'ip' values
    in the 'host' collection (``ip_count``), the number of documents in the
    'host' collection (``result_count``) and the number of documents in the
    'web' collection (``web_count``).
    """
    host_store = MongoDB(col='host')
    distinct_ips = host_store.coll.distinct('ip')
    host_total = host_store.coll.count()

    web_store = MongoDB(col='web')
    web_total = web_store.coll.count()

    counts = {
        'ip_count': len(distinct_ips),
        'web_count': web_total,
        'result_count': host_total,
    }
    return render_template('index.html', **counts)
class UserAPI:
    """Access layer for user records kept in the Mongo "users" collection.

    On construction it connects to the database named by ``db_name`` and
    loads ``cities/provinces.csv`` (located next to this file) into
    ``self.provinces``, mapping the first CSV column to the second.
    """

    def __init__(self):
        print("User API instance")

        # Connect to Mongo
        self.db = MongoDB(db_name).db["users"]
        print("Total users in the db : %d" % self.db.count())
        print('-' * 12)

        # fetch cities
        here = os.path.dirname(os.path.abspath(__file__))
        provinces_file = os.path.join(here, "cities/provinces.csv")
        self.provinces = {}
        # 'rb' is the mode the Python 2 csv module expects.
        with open(provinces_file, 'rb') as csvfile:
            for row in csv.reader(csvfile):
                self.provinces[row[0]] = row[1]

    def create_user(self, data):
        """Build a ``User`` from a positional record and save it.

        ``data`` is read by index: 0 -> uid, 1 -> province, 2 -> gender,
        3 -> verified.
        """
        # NOTE(review): the raw province value from data[1] is stored as-is;
        # self.provinces is not consulted here. Confirm whether the mapped
        # value was intended instead.
        u = User()
        u.uid = data[0]
        u.province = data[1]
        u.gender = data[2]
        u.verified = data[3]
        u.save()  # store to mongo

    def get_user(self, _uid):
        """Return the stored document whose "uid" equals ``_uid``.

        The result of ``find_one`` is returned unchanged (pymongo gives None
        when nothing matches).
        """
        return self.db.find_one({"uid": _uid})

    def get_province(self, _uid):
        """Return the "province" field of the user ``_uid``, or None if no
        such user is found."""
        user = self.get_user(_uid)
        if user is None:
            return None
        return user["province"]
def __init__(self, provinces_file="/home/clemsos/Dev/mitras/lib/cities/provinces.csv"):
    """Connect to the "weibousers" collection and load the province table.

    Args:
        provinces_file: path of the provinces CSV. Defaults to the original
            absolute location; pass another path when running on a machine
            where that location does not exist.

    Sets ``self.db`` to the "weibousers" collection of the "tweets" database
    and ``self.provinces`` to a dict mapping the first CSV column to the
    second.
    """
    print("User API instance")

    # Connect to Mongo
    self.db = MongoDB("tweets").db["weibousers"]
    print("Total users in the db : %d" % self.db.count())
    print('-' * 12)

    # fetch cities
    self.provinces = {}
    # 'rb' is the mode the Python 2 csv module expects.
    with open(provinces_file, 'rb') as csvfile:
        for row in csv.reader(csvfile):
            self.provinces[row[0]] = row[1]
class UserAPI:
    """Access layer for user records kept in the Mongo "weibousers" collection
    of the "tweets" database.

    ``self.provinces`` maps the first column of the provinces CSV to the
    second.
    """

    def __init__(self, provinces_file="/home/clemsos/Dev/mitras/lib/cities/provinces.csv"):
        """Connect to Mongo and load the province table.

        Args:
            provinces_file: path of the provinces CSV. Defaults to the
                original absolute location; pass another path when running
                on a machine where that location does not exist.
        """
        print("User API instance")

        # Connect to Mongo
        self.db = MongoDB("tweets").db["weibousers"]
        print("Total users in the db : %d" % self.db.count())
        print('-' * 12)

        # fetch cities
        self.provinces = {}
        # 'rb' is the mode the Python 2 csv module expects.
        with open(provinces_file, 'rb') as csvfile:
            for row in csv.reader(csvfile):
                self.provinces[row[0]] = row[1]

    def create_user(self, data):
        """Build a ``User`` from a positional record and save it.

        ``data`` is read by index: 0 -> uid, 1 -> province, 2 -> gender,
        3 -> verified.
        """
        # NOTE(review): the raw province value from data[1] is stored as-is;
        # self.provinces is not consulted here. Confirm whether the mapped
        # value was intended instead.
        u = User()
        u.uid = data[0]
        u.province = data[1]
        u.gender = data[2]
        u.verified = data[3]
        u.save()  # store to mongo

    def get_user(self, _uid):
        """Return the stored document whose "uid" equals ``_uid``.

        The result of ``find_one`` is returned unchanged (pymongo gives None
        when nothing matches).
        """
        return self.db.find_one({"uid": _uid})

    def get_province(self, _uid):
        """Return the "province" field of the user ``_uid``, or None if no
        such user is found."""
        user = self.get_user(_uid)
        if user is None:
            return None
        return user["province"]
def __init__(self):
    """Connect to the "users" collection and load the province table.

    Sets ``self.db`` to the "users" collection of the database named by
    ``db_name`` and fills ``self.provinces`` from ``data/provinces.csv``,
    looked up one directory above the directory containing this file. The
    first CSV column is the key, the second the value.
    """
    print("User API instance")

    # Connect to Mongo
    self.db = MongoDB(db_name).db["users"]
    user_total = self.db.count()
    print("Total users in the db : %d" % user_total)
    print('-' * 12)

    # fetch cities
    module_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(module_dir)
    csv_path = os.path.join(parent_dir, "data/provinces.csv")
    self.provinces = {}
    with open(csv_path, 'rb') as handle:
        self.provinces.update((row[0], row[1]) for row in csv.reader(handle))
class UserAPI:
    """Access layer for user records kept in the Mongo "users" collection.

    On construction it connects to the database named by ``db_name`` and
    loads ``data/provinces.csv`` (one directory above the directory holding
    this file) into ``self.provinces``, mapping the first CSV column to the
    second.
    """

    def __init__(self):
        print("User API instance")

        # Connect to Mongo
        self.db = MongoDB(db_name).db["users"]
        print("Total users in the db : %d" % self.db.count())
        print('-' * 12)

        # fetch cities
        here = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        provinces_file = os.path.join(here, "data/provinces.csv")
        self.provinces = {}
        # 'rb' is the mode the Python 2 csv module expects.
        with open(provinces_file, 'rb') as csvfile:
            for row in csv.reader(csvfile):
                self.provinces[row[0]] = row[1]

    def create_user(self, data):
        """Build a ``User`` from a positional record and save it.

        ``data`` is read by index: 0 -> uid, 1 -> province, 2 -> gender,
        3 -> verified.
        """
        # NOTE(review): the raw province value from data[1] is stored as-is;
        # self.provinces is not consulted here. Confirm whether the mapped
        # value was intended instead.
        u = User()
        u.uid = data[0]
        u.province = data[1]
        u.gender = data[2]
        u.verified = data[3]
        u.save()  # store to mongo

    def get_user(self, _uid):
        """Return the stored document whose "uid" equals ``_uid``.

        The result of ``find_one`` is returned unchanged (pymongo gives None
        when nothing matches).
        """
        return self.db.find_one({"uid": _uid})

    def get_province(self, _uid):
        """Return the "province" field of the user ``_uid``, or None if no
        such user is found."""
        user = self.get_user(_uid)
        if user is None:
            return None
        return user["province"]
def web_data():
    """Return one page of 'web' records as JSON.

    Search terms come from the ``q`` parameter of the Referer URL, and only
    when that URL contains 'result'; they are split on ';' and handed to
    ``query_logic`` to build the Mongo filter. In every other case the
    filter is built from a single empty term. Paging is driven by the
    posted ``draw``, ``start`` and ``length`` form fields.

    The response carries ``draw``, ``recordsTotal``, ``recordsFiltered`` and
    ``data``; the key names look like the DataTables server-side protocol
    (not confirmed from this file). Each entry of ``data`` is the list
    [ip, domain, port, title, server, header, date].
    """
    # A request without a Referer header is handled like one whose referer
    # carries no search terms, instead of failing on the header lookup.
    referer = request.headers.get('Referer', '')
    terms = ['']
    if 'result' in referer:
        params = parse.parse_qs(parse.urlparse(referer).query)
        if 'q' in params:
            terms = params['q'][0].strip().split(';')
    query = query_logic(terms)

    draw = int(request.form.get('draw'))
    start = int(request.form.get('start'))
    length = int(request.form.get('length'))

    mongo = MongoDB(col='web')
    records_total = mongo.coll.count()
    records_filtered = mongo.coll.find(query).count()
    cursor = (
        mongo.coll.find(query, projection={'_id': False})
        .sort([('ip', 1), ('port', 1)])
        .skip(start)
        .limit(length)
    )

    # Column order of each output row.
    columns = ('ip', 'domain', 'port', 'title', 'server', 'header', 'date')
    rows = [[doc[name] for name in columns] for doc in cursor]

    return jsonify({
        'draw': draw,
        'recordsTotal': records_total,
        'recordsFiltered': records_filtered,
        'data': rows,
    })
def __init__(self):
    """Connect to the "users" collection and load the province table.

    Sets ``self.db`` to the "users" collection of the database named by
    ``db_name`` and fills ``self.provinces`` from ``cities/provinces.csv``,
    looked up in the directory containing this file. The first CSV column
    is the key, the second the value.
    """
    print("User API instance")

    # Connect to Mongo
    self.db = MongoDB(db_name).db["users"]
    user_total = self.db.count()
    print("Total users in the db : %d" % user_total)
    print('-' * 12)

    # fetch cities
    module_dir = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(module_dir, "cities/provinces.csv")
    self.provinces = {}
    with open(csv_path, 'rb') as handle:
        self.provinces.update((row[0], row[1]) for row in csv.reader(handle))
def load_tweets(self, collection, qty):
    """Load up to ``qty`` tweets from ``collection`` in the "weibodata" db.

    Only documents whose "dico" field is not an empty array are selected.
    Progress and timing information is printed along the way.

    Args:
        collection: name of the Mongo collection to read from.
        qty: maximum number of documents to load.

    Returns:
        A list of the matching documents.
    """
    t0 = time()

    # import libs
    self.add_relative_path()
    from lib.mongo import MongoDB

    # Connect to Mongo
    db = MongoDB("weibodata").db
    data = db[collection]
    tweets_count = data.count()
    print(10 * "-")
    print(str(tweets_count) + " tweets in the db")

    # Load data
    print("Loading " + str(qty) + " tweets from " + collection + " db...")
    _type = "dico"
    query = {_type: {"$not": {"$size": 0}}}

    # find() only builds a lazy cursor; the documents are fetched when it is
    # iterated. Materialize the list before reporting the elapsed time so
    # the figure includes the actual load.
    tweets = list(data.find(query).limit(qty))
    print("loaded in %0.3fs" % (time() - t0))
    return tweets
from lib.stats import get_tweets_volume_time_series from lib.visualizer import create_bar_graph,create_pie_chart,create_tag_cloud from time import time,strftime,strptime from datetime import datetime from bson.code import Code from collections import Counter import pylab as plt # Connect to Mongo collection="memes" memes_count = 20 db=MongoDB("weibodata").db data=db[collection] total_memes_count=data.count() print 10*"-" print "%s memes in the db"%total_memes_count print def find_hashtags_redundant(_memes): hash=[] print hash for i, meme in enumerate(_memes): hash_row= [] # hash[i]="" print len(meme["tweets"])
# Output locations, all relative to root_path.
viz_path = root_path + "out/viz/"
gephi_path = root_path + "out/gephi/"
map_path = root_path + "out/maps/"
# training_set=root_path+"data/train/trainset.csv"
# sample_scale=4 #number of iterations
chunksize = 5000

# init
t0 = time()
nlp = NLPMiner()
# needs_header=True
# init_files=[True,True,True,True,True]

# Connect to Mongo
db = MongoDB("weiboclean").db
# db=MongoDB("test").db

# where the raw data is
collection = db["tweets"]

# get corpus length
tweets_count = collection.count()
print(str(tweets_count) + " tweets in the db")
print(10 * "-")

# List of stop-hashtags, used to remove the most common occurrences.
# One entry per line of the file; surrounding whitespace is stripped.
stop_hashtags_file = root_path + "/lib/stopwords/stop_hashtags"
# Read inside a with-block so the file handle is closed once loaded.
with open(stop_hashtags_file) as stop_file:
    stop_hashtags = [line.strip() for line in stop_file]

###
# CREATE HASHTAGS LIST
# Flask application bootstrap: creates the app and the shared extension /
# service objects (csrf, apscheduler, mongo, log) at import time.

# Import order is kept as-is: `from config import *` is a wildcard import,
# so moving it relative to the other imports could change which binding of
# a same-named symbol wins.
import os
from flask import Flask
from flask_wtf.csrf import CSRFProtect
from config import *
from lib.mongo import MongoDB
from lib.log_handle import Log
from datetime import timedelta
from flask_apscheduler import APScheduler

app = Flask(__name__)

# Use SECRET_KEY from the environment when set. The fallback is a fresh
# random key per process start, so sessions signed before a restart will no
# longer validate afterwards.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(64)
app.config.from_object(Config)

# Session lifetime. `app.permanent_session_lifetime` is backed by this same
# config key, so a single assignment is enough; 7 days is the value that was
# effectively in force.
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=7)

# Reload templates when they change on disk.
app.jinja_env.auto_reload = True
app.config['TEMPLATES_AUTO_RELOAD'] = True

csrf = CSRFProtect()
csrf.init_app(app)

apscheduler = APScheduler()

# MONGO_* are not defined in this block; presumably they come from the
# wildcard `config` import -- verify against config.py.
mongo = MongoDB(host=MONGO_IP, port=MONGO_PORT, username=MONGO_USER, password=MONGO_PWD)
log = Log()