Ejemplo n.º 1
0
 def setUp(self):
     """Open the weibo and user indexes and set the December 2011 time window.

     ``begin_ts`` / ``end_ts`` are local-time epoch seconds for midnight at the
     start of 2011-12-01 and 2011-12-31 respectively.
     """
     data_dir = '/home/arthas/dev/xapian_weibo/data/'
     window_start = datetime.datetime(2011, 12, 1)
     window_end = datetime.datetime(2011, 12, 31)

     self.weibo_x = XapianSearch(path=data_dir, name='master_timeline_weibo')
     self.user_x = XapianSearch(
         path=data_dir,
         name='master_timeline_user',
         schema_version=1,
     )
     self.begin_ts = time.mktime(window_start.timetuple())
     self.end_ts = time.mktime(window_end.timetuple())
Ejemplo n.º 2
0
def init_xapian(path='/opt/xapian_weibo/data/', name='master_timeline'):
    """Build and return a ``XapianSearch`` over the given index.

    Args:
        path: Directory handed to ``XapianSearch`` as ``path``. Defaults to the
            location this function originally hard-coded.
        name: Index name handed to ``XapianSearch`` as ``name``. Defaults to
            the originally hard-coded ``'master_timeline'``.

    Returns:
        The newly constructed ``XapianSearch`` instance.
    """
    return XapianSearch(path=path, name=name)
Ejemplo n.º 3
0
    REDIS_PORT = 6379
    SQLALCHEMY_DATABASE_URI = 'mysql+mysqldb://root:@localhost/weibo?charset=utf8'
    DYNAMIC_XAPIAN_WEIBO_STUB_PATH = '/home/mirage/dev/data/stub/master_timeline_weibo_'
elif IS_PROD == 3:
    # Settings selected when IS_PROD == 3: Xapian/LevelDB data locations plus
    # the Redis, MongoDB and MySQL endpoints.
    XAPIAN_WEIBO_DATA_PATH = '/home/ubuntu3/huxiaoqian/case/20140724/20140804/'
    XAPIAN_USER_DATA_PATH = '/home/xapian/xapian_user/'
    XAPIAN_DOMAIN_DATA_PATH = '/opt/xapian_weibo/data/20131130/'  # none / not available
    MASTER_TIMELINE_STUB = '/home/mirage/dev/data/stub/master_timeline_weibo_stub'  # none / not available
    LEVELDBPATH = '/home/ubuntu3/huxiaoqian/case_test/data/leveldbpath/'  # none / not available
    REDIS_HOST = '219.224.135.49'  # the indexing Redis server is .49; should not be needed here
    REDIS_PORT = 6379
    MONGODB_HOST = '219.224.135.47'
    MONGODB_PORT = 27019
    # NOTE(review): the URI names user "root" with an empty password — confirm this is intended.
    SQLALCHEMY_DATABASE_URI = 'mysql+mysqldb://root:@219.224.134.222/weibocase?charset=utf8'
    DYNAMIC_XAPIAN_WEIBO_STUB_PATH = '/home/ubuntu4/ljh/csv/stub/topic/master_timeline_weibo_topic'

# Searcher over the 'master_timeline_user' index (schema version 1), built
# when this module is executed from the XAPIAN_USER_DATA_PATH chosen above.
xapian_search_user = XapianSearch(path=XAPIAN_USER_DATA_PATH,
                                  name='master_timeline_user',
                                  schema_version=1)

# Create application
app = Flask('xxx')

# Create dummy secret key so we can use sessions
# NOTE(review): the key is a literal committed in source; consider loading it
# from configuration outside the repository.
app.config['SECRET_KEY'] = 'A0Zr98j/3yX R~XHH!jmN]LWX/,?RT'

# Create database
# The URI must be in app.config before SQLAlchemy(app) is constructed below.
app.config['SQLALCHEMY_DATABASE_URI'] = SQLALCHEMY_DATABASE_URI
app.config['SQLALCHEMY_ECHO'] = False
db = SQLAlchemy(app)
Ejemplo n.º 4
0
# -*- coding:utf-8 -*-

import time
import datetime

from xapian_weibo.xapian_backend import XapianSearch

# Searcher over the 'master_timeline_user' index (schema version 1).
s = XapianSearch(path='/opt/xapian_weibo/data/', name='master_timeline_user', schema_version=1)

# Time window as local-time epoch seconds: midnight at the start of
# 2011-01-01 up to midnight at the start of 2011-12-31.
begin_ts = time.mktime(datetime.datetime(2011, 1, 1).timetuple())
end_ts = time.mktime(datetime.datetime(2011, 12, 31).timetuple())


"""
query_dict = {
    'created_at': {
        '$gt': begin_ts,
        '$lt': end_ts,
    }
}
count, get_results = s.search(query=query_dict, max_offset=1, fields=['_id', 'name'], sort_by=['created_at'])

print count
for r in get_results():
    print r['_id'], r['name']
"""

"""
query_dict = {
    '$or': [
        {'_id': 1934744637},
Ejemplo n.º 5
0
 def setUp(self):
     """Open the weibo index and load ``self.n`` ids for the test to use.

     The ids come from ``self._load_weibo_ids_from_xapian``, which is defined
     elsewhere on the class.
     """
     self.n = 10000
     self.s = XapianSearch(
         path='/home/arthas/dev/xapian_weibo/data/',
         name='master_timeline_weibo',
     )
     # The loader is invoked only after self.s has been assigned.
     self.weibo_ids = self._load_weibo_ids_from_xapian(self.n)
Ejemplo n.º 6
0
# -*- coding: utf-8 -*-

import pymongo, time, codecs, datetime
# Best-effort setup of the weibo searcher: the rest of the module does not
# require it, so a missing xapian_weibo package or an unopenable index must not
# prevent the module from loading.
try:
    from xapian_weibo.xapian_backend import XapianSearch
    statuses_search = XapianSearch(path='/opt/xapian_weibo/data/', name='master_timeline_weibo', schema_version=2)
except Exception:
    # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
    # SystemExit still propagate.  Bind the name explicitly so later code can
    # test ``statuses_search is None`` instead of hitting a NameError.
    statuses_search = None

def con_database():
    """Connect to MongoDB, authenticate, and return the crawler database.

    Authentication is performed against the ``admin`` database; the returned
    handle is the ``test_crawler_liwenwen`` database on the same connection.
    """
    host, port = '219.224.135.60', 27017
    user, password = '******', 'root'

    client = pymongo.Connection(host, port)
    client.admin.authenticate(user, password)
    return client.test_crawler_liwenwen

def main(uid, startdate, enddate):
    """Print how many ``users`` documents match, then print each of them.

    Documents are matched on ``uid`` (compared as a string) and on ``ts`` lying
    within the inclusive range given by ``startdate`` and ``enddate``, both
    converted with ``date2ts``.
    """
    query = {
        'uid': str(uid),
        'ts': {'$gte': date2ts(startdate), '$lte': date2ts(enddate)},
    }
    users = con_database().users
    # Single-argument print(...) behaves the same as the print statement.
    print(users.find(query).count())
    for weibo in users.find(query):
        print(weibo)

def date2ts(date):
    """Convert a ``'YYYY-MM-DD'`` string to integer epoch seconds.

    The date is interpreted as midnight in the process's local time zone
    (``time.mktime`` semantics).  Raises ValueError if ``date`` does not match
    the ``%Y-%m-%d`` format.
    """
    parsed = time.strptime(date, '%Y-%m-%d')
    seconds = time.mktime(parsed)
    return int(seconds)
Ejemplo n.º 7
0
import os

from xapian_weibo.xapian_backend import XapianSearch

from operator import itemgetter
import datetime
import time
import leveldb


# Root directory under which the LevelDB database below lives.
LEVELDBPATH = '/home/mirage/leveldb'
# LevelDB handle opened when this module is executed.
# Both sizes evaluate to 8 * (2 << 25) == 536870912
# (512 MiB if the unit is bytes — TODO confirm against the leveldb binding).
global_user_field_bucket = leveldb.LevelDB(os.path.join(LEVELDBPATH, 'linhao_global_user_field_20131012'),
                                           block_cache_size=8 * (2 << 25), write_buffer_size=8 * (2 << 25))

xapian_search_weibo = XapianSearch(path='/opt/xapian_weibo/data/', name='master_timeline_weibo')  # search by index
xapian_search_user = XapianSearch(path='/opt/xapian_weibo/data/', name='master_timeline_user', schema_version=1)  # search by index
# Presumably the category labels used for user classification — verify against callers.
fields_value = ['culture', 'education', 'entertainment', 'fashion', 'finance', 'media', 'sports', 'technology']


def readProtoUser(path="/home/mirage/linhao/project_bishe/weibo/profile/user_classify/protou.txt"):
    """Load the prototype-user table as a mapping of area to user ids.

    Each non-blank line is expected to look like ``area:uid uid uid ...``.
    Lines that name the same area are merged into one set.

    Args:
        path: File to read.  Defaults to the location this function originally
            hard-coded, so existing zero-argument calls are unaffected.

    Returns:
        dict mapping each area (str) to a set of int user ids.

    Raises:
        IndexError: a non-blank line contains no ``':'``.
        ValueError: a user id is not an integer literal.
    """
    protou = {}
    with open(path) as f:
        for line in f:
            # A blank line (commonly a trailing newline at end of file)
            # carries no record; without this guard it raised IndexError.
            if not line.strip():
                continue
            parts = line.split(":")
            protou.setdefault(parts[0], set()).update(int(u) for u in parts[1].split())
    return protou