예제 #1
0
    def calAct(self,col,cond):
        """Tally ``mfwuserfeed`` rows per distinct value of ``col``.

        Only rows satisfying ``cond`` are considered.  The resulting
        ``value -> count`` mapping is handed to ``self.calDictRate`` and
        whatever that returns is returned unchanged.

        Both ``col`` and ``cond`` are spliced into the SQL text verbatim, so
        they have to be trusted SQL fragments.
        """
        sql = (
            "select " + col + " as act,count(*) as count from mfwuserfeed where "
            + cond + " group by " + col
        )

        counts = {}
        for row in dbconn.query(sql):
            counts[row.act] = row.count

        return self.calDictRate(counts)
예제 #2
0
    def calDense(self,col,cond):
        """Tally ``mfwuserfeed`` rows per bucket of ``floor(col*10)``.

        Only rows satisfying ``cond`` are considered.  The resulting
        ``bucket -> count`` mapping is handed to ``self.calDictRate`` and
        whatever that returns is returned unchanged.

        Both ``col`` and ``cond`` are spliced into the SQL text verbatim, so
        they have to be trusted SQL fragments.
        """
        sql = (
            "select floor(" + col + "*10) as dense,count(*) as count from mfwuserfeed where "
            + cond + " group by floor(" + col + "*10) "
        )

        buckets = {}
        for row in dbconn.query(sql):
            buckets[row.dense] = row.count

        return self.calDictRate(buckets)
예제 #3
0
def fileToNickname():
    """Insert a ``mfwuser`` row for every user found on disk but not in the DB.

    Walks each sub-directory of ``tempDir`` and inspects files whose name
    contains ``<userid>_<page>.html``.  For every file with page number 1
    whose user id has no row in ``mfwuser`` yet, the nickname is fetched with
    ``getNickName(userid)`` and inserted together with the user id.
    """
    # Raw string so ``\d`` is a regex escape rather than a string escape, and
    # an escaped dot so only a literal ".html" suffix is accepted.
    comp = re.compile(r"(\d+)_(\d+)\.html")
    for feedDir in os.listdir(tempDir):
        feedPath = os.path.join(tempDir, feedDir)
        if not os.path.isdir(feedPath):
            continue
        for filename in os.listdir(feedPath):
            m = comp.search(filename)
            # Only the first page of each user's feed triggers a lookup.
            if not m or int(m.group(2)) != 1:
                continue
            userid = int(m.group(1))
            known = dbconn.query("select * from mfwuser where userid = $userid",vars=dict(userid=userid))
            if len(known) == 0:
                nickname = getNickName(userid)
                dbconn.insert("mfwuser",userid=userid,nickname=nickname)
예제 #4
0
def calActSummary(sql):
    """Count how often each act appears in the rows' summary strings.

    Runs ``sql`` and reads ``actSummaryString`` from every returned row.  The
    first character of that string is dropped and the remainder is split on
    ``$`` into entries; the text before the first ``|`` of an entry is taken
    as the act name.  Every entry adds 1 to its act's tally -- the part after
    ``|`` is not used.

    Empty summary strings and empty entries (e.g. from a trailing ``$``) are
    skipped instead of raising ``IndexError``.

    Returns the result of ``dictToOrderList`` applied to the act -> tally dict.
    """
    summary = {}

    for r in dbconn.query(sql):
        actSummaryString = r.actSummaryString
        if not actSummaryString:
            continue
        for actString in actSummaryString[1:].split(u"$"):
            if not actString:
                continue
            act = actString.split(u"|")[0]
            # ``dict.get`` replaces the Python-2-only ``has_key`` branch.
            summary[act] = summary.get(act, 0) + 1

    return dictToOrderList(summary)
예제 #5
0
def hasFeed(userid):
    """Return True when ``mfwuserfeed`` has at least one row for ``userid``."""
    rows = dbconn.query(
        "select * from mfwuserfeed where userid = $userid",
        vars=dict(userid=userid),
    )
    return len(rows) > 0
예제 #6
0
#coding:utf-8
from bs4 import BeautifulSoup
import bs4,math,re,os
from publicsettings import useridRange,dbconn,tempDir,articleidRange
from datetime import datetime

# Module-level lookup table built at import time: name -> id for every row
# of the ``mfwactype`` table.
res = dbconn.query("select * from mfwactype")
actypeDict = {}
for r in res:
    actypeDict[r.name] = r.id

    
def hasMoreFeedPage(html):
    """Return True when the page's pager block contains the text "末页".

    The pager is the first ``div`` with class ``f_turnpage``; when the page
    has no such element the answer is False.
    """
    pager = BeautifulSoup(html, from_encoding="utf8").find("div","f_turnpage")
    if not pager:
        return False
    return u"末页" in pager.get_text()

def getPagesAndCal(userid,lastpage):
    actDictList = []
    for pagenumber in range(1,lastpage+1):
        html = open(tempDir + "/" + str(userid)[0:2] +"/" +str(userid) + "_" + str(pagenumber) + ".html").read()
        actDictList.extend( getFeed(html) )
    if len(actDictList) == 0:
        dbconn.insert("mfwuserfeed",
                userid = userid,
예제 #7
0
def cal(className):
    """Increment the tally for ``className`` in the global ``class_dict``.

    ``className`` is either a single string or a list of strings.  A list is
    flattened into one key in which every item is preceded by a single space
    (so the key starts with a space), matching the keys written previously.
    """
    if isinstance(className,list):
        className = "".join(" " + c for c in className)
    # ``dict.get`` replaces the Python-2-only ``has_key`` branch.
    class_dict[className] = class_dict.get(className, 0) + 1

# The disabled loop below would walk every sub-directory of tempMddDir; as
# written, only sub-directory "1" is analysed.
##for mddDir in os.listdir(tempMddDir):
##    if not os.path.isdir(tempMddDir +"/"+ mddDir):
##        continue
for filename in os.listdir(tempMddDir +"/1"):
    # Close each page file as soon as it has been read instead of leaving
    # the handle to the garbage collector.
    with open(tempMddDir+"/1/"+filename,"r") as page:
        ana(page.read())

# Dump the tallies collected in class_dict as "<key>,<count>" lines.
# NOTE(review): no close() for ``f`` is visible in this part of the file --
# confirm it is closed further down.
f = open("d:/log/mmd.log","wb")
for d in dictToOrderList(class_dict):
    f.write(d[0] + "," + str(d[1])+ "\r\n")
"""
res = dbconn.query("select distinct pid from mfwmdd where pid is not null")
comp = re.compile(u"<title>(.+)地区旅游地图")
for r in res:
    pid = r.pid
    html = open(tempMddDir+"/"+str(pid)[0]+"/"+str(pid)+".html","r").read()
    soup = BeautifulSoup(html,from_encoding="utf8")
    name = comp.search(unicode(soup.title)).group(1)
    dbconn.insert("mfwpid",pid=pid,name=name)
    
예제 #8
0
 def start_requests(self):
     """Yield one request per distinct non-null ``pid`` found in ``mfwmdd``.

     Each request is created with ``self.make_requests_from_url`` for the
     smap.php URL carrying that pid as ``mddid``.
     """
     base_url = "http://www.mafengwo.cn/mdd/smap.php?mddid="
     for row in dbconn.query("select distinct pid from mfwmdd where pid is not null"):
         yield self.make_requests_from_url(base_url + str(row.pid))