예제 #1
0
def insert(mddid, name, box_dict):
    """Insert one row into the ``mfwmdd`` table.

    ``mddid`` and ``name`` are stored as given.  Every other column value is
    read from ``box_dict`` under a key equal to the column name, so a missing
    key raises ``KeyError``.  Nothing is returned.
    """
    # Column names, in the order their values are read from box_dict.
    column_names = (
        "post_main",
        "box_box_photo",
        "box_master",
        "box_box_map",
        "box_baike",
        "box_box_discuss",
        "box_box_plan",
        "box_other_city",
        "box_box_book",
        "box_box_tips",
        "poi_nav",
        "box_postBox",
        "city_hd",
        "box_mb10",
        "box_mod_rec",
        "box_hotBox2",
        "box_pathBox2",
        "box_pathBox1",
        "box_hotBox1",
        "info_cate",
        "box_other_mdd",
    )
    row = {}
    for column in column_names:
        row[column] = box_dict[column]
    dbconn.insert("mfwmdd", mddid=mddid, name=name, **row)
예제 #2
0
def fileToNickname():
    """Store a nickname in ``mfwuser`` for every user with a saved first page.

    Walks each sub-directory of ``tempDir`` looking for file names of the
    form ``<userid>_<page>.html``.  For every file whose page number is 1 the
    user id is looked up in ``mfwuser``; when no row exists yet, the nickname
    returned by ``getNickName(userid)`` is inserted together with the id.
    """
    # Raw string with an escaped dot, so only a literal ".html" matches
    # (the unescaped "." previously accepted any character before "html").
    comp = re.compile(r"(\d+)_(\d+)\.html")
    for feedDir in os.listdir(tempDir):
        feedPath = tempDir + "/" + feedDir
        if not os.path.isdir(feedPath):
            continue
        for filename in os.listdir(feedPath):
            m = comp.search(filename)
            # Only the first page of each user's feed triggers a lookup.
            if not m or int(m.group(2)) != 1:
                continue
            userid = int(m.group(1))
            existing = dbconn.query(
                "select * from mfwuser where userid = $userid",
                vars=dict(userid=userid))
            # len() is kept from the original check; the truthiness of the
            # query result object is not visible from this code.
            if len(existing) == 0:
                nickname = getNickName(userid)
                dbconn.insert("mfwuser", userid=userid, nickname=nickname)
예제 #3
0
def getPagesAndCal(userid,lastpage):
    """Summarise a user's saved feed pages into one ``mfwuserfeed`` row.

    Reads pages ``1..lastpage`` from
    ``tempDir/<first two characters of userid>/<userid>_<page>.html``, passes
    each page's text to ``getFeed`` and concatenates the returned entries.

    * No entries: a row with ``sumCount=0`` is inserted.
    * Exactly one entry: a row with fixed counts and densities of 1 is
      inserted.
    * Otherwise: the summaries, densities and period statistics are computed
      by the helper functions, printed, and inserted.

    Each entry is indexed as ``entry[0]`` (stored as the activity) and
    ``entry[1]`` (stored as its time).  The last collected entry is recorded
    as the "first" activity -- presumably the feed is listed newest first;
    confirm against ``getFeed``.

    Returns ``None`` in every case.
    """
    actDictList = []
    for pagenumber in range(1,lastpage+1):
        pagePath = tempDir + "/" + str(userid)[0:2] +"/" +str(userid) + "_" + str(pagenumber) + ".html"
        # Close every page after reading instead of leaking one handle per page.
        with open(pagePath) as pageFile:
            html = pageFile.read()
        actDictList.extend( getFeed(html) )
    if len(actDictList) == 0:
        dbconn.insert("mfwuserfeed",
                userid = userid,
                pageCount=lastpage,
                sumCount=0
        )
        return
    if len(actDictList) == 1:
        onlyAct = actDictList[-1]
        dbconn.insert("mfwuserfeed",
                userid = userid,
                pageCount=lastpage,
                sumCount=1,
                firstAct = onlyAct[0],
                firstActTime = onlyAct[1],
                mostAct = onlyAct[0],
                actSummaryString = "$" + str(onlyAct[0]) + "|1",
                actDense = 1,
                dateDense = 1
        )
        return
    ##registryTime = getRegistryTime()
    sumCount = len(actDictList)
    firstAct = actDictList[-1][0]
    firstActTime = actDictList[-1][1]
    actSummary,dateSummary = getActDateSummary(actDictList)
    # The first element of the activity summary is stored as the most
    # frequent activity (assumes getActDateSummary orders it that way).
    mostAct = actSummary[0][0]
    actSummaryString = summaryToString(actSummary)
    actDense = calDense(actSummary)
    dateDense = calDense(dateSummary)
    longestPeriod, mostPeriod, deviation, avgPerd, middlePerd = calRate(dateSummary)
    # Single-argument print(...) produces the same output on Python 2 and
    # also parses on Python 3.
    for label, value in (
            ("sumCount", sumCount),
            ("firstAct", firstAct),
            ("firstActTime", firstActTime),
            ("mostAct", mostAct),
            ("actSummaryString", actSummaryString),
            ("actDense", actDense),
            ("dateDense", dateDense),
            ("longestPeriod", longestPeriod),
            ("mostPeriod", mostPeriod),
            ("deviation", deviation),
            ("avgPerd", avgPerd),
            ("middlePerd", middlePerd)):
        print(label + ":" + str(value))
    dbconn.insert("mfwuserfeed",
                userid = userid,
                pageCount=lastpage,
                sumCount=sumCount,
                firstAct=firstAct,
                firstActTime = firstActTime,
                mostAct = mostAct,
                actSummaryString = actSummaryString,
                actDense = actDense,
                dateDense = dateDense,
                longestPeriod = longestPeriod,
                mostPeriod = mostPeriod,
                deviation = deviation,
                avgPerd = avgPerd,
                middlePerd = middlePerd
    )
예제 #4
0
def cal(className):
    """Increment the occurrence count of ``className`` in ``class_dict``.

    ``className`` is either a string or a list of strings.  A list is
    flattened into one string in which every element is preceded by a single
    space (so ``["a", "b"]`` becomes ``" a b"`` and ``[]`` becomes ``""``).
    The resulting string is the key whose counter in the module-level
    ``class_dict`` is increased by one, starting from 1 on first sight.
    """
    if isinstance(className,list):
        # Keep the leading space of the original key format.
        className = "".join(" " + c for c in className)
    # dict.get replaces the deprecated has_key() lookup.
    class_dict[className] = class_dict.get(className, 0) + 1

##for mddDir in os.listdir(tempMddDir):
##    if not os.path.isdir(tempMddDir +"/"+ mddDir):
##        continue
# Feed the content of every file in tempMddDir/1 to ana().
for filename in os.listdir(tempMddDir +"/1"):
    # Close each input file once it has been read.
    with open(tempMddDir+"/1/"+filename,"r") as page:
        ana(page.read())

# Write one "<key>,<count>" line per entry produced by dictToOrderList().
# The with-block guarantees the log is flushed and closed.
with open("d:/log/mmd.log","wb") as f:
    for d in dictToOrderList(class_dict):
        f.write(d[0] + "," + str(d[1])+ "\r\n")
"""
res = dbconn.query("select distinct pid from mfwmdd where pid is not null")
comp = re.compile(u"<title>(.+)地区旅游地图")
for r in res:
    pid = r.pid
    html = open(tempMddDir+"/"+str(pid)[0]+"/"+str(pid)+".html","r").read()
    soup = BeautifulSoup(html,from_encoding="utf8")
    name = comp.search(unicode(soup.title)).group(1)
    dbconn.insert("mfwpid",pid=pid,name=name)
    
예제 #5
0
# coding:utf-8
from bs4 import BeautifulSoup
import bs4, os
from publicsettings import tempDir, dbconn


# Count how often each distinct action text occurs across the saved pages in
# tempDir, then store one row per text in the "mfwaction" table.
moveDict = {}

htmlfiles = os.listdir(tempDir)
for htmlfile in htmlfiles:
    # Parse inside the with-block so the page handle is closed afterwards.
    with open(tempDir + "/" + htmlfile) as html:
        soup = BeautifulSoup(html, from_encoding="utf8")
    news_lists = soup.find_all("div", "news_list")
    for news_list in news_lists:
        con = news_list.find("div", "con")
        if con is None:
            # find() returns None when the entry has no "con" div; skip it
            # instead of failing on con.children.
            continue
        # Build the key from the direct text children only: each stripped,
        # non-empty piece is appended with a "$" prefix.
        text = ""
        for i in con.children:
            if isinstance(i, bs4.element.NavigableString):
                i = i.strip().encode("utf8")
                if i != "":
                    text = text + "$" + i
        if text != "":
            moveDict[text] = moveDict.get(text, 0) + 1
for m in moveDict:
    dbconn.insert("mfwaction", name=m, count=moveDict[m])