from multiprocessing import Process,Pipe
import syslog
import sys
import os
from lib import sorting, wikicount

if __name__ == '__main__':
    # Entry point of the sortMongoHD job: read /home/ec2-user/mongo.csv into
    # an in-memory list of (first column, second column) tuples named SORTME.
    STARTTIME = wikicount.fnStartTimer()
    syslog.syslog('sortMongoHD.csv: starting...')
    DAY, MONTH, YEAR, HOUR, expiretime = wikicount.fnReturnTimes()
    wikicount.fnSetStatusMsg('sortMongoHD', 0)

    # `sed -i 1d` deletes the first line of the CSV in place before it is read
    # (presumably a header row -- confirm against whatever writes the export).
    os.system("sed -i 1d /home/ec2-user/mongo.csv")

    # presumably raised for a recursive sort in lib.sorting -- TODO confirm
    sys.setrecursionlimit(2000)
    n = 7  # number partitions to break into

    SORTME = []
    # The context manager closes the file even if a malformed row raises
    # (e.g. IndexError on a line with fewer than two fields).
    with open("/home/ec2-user/mongo.csv", "r") as IFILE:
        for line in IFILE:
            # NOTE(review): each line normally still ends with its newline
            # here, so strip('"') can only remove a leading quote; a closing
            # quote on the first field and the newline on the last field are
            # left in place. Confirm this is intended.
            line = line.strip('"').split(',')
            HASH = line[1].replace("\"", "")
            rec = (line[0], HASH)
            SORTME.append(rec)

    print('done reading list .... starting mulitple procs')

    # Disabled multi-process sort, kept as found:
    # pconn,cconn=Pipe()
    # lyst=[]
    # p=Process(target=sorting.QuickSortMPListArray,args=(SORTME,cconn,n))
    # p.start()
    # print 'main proc started'
import syslog
from pymongo import Connection
from lib import wikicount

# Setup for the 'threehrrollingavg' job: timer, database handle, the three
# hour keys, the spam id list and the per-language collection names.
EXCEPTIONFILE = '/tmp/zExecption.log'
STARTTIME = wikicount.fnStartTimer()

conn = Connection()
db = conn.wc

# Step back one hour, expand to three hour values, then format each of them.
DAY, MONTH, YEAR, HOUR, expiretimes = wikicount.fnReturnTimes()
HOUR, HOUR2, HOUR3 = wikicount.fnReturnLastThreeHours(
    wikicount.minusHour(int(HOUR))
)
HOUR, HOUR2, HOUR3 = map(wikicount.fnStrFmtDate, (HOUR, HOUR2, HOUR3))

SPAMLIST = []
wikicount.fnSetStatusMsg('threehrrollingavg', 0)
# Distinct _id values of the 'spam' collection.
SPAMCURSOR = db['spam'].find()
SPAMLIST = SPAMCURSOR.distinct('_id')

hourlies, TypeErrors, KeyErrors, z = [], 0, 0, 1

LANGUAGES = wikicount.LList
for lang in LANGUAGES:
    # Accumulator and error counters start fresh for every language.
    hourlies = []
    KeyErrors = TypeErrors = 0

    # Collection names are the language code plus a fixed suffix.
    lang_prefix = str(lang)
    hhdTABLE = lang_prefix + "_hitshourlydaily"
    hdTABLE = lang_prefix + "_hitsdaily"
    outTABLE = lang_prefix + "_threehour"
    lastTABLE = lang_prefix + "_lastrollavg"
from lib import wikicount

# Setup for the 'tophits' job, followed by the start of the per-language pass
# over each "<lang>_mongo.csv.sorted" file.
STARTTIME = wikicount.fnStartTimer()
syslog.syslog('tophits.py: starting...')

DAY, MONTH, YEAR, HOUR, expiretime = wikicount.fnReturnTimes()
DAY, MONTH, HOUR = wikicount.fnFormatTimes(DAY, MONTH, HOUR)
MONTHNAME = wikicount.fnGetMonthName()
HOUR = wikicount.minusHour(int(HOUR))

conn = Connection()
db = conn.wc

RECCOUNT = 1
# Day key has the form YEAR_MONTH_DAY.
DAYKEY = "_".join(map(str, (YEAR, MONTH, DAY)))
PLACEMAP = "hitsplacemap"
wikicount.fnSetStatusMsg('tophits', 0)

LANGLIST = wikicount.getLanguageList()
for lang in LANGLIST:
    lang_name = str(lang)
    PLACEMAP = lang_name + "_mapPlace"
    HITSMAP = lang_name + "_mapHits"

    # A language whose sorted file cannot be opened is logged and skipped.
    try:
        IFILE = open("/home/ec2-user/" + lang_name + "_mongo.csv.sorted", "r")
    except IOError:
        syslog.syslog("Error opening file for " + lang_name)
        continue

    RESULT, RECCOUNT = [], 0
    for line in IFILE:
        # Lines are only split into fields while RECCOUNT is below 1000.
        if RECCOUNT < 1000:
            line = line.strip().split(",")
# NOTE(review): the physical line below is a whole script fragment whose line
# breaks and indentation were lost; it is left byte-for-byte as found.
#
# It opens with the tail of a function body (everything up to `return`) whose
# `def` line is not visible here -- presumably FillTmpHot, which is called
# further along; confirm. That tail computes a hit delta for `item`, looks the
# item up in db.hitsdaily, builds a NEWPOST dict (id, delta, orPlace, title)
# and inserts it into db.tmpHot.
# NOTE(review): the result of db.hitsdaily.find(...) is subscripted with
# ['title']; pymongo's find() returns a cursor, so find_one() was presumably
# intended -- confirm before relying on this path.
#
# After `return`, the script statements: start the timer, log the start, build
# DAYKEY as YEAR_MONTH_DAY, derive COLLECTIONNAME as 'tophits' + DAYKEY, open
# the `wc` database, call db.tmpHot.remove(), query COLLECTIONNAME with find(),
# pass the result to FillTmpHot, log the runtime, set the 'fillTmpHot' status
# to 3 and launch the next job.
# NOTE(review): the start log names 'filltmpHot.py' while the runtime log
# names 'prepop_filltmpHot.py'.
delta=item['Hits']-YHits print 'hello!' nameq=db.hitsdaily.find({'_id':item['_id']}) NEWPOST={'id':item['_id'],'delta':int(delta),'orPlace':item['place'],'title':nameq['title']} db.tmpHot.insert(NEWPOST) return STARTTIME= wikicount.fnStartTimer() wikicount.toSyslog('filltmpHot.py : starting...') DAY,MONTH,YEAR,HOUR,expiretime= wikicount.fnReturnTimes() DAYKEY=str(YEAR)+'_'+str(MONTH)+'_'+str(DAY) COLLECTIONNAME=str('tophits')+DAYKEY conn=Connection() db=conn.wc RECCOUNT=1 NUMRECS=250 wikicount.fnSetStatusMsg('fillTmpHot',0) print COLLECTIONNAME db.tmpHot.remove() RESULT=db[COLLECTIONNAME].find() #RESULT1=db[COLLECTIONNAME].find().limit(NUMRECS).skip(0) #RESULT2=db[COLLECTIONNAME].find().limit(NUMRECS).skip(NUMRECS) #RESULT3=db[COLLECTIONNAME].find().limit(NUMRECS).skip(NUMRECS*2) #RESULT4=db[COLLECTIONNAME].find().limit(NUMRECS).skip(NUMRECS*3) FillTmpHot(RESULT) RUNTIME= wikicount.fnEndTimerCalcRuntime(STARTTIME) wikicount.toSyslog('prepop_filltmpHot.py: runtime is '+str(RUNTIME)+' seconds.') wikicount.fnSetStatusMsg('fillTmpHot',3) wikicount.fnLaunchNextJob('fillTmpHot')