def geturls(cls, key, urlclass):
    """Get URLs for the spiders from MongoDB.

    The lookup that used to fill the result is commented out, so this
    currently only calls ``DBUtil.get_db()`` and returns an empty list.

    Args:
        cls: unused while the lookup is disabled (the disabled code called
            ``cls.lock``).
        key: unused while the lookup is disabled (it was the query filter).
        urlclass: unused while the lookup is disabled (it was the name of
            the document field to collect).

    Returns:
        list: always a new empty list.
    """
    # NOTE(review): presumably called for its side effect of initialising
    # DBUtil.db, which the disabled query reads -- confirm against DBUtil.
    DBUtil.get_db()

    # Disabled lookup, kept for reference. It used a limit of 100 and had a
    # prepared (but unused) projection {urlclass: 1, "md5": 1, "_id": 0}:
    # res = DBUtil.db.appmeta.find(key, {"url":1,"md5":1,"_id":0},limit=LIMIT)
    # for row in res:
    #     urls.append(row[urlclass])
    #     cls.lock(row['md5'])
    return []
def commenturls():
    """Collect comment-page URLs from the ``appmeta`` collection.

    Queries for documents with market 'waptw' whose ``name`` and ``ulock``
    fields are matched against ``None`` (MongoDB matches null or absent
    fields for such a filter), reading at most 10000 documents.

    Returns:
        list: the ``comment_url`` value of each document returned.
    """
    criteria = {"$or": [{"market": 'waptw'}], "name": None, "ulock": None}
    projection = {"comment_url": 1, "md5": 1, "_id": 0}
    cursor = DBUtil.get_db().appmeta.find(criteria, projection, limit=10000)
    # A per-document lock increment used to run here and is disabled:
    # db.appmeta.update({"md5":row['md5']},{"$inc":{"lock":4}})
    return [doc['comment_url'] for doc in cursor]
def updateurls():
    """Collect the URLs of ``appmeta`` documents flagged ``avaiable == 1``.

    Returns:
        list: the ``url`` value of each document returned, at most ``LIMIT``.
    """
    # NOTE(review): LIMIT is not defined in this function; it must come from
    # an enclosing/module scope that is not visible here -- confirm it exists.
    # "avaiable" (sic) is the stored field name and must stay spelled this way.
    criteria = {"avaiable": 1}
    projection = {"url": 1, "md5": 1, "_id": 0}
    cursor = DBUtil.get_db().appmeta.find(criteria, projection, limit=LIMIT)
    # A per-document lock increment used to run here and is disabled:
    # db.appmeta.update({"md5":row['md5']},{"$inc":{"lock":2}})
    return [doc['url'] for doc in cursor]
def contenturls():
    """Collect URLs of unprocessed ``appmeta`` documents and flag them.

    Reads up to ``LIMIT`` documents whose ``name`` and ``avaiable`` fields
    are matched against ``None``. For each one, its ``url`` is collected and
    ``avaiable`` is set to 0 on the document(s) matched by its ``md5``.

    Returns:
        list: the ``url`` value of each document read.
    """
    appmeta = DBUtil.get_db().appmeta

    # An earlier variant of the filter also restricted the market:
    # {"$or":[{"market":'360'},{"market":'waptw'}],"name":None,"avaiable":None}
    # "avaiable" (sic) is the stored field name and must stay spelled this way.
    pending = {"name": None, "avaiable": None}
    projection = {"url": 1, "md5": 1, "_id": 0}

    collected = []
    # NOTE(review): LIMIT is not defined in this function; it must come from
    # an enclosing/module scope that is not visible here.
    for doc in appmeta.find(pending, projection, limit=LIMIT):
        collected.append(doc['url'])
        # Setting avaiable to 0 takes the document out of the `pending`
        # filter, so a later call does not return it again.
        appmeta.update({"md5": doc['md5']}, {"$set": {"avaiable": 0}})
    return collected
def packages():
    """Build download descriptors for ``appmeta`` documents with a package.

    Reads up to ``LIMIT`` documents that have a ``package_url`` field and
    ``avaiable == 1``. Each package URL is printed to stdout as it is read.

    Returns:
        list: one dict per document with the keys ``url`` (the package URL),
        ``md5`` and ``category_general`` (always "app").
    """
    # "avaiable" (sic) is the stored field name and must stay spelled this way.
    criteria = {"package_url": {"$exists": True}, "avaiable": 1}
    projection = {
        "app_id": 1,
        "app_version": 1,
        "market": 1,
        "package_url": 1,
        "md5": 1,
        "_id": 0,
    }
    # NOTE(review): LIMIT is not defined in this function; it must come from
    # an enclosing/module scope that is not visible here.
    cursor = DBUtil.get_db().appmeta.find(criteria, projection, limit=LIMIT)

    descriptors = []
    for doc in cursor:
        package_url = doc['package_url']
        # Single-argument parenthesised print behaves the same as the
        # Python 2 print statement.
        print(package_url)
        descriptors.append({
            'url': package_url,
            'md5': doc['md5'],
            'category_general': "app",
        })
    return descriptors
def unlock():
    """Clear the lock on every ``appmeta`` document whose ``lock`` is 1.

    Sets ``lock`` to ``None`` on all matching documents. Returns nothing.
    """
    db = DBUtil.get_db()
    # Legacy Collection.update() modifies only the first matching document
    # unless multi=True; without it a single arbitrary lock was released and
    # every other locked document stayed locked.
    db.appmeta.update({"lock": 1}, {"$set": {"lock": None}}, multi=True)
def __init__(self):
    """Attach a database handle and the current date string to the instance.

    Sets ``self.db`` to the result of ``mongo.get_db()`` and ``self.date``
    to the current local date formatted as ``YYYY-MM-DD``.
    """
    self.db = mongo.get_db()
    now = datetime.now()
    self.date = now.strftime("%Y-%m-%d")