Ejemplo n.º 1
0
 def geturls(cls,key,urlclass):
     """Return the list of URLs for the spiders (currently always empty).

     The MongoDB lookup that would populate the list is commented out, so
     this only calls ``DBUtil.get_db()`` and hands back an empty list.

     :param key: filter for the disabled ``appmeta`` lookup; currently unused.
     :param urlclass: name of the document field holding the URL; currently
         unused.
     :return: a new empty list.
     """
     DBUtil.get_db()
     # Disabled lookup, kept for reference (LIMIT was a local set to 100):
     #res = DBUtil.db.appmeta.find(key, {"url":1,"md5":1,"_id":0},limit=LIMIT)
     #for row in res:
     #    urls.append(row[urlclass])
     #    cls.lock(row['md5'])
     return []
Ejemplo n.º 2
0
def commenturls():
    """Collect ``comment_url`` values from matching ``appmeta`` documents.

    Queries at most 10000 documents with market ``waptw`` whose ``name`` and
    ``ulock`` match ``None``.

    :return: list of the ``comment_url`` field of each matched document, in
        cursor order.
    """
    query = {"$or":[{"market":'waptw'}],"name":None,"ulock":None}
    projection = {"comment_url":1,"md5":1,"_id":0}
    cursor = DBUtil.get_db().appmeta.find(query, projection, limit=10000)
    # Nothing is written back here: the per-document "lock" increment that
    # accompanied this loop is disabled.
    return [doc['comment_url'] for doc in cursor]
Ejemplo n.º 3
0
def updateurls():
    """Collect the ``url`` of ``appmeta`` documents flagged ``avaiable == 1``.

    At most ``LIMIT`` documents are read (``LIMIT`` is defined outside this
    function).

    :return: list of the ``url`` field of each matched document, in cursor
        order.
    """
    # "avaiable" (sic) is the field name as stored in the database.
    query = {"avaiable":1}
    projection = {"url":1,"md5":1,"_id":0}
    cursor = DBUtil.get_db().appmeta.find(query, projection, limit=LIMIT)
    # Nothing is written back here: the per-document "lock" increment that
    # accompanied this loop is disabled.
    return [doc['url'] for doc in cursor]
Ejemplo n.º 4
0
def contenturls():
    """Collect URLs of unprocessed ``appmeta`` documents and mark them.

    Reads at most ``LIMIT`` documents (``LIMIT`` is defined outside this
    function) whose ``name`` and ``avaiable`` match ``None``. For each one,
    its ``url`` is collected and ``avaiable`` is set to 0 on the document
    with the same ``md5``.

    :return: list of the ``url`` field of each matched document, in cursor
        order.
    """
    collection = DBUtil.get_db().appmeta
    # "avaiable" (sic) is the field name as stored in the database.
    # An earlier variant of this query also restricted "market" to
    # '360' / 'waptw'; it is no longer applied.
    pending = collection.find(
        {"name":None,"avaiable":None},
        {"url":1,"md5":1,"_id":0},
        limit=LIMIT,
    )
    collected = []
    for doc in pending:
        collected.append(doc['url'])
        # Flag the document so the query above no longer matches it.
        collection.update({"md5":doc['md5']},{"$set":{"avaiable":0}})
    return collected
Ejemplo n.º 5
0
def packages():
    """Build download descriptors for ``appmeta`` documents with a package.

    Reads at most ``LIMIT`` documents (``LIMIT`` is defined outside this
    function) that have a ``package_url`` field and ``avaiable == 1``. Each
    package URL is printed to stdout as it is visited.

    :return: list of dicts with keys ``url`` (the document's
        ``package_url``), ``md5`` and ``category_general`` (always ``"app"``).
    """
    db = DBUtil.get_db()
    # "avaiable" (sic) is the field name as stored in the database.
    res = db.appmeta.find({"package_url":{"$exists":True},"avaiable":1},{"app_id":1,"app_version":1,"market":1,"package_url":1,"md5":1,"_id":0},limit=LIMIT)
    urls = []
    for row in res:
        # Single-argument parenthesised form: prints the same text under
        # Python 2 and is also valid syntax under Python 3.
        print(row['package_url'])
        urls.append({
            'url': row['package_url'],
            'md5': row['md5'],
            'category_general': "app",
        })
    return urls
Ejemplo n.º 6
0
def unlock():
    """Reset ``lock`` from 1 to ``None`` in the ``appmeta`` collection."""
    locked = {"lock":1}
    release = {"$set":{"lock":None}}
    # NOTE(review): no ``multi`` flag is passed, so this presumably updates
    # only the first matching document -- confirm that is intended.
    DBUtil.get_db().appmeta.update(locked, release)
Ejemplo n.º 7
0
 def __init__(self):
     """Set up the database handle and today's date stamp.

     ``self.db`` receives the result of ``mongo.get_db()``; ``self.date``
     receives the current date formatted as ``YYYY-MM-DD``.
     """
     self.db = mongo.get_db()
     now = datetime.now()
     self.date = now.strftime("%Y-%m-%d")