Exemple #1
0
 def get(self):
     """Scrape the cdfangxie.com listing page and e-mail every unseen entry.

     Downloads the listing page, walks every ``<a>`` found inside the element
     with class ``right_cont`` and, for each anchor whose ``title`` is not yet
     in the ``history_list`` entry of the ``data`` PickleShareDB, calls
     ``self.send_email(smtp, title, link)``. Titles whose mail call returned
     a truthy value are appended to the history. The history is written back
     and the SMTP session is closed even if the loop is interrupted.

     Raises:
         SystemExit: with code -1 when ``self.get_smtp()`` returns a falsy
             value (same exit status as before).
     """
     # Function-scope import so this method does not depend on a file-level
     # import that may not exist.
     from urllib.parse import urljoin

     page_url = "https://www.cdfangxie.com/Infor/type/typeid/36.html"
     # NOTE(review): verify=False turns off TLS certificate validation. It is
     # kept because the original did so; confirm it is still required.
     # The timeout keeps a stalled server from hanging the crawler forever.
     html = requests.get(page_url, verify=False, headers=getHeader(), timeout=30)
     bs = BeautifulSoup(html.text, "html.parser")
     tables = bs.find(class_='right_cont')
     if tables is None:
         # find() returns None when nothing matches; bail out with a clear
         # message instead of an AttributeError on .find_all below.
         logger.error("right_cont container not found in %s", page_url)
         return
     tables_a = tables.find_all("a")
     db = PickleShareDB('data')
     history_list = db.get("history_list", [])
     smtpObj = self.get_smtp()
     if not smtpObj:
         logger.error("smtp error")
         # exit() is injected by the site module and is not always available;
         # raising SystemExit directly has the same effect.
         raise SystemExit(-1)
     try:
         for row in tables_a:
             try:
                 title = row["title"]
                 href = row["href"]
             except KeyError:
                 # Anchors lacking either attribute were silently skipped by
                 # the original code as well.
                 continue
             if title in history_list:
                 logger.info("Crawl:%s  result:pass", title)
                 continue
             logger.info("Crawl:%s  result:send_email", title)
             try:
                 # urljoin resolves root-relative, page-relative and absolute
                 # hrefs alike; plain concatenation only handled the first.
                 sent = self.send_email(smtpObj, title, urljoin(page_url, href))
             except Exception:
                 # Best effort per entry: record the failure and move on so
                 # one bad mail does not block the remaining entries.
                 logger.exception("send_email raised for %s", title)
                 continue
             if sent:
                 logger.info("send_email success")
                 history_list.append(title)
             else:
                 logger.info("send_email error")
     finally:
         # Persist what was actually sent and always release the SMTP session.
         try:
             db["history_list"] = history_list
         finally:
             smtpObj.quit()
def test_stress(tmpdir):
    """Stress a PickleShareDB rooted at ``tmpdir`` with writes and deletes.

    Runs 100 rounds over the keys "0" .. "499". Rounds 0, 15, 30, 45 and 60
    delete every key that is present; every other round appends an
    ``(i, j, "proc <pid>")`` tuple to the list stored under the key and bumps
    the ``'hash'`` counter for ``j``. The round number is printed after each
    round, and ``db.uncache()`` is called on every tenth round.
    """
    db = PickleShareDB(tmpdir)
    import time
    import sys

    for i in range(100):
        # Whether this round deletes depends only on i, so decide it once.
        wipe_round = i < 70 and i % 15 == 0
        for j in range(500):
            key = str(j)
            if wipe_round:
                if key in db:
                    del db[key]
            else:
                if j % 33 == 0:
                    # Short pause on every 33rd key.
                    time.sleep(0.02)
                previous = db.get(key, [])
                db[key] = previous + [(i, j, "proc %d" % os.getpid())]
                # 15 is passed as hget's third argument -- presumably the
                # value used when the field is missing; confirm against
                # the PickleShareDB API.
                db.hset('hash', j, db.hget('hash', j, 15) + 1)

        print(i, end=' ')
        sys.stdout.flush()
        if i % 10 == 0:
            db.uncache()
Exemple #3
0
def test_stress(tmpdir):
    """Exercise PickleShareDB under a long mix of sets, deletes and hash ops.

    For each of 100 outer rounds the 500 keys "0" .. "499" are visited in
    order. In rounds 0, 15, 30, 45 and 60 existing keys are removed and
    nothing is written. In all other rounds a ``(i, j, "proc <pid>")`` record
    is appended to the key's list and the ``'hash'`` entry for ``j`` is
    incremented. Progress is printed per round, and the cache is dropped with
    ``db.uncache()`` whenever the round number is a multiple of ten.
    """
    db = PickleShareDB(tmpdir)
    import time
    import sys

    for i in range(100):
        for j in range(500):
            name = str(j)
            deleting = (i % 15 == 0) and (i < 70)
            if deleting:
                # Delete-only round: drop the key if it is there, write nothing.
                if name in db:
                    del db[name]
                continue

            if not j % 33:
                time.sleep(0.02)

            history = db.get(name, [])
            record = (i, j, "proc %d" % os.getpid())
            db[name] = history + [record]

            # The literal 15 is handed to hget as its third argument;
            # presumably a fallback for a missing field -- TODO confirm.
            current = db.hget('hash', j, 15)
            db.hset('hash', j, current + 1)

        print(i, end=' ')
        sys.stdout.flush()
        if i % 10 == 0:
            db.uncache()