def add_website(self, name, url, check_interval):
    """Add a website to the list of websites to check. Usage: add name url check_interval"""
    self.lock.acquire()
    if name in self.websites:
        print_command("Website {} already exists.".format(name))
    else:
        self.websites[name] = website.Website(
            name, url, int(check_interval), self.config)
    self.lock.release()
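One possible refinement, not part of the original snippet: if self.lock is a standard threading.Lock (an assumption here), the acquire/release pair can be written as a with block so the lock is released even if the website.Website constructor raises. A minimal sketch under that assumption:

import threading

# Hypothetical variant of add_website; assumes self.lock is a threading.Lock
# and that website.Website and print_command behave as in the snippet above.
def add_website(self, name, url, check_interval):
    """Add a website to the list of websites to check. Usage: add name url check_interval"""
    with self.lock:  # released automatically, even if Website() raises
        if name in self.websites:
            print_command("Website {} already exists.".format(name))
        else:
            self.websites[name] = website.Website(
                name, url, int(check_interval), self.config)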
def main(wf):
    website_obj = website.Website()
    if wf.args is None or wf.args == []:
        query = ""
    else:
        query = wf.args[0].strip().replace("\\", "")
    sites = website_obj.query(query)
    for site in sites:
        wf.add_item(title=site["name"],
                    subtitle=site["url"],
                    arg=site["url"],
                    valid=True)
    wf.send_feedback()
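The snippet above reads like an Alfred Script Filter. Assuming it is built on the third-party Alfred-Workflow package (an assumption; the import is not shown in the excerpt), the usual entry point hands main to Workflow.run, which supplies the wf object:

import sys
from workflow import Workflow  # assumption: the Alfred-Workflow package

if __name__ == '__main__':
    wf = Workflow()          # provides wf.args, wf.add_item, wf.send_feedback
    sys.exit(wf.run(main))   # run() calls main(wf) and reports errors to Alfred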
def main(args):
    while True:
        tg_api = telegram_api.TelegramApi()
        updates = tg_api.get_updates()
        if updates:
            ws = website.Website(args.db_file)
            ws.insert(updates)
            with open(args.output_html, "w") as fout:
                print(ws.render(args.chat_id, args.author, args.title), file=fout)
        if args.loop:
            logging.info(f"Sleeping {args.loop} seconds before next iteration")
            time.sleep(args.loop)
        else:
            break
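main expects an args namespace with db_file, output_html, chat_id, author, title, and loop. The original parser is not shown; a hypothetical argparse setup that would satisfy those attribute names (flag names and defaults are illustrative only) might look like this:

import argparse

def parse_args():
    # Hypothetical parser; only the attribute names are taken from the snippet above.
    parser = argparse.ArgumentParser(description="Render Telegram updates to an HTML page")
    parser.add_argument("--db-file", required=True)
    parser.add_argument("--output-html", required=True)
    parser.add_argument("--chat-id", required=True)
    parser.add_argument("--author", default="")
    parser.add_argument("--title", default="")
    parser.add_argument("--loop", type=int, default=0,
                        help="seconds to sleep between iterations; 0 runs once")
    return parser.parse_args()

if __name__ == "__main__":
    main(parse_args())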
def scrape_sites(spreadsheet, controls, keywords):
    """
    Performs DFS on a series of unexplored sites and stores the data in the results worksheet.

    Parameters:
        spreadsheet (Spreadsheet): the target spreadsheet
        controls (dict): contains the contents of the controls sheet
        keywords (set): the set of all keywords
    """
    website_data = get_website_data_for_scrape(spreadsheet,
                                               controls['num_websites_per_session'])
    for data in website_data:
        domain = data[0]
        base_url = data[1]
        query = data[2]
        row_num = data[3]
        print('scraping from', base_url)
        site = website.Website(base_url, domain, query, row_num)
        site.explore_links(deque([site.base_url]), keywords,
                           max_pages=controls['num_pages_per_website'])
        store_scrape_results(spreadsheet, site, controls['auto_email'])
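The loop above indexes each row positionally, so the assumed (not shown) return shape of get_website_data_for_scrape is a sequence of (domain, base_url, query, row_num) rows. The values below are hypothetical and only illustrate that shape; the snippet also relies on deque from the standard library:

from collections import deque  # needed for the deque([site.base_url]) call above

# Hypothetical example row, matching the data[0]..data[3] assignments:
example_row = ('example.com', 'https://example.com/', 'example query', 7)
domain, base_url, query, row_num = example_row  # tuple unpacking is an equivalent spelling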
@Time: 2020/3/24 14:08
"""
import webclassifier
import webcrawler
import website

if __name__ == "__main__":
    seed_web_url = ['https://bohaowu.github.io/BohaoWu/index_en.html']
    seed_web = []
    crawlers = []

    # Crawl the seed websites.
    for url in seed_web_url:
        web = website.Website(url, webcrawler.get_soup(url),
                              './web_source/' + url, True, 1)
        seed_web.append(web)
    print('Seed websites have been crawled.')

    # Add the seeds to the classifier.
    classifier = webclassifier.Classifier(seed_web)

    # Each seed website is used as the root of a crawler.
    for seed in seed_web:
        crawler = webcrawler.Crawler()
        crawler.set_root_website(seed)
        crawlers.append(crawler)
    print('Crawlers have been built.')

    # Run the crawlers!
    round_count = 1
def testAlerting(self):
    # Adapt the user to an existing user
    username = '******'
    usr = user.User(username)
    testSite = website.Website(
        name='test',
        url='https://support.google.com/merchants/answer/160038?hl=en')
    usr.mySites['test'] = testSite
    print(usr)

    # Queues
    queueTwoMin = Queue()
    queueTenMin = Queue()
    queueHour = Queue()
    queueAlerts = Queue()

    # Triggers the monitoring
    testProcess = Process(target=monitor.startMonitor,
                          args=(usr, queueTwoMin, queueTenMin, queueHour,
                                queueAlerts))
    testProcess.start()

    # Wait for some time
    print('Waiting a bit…')
    print(str(usr.mySites['test']))
    time.sleep(120)

    # End the process
    testProcess.terminate()

    # Put invalid url in the sites
    websitename = list(usr.mySites.keys())[0]
    usr.mySites[websitename].url = 'https://support.google.com/answer/160038?hl=en'
    print(str(usr.mySites[websitename]))

    # Retriggers the monitoring
    testProcess = Process(target=monitor.startMonitor,
                          args=(usr, queueTwoMin, queueTenMin, queueHour,
                                queueAlerts))
    testProcess.start()

    # Wait for some time
    print('Waiting for the alert DOWN to come up')
    time.sleep(200)

    # End the process
    testProcess.terminate()

    # Get the Alert down - Make it possible to raise the up alert
    # Up alerts can't be raised if a down alert is not present in the alertDic
    alertDown = queueAlerts.get()
    testDic = {'test': alertDown}
    queueAlerts.put(alertDown)

    # Put valid url in the sites
    usr.mySites[websitename].url = 'https://support.google.com/merchants/answer/160038?hl=en'
    print(str(usr.mySites[websitename]))

    # Retriggers the monitoring
    testProcess = Process(target=monitor.startMonitor,
                          args=(usr, queueTwoMin, queueTenMin, queueHour,
                                queueAlerts, testDic))
    testProcess.start()

    # Wait for some time
    print('Waiting for the alert UP to come up')
    time.sleep(300)

    # End the process
    testProcess.terminate()

    # Get all the alerts triggered
    alertsTriggered = []
    testAlerts = []
    while not queueAlerts.empty():
        alertsTriggered.insert(0, queueAlerts.get())

    # Get all the alerts tested, there may be some more
    for alert in alertsTriggered:
        if alert['website'] == websitename:
            testAlerts.append(alert)

    # Get the Status of the alerts tested
    statusTestedAlerts = []
    for alert in testAlerts:
        statusTestedAlerts.append(alert['status'])

    print(alertsTriggered)
    print(testAlerts)

    # Assertions - Only 2 alerts, one up one down
    self.assertEqual(len(testAlerts), 2)
    self.assertTrue('UP' in statusTestedAlerts)
    self.assertTrue('DOWN' in statusTestedAlerts)
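The method above is shown without its surrounding test class or imports. A minimal scaffold under which it would run, assuming the project-local user, website, and monitor modules are importable (the class name below is hypothetical), could look like:

import time
import unittest
from multiprocessing import Process, Queue

import monitor   # assumption: project-local modules referenced by the test
import user
import website

class MonitorAlertingTest(unittest.TestCase):  # hypothetical class name
    pass  # the testAlerting method shown above would be defined here

if __name__ == '__main__':
    unittest.main()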