def add_website(self, name, url, check_interval):
    """Add a website to the set of monitored websites.

    Usage: add <name> <url> <check_interval>

    Args:
        name: unique key for the website; a duplicate name is rejected
            with a message instead of being overwritten.
        url: address of the site to monitor.
        check_interval: polling period; coerced with int(), so a
            non-numeric value raises ValueError.
    """
    # Use the lock as a context manager: the original acquire()/release()
    # pair leaked the lock if int(check_interval) or the Website
    # constructor raised between the two calls.
    with self.lock:
        if name in self.websites:
            print_command("Website {} already exists.".format(name))
        else:
            self.websites[name] = website.Website(
                    name, url, int(check_interval), self.config)
Ejemplo n.º 2
0
def main(wf):
    """Alfred script-filter entry point: list websites matching the query.

    Args:
        wf: workflow object exposing ``args``, ``add_item`` and
            ``send_feedback``.
    """
    website_obj = website.Website()
    # The original tested ``wf.args == None`` — comparing to None with
    # ``==`` violates PEP 8 (use ``is None``) and was redundant with the
    # empty-list check; both cases collapse to a single truthiness test.
    if not wf.args:
        query = ""
    else:
        # Strip surrounding whitespace and drop backslashes from the query.
        query = wf.args[0].strip().replace("\\", "")
    sites = website_obj.query(query)
    for site in sites:
        wf.add_item(title=site["name"],
                    subtitle=site["url"],
                    arg=site["url"],
                    valid=True)
    wf.send_feedback()
Ejemplo n.º 3
0
def main(args):
    """Poll Telegram for updates and render them to a static HTML page.

    Runs once, or repeatedly every ``args.loop`` seconds when that
    option is set.
    """
    while True:
        api = telegram_api.TelegramApi()
        new_updates = api.get_updates()

        if new_updates:
            site = website.Website(args.db_file)
            site.insert(new_updates)

            # Render first, then write, so the file handle is held briefly.
            html = site.render(args.chat_id, args.author, args.title)
            with open(args.output_html, "w") as out:
                print(html, file=out)

        # Guard clause: without --loop we run a single iteration.
        if not args.loop:
            break
        logging.info(f"Sleeping {args.loop} seconds before next iteration")
        time.sleep(args.loop)
Ejemplo n.º 4
0
def scrape_sites(spreadsheet, controls, keywords):
    """
    Performs DFS on a series of unexplored sites, store data in results worksheet

    Parameters:
        spreadsheet (Spreadsheet): the target spreadsheet
        controls (dict): contains the contents of the controls sheet
        keywords (set): the set of all keywords
    """
    batch = get_website_data_for_scrape(
        spreadsheet, controls['num_websites_per_session'])
    for entry in batch:
        # Each entry carries domain, base URL, query string and sheet row.
        domain, base_url, query, row_num = (
            entry[0], entry[1], entry[2], entry[3])
        print('scraping from', base_url)
        site = website.Website(base_url, domain, query, row_num)
        site.explore_links(
            deque([site.base_url]), keywords,
            max_pages=controls['num_pages_per_website'])
        store_scrape_results(spreadsheet, site, controls['auto_email'])
Ejemplo n.º 5
0
@Time    :2020/3/24 14:08
"""

import webclassifier
import webcrawler
import website


if __name__ == "__main__":
    # Single seed page used as the root of the focused crawl.
    seed_web_url = ['https://bohaowu.github.io/BohaoWu/index_en.html']
    seed_web = []
    crawlers = []

    # Crawl seed website.
    # NOTE(review): the cache path is './web_source/' + url, i.e. the full
    # URL (scheme and slashes included) becomes part of a filesystem path —
    # confirm webcrawler/website sanitizes it before writing to disk.
    for url in seed_web_url:
        web = website.Website(url, webcrawler.get_soup(url), './web_source/' + url, True, 1)
        seed_web.append(web)
    print('Seed websites have been crawled.')

    # Add seed into classifier
    classifier = webclassifier.Classifier(seed_web)

    # Each seed website is used as root of crawler.
    for seed in seed_web:
        crawler = webcrawler.Crawler()
        crawler.set_root_website(seed)
        crawlers.append(crawler)
    print('Crawlers have been built.')

    # Do the crawler!
    # NOTE(review): the crawl loop that consumes round_count continues
    # beyond this excerpt; only the counter initialization is visible here.
    round_count = 1
Ejemplo n.º 6
0
    def testAlerting(self):
        """End-to-end alerting test: a site goes DOWN then comes back UP.

        Monitors a known-good URL, swaps in a bad URL to trigger a DOWN
        alert, restores the good URL to trigger the matching UP alert,
        and asserts exactly one of each was raised for the test site.
        Relies on real wall-clock sleeps, so this test takes ~10 minutes.
        """

        # Adapt the user to an existing user
        # NOTE(review): username appears scrubbed ('******') — confirm
        # user.User accepts it or substitute a real fixture account.
        username = '******'
        usr = user.User(username)
        testSite = website.Website(
            name='test',
            url='https://support.google.com/merchants/answer/160038?hl=en')
        usr.mySites['test'] = testSite
        print(usr)

        #Queues
        # One queue per reporting cadence, plus one for raised alerts.
        queueTwoMin = Queue()
        queueTenMin = Queue()
        queueHour = Queue()
        queueAlerts = Queue()

        # Triggers the monitoring
        testProcess = Process(target=monitor.startMonitor,
                              args=(usr, queueTwoMin, queueTenMin, queueHour,
                                    queueAlerts))
        testProcess.start()

        # Wait for some time
        print('Waiting a bit…')
        print(str(usr.mySites['test']))
        time.sleep(120)

        # End the process
        testProcess.terminate()

        # Put invalid url in the sites
        # Mutating usr here is safe: the terminated child had its own copy,
        # and the next Process is started with this updated object.
        websitename = list(usr.mySites.keys())[0]
        usr.mySites[
            websitename].url = 'https://support.google.com/answer/160038?hl=en'
        print(str(usr.mySites[websitename]))

        # Retriggers the monitoring
        testProcess = Process(target=monitor.startMonitor,
                              args=(usr, queueTwoMin, queueTenMin, queueHour,
                                    queueAlerts))
        testProcess.start()

        # Wait for some time
        print('Waiting for the alert DOWN to come up')
        time.sleep(200)

        # End the process
        testProcess.terminate()

        # Get the Alert down - Make it possible to raise the up alert
        # Up alerts can't be raised if a down alert is not present in the alertDic
        # Peek the DOWN alert (get then put back) so it both seeds testDic
        # and remains in the queue for the final tally below.
        alertDown = queueAlerts.get()
        testDic = {'test': alertDown}
        queueAlerts.put(alertDown)

        # Put valid url in the sites
        usr.mySites[
            websitename].url = 'https://support.google.com/merchants/answer/160038?hl=en'
        print(str(usr.mySites[websitename]))

        # Retriggers the monitoring
        # testDic pre-loads the DOWN alert so the monitor can emit UP.
        testProcess = Process(target=monitor.startMonitor,
                              args=(usr, queueTwoMin, queueTenMin, queueHour,
                                    queueAlerts, testDic))
        testProcess.start()

        # Wait for some time
        print('Waiting for the alert UP to come up')
        time.sleep(300)

        # End the process
        testProcess.terminate()

        # Get all the alerts triggered
        # Drain the queue; insert(0, ...) restores chronological order.
        alertsTriggered = []
        testAlerts = []
        while not queueAlerts.empty():
            alertsTriggered.insert(0, queueAlerts.get())

        # Get all the alerts tested, there may be some more
        for alert in alertsTriggered:
            if alert['website'] == websitename:
                testAlerts.append(alert)

        # Get the Status of the alerts tested
        statusTestedAlerts = []
        for alert in testAlerts:
            statusTestedAlerts.append(alert['status'])

        print(alertsTriggered)
        print(testAlerts)

        # Assertions - Only 2 alerts, one up one down
        self.assertEqual(len(testAlerts), 2)
        self.assertTrue('UP' in statusTestedAlerts)
        self.assertTrue('DOWN' in statusTestedAlerts)