Example #1
def main():
    if len(sys.argv) != 2:
        print("Invalid command line arguments.")
        print("Usage: python3 diagnose.py <WEBSITE_URL>")
        exit()

    url = sys.argv[1]

    FILEPATH_PREFIX = "data/"
    FILEPATH_TEXT_SUFFIX_CLEAN = "_clean.txt"
    FILEPATH_TEXT_SUFFIX_BLOCK = "_block.txt"
    FILEPATH_IMAGE_SUFFIX_CLEAN = "_clean.png"
    FILEPATH_IMAGE_SUFFIX_BLOCK = "_block.png"

    txt_clean = FILEPATH_PREFIX + url + FILEPATH_TEXT_SUFFIX_CLEAN
    txt_block = FILEPATH_PREFIX + url + FILEPATH_TEXT_SUFFIX_BLOCK
    img_clean = FILEPATH_PREFIX + url + FILEPATH_IMAGE_SUFFIX_CLEAN
    img_block = FILEPATH_PREFIX + url + FILEPATH_IMAGE_SUFFIX_BLOCK

    website_clean = Website(txt_clean, img_clean, "clean")
    website_block = Website(txt_block, img_block, "block")
    pair = WebsitePair(website_clean, website_block)

    # @TODO This if-else section can be expanded as future faults become detectable.
    if page_is_blank(website_clean, website_block, pair):
        print("Page is blank!")

    else:
        print("No faults detected.")
Example #2
    def setUp(self):
        self.single_plan = Plan('Single', 49, 1)
        self.plus_plan = Plan('Plus', 99, 3)

        self.website_1 = Website('https://google.com')
        self.website_2 = Website('https://google.com')
        
        self.customer_1 = Customer('customer_1', '123456789', '*****@*****.**')
Example #3
 def serializeWebsite(self, website):
     if ("sitemap" in website.keys()):
         return Website(website['homepage'],
                        website['input_dict'],
                        website['lastmod'],
                        sitemap=website['sitemap'])
     else:
         return Website(website['homepage'], website['input_dict'],
                        website['lastmod'])
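A hedged sketch of the dict serializeWebsite expects; the keys come from the lookups above, while the values and the 'serializer' object are illustrative assumptions:

record = {
    'homepage': 'https://example.com',
    'input_dict': {},                              # illustrative value
    'lastmod': '2020-01-01',                       # illustrative value
    'sitemap': 'https://example.com/sitemap.xml',  # optional; the else branch covers its absence
}
website = serializer.serializeWebsite(record)      # 'serializer' stands in for the defining object (assumption)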
Example #4
def process_single_website(website_url):
    """Processes a single website and exports to csv string.
	"""
    txt_clean = FILEPATH_PREFIX + website_url + FILEPATH_TEXT_SUFFIX_CLEAN
    txt_block = FILEPATH_PREFIX + website_url + FILEPATH_IMAGE_SUFFIX_BLOCK
    img_clean = FILEPATH_PREFIX + website_url + FILEPATH_IMAGE_SUFFIX_CLEAN
    img_block = FILEPATH_PREFIX + website_url + FILEPATH_IMAGE_SUFFIX_BLOCK

    website_clean = Website(txt_clean, img_clean, "clean")
    website_block = Website(txt_block, img_block, "block")
    pair = WebsitePair(website_clean, website_block)

    print(get_csv_header(website_clean, website_block, pair))
Example #5
def main():
    # Initialize different plans
    single_plan = Plan('Single', 49, 1)
    plus_plan = Plan('Plus', 99, 3)
    infinite_plan = Plan('Infinite', 249, -1)

    # Initialize multiple websites
    website_1 = Website('https://website_1.com')
    website_2 = Website('https://website_2.com')
    website_3 = Website('https://website_3.com')
    website_4 = Website('https://website_4.com')

    # Initialize multiple customers
    customer_1 = Customer('customer_1', '123456789', '*****@*****.**')
    customer_2 = Customer('customer_2', '123456789', '*****@*****.**')
    customer_3 = Customer('customer_3', '123456789', '*****@*****.**')

    # customer_1 subscribed for single_plan
    customer_1.add_subscription(single_plan)
    print("{} has subscribed for {} plan".format(customer_1,
                                                 customer_1.subscription.plan))

    # customer_1 added one website
    customer_1.add_website(website_1)
    print("{} has added website {} as per the {} plan".format(customer_1, \
            customer_1.websites, customer_1.subscription.plan))

    # customer_1 cannot add more websites on the single_plan
    customer_1.add_website(website_2)
    print("{} can't add website {} as per the {} plan".format(customer_1, \
            website_2, customer_1.subscription.plan))

    # customer_1 can change plan from single_plan to plus_plan
    customer_1.change_plan(plus_plan)
    print("{} has changed his current plan {} to {} plan".format(customer_1, \
            single_plan, customer_1.subscription.plan))

    # customer_2 subscribes to the infinite_plan
    customer_2.add_subscription(infinite_plan)

    # customer_2 can add multiple websites
    customer_2.add_website(website_1)
    customer_2.add_website(website_2)
    customer_2.add_website(website_3)
    customer_2.add_website(website_4)

    print("{} has added four websites {} under infinite plan".format(customer_2, \
            customer_2.websites))
Example #6
def main():
    logger.info("Cartriage v5.0")
    parser = argparse.ArgumentParser(
        description="Retrieves information from printers.")
    parser.add_argument(
        "l",
        type=open,
        metavar="printers",
        help="Text file containing printer IP addresses, one for each line.")
    parser.add_argument("o",
                        metavar="output",
                        help="Filename for resulting HTML page.")
    parser.add_argument("-v", action="store_true", help="Enable verbose mode.")
    try:
        args = parser.parse_args()
        if args.v:
            logger.info("Enabled verbose mode")
            logger.setLevel(logging.DEBUG)
        logger.debug(args)
        startTime = time.time()
        scanned, successfullyScanned, printers = runScan(args.l)
        elapsedTime = "%d seconds" % (time.time() - startTime)
        site = Website(scanned, successfullyScanned, printers, elapsedTime)
        with open(args.o, "w") as output:
            output.write(str(site))
        logger.info("Done! Results available in file: %s" % args.o)
        sys.exit(0)
    except IOError as e:
        logger.error(str(e))
        sys.exit(1)
Example #7
    def startup(self):
        """
        Some stuff that should get called after everything is loaded.
        """
        self.env.seishub.startup()
        self.nw_tree.startup()

        # Connect some slots.
        QtCore.QObject.connect(self.nw_tree.nw_select_model,
                               QtCore.SIGNAL("selectionChanged(QItemSelection, QItemSelection)"), \
                               self.waveforms.waveform_scene.add_channel)

        web = Website(env=self.env)
        web.startup()
        # Add a WebView to later display the map.
        with open(os.path.join(self.env.temp_res_dir, 'map.html')) as html_file:
            html = html_file.read()
        self.env.web.setHtml(html)
        self.picks.update()

        css_url = QtCore.QUrl.fromLocalFile(os.path.abspath(self.env.css))

        server = '%s/manage/seismology/stations' % self.env.seishub_server
        url = QtCore.QUrl(server)
        url.setUserName(self.env.seishub_user)
        url.setPassword(self.env.seishub_password)
        # Might work with some Qt version...
        self.env.station_browser.page().settings().setUserStyleSheetUrl(css_url)
        self.env.station_browser.load(url)
        self.env.station_browser.page().settings().setUserStyleSheetUrl(css_url)
Example #8
    def go(self):
        self.work_pages(self.site)

        self.session.add(Website(url=self.site, title='', domain=self.site,
                                 pages_count=self.pages_count, HTML_version=0.0))

        self.session.commit()
Example #9
    def get_website(self, url: str, check_interval: int):
        """
        Instantiates Website instance. Safely returns 
        instance or None depending on success.

        PARAMETERS: check_interval: Positive integer in seconds.
                        Ping refresh freuency e.g. 30 would 
                        equate to check every 30 seconds
                    url: String e.g. http://google.fr
        Instantiates Website instance.

        RETURNS: Website instance or None.
        """
        try:

            website = Website(url=url, check_interval=check_interval)

        except Exception:
            print(
                "I wasn't able to connect with that URL.\n"
                + "Please revise it, including 'http://'"
                + " or 'https://' as appropriate)."
            )
            return None

        return website
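A brief usage sketch of get_website; the enclosing 'monitor' object is an assumption standing in for whatever class defines the method:

# 'monitor' is a placeholder for the object that defines get_website (assumption).
site = monitor.get_website(url="http://google.fr", check_interval=30)
if site is None:
    print("Could not build a Website for that URL; nothing will be monitored.")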
Example #10
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ticker = dynamodb.Attribute(
            name='Ticker',
            type=dynamodb.AttributeType.STRING,
        )

        date = dynamodb.Attribute(
            name='Date',
            type=dynamodb.AttributeType.STRING,
        )

        table = dynamodb.Table(
            self,
            'StockHistory',
            partition_key=ticker,
            sort_key=date,
            billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
            removal_policy=core.RemovalPolicy.DESTROY,
            point_in_time_recovery=True,
        )

        index_name = 'Date-index'
        table.add_global_secondary_index(
            index_name=index_name,
            partition_key=date,
            sort_key=ticker,
            projection_type=dynamodb.ProjectionType.INCLUDE,
            non_key_attributes=['Name'])

        Importer(self, 'Importer', table=table)
        restapi = RestApi(self, 'Api', table=table, index_name=index_name)
        Website(self, 'Website', api=restapi.api)
Example #11
def home():
    if request.method == 'POST':
        Website(request.form['url'])
    Website.check_all()
    return render_template(
        "home.html",
        pages=Website.all,
        length=len(Website.all)
    )
Example #12
def test_fix_link(link, hostname, scheme, result):
    mock_parsed_url = Mock()
    mock_parsed_url.hostname = hostname
    mock_parsed_url.scheme = scheme
    mock_parsed_url.netloc = hostname
    website = Website('seed_url')
    assert website.fix_link(link, mock_parsed_url) == (
        result, hostname
    )
Example #13
    def getWebsites(self):
        websites = dict()

        with open('data1/websites.json') as data_file:
            websitesData = json.load(data_file)['websites']

        for website in websitesData:
            websites[website['id']] = Website(website)

        return websites
Example #14
def test_scrape(monkeypatch, page_content, links, to_visit):
    mock_response = Mock()
    mock_response.text = page_content
    mock_response.status_code = 200
    monkeypatch.setattr('website.requests.get', lambda x: mock_response)
    website = Website('http://hostname/url')
    website.scrape()
    # pages are 'http://hostname/url', 'http://hostname/new-url',
    # 'https://hostname/', 'http://hostname/', 'https://hostname/new-url'
    assert len(website.pages) == 5
Example #15
def test_scrape_url(monkeypatch, page_content, links, to_visit):
    mock_response = Mock()
    mock_response.text = page_content
    mock_response.status_code = 200
    monkeypatch.setattr('website.requests.get', lambda x: mock_response)
    website = Website('http://hostname/url')
    # Simulate visiting the page.
    url, _ = website.to_visit.popitem()
    website.scrape_url(url)
    assert website.to_visit == OrderedDict((key, None) for key in to_visit)
    assert website.pages[url].links == links
Example #16
def process_manifest():
    """Processes all websites in the manifest.
	"""
    m = manifest.MANIFEST
    for i in range(0, len(m)):
        entry = m[i]

        txt_clean = FILEPATH_PREFIX + entry[0] + FILEPATH_TEXT_SUFFIX_CLEAN
        txt_block = FILEPATH_PREFIX + entry[0] + FILEPATH_TEXT_SUFFIX_BLOCK
        img_clean = FILEPATH_PREFIX + entry[0] + FILEPATH_IMAGE_SUFFIX_CLEAN
        img_block = FILEPATH_PREFIX + entry[0] + FILEPATH_IMAGE_SUFFIX_BLOCK

        website_clean = Website(txt_clean, img_clean, "clean")
        website_block = Website(txt_block, img_block, "block")
        pair = WebsitePair(website_clean, website_block)

        if i == 0:
            print(get_csv_header(website_clean, website_block, pair))

        print(get_csv_string(website_clean, website_block, pair))
Example #17
    def generate_text(self, sites_file, search_limit, keep_to_sites):
        with open(sites_file, "r") as f:
            for site in f.readlines():
                site = site.strip()  # drop the trailing newline before using the URL
                sys.stderr.write("Working on: " + site + '\n')
                ws = Website(home_page=site,
                             search_limit=search_limit,
                             keep_to_site=keep_to_sites)
                self.sites.append(ws)

        for site in self.sites:
            for link in site.links:
                self.text += site.get_page_text(link)
Example #18
    def __init__(self,
                 url,
                 keywords=None,
                 searchPageLimit=2,
                 websitesJsonFile="websites.json",
                 isInitialCrawl=False):

        # initialize class attributes
        self.baseUrl = url
        self.keywords = keywords
        self.articleLinks = []
        self.articleCount = 0
        self.searchPageLimit = searchPageLimit
        self.websitesJsonFile = websitesJsonFile
        self.isInitialCrawl = isInitialCrawl

        # instantiate a Website object to interact with the website to be crawled
        try:
            self.website = Website(url, websitesJsonFile=self.websitesJsonFile)

        # raise exception if there is an error connecting to the website
        except WebsiteFailedToInitialize:
            raise WebsiteFailedToInitialize(url)

        # open the json file containing websites and their attributes
        with open(self.websitesJsonFile) as data_file:
            self.websites = json.load(data_file)

        # set the searchQuery attribute to the appropriate search query structure in the websites json file
        for website, attributes in self.websites.items():
            if website in self.baseUrl:
                self.searchQuery = attributes["searchQuery"]
                self.nextPageType = attributes["nextPage"]

        # populate the exceptions attribute with websites whose article URLs need to be
        # crawled manually
        self.exceptions = [
            "https://www.ourmidland.com/", "https://www.lakecountystar.com/",
            "https://www.northernexpress.com/", "https://www.manisteenews.com/"
        ]

        print("\r" + bcolors.OKGREEN + "[+]" + bcolors.ENDC + " Crawling " +
              self.baseUrl + "..." + bcolors.ENDC,
              end="")
        sys.stdout.flush()

        # start crawling
        self.crawl()

        print("\r" + bcolors.OKGREEN + "[+]" + bcolors.ENDC + " Crawled " +
              self.baseUrl + ": " + bcolors.OKGREEN +
              str(len(self.articleLinks)) + " URLs retrieved" + bcolors.ENDC)
Example #19
def read_file(filename):
    """
    Reads a file and returns a list of Website objects
    """
    lines = []
    with open(filename) as file:
        for line in file:
            url, interval = line.split(' ')
            interval = int(interval)
            website = Website(url, interval)
            lines.append(website)
    return lines
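read_file expects one "url interval" pair per line; a small hedged sketch of creating such a file and loading it (this relies on the Website class used above):

# Hypothetical input file: a URL and a check interval in seconds on each line.
with open("websites.txt", "w") as sample:
    sample.write("http://example.com 30\n")
    sample.write("https://example.org 60\n")

sites = read_file("websites.txt")  # returns a list of Website objects
print(len(sites))                  # -> 2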
Example #20
 def getSites(self):
     global conn
     global cur
     cur.execute("SELECT * FROM sites")
     sitesData = cur.fetchall()
     allSiteObjs = []
     for site in sitesData:
         siteObj = Website(site['id'], site['name'], site['url'],
                           site['searchUrl'], site['resultListing'],
                           site['resultUrl'], site['absoluteUrl'],
                           site['pageTitle'], site['pageBody'])
         allSiteObjs.append(siteObj)
     return allSiteObjs
Example #21
def test_find_links(page_content, hostname, scheme, links, to_visit):
    mock_parsed_url = Mock()
    mock_parsed_url.hostname = hostname
    mock_parsed_url.scheme = scheme
    mock_parsed_url.netloc = hostname
    website = Website('http://hostname/url')
    # Simulate visiting the page.
    website.to_visit.popitem()
    page = Page('a_url')
    bs = BeautifulSoup(page_content, 'html.parser')
    website.find_links(page, bs, mock_parsed_url)
    assert page.links == links
    assert website.to_visit == OrderedDict((key, None) for key in to_visit)
Example #22
def run_website():
    website = Website()

    @website.route('/')
    def index():
        return 200, 'users list'

    @website.route('/users/([0-9]+)')
    def user(user_id):
        if user_id not in ['1', '2']:
            return 404, ''
        return 200, f'user {user_id}'

    website.run(_ADDRESS)
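For reference, with the routes registered above the dispatch behaves roughly as sketched below; _ADDRESS and the serving details are defined elsewhere and are not reproduced here:

# Illustrative mapping only, derived from the route handlers above:
#   GET /          -> index()   -> 200, 'users list'
#   GET /users/1   -> user('1') -> 200, 'user 1'
#   GET /users/9   -> user('9') -> 404, ''   (only ids '1' and '2' are known)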
Example #23
    def __init__(self):
        self.w = Website()
        self.root = Tk()
        self.root.title("Auto site - Enter the fields")
        self.my_font = tkFont.Font(family="Helvetica", size=11)

        self.frame = Frame(self.root, height=800, width=800, \
                           padx=50, pady=10)
        self.frame.pack()

        self.fields()
        self.buttons()

        self.root.mainloop()
Example #24
def test_stocks():
    # READ IN ALL SP500 and NASDAQ INFO
    nasdaq = READ("nasdaq.txt", ".")
    sp100 = READ("sp500.txt")

    for stock in nasdaq.splitlines():
        print("NASDAQ SEARCH: " + stock)
        url = wikipedia.wiki_search(stock)
        webpage = Website(url)
        webpage.set_directory("./wikipedia/")
        html = webpage.get_html()
        xml = webpage.get_xml()
        websites.append(webpage)

    for stock in sp100.splitlines():
        print("EVALUATING STOCK SP500 " + stock)
        wikipedia.wiki_search(stock)
Example #25
def test_city():
    cities = []
    for city in URL_CITY_ARRAY:
        location = Website(city)
        location.set_directory('./wikipedia/')
        html = location.get_html()
        cities.append(location)
    """
    cities = []
    fact_book = []
    #for country in CIA_FACT_BOOK:
    #    CIA.cia_indexer(Website(country))
    for city in URL_CITY_ARRAY:
        cities.append(Website(city))
    for city in cities:
        wikipedia.wiki_study_city(city)
    """
    return
Example #26
def analyse_URL(jsonData):
    """
    Decide whether a website is phishing using its keywords and a Google search
    based on those.

    Parameters
    ----------
    jsonData: contains site data

    """

    ws = Website(json=jsonData)

    print(datetime.now().strftime("%H:%M:%S.%f") + "-- building vector")

    # build feature vector

    feat_vec_temp = {}
    feat_vect_site = build_feat_vec.feature_vector(extractor, ws)
    feat_vec_temp[0] = feat_vect_site
    feat_vect = DataFrame(feat_vec_temp)
    feat_vect = feat_vect.transpose().fillna(0)

    # prediction using gradient boosting
    exp = "238"

    features = feat_vect.columns

    print(datetime.now().strftime("%H:%M:%S.%f") +
          "-- vector done, start gradient boosting:")

    scoregb, predictiongb = _predict_gb(1, feat_vect, features, exp)
    gb_results = scoregb, predictiongb

    print(datetime.now().strftime("%H:%M:%S.%f") + "-- gradient done")
    global keep_track
    if keep_track:
        if gb_results[1] == 1:
            JSONtoFile(jsonData, True, jsonData['siteid'])
        else:
            JSONtoFile(jsonData, False, jsonData['siteid'])

    return gb_results, jsonData['jspageid'], jsonData['siteid']
Example #27
def target_analyse(data):

    json_data = {'jspageid': data['jspageid']}
    json_data['siteid'] = data['siteid']

    ws = Website(json=data)
    target_identity = identify_target(ws)

    mld = '.'.join(split_mld_ps(data['landurl']))

    if mld == target_identity[0]:
        json_data['falsePositive'] = True
    else:
        json_data['falsePositive'] = False

    json_data['target'] = target_identity[0]
    json_data['otherTargets'] = target_identity[1]
    # print('Identified Target: ' + target_identity[0] + "\t/ other potential targets: " + str(target_identity[1]))

    return json_data
Example #28
    def add_site(self, url):
        """
        Enables the user to add a website.

        Parameters:
            url (str): Url of the website.

        Returns:
            Object: Website object with details of the website.
        """
        self.check_auth()
        subscription = database['subscriptions'].get(f'{self.email}')
        check_key(subscription, 'user has no subscription')
        subscription.check_sub()
        site_limit = subscription.plan.limit
        if site_limit and (site_limit == len(subscription.websites)):
            raise ValueError(
                f'Current plan can only allow {site_limit} website(s)')
        new_site = Website(self, url)
        subscription.websites[f'{new_site.id}'].append(new_site)
        subscription.save()
        return new_site
Example #29
    def crawler(self, classifier: webclassifier.Classifier):
        if len(self.website_nodes) == 0:
            print("There are no more websites need to be crawled.")
            return False

        new_website_nodes = []
        for website_node in self.website_nodes:
            for url in website_node.website.soup.find_all('a'):
                try:
                    url.attrs['href']
                except KeyError:
                    continue

                # A website that has already been crawled does not need to be crawled again.
                if url.attrs['href'] in classifier.url:
                    print('This website has already been added to the classifier.')
                    continue

                # Decide which website can be stored
                try:
                    new_website_node = WebsiteNode(Website(url.attrs['href'], get_soup(url.attrs['href'])))
                except Exception:
                    print('Invalid url ', url['href'])
                else:
                    classifier.add_website(new_website_node.website)
                    classifier.cal()
                    new_website_node.website.relevance = -1
                    for seed in classifier.seed_websites:
                        rel = classifier.calculate_web_similarity_by_text(seed, new_website_node.website)
                        new_website_node.website.relevance = max(new_website_node.website.relevance, rel)
                    if new_website_node.website.relevance > self.threshold:
                        print(url.attrs['href'], 'is relevant, relevance is %s' % new_website_node.website.relevance)
                        website_node.child.append(new_website_node)
                        new_website_nodes.append(new_website_node)

        classifier.cal()
        self.website_nodes = new_website_nodes
        return True
Example #30
 def addWebsite(self):
     """
     Add a website to the users data
     :return:
     """
     print(msg.website_add_welc)
     name = input(msg.website_add_name)
     while True:
         url = input(msg.website_add_url)
         if (self.checkUrl(url)):
             break
         print(msg.website_url_inc)
     while True:
         checkInterval = input(msg.website_add_check)
         try:
             checkInterval = int(checkInterval)
             break
         except ValueError:
             print(msg.website_add_check_inc)
     newWebsite = Website(name=name, url=url, checkInterval=checkInterval)
     self.mySites[name] = newWebsite
     # update the data about the user
     data_utils.updateUser(self)
     return