Esempio n. 1
0
def SetURL():
    """Shorten the URL submitted in the ``url`` form field.

    The submitted value is validated against an http(s) URL pattern. If the
    URL is already stored, the existing key is reused; otherwise a new key
    from ``URL.GenerateKey()`` is stored alongside the URL.

    Returns:
        The rendered ``index.html`` template, with ``short`` set to the
        shortened link on success, or with ``error=True`` and ``urlVal`` set
        when the submitted value is empty or fails validation. When a
        database operation fails, a redirect to the ``routes.Error``
        endpoint is returned instead.
    """
    # Get the url
    url = request.form['url']

    # Define valid url regex
    url_regex = re.compile(
        r'^(?:http)s?://'  # http:// or https://
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  #domain...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$',
        re.IGNORECASE)

    # Validate the url. An empty submission is reported like any other
    # invalid value: a view function must always return a response.
    if not url or not url_regex.match(url):
        return render_template("index.html", error=True, urlVal=url)

    # See if there is already a key for the url. The connection is opened
    # inside the try block so that connection failures also reach the error
    # page, and it is released as soon as the lookup is done.
    try:
        with engine.connect() as con:
            sql = text("SELECT * FROM url WHERE url = :url")
            # Bind parameters are passed as a dict; the keyword form is not
            # accepted by newer SQLAlchemy releases.
            existing = con.execute(sql, {"url": url}).first()
    except SQLAlchemyError as e:
        return redirect(
            url_for('routes.Error',
                    title='Error: Unhandled error',
                    msg=type(e).__name__))
    except Exception:
        return redirect(
            url_for('routes.Error', title='Error: Unhandled error'))

    # If there is a key display that link
    if existing is not None:
        return render_template("index.html",
                               short=f'{request.url_root}{existing.key}')

    # Generate a new key
    key = URL.GenerateKey()

    try:
        # Insert the KVP into the database
        db.session.add(URL(key, url))
        db.session.commit()
    except Exception:
        # Leave the session usable after a failed flush/commit.
        db.session.rollback()
        return redirect(
            url_for('routes.Error', title='Error: Unhandled error'))

    # Display the new link from the key
    return render_template("index.html", short=f'{request.url_root}{key}')
Esempio n. 2
0
 def testPopNextURLAndMarkAsVisitedHandlesCount(self):
     """Successive pops return the URL with the larger links_to value first.

     Two URL rows are stored (microsoft with links_to=500, then google with
     links_to=1000); the first pop is expected to yield google and the
     second microsoft.
     """
     # Seed the test database, keeping the original insertion order.
     session = self.database_handler.CreateSession()
     seed_rows = (
         ('http://www.microsoft.com/', 500),
         ('http://www.google.com/', 1000),
     )
     for address, link_count in seed_rows:
         record = URL(address, 1)
         record.links_to = link_count
         session.add(record)
     session.commit()

     worker = CrawlerThread(self.database_handler, None, self.url_lock)

     # First pop.
     first_popped = worker.PopNextURLAndMarkAsVisited()
     self.assertEqual('http://www.google.com/', first_popped)

     # Second pop.
     second_popped = worker.PopNextURLAndMarkAsVisited()
     self.assertEqual('http://www.microsoft.com/', second_popped)
Esempio n. 3
0
 def testHandleHtmlResourceIncrementsLinksTo(self):
     """Handling a page that links to a stored URL bumps its links_to by one.

     A google URL row is stored with links_to=1000, a page containing one
     anchor to that URL is passed to HandleHtmlResource, and the row is
     then expected to be unique with links_to == 1001.
     """
     target = 'http://www.google.com/'

     # Seed the test database with the URL the page will link to.
     session = self.database_handler.CreateSession()
     seeded = URL(target, 1)
     seeded.links_to = 1000
     session.add(seeded)
     session.commit()

     # Build the in-memory page; the handler is given its address via .url.
     page_source = textwrap.dedent("""
     <a href='http://www.google.com/'>Google</a>
     """)
     page = StringIO.StringIO(page_source)
     page.url = 'http://www.test.com'

     # Run the HTML handler on the page.
     worker = CrawlerThread(self.database_handler, None, self.url_lock)
     worker.HandleHtmlResource(page)

     # Exactly one row should exist for the target, with the count bumped.
     matches = session.query(URL).filter(URL.url == target)
     self.assertEqual(1, matches.count())
     stored = matches.first()
     self.assertEqual(1001, stored.links_to)