Beispiel #1
0
    def test_dequeue(self):
        """Dequeue returns the queued entries one by one, then None."""
        database = Database("test.sqlite3")

        # Two placeholder websites; only the second is queued with a post id.
        first_id = database.insert_website(Website("", "", ""))
        second_id = database.insert_website(Website("", "", ""))
        database.enqueue(first_id)
        database.enqueue(second_id, "postid")

        first_task = database.dequeue()
        self.assertEqual(first_task[0], first_id)

        second_task = database.dequeue()
        self.assertEqual(second_task[1], "postid")

        # Once drained, every further dequeue must report None.
        for _ in range(2):
            self.assertEqual(database.dequeue(), None)
Beispiel #2
0
    def test_queue(self):
        """queue() lists every enqueued website with its stored fields."""
        database = Database("test.sqlite3")

        fixtures = (
            ("w1", "i1", "a1"),
            ("w2", "i2", "a2"),
            ("w3", "i3", "a3"),
        )
        for url, ip, useragent in fixtures:
            database.enqueue(
                database.insert_website(Website(url, ip, useragent)))

        queued = database.queue()

        # Spot-check a different field on each entry.
        self.assertEqual(queued[0].url, "w1")
        self.assertEqual(queued[1].logged_ip, "i2")
        self.assertEqual(queued[2].logged_useragent, "a3")
        self.assertIsNotNone(queued[2].last_modified)
        self.assertEqual(len(queued), 3)
Beispiel #3
0
    def try_enqueue(url):
        """Validate a submitted url and, if it passes, queue it as a task.

        The url gets a trailing slash and is passed through
        ``od_util.get_top_directory`` before any check is made.

        Returns:
            A ``(message, category)`` tuple. ``message`` may contain HTML
            markup; ``category`` is ``"warning"``, ``"danger"`` or
            ``"success"``.
        """
        # Add the trailing slash by hand: os.path.join() uses the platform's
        # path separator and would append a backslash on Windows.
        if url and not url.endswith("/"):
            url += "/"
        url = od_util.get_top_directory(url)

        if not od_util.is_valid_url(url):
            return "<strong>Error:</strong> Invalid url. Make sure to include the appropriate scheme.", "warning"

        if db.get_website_by_url(url):
            return "Website already exists", "danger"

        if db.website_exists(url):
            return "A parent directory of this url has already been posted", "danger"

        if db.is_blacklisted(url):
            message = ("<strong>Error:</strong> "
                       "Sorry, this website has been blacklisted. If you think "
                       "this is an error, please <a href='/contribute'>contact me</a>.")
            return message, "danger"

        if not od_util.is_od(url):
            # The space after </strong> was missing, which glued "Error:" to
            # the sentence that follows it.
            message = ("<strong>Error:</strong> "
                       "The anti-spam algorithm determined that the submitted url is not "
                       "an open directory or the server is not responding. If you think "
                       "this is an error, please <a href='/contribute'>contact me</a>.")
            return message, "danger"

        # Record both the direct peer address and any forwarded-for header.
        logged_ip = str(request.remote_addr + "_" +
                        request.headers.get("X-Forwarded-For", ""))
        website_id = db.insert_website(
            Website(url, logged_ip, request.user_agent))

        task = Task(website_id, url, priority=1)
        taskManager.queue_task(task)

        return "The website has been added to the queue", "success"
Beispiel #4
0
def try_enqueue(url):
    """Validate a url and, if it passes, queue it as a priority-2 task.

    The url gets a trailing slash and is passed through
    ``od_util.get_top_directory`` before any check is made. The website is
    recorded with ``"localhost"`` / ``"mass_import.py"`` as its origin.

    Returns:
        A human-readable status message (may contain HTML markup).
    """
    # Add the trailing slash by hand: os.path.join() uses the platform's
    # path separator and would append a backslash on Windows.
    if url and not url.endswith("/"):
        url += "/"
    url = od_util.get_top_directory(url)

    if not od_util.is_valid_url(url):
        return "<strong>Error:</strong> Invalid url. Make sure to include the appropriate scheme."

    if db.get_website_by_url(url):
        return "Website already exists"

    if db.website_exists(url):
        return "A parent directory of this url has already been posted"

    if db.is_blacklisted(url):
        return ("<strong>Error:</strong> "
                "Sorry, this website has been blacklisted. If you think "
                "this is an error, please <a href='/contribute'>contact me</a>.")

    if not od_util.is_od(url):
        # The space after </strong> was missing, which glued "Error:" to the
        # sentence that follows it.
        return ("<strong>Error:</strong> "
                "The anti-spam algorithm determined that the submitted url is not "
                "an open directory or the server is not responding. If you think "
                "this is an error, please <a href='/contribute'>contact me</a>.")

    website_id = db.insert_website(Website(url, "localhost", "mass_import.py"))

    task = Task(website_id, url, priority=2)
    taskManager.queue_task(task)

    return "The website has been added to the queue"
Beispiel #5
0
def try_enqueue(url):
    """Validate a submitted url and, if it passes, add it to the queue.

    A trailing slash is appended to the url before any check is made. The
    database look-ups run before the url validity check, as in the original
    ordering.

    Returns:
        A ``(message, category)`` tuple. ``message`` may contain HTML markup;
        ``category`` is ``"danger"`` or ``"success"``.
    """
    # Add the trailing slash by hand: os.path.join() uses the platform's
    # path separator and would append a backslash on Windows.
    if url and not url.endswith("/"):
        url += "/"

    if db.get_website_by_url(url):
        return "Website already exists", "danger"

    if db.website_exists(url):
        return "A parent directory of this url has already been posted", "danger"

    if not od_util.is_valid_url(url):
        # "http(s)://" precedes the host, so it is a prefix, not a suffix.
        message = ("<strong>Error:</strong> Invalid url. Make sure to include the http(s):// prefix. "
                   "FTP is not supported")
        return message, "danger"

    if od_util.is_blacklisted(url):
        message = ("<strong>Error:</strong> "
                   "Sorry, this website has been blacklisted. If you think "
                   "this is an error, please <a href='/contribute'>contact me</a>.")
        return message, "danger"

    if not od_util.is_od(url):
        # The space after </strong> was missing, which glued "Error:" to the
        # sentence that follows it.
        message = ("<strong>Error:</strong> "
                   "The anti-spam algorithm determined that the submitted url is not "
                   "an open directory or the server is not responding. If you think "
                   "this is an error, please <a href='/contribute'>contact me</a>.")
        return message, "danger"

    web_id = db.insert_website(
        Website(url, str(request.remote_addr), str(request.user_agent)))
    db.enqueue(web_id)

    return "The website has been added to the queue", "success"
Beispiel #6
0
    def test_import_json(self):
        """import_json() stores the files listed in the scan fixture."""
        db = Database("test.sqlite3")

        website = Website("http://google.ca/", "127.0.0.1", "firefox")
        db.import_json("test/test_scan1.json", website)

        # Using a sqlite3 connection as a context manager only commits or
        # rolls back; it never closes the connection, so close it explicitly.
        conn = sqlite3.connect("test.sqlite3")
        try:
            cursor = conn.cursor()

            cursor.execute(
                "SELECT * FROM File WHERE name='Bleach - Chapter 001.cbz'")
            db_file1 = cursor.fetchone()

            cursor.execute(
                "SELECT * FROM File WHERE name='Bleach - Chapter 007.cbz'")
            db_file2 = cursor.fetchone()
        finally:
            conn.close()

        # Column 4 is presumably the file size -- confirm against the schema.
        self.assertEqual(db_file1[4], 8770750)
        self.assertEqual(db_file2[4], 3443820)
Beispiel #7
0
def traitment_link():
    """Check every link, log those answering with a 5xx code, mail the result.

    Each link is validated and then updated with the check time and status
    code. Links whose code is 500 or above are also written to the log and
    collected; the collected list is passed to ``send_courriel`` at the end.
    """
    failures = []
    for entry in get_list_link():
        status = validate_url(entry.link)
        checked_at = date_now()
        if status >= 500:
            get_db().save_log(entry.link, checked_at, status)
            failures.append(
                Website(entry.number, entry.link, checked_at, status))
        # Every link is updated, whether or not it failed.
        update_link(entry, checked_at, status)
    send_courriel(failures)
Beispiel #8
0
    def test_get_website_by_id(self):
        """A stored website is retrievable by id; an unknown id gives None."""
        database = Database("test.sqlite3")
        new_id = database.insert_website(Website("a", "b", "c"))

        fetched = database.get_website_by_id(new_id)

        expected_fields = (
            ("id", new_id),
            ("url", "a"),
            ("logged_ip", "b"),
            ("logged_useragent", "c"),
        )
        for attribute, expected in expected_fields:
            self.assertEqual(getattr(fetched, attribute), expected)

        self.assertIsNone(database.get_website_by_id(999))
Beispiel #9
0
def api_add_website():
    """Insert a website on behalf of an API client.

    Reads ``token`` and ``url`` from the request's query arguments.

    Returns:
        The id of the inserted website, as a string.

    Aborts with 403 when ``db.check_api_token`` does not return a name for
    the token, and with 400 when no ``url`` was supplied.
    """
    token = request.args.get("token")
    url = request.args.get("url")

    name = db.check_api_token(token)
    if not name:
        return abort(403)

    # request.args.get() yields None for an absent parameter; reject it
    # instead of storing a website without a url and then failing on the
    # string concatenation in the log message.
    if not url:
        return abort(400)

    # Record both the direct peer address and any forwarded-for header.
    logged_ip = str(request.remote_addr + "_" +
                    request.headers.get("X-Forwarded-For", ""))
    website_id = db.insert_website(
        Website(url, logged_ip, "API_CLIENT_" + name))
    logger.info("API add website '" + url + "' by " + name + "(" + str(website_id) + ")")
    return str(website_id)
Beispiel #10
0
    def test_select_website(self):
        """A stored website is retrievable by url; an unknown url gives None."""
        database = Database("test.sqlite3")
        target_url = "https://simon987.net/"

        new_id = database.insert_website(
            Website(target_url, "127.0.0.1", "firefox"))

        found = database.get_website_by_url(target_url)

        self.assertEqual(found.url, "https://simon987.net/")
        self.assertEqual(found.logged_ip, "127.0.0.1")
        self.assertEqual(found.logged_useragent, "firefox")
        self.assertEqual(found.id, new_id)
        self.assertIsNotNone(found.last_modified)

        missing = database.get_website_by_url("does not exist")
        self.assertIsNone(missing)
Beispiel #11
0
    def test_insert_website(self):
        """insert_website() writes one Website row holding the given fields."""
        db = Database("test.sqlite3")
        website_id = db.insert_website(
            Website("https://google.ca", "127.0.0.1", "firefox"))

        # Read the row back through a separate connection and make sure that
        # connection is closed even if the query fails.
        conn = sqlite3.connect("test.sqlite3")
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM Website WHERE id=?", (website_id, ))
            db_website = cursor.fetchone()
        finally:
            conn.close()

        self.assertEqual(db_website[0], 1)
        self.assertEqual(db_website[1], "https://google.ca")
        self.assertEqual(db_website[2], "127.0.0.1")
        self.assertEqual(db_website[3], "firefox")
        self.assertIsNotNone(db_website[4])
Beispiel #12
0
    def test_enqueue(self):
        """Enqueuing the same website twice leaves a single Queue row."""
        db = Database("test.sqlite3")

        web_id = db.insert_website(
            Website("https://simon987.net", "127.0.0.1", "firefox"))

        db.enqueue(web_id)
        db.enqueue(web_id)

        # Using a sqlite3 connection as a context manager only commits or
        # rolls back; it never closes the connection, so close it explicitly.
        conn = sqlite3.connect("test.sqlite3")
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM Queue")
            db_queued_website = cursor.fetchone()
            next_row = cursor.fetchone()
        finally:
            conn.close()

        self.assertEqual(db_queued_website[0], 1)
        self.assertEqual(db_queued_website[1], web_id)
        # No second row: the duplicate enqueue must not have been stored.
        self.assertIsNone(next_row)
Beispiel #13
0
    def test_insert_files(self):
        """insert_files() writes a File row linked to a WebsitePath row."""
        db = Database("test.sqlite3")
        website_id = db.insert_website(Website("", "", ""))
        db.insert_files(
            [File(website_id, "/some/dir/", "text/plain", "file.txt", 1234)])

        # Read the rows back through a separate connection and make sure that
        # connection is closed even if a query fails.
        conn = sqlite3.connect("test.sqlite3")
        try:
            cursor = conn.cursor()

            cursor.execute("SELECT * FROM File WHERE id=?", (1, ))
            db_file = cursor.fetchone()

            # Column 1 of the File row is used as the WebsitePath id.
            cursor.execute(
                "SELECT * FROM WebsitePath WHERE id=?", (db_file[1], ))
            db_path = cursor.fetchone()
        finally:
            conn.close()

        self.assertEqual(db_file[0], 1)
        self.assertEqual(db_file[1], db_path[0])
        self.assertEqual(db_file[3], "file.txt")
        self.assertEqual(db_file[4], 1234)
        self.assertEqual(db_path[1], website_id)
        self.assertEqual(db_path[2], "/some/dir/")
                if not od_util.is_od(url):
                    print("Skipping reddit comment: Not an OD")
                    print(url)
                    bot.reply(
                        comment, "Hello, " + str(comment.author) +
                        ". Unfortunately it seems that the link you "
                        "provided: `" + url +
                        "` does not point to an open directory. This could also"
                        " mean that the website is not responding (in which case, feel free to retry in "
                        "a few minutes). If you think that this is an error, please "
                        "[contact my programmer](https://old.reddit.com/message/compose?to=Hexahedr_n)"
                    )
                    continue

                web_id = db.insert_website(
                    Website(url, "localhost", "reddit_bot"))
                db.enqueue(web_id, reddit_comment_id=comment.id,
                           priority=2)  # Medium priority for reddit comments
                print("Queued comment post: " + str(web_id))

# Check posts
# Append everything yielded by subreddit.new(limit=3) to the existing
# `submissions` list; the list is walked by a separate loop afterwards.
# NOTE(review): presumably `limit=3` restricts this to the three newest
# submissions -- confirm against the Reddit client in use.
for submission in subreddit.new(limit=3):
    submissions.append(submission)

for s in submissions:

    if not s.is_self:
        if not bot.has_crawled(s.id):

            url = os.path.join(s.url, "")  # add trailing slash
            scanned = db.website_has_been_scanned(url)