Beispiel #1
0
def find_xdcc_eu_packs(search_phrase: str) -> List[XDCCPack]:
    """
    Conducts the xdcc pack search for xdcc.eu

    The result page is read as an HTML table. The first table row is
    skipped as the header, every following row is treated as one pack.
    Rows that do not have the expected cell/link layout are skipped.

    :param search_phrase: The search phrase to search for
    :return: the search results as a list of XDCCPack objects
    """
    # Let requests build the query string so that spaces, "&", "#" and
    # other reserved characters inside the phrase are escaped correctly
    response = requests.get(
        "https://www.xdcc.eu/search.php",
        params={"searchkey": search_phrase}
    )
    soup = BeautifulSoup(response.text, "html.parser")

    packs = []
    # Slicing instead of pop(0) also copes with a page without any rows
    for row in soup.select("tr")[1:]:
        cells = row.select("td")
        if len(cells) < 7:
            # Size is read from cell 5 and the file name from cell 6
            continue

        links = cells[1].select("a")
        if len(links) < 2:
            # The pack information is attached to the second link
            continue

        info = links[1]
        server = IrcServer(info["data-s"])
        # "data-p" has the form "<bot> xdcc send #<pack number>"
        bot, pack_number = info["data-p"].split(" xdcc send #")

        pack = XDCCPack(server, bot, int(pack_number))
        pack.set_size(byte_string_to_byte_count(cells[5].text))
        pack.set_filename(cells[6].text)
        packs.append(pack)

    return packs
Beispiel #2
0
def find_horriblesubs_packs(search_phrase: str) -> List[XDCCPack]:
    """
    Conducts the xdcc pack search for xdcc.horriblesubs.info

    The response text is split on ";" and every fragment is handed to
    get_attribute to extract the bot name ("b"), file name ("f"),
    file size ("s") and pack number ("n").

    :param search_phrase: The search phrase to search for
    :return: the search results as a list of XDCCPack objects;
             an empty list if the search phrase is empty
    """
    # Imported locally so the function does not depend on a module-level
    # import being present
    from urllib.parse import quote

    if not search_phrase:
        return []

    # Percent-encode the whole phrase. Spaces still become %20 and "!"
    # becomes %21, but characters such as "&", "#", "+" or "%" can no
    # longer corrupt the query string.
    search_query = quote(search_phrase, safe="")

    url = "http://xdcc.horriblesubs.info/search.php?t=" + search_query
    scraper = cfscrape.create_scraper()
    lines = scraper.get(url).text.split(";")

    packs = []
    for line in lines:

        # Only the parsing is guarded, so that errors raised while
        # building the pack are not silently swallowed
        try:
            botname = get_attribute(line, "b")
            filename = get_attribute(line, "f")
            filesize = get_attribute(line, "s")
            packnumber = get_attribute(line, "n")
        except IndexError:  # In case the line is not parseable
            continue

        pack = XDCCPack(IrcServer("irc.rizon.net"), botname, packnumber)
        pack.set_filename(filename)
        pack.set_size(filesize)
        packs.append(pack)

    return packs
Beispiel #3
0
def find_subsplease_packs(search_phrase: str) -> List[XDCCPack]:
    """
    Conducts the xdcc pack search for subsplease.org

    The response text is split on ";" and every fragment is handed to
    parse_result. The keys "b" (bot name), "f" (file name), "s" (file size)
    and "n" (pack number) are then read from the parsed fragment.

    :param search_phrase: The search phrase to search for
    :return: the search results as a list of XDCCPack objects;
             an empty list if the search phrase is empty
    """
    # Imported locally so the function does not depend on a module-level
    # import being present
    from urllib.parse import quote

    if not search_phrase:
        return []

    # Percent-encode the whole phrase. Spaces still become %20 and "!"
    # becomes %21, but characters such as "&", "#", "+" or "%" can no
    # longer corrupt the query string.
    search_query = quote(search_phrase, safe="")

    url = "https://subsplease.org/xdcc/search.php?t=" + search_query
    scraper = cfscrape.create_scraper()
    raw_entries = scraper.get(url).text.split(";")

    packs = []
    for raw_entry in raw_entries:

        try:
            entry = parse_result(raw_entry)
            botname = entry["b"]
            filename = entry["f"]
            filesize = int(entry["s"])
            packnumber = int(entry["n"])
        except (IndexError, KeyError, ValueError):
            # In case the line is not parseable: a missing key raises
            # KeyError and a non-numeric size/pack number ValueError
            continue

        pack = XDCCPack(IrcServer("irc.rizon.net"), botname, packnumber)
        pack.set_filename(filename)
        # NOTE(review): the factor suggests that "s" is given in
        # megabytes - confirm against the site's output
        pack.set_size(filesize * 1000 * 1000)
        packs.append(pack)

    return packs
Beispiel #4
0
def find_nibl_packs(search_phrase: str) -> List[XDCCPack]:
    """
    Searches for XDCC Packs matching the specified search string on nibl.co.uk

    The file names, pack numbers, bot names and file sizes are selected from
    the page as four separate lists and combined position by position.

    :param search_phrase: The search phrase to search for
    :return:              The list of found XDCC Packs
    """

    # nibl.co.uk uses + symbols as spaces, which is exactly how requests
    # encodes spaces in query parameters. Other reserved characters in the
    # phrase are escaped as well.
    html = requests.get(
        "https://nibl.co.uk/bots.php",
        params={"search": search_phrase}
    ).text

    content = BeautifulSoup(html, "html.parser")
    file_names = content.select(".filename")
    pack_numbers = content.select(".packnumber")
    bot_names = content.select(".name")
    file_sizes = content.select(".filesize")

    server = "irc.rizon.net"
    results = []

    # The four lists are walked in lockstep; zip stops at the shortest
    # list instead of failing on a missing element
    for name_cell, number_cell, bot_cell, size_cell in zip(
            file_names, pack_numbers, bot_names, file_sizes
    ):
        # The filename has two links after it, which need to be cut out
        filename = name_cell.text.rsplit(" \n", 1)[0]

        # The bot name has a link after it, which needs to be cut out
        bot = bot_cell.text.rsplit(" ", 1)[0]

        packnumber = int(number_cell.text)

        result = XDCCPack(IrcServer(server), bot, packnumber)
        # NOTE(review): the size is passed on as the raw cell text;
        # confirm that XDCCPack.set_size accepts a string here
        result.set_size(size_cell.text)
        result.set_filename(filename)
        results.append(result)

    return results
Beispiel #5
0
def find_ixirc_packs(search_phrase: str) -> List[XDCCPack]:
    """
    Searches for XDCC Packs matching the specified search string on ixirc.com
    Implementation courtesy of Jean Wicht <*****@*****.**>.

    The ixirc API is queried page by page. If a request does not succeed or
    a page carries no results, the packs collected so far are returned.

    :param search_phrase: The search phrase to search for
    :return:              The list of found XDCC Packs
    """

    if not search_phrase:
        return []

    packs: List[XDCCPack] = []
    page_id = 0
    # Placeholder that lets the loop start; the real number of pages is
    # taken from the first response
    page_count = 1
    while page_id < page_count:
        response = requests.get(
            "https://ixirc.com/api/",
            params={
                "q": search_phrase,
                "pn": str(page_id)
            },
        )

        if response.status_code != 200:
            return packs

        data = response.json()

        if "results" not in data:
            # No (more) results. Packs found on earlier pages are kept
            # instead of being thrown away.
            return packs

        page_count = int(data["pc"])

        for result in data["results"]:
            if "uname" not in result:
                # bot not online
                continue

            server = IrcServer(result["naddr"], result["nport"])
            pack = XDCCPack(server, result["uname"], int(result["n"]))
            pack.set_filename(result["name"])
            pack.set_size(result["sz"])
            packs.append(pack)

        page_id += 1  # next page

    return packs
Beispiel #6
0
def get_page_results(page_content: BeautifulSoup) -> List[XDCCPack]:
    """
    Parses a single ixIRC page to find all search results from that page

    All ``td`` cells of the page are walked as one flat sequence. A small
    state machine assigns the cells to pack columns (file name, network,
    channel, bot, pack number, gets, size) and treats the second cell
    containing "ago" after the size column as the end of a pack.
    A pack with an empty cell in its middle is discarded.

    :param page_content: The Beautifulsoup-parsed content of the page
    :return:             A list of XDCC Packs on that page
    :raises ValueError:  If the text of a pack number cell is not an integer
    """
    results = []
    # Despite the name, this holds every table cell of the page
    packs = page_content.select("td")

    # Initialize the pack variables
    file_name = ""
    bot = ""
    server = ""
    pack_number = 0
    size = ""

    # Keeps track of which column the parser is currently working on
    column_count = 0

    # Counts how often the word "ago" was used,
    # which is used to keep track of on which
    # pack we currently are. Each pack has two instances of 'ago' occurring.
    ago_count = 0

    # Set when an invalid pack is encountered. The pack's values are then
    # no longer read and the pack is dropped once its end is reached
    aborted = False

    # Flag that lets other parts of the loop know that
    # we are moving on to the next pack
    next_element = False

    # line_part is a x,y section of the rows and columns of the website.
    # We go through it in the order Left->Right, Top->Bottom
    for line_part in packs:

        # Only one branch of this chain runs per cell, so the cell that
        # starts a new pack is never counted as an 'ago' cell
        if next_element and line_part.text == "":
            # Jumps to the next not-empty element
            # if we are currently jumping to the next pack
            continue

        elif next_element and not line_part.text == "":
            # We reached a new pack, start parsing the new pack.
            # column_count was already reset to 0, so this cell is
            # read as the file name below
            next_element = False

        elif not next_element and line_part.text == "":
            # Invalid pack element if a string == "" in the middle of the pack,
            # abort the pack and jump to next element
            aborted = True

        elif "ago" in line_part.text and column_count > 6:
            # Counts the number of times 'ago' is seen by the parser.
            # The last two elements of a pack both end
            # with 'ago', which makes it ideal to use as a marker
            # for when a single pack element ends.
            # This only starts counting once we got all relevant information
            # from the pack itself (column_count > 6)
            # to avoid conflicts when the substring 'ago'
            # is contained inside the file name
            ago_count += 1

        # This gets the information from the pack and stores
        # them into variables.
        # This gets skipped if it has been established that the pack is invalid
        if not aborted:
            if column_count == 0:
                # File Name
                file_name = line_part.text
            elif column_count == 1:
                # Server Address, built from the lower-cased cell text
                # as "irc.<text>.net"
                server = "irc." + line_part.text.lower() + ".net"
            elif column_count == 2:
                # Channel Information, not needed due to /whois IRC queries
                pass
            elif column_count == 3:
                # Bot Name
                bot = line_part.text
            elif column_count == 4:
                # Pack Number
                pack_number = int(line_part.text)
            elif column_count == 5:
                pass  # This is the 'gets' section, we don't need that
            elif column_count == 6:
                # File size, with non-breaking and regular spaces removed
                size = line_part.text.replace("\xa0", " ").replace(" ", "")

        # Resets state after a pack was successfully parsed,
        # and adds xdcc pack to results
        if not aborted and ago_count == 2:
            ago_count = 0
            column_count = 0
            next_element = True

            # Generate XDCCPack and append it to the list
            result = XDCCPack(IrcServer(server), bot, pack_number)
            result.set_filename(file_name)
            result.set_size(byte_string_to_byte_count(size))
            results.append(result)

        # Resets state after invalid pack, nothing is added to the results
        elif aborted and ago_count == 2:
            aborted = False
            ago_count = 0
            column_count = 0
            next_element = True

        if not next_element:
            # Only increment column_count in the middle of a pack,
            # not when we jump to the next pack element
            column_count += 1

    return results
Beispiel #7
0
class UnitTests(unittest.TestCase):
    """
    Unit tests for XDCCPack objects and for the generation of packs
    from xdcc messages
    """

    def setUp(self):
        """
        Creates a fresh pack for the bot "xdcc_servbot" with pack number 1
        """
        self.pack = XDCCPack(IrcServer("irc.namibsun.net"), "xdcc_servbot", 1)

    def tearDown(self):
        """
        Nothing to clean up, the tests only work on in-memory objects
        """

    def test_getters(self):
        """
        Checks the values of a pack directly after its creation
        """
        self.assertEqual(self.pack.get_packnumber(), 1)
        self.assertEqual(self.pack.get_bot(), "xdcc_servbot")
        self.assertEqual(self.pack.get_filename(), "")
        self.assertEqual(self.pack.get_size(), 0)
        self.assertEqual(self.pack.get_server().get_address(),
                         "irc.namibsun.net")
        self.assertEqual(self.pack.get_filepath(), os.getcwd() + os.sep)
        self.assertEqual(self.pack.get_request_message(), "xdcc send #1")

    def test_setting_filename(self):
        """
        Checks how consecutive set_filename calls affect name and path
        """
        cwd = os.getcwd()

        # The first name is taken as is
        self.pack.set_filename("test")
        self.assertEqual(self.pack.get_filename(), "test")
        self.assertEqual(self.pack.get_filepath(),
                         os.path.join(cwd, "test"))

        # Without override, an already set name is expected to stay
        self.pack.set_filename("something")
        self.assertEqual(self.pack.get_filename(), "test")
        self.assertEqual(self.pack.get_filepath(),
                         os.path.join(cwd, "test"))

        # With override, the name is expected to be replaced
        self.pack.set_filename("something", override=True)
        self.assertEqual(self.pack.get_filename(), "something")
        self.assertEqual(self.pack.get_filepath(),
                         os.path.join(cwd, "something"))

        # Only the extension of the new name is expected to be appended
        self.pack.set_filename("something_else.txt")
        self.assertEqual(self.pack.get_filename(), "something.txt")
        self.assertEqual(self.pack.get_filepath(),
                         os.path.join(cwd, "something.txt"))

        # Setting the same extension again is expected to change nothing
        self.pack.set_filename("something_else.txt")
        self.assertEqual(self.pack.get_filename(), "something.txt")
        self.assertEqual(self.pack.get_filepath(),
                         os.path.join(cwd, "something.txt"))

        # A different extension is expected to be appended as well
        self.pack.set_filename("something_else.mkv")
        self.assertEqual(self.pack.get_filename(), "something.txt.mkv")
        self.assertEqual(self.pack.get_filepath(),
                         os.path.join(cwd, "something.txt.mkv"))

        # Changing the directory only affects the path, not the name
        self.pack.set_directory(os.path.join(cwd, "test"))
        self.assertEqual(self.pack.get_filename(), "something.txt.mkv")
        self.assertEqual(
            self.pack.get_filepath(),
            os.path.join(cwd, "test", "something.txt.mkv"))

    def test_original_filename_check(self):
        """
        Checks that file names are only rejected once an original
        file name was set
        """
        self.assertTrue(self.pack.is_filename_valid("the_original"))
        self.assertTrue(self.pack.is_filename_valid("not_the_original"))
        self.pack.set_original_filename("the_original")
        self.assertTrue(self.pack.is_filename_valid("the_original"))
        self.assertFalse(self.pack.is_filename_valid("not_the_original"))

    def test_request_message(self):
        """
        Checks the short and the full form of the request message
        """
        self.assertEqual(self.pack.get_request_message(), "xdcc send #1")
        self.assertEqual(self.pack.get_request_message(full=True),
                         "/msg xdcc_servbot xdcc send #1")

    def test_generating_from_xdcc_message_single(self):
        """
        Checks the pack generated from a message for a single pack
        """
        message = "/msg xdcc_servbot xdcc send #1"
        packs = xdcc_packs_from_xdcc_message(
            message, "testdir", "irc.namibsun.net")
        self.assertEqual(len(packs), 1)
        pack = packs[0]

        self.assertEqual(pack.get_packnumber(), 1)
        self.assertEqual(pack.get_bot(), "xdcc_servbot")
        self.assertEqual(pack.get_server().get_address(), "irc.namibsun.net")
        self.assertEqual(pack.get_filepath(), "testdir" + os.sep)
        # assertIn reports both strings if the check fails
        self.assertIn(pack.get_request_message(), message)

    def test_generating_from_xdcc_message_range(self):
        """
        Checks the packs generated from a message for a pack range
        """
        packs = xdcc_packs_from_xdcc_message(
            "/msg xdcc_servbot xdcc send #1-100")
        self.assertEqual(len(packs), 100)

        for expected_number, pack in enumerate(packs, start=1):
            self.assertEqual(pack.get_packnumber(), expected_number)
            self.assertEqual(pack.get_server().get_address(), "irc.rizon.net")
            self.assertEqual(pack.get_filepath(), os.getcwd() + os.sep)

    def test_generating_from_xdcc_message_range_with_jumps(self):
        """
        Checks the packs generated from a message for a pack range
        with a step size of 2
        """
        packs = xdcc_packs_from_xdcc_message(
            "/msg xdcc_servbot xdcc send #1-100,2")
        self.assertEqual(len(packs), 50)

        # Expected pack numbers are 1, 3, 5, ...
        for index, pack in enumerate(packs):
            self.assertEqual(pack.get_packnumber(), 2 * index + 1)

    def test_generating_invalid_xdcc_message(self):
        """
        Checks that a message that is no xdcc message yields no packs
        """
        packs = xdcc_packs_from_xdcc_message("randomnonesense")
        self.assertEqual(len(packs), 0)