Beispiel #1
0
	def test_link_extractor(self):
		"""Run utils.clean_links over sample inputs and compare the extracted links.

		Each case pairs a raw input string with the exact list that
		utils.clean_links is expected to return for it.
		"""
		cases = [
			# single URL followed by a newline
			('http://www.catb.org/jargon/html/S/schroedinbug.html\n',
				['http://www.catb.org/jargon/html/S/schroedinbug.html']),
			# no scheme: nothing is expected back
			('catb.org/jargon/html/S/schroedinbug.html', []),
			# blank right after the scheme: nothing is expected back
			('http:// catb.org/jargon/html/S/schroedinbug.html', []),
			# two URLs separated by a newline
			('http://www.catb.org/jargon/html/S/schroedinbug.html\nhttp://www.catb.org',
				['http://www.catb.org/jargon/html/S/schroedinbug.html', 'http://www.catb.org']),
			# two URLs separated by a space
			('http://www.catb.org/jargon/html/S/schroedinbug.html http://www.catb.org',
				['http://www.catb.org/jargon/html/S/schroedinbug.html', 'http://www.catb.org']),
			# two URLs joined by '-' are expected back as one link
			('http://www.catb.org/jargon/html/S/schroedinbug.html-http://www.catb.org',
				['http://www.catb.org/jargon/html/S/schroedinbug.html-http://www.catb.org']),
			# https scheme
			('https://www.catb.org/jargon/html/S/schroedinbug.html\nhttps://www.catb.org',
				['https://www.catb.org/jargon/html/S/schroedinbug.html', 'https://www.catb.org']),
			# ftp scheme
			('ftp://www.catb.org/jargon/html/S/schroedinbug.html\nftp://www.catb.org',
				['ftp://www.catb.org/jargon/html/S/schroedinbug.html', 'ftp://www.catb.org']),
		]
		for raw_text, expected_links in cases:
			self.assertEqual(utils.clean_links(raw_text), expected_links)
Beispiel #2
0
 def test_link_extractor(self):
     """Verify the link lists utils.clean_links returns for sample inputs."""
     extract = utils.clean_links
     check = self.assertEqual

     # A lone URL with a trailing newline comes back as a single link.
     check(extract('http://www.catb.org/jargon/html/S/schroedinbug.html\n'),
           ['http://www.catb.org/jargon/html/S/schroedinbug.html'])

     # Inputs without a scheme, or with a blank after it, yield nothing.
     check(extract('catb.org/jargon/html/S/schroedinbug.html'), [])
     check(extract('http:// catb.org/jargon/html/S/schroedinbug.html'), [])

     # Newline- and space-separated URLs are expected as two links.
     two_http = [
         'http://www.catb.org/jargon/html/S/schroedinbug.html',
         'http://www.catb.org',
     ]
     check(
         extract('http://www.catb.org/jargon/html/S/schroedinbug.html\nhttp://www.catb.org'),
         two_http)
     check(
         extract('http://www.catb.org/jargon/html/S/schroedinbug.html http://www.catb.org'),
         two_http)

     # URLs joined by '-' are expected back as one single link.
     check(
         extract('http://www.catb.org/jargon/html/S/schroedinbug.html-http://www.catb.org'),
         ['http://www.catb.org/jargon/html/S/schroedinbug.html-http://www.catb.org'])

     # The https and ftp schemes are expected to be extracted as well.
     check(
         extract('https://www.catb.org/jargon/html/S/schroedinbug.html\nhttps://www.catb.org'),
         ['https://www.catb.org/jargon/html/S/schroedinbug.html',
          'https://www.catb.org'])
     check(
         extract('ftp://www.catb.org/jargon/html/S/schroedinbug.html\nftp://www.catb.org'),
         ['ftp://www.catb.org/jargon/html/S/schroedinbug.html',
          'ftp://www.catb.org'])
Beispiel #3
0
	def add_links(self, inp):
		"""Extract links from ``inp`` and add them to ``self.links``.

		The input is run through ``utils.clean_links``. When ``self.type`` is
		``"links"`` the results are stored directly; for any other type each
		result is handed to ``dlc_to_links`` and whatever links it returns
		(unless it returns ``None``) are stored instead.

		Returns ``False`` for empty or ``None`` input. Otherwise returns the
		number of entries ``utils.clean_links`` produced, which is 0 (falsy)
		when nothing in the input was recognised.
		"""
		# Truthiness covers both "" and None without raising on None.
		if not inp:
			return False

		parsed_input = utils.clean_links(inp)
		if self.type == "links":
			self.links.extend(parsed_input)
		else:
			# Any other type is handled as a DLC container reference.
			for container in parsed_input:
				dlc_links = dlc_to_links(container)
				# Only None is skipped; an empty result extends by nothing.
				if dlc_links is not None:
					self.links.extend(dlc_links)

		return len(parsed_input)
Beispiel #4
0
    def add_links(self, inp):
        """Parse ``inp`` for links and store them in ``self.links``.

        ``utils.clean_links`` turns the raw input into a list of entries.
        If ``self.type`` is ``"links"`` those entries are appended as they
        are. For every other type each entry is passed to ``dlc_to_links``
        and the links it yields are appended, skipping entries for which it
        returns ``None``.

        Returns ``False`` when ``inp`` is empty or ``None``; otherwise the
        count of entries returned by ``utils.clean_links`` (0, and therefore
        falsy, if the input contained nothing usable).
        """
        # ``not inp`` treats None like empty input instead of raising.
        if not inp:
            return False

        parsed_input = utils.clean_links(inp)
        if self.type == "links":
            self.links.extend(parsed_input)
        else:
            # Non-"links" types are treated as DLC containers.
            for entry in parsed_input:
                dlc_links = dlc_to_links(entry)
                # Identity check: only a None result is ignored.
                if dlc_links is not None:
                    self.links.extend(dlc_links)

        return len(parsed_input)