def test_named_address(self):
        """Validate a ``host:port`` string whose host is a hostname.

        Checks the text form, ``host`` and ``port`` of the validated value.
        """
        expected = 'localhost:8000'

        validated = config_options.IpAddress().validate(expected)

        self.assertEqual(utils.text_type(validated), expected)
        self.assertEqual(validated.host, 'localhost')
        self.assertEqual(validated.port, 8000)
    def test_default_address(self):
        """Validating ``None`` yields the address given as ``default``.

        Checks the text form, ``host`` and ``port`` of the validated value.
        """
        fallback = '127.0.0.1:8000'

        validated = config_options.IpAddress(default=fallback).validate(None)

        self.assertEqual(utils.text_type(validated), fallback)
        self.assertEqual(validated.host, '127.0.0.1')
        self.assertEqual(validated.port, 8000)
    def test_valid_IPv6_address(self):
        """Validate a bracketed IPv6 loopback address with a port.

        The expected ``host`` keeps its square brackets.
        """
        expected = '[::1]:8000'

        validated = config_options.IpAddress().validate(expected)

        self.assertEqual(utils.text_type(validated), expected)
        self.assertEqual(validated.host, '[::1]')
        self.assertEqual(validated.port, 8000)
    def test_named_address(self):
        """A hostname-based ``host:port`` string validates unchanged.

        The validated value must render back to the input text and expose
        the hostname and the integer port separately.
        """
        given = 'localhost:8000'

        validator = config_options.IpAddress()
        result = validator.validate(given)

        self.assertEqual(utils.text_type(result), given)
        self.assertEqual(result.host, 'localhost')
        self.assertEqual(result.port, 8000)
    def test_default_address(self):
        """The configured ``default`` is used when ``None`` is validated.

        The validated value must render back to the default text and expose
        its host and integer port separately.
        """
        given = '127.0.0.1:8000'

        validator = config_options.IpAddress(default=given)
        result = validator.validate(None)

        self.assertEqual(utils.text_type(result), given)
        self.assertEqual(result.host, '127.0.0.1')
        self.assertEqual(result.port, 8000)
    def test_valid_IPv6_address(self):
        """A bracketed IPv6 address with a port validates unchanged.

        The expected ``host`` is the bracketed form ``[::1]``.
        """
        given = '[::1]:8000'

        validator = config_options.IpAddress()
        result = validator.validate(given)

        self.assertEqual(utils.text_type(result), given)
        self.assertEqual(result.host, '[::1]')
        self.assertEqual(result.port, 8000)
Example #7
0
 def _add_entry(self, title, text, loc):
     """
     Append one search entry to ``self._entries``.

     The text is stripped of surrounding whitespace, encoded to UTF-8
     bytes and passed to ``utils.text_type`` with ``encoding='utf-8'``
     (presumably yielding a unicode string -- confirm against ``utils``).
     The title and location are stored as given.
     """
     stripped = text.strip()
     entry = {
         'title': title,
         'text': utils.text_type(stripped.encode('utf-8'), encoding='utf-8'),
         'location': loc,
     }
     self._entries.append(entry)
Example #8
0
 def _add_entry(self, title, text, loc):
     """
     Store a ``title`` / ``text`` / ``location`` record in ``self._entries``.

     Only the text is transformed: it is stripped, encoded as UTF-8 and
     handed to ``utils.text_type`` with ``encoding='utf-8'`` (presumably
     a decode back to a unicode string -- confirm against ``utils``).
     """
     utf8_bytes = text.strip().encode('utf-8')
     record = {
         'title': title,
         'text': utils.text_type(utf8_bytes, encoding='utf-8'),
         'location': loc,
     }
     self._entries.append(record)
Example #9
0
    def _add_entry(self, title, text, loc):
        """
        Normalise whitespace in ``text`` and append a search entry.

        Non-breaking spaces become plain spaces, surrounding whitespace is
        stripped, and each run of space, tab, newline, carriage return,
        form feed or vertical tab collapses to a single space. The result
        is encoded as UTF-8 and passed to ``utils.text_type`` with
        ``encoding='utf-8'`` before being stored alongside the title and
        location.
        """
        without_nbsp = text.replace('\u00a0', ' ')
        collapsed = re.sub(r'[ \t\n\r\f\v]+', ' ', without_nbsp.strip())
        normalised = utils.text_type(collapsed.encode('utf-8'), encoding='utf-8')

        entry = {
            'title': title,
            'text': normalised,
            'location': loc,
        }
        self._entries.append(entry)
Example #10
0
    def _add_entry(self, title, text, loc):
        """
        Append a whitespace-normalised search entry to ``self._entries``.

        The text has non-breaking spaces replaced by plain spaces, is
        stripped, and has every run of space/tab/newline/CR/FF/VT
        characters squeezed to one space; it is then round-tripped through
        UTF-8 via ``utils.text_type``. Title and location are stored as
        given.
        """
        cleaned = re.sub(
            r'[ \t\n\r\f\v]+', ' ', text.replace('\u00a0', ' ').strip())
        as_text = utils.text_type(cleaned.encode('utf-8'), encoding='utf-8')

        self._entries.append({
            'title': title,
            'text': as_text,
            'location': loc,
        })
Example #11
0
    def _add_entry(self, title, text, loc):
        """
        Append a search entry whose title and text are segmented by jieba.

        The text first has non-breaking spaces replaced by plain spaces, is
        stripped, has whitespace runs collapsed to single spaces and is
        round-tripped through UTF-8 via ``utils.text_type``. Title and text
        are then each cut into tokens with ``jieba.cut`` and re-joined with
        single spaces; the location is stored as given.
        """
        def segment(value):
            # Newlines are dropped before segmentation. ``True`` is the
            # second positional argument of jieba.cut (``cut_all`` in
            # jieba's documented signature -- confirm for the installed
            # version).
            pieces = jieba.cut(value.replace('\n', ''), True)
            return " ".join(piece.strip() for piece in pieces)

        without_nbsp = text.replace('\u00a0', ' ')
        collapsed = re.sub(r'[ \t\n\r\f\v]+', ' ', without_nbsp.strip())
        normalised = utils.text_type(collapsed.encode('utf-8'), encoding='utf-8')

        segmented_title = segment(title)
        segmented_text = segment(normalised)
        self._entries.append(
            dict(title=segmented_title, text=segmented_text, location=loc))
Example #12
0
            found[ext].append(location)

            # Read the whole page inside a context manager so the handle is
            # closed as soon as its contents are in memory; previously the
            # file object was opened and never closed.
            with io.open(os.path.join(dirpath, name), 'r',
                         encoding='utf8') as file:
                logbody = file.read()

            # Parse the HTML with the lxml parser.
            soup = BeautifulSoup(logbody, "lxml")

            # <title> element of the page.
            # NOTE(review): soup.title is presumably None for a page without
            # a <title>, which would make title.get_text() below raise --
            # confirm whether such pages can occur and guard if so.
            title = soup.title

            # Every paragraph, preformatted block and h1-h4 heading is a
            # candidate search document.
            page_details = soup.find_all(["p", "pre", "h1", "h2", "h3", "h4"])
            for detail in page_details:
                # Same normalisation as the search-entry helpers: NBSP ->
                # space, strip, collapse whitespace runs, UTF-8 round trip.
                text = detail.get_text()
                text = text.replace('\u00a0', ' ')
                text = re.sub(r'[ \t\n\r\f\v]+', ' ', text.strip())
                text = utils.text_type(text.encode('utf-8'), encoding='utf-8')
                if not text:
                    continue

                # Computed once per element instead of three times.
                rel_location = str(os.path.relpath(location))
                if not rel_location:
                    continue

                page_title = str(title.get_text())

                # The hash of (location, text, title) both de-duplicates
                # documents and serves as the document's 'index'.
                # NOTE(review): str hashes are salted per process on
                # Python 3 unless PYTHONHASHSEED is fixed, so these values
                # are presumably not stable between runs -- confirm that
                # consumers do not rely on that.
                hashVal = abs(hash((rel_location, text, page_title)))
                if hashVal not in addedHashes:
                    currentPage = {
                        'location' : rel_location,
                        'text' : text,
                        'title' : page_title,
                        'index' : hashVal
                    }
                    addedHashes.append(hashVal)
                    searchData['docs'].append(currentPage)

# Write results to the json file: serialise the accumulated searchData
# structure as JSON into the path held in outputjson. The file is opened in
# text write mode ('w'), so any existing contents are replaced.
with open(outputjson, 'w') as logfile:
    json.dump(searchData, logfile)