Ejemplo n.º 1
0
 def __init__(self, language="en"):
     """Populate ``self.STOP_WORDS`` for ``language``.

     The word set is looked up in ``self._cached_stop_words`` and is only
     read from ``text/stopwords-<language>.txt`` on a cache miss.  If that
     resource cannot be loaded, the English list is cached under the
     requested language code instead.

     :param language: language code used to build the resource file name.
     """
     # TODO replace 'x' with class
     # to generate dynamic path for file to load
     if language not in self._cached_stop_words:
         path = "text/stopwords-%s.txt" % language
         try:
             words = FileHelper.loadResourceFile(path).splitlines()
         except Exception:
             # Best-effort fallback to English.  ``Exception`` (instead of a
             # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
             words = FileHelper.loadResourceFile("text/stopwords-en.txt").splitlines()
         self._cached_stop_words[language] = set(words)
     self.STOP_WORDS = self._cached_stop_words[language]
Ejemplo n.º 2
0
 def contents(self):
     """Yield a single ``(url, html_bytes)`` pair for the current test case.

     The fixture path is derived from the dotted test id, which must have
     exactly five components; only the suite, module and function parts
     are used.  The loaded content is UTF-8 encoded before being yielded.
     """
     _test, suite_name, module_name, _cls, func_name = self.cls.id().split('.')
     data_root = os.path.join(os.path.dirname(CURRENT_PATH), "data")
     html_name = "%s.html" % func_name
     fixture = os.path.abspath(
         os.path.join(data_root, suite_name, module_name, html_name)
     )
     html = FileHelper.loadResourceFile(fixture)
     yield self.cls.data['url'], html.encode('utf-8')
 def loadCustomSiteMapping(self):
     """Fill ``self.customSiteMapping`` from ``images/known-image-css.txt``.

     Each non-blank line is expected to look like ``domain^css``.  Only the
     first ``^`` separates the two fields, so the CSS part may itself
     contain ``^``.  Blank lines are skipped; a line without any ``^``
     still raises ``ValueError`` so malformed data is not silently lost.
     """
     # TODO
     dataFile = FileHelper.loadResourceFile("images/known-image-css.txt")
     for line in dataFile.splitlines():
         if not line.strip():
             # e.g. an empty line at the end of the resource file
             continue
         domain, css = line.split('^', 1)
         self.customSiteMapping.update({domain: css})
Ejemplo n.º 4
0
 def content(self, req):
     current_test = self.cls._get_current_testname()
     path = os.path.join(CURRENT_PATH, "data", "extractors",
                         "%s.html" % current_test)
     path = os.path.abspath(path)
     content = FileHelper.loadResourceFile(path)
     return content
Ejemplo n.º 5
0
 def html_content(self, req):
     current_test = self.cls._get_current_testname()
     path = os.path.join(os.path.dirname(CURRENT_PATH), "data",
                         "extractors", "images", current_test,
                         "%s.html" % current_test)
     path = os.path.abspath(path)
     return FileHelper.loadResourceFile(path)
 def loadCustomSiteMapping(self):
     """Read ``images/known-image-css.txt`` into ``self.customSiteMapping``.

     Every line must split on ``^`` into exactly two parts, the domain and
     its CSS value; otherwise the unpacking raises ``ValueError``.
     """
     # TODO
     raw = FileHelper.loadResourceFile("images/known-image-css.txt", "xx")
     for entry in raw.splitlines():
         site, selector = entry.split('^')
         self.customSiteMapping.update({site: selector})
Ejemplo n.º 7
0
 def html_content(self, req):
     current_test = self.cls._get_current_testname()
     path = os.path.join(
         os.path.dirname(CURRENT_PATH), "data", "extractors", "images", current_test, "%s.html" % current_test
     )
     path = os.path.abspath(path)
     return FileHelper.loadResourceFile(path)
Ejemplo n.º 8
0
 def __init__(self, language='zh'):
     """Set ``self.STOP_WORDS`` to the 'zh' stop-word set.

     The ``language`` argument is overridden: the code is always 'zh'.
     The set is read from the ``text/stopwords-zh.txt`` resource the first
     time and kept in ``self._cached_stop_words`` afterwards.
     """
     # the language code is always forced to 'zh'
     language = 'zh'
     if language not in self._cached_stop_words:
         resource = os.path.join('text', 'stopwords-%s.txt' % language)
         raw = FileHelper.loadResourceFile(resource)
         self._cached_stop_words[language] = set(raw.splitlines())
     self.STOP_WORDS = self._cached_stop_words[language]
Ejemplo n.º 9
0
 def __init__(self, language='en'):
     """Set ``self.STOP_WORDS`` to the stop-word set of ``language``.

     On a cache miss the words are read, one per line, from the
     ``text/stopwords-<language>.txt`` resource and stored in
     ``self._cached_stop_words``.  Loading errors are not handled here.
     """
     # TODO replace 'x' with class
     # to generate dynamic path for file to load
     if language not in self._cached_stop_words:
         file_name = 'stopwords-%s.txt' % language
         text = FileHelper.loadResourceFile(os.path.join('text', file_name))
         self._cached_stop_words[language] = set(text.splitlines())
     self.STOP_WORDS = self._cached_stop_words[language]
Ejemplo n.º 10
0
 def __init__(self, language="zh"):
     """Set ``self.STOP_WORDS`` to the 'zh' stop-word set.

     Whatever ``language`` is passed, 'zh' is used.  The set comes from
     ``self._cached_stop_words`` and is loaded from
     ``text/stopwords-zh.txt`` only when not cached yet.
     """
     # the language code is always forced to 'zh'
     language = "zh"
     if language not in self._cached_stop_words:
         resource = "text/stopwords-%s.txt" % language
         lines = FileHelper.loadResourceFile(resource).splitlines()
         self._cached_stop_words[language] = set(lines)
     self.STOP_WORDS = self._cached_stop_words[language]
Ejemplo n.º 11
0
 def content(self, req):
     current_test = self.cls._get_current_testname()
     path = os.path.join(CURRENT_PATH, "data", "videos",
                         "%s.html" % current_test)
     path = os.path.abspath(path)
     content = FileHelper.loadResourceFile(path)
     return content
Ejemplo n.º 12
0
 def getRawHtml(self):
     """Return the HTML fixture that belongs to this test.

     The dotted test id must have exactly five components; the suite,
     module and function parts select
     ``<parent of CURRENT_PATH>/data/<suite>/<module>/<func>.html``.
     """
     _test, suite_name, module_name, _cls, func_name = self.id().split('.')
     root = os.path.dirname(CURRENT_PATH)
     fixture = os.path.join(root, "data", suite_name, module_name,
                            "%s.html" % func_name)
     return FileHelper.loadResourceFile(os.path.abspath(fixture))
Ejemplo n.º 13
0
 def load_test_file(self, suffix):
     """Return the contents of this test's data file, or ``None`` on failure.

     The file is ``data/<module>/<func><suffix>`` under ``CURRENT_PATH``,
     where module and func come from the four-part dotted test id.

     :param suffix: text appended to the test function name to form the
         file name.
     :returns: whatever ``FileHelper.loadResourceFile`` returns, or
         ``None`` (after printing "No File") when it raises ``IOError``.
     """
     _suite, module, _cls, func = self.id().split('.')
     path = os.path.abspath(
         os.path.join(CURRENT_PATH, "data", module, "%s%s" % (func, suffix))
     )
     try:
         return FileHelper.loadResourceFile(path)
     except IOError:
         # The parenthesised form prints the same text under Python 2 and is
         # valid syntax under Python 3, unlike the bare print statement.
         print("No File")
         return None
Ejemplo n.º 14
0
 def content(self, req):
     current_test = self.cls._get_current_testname()
     path = os.path.join(CURRENT_PATH, "data", "extractors", "%s.html" % current_test)
     path = os.path.abspath(path)
     content = FileHelper.loadResourceFile(path)
     if content is None:
         raise Exception ("Test could not be found")
     return content
Ejemplo n.º 15
0
 def load_customesite_mapping(self):
     """Read the known-image CSS resource into ``self.custom_site_mapping``.

     Every line of ``images/known-image-css.txt`` must split on ``^`` into
     exactly a domain and a CSS value; otherwise the unpacking raises
     ``ValueError``.
     """
     # TODO
     resource = os.path.join('images', 'known-image-css.txt')
     raw = FileHelper.loadResourceFile(resource)
     for entry in raw.splitlines():
         site, selector = entry.split('^')
         self.custom_site_mapping.update({site: selector})
Ejemplo n.º 16
0
 def load_customesite_mapping(self):
     """Fill ``self.custom_site_mapping`` from ``images/known-image-css.txt``.

     Each non-blank line is expected to look like ``domain^css``.  Only the
     first ``^`` separates the two fields, so the CSS part may itself
     contain ``^``.  Blank lines are skipped; a line without any ``^``
     still raises ``ValueError`` so malformed data is not silently lost.
     """
     # TODO
     path = os.path.join('images', 'known-image-css.txt')
     data_file = FileHelper.loadResourceFile(path)
     for line in data_file.splitlines():
         if not line.strip():
             # e.g. an empty line at the end of the resource file
             continue
         domain, css = line.split('^', 1)
         self.custom_site_mapping.update({domain: css})
Ejemplo n.º 17
0
    def loadData(self):
        """Load this test's JSON fixture into ``self.data``.

        The dotted test id must have exactly four components; the module
        and function parts select ``data/<module>/<func>.json`` under
        ``CURRENT_PATH``.
        """
        _suite, module_name, _cls, func_name = self.id().split('.')
        json_name = "%s.json" % func_name
        fixture = os.path.abspath(
            os.path.join(CURRENT_PATH, "data", module_name, json_name)
        )
        self.data = json.loads(FileHelper.loadResourceFile(fixture))
Ejemplo n.º 18
0
    def loadData(self):
        """Parse the JSON fixture of the current test and keep it on ``self.data``.

        The fixture lives at ``data/<module>/<func>.json`` below
        ``CURRENT_PATH``; module and func are the second and fourth parts
        of the four-part dotted test id.
        """
        id_parts = self.id().split('.')
        _suite, module_name, _cls, func_name = id_parts
        fixture = os.path.join(CURRENT_PATH, "data", module_name,
                               "%s.json" % func_name)
        raw = FileHelper.loadResourceFile(os.path.abspath(fixture))
        self.data = json.loads(raw)
Ejemplo n.º 19
0
    def loadData(self):
        """Load this test's JSON fixture into ``self.data``.

        The dotted test id must have exactly five components.  The fixture
        is ``data/<suite>/<module>/<func>/<func>.json`` under the parent
        directory of ``CURRENT_PATH``.
        """
        _test, suite_name, module_name, _cls, func_name = self.id().split(".")
        fixture_dir = os.path.join(
            os.path.dirname(CURRENT_PATH), "data", suite_name, module_name, func_name
        )
        fixture = os.path.abspath(os.path.join(fixture_dir, "%s.json" % func_name))
        self.data = json.loads(FileHelper.loadResourceFile(fixture))
Ejemplo n.º 20
0
 def getRawHtml(self):
     """Return the HTML fixture matching this test's dotted id.

     The id must split into exactly five parts; suite, module and function
     select ``data/<suite>/<module>/<func>.html`` under the parent
     directory of ``CURRENT_PATH``.
     """
     _test, suite_name, module_name, _cls, func_name = self.id().split('.')
     segments = (os.path.dirname(CURRENT_PATH), "data", suite_name,
                 module_name, "%s.html" % func_name)
     fixture = os.path.abspath(os.path.join(*segments))
     return FileHelper.loadResourceFile(fixture)
Ejemplo n.º 21
0
 def __init__(self, language='en'):
     """Set ``self.STOP_WORDS`` to the stop-word set of ``language``.

     On a cache miss the words are read from the
     ``text/stopwords-<language>.txt`` resource; if that raises
     ``IOError`` an empty set is cached for the language instead.
     """
     # TODO replace 'x' with class
     # to generate dynamic path for file to load
     if language not in self._cached_stop_words:
         resource = os.path.join('text', 'stopwords-%s.txt' % language)
         try:
             words = FileHelper.loadResourceFile(resource).splitlines()
         except IOError:
             words = []
         self._cached_stop_words[language] = set(words)
     self.STOP_WORDS = self._cached_stop_words[language]
Ejemplo n.º 22
0
 def __init__(self, language='en'):
     """Look up (or load and cache) the stop words for ``language``.

     The result is stored on ``self.STOP_WORDS``.  When loading
     ``text/stopwords-<language>.txt`` raises ``IOError``, the language is
     cached with an empty set.
     """
     # TODO replace 'x' with class
     # to generate dynamic path for file to load
     if language not in self._cached_stop_words:
         file_name = 'stopwords-%s.txt' % language
         try:
             raw = FileHelper.loadResourceFile(os.path.join('text', file_name))
             stop_set = set(raw.splitlines())
         except IOError:
             stop_set = set()
         self._cached_stop_words[language] = stop_set
     self.STOP_WORDS = self._cached_stop_words[language]
Ejemplo n.º 23
0
 def __init__(self, language='en'):
     """Build ``self.STOP_WORDS`` as the union of one or more languages' lists.

     :param language: a single language code (``str``) or an iterable of
         codes.  Duplicate codes are ignored.

     ``self.char_split`` is True when 'zh', 'ko' or 'ja' is among the
     requested languages.  Per-language sets are cached on
     ``StopWords._cached_stop_words``; a language whose resource cannot be
     loaded is cached as an empty set.  ``self.STOP_WORDS`` stays ``None``
     when no language is given.
     """
     # TODO replace 'x' with class
     # to generate dynamic path for file to load
     if isinstance(language, str):
         language = [language]
     language = set(language)
     self.char_split = any(code in language for code in ('zh', 'ko', 'ja'))
     self.STOP_WORDS = None
     merged = None
     for code in language:
         if code not in StopWords._cached_stop_words:
             path = 'text/stopwords-%s.txt' % code
             try:
                 stop_list = FileHelper.loadResourceFile(path)
                 if code in ('zh', 'ko', 'ja'):
                     # NOTE(review): assumes the loader returns a byte string
                     # for these languages -- confirm against FileHelper.
                     stop_list = stop_list.decode('utf-8')
                 StopWords._cached_stop_words[code] = set(stop_list.splitlines())
             except Exception:
                 # Best effort: an unreadable list means "no stop words".
                 # ``Exception`` lets KeyboardInterrupt/SystemExit propagate.
                 StopWords._cached_stop_words[code] = set()
         cached = StopWords._cached_stop_words[code]
         if merged is None:
             # Copy, so the in-place union below never mutates the shared
             # cache entry of the first language.
             merged = set(cached)
         else:
             merged |= cached
     self.STOP_WORDS = merged
Ejemplo n.º 24
0
 def getHtml(self, filename):
     """Return what ``FileHelper.loadResourceFile`` loads for ``filename``."""
     html = FileHelper.loadResourceFile(filename)
     return html
Ejemplo n.º 25
0
 def get_html(self, filename):
     """Load ``filename`` from the ``data`` directory under ``CURRENT_PATH``."""
     return FileHelper.loadResourceFile(
         os.path.join(CURRENT_PATH, 'data', filename)
     )
Ejemplo n.º 26
0
 def get_html(self, filename):
     """Load ``filename`` from ``CURRENT_PATH/data`` via its absolute path."""
     fixture = os.path.abspath(os.path.join(CURRENT_PATH, "data", filename))
     return FileHelper.loadResourceFile(fixture)
Ejemplo n.º 27
0
 def get_html(self, filename):
     """Load ``filename`` from ``CURRENT_PATH/data``, printing the resolved path.

     :param filename: file name relative to the ``data`` directory.
     :returns: whatever ``FileHelper.loadResourceFile`` returns for the
         absolute path.
     """
     path = os.path.abspath(os.path.join(CURRENT_PATH, 'data', filename))
     # The parenthesised form prints the same text under Python 2 and is valid
     # syntax under Python 3, unlike the bare print statement.
     print("CURRENT_PATH %s" % path)
     return FileHelper.loadResourceFile(path)
Ejemplo n.º 28
0
 def getHtml(self, filename):
     """Return what ``FileHelper.loadResourceFile(filename, 'x')`` loads."""
     html = FileHelper.loadResourceFile(filename, 'x')
     return html
Ejemplo n.º 29
0
 def __init__(self, language='en'):
     """Set up the punctuation pattern and the stop-word set for ``language``.

     :param language: language code used to build the resource name
         ``text/stopwords-<language>.txt``.
     """
     # The previous pattern used Java-style \p{..} Unicode property classes,
     # which the stdlib ``re`` module does not implement: Python 3 rejects
     # them as bad escapes and Python 2 reads them as literal characters.
     # With re.UNICODE, \w covers letters, digits and the underscore -- the
     # closest stdlib equivalent -- so anything that is neither a word
     # character nor whitespace is treated as punctuation.
     self.PUNCTUATION = re.compile(r"[^\w\s]", re.UNICODE)
     # TODO replace 'x' with class
     # to generate dynamic path for file to load
     path = 'text/stopwords-%s.txt' % language
     self.STOP_WORDS = set(FileHelper.loadResourceFile(path, 'x').splitlines())