Beispiel #1
0
    def _is_categorie_folder(self, path):

        for fname in sorted(os.listdir(path)):
            fpath = os.path.join(path, fname)
            if os.path.isdir(fpath) and not variables.noloaddir(fpath):
                return False
        return True
Beispiel #2
0
    def add_folder_structure(self, fpath, offset, parent):
        """Record the directory tree below ``fpath`` and index its text files.

        Entries for which ``variables.noloaddir(name)`` is truthy are skipped.
        For each remaining sub-directory, ``[label, offset]`` is stored in
        ``self.folder_structure`` under the directory *name* (not its path),
        the name is appended to ``self.folder_order``, and the directory is
        processed recursively with ``offset + 1``. When the recursion returns
        a non-empty list, that list is appended to the stored entry.

        Args:
            fpath: directory to scan (listed with ``os.listdir``).
            offset: depth value stored with each directory and passed to
                ``self.get_categorie_label``.
            parent: label of the enclosing directory, or ``None``.

        Returns:
            A list of ``[name, index]`` pairs, one per ``.txt`` file directly
            inside ``fpath``; ``index`` is taken from ``self.file_index``,
            which is incremented for every file recorded.
        """
        text_files = []
        for name in os.listdir(fpath):
            if variables.noloaddir(name):
                continue
            path = os.path.join(fpath, name)
            if os.path.isdir(path):
                label = self.get_categorie_label(name, offset)
                # With both a parent label and an own label available, the
                # stored label becomes "<parent>;<name>".
                if parent is not None and label is not None:
                    label = parent + ";" + name

                self.folder_structure[name] = [label, offset]
                self.folder_order.append(name)

                nested_files = self.add_folder_structure(path, offset + 1, label)
                if nested_files:
                    self.folder_structure[name].append(nested_files)
            elif name.endswith('.txt'):
                text_files.append([name, self.file_index])
                self.file_index += 1

        return text_files
Beispiel #3
0
 def _create_folder_clauses(self, fpath):
     for name in os.listdir(fpath):
         path = os.path.join(fpath, name)
         if os.path.isdir(path):
             if variables.noloaddir(name):
                 continue
             self._create_folder_clauses(path)
         elif name.endswith(".txt"):
             self._create_clauses(path)
Beispiel #4
0
 def create_ner(self, fpath):
     """Process the entries of ``fpath`` in sorted order.

     Entries for which ``variables.noloaddir(name)`` is truthy are ignored.
     Directories are announced on stdout and passed to
     ``self.create_ner_in_directory``; paths ending in ".txt" are passed to
     ``self.file_named_tag``.
     """
     for entry in sorted(os.listdir(fpath)):
         if not variables.noloaddir(entry):
             target = os.path.join(fpath, entry)
             if os.path.isdir(target):
                 print("training => " + entry)
                 self.create_ner_in_directory(target)
             elif target.endswith('.txt'):
                 self.file_named_tag(target)
Beispiel #5
0
    def create_subset_model(self, path, categorie):
        """Run ``create_categorie_tagging`` on ``path`` or on its sub-folders.

        When ``self._is_categorie_folder(path)`` is true, ``path`` itself is
        tagged with ``categorie``. Otherwise each sub-directory (in sorted
        order) for which ``variables.noloaddir`` is falsy is tagged, using
        the sub-directory name as its category.
        """
        if self._is_categorie_folder(path):
            self.create_categorie_tagging(path, categorie)
            return

        for entry in sorted(os.listdir(path)):
            sub_path = os.path.join(path, entry)
            if os.path.isdir(sub_path) and not variables.noloaddir(sub_path):
                self.create_categorie_tagging(sub_path, entry)
Beispiel #6
0
 def update_corpus_folder(self, path, outfile, name):
     """Mirror the tree below ``path`` into ``outfile``.

     Nothing happens when ``variables.noloaddir(path)`` is truthy. Otherwise
     the output directory is created if needed and every entry of ``path``
     (in sorted order) is handled: directories recursively, everything else
     through ``self.update_corpus_text_file``.

     Args:
         path: input directory.
         outfile: output directory; created together with any missing
             parent directories.
         name: not used by this method; kept for interface compatibility.
     """
     if variables.noloaddir(path):
         return

     # exist_ok avoids the check-then-create race of exists() + mkdir() and
     # also creates missing parent directories.
     os.makedirs(outfile, exist_ok=True)

     for fname in sorted(os.listdir(path)):
         ipath = os.path.join(path, fname)
         ofile = os.path.join(outfile, fname)
         if os.path.isdir(ipath):
             self.update_corpus_folder(ipath, ofile, fname)
         else:
             self.update_corpus_text_file(ipath, ofile, fname)
Beispiel #7
0
 def load_corpus_directories(self, path, name, level, encoding='utf-8'):
     """Load the entries of ``path`` recursively, in sorted order.

     Non-directory entries are passed to ``self.load_corpus_text_file``
     together with ``name`` and ``level``. Sub-directories are descended
     into with ``level + 1`` unless ``variables.noloaddir(fname)`` is
     truthy.

     Args:
         path: directory to load.
         name: name passed along with every file found directly in ``path``.
         level: depth value of ``path``.
         encoding: forwarded unchanged to the recursive calls; it is not
             otherwise used in this method.
     """
     for fname in sorted(os.listdir(path)):
         fpath = os.path.join(path, fname)
         if not os.path.isdir(fpath):
             self.load_corpus_text_file(path, fname, name, level)
         elif not variables.noloaddir(fname):
             # Pass the encoding on so nested directories do not silently
             # fall back to the default.
             self.load_corpus_directories(
                 fpath, fname, level + 1, encoding=encoding
             )
Beispiel #8
0
    def create_ner_in_directory(self, path):
        """Recursively process the entries of ``path`` in sorted order.

        Entries for which ``variables.noloaddir(name)`` is truthy are
        skipped. Directories are announced on stdout and processed
        recursively; files whose name ends with ".txt" are passed to
        ``self.file_named_tag``.
        """
        for entry in sorted(os.listdir(path)):
            if variables.noloaddir(entry):
                continue
            entry_path = os.path.join(path, entry)
            if os.path.isdir(entry_path):
                print("training => " + entry)
                self.create_ner_in_directory(entry_path)
                continue
            if entry.endswith('.txt'):
                self.file_named_tag(entry_path)
Beispiel #9
0
    def folder_tagging(self, path, categorie, output_data):
        """Tag every file below ``path`` with the given category.

        ``self.tagging.set_categorie(categorie)`` is called first (and again
        on every recursive call). Non-directory entries are passed to
        ``self.tagging.tagging`` together with ``output_data``. Directories
        are processed recursively unless ``variables.noloaddir`` (called
        with the full path) is truthy.
        """
        self.tagging.set_categorie(categorie)

        for entry in sorted(os.listdir(path)):
            entry_path = os.path.join(path, entry)
            if not os.path.isdir(entry_path):
                self.tagging.tagging(entry_path, output_data)
            elif not variables.noloaddir(entry_path):
                self.folder_tagging(entry_path, categorie, output_data)
Beispiel #10
0
    def create_crf_model(self, path=None, subset=True):
        """Build a model for every loadable sub-directory of ``path``.

        Args:
            path: directory to scan; defaults to the "TEXT" folder inside
                ``config.TEMPLATE_DIR`` when ``None``.
            subset: when true each sub-directory is handled by
                ``self.create_subset_model``, otherwise by
                ``self.create_categorie_tagging``.

        Entries for which ``variables.noloaddir`` (called with the full
        path) is truthy, and entries that are not directories, are ignored.
        """
        if path is None:
            path = os.path.join(config.TEMPLATE_DIR, "TEXT")

        for fname in sorted(os.listdir(path)):
            fpath = os.path.join(path, fname)
            if variables.noloaddir(fpath) or not os.path.isdir(fpath):
                continue

            if subset:
                self.create_subset_model(fpath, fname)
            else:
                self.create_categorie_tagging(fpath, fname)
Beispiel #11
0
 def create_corpus_documents(self, text_path):
     """Load all corpus documents found below ``text_path``.

     ``self.folder_structure`` is first set from
     ``self.load_folder_structure(text_path)``. The entries of ``text_path``
     are then visited in sorted order: non-directories go to
     ``self.load_corpus_text_file`` and directories go to
     ``self.load_corpus_directories`` (both with level 1), except
     directories for which ``variables.noloaddir(name)`` is truthy.
     """
     self.folder_structure = self.load_folder_structure(text_path)

     for entry in sorted(os.listdir(text_path)):
         entry_path = os.path.join(text_path, entry)
         if not os.path.isdir(entry_path):
             self.load_corpus_text_file(text_path, entry, entry, 1)
         elif not variables.noloaddir(entry):
             self.load_corpus_directories(entry_path, entry, 1)
Beispiel #12
0
    def load_directory(self, path):
        """Recursively load every ``.txt`` file below ``path``.

        Sub-directories are descended into unless
        ``variables.noloaddir(name)`` is truthy. Files whose name ends with
        ".txt" are passed to ``self.load_file``; other files are ignored.
        """
        for entry in os.listdir(path):
            entry_path = os.path.join(path, entry)
            if os.path.isdir(entry_path):
                if not variables.noloaddir(entry):
                    self.load_directory(entry_path)
            elif entry.endswith(".txt"):
                self.load_file(entry_path)
Beispiel #13
0
    def load_directory(self, path):
        """Recursively load every ``.txt`` file below ``path``, in sorted order.

        Files whose name ends with ".txt" are passed to ``self.load_file``;
        other files are ignored. Sub-directories are descended into unless
        ``variables.noloaddir(name)`` is truthy.
        """
        for entry in sorted(os.listdir(path)):
            entry_path = os.path.join(path, entry)
            if not os.path.isdir(entry_path):
                # Only text files are loaded; anything else is skipped.
                if entry.endswith(".txt"):
                    self.load_file(entry_path)
                continue
            if not variables.noloaddir(entry):
                self.load_directory(entry_path)
Beispiel #14
0
    def files_to_text(self, fpath, ofile):
        """
        Convert every file below ``fpath`` into a text file below ``ofile``.

        fpath : input directory (it is listed with ``os.listdir``)

        ofile : output directory; created, including missing parents, when
                it does not exist yet

        Entries for which ``variables.noloaddir(name)`` is truthy are
        skipped. Sub-directories are mirrored recursively. For every file,
        ``self._total`` is incremented and ``self.file_to_text`` is called;
        the output name gets a ".txt" suffix unless the input path already
        ends with ".txt" (case-insensitive).
        """
        # exist_ok avoids the check-then-create race of exists() + mkdir()
        # and also creates missing parent directories.
        os.makedirs(ofile, exist_ok=True)

        for name in os.listdir(fpath):
            if variables.noloaddir(name):
                continue
            path = os.path.join(fpath, name)
            o_file = os.path.join(ofile, name)
            if os.path.isdir(path):
                self.files_to_text(path, o_file)
                continue

            self._total += 1
            if not path.lower().endswith(".txt"):
                o_file += ".txt"

            self.file_to_text(path, o_file)