Exemple #1
0
    def __init__(self, fpath, lang, regex, any_ext=False):
        """
        Read a source file, validate it and parse it when it has matches.

        :param fpath: path of the file to read
        :param lang: language name; must be a key of LANGUAGE_EXTENSIONS
        :param regex: object indexed by name; its 'main' entry is passed
            to find_matches together with the file content
        :param any_ext: when true, the file-extension check is skipped
        :raises ValueError: if lang is not a key of LANGUAGE_EXTENSIONS
        :raises FileExistsError: if the extension is not listed for lang
            (and any_ext is false), or if the file content is empty once
            surrounding whitespace is stripped
        """
        if lang not in LANGUAGE_EXTENSIONS:
            raise ValueError('Unrecognized input language "{:s}"'.format(lang))

        # The extension is only looked at when the caller has not opted out.
        if not any_ext and getext(fpath) not in LANGUAGE_EXTENSIONS[lang]:
            raise FileExistsError('File extension is not supported')

        # The content is stored on the instance before it is validated.
        self.content = readfile(fpath)
        is_blank = self.content.strip() == ''
        if is_blank:
            raise FileExistsError('File is empty')

        self.lang = lang.lower()
        self.regex = regex
        self.nodes = OrderedDict()

        # Parsing only runs when find_matches returned something truthy.
        self.founds = find_matches(self.content, self.regex['main'])
        if self.founds:
            self.parse()
Exemple #2
0
def parse(fpath):
    """
    Dispatch a file to the parser registered for its extension.

    The extension of ``fpath`` is looked up in LANGUAGE_EXTENSIONS; the
    first language whose extension collection contains it names an
    attribute of this module, which is called with ``fpath``.

    :param fpath: path of the file to parse
    :return: whatever the selected parser returns for ``fpath``
    :raises FileNotFoundError: if ``fpath`` is not an existing file
    :raises FileExistsError: if no language lists the file's extension
    """
    if not isfile(fpath):
        raise FileNotFoundError('File is not existed')

    extension = getext(fpath)

    # Walk the language table and stop at the first language that
    # claims this extension.
    for language, supported in LANGUAGE_EXTENSIONS.items():
        if extension in supported:
            handler = getattr(sys.modules[__name__], language)
            break
    else:
        handler = None

    if handler is None:
        raise FileExistsError('File extension is not supported')

    return handler(fpath)
Exemple #3
0
 def __parse(self):
     """
     Fill ``self.pages`` from the XML file, then call rmfile on it.

     Every 'page' element found from the document root contributes one
     entry: getext is applied to each of its 'text' children, the
     results are stripped, joined with TXTSEP and stripped again.
     """
     assert isxml(self.xml_file)
     with open(self.xml_file, 'rb') as xml_stream:
         document = ElementTree.parse(xml_stream)
     for page_node in document.findall('page'):
         fragments = [
             getext(text_node).strip()
             for text_node in page_node.findall('text')
         ]
         self.pages.append(TXTSEP.join(fragments).strip())
     # NOTE(review): presumably removes the XML file once it has been
     # read; confirm against rmfile's definition.
     rmfile(self.xml_file)
Exemple #4
0
 def update_gui(self):
     """
     Refresh the selection label with the current paths and details.

     The label always lists the selected file, directory and output
     directory.  When a file is selected, its extension (or the
     description of that extension from the language table, if one
     exists) is appended.  Otherwise, when a directory is selected, the
     directory is scanned and the file count, total size, recognised
     extensions and conversion mode are appended.
     """
     text = self.lng['label_file'] + str(self.set.set_file) + '\n' + \
            self.lng['label_dir'] + str(self.set.set_dir) + '\n' + \
            self.lng['label_dirout'] + str(self.set.set_dirout)
     if self.set.set_file != self.set.NOP:
         ext = helpers.getext(self.set.set_file)
         # Prefer the description from the language table and fall back
         # to the raw extension when there is no entry for it.  Only a
         # missing key is tolerated; other errors are not swallowed.
         try:
             ext = self.lng['ext_%s' % ext]
         except KeyError:
             pass
         text = text + '\n' + self.lng['label_type'] + ext
     elif self.set.set_dir != self.set.NOP:
         # A directory is selected, so calculate the number and the
         # size of the files it contains.
         # TODO: Add an option to skip the calculation when the folder
         # is unchanged (self.set.previous_folder); for the time being
         # it is always recalculated.
         # Inform the user that this might take time.
         self.master.update_status('label_pleasewait', 0)
         # Pass the extension list, depending on the conversion mode.
         if self.set.set_convmode == 'tolat':
             extensions = self.set.set_extensions
             self.master.tocyr.EXT = self.set.extensions
         else:
             extensions = self.set.set_extensions_tocyr
             self.master.tocyr.EXT = self.set.extensions_tocyr
         self.master.tocyr.RECURSIVE = self.set.set_recursive
         self.filecount, self.filesize = (
             self.master.tocyr.calculatedirsize(self.set.set_dir)
         )
         self.set.previous_folder = self.set.set_dir
         if not self.filecount:
             self.master.update_status('label_nosupportedfiles', 0)
         else:
             self.master.update_status('label_ok', 0)
         # The number of files
         text = text + '\n' + self.lng['label_number'] % self.filecount
         # The size of file(s).  The label is formatted twice: '%0.2f'
         # is first substituted into the label, and the resulting
         # template is then filled with the size.
         text = text + '\n' + self.lng['label_size'] % '%0.2f' % self.filesize
         # The list of recognised extensions
         text = text + '\n' + self.lng['options_extensions'] + \
                extensions.replace(",", ", ")
         # Conversion mode
         text = text + '\n' + self.lng['label_conv%s' % self.set.set_convmode]
     self.label_selection.configure(text=text, justify='left')
Exemple #5
0
 def check_allowed_extensions(self):
     """
     Reset the selected file if the conversion mode cannot handle it.

     Only acts when a file is selected and the conversion mode is
     'tocyr': if the file's extension is not in ``self.tocyr.EXTENSIONS``
     the file selection is reset to NOP and a warning box is shown.
     """
     settings = self.set
     file_chosen = settings.set_file != settings.NOP
     # Nothing to verify without a selected file or outside 'tocyr' mode.
     if not (file_chosen and settings.set_convmode == 'tocyr'):
         return
     if helpers.getext(settings.set_file) in self.tocyr.EXTENSIONS:
         return
     # The conversion mode cannot be applied to this file, so reset the
     # path and tell the user.
     settings.set_file = settings.NOP
     messagebox.showwarning('', self.lng['msg_extension_not_supported'])