# Example #1
# 0
def main(args):
    """Convert the command-line text with ``Processor.process_text`` and print it.

    ``args`` is an argv-style list: ``args[0]`` is skipped, an optional
    ``'-t'`` in ``args[1]`` selects the last element of the result instead
    of the first one, and the remaining items are joined with single spaces
    to form the text to convert.

    Always returns 0. When the conversion fails even after the text has been
    decoded from UTF-8, nothing but the argument list is printed.
    """
    print_log = False
    print(args)

    # Guard the length so that a call without any argument no longer raises
    # IndexError on ``args[1]``; the text is simply empty in that case.
    show_json = len(args) > 1 and args[1] == '-t'
    text = ' '.join(args[2:] if show_json else args[1:])

    def convert(raw):
        # Fresh delimiter lists are built for every call, as before.
        return Processor.process_text(raw, [u'@', u'{', u'}'],
                                      [u'', u'{', u'}'], 1, print_log)

    try:
        new_txt = convert(text)
    except Exception:
        # Presumably the text was a byte string (Python 2): retry once with
        # the text decoded from UTF-8.
        try:
            new_txt = convert(text.decode('utf-8'))
        except Exception:
            # Deliberate best effort: give up silently.
            return 0

    print(new_txt[-1] if show_json else new_txt[0])
    return 0
    def iterate_root_web(cls, temp_filename):
        """Convert the text of every node of an XML file and serialise it.

        The file is parsed with ``etree.parse``. For each node returned by
        ``root.iter()`` the ``text`` and the ``tail`` are passed through
        ``Processor.process_text`` when they contain one of the marker
        characters, and the node is updated with the converted string.

        Args:
            temp_filename: source accepted by ``etree.parse``.

        Returns:
            A ``(document, log)`` pair: the serialised document (keeping the
            original encoding, doctype and standalone flag) and the collected
            change records joined with newlines.
        """
        tree = etree.parse(temp_filename)
        encoding = tree.docinfo.encoding
        root = tree.getroot()
        doctype = tree.docinfo.doctype
        standalone = tree.docinfo.standalone
        log_data = []
        # The characters the original condition chain tested, in the same
        # order. Apparent duplicates are kept on purpose in case they are
        # distinct look-alike code points.
        markers = [u'i', u'I', u'і', u'ѣ', u'Ѣ', u'ъ', u'Ъ', u'ѣ', u'і']

        for child in root.iter():
            # ``text`` and ``tail`` are handled independently: a failure on
            # one of them must not prevent the other from being converted.
            for attr in ('text', 'tail'):
                try:
                    value = getattr(child, attr)
                    if value is None or not any(m in value for m in markers):
                        continue
                    new_value, changes, _, _ = Processor.process_text(
                        value, 1, META['old_new_delimiters'][
                            META['current_delimiters_xml']], 0)
                    setattr(child, attr, new_value)
                    if changes:
                        log_data.append(changes)
                except Exception:
                    # Best effort: a node that cannot be converted is left
                    # untouched and the walk goes on.
                    continue

        new_text = etree.tostring(root,
                                  xml_declaration=True,
                                  encoding=encoding,
                                  standalone=standalone,
                                  doctype=doctype)

        # These tags end up escaped in the output (presumably because the
        # conversion inserted them as plain text); turn them back into markup.
        for tag in ('choice', 'reg', 'orig'):
            new_text = new_text.replace('&lt;' + tag + '&gt;',
                                        '<' + tag + '>')
            new_text = new_text.replace('&lt;/' + tag + '&gt;',
                                        '</' + tag + '>')

        return new_text, u'\n'.join(log_data)
    def log(self):
        """Convert the entered text and hand the change log to ``Show``.

        The text is read from ``self.entered`` and converted with the
        delimiters currently selected for plain text; only the second
        element of the result (the changes) is used.
        """
        source = self.entered.get('1.0', 'end')
        converted = Processor.process_text(
            source, 1,
            META['old_new_delimiters'][META['current_delimiters_text']])
        _, changes, _, _ = converted

        viewer = Show(self.top, changes)
    def ok(self):
        """Convert the entered text and display it in ``self.result``.

        The text is read from ``self.entered`` and converted with the
        delimiters currently selected for plain text; the previous content
        of the result widget is replaced by the converted text.
        """
        source = self.entered.get('1.0', 'end')
        converted, _, _, _ = Processor.process_text(
            source, 1,
            META['old_new_delimiters'][META['current_delimiters_text']])

        # The widget is left disabled afterwards; it is switched to
        # 'normal' only while its content is being rewritten.
        target = self.result
        target.config(state='normal')
        target.delete("1.0", "end")
        target.insert("end", converted)
        target.config(state='disabled')
    def load(cls, text, root, delimiters):
        """Read the file named in ``META['filename']`` and transliterate it.

        Progress messages are written to the ``text`` widget. After the file
        has been read as UTF-8, ``Dialog.dialog`` is called; the content is
        converted and handed to ``SaveText.save_translit_text`` only if
        ``META['flag']`` equals 1 at that point. The flag is reset to 0
        afterwards.

        Args:
            text: widget that receives the status messages (must provide
                ``config``, ``insert`` and ``delete``).
            root: passed on to ``Dialog.dialog`` and ``Error.dialogError``.
            delimiters: passed on to ``Processor.process_text``.

        Raises:
            Any exception other than IOError and ValueError is re-raised
            after the error dialog has been shown.
        """

        def show_status(message, clear=True):
            # The widget is left disabled after every update; it is
            # switched to 'normal' only while a message is written.
            text.config(state='normal')
            if clear:
                text.delete("1.0", "end")
            text.insert("end", message)
            text.config(state='disabled')

        def report(problem):
            # All error dialogs share the same prefix and closing hint.
            Error.dialogError(
                u"Проблема с чтением файла " + name + problem +
                u"\nВыберите другой файл.", root)

        show_status(u"В обработке...\n", clear=False)

        # Computed once and reused by every error message below.
        name = os.path.basename(META['filename'])

        try:
            with codecs.open(META['filename'], 'r', 'utf-8') as f:
                data = f.read()  # whole file content
            show_status(u"Файл загружен\n")
            Dialog.dialog(root, text)

            check_brackets = 1  # take brackets into account
            # NOTE(review): the original author marked this check with
            # question marks; presumably Dialog.dialog sets the flag once
            # output file names have been entered - confirm.
            if META['flag'] == 1:
                new_text, changes, _, _ = Processor.process_text(
                    data, 1, delimiters, check_brackets)  # transliterated
                SaveText.save_translit_text(text, new_text, changes)
            else:
                show_status(u"Вы не ввели имена выходных файлов\n")
            META['flag'] = 0

        except IOError as e:
            report(u": I/O error({0}): {1}".format(e.errno, e.strerror))
        except ValueError:
            # UnicodeDecodeError is a ValueError subclass, so a file that
            # is not valid UTF-8 is reported here.
            report(u": Неверный формат данных.")
        except:
            # Bare except is intentional: the error is shown to the user
            # and then re-raised unchanged.
            report(u": Неизвестная ошибка: " + str(sys.exc_info()[0]))
            raise
    def res(self):
        """Convert the entered text and save it as a UTF-8 ``.txt`` file.

        The file is created under ``META['default_directory']``. Its base
        name is read from ``self.out``; an empty value falls back to
        ``result.txt``.
        """
        source = self.entered.get('1.0', 'end')
        converted, _, _, _ = Processor.process_text(
            source, 1,
            META['old_new_delimiters'][META['current_delimiters_text']])

        base_name = self.out.get()
        if base_name == '':
            target = META['default_directory'] + 'result.txt'
        else:
            target = META['default_directory'] + base_name + '.txt'

        with codecs.open(target, 'w', 'utf-8') as out_file:
            out_file.write(converted)
    def iterate_root(cls, temp_filename, int_root, one_iteration):
        """Convert the text of every node of an XML/HTML file and serialise it.

        For each node returned by ``root.iter()`` the ``text`` and the
        ``tail`` are passed through ``Processor.process_text`` (with bracket
        checking enabled) when they contain one of the marker characters,
        and the node is updated with the converted string.

        Args:
            temp_filename: source accepted by ``etree.parse``.
            int_root: passed to ``Error.dialogError`` when parsing fails.
            one_iteration: when truthy, a parse failure is reported through
                ``Error.dialogError``; otherwise the parse error propagates.

        Returns:
            ``(document, log_data, wrong_log)``: the serialised document and
            two lists with the non-empty ``changes`` / ``wrong_changes``
            values returned by the conversion. When ``one_iteration`` is
            truthy and parsing fails, ``(-1, -1)`` is returned instead.
        """
        if one_iteration:
            try:
                tree = etree.parse(temp_filename)
            except Exception:
                Error.dialogError(u'Ошибка в структуре xml/html', int_root)
                # NOTE(review): two values here versus three on success;
                # kept as is because callers are not visible from here.
                return -1, -1
        else:
            tree = etree.parse(temp_filename)

        encoding = tree.docinfo.encoding
        root = tree.getroot()
        doctype = tree.docinfo.doctype
        standalone = tree.docinfo.standalone
        log_data = []
        wrong_log = []
        check_brackets = 1
        # Apparent duplicates are kept on purpose in case they are distinct
        # look-alike code points; u']' is the additional bracket marker.
        markers = [u'i', u'I', u'і', u'ѣ', u'Ѣ', u'ъ', u'Ъ', u'ѣ', u'і',
                   u']']

        for child in root.iter():
            # ``text`` and ``tail`` are handled independently: a failure on
            # one of them must not prevent the other from being converted.
            for attr in ('text', 'tail'):
                try:
                    value = getattr(child, attr)
                    if value is None or not any(m in value for m in markers):
                        continue
                    new_value, changes, wrong_changes, _ = \
                        Processor.process_text(
                            value, 1, META['old_new_delimiters']
                            [META['current_delimiters_xml']], check_brackets)
                    setattr(child, attr, new_value)
                    if changes:
                        log_data.append(changes)
                    if wrong_changes:
                        wrong_log.append(wrong_changes)
                except Exception:
                    # Best effort: a node that cannot be converted is left
                    # untouched and the walk goes on.
                    continue

        new_text = etree.tostring(root,
                                  xml_declaration=True,
                                  encoding=encoding,
                                  standalone=standalone,
                                  doctype=doctype)

        # These tags end up escaped in the output (presumably because the
        # conversion inserted them as plain text); turn them back into markup.
        for tag in ('choice', 'reg', 'orig', 'sic', 'corr'):
            new_text = new_text.replace('&lt;' + tag + '&gt;',
                                        '<' + tag + '>')
            new_text = new_text.replace('&lt;/' + tag + '&gt;',
                                        '</' + tag + '>')

        # ``<choice ...>`` tags carrying an attribute. The second replacement
        # has to run after the loop above because its pattern contains an
        # already un-escaped ``<sic>``.
        new_text = new_text.replace('&lt;choice original_editorial_correction',
                                    '<choice original_editorial_correction')
        new_text = new_text.replace("'&gt;<sic>", "'><sic>")
        return new_text, log_data, wrong_log
def web_converter():
    """Handle the converter page (presumably a Flask view - confirm).

    A non-POST request renders the empty form. A POST request does one of
    the following, depending on the submitted fields:

    * ``go`` - converts the text from ``inp_text`` and renders it;
    * ``clean`` - renders the form with both text areas emptied;
    * ``download_txt`` - converts every uploaded ``f_txt`` file (read as
      UTF-8) and returns the results as ``files.zip``;
    * ``download_xml`` - does the same for the ``f_xml`` files through
      ``LoadData.iterate_root_web``.

    Files that cannot be processed are listed in ``errors.txt`` inside the
    archive instead of aborting the request.
    """
    input_text = ''
    output_text = ''

    if request.method == 'POST':
        if META['tmp_folder'] != '':
            # ignore_errors: a recorded folder that is already gone (or was
            # never created, see the note in the txt branch) must not make
            # every following request fail.
            shutil.rmtree(META['tmp_folder'], ignore_errors=True)
            META['tmp_folder'] = ''

        input_text = request.values.get('inp_text')
        both = request.form.getlist('both')

        if 'go' in request.values:
            output_text, _, _, _ = _convert_form_text(input_text, both)

        if 'clean' in request.values:
            input_text = ''
            output_text = ''

        if 'download_txt' in request.values:
            ftxt = request.files.getlist("f_txt")
            errors = []
            curr_time = time.time()
            clean_store(curr_time)
            tmp_folder = str(curr_time) + '_ptc'
            # NOTE(review): the recorded path is prefixed with
            # META['tmp_store'] while the folder is created (and used below)
            # without that prefix. Unless tmp_store is empty, the cleanup at
            # the top of this function targets a different path - confirm
            # the intended location.
            META['tmp_folder'] = META['tmp_store'] + tmp_folder
            os.mkdir(tmp_folder)
            for el in ftxt:
                try:
                    META['filename'] = secure_filename(el.filename)
                    new_text, changes, _, _ = _convert_form_text(
                        el.read().decode('utf-8'), both)
                    # NOTE(review): looks like a debugging leftover (fixed
                    # file name in the working directory, overwritten for
                    # every upload); kept in case something reads it.
                    with codecs.open('log', 'w', 'utf-8') as ou:
                        ou.write(changes)
                    fnpath, lpath = _output_paths(tmp_folder,
                                                  META['filename'])
                    with codecs.open(fnpath, 'w', 'utf-8') as ou1:
                        ou1.write(new_text)
                    with codecs.open(lpath, 'w', 'utf-8') as ou2:
                        ou2.write(changes)
                except Exception:
                    # Best effort: report the file and go on with the rest.
                    errors.append('Error: file ' +
                                  secure_filename(el.filename))
            return _zip_response(tmp_folder, errors)

        if 'download_xml' in request.values:
            fxml = request.files.getlist("f_xml")
            errors = []
            tmp_folder = str(time.time()) + '_ptc'
            META['tmp_folder'] = tmp_folder
            os.mkdir(tmp_folder)
            for el in fxml:
                try:
                    META['filename'] = secure_filename(el.filename)
                    temp_filename = LoadData.get_temp_web(tmp_folder, el)
                    new_text, changes = LoadData.iterate_root_web(
                        temp_filename)
                    os.remove(temp_filename)
                    fnpath, lpath = _output_paths(tmp_folder,
                                                  META['filename'])
                    # No encoding here: the serialised document is written
                    # as returned by iterate_root_web.
                    with codecs.open(fnpath, 'w') as ou1:
                        ou1.write(new_text)
                    if changes != u'':
                        with codecs.open(lpath, 'w', 'utf-8') as ou2:
                            ou2.write(changes)
                except Exception:
                    # Best effort: report the file and go on with the rest.
                    errors.append('Error: file ' +
                                  secure_filename(el.filename))
            return _zip_response(tmp_folder, errors)

    return render_template("prereform_to_contemporary.html",
                           output_text=output_text,
                           input_text=input_text)


def _convert_form_text(source, both):
    """Run ``Processor.process_text`` with the options chosen in the form."""
    # Bracket checking is enabled only for delimiter sets whose name
    # contains 'spell'.
    check_brackets = 1 if 'spell' in META['current_delimiters_text'] else 0
    show_both = 1 if 'show' in both else 0
    return Processor.process_text(
        source, show_both,
        META['old_new_delimiters'][META['current_delimiters_text']],
        check_brackets)


def _output_paths(tmp_folder, filename):
    """Return the result path and the log path for *filename*."""
    name, suffix = os.path.splitext(filename)
    if suffix == '':
        suffix = '.txt'
    return (tmp_folder + '/' + name + "_transliterated" + suffix,
            tmp_folder + '/' + name + '_log.txt')


def _zip_response(tmp_folder, errors):
    """Write ``errors.txt`` if needed and build the ``files.zip`` response."""
    if errors:
        with codecs.open(tmp_folder + '/' + 'errors.txt', 'w',
                         'utf-8') as ou3:
            ou3.write(u'\n'.join(errors))
    response = Response(generator(tmp_folder), mimetype='application/zip')
    response.headers['Content-Disposition'] = \
        'attachment; filename={}'.format('files.zip')
    return response