Ejemplo n.º 1
0
# -*- coding: utf-8 -*-
from hip2unicode.tools import corpus_converter
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion
from hip2unicode.conversions import antconc_ucs8

# For reference, the signature of the function called below:
# def corpus_converter(path=None, corpus_folder='corpus', converted_corpus_folder='converted_corpus', conversions=None):

# Compile the antconc_ucs8 table once; it is handed to all_hip_conversions()
# as the 'slav' conversion.
compiled_conversion = compile_conversion(antconc_ucs8.conversion)

# Keyword arguments forwarded to corpus_converter(); parameters that are not
# listed here are left at the callee's defaults.
kwargs = dict(
    converted_corpus_folder='corpus-ucs8',
    conversions=all_hip_conversions(slav=compiled_conversion),
)

corpus_converter.corpus_converter(**kwargs)
Ejemplo n.º 2
0
# -*- coding: utf-8 -*-
import sys

from hip2unicode.functions import all_hip_conversions, compile_conversion, hip2unicode
from hip2unicode.conversions import antconc_ucs8

# Build the conversion set: the compiled antconc_ucs8 table is passed to
# all_hip_conversions() as the 'slav' conversion.
compiled_conversion = compile_conversion(antconc_ucs8.conversion)
conversions = all_hip_conversions(slav=compiled_conversion)

# The text to convert is taken from the command-line arguments; without any
# the script reports the problem and exits with status 1.
if len(sys.argv) < 2:
    # Single-argument print(...) produces the same output on Python 2 and
    # also parses on Python 3, unlike the bare print statement.
    print('Необходимо передать текст для конвертации в виде аргументов.')
    sys.exit(1)

# All arguments are joined with single spaces into one text.
text = ' '.join(sys.argv[1:])
if isinstance(text, bytes):
    # Python 2 delivers argv as byte strings, which must be decoded to
    # unicode. On Python 3 argv is already text (and str has no .decode()).
    text = text.decode('utf-8')
print(hip2unicode(text, conversions))
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import sys

import corpus_converter
from hip2unicode.conversions import hip_civilrus_accented
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion

# Per-key settings passed to all_hip_conversions(): 'slav' gets the compiled
# hip_civilrus_accented table, the other three keys get the string 'delete'.
conversions = dict(
    slav=compile_conversion(hip_civilrus_accented.conversion),
    rus='delete',
    lat='delete',
    grec='delete',
)

# Defaults; the command-line arguments handled below may override or extend
# them.
args = dict(
    converted_corpus_folder='corpus-civilrus',
    conversions=all_hip_conversions(**conversions),
)

# First CLI argument (optional): corpus folder. An empty string is ignored.
corpus_folder = sys.argv[1] if len(sys.argv) > 1 else None
if corpus_folder:
    args['corpus_folder'] = corpus_folder

# Second CLI argument (optional): output folder. It is honoured only when
# exactly two arguments were given (len(sys.argv) == 3).
converted_corpus_folder = sys.argv[2] if len(sys.argv) == 3 else None
if converted_corpus_folder:
    args['converted_corpus_folder'] = converted_corpus_folder
Ejemplo n.º 4
0
import re

from hip2unicode.functions import convert
from hip2unicode.functions import compile_conversion
from hip2unicode.conversions import antconc_ucs8
from hip2unicode.conversions import antconc_ucs8_corrupted_antconc
from hip2unicode.conversions import antconc_ucs8_without_aspiration
from hip2unicode.conversions import antconc_civilrus
from hip2unicode.conversions import antconc_antconc_wo_titles

# Each conversion table is compiled once, at import time, and kept in a
# module-level name.
compiled_conversion_antconc_anticorrupt = compile_conversion(
    antconc_ucs8_corrupted_antconc.conversion
)
compiled_conversion_civil = compile_conversion(
    antconc_civilrus.conversion
)
compiled_conversion_with_aspiration = compile_conversion(
    antconc_ucs8.conversion
)
compiled_conversion_without_aspiration = compile_conversion(
    antconc_ucs8_without_aspiration.conversion
)
compiled_conversion_wo_titles = compile_conversion(
    antconc_antconc_wo_titles.conversion
)

def html_escape(text):
    """Return *text* with the five HTML-special characters escaped.

    ``&``, ``<`` and ``>`` become named entities; the double and single
    quote become the numeric entities ``&#34;`` and ``&#39;``.
    """
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the later replacements would be escaped a second time.
    for raw, entity in (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&#34;'),
        ("'", '&#39;'),
    ):
        text = text.replace(raw, entity)
    return text

def html_unescape(text):
    """Return *text* with the entities produced by ``html_escape`` reversed.

    Handles ``&#39;``, ``&#34;``, ``&gt;``, ``&lt;`` and ``&amp;``. The
    replacements run in the reverse order of ``html_escape``.
    """
    text = text.replace('&#39;', "'")
    text = text.replace('&#34;', '"')
    text = text.replace('&gt;', '>')
    text = text.replace('&lt;', '<')
    # '&amp;' must be last so that an escaped ampersand followed by e.g.
    # 'lt;' is not mistaken for an entity and unescaped twice.
    return text.replace('&amp;', '&')
Ejemplo n.º 5
0
# -*- coding: UTF-8 -*-
import re

from hip2unicode.functions import convert
from hip2unicode.functions import compile_conversion
from hip2unicode.conversions import antconc_ucs8
from hip2unicode.conversions import antconc_ucs8_without_aspiration
from hip2unicode.conversions import antconc_civilrus
from hip2unicode.conversions import antconc_antconc_wo_titles

# Each conversion table is compiled once, at import time, and kept in a
# module-level name.
compiled_conversion_wo_titles = compile_conversion(
    antconc_antconc_wo_titles.conversion
)
compiled_conversion_with_aspiration = compile_conversion(
    antconc_ucs8.conversion
)
compiled_conversion_without_aspiration = compile_conversion(
    antconc_ucs8_without_aspiration.conversion
)
compiled_conversion_civil = compile_conversion(
    antconc_civilrus.conversion
)

def html_escape(text):
    """Return *text* with the five HTML-special characters escaped.

    ``&``, ``<`` and ``>`` become named entities; the double and single
    quote become the numeric entities ``&#34;`` and ``&#39;``.
    """
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the later replacements would be escaped a second time.
    for raw, entity in (
        (u'&', u'&amp;'),
        (u'<', u'&lt;'),
        (u'>', u'&gt;'),
        (u'"', u'&#34;'),
        (u"'", u'&#39;'),
    ):
        text = text.replace(raw, entity)
    return text

def html_unescape(text):
    """Return *text* with the entities produced by ``html_escape`` reversed.

    Handles ``&#39;``, ``&#34;``, ``&gt;``, ``&lt;`` and ``&amp;``.
    """
    # '&amp;' is deliberately last: an escaped ampersand followed by e.g.
    # 'lt;' must come back as the literal text '&lt;', not as '<'.
    for entity, raw in (
        (u'&#39;', u"'"),
        (u'&#34;', u'"'),
        (u'&gt;', u'>'),
        (u'&lt;', u'<'),
        (u'&amp;', u'&'),
    ):
        text = text.replace(entity, raw)
    return text
Ejemplo n.º 6
0
# -*- coding: utf-8 -*-
import sys

from hip2unicode.conversions import hip_civilrus
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion
from hip2unicode.tools import corpus_converter

# Per-key settings passed to all_hip_conversions(): 'slav' gets the compiled
# hip_civilrus table, the other three keys get the string 'delete'.
conversions = dict(
    slav=compile_conversion(hip_civilrus.conversion),
    rus='delete',
    lat='delete',
    grec='delete',
)

# Defaults; the command-line arguments handled below may override or extend
# them.
args = dict(
    converted_corpus_folder='corpus-civilrus',
    conversions=all_hip_conversions(**conversions),
)

# Optional positional CLI arguments: [corpus_folder [converted_corpus_folder]].
# The second one is honoured only when exactly two arguments were given
# (len(sys.argv) == 3).
corpus_folder = sys.argv[1] if len(sys.argv) > 1 else None
converted_corpus_folder = sys.argv[2] if len(sys.argv) == 3 else None

# Empty strings are treated like missing arguments.
if corpus_folder:
    args['corpus_folder'] = corpus_folder
if converted_corpus_folder:
    args['converted_corpus_folder'] = converted_corpus_folder
Ejemplo n.º 7
0
# -*- coding: utf-8 -*-
import sys

from hip2unicode.conversions import hip_civilrus_accented
from hip2unicode.functions import all_hip_conversions
from hip2unicode.functions import compile_conversion
from hip2unicode.tools import corpus_converter

# Per-key settings passed to all_hip_conversions(): 'slav' gets the compiled
# hip_civilrus_accented table, the other three keys get the string 'delete'.
conversions = dict(
    slav=compile_conversion(hip_civilrus_accented.conversion),
    rus='delete',
    lat='delete',
    grec='delete',
)

# Defaults; the command-line arguments handled below may override or extend
# them.
args = dict(
    converted_corpus_folder='corpus-civilrus',
    conversions=all_hip_conversions(**conversions),
)

# First CLI argument (optional): corpus folder. An empty string is ignored.
corpus_folder = sys.argv[1] if len(sys.argv) > 1 else None
if corpus_folder:
    args.update(corpus_folder=corpus_folder)

# Second CLI argument (optional): output folder. It is honoured only when
# exactly two arguments were given (len(sys.argv) == 3).
converted_corpus_folder = sys.argv[2] if len(sys.argv) == 3 else None
if converted_corpus_folder:
    args.update(converted_corpus_folder=converted_corpus_folder)