Esempio n. 1
0
class TxtBuilder:

    def __init__(self):
        """Create the loggers and the empty working state of the builder."""
        # Regular log and error log. The constructor/open arguments are handed
        # as-is to the project's Log class (presumably a file mode and a
        # level - TODO confirm against Log).
        self.log = Log("w")
        self.log.open("log/txtbuilder.log", 0)
        self.logerr = Log("a")
        self.logerr.open("log/txtbuilder.ERR.log", 1)
        # Every record received through add().
        self.data_lst = []
        # Records whose tag is not 'span' (filled by elab()).
        self.data_txt_lst = []
        # Records whose tag is 'span' (filled by elab()).
        self.data_span_lst = []
        # One {"id0", "id1", "type"} dict per span (filled by fill_from_to_list()).
        self.from_to_lst = []
        # Output text rows (filled by build_txt_rows()).
        self.txt_rows = []
        self.up = True
        # Overwritten with the 'liv' of each 'w' record by set_data_txt_list().
        self.w_liv = 100
        self.trace = False
        # Two-level substitution table parsed from the RAMIS constant.
        self.ramis=self.set_ramis_dict()

    def set_ramis_dict(self):
        js={}
        for r in RAMIS:
            k,v=r.split('|')
            js[k]={}
            ls=v.split(',')
            for xy in ls:
                x,y=xy.split(':')
                js[k][x]=y
        return js

    def get_ramis(self,key,ch):
        js=self.ramis.get(key,None)
        if js is None:
            return f"ERR{key}"
        r=js.get(ch,None)
        if r is None:
            return f"ERR{ch}"
        return r

    def fill_from_to_list(self):
        for data_span in self.data_span_lst:
            x_items = data_span.get('items', {})
            x_from = x_items.get('from', None)
            x_to = x_items.get('to', None)
            x_type = x_items.get('type', None)
            if x_from is None or x_to is None or x_type is None:
                self.logerr.log("fill_from_to_list ERROR.").prn()
                self.logerr.log(pp(data_span)).prn()
                sys.exit(1)
            item = {
                "id0": x_from,
                "id1": x_to,
                "type": x_type
            }
            self.from_to_lst.append(item)

    def from_to_set_data_txt(self):
        for i in range(0, len(self.from_to_lst)):
            from_to = self.from_to_lst[i]
            id_from = from_to['id0'].strip()
            id_to = from_to['id1'].strip()
            span_type = from_to['type'].strip()
            err = 0
            if id_from == '':
                err = 1
            if id_to == '':
                err = 2
            if err == 1:
                self.logerr.log(f"ERROR from is null. to:{id_to}.")
            elif err == 2:
                self.logerr.log(f"ERROR from={id_from}  to is null.")
            for i in range(0, len(self.data_txt_lst)):
                data_txt = self.data_txt_lst[i]
                id = data_txt['id']
                if id == '':
                    continue
                if id_from == id:
                    if span_type == MONOLOG:
                        if err == 0:
                            data_txt[START] = '['
                        else:
                            data_txt[START] = '[ERR '
                    elif span_type == DIRECT:
                        if err == 0:
                            data_txt[START] = '{'
                        else:
                            data_txt[START] = '{ERR '
                elif id_to == id:
                    if span_type == MONOLOG:
                        if err == 0:
                            data_txt[END] = ']'
                        else:
                            data_txt[END] = ' ERR]'
                    elif span_type == DIRECT:
                        if err == 0:
                            data_txt[END] = '}'
                        else:
                            data_txt[END] = ' ERR}'

    # def w_num(self, id):
    #     p = id.find('w')
    #     if p < 0:
    #         return -1
    #     return int(id[p+1:])

    def set_data_txt_list(self):
        """setta t_data utilizzano xml_data e csv_data
        """
        t_up = False
        sic = False
        w_num = 0
        for i, d in enumerate(self.data_txt_lst):
            #id = d["id"]
            liv = d["liv"]
            tag = d['tag'].lower().strip()
            d['tag'] = tag
            text = d['text'].strip()
            d['t_i'] = i
            sp = ''
            ln = False

            if text != '':
                if t_up:
                    self.data_txt_lst[i]['t_up'] = True
                    t_up = False
                if sic:
                    self.data_txt_lst[i]['text'] = ''
                    sic = False

            if tag == 'w':
                sp = ' '
                self.w_liv = liv
            elif tag == 'pc':
                if text in ['.', '!', '?']:
                    t_up = True
            elif tag in NAMES_UP:
                t_up = True
            elif tag in ['lg']:
                t_up = True
            elif tag == 'del':
                self.data_txt_lst[i]['text'] = ''
                self.data_txt_lst[i]['tail'] = ''
            elif tag == 'sic':
                sic = True
            elif tag == 'l':
                ln = True
            d['t_sp'] = sp
            d['t_ln'] = ln

    def is_in_xml_items(self, items, key, val):
        v = items.get(key, '')
        v = v.replace('#', '').strip()
        return v == val

    def build_txt_rows(self):
        """crea le righe di testo self._txt_rows
        utilizzando data_text=xml_data + csv_data + t_data
        """
        self.txt_rows = []
        words = []
        for i, d in enumerate(self.data_txt_lst):
            id = d['id']
            tag = d['tag'].strip()
            text = d['text'].strip()
            tail = d['tail'].strip()
            items = d['items']
            t_start = d['t_start']
            t_sp = d['t_sp']
            t_up = d['t_up']
            t_end = d['t_end']
            t_ln = d['t_ln']

            if tag == 'c':
                if len(text)==1:
                    k=items.get('ana',None)
                    if k is not None:
                        r=self.get_ramis(k,text)
                        text=r

            elif tag == 'w':
                # els
                if self.is_in_xml_items(items, 'ana', 'elis'):
                    text = f'{text}{ELIS}'
                    self.data_txt_lst[i+1]['t_sp'] = ''
                # encl
                if self.is_in_xml_items(items, 'ana', 'encl'):
                    text = f'{ENCL}{text} '
                    t_sp = ''

            if t_sp != '':
                words.append(t_sp)

            if t_start != '':
                words.append(t_start)

            if t_up:
                text = text.capitalize()
            else:
                text = text.lower()
            tail = tail.lower()

            w = f"{text}{tail}"
            if w != '':
                words.append(w)

            if t_end != '':
                words.append(t_end)

            if t_ln:
                row = ''.join(words)
                self.txt_rows.append(row)
                words = []
        row = ''.join(words).strip()
        self.txt_rows.append(row)

    def text_adjust(self):
        VIRG = '"'
        for i, rw in enumerate(self.txt_rows):
            rw = re.sub(r" ,", ", ", rw)
            rw = re.sub(r" ;", "; ", rw)
            rw = re.sub(r" \.", ". ", rw)

            rw = re.sub(r'\[\s*', ' "', rw)
            rw = re.sub(r'\]', '" ', rw)
            rw = re.sub(r'{\s*', ' "', rw)
            rw = re.sub(r'}', '" ', rw)
            rw = rw.replace(f"{ELIS} ", ELIS)
            rw = re.sub(r"\s{2,}", " ", rw)
            self.txt_rows[i] = rw.strip()

    def elab(self):
        for data in self.data_lst:
            if data['tag'] == 'span':
                self.data_span_lst.append(data)
            else:
                self.data_txt_lst.append(data)
        # popola la lista con gli id from to
        self.fill_from_to_list()
        # completa gli elemnti di data_txt_lst
        self.set_data_txt_list()
        # setta start ed end in datat_tx
        self.from_to_set_data_txt()
        # cra le righe di testo
        self.build_txt_rows()
        # sistema le righe du testo
        self.text_adjust()

    def add(self, data):
        """Queue one record for a later call to elab().

        Args:
            data: record appended to ``self.data_lst``; elab() reads its
                'tag' entry to decide how to route it.
        """
        self.data_lst.append(data)

    @property
    def txt(self):
        s = os.linesep.join(self.txt_rows)
        return s
Esempio n. 2
0
class PrjMgr(object):
    """
    gestisce i progetti codificti in json.
    digitando senza argomenti
    vengono visualizzate tutte le opzioni
    es.
    prjmgr.py prol_txt.json:
    
    prol_txt.json:
    {
    "log": "0",
    "exe": [
        [
        "teimxml.py ",
        "-i tou1/prol.txt",
        "-t teimcfg/teimtags.csv",
        "-o tou1/log/prol_teim.txt"
        ]
    ]
    }

    """    
    def __init__(self):
        """Create and open the error log and the regular log."""
        # Both loggers are created with "a"; the second open() argument differs
        # (1 for the error log, 0 for the regular one) - presumably a level,
        # TODO confirm against the project's Log class.
        self.logerr = Log("a")
        self.log = Log("a")
        self.logerr.open("log/prjmgr.ERR.log", 1)
        self.log.open("log/prjmgr.log", 0)

    def kv_split(self, s, sep):
        sp = s.split(sep)
        s0 = sp[0].strip()
        s1 = ''
        if len(sp) > 1:
            s1 = sp[1].strip()
        return s0, s1

    def list2str(self, data):
        if isinstance(data, str):
            return data.strip()
        s = " ".join(data)
        return s.strip()

    def get(self, js, k):
        s = js.get(k, None)
        if s is None:
            raise Exception(f"{k} not found.{os.linesep}")
        return s

    def files_of_dir(self, d, e):
        p = pl.Path(d)
        if p.exists() is False:
            raise Exception(f'{d} not found.')
        fs = sorted(list(p.glob(e)))
        return fs

    def chmod(self, path):
        os.chmod(path, stat.S_IRWXG + stat.S_IRWXU + stat.S_IRWXO)

    def include_files(self, include):
        """nel file host sostitusce ogni parametro
        con il file ad esso collegato

        Args:
            js (dict): "include". ramo del project
        """
        self.log.log(os.linesep, ">> include")
        try:
            file_host = include.get("host", None)
            file_dest = include.get("dest", None)
            file_lst = include.get("files", [])
            param_lst = include.get("params", [])
            #
            with open(file_host, "rt") as f:
                host = f.read()
            #
            for param_path in file_lst:
                param, path = self.kv_split(param_path, '|')
                self.log.log(f"{param}: {path}")
                with open(path, "rt") as f:
                    txt = f.read()
                host = host.replace(param, txt)
            #
            for key_val in param_lst:
                key, val = self.kv_split(key_val, '|')
                self.log.log(f"{key}: {val}")
                host = host.replace(key, val)
            #
            with open(file_dest, "w+") as f:
                f.write(host)
            self.chmod(file_dest)
        except Exception as e:
            self.logerr.log("include")
            self.logerr.log(e)
            sys.exit(1)

    def execute_files_of_dir(self, exe_dir):
        self.log.log(">> exe_dir").prn()
        try:
            dr = self.get(exe_dir, 'dir')
            ptrn = self.get(exe_dir, 'pattern')
            exe_lst = self.get(exe_dir, 'exe_file')
            par_name = self.get(exe_dir, 'par_name')
            par_subst = self.get(exe_dir, 'par_subst')
            # replace par in par_name
            k, v = self.kv_split(par_subst, '|')
            files = self.files_of_dir(dr, ptrn)
            for f in files:
                file_name = os.path.basename(f)
                file_par = file_name.replace(k, v)
                for exe in exe_lst:
                    exe = self.list2str(exe)
                    x = exe.replace(par_name, file_par)
                    self.log.log(x)
                    r = os.system(x)
                    if r != 0:
                        raise Exception(f"execute:{x}")
        except Exception as e:
            self.logerr.log("ERROR","exe_dir")
            self.logerr.log(e)
            # self.logerr.log(pp(exe_dir))
            #sys.exit(1)

    def remove_files_of_dir(self, remove_dir):
        self.log.log(">> remove_dir").prn()
        try:
            for de in remove_dir:
                self.log.log(de)
                dr = de.get('dir')
                ptrn = de.get('pattern')
                files = self.files_of_dir(dr, ptrn)
                for f in files:
                    self.log.log(f)
                    os.remove(f)
        except Exception as e:
            self.logerr.log("remove_dir")
            self.logerr.log(e)
            self.logerr.log(pp(remove_dir))
            #sys.exit(1)

    def merge_files_of_list(self, merge_files):
        self.log.log(">> merge_files").prn()
        out = self.get(merge_files, "out_path")
        files = self.get(merge_files, "files")
        fout = open(out, "w+")
        for f in files:
            self.log.log(f)
            with open(f, "rt") as f:
                txt = f.read()
            fout.write(txt)
            fout.write(os.linesep)
        fout.close()
        self.log.log(out)
        self.chmod(out)

    def merge_files_of_dir(self, merge_dir):
        self.log.log(">> merge_dir").prn()
        try:
            dr = self.get(merge_dir, 'dir')
            ptrn = self.get(merge_dir, 'pattern')
            out_path = self.get(merge_dir, 'out_path')
            files = self.files_of_dir(dr, ptrn)
            file_out = open(out_path, "w")
            for fpath in files:
                self.log.log(fpath)
                with open(fpath, "rt") as f:
                    txt = f.read()
                file_out.write(txt)
                file_out.write(os.linesep)
            file_out.close()
            self.chmod(out_path)
            self.log.log(out_path)
        except Exception as e:
            self.logerr.log("merge_dir")
            self.logerr.log(e)
            self.logerr.log(pp(merge_dir))
            #sys.exit(1)

    def execute_list_progs(self, exe):
        self.log.log( ">> exe").prn()
        try:
            for x in exe:
                x = self.list2str(x)
                self.log.log(x)
                r = os.system(x)
                if r != 0:
                    raise Exception(str(r))
        except Exception as e:
            self.logerr.log("exe")
            self.logerr.log(e)
            self.logerr.log(pp(exe))
            #sys.exit(1)

    def copy_file(self, copy_file):
        self.log.log(">> copy_file").prn()
        try:
            for x in copy_file:
                in_path = self.get(x, 'in_path')
                out_path = self.get(x, 'out_path')
                aw = self.get(x, "aw")
                self.log.log(in_path)
                with open(in_path, "rt") as f:
                    text = f.read()
                with open(out_path, aw) as f:
                    f.write(text)
                    if aw == 'a':
                        f.write(os.linesep)
                self.chmod(out_path)
                self.log.log(out_path)
        except Exception as e:
            self.logerr.log("copy_file")
            self.logerr.log(e)
            self.logerr.log(pp(copy_file))
            sys.exit(1)

    def write_text(self, write_text):
        self.log.log(">> write_text").prn()
        try:
            text = self.get(write_text, 'text')
            out_path = self.get(write_text, 'out_path')
            aw = self.get(write_text, "aw")
            with open(out_path, aw) as f:
                f.write(text)
                if aw == 'a':
                    f.write(os.linesep)
            self.chmod(out_path)
            self.log.log(out_path)
        except Exception as e:
            self.logerr.log("write_text")
            self.logerr.log(e)
            self.logerr.log(pp(write_text))
            sys.exit(1)

    def parse_json(self, js):
        for k, v in js.items():
            # accetta  tag del tipo exe.1 exe.2 ..
            k = k.split('.')[0]
            if k == "exe":
                self.execute_list_progs(v)
            elif k == "merge_files":
                self.merge_files_of_list(v)
            elif k == "merge_dir":
                self.merge_files_of_dir(v)
            elif k == "include":
                self.include_files(v)
            elif k == "exe_dir":
                self.execute_files_of_dir(v)
            elif k == "remove_dir":
                self.remove_files_of_dir(v)
            elif k == "write_text":
                self.write_text(v)
            elif k == "copy_file":
                self.copy_file(v)
            elif k == "log":
                l = int(v)
                self.log.set_liv(l)
            else:
                self.logerr.log(f"ERROR option:{k} not implemented")

    def parse_file(self, in_path):
        try:
            with open(in_path, "r") as f:
                txt = f.read()
            js = json.loads(txt)
        except Exception as e:
            self.logerr.log("prjmgr.py json ERROR")
            self.logerr.log(e)
            sys.exit(1)
        self.parse_json(js)

    def parse_jsons(self,*js):
        lst=list(js)
        for j in lst:
            self.parse_json(j)
Esempio n. 3
0
class TxtBuilder:

    def __init__(self):
        """Create the loggers and the empty working state of the builder."""
        # Regular log and error log. The constructor/open arguments are handed
        # as-is to the project's Log class (presumably a file mode and a
        # level - TODO confirm against Log).
        self.log = Log("w")
        self.log.open("log/txtbuilder.log", 0)
        self.logerr = Log("a")
        self.logerr.open("log/txtbuilder.ERR.log", 1)
        # Every record received through add().
        self.data_lst = []
        # Records whose tag is not 'span' (filled by elab()).
        self.data_txt_lst = []
        # Records whose tag is 'span' (filled by elab()).
        self.data_span_lst = []
        # One {"id0", "id1", "type"} dict per span (filled by fill_from_to_list()).
        self.from_to_lst = []
        # Output text rows (filled by build_txt_rows()).
        self.txt_rows = []
        self.up = True
        # Overwritten with the 'liv' of each 'w' record by set_data_txt_list().
        self.w_liv = 100
        self.trace = False
        # Two-level substitution table parsed from the RAMIS constant.
        self.ramis = self.set_ramis_dict()

    def set_ramis_dict(self):
        js = {}
        for r in RAMIS:
            k, v = r.split('|')
            js[k] = {}
            ls = v.split(',')
            for xy in ls:
                x, y = xy.split(':')
                js[k][x] = y
        return js

    def get_ramis(self, key, ch):
        js = self.ramis.get(key, None)
        if js is None:
            return f"ERR{key}"
        r = js.get(ch, None)
        if r is None:
            return f"ERR{ch}"
        return r

    def fill_from_to_list(self):
        for data_span in self.data_span_lst:
            x_items = data_span.get('items', {})
            x_from = x_items.get('from', None)
            x_to = x_items.get('to', None)
            x_type = x_items.get('type', None)
            if x_from is None or x_to is None or x_type is None:
                self.logerr.log("fill_from_to_list ERROR.").prn()
                self.logerr.log(pp(data_span)).prn()
                sys.exit(1)
            item = {
                "id0": x_from,
                "id1": x_to,
                "type": x_type
            }
            self.from_to_lst.append(item)

    def from_to_set_data_txt(self):
        for i in range(0, len(self.from_to_lst)):
            from_to = self.from_to_lst[i]
            id_from = from_to['id0'].strip()
            id_to = from_to['id1'].strip()
            span_type = from_to['type'].strip()
            err = 0
            if id_from == '':
                err = 1
            if id_to == '':
                err = 2
            if err == 1:
                self.logerr.log(f"ERROR from is null. to:{id_to}.")
            elif err == 2:
                self.logerr.log(f"ERROR from={id_from}  to is null.")
            for i in range(0, len(self.data_txt_lst)):
                data_txt = self.data_txt_lst[i]
                id = data_txt['id']
                if id == '':
                    continue
                if id_from == id:
                    if span_type == MONOLOG:
                        if err == 0:
                            data_txt[START] = '['
                        else:
                            data_txt[START] = '[ERR '
                    elif span_type == DIRECT:
                        if err == 0:
                            data_txt[START] = '{'
                        else:
                            data_txt[START] = '{ERR '
                elif id_to == id:
                    if span_type == MONOLOG:
                        if err == 0:
                            data_txt[END] = ']'
                        else:
                            data_txt[END] = ' ERR]'
                    elif span_type == DIRECT:
                        if err == 0:
                            data_txt[END] = '}'
                        else:
                            data_txt[END] = ' ERR}'

    # def w_num(self, id):
    #     p = id.find('w')
    #     if p < 0:
    #         return -1
    #     return int(id[p+1:])

    def set_data_txt_list(self):
        """setta t_data utilizzano xml_data e csv_data
        """
        t_up = False
        sic = False
        #w_num = 0
        for i, d in enumerate(self.data_txt_lst):
            #id = d["id"]
            liv = d["liv"]
            tag = d['tag'].lower().strip()
            d['tag'] = tag
            text = d['text'].strip()
            d['t_i'] = i
            sp = ''
            ln = False

            if text != '':
                if t_up:
                    self.data_txt_lst[i]['t_up'] = True
                    t_up = False
                if sic:
                    self.data_txt_lst[i]['text'] = ''
                    sic = False

            if tag == 'w':
                sp = ' '
                self.w_liv = liv
            elif tag == 'pc':
                if text in ['.', '!', '?']:
                    t_up = True
            elif tag in NAMES_UP:
                t_up = True
            elif tag in ['lg']:
                t_up = True
            elif tag == 'del':
                self.data_txt_lst[i]['text'] = ''
                self.data_txt_lst[i]['tail'] = ''
            elif tag == 'sic':
                sic = True
            elif tag == 'l':
                ln = True
            d['t_sp'] = sp
            d['t_ln'] = ln

    def is_in_xml_items(self, items, key, val):
        v = items.get(key, '')
        v = v.replace('#', '').strip()
        return v == val

    def adjust_tail_inversion(self):
        """
        <w xml:id="Kch2h1w14">des
         <expan corresp="#ab-sus-tu">t
            <ex>r</ex>u
        </expan>c
        <c ana="#hiat">i</c>on
        </w>

     <w xml:id="Kch1p1w104">
      <expan corresp="#ab-tir-9">
        <ex>con</ex>
      </expan>
      <expan corresp="#ab-tild-q">q<ex>ue</ex>
      </expan>re

     </w>

    errattO:  con q re ue
    corretto: con q ue re

        "re"  tail di expan
        "ue"  text di ex 
        "u" è stampato dopo perchè <ex> segue <expan>

        souzione:
            spostare "ue" prima di "re"
            <ex>text => prima di <expan>tail

        """
        le = len(self.data_txt_lst)-1
        for i, t_curr in enumerate(self.data_txt_lst):
            if i == 0:
                continue
            if i >= le:
                continue
            t_prec = self.data_txt_lst[i-1]
            t_succ = self.data_txt_lst[i+1]
            if t_curr['tail'] != '':
                if t_succ['liv'] > t_curr['liv']:

                    # text e tail  di <ex>
                    text_succ = t_succ['text']
                    t_succ['text'] = ''
                    tail_succ = t_succ['tail']
                    t_succ['tail'] = ''

                    # il tail di<expan>
                    tail_curr = t_curr['tail']
                    s = f'{text_succ}{tail_succ}{tail_curr}'
                    t_curr['tail'] = s

    def build_txt_rows(self):
        """crea le righe di testo self._txt_rows
        utilizzando data_text=xml_data + csv_data + t_data
        """
        self.adjust_tail_inversion()

        self.txt_rows = []
        words = []
        # n=8000
        for i, d in enumerate(self.data_txt_lst):
            id_ = d['id']
            tag = d['tag'].strip()
            text = d['text'].strip()
            tail = d['tail'].strip()
            items = d['items']

            t_start = d['t_start']
            t_sp = d['t_sp']
            t_up = d['t_up']
            t_end = d['t_end']
            t_ln = d['t_ln']

            if tag == 'c':
                if len(text) == 1:
                    k = items.get('ana', None)
                    if k is not None:
                        r = self.get_ramis(k, text)
                        text = r

            elif tag == 'w':
                # els
                if self.is_in_xml_items(items, 'ana', 'elis'):
                    text = f'{text}{ELIS}'
                    self.data_txt_lst[i+1]['t_sp'] = ''
                # encl
                if self.is_in_xml_items(items, 'ana', 'encl'):
                    text = f'{ENCL}{text} '
                    t_sp = ''

            if t_sp != '':
                words.append(t_sp)

            if t_start != '':
                words.append(t_start)

            if t_up:
                text = text.capitalize()
            else:
                text = text.lower()
            tail = tail.lower()

            w = f"{text}{tail}"

            if w != '':
                words.append(w)

            if t_end != '':
                words.append(t_end)

            if t_ln:
                row = ''.join(words)
                self.txt_rows.append(row)
                words = []

            #
            # if xtc['tag'] == 'w':
            #     xtw = xtc
            # if tail != "" and i < xle:
            #     if xts['liv'] > xtc['liv']:
            #         print(pp(xtw, 20))
            #         s = xtw['val'].replace(" ", "")
            #         print(pp(xtp, 20))
            #         print(pp(xtc, 20))
            #         print(pp(xts, 20))
            #         print(s)
            #         input("?")
            # xtc = d
            # if xtc['tag'] == 'w' and tail != "":
            #     print(pp(xtc))
            #     input("?")

            # if id_ == "Kch1p1w104":
            #     #self.trace = True
            #     pass
            # if self.trace:
            #     print(pp(d, 20))
            #     print(text)
            #     # print(d)
            #     set_trace()

            # if id_ == "Kch2h1w14":
            #     n = i
            # if tag == 'w':
            #     xtw = self.data_txt_lst[i]
            # if i == n+1:
            #     xtp = self.data_txt_lst[i-1]
            #     xtc = d
            #     xts = self.data_txt_lst[i+1]
            #     print(pp(xtw, 20))
            #     s = xtw['val'].replace(" ", "")
            #     print(pp(xtp, 20))
            #     print(pp(xtc, 20))
            #     print(pp(xts, 20))
            #     print(s)
            #     input("?")

        row = ''.join(words).strip()
        self.txt_rows.append(row)

    def text_adjust(self):
        VIRG = '"'
        for i, rw in enumerate(self.txt_rows):
            rw = re.sub(r" ,", ", ", rw)
            rw = re.sub(r" ;", "; ", rw)
            rw = re.sub(r" \.", ". ", rw)

            rw = re.sub(r'\[\s*', ' "', rw)
            rw = re.sub(r'\]', '" ', rw)
            rw = re.sub(r'{\s*', ' "', rw)
            rw = re.sub(r'}', '" ', rw)
            rw = rw.replace(f"{ELIS} ", ELIS)
            rw = re.sub(r"\s{2,}", " ", rw)
            self.txt_rows[i] = rw.strip()

    def elab(self):
        for data in self.data_lst:
            if data['tag'] == 'span':
                self.data_span_lst.append(data)
            else:
                self.data_txt_lst.append(data)
        # popola la lista con gli id from to
        self.fill_from_to_list()
        # completa gli elemnti di data_txt_lst
        self.set_data_txt_list()
        # setta start ed end in data_txt
        self.from_to_set_data_txt()
        # cra le righe di testo
        self.build_txt_rows()
        # sistema le righe du testo
        self.text_adjust()

    def add(self, data):
        """Queue one record for a later call to elab().

        Args:
            data: record appended to ``self.data_lst``; elab() reads its
                'tag' entry to decide how to route it.
        """
        self.data_lst.append(data)

    @property
    def txt(self):
        s = os.linesep.join(self.txt_rows)
        return s