def add_arguments(title):
    parser = fill_arg_for_processor(title)
    parser.add_argument('--ignore_mess', default=None)
    parser.add_argument('--show_mess', default=None)
    parser.add_argument('--show_files', default=None)
    parser.add_argument('--scheme', default=None)
    parser.add_argument('--media', default=False, action='store_true')
    return parser
                    fout.write('"{0}" - {1} \n'.format(val_k,val_v[2]))
        # self.put_counter('changes_zh.txt', self.changes_zh)
        # self.put_counter('changes_zh1.txt', self.changes_zh1)
        '''
        if self.feats_loader.wrong:
            if '' in self.feats_loader.wrong:
                print("Empty!")
            print("Wrong feats: {0}".format(','.join(sorted(self.feats_loader.wrong))))

        with open(Path('~/Documents/china_gr.txt').expanduser(), 'w') as fout:
            fout.write("china feats:\n\n")
            fout.write('\n'.join(sorted(self.feats))+'\n')
            fout.write("china feat sets:\n\n")
            fout.write('\n'.join(sorted(self.feat_grs))+'\n')
        '''


if __name__ == '__main__':
    parser = fill_arg_for_processor('feat checker')
    parser_args = parser.parse_args()
    feats_checker = FeatsChecker(parser_args)
    feats_checker.process()
    feats_checker.put_info()
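# The examples on this page all build on a shared helper, fill_arg_for_processor,
# whose definition is not shown here. The stand-in below is only a sketch of its
# likely shape so the snippets can be tried in isolation; the option names
# --inppath and --outpath and the second parameter are assumptions, not the real API.
import argparse


def fill_arg_for_processor(title, second_flag=False):
    # Hypothetical stand-in: the real helper presumably builds the base argparse
    # parser that add_arguments() and the __main__ blocks extend. The meaning of
    # the second positional argument seen in the ConverterStihi example below is
    # not documented in these fragments.
    parser = argparse.ArgumentParser(description=title)
    parser.add_argument('--inppath', required=True,
                        help='input file or directory ("|"-separated list allowed)')
    parser.add_argument('--outpath', default=None,
                        help='where processed output is written')
    return parser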







Example #3
class VersesCutter(ProcessorBasic):
    def process_lxml_tree(self, tree):
        handler = MyContentHandler()
        sax.saxify(tree, handler)
        if handler.max_br > 0:
            print(self.inpname, handler.max_br, "stanzas")
        return handler.etree


class VersesCutter2(ProcessorBasic):
    def __init__(self, args):
        super().__init__(args)
        self.res = open(args.found, "w")

    def process_lxml_tree(self, tree):
        verses = tree.xpath('//p[@class="verse"]')
        for verse in verses:
            # count <br> elements inside this verse only (relative path);
            # an absolute //br would count them over the whole document
            lines = verse.xpath("count(.//br)")
            if lines > 1000:
                print(self.inpname, lines, "lines in a stanza")
                self.res.write(self.inpname + "\n")
        return None


if __name__ == '__main__':
    parser = fill_arg_for_processor('verses cutter')
    parser.add_argument("--found", required=True)
    parser_args = parser.parse_args()
    cutter = VersesCutter2(parser_args)
    cutter.process()
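# Note on the relative XPath above: count(//br) is evaluated against the whole
# document regardless of the context node, so inside the per-verse loop it would
# always return the global total. A self-contained check with plain lxml
# (no project code involved):
from lxml import etree

doc = etree.fromstring(
    '<body>'
    '<p class="verse">one<br/>two<br/>three<br/></p>'
    '<p class="verse">only line<br/></p>'
    '</body>'
)
for verse in doc.xpath('//p[@class="verse"]'):
    print(verse.xpath('count(.//br)'))   # 3.0, then 1.0; count(//br) would give 4.0 both times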
Example #4
                self.outfile_write(attr)
                start = False
            self.outfile_write('}')
        # self.outfile_write('['+to_str[info[TEXT_POS]]+', '+to_str[info[TAIL_POS]]+']')
        self.outfile_write(
            '\t' + os.path.relpath(info[FIRST_DOC_POS], self.common_part) +
            '\t\n')
        for value in info[ELEM_POS].values():
            self.put_info(value, shift + info[TAG_POS] + '/')

    def process(self):
        inp_paths = self.inppath.split('|')
        for self.inppath in inp_paths:
            if not super(Schema, self).process():
                return
        self.common_part = os.sep.join(self.common_part)
        with open(self.schema, 'w') as self.outfile:
            self.outfile_write('tags sequence\texample\tcomment\n')
            for value in self.glob_info.values():
                self.put_info(value, '')
            self.outfile_write('\n(common path of examples - ' +
                               self.common_part + ')')


if __name__ == '__main__':
    parser = fill_arg_for_processor('schema processing')
    parser.add_argument('--schema', required=True)
    parser_args = parser.parse_args()
    processor = Schema(parser_args)
    processor.process()
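# The Schema example prints each example path relative to self.common_part, the
# common prefix of all input paths; how that prefix is accumulated is not shown
# in this fragment. A minimal standard-library sketch of the same idea
# (an illustration, not the project's actual code):
import os

paths = ['/corpus/novels/a/book1.xml',
         '/corpus/novels/b/book2.xml',
         '/corpus/novels/b/sub/book3.xml']
common = os.path.commonpath(paths)        # '/corpus/novels'
for p in paths:
    print(os.path.relpath(p, common))     # 'a/book1.xml', 'b/book2.xml', 'b/sub/book3.xml'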
Example #5
            for lang, l_counter in self.counters.items():
                with open(root_dir/Path(lang+".txt"), 'w') as f_count:
                    for stat in l_counter.most_common():
                        sym = stat[0]
                        if len(sym) > 1:
                            raise Exception('logic error')
                        cat = unicodedata.category(sym)
                        try:
                            sym_name = unicodedata.name(sym)
                        except ValueError:
                            sym = '#{0}'.format(ord(sym))
                            sym_name = 'UNKNOWN NAME'
                        cat_gr = cat[0]
                        count = stat[1]
                        if cat_gr == 'L' or cat == 'Cc' or cat == 'Nd':
                            continue
                        f_count.write('"{0}" {1}, {2}: {3} time(s)\n'.format(sym, cat, sym_name, count))
        except OSError as e:   # IOError is an alias of OSError in Python 3
            self.fatal_error("can't write statistics into {0}: {1}".format(self.sym_stat, e))


if __name__ == '__main__':
    parser = fill_arg_for_processor('symbol counter')
    parser.add_argument('--sym_stat', required=True)
    parser_args = parser.parse_args()
    counter = SymbolCounter(parser_args)
    counter.process()
    counter.report()
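# The filter in SymbolCounter relies only on the standard unicodedata module:
# category() yields a two-letter class ('Ll', 'Cc', 'Nd', ...) and name() raises
# ValueError for unnamed code points. A standalone sketch of the same
# skip-letters/controls/digits logic on arbitrary text:
import unicodedata
from collections import Counter

counts = Counter('Hello, мир!!\u00a0')
for sym, count in counts.most_common():
    cat = unicodedata.category(sym)
    if cat.startswith('L') or cat in ('Cc', 'Nd'):
        continue                            # letters, controls and digits are skipped
    try:
        sym_name = unicodedata.name(sym)
    except ValueError:                      # some code points have no name
        sym_name = 'UNKNOWN NAME'
    print('"{0}" {1}, {2}: {3} time(s)'.format(sym, cat, sym_name, count))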


        self.line = -1
    """
    def change_accent(self, text):
        if text is None:
            return text
        n = text.count(COMBINING_GRAVE_ACCENT)
        if n == 0:
            return text
        self.count_mess(
            "COMBINING_GRAVE_ACCENT changed to COMBINING_ACUTE_ACCENT", n)
        return text.replace(COMBINING_GRAVE_ACCENT, COMBINING_ACUTE_ACCENT)

    def process_lxml_tree(self, tree):
        root = tree.getroot()
        body = root.find('body')
        if body is None:
            return None
        for elem in body.iter():
            if self.nostructured(elem):
                continue
            elem.text = self.change_accent(elem.text)
            elem.tail = self.change_accent(elem.tail)
        return tree


if __name__ == '__main__':
    parser = fill_arg_for_processor('speech converter', True)
    parser_args = parser.parse_args()
    converter = ConverterStihi(parser_args)
    converter.process()
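# COMBINING_GRAVE_ACCENT and COMBINING_ACUTE_ACCENT are not defined in this
# fragment; assuming they are the usual Unicode combining marks U+0300 and
# U+0301, the substitution done by change_accent() looks like this on its own:
import unicodedata

COMBINING_GRAVE_ACCENT = '\u0300'   # assumed value
COMBINING_ACUTE_ACCENT = '\u0301'   # assumed value

text = 'зимне' + COMBINING_GRAVE_ACCENT + 'й'    # stress wrongly marked with a grave accent
fixed = text.replace(COMBINING_GRAVE_ACCENT, COMBINING_ACUTE_ACCENT)
print(unicodedata.name(fixed[5]))                # COMBINING ACUTE ACCENT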