def __init__(
        self,
        word: str,
        wordclass: str,
        class_detail1: str,
        class_detail2: str,
        class_detail3: str,
        grammer_type: str,
        conjugation: str,
        basic_type: str,
        reading: str = None,
        pronounce: str = None,
        options: str = None,
):
    """Store the fields describing one parsed word.

    The eight required fields are each passed through
    ``assertion.is_str`` and stored on a same-named private attribute.
    ``reading`` and ``pronounce`` fall back to ``word`` when they are
    falsy (``None`` or empty). ``options`` is stored as given; a truthy
    value is reported through ``LOG.critical``.
    """
    required = (
        ('_word', word),
        ('_wordclass', wordclass),
        ('_class_detail1', class_detail1),
        ('_class_detail2', class_detail2),
        ('_class_detail3', class_detail3),
        ('_grammer_type', grammer_type),
        ('_conjugation', conjugation),
        ('_basic_type', basic_type),
    )
    # Checked and assigned in declaration order.
    for attr, value in required:
        setattr(self, attr, assertion.is_str(value))

    if reading:
        self._reading = assertion.is_str(reading)
    else:
        self._reading = self._word

    if pronounce:
        self._pronounce = assertion.is_str(pronounce)
    else:
        self._pronounce = self._word

    self._options = options
    if options:
        LOG.critical(f"Unknown mecab arguments: {options}")
def _to_web(self, src: RawData) -> TextList:
    """Build the web-mode text list from ``src``.

    String entries are copied through unchanged. A single ``'\\n'`` is
    inserted whenever the stream switches between description and
    dialogue (DESCRIPTION_HEAD / DIALOGUE_HEAD tags); every other
    FormatTag produces no output.
    """
    LOG.info('FORMAT: to_web')
    formatted = []
    in_dialogue = False
    for entry in assertion.is_instance(src, RawData).data:
        if not isinstance(entry, FormatTag):
            assertion.is_str(entry)
            formatted.append(entry)
            continue
        if entry is FormatTag.DESCRIPTION_HEAD:
            entering_dialogue = False
        elif entry is FormatTag.DIALOGUE_HEAD:
            entering_dialogue = True
        else:
            # SYMBOL_HEAD, TAG_HEAD and any other tag are ignored.
            continue
        if entering_dialogue != in_dialogue:
            # Separate the two kinds of paragraph with one blank entry.
            formatted.append('\n')
            in_dialogue = entering_dialogue
    return TextList(*formatted)
def has_rubi_exclusions(self, src: str, ex_words: (tuple, list)) -> bool:
    """Return True when ``src`` contains at least one of ``ex_words``.

    ``ex_words`` is checked with ``assertion.is_listlike``; each word and
    ``src`` are checked with ``assertion.is_str`` as they are compared.
    An empty ``ex_words`` yields False.
    """
    return any(
        assertion.is_str(word) in assertion.is_str(src)
        for word in assertion.is_listlike(ex_words)
    )
def __init__(self, name: str, category: str='', info: str=''):
    """Initialise the object with a name, a category and free-form info.

    ``name`` is handed to the parent initialiser; ``category`` and
    ``info`` are checked with ``assertion.is_str`` before being stored.
    """
    super().__init__(name)
    checked_category = assertion.is_str(category)
    self._category = checked_category
    checked_info = assertion.is_str(info)
    self._info = checked_info
def dict_from_string(src: str, splitter: str) -> dict:
    """Convert a ``splitter``-separated string into a dictionary.

    The string is split on ``splitter``; tokens at even positions become
    keys and the tokens that follow them become values
    (``'a:1:b:2'`` -> ``{'a': '1', 'b': '2'}``). A trailing token without
    a partner is dropped.

    Raises:
        ValueError: if ``splitter`` does not occur in ``src``.
    """
    if assertion.is_str(splitter) not in assertion.is_str(src):
        raise ValueError(f'Invalid string, cannot convert a dictionary: {src}')
    tokens = src.split(splitter)
    return dict(zip(tokens[0::2], tokens[1::2]))
def __init__(self, name: str, parent: str='', geometry: tuple=None, info: str=''):
    """Initialise the object with a name, parent, geometry and info.

    ``name`` is handed to the parent initialiser. ``geometry`` is checked
    with ``assertion.is_tuple`` when truthy and defaults to ``(0, 0)``
    otherwise; ``parent`` and ``info`` are checked with
    ``assertion.is_str``.
    """
    super().__init__(name)
    self._parent = assertion.is_str(parent)
    if geometry:
        self._geometry = assertion.is_tuple(geometry)
    else:
        self._geometry = (0, 0)
    self._info = assertion.is_str(info)
def name_set_from(basename: str, name: str) -> tuple:
    """Derive the name variants from ``basename`` (or ``name``).

    ``basename`` is used when truthy, otherwise ``name``. A value of the
    form ``'last,first'`` is split on the comma; a value without a comma
    is used for both the last and the first name.

    Returns:
        ``(firstname, lastname, fullname, exfullname)`` where ``fullname``
        is the base string with commas removed and ``exfullname`` is
        ``'first・last'`` when the two parts differ, else the base string.
    """
    base = assertion.is_str(basename) if basename else assertion.is_str(name)

    if ',' in base:
        lastname, firstname = base.split(',')
    else:
        lastname = firstname = base

    fullname = base.replace(',', '')

    if firstname != lastname:
        exfullname = f'{firstname}・{lastname}'
    else:
        exfullname = base

    return (firstname, lastname, fullname, exfullname)
def string_replaced_by_tag(src: str, tags: dict, prefix: str='$') -> str:
    """Replace every ``prefix + key`` occurrence in ``src`` with its tag value.

    Tags are applied in the dictionary's iteration order. Processing
    stops as soon as the working string no longer contains ``prefix``,
    since no further tag can match.

    The replacement is a literal one: neither the prefix, the key nor the
    value is interpreted as a regular expression or replacement template,
    so multi-character prefixes and values containing backslashes are
    handled verbatim.

    Args:
        src: the string to process.
        tags: mapping of tag key to replacement text.
        prefix: marker that introduces a tag in ``src``.

    Returns:
        The string with all matching tags substituted.
    """
    result = assertion.is_str(src)
    for key, value in assertion.is_dict(tags).items():
        if assertion.is_str(prefix) not in result:
            # Nothing left that could be a tag.
            break
        result = result.replace(f'{prefix}{key}', value)
    return result
def __init__(self, name: str, fullname: str, age: int, birth: tuple, sex: str, job: str, calling: (str, dict)='me:私', info: str=''):
    """Initialise a person record.

    ``name`` is handed to the parent initialiser. ``fullname`` is stored
    as the base name from which first/last/full/ex-full names are derived
    through ``name_set_from``. ``calling`` is converted with
    ``calling_dict_from``.
    """
    super().__init__(name)
    self._basename = assertion.is_str(fullname)
    self._age = assertion.is_int(age)
    self._birth = assertion.is_tuple(birth)
    self._sex = assertion.is_str(sex)
    self._job = assertion.is_str(job)
    self._calling = calling_dict_from(calling, name)
    self._info = assertion.is_str(info)
    # Derived name variants.
    name_set = name_set_from(self._basename, name)
    (self._firstname,
     self._lastname,
     self._fullname,
     self._exfullname) = name_set
def _conv_from_tag(self, src: SCode, head_info: str, nums: tuple,
        is_comment: bool, is_plot: bool, is_data: bool,
        in_material: bool) -> Tuple[str, tuple]:
    """Render one tag code as text and advance the heading counters.

    Args:
        src: the tag code to render.
        head_info: extra text appended to headings when non-empty.
        nums: ``(chapter, episode, scene)`` counters.
        is_comment: whether comment tags produce output at all.
        is_plot, is_data: select which ``contents:N`` title is rendered.
        in_material: render non-outline comments as plain text.

    Returns:
        ``(text, (chapter, episode, scene))``; ``text`` is ``''`` when
        the tag produces no output.
    """
    assertion.is_str(head_info)
    ch_num, ep_num, sc_num = assertion.is_tuple(nums)
    cmd = assertion.is_instance(src, SCode).cmd
    text = ''

    if cmd is SCmd.TAG_BR:
        text = '\n\n'
    elif cmd is SCmd.TAG_COMMENT:
        if is_comment:
            body = "。".join(src.script)
            if src.option == 'outline':
                text = f'<!--\n【{body}】\n-->\n\n'
            elif in_material:
                text = f'{body}\n'
            else:
                text = f'<!--{body}-->\n'
    elif cmd is SCmd.TAG_HR:
        text = '--------' * 9
    elif cmd is SCmd.TAG_SYMBOL:
        text = f'\n{"".join(src.script)}\n\n'
    elif cmd is SCmd.TAG_TITLE:
        option = src.option
        if isinstance(option, str) and 'contents' in option:
            # The same block is emitted for whichever contents variant
            # matches the current output kind.
            is_wanted = (
                (not is_plot and not is_data and option == 'contents:1')
                or (is_plot and option == 'contents:0')
                or (is_data and option == 'contents:2')
            )
            if is_wanted:
                text = f'---\n# CONTENTS\n{src.script[0]}\n---\n'
        else:
            head = '#' * option if isinstance(option, int) else '##'
            info_str = f' {head_info}' if head_info else ''
            title = ''.join(src.script)
            if option == 1:
                text = f'{head} {title}{info_str}\n\n'
            elif option == 2:
                text = f'{head} Ch-{ch_num}: {title}{info_str}\n\n'
                ch_num += 1
            elif option == 3:
                text = f'{head} Ep-{ep_num}: {title}{info_str}\n\n'
                ep_num += 1
            elif option == 4:
                text = f'_S-{sc_num} {title}_ {info_str}\n'
                sc_num += 1
            else:
                text = f'\n{head} {title}\n\n'
    else:
        LOG.debug(f'Other tag: {src.cmd}')

    return (text, (ch_num, ep_num, sc_num))
def _rid_tag(self, src: TextList) -> TextList:
    """Return a TextList without heading, rule and blank lines.

    Dropped are lines that start with ``#`` or ``---`` (optionally
    preceded by one newline) and lines that are exactly ``'\\n'`` or
    ``'\\n\\n'``. Every line is checked with ``assertion.is_str``.
    """
    LOG.info('ANALYZER: rid tags start')
    tag_prefixes = ('#', '\n#', '---', '\n---')
    blank_lines = ('\n', '\n\n')
    kept = []
    for line in assertion.is_instance(src, TextList).data:
        assertion.is_str(line)
        if line.startswith(tag_prefixes) or line in blank_lines:
            continue
        kept.append(line)
    return TextList(*kept)
def get(self, key: str) -> SObject:
    """Look up a registered object by key.

    Collections are searched in this order: persons, stages, days,
    times, items, words. For every collection except persons the key may
    also carry that collection's prefix (``on_``, ``in_``, ``at_``,
    ``i_``, ``w_``); only the *leading* prefix is removed before the
    lookup, so keys that contain the prefix text elsewhere are left
    intact.

    Returns:
        The matching object, or ``None`` (after logging an error) when
        the key is not registered anywhere.
    """
    assertion.is_str(key)
    # (public collection attribute, optional key prefix), in lookup order.
    lookups = (
        ('persons', None),
        ('stages', 'on_'),
        ('days', 'in_'),
        ('times', 'at_'),
        ('items', 'i_'),
        ('words', 'w_'),
    )
    for attr, prefix in lookups:
        registered = getattr(self, attr)
        if key in registered:
            return getattr(self, f'_{attr}')[key]
        if prefix and key.startswith(prefix):
            bare_key = key[len(prefix):]
            if bare_key in registered:
                return getattr(self, f'_{attr}')[bare_key]

    msg = f'Not found the key in DB: {key}'
    LOG.error(msg)
    return None
def _add_rubi_on_novel(self, src: RawData, rubis: dict) -> RawData:
    """Apply rubi to the text lines of ``src``.

    Format tags, tag-top lines, break lines and comment lines are copied
    through untouched. For every other line each rubi whose key occurs
    in the line is applied, unless the line contains one of that rubi's
    exclusion words. A rubi that is not flagged ``is_always`` is applied
    only once across the whole data.
    """
    LOG.info('COMP: add_rubi_on_novel start')
    converted = []
    used_keys = set()
    checker = Checker()
    conv = Converter()
    for line in assertion.is_instance(src, RawData).data:
        is_passthrough = (
            isinstance(line, FormatTag)
            or checker.has_tag_top(assertion.is_str(line))
            or checker.is_breakline(line)
            or checker.has_tag_comment(line)
        )
        if not is_passthrough:
            for key, rubi in rubis.items():
                if key in used_keys or not checker.has_rubi_key(line, key):
                    continue
                exclusions = assertion.is_instance(rubi, Rubi).exclusions
                if checker.has_rubi_exclusions(line, exclusions):
                    continue
                line = conv.add_rubi(line, key, rubi.rubi)
                if not rubi.is_always:
                    # One-shot rubi: never apply this key again.
                    used_keys.add(key)
        converted.append(line)
    return RawData(*converted)
def _out_to_file(self, src: TextList, filename: str, suffix: str,
        extention: str, builddir: str) -> bool:
    """Write ``src`` to ``<builddir>/<filename><suffix>.<extention>``.

    The build directory is created when missing. All lines are written
    as-is, followed by the current timestamp. The file is always written
    as UTF-8 so the output does not depend on the platform's default
    encoding.

    Returns:
        True once the file has been written.
    """
    LOG.info('OUTPUT: out to file')
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(assertion.is_str(builddir), exist_ok=True)
    fullpath = os.path.join(builddir, "{}{}.{}".format(
        assertion.is_str(filename),
        assertion.is_str(suffix),
        assertion.is_str(extention),
    ))
    # Validate before opening so a bad ``src`` cannot truncate an
    # existing file.
    lines = assertion.is_instance(src, TextList).data
    with open(fullpath, 'w', encoding='utf-8') as f:
        f.writelines(f"{line}" for line in lines)
        f.write(f'{datetime.datetime.now()}')
    return True
def _rid_topspace(self, src: str):
    """Strip the leading indentation from ``src``.

    A leading run of four spaces is removed as a unit; otherwise a single
    leading space is removed. Strings that do not start with a space are
    returned unchanged.
    """
    if not assertion.is_str(src).startswith(' '):
        return src
    # Test for the four-space indent explicitly: re-testing for a single
    # space here would make the one-character branch unreachable and cut
    # four characters off a line indented by only one space.
    if src.startswith('    '):
        return src[4:]
    return src[1:]
def set_version(self, *args: (str, int, tuple)) -> None:
    """Set the version from a tuple, three integers, or a string.

    * a tuple as first argument is stored as-is;
    * three (or more) leading integers are stored as a 3-tuple;
    * anything else: the first argument is checked with
      ``assertion.is_str`` and stored as a 1-tuple.
    """
    first = args[0]
    if isinstance(first, tuple):
        self._version = first
        return
    if len(args) >= 3 and all(isinstance(part, int) for part in args[:3]):
        self._version = args[:3]
        return
    self._version = (assertion.is_str(first), )
def test_is_str(self):
    """Check ``assertion.is_str`` against a table of cases."""
    def check(v, expect):
        self.assertEqual(assertion.is_str(v), expect)

    cases = [
        # (flag, val, expect) -- the leading flag is consumed by
        # validate_with_fail; presumably True marks a case expected to
        # pass and False one expected to fail (confirm in the helper).
        (True, "1", "1"),
        (False, 1, 1),
    ]
    validate_with_fail(self, "is_str", check, cases)
def conv_to_mode(cls, mode: str) -> FormatMode:
    """Translate a mode name or abbreviation into a ``FormatMode``.

    Recognised names: ``w``/``web``, ``s``/``smartphone``/``phone`` and
    ``p``/``plain``. Any other string maps to ``FormatMode.DEFAULT``.
    """
    aliases = {
        'w': FormatMode.WEB,
        'web': FormatMode.WEB,
        's': FormatMode.SMARTPHONE,
        'smartphone': FormatMode.SMARTPHONE,
        'phone': FormatMode.SMARTPHONE,
        'p': FormatMode.PLAIN,
        'plain': FormatMode.PLAIN,
    }
    return aliases.get(assertion.is_str(mode), FormatMode.DEFAULT)
def __init__(self, name: str, rubi: str, exclusions: tuple = None,
        is_always: bool = False):
    """Initialise a rubi entry.

    ``name`` is handed to the parent initialiser. ``exclusions`` is
    checked with ``assertion.is_tuple`` when truthy and defaults to an
    empty tuple otherwise.
    """
    super().__init__(name)
    self._rubi = assertion.is_str(rubi)
    if exclusions:
        self._exclusions = assertion.is_tuple(exclusions)
    else:
        self._exclusions = ()
    self._is_always = assertion.is_bool(is_always)
def calling_dict_from(calling: (str, dict), name: str) -> dict:
    """Construct a calling dictionary for the Person class.

    A dict is used directly; a string is parsed with ``dict_from_string``
    using ``':'`` as separator. The result is combined with
    ``{'S': name, 'M': me}``, where ``me`` is the ``'me'`` entry of the
    calling data or ``'私'`` when absent.
    """
    from builder.utils.util_str import dict_from_string

    if isinstance(calling, dict):
        callings = calling
    else:
        callings = dict_from_string(assertion.is_str(calling), ':')
    me = callings.get('me', '私')
    return combine_dict(callings, {'S': name, 'M': me})
def __init__(self, name: str, month: int = 1, day: int = 1,
        year: int = 2020, info: str = ''):
    """Initialise a day entry holding a ``datetime.date``.

    ``name`` is handed to the parent initialiser. ``month``, ``day`` and
    ``year`` are checked with ``assertion.is_int`` (in that order) before
    the date is built.
    """
    super().__init__(name)
    checked_month = assertion.is_int(month)
    checked_day = assertion.is_int(day)
    checked_year = assertion.is_int(year)
    self._date = datetime.date(checked_year, checked_month, checked_day)
    self._info = assertion.is_str(info)
def validate_string_duplicate_chopped(src: str) -> str:
    """Chop duplicated or conflicting punctuation.

    The rules are applied in order; ``!``/``?`` stand for both the ASCII
    and the full-width marks, and the inserted space is the full-width
    space U+3000:

    1. mark followed by ``、`` or ``。``       -> mark + U+3000
    2. mark followed by any other character
       (except space, U+3000, another mark,
       ``」`` or ``』``)                       -> mark + U+3000 + character
    3. mark + U+3000 + ``、`` or ``。``        -> mark
    4. ``、。``                                -> ``、``
    5. ``。、``                                -> ``。``
    6. a run of ``、``                         -> single ``、``
    7. a run of ``。``                         -> single ``。``
    """
    marks = '!?\uff01\uff1f'  # ASCII and full-width exclamation/question
    wide = '\u3000'           # full-width (ideographic) space
    rules = (
        (f'([{marks}])[、。]', f'\\1{wide}'),
        (f'([{marks}])([^ {wide}{marks}」』])', f'\\1{wide}\\2'),
        (f'([{marks}]){wide}[、。]', '\\1'),
        ('、。', '、'),
        ('。、', '。'),
        ('(、)+', '\\1'),
        ('(。)+', '\\1'),
    )
    text = assertion.is_str(src)
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text
def _append_object(self, key: str, *args: Any, obj: SObject) -> None:
    """Create an object of class ``obj`` from ``args`` and register it.

    The new object is stored under ``key`` in the collection for its
    class, and its name is registered in the tag table under ``key`` and
    the class-specific prefixed keys. Rubi objects are stored only in
    the rubi collection. An unknown class is logged as an error.
    """
    if obj is Person:
        person = Person(*args)
        self._persons[assertion.is_str(key)] = person
        self._tags[key] = person.name
        self._tags['n_' + key] = person.name
        self._tags['ln_' + key] = person.lastname
        self._tags['fn_' + key] = person.firstname
        self._tags['full_' + key] = person.fullname
        self._tags['exfull_' + key] = person.exfullname
        return

    # Kinds that register the plain key plus one prefixed alias.
    single_prefix_kinds = (
        (Stage, self._stages, 'on_'),
        (Day, self._days, 'in_'),
        (Time, self._times, 'at_'),
        (Item, self._items, 'i_'),
        (Word, self._words, 'w_'),
    )
    for kind, store, prefix in single_prefix_kinds:
        if obj is kind:
            created = kind(*args)
            store[assertion.is_str(key)] = created
            self._tags[key] = created.name
            self._tags[prefix + key] = created.name
            return

    if obj is Rubi:
        self._rubis[assertion.is_str(key)] = Rubi(key, *args)
        return

    msg = f'Unknown a story object for appending to DB: {obj}'
    LOG.error(msg)
def validate_dialogue_brackets(src: str) -> str:
    """Join back-to-back dialogue brackets.

    Every ``」「`` is replaced by ``。`` first, then every ``』『``.
    """
    text = assertion.is_str(src)
    text = text.replace('」「', '。')
    return text.replace('』『', '。')
def katakana_list_from(src: str) -> list:
    """Return every ``REG_KATAKANA`` match found in ``src``."""
    text = assertion.is_str(src)
    matches = REG_KATAKANA.findall(text)
    return matches
def kanji_list_from(src: str) -> list:
    """Return every ``REG_KANJI`` match found in ``src``."""
    text = assertion.is_str(src)
    matches = REG_KANJI.findall(text)
    return matches
def hiragana_list_from(src: str) -> list:
    """Return every ``REG_HIRAGANA`` match found in ``src``."""
    text = assertion.is_str(src)
    matches = REG_HIRAGANA.findall(text)
    return matches
def set_filename(self, filename: str) -> None:
    """Store ``filename`` after checking it with ``assertion.is_str``."""
    checked = assertion.is_str(filename)
    self._filename = checked
def set_log_level(self, loglevel: str) -> None:
    """Store ``loglevel`` after checking it with ``assertion.is_str``."""
    checked = assertion.is_str(loglevel)
    self._log_level = checked
def set_builddir(self, builddir: str) -> None:
    """Store ``builddir`` after checking it with ``assertion.is_str``."""
    checked = assertion.is_str(builddir)
    self._builddir = checked