Example 1
    def __init__(self, config: Config, callback=None):
        self.config = config
        self.count: int = 0
        self.callback = self.result_callback if callback is None else callback

        # prepare all words and hash values:
        self.groups: tuple = ordered_unique(
            list_to_bytes(
                chain([
                    self._prep_group(g, True) for g in self.config.groups_raw
                ], [self._prep_group(g, False)
                    for g in self.config.groups]))) or (b'', )

        self.words: tuple = ordered_unique(
            list_to_bytes(
                chain(
                    *[
                        self._prep_word(w, True, False)
                        for w in self.config.words_raw
                    ], *[
                        self._prep_word(w, False, False)
                        for w in self.config.words
                    ])))
        self.words_tr: tuple = ordered_unique(
            list_to_bytes(
                chain(
                    *[
                        self._prep_word(w, True, True)
                        for w in self.config.words_raw
                    ], *[
                        self._prep_word(w, False, True)
                        for w in self.config.words
                    ])))
        if len(self.words_tr) == len(self.words):
            self.words_tr = self.words  # nothing changed, keep original instance

        self.prefixes: tuple = ordered_unique(
            list_to_bytes(self.config.prefixes)) or (b'', )
        self.postfixes: tuple = ordered_unique(
            list_to_bytes(self.config.postfixes)) or (b'', )

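        # Build the target lookup. When groups/postfixes are generated as
        # part of the search product they stay in the key material; otherwise
        # invhash32 appears to pre-invert the known suffix so each candidate
        # word sequence can be checked with a single forward hash.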
        if self.is_product_groups:
            self.targets: dict = dict(
                (t, (t, b'', b'')) for t in self.config.targets)
        elif self.is_product_postfixes:
            self.targets: dict = dict(
                (invhash32(g, t), (t, g, b''))
                for t, g in product(self.config.targets, self.groups))
        else:
            self.targets: dict = dict(
                (invhash32(p + g, t), (t, g, p)) for t, p, g in product(
                    self.config.targets, self.postfixes, self.groups))

        self.init: int = 0 if self.is_product_prefixes else hash32(
            self.prefixes[0])
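
Example 1 builds order-preserving, deduplicated pools of byte strings for groups, words, prefixes, and postfixes. The helpers `ordered_unique` and `list_to_bytes` are not part of this excerpt; a minimal sketch of what they are assumed to do (the real implementations may differ) could be:

# Hypothetical stand-ins for the helpers used above; assumptions, not the
# project's actual implementations.
from typing import Iterable, List, Union

def ordered_unique(items: Iterable) -> tuple:
    # Order-preserving deduplication (dict keys keep insertion order in
    # Python 3.7+); returning a tuple keeps empty results falsy, which the
    # `... or (b'',)` fallbacks above rely on.
    return tuple(dict.fromkeys(items))

def list_to_bytes(items: Iterable[Union[str, bytes]]) -> List[bytes]:
    # Coerce every entry to bytes; the encoding here is an assumption.
    return [s if isinstance(s, bytes) else s.encode('utf-8') for s in items]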
Example 2
    def _handle_result(self, words: tuple):
        self.count += 1
        value: int = hash32(b''.join(words), self.init)
        result = self.targets[value][0]
        group, postfix = list_to_str(self.targets[value][1:3])
        prefix = to_str(self.prefixes[0])
        words = list_to_str(words)

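        # If group/postfix/prefix were generated as part of the word product,
        # peel them back off of the matched word sequence.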
        if self.is_product_groups:
            (group, postfix), words = words[-2:], words[:-2]
        elif self.is_product_postfixes:
            postfix, words = words[-1], words[:-1]
        if self.is_product_prefixes:
            prefix, words = words[0], words[1:]

        self.callback(self, len(words), prefix, words, postfix, group, result)
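
The final call fixes the callback contract: the searcher instance, the word count, then the decomposed prefix, words, postfix, group, and the matched target. A compatible callback might look like the following sketch (illustrative only; the default `result_callback` is not shown in this excerpt):

# Hypothetical callback matching the call site above (names are illustrative).
def print_result(searcher, nwords: int, prefix: str, words: tuple,
                 postfix: str, group: str, result: int) -> None:
    # Reassemble the candidate name the same way the hash input was built.
    print(f'{result:08x} = {prefix}{"".join(words)}{postfix}@{group}')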
Example 3
def scan_mjs(filename: str, *, debug_mode: bool = False, pre_greedy: bool = False):
    mjsreader = MjsReader(filename, encoding='utf-8', debug_mode=debug_mode, pre_greedy=pre_greedy)
    mjsreader.read()
    source = os.path.basename(filename)
    for sig in [*mjsreader.var_hashes.values(), *mjsreader.func_hashes.values()]:
        hashvalue = '        ' if sig.hash is None else f'{sig.hash:08x}'
        name     = '    ' if sig.name is None else sig.name
        typename = '    '
        if isinstance(sig, FunctionSig) and sig.is_void:
            typename = Typedef.VOID.value
        else:
            NAMES = ('VOID', 'INT', 'FLOAT', 'STRING', 'INT_ARRAY', 'FLOAT_ARRAY', 'STRING_ARRAY')
            for N in NAMES:
                if sig.type.name == N:
                    if sig.type is MjoType.INT:
                        typename = Typedef.INT_UNK.value
                    else:
                        typename = getattr(Typedef, N).value
                    break
        # if sig.type.name == Typedef.INT.name: typename = Typedef.INT.value
        # typename = '    ' if sig.type is None else sig.type.value
        print(f'{S.BRIGHT}{F.RED}{hashvalue}{S.RESET_ALL}\t{S.BRIGHT}{F.CYAN}{source}{S.RESET_ALL}\t{S.BRIGHT}{F.BLUE}{typename}{S.RESET_ALL}\t{S.BRIGHT}{F.YELLOW}{name}{S.RESET_ALL}', end='')
        if sig.group is not None:
            if sig.group == GROUP_LOCAL:
                groupname = '@'
            else:
                groupname = '' if not sig.group else f'@{sig.group}'
            print(f'{S.DIM}{F.GREEN}{groupname}{S.RESET_ALL}', end='')
        if isinstance(sig, FunctionSig):
            # if not sig.arguments:
            #     print(f'\t{""}', end='')
            # else:
            #     print(f'\t{sig.args_str}', end='')
            if not sig.arguments:
                print(f'({S.BRIGHT}{F.BLUE}void{S.RESET_ALL})', end='')
            else:
                print(f'({S.BRIGHT}{F.CYAN}{sig.args_str}{S.RESET_ALL})', end='')
        print()
    for grp in mjsreader.group_names:
        hashvalue = hash32(f'$main@{grp}')
        name = grp
        print(f'{S.BRIGHT}{F.RED}{hashvalue:08x}{S.RESET_ALL}\t{S.BRIGHT}{F.CYAN}{source}{S.RESET_ALL}\t{S.BRIGHT}{F.GREEN}{name}{S.RESET_ALL}')
    print('Total:', len([*mjsreader.var_hashes.values(), *mjsreader.func_hashes.values(), *mjsreader.group_names]))
    def read_file(reader: csv.DictReader):
        if args.write_unknown:
            unkwriter = open('syscalls_unknown_cached.txt',
                             'wt+',
                             encoding='utf-8')
        if args.write_collisions:
            clnwriter = open('syscalls_collisions_cached.txt',
                             'wt+',
                             encoding='utf-8')
        status_counts = OrderedDict()  # [(c, 0) for c in Status.__members__.values()]
        return_counts = OrderedDict()
        keyword_counts = OrderedDict()
        letter_counts = OrderedDict()

        for row in reader:
            hashvalue: int = int(row[Field.HASH.value], 16)
            address: str = row[Field.ADDRESS.value]  # pylint: disable=unused-variable
            retvalue: str = row[Field.RETURN.value]
            name: str = row[Field.NAME.value]
            arguments: str = row[Field.ARGUMENTS.value]  # pylint: disable=unused-variable
            status: Status = Status(row[Field.STATUS.value])
            notes: str = row[Field.NOTES.value]  # pylint: disable=unused-variable

            # name corrections:
            if name and name[0] != '$':  # syscalls don't include '$' prefix
                name = f'${name}'
            cleanname: str = name.strip('#@%$_')
            fullname = f'{name}@{GROUP_SYSCALL}'
            if retvalue in (
                    'file', 'page',
                    'sprite'):  # older sheets before adding '*' for ptr types
                retvalue = f'{retvalue}*'

            # name lookups:
            rettype = TYPEDEF_LOOKUP[retvalue]

            # if status not in (Status.UNHASHED, Status.CONFIRMED):
            if args.write_unknown and status not in (Status.UNHASHED,
                                                     Status.CONFIRMED,
                                                     Status.COLLISION):
                unkwriter.write(f'{hashvalue:08x} ')
            if args.write_collisions and status is Status.COLLISION:
                clnwriter.write(f'{hashvalue:08x} ')

            # validation/errors/warnings:
            if name and status in (Status.UNHASHED, Status.COLLISION,
                                   Status.LIKELY, Status.CONFIRMED):
                fullhash = hash32(fullname)
                if fullhash != hashvalue:
                    print(
                        f'{S.BRIGHT}{F.RED}ERROR:{S.RESET_ALL} hashvalue mismatch! {hashvalue:08x} vs {fullhash:08x} : {name}'
                    )
            # NOTE: the status filter below is currently disabled by `True or`
            if name and (True or status in (Status.UNHASHED, Status.COLLISION,
                                            Status.LIKELY, Status.CONFIRMED)):
                # postfix_str = MjoType.getpostfix_fromname(name)
                postfix = MjoType.frompostfix_name(
                    name, allow_unk=True, allow_alt=True
                )  # allow '!' (for $rand!) and '~' (%Op_internalCase~)
                if rettype not in (Ellipsis,
                                   MjoType.UNKNOWN) and rettype != postfix:
                    print(
                        f'{S.BRIGHT}{F.RED}ERROR:{S.RESET_ALL} return/postfix mismatch! {hashvalue:08x} : {name}'
                    )

            # statistics:
            status_counts.setdefault(status, 0)
            status_counts[status] += 1
            return_counts.setdefault(retvalue, 0)
            return_counts[retvalue] += 1
            if status in (Status.UNHASHED, Status.COLLISION, Status.CONFIRMED):
                kwds = [n for n in cleanname.split('_') if n]
                for i, kwd in enumerate(kwds):
                    # [total, prefix, middle, postfix]
                    keyword_counts.setdefault(kwd, [0, 0, 0, 0])
                    keyword_counts[kwd][0] += 1
                    #NOTE: entire words are treated as prefix
                    if i == 0:
                        keyword_counts[kwd][1] += 1
                    elif i + 1 < len(kwds):
                        keyword_counts[kwd][2] += 1
                    else:
                        keyword_counts[kwd][3] += 1
                    # letter stats:
                    for j, c in enumerate(kwd):
                        # [total, word prefix, word middle, word postfix]
                        letter_counts.setdefault(c, [0, 0, 0, 0])
                        letter_counts[c][0] += 1
                        if j == 0:
                            letter_counts[c][1] += 1
                        elif j + 1 < len(kwd):
                            letter_counts[c][2] += 1
                        else:
                            letter_counts[c][3] += 1
        if args.write_unknown:
            unkwriter.flush()
            unkwriter.close()
            del unkwriter
        if args.write_collisions:
            clnwriter.flush()
            clnwriter.close()
            del clnwriter
        # print statistics:
        max_len = max([len(k.value) for k in status_counts.keys()] +
                      [len(k) for k in return_counts.keys()])

        # print status statistics:
        if show_status:
            total = sum(status_counts.values())
            total_cat = sum(
                [c for s, c in status_counts.items() if s is not Status.NONE])
            print(f'{S.BRIGHT}{F.BLUE}CATEGORIES: [STATUS]{S.RESET_ALL}')
            for k in Status.__members__.values():
                cnt = status_counts.get(k, 0)
                #for k,cnt in status_counts.items():
                if cnt == 0: print(f'{S.BRIGHT}{F.BLACK}', end='')
                print(f' {k.value.ljust(max_len)} : {cnt:d}{S.RESET_ALL}')
            print(
                f'{S.BRIGHT}{F.WHITE} {"total".ljust(max_len)} : {total_cat:d}/{total:d}{S.RESET_ALL}'
            )

        # print return type statistics:
        if show_returns:
            for t, keys in TYPEDEFS.items():
                name = 'OTHER' if t is Ellipsis else t.name
                print(f'{S.BRIGHT}{F.BLUE}RETURNS: [{name}]{S.RESET_ALL}')
                for k in keys:
                    cnt = return_counts.get(
                        k, 0)  # '*' now included for pointer int types
                    if cnt == 0: print(f'{S.BRIGHT}{F.BLACK}', end='')
                    print(f' {k.ljust(max_len)} : {cnt:d}{S.RESET_ALL}')

        # print keyword statistics
        max_kwd_len = max([len(k) for k in keyword_counts.keys()])
        COLS = (f'{S.BRIGHT}{F.GREEN}', f'{S.BRIGHT}{F.YELLOW}',
                f'{S.BRIGHT}{F.RED}', f'{S.DIM}{F.BLACK}')
        if keywords_min is not None:
            # kwds_sorted = list(keyword_counts.keys())
            # print(f' {S.BRIGHT}{F.BLUE}{"keyword".ljust(max_kwd_len)}{S.RESET_ALL} : {S.BRIGHT}{F.BLUE}total{S.RESET_ALL} {S.DIM}{F.GREEN}pre {F.YELLOW}mid {F.RED}post{S.RESET_ALL}  {S.BRIGHT}{F.BLUE}i  %  $  #  %#  $#{S.RESET_ALL}')
            # print('{S.BRIGHT}{F.BLUE}',end='')
            # print('')
            # print(f' keyword      : total pre mid post')
            # print(' sprite        : 37    37  37  37')
            # """
            #  keyword       : total pre mid post  i  %  $  #  %#  $#
            # KEYWORDS: [37]
            #  sprite        : 37,  [37, 0,  0  ]  00,
            # KEYWORDS: [28]
            #  set           : 28,  [19, 2,  7  ]
            # KEYWORDS: [27]
            # """
            keyword_counts_sorted = {}
            for k, cnt in keyword_counts.items():
                keyword_counts_sorted.setdefault(cnt[0], []).append((k, cnt))
            cnts = list(keyword_counts_sorted.keys())
            cnts.sort(reverse=True)
            print()
            print(
                f' {S.BRIGHT}{F.BLUE}{"keyword".ljust(max_kwd_len)}{S.RESET_ALL} : {S.BRIGHT}{F.BLUE}total{S.RESET_ALL} {S.DIM}{F.GREEN}pre {F.YELLOW}mid {F.RED}post{S.RESET_ALL}'
            )
            for cnt in cnts:
                if cnt <= keywords_min:
                    continue
                print(f'{S.DIM}{F.CYAN}KEYWORDS: [{cnt}]{S.RESET_ALL}')
                kwds = keyword_counts_sorted[cnt]
                # kwds.sort()
                kwds.sort(key=lambda k: k[0])
                for k, cnts in kwds:
                    # print(f' {k.ljust(max_kwd_len)} : {cnt:<3d}{S.RESET_ALL}   ',end='')
                    # {cnts[1:]}
                    cnts_parts = ''  #[]
                    # cnts_parts = []
                    for j, cntx in enumerate(cnts[1:]):
                        comma = ',' if j < 2 else ''
                        # just = 4 if j < 2 else 3
                        if not cntx:
                            j, cntx = -1, '0'
                            # j,cntx = -1,'-'
                        # cnts_parts.append(f'{COLS[j]}{cntx:<3}{S.RESET_ALL}')
                        # cnt_part = f"{cntx}{comma}".ljust(just)
                        # cnts_parts += f'{COLS[j]}{f"{cntx}{comma}".ljust(just))}'
                        cnts_parts += f'{COLS[j]}{cntx}{S.DIM}{F.BLACK}{comma}{S.RESET_ALL}' + (
                            ' ' * (3 - len(str(cntx))))
                        # cnts_parts.append(f'{COLS[j]}{cntx:<3}{S.RESET_ALL}')
                        # cnts_parts += (',' if j < 2 else '')
                        # print(f'{COLS[j]}{cntx:<3}{S.RESET_ALL} ', end='')
                        # print(f'{(COLS[j] if cntx else COLS[-1])}{cntx:<3d}{S.RESET_ALL} ', end='')
                    # print(f' {k.ljust(max_kwd_len)} : {f"{cnt},".ljust(4)}{S.RESET_ALL} [{" ".join(cnts_parts)}]')
                    # print(f' {k.ljust(max_kwd_len)} : {f"{cnt},".ljust(4)}{S.RESET_ALL} [{cnts_parts}]')
                    print(
                        f' {k.ljust(max_kwd_len)} : {cnt}{S.DIM}{F.BLACK},{S.RESET_ALL}{"".ljust(3-len(str(cnt)))} [{cnts_parts}]'
                    )
                    # print(f' {k.ljust(max_kwd_len)} : {cnt:<3d}{S.RESET_ALL}  [{" ".join(cnts_parts)}]')
                    # print()
            # for cnt,kwds in keyword_counts_sorted.items():
            #     print(f'{S.BRIGHT}{F.BLUE}KEYWORDS: [{cnt}]{S.RESET_ALL}')
            #     kwds.sort()
            #     print(f' {k.ljust(max_kwd_len)} : {cnt:d}{S.RESET_ALL}')

        # letter statistics:
        if letter_sort is not None:
            max_kwd_len = len("letter")
            # print(letter_counts)
            for c in string.ascii_lowercase + string.digits:
                letter_counts.setdefault(
                    c,
                    [0, 0, 0, 0])  # add any letters not appearing in syscalls
            letter_counts_alpha = list(letter_counts.items())
            letter_counts_alpha.sort(key=lambda pair: pair[0])

            #mean, median, mode, stdev, variance
            # def sort_letter(pair:tuple):
            #     l:str = pair[0]
            #     if l in string.ascii_lowercase:
            #         return chr(ord(l) - ord('a'))
            #     if l in string.ascii_lowercase:
            #         return chr(ord(l) - ord('a'))
            # sort to place in order of: lowercase, uppercase, digits
            letter_counts_alpha.sort(key=lambda pair: ('\x80' + pair[0]) if
                                     pair[0].isdigit() else pair[0].swapcase())
            letter_counts_ordered = list(letter_counts_alpha)
            letter_counts_ordered.sort(key=lambda pair: pair[1][0],
                                       reverse=True)
            letter_counts_nonzero_nums = [
                cnts[0] for l, cnts in letter_counts_ordered if cnts[0]
            ]
            letter_counts_nonzero = [
                l for l, cnts in letter_counts_ordered if cnts[0]
            ]
            letter_counts_zero = [
                l for l, cnts in letter_counts_ordered if not cnts[0]
            ]
            if letter_sort is True:
                letter_counts_alpha = letter_counts_ordered
                # letter_counts_alpha = list(letter_counts_ordered)
            print()
            print(f'{S.DIM}{F.CYAN}LETTERS: [FREQUENCY]{S.RESET_ALL}')
            max_letter_count = max(letter_counts_nonzero_nums)
            print(' appear: ', end='')
            # cnt_mode = -1
            for l in letter_counts_nonzero:
                cnt = letter_counts[l][0]
                # if cnt_mode
                if cnt >= max_letter_count / 3:
                    print(f'{S.BRIGHT}{F.WHITE}', end='')
                elif cnt >= max_letter_count / 6:
                    print(f'{S.NORMAL}{F.WHITE}', end='')
                elif cnt >= max_letter_count / 25:
                    print(f'{S.DIM}{F.WHITE}', end='')
                else:
                    print(f'{S.BRIGHT}{F.BLACK}', end='')
                print(f'{l}{S.RESET_ALL}', end='')
            print()
            # print(f' appear: {S.BRIGHT}{F.WHITE}{"".join(letter_counts_nonzero)}{S.RESET_ALL}')#, end='')
            # print()
            print(f'  never: {S.BRIGHT}{F.BLACK}{"".join(letter_counts_zero)}{S.RESET_ALL}')
            # print()
            print(f'    max: {max(letter_counts_nonzero_nums)}')
            print(f' median: {statistics.median(letter_counts_nonzero_nums)}')
            print(f'    min: {min(letter_counts_nonzero_nums)}')
            print(f'    sum: {sum(letter_counts_nonzero_nums)}')
            print(f'   mean: {statistics.mean(letter_counts_nonzero_nums):g}')
            # print(f'      mode: {repr(Counter(letter_counts_nonzero_nums).most_common(1)[0])[1:-1]}')
            print(f'  stdev: {statistics.stdev(letter_counts_nonzero_nums):g}')
            print()
            print(
                f' {S.BRIGHT}{F.BLUE}{"letter".ljust(max_kwd_len)}{S.RESET_ALL} : {S.BRIGHT}{F.BLUE}total{S.RESET_ALL}  {S.DIM}{F.GREEN}pre  {F.YELLOW}mid  {F.RED}post{S.RESET_ALL}'
            )
            print(f'{S.DIM}{F.CYAN}LETTERS: [COUNTS]{S.RESET_ALL}')
            for k, cnts in letter_counts_alpha:  #string.ascii_lowercase:
                # cnts = letter_counts.get(k, (0, 0, 0, 0))
                cnt = cnts[0]
                cnts_parts = ''  #[]
                # cnts_parts = []
                color = f'' if cnt else f'{S.BRIGHT}{F.BLACK}'
                for j, cntx in enumerate(cnts[1:]):
                    comma = ',' if j < 2 else ''
                    # just = 5 if j < 2 else 4
                    if not cntx:
                        j, cntx = -1, '0'
                        # j,cntx = -1,'-'
                    # cnts_parts.append(f'{COLS[j]}{cntx:<3}{S.RESET_ALL}')
                    # cnt_part = f"{cntx}{comma}".ljust(just)
                    # cnts_parts += f'{COLS[j]}{f"{cntx}{comma}".ljust(just))}'
                    cnts_parts += f'{COLS[j]}{cntx}{S.DIM}{F.BLACK}{comma}{S.RESET_ALL}' + (
                        ' ' * (4 - len(str(cntx))))
                    # cnts_parts.append(f'{COLS[j]}{cntx:<3}{S.RESET_ALL}')
                    # cnts_parts += (',' if j < 2 else '')
                    # print(f'{COLS[j]}{cntx:<3}{S.RESET_ALL} ', end='')
                    # print(f'{(COLS[j] if cntx else COLS[-1])}{cntx:<3d}{S.RESET_ALL} ', end='')
                # print(f' {k.ljust(max_kwd_len)} : {f"{cnt},".ljust(4)}{S.RESET_ALL} [{" ".join(cnts_parts)}]')
                # print(f' {k.ljust(max_kwd_len)} : {f"{cnt},".ljust(4)}{S.RESET_ALL} [{cnts_parts}]')
                print(
                    f' {color}{k.ljust(max_kwd_len)} : {cnt}{S.DIM}{F.BLACK},{S.RESET_ALL}{"".ljust(4-len(str(cnt)))} {color}[{S.RESET_ALL}{cnts_parts}{color}]{S.RESET_ALL}'
                )
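
Both functions above lean on `hash32` to check names against recorded hash values (for example `hash32(f'$main@{grp}')` and the `fullname` validation). It is not defined in this excerpt; assuming it is the CRC-32-based name hash used elsewhere in this tooling (an assumption), a minimal stand-in might be:

import zlib

# Assumed stand-in for hash32 (not shown in this excerpt): CRC-32 over the
# encoded name, with an optional initial state as used in Example 2.
def hash32(data, init: int = 0) -> int:
    if isinstance(data, str):
        data = data.encode('cp932')  # the encoding is an assumption
    return zlib.crc32(data, init) & 0xffffffff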
Example 5
def main(argv: list = None) -> int:
    ## PARSER SETUP ##

    DEFAULT_MJS = [
        f'../data/mjs/{f}' for f in [
            'adv.mjh.bak',
            'console.mjs.old',
            'console.mjs.old2',
        ]
    ]
    # scenario.arc/originals.7z/* from "Ame no Marginal -Rain Marginal-" (EN)
    #  this data is not included in the repository for obvious reasons
    AME_ORIGINALS = [
        f'../data/copyright/ame_originals/{f}' for f in [
            'おまけ.txt',
            'オリジナル0.txt',
            'オリジナル01.txt',
            'オリジナル02.txt',
            'オリジナル03.txt',
            'オリジナル04.txt',
            'オリジナル05.txt',
            'オリジナル06.txt',
            'オリジナル07.txt',
            'オリジナル08.txt',
            'オリジナル09.txt',
            'オリジナル010.txt',
            'スタッフとも.txt',
        ]
    ]

    import argparse
    parser = argparse.ArgumentParser(
        description=
        'Read Majiro Engine hashes and names from a multitude of files and write to py+json output files',
        add_help=True)

    # pgroup = parser.add_mutually_exclusive_group()
    parser.add_argument('-i',
                        '--input',
                        dest='inputs',
                        metavar='MJSFILE',
                        nargs='+',
                        default=[],
                        required=False,
                        help='parse hashes and groups from mjs/mjh files')
    parser.add_argument(
        '-I',
        '--input-mjs',
        dest='inputs2',
        action='store_const',
        const=DEFAULT_MJS,
        default=[],
        required=False,
        help='parse hashes and groups from all repo mjs/mjh files')
    parser.add_argument(
        '-A',
        '--ame-mjs',
        dest='inputs3',
        action='store_const',
        const=AME_ORIGINALS,
        default=[],
        required=False,
        help=
        'parse hashes and groups from all repo "Ame no Marginal original" script files (not included)'
    )
    parser.add_argument(
        '-P',
        '--python',
        dest='python',
        default=False,
        action='store_true',
        required=False,
        help='read hashes from python mjotool.known_hashes module')
    parser.add_argument(
        '-G',
        '--google',
        dest='google',
        default=False,
        action='store_true',
        required=False,
        help='read hashes, groups, and callbacks from Google Sheets')
    parser.add_argument(
        '-U',
        '--update',
        dest='update',
        default=False,
        action='store_true',
        required=False,
        help='update Google Sheet cached files and always download new copies')
    parser.add_argument('-H',
                        '--hashes',
                        metavar='JSONFILE',
                        nargs='+',
                        default=[],
                        required=False,
                        help='parse user-defined hashes from json files')
    parser.add_argument('-s',
                        '--syscalls',
                        metavar='JSONFILE',
                        nargs='+',
                        default=[],
                        required=False,
                        help='parse syscall hashes from json files')
    parser.add_argument('-g',
                        '--groups',
                        metavar='JSONFILE',
                        nargs='+',
                        default=[],
                        required=False,
                        help='parse groups from json files')
    parser.add_argument('-c',
                        '--callbacks',
                        metavar='JSONFILE',
                        nargs='+',
                        default=[],
                        required=False,
                        help='parse callbacks from json files')
    parser.add_argument(
        '-a',
        '--all',
        metavar='JSONFILE',
        nargs='+',
        default=[],
        required=False,
        help=
        'parse "syscalls", "functions", "variables", and "groups" from json files'
    )

    pgroup = parser.add_mutually_exclusive_group()
    pgroup.add_argument('--csv',
                        dest='format',
                        default='csv',
                        action='store_const',
                        const='csv',
                        required=False,
                        help='csv Google Sheets format')
    pgroup.add_argument('--tsv',
                        dest='format',
                        action='store_const',
                        const='tsv',
                        required=False,
                        help='tsv Google Sheets format')

    parser.add_argument('-q',
                        '--quiet-includes',
                        dest='verbose_includes',
                        action='store_false',
                        default=True,
                        required=False,
                        help='disable printing of included sources')
    parser.add_argument(
        '-T',
        '--test',
        dest='test_name',
        action='store_const',
        default='',
        const='__test',
        required=False,
                        help='write hashes to files with "__test" appended to the name')

    ###########################################################################

    args = parser.parse_args(argv)

    # print(args)
    # return 0

    ## VARIABLE SETUP ##

    callback_names: Set[str] = set()
    group_names: Set[str] = set()
    var_hashes: Dict[int, str] = {}
    func_hashes: Dict[int, str] = {}
    sys_hashes: Dict[int, str] = {}
    sys_list: List[int] = []

    # predefined known hashes that won't show up as declarations
    EXTRA_VAR_HASHES = {0xa704bdbd: "__SYS__NumParams@"}
    EXTRA_FUNC_HASHES = {}
    EXTRA_SYS_HASHES = {}  # nothing yet

    var_hashes.update(EXTRA_VAR_HASHES)
    func_hashes.update(EXTRA_FUNC_HASHES)
    sys_hashes.update(EXTRA_SYS_HASHES)

    ###########################################################################

    ## DEFAULT BEHAVIOR ##

    if not args.google and not args.python and not (
            args.inputs + args.inputs2 + args.inputs3 + args.hashes +
            args.syscalls + args.groups + args.callbacks + args.all):
        for scriptfile in DEFAULT_MJS:
            load_mjs_hashes(scriptfile,
                            var_hashes,
                            func_hashes,
                            group_names,
                            verbose=args.verbose_includes)

    ## LOAD FILES ##

    for scriptfile in set(args.inputs + args.inputs2 + args.inputs3):
        load_mjs_hashes(scriptfile,
                        var_hashes,
                        func_hashes,
                        group_names,
                        verbose=args.verbose_includes)

    for jsonfile in args.hashes:
        # combination of all user-defined hashes (these can be separated later by the prefix)
        user_hashes: Dict[int, str] = {}
        load_json_hashes(jsonfile, user_hashes, verbose=args.verbose_includes)
        for k, v in user_hashes.items():
            if v.startswith('$'):  # function names use the '$' prefix
                func_hashes[k] = v
            else:
                var_hashes[k] = v
        del user_hashes

    for jsonfile in args.syscalls:
        load_json_hashes(jsonfile, sys_hashes, verbose=args.verbose_includes)

    for jsonfile in args.groups:
        load_json_groups(jsonfile, group_names, verbose=args.verbose_includes)

    for jsonfile in args.callbacks:
        load_json_callbacks(jsonfile,
                            callback_names,
                            verbose=args.verbose_includes)

    for jsonfile in args.all:
        load_json_all(jsonfile,
                      var_hashes,
                      func_hashes,
                      sys_hashes,
                      group_names,
                      callback_names,
                      verbose=args.verbose_includes)

    if args.python:
        sys_list = []
        load_python_hashes(var_hashes,
                           func_hashes,
                           sys_hashes,
                           group_names,
                           callback_names,
                           sys_list,
                           verbose=args.verbose_includes)

    if args.google:
        sys_list = []
        load_sheets_all(var_hashes,
                        func_hashes,
                        sys_hashes,
                        group_names,
                        callback_names,
                        sys_list,
                        format=args.format,
                        update=args.update,
                        allow_collisions=('%Op_internalCase~@MAJIRO_INTER', ),
                        verbose=args.verbose_includes)

    # add main function hashes for all known groups (even if they aren't used)
    for group in group_names:
        funcname = f'$main@{group}'
        func_hashes[hash32(funcname)] = funcname

    # generate hash lookups used for the following types;
    #  group hashes are stored as the hash of the group's `$main` function for easy `#group` preprocessor identification
    group_hashes: Dict[int, str] = dict(
        (hash32(f'$main@{g}'), g) for g in group_names)
    callback_hashes: Dict[int, str] = dict(
        (hash32(c), c) for c in callback_names)

    ###########################################################################

    # datasets to initialize with list comprehension
    def _fmt_local(name: str) -> str:
        return name[:-1] if (len(name) > 1 and name[-1] == '@') else name

    def _fmt_syscall(name: str) -> str:
        return name[:-len('@MAJIRO_INTER')] if name.endswith(
            '@MAJIRO_INTER') else name

    def _strip_group(name: str) -> str:
        idx_at = name.rfind('@', 1)
        return name[:idx_at] if (idx_at != -1) else name

    def _fmt_names(names: Dict[int, str],
                   fmt_func) -> Dict[int, Tuple[str, str]]:
        return dict(
            (k, (fmt_func(v), _strip_group(v))) for k, v in names.items())

    def fmt_locals(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:  # pylint: disable=unused-variable
        return _fmt_names(names, _fmt_local)

    def fmt_syscalls(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:
        return _fmt_names(names, _fmt_syscall)

    def fmt_sort(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:
        return _fmt_names(names, str)

    def fmt_none(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:
        return dict((k, (v, v)) for k, v in names.items())

    PY_HASHES = (
        ('local_vars', var_hashes, ('_', ), fmt_sort,
         'hashes for all four variable types: local, thread, savefile, persistent\n'
         ),  #fmt_locals),
        ('thread_vars', var_hashes, ('%', ), fmt_sort, None),
        ('savefile_vars', var_hashes, ('@', ), fmt_sort, None),
        ('persistent_vars', var_hashes, ('#', ), fmt_sort, None),
        #('variables', var_hashes, (), fmt_sort, None),
        #('usercalls', func_hashes, (), fmt_sort, None),
        ('functions', func_hashes, (), fmt_sort,
         '\n==HR==\n\nhashes for user-defined and system-defined internal functions\n'
         ),
        ('syscalls', sys_hashes, (), fmt_syscalls,
         '\nsystem call hashes all use the group name `$syscall@MAJIRO_INTER`'
         ),
        ('groups', group_hashes, (), fmt_none,
         '\n==HR==\n\ngroup hashes are listed as the hash of `$main@GROUPNAME`,\n this is done in order to identify a file\'s common group from the entrypoint function hash'
         ),
        ('callbacks', callback_hashes, (), fmt_none,
         'event "callback" names used with `$event_*` system calls'),
    )

    JSON_HASHES = (
        ('variables', var_hashes, (), fmt_sort, None),
        #('usercalls', func_hashes, (), fmt_sort, None),
        ('functions', func_hashes, (), fmt_sort, None),
        ('syscalls', sys_hashes, (), fmt_syscalls, None),
        ('groups', group_hashes, (), fmt_none, None),
        ('callbacks', callback_hashes, (), fmt_none, None),
    )
    GROUPS = ((
        'syscall', 'syscalls_list', sys_list, int,
        '==HR==\n\nlist of all system call hash values, whether or not a name is known for the hash'
    ),
              #('groups', 'groups', group_names, None),
              )
    if not sys_list:
        GROUPS = ()

    if args.verbose_includes:
        print()
    NAMES = (('syscalls', sys_hashes), ('functions', func_hashes),
             ('variables', var_hashes), ('groups', group_names),
             ('callbacks', callback_names))
    print(
        f'{S.BRIGHT}{F.BLUE}Found:{S.RESET_ALL}',
        ', '.join(f"{S.BRIGHT}{F.WHITE}{len(v)} {k}{S.RESET_ALL}"
                  for k, v in NAMES) + ',',
        f'and {S.BRIGHT}{F.WHITE}{len(sys_list)} total syscall hashes{S.RESET_ALL}'
    )

    ###########################################################################

    print()

    # write python for our library
    hash_items = [
        HashSelection(
            n.replace('_', ' ').rstrip('s'), n.upper(), f'{n.upper()}_LOOKUP',
            fn(d), p, c) for n, d, p, fn, c in PY_HASHES
    ]
    group_items = [
        GroupSelection(n1.replace('_', ' ').rstrip('s'), n2.upper(), l, t, c)
        for n1, n2, l, t, c in GROUPS
    ]
    filename = f'../src/mjotool/known_hashes/_hashes{args.test_name}.py'
    print(
        f'{S.BRIGHT}{F.GREEN}Writing:{S.RESET_ALL} {S.BRIGHT}{F.BLUE}{filename}{S.RESET_ALL}'
    )
    with open(filename, 'wt+', encoding='utf-8') as writer:
        write_python_file(writer,
                          hash_items,
                          group_items,
                          readable=False,
                          sort=True)
        writer.flush()

    # write compact json for non-humans
    hash_items = [
        HashSelection(n, n, f'{n}_lookup', fn(d), p, c)
        for n, d, p, fn, c in JSON_HASHES
    ]
    group_items = [
        GroupSelection(n1, n2, l, t, c) for n1, n2, l, t, c in GROUPS
    ]
    filename = f'../data/known_hashes_compact{args.test_name}.json'
    print(
        f'{S.BRIGHT}{F.GREEN}Writing:{S.RESET_ALL} {S.BRIGHT}{F.CYAN}{filename}{S.RESET_ALL}'
    )
    with open(filename, 'wt+', encoding='utf-8') as writer:
        write_json_file(writer,
                        hash_items,
                        group_items,
                        tab='\t',
                        readable=False,
                        sort=True)
        writer.flush()
    # write """readable""" json for everybody else
    filename = f'../data/known_hashes_readable{args.test_name}.json'
    print(
        f'{S.BRIGHT}{F.GREEN}Writing:{S.RESET_ALL} {S.BRIGHT}{F.CYAN}{filename}{S.RESET_ALL}'
    )
    with open(filename, 'wt+', encoding='utf-8') as writer:
        write_json_file(writer,
                        hash_items,
                        group_items,
                        tab='\t',
                        readable=True,
                        sort=True)
        writer.flush()

    print(f'{S.BRIGHT}{F.WHITE}[Finished]{S.RESET_ALL}')
    return 0
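
Assuming the module is meant to be run directly, a conventional entry-point guard would be:

if __name__ == '__main__':
    import sys
    sys.exit(main())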