def fix_empty(taxid=9606):
    """
    Re-gather every pickled protein of a taxon whose stored sequence is empty.

    Each file in ``<pickle_folder>/taxid<taxid>`` is loaded with ``ProteinGatherer``.
    Entries whose ``sequence`` has length zero are re-parsed (UniProt, SwissModel,
    params, gnomAD, PTM) and dumped again; the others are only counted.
    A tally of fine / fixed / glitchy entries is printed at the end.

    Side effect: ``global_settings.verbose`` is switched off for loading and
    switched on only while a protein is being repaired; it is left ``False`` on return.

    :param taxid: taxon id selecting the pickle sub-folder (9606 is human).
    :return: None
    """
    global_settings.verbose = False
    glitchy = 0
    fine = 0
    fixed = 0
    path = os.path.join(global_settings.pickle_folder, f'taxid{taxid}')
    for pf in os.listdir(path):
        p = ProteinGatherer().load(file=os.path.join(path, pf))
        if len(p.sequence) > 0:
            fine += 1
            continue
        print('****************************************')
        print(f'Attempting to fix {p.gene_name}')
        global_settings.verbose = True
        try:
            p.parse_uniprot()
            p.parse_swissmodel()
            p.compute_params()
            p.parse_gnomAD()
            p.get_PTM()
            # Explicit raise rather than ``assert``: asserts are stripped under
            # ``python -O``, which would let a still-empty protein be dumped as "fixed".
            if len(p.sequence) == 0:
                raise ValueError('Darn. Sequence is zero AA long')
            p.dump()
            fixed += 1
        except Exception:
            # Best effort: report the failure and carry on with the next protein.
            traceback.print_exc(file=sys.stdout)
            glitchy += 1
        finally:
            # Always quieten again, otherwise one failure leaves every
            # subsequent load running in verbose mode.
            global_settings.verbose = False
    print('****************************************')
    print(f'Fine: {fine:,}, Fixed {fixed:,}, Glitchy: {glitchy:,}')
def iterate_taxon(taxid=9606):
    """
    This is an ad hoc fix to fix humans or similar. For full deployment use ProteomeParser.

    Every pickle in ``<pickle_folder>/taxid<taxid>`` is loaded, its ``gnomAD`` list is
    emptied and re-parsed, PTMs and params are recomputed, and the protein is dumped again.
    A protein that fails is reported on stdout and skipped; the loop carries on.

    :param taxid: taxon id selecting the pickle sub-folder (9606 is human).
    :return: None
    """
    # Local imports so this function does not depend on the module's import block.
    import sys
    import traceback

    path = os.path.join(global_settings.pickle_folder, f'taxid{taxid}')
    for pf in os.listdir(path):
        try:
            protein = ProteinGatherer().load(file=os.path.join(path, pf))
            protein.gnomAD = []  # drop the stale entries before re-parsing
            protein.parse_gnomAD()
            protein.get_PTM()
            protein.compute_params()
            protein.dump()
        except Exception:
            # Still best effort, but no longer silent, and Ctrl-C / SystemExit
            # now propagate instead of being swallowed by a bare ``except``.
            print(f'Could not update {pf}')
            traceback.print_exc(file=sys.stdout)
           namedexfile=os.path.join(global_settings.dictionary_folder,
                                    'taxid9606-names2uniprot.json'),
           folder=os.path.join(global_settings.temp_folder, 'gnomAD')).split()


if __name__ == '__main__':
    # Script entry point: configure the settings, split the reference data,
    # then refresh gnomAD and PTM data on every pickled human protein.
    import sys
    import traceback

    global_settings.verbose = True  #False
    global_settings.error_tolerant = True
    global_settings.startup(data_folder='../protein-data')
    global_settings.retrieve_references(ask=False, refresh=False)
    ## Phosphosite
    #Phosphosite().split().write()
    # NOTE(review): the split above is commented out, yet the message is still emitted.
    message('Phosphosite split')
    ## Uniprot
    UniprotMasterReader(first_n_protein=0)
    message('Uniprot split')
    # gnomAD data needs to be split up after the dictionaries are made.
    # _gnomad()
    # NOTE(review): same here, the gnomAD split is disabled but still announced.
    message('gnomAD split')
    taxid = 9606  # that's humans
    path = os.path.join(global_settings.pickle_folder, f'taxid{taxid}')
    for pf in os.listdir(path):
        try:
            protein = ProteinGatherer().load(file=os.path.join(path, pf))
            protein.gnomAD = []  # drop the stale entries before re-parsing
            protein.parse_gnomAD()
            protein.get_PTM()
            protein.dump()
        except Exception:
            # Best effort per protein: report and continue. A bare ``except``
            # here would also swallow Ctrl-C and make the run uninterruptible.
            print(f'Could not update {pf}')
            traceback.print_exc(file=sys.stdout)
    message('Done.')