Example #1
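# Assumed preamble for this excerpt: sys/os/time are standard library,
# manga_utils is this project's helper module, and the dir_in / dir_inout
# paths below are hypothetical placeholders (the original script defines
# them elsewhere).
import os
import sys
import time

import manga_utils

dir_in = "data/raw/"         # hypothetical input directory
dir_inout = "data/raw_out/"  # hypothetical cache directory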
redo_all_matches = True

# parse an optional [id_start, id_end] manga id range from the command line
if len(sys.argv) == 3:
    id_start = int(sys.argv[1])
    id_end = int(sys.argv[2])
else:
    id_start = 1
    id_end = 2

# sanity check the requested id range
assert id_start > 0
assert id_end >= id_start

# load the raw manga json data files
t01 = time.time()
manga_data = manga_utils.read_raw_manga_data_files(dir_in)
t11 = time.time()
print("loaded " + str(len(manga_data)) + " mangas (" +
      str(round(t11 - t01, 2)) + " seconds)")

# get our list of labels (genres, demographics, themes, etc.)
labels_dict = manga_utils.get_used_labels(manga_data)
print("loaded " + str(len(labels_dict)) + " labels")
labels_vec = sorted(labels_dict.keys())
labels_weights = manga_utils.get_label_ranks(labels_vec)


# set api call settings (pull MangaDex session cookies from the environment if set)
cookies = {}
if os.environ.get('mangadex_session'):
    cookies['mangadex_session'] = os.environ.get('mangadex_session')
if os.environ.get('mangadex_rememberme_token'):
    cookies['mangadex_rememberme_token'] = os.environ.get('mangadex_rememberme_token')
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:71.0) Gecko/20100101 Firefox/77.0'
}
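# A minimal sketch of how these settings might be used downstream
# (assumes the `requests` library; the URL is a hypothetical example):
# import requests
# resp = requests.get("https://mangadex.org/title/1",
#                     headers=headers, cookies=cookies)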

# if we have a cached old file, load it in
manga_data = []
manga_data_old = manga_utils.read_raw_manga_data_files(dir_inout)
print("loaded " + str(len(manga_data_old)) + " mangas from cache")
time_start = time.time()

# create output directories if they do not exist
cache_files = False
path_cache_manga_api = "data/page_manga_api/"
path_cache_manga_ext = "data/page_manga_ext/"
if cache_files:
    manga_utils.make_dir_if_not(path_cache_manga_api)
    manga_utils.make_dir_if_not(path_cache_manga_ext)

# loop through each index page and extract the mangas
manga_count_updated = 0
manga_count_new = 0
manga_count = id_start