def main():
    global map_catalog
    global base_catalog

    args = parse_args()
    map_id = args.map_id
    catalog = adm.read_json(catalog_path)
    map_catalog = catalog['maps']
    base_catalog = catalog['base']
    #for k in catalog.keys():
    #    print(k)

    is_map = map_id in map_catalog
    is_base = map_id in base_catalog
    if not is_base and not is_map:
        print('Download URL not found in Map Catalog: %s' % args.map_id)
        sys.exit(1)

    # create init.json which sets initial coords and zoom
    if is_map:
        map_item = map_catalog[map_id]  # renamed from 'map' to avoid shadowing the builtin
        init = {}
        init['region'] = map_item['region']
        init['zoom'] = map_item['zoom']
        init['center_lon'] = map_item['center_lon']
        init['center_lat'] = map_item['center_lat']
        init_fn = viewer_path + '/init.json'
        adm.write_json_file(init, init_fn)

    installed_maps = get_installed_tiles()
    print('installed_maps')
    print(repr(installed_maps))
    write_vector_map_idx_v2(installed_maps)
def do_content(this_preset_dir, noscan):
    global content

    if role_stats['kalite']['active']:
        content["kalite"] = {'lang_code': 'en', 'topics': []}  # defaults

    content_file = this_preset_dir + 'content.json'
    if os.path.exists(content_file):
        old_content = adm.read_json(content_file)
    else:
        old_content = {}

    if noscan:
        content_from_menu(this_preset_dir)
    else:
        content_from_files()

    # read list of maps
    if os.path.exists(map_path):
        excl_maps = ['']
        map_list = os.listdir(map_path)
        for fname in map_list:
            content["maps"].append(fname)

    # preserve any kalite for now
    content["kalite"] = old_content.get("kalite", {})
    if role_stats['kalite']['active']:
        lang = get_kalite_lang()
        content["kalite"]["lang_code"] = lang
        get_kalite_complete('khan/', lang)
        content["kalite"]["topics"] = kalite_topics

    adm.write_json_file(content, content_file)
def main():
    global download_source
    global zim_path

    args = parse_args()
    zim_fname = args.zim_file
    if args.path:  # allow override of path
        zim_path = args.path
    if args.source:  # allow override of download source
        download_source = args.source

    zim_full_path = zim_path + '/' + zim_fname
    if not os.path.isfile(zim_full_path):
        print('Zim file ' + zim_full_path + ' not found.')
        sys.exit(1)

    # create temp library.xml and read back json
    kiwix_library_xml = "/tmp/library.xml"
    try:
        os.remove(kiwix_library_xml)
    except OSError:
        pass
    iiab.add_libr_xml(kiwix_library_xml, zim_path, zim_fname, '')
    zims_installed, _ = iiab.read_library_xml(kiwix_library_xml, kiwix_exclude_attr=[''])
    zim_id = list(zims_installed)[0]

    # derive name and flavour from the zim file name
    name_parts = zim_fname.split('_')
    if name_parts[-2] in ['mini', 'nopic', 'maxi']:
        flavour = '_' + name_parts[-2]
        name = '_'.join(name_parts[:-2])
    else:
        flavour = ''
        name = '_'.join(name_parts[:-1])

    url = download_source + zim_fname
    zims_installed[zim_id]['url'] = url
    zims_installed[zim_id]['name'] = name
    zims_installed[zim_id]['flavour'] = flavour

    zim_json = zim_fname.replace('.zim', '.json')
    adm.write_json_file(zims_installed, zim_json)

    try:
        os.remove(kiwix_library_xml)
    except OSError:
        pass
    sys.exit()
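# A minimal sketch (not part of the script) of the name/flavour split above, assuming the
# usual Kiwix naming pattern <project>_<lang>_<selection>[_<flavour>]_<date>.zim.
# The file names below are hypothetical.
def _split_zim_name(zim_fname):
    parts = zim_fname.split('_')
    if parts[-2] in ['mini', 'nopic', 'maxi']:
        return '_'.join(parts[:-2]), '_' + parts[-2]   # drop flavour and date
    return '_'.join(parts[:-1]), ''                    # drop date only

# _split_zim_name('wikipedia_en_all_maxi_2023-10.zim') -> ('wikipedia_en_all', '_maxi')
# _split_zim_name('wiktionary_fr_all_2023-10.zim')     -> ('wiktionary_fr_all', '')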
def do_preset(this_preset_dir):
    preset_file = this_preset_dir + 'preset.json'
    preset = {}
    preset["name"] = "Put a name or title here"
    preset["description"] = "Put a longer description here"
    preset["default_lang"] = "en or another code"
    preset["location"] = "Optional location this was installed"
    preset["size_in_gb"] = 115
    today = str(date.today())
    preset["last_modified"] = today
    if not os.path.exists(preset_file):
        adm.write_json_file(preset, preset_file)
def write_vector_map_idx_v2(installed_maps):
    # modified from adm_lib for new maps
    catalog = dict(map_catalog)  # copy so the global map_catalog is not modified
    catalog.update(base_catalog)
    idx_dict = {}
    for fname in installed_maps:
        map_id = fname
        map_dict = catalog.get(map_id, None)
        if not map_dict:
            continue
        # Create the idx file in format required by js-menu system
        item = map_dict['perma_ref']
        idx_dict[item] = {}
        idx_dict[item]['file_name'] = fname  # ? os.path.basename(map_dict['detail_url'])
        idx_dict[item]['menu_item'] = map_dict['perma_ref']
        idx_dict[item]['size'] = map_dict['size']
        idx_dict[item]['date'] = map_dict['date']
        idx_dict[item]['region'] = map_dict['region']
        #idx_dict[item]['language'] = map_dict['perma_ref'][:2]
        idx_dict[item]['language'] = 'en'
    adm.write_json_file(idx_dict, vector_map_idx_dir + '/vector-map-idx.json')
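# For illustration only: one entry of the vector-map-idx.json written above, keyed by the
# map's perma_ref. The key and values here are made up; only the field names come from the code.
sample_vector_map_idx = {
    "osm-planet-z0-10": {                         # hypothetical perma_ref
        "file_name": "osm-planet-z0-10.mbtiles",  # hypothetical file name
        "menu_item": "osm-planet-z0-10",
        "size": 1234567890,
        "date": "2023-06-01",
        "region": "planet",
        "language": "en"
    }
}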
for topic in topics:
    category = topic.li.a.text
    print(category)
    items = topic.ul.find_all('li')
    for item in items:
        title = item.a.text
        print(title)
        zip_url = item.a['href']
        moddir = zip_url.split('/')[-1]
        moddir = moddir.split('.zip')[0]
        if moddir[0:3] == 'tr_':
            moddir = moddir[3:]
        else:
            moddir = 'en-' + moddir
        item_info = deepcopy(item_base)
        item_info['module_id'] = str(uuid.uuid4())
        item_info['moddir'] = moddir
        item_info['category'] = category
        item_info['title'] = title
        item_info['lang'] = moddir[0:2]  # may not work for Chinese
        item_info['zip_http_url'] = zip_url
        gcf_catalog[moddir] = item_info
adm.write_json_file(gcf_catalog, 'gcf-catalog.json')
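# Sketch of the moddir/lang derivation above for hypothetical zip names, assuming translated
# modules are published as 'tr_<lang>-<name>.zip' and English ones as '<name>.zip'
# (the exact naming on the source site is an assumption).
def _derive_moddir(zip_url):
    moddir = zip_url.split('/')[-1].split('.zip')[0]
    if moddir[0:3] == 'tr_':
        moddir = moddir[3:]        # e.g. 'tr_es-word-basics' -> 'es-word-basics'
    else:
        moddir = 'en-' + moddir    # e.g. 'word-basics' -> 'en-word-basics'
    return moddir, moddir[0:2]     # second value is the lang code used above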
#article_parts = page.find_all("div", {"class": "card"})
alpha_block = page.find('ul', {'class': 'alpha-block-list'})
items = alpha_block.find_all('li')
for item in items:
    link = item.find('a')
    alpha_pages.append(link['href'])
# alternative: iterate string.ascii_uppercase (a string, not a list) plus 0-9

for sub_menu in alpha_pages:
    response = requests.get('https://rarediseases.info.nih.gov' + sub_menu)
    response.encoding = 'utf-8'  # to be safe
    html = response.text
    soup = BeautifulSoup(html, "html.parser")
    list_block = soup.find('ul', {'class': 'listing-diseases'})
    items = list_block.find_all('li')
    for item in items:
        print(item)
        try:
            link = item.find('a')
            url = link['href']
            disease_catalog[url] = link.text
        except (TypeError, KeyError):  # skip <li> without a usable link
            continue
# div id = what's new for new or changed articles
adm.write_json_file(disease_catalog, 'disease-catalog.json')
def main():
    global verbose
    global download_flag

    oer2go_catalog = {}
    err_num = 0
    err_str = "SUCCESS"

    args = parse_args()
    if args.verbose:
        verbose = True
    if args.no_download:
        download_flag = False

    # make sure iiab-menu (js_menu_dir) is installed if --menu was given
    if args.menu:
        if not os.path.isdir(adm.CONST.js_menu_dir):
            sys.stdout.write("GET-OER2GO-CAT ERROR - iiab-menu not installed and --menu option given\n")
            sys.stdout.flush()
            sys.exit(99)

    # always get our catalog; failure is fatal
    try:
        url_handle = urllib.request.urlopen(adm.CONST.iiab_module_cat_url)
        iiab_catalog_json = url_handle.read()
        url_handle.close()
        iiab_catalog = json.loads(iiab_catalog_json)
    except urllib.error.URLError as exc:
        sys.stdout.write("GET-OER2GO-CAT ERROR - " + str(exc.reason) + '\n')
        sys.stdout.flush()
        sys.exit(2)

    # for now we will assume that old modules are still in the current catalog
    # get new oer2go catalog unless told not to
    if download_flag:
        err_num, err_str, oer2go_catalog = get_oer2go_cat()
        if err_num != 0:
            download_flag = False
    if not download_flag:  # fall back to the local copy
        local_oer2go_catalog = adm.read_json(adm.CONST.oer2go_catalog_file)
        oer2go_catalog = local_oer2go_catalog['modules']

    # start with iiab_catalog.json
    for item in iiab_catalog:
        moddir = item['moddir']
        id = item['module_id']
        module = item
        iiab_oer2go_catalog[moddir] = module

    working_dir = adm.CONST.rachel_working_dir + str(uuid.uuid4()) + "/"
    os.mkdir(working_dir)
    #os.mkdir(iiab_menu_download_dir)

    for item in oer2go_catalog:
        # structure of local and remote catalogs is different
        if not download_flag:  # local catalog is keyed by moddir
            moddir = item
            module = oer2go_catalog[moddir]
            module_id = module['module_id']
        else:  # remote catalog is a list of modules
            moddir = item['moddir']
            module_id = item['module_id']
            module = item
        if moddir is None:  # skip items with no moddir
            continue

        menu_item_name = moddir
        if str(module_id) in dup_list:
            msg = "Skipping module not needed by Internet in a Box"
            if verbose:
                print("%s %s %s" % (msg, module_id, moddir))
            continue
        if module.get('type') != 'html':
            continue

        is_downloaded, has_menu_def = adm.get_module_status(module)
        #if args.menu and is_downloaded:
        if args.menu:
            if not has_menu_def:
                menu_item_name = adm.create_module_menu_def(module, working_dir, incl_extra_html=False)
                msg = "Generating menu files"
                if verbose:
                    print("%s %s %s" % (msg, module_id, moddir))
            if is_downloaded:
                adm.update_menu_json(menu_item_name)  # only adds if not already in menu

        iiab_oer2go_catalog[moddir] = module  # write catalog even if not downloaded as ours could have changed

    dated_oer2go_cat = {}
    dated_oer2go_cat['download_date'] = time.strftime("%Y-%m-%d.%H:%M:%S")
    dated_oer2go_cat['modules'] = iiab_oer2go_catalog
    adm.write_json_file(dated_oer2go_cat, adm.CONST.oer2go_catalog_file)

    shutil.rmtree(working_dir)
    sys.stdout.write(err_str)
    sys.stdout.flush()
    sys.exit(err_num)
# derived from https://www.ncbi.nlm.nih.gov/books/NBK430685/
import os
import requests
from bs4 import BeautifulSoup
import json

import iiab.adm_lib as adm

stat_pearl_catalog = {}

with open('stat-pearl-catalog.html', 'r') as f:
    cat_html = f.read()
page = BeautifulSoup(cat_html, "html.parser")
items = page.find_all('li')

for item in items:
    # each <li> has a class of the form 'toc_itm_NBK430685_...'; NBK430685 is the number
    # of the catalog (book), which appears in every <li>, not the article's NIH code
    # (554556 in this example), so the article id is taken from the link URL instead
    links = item.find_all(['a', 'link'])
    for link in links:
        if link.has_attr('href'):
            url = link['href']
            if 'article-' in url:
                article_id = url.split('article-')[1].split('/')[0]
                article_info = {'url': url, 'title': link.text}
                stat_pearl_catalog[article_id] = article_info

adm.write_json_file(stat_pearl_catalog, 'stat-pearl-catalog.json')
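# Example only: a link whose href ends in 'article-554556/' yields article_id '554556'
# via the split above (the surrounding path shape is an assumption).
assert '/books/n/statpearls/article-554556/'.split('article-')[1].split('/')[0] == '554556'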