def process_obs_to_html(dataset_file):
    """
    Print an HTML table of the file formats found for each listed path.

    Every line of ``dataset_file`` is split on "=" and its second field,
    with trailing whitespace removed, is used as a directory path. The path
    is passed to ``fbs_api.get_dir_info``; the "formats" and "sample_names"
    entries of the result are recorded together with the original line.
    Paths whose lookup raises are skipped. The collected records are handed
    to ``create_html_table`` and the result is printed.

    :param dataset_file: file name passed to ``util.read_file_into_list``.
    """
    # NOTE(review): a second function with this same name is defined later in
    # this file and replaces this one at import time - confirm which of the
    # two is meant to be kept.
    summary_info = []

    for line in util.read_file_into_list(dataset_file):
        path = line.split("=")[1].rstrip()

        try:
            print("searching path {}".format(path))
            results = fbs_api.get_dir_info(path)
        except Exception:
            # Best effort: a path that cannot be looked up is left out of the
            # table. Catching Exception (rather than everything) lets
            # KeyboardInterrupt and SystemExit stop the run.
            continue

        summary_info.append(
            (line, results["formats"], results["sample_names"])
        )

    print(create_html_table(summary_info))
def process_obs_to_html(paths_page="http://catalogue.ceda.ac.uk/export/paths/"):
    """
    Write paginated HTML pages reviewing the paths listed at ``paths_page``.

    The lines downloaded from ``paths_page`` are sorted and processed in
    chunks of ``SPLIT`` (100). Each line is expected to hold two
    whitespace-separated fields: a data path and an observation URL. The
    data path is passed to ``fbs_api.get_dir_info`` and the outcome is
    rendered with ``render_results``. Each chunk becomes one page, built
    from ``PAGE_TEMPLATE`` and written to
    ``OUT_DIR/extracted_phenomena_NN.html``.

    Lines that do not have exactly two fields, and paths whose lookup
    raises, are skipped.

    :param paths_page: URL of the listing to download.
    """
    # Close the HTTP response explicitly once the listing has been read.
    response = urllib.urlopen(paths_page)
    try:
        lines = response.readlines()
    finally:
        response.close()
    lines.sort()

    n = len(lines)
    SPLIT = 100
    TEMPL = "extracted_phenomena_%02d.html"

    # NOTE: local names below are looked up by name through
    # ``PAGE_TEMPLATE % vars()``, so they must not be renamed without
    # checking the template.
    for page_number, start in enumerate(range(0, n, SPLIT), 1):
        lines_to_process = lines[start:start + SPLIT]
        content = ""

        for i, line in enumerate(lines_to_process):
            fields = line.split()
            if len(fields) != 2:
                # e.g. a blank line in the download: nothing to look up.
                print("Skipping malformed line: %r" % line)
                continue
            data_path, ob_url = fields

            print("Working on: %s" % data_path)

            try:
                results = fbs_api.get_dir_info(data_path)
                if len(results["formats"]) > 0:
                    print(results["formats"])
            except Exception:
                # Best effort: skip paths that cannot be looked up, but let
                # KeyboardInterrupt and SystemExit stop the run.
                continue

            # ``n`` counts every downloaded line, so pass the position within
            # the whole listing rather than within the current chunk.
            # NOTE(review): assumes render_results shows "position of total"
            # - confirm against render_results.
            position = start + i + 1
            html = render_results(ob_url, data_path, results, position, n)
            content += "\n" + html

        title = "File-based search review of MOLES records: %d" % page_number
        # NOTE(review): on page 1 previous_link names page 00, and on the
        # final page next_link names a page that is never written.
        previous_link = TEMPL % (page_number - 1)
        next_link = TEMPL % (page_number + 1)

        page = PAGE_TEMPLATE % vars()

        fpath = "%s/%s" % (OUT_DIR, TEMPL % page_number)
        with open(fpath, "w") as html_writer:
            html_writer.write(page)

        print("Wrote: %s" % fpath)