def __download(self, catalog_url):
    """Harvest ISO records for every sub-catalog linked from *catalog_url*.

    For each anchor whose URL contains ``/catalog/``, the sub-catalog is
    fetched and scanned for the "best" dataset link; that dataset's ISO
    record is then downloaded from the matching ``/iso/`` endpoint.

    Returns a list of the ISO file names that downloaded successfully
    (failed downloads — ``download_iso`` returning ``None`` — are skipped).
    """
    downloaded = []
    root = etree.HTML(download(catalog_url))
    for anchor in root.findall(".//a[@href]"):
        cat_url = self._html + anchor.attrib.get("href")
        # only descend into catalog pages
        if cat_url.find("/catalog/") <= 0:
            continue
        sub_root = etree.HTML(download(cat_url))
        best_dataset = ""
        # the last "best" link on the page wins, matching the original scan
        for link in sub_root.findall(".//a[@href]"):
            href = link.attrib.get("href")
            if href.find("best") > 0:
                best_dataset = href[href.find("=") + 1:]
        ds_name = best_dataset.rpartition("/")[2]
        iso_url = cat_url.replace("catalog.html", ds_name).replace("/catalog/", "/iso/")
        # download iso
        iso_file = download_iso(iso_url, catalog=cat_url, dataset=best_dataset)
        if iso_file is not None:
            downloaded.append(iso_file)
    return downloaded
def download_isos(self):
    """Download all Agg files from the html page and put them in the
    correct glos catalog folder.

    Scans every anchor on ``self._html`` for attribute values containing
    ``dataset``, downloads each dataset's ISO record from ``self._iso``,
    and moves the resulting files into ``self._iso_dir``.
    """
    resp = download(self._html)
    tree = etree.HTML(resp)
    datasets = list()
    files = dict()
    # get all of the datasets to download
    for elm in tree.findall(".//a[@href]"):
        for val in elm.attrib.values():
            if val.find("dataset") > 0:
                ds = val[val.find("=") + 1:]
                datasets.append(ds)
    # download isos
    for ds in datasets:
        url = self._iso + ds
        files[ds] = download_iso(url, catalog=self._html, dataset=ds)
    # move them — download_iso returns None on failure, so skip those
    # instead of handing move_iso a bogus file name
    for fname in files.values():
        if fname is not None:
            move_iso(fname, self._iso_dir)
def test_download(self):
    """download from the lake michigan nowcast 3d 2006 best model"""
    # iso endpoint for the dataset, plus the (url-encoded) catalog/dataset
    # identifiers that download_iso records alongside the file
    url = 'http://64.9.200.113:8080/thredds/iso/glos/glcfs/archive2006/michigan/ncfmrc-3d/Lake_Michigan_-_Nowcast_-_3D_-_2006_best.ncd'
    catalog = unquote('http%3A%2F%2F64.9.200.113%3A8080%2Fthredds%2Fcatalog%2Fglos%2Fglcfs%2Farchive2006%2Fmichigan%2Fncfmrc-3d%2Fcatalog.html')
    dataset = unquote('glos%2Fglcfs%2Farchive2006%2Fmichigan%2Fncfmrc-3d%2FLake_Michigan_-_Nowcast_-_3D_-_2006_best.ncd')
    result = download_iso(url, catalog=catalog, dataset=dataset)
    # the ISO record is saved as <dataset name>.xml
    assert result == 'Lake_Michigan_-_Nowcast_-_3D_-_2006_best.ncd.xml'
def download_isos(self):
    """Download an ISO record for every dataset linked from the html
    page and move each one into ``self._iso``.

    Aggregate datasets (name contains ``Agg``) are fetched from the
    ``self._agg`` endpoint; everything else from ``self._latest``.
    """
    # get datasets
    resp = download(self._html)
    tree = etree.HTML(resp)
    for a in tree.findall('.//a[@href]'):
        dataset = a.attrib.get('href')
        # will have to correct for a typo in the address
        dataset = dataset.replace('Lastest', 'Latest')
        if dataset.find('dataset') > 0:
            dataset = dataset[dataset.find('=') + 1:]
            # get iso
            if dataset.find('Agg') > 0:
                url = self._agg + dataset + '.nc'
            else:
                url = self._latest + dataset + '.nc'
            fname = download_iso(url, catalog=self._html, dataset=dataset)
            # move iso — download_iso returns None on failure, so only
            # move files that actually arrived (matches the other harvesters)
            if fname is not None:
                move_iso(fname, self._iso)
def download_isos(self):
    """Download ISO records for every known source listed on the index
    page and move them into their per-source directories.

    STORET files are special-cased: they are staged in
    ``self._storet_zip_dir``, zipped into a single ``storet.zip``
    archive, and the archive is moved to the ISOs directory.
    """
    resp = download(self._html + self._sources)
    tree = etree.HTML(resp)
    for source in tree.findall(".//li"):
        if source.text in self._iso_dirs:
            # get list of files in source
            dirlist = download(self._html + source.text + '/list.html')
            dirtree = etree.HTML(dirlist)
            for iso in dirtree.findall('.//li'):
                sname = download_iso(self._html + source.text + '/' + iso.text.strip())
                if source.text != 'STORET':
                    move_iso(sname, self._iso_dirs[source.text])
                elif sname is not None:
                    # storet needs to be compressed before being moved
                    # as a single archive
                    if path.exists(self._storet_zip_dir + sname):
                        remove(self._storet_zip_dir + sname)
                    move('./pyiso/iso_tmp/' + sname, self._storet_zip_dir)
            if source.text == 'STORET':
                # archive the storet directory in iso_tmp and move it to
                # the ISOs directory.
                # FIX: the original used the Python-2-only file() builtin,
                # opened it in text mode ('w') for binary zip output, and
                # never closed it (ZipFile does not close a caller-supplied
                # file object). Passing the path lets ZipFile own the file.
                zip_path = path.abspath('./pyiso/iso_tmp/storet.zip')
                with ZipFile(zip_path, 'w') as zip_file:
                    fileiter = (f for root, _, files in walk(self._storet_zip_dir) for f in files)
                    xmlfilter = (f for f in fileiter if f.endswith('.xml'))
                    for xml in xmlfilter:
                        zip_file.write(self._storet_zip_dir + xml, arcname=xml)
                # move the zip file
                dest = '../ISOs/' + self._iso_dirs[source.text] + '/storet.zip'
                if path.exists(dest):
                    remove(dest)
                move('./pyiso/iso_tmp/storet.zip', dest)