def __download(self, catalog_url):
    """
    Fetch one ISO record for each sub-catalog linked from ``catalog_url``.

    The page at ``catalog_url`` is parsed as HTML. Every anchor whose
    resolved address (``self._html`` + href) contains "/catalog/" past its
    first character is treated as a sub-catalog. That sub-catalog page is
    scanned for links mentioning "best"; the text after the first "=" of
    the last such link is taken as the dataset id.

    Returns a list of the non-None values returned by ``download_iso``.

    NOTE(review): when a sub-catalog has no "best" link the dataset id
    stays empty and the ISO request is still issued with an empty dataset
    name -- confirm this is intended.
    """
    isos = []
    listing = etree.HTML(download(catalog_url))
    for anchor in listing.findall(".//a[@href]"):
        cat = self._html + anchor.attrib.get("href")
        # find() > 0 semantics: the marker must sit after the first character
        if cat.find("/catalog/") <= 0:
            continue

        cat_page = etree.HTML(download(cat))
        dataset = ""
        for link in cat_page.findall(".//a[@href]"):
            href = link.attrib.get("href")
            if href.find("best") > 0:
                # keep everything after the first "="; later matches overwrite
                dataset = href[href.find("=") + 1:]

        # swap the catalog page name for the dataset's base name and point
        # the address at the iso service instead of the catalog service
        dsname = dataset.rpartition("/")[2]
        url = cat.replace("catalog.html", dsname).replace("/catalog/", "/iso/")

        iso = download_iso(url, catalog=cat, dataset=dataset)
        if iso is not None:
            isos.append(iso)
    return isos
def download_isos(self):
    """
    Download the ISO record of every dataset linked from ``self._html``
    and hand each downloaded result to ``move_iso`` with ``self._iso_dir``.

    Works in three phases: collect the dataset ids from the page, download
    all of them, then move all of them. Results are keyed by dataset id, so
    an id that appears more than once is downloaded each time but moved
    only once (the last download wins).
    """
    page = etree.HTML(download(self._html))

    # phase 1: any attribute value on an anchor that mentions "dataset"
    # past its first character names a dataset
    datasets = []
    for anchor in page.findall(".//a[@href]"):
        for value in anchor.attrib.values():
            if value.find("dataset") > 0:
                # text after the first "=" (whole value when there is none)
                datasets.append(value.split("=", 1)[-1])

    # phase 2: fetch every iso before anything is moved
    downloaded = {}
    for ds in datasets:
        downloaded[ds] = download_iso(self._iso + ds, catalog=self._html, dataset=ds)

    # phase 3: relocate the results
    for iso in downloaded.values():
        move_iso(iso, self._iso_dir)
def download_isos(self):
    """
    Download the tab-separated id listing from ``self._html`` and store the
    metadata XML of every listed id under ``self._iso_path``.

    The listing is saved to ``./pyiso/iso_tmp/geo.txt`` and read back line
    by line. The second tab-separated column holds the id; the header row
    (column value ``id``) and rows with fewer than two columns are skipped.
    For each id the document at ``self._meta + <id>`` is downloaded and
    parsed, and the text of its file-identifier element becomes the name of
    the output folder. The XML is written there as ``self._meta_file``.

    Any failure while parsing or writing a record is treated as best-effort:
    the id is appended to ``corrupt_metadata_ids.txt`` under
    ``self._iso_path`` and processing continues. That file is removed at
    the start of every run.

    Raises:
        ValueError: if the id column of a data row is not an integer.
    """
    listing_path = './pyiso/iso_tmp/geo.txt'
    corrupt_log = abspath(self._iso_path + 'corrupt_metadata_ids.txt')

    # download the id listing
    resp = download(self._html)
    with open(listing_path, 'w+') as listing:
        listing.write(resp)

    # remove the previous run's corrupt-id log; it may not exist yet
    try:
        os.remove(corrupt_log)
    except OSError:
        pass

    with open(listing_path, 'r') as listing:
        for line in listing:
            fields = line.split('\t')
            if len(fields) < 2:
                # blank or tab-less row: there is no id column to read
                continue
            raw_id = fields[1].strip()
            if raw_id == 'id':
                # header row
                continue
            iid = int(raw_id)

            # download the iso
            xmlstr = download(self._meta + str(iid))
            try:
                tree = etree.fromstring(xmlstr)
                # need the file identifier
                fi = tree.find('.//' + self._file_identifier + '/' + self._character_string)
                fname = fi.text
                path = abspath(self._iso_path + fname + self._meta_folder)
                print('\nWriting to: ' + path)
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(path + '/' + self._meta_file, 'w+') as out:
                    out.write(xmlstr)
            except Exception:
                # deliberate best-effort: record the id and carry on, but
                # let KeyboardInterrupt/SystemExit propagate
                with open(corrupt_log, 'a+') as log:
                    log.write('\nCorrupted/Missing xml for id: ' + str(iid))
def download_isos(self):
    """
    Download the ISO files of every known source listed at
    ``self._html + self._sources``.

    Each ``<li>`` on the sources page whose text is a key of
    ``self._iso_dirs`` is a source. Its ``list.html`` page is fetched and
    every ``<li>`` there is downloaded with ``download_iso``.

    * For every source except ``STORET`` the downloaded file is passed
      straight to ``move_iso`` with the source's target directory.
    * ``STORET`` files are first staged in ``self._storet_zip_dir``. Once
      the whole source has been processed, every ``.xml`` file found under
      that directory is written (flat, by file name) into
      ``./pyiso/iso_tmp/storet.zip``, which then replaces
      ``../ISOs/<target dir>/storet.zip``.
    """
    tmp_zip = './pyiso/iso_tmp/storet.zip'

    resp = download(self._html + self._sources)
    tree = etree.HTML(resp)
    for source in tree.findall(".//li"):
        name = source.text
        if name not in self._iso_dirs:
            continue
        is_storet = name == 'STORET'

        # get list of files in source
        dirlist = download(self._html + name + '/list.html')
        dirtree = etree.HTML(dirlist)
        for iso in dirtree.findall('.//li'):
            sname = download_iso(self._html + name + '/' + iso.text.strip())
            if not is_storet:
                move_iso(sname, self._iso_dirs[name])
            elif sname is not None:
                # storet needs to be compressed before being moved as a
                # single archive, so stage it (replacing any stale copy)
                staged = self._storet_zip_dir + sname
                if path.exists(staged):
                    remove(staged)
                move('./pyiso/iso_tmp/' + sname, self._storet_zip_dir)

        if not is_storet:
            continue

        # archive the storet directory in iso_tmp; handing ZipFile the path
        # lets it open the archive in binary mode and close it on exit, so
        # the file is complete before it is moved
        with ZipFile(path.abspath(tmp_zip), 'w') as zip_file:
            for root, _, files in walk(self._storet_zip_dir):
                for xml in files:
                    if xml.endswith('.xml'):
                        # join with root so files in subdirectories resolve
                        zip_file.write(path.join(root, xml), arcname=xml)

        # move the zip file
        dest = '../ISOs/' + self._iso_dirs[name] + '/storet.zip'
        if path.exists(dest):
            remove(dest)
        move(tmp_zip, dest)
def download_isos(self):
    """
    Download the ISO record of every dataset linked from ``self._html`` and
    pass each result to ``move_iso`` with ``self._iso``.

    A link counts as a dataset link when its href contains "dataset" past
    the first character; the dataset id is the text after the first "=".
    Ids containing "Agg" (again past the first character) are requested
    from ``self._agg``, all others from ``self._latest``; ".nc" is appended
    to the id in both cases.
    """
    listing = etree.HTML(download(self._html))
    for anchor in listing.findall('.//a[@href]'):
        # the published addresses misspell "Latest"; correct that first
        href = anchor.attrib.get('href').replace('Lastest','Latest')
        if href.find('dataset') <= 0:
            continue

        dataset = href[href.find('=') + 1:]
        base = self._agg if dataset.find('Agg') > 0 else self._latest

        # get the iso, then move it into place
        fname = download_iso(base + dataset + '.nc', catalog=self._html, dataset=dataset)
        move_iso(fname, self._iso)