Exemple #1
0
    def __download(self, catalog_url):
        """Collect ISO records for the "best" dataset of each linked catalog.

        Fetches ``catalog_url``, follows every link whose address (prefixed
        with ``self._html``) contains ``/catalog/``, looks on that page for a
        link whose href contains ``best`` and requests the matching ISO
        document through ``download_iso``.

        :param catalog_url: address of the page listing the catalogs.
        :returns: list of the non-``None`` values returned by
            ``download_iso``, in the order the catalogs appear on the page.
        """
        retval = list()
        # get catalogs
        tree = etree.HTML(download(catalog_url))

        for a in tree.findall(".//a[@href]"):
            cat = self._html + a.attrib.get("href")
            if cat.find("/catalog/") <= 0:
                continue

            catree = etree.HTML(download(cat))
            dataset = ""
            for a2 in catree.findall(".//a[@href]"):
                href = a2.attrib.get("href")
                if href.find("best") > 0:
                    # Keep what follows the first "="; when several links
                    # match, the last one on the page wins.
                    dataset = href[href.find("=") + 1:]

            if not dataset:
                # No "best" dataset on this catalog page.  Building the ISO
                # address from an empty name would only request a malformed
                # URL, so skip this catalog.
                continue

            # The ISO address mirrors the catalog address, with the dataset
            # file name in place of "catalog.html" and "/iso/" in place of
            # "/catalog/".
            dsname = dataset.rpartition("/")[2]
            url = cat.replace("catalog.html", dsname)
            url = url.replace("/catalog/", "/iso/")

            # download iso
            iso = download_iso(url, catalog=cat, dataset=dataset)
            if iso is not None:
                retval.append(iso)

        return retval
Exemple #2
0
    def download_isos(self):
        """
        Download an ISO record for every dataset referenced on the page at
        ``self._html`` and pass each result to ``move_iso`` together with
        ``self._iso_dir``.
        """
        page = etree.HTML(download(self._html))

        # Gather the dataset identifiers: any attribute value of an <a href>
        # element that contains "dataset" (not at position 0) contributes the
        # text after its first "=".
        dataset_ids = []
        for anchor in page.findall(".//a[@href]"):
            for attr_value in anchor.attrib.values():
                if attr_value.find("dataset") > 0:
                    marker = attr_value.find("=")
                    dataset_ids.append(attr_value[marker + 1:])

        # Download everything first; results are keyed by dataset id, so a
        # repeated id keeps only its most recent download result.
        downloaded = {}
        for dataset_id in dataset_ids:
            iso_url = self._iso + dataset_id
            downloaded[dataset_id] = download_iso(
                iso_url, catalog=self._html, dataset=dataset_id
            )

        # Only once all downloads are done are the files moved.
        for iso_file in downloaded.values():
            move_iso(iso_file, self._iso_dir)
Exemple #3
0
	def download_isos(self):
		"""Fetch the id listing and store the metadata XML of every listed id.

		The response from ``self._html`` is saved to
		``./pyiso/iso_tmp/geo.txt`` and read back as tab-separated lines whose
		second column is the record id (a line whose second column is ``id``
		is treated as the header).  For each id the document at
		``self._meta + id`` is downloaded and parsed, and its file-identifier
		text is used to build the output directory under ``self._iso_path``.
		Ids whose XML cannot be parsed or written are appended to
		``corrupt_metadata_ids.txt`` in ``self._iso_path``.

		Raises:
			ValueError: if a non-header id field is not an integer.
		"""
		listing_path = './pyiso/iso_tmp/geo.txt'
		corrupt_log = abspath(self._iso_path + 'corrupt_metadata_ids.txt')

		# download the id listing and keep a copy on disk
		resp = download(self._html)
		with open(listing_path, 'w+') as f:
			f.write(resp)

		# remove the corrupt-id log left by a previous run
		try:
			os.remove(corrupt_log)
		except OSError:
			# nothing to remove (or not removable); entries are appended below
			pass

		with open(listing_path, 'r') as listing:
			for line in listing:
				line_spl = line.split('\t')
				if len(line_spl) < 2:
					# blank or malformed line: there is no id column to read
					continue

				id_field = line_spl[1].strip()
				if id_field == 'id':
					# header row
					continue

				iid = int(id_field)
				html = self._meta + str(iid)
				# download the iso
				xmlstr = download(html)
				try:
					tree = etree.fromstring(xmlstr)
					# need the file identifier
					fi = tree.find('.//' + self._file_identifier + '/' + self._character_string)
					fname = fi.text
					path = abspath(self._iso_path + fname + self._meta_folder)
					# parenthesised form prints the same text on Python 2
					print('\nWriting to: ' + path)
					if not os.path.exists(path):
						os.makedirs(path)

					with open(path + '/' + self._meta_file, 'w+') as f:
						f.write(xmlstr)
				except Exception:
					# Best effort: any failure to parse or store this record is
					# logged and the loop moves on.  KeyboardInterrupt and
					# SystemExit are deliberately no longer swallowed.
					with open(corrupt_log, 'a+') as f:
						f.write('\nCorrupted/Missing xml for id: ' + str(iid))
Exemple #4
0
	def download_isos(self):
		"""Download the ISO files of every known source and file them away.

		The page at ``self._html + self._sources`` lists sources as ``<li>``
		items; only those whose text is a key of ``self._iso_dirs`` are
		processed.  Each source's ``list.html`` names its ISO files, which are
		fetched with ``download_iso``.  Files of ordinary sources are passed
		to ``move_iso``; files of the ``STORET`` source are gathered in
		``self._storet_zip_dir``, zipped into a single ``storet.zip`` and that
		archive is moved under ``../ISOs/``.
		"""
		resp = download(self._html+self._sources)
		tree = etree.HTML(resp)

		for source in tree.findall(".//li"):
			name = source.text
			if name not in self._iso_dirs:
				continue

			# get list of files in source
			dirlist = download(self._html + name + '/list.html')
			dirtree = etree.HTML(dirlist)
			for iso in dirtree.findall('.//li'):
				sname = download_iso(self._html + name + '/' + iso.text.strip())
				if name != 'STORET':
					move_iso(sname, self._iso_dirs[name])
				elif sname is not None:
					# storet needs to be compressed before being moved as a single archive
					if path.exists(self._storet_zip_dir + sname):
						remove(self._storet_zip_dir + sname)
					move('./pyiso/iso_tmp/' + sname, self._storet_zip_dir)

			if name == 'STORET':
				# archive the storet directory and move it to the ISOs directory.
				# ZipFile is given the path so it opens the archive in binary
				# mode itself and closes it when the block exits.
				with ZipFile(path.abspath('./pyiso/iso_tmp/storet.zip'), 'w') as zip_file:
					for root, _, files in walk(self._storet_zip_dir):
						for fname in files:
							if fname.endswith('.xml'):
								# join with the directory walk() actually found
								# the file in, so nested files resolve correctly
								zip_file.write(path.join(root, fname), arcname=fname)

				# move the zip file, replacing any archive from an earlier run
				dest = '../ISOs/' + self._iso_dirs[name] + '/storet.zip'
				if path.exists(dest):
					remove(dest)

				move('./pyiso/iso_tmp/storet.zip', dest)
Exemple #5
0
	def download_isos(self):
		"""Fetch the ISO record of every dataset link on the page at
		``self._html`` and pass each result to ``move_iso`` with ``self._iso``.
		"""
		page = etree.HTML(download(self._html))

		for anchor in page.findall('.//a[@href]'):
			# the address contains a typo ("Lastest") that has to be corrected
			href = anchor.attrib.get('href').replace('Lastest', 'Latest')
			if href.find('dataset') <= 0:
				# not a dataset link
				continue

			# the dataset name is whatever follows the first "="
			dataset = href[href.find('=') + 1:]

			# names containing "Agg" are served from self._agg, all others
			# from self._latest
			base = self._agg if dataset.find('Agg') > 0 else self._latest
			url = base + dataset + '.nc'

			iso_file = download_iso(url, catalog=self._html, dataset=dataset)
			move_iso(iso_file, self._iso)