verbose). This is mainly for debugging purposes. --format=<format>, determines the output format for the dataset. Currently the tool supports only DSPL (Dataset publishing Language). arguments: filename, the filename to write the output to. """ % { 'regions': "\n ".join ([regions_dict.get_name(r[0]) for r in regions_dict.all()]), 'pollutants': "\n ".join ([ "%(formula)s (%(name)s)" % { 'formula': pollutants_dict.get_formula(p[0]), 'name': pollutants_dict.get_name(p[0]), } for p in pollutants_dict.all() ]), } class OptionsManager(object): """Provides options management. """ long_options = [ "help", "keep", "local", "verbosity=", "from=", "to=", "year=",
def __call__(self):
    """Build the DSPL dataset bundle.

    Writes the DSPL xml descriptor and the concept/slice csv files
    into the temporary directory ``self._tmpdir``, packs them all
    into the ``self._out`` zip archive, then removes the temporary
    directory.  Measurement rows come from ``self._yield()``.
    """
    # default to a .zip extension when none was given
    if '.' not in self._out:
        self._out = self._out + ".zip"

    # disk cleanup: always start from an empty temp dir
    if os.path.exists(self._tmpdir):
        shutil.rmtree(self._tmpdir, True)  # TODO add something for errors
    os.mkdir(self._tmpdir)

    azip = zipfile.ZipFile(self._out, "w")
    try:
        # write the DSPL descriptor to file
        fullpath = os.path.join(self._tmpdir, "brace.xml")
        with open(fullpath, "wt") as xml:
            xml.write(build_dspl_xml())
        azip.write(fullpath)

        # write aggregates csv file
        fullpath = os.path.join(self._tmpdir, "aggregates.csv")
        with open(fullpath, "wt") as aggrcsv:
            aggrcsv.write("aggregate, description\n")
            for (id, desc) in [
                ("max", "Maximum daily concentration"),
                ("avg", "Average daily concentration"),
            ]:
                aggrcsv.write(u"%(id)s, %(description)s\n" % {
                    'id': id,
                    'description': desc,
                })
        azip.write(fullpath)

        # write regions csv file
        fullpath = os.path.join(self._tmpdir, "regions.csv")
        with open(fullpath, "wt") as regcsv:
            regcsv.write("region, name, latitude, longitude\n")
            for r in opts_mgr.regions:
                regcsv.write(
                    u"%(region)s, %(region)s, %(latitude)s, %(longitude)s\n" % {
                        'region': regions_dict.get_name(r),
                        'latitude': regions_dict.get_latitude(r),
                        'longitude': regions_dict.get_longitude(r),
                    })
        azip.write(fullpath)

        # write stations csv file
        fullpath = os.path.join(self._tmpdir, "stations.csv")
        with open(fullpath, "wt") as stscsv:
            stscsv.write("station, name, region, latitude, longitude\n")
            for (regcode, name, latitude, longitude) in stations_dict.all():
                stscsv.write(
                    u"%(station)s, %(station)s, %(region)s, %(latitude)s, %(longitude)s\n" % {
                        'station': name,
                        'region': regions_dict.get_name(regcode),
                        'latitude': latitude,
                        'longitude': longitude,
                    })
        azip.write(fullpath)

        # write pollutants csv file
        fullpath = os.path.join(self._tmpdir, "pollutants.csv")
        with open(fullpath, "wt") as csv:
            csv.write("pollutant, description\n")
            for (_, formula, description) in pollutants_dict.all():
                csv.write(u"%(formula)s, %(description)s\n" % {
                    'formula': formula,
                    'description': escape(description),
                })
        azip.write(fullpath)

        # write pollutants csv files for slice tables
        # Remark: as csv file *must* be sorted according to dimensions
        # it is necessary to build two separate temp files and then
        # join them together when every row has been processed. :-/
        data_fullpath = os.path.join(self._tmpdir, "data.csv")
        with open(data_fullpath, "wt") as data_csv:
            data_csv.write("region, station, aggregate, pollutant, day, measurement\n")
            max_file = tempfile.TemporaryFile()
            avg_file = tempfile.TemporaryFile()
            try:
                # generate aggregated data
                for (region, station, day, pollutant, max_, avg_) in self._yield():
                    formula = pollutants_dict.get_formula(pollutant)
                    # day is a (y, m, d) triple; pad to a full struct_time tuple
                    date = time.strftime("%Y-%m-%d", day + (0, ) * 6)
                    max_file.write(
                        u"%(region)s, %(station)s, max, %(formula)s, %(day)s, %(qty).3f\n" % {
                            'region': region,
                            'station': station,
                            'formula': formula,
                            'day': date,
                            'qty': max_,
                        })
                    avg_file.write(
                        u"%(region)s, %(station)s, avg, %(formula)s, %(day)s, %(qty).3f\n" % {
                            'region': region,
                            'station': station,
                            'formula': formula,
                            'day': date,
                            'qty': avg_,
                        })

                # concatenate max_file and avg_file files
                max_file.seek(0)
                for l in max_file:
                    data_csv.write(l)
                avg_file.seek(0)
                for l in avg_file:
                    data_csv.write(l)
            finally:
                # get rid of temp files even if a row blows up
                max_file.close()
                avg_file.close()
        azip.write(data_fullpath)
    finally:
        # close the archive so the zip central directory is actually
        # written out -- the original leaked the handle, which can
        # leave a truncated/corrupt zip behind
        azip.close()

    # disk cleanup
    if os.path.exists(self._tmpdir):
        shutil.rmtree(self._tmpdir, True)  # TODO add something for errors
# Temporary storage prefix TMP_DIR = "tmp/" OUT_DIR = "out/" try: opts_mgr(sys.argv[1:]) except Exception, e: print (e) sys.exit(-1) # hackish :-/ if not opts_mgr.pollutants: opts_mgr.pollutants = [ p[0] for p in pollutants_dict.all()] if not opts_mgr.regions: opts_mgr.regions = [ r[0] for r in regions_dict.all()] # main body if __name__ == "__main__": data_mgr = DataManager() # Phase 1. Fetch data total_rows = 0 for pollutant_code in opts_mgr.pollutants: pollutant_formula = pollutants_dict.get_formula(pollutant_code) pollutant_name = pollutants_dict.get_name(pollutant_code)