Example #1
0
  verbose). This is mainly for debugging purposes.

  --format=<format>, determines the output format for the dataset.
  Currently the tool supports only DSPL (Dataset Publishing Language).

arguments:

  filename, the filename to write the output to.
""" % {
    'regions': "\n  ".join ([regions_dict.get_name(r[0])
                             for r in regions_dict.all()]),

    'pollutants': "\n  ".join ([ "%(formula)s (%(name)s)" % {
            'formula': pollutants_dict.get_formula(p[0]),
            'name': pollutants_dict.get_name(p[0]),
            } for p in pollutants_dict.all() ]),
}


class OptionsManager(object):
    """Provides options management.
    """

    long_options = [
        "help",
        "keep",
        "local",
        "verbosity=",
        "from=",
        "to=",
        "year=",
Example #2
0
    def __call__(self):
        """Build the dataset zip archive at ``self._out``.

        Every member file is first written into the scratch directory
        ``self._tmpdir`` and then added to the archive:

        * ``brace.xml`` -- the output of ``build_dspl_xml()``;
        * ``aggregates.csv``, ``regions.csv``, ``stations.csv`` and
          ``pollutants.csv``;
        * ``data.csv`` -- the rows produced by ``self._yield()``, all "max"
          rows first, then all "avg" rows.

        ``.zip`` is appended to ``self._out`` when it contains no dot.

        The archive is closed and the scratch directory removed even when a
        step fails, so no file handle or temporary file is left behind.
        """

        def add_file(azip, name, chunks):
            """Write *chunks* to <tmpdir>/<name>, then add that file to *azip*."""
            fullpath = os.path.join(self._tmpdir, name)
            with open(fullpath, "wt") as out:
                for chunk in chunks:
                    out.write(chunk)
            azip.write(fullpath)

        def data_lines(max_file, avg_file):
            """Yield the lines of data.csv, header first.

            Remark: as csv file *must* be sorted according to dimensions
            the "max" and "avg" rows are spooled into two separate temp
            files and only joined once every row has been processed. :-/
            """
            yield "region, station, aggregate, pollutant, day, measurement\n"

            row = u"%(region)s, %(station)s, %(aggregate)s, %(formula)s, %(day)s, %(qty).3f\n"
            for (region, station, day, pollutant, max_, avg_) in self._yield():
                fields = {
                    'region': region,
                    'station': station,
                    'formula': pollutants_dict.get_formula(pollutant),
                    # day is padded with six zeros before being handed to
                    # strftime (presumably a (year, month, day) tuple --
                    # TODO confirm against _yield)
                    'day': time.strftime("%Y-%m-%d", day + (0, ) * 6),
                }
                max_file.write(row % dict(fields, aggregate="max", qty=max_))
                avg_file.write(row % dict(fields, aggregate="avg", qty=avg_))

            # concatenate: every "max" row, then every "avg" row
            for spool in (max_file, avg_file):
                spool.seek(0)
                for line in spool:
                    yield line

        # NOTE(review): any dot in the path (e.g. "./out") suppresses the
        # ".zip" suffix; kept as-is to preserve existing behaviour.
        if '.' not in self._out:
            self._out = self._out + ".zip"

        # disk cleanup: start from an empty scratch directory
        if os.path.exists(self._tmpdir):
            shutil.rmtree(self._tmpdir, True)  # TODO add something for errors
        os.mkdir(self._tmpdir)

        try:
            azip = zipfile.ZipFile(self._out, "w")
            try:
                # write output to file
                add_file(azip, "brace.xml", [build_dspl_xml()])

                # write aggregates csv file
                lines = ["aggregate, description\n"]
                lines.extend(
                    u"%(id)s, %(description)s\n" % {
                        'id': code,
                        'description': desc,
                    }
                    for (code, desc) in [("max", "Maximum daily concentration"),
                                         ("avg", "Average daily concentration")])
                add_file(azip, "aggregates.csv", lines)

                # write regions csv file (the name doubles as the region key)
                lines = ["region, name, latitude, longitude\n"]
                lines.extend(
                    u"%(region)s, %(region)s, %(latitude)s, %(longitude)s\n" % {
                        'region': regions_dict.get_name(r),
                        'latitude': regions_dict.get_latitude(r),
                        'longitude': regions_dict.get_longitude(r),
                    }
                    for r in opts_mgr.regions)
                add_file(azip, "regions.csv", lines)

                # write stations csv file (the name doubles as the station key)
                lines = ["station, name, region, latitude, longitude\n"]
                lines.extend(
                    u"%(station)s, %(station)s, %(region)s, %(latitude)s, %(longitude)s\n" % {
                        'station': name,
                        'region': regions_dict.get_name(regcode),
                        'latitude': latitude,
                        'longitude': longitude,
                    }
                    for (regcode, name, latitude, longitude) in stations_dict.all())
                add_file(azip, "stations.csv", lines)

                # write pollutants csv file
                lines = ["pollutant, description\n"]
                lines.extend(
                    u"%(formula)s, %(description)s\n" % {
                        'formula': formula,
                        'description': escape(description),
                    }
                    for (_, formula, description) in pollutants_dict.all())
                add_file(azip, "pollutants.csv", lines)

                # write the slice table; the temp files are discarded as
                # soon as the "with" blocks exit, even on error
                with tempfile.TemporaryFile() as max_file:
                    with tempfile.TemporaryFile() as avg_file:
                        add_file(azip, "data.csv",
                                 data_lines(max_file, avg_file))
            finally:
                # close() finalises the archive; calling it again later is
                # harmless
                azip.close()
        finally:
            # disk cleanup
            shutil.rmtree(self._tmpdir, True)  # TODO add something for errors
Example #3
0
# Temporary storage prefix
TMP_DIR = "tmp/"
OUT_DIR = "out/"

# Parse the command line; any failure is reported and aborts the run.
try:
    opts_mgr(sys.argv[1:])

except Exception as e:  # "as" form: valid on Python 2.6+ and on Python 3
    # diagnostics go to stderr so they do not mix with regular output
    sys.stderr.write("%s\n" % (e,))
    sys.exit(-1)

# hackish :-/
# An empty selection falls back to the first field of every entry known
# to the corresponding dictionary.
if not opts_mgr.pollutants:
    opts_mgr.pollutants = [p[0] for p in pollutants_dict.all()]
if not opts_mgr.regions:
    opts_mgr.regions = [r[0] for r in regions_dict.all()]


# main body
if __name__ == "__main__":

    data_mgr = DataManager()

    # Phase 1. Fetch data
    total_rows = 0
    for pollutant_code in opts_mgr.pollutants:
        pollutant_formula = pollutants_dict.get_formula(pollutant_code)
        pollutant_name = pollutants_dict.get_name(pollutant_code)