Example #1
def query(region, pollutant, year):
    """
    """
    region_code = regions_dict.get_pk(region)
    region_name = regions_dict.get_name(region)

    pollutant_code = pollutants_dict.get_pk(pollutant)
    pollutant_formula = pollutants_dict.get_formula(pollutant)
    # pollutant_name = pollutants_dict.get_name(pollutant)
    # name = '%sdownload/%s_%s_%s.zip' % (
    #     URL_PREFIX,
    #     region_name.upper(),
    #     pollutant_formula.upper(),
    #     year,
    # )

    query = urllib.urlencode({
            'p_comp': pollutant_code,
            'p_comp_name': pollutant_formula.upper(),
            'p_reg': region_code,
            'p_reg_name': region_name.upper(),
            'p_anno': year,
    })

    genfile = "%(prefix)s/servlet/zipper?%(query)s" % {
        'prefix': URL_PREFIX,
        'query': query,
    }

    link = download(genfile)
    if link is None:
        raise IOError("Could not fetch '%s'." % genfile)

    soup = BeautifulSoup(link)

    location = \
        soup.find('script').contents[0].split('"')[1].\
        replace("../download/", "")

    link.close()

    archive_url = "%(prefix)s/download/%(location)s" % {
        'prefix': URL_PREFIX,
        'location': location,
    }
    archive = download(archive_url)

    if archive is None:
        raise IOError("Could not fetch '%s'." % archive_url)

    return archive
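
A minimal sketch of how query() might be invoked, assuming URL_PREFIX, regions_dict and pollutants_dict are already set up by the surrounding module; the "lombardia" and "pm10" keys are purely illustrative, and download() is assumed to return a urllib-style file object, as the close() calls above suggest.

# hypothetical call: fetch one year of PM10 measurements for one region
archive = query("lombardia", "pm10", 2010)
try:
    payload = archive.read()  # raw bytes of the downloaded zip archive
finally:
    archive.close()
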
Example #2
    def __call__(self):
        """Builds the output zip archive: the DSPL xml plus its csv tables."""

        if '.' not in self._out:
            self._out = self._out + ".zip"

        # disk cleanup
        if os.path.exists(self._tmpdir):
            shutil.rmtree(self._tmpdir, True)  # TODO add something for errors
        os.mkdir(self._tmpdir)

        azip = zipfile.ZipFile(self._out, "w")

        # write output to file
        fullpath = os.path.join(self._tmpdir, "brace.xml")
        xml = open(fullpath, "wt")
        xml.write(build_dspl_xml())
        xml.close()
        azip.write(fullpath)

        # write aggregates csv file
        fullpath = os.path.join(self._tmpdir, "aggregates.csv")
        aggrcsv = open(fullpath, "wt")
        aggrcsv.write("aggregate, description\n")
        for (id, desc) in [ ( "max", "Maximum daily concentration" ),
                            ( "avg", "Average daily concentration" ), ]:
            entry = u"%(id)s, %(description)s\n" % {
                'id': id,
                'description': desc,
            }
            aggrcsv.write(entry)
        aggrcsv.close()
        azip.write(fullpath)

        # write regions csv file
        fullpath = os.path.join(self._tmpdir, "regions.csv")
        regcsv = open(fullpath, "wt")
        regcsv.write("region, name, latitude, longitude\n")
        for r in opts_mgr.regions:
            entry = u"%(region)s, %(region)s, %(latitude)s, %(longitude)s\n" % {
                'region': regions_dict.get_name(r),
                'latitude': regions_dict.get_latitude(r),
                'longitude': regions_dict.get_longitude(r),
            }
            regcsv.write(entry)
        regcsv.close()
        azip.write(fullpath)

        # write stations csv file
        fullpath = os.path.join(self._tmpdir, "stations.csv")
        stscsv = open(fullpath, "wt")
        stscsv.write("station, name, region, latitude, longitude\n")
        for (regcode, name, latitude, longitude) in stations_dict.all():
            entry = u"%(station)s, %(station)s, %(region)s, %(latitude)s, %(longitude)s\n" % {
                'station': name,
                'region': regions_dict.get_name(regcode),
                'latitude': latitude,
                'longitude': longitude,
            }
            stscsv.write(entry)
        stscsv.close()
        azip.write(fullpath)

        # write pollutants csv file
        fullpath = os.path.join(self._tmpdir, "pollutants.csv")
        csv = open(fullpath, "wt")
        csv.write("pollutant, description\n")
        for (_, formula, description) in pollutants_dict.all():
            entry = u"%(formula)s, %(description)s\n" % {
                'formula': formula,
                'description': escape(description),
                }
            csv.write(entry)
        csv.close()
        azip.write(fullpath)

        # write pollutants csv files for slice tables
        # Remark: as csv file *must* be sorted according to dimensions
        # it is necessary to build two separate temp files and then
        # join them together when every row has been processed. :-/
        data_fullpath = os.path.join(self._tmpdir, "data.csv")
        data_csv = open(data_fullpath, "wt")
        data_csv.write("region, station, aggregate, pollutant, day, measurement\n")

        max_file = tempfile.TemporaryFile()
        avg_file = tempfile.TemporaryFile()

        # generate aggregated data
        for (region, station, day, pollutant, max_, avg_) in self._yield():

            formula = pollutants_dict.get_formula(pollutant)

            entry = u"%(region)s, %(station)s, max, %(formula)s, %(day)s, %(qty).3f\n" % {
                'region': region,
                'station': station,
                'formula': formula,
                'day': time.strftime("%Y-%m-%d", day + (0,) * 6),
                'qty': max_,
            }
            max_file.write(entry)

            entry = u"%(region)s, %(station)s, avg, %(formula)s, %(day)s, %(qty).3f\n" % {
                'region': region,
                'station': station,
                'formula': formula,
                'day': time.strftime("%Y-%m-%d", day + (0,) * 6),
                'qty': avg_,
            }
            avg_file.write(entry)

        # concatenate max_file and avg_file files
        max_file.seek(0)
        for l in max_file:
            data_csv.write(l)
        max_file.close()  # get rid of temp file

        avg_file.seek(0)
        for l in avg_file:
            data_csv.write(l)
        avg_file.close()  # get rid of temp file

        data_csv.close()
        azip.write(data_fullpath)
        azip.close()  # finalize the archive (writes the central directory)

        # disk cleanup
        if os.path.exists(self._tmpdir):
            shutil.rmtree(self._tmpdir, True)  # TODO add something for errors
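
The remark about sorting above is the reason for the two temporary files: rows must reach data.csv grouped by the aggregate dimension, so each group is buffered separately and the buffers are concatenated at the end. A stripped-down sketch of the same buffer-then-concatenate idiom, using illustrative names (write_grouped, rows) that do not appear in the original module:

import tempfile

def write_grouped(out_path, rows):
    # rows yields ("max" | "avg", csv_line) pairs in arbitrary order;
    # the output must list every "max" line before every "avg" line
    max_buf = tempfile.TemporaryFile(mode='w+')
    avg_buf = tempfile.TemporaryFile(mode='w+')
    for aggregate, line in rows:
        (max_buf if aggregate == "max" else avg_buf).write(line)

    out = open(out_path, "wt")
    for buf in (max_buf, avg_buf):
        buf.seek(0)
        for line in buf:
            out.write(line)
        buf.close()  # discard the temp file
    out.close()
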
Example #3
    def __call__(self, args):
        """Parses the command line options and updates the manager's state."""

        opts, args = getopt.getopt(args, "",
                                   self.long_options)

        for o, a in opts:

            if o == "--year":
                year = int(a)
                if year < DEFAULT_FROM_YEAR:
                    raise getopt.GetoptError(
                        "No data available before %d" % DEFAULT_FROM_YEAR)

                self.from_year = year
                logger.debug("Setting starting year to %d", year)

                self.to_year = year
                logger.debug("Setting ending year to %d", year)

            elif o == "--from":
                from_year = int(a)
                if from_year < DEFAULT_FROM_YEAR:
                    raise getopt.GetoptError(
                        "No data available before %d" % DEFAULT_FROM_YEAR)

                self.from_year = from_year
                logger.debug("Setting starting year to %d", from_year)

            elif o == "--to":
                to_year = int(a)
                if DEFAULT_TO_YEAR < to_year:
                    raise getopt.GetoptError(
                        "No data available after %d" % DEFAULT_TO_YEAR)

                self.to_year = to_year
                logger.debug("Setting ending year to %d", to_year)

            elif o == "--region":
                region_name = regions_dict.get_name(a)
                region_code = regions_dict.get_pk(a)

                self.regions.append(region_code)
                logger.debug("Adding region '%s'", region_name)

            elif o == '--pollutant':
                pollutant_formula = pollutants_dict.get_formula(a)
                pollutant_name = pollutants_dict.get_name(a)
                pollutant_code = pollutants_dict.get_pk(a)

                self.pollutants.append(pollutant_code)
                logger.debug("Adding pollutant '%s' (%s)",
                             pollutant_formula, pollutant_name)

            elif o == "--verbosity":
                level = int(a)
                self.verbosity = level

                if level == 0:
                    logger.setLevel(logging.ERROR)
                elif level == 1:
                    logger.setLevel(logging.WARNING)
                elif level == 2:
                    logger.setLevel(logging.INFO)
                elif level == 3:
                    logger.setLevel(logging.DEBUG)

                else:
                    assert False, "Unsupported verbosity level"

                logger.debug("Setting verbosity level to %s",
                             ["ERROR", "WARNING", "INFO", "DEBUG"][level])

            elif o == "--keep":
                self.keep = True
                if self.local:
                    raise getopt.GetoptError(
                        "--local and --keep are not supported together")

            elif o == "--local":
                self.local = True
                if self.keep:
                    raise getopt.GetoptError(
                        "--local and --keep are not supported together")


            elif o == "--help":
                print usage
                sys.exit()

            else:
                assert False, "unhandled option"
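
A minimal sketch of how a getopt-based handler like this is typically driven; opts_mgr is presumably the OptionsManager instance used elsewhere in the module, while the error handling shown here is an assumption rather than part of the original source.

import sys
import getopt

try:
    # hand everything after the program name to the options manager
    opts_mgr(sys.argv[1:])
except getopt.GetoptError as err:
    # unknown option, bad value or an incompatible combination
    sys.stderr.write("%s\n" % err)
    sys.exit(2)
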
Example #4
  --verbosity=<level>, adjusts the level of verbosity of the
  tool. This is a number between 0 (quiet) and 3 (extremely
  verbose). This is mainly for debugging purposes.

  --format=<format>, determines the output format for the dataset.
  Currently the tool supports only DSPL (Dataset Publishing Language).

arguments:

  filename, the filename to write the output to.
""" % {
    'regions': "\n  ".join([regions_dict.get_name(r[0])
                            for r in regions_dict.all()]),

    'pollutants': "\n  ".join(["%(formula)s (%(name)s)" % {
            'formula': pollutants_dict.get_formula(p[0]),
            'name': pollutants_dict.get_name(p[0]),
            } for p in pollutants_dict.all()]),
}


class OptionsManager(object):
    """Provides options management.
    """

    long_options = [
        "help",
        "keep",
        "local",
        "verbosity=",
        "from=",
Example #5
if not opts_mgr.pollutants:
    opts_mgr.pollutants = [
        p[0] for p in pollutants_dict.all()]
if not opts_mgr.regions:
    opts_mgr.regions = [
        r[0] for r in regions_dict.all()]


# main body
if __name__ == "__main__":

    data_mgr = DataManager()

    # Phase 1. Fetch data
    total_rows = 0
    for pollutant_code in opts_mgr.pollutants:
        pollutant_formula = pollutants_dict.get_formula(pollutant_code)
        pollutant_name = pollutants_dict.get_name(pollutant_code)

        for region_code in opts_mgr.regions:
            region_name = regions_dict.get_name(region_code)

            for year in range(opts_mgr.from_year,
                              1 + opts_mgr.to_year):

                if not opts_mgr.local:
                    # fetch remote archive
                    logger.info(
                        "Trying to fetch data for year %d, pollutant '%s' (%s), "
                        "region '%s'", year, pollutant_formula, pollutant_name,
                        region_name)
                    archive = query(region_code, pollutant_code, year)
Example #6
    def filter_by_formula(self, formula):
        """Yields the stored rows whose pollutant matches the given formula."""

        for row in self._data:
            if pollutants_dict.get_formula(row.pollutant) == formula:
                yield row
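
A small usage sketch for this generator; data_mgr is the DataManager instance created in the main body above, and "PM10" is an illustrative formula. Only the pollutant field of each row is known from the source, so nothing else is accessed here.

# hypothetical loop: count the rows measuring PM10
pm10_rows = list(data_mgr.filter_by_formula("PM10"))
print len(pm10_rows), "PM10 measurements"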