Ejemplo n.º 1
0
def test_column_task(klass):
    '''
    Ensure a column task runs and produces some kind of independent metadata.

    The task class is instantiated without arguments, executed through
    ``runtask``, and the test then asserts that at least one ``OBSColumn``
    row has an id starting with the task's classpath.

    :param klass: task class exposing ``get_param_names()``.
    :raises SkipTest: when ``klass`` declares any parameters.
    '''
    # TODO: test ColumnsTasks that have params
    param_names = klass.get_param_names()
    if param_names:
        raise SkipTest("Cannot test ColumnsTask with params")

    task = klass()
    runtask(task)

    column_query = current_session().query(OBSColumn)
    owned_by_task = OBSColumn.id.startswith(classpath(task))
    assert_greater(column_query.filter(owned_by_task).count(), 0)
Ejemplo n.º 2
0
def test_column_task(klass):
    '''
    Check that a parameterless column task can be run and leaves behind at
    least one ``OBSColumn`` whose id starts with the task's classpath.

    Task classes that report any parameter names are skipped by raising
    ``SkipTest``.
    '''
    # TODO: test ColumnsTasks that have params
    takes_params = bool(klass.get_param_names())
    if takes_params:
        raise SkipTest('Cannot test ColumnsTask with params')

    task = klass()
    runtask(task)

    session = current_session()
    n_columns = session.query(OBSColumn).filter(
        OBSColumn.id.startswith(classpath(task))
    ).count()
    assert_greater(n_columns, 0)
Ejemplo n.º 3
0
def load_sumlevels():
    '''
    Load the summary levels stored in ``summary_levels.json`` (located in the
    same directory as this module). Returns a dict of each level's ``fields``
    keyed by its ``summary_level`` value.

    The ``ancestors`` entries and the ``parent`` entry are primary keys in the
    JSON; they are rewritten in place to column paths built with
    ``os.path.join('columns', classpath(load_sumlevels), <slug>)``.
    '''
    json_path = os.path.join(os.path.dirname(__file__), 'summary_levels.json')
    with open(json_path) as fhandle:
        records = json.load(fhandle)

    def column_path(pkey):
        # We subtract 1 from the pkey because it's 1-indexed, unlike python
        return os.path.join('columns', classpath(load_sumlevels),
                            records[pkey - 1]['fields']['slug'])

    by_number = {}
    for record in records:
        fields = record['fields']
        # Slice assignment keeps the original list object while swapping every
        # pkey for its column path.
        fields['ancestors'][:] = [column_path(pkey)
                                  for pkey in fields['ancestors']]
        if fields['parent']:
            fields['parent'] = column_path(fields['parent'])

        by_number[fields['summary_level']] = fields
    return by_number
Ejemplo n.º 4
0
def _sumlevel_column(levels, pkey):
    # Resolve a summary level pkey to the path of its column.
    # We subtract 1 from the pkey because it's 1-indexed, unlike python
    return os.path.join('columns', classpath(load_sumlevels),
                        levels[pkey - 1]['fields']['slug'])


def load_sumlevels():
    '''
    Read ``summary_levels.json`` from this module's directory and return a
    dict mapping each ``summary_level`` value to that level's ``fields``.

    Before being returned, the pkeys held in ``ancestors`` and ``parent`` are
    replaced by paths to the corresponding columns.
    '''
    here = os.path.dirname(__file__)
    with open(os.path.join(here, 'summary_levels.json')) as infile:
        levels = json.load(infile)

    by_level = {}
    for level in levels:
        attrs = level['fields']

        ancestor_refs = attrs['ancestors']
        for idx, ancestor_pkey in enumerate(ancestor_refs):
            ancestor_refs[idx] = _sumlevel_column(levels, ancestor_pkey)

        parent_pkey = attrs['parent']
        if parent_pkey:
            attrs['parent'] = _sumlevel_column(levels, parent_pkey)

        by_level[attrs['summary_level']] = attrs
    return by_level
Ejemplo n.º 5
0
    def run(self):
        '''
        Import every shapefile for this task's year and geography into the
        output table, then rename and index the geometry column.

        Steps:

        1. List ``*.shp`` in ``tmp/<classpath>/<year>/<geography>``.
        2. Run ``ogrinfo`` on the first listed shapefile; if its output
           mentions ``Polygon``, pass ``-nlt MultiPolygon`` to ogr2ogr.
        3. Load one shapefile (the last listed) with ``-lco OVERWRITE=yes``.
        4. Append the remaining shapefiles in groups of 500, fanned out
           through ``xargs -P 16``.
        5. Rename ``wkb_geometry`` to ``geom`` and create a GIST index on it.
        '''
        shp_dir = os.path.join('tmp', classpath(self), str(self.year),
                               self.geography)
        shapefiles = shell(
            'ls {dir}/*.shp'.format(dir=shp_dir)).strip().split('\n')

        # Decide on geometry promotion from the first shapefile only.
        resp = shell('ogrinfo {shpfile_path}'.format(shpfile_path=shapefiles[0]))
        nlt = '-nlt MultiPolygon' if 'Polygon' in resp else ''

        # Loop-invariant names of the destination table.
        tablename = self.output().tablename
        schema = self.output().schema

        # The first import runs on its own with OVERWRITE=yes; every later
        # import uses -append.
        cmd = 'PG_USE_COPY=yes PGCLIENTENCODING=latin1 ' \
              'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE active_schema={schema}" ' \
              '-t_srs "EPSG:4326" {nlt} -nln {tablename} ' \
              '-lco OVERWRITE=yes ' \
              '-lco SCHEMA={schema} {shpfile_path} '.format(
                  tablename=tablename, schema=schema, nlt=nlt,
                  shpfile_path=shapefiles.pop())
        shell(cmd)
        imported = 1  # the shapefile loaded just above

        # chunk into 500 shapefiles at a time.
        for shape_group in grouper(shapefiles, 500):
            # NOTE(review): grouper presumably pads the final group with a
            # falsy fill value, hence the filter -- confirm against grouper.
            batch = [shp for shp in shape_group if shp]
            shell('export PG_USE_COPY=yes PGCLIENTENCODING=latin1; '
                  'echo \'{shapefiles}\' | xargs -P 16 -I shpfile_path '
                  'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE '
                  'active_schema={schema}" -append '
                  '-t_srs "EPSG:4326" {nlt} -nln {tablename} '
                  'shpfile_path '.format(shapefiles='\n'.join(batch),
                                         tablename=tablename,
                                         nlt=nlt,
                                         schema=schema))
            # Report the real running total instead of a multiple of 500, so
            # a short final batch is not over-counted.
            imported += len(batch)
            print('imported {} shapefiles'.format(imported))

        session = current_session()
        # Rename the geometry column written by ogr2ogr to ``geom``
        session.execute(
            'ALTER TABLE {qualified_table} RENAME COLUMN '
            'wkb_geometry TO geom'.format(qualified_table=self.output().table))
        # Spatial index
        session.execute(
            'CREATE INDEX ON {qualified_table} USING GIST (geom)'.format(
                qualified_table=self.output().table))
Ejemplo n.º 6
0
    def run(self):
        '''
        Load all shapefiles found in ``tmp/<classpath>/<year>/<geography>``
        into the output table as MultiPolygons, then rename the geometry
        column to ``geom`` and build a GIST index on it.

        One shapefile (the last one listed) is imported on its own with
        ``-lco OVERWRITE=yes``; the rest are appended in groups of 500, each
        group being fanned out through ``xargs -P 16``.
        '''
        shp_dir = os.path.join('tmp', classpath(self), str(self.year),
                               self.geography)
        listing = shell('ls {dir}/*.shp'.format(dir=shp_dir))
        shapefiles = listing.strip().split('\n')

        # Destination names do not change between commands; read them once.
        tablename = self.output().tablename
        schema = self.output().schema

        cmd = 'PG_USE_COPY=yes PGCLIENTENCODING=latin1 ' \
              'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE active_schema={schema}" ' \
              '-t_srs "EPSG:4326" -nlt MultiPolygon -nln {tablename} ' \
              '-lco OVERWRITE=yes ' \
              '-lco SCHEMA={schema} {shpfile_path} '.format(
                  tablename=tablename,
                  schema=schema,
                  shpfile_path=shapefiles.pop())
        shell(cmd)
        total_imported = 1  # the shapefile loaded just above

        # chunk into 500 shapefiles at a time.
        for shape_group in grouper(shapefiles, 500):
            # NOTE(review): the truthiness filter suggests grouper pads the
            # last group with a falsy fill value -- confirm against grouper.
            present = [shp for shp in shape_group if shp]
            shell(
                'export PG_USE_COPY=yes PGCLIENTENCODING=latin1; '
                'echo \'{shapefiles}\' | xargs -P 16 -I shpfile_path '
                'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE '
                'active_schema={schema}" -append '
                '-t_srs "EPSG:4326" -nlt MultiPolygon -nln {tablename} '
                'shpfile_path '.format(
                    shapefiles='\n'.join(present),
                    tablename=tablename,
                    schema=schema))
            # print() with a single argument behaves the same on Python 2 and
            # Python 3; the count is the real running total rather than a
            # multiple of 500.
            total_imported += len(present)
            print('imported {} shapefiles'.format(total_imported))

        session = current_session()
        # Rename the geometry column written by ogr2ogr to ``geom``
        session.execute('ALTER TABLE {qualified_table} RENAME COLUMN '
                        'wkb_geometry TO geom'.format(
                            qualified_table=self.output().table))
        # Spatial index
        session.execute(
            'CREATE INDEX ON {qualified_table} USING GIST (geom)'.format(
                qualified_table=self.output().table))
Ejemplo n.º 7
0
 def directory(self):
     '''
     Return the working directory for this task: ``tmp``, the task's
     classpath, the year and the geography joined with ``os.path.join``.
     '''
     parts = ('tmp', classpath(self), str(self.year), self.geography)
     return os.path.join(*parts)
Ejemplo n.º 8
0
 def directory(self):
     # Path layout: tmp / <classpath of this task> / <year> / <geography>
     task_root = os.path.join('tmp', classpath(self))
     return os.path.join(task_root, str(self.year), self.geography)
Ejemplo n.º 9
0
 def output(self):
     '''
     Return a ``LocalTarget`` for a CSV under ``tmp/<classpath>`` whose name
     is the task id, an underscore, the slugified ``last_time`` and ``.csv``.
     '''
     stem = os.path.join('tmp', classpath(self), self.task_id)
     suffix = '_' + underscore_slugify(self.last_time) + '.csv'
     return LocalTarget(stem + suffix)
Ejemplo n.º 10
0
 def output(self):
     '''
     Return a ``LocalTarget`` pointing at ``self.filename()`` inside
     ``tmp/<classpath>``.
     '''
     target_path = os.path.join('tmp', classpath(self), self.filename())
     return LocalTarget(path=target_path)
Ejemplo n.º 11
0
 def output(self):
     '''
     Return a ``LocalTarget`` for a ``.dump`` file under ``tmp/<classpath>``,
     named after the unqualified task id.
     '''
     dump_dir = os.path.join('tmp', classpath(self))
     dump_name = unqualified_task_id(self.task_id) + '.dump'
     return LocalTarget(os.path.join(dump_dir, dump_name))
Ejemplo n.º 12
0
 def output(self):
     # Target file: tmp/<classpath>/<task_id>_<slugified last_time>.csv
     prefix = os.path.join('tmp', classpath(self), self.task_id)
     timestamp = underscore_slugify(self.last_time)
     csv_path = ''.join([prefix, '_', timestamp, '.csv'])
     return LocalTarget(csv_path)
Ejemplo n.º 13
0
 def output(self):
     '''
     Return a ``LocalTarget`` at the lower-cased path
     ``tmp/<classpath>/<seq>``.
     '''
     mixed_case_path = os.path.join('tmp', classpath(self), self.seq)
     return LocalTarget(mixed_case_path.lower())
Ejemplo n.º 14
0
 def output(self):
     '''
     Return a ``LocalTarget`` for ``au_mb_all_merged.shp`` inside this task's
     ``tmp/<classpath>/<task_id>`` directory.
     '''
     task_dir = os.path.join('tmp', classpath(self), self.task_id)
     return LocalTarget(os.path.join(task_dir, 'au_mb_all_merged.shp'))
 def output(self):
     '''
     Return a ``LocalTarget`` whose path is the task's classpath joined with
     ``self.filename()``.
     '''
     location = os.path.join(classpath(self), self.filename())
     return LocalTarget(path=location)
Ejemplo n.º 16
0
 def output(self):
     '''
     Return a ``LocalTarget`` at ``tmp/<classpath>/<task_id>``.
     '''
     components = ['tmp', classpath(self), self.task_id]
     return LocalTarget(os.path.join(*components))
Ejemplo n.º 17
0
 def output(self):
     # The target lives under tmp/, namespaced by classpath and task id.
     task_path = os.path.join('tmp', classpath(self), self.task_id)
     return LocalTarget(task_path)
Ejemplo n.º 18
0
 def __init__(self, task):
     '''
     Store ``tmp/<classpath of task>/<task_id>`` as ``self.path`` and wrap
     that path in a ``LocalTarget`` kept on ``self._target``.
     '''
     segments = ('tmp', classpath(task), task.task_id)
     self.path = os.path.join(*segments)
     self._target = LocalTarget(self.path)