Example #1
    def setUp(self):
        session.rollback()
        # Ensure we have metadata loaded into the database
        # to mimic the behavior of metadata ingestion preceding file ingestion.
        drop_meta('dog_park_permits')
        drop_meta('community_radio_events')
        drop_meta('public_opera_performances')

        # Make new MetaTable objects
        self.unloaded_meta = MetaTable(url='nightvale.gov/events.csv',
                                       human_name='Community Radio Events',
                                       business_key='Event Name',
                                       observed_date='Date',
                                       latitude='lat',
                                       longitude='lon',
                                       approved_status=True)

        self.existing_meta = MetaTable(url='nightvale.gov/dogpark.csv',
                                       human_name='Dog Park Permits',
                                       business_key='Hooded Figure ID',
                                       observed_date='Date',
                                       latitude='lat',
                                       longitude='lon',
                                       approved_status=False)

        self.opera_meta = MetaTable(url='nightvale.gov/opera.csv',
                                    human_name='Public Opera Performances',
                                    business_key='Event Name',
                                    observed_date='Date',
                                    location='Location',
                                    approved_status=False)
        session.add_all(
            [self.existing_meta, self.opera_meta, self.unloaded_meta])
        session.commit()

        # Also, let's have one table pre-loaded...
        self.existing_table = sa.Table(
            'dog_park_permits', MetaData(),
            Column('hooded_figure_id', Integer),
            Column('point_date', TIMESTAMP, nullable=False),
            Column('date', Date, nullable=True),
            Column('lat', Float, nullable=False),
            Column('lon', Float, nullable=False),
            Column('hash', String(32), primary_key=True),
            Column('geom', Geometry('POINT', srid=4326), nullable=True))
        drop_if_exists(self.existing_table.name)
        self.existing_table.create(bind=app_engine)

        # ... with some pre-existing data
        ins = self.existing_table.insert().values(
            hooded_figure_id=1,
            point_date=date(2015, 1, 2),
            lon=-87.6495076896,
            lat=41.7915865543,
            geom=None,
            hash='addde9be7f59e95fc08e54e29b2a947f')
        app_engine.execute(ins)
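The drop_meta and drop_if_exists helpers come from the project's test utilities and aren't shown here. A minimal sketch of what drop_if_exists might look like, assuming app_engine is the module-level SQLAlchemy engine used above (the body is an assumption, not the project's actual helper):

def drop_if_exists(table_name):
    # Sketch: remove any stale table left over from a previous test run
    # so that Table.create() above starts from a clean slate.
    app_engine.execute('DROP TABLE IF EXISTS "%s";' % table_name)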
Example #2
def ingest_from_fixture(fixture_meta, fname):
    md = MetaTable(**fixture_meta)
    session.add(md)
    session.commit()
    path = os.path.join(fixtures_path, fname)
    point_etl = PlenarioETL(md, source_path=path)
    point_etl.add()
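A sketch of how this fixture loader might be invoked, assuming fixtures_path points at the test fixture directory and that fixture_meta mirrors the MetaTable keyword arguments used in Example #1 (the dict values and file name below are hypothetical):

# Hypothetical fixture metadata; keys mirror the MetaTable constructor
# arguments seen in the other examples on this page.
roadworks_meta = {
    'url': 'nightvale.gov/roadworks.csv',
    'human_name': 'Roadworks',
    'business_key': 'Permit Number',
    'observed_date': 'Date',
    'latitude': 'lat',
    'longitude': 'lon',
    'approved_status': True,
}
ingest_from_fixture(roadworks_meta, 'roadworks.csv')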
Example #3
def point_meta_from_submit_form(form, is_approved):
    columns, labels = form_columns(form)
    name = slugify(form['dataset_name'], delimiter='_')[:50]

    metatable = MetaTable(
        url=form['file_url'],
        view_url=form.get('view_url'),
        dataset_name=name,
        human_name=form['dataset_name'],
        attribution=form.get('dataset_attribution'),
        description=form.get('dataset_description'),
        update_freq=form['update_frequency'],
        contributor_name=form['contributor_name'],
        contributor_organization=form.get('contributor_organization'),
        contributor_email=form['contributor_email'],
        approved_status=is_approved,
        observed_date=labels['observed_date'],
        latitude=labels.get('latitude', None),
        longitude=labels.get('longitude', None),
        location=labels.get('location', None),
        column_names=columns)

    postgres_session.add(metatable)
    postgres_session.commit()
    return metatable
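A hedged sketch of a call, assuming form behaves like a Flask request.form mapping. The required keys are the ones read with form['...'] above; the per-column inputs consumed by form_columns are omitted because their naming scheme isn't shown here (all values below are hypothetical):

form = {
    'dataset_name': 'Community Radio Events',
    'file_url': 'nightvale.gov/events.csv',
    'update_frequency': 'daily',
    'contributor_name': 'Cecil Palmer',
    'contributor_email': 'cecil@nightvale.gov',
    # ... plus whatever per-column fields form_columns() expects
}
meta = point_meta_from_submit_form(form, is_approved=False)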
Example #4
def add_dataset_to_metatable(request, url, dataset_id, dataset_info,
                             socrata_source, approved_status):
    data_types = []
    business_key = None
    observed_date = None
    latitude = None
    longitude = None
    location = None
    for k, v in request.form.items():
        if k.startswith('data_type_'):
            key = k.replace("data_type_", "")
            data_types.append({"field_name": key, "data_type": v})

        if k.startswith('key_type_'):
            key = k.replace("key_type_", "")
            if v == "business_key":
                business_key = key
            elif v == "observed_date":
                observed_date = key
            elif v == "latitude":
                latitude = key
            elif v == "longitude":
                longitude = key
            elif v == "location":
                location = key

    if socrata_source:
        data_types = dataset_info['columns']
        url = dataset_info['source_url']

    d = {
        'dataset_name': slugify(request.form.get('dataset_name'),
                                delim=u'_')[:50],
        'human_name': request.form.get('dataset_name'),
        'attribution': request.form.get('dataset_attribution'),
        'description': request.form.get('dataset_description'),
        'source_url': url,
        'source_url_hash': dataset_id,
        'update_freq': request.form.get('update_frequency'),
        'business_key': business_key,
        'observed_date': observed_date,
        'latitude': latitude,
        'longitude': longitude,
        'location': location,
        'contributor_name': request.form.get('contributor_name'),
        'contributor_organization': request.form.get('contributor_organization'),
        'contributor_email': request.form.get('contributor_email'),
        'contributed_data_types': json.dumps(data_types),
        'approved_status': approved_status,
        'is_socrata_source': socrata_source
    }

    # add this to meta_master
    md = MetaTable(**d)
    session.add(md)
    session.commit()

    return md
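The loop above keys off a naming convention in the submitted form: data_type_<field> inputs describe column types and key_type_<field> inputs tag the special columns. A sketch of a payload it would understand (field names and type strings are hypothetical):

form_data = {
    'data_type_event_name': 'VARCHAR',
    'data_type_event_date': 'DATE',
    'key_type_event_name': 'business_key',
    'key_type_event_date': 'observed_date',
    'key_type_lat': 'latitude',
    'key_type_lon': 'longitude',
}
# Yields two data_types entries plus business_key='event_name',
# observed_date='event_date', latitude='lat', longitude='lon'.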
Example #5
def submit_dataset():
    # Slightly dumb way to make sure that POSTs are only coming from
    # the originating domain for the time being
    referer = request.headers.get('Referer')
    if referer:
        referer = urlparse(referer).netloc
        req_url = urlparse(request.url).netloc
        if referer != req_url:
            abort(401)
    else:
        abort(401)
    resp = {'status': 'ok', 'message': ''}
    status_code = 200
    errors = []
    post = request.form.get('data')
    if not post:
        try:
            post = list(request.form.keys())[0]
        except IndexError:
            resp['status'] = 'error'
            resp['message'] = 'Unable to decode POST data'
            status_code = 400
    if status_code == 200:
        post = json.loads(post)
        if post.get('view_url'):
            if post.get('socrata'):
                source_domain = urlparse(post['view_url']).netloc
                four_by_four = re.findall(r'/([a-z0-9]{4}-[a-z0-9]{4})',
                                          post['view_url'])[-1]
                view_url = 'http://%s/api/views/%s' % (source_domain,
                                                       four_by_four)
                dataset_info, errors, status_code = get_socrata_data_info(
                    view_url)
                source_url = '%s/rows.csv?accessType=DOWNLOAD' % view_url
            else:
                dataset_info = {
                    'attribution': '',
                    'description': '',
                }
                source_url = post['view_url']
                dataset_info['name'] = urlparse(source_url).path.split('/')[-1]
            if errors:
                resp['message'] = ', '.join(errors)
                resp['status'] = 'error'
                status_code = 400
            else:
                dataset_id = md5(source_url.encode('utf-8')).hexdigest()
                md = session.query(MetaTable).get(dataset_id)
                if not md:
                    d = {
                        'dataset_name': slugify(dataset_info['name'],
                                                delim=u'_'),
                        'human_name': dataset_info['name'],
                        'attribution': dataset_info['attribution'],
                        'description': dataset_info['description'],
                        'source_url': source_url,
                        'source_url_hash': dataset_id,
                        'update_freq': post['update_frequency'],
                        'business_key': post['field_definitions']['id_field'],
                        'observed_date': post['field_definitions']['date_field'],
                        'latitude': post['field_definitions'].get('latitude'),
                        'longitude': post['field_definitions'].get('longitude'),
                        'location': post['field_definitions'].get('location')
                    }
                    d['dataset_name'] = d['dataset_name'][:50]
                    md = MetaTable(**d)
                    session.add(md)
                    session.commit()
                add_dataset.delay(md.source_url_hash,
                                  data_types=post.get('data_types'))
                resp['message'] = ('Dataset %s submitted successfully'
                                   % dataset_info['name'])
        else:
            resp['status'] = 'error'
            resp['message'] = 'Must provide a url where data can be downloaded'
            status_code = 400
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
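The response is serialized with json.dumps(..., default=dthandler), but the handler itself isn't shown. A minimal sketch of such a default handler, assuming it only needs to cover date/datetime values (the name matches the call above; the body is an assumption):

from datetime import date, datetime

def dthandler(obj):
    # Serialize dates/datetimes as ISO-8601 strings; let json.dumps
    # raise for anything else it can't handle.
    if isinstance(obj, (date, datetime)):
        return obj.isoformat()
    raise TypeError('Object of type %s is not JSON serializable'
                    % type(obj).__name__)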