def setUp(self):
    session.rollback()
    # Ensure we have metadata loaded into the database
    # to mimic the behavior of metadata ingestion preceding file ingestion.
    drop_meta('dog_park_permits')
    drop_meta('community_radio_events')
    drop_meta('public_opera_performances')

    # Make new MetaTable objects
    self.unloaded_meta = MetaTable(url='nightvale.gov/events.csv',
                                   human_name='Community Radio Events',
                                   business_key='Event Name',
                                   observed_date='Date',
                                   latitude='lat', longitude='lon',
                                   approved_status=True)

    self.existing_meta = MetaTable(url='nightvale.gov/dogpark.csv',
                                   human_name='Dog Park Permits',
                                   business_key='Hooded Figure ID',
                                   observed_date='Date',
                                   latitude='lat', longitude='lon',
                                   approved_status=False)

    self.opera_meta = MetaTable(url='nightvale.gov/opera.csv',
                                human_name='Public Opera Performances',
                                business_key='Event Name',
                                observed_date='Date',
                                location='Location',
                                approved_status=False)

    session.add_all([self.existing_meta, self.opera_meta, self.unloaded_meta])
    session.commit()

    # Also, let's have one table pre-loaded...
    self.existing_table = sa.Table(
        'dog_park_permits', MetaData(),
        Column('hooded_figure_id', Integer),
        Column('point_date', TIMESTAMP, nullable=False),
        Column('date', Date, nullable=True),
        Column('lat', Float, nullable=False),
        Column('lon', Float, nullable=False),
        Column('hash', String(32), primary_key=True),
        Column('geom', Geometry('POINT', srid=4326), nullable=True))
    drop_if_exists(self.existing_table.name)
    self.existing_table.create(bind=app_engine)

    # ... with some pre-existing data
    ins = self.existing_table.insert().values(
        hooded_figure_id=1,
        point_date=date(2015, 1, 2),
        lon=-87.6495076896,
        lat=41.7915865543,
        geom=None,
        hash='addde9be7f59e95fc08e54e29b2a947f')
    app_engine.execute(ins)
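
# A minimal sketch of a test that could run against the fixtures built in
# setUp above. The table and seed row ('dog_park_permits', one record with
# hooded_figure_id=1) come from setUp; the test name itself is hypothetical
# and would live on the same TestCase.
def test_existing_table_has_seed_row(self):
    rows = app_engine.execute(self.existing_table.select()).fetchall()
    self.assertEqual(len(rows), 1)
    self.assertEqual(rows[0]['hooded_figure_id'], 1)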


def ingest_from_fixture(fixture_meta, fname):
    md = MetaTable(**fixture_meta)
    session.add(md)
    session.commit()
    path = os.path.join(fixtures_path, fname)
    point_etl = PlenarioETL(md, source_path=path)
    point_etl.add()
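
# A hypothetical call to ingest_from_fixture. It assumes a CSV named
# 'community_radio_events.csv' exists under fixtures_path; the metadata keys
# mirror the MetaTable constructor arguments used in setUp above, and the
# values are illustrative.
radio_meta = {
    'url': 'nightvale.gov/events.csv',
    'human_name': 'Community Radio Events',
    'business_key': 'Event Name',
    'observed_date': 'Date',
    'latitude': 'lat',
    'longitude': 'lon',
    'approved_status': True,
}
ingest_from_fixture(radio_meta, 'community_radio_events.csv')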


def point_meta_from_submit_form(form, is_approved):
    columns, labels = form_columns(form)
    name = slugify(form['dataset_name'], delimiter='_')[:50]

    metatable = MetaTable(
        url=form['file_url'],
        view_url=form.get('view_url'),
        dataset_name=name,
        human_name=form['dataset_name'],
        attribution=form.get('dataset_attribution'),
        description=form.get('dataset_description'),
        update_freq=form['update_frequency'],
        contributor_name=form['contributor_name'],
        contributor_organization=form.get('contributor_organization'),
        contributor_email=form['contributor_email'],
        approved_status=is_approved,
        observed_date=labels['observed_date'],
        latitude=labels.get('latitude'),
        longitude=labels.get('longitude'),
        location=labels.get('location'),
        column_names=columns)

    postgres_session.add(metatable)
    postgres_session.commit()
    return metatable
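
# A sketch of the submit-form payload point_meta_from_submit_form expects.
# The required keys ('dataset_name', 'file_url', 'update_frequency',
# 'contributor_name', 'contributor_email') are read directly with form[...]
# above; the rest are optional via form.get(). How form_columns() derives
# the column labels is not shown here, so those keys are omitted rather
# than guessed, and the values below are illustrative.
example_form = {
    'dataset_name': 'Community Radio Events',
    'file_url': 'http://nightvale.gov/events.csv',
    'update_frequency': 'daily',
    'contributor_name': 'Cecil Palmer',
    'contributor_email': 'cecil@nightvale.gov',
}
# meta = point_meta_from_submit_form(example_form, is_approved=False)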


def add_dataset_to_metatable(request, url, dataset_id, dataset_info,
                             socrata_source, approved_status):
    data_types = []
    business_key = None
    observed_date = None
    latitude = None
    longitude = None
    location = None

    for k, v in request.form.iteritems():
        if k.startswith('data_type_'):
            key = k.replace('data_type_', '')
            data_types.append({'field_name': key, 'data_type': v})
        if k.startswith('key_type_'):
            key = k.replace('key_type_', '')
            if v == 'business_key':
                business_key = key
            elif v == 'observed_date':
                observed_date = key
            elif v == 'latitude':
                latitude = key
            elif v == 'longitude':
                longitude = key
            elif v == 'location':
                location = key

    if socrata_source:
        data_types = dataset_info['columns']
        url = dataset_info['source_url']

    d = {
        'dataset_name': slugify(request.form.get('dataset_name'),
                                delim=u'_')[:50],
        'human_name': request.form.get('dataset_name'),
        'attribution': request.form.get('dataset_attribution'),
        'description': request.form.get('dataset_description'),
        'source_url': url,
        'source_url_hash': dataset_id,
        'update_freq': request.form.get('update_frequency'),
        'business_key': business_key,
        'observed_date': observed_date,
        'latitude': latitude,
        'longitude': longitude,
        'location': location,
        'contributor_name': request.form.get('contributor_name'),
        'contributor_organization': request.form.get('contributor_organization'),
        'contributor_email': request.form.get('contributor_email'),
        'contributed_data_types': json.dumps(data_types),
        'approved_status': approved_status,
        'is_socrata_source': socrata_source,
    }

    # Add this to meta_master.
    md = MetaTable(**d)
    session.add(md)
    session.commit()
    return md
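
# Illustrative form contents for add_dataset_to_metatable, showing the
# 'data_type_<column>' / 'key_type_<column>' key convention the loop above
# parses. The column names and types are made up; in production this dict
# would be Flask's request.form.
example_fields = {
    'dataset_name': 'Community Radio Events',
    'data_type_event_name': 'VARCHAR',
    'data_type_date': 'DATE',
    'key_type_event_name': 'business_key',  # -> business_key = 'event_name'
    'key_type_date': 'observed_date',       # -> observed_date = 'date'
    'key_type_lat': 'latitude',             # -> latitude = 'lat'
    'key_type_lon': 'longitude',            # -> longitude = 'lon'
}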


def submit_dataset():
    # Slightly dumb way to make sure that POSTs are only coming from
    # the originating domain, for the time being.
    referer = request.headers.get('Referer')
    if referer:
        referer = urlparse(referer).netloc
        req_url = urlparse(request.url).netloc
        if referer != req_url:
            abort(401)
    else:
        abort(401)

    resp = {'status': 'ok', 'message': ''}
    status_code = 200
    errors = []

    post = request.form.get('data')
    if not post:
        try:
            post = request.form.keys()[0]
        except IndexError:
            resp['status'] = 'error'
            resp['message'] = 'Unable to decode POST data'
            status_code = 400

    if status_code == 200:
        post = json.loads(post)
        if post.get('view_url'):
            if post.get('socrata'):
                source_domain = urlparse(post['view_url']).netloc
                four_by_four = re.findall(r'/([a-z0-9]{4}-[a-z0-9]{4})',
                                          post['view_url'])[-1]
                view_url = 'http://%s/api/views/%s' % (source_domain,
                                                       four_by_four)
                dataset_info, errors, status_code = \
                    get_socrata_data_info(view_url)
                source_url = '%s/rows.csv?accessType=DOWNLOAD' % view_url
            else:
                dataset_info = {
                    'attribution': '',
                    'description': '',
                }
                source_url = post['view_url']
                dataset_info['name'] = urlparse(source_url).path.split('/')[-1]

            if errors:
                resp['message'] = ', '.join(errors)
                resp['status'] = 'error'
                status_code = 400
            else:
                dataset_id = md5(source_url).hexdigest()
                md = session.query(MetaTable).get(dataset_id)
                if not md:
                    d = {
                        'dataset_name': slugify(dataset_info['name'],
                                                delim=u'_'),
                        'human_name': dataset_info['name'],
                        'attribution': dataset_info['attribution'],
                        'description': dataset_info['description'],
                        'source_url': source_url,
                        'source_url_hash': dataset_id,
                        'update_freq': post['update_frequency'],
                        'business_key': post['field_definitions']['id_field'],
                        'observed_date': post['field_definitions']['date_field'],
                        'latitude': post['field_definitions'].get('latitude'),
                        'longitude': post['field_definitions'].get('longitude'),
                        'location': post['field_definitions'].get('location'),
                    }
                    if len(d['dataset_name']) > 49:
                        d['dataset_name'] = d['dataset_name'][:50]
                    md = MetaTable(**d)
                    session.add(md)
                    session.commit()
                add_dataset.delay(md.source_url_hash,
                                  data_types=post.get('data_types'))
                resp['message'] = ('Dataset %s submitted successfully'
                                   % dataset_info['name'])
        else:
            resp['status'] = 'error'
            resp['message'] = 'Must provide a url where data can be downloaded'
            status_code = 400

    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
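
# A sketch of the JSON body submit_dataset expects in the 'data' form field.
# The keys shown ('view_url', 'socrata', 'update_frequency',
# 'field_definitions', 'data_types') are the ones the handler reads above;
# the values are illustrative, and the Socrata four-by-four in the URL is
# made up.
example_post = {
    'view_url': 'http://data.example.gov/dataset/abcd-1234',
    'socrata': True,
    'update_frequency': 'daily',
    'field_definitions': {
        'id_field': 'event_id',
        'date_field': 'observed_date',
        'latitude': 'lat',
        'longitude': 'lon',
    },
    'data_types': None,
}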