def ingest_from_fixture(fixture_meta, fname):
    md = MetaTable(**fixture_meta)
    session.add(md)
    session.commit()
    path = os.path.join(fixtures_path, fname)
    point_etl = PlenarioETL(md, source_path=path)
    point_etl.add()
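# Usage sketch (hypothetical values): fixture_meta must supply the MetaTable
# constructor's required columns, and fname names a file under fixtures_path.
# A minimal example, assuming a 'roadworks.csv' fixture exists:
#
#     fixture_meta = {
#         'dataset_name': u'roadworks',
#         'human_name': u'roadworks',
#         'attribution': u'foo',
#         'description': u'bar',
#         'url': 'http://example.com/roadworks.csv',
#         'update_freq': 'yearly',
#         'observed_date': u'date',
#         'latitude': u'lat',
#         'longitude': u'lon',
#         'approved_status': True
#     }
#     ingest_from_fixture(fixture_meta, 'roadworks.csv')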
def update_meta(metatable, table):
    """After ingest/update, update the metatable registry to reflect
    table information.

    :param metatable: MetaTable instance to update.
    :param table: Table instance to update from.
    :returns: None
    """
    metatable.update_date_added()
    metatable.obs_from, metatable.obs_to = session.query(
        func.min(table.c.point_date),
        func.max(table.c.point_date)
    ).first()
    metatable.bbox = session.query(
        func.ST_SetSRID(
            func.ST_Envelope(func.ST_Union(table.c.geom)),
            4326
        )
    ).first()[0]
    session.add(metatable)
    try:
        session.commit()
    except:
        session.rollback()
        raise
def point_meta_from_submit_form(form, is_approved):
    column_names, labels = form_columns(form)
    name = slugify(form['dataset_name'], delim=u'_')[:50]
    md = MetaTable(
        url=form['file_url'],
        view_url=form.get('view_url'),
        dataset_name=name,
        human_name=form['dataset_name'],
        attribution=form.get('dataset_attribution'),
        description=form.get('dataset_description'),
        update_freq=form['update_frequency'],
        contributor_name=form['contributor_name'],
        contributor_organization=form.get('contributor_organization'),
        contributor_email=form['contributor_email'],
        approved_status=is_approved,
        observed_date=labels['observed_date'],
        latitude=labels.get('latitude'),
        longitude=labels.get('longitude'),
        location=labels.get('location'),
        column_names=column_names
    )
    session.add(md)
    session.commit()
    return md
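# Usage sketch (hypothetical form payload). form_columns() is expected to
# return (column_names, labels), where labels maps special roles to column
# names; only 'observed_date' is required above, the rest may be absent:
#
#     form = {
#         'dataset_name': 'Flu Shot Clinics',
#         'file_url': 'http://example.com/flu_shots.csv',
#         'update_frequency': 'yearly',
#         'contributor_name': 'Jane Doe',
#         'contributor_email': 'jane@example.com',
#         # ... plus the per-column fields that form_columns() consumes
#     }
#     md = point_meta_from_submit_form(form, is_approved=False)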
def init_user():
    if plenario.settings.DEFAULT_USER:
        print 'creating default user %s' % plenario.settings.DEFAULT_USER['name']
        user = plenario.models.User(**plenario.settings.DEFAULT_USER)
        session.add(user)
        try:
            session.commit()
        except Exception:
            session.rollback()
            # Bare raise preserves the original traceback
            raise
def add_dataset_to_metatable(request, url, dataset_id, dataset_info,
                             socrata_source, approved_status):
    data_types = []
    business_key = None
    observed_date = None
    latitude = None
    longitude = None
    location = None
    for k, v in request.form.iteritems():
        if k.startswith('data_type_'):
            key = k.replace("data_type_", "")
            data_types.append({"field_name": key, "data_type": v})
        if k.startswith('key_type_'):
            key = k.replace("key_type_", "")
            if v == "business_key":
                business_key = key
            if v == "observed_date":
                observed_date = key
            if v == "latitude":
                latitude = key
            if v == "longitude":
                longitude = key
            if v == "location":
                location = key
    if socrata_source:
        data_types = dataset_info['columns']
        url = dataset_info['source_url']
    d = {
        'dataset_name': slugify(request.form.get('dataset_name'), delim=u'_')[:50],
        'human_name': request.form.get('dataset_name'),
        'attribution': request.form.get('dataset_attribution'),
        'description': request.form.get('dataset_description'),
        'source_url': url,
        'source_url_hash': dataset_id,
        'update_freq': request.form.get('update_frequency'),
        'business_key': business_key,
        'observed_date': observed_date,
        'latitude': latitude,
        'longitude': longitude,
        'location': location,
        'contributor_name': request.form.get('contributor_name'),
        'contributor_organization': request.form.get('contributor_organization'),
        'contributor_email': request.form.get('contributor_email'),
        'contributed_data_types': json.dumps(data_types),
        'approved_status': approved_status,
        'is_socrata_source': socrata_source
    }
    # add this to meta_master
    md = MetaTable(**d)
    session.add(md)
    session.commit()
    return md
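# The loop above expects form fields named 'data_type_<column>' and
# 'key_type_<column>'. A hypothetical form for a two-column dataset:
#
#     request.form = {
#         'dataset_name': 'Flu Shot Clinics',
#         'data_type_date': 'DATE',
#         'data_type_event': 'VARCHAR',
#         'key_type_date': 'observed_date',
#         'key_type_event': 'business_key',
#         # ... remaining submission fields
#     }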
def init_master_meta_user():
    print 'creating master, meta and user tables'
    Base.metadata.create_all(bind=app_engine)
    if plenario.settings.DEFAULT_USER:
        print 'creating default user %s' % plenario.settings.DEFAULT_USER['name']
        user = plenario.models.User(**plenario.settings.DEFAULT_USER)
        session.add(user)
        try:
            session.commit()
        except IntegrityError:
            # User already exists; roll back so the session stays usable
            session.rollback()
def reset_password():
    form = ResetPasswordForm()
    errors = []
    if form.validate_on_submit():
        user = db_session.query(User).get(flask_session['user_id'])
        check = user.check_password(user.name, form.old_password.data)
        if check:
            user.password = form.new_password.data
            db_session.add(user)
            db_session.commit()
            flash('Password reset successful!', 'success')
        else:
            errors.append('Password is not correct')
    return render_template('reset-password.html', form=form, errors=errors)
def init_user():
    if DEFAULT_USER['name']:
        print 'Creating default user %s' % DEFAULT_USER['name']
        if session.query(plenario.models.User).count() > 0:
            print 'Users already exist. Skipping this step.'
            return
        user = plenario.models.User(**DEFAULT_USER)
        session.add(user)
        try:
            session.commit()
        except Exception as e:
            session.rollback()
            print "Problem while creating default user: ", e
    else:
        print 'No default user specified. Skipping this step.'
def add(cls, human_name, source_url, approved_status, **kwargs):
    table_name = ShapeMetadata.make_table_name(human_name)
    new_shape_dataset = ShapeMetadata(
        # Required params
        dataset_name=table_name,
        human_name=human_name,
        source_url=source_url,
        approved_status=approved_status,
        # Params that reflect just-submitted, not yet ingested status.
        is_ingested=False,
        bbox=None,
        num_shapes=None,
        date_added=datetime.now().date(),
        # The rest
        **kwargs)
    session.add(new_shape_dataset)
    return new_shape_dataset
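# Note that add() only stages the new row with session.add() and does not
# commit; the caller is responsible for committing (or rolling back).
# A usage sketch with hypothetical arguments:
#
#     shape_meta = ShapeMetadata.add(
#         human_name=u'Chicago Neighborhoods',
#         source_url='http://example.com/neighborhoods.zip',
#         approved_status=True)
#     session.commit()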
def add_user():
    form = AddUserForm()
    if form.validate_on_submit():
        user_info = {
            'name': form.name.data,
            'email': form.email.data,
            'password': form.password.data
        }
        user = User(**user_info)
        db_session.add(user)
        db_session.commit()
    context = {
        'form': form,
        'name': form.name.data,
        'email': form.email.data,
        'users': db_session.query(User).all()
    }
    return render_template('admin/add-user.html', **context)
def add_user():
    form = AddUserForm()
    if form.validate_on_submit():
        user_info = {
            'name': form.name.data,
            'email': form.email.data,
            'password': form.password.data
        }
        user = User(**user_info)
        db_session.add(user)
        db_session.commit()
    context = {
        'form': form,
        'name': form.name.data,
        'email': form.email.data,
        'users': db_session.query(User).all()
    }
    return render_template('add-user.html', **context)
def update_meta(metadata, table):
    """After ingest/update, update the metadata registry to reflect
    the table's date range and bounding box.

    :param metadata: MetaTable instance to update.
    :param table: Table instance to update from.
    """
    metadata.update_date_added()
    metadata.obs_from, metadata.obs_to = \
        session.query(func.min(table.c.point_date),
                      func.max(table.c.point_date)).first()
    bbox = session.query(
        func.ST_SetSRID(func.ST_Envelope(func.ST_Union(table.c.geom)),
                        4326)).first()[0]
    metadata.bbox = bbox
    session.add(metadata)
    try:
        session.commit()
    except:
        session.rollback()
        raise
def update_meta(metatable, table):
    """After ingest/update, update the metatable registry to reflect
    table information.

    :param metatable: MetaTable instance to update.
    :param table: Table instance to update from.
    :returns: None
    """
    try:
        metatable.update_date_added()
        metatable.obs_from, metatable.obs_to = session.query(
            func.min(table.c.point_date),
            func.max(table.c.point_date)
        ).first()
        metatable.bbox = session.query(
            func.ST_SetSRID(
                func.ST_Envelope(func.ST_Union(table.c.geom)),
                4326
            )
        ).first()[0]
        metatable.column_names = {
            c.name: str(c.type) for c in metatable.column_info()
            if c.name not in {u'geom', u'point_date', u'hash'}
        }
        session.add(metatable)
        session.commit()
    except:
        session.rollback()
        raise
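# Usage sketch: the update_meta variants take the registry row and the
# reflected point table, typically right after an ingest. Assuming the
# MetaTable instance exposes its reflected table as point_table (an
# assumption here, not shown in these snippets):
#
#     update_meta(md, md.point_table)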
def submit_dataset():
    # Slightly dumb way to make sure that POSTs are only coming from
    # originating domain for the time being
    referer = request.headers.get('Referer')
    if referer:
        referer = urlparse(referer).netloc
        req_url = urlparse(request.url).netloc
        if referer != req_url:
            abort(401)
    else:
        abort(401)
    resp = {'status': 'ok', 'message': ''}
    status_code = 200
    errors = []
    post = request.form.get('data')
    if not post:
        try:
            post = request.form.keys()[0]
        except IndexError:
            resp['status'] = 'error'
            resp['message'] = 'Unable to decode POST data'
            status_code = 400
    if status_code == 200:
        post = json.loads(post)
        if post.get('view_url'):
            if post.get('socrata'):
                source_domain = urlparse(post['view_url']).netloc
                four_by_four = re.findall(r'/([a-z0-9]{4}-[a-z0-9]{4})',
                                          post['view_url'])[-1]
                view_url = 'http://%s/api/views/%s' % (source_domain, four_by_four)
                dataset_info, errors, status_code = get_socrata_data_info(view_url)
                source_url = '%s/rows.csv?accessType=DOWNLOAD' % view_url
            else:
                dataset_info = {
                    'attribution': '',
                    'description': '',
                }
                source_url = post['view_url']
                dataset_info['name'] = urlparse(source_url).path.split('/')[-1]
            if errors:
                resp['message'] = ', '.join([e for e in errors])
                resp['status'] = 'error'
                status_code = 400
            else:
                dataset_id = md5(source_url).hexdigest()
                md = session.query(MetaTable).get(dataset_id)
                if not md:
                    d = {
                        'dataset_name': slugify(dataset_info['name'], delim=u'_'),
                        'human_name': dataset_info['name'],
                        'attribution': dataset_info['attribution'],
                        'description': dataset_info['description'],
                        'source_url': source_url,
                        'source_url_hash': dataset_id,
                        'update_freq': post['update_frequency'],
                        'business_key': post['field_definitions']['id_field'],
                        'observed_date': post['field_definitions']['date_field'],
                        'latitude': post['field_definitions'].get('latitude'),
                        'longitude': post['field_definitions'].get('longitude'),
                        'location': post['field_definitions'].get('location')
                    }
                    if len(d['dataset_name']) > 49:
                        d['dataset_name'] = d['dataset_name'][:50]
                    md = MetaTable(**d)
                    session.add(md)
                    session.commit()
                add_dataset.delay(md.source_url_hash,
                                  data_types=post.get('data_types'))
                resp['message'] = ('Dataset %s submitted successfully'
                                   % dataset_info['name'])
        else:
            resp['status'] = 'error'
            resp['message'] = 'Must provide a url where data can be downloaded'
            status_code = 400
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
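# Usage sketch: the view expects a JSON document in the 'data' form field,
# and the Referer header must match the request's own host. A hypothetical
# test-client POST for a non-Socrata source (the route path is an assumption):
#
#     payload = {
#         'view_url': 'http://example.com/data/flu_shots.csv',
#         'update_frequency': 'yearly',
#         'field_definitions': {
#             'id_field': 'event',
#             'date_field': 'date',
#             'latitude': 'lat',
#             'longitude': 'lon'
#         }
#     }
#     app.test_client().post('/submit-dataset',
#                            data={'data': json.dumps(payload)},
#                            headers={'Referer': 'http://localhost/'})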
def setUpClass(cls):
    # Assume there exists a test database with postgis at the
    # connection string specified in test_settings.py
    tables_to_drop = [
        "census_blocks",
        "dat_flu_shot_clinic_locations",
        "dat_master",
        "meta_master",
        "meta_shape",
        "plenario_user",
    ]
    drop_tables(tables_to_drop)

    # Create meta, master, user tables
    init_master_meta_user()

    # Ingest the census blocks
    init_census()

    # TODO: support local ingest of csv
    # For now, ingest Chicago's csv of 2013 flu shot locations from the data portal.
    # It's a nice little Chicago dataset that won't change.
    # So, adding the dataset to meta_table happens in view.py.
    # I don't want to mock out a whole response object with form data and such,
    # so here's a janky way.
    url = "https://data.cityofchicago.org/api/views/g5vx-5vqf/rows.csv?accessType=DOWNLOAD"
    url_hash = md5(url).hexdigest()

    d = {
        "dataset_name": u"flu_shot_clinic_locations",
        "human_name": u"flu_shot_clinic_locations",
        "attribution": u"foo",
        "description": u"bar",
        "source_url": url,
        "source_url_hash": url_hash,
        "update_freq": "yearly",
        "business_key": u"Event",
        "observed_date": u"Date",
        "latitude": u"Latitude",
        "longitude": u"Longitude",
        "location": u"Location",
        "contributor_name": u"Frederick Mcgillicutty",
        "contributor_organization": u"StrexCorp",
        "contributor_email": u"*****@*****.**",
        "contributed_data_types": None,
        "approved_status": True,
        "is_socrata_source": False,
    }
    # add this to meta_master
    md = MetaTable(**d)
    session.add(md)
    session.commit()

    meta = {
        "dataset_name": u"flu_shot_clinic_locations",
        "source_url": url,
        "business_key": u"Event",
        "observed_date": u"Date",
        "latitude": u"Latitude",
        "longitude": u"Longitude",
        "location": u"Location",
        "source_url_hash": url_hash,
    }
    point_etl = PlenarioETL(meta)
    point_etl.add()
    cls.app = create_app().test_client()