Beispiel #1
0
def ingest_from_fixture(fixture_meta, fname):
    md = MetaTable(**fixture_meta)
    session.add(md)
    session.commit()
    path = os.path.join(fixtures_path, fname)
    point_etl = PlenarioETL(md, source_path=path)
    point_etl.add()
Beispiel #2
0
def update_meta(metatable, table):
    """
    After ingest/update, update the metatable registry to reflect table information.

    :param metatable: MetaTable instance to update.
    :param table: Table instance to update from.

    :returns: None
    """

    metatable.update_date_added()

    metatable.obs_from, metatable.obs_to = session.query(
        func.min(table.c.point_date),
        func.max(table.c.point_date)
    ).first()

    metatable.bbox = session.query(
        func.ST_SetSRID(
            func.ST_Envelope(func.ST_Union(table.c.geom)),
            4326
        )
    ).first()[0]

    session.add(metatable)

    try:
        session.commit()
    except:
        session.rollback()
        raise
Beispiel #3
0
def point_meta_from_submit_form(form, is_approved):
    column_names, labels = form_columns(form)
    name = slugify(form['dataset_name'], delim=u'_')[:50]

    md = MetaTable(
        url=form['file_url'],
        view_url=form.get('view_url'),
        dataset_name=name,
        human_name=form['dataset_name'],
        attribution=form.get('dataset_attribution'),
        description=form.get('dataset_description'),
        update_freq=form['update_frequency'],
        contributor_name=form['contributor_name'],
        contributor_organization=form.get('contributor_organization'),
        contributor_email=form['contributor_email'],
        approved_status=is_approved,
        observed_date=labels['observed_date'],
        latitude=labels.get('latitude', None),
        longitude=labels.get('longitude', None),
        location=labels.get('location', None),
        column_names=column_names
    )
    session.add(md)
    session.commit()
    return md
Beispiel #4
0
def ingest_from_fixture(fixture_meta, fname):
    md = MetaTable(**fixture_meta)
    session.add(md)
    session.commit()
    path = os.path.join(fixtures_path, fname)
    point_etl = PlenarioETL(md, source_path=path)
    point_etl.add()
Beispiel #5
0
def init_user():
    if plenario.settings.DEFAULT_USER:
        print 'creating default user %s' % plenario.settings.DEFAULT_USER['name']
        user = plenario.models.User(**plenario.settings.DEFAULT_USER)
        session.add(user)
        try:
            session.commit()
        except Exception as e:
            session.rollback()
            raise e
Beispiel #6
0
def init_user():
    if plenario.settings.DEFAULT_USER:
        print 'creating default user %s' % plenario.settings.DEFAULT_USER['name']
        user = plenario.models.User(**plenario.settings.DEFAULT_USER)
        session.add(user)
        try:
            session.commit()
        except Exception as e:
            session.rollback()
            raise e
Beispiel #7
0
def add_dataset_to_metatable(request, url, dataset_id, dataset_info,
                             socrata_source, approved_status):
    data_types = []
    business_key = None
    observed_date = None
    latitude = None
    longitude = None
    location = None
    for k, v in request.form.iteritems():
        if k.startswith('data_type_'):
            key = k.replace("data_type_", "")
            data_types.append({"field_name": key, "data_type": v})

        if k.startswith('key_type_'):
            key = k.replace("key_type_", "")
            if (v == "business_key"): business_key = key
            if (v == "observed_date"): observed_date = key
            if (v == "latitude"): latitude = key
            if (v == "longitude"): longitude = key
            if (v == "location"): location = key

    if socrata_source:
        data_types = dataset_info['columns']
        url = dataset_info['source_url']

    d = {
        'dataset_name': slugify(request.form.get('dataset_name'),
                                delim=u'_')[:50],
        'human_name': request.form.get('dataset_name'),
        'attribution': request.form.get('dataset_attribution'),
        'description': request.form.get('dataset_description'),
        'source_url': url,
        'source_url_hash': dataset_id,
        'update_freq': request.form.get('update_frequency'),
        'business_key': business_key,
        'observed_date': observed_date,
        'latitude': latitude,
        'longitude': longitude,
        'location': location,
        'contributor_name': request.form.get('contributor_name'),
        'contributor_organization':
        request.form.get('contributor_organization'),
        'contributor_email': request.form.get('contributor_email'),
        'contributed_data_types': json.dumps(data_types),
        'approved_status': approved_status,
        'is_socrata_source': socrata_source
    }

    # add this to meta_master
    md = MetaTable(**d)
    session.add(md)
    session.commit()

    return md
Beispiel #8
0
def init_master_meta_user():
    print 'creating master, meta and user tables'
    Base.metadata.create_all(bind=app_engine)
    if plenario.settings.DEFAULT_USER:
        print 'creating default user %s' % plenario.settings.DEFAULT_USER['name']
        user = plenario.models.User(**plenario.settings.DEFAULT_USER)
        session.add(user)
        try:
            session.commit()
        except IntegrityError:
            pass
Beispiel #9
0
def add_dataset_to_metatable(request, url, dataset_id, dataset_info, socrata_source, approved_status):
    data_types = []
    business_key = None
    observed_date = None
    latitude = None
    longitude = None 
    location = None
    for k, v in request.form.iteritems():
        if k.startswith('data_type_'):
            key = k.replace("data_type_", "")
            data_types.append({"field_name": key, "data_type": v})

        if k.startswith('key_type_'):
            key = k.replace("key_type_", "")
            if (v == "business_key"): business_key = key
            if (v == "observed_date"): observed_date = key
            if (v == "latitude"): latitude = key
            if (v == "longitude"): longitude = key
            if (v == "location"): location = key

    if socrata_source:
        data_types = dataset_info['columns']
        url = dataset_info['source_url']

    d = {
        'dataset_name': slugify(request.form.get('dataset_name'), delim=u'_')[:50],
        'human_name': request.form.get('dataset_name'),
        'attribution': request.form.get('dataset_attribution'),
        'description': request.form.get('dataset_description'),
        'source_url': url,
        'source_url_hash': dataset_id,
        'update_freq': request.form.get('update_frequency'),
        'business_key': business_key,
        'observed_date': observed_date,
        'latitude': latitude,
        'longitude': longitude,
        'location': location,
        'contributor_name': request.form.get('contributor_name'),
        'contributor_organization': request.form.get('contributor_organization'),
        'contributor_email': request.form.get('contributor_email'),
        'contributed_data_types': json.dumps(data_types),
        'approved_status': approved_status,
        'is_socrata_source': socrata_source
    }

    # add this to meta_master
    md = MetaTable(**d)
    session.add(md)
    session.commit()

    return md
Beispiel #10
0
def reset_password():
    form = ResetPasswordForm()
    errors = []
    if form.validate_on_submit():
        user = db_session.query(User).get(flask_session['user_id'])
        check = user.check_password(user.name, form.old_password.data)
        if check:
            user.password = form.new_password.data
            db_session.add(user)
            db_session.commit()
            flash('Password reset successful!', 'success')
        else:
            errors.append('Password is not correct')
    return render_template('reset-password.html', form=form, errors=errors)
Beispiel #11
0
def reset_password():
    form = ResetPasswordForm()
    errors = []
    if form.validate_on_submit():
        user = db_session.query(User).get(flask_session['user_id'])
        check = user.check_password(user.name, form.old_password.data)
        if check:
            user.password = form.new_password.data
            db_session.add(user)
            db_session.commit()
            flash('Password reset successful!', 'success')
        else:
            errors.append('Password is not correct')
    return render_template('reset-password.html', form=form, errors=errors)
Beispiel #12
0
def init_user():
    if DEFAULT_USER['name']:
        print 'Creating default user %s' % DEFAULT_USER['name']
        if session.query(plenario.models.User).count() > 0:
            print 'Users already exist. Skipping this step.'
            return
        user = plenario.models.User(**DEFAULT_USER)
        session.add(user)
        try:
            session.commit()
        except Exception as e:
            session.rollback()
            print "Problem while creating default user: ", e
    else:
        print 'No default user specified. Skipping this step.'
Beispiel #13
0
 def add(cls, human_name, source_url, approved_status, **kwargs):
     table_name = ShapeMetadata.make_table_name(human_name)
     new_shape_dataset = ShapeMetadata(
         # Required params
         dataset_name=table_name,
         human_name=human_name,
         source_url=source_url,
         approved_status=approved_status,
         # Params that reflect just-submitted, not yet ingested status.
         is_ingested=False,
         bbox=None,
         num_shapes=None,
         date_added=datetime.now().date(),
         # The rest
         **kwargs)
     session.add(new_shape_dataset)
     return new_shape_dataset
Beispiel #14
0
 def add(cls, human_name, source_url, approved_status, **kwargs):
     table_name = ShapeMetadata.make_table_name(human_name)
     new_shape_dataset = ShapeMetadata(
         # Required params
         dataset_name=table_name,
         human_name=human_name,
         source_url=source_url,
         approved_status=approved_status,
         # Params that reflect just-submitted, not yet ingested status.
         is_ingested=False,
         bbox=None,
         num_shapes=None,
         date_added=datetime.now().date(),
         # The rest
         **kwargs)
     session.add(new_shape_dataset)
     return new_shape_dataset
Beispiel #15
0
def add_user():
    form = AddUserForm()
    if form.validate_on_submit():
        user_info = {
            'name': form.name.data,
            'email': form.email.data,
            'password': form.password.data
        }
        user = User(**user_info)
        db_session.add(user)
        db_session.commit()
    context = {
        'form': form,
        'name': form.name.data,
        'email': form.email.data,
        'users': db_session.query(User).all()
    }
    return render_template('admin/add-user.html', **context)
Beispiel #16
0
def add_user():
    form = AddUserForm()
    if form.validate_on_submit():
        user_info = {
            'name': form.name.data,
            'email': form.email.data,
            'password': form.password.data
        }
        user = User(**user_info)
        db_session.add(user)
        db_session.commit()
    context = {
        'form': form,
        'name': form.name.data,
        'email': form.email.data,
        'users': db_session.query(User).all()
    }
    return render_template('add-user.html', **context)
Beispiel #17
0
def update_meta(metadata, table):
    """
    After ingest/update, update the metadata registry to reflect
    :param metadata:
    :param table:
    """
    metadata.update_date_added()
    metadata.obs_from, metadata.obs_to =\
        session.query(func.min(table.c.point_date),
                      func.max(table.c.point_date)).first()

    bbox = session.query(
        func.ST_SetSRID(func.ST_Envelope(func.ST_Union(table.c.geom)),
                        4326)).first()[0]
    metadata.bbox = bbox
    session.add(metadata)
    try:
        session.commit()
    except:
        session.rollback()
        raise
Beispiel #18
0
def update_meta(metadata, table):
    """
    After ingest/update, update the metadata registry to reflect
    :param metadata:
    :param table:
    """
    metadata.update_date_added()
    metadata.obs_from, metadata.obs_to =\
        session.query(func.min(table.c.point_date),
                      func.max(table.c.point_date)).first()

    bbox = session.query(func.ST_SetSRID(
                                         func.ST_Envelope(func.ST_Union(table.c.geom)),
                                         4326
                                         )).first()[0]
    metadata.bbox = bbox
    session.add(metadata)
    try:
        session.commit()
    except:
        session.rollback()
        raise
Beispiel #19
0
def point_meta_from_submit_form(form, is_approved):
    column_names, labels = form_columns(form)
    name = slugify(form['dataset_name'], delim=u'_')[:50]

    md = MetaTable(
        url=form['file_url'],
        view_url=form.get('view_url'),
        dataset_name=name,
        human_name=form['dataset_name'],
        attribution=form.get('dataset_attribution'),
        description=form.get('dataset_description'),
        update_freq=form['update_frequency'],
        contributor_name=form['contributor_name'],
        contributor_organization=form.get('contributor_organization'),
        contributor_email=form['contributor_email'],
        approved_status=is_approved,
        observed_date=labels['observed_date'],
        latitude=labels.get('latitude', None),
        longitude=labels.get('longitude', None),
        location=labels.get('location', None),
        column_names=column_names)
    session.add(md)
    session.commit()
    return md
Beispiel #20
0
def update_meta(metatable, table):
    """
    After ingest/update, update the metatable registry to reflect table information.

    :param metatable: MetaTable instance to update.
    :param table: Table instance to update from.

    :returns: None
    """

    try:
        metatable.update_date_added()

        metatable.obs_from, metatable.obs_to = session.query(
            func.min(table.c.point_date),
            func.max(table.c.point_date)
        ).first()

        metatable.bbox = session.query(
            func.ST_SetSRID(
                func.ST_Envelope(func.ST_Union(table.c.geom)),
                4326
            )
        ).first()[0]

        metatable.column_names = {
            c.name: str(c.type) for c in metatable.column_info()
            if c.name not in {u'geom', u'point_date', u'hash'}
        }

        session.add(metatable)
        session.commit()

    except:
        session.rollback()
        raise
Beispiel #21
0
def submit_dataset():
    # Slightly dumb way to make sure that POSTs are only coming from
    # originating domain for the time being
    referer = request.headers.get('Referer')
    if referer:
        referer = urlparse(referer).netloc
        req_url = urlparse(request.url).netloc
        if referer != req_url:
            abort(401)
    else:
        abort(401)
    resp = {'status': 'ok', 'message': ''}
    status_code = 200
    errors = []
    post = request.form.get('data')
    if not post:
        try:
            post = request.form.keys()[0]
        except IndexError:
            resp['status'] = 'error'
            resp['message'] = 'Unable to decode POST data'
            status_code = 400
    if status_code == 200:
        post = json.loads(post)
        if post.get('view_url'):
            if post.get('socrata'):
                source_domain = urlparse(post['view_url']).netloc
                four_by_four = re.findall(r'/([a-z0-9]{4}-[a-z0-9]{4})',
                                          post['view_url'])[-1]
                view_url = 'http://%s/api/views/%s' % (source_domain,
                                                       four_by_four)
                dataset_info, errors, status_code = get_socrata_data_info(
                    view_url)
                source_url = '%s/rows.csv?accessType=DOWNLOAD' % view_url
            else:
                dataset_info = {
                    'attribution': '',
                    'description': '',
                }
                source_url = post['view_url']
                dataset_info['name'] = urlparse(source_url).path.split('/')[-1]
            if errors:
                resp['message'] = ', '.join([e for e in errors])
                resp['status'] = 'error'
                status_code = 400
            else:
                dataset_id = md5(source_url).hexdigest()
                md = session.query(MetaTable).get(dataset_id)
                if not md:
                    d = {
                        'dataset_name': slugify(dataset_info['name'],
                                                delim=u'_'),
                        'human_name': dataset_info['name'],
                        'attribution': dataset_info['attribution'],
                        'description': dataset_info['description'],
                        'source_url': source_url,
                        'source_url_hash': dataset_id,
                        'update_freq': post['update_frequency'],
                        'business_key': post['field_definitions']['id_field'],
                        'observed_date':
                        post['field_definitions']['date_field'],
                        'latitude': post['field_definitions'].get('latitude'),
                        'longitude':
                        post['field_definitions'].get('longitude'),
                        'location': post['field_definitions'].get('location')
                    }
                    if len(d['dataset_name']) > 49:
                        d['dataset_name'] = d['dataset_name'][:50]
                    md = MetaTable(**d)
                    session.add(md)
                    session.commit()
                add_dataset.delay(md.source_url_hash,
                                  data_types=post.get('data_types'))
                resp[
                    'message'] = 'Dataset %s submitted successfully' % dataset_info[
                        'name']
        else:
            resp['status'] = 'error'
            resp['message'] = 'Must provide a url where data can be downloaded'
            status_code = 400
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
Beispiel #22
0
    def setUpClass(cls):
        # Assume there exists a test database with postgis at the connection string specified in test_settings.py
        tables_to_drop = [
            "census_blocks",
            "dat_flu_shot_clinic_locations",
            "dat_master",
            "meta_master",
            "meta_shape",
            "plenario_user",
        ]
        drop_tables(tables_to_drop)

        # Create meta, master, user tables
        init_master_meta_user()

        # Ingest the census blocks
        init_census()

        # TODO: support local ingest of csv
        # For now, ingest Chicago's csv of 2013 flu shot locations from the data portal.
        # It's a nice little Chicago dataset that won't change.

        # So, adding the dataset to meta_table happens in view.py.
        # I don't want to mock out a whole response object with form data and such,
        # so here's a janky way.
        url = "https://data.cityofchicago.org/api/views/g5vx-5vqf/rows.csv?accessType=DOWNLOAD"
        url_hash = md5(url).hexdigest()

        d = {
            "dataset_name": u"flu_shot_clinic_locations",
            "human_name": u"flu_shot_clinic_locations",
            "attribution": u"foo",
            "description": u"bar",
            "source_url": url,
            "source_url_hash": url_hash,
            "update_freq": "yearly",
            "business_key": u"Event",
            "observed_date": u"Date",
            "latitude": u"Latitude",
            "longitude": u"Longitude",
            "location": u"Location",
            "contributor_name": u"Frederick Mcgillicutty",
            "contributor_organization": u"StrexCorp",
            "contributor_email": u"*****@*****.**",
            "contributed_data_types": None,
            "approved_status": True,
            "is_socrata_source": False,
        }

        # add this to meta_master
        md = MetaTable(**d)
        session.add(md)
        session.commit()

        meta = {
            "dataset_name": u"flu_shot_clinic_locations",
            "source_url": url,
            "business_key": u"Event",
            "observed_date": u"Date",
            "latitude": u"Latitude",
            "longitude": u"Longitude",
            "location": u"Location",
            "source_url_hash": url_hash,
        }
        point_etl = PlenarioETL(meta)
        point_etl.add()

        cls.app = create_app().test_client()
Beispiel #23
0
def submit_dataset():
    # Slightly dumb way to make sure that POSTs are only coming from
    # originating domain for the time being
    referer = request.headers.get('Referer')
    if referer:
        referer = urlparse(referer).netloc
        req_url = urlparse(request.url).netloc
        if referer != req_url:
            abort(401)
    else:
        abort(401)
    resp = {'status': 'ok', 'message': ''}
    status_code = 200
    errors = []
    post = request.form.get('data')
    if not post:
        try:
            post = request.form.keys()[0]
        except IndexError:
            resp['status'] = 'error'
            resp['message'] = 'Unable to decode POST data'
            status_code = 400
    if status_code == 200:
        post = json.loads(post)
        if post.get('view_url'):
            if post.get('socrata'):
                source_domain = urlparse(post['view_url']).netloc
                four_by_four = re.findall(r'/([a-z0-9]{4}-[a-z0-9]{4})', post['view_url'])[-1]
                view_url = 'http://%s/api/views/%s' % (source_domain, four_by_four)
                dataset_info, errors, status_code = get_socrata_data_info(view_url)
                source_url = '%s/rows.csv?accessType=DOWNLOAD' % view_url
            else:
                dataset_info = {
                    'attribution': '',
                    'description': '',
                }
                source_url = post['view_url']
                dataset_info['name'] = urlparse(source_url).path.split('/')[-1]
            if errors:
                resp['message'] = ', '.join([e for e in errors])
                resp['status'] = 'error'
                status_code = 400
            else:
                dataset_id = md5(source_url).hexdigest()
                md = session.query(MetaTable).get(dataset_id)
                if not md:
                    d = {
                        'dataset_name': slugify(dataset_info['name'], delim=u'_'),
                        'human_name': dataset_info['name'],
                        'attribution': dataset_info['attribution'],
                        'description': dataset_info['description'],
                        'source_url': source_url,
                        'source_url_hash': dataset_id,
                        'update_freq': post['update_frequency'],
                        'business_key': post['field_definitions']['id_field'],
                        'observed_date': post['field_definitions']['date_field'],
                        'latitude': post['field_definitions'].get('latitude'),
                        'longitude': post['field_definitions'].get('longitude'),
                        'location': post['field_definitions'].get('location')
                    }
                    if len(d['dataset_name']) > 49:
                        d['dataset_name'] = d['dataset_name'][:50]
                    md = MetaTable(**d)
                    session.add(md)
                    session.commit()
                add_dataset.delay(md.source_url_hash, data_types=post.get('data_types'))
                resp['message'] = 'Dataset %s submitted successfully' % dataset_info['name']
        else:
            resp['status'] = 'error'
            resp['message'] = 'Must provide a url where data can be downloaded'
            status_code = 400
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp