Python PlenarioETL 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: plenario.utils.etl

클래스/타입: PlenarioETL

hotexamples.com에서의 예제들: 9

Python PlenarioETL - 9개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 plenario.utils.etl.PlenarioETL에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

PlenarioETL(4)

add(2)

update(2)

_add_weather_info(1)

_get_or_create_data_table(1)

예제 #1

파일 보기

def update_dataset(self, source_url_hash, s3_path=None):
    """Record this run on the dataset's metadata row, then run the ETL update.

    The id read from ``self.request.id`` is appended to the row's
    ``result_ids`` (a fresh list is started when the row has none) and the
    new list is written back with an explicit UPDATE statement.
    NOTE(review): ``self.request.id`` looks like a bound task's request id;
    the decorator is not visible here, so confirm.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.update``.
    :returns: a human-readable completion message.
    """
    md = session.query(MetaTable).get(source_url_hash)

    # Reuse the stored list when it is non-empty, otherwise start a new one.
    ids = md.result_ids or []
    ids.append(self.request.id)

    meta_table = MetaTable.__table__
    stmt = meta_table.update()
    stmt = stmt.where(MetaTable.source_url_hash == source_url_hash)
    stmt = stmt.values(result_ids=ids)
    with engine.begin() as conn:
        conn.execute(stmt)

    PlenarioETL(md.as_dict()).update(s3_path=s3_path)

    name, url_hash = md.human_name, md.source_url_hash
    return 'Finished updating {0} ({1})'.format(name, url_hash)

예제 #2

파일 보기

파일: tasks.py 프로젝트: hectron/plenario

def update_dataset(self, source_url_hash, s3_path=None):
    """Append the current request id to the dataset's result ids and update it.

    Steps, in order: fetch the ``MetaTable`` row for ``source_url_hash``,
    persist the extended ``result_ids`` list through ``engine``, then run
    ``PlenarioETL.update`` with the row's dict form.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: passed straight through to ``PlenarioETL.update``.
    :returns: a completion message naming the dataset and its hash.
    """
    record = session.query(MetaTable).get(source_url_hash)

    run_ids = record.result_ids
    if not run_ids:
        # Nothing stored yet (None or empty): start a new list.
        run_ids = []
    run_ids.append(self.request.id)

    with engine.begin() as connection:
        update_stmt = MetaTable.__table__.update()
        update_stmt = update_stmt.where(MetaTable.source_url_hash == source_url_hash)
        connection.execute(update_stmt.values(result_ids=run_ids))

    loader = PlenarioETL(record.as_dict())
    loader.update(s3_path=s3_path)
    return "Finished updating {0} ({1})".format(record.human_name, record.source_url_hash)

예제 #3

파일 보기

def update_dataset(source_url_hash, s3_path=None):
    """Run the ETL update for the dataset identified by ``source_url_hash``.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.update``.
    :returns: a completion message containing the dataset's human name.
    """
    dataset_meta = session.query(MetaTable).get(source_url_hash)
    PlenarioETL(dataset_meta.as_dict()).update(s3_path=s3_path)
    return 'Finished updating %s' % dataset_meta.human_name

예제 #4

파일 보기

def add_dataset(source_url_hash, s3_path=None, data_types=None):
    """Run the initial ETL load for the dataset identified by ``source_url_hash``.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.add``.
    :param data_types: forwarded unchanged to the ``PlenarioETL`` constructor.
    :returns: a completion message containing the dataset's human name.
    """
    dataset_meta = session.query(MetaTable).get(source_url_hash)
    loader = PlenarioETL(dataset_meta.as_dict(), data_types=data_types)
    loader.add(s3_path=s3_path)
    return 'Finished adding %s' % dataset_meta.human_name

예제 #5

파일 보기

파일: coltypes.py 프로젝트: EmilyWebber/plenario

from sqlalchemy import Table, MetaData
from sqlalchemy.sql.sqltypes import DATE
from plenario.database import session, app_engine
from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # Maintenance script: for every dataset in MetaTable whose observed-date
    # column is of type DATE, build the data table and add weather info.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            # Reflect the dataset's table from the database.
            table = Table('dat_{0}'.format(t.dataset_name), meta,
                          autoload=True, autoload_with=app_engine,
                          keep_existing=True)
            # Only the column lookup is guarded, so an AttributeError raised
            # inside the ETL calls below is not mislabelled as a missing column.
            try:
                date_col = getattr(table.c, slugify(t.observed_date))
            except AttributeError:
                # Report before re-raising; this message used to sit after the
                # raise and was unreachable. A bare ``raise`` keeps the
                # original traceback.
                print('no col {0}'.format(t.observed_date))
                raise
            if type(date_col.type) == DATE:
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Best effort: skip datasets whose table does not exist.
            print('no table {0}'.format(t.dataset_name))

예제 #6

파일 보기

파일: shape_tests.py 프로젝트: hectron/plenario

    def setUpClass(cls):
        """Reset the test database, ingest one point dataset and build the test client.

        Assumes a test database with PostGIS exists at the connection string
        specified in test_settings.py.
        """
        # Drop leftovers from earlier runs.
        drop_tables([
            "census_blocks",
            "dat_flu_shot_clinic_locations",
            "dat_master",
            "meta_master",
            "meta_shape",
            "plenario_user",
        ])

        # Recreate the meta, master and user tables, then ingest the census blocks.
        init_master_meta_user()
        init_census()

        # TODO: support local ingest of csv
        # For now, ingest Chicago's csv of 2013 flu shot locations from the
        # data portal: a small Chicago dataset that won't change.
        #
        # Adding a dataset to meta_table normally happens in view.py. Rather
        # than mock a whole response object with form data, the row is
        # inserted by hand below.
        url = "https://data.cityofchicago.org/api/views/g5vx-5vqf/rows.csv?accessType=DOWNLOAD"
        url_hash = md5(url).hexdigest()

        meta_row = {
            "dataset_name": u"flu_shot_clinic_locations",
            "human_name": u"flu_shot_clinic_locations",
            "attribution": u"foo",
            "description": u"bar",
            "source_url": url,
            "source_url_hash": url_hash,
            "update_freq": "yearly",
            "business_key": u"Event",
            "observed_date": u"Date",
            "latitude": u"Latitude",
            "longitude": u"Longitude",
            "location": u"Location",
            "contributor_name": u"Frederick Mcgillicutty",
            "contributor_organization": u"StrexCorp",
            "contributor_email": u"*****@*****.**",
            "contributed_data_types": None,
            "approved_status": True,
            "is_socrata_source": False,
        }

        # Add this to meta_master.
        session.add(MetaTable(**meta_row))
        session.commit()

        # The ETL gets only the subset of fields below, copied from the row above.
        etl_keys = (
            "dataset_name",
            "source_url",
            "business_key",
            "observed_date",
            "latitude",
            "longitude",
            "location",
            "source_url_hash",
        )
        etl_meta = dict((key, meta_row[key]) for key in etl_keys)
        PlenarioETL(etl_meta).add()

        cls.app = create_app().test_client()

예제 #7

파일 보기

파일: tasks.py 프로젝트: jqnatividad/plenario

def update_dataset(source_url_hash, s3_path=None):
    """Fetch the dataset's metadata row and run ``PlenarioETL.update`` on it.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.update``.
    :returns: a completion message containing the dataset's human name.
    """
    record = session.query(MetaTable).get(source_url_hash)
    loader = PlenarioETL(record.as_dict())
    loader.update(s3_path=s3_path)
    message = 'Finished updating %s' % record.human_name
    return message

예제 #8

파일 보기

파일: tasks.py 프로젝트: jqnatividad/plenario

def add_dataset(source_url_hash, s3_path=None, data_types=None):
    """Fetch the dataset's metadata row and run ``PlenarioETL.add`` on it.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.add``.
    :param data_types: forwarded unchanged to the ``PlenarioETL`` constructor.
    :returns: a completion message containing the dataset's human name.
    """
    record = session.query(MetaTable).get(source_url_hash)
    PlenarioETL(record.as_dict(), data_types=data_types).add(s3_path=s3_path)
    message = 'Finished adding %s' % record.human_name
    return message

예제 #9

파일 보기

파일: coltypes.py 프로젝트: EzanLTD/plenario

from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # Maintenance script: for every dataset in MetaTable whose observed-date
    # column is of type DATE, build the data table and add weather info.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            # Reflect the dataset's table from the database.
            table = Table('dat_{0}'.format(t.dataset_name),
                          meta,
                          autoload=True,
                          autoload_with=app_engine,
                          keep_existing=True)
            # Only the column lookup is guarded, so an AttributeError raised
            # inside the ETL calls below is not mislabelled as a missing column.
            try:
                date_col = getattr(table.c, slugify(t.observed_date))
            except AttributeError:
                # Report before re-raising; this message used to sit after the
                # raise and was unreachable. A bare ``raise`` keeps the
                # original traceback.
                print('no col {0}'.format(t.observed_date))
                raise
            if type(date_col.type) == DATE:
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Best effort: skip datasets whose table does not exist.
            print('no table {0}'.format(t.dataset_name))