def cubetl_config(ctx):

    ctx.add('estat.sql.connection',
            sql.Connection(url='sqlite:///estat.sqlite3'))

    # Load SDMX schema and transform it to CubETL OLAP entities
    sdmx.SDMXToOLAP.sdmx2olap(ctx,
                              path_dsd='data/eip_ext1.dsd.xml',
                              fact_name='estat_eip',
                              fact_label='Eurostat / Entrepreneurship Indicator Programme')

    # Generate a SQL schema from the OLAP schema
    sqlschema.OLAPToSQL.olap2sql(ctx, connection=ctx.get('estat.sql.connection'))

    # store_mode='insert'
    ctx.get('olap2sql.olapmapper').entity_mapper(
        ctx.get('smdx2olap.fact.estat_eip')).store_mode = TableMapper.STORE_MODE_INSERT

    # Define the data load process
    ctx.add('estat.process', flow.Chain(steps=[

        #ctx.get('cubetl.config.print'),

        # Generate a Cubes model
        cubes10.Cubes10ModelWriter(olapmapper=ctx.get('olap2sql.olapmapper'),
                                   model_path="estat.cubes-model.json",
                                   config_path="estat.cubes-config.ini"),

        sql.Transaction(connection=ctx.get('estat.sql.connection')),

        flow.Chain(fork=True, steps=[

            sdmx.SDMXFileReader(path_dsd='data/eip_ext1.dsd.xml',
                                path_sdmx='data/eip_ext1.sdmx.xml'),

            ctx.get('cubetl.util.print'),

            olap.Store(entity=ctx.get('smdx2olap.fact.estat_eip'),
                       mapper=ctx.get('olap2sql.olapmapper')),

            log.LogPerformance(),
        ]),
    ]))
def cubetl_config(ctx):

    ctx.props['file_path'] = ctx.props.get('file_path', '../loganalyzer/access.log')

    ctx.include('${ ctx.library_path }/datetime.py')
    ctx.include('${ ctx.library_path }/http.py')
    #ctx.include('${ ctx.library_path }/datetime.py')

    ctx.add('es.connection',
            elasticsearch.ElasticsearchConnection(url='http://localhost:9200'))

    # Define the data load process
    ctx.add('es.process', flow.Chain(steps=[

        fs.FileLineReader(path='${ ctx.props["file_path"] }', encoding=None),
        ctx.get('cubetl.http.parse.apache_combined'),

        util.PrettyPrint(depth=4),

        elasticsearch.Index(es=ctx.get("es.connection"),
                            index="test-index",
                            doc_type="logline",
                            data_id=lambda m: m['data']),

        log.LogPerformance(),
    ]))

    ctx.add('es.search', flow.Chain(steps=[

        elasticsearch.Search(es=ctx.get("es.connection"),
                             index="test-index",
                             query=None),

        util.PrettyPrint(depth=4),

        log.LogPerformance(),
    ]))
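    # Sketch (not in the original file): the 'es.search' node above issues a
    # match-all query (query=None). A variant that takes the query text from the
    # incoming message, mirroring the "${ m.get('q', None) }" pattern used in the
    # OSM/Elasticsearch example below, could look like this. The node id
    # 'es.search_q' is made up.
    ctx.add('es.search_q', flow.Chain(steps=[

        elasticsearch.Search(es=ctx.get("es.connection"),
                             index="test-index",
                             query="${ m.get('q', None) }"),

        util.PrettyPrint(depth=4),

        log.LogPerformance(),
    ]))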
def cubetl_config(ctx):

    ctx.add('directorycsv.process', flow.Chain(steps=[

        # Generates a message for each file in the given directory
        fs.DirectoryList(path=lambda ctx: ctx.props.get("path", "/"), maxdepth=0),
        fs.FileInfo(),  # path=lambda m: m['path'])

        script.Function(process_data),

        # Print the message
        util.Print(),

        # Generates CSV header and rows and writes them
        csv.CsvFileWriter(),  # path="/tmp/files", overwrite=True

        log.LogPerformance(),
    ]))
def cubetl_config(ctx): """ This is a simple ETL process. It simply lists files in the library path. Then prints the resulting messages to standard output. """ ctx.add( 'my_app.process', flow.Chain( fork=False, steps=[ # Log a message through the logging system log.Log(message='CubETL Example (Calling CubETL from Python)', level=log.Log.LEVEL_WARN), # Generates a message for each file in the given directory fs.DirectoryList(path=ctx.library_path), # Print the message (use -q when calling cubetl to hide print output) ctx.get('cubetl.util.print'), ]))
def cubetl_config(ctx): """ This is a simple ETL process. It simply lists files in the library path, and adds some extra data. Then prints the resulting messages to standard output. """ ctx.add( 'directorylist.process', flow.Chain(steps=[ # Log a message through the logging system log.Log(message='CubETL Example', level=log.Log.LEVEL_WARN), # Generates a message for each file in the given directory fs.DirectoryList(path=ctx.library_path), # Manipulate each message with a custom function script.Function(process_data), # Print the message (use -q when calling cubetl to hide print output) ctx.get('cubetl.util.print'), ]))
def cubetl_config(ctx):

    ctx.include('${ ctx.library_path }/datetime.py')
    #ctx.include('${ ctx.library_path }/datetime.py')

    ctx.add('odoo.sql.connection',
            sql.Connection(url='sqlite:///odoo.sqlite3'))

    '''
    ctx.add('ine.autonomy', olap.Dimension(
        name='autonomy',
        label='Autonomy',
        attributes=[olap.Attribute('autonomy', type='String')]))

    ctx.add('ine.province', olap.Dimension(
        name='province',
        label='Province',
        attributes=[olap.Attribute('province', type='String')]))

    ctx.add('ine.autonomyprovince', olap.HierarchyDimension(
        name='autonomyprovince',
        label='Province',
        attributes=[DimensionAttribute(ctx.get('ine.autonomy')),
                    DimensionAttribute(ctx.get('ine.province'))]))

    ctx.add('ine.nationality', olap.Dimension(
        name='nationality',
        label='Nationality',
        attributes=[olap.Attribute('nationality', type='String')]))

    ctx.add('ine.census', olap.Fact(
        name='census',
        label='Census',
        #must_slice=ctx.get('cubetl.datetime.datemonthly'),  # study when and how dimensions can be aggregated, this cube requires slicing by date or results are invalid
        #natural_key=
        #notes='',
        attributes=[DimensionAttribute(ctx.get('cubetl.datetime.datemonthly')),
                    DimensionAttribute(ctx.get('ine.autonomyprovince')),
                    DimensionAttribute(ctx.get('ine.nationality')),
                    Measure(name='census', type='Integer', label="Population")]))  # TODO: Should not present avg/max/min
    '''

    # Generate a SQL star schema and mappings automatically
    #sqlschema.OLAPToSQL.olap2sql(ctx, connection=ctx.get('ine.sql.connection'))
    #ctx.get('olap2sql.olapmapper').entity_mapper(ctx.get('ine.census')).store_mode = TableMapper.STORE_MODE_INSERT

    ctx.add("odoo.conn",
            odoo.OdooConnection(url="http://127.0.0.1:8069",
                                database="test",
                                username="******",
                                password="******"))

    # Define the data load process
    ctx.add('odoo.process', flow.Chain(steps=[

        #ctx.get('cubetl.config.print'),

        # Generate a Cubes model
        #cubes10.Cubes10ModelWriter(olapmapper=ctx.get('olap2sql.olapmapper'),
        #                           model_path="ine.cubes-model.json",
        #                           config_path="ine.cubes-config.ini"),

        sql.Transaction(connection=ctx.get('odoo.sql.connection')),

        #odoo.Execute(),
        #odoo.Dump('account.move'),
        script.Function(test),
        script.Function(test2),

        flow.Chain(fork=True, steps=[

            #script.Delete(['data', 'pcaxis']),

            #script.Function(process_data),
            #flow.Filter(condition="${ m['date'].year < 2002 }"),

            #cache.CachedTableLookup(
            #    table=ctx.get("ine.autonomy_province.table"),
            #    lookup={'province': lambda m: m['province_name']}),

            #ctx.get('cubetl.util.print'),
            util.PrettyPrint(depth=4),
            #util.Print(),

            #olap.Store(entity=ctx.get('ine.census'),
            #           mapper=ctx.get('olap2sql.olapmapper')),

            log.LogPerformance(),
        ]),
    ]))
def cubetl_config(ctx):

    ctx.include('${ ctx.library_path }/datetime.py')
    ctx.include('${ ctx.library_path }/person.py')

    ctx.add('ine.sql.connection',
            sql.Connection(url='sqlite:///ine.sqlite3'))

    ctx.add('ine.autonomy_province.table', table.CSVMemoryTable(data='''
province,autonomy
Albacete,Castilla la Mancha
Alicante/Alacant,Comunidad Valenciana
Almería,Andalucía
Araba/Álava,País Vasco
Asturias,Asturias
Ávila,Castilla y León
Badajoz,Extremadura
"Balears, Illes",Comunidad Balear
Barcelona,Cataluña
Bizkaia,País Vasco
Burgos,Castilla y León
Cáceres,Extremadura
Cádiz,Andalucía
Cantabria,Cantabria
Castellón/Castelló,Comunidad Valenciana
Ciudad Real,Castilla la Mancha
Córdoba,Andalucía
"Coruña, A",Galicia
Cuenca,Castilla la Mancha
Gipuzkoa,País Vasco
Girona,Cataluña
Granada,Andalucía
Guadalajara,Castilla la Mancha
Huelva,Andalucía
Huesca,Aragón
Jaén,Andalucía
León,Castilla y León
Lleida,Cataluña
Lugo,Galicia
Madrid,Madrid
Málaga,Andalucía
Murcia,Murcia
Navarra,Aragón
Ourense,Galicia
Palencia,Castilla y León
"Palmas, Las",Canarias
Pontevedra,Galicia
"Rioja, La",Rioja
Salamanca,Castilla y León
Santa Cruz de Tenerife,Canarias
Segovia,Castilla y León
Sevilla,Andalucía
Soria,Castilla y León
Tarragona,Cataluña
Teruel,Aragón
Toledo,Castilla la Mancha
Valencia/València,Comunidad Valenciana
Valladolid,Castilla y León
Zamora,Castilla y León
Zaragoza,Aragón
Ceuta,Ciudades Autónomas
Melilla,Ciudades Autónomas
'''))

    ctx.add('ine.autonomy', olap.Dimension(
        name='autonomy',
        label='Autonomy',
        attributes=[olap.Attribute('autonomy', type='String')]))

    ctx.add('ine.province', olap.Dimension(
        name='province',
        label='Province',
        attributes=[olap.Attribute('province', type='String')]))

    ctx.add('ine.autonomyprovince', olap.HierarchyDimension(
        name='autonomyprovince',
        label='Province',
        attributes=[DimensionAttribute(ctx.get('ine.autonomy')),
                    DimensionAttribute(ctx.get('ine.province'))]))

    ctx.add('ine.nationality', olap.Dimension(
        name='nationality',
        label='Nationality',
        attributes=[olap.Attribute('nationality', type='String')]))

    ctx.add('ine.census', olap.Fact(
        name='census',
        label='Census',
        #must_slice=ctx.get('cubetl.datetime.datemonthly'),  # study when and how dimensions can be aggregated, this cube requires slicing by date or results are invalid
        #natural_key=
        #notes='',
        attributes=[
            DimensionAttribute(ctx.get('cubetl.datetime.datemonthly')),
            DimensionAttribute(ctx.get('ine.autonomyprovince')),
            DimensionAttribute(ctx.get('ine.nationality')),
            DimensionAttribute(ctx.get('cubetl.person.gender')),
            DimensionAttribute(ctx.get('cubetl.person.age_range')),
            Measure(name='census', type='Integer', label="Population")
        ]))  # TODO: Should not present avg/max/min

    # Generate a SQL star schema and mappings automatically
    sqlschema.OLAPToSQL.olap2sql(ctx, connection=ctx.get('ine.sql.connection'))
    ctx.get('olap2sql.olapmapper').entity_mapper(
        ctx.get('ine.census')).store_mode = TableMapper.STORE_MODE_INSERT

    # Define the data load process
    ctx.add('ine.process', flow.Chain(steps=[

        #ctx.get('cubetl.config.print'),

        # Generate a Cubes model
        cubes10.Cubes10ModelWriter(olapmapper=ctx.get('olap2sql.olapmapper'),
                                   model_path="ine.cubes-model.json",
                                   config_path="ine.cubes-config.ini"),

        sql.Transaction(connection=ctx.get('ine.sql.connection')),

        fs.FileReader(path='census-2002.px', encoding=None),
        pcaxis.PCAxisParser(),

        flow.Chain(fork=True, steps=[

            pcaxis.PCAxisIterator(),
            script.Delete(['data', 'pcaxis']),

            flow.Filter(condition="${ m['Sexo'] != 'Ambos sexos' }"),
            flow.Filter(condition="${ m['Grupo quinquenal de edad'] != 'Total' }"),
            #flow.Filter(condition="${ m['Grupo de edad'] != 'Total' }"),
            flow.Filter(condition="${ m['Nacionalidad'] != 'Total' }"),
            flow.Filter(condition="${ m['Provincias'] != 'Total Nacional' }"),

            #flow.Skip(skip="${ random.randint(1, 1000) }"),
            #flow.Limit(limit=5000),

            script.Function(process_data),
            #flow.Filter(condition="${ m['date'].year < 2002 }"),

            cache.CachedTableLookup(
                table=ctx.get("ine.autonomy_province.table"),
                lookup={'province': lambda m: m['province_name']}),

            ctx.get('cubetl.util.print'),

            olap.Store(entity=ctx.get('ine.census'),
                       mapper=ctx.get('olap2sql.olapmapper')),

            log.LogPerformance(),
        ]),
    ]))
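# Hypothetical sketch of the process_data callback used by 'ine.process' above
# (the original file ships its own implementation, not shown here). The PC-Axis
# field names ('Sexo', 'Provincias', ...) come from the filters above, and
# 'province_name' is required by the CachedTableLookup step; the remaining target
# attribute names, the 'value' field and the (ctx, m) signature are assumptions.
def process_data(ctx, m):
    m['province_name'] = m['Provincias']             # looked up against ine.autonomy_province.table
    m['nationality'] = m['Nacionalidad']             # assumed attribute name for the nationality dimension
    m['gender'] = m['Sexo']                          # assumed; feeds cubetl.person.gender
    m['age_range'] = m['Grupo quinquenal de edad']   # assumed; feeds cubetl.person.age_range
    m['census'] = int(m['value'])                    # 'value' from PCAxisIterator is an assumption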
def cubetl_config(ctx):

    #ctx.include('${ ctx.library_path }/datetime.py')
    #ctx.include('${ ctx.library_path }/geo.py')
    #ctx.include('${ ctx.library_path }/net.py')
    ctx.include('${ ctx.library_path }/http.py')

    # Process configuration
    ctx.props['db_url'] = 'sqlite:///loganalyzer.sqlite3'
    ctx.props['domain'] = 'cubesviewer.com'  # ctx.interpolate('${ }')
    ctx.props['file_path'] = ctx.props.get('file_path', 'access.log')
    ctx.props['download_extensions'] = 'zip, tgz, gz, 7z, rar, iso, msi, exe, avi, mp3, mp4, ogg, mkv, pdf'
    ctx.props['download_extensions_list'] = [e.strip().lower() for e in ctx.props['download_extensions'].split(',')]
    ctx.props['download_size_bytes'] = 10 * 1024 * 1024

    # Database connection for loaded OLAP data
    ctx.add('loganalyzer.sql.connection',
            sql.Connection(url=ctx.interpolate(None, '${ ctx.props["db_url"] }')))

    # Generate a SQL star schema and mappings automatically
    sqlschema.OLAPToSQL.olap2sql(ctx, connection=ctx.get('loganalyzer.sql.connection'))
    ctx.get('olap2sql.olapmapper').entity_mapper(
        ctx.get('cubetl.http.request')).store_mode = TableMapper.STORE_MODE_INSERT

    # Processes a log file and loads the database for OLAP
    ctx.add('loganalyzer.process', flow.Chain(steps=[

        ctx.get('cubetl.config.print'),

        # Generate a Cubes model
        cubes10.Cubes10ModelWriter(olapmapper=ctx.get('olap2sql.olapmapper'),
                                   model_path="loganalyzer.cubes-model.json",
                                   config_path="loganalyzer.cubes-config.ini"),
        script.Delete(['cubesmodel', 'cubesmodel_json']),

        sql.Transaction(connection=ctx.get('loganalyzer.sql.connection')),

        fs.FileLineReader(path='${ ctx.props["file_path"] }', encoding=None),
        ctx.get('cubetl.http.parse.apache_combined'),

        geoip.GeoIPFromAddress(data="${ m['address'] }"),
        useragent.UserAgentParse(data="${ m['user_agent_string'] }"),

        cache.CachedTableLookup(
            table=ctx.get("cubetl.http.status.table"),
            lookup={'status_code': lambda m: m['status_code']},
            default={'status_description': 'Unknown'}),

        script.Function(process_data),

        ctx.get('cubetl.util.print'),

        olap.Store(entity=ctx.get('cubetl.http.request'),
                   mapper=ctx.get('olap2sql.olapmapper')),

        log.LogPerformance(),
    ]))

    # This node runs several test queries
    ctx.add('loganalyzer.query', flow.Chain(steps=[

        #ctx.get('cubetl.config.print'),

        query.OlapQueryAggregate(fact=ctx.get('cubetl.http.request'),
                                 mapper=ctx.get('olap2sql.olapmapper'),
                                 #drills=['referer.source'],
                                 cuts={'contcountry.id': 16}),

        #query.OlapQueryFacts(fact=ctx.get('cubetl.http.request'),
        #                     mapper=ctx.get('olap2sql.olapmapper'),
        #                     cuts={'contcountry': 16}),

        #query.OlapQueryDimension(fact=ctx.get('cubetl.http.request'),
        #                         mapper=ctx.get('olap2sql.olapmapper'),
        #                         drill=['contcountry.country']),

        ctx.get('cubetl.util.print'),
    ]))
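# Hypothetical sketch of the process_data callback referenced by
# 'loganalyzer.process' above (the original file defines its own). It illustrates
# what the 'download_extensions_list' and 'download_size_bytes' properties are
# presumably used for: tagging log lines that look like file downloads. The
# (ctx, m) signature and the 'request_path' / 'bytes' field names are assumptions.
def process_data(ctx, m):
    path = m.get('request_path', '')
    extension = path.rsplit('.', 1)[-1].lower() if '.' in path else ''

    # Flag requests with a known download extension or a large enough response size
    m['is_download'] = (extension in ctx.props['download_extensions_list'] or
                        int(m.get('bytes', 0) or 0) >= ctx.props['download_size_bytes'])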
def cubetl_config(ctx):

    ctx.props['file_path'] = ctx.props.get('file_path', 'spain-latest.osm.pbf')

    ctx.props['osmiumelastic.elastic.mappings'] = {
        "mappings": {
            "properties": {
                "description": {"type": "text"},
                "location": {"type": "geo_point"},
                "name": {"type": "text"},
                "tagkeys": {"type": "keyword"},
                "timestamp": {"type": "date"},
                "type": {"type": "keyword"},
                "uid": {"type": "long"},
                #"user": {
                #    "type": "keyword",
                #    #"index": "not_analyzed"
                #}
            }
        }
    }

    #ctx.include('${ ctx.library_path }/datetime.py')

    ctx.add('osmiumelastic.connection',
            elasticsearch.ElasticsearchConnection(url='http://localhost:9200'))

    ctx.add('osmiumelastic.process', flow.Chain(steps=[

        elasticsearch.IndexCreate(
            es=ctx.get("osmiumelastic.connection"),
            index="osm-index",
            mappings=ctx.props["osmiumelastic.elastic.mappings"]),

        osm_osmium.OsmiumNode(filename="spain-latest.osm.pbf"),

        util.PrettyPrint(depth=4),
        log.LogPerformance(),

        elasticsearch.Index(es=ctx.get("osmiumelastic.connection"),
                            index="osm-index",
                            doc_type="osm",
                            data_id=lambda m: m['id']),
    ]))

    ctx.add('osmiumelastic.search', flow.Chain(steps=[

        elasticsearch.Search(es=ctx.get("osmiumelastic.connection"),
                             index="osm-index",
                             query="${ m.get('q', None) }"),

        util.PrettyPrint(depth=4),
        log.LogPerformance(),
    ]))