Esempio n. 1
0
def cubetl_config(ctx):

    #ctx.include('${ ctx.library_path }/geo.py')

    ctx.add('cubetl.net.domain.tld', Dimension(name='tld', label='TLD'))
    # TODO: Reference GEO/country if available (add "N/A" to Geo/Country)

    ctx.add('cubetl.net.domain.domain', Dimension(name='domain',
                                                  label='Domain'))

    ctx.add('cubetl.net.domain.subdomain',
            Dimension(name='subdomain', label='Subdomain'))

    ctx.add(
        'cubetl.net.domain',
        HierarchyDimension(
            name='domain3',
            label='Domain',
            #info={"cv-formatter": "'.'.join()"},
            hierarchies=[
                Hierarchy(name='domain3',
                          label='Domain',
                          levels=['tld', 'domain', 'subdomain'])
            ],
            attributes=[
                DimensionAttribute(ctx.get('cubetl.net.domain.tld')),
                DimensionAttribute(ctx.get('cubetl.net.domain.domain')),
                DimensionAttribute(ctx.get('cubetl.net.domain.subdomain'))
            ]))
Esempio n. 2
0
    def sql2olap(ctx, debug=False, prefix="sql2olap"):
        """
        This method generates a CubETL OLAP schema from an SQL schema defined by CubETL SQL components
        (such a schema can automatically be generated from an existing SQL database using `sql2cubetl`
        function).

        The process can be controlled via a dictionary of options passed via the `options` argument.

        Options:

          * `<object_uri>.type=ignore` ignores the given SQL column.
          * `<object_uri>.type=attribute` forces the SQL column to be used as fact attribute.
          * `<object_uri>.type=dimension` forces the SQL column to be used as dimension.

        Details:

        This method works by walking objects of class SQLTable in the context, and generating an
        cubetl.olap.Fact for each. Tables referenced via foreign keys are included as dimensions.
        """

        # TODO: New generation refactor

        # Create a new Dimension for each found field, unless configuration says they are the same dimension
        # (or can be deduced: ie: same column name + size + same user type (raw dates, 0/1 boolean...).

        # Then, instance an olap sql-to-olap (process tables and columns, generate olap and olap mappings)

        # Implement querying

        # Move these SQL/OLAP method to Cubetl components.

        # Normalize/formalize column/name/id/schema/database usage

        # (optionally, at the end, export to cubes)
        # (should theorically be able to create olap-2-star-schema mappings, then create tables and load)
        # (theorically, we should be able to generate the same mappings from the generated star-schema (would require identifying split dims/hierarchies)

        #exclude_columns = ['key', 'entity_id']
        #force_dimensions = dimensions if dimensions else []

        # Load datetime
        ctx.include(ctx.library_path + "/datetime.py")

        # Mappings for datetime
        datedimension = ctx.get("cubetl.datetime.date")

        facts = {}
        factattributes = []
        olapmappers = []

        logger.info("Generating CubETL Olap schema from SQL schema.")

        sqltables = ctx.find(type=cubetl.sql.sql.SQLTable)
        for sqltable in sqltables:

            olap_type_table = _match_config(
                ctx.props, 'sql2olap.table.%s.type' % sqltable.name, None)

            logger.info("Fact: %s" % sqltable.name)
            if olap_type_table == 'ignore':
                logger.info("SQL2OLAP ignoring SQL table: %s", sqltable)
                continue

            factmappings = []
            factattributes = []
            key_count = 0

            for dbcol in sqltable.columns:

                olap_type = _match_config(ctx.props,
                                          'sql2olap.%s.type' % dbcol.urn, None)
                if olap_type:
                    logger.info("Column: %s (forced type: %s)" %
                                (dbcol, olap_type))
                else:
                    logger.info("Column: %s" % (dbcol))

                if olap_type == 'ignore':
                    logger.info("SQL2OLAP ignoring SQL column: %s", dbcol)
                    continue

                if dbcol.pk:
                    key_urn = "%s.fact.%s.key.%s" % (prefix, sqltable.name,
                                                     dbcol.name)
                    key = ctx.add(
                        key_urn,
                        Key(name=dbcol.name,
                            type=dbcol.type,
                            label=dbcol.label))
                    factattributes.append(key)

                    factmapping = OlapMapping(path=[key], sqlcolumn=dbcol)
                    factmappings.append(factmapping)

                    key_count += 1

                if isinstance(dbcol, cubetl.sql.sql.SQLColumnFK):
                    #if len(dbcol.foreign_keys) > 1:
                    #    raise Exception("Multiple foreign keys found for column: %s" % (dbcol.name))

                    related_fact_name = dbcol.fk_sqlcolumn.sqltable.name
                    if related_fact_name == sqltable.name:
                        # Reference to self
                        # TODO: This does not account for circular dependencies across other entities
                        logger.warning(
                            "Ignoring foreign key reference to self: %s",
                            dbcol.name)
                        continue

                    related_fact = facts.get(related_fact_name, None)
                    if related_fact is None:
                        logger.warning(
                            "Ignoring foreign key reference from %s.%s to not available entity: %s",
                            dbcol.sqltable.name, dbcol.name, related_fact_name)
                        continue

                    # Create dimension attribute
                    dimension_attribute = olap.DimensionAttribute(
                        related_fact, name=dbcol.name, label=dbcol.label)
                    factattributes.append(dimension_attribute)

                    # Create a mapping
                    factdimensionmapping = OlapMapping(
                        path=[dimension_attribute], sqlcolumn=dbcol)
                    factmappings.append(factdimensionmapping)

                if not dbcol.pk and not isinstance(
                        dbcol, cubetl.sql.sql.SQLColumnFK) and (
                            olap_type == 'dimension' or
                            (olap_type is None and dbcol.type == "String")
                        ):  # or (dbcol.name in force_dimensions)
                    # Embedded dimension (single column, string or integer, treated as a dimension)

                    dimension_attribute = olap.Attribute(name=dbcol.name,
                                                         type=dbcol.type,
                                                         label=dbcol.label)
                    dimension = olap.Dimension(
                        name=dbcol.name,
                        label=dbcol.label,
                        attributes=[dimension_attribute])

                    factattributes.append(
                        DimensionAttribute(dimension, dimension.name,
                                           dimension.label))

                    # This dimension is mapped in the parent table
                    factmapping = OlapMapping(
                        path=[dimension, dimension_attribute], sqlcolumn=dbcol)
                    factmappings.append(factmapping)

                if not dbcol.pk and not isinstance(
                        dbcol, cubetl.sql.sql.SQLColumnFK) and (
                            olap_type == 'attribute'):
                    # Attribute (detail)
                    attribute = Attribute(name=dbcol.name,
                                          type=dbcol.type,
                                          label=dbcol.label)
                    factattributes.append(attribute)

                    factmapping = OlapMapping(path=[attribute],
                                              sqlcolumn=dbcol)
                    factmappings.append(factmapping)

                if not dbcol.pk and not isinstance(
                        dbcol, cubetl.sql.sql.SQLColumnFK) and (
                            olap_type == 'measure' or
                            (olap_type is None
                             and dbcol.type in ("Float", "Integer"))):

                    measure = Measure(name=dbcol.name,
                                      type=dbcol.type,
                                      label=dbcol.label)
                    factattributes.append(measure)

                    factmapping = OlapMapping(path=[measure], sqlcolumn=dbcol)
                    factmappings.append(factmapping)

                elif dbcol.type in ("DateTime"):

                    # Date dimension
                    datedimension = ctx.get("cubetl.datetime.date")

                    # Create dimension attribute
                    dimension_attribute = olap.DimensionAttribute(
                        datedimension, name=dbcol.name, label=dbcol.label)
                    factattributes.append(dimension_attribute)

                    # TODO: This shall be common
                    #mapper = olap.sql.EmbeddedDimensionMapper(entity=datedimension, sqltable=None)
                    #olapmapper.mappers.append(mapper)

                    mapping = OlapMapping(path=[
                        dimension_attribute,
                        dimension_attribute.dimension.attribute('year')
                    ],
                                          sqlcolumn=dbcol,
                                          function=OlapMapping.FUNCTION_YEAR)
                    factmappings.append(mapping)

                    #mapping = OlapMapping(entity=datedimension, attribute=datedimension.attribute("quarter"), sqlcolumn=dbcol, function=OlapMapping.FUNCTION_QUARTER)
                    #factmappings.append(mapping)

                    mapping = OlapMapping(path=[
                        dimension_attribute,
                        dimension_attribute.dimension.attribute('month')
                    ],
                                          sqlcolumn=dbcol,
                                          function=OlapMapping.FUNCTION_MONTH)
                    factmappings.append(mapping)

                    mapping = OlapMapping(path=[
                        dimension_attribute,
                        dimension_attribute.dimension.attribute('day')
                    ],
                                          sqlcolumn=dbcol,
                                          function=OlapMapping.FUNCTION_DAY)
                    factmappings.append(mapping)

                    mapping = OlapMapping(path=[
                        dimension_attribute,
                        dimension_attribute.dimension.attribute('week')
                    ],
                                          sqlcolumn=dbcol,
                                          function=OlapMapping.FUNCTION_WEEK)
                    factmappings.append(mapping)

                    # Create an alias for this dimension seen from this datetime field point of view
                    # This approach creates a dimension for each different foreign key column name used
                    '''
                    aliasdimension_urn = "%s.dim.datetime.%s.alias.%s" % (prefix, datedimension.name, dbcol.name)
                    aliasdimension = ctx.get(aliasdimension_urn, False)
                    if not aliasdimension:
                        aliasdimension = ctx.add(aliasdimension_urn,
                                                 olap.AliasDimension(dimension=datedimension, name=dbcol.name, label=dbcol.label))
                    fact.dimensions.append(olap.DimensionAttribute(aliasdimension))

                    # Create a mapping
                    aliasdimensionmapping = OlapMapping(entity=aliasdimension, sqlcolumn=dbcol)
                    factmappings.append(aliasdimensionmapping)
                    mapper = olap.sql.AliasDimensionMapper(entity=aliasdimension)
                    mapper.mappings = [
                        # These mappings don't have a sqlcolumn because they are meant to be embedded
                        OlapMapping(entity=ctx.get("cubetl.datetime.year"), sqlcolumn=dbcol, function=OlapMapping.FUNCTION_YEAR),
                        OlapMapping(entity=ctx.get("cubetl.datetime.quarter"), sqlcolumn=dbcol, function=OlapMapping.FUNCTION_QUARTER),
                        OlapMapping(entity=ctx.get("cubetl.datetime.month"), sqlcolumn=dbcol, function=OlapMapping.FUNCTION_MONTH),
                        OlapMapping(entity=ctx.get("cubetl.datetime.day"), sqlcolumn=dbcol, function=OlapMapping.FUNCTION_DAY),
                        OlapMapping(entity=ctx.get("cubetl.datetime.week"), sqlcolumn=dbcol, function=OlapMapping.FUNCTION_WEEK)
                    ]
                    olapmapper.mappers.append(mapper)
                    '''
            '''
            if len(factmappings) == 0:
                factmappings = [ { 'name': 'index', 'pk': True, 'type': 'Integer' } ]
            '''

            # Ignore table if more than one primary key was found
            if key_count > 1:
                logger.warning(
                    "Multiple primary key found in table %s (not supported, ignoring table)",
                    sqltable.name)
                continue

            # Ignore table if it contains no primary key
            if key_count == 0:
                logger.warning(
                    "No primary key found in table %s (not supported, ignoring table)",
                    sqltable.name)
                continue

            # Define fact
            fact_urn = "%s.fact.%s" % (prefix, sqltable.name)
            fact = ctx.add(
                fact_urn,
                olap.Fact(name=sqltable.name,
                          label=sqltable.label,
                          attributes=factattributes))
            facts[fact.name] = fact
            # Create an olapmapper for this fact
            olapmapper = olap.OlapMapper(
            )  # TODO: review whether this is necessary or we could use a single mapper

            mapper = olap.sql.TableMapper(entity=fact,
                                          sqltable=sqltable,
                                          mappings=factmappings)
            olapmapper.mappers.append(mapper)
            olapmappers.append(olapmapper)
            #ctx.register(mapper)  #, uri='%s:fact' % ctx.uri(sqltable)

            # IDs should be defined in mappings, not entity Keys
            #  mappings:
            #  - name: id
            #    pk: True
            #    type: Integer
            #    value: ${ int(m["id"]) }

        #printconfig = PrintConfig()
        #printflow = Chain(fork=True, steps=[printconfig])
        #result = ctx.process(printflow)
        '''
        process = sql.StoreRow(sqltable)
        result = ctx.process(process)

        connection = ctx.find(sql.Connection)[0]
        process = sql.Query(connection, lambda: "SELECT * FROM fin_account_accountmovement", embed=True)
        result = ctx.process(process)
        print(result)
        '''
        '''
        process = olap.OlapQueryAggregate()
        result = ctx.process(process, {'fact': 'fin_account_accountmovement', 'cuts': None, 'drill': None})
        print result
        '''

        olapmapper = olap.OlapMapper()
        olapmapper.include = [i for i in olapmappers]
        olapmapper_urn = "%s.olapmapper" % (prefix)
        ctx.add(olapmapper_urn, olapmapper)

        return ctx
Esempio n. 3
0
def cubetl_config(ctx):

    ctx.add(
        'cubetl.datetime.year',
        Dimension(
            name='year',
            label='Year',
            role='year',
            attributes=[Attribute(name='year', type='Integer', label='Year')]))

    ctx.add(
        'cubetl.datetime.half',
        Dimension(
            name='half',
            label='Half',
            role='half',
            attributes=[Attribute(name='half', type='Integer', label='Half')]))

    ctx.add(
        'cubetl.datetime.quarter',
        Dimension(name='quarter',
                  label='Quarter',
                  role='quarter',
                  attributes=[
                      Attribute(name='quarter',
                                type='Integer',
                                label='Quarter')
                  ]))

    ctx.add(
        'cubetl.datetime.month',
        Dimension(name='month',
                  label='Month',
                  role='month',
                  attributes=[
                      Attribute(name='month', type='Integer', label='Month')
                  ]))

    ctx.add(
        'cubetl.datetime.week',
        Dimension(
            name='week',
            label='Week',
            role='week',
            attributes=[Attribute(name='week', type='Integer', label='Week')]))

    ctx.add(
        'cubetl.datetime.day',
        Dimension(
            name='day',
            label='Day',
            role='day',
            attributes=[Attribute(name='day', type='Integer', label='Day')]))

    ctx.add(
        'cubetl.datetime.date',
        HierarchyDimension(
            name='date',
            label='Date',
            role='date',
            hierarchies=[
                Hierarchy(name='daily',
                          label='Daily',
                          levels=['year', 'month', 'day']),
                Hierarchy(name='weekly',
                          label='Weekly',
                          levels=['year', 'week'])
            ],
            attributes=[
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.year')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.month')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.day')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.week'))
            ]))

    ctx.add(
        'cubetl.datetime.datemonthly',
        HierarchyDimension(
            name='datemonthly',
            label='Month',
            role='date',
            hierarchies=[
                Hierarchy(name='monthly',
                          label='Monthly',
                          levels=['year', 'quarter', 'month'])
            ],
            attributes=[
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.year')),
                DimensionAttribute(
                    dimension=ctx.get('cubetl.datetime.quarter')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.month'))
            ]))

    ctx.add(
        'cubetl.datetime.dateyqmd',
        HierarchyDimension(
            name='dateyqmd',
            label='Date',
            role='date',
            hierarchies=[
                Hierarchy(name='daily',
                          label='Daily',
                          levels=['year', 'quarter', 'month', 'day']),
                Hierarchy(name='weekly',
                          label='Weekly',
                          levels=['year', 'week'])
            ],
            attributes=[
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.year')),
                DimensionAttribute(
                    dimension=ctx.get('cubetl.datetime.quarter')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.month')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.day')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.week'))
            ]))

    ctx.add(
        'cubetl.datetime.dateyh',
        HierarchyDimension(
            name='dateyh',
            label='Date',
            role='date',
            hierarchies=[
                Hierarchy(name='yearhalf',
                          label='Year + Half',
                          levels=['year', 'half'])
            ],
            attributes=[
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.year')),
                DimensionAttribute(dimension=ctx.get('cubetl.datetime.half'))
            ]))
Esempio n. 4
0
def cubetl_config(ctx):

    ctx.include('${ ctx.library_path }/datetime.py')
    ctx.include('${ ctx.library_path }/geo.py')
    ctx.include('${ ctx.library_path }/net.py')

    ctx.add('cubetl.http.request.client_address',
            Dimension(name='client_address', label='Address'))

    ctx.add('cubetl.http.request.client_domain',
            Dimension(name='client_domain', label='Domain'))

    ctx.add('cubetl.http.request.username',
            Dimension(name='username', label='Username'))

    ctx.add('cubetl.http.request.method',
            Dimension(name='http_method', label='HTTP Method'))

    ctx.add('cubetl.http.request.path', HierarchyDimension(
        name='request_path',
        label='Path',
        hierarchies=[Hierarchy(name='path14', label='Path', levels=["path1", "path2", "path3", "path4"])],
        attributes=[DimensionAttribute(dimension=Dimension(name='path1', label='Path 1')),
                    DimensionAttribute(dimension=Dimension(name='path2', label='Path 2')),
                    DimensionAttribute(dimension=Dimension(name='path3', label='Path 3')),
                    DimensionAttribute(dimension=Dimension(name='path4', label='Path 4'))]))

    ctx.add('cubetl.http.protocol',
            Dimension(name='protocol', label='Protocol'))


    ctx.add('cubetl.http.response.status.code',
            Dimension(name='status_code', label='Status', attributes=[
                Attribute(name='status_code', type='Integer', label='Status Code'),
                Attribute(name='status_description', type='String', label='Status Description')]))

    ctx.add('cubetl.http.response.status.type',
            Dimension(name='status_type', label='Status Type', attributes=[
                Attribute(name='status_type_label', type='String', label='Status Type'),
                Attribute(name='status_type_code', type='String', label='Status Type Code')]))

    ctx.add('cubetl.http.response.status', HierarchyDimension(
        name='response_status',
        label='Status',
        hierarchies=[Hierarchy(name='http_status', label='Status', levels=['status_type', 'status_code'])],
        attributes=[DimensionAttribute(ctx.get('cubetl.http.response.status.type')),
                    DimensionAttribute(ctx.get('cubetl.http.response.status.code'))]))

    ctx.add('cubetl.http.request.referer_path',
            Dimension(name='referer_path', label='Referer Path'))

    '''

    !!python/object:cubetl.olap.HierarchyDimension
    id: cubetl.http.referer
    name: referer
    label: Referer
    hierarchies:
    - name: referer
      label: Referer
      levels: referer_domain, referer_path
    levels:
    - !ref cubetl.http.referer.domain
    - !ref cubetl.http.referer.path
    '''

    ctx.add('cubetl.http.user_agent', HierarchyDimension(
        name='user_agent',
        label='User Agent',
        attributes=[DimensionAttribute(Dimension(name='user_agent_family', label='User Agent')),
                    DimensionAttribute(Dimension(name='user_agent_version', label='Version'))
                    ]))

    ctx.add('cubetl.os.operating_system', HierarchyDimension(
        name='operating_system',
        label='Operating System',
        attributes=[DimensionAttribute(Dimension(name='operating_system_family', label='OS')),
                    DimensionAttribute(Dimension(name='operating_system_version', label='Version'))
                    ]))

    ctx.add('cubetl.http.mimetype', HierarchyDimension(
        name='mimetype',
        label='MIME Type',
        attributes=[DimensionAttribute(Dimension(name='mimetype_type', label='Type')),
                    DimensionAttribute(Dimension(name='mimetype_subtype', label='Subtype'))
                    ]))

    ctx.add('cubetl.os.device',
            Dimension(name='device', label='Device'))

    ctx.add('cubetl.http.request.is_bot',
            Dimension(name='is_bot', label='Is Bot', attributes=[
                Attribute(name='is_bot', type='Boolean', label='Is Bot')]))

    ctx.add('cubetl.http.request.is_mobile',
            Dimension(name='is_mobile', label='Is Mobile', attributes=[
                Attribute(name='is_mobile', type='Boolean', label='Is Mobile')]))

    ctx.add('cubetl.http.request.is_pc',
            Dimension(name='is_pc', label='Is PC', attributes=[
                Attribute(name='is_pc', type='Boolean', label='Is PC')]))

    ctx.add('cubetl.http.request.is_tablet',
            Dimension(name='is_tablet', label='Is Tablet', attributes=[
                Attribute(name='is_tablet', type='Boolean', label='Is Tablet')]))

    ctx.add('cubetl.http.request.file_extension',
            Dimension(name='file_extension', label='File Extension'))

    ctx.add('cubetl.http.request.referer_origin',
            Dimension(name='referer_origin', label='Referer Origin'))

    ctx.add('cubetl.http.response.is_download',
            Dimension(name='is_download', label='Is Download', attributes=[
                Attribute(name='is_download', type='Boolean', label='Is Download')]))

    ctx.add('cubetl.http.request', Fact(
        name='http_request',
        label='HTTP Request',
        #natural_key=
        #notes='',
        attributes=[
            DimensionAttribute(ctx.get('cubetl.datetime.date'), name='request_date', label="Request Date"),
            #ctx.get('cubetl.datetime.date'),
            DimensionAttribute(ctx.get('cubetl.http.protocol')),
            DimensionAttribute(ctx.get('cubetl.http.request.client_address')),
            DimensionAttribute(ctx.get('cubetl.http.request.username')),
            DimensionAttribute(ctx.get('cubetl.http.request.method')),
            DimensionAttribute(ctx.get('cubetl.http.request.path')),
            DimensionAttribute(ctx.get('cubetl.http.request.file_extension')),
            DimensionAttribute(ctx.get('cubetl.http.request.referer_origin')),
            DimensionAttribute(ctx.get('cubetl.http.request.is_bot')),
            DimensionAttribute(ctx.get('cubetl.http.request.is_pc')),
            DimensionAttribute(ctx.get('cubetl.http.request.is_tablet')),
            DimensionAttribute(ctx.get('cubetl.http.request.is_mobile')),
            DimensionAttribute(ctx.get('cubetl.geo.contcountry')),
            DimensionAttribute(ctx.get('cubetl.http.response.status')),
            DimensionAttribute(ctx.get('cubetl.http.mimetype')),
            DimensionAttribute(ctx.get('cubetl.http.response.is_download')),
            DimensionAttribute(ctx.get('cubetl.net.domain'), name="referer_domain", label="Referer Domain"),
            DimensionAttribute(ctx.get('cubetl.http.request.referer_path')),
            #ctx.get('cubetl.http.referer'),  # TODO: try nested hierarchydimensions in "referer" hierarchydimension
            DimensionAttribute(ctx.get('cubetl.http.user_agent')),
            DimensionAttribute(ctx.get('cubetl.os.operating_system')),
            DimensionAttribute(ctx.get('cubetl.os.device')),

            Attribute(name='user_agent_string', type='String', label='User Agent String'),
            Attribute(name='verb', type='String', label='Verb'),
            #Attribute(name='referer', type='String', label='Referer')

            Measure(name='served_bytes', type='Integer', label="Served Bytes"),
        ]))

    '''

    - !ref
    #- !ref cubetl.http.request.client_domain
    #- !ref cubetl.http.request.rlogname
    #- !ref cubetl.http.request.username
    '''


    ctx.add('cubetl.http.parse.apache_combined',
            RegExp(regexp=r'([\d\.]+) (-) (-) \[(.*?)\] "(.*?)" (\d+) (\S+) "(.*?)" "(.*?)"',
                   errors=RegExp.ERRORS_WARN,
                   names='address, rlogname, username, date_string, verb, status_code, served_bytes, referer, user_agent_string'))


    ctx.add('cubetl.http.status.table', table.CSVMemoryTable(
        data='''
            status_code,status_description,status_type
            100,Continue,Informational
            101,Switching Protocols,Informational
            200,OK,Successful
            201,Created,Successful
            202,Accepted,Successful
            203,Non-Authoritative Information,Successful
            204,No Content,Successful
            205,Reset Content,Successful
            206,Partial Content,Successful
            300,Multiple Choices,Redirection
            301,Moved Permanently,Redirection
            302,Found,Redirection
            303,See Other,Redirection
            304,Not Modified,Redirection
            305,Use Proxy,Redirection
            307,Temporary Redirect,Redirection
            400,Bad Request,Client Error
            401,Unauthorized,Client Error
            402,Payment Required,Client Error
            403,Forbidden,Client Error
            404,Not Found,Client Error
            405,Method Not Allowed,Client Error
            406,Not Acceptable,Client Error
            407,Proxy Authentication Required,Client Error
            408,Request Timeout,Client Error
            409,Conflict,Client Error
            410,Gone,Client Error
            411,Length Required,Client Error
            412,Precondition Failed,Client Error
            413,Request Entity Too Large,Client Error
            414,Request-URI Too Long,Client Error
            415,Unsupported Media Type,Client Error
            416,Requested Range Not Satisfiable,Client Error
            417,Expectation Failed,Client Error
            500,Internal Server Error,Server Error
            501,Not Implemented,Server Error
            502,Bad Gateway,Server Error
            503,Service Unavailable,Server Error
            504,Gateway Timeout,Server Error
            505,HTTP Version Not Supported,Server Error
        '''))
Esempio n. 5
0
def cubetl_config(ctx):

    ctx.add(
        'cubetl.geo.continent',
        Dimension(name='continent',
                  label='Continent',
                  attributes=[
                      Attribute(name='continent_code',
                                type='String',
                                label='Continent Code'),
                      Attribute(name='continent_name',
                                type='String',
                                label='Continent')
                  ]))

    ctx.add(
        'cubetl.geo.country',
        Dimension(name='country',
                  label='Country',
                  role="geo",
                  info={
                      'cv-flag-field':
                      'country_iso2',
                      'cv-geo-ref-model-attribute':
                      'country_iso2',
                      'cv-geo-ref-feature-attribute':
                      'iso_a2',
                      'cv-geo-ref-layer':
                      'countries',
                      'cv-geo-map-layers': [{
                          "name": "countries",
                          "type": "vector",
                          "attribution": "&copy; NaturalEarth",
                          "params": {
                              "url": "maps/ne_110m_admin_0_countries.geojson",
                              "format": "geojson",
                              "wrapX": True
                          }
                      }]
                  },
                  attributes=[
                      Attribute(name='country_iso2',
                                type='String',
                                label='Country Code'),
                      Attribute(name='country_name',
                                type='String',
                                label='Country')
                  ]))

    ctx.add(
        'cubetl.geo.contcountry',
        HierarchyDimension(
            name='contcountry',
            label='Country',
            hierarchies=[
                Hierarchy(name='contcountry',
                          label='Country',
                          levels=['continent', 'country'])
            ],
            attributes=[
                DimensionAttribute(ctx.get('cubetl.geo.continent')),
                DimensionAttribute(ctx.get('cubetl.geo.country'))
            ]))

    ctx.add(
        'cubetl.geo.region_3',
        Dimension(name='region_3',
                  label='Region',
                  attributes=[
                      Attribute(name='region_3_code',
                                type='String',
                                label='Region Code'),
                      Attribute(name='region_3_name',
                                type='String',
                                label='Region')
                  ]))

    ctx.add(
        'cubetl.geo.region_4',
        Dimension(name='region_4',
                  label='Province',
                  attributes=[
                      Attribute(name='region_4_code',
                                type='String',
                                label='Province Code'),
                      Attribute(name='region_4_name',
                                type='String',
                                label='Province')
                  ]))

    ctx.add(
        'cubetl.geo.region_5',
        Dimension(name='region_5',
                  label='Town',
                  attributes=[
                      Attribute(name='region_5_code',
                                type='String',
                                label='Town Code'),
                      Attribute(name='region_5_name',
                                type='String',
                                label='Town')
                  ]))

    ctx.add(
        'cubetl.geo.region_3_4',
        HierarchyDimension(
            name='Region',
            label='Region',
            hierarchies=[
                Hierarchy(name='region_3_4',
                          label='Region',
                          levels=['region_3', 'region_4'])
            ],
            attributes=[
                DimensionAttribute(ctx.get('cubetl.geo.region_3')),
                DimensionAttribute(ctx.get('cubetl.geo.region_4'))
            ]))

    ctx.add(
        'cubetl.geo.region_3_5',
        HierarchyDimension(
            name='Region',
            label='Region',
            hierarchies=[
                Hierarchy(name='region_3_5',
                          label='Region',
                          levels=['region_3', 'region_4', 'region_5'])
            ],
            attributes=[
                DimensionAttribute(ctx.get('cubetl.geo.region_3')),
                DimensionAttribute(ctx.get('cubetl.geo.region_4')),
                DimensionAttribute(ctx.get('cubetl.geo.region_5'))
            ]))
Esempio n. 6
0
def cubetl_config(ctx):

    ctx.include('${ ctx.library_path }/datetime.py')
    ctx.include('${ ctx.library_path }/person.py')

    ctx.add('ine.sql.connection', sql.Connection(url='sqlite:///ine.sqlite3'))

    ctx.add(
        'ine.autonomy_province.table',
        table.CSVMemoryTable(data='''
            province,autonomy
            Albacete,Castilla la Mancha
            Alicante/Alacant,Comunidad Valenciana
            Almería,Andalucía
            Araba/Álava,País Vasco
            Asturias,Asturias
            Ávila,Castilla y León
            Badajoz,Extremadura
            "Balears, Illes",Comunidad Balear
            Barcelona,Cataluña
            Bizkaia,País Vasco
            Burgos,Castilla y León
            Cáceres,Extremadura
            Cádiz,Andalucía
            Cantabria,Cantabria
            Castellón/Castelló,Comunidad Valenciana
            Ciudad Real,Castilla la Mancha
            Córdoba,Andalucía
            "Coruña, A",Galicia
            Cuenca,Castilla la Mancha
            Gipuzkoa,País Vasco
            Girona,Cataluña
            Granada,Andalucía
            Guadalajara,Castilla la Mancha
            Huelva,Andalucía
            Huesca,Aragón
            Jaén,Andalucía
            León,Castilla y León
            Lleida,Cataluña
            Lugo,Galicia
            Madrid,Madrid
            Málaga,Andalucía
            Murcia,Murcia
            Navarra,Aragón
            Ourense,Galicia
            Palencia,Castilla y León
            "Palmas, Las",Canarias
            Pontevedra,Galicia
            "Rioja, La",Rioja
            Salamanca,Castilla y León
            Santa Cruz de Tenerife,Canarias
            Segovia,Castilla y León
            Sevilla,Andalucía
            Soria,Castilla y León
            Tarragona,Cataluña
            Teruel,Aragón
            Toledo,Castilla la Mancha
            Valencia/València,Comunidad Valenciana
            Valladolid,Castilla y León
            Zamora,Castilla y León
            Zaragoza,Aragón
            Ceuta,Ciudades Autónomas
            Melilla,Ciudades Autónomas
        '''))

    ctx.add(
        'ine.autonomy',
        olap.Dimension(name='autonomy',
                       label='Autonomy',
                       attributes=[olap.Attribute('autonomy', type='String')]))

    ctx.add(
        'ine.province',
        olap.Dimension(name='province',
                       label='Province',
                       attributes=[olap.Attribute('province', type='String')]))

    ctx.add(
        'ine.autonomyprovince',
        olap.HierarchyDimension(name='autonomyprovince',
                                label='Province',
                                attributes=[
                                    DimensionAttribute(
                                        ctx.get('ine.autonomy')),
                                    DimensionAttribute(ctx.get('ine.province'))
                                ]))

    ctx.add(
        'ine.nationality',
        olap.Dimension(
            name='nationality',
            label='Nationality',
            attributes=[olap.Attribute('nationality', type='String')]))

    ctx.add(
        'ine.census',
        olap.Fact(
            name='census',
            label='Census',
            #must_slice=ctx.get('cubetl.datetime.datemonthly'),  # study when and how dimensions can be aggregated, this cube requires slicing by date or results are invalid
            #natural_key=
            #notes='',
            attributes=[
                DimensionAttribute(ctx.get('cubetl.datetime.datemonthly')),
                DimensionAttribute(ctx.get('ine.autonomyprovince')),
                DimensionAttribute(ctx.get('ine.nationality')),
                DimensionAttribute(ctx.get('cubetl.person.gender')),
                DimensionAttribute(ctx.get('cubetl.person.age_range')),
                Measure(name='census', type='Integer', label="Population")
            ]))  # TODO: Should not present avg/max/min

    # Generate a SQL star schema and mappings automatically
    sqlschema.OLAPToSQL.olap2sql(ctx, connection=ctx.get('ine.sql.connection'))
    ctx.get('olap2sql.olapmapper').entity_mapper(
        ctx.get('ine.census')).store_mode = TableMapper.STORE_MODE_INSERT

    # Define the data load process
    ctx.add(
        'ine.process',
        flow.Chain(steps=[

            #ctx.get('cubetl.config.print'),

            # Generate a Cubes model
            cubes10.Cubes10ModelWriter(olapmapper=ctx.get(
                'olap2sql.olapmapper'),
                                       model_path="ine.cubes-model.json",
                                       config_path="ine.cubes-config.ini"),
            sql.Transaction(connection=ctx.get('ine.sql.connection')),
            fs.FileReader(path='census-2002.px', encoding=None),
            pcaxis.PCAxisParser(),
            flow.Chain(
                fork=True,
                steps=[
                    pcaxis.PCAxisIterator(),
                    script.Delete(['data', 'pcaxis']),
                    flow.Filter(condition="${ m['Sexo'] != 'Ambos sexos' }"),
                    flow.Filter(
                        condition=
                        "${ m['Grupo quinquenal de edad'] != 'Total' }"),
                    #flow.Filter(condition="${ m['Grupo de edad'] != 'Total' }"),
                    flow.Filter(condition="${ m['Nacionalidad'] != 'Total' }"),
                    flow.Filter(
                        condition="${ m['Provincias'] != 'Total Nacional' }"),

                    #flow.Skip(skip="${ random.randint(1, 1000) }"),
                    #flow.Limit(limit=5000),
                    script.Function(process_data),

                    #flow.Filter(condition="${ m['date'].year < 2002 }"),
                    cache.CachedTableLookup(
                        table=ctx.get("ine.autonomy_province.table"),
                        lookup={'province': lambda m: m['province_name']}),
                    ctx.get('cubetl.util.print'),
                    olap.Store(entity=ctx.get('ine.census'),
                               mapper=ctx.get('olap2sql.olapmapper')),
                    log.LogPerformance(),
                ]),
        ]))