Example no. 1
 def build_sqlalchemy_columns(
         self, fields, base_query_tables=None, custom_columns=None):
     '''
     Build an OrderedDict of sqlalchemy column expressions, keyed by field key.

     The columns are associated with the sqlalchemy.sql.schema.Table
     definitions, which are bound to the sqlalchemy.engine.Engine:
     "Connects a Pool and Dialect together to provide a source of database
     connectivity and behavior."

     @param fields - field definitions, from the resource schema
     @param base_query_tables - if specified, the fields for these tables
     will be available as part of the base query, so the column definitions
     become simpler, and do not need to be joined in.
     @param custom_columns - a mapping of {key: column expression}; these
     columns are used verbatim (labeled with the key), overriding the
     field definition.
     '''
     DEBUG_BUILD_COLUMNS = logger.isEnabledFor(logging.DEBUG)
     base_query_tables = base_query_tables or []
     # used as a mapping (membership test + key lookup): default to a dict
     custom_columns = custom_columns or {}

     try:
         columns = OrderedDict()
         for field in fields:
             key = field['key']
             if key in custom_columns:
                 if DEBUG_BUILD_COLUMNS:
                     logger.info(
                         'custom field: %r, %r', key, custom_columns[key])
                 columns[key] = custom_columns[key].label(key)
                 continue

             if DEBUG_BUILD_COLUMNS:
                 logger.info('build column: %r, %r', key, field)
             # the sql field name defaults to the field key
             field_name = field.get('field', None) or key
             field_table = field.get('table', None)

             # FIX: skip a field with no table unconditionally; previously the
             # "continue" was part of "if not field_table and DEBUG...", so
             # with debug logging disabled the field fell through to the
             # branches below instead of being skipped.
             if not field_table:
                 if DEBUG_BUILD_COLUMNS:
                     logger.info(
                         'field: %r, val: %r, skipping field because there is no '
                         '"field_table" value set', key, field)
                 continue
             if DEBUG_BUILD_COLUMNS:
                 logger.info(
                     'field: %r, field_table: %r', key, field_table)

             if field_table in base_query_tables:
                 # simple case: table.fields already selected in the base query:
                 # just need to specify them
                 table_def = get_tables()[field_table]
                 if field_name not in table_def.c:
                     raise Exception(
                         'field: %r, not found in table: %r'
                         % (field_name, field_table))
                 columns[key] = table_def.c[field_name].label(key)

             elif field.get('linked_field_value_field', None):
                 link_table = field['table']
                 link_table_def = get_tables()[link_table]
                 linked_field_parent = field['linked_field_parent']
                 link_field = linked_field_parent + '_id'
                 join_args = {
                     'link_table': link_table, 'link_field': link_field,
                     'parent_table': linked_field_parent
                     }
                 # correlated subquery: link rows for the parent row
                 join_stmt = select([link_table_def.c[field_name]]).\
                     where(text('{link_table}.{link_field}='
                             '{parent_table}.{link_field}'.format(**join_args)))

                 if field.get('linked_field_meta_field', None):
                     # TODO: test - the linked meta field is the "datacolumn type"
                     linked_field_meta_field = field['linked_field_meta_field']
                     meta_field_obj = MetaHash.objects.get(
                         key=field['key'], scope=field['scope'])
                     meta_table_def = get_tables()['metahash']
                     # FIX: join() is generative and returns a new statement;
                     # the original discarded the return value, so the meta
                     # join was never actually applied.
                     join_stmt = join_stmt.join(
                         meta_table_def,
                         link_table_def.c[linked_field_meta_field] ==
                             meta_field_obj.pk)

                 if field['linked_field_type'] != 'fields.ListField':
                     # scalar linked value: use the subquery directly
                     columns[key] = join_stmt.label(key)
                 else:
                     # list linked value: aggregate the (optionally ordered)
                     # rows into a single delimited string
                     ordinal_field = field.get('ordinal_field', None)
                     if ordinal_field:
                         join_stmt = join_stmt.order_by(
                             link_table_def.c[ordinal_field])
                     join_stmt = join_stmt.alias('a')
                     agg_stmt = select([func.array_to_string(
                         func.array_agg(column(field_name)),
                         LIST_DELIMITER_SQL_ARRAY)])
                     columns[key] = agg_stmt.select_from(join_stmt).label(key)
             else:
                 if DEBUG_BUILD_COLUMNS:
                     logger.info(
                         'field is not in the base tables %r, nor in a linked field, '
                         'and is not custom: %s', base_query_tables, key)
         if DEBUG_BUILD_COLUMNS:
             logger.info('columns: %r', columns.keys())
         return columns
     except Exception:
         logger.exception('on build sqlalchemy columns')
         # FIX: bare re-raise preserves the original traceback, and replaces
         # the Python-2-only "except Exception, e: ... raise e" form
         raise
Example no. 2
def _concat(*args):
    '''
    Use as a replacement for sqlalchemy.sql.functions.concat
    - "concat" is not available in postgresql 8.4

    @param args - column expressions / literals to concatenate
    @return a sqlalchemy expression: array_to_string over the args,
    joined with the empty string
    '''
    # args is already a tuple; list(args) replaces the pass-through
    # comprehension "[x for x in args]" (array() expects a list of clauses)
    return func.array_to_string(array(list(args)), '')
Example no. 3
    def analytics_query(self, start, end, locations=None, library=None):
        """Build a database query that fetches rows of analytics data.

        This method uses low-level SQLAlchemy code to do all
        calculations and data conversions in the database. It's
        modeled after Work.to_search_documents, which generates a
        large JSON document entirely in the database.

        :param start: Include events whose start time is >= this value.
        :param end: Include events whose start time is < this value.
        :param locations: Optional comma-separated string of location
            values; when given, results are also restricted to the
            checkout/fulfill/open-book event types.
        :param library: Optional library; when given, only events for
            that library are included.
        :return: An iterator of results, each of which can be written
            directly to a CSV file.
        """

        # Clauses applied to every query: restrict to the [start, end)
        # time window.
        clauses = [
            CirculationEvent.start >= start,
            CirculationEvent.start < end,
        ]

        if locations:
            # Filtering by location only makes sense for event types
            # that carry a location.
            event_types = [
                CirculationEvent.CM_CHECKOUT,
                CirculationEvent.CM_FULFILL,
                CirculationEvent.OPEN_BOOK,
            ]
            # NOTE(review): only the string as a whole is stripped;
            # individual comma-separated values keep any surrounding
            # whitespace — confirm callers pass no spaces after commas.
            locations = locations.strip().split(",")

            clauses += [
                CirculationEvent.type.in_(event_types),
                CirculationEvent.location.in_(locations),
            ]

        if library:
            clauses += [CirculationEvent.library == library]

        # Build the primary query. This is a query against the
        # CirculationEvent table and a few other tables joined against
        # it. This makes up the bulk of the data.
        events_alias = (
            select(
                [
                    func.to_char(CirculationEvent.start, "YYYY-MM-DD HH24:MI:SS").label(
                        "start"
                    ),
                    CirculationEvent.type.label("event_type"),
                    Identifier.identifier,
                    Identifier.type.label("identifier_type"),
                    Edition.sort_title,
                    Edition.sort_author,
                    case(
                        [(Work.fiction == True, literal_column("'fiction'"))],
                        else_=literal_column("'nonfiction'"),
                    ).label("fiction"),
                    Work.id.label("work_id"),
                    Work.audience,
                    Edition.publisher,
                    Edition.imprint,
                    Edition.language,
                    CirculationEvent.location,
                ],
            )
            .select_from(
                join(
                    CirculationEvent,
                    LicensePool,
                    CirculationEvent.license_pool_id == LicensePool.id,
                )
                .join(Identifier, LicensePool.identifier_id == Identifier.id)
                .join(Work, Work.id == LicensePool.work_id)
                .join(Edition, Work.presentation_edition_id == Edition.id)
            )
            .where(and_(*clauses))
            .order_by(CirculationEvent.start.asc())
            .alias("events_alias")
        )

        # A subquery can hook into the main query by referencing its
        # 'work_id' field in its WHERE clause.
        work_id_column = literal_column(
            events_alias.name + "." + events_alias.c.work_id.name
        )

        # This subquery gets the names of a Work's genres as a single
        # comma-separated string.
        #

        # This Alias selects some number of rows, each containing one
        # string column (Genre.name). Genres with higher affinities with
        # this work go first.
        genres_alias = (
            select([Genre.name.label("genre_name")])
            .select_from(join(WorkGenre, Genre, WorkGenre.genre_id == Genre.id))
            .where(WorkGenre.work_id == work_id_column)
            .order_by(WorkGenre.affinity.desc(), Genre.name)
            .alias("genres_subquery")
        )

        # Use array_agg() to consolidate the rows into one row -- this
        # gives us a single value, an array of strings, for each
        # Work. Then use array_to_string to convert the array into a
        # single comma-separated string.
        genres = select(
            [func.array_to_string(func.array_agg(genres_alias.c.genre_name), ",")]
        ).select_from(genres_alias)

        # This subquery gets a Work's target age as a single string.
        #

        # This Alias selects two fields: the lower and upper bounds of
        # the Work's target age. This reuses code originally written
        # for Work.to_search_documents().
        target_age = Work.target_age_query(work_id_column).alias("target_age_subquery")

        # Concatenate the lower and upper bounds with a dash in the
        # middle. If both lower and upper bound are empty, just give
        # the empty string. This simulates the behavior of
        # Work.target_age_string.
        target_age_string = select(
            [
                case(
                    [
                        (
                            or_(target_age.c.lower != None, target_age.c.upper != None),
                            func.concat(target_age.c.lower, "-", target_age.c.upper),
                        )
                    ],
                    else_=literal_column("''"),
                )
            ]
        ).select_from(target_age)

        # Build the main query out of the subqueries. The column order
        # here is the order of the fields in each CSV row.
        events = events_alias.c
        query = select(
            [
                events.start,
                events.event_type,
                events.identifier,
                events.identifier_type,
                events.sort_title,
                events.sort_author,
                events.fiction,
                events.audience,
                events.publisher,
                events.imprint,
                events.language,
                target_age_string.label("target_age"),
                genres.label("genres"),
                events.location,
            ]
        ).select_from(events_alias)
        return query