예제 #1
0
    def get_action_return_with_sub_tags():
        """Attach grouped sub-tag counts for ``trackids`` to ``action_return``.

        Collects every tag attached to the tracks in ``trackids``, restricted
        to tags whose parent-tag name is in ``sub_tags_allowed`` (there could
        be 100's of title's and from's; we just want the browsable ones),
        counts how many tracks carry each tag, and stores the list of tag
        dicts under ``action_return['data']['sub_tags']``.
        """
        log.debug('cache gen - subtags')
        alias_parent_tag = aliased(Tag)

        # The SQL engine cannot cope with an 'IN' clause with over 1000 entries.
        # The workaround is to batch the requests into chunks of under 1000 ids
        # and merge the results.

        def get_sub_tags_batch(trackids):
            """Run get_sub_tags over <=900-id chunks and merge the counts."""

            def slice_batch(items, batch_size):
                # Yield consecutive slices of at most batch_size items.
                # Stepping by batch_size (rather than computing a slice count)
                # avoids emitting a trailing empty batch, which previously
                # triggered one useless 'IN ()' query.
                for start in range(0, len(items), batch_size):
                    yield items[start:start + batch_size]

            tags = {}
            for trackids_batch in slice_batch(trackids, 900):
                for tag, count in get_sub_tags(trackids_batch):
                    tag_dict = tag.to_dict('full')
                    tag_id = tag_dict['id']  # renamed: don't shadow builtin id()
                    # First batch seen for this tag stores the dict; every
                    # batch accumulates into its 'count'.
                    tags.setdefault(tag_id, tag_dict).setdefault('count', 0)
                    tags[tag_id]['count'] += count
            return tags.values()

        def get_sub_tags(trackids):
            # Per tag: how many of the given tracks carry it, restricted to
            # tags whose parent name is browsable.
            return DBSession.query(Tag,
                func.count(TrackTagMapping.tag_id)).\
                join(TrackTagMapping).\
                join(alias_parent_tag, Tag.parent).\
                filter(TrackTagMapping.track_id.in_(trackids)).\
                filter(alias_parent_tag.name.in_(sub_tags_allowed)).\
                group_by('tag_1.id', alias_parent_tag.name, Tag.id).\
                order_by(alias_parent_tag.name, Tag.name).\
                options(joinedload(Tag.parent)
            )

        # This if branch can probably be removed - we don't want different
        # behaviour for different db engines.
        #  TODO: need to check if postgres can actually handle this properly
        if request.registry.settings.get('sqlalchemy.url',
                                         '').startswith('sqlite'):
            sub_tags = list(get_sub_tags_batch(trackids))
        else:
            sub_tags = [
                update_dict(tag.to_dict('full'), {'count': count})
                for tag, count in get_sub_tags(trackids)
            ]

        # AllanC - RRRRRRRAAAAAAAAA!!!! Postgres creates an alias 'tag_1.id' under the hood, but wont actually return results unless it's in the group_by clause
        #          it works without the tag_1.id in sqlite. So right now, the sqlite version is broken with 'tag_1' and postgres dies without it.
        #          is there a way to alias this properly?
        # tried alias's 'tag_1.id','tag_2.name'

        action_return['data'].update({
            'sub_tags': sub_tags,
        })
        return action_return
예제 #2
0
파일: search.py 프로젝트: richlanc/KaraKara
    def get_action_return_with_sub_tags():
        """Attach grouped sub-tag counts for ``trackids`` to ``action_return``.

        Gathers the tags for all the trackids, grouped by tag name, keeping
        only tags whose parent name is in ``sub_tags_allowed`` (there could be
        100's of title's and from's; we just want the browsable ones), and
        stores them under ``action_return['data']['sub_tags']``.
        """
        log.debug('cache gen - subtags')
        alias_parent_tag = aliased(Tag)

        # The SQL engine cannot cope with an 'IN' clause with over 1000 entries.
        # The workaround is to batch the requests into chunks of under 1000 ids
        # and merge the results.

        def get_sub_tags_batch(trackids):
            """Run get_sub_tags over <=900-id chunks and merge the counts."""

            def slice_batch(items, batch_size):
                # Step by batch_size so no trailing empty slice is produced
                # (the old count-based loop issued one useless 'IN ()' query).
                for start in range(0, len(items), batch_size):
                    yield items[start:start + batch_size]

            tags = {}
            for trackids_batch in slice_batch(trackids, 900):
                for tag, count in get_sub_tags(trackids_batch):
                    tag_dict = tag.to_dict('full')
                    tag_id = tag_dict['id']  # renamed: don't shadow builtin id()
                    tags.setdefault(tag_id, tag_dict).setdefault('count', 0)
                    tags[tag_id]['count'] += count
            return tags.values()

        def get_sub_tags(trackids):
            # Per tag: how many of the given tracks carry it, restricted to
            # browsable parent-tag names.
            return DBSession.query(Tag,
                func.count(TrackTagMapping.tag_id)).\
                join(TrackTagMapping).\
                join(alias_parent_tag, Tag.parent).\
                filter(TrackTagMapping.track_id.in_(trackids)).\
                filter(alias_parent_tag.name.in_(sub_tags_allowed)).\
                group_by('tag_1.id', alias_parent_tag.name, Tag.id).\
                order_by(alias_parent_tag.name, Tag.name).\
                options(joinedload(Tag.parent)
            )

        # This if branch can probably be removed - we don't want different
        # behaviour for different db engines.
        #  TODO: need to check if postgres can actually handle this properly
        if request.registry.settings.get('sqlalchemy.url', '').startswith('sqlite'):
            sub_tags = list(get_sub_tags_batch(trackids))
        else:
            sub_tags = [update_dict(tag.to_dict('full'), {'count': count}) for tag, count in get_sub_tags(trackids)]

        # AllanC - RRRRRRRAAAAAAAAA!!!! Postgres creates an alias 'tag_1.id' under the hood, but wont actually return results unless it's in the group_by clause
        #          it works without the tag_1.id in sqlite. So right now, the sqlite version is broken with 'tag_1' and postgres dies without it.
        #          is there a way to alias this properly?
        # tried alias's 'tag_1.id','tag_2.name'

        action_return['data'].update({
            'sub_tags': sub_tags,
        })
        return action_return
예제 #3
0
def main():
    """Script entry point.

    Loads cached source/dest hash data from ``cache_filename`` (rebuilding
    and re-caching it when missing or unreadable), then symlinks the matched
    files using the combined arguments and data.
    """
    args = get_args()
    logging.basicConfig(level=logging.DEBUG if args['verbose'] else logging.INFO)

    try:
        # NOTE(review): pickle.load executes arbitrary code if the cache file
        # is tampered with - only safe for trusted cache locations.
        with open(args['cache_filename'], 'rb') as f:
            data = pickle.load(f)
    except (IOError, pickle.UnpicklingError):
        # Cache missing or corrupt: rebuild it. Compute the data *before*
        # opening the file for writing, so a failure in hash_source_dest
        # cannot leave behind an empty/truncated cache file.
        data = hash_source_dest(**args)
        with open(args['cache_filename'], 'wb') as f:
            pickle.dump(data, f)

    symlink_matched_files(**update_dict(args.copy(), data))
예제 #4
0
파일: search.py 프로젝트: shish/KaraKara
 def get_action_return_with_sub_tags():
     """Add grouped sub-tag data for the matched trackids to action_return."""
     log.debug('cache gen - subtags')
     # Fetch every tag attached to the matched tracks together with how many
     # tracks carry it, restricted to parent-tag names in sub_tags_allowed
     # (there could be hundreds of title/from tags we don't want to browse by).
     alias_parent_tag = aliased(Tag)
     sub_tag_query = (
         DBSession.query(Tag, func.count(TrackTagMapping.tag_id))
         .join(TrackTagMapping)
         .join(alias_parent_tag, Tag.parent)
         .filter(TrackTagMapping.track_id.in_(trackids))
         .filter(alias_parent_tag.name.in_(sub_tags_allowed))
         .group_by('tag_1.id', alias_parent_tag.name, Tag.id)
         .order_by(alias_parent_tag.name, Tag.name)
         .options(joinedload(Tag.parent))
     )

     # AllanC - RRRRRRRAAAAAAAAA!!!! Postgres creates an alias 'tag_1.id' under the hood, but wont actually return results unless it's in the group_by clause
     #          it works without the tag_1.id in sqllite. So right now, the SQLLite version is broken with 'tag_1' and postgres dies without it.
     #          is there a way to alias this properly?
     # tried alias's 'tag_1.id','tag_2.name'

     sub_tag_dicts = []
     for tag, count in sub_tag_query:
         sub_tag_dicts.append(update_dict(tag.to_dict('full'), {'count': count}))
     action_return['data'].update({
         'sub_tags': sub_tag_dicts,
     })
     return action_return