def get_action_return_with_sub_tags():
    log.debug('cache gen - subtags')

    # Get a list of all the tags for all the trackids and group them by tag name.
    # Only allow tags in the allowed list (there could be hundreds of 'title' and
    # 'from' tags) - we just want the browsable ones.
    alias_parent_tag = aliased(Tag)

    # The SQL engine cannot cope with an 'IN' clause with over 1000 entries.
    # The workaround is to batch the requests into chunks below that limit and
    # merge the results.
    def get_sub_tags_batch(trackids):

        def slice_batch(trackids, batch_size):
            for batch_number in range(0, (len(trackids) // batch_size) + 1):
                yield trackids[batch_number * batch_size:(batch_number + 1) * batch_size]

        tags = {}
        for trackids_batch in slice_batch(trackids, 900):
            for tag, count in get_sub_tags(trackids_batch):
                tag_dict = tag.to_dict('full')
                tag_id = tag_dict['id']
                tags.setdefault(tag_id, tag_dict).setdefault('count', 0)
                tags[tag_id]['count'] += count
        return tags.values()

    def get_sub_tags(trackids):
        return DBSession.query(Tag, func.count(TrackTagMapping.tag_id)).\
            join(TrackTagMapping).\
            join(alias_parent_tag, Tag.parent).\
            filter(TrackTagMapping.track_id.in_(trackids)).\
            filter(alias_parent_tag.name.in_(sub_tags_allowed)).\
            group_by('tag_1.id', alias_parent_tag.name, Tag.id).\
            order_by(alias_parent_tag.name, Tag.name).\
            options(joinedload(Tag.parent))

    # This if branch can probably be removed - we don't want different behaviour
    # for different db engines.
    # TODO: need to check if postgres can actually handle this properly
    if request.registry.settings.get('sqlalchemy.url', '').startswith('sqlite'):
        sub_tags = [tag for tag in get_sub_tags_batch(trackids)]
    else:
        sub_tags = [
            update_dict(tag.to_dict('full'), {'count': count})
            for tag, count in get_sub_tags(trackids)
        ]

    # AllanC - RRRRRRRAAAAAAAAA!!!! Postgres creates an alias 'tag_1.id' under the
    # hood, but won't actually return results unless it's in the group_by clause.
    # It works without the 'tag_1.id' in SQLite, so right now the SQLite version
    # is broken with 'tag_1' and Postgres dies without it.
    # Is there a way to alias this properly?
    # Tried aliases 'tag_1.id', 'tag_2.name'.

    action_return['data'].update({
        'sub_tags': sub_tags,
    })
    return action_return
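# AllanC's open question above ("is there a way to alias this properly?") most
# likely stems from joinedload(Tag.parent) adding its own anonymous 'tag_1' alias
# to the SELECT list, which Postgres then insists on seeing in the GROUP BY.
# One possible fix - only a sketch, untested against this schema - is to reuse
# the explicit alias_parent_tag join for the eager load via contains_eager
# (imported from sqlalchemy.orm), so no anonymous alias is generated and the
# group_by can reference mapped columns instead of the raw 'tag_1.id' string.
def get_sub_tags_with_explicit_alias(trackids):  # hypothetical name, not in the original code
    alias_parent_tag = aliased(Tag)
    return DBSession.query(Tag, func.count(TrackTagMapping.tag_id)).\
        join(TrackTagMapping).\
        join(alias_parent_tag, Tag.parent).\
        filter(TrackTagMapping.track_id.in_(trackids)).\
        filter(alias_parent_tag.name.in_(sub_tags_allowed)).\
        group_by(Tag.id, alias_parent_tag.id, alias_parent_tag.name).\
        order_by(alias_parent_tag.name, Tag.name).\
        options(contains_eager(Tag.parent, alias=alias_parent_tag))
# Note: the alias= keyword applies to older SQLAlchemy releases; newer versions
# spell this contains_eager(Tag.parent.of_type(alias_parent_tag)).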
def main():
    args = get_args()
    logging.basicConfig(level=logging.DEBUG if args['verbose'] else logging.INFO)

    # Use the pickled cache if it exists; otherwise compute the hash data and
    # write the cache for next time.
    try:
        with open(args['cache_filename'], 'rb') as f:
            data = pickle.load(f)
    except IOError:
        with open(args['cache_filename'], 'wb') as f:
            data = hash_source_dest(**args)
            pickle.dump(data, f)

    symlink_matched_files(**update_dict(args.copy(), data))
def get_action_return_with_sub_tags():
    log.debug('cache gen - subtags')

    # Get a list of all the tags for all the trackids and group them by tag name.
    # Only allow tags in the allowed list (there could be hundreds of 'title' and
    # 'from' tags) - we just want the browsable ones.
    alias_parent_tag = aliased(Tag)

    sub_tags = DBSession.query(Tag, func.count(TrackTagMapping.tag_id)).\
        join(TrackTagMapping).\
        join(alias_parent_tag, Tag.parent).\
        filter(TrackTagMapping.track_id.in_(trackids)).\
        filter(alias_parent_tag.name.in_(sub_tags_allowed)).\
        group_by('tag_1.id', alias_parent_tag.name, Tag.id).\
        order_by(alias_parent_tag.name, Tag.name).\
        options(joinedload(Tag.parent))

    # AllanC - RRRRRRRAAAAAAAAA!!!! Postgres creates an alias 'tag_1.id' under the
    # hood, but won't actually return results unless it's in the group_by clause.
    # It works without the 'tag_1.id' in SQLite, so right now the SQLite version
    # is broken with 'tag_1' and Postgres dies without it.
    # Is there a way to alias this properly?
    # Tried aliases 'tag_1.id', 'tag_2.name'.

    action_return['data'].update({
        'sub_tags': [update_dict(tag.to_dict('full'), {'count': count}) for tag, count in sub_tags],
    })
    return action_return
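# `update_dict` is used above inside comprehensions and call expressions, which
# a bare dict.update() cannot support because it returns None. The helper is
# assumed to behave roughly like this (a sketch of the presumed behaviour, not
# the project's actual implementation):
def update_dict(d, extra):
    """Merge `extra` into `d` and return `d` so the call can be used inline."""
    d.update(extra)
    return d

# e.g. update_dict(tag.to_dict('full'), {'count': count}) yields the tag dict
# with the aggregated count attached.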