Example No. 1
def push_pop():
    # push, pop: asc / desc / rand
    min_num = 10
    max_num = 200000
    step = 5000
    num_datas = list(range(min_num, max_num, step))
    result_dics = list(
        tqdm(map(lambda n: expr_result(n, 3), num_datas),
             total=len(num_datas)))
    exprs = F.join_with(list, result_dics)
    #return num_datas, exprs

    pprint(exprs)
    print(num_datas)

    y_keys = F.lremove(lambda key: 'merge' in key or 'pop' in key,
                       exprs.keys())

    gradient_dic = {}
    for key in sorted(y_keys):
        plt.plot(num_datas,
                 exprs[key],
                 label=key,
                 marker='.' if 'bh' in key else 'x',
                 linestyle='-' if 'bh' in key else '--')
        gradient_dic[key] = np.polyfit(num_datas, exprs[key], 1)[0]

    pprint(gradient_dic)
    plt.xlabel('number of keys')
    plt.ylabel('milliseconds')
    plt.legend()
    plt.show()

    y_keys = F.lremove(lambda key: 'merge' in key or 'push' in key,
                       exprs.keys())

    gradient_dic = {}
    for key in sorted(y_keys):
        plt.plot(num_datas,
                 exprs[key],
                 label=key,
                 marker='.' if 'bh' in key else 'x',
                 linestyle='-' if 'bh' in key else '--')
        gradient_dic[key] = np.polyfit(num_datas, exprs[key], 1)[0]

    pprint(gradient_dic)
    plt.xlabel('number of keys')
    plt.ylabel('milliseconds')
    plt.legend()
    plt.show()
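
The two plotting passes above hinge on funcy's join_with collapsing the per-size result dicts into one dict of lists. A minimal sketch of that step, with illustrative key names standing in for whatever expr_result actually returns:

import funcy as F

# Each expr_result call is assumed to return a dict of timings keyed by experiment name.
result_dics = [
    {'h.push.asc': 1.2, 'bh.push.asc': 0.8},
    {'h.push.asc': 2.5, 'bh.push.asc': 1.6},
]
exprs = F.join_with(list, result_dics)
# exprs == {'h.push.asc': [1.2, 2.5], 'bh.push.asc': [0.8, 1.6]}
# i.e. one y-series per key, aligned with num_datas on the x-axis.
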
Example No. 2
def merge():
    min_num = 10
    max_num = 5000
    step = 100
    num_datas = list(range(min_num, max_num, step))
    result_dics = list(
        tqdm(map(lambda n: expr_result(3, n), num_datas),
             total=len(num_datas)))

    def avrg_merge_result(dic):
        def avrg(dics):
            return sum(map(lambda d: d['time'], dics)) / len(dics)

        dic['h.merge.2rand'] = avrg(dic['h.merge.2rand'])
        dic['bh.merge.2rand'] = avrg(dic['bh.merge.2rand'])
        return dic

    result_dics = F.lmap(avrg_merge_result, result_dics)
    print(result_dics)
    exprs = F.join_with(list, result_dics)
    #pprint(exprs)

    y_keys = F.lremove(lambda key: 'pop' in key or 'push' in key, exprs.keys())
    #print(y_keys)

    gradient_dic = {}
    for key in sorted(y_keys):
        plt.plot(num_datas,
                 exprs[key],
                 label=key,
                 marker='.' if 'bh' in key else 'x',
                 linestyle='-' if 'bh' in key else '--')
        gradient_dic[key] = np.polyfit(num_datas, exprs[key], 1)[0]

    pprint(gradient_dic)
    plt.xlabel('max number of keys')
    plt.ylabel('milliseconds')
    plt.legend()
    plt.show()
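
The gradient_dic values above come from a degree-1 np.polyfit, whose first coefficient is the fitted slope. A small sketch with made-up timings:

import numpy as np

num_datas = [10, 110, 210, 310]
timings = [0.5, 5.2, 10.1, 15.3]   # illustrative millisecond values
slope, intercept = np.polyfit(num_datas, timings, 1)
print(round(slope, 3))             # ~0.049 ms per additional key, i.e. roughly linear growth
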
Example No. 3
def dnfs(qs):
    """
    Converts query condition tree into a DNF of eq conds.
    Separately for each alias.

    Any negations, conditions with lookups other than __exact or __in,
    conditions on joined models and subrequests are ignored.
    __in is converted into = or = or = ...
    """
    SOME = object()
    SOME_TREE = [[(None, None, SOME, True)]]

    def negate(term):
        return (term[0], term[1], term[2], not term[3])

    def _dnf(where):
        """
        Constructs DNF of where tree consisting of terms in form:
            (alias, attribute, value, negation)
        meaning `alias.attribute = value`
         or `not alias.attribute = value` if negation is False

        Any conditions other than eq are dropped.
        """
        if isinstance(where, Lookup):
            # If where.lhs doesn't refer to a field then don't bother
            if not hasattr(where.lhs, 'target'):
                return SOME_TREE
            # Don't bother with complex right hand side either
            if isinstance(where.rhs, (QuerySet, Query, Subquery, RawSQL)):
                return SOME_TREE
            # Skip conditions on non-serialized fields
            if isinstance(where.lhs.target, settings.CACHEOPS_SKIP_FIELDS):
                return SOME_TREE

            attname = where.lhs.target.attname
            if isinstance(where, Exact):
                return [[(where.lhs.alias, attname, where.rhs, True)]]
            elif isinstance(where, IsNull):
                return [[(where.lhs.alias, attname, None, where.rhs)]]
            elif isinstance(where, In) and len(
                    where.rhs) < settings.CACHEOPS_LONG_DISJUNCTION:
                return [[(where.lhs.alias, attname, v, True)]
                        for v in where.rhs]
            else:
                return SOME_TREE
        elif isinstance(where, NothingNode):
            return []
        elif isinstance(where, (ExtraWhere, SubqueryConstraint)):
            return SOME_TREE
        elif len(where) == 0:
            return [[]]
        else:
            children_dnfs = lmap(_dnf, where.children)

            if len(children_dnfs) == 0:
                return [[]]
            elif len(children_dnfs) == 1:
                result = children_dnfs[0]
            else:
                # Just unite children joined with OR
                if where.connector == OR:
                    result = lcat(children_dnfs)
                # Use Cartesian product to AND children
                else:
                    result = lmap(lcat, product(*children_dnfs))

            # Negating and expanding brackets
            if where.negated:
                result = [lmap(negate, p) for p in product(*result)]

            return result

    def clean_conj(conj, for_alias):
        conds = {}
        for alias, attname, value, negation in conj:
            # "SOME" conds, negated conds and conds for other aliases should be stripped
            if value is not SOME and negation and alias == for_alias:
                # Conjs with fields eq 2 different values will never cause invalidation
                if attname in conds and conds[attname] != value:
                    return None
                conds[attname] = value
        return conds

    def clean_dnf(tree, aliases):
        cleaned = [
            clean_conj(conj, alias) for conj in tree for alias in aliases
        ]
        # Remove deleted conjunctions
        cleaned = [conj for conj in cleaned if conj is not None]
        # Any empty conjunction eats up the rest
        # NOTE: a more elaborate DNF reduction is not really needed,
        #       just keep your querysets sane.
        if not all(cleaned):
            return [{}]
        return cleaned

    def query_dnf(query):
        def table_for(alias):
            if alias == main_alias:
                return alias
            return query.alias_map[alias].table_name

        dnf = _dnf(query.where)

        # NOTE: we exclude content_type as it never changes and will hold dead invalidation info
        main_alias = query.model._meta.db_table
        aliases = {alias for alias, join in query.alias_map.items()
                   if query.alias_refcount[alias]} \
                | {main_alias} - {'django_content_type'}
        tables = group_by(table_for, aliases)
        return {
            table: clean_dnf(dnf, table_aliases)
            for table, table_aliases in tables.items()
        }

    if qs.query.combined_queries:
        return join_with(lcat,
                         (query_dnf(q) for q in qs.query.combined_queries))
    else:
        return query_dnf(qs.query)
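
For reference, a hedged sketch of the shape dnfs() produces, assuming a hypothetical Post model already set up with cacheops; the field and table names below are illustrative only:

qs = Post.objects.filter(category_id=5, published=True)
dnfs(qs)
# Expected shape (one entry per table, each a list of conjunction dicts):
# {'myapp_post': [{'category_id': 5, 'published': True}]}
# An OR in the queryset would add more conjunction dicts to the list.
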
Example No. 4
def cached_as(*samples,
              timeout=None,
              extra=None,
              lock=None,
              keep_fresh=False,
              key_func=func_cache_key):
    """
    Caches results of a function and invalidates them the same way as the given queryset(s).
    NOTE: Ignores queryset cached ops settings, always caches.

    If keep_fresh is True, this will prevent caching if the given querysets are
    invalidated during the function call. This prevents prolonged caching of
    stale data.
    """
    if not samples:
        raise TypeError('Pass a queryset, a model or an object to cache like')

    # If we unexpectedly get list instead of queryset return identity decorator.
    # Paginator could do this when page.object_list is empty.
    if len(samples) == 1 and isinstance(samples[0], list):
        return lambda func: func

    def _get_queryset(sample):
        if isinstance(sample, models.Model):
            queryset = sample.__class__.objects.filter(pk=sample.pk)
        elif isinstance(sample, type) and issubclass(sample, models.Model):
            queryset = sample.objects.all()
        else:
            queryset = sample

        queryset._require_cacheprofile()

        return queryset

    querysets = lmap(_get_queryset, samples)
    dbs = list({qs.db for qs in querysets})
    cond_dnfs = join_with(lcat, map(dnfs, querysets))
    key_extra = [qs._cache_key(prefix=False) for qs in querysets]
    key_extra.append(extra)
    if timeout is None:
        timeout = min(qs._cacheprofile['timeout'] for qs in querysets)
    if lock is None:
        lock = any(qs._cacheprofile['lock'] for qs in querysets)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if not settings.CACHEOPS_ENABLED or transaction_states.is_dirty(
                    dbs):
                return func(*args, **kwargs)

            prefix = get_prefix(func=func, _cond_dnfs=cond_dnfs, dbs=dbs)
            cache_key = prefix + 'as:' + key_func(func, args, kwargs,
                                                  key_extra)

            with redis_client.getting(cache_key, lock=lock) as cache_data:
                cache_read.send(sender=None,
                                func=func,
                                hit=cache_data is not None)
                if cache_data is not None:
                    return pickle.loads(cache_data)
                else:
                    if keep_fresh:
                        # We call this "asp" for "as precall" because this key is
                        # cached before the actual function is called. We randomize
                        # the key to prevent falsely thinking the key was not
                        # invalidated when in fact it was invalidated and the
                        # function was called again in another process.
                        suffix = key_func(func, args, kwargs,
                                          key_extra + [random()])
                        precall_key = prefix + 'asp:' + suffix
                        # Cache a precall_key to watch for invalidation during
                        # the function call. Its value does not matter. If and
                        # only if it remains valid before, during, and after the
                        # call, the result can be cached and returned.
                        cache_thing(prefix,
                                    precall_key,
                                    'PRECALL',
                                    cond_dnfs,
                                    timeout,
                                    dbs=dbs)
                    else:
                        precall_key = ''

                    result = func(*args, **kwargs)
                    cache_thing(prefix,
                                cache_key,
                                result,
                                cond_dnfs,
                                timeout,
                                dbs=dbs,
                                precall_key=precall_key)
                    return result

        return wrapper

    return decorator
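
A short usage sketch for the decorator above, with a hypothetical Article model; the cached result is invalidated together with the sampled queryset:

@cached_as(Article.objects.filter(published=True), timeout=60)
def published_titles():
    # Recomputed only after the cache entry expires or a published Article changes.
    return list(Article.objects.filter(published=True)
                .values_list('title', flat=True))
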
Example No. 5
    def handle(self, *args, **options):
        series = {}
        samples = {}

        platform_created_on = join_with(
            min, [{p: ceil_attrs_date(s)
                   for p in s.platforms} for s in Series.objects.all()])
        platform_qs = Platform.objects.annotate(probes_count=Count('probes'))\
            .values('gpl_name', 'probes_count')
        platforms = {}
        platforms_probes = {}

        series_annotations = {}
        sample_annotations = {}
        concordant_series_annotations = {}
        concordant_sample_annotations = {}

        series_tags = {}
        concordant_series_tags = {}
        sample_tags = {}
        concordant_sample_tags = {}

        series_validations = {}
        sample_validations = {}
        concordant_series_validations = {}
        concordant_sample_validations = {}

        for specie in SPECIES.values():
            series[specie] = accumulate(
                count_by(ceil_attrs_date,
                         Series.objects.filter(specie=specie)))

            qs = Sample.objects.filter(platform__specie=specie)
            iterator = tqdm(queryset_iterator(qs, 30000),
                            total=qs.count(),
                            desc='{0} samples'.format(specie))
            samples[specie] = accumulate(count_by(ceil_attrs_date, iterator))

            platforms_data = [[
                platform_created_on[item['gpl_name']], item['probes_count']
            ] for item in platform_qs.filter(specie=specie)]
            platforms[specie] = accumulate(count_by(first, platforms_data))
            group = group_values(platforms_data)
            platforms_probes[specie] = accumulate(walk_values(sum, group))

            qs = SeriesAnnotation.objects.filter(series__specie=specie)
            series_annotations[specie], \
                sample_annotations[specie] = distribute_series_and_sample_annotations(qs)

            concordant_series_annotations[specie], \
                concordant_sample_annotations[specie] = distribute_series_and_sample_annotations(
                    qs.filter(best_cohens_kappa=1))

            qs = SeriesTag.objects.filter(platform__specie=specie,
                                          is_active=True)
            series_tags[specie] = distribute_by_created_on(qs)
            concordant_series_tags[specie] = distribute_by_created_on(
                qs.exclude(agreed=None))

            qs = SampleTag.objects.filter(sample__platform__specie=specie,
                                          is_active=True)
            sample_tags[specie] = distribute_by_created_on(qs)
            concordant_sample_tags[specie] = distribute_by_created_on(
                qs.exclude(series_tag__agreed=None))

            qs = SerieValidation.objects.filter(platform__specie=specie,
                                                ignored=False,
                                                by_incompetent=False)
            series_validations[specie] = distribute_by_created_on(qs)
            concordant_series_validations[specie] = distribute_by_created_on(
                qs.filter(best_kappa=1))

            qs = SampleValidation\
                .objects\
                .filter(sample__platform__specie=specie,
                        serie_validation__ignored=False,
                        serie_validation__by_incompetent=False)
            sample_validations[specie] = distribute_by_created_on(qs)
            concordant_sample_validations[specie] = distribute_by_created_on(
                qs.filter(
                    Q(serie_validation__best_kappa=1) | Q(concordant=True)))

        users = accumulate(
            count_by(ceil_date,
                     User.objects.values_list('date_joined', flat=True)))
        tags = accumulate(
            count_by(ceil_date, Tag.objects.values_list('created_on',
                                                        flat=True)))

        delta = CURRENT_DATE - START_DATE
        keys = sorted(
            set(
                ceil_date(START_DATE + timedelta(days=index * 20))
                for index in range(delta.days // 20 + 1)))

        specie_data = {
            'series': series,
            'samples': samples,
            'platforms': platforms,
            'platforms_probes': platforms_probes,
            'series_annotations': series_annotations,
            'sample_annotations': sample_annotations,
            'concordant_series_annotations': concordant_series_annotations,
            'concordant_sample_annotations': concordant_sample_annotations,
            'series_tags': series_tags,
            'sample_tags': sample_tags,
            'concordant_series_tags': concordant_series_tags,
            'concordant_sample_tags': concordant_sample_tags,
            'series_validations': series_validations,
            'sample_validations': sample_validations,
            'concordant_series_validations': concordant_series_validations,
            'concordant_sample_validations': concordant_sample_validations,
            'series_tags_by_users':
                distribute_by_user_id(SeriesTag.objects.filter(is_active=True)),
            'sample_tags_by_users':
                distribute_by_user_id(SampleTag.objects.filter(is_active=True)),
            'series_validations_by_users':
                distribute_by_user_id(
                    SerieValidation.objects.filter(ignored=False,
                                                   by_incompetent=False)),
            'sample_validations_by_users':
                distribute_by_user_id(
                    SampleValidation.objects.filter(
                        serie_validation__ignored=False,
                        serie_validation__by_incompetent=False)),
            'series_tag_history': get_series_tag_history(),
        }

        data = {
            'users': users,
            'tags': tags,
        }

        with transaction.atomic():
            HistoricalCounter.objects.filter(
                created_on__lte=CURRENT_DATE).delete()
            HistoricalCounter.objects.bulk_create([
                HistoricalCounter(created_on=key,
                                  counters=merge(
                                      walk_values(get_value(keys, index),
                                                  data),
                                      walk_values(
                                          lambda value: walk_values(
                                              get_value(keys, index), value),
                                          specie_data)))
                for index, key in enumerate(keys)
            ])
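
The bulk_create call above combines funcy's merge and walk_values to pick, for each date key, one cumulative value out of every counter dict. A toy sketch of that combination, with get_value as a stand-in for the project's helper:

from funcy import merge, walk_values

def get_value(keys, index):          # stand-in for the helper used above
    return lambda counter: counter.get(keys[index], 0)

keys = ['2015-01', '2015-02']
data = {'users': {'2015-01': 3, '2015-02': 7}}
specie_data = {'series': {'human': {'2015-01': 1, '2015-02': 4}}}

counters = merge(
    walk_values(get_value(keys, 1), data),
    walk_values(lambda value: walk_values(get_value(keys, 1), value), specie_data))
# counters == {'users': 7, 'series': {'human': 4}}
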
Example No. 6
def dnfs(qs):
    """
    Converts query condition tree into a DNF of eq conds.
    Separately for each alias.

    Any negations, conditions with lookups other than __exact or __in,
    conditions on joined models and subrequests are ignored.
    __in is converted into = or = or = ...
    """
    SOME = object()
    SOME_TREE = [[(None, None, SOME, True)]]

    def negate(term):
        return (term[0], term[1], term[2], not term[3])

    def _dnf(where):
        """
        Constructs DNF of where tree consisting of terms in form:
            (alias, attribute, value, negation)
        meaning `alias.attribute = value`
         or `not alias.attribute = value` if negation is False

        Any conditions other than eq are dropped.
        """
        if isinstance(where, Lookup):
            # If where.lhs doesn't refer to a field then don't bother
            if not hasattr(where.lhs, 'target'):
                return SOME_TREE
            # Don't bother with complex right hand side either
            if isinstance(where.rhs, (QuerySet, Query)):
                return SOME_TREE
            # Skip conditions on non-serialized fields
            if isinstance(where.lhs.target, NOT_SERIALIZED_FIELDS):
                return SOME_TREE

            attname = where.lhs.target.attname
            if isinstance(where, Exact):
                return [[(where.lhs.alias, attname, where.rhs, True)]]
            elif isinstance(where, IsNull):
                return [[(where.lhs.alias, attname, None, where.rhs)]]
            elif isinstance(where, In) and len(where.rhs) < LONG_DISJUNCTION:
                return [[(where.lhs.alias, attname, v, True)] for v in where.rhs]
            else:
                return SOME_TREE
        elif isinstance(where, EverythingNode):
            return [[]]
        elif isinstance(where, NothingNode):
            return []
        elif isinstance(where, (ExtraWhere, SubqueryConstraint)):
            return SOME_TREE
        elif len(where) == 0:
            return [[]]
        else:
            children_dnfs = lmap(_dnf, where.children)

            if len(children_dnfs) == 0:
                return [[]]
            elif len(children_dnfs) == 1:
                result = children_dnfs[0]
            else:
                # Just unite children joined with OR
                if where.connector == OR:
                    result = lcat(children_dnfs)
                # Use Cartesian product to AND children
                else:
                    result = lmap(lcat, product(*children_dnfs))

            # Negating and expanding brackets
            if where.negated:
                result = [lmap(negate, p) for p in product(*result)]

            return result

    def clean_conj(conj, for_alias):
        conds = {}
        for alias, attname, value, negation in conj:
            # "SOME" conds, negated conds and conds for other aliases should be stripped
            if value is not SOME and negation and alias == for_alias:
                # Conjs with fields eq 2 different values will never cause invalidation
                if attname in conds and conds[attname] != value:
                    return None
                conds[attname] = value
        return conds

    def clean_dnf(tree, aliases):
        cleaned = [clean_conj(conj, alias) for conj in tree for alias in aliases]
        # Remove deleted conjunctions
        cleaned = [conj for conj in cleaned if conj is not None]
        # Any empty conjunction eats up the rest
        # NOTE: a more elaborate DNF reduction is not really needed,
        #       just keep your querysets sane.
        if not all(cleaned):
            return [[]]
        return cleaned

    def query_dnf(query):
        def table_for(alias):
            if alias == main_alias:
                return alias
            return query.alias_map[alias].table_name

        dnf = _dnf(query.where)

        # NOTE: we exclude content_type as it never changes and will hold dead invalidation info
        main_alias = query.model._meta.db_table
        aliases = {alias for alias, (join, cnt) in zip_dicts(query.alias_map, query.alias_refcount)
                   if cnt and family_has_profile(table_to_model(join.table_name))} \
                | {main_alias} - {'django_content_type'}
        tables = group_by(table_for, aliases)
        return {table: clean_dnf(dnf, table_aliases) for table, table_aliases in tables.items()}

    if django.VERSION >= (1, 11) and qs.query.combined_queries:
        return join_with(lcat, (query_dnf(q) for q in qs.query.combined_queries))
    else:
        return query_dnf(qs.query)
Example No. 7
            universal_newlines=True))

#min_n = 3 if len(sys.argv) != 3 else int(sys.argv[1])
#max_n = 10 if len(sys.argv) != 3 else int(sys.argv[2])

#num_expr = 3 if len(sys.argv) != 2 else int(sys.argv[1])
num_expr = 10 if len(sys.argv) != 2 else int(sys.argv[1])
#block = 2**29 // num_expr
block = 2**10 // num_expr
exprs = list(tqdm(
    map(expr_result, [n * block for n in range(1, num_expr)]),
    total=num_expr - 1
))

with open('result_%d.yml' % (num_expr,), 'w') as f:
    yaml.dump(F.join_with(list, exprs), f)
with open('result_%d.yml' % (num_expr,)) as f:
    result_dic = yaml.safe_load(f)

y_keys = F.lremove('data.num', result_dic.keys())
#print(F.lmap(result_dic, y_keys))

import numpy as np
xs = result_dic['data.num']
gradient_dic = {}
for key in y_keys:
    plt.plot(xs, result_dic[key], label=key,
             marker='x' if 'q' in key else 'o',
             linestyle='--' if 'q' in key else '-',)
    gradient_dic[key] = np.polyfit(xs, result_dic[key], 1)[0]
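
One detail worth noting in the snippet above: funcy's extended predicate semantics let a plain string act as a regex, so lremove('data.num', ...) drops the x-axis key and keeps only the timing series. A tiny sketch:

import funcy as F

keys = ['data.num', 'q.push.time', 'h.push.time']
F.lremove('data.num', keys)   # -> ['q.push.time', 'h.push.time']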