Esempio n. 1
0
def chert_post_load(chert_obj):
    """Fill the project-table placeholders in every loaded part.

    Loads the ``projects`` list from ``PROJECTS_JSON_PATH``, splits it with
    ``partition`` on each project's ``is_zerover`` value, and then walks
    every part of every entry in ``chert_obj.all_entries``.  In each part
    whose ``content`` contains ``[ZEROVER_PROJECT_TABLE]`` or
    ``[EMERITUS_PROJECT_TABLE]``, both markers are replaced with the
    corresponding rendered table.

    The tables are rendered lazily, the first time a part needs them, and
    reused for all later parts.  Parts are modified in place; nothing is
    returned.
    """
    zv_marker = '[ZEROVER_PROJECT_TABLE]'
    emeritus_marker = '[EMERITUS_PROJECT_TABLE]'

    with open(PROJECTS_JSON_PATH) as f:
        projects = json.load(f)['projects']

    zv_projects, emeritus_projects = partition(projects,
                                               lambda p: p['is_zerover'])
    zv_table = None
    emeritus_table = None

    for entry in chert_obj.all_entries:
        for part in entry.loaded_parts:
            text = part['content']
            if not (zv_marker in text or emeritus_marker in text):
                continue
            if zv_table is None:
                # First part that needs a table: render both now.
                zv_table = _zv_to_htmltable(zv_projects)
                # TODO: emeritus table format
                emeritus_table = _emeritus_to_htmltable(emeritus_projects)
            text = text.replace(zv_marker, zv_table)
            part['content'] = text.replace(emeritus_marker, emeritus_table)
    return
Esempio n. 2
0
    def delete_params(cls,
                      company_id: str,
                      task_id: str,
                      hyperparams: Sequence[HyperParamKey]) -> int:
        """Unset hyperparameters, or whole hyperparameter sections, on a task.

        Each key in ``hyperparams`` either names a single parameter
        (``section`` + ``name``) or, when its ``name`` is empty, designates
        the whole ``section`` for deletion.

        :param company_id: company passed to the task lookup
        :param task_id: id of the task to update
        :param hyperparams: keys describing what to delete
        :return: the result of ``task.update`` (annotated as ``int``)
        :raises errors.bad_request.FieldsConflict: when a single parameter is
            requested from a section that is also being deleted as a whole
        """
        # NOTE: ``hyperparams`` used to be declared as
        # ``hyperparams=Sequence[HyperParamKey]``, which made the typing alias
        # the *default value* rather than the annotation.
        properties_only = cls._normalize_params(hyperparams)
        task = cls._get_task_for_update(company=company_id,
                                        id=task_id,
                                        allow_all_statuses=properties_only)

        # Keys with a name target one parameter; keys without a name target
        # the entire section.
        with_param, without_param = iterutils.partition(
            hyperparams, key=lambda p: bool(p.name))
        sections_to_delete = {p.section for p in without_param}
        delete_cmds = {
            f"unset__hyperparams__{ParameterKeyEscaper.escape(section)}": 1
            for section in sections_to_delete
        }

        for item in with_param:
            section = ParameterKeyEscaper.escape(item.section)
            # The conflict check compares the raw (unescaped) section name.
            if item.section in sections_to_delete:
                raise errors.bad_request.FieldsConflict(
                    "Cannot delete section field if the whole section was scheduled for deletion"
                )
            name = ParameterKeyEscaper.escape(item.name)
            delete_cmds[f"unset__hyperparams__{section}__{name}"] = 1

        return task.update(**delete_cmds, last_update=datetime.utcnow())
Esempio n. 3
0
def _main():
    """Load ``projects.json`` and split its projects on ``is_zerover``.

    The two resulting groups are computed but not used further; the
    function returns ``None``.
    """
    with open('projects.json') as f:
        projects = json.load(f)['projects']

    by_zerover = partition(projects, lambda p: p['is_zerover'])
    zv_projects, emeritus_projects = by_zerover

    return
Esempio n. 4
0
 def split_projection(
     cls, projection: Sequence[str]
 ) -> Tuple[Collection[str], Collection[str]]:
     """Split a projection into the fields to include and the fields to exclude.

     Entries whose first character differs from
     ``ProjectionHelper.exclusion_prefix`` are the include list.  The
     remaining entries, with the prefix characters stripped from the left,
     are merged with ``cls.get_exclude_fields()``; anything that was
     explicitly included is then removed from that exclude set.

     :return: ``(include, exclude)`` -- ``exclude`` is always a set
     """
     prefix = ProjectionHelper.exclusion_prefix

     include, prefixed = [], []
     if projection:
         include, prefixed = partition(
             projection, key=lambda field: field[0] != prefix,
         )

     requested_exclude = {field.lstrip(prefix) for field in prefixed}
     all_excluded = set(cls.get_exclude_fields()) | requested_exclude
     # An explicit include wins over both default and requested excludes.
     return include, all_excluded.difference(include)
Esempio n. 5
0
 def __init__(
     self,
     is_published: Callable[[T], bool],
     document_type: Type[T],
     children: Iterable[T],
 ):
     """
     Split ``children`` into a published group and a draft group.

     :param is_published: predicate returning whether an item is considered published
     :param document_type: type of output
     :param children: output documents
     """
     published_items, draft_items = partition(children, key=is_published)
     # Build both groups before assigning, so a failure leaves neither set.
     published_group = DocumentGroup(document_type, published_items)
     draft_group = DocumentGroup(document_type, draft_items)
     self.published, self.draft = published_group, draft_group
Esempio n. 6
0
def chain_argspec(func_list, provides, inner_name):
    """Compute the required and optional argument names of a function chain.

    ``func_list`` and ``provides`` are walked in lockstep.  For each
    function, arguments that have a default are recorded as optional, and
    arguments without a default that nothing earlier in the chain has
    provided are recorded as required.  The names the function provides
    then become available to the functions after it.

    :param func_list: functions in chain order
    :param provides: for each function, the names it provides downstream
    :param inner_name: name treated as already provided from the start
    :return: ``(required, optional)`` sets of argument names
    """
    # the inner function name is an extremely special case
    provided_sofar = {inner_name}
    optional_sofar = set()
    required_sofar = set()

    for func, func_provides in zip(func_list, provides):
        # middlewares can default the same parameter to different values;
        # can't properly keep track of default values
        builder = get_fb(func)
        arg_names = builder.get_arg_names()
        defaults = builder.get_defaults_dict()

        with_default, without_default = iterutils.partition(
            arg_names, key=defaults.__contains__)

        optional_sofar.update(with_default)
        # keep track of defaults so that e.g. endpoint default param
        # can pick up request injected/provided param
        required_sofar.update(set(without_default) - provided_sofar)
        provided_sofar.update(func_provides)

    return required_sofar, optional_sofar
Esempio n. 7
0
def chain_argspec(func_list, provides, inner_name):
    """Return the ``(required, optional)`` argument-name sets for a chain.

    Each function in ``func_list`` is paired with the matching entry of
    ``provides``.  Arguments with defaults are collected as optional.
    Arguments without defaults are collected as required unless an earlier
    function (or ``inner_name``) has already provided them.  After a
    function is processed, the names it provides are added to the provided
    set seen by later functions.
    """
    # the inner function name is an extremely special case
    provided_sofar = {inner_name}
    required_sofar, optional_sofar = set(), set()

    for func, provided_names in zip(func_list, provides):
        # middlewares can default the same parameter to different values;
        # can't properly keep track of default values
        fb = get_fb(func)
        names = fb.get_arg_names()
        has_default = fb.get_defaults_dict().__contains__

        defaulted, undefaulted = iterutils.partition(names, key=has_default)

        optional_sofar |= set(defaulted)
        # keep track of defaults so that e.g. endpoint default param
        # can pick up request injected/provided param
        still_missing = set(undefaulted) - provided_sofar
        required_sofar |= still_missing
        provided_sofar.update(provided_names)

    return required_sofar, optional_sofar
Esempio n. 8
0
    def delete_params(
        cls,
        company_id: str,
        task_id: str,
        hyperparams: Sequence[HyperParamKey],
        force: bool,
    ) -> int:
        """Unset hyperparameters, or whole hyperparameter sections, on a task.

        A key with an empty ``name`` schedules its whole ``section`` for
        deletion; a key with a name schedules just that parameter.

        :param company_id: company passed to the task lookup
        :param task_id: id of the task to update
        :param hyperparams: keys describing what to delete
        :param force: forwarded to ``get_task_for_update``
        :return: the result of ``update_task`` (annotated as ``int``)
        :raises errors.bad_request.FieldsConflict: when a single parameter is
            requested from a section that is also being deleted as a whole
        """
        with TimingContext("mongo", "delete_hyperparams"):
            properties_only = cls._normalize_params(hyperparams)
            task = get_task_for_update(
                company_id=company_id,
                task_id=task_id,
                allow_all_statuses=properties_only,
                force=force,
            )

            named_keys, section_keys = iterutils.partition(
                hyperparams, key=lambda key: bool(key.name)
            )
            whole_sections = {key.section for key in section_keys}

            delete_cmds = {}
            for whole_section in whole_sections:
                escaped = ParameterKeyEscaper.escape(whole_section)
                delete_cmds[f"unset__hyperparams__{escaped}"] = 1

            for key in named_keys:
                escaped_section = ParameterKeyEscaper.escape(key.section)
                # The conflict check uses the raw (unescaped) section name.
                if key.section in whole_sections:
                    raise errors.bad_request.FieldsConflict(
                        "Cannot delete section field if the whole section was scheduled for deletion"
                    )
                escaped_name = ParameterKeyEscaper.escape(key.name)
                delete_cmds[
                    f"unset__hyperparams__{escaped_section}__{escaped_name}"
                ] = 1

            return update_task(
                task,
                update_cmds=delete_cmds,
                set_last_update=not properties_only,
            )
Esempio n. 9
0
    def from_api(cls, campaign, timestamp=None):
        """Build an instance by scanning every article of ``campaign``.

        For each title in ``campaign.article_title_list`` a ``PTArticle`` is
        created, its metrics are fetched concurrently, and it is evaluated
        against ``campaign.goals``.  Per-goal and campaign-wide summaries are
        then computed and stored on the returned object.

        :param campaign: object providing ``name``, ``lang``, ``goals`` and
            ``article_title_list``
        :param timestamp: time of the scan; defaults to
            ``datetime.datetime.utcnow()`` when ``None``
        :return: the new instance with ``article_list``, ``goal_results``,
            ``campaign_results`` and ``article_results`` populated
        """
        timestamp = timestamp if timestamp is not None else datetime.datetime.utcnow(
        )
        ret = cls(campaign=campaign,
                  timestamp=timestamp,
                  campaign_results=None,
                  goal_results=None,
                  article_results=None)

        article_list = []
        article_title_list = campaign.article_title_list

        # Progress-bar label; the timestamp is cut at the first '.' to drop
        # fractional seconds.
        base_desc = 'Scanning %s @ %s' % (campaign.name,
                                          timestamp.isoformat().split('.')[0])
        article_title_list = tqdm(
            article_title_list,
            desc=base_desc,
            disable=None,  # autodisable on non-tty
            unit='article')

        def async_pta_update(pta, attr_func_map):
            # Spawn one greenlet per attribute; each stores func(pta) on pta
            # under that attribute name.
            jobs = []
            for attr, func in attr_func_map.items():
                _debug_log_func = tlog.wrap('debug')(func)
                # Default arguments bind this iteration's values, so the
                # lambda does not see later loop values.
                cur = gevent.spawn(
                    lambda pta=pta, attr=attr, func=_debug_log_func: setattr(
                        pta, attr, func(pta)))
                jobs.append(cur)
            # NOTE(review): presumably a job that has not finished within
            # the timeout leaves its attribute unset -- confirm against the
            # gevent.wait documentation.
            gevent.wait(jobs, timeout=20)
            return

        for title in article_title_list:
            new_desc = base_desc + ' ({:16.16})'.format(title)
            article_title_list.set_description(new_desc)
            pta = PTArticle(lang=campaign.lang,
                            title=title,
                            timestamp=timestamp)
            pta.talk_title = 'Talk:' + title
            async_pta_update(pta, {
                'rev_id': metrics.get_revid,
                'talk_rev_id': metrics.get_talk_revid
            })

            # The remaining metrics are only fetched when a revision id was
            # obtained for the article.
            if pta.rev_id:
                async_pta_update(
                    pta, {
                        'templates': metrics.get_templates,
                        'talk_templates': metrics.get_talk_templates,
                        'assessments': metrics.get_assessments,
                        'citations': metrics.get_citations,
                        'wikidata_item': metrics.get_wikidata_item
                    })
                pta.wikiprojects = metrics.get_wikiprojects(
                    pta)  # relies on templates (no network)

            pta.results = eval_article_goals(pta, campaign.goals)

            article_list.append(pta)
        ret.article_list = article_list

        gres = {}  # goal results
        for goal in campaign.goals:
            key = slugify(goal['name'])
            target_ratio = float(goal.get('ratio', 1.0))
            results = [a.results[key]['done'] for a in article_list]
            # TODO: average/median metric value

            # NOTE(review): partition() without a key presumably splits on
            # truthiness (done vs. not done) -- confirm.
            done, not_done = partition(results)
            # TODO: need to integrate start state for progress tracking
            # Nothing left to do counts as fully done; this also covers an
            # empty article list without dividing by zero.
            ratio = 1.0 if not not_done else float(
                len(done)) / len(article_list)
            gres[key] = {
                'done_count': len(done),
                'not_done_count': len(not_done),
                'total_count': len(article_list),
                'ratio': ratio,
                'target_ratio': target_ratio,
                'key': key,
                'name': goal['name'],
                'desc': goal.get('desc'),
                'progress': ratio / target_ratio,
                'done': ratio >= target_ratio
            }

        # Campaign-wide totals: sum each count across all goals.
        ret.campaign_results = glom(
            gres, {
                'done_count': (T.values(), ['done_count'], sum),
                'not_done_count': (T.values(), ['not_done_count'], sum),
                'total_count': (T.values(), ['total_count'], sum)
            })
        # With no goals or no articles the summed total is 0.  Use the same
        # "nothing left to do means 1.0" convention as the per-goal ratio
        # instead of raising ZeroDivisionError.
        total_count = ret.campaign_results['total_count']
        if total_count:
            ret.campaign_results['ratio'] = ret.campaign_results[
                'done_count'] / total_count
        else:
            ret.campaign_results['ratio'] = 1.0

        ret.goal_results = gres
        ret.article_results = [attr.asdict(a) for a in article_list]
        return ret