def chert_post_load(chert_obj):
    """Chert build hook: substitute project tables into rendered content.

    Replaces the ``[ZEROVER_PROJECT_TABLE]`` and ``[EMERITUS_PROJECT_TABLE]``
    placeholders in every loaded entry part with HTML tables built from the
    projects listed in PROJECTS_JSON_PATH.

    :param chert_obj: chert site object exposing ``all_entries`` whose
        entries carry ``loaded_parts`` dicts with a ``'content'`` key.
    """
    with open(PROJECTS_JSON_PATH) as f:
        data = json.load(f)
    projects = data['projects']
    # Split on the is_zerover flag: truthy -> zerover, falsy -> emeritus.
    zv_projects, emeritus_projects = partition(projects,
                                               lambda p: p['is_zerover'])
    zv_project_table = None
    emeritus_project_table = None
    for entry in chert_obj.all_entries:
        for part in entry.loaded_parts:
            content = part['content']
            if ('[ZEROVER_PROJECT_TABLE]' not in content
                    and '[EMERITUS_PROJECT_TABLE]' not in content):
                continue
            # Build both tables lazily and only once, on first use.
            if zv_project_table is None:
                zv_project_table = _zv_to_htmltable(zv_projects)
                # TODO: emeritus table format
                emeritus_project_table = _emeritus_to_htmltable(
                    emeritus_projects)
            content = content.replace('[ZEROVER_PROJECT_TABLE]',
                                      zv_project_table)
            content = content.replace('[EMERITUS_PROJECT_TABLE]',
                                      emeritus_project_table)
            part['content'] = content
    return
def delete_params(
    cls, company_id: str, task_id: str, hyperparams: Sequence[HyperParamKey]
) -> int:
    """Delete hyperparam fields/sections from a task.

    Keys without a name schedule their whole section for deletion; named
    keys remove a single field within a section.

    BUG FIX: the original signature read ``hyperparams=Sequence[HyperParamKey]``,
    which made the ``typing`` construct the parameter's *default value* and
    left it un-annotated — calling without the argument would pass a typing
    object instead of data. Changed to a proper annotation, matching the
    other ``delete_params`` overload in this file.

    :param company_id: owning company id, used for the task lookup
    :param task_id: id of the task to update
    :param hyperparams: keys to delete (section required; name optional)
    :raises errors.bad_request.FieldsConflict: when a named key belongs to a
        section that is also scheduled for full deletion
    :return: result of ``task.update`` (number of modified documents)
    """
    properties_only = cls._normalize_params(hyperparams)
    task = cls._get_task_for_update(
        company=company_id, id=task_id, allow_all_statuses=properties_only
    )
    # Named params delete a single field; unnamed ones delete whole sections.
    with_param, without_param = iterutils.partition(
        hyperparams, key=lambda p: bool(p.name)
    )
    sections_to_delete = {p.section for p in without_param}
    delete_cmds = {
        f"unset__hyperparams__{ParameterKeyEscaper.escape(section)}": 1
        for section in sections_to_delete
    }
    for item in with_param:
        section = ParameterKeyEscaper.escape(item.section)
        if item.section in sections_to_delete:
            raise errors.bad_request.FieldsConflict(
                "Cannot delete section field if the whole section was scheduled for deletion"
            )
        name = ParameterKeyEscaper.escape(item.name)
        delete_cmds[f"unset__hyperparams__{section}__{name}"] = 1
    return task.update(**delete_cmds, last_update=datetime.utcnow())
def _main():
    """Load projects.json and split its projects by the is_zerover flag."""
    with open('projects.json') as f:
        project_data = json.load(f)
    all_projects = project_data['projects']
    # Truthy is_zerover -> zerover list; falsy -> emeritus list.
    zv_projects, emeritus_projects = partition(
        all_projects, lambda proj: proj['is_zerover'])
    return
def split_projection(
    cls, projection: Sequence[str]
) -> Tuple[Collection[str], Collection[str]]:
    """Return include and exclude lists based on passed projection and class definition"""
    if not projection:
        include, exclude = [], []
    else:
        # Fields NOT starting with the exclusion prefix are includes.
        include, exclude = partition(
            projection,
            key=lambda field: field[0] != ProjectionHelper.exclusion_prefix,
        )
    # NOTE(review): lstrip removes *all* leading prefix characters, not a
    # single occurrence — presumably intentional; confirm against callers.
    stripped_exclude = {
        field.lstrip(ProjectionHelper.exclusion_prefix) for field in exclude
    }
    excluded = set(cls.get_exclude_fields()).union(stripped_exclude)
    return include, excluded.difference(include)
def __init__(
    self,
    is_published: Callable[[T], bool],
    document_type: Type[T],
    children: Iterable[T],
):
    """
    :param is_published: predicate returning whether items is considered published
    :param document_type: type of output
    :param children: output documents
    """
    # partition() yields (published, draft); wrap each in a DocumentGroup.
    published_children, draft_children = partition(children, key=is_published)
    self.published = DocumentGroup(document_type, published_children)
    self.draft = DocumentGroup(document_type, draft_children)
def chain_argspec(func_list, provides, inner_name):
    """Walk a middleware chain and compute its aggregate argument needs.

    :param func_list: middleware callables, outermost first
    :param provides: per-function iterables of names each one provides
    :param inner_name: name of the innermost function; treated as provided
    :return: (required, optional) sets of argument names for the chain
    """
    provided_sofar = {inner_name}  # the inner function name is an extremely special case
    optional_sofar = set()
    required_sofar = set()
    for func, provided in zip(func_list, provides):
        # middlewares can default the same parameter to different values;
        # can't properly keep track of default values
        fb = get_fb(func)
        defaults = fb.get_defaults_dict()
        defaulted, undefaulted = iterutils.partition(
            fb.get_arg_names(), key=lambda arg: arg in defaults)
        optional_sofar.update(defaulted)
        # keep track of defaults so that e.g. endpoint default param
        # can pick up request injected/provided param
        required_sofar |= set(undefaulted) - provided_sofar
        provided_sofar.update(provided)
    return required_sofar, optional_sofar
def delete_params(
    cls,
    company_id: str,
    task_id: str,
    hyperparams: Sequence[HyperParamKey],
    force: bool,
) -> int:
    """Delete hyperparam fields/sections from a task.

    Unnamed keys schedule their whole section for deletion; named keys
    remove a single field within a section.

    :param company_id: owning company id, used for the task lookup
    :param task_id: id of the task to update
    :param hyperparams: keys to delete (section required; name optional)
    :param force: passed through to get_task_for_update
    :raises errors.bad_request.FieldsConflict: when a named key belongs to a
        section that is also scheduled for full deletion
    :return: result of update_task
    """
    with TimingContext("mongo", "delete_hyperparams"):
        properties_only = cls._normalize_params(hyperparams)
        task = get_task_for_update(
            company_id=company_id,
            task_id=task_id,
            allow_all_statuses=properties_only,
            force=force,
        )
        # Named params delete one field; unnamed ones delete whole sections.
        named, unnamed = iterutils.partition(
            hyperparams, key=lambda p: bool(p.name)
        )
        doomed_sections = {p.section for p in unnamed}
        unset_cmds = {}
        for doomed in doomed_sections:
            unset_cmds[f"unset__hyperparams__{ParameterKeyEscaper.escape(doomed)}"] = 1
        for param in named:
            escaped_section = ParameterKeyEscaper.escape(param.section)
            if param.section in doomed_sections:
                raise errors.bad_request.FieldsConflict(
                    "Cannot delete section field if the whole section was scheduled for deletion"
                )
            escaped_name = ParameterKeyEscaper.escape(param.name)
            unset_cmds[f"unset__hyperparams__{escaped_section}__{escaped_name}"] = 1
        return update_task(
            task, update_cmds=unset_cmds, set_last_update=not properties_only
        )
def from_api(cls, campaign, timestamp=None):
    """Build a campaign-state snapshot by scanning every campaign article.

    Fetches per-article metrics concurrently (gevent), evaluates each
    article against the campaign goals, then aggregates per-goal and
    campaign-wide counts/ratios.

    :param campaign: campaign object exposing name, lang, goals, and
        article_title_list
    :param timestamp: snapshot time; defaults to now (UTC, naive)
    :return: instance of cls with article/goal/campaign results populated
    """
    timestamp = timestamp if timestamp is not None else datetime.datetime.utcnow(
    )
    ret = cls(campaign=campaign,
              timestamp=timestamp,
              campaign_results=None,
              goal_results=None,
              article_results=None)
    article_list = []
    article_title_list = campaign.article_title_list
    # Progress-bar label; drop sub-second precision from the ISO timestamp.
    base_desc = 'Scanning %s @ %s' % (campaign.name,
                                      timestamp.isoformat().split('.')[0])
    article_title_list = tqdm(
        article_title_list,
        desc=base_desc,
        disable=None,  # autodisable on non-tty
        unit='article')

    def async_pta_update(pta, attr_func_map):
        # Fetch several metrics concurrently; each greenlet sets one
        # attribute on pta. Waits at most 20s — attrs whose greenlet
        # missed the deadline are simply left unset.
        jobs = []
        for attr, func in attr_func_map.items():
            _debug_log_func = tlog.wrap('debug')(func)
            # Bind loop vars as defaults to avoid late-binding closures.
            cur = gevent.spawn(
                lambda pta=pta, attr=attr, func=_debug_log_func: setattr(
                    pta, attr, func(pta)))
            jobs.append(cur)
        gevent.wait(jobs, timeout=20)
        return

    for title in article_title_list:
        # Show the current article (fixed-width) in the progress bar.
        new_desc = base_desc + ' ({:16.16})'.format(title)
        article_title_list.set_description(new_desc)
        pta = PTArticle(lang=campaign.lang, title=title, timestamp=timestamp)
        pta.talk_title = 'Talk:' + title
        # First round: revision ids for the article and its talk page.
        async_pta_update(pta, {
            'rev_id': metrics.get_revid,
            'talk_rev_id': metrics.get_talk_revid
        })
        if pta.rev_id:
            # Article exists — fetch the content-dependent metrics.
            async_pta_update(
                pta, {
                    'templates': metrics.get_templates,
                    'talk_templates': metrics.get_talk_templates,
                    'assessments': metrics.get_assessments,
                    'citations': metrics.get_citations,
                    'wikidata_item': metrics.get_wikidata_item
                })
            pta.wikiprojects = metrics.get_wikiprojects(
                pta)  # relies on templates (no network)
        pta.results = eval_article_goals(pta, campaign.goals)
        article_list.append(pta)
    ret.article_list = article_list
    gres = {}  # goal results
    for goal in campaign.goals:
        key = slugify(goal['name'])
        target_ratio = float(goal.get('ratio', 1.0))
        results = [a.results[key]['done'] for a in article_list]
        # TODO: average/median metric value
        # partition() without a key splits on truthiness of 'done'.
        done, not_done = partition(results)
        # TODO: need to integrate start state for progress tracking
        ratio = 1.0 if not not_done else float(
            len(done)) / len(article_list)
        gres[key] = {
            'done_count': len(done),
            'not_done_count': len(not_done),
            'total_count': len(article_list),
            'ratio': ratio,
            'target_ratio': target_ratio,
            'key': key,
            'name': goal['name'],
            'desc': goal.get('desc'),
            'progress': ratio / target_ratio,
            'done': ratio >= target_ratio
        }
    # Roll per-goal counts up into campaign totals via glom.
    ret.campaign_results = glom(
        gres, {
            'done_count': (T.values(), ['done_count'], sum),
            'not_done_count': (T.values(), ['not_done_count'], sum),
            'total_count': (T.values(), ['total_count'], sum)
        })
    ret.campaign_results['ratio'] = ret.campaign_results[
        'done_count'] / ret.campaign_results['total_count']
    ret.goal_results = gres
    ret.article_results = [attr.asdict(a) for a in article_list]
    return ret