def __init__(self, index, dc_collection, unit_data):
    """Set up the zygosity filter unit for a workspace index.

    Registers itself as a special unit in the condition environment
    and, when a usable family (more than one member) is present,
    allocates one integer data column per family member.
    """
    FilterUnit.__init__(self, index, unit_data, "zygosity")
    ws_app = self.getIndex().getWS().getApp()
    self._setScreened(ws_app.hasRunOption("no-custom"))
    self.mFamilyInfo = self.getIndex().getWS().getFamilyInfo()
    # A "size" declared in the descriptor must agree with the family
    assert ("size" not in unit_data
        or unit_data["size"] == len(self.mFamilyInfo))
    self.mIsOK = (self.mFamilyInfo is not None
        and len(self.mFamilyInfo) > 1)
    if not self.mIsOK:
        return
    # One integer column per family member: "<unit>_<member index>"
    self.mColumns = []
    for member_no, _member in enumerate(self.mFamilyInfo.getMembers()):
        self.mColumns.append(dc_collection.makeColumn(
            self, "%s_%d" % (self.getName(), member_no),
            dc_collection.ATOM_DATA_TYPE_INT))
    self.mConfig = unit_data.get("config", dict())
    self.mXCondition = None
    case_labels = AnfisaConfig.configOption("zygosity.cases")
    self.mVariantSet = VariantSet([case_labels[key]
        for key in ("homo_recess", "x_linked", "dominant", "compens")])
    self.getIndex().getCondEnv().addSpecialUnit(self)
def rq__xl_export(self, rq_args):
    """Handle an export request: build an Excel file with the records
    matching the supplied conditions and report its file name."""
    cond = self.mCondEnv.parseSeq(json.loads(rq_args["conditions"]))
    total = self.evalTotalCount(cond)
    # Refuse exports that exceed the configured limit
    assert total <= AnfisaConfig.configOption("max.export.size")
    selected = self.evalRecSeq(cond, total)
    fname = self.getApp().makeExcelExport(self.getName(), self, selected)
    return {"kind": "excel", "fname": fname}
def reportRecord(output, workspace, research_mode, rec_no, port):
    """Render the single-record HTML page: one tab per view aspect
    plus the tags tab.  `port` selects the onload init mode."""
    def wr(text):
        # Shorthand: emit one line of HTML
        print >> output, text

    startHtmlPage(output,
        css_files=["base.css", "a_rec.css", "tags.css"],
        js_files=["a_rec.js", "tags.js"])
    if port == "2":
        wr('<body onload="init_r(2, \'%s\');">'
            % workspace.getFirstAspectID())
    elif port == "1":
        wr('<body onload="init_r(1, \'%s\');">'
            % workspace.getLastAspectID())
    else:
        wr('<body onload="init_r(0, \'%s\', \'%s\', %d);">'
            % (workspace.getFirstAspectID(), workspace.getName(), rec_no))
    wr('<div class="r-tab">')
    wr('<span id="img-wrap" onclick="tabCfgChange();">'
        '<img id="img-tab2" src="ui/images/tab2-exp.png"/></span>')
    asp_data_seq = workspace.getViewRepr(rec_no, research_mode)
    # Tab buttons: one per aspect, then the tags tab
    for asp_data in asp_data_seq:
        wr('<button class="r-tablnk %s" id="la--%s" '
            'onclick="pickAspect(\'%s\')">%s</button>'
            % (asp_data["kind"], asp_data["name"], asp_data["name"],
            AnfisaConfig.decorText(asp_data["title"])))
    tags_asp_name = AnfisaConfig.configOption("aspect.tags.name")
    wr('<button class="r-tablnk %s" id="la--%s" '
        'onclick="pickAspect(\'%s\')">%s</button>'
        % ("tech", tags_asp_name, tags_asp_name,
        AnfisaConfig.textMessage("aspect.tags.title")))
    wr('</div>')
    # Tab contents, in the same order as the buttons
    wr('<div id="r-cnt-container">')
    for asp_data in asp_data_seq:
        wr('<div id="a--%s" class="r-tabcnt">' % asp_data["name"])
        _reportAspect(output, asp_data)
        wr('</div>')
    wr('<div id="a--%s" class="r-tabcnt">' % tags_asp_name)
    tagsBlock(output)
    wr('</div>')
    wr('</div>')
    wr('</body>')
    wr('</html>')
def __init__(self, dataset_h, descr):
    """Construct the XL zygosity unit; the family descriptor, when
    present, is installed into the dataset on first use."""
    XL_Unit.__init__(self, dataset_h, descr)
    ds_h = self.getDS()
    if descr.get("family") and ds_h.getFamilyInfo() is None:
        ds_h._setFamilyInfo(descr["family"])
    self._setScreened(ds_h.getApp().hasRunOption("no-custom"))
    family_info = ds_h.getFamilyInfo()
    # Unit is functional only with a family of more than one member
    self.mIsOK = (family_info is not None and len(family_info) > 1)
    self.mLabels = AnfisaConfig.configOption("zygosity.cases")
    self.mConfig = descr.get("config", dict())
    self.mXCondition = None
    ds_h.getCondEnv().addSpecialUnit(self)
def rq__xl_export(self, rq_args):
    """Export the records matching the request conditions to an Excel
    file (XL dataset variant) and report its file name."""
    context = {"cond": XL_Condition.parseSeq(
        json.loads(rq_args["conditions"]), self.getParseContext())}
    total = self.evalTotalCount(context)
    # Refuse exports that exceed the configured limit
    assert total <= AnfisaConfig.configOption("max.export.size")
    selected = self.evalRecSeq(context, total)
    fname = self.getDataVault().getApp().makeExcelExport(
        self.getName(), self, selected)
    return {"kind": "excel", "fname": fname}
def __init__(self, xl_ds, descr):
    """Construct the XL zygosity unit variant that carries a parsed
    X-chromosome condition (configurable via "x_cond")."""
    XL_Unit.__init__(self, xl_ds, descr)
    ds_h = self.getDS()
    if descr.get("family") and ds_h.getFamilyInfo() is None:
        ds_h._setFamilyInfo(descr["family"])
    family_info = ds_h.getFamilyInfo()
    # Unit is functional only with a family of more than one member
    self.mIsOK = (family_info is not None and len(family_info) > 1)
    self.mLabels = AnfisaConfig.configOption("zygosity.cases")
    self.mConfig = descr.get("config", dict())
    # Default X condition selects chromosome X unless overridden
    self.mXCondition = XL_Condition.parse(self.mConfig.get("x_cond",
        ConditionMaker.condEnum("Chromosome", ["chrX"])))
def __init__(self, dataset, ws_name, base_version=None,
        conditions=None, markup_batch=None):
    """Task that derives a secondary workspace from a dataset.

    Record selection comes either from a stored tree version
    (base_version) or from explicit conditions; markup_batch, when
    given, post-processes the copied records.
    """
    ExecutionTask.__init__(self, "Secondary WS creation")
    self.mDS = dataset
    self.mWSName = ws_name
    self.mMarkupBatch = markup_batch
    self.mBaseVersion = base_version
    self.mConditions = conditions
    # Progress status is refreshed every this many records
    self.mReportLines = AnfisaConfig.configOption("report.lines")
def collectRecSeq(self, dataset):
    """Collect record numbers selected by all accepting terminal
    points of the tree, as a sorted list.

    Both each point's selection and the accumulated set must stay
    below the configured workspace size limit.
    """
    max_ws_size = AnfisaConfig.configOption("max.ws.size")
    collected = set()
    for point in self.mPointList:
        # Only terminal points with a positive decision select records
        if point.getPointKind() != "term" or point.getDecision() is not True:
            continue
        condition = point.actualCondition()
        count = dataset.evalTotalCount(condition)
        assert count < max_ws_size
        if count > 0:
            collected.update(dataset.evalRecSeq(condition, count))
    assert len(collected) < max_ws_size
    return sorted(collected)
def __init__(self, proband_rel):
    """Prepare field names and accumulators for compound-heterozygous
    detection, using the configured comp-hets setup."""
    setup = AnfisaConfig.configOption("comp-hets.setup")
    zygosity_name = setup["zygosity"]
    # One zygosity field per proband relative: "<zygosity>_<member>"
    self.mF_zFamily = []
    for member_idx in proband_rel:
        self.mF_zFamily.append(zygosity_name + '_' + str(member_idx))
    self.mF_Genes = setup["Genes"]
    self.mF_Result = setup["Compound_heterozygous"]
    self.mView_Result = setup["ws_compound_heterosygotes"]
    # Gene sets collected per side of the family
    self.mGenesF = defaultdict(set)
    self.mGenesM = defaultdict(set)
    self.mCounts = [0, 0, 0]
    self.mResTab = None
class DruidAgent:
    """Thin client around the Druid REST endpoints used by the app,
    also holding the standard XL filters and vault name prefixing."""
    GRANULARITY = "all"
    INTERVAL = "2015-01-01/2015-12-31"
    # Default endpoint URLs; each can be overridden via config["druid"]
    sDefaultUrls = {
        "index": "http://localhost:8090/druid/indexer/v1/task",
        "query": "http://localhost:8082/druid/v2",
        "sql": "http://localhost:8082/druid/v2/sql",
        "coord": "http://localhost:8081/druid/coordinator/v1"}
    sStdFMark = AnfisaConfig.configOption("filter.std.mark")

    def __init__(self, config):
        druid_cfg = config.get("druid", dict())
        # One REST agent per endpoint mode
        self.mRestAgents = dict()
        for mode, url in self.sDefaultUrls.items():
            self.mRestAgents[mode] = RestAgent(
                druid_cfg.get(mode, url), mode)
        # Standard filters are stored under their marked names
        self.mStdFilters = dict()
        for flt_name, conditions in STD_XL_FILTERS:
            self.mStdFilters[self.sStdFMark + flt_name] = \
                deepcopy(conditions)
        self.mVaultPrefix = druid_cfg["vault-prefix"]

    def call(self, mode, request_data, method="POST", add_path=""):
        """Forward a request to the agent of the given endpoint mode."""
        agent = self.mRestAgents[mode]
        return agent.call(request_data, method, add_path)

    def getStdFilterNames(self):
        return self.mStdFilters.keys()

    def goodOpFilterName(self, flt_name):
        # Operational (user) filter name: non-empty, starts with a
        # letter, no spaces, not marked as standard
        return (flt_name and flt_name[0].isalpha()
            and ' ' not in flt_name
            and not flt_name.startswith(self.sStdFMark))

    def hasStdFilter(self, filter_name):
        return filter_name in self.mStdFilters

    def getStdFilterConditions(self, flt_name):
        return self.mStdFilters.get(flt_name)

    def normDataSetName(self, ds_name):
        """Prefix the dataset name with the vault prefix, when one is
        configured; the name must not be prefixed already."""
        if not self.mVaultPrefix:
            return ds_name
        assert not ds_name.startswith(self.mVaultPrefix)
        return self.mVaultPrefix + '.' + ds_name
def execIt(self):
    """Create a secondary workspace from the dataset.

    Record selection comes either from a stored decision-tree version
    (self.mBaseVersion) or from explicit conditions (self.mConditions).
    Writes the vdata/fdata/pdata files plus the dsinfo descriptor into
    the new workspace directory, then registers it in the data vault.

    Returns {"ws": name} on success, None on a validation failure
    (bad name or directory already present).
    """
    if not self.correctWSName(self.mWSName):
        self.setStatus("Incorrect workspace name")
        return None
    self.setStatus("Prepare creation")
    logging.info("Prepare workspace creation: %s" % self.mWSName)
    # Select record numbers: decision-tree version or explicit conditions
    if (self.mBaseVersion is not None):
        tree = DecisionTree.parse(self.mDS.getMongoDS().getVersionTree(
            self.mBaseVersion))
        rec_no_seq = tree.collectRecSeq(self.mDS)
    else:
        context = {"cond": XL_Condition.parseSeq(
            json.loads(self.mConditions), self.mDS.getParseContext())}
        rec_count = self.mDS.evalTotalCount(context)
        # Refuse workspaces above the configured size limit
        assert rec_count <= AnfisaConfig.configOption("max.ws.size")
        rec_no_seq = self.mDS.evalRecSeq(context, rec_count)
    rec_no_seq = sorted(rec_no_seq)
    rec_no_set = set(rec_no_seq)
    ws_dir = self.mDS.getDataVault().getDir() + "/" + self.mWSName
    if os.path.exists(ws_dir):
        self.setStatus("Dataset already exists")
        return None
    # Load the filter data of the selected records only
    fdata_seq = []
    with self.mDS._openFData() as inp:
        for rec_no, line in enumerate(inp):
            if rec_no in rec_no_set:
                fdata_seq.append(json.loads(line.rstrip()))
    assert len(fdata_seq) == len(rec_no_seq)
    view_schema = deepcopy(self.mDS.getViewSchema())
    flt_schema = deepcopy(self.mDS.getFltSchema())
    # Optional markup pass: may extend both schemas and patch the data
    if self.mMarkupBatch is not None:
        self.setStatus("Markup evaluation")
        for rec_no, fdata in zip(rec_no_seq, fdata_seq):
            self.mMarkupBatch.feed(rec_no, fdata)
        self.mMarkupBatch.finishUp(view_schema, flt_schema)
        for rec_no, fdata in zip(rec_no_seq, fdata_seq):
            self.mMarkupBatch.transformFData(rec_no, fdata)
    os.mkdir(ws_dir)
    logging.info("Fill workspace %s datafiles..." % self.mWSName)
    # View data: one indexed-bz2 line per record
    with FormatterIndexBZ2(ws_dir + "/vdata.ixbz2") as vdata_out:
        for out_rec_no, rec_no in enumerate(rec_no_seq):
            if out_rec_no > 0 and (out_rec_no % self.mReportLines) == 0:
                self.setStatus("Prepare records: %d/%d"
                    % (out_rec_no, len(rec_no_seq)))
            rec_data = self.mDS.getRecordData(rec_no)
            if self.mMarkupBatch is not None:
                self.mMarkupBatch.transformRecData(rec_no, rec_data)
            vdata_out.putLine(json.dumps(rec_data, ensure_ascii=False))
    self.setStatus("Prepare fdata")
    with gzip.open(ws_dir + "/fdata.json.gz", 'wb') as fdata_out:
        for fdata in fdata_seq:
            print >> fdata_out, json.dumps(fdata, ensure_ascii=False)
    self.setStatus("Prepare pdata")
    # Presentation data is copied line by line, filtered by record set
    with gzip.open(ws_dir + "/pdata.json.gz", 'wb') as fdata_out:
        with self.mDS._openPData() as inp:
            for rec_no, line in enumerate(inp):
                if rec_no in rec_no_set:
                    print >> fdata_out, line.rstrip()
    self.setStatus("Finishing...")
    logging.info("Finishing up workspace %s" % self.mWSName)
    ds_info = {
        "name": self.mWSName,
        "kind": "ws",
        "view_schema": view_schema,
        "flt_schema": flt_schema,
        "total": len(rec_no_seq),
        "mongo": self.mWSName,
        # NOTE(review): "getFamiyInfo" looks like a misspelling of
        # "getFamilyInfo" — presumably the dataset API really spells it
        # this way (used twice, consistently); confirm against the
        # dataset class before renaming
        "family": (self.mDS.getFamiyInfo().dump()
            if self.mDS.getFamiyInfo() is not None else None),
        "meta": self.mDS.getDataInfo().get("meta")}
    with codecs.open(ws_dir + "/dsinfo.json",
            "w", encoding="utf-8") as outp:
        print >> outp, json.dumps(ds_info, sort_keys=True, indent=4)
    # An empty "active" file marks the workspace as usable
    with codecs.open(ws_dir + "/active",
            "w", encoding="utf-8") as outp:
        print >> outp, ""
    self.mDS.getDataVault().loadNewWS(self.mWSName)
    self.setStatus("Done")
    return {"ws": self.mWSName}
class Index:
    """In-memory filtration index over all records of a workspace.

    Loads every record's filter data into packed records, evaluates
    condition sequences through the condition environment, and caches
    named filters (standard ones pre-evaluated at construction).
    """
    # Prefix marking standard (non user-editable) filter names
    sStdFMark = AnfisaConfig.configOption("filter.std.mark")

    def __init__(self, ws_h):
        self.mWS = ws_h
        self.mCondEnv = WS_CondEnv()
        self.mDCCollection = DataColumnCollecton()
        # Unit 0 is always the rules unit; the rest come from the schema
        self.mUnits = [RulesEvalUnit(self, self.mDCCollection)]
        for unit_data in self.mWS.getFltSchema():
            unit_h = loadWSFilterUnit(self, self.mDCCollection, unit_data)
            if unit_h is not None:
                self.mUnits.append(unit_h)
        self.mUnitDict = {unit_h.getName(): unit_h
            for unit_h in self.mUnits}
        # Unit names must be unique
        assert len(self.mUnitDict) == len(self.mUnits)
        for unit_h in self.mUnits:
            unit_h.setup()
        # One packed record per line of the filter data file
        self.mRecords = []
        with self.mWS._openFData() as inp:
            for line in inp:
                inp_data = json.loads(line.decode("utf-8"))
                rec = self.mDCCollection.initRecord()
                for unit_h in self.mUnits:
                    unit_h.fillRecord(inp_data, rec)
                self.mUnits[0].fillRulesPart(inp_data, rec)
                self.mRecords.append(rec)
        assert len(self.mRecords) == self.mWS.getTotal()
        # Pre-evaluate all standard filters under their marked names
        self.mStdFilters = deepcopy(STD_WS_FILTERS)
        self.mFilterCache = dict()
        for filter_name, cond_seq in self.mStdFilters.items():
            self.cacheFilter(self.sStdFMark + filter_name, cond_seq, None)

    def updateRulesEnv(self):
        """Re-fill the rules part of every record, then re-evaluate any
        cached filter that references the "Rules" unit."""
        with self.mWS._openFData() as inp:
            for rec_no, line in enumerate(inp):
                inp_data = json.loads(line.decode("utf-8"))
                self.mUnits[0].fillRulesPart(inp_data,
                    self.mRecords[rec_no])
        to_update = []
        for filter_name, filter_info in self.mFilterCache.items():
            # filter_info[0] is the condition sequence; each condition's
            # second element is the unit name
            if any([cond_info[1] == "Rules"
                    for cond_info in filter_info[0]]):
                to_update.append(filter_name)
        for filter_name in to_update:
            filter_info = self.mFilterCache[filter_name]
            # Re-cache with the original conditions and time label
            self.cacheFilter(filter_name, filter_info[0], filter_info[3])

    def getWS(self):
        return self.mWS

    def getCondEnv(self):
        return self.mCondEnv

    def getUnit(self, unit_name):
        return self.mUnitDict[unit_name]

    def getRulesUnit(self):
        # The rules unit is always stored first
        return self.mUnits[0]

    def iterUnits(self):
        return iter(self.mUnits)

    def goodOpFilterName(self, flt_name):
        # Operational (user) filter name: non-empty, starts with a
        # letter, no spaces, not marked as standard
        return (flt_name and not flt_name.startswith(self.sStdFMark)
            and flt_name[0].isalpha() and ' ' not in flt_name)

    def hasStdFilter(self, filter_name):
        return filter_name in self.mStdFilters

    def parseCondSeq(self, cond_seq):
        return self.mCondEnv.parseSeq(cond_seq)

    def evalCondition(self, condition):
        """Return the numbers of records matching a parsed condition."""
        rec_no_seq = []
        for rec_no in range(self.mWS.getTotal()):
            if condition(self.mRecords[rec_no]):
                rec_no_seq.append(rec_no)
        return rec_no_seq

    def checkResearchBlock(self, cond_seq):
        # True if any unit referenced by the conditions is blocked
        # outside research mode
        for cond_info in cond_seq:
            if self.getUnit(cond_info[1]).checkResearchBlock(False):
                return True
        return False

    def cacheFilter(self, filter_name, cond_seq, time_label):
        """Evaluate and store a filter.  Cache entry layout:
        (cond_seq, matching record numbers, research-only flag,
        time_label)."""
        condition = self.mCondEnv.parseSeq(cond_seq)
        self.mFilterCache[filter_name] = (
            cond_seq, self.evalCondition(condition),
            self.checkResearchBlock(cond_seq), time_label)

    def dropFilter(self, filter_name):
        if filter_name in self.mFilterCache:
            del self.mFilterCache[filter_name]

    def getFilterList(self, research_mode):
        """List cached filters as [name, is-standard, available,
        time-label] rows, hiding '_'-prefixed service filters."""
        ret = []
        for filter_name, flt_info in self.mFilterCache.items():
            if filter_name.startswith('_'):
                continue
            ret.append([filter_name, self.hasStdFilter(filter_name),
                research_mode or not flt_info[2], flt_info[3]])
        return sorted(ret)

    def makeStatReport(self, filter_name, research_mode,
            condition=None, repr_context=None):
        """Build the statistics report for a named filter or an ad-hoc
        condition (filter_name takes precedence, see getRecNoSeq)."""
        rec_no_seq = self.getRecNoSeq(filter_name, condition)
        rec_seq = [self.mRecords[rec_no] for rec_no in rec_no_seq]
        stat_list = []
        for unit_h in self.mUnits:
            # Skip research-blocked and screened units
            if (not unit_h.checkResearchBlock(research_mode)
                    and not unit_h.isScreened()):
                stat_list.append(unit_h.makeStat(rec_seq, repr_context))
        report = {
            "total": self.mWS.getTotal(),
            "count": len(rec_seq),
            "stat-list": stat_list,
            "filter-list": self.getFilterList(research_mode),
            "cur-filter": filter_name}
        # Expose conditions only for visible cached filters
        if (filter_name and filter_name in self.mFilterCache
                and not filter_name.startswith('_')):
            report["conditions"] = self.mFilterCache[filter_name][0]
        return report

    def makeUnitStatReport(self, unit_name, condition, repr_context):
        """Statistics of a single unit over the condition's records."""
        rec_seq = [self.mRecords[rec_no]
            for rec_no in self.getRecNoSeq(None, condition)]
        return self.mUnitDict[unit_name].makeStat(rec_seq, repr_context)

    def getRecNoSeq(self, filter_name=None, condition=None):
        """Resolve record numbers: ad-hoc condition, then cached
        filter, then all records."""
        if filter_name is None and condition is not None:
            return self.evalCondition(condition)
        if filter_name in self.mFilterCache:
            return self.mFilterCache[filter_name][1]
        # Fall back to all record numbers (copied Python 2 range list)
        return range(self.mWS.getTotal())[:]

    def getRecFilters(self, rec_no, research_mode):
        # Standard filter names first, then operational ones,
        # each group sorted
        ret0, ret1 = [], []
        for filter_name, flt_info in self.mFilterCache.items():
            # Hide research-only filters outside research mode
            if not research_mode and flt_info[2]:
                continue
            if self.hasStdFilter(filter_name):
                ret0.append(filter_name)
            elif self.goodOpFilterName(filter_name):
                ret1.append(filter_name)
        return sorted(ret0) + sorted(ret1)
class Index:
    """Workspace filtration index (older variant): conditions are
    interpreted directly as numeric/enum tuples instead of going
    through a condition environment."""
    # Prefix marking standard (non user-editable) filter names
    sStdFMark = AnfisaConfig.configOption("filter.std.mark")

    def __init__(self, ws_h):
        self.mWS = ws_h
        self.mDCCollection = DataColumnCollecton()
        # Unit 0 is always the rules unit; the rest come from the schema
        self.mUnits = [RulesEvalUnit(self, self.mDCCollection, 0)]
        for unit_data in self.mWS.getFltSchema():
            unit = loadWSFilterUnit(self, self.mDCCollection,
                unit_data, len(self.mUnits))
            if unit is not None:
                self.mUnits.append(unit)
        self.mUnitDict = {unit.getName(): unit for unit in self.mUnits}
        # Unit names must be unique
        assert len(self.mUnitDict) == len(self.mUnits)
        # One packed record per line of the filter data file
        self.mRecords = []
        with self.mWS._openFData() as inp:
            for line in inp:
                inp_data = json.loads(line.decode("utf-8"))
                rec = self.mDCCollection.initRecord()
                for unit in self.mUnits:
                    unit.fillRecord(inp_data, rec)
                self.mUnits[0].fillRulesPart(inp_data, rec)
                self.mRecords.append(rec)
        assert len(self.mRecords) == self.mWS.getTotal()
        # Pre-evaluate all standard filters under their marked names
        self.mStdFilters = deepcopy(STD_WS_FILTERS)
        self.mFilterCache = dict()
        for filter_name, conditions in self.mStdFilters.items():
            self.cacheFilter(self.sStdFMark + filter_name,
                conditions, None)

    def updateRulesEnv(self):
        """Re-fill the rules part of every record, then re-evaluate any
        cached filter that references the "Rules" unit."""
        with self.mWS._openFData() as inp:
            for rec_no, line in enumerate(inp):
                inp_data = json.loads(line.decode("utf-8"))
                self.mUnits[0].fillRulesPart(inp_data,
                    self.mRecords[rec_no])
        to_update = []
        for filter_name, filter_info in self.mFilterCache.items():
            # filter_info[0] is the condition list; each condition's
            # second element is the unit name
            if any([cond_info[1] == "Rules"
                    for cond_info in filter_info[0]]):
                to_update.append(filter_name)
        for filter_name in to_update:
            filter_info = self.mFilterCache[filter_name]
            # Re-cache with the original conditions and time label
            self.cacheFilter(filter_name, filter_info[0], filter_info[3])

    def getWS(self):
        return self.mWS

    def getUnit(self, unit_name):
        return self.mUnitDict[unit_name]

    def getRulesUnit(self):
        # The rules unit is always stored first
        return self.mUnits[0]

    def iterUnits(self):
        return iter(self.mUnits)

    def goodOpFilterName(self, flt_name):
        # Operational (user) filter name: non-empty, starts with a
        # letter, no spaces, not marked as standard
        return (flt_name and not flt_name.startswith(self.sStdFMark)
            and flt_name[0].isalpha() and ' ' not in flt_name)

    def hasStdFilter(self, filter_name):
        return filter_name in self.mStdFilters

    @staticmethod
    def numericFilterFunc(bounds, use_undef):
        """Build a predicate over a value given (min, max) bounds;
        None bound means unbounded on that side, use_undef admits
        undefined (None) values."""
        bound_min, bound_max = bounds
        if bound_min is None:
            if bound_max is None:
                if use_undef:
                    # No bounds at all: select undefined values only
                    return lambda val: val is None
                # No bounds and no undefined: not a meaningful filter
                assert False
                # Dead code kept as-is (reachable only with assertions
                # disabled)
                return lambda val: True
            if use_undef:
                return lambda val: val is None or val <= bound_max
            return lambda val: val is not None and val <= bound_max
        if bound_max is None:
            if use_undef:
                return lambda val: val is None or bound_min <= val
            return lambda val: val is not None and bound_min <= val
        if use_undef:
            return lambda val: val is None or (
                bound_min <= val <= bound_max)
        return lambda val: val is not None and (
            bound_min <= val <= bound_max)

    @staticmethod
    def enumFilterFunc(filter_mode, base_idx_set):
        """Build a predicate over an index set for the enum filter
        modes NOT/ONLY/AND; any other mode means OR (intersection
        non-empty)."""
        if filter_mode == "NOT":
            return lambda idx_set: len(idx_set & base_idx_set) == 0
        if filter_mode == "ONLY":
            return lambda idx_set: (len(idx_set) > 0 and len(
                idx_set - base_idx_set) == 0)
        if filter_mode == "AND":
            all_len = len(base_idx_set)
            return lambda idx_set: len(idx_set & base_idx_set) == all_len
        return lambda idx_set: len(idx_set & base_idx_set) > 0

    def _applyCondition(self, rec_no_seq, cond_info):
        """Filter record numbers by one condition tuple:
        ("numeric", unit, bounds, use_undef) or
        ("enum", unit, mode, variants)."""
        cond_type, unit_name = cond_info[:2]
        unit_h = self.getUnit(unit_name)
        if cond_type == "numeric":
            bounds, use_undef = cond_info[2:]
            filter_func = self.numericFilterFunc(bounds, use_undef)
        elif cond_info[0] == "enum":
            filter_mode, variants = cond_info[2:]
            filter_func = self.enumFilterFunc(
                filter_mode, unit_h.getVariantSet().makeIdxSet(variants))
        else:
            # Unknown condition type
            assert False
        cond_f = unit_h.recordCondFunc(filter_func)
        flt_rec_no_seq = []
        for rec_no in rec_no_seq:
            if cond_f(self.mRecords[rec_no]):
                flt_rec_no_seq.append(rec_no)
        return flt_rec_no_seq

    def evalConditions(self, conditions):
        """Apply all conditions in sequence, narrowing the record set;
        stop early once it is empty."""
        rec_no_seq = range(self.mWS.getTotal())[:]
        for cond_info in conditions:
            rec_no_seq = self._applyCondition(rec_no_seq, cond_info)
            if len(rec_no_seq) == 0:
                break
        return rec_no_seq

    def checkResearchBlock(self, conditions):
        # True if any unit referenced by the conditions is blocked
        # outside research mode
        for cond_info in conditions:
            if self.getUnit(cond_info[1]).checkResearchBlock(False):
                return True
        return False

    def cacheFilter(self, filter_name, conditions, time_label):
        """Evaluate and store a filter.  Cache entry layout:
        (conditions, matching record numbers, research-only flag,
        time_label)."""
        self.mFilterCache[filter_name] = (
            conditions, self.evalConditions(conditions),
            self.checkResearchBlock(conditions), time_label)

    def dropFilter(self, filter_name):
        if filter_name in self.mFilterCache:
            del self.mFilterCache[filter_name]

    def getFilterList(self, research_mode):
        """List cached filters as [name, is-standard, available,
        time-label] rows, hiding '_'-prefixed service filters."""
        ret = []
        for filter_name, flt_info in self.mFilterCache.items():
            if filter_name.startswith('_'):
                continue
            ret.append([filter_name, self.hasStdFilter(filter_name),
                research_mode or not flt_info[2], flt_info[3]])
        return sorted(ret)

    def makeStatReport(self, filter_name, research_mode,
            conditions=None):
        """Build the statistics report for a named filter or ad-hoc
        conditions (filter_name takes precedence, see getRecNoSeq)."""
        rec_no_seq = self.getRecNoSeq(filter_name, conditions)
        rec_seq = [self.mRecords[rec_no] for rec_no in rec_no_seq]
        stat_list = []
        for unit in self.mUnits:
            # Skip research-blocked units
            if not unit.checkResearchBlock(research_mode):
                stat_list.append(unit.collectStatJSon(rec_seq))
        report = {
            "stat-list": stat_list,
            "filter-list": self.getFilterList(research_mode),
            "cur-filter": filter_name}
        # Expose conditions only for visible cached filters
        if (filter_name and filter_name in self.mFilterCache
                and not filter_name.startswith('_')):
            report["conditions"] = self.mFilterCache[filter_name][0]
        return report

    def getRecNoSeq(self, filter_name=None, conditions=None):
        """Resolve record numbers: ad-hoc conditions, then cached
        filter, then all records."""
        if filter_name is None and conditions:
            return self.evalConditions(conditions)
        if filter_name in self.mFilterCache:
            return self.mFilterCache[filter_name][1]
        # Fall back to all record numbers (copied Python 2 range list)
        return range(self.mWS.getTotal())[:]

    def getRecFilters(self, rec_no, research_mode):
        # Standard filter names first, then operational ones,
        # each group sorted
        ret0, ret1 = [], []
        for filter_name, flt_info in self.mFilterCache.items():
            # Hide research-only filters outside research mode
            if not research_mode and flt_info[2]:
                continue
            if self.hasStdFilter(filter_name):
                ret0.append(filter_name)
            elif self.goodOpFilterName(filter_name):
                ret1.append(filter_name)
        return sorted(ret0) + sorted(ret1)