def align_other_clinical(a):
    """
    Attach the other clinical records sharing this record's MRN and strip
    patient names.

    Queries ``cbio._c`` for every record whose ``MRN`` equals ``a['MRN']``,
    removes the patient-name fields from each and stores all of them except
    the record itself (same ``_id``) under ``a['RELATED']``. The name fields
    are then removed from ``a`` too. ``a`` is modified in place.

    :param a: clinical record (dict) providing at least ``_id`` and ``MRN``
    :return: None
    """
    name_fields = ("FIRST_NAME", "LAST_NAME", "FIRST_LAST", "LAST_FIRST")

    # lookup any matches.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)
    # presumably the clinical collection -- confirm against CBioEngine.
    clinical_db = cbio._c

    # look for records with the same MRN, leaving out the record itself.
    related = []
    for clinical in clinical_db.find({"MRN": a['MRN']}):
        if clinical['_id'] == a['_id']:
            continue

        # pop() rather than del: a record lacking one of the name fields
        # must not abort the whole scrub with a KeyError.
        for nm in name_fields:
            clinical.pop(nm, None)
        related.append(clinical)

    # add them to record.
    a['RELATED'] = related

    # remove patient name
    for nm in name_fields:
        a.pop(nm, None)
def rerun_filters(dpi=None):
    """
    re-runs all filters against new data. preserves options
    set on old matches.

    For every filter with ``status == 1``, ``temporary == False`` and no
    ``trial_watch`` field, the match is recomputed and reconciled with the
    stored ``match`` documents, compared as (CLINICAL_ID, variant id) pairs:

    * pairs found only in the store are removed from it (the variant is
      pulled from ``VARIANTS``; the document is deleted once it is empty),
    * pairs found in both are dropped from the new match data frame so the
      stored documents are not touched,
    * what is left is passed to ``count_matches`` / ``insert_matches``.

    :param dpi: forwarded unchanged to ``insert_matches``
    :return: None; results are written to the database
    """

    # get the database links.
    match_db = database.get_collection('match')
    filter_db = database.get_collection('filter')

    # create the object.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    query = {'status': 1, 'temporary': False, 'trial_watch': {'$exists': False}}

    # materialise the cursor up front; the loop body issues further queries.
    filters = list(filter_db.find(query))
    for filter_ in filters:

        # lots of logging.
        logging.info("rerun_filters: filter: %s", filter_['_id'])

        # prepare the filters (the text description is not needed here).
        c, g, _ = prepare_criteria(filter_)

        # execute the match.
        cbio.match(c=c, g=g)

        if (cbio.match_df is not None
                and cbio.genomic_df is not None
                and cbio.clinical_df is not None):
            logging.info(
                "rerun_filters: new matches: match=%d, genomic=%d, clinical=%d",
                len(cbio.match_df), len(cbio.genomic_df), len(cbio.clinical_df))

        # get existing matches for this filter.
        matches = list(match_db.find({'FILTER_ID': ObjectId(filter_['_id'])}))
        rec_cnt = sum(len(m['VARIANTS']) for m in matches)
        logging.info("rerun_filters: exisiting: %d %d", len(matches), rec_cnt)

        # parse the old matches: (clinical id, variant id) -> stored document.
        old_match = {}
        for match in matches:
            clinical_id = match['CLINICAL_ID']
            for genomic_id in match['VARIANTS']:
                old_match[(clinical_id, genomic_id)] = match
        old_pairs = set(old_match)

        # parse the new matches: pair -> position in match_iter() order.
        # NOTE(review): the position is later used as a positional index into
        # cbio.match_df -- assumes match_iter() walks match_df in row order.
        new_lu = {}
        for i, match in enumerate(cbio.match_iter()):
            new_lu[(match['CLINICAL_ID'], match['GENOMIC_ID'])] = i
        new_pairs = set(new_lu)

        # find the ones which need to be deleted and delete them.
        to_delete = old_pairs - new_pairs
        logging.info("rerun_filters: removing: %d", len(to_delete))
        for pair in to_delete:
            match = old_match[pair]
            match_id = match['_id']

            # keep every variant except the stale one.
            good = [v for v in match['VARIANTS'] if v != pair[1]]
            if len(good) == len(match['VARIANTS']):
                # the variant is no longer on this document; nothing to do.
                continue

            if not good:
                # removing the variant empties the match: delete it.
                match_db.delete_one({'_id': match_id})
            else:
                # update_one: Collection.update() is the legacy call and is
                # gone in PyMongo 4; for an _id filter both touch one document.
                match_db.update_one({"_id": match_id},
                                    {"$set": {"VARIANTS": good}})

                # update the local one to make sure we delete all variants
                match['VARIANTS'] = good

        # find the intersection and remove them from data frame.
        bad_list = [new_lu[pair] for pair in new_pairs & old_pairs]
        logging.info("rerun_filters: skipping: %d", len(bad_list))

        # remove them.
        if cbio.match_df is not None and len(cbio.match_df) > 0:
            cbio.match_df.drop(cbio.match_df.index[bad_list], inplace=True)

        # insert the counts.
        count_matches(cbio, filter_)

        # insert the matches if not temporary.
        insert_matches(cbio, filter_, from_filter=False, dpi=dpi)
def rerun_filters(dpi=None):
    """
    re-runs all filters against new data. preserves options
    set on old matches.

    For every filter with ``status == 1``, ``temporary == False`` and no
    ``trial_watch`` field, the match is recomputed and reconciled with the
    stored ``match`` documents, compared as (CLINICAL_ID, variant id) pairs:

    * pairs found only in the store are removed from it (the variant is
      pulled from ``VARIANTS``; the document is deleted once it is empty),
    * pairs found in both are dropped from the new match data frame so the
      stored documents are not touched,
    * what is left is passed to ``count_matches`` / ``insert_matches``.

    :param dpi: forwarded unchanged to ``insert_matches``
    :return: None; results are written to the database
    """

    # get the database links.
    match_db = database.get_collection('match')
    filter_db = database.get_collection('filter')

    # create the object.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    query = {'status': 1, 'temporary': False, 'trial_watch': {'$exists': False}}

    # materialise the cursor up front; the loop body issues further queries.
    filters = list(filter_db.find(query))
    for filter_ in filters:

        # lots of logging.
        logging.info("rerun_filters: filter: %s", filter_['_id'])

        # prepare the filters (the text description is not needed here).
        c, g, _ = prepare_criteria(filter_)

        # execute the match.
        cbio.match(c=c, g=g)

        if (cbio.match_df is not None
                and cbio.genomic_df is not None
                and cbio.clinical_df is not None):
            logging.info(
                "rerun_filters: new matches: match=%d, genomic=%d, clinical=%d",
                len(cbio.match_df), len(cbio.genomic_df), len(cbio.clinical_df))

        # get existing matches for this filter.
        matches = list(match_db.find({'FILTER_ID': ObjectId(filter_['_id'])}))
        rec_cnt = sum(len(m['VARIANTS']) for m in matches)
        logging.info("rerun_filters: exisiting: %d %d", len(matches), rec_cnt)

        # parse the old matches: (clinical id, variant id) -> stored document.
        old_match = {}
        for match in matches:
            clinical_id = match['CLINICAL_ID']
            for genomic_id in match['VARIANTS']:
                old_match[(clinical_id, genomic_id)] = match
        old_pairs = set(old_match)

        # parse the new matches: pair -> position in match_iter() order.
        # NOTE(review): the position is later used as a positional index into
        # cbio.match_df -- assumes match_iter() walks match_df in row order.
        new_lu = {}
        for i, match in enumerate(cbio.match_iter()):
            new_lu[(match['CLINICAL_ID'], match['GENOMIC_ID'])] = i
        new_pairs = set(new_lu)

        # find the ones which need to be deleted and delete them.
        to_delete = old_pairs - new_pairs
        logging.info("rerun_filters: removing: %d", len(to_delete))
        for pair in to_delete:
            match = old_match[pair]
            match_id = match['_id']

            # keep every variant except the stale one.
            good = [v for v in match['VARIANTS'] if v != pair[1]]
            if len(good) == len(match['VARIANTS']):
                # the variant is no longer on this document; nothing to do.
                continue

            if not good:
                # removing the variant empties the match: delete it.
                match_db.delete_one({'_id': match_id})
            else:
                # update_one: Collection.update() is the legacy call and is
                # gone in PyMongo 4; for an _id filter both touch one document.
                match_db.update_one({"_id": match_id},
                                    {"$set": {"VARIANTS": good}})

                # update the local one to make sure we delete all variants
                match['VARIANTS'] = good

        # find the intersection and remove them from data frame.
        bad_list = [new_lu[pair] for pair in new_pairs & old_pairs]
        logging.info("rerun_filters: skipping: %d", len(bad_list))

        # remove them.
        if cbio.match_df is not None and len(cbio.match_df) > 0:
            cbio.match_df.drop(cbio.match_df.index[bad_list], inplace=True)

        # insert the counts.
        count_matches(cbio, filter_)

        # insert the matches if not temporary.
        insert_matches(cbio, filter_, from_filter=False, dpi=dpi)
def find_match(items):
    """
    Build each filter's description, store it, and refresh its matches.

    For every item the description is assembled from the genomic text plus
    the cancer type, gender and age (each only when non-empty), written to
    the ``filter`` collection and set on the item. Matches are recomputed
    only when ``miner.detect_update`` reports a change; otherwise just the
    status is passed along to the existing matches.

    :param items: iterable of filter documents (dict-like)
    """
    db = app.data.driver.db
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)

    for item in items:
        c, g, (gen_txt, (cancer, age, gender)) = miner.prepare_criteria(item)

        has_gender = gender != ""
        has_age = age != ""

        # genomic part, optionally qualified by cancer type.
        description = gen_txt
        if cancer != "":
            description = "%s in %s" % (gen_txt, cancer)

        # clinical qualifiers.
        if has_gender and has_age:
            description = "%s, Gender: %s, Age %s" % (description, gender, age)
        elif has_gender:
            description = "%s, Gender: %s" % (description, gender)
        elif has_age:
            description = "%s, Age %s" % (description, age)

        # an empty list means there is nothing to describe.
        if isinstance(description, list) and not description:
            description = ''

        db.filter.update_one({"_id": item["_id"]},
                             {"$set": {"description": description}})
        item['description'] = description

        # only recompute match if there was an update.
        if miner.detect_update(cbio, item):
            miner.remove_matches(cbio, item)
            cbio.match(c=c, g=g)
            miner.count_matches(cbio, item)
            dpi = get_data_push_id(db)
            if not item["temporary"]:
                miner.insert_matches(cbio, item, dpi=dpi)
        else:
            # pass along status variable to matches.
            miner.update_match_status(cbio, item)
def align_matches_genomic(a):
    """
    Embed the matching filters into each genomic item of a response.

    Collects, for the clinical id of the first item, which filters matched
    which variant, then appends to ``item['FILTER']`` every such filter that
    exists, has ``status == 1`` and whose ``TEAM_ID`` is among the user's
    teams. Independently of any match, ``GENETIC_EVENT`` (when present and
    not None) is appended to the item's ``CYTOBAND``. Items are modified in
    place.

    :param a: response dict with an ``_items`` list
    :return: None
    """
    # short circuit.
    if not a['_items']:
        return

    # get the user.
    if settings.NO_AUTH:
        logging.info("NO AUTH enabled. align_matches_genomic")
        user = app.data.driver.db['user'].find_one({"last_name": "Doe"})
    else:
        user = app.auth.get_request_auth_value()

    # extract the clinical id.
    clinical_id = a['_items'][0]['CLINICAL_ID']

    # lookup any matches.
    cbio = CBioEngine(settings.MONGO_URI,
                      settings.MONGO_DBNAME,
                      data_model.match_schema,
                      muser=settings.MONGO_USERNAME,
                      mpass=settings.MONGO_PASSWORD,
                      collection_clinical=settings.COLLECTION_CLINICAL,
                      collection_genomic=settings.COLLECTION_GENOMIC)
    match_db = cbio.connection[cbio.mongo_dbname]['match']
    filter_db = cbio.connection[cbio.mongo_dbname]['filter']

    # variant id -> ids of the filters that matched it.
    filters_by_variant = {}
    for match in match_db.find({"CLINICAL_ID": clinical_id}):
        for variant_id in match['VARIANTS']:
            filters_by_variant.setdefault(variant_id, []).append(
                match['FILTER_ID'])

    for item in a['_items']:
        for filter_id in filters_by_variant.get(item['_id'], ()):
            filter_doc = filter_db.find_one(filter_id)

            # skip missing filters, inactive ones, and other teams' filters.
            if (filter_doc is None
                    or filter_doc['status'] != 1
                    or filter_doc['TEAM_ID'] not in set(user['teams'])):
                continue

            # embed this in filter.
            item.setdefault('FILTER', []).append(filter_doc)

        # merge genetic event with cytoband
        if ('GENETIC_EVENT' in item
                and 'CYTOBAND' in item
                and item['GENETIC_EVENT'] is not None):
            item['CYTOBAND'] = '%s %s' % (item['CYTOBAND'],
                                          item['GENETIC_EVENT'])