def get_order_type(dataset_id, ordertype):
    """Return rows of *dataset_id* whose ORDER_CATALOG_TYPE matches
    *ordertype*, newest order first.

    :param dataset_id: id of the dataset to read via ``dt.get``
    :param ordertype: value compared against each row's ORDER_CATALOG_TYPE
    :return: matching rows sorted by ORDER_DTM descending
    """
    data = dt.get(dataset_id).aslist()
    filtered_data = [x for x in data if x['ORDER_CATALOG_TYPE'] == ordertype]
    # BUG FIX: the original called ``to_data_time`` here; every other date
    # sort in this module uses ``to_date_time``, so the former looks like a
    # typo (would raise NameError) -- confirm only ``to_date_time`` exists.
    filtered_data.sort(key=lambda x: to_date_time(x['ORDER_DTM']), reverse=True)
    return filtered_data
def calc_diff(id1, id2, direction, ops):
    """Calculate a detailed difference between two tables.

    :param id1: id of the first dataset
    :param id2: id of the second dataset
    :param direction: diff direction forwarded to :class:`DiffFinder`
    :param ops: operations for which the diff is generated
    :return: the generated diff; when it is a :class:`Diff`, the union of
        the two tables is attached before returning
    """
    ds1 = dataset.get(id1)
    ds2 = dataset.get(id2)

    # Reuse the module-level stratify_matrix() helper instead of redefining
    # an identical nested copy of it here.
    table1 = stratify_matrix(ds1)
    table2 = stratify_matrix(ds2)

    diff_finder = DiffFinder(table1, table2, ds1.rowtype, ds2.coltype, direction)

    t2 = timeit.default_timer()
    d = diff_finder.generate_diff(ops)
    t3 = timeit.default_timer()
    # BUG FIX: logging's lazy arguments need a %-style placeholder; the
    # original passed the elapsed time with no placeholder, so it was
    # never rendered into the message.
    _log.debug("TIMER: time to generate diff %s", t3 - t2)

    if isinstance(d, Diff):
        d.add_union(diff_finder.union)
    return d
def get_info(ids, dataset_id):
    """Collect every row of *dataset_id* belonging to each patient in *ids*.

    :param ids: iterable of int patient ids
    :param dataset_id: id of the dataset to read via ``dt.get``
    :return: dict mapping each id in *ids* to its (possibly empty) row list
    """
    rows = dt.get(dataset_id).aslist()
    grouped = {patient: [] for patient in ids}
    for entry in rows:
        key = int(entry['PAT_ID'])
        if key in ids:
            grouped[key].append(entry)
    return grouped
def stratify_matrix(m):
    """Reorder matrix *m* to follow precomputed row/column stratification
    (clustering) datasets, when those datasets exist.

    :param m: matrix-like dataset exposing rows()/cols()/rowids()/colids()
        and asnumpy()
    :return: a Table over the (possibly reordered) numpy data
    """
    # NOTE: ids must match with the hard-coded ones in taco/src/data_set_selector.ts -> prepareTCGAData()
    row_strat = dataset.get(m.id + '4CnmfClustering')
    col_strat = dataset.get(m.id + 'TreeClusterer1')
    data = m.asnumpy()
    # Fall back to the matrix's own ordering when no stratification exists.
    rows = m.rows() if row_strat is None else row_strat.rows()
    cols = m.cols() if col_strat is None else col_strat.rows()
    if row_strat is not None:
        # Map each stratification row id to its index in the matrix, then
        # reorder the data rows into stratification order.
        rowids = list(m.rowids())
        row_indices = [rowids.index(o) for o in row_strat.rowids()]
        data = data[row_indices, ...]
    if col_strat is not None:
        # Same reordering for columns; col_strat's *row* ids name m's columns.
        colids = list(m.colids())
        col_indices = [colids.index(o) for o in col_strat.rowids()]
        data = data[..., col_indices]
    return Table(rows, cols, data)
def get_first_info(PAT_ID):
    """Return the earliest 'Orders' row for *PAT_ID*, or [] when none.

    Only rows dated strictly before "now" are considered.

    :param PAT_ID: patient id, compared as-is against the PAT_ID column
    :return: the earliest matching row dict, or [] if the patient has none
    """
    data = dt.get('Orders').aslist()
    first_ent = []
    first_time = pd.datetime.today()
    for row in data:
        if row['PAT_ID'] == PAT_ID:
            # BUG FIX: the original called ``to_data_time``; the PRO variant
            # of this function and the rest of the module use
            # ``to_date_time`` -- the former looks like a typo; confirm.
            order_time = to_date_time(row['ORDER_DTM'])
            if order_time < first_time:
                first_ent = row
                first_time = order_time
    return first_ent
def get_first_info(PAT_ID):
    """Return the earliest 'PRO' row for *PAT_ID*, or [] when none.

    Only assessments that started strictly before "now" are considered.
    """
    rows = dt.get('PRO').aslist()
    cutoff = pd.datetime.today()
    # Keep this patient's assessments that started before the cutoff.
    candidates = [
        r for r in rows
        if r['PAT_ID'] == PAT_ID
        and to_date_time(r['ASSESSMENT_START_DTM']) < cutoff
    ]
    if not candidates:
        return []
    return min(candidates, key=lambda r: to_date_time(r['ASSESSMENT_START_DTM']))
def get_latest_info():
    """For every patient in 'Demo', return the most recent admission row
    plus the patient's recorded weight history.

    :return: JSON with 'rows' (latest row per patient) and 'WEIGHT_KG'
        (per patient: all non-empty weights, newest admission first)
    """
    data = dt.get('Demo').aslist()

    # Group rows by patient in a single pass.  The original rescanned the
    # whole dataset once per patient (O(n * patients)) and compared the raw
    # row value against the int()-normalized id -- which presumably never
    # matches when PAT_ID is stored as a string (other functions here
    # consistently int()-convert it).
    by_patient = {}
    for row in data:
        by_patient.setdefault(int(row['PAT_ID']), []).append(row)

    pats = []
    pat_weights = []
    for rows in by_patient.values():
        rows.sort(key=lambda x: to_date_time(x['ADM_DATE']), reverse=True)
        pats.append(rows[0])  # newest admission
        pat_weights.append([t['WEIGHT_KG'] for t in rows if t['WEIGHT_KG']])
    return jsonify({'rows': pats, 'WEIGHT_KG': pat_weights})
def get_similarity_score(PAT_ID, dataset_id):
    """Score every other patient's similarity to *PAT_ID* using the
    weighted attributes in ``weights_Demo``.

    Per attribute: zero-weight attributes are skipped; a missing value on
    either side earns half the weight; otherwise an exact match earns the
    full weight.

    :param PAT_ID: target patient id
    :param dataset_id: id of the dataset to read via ``dt.get``
    :return: dict mapping patient id -> similarity score
    """
    data = dt.get(dataset_id).aslist()
    pat_ids = set([int(d['PAT_ID']) for d in data])
    # find the first entry for each patient
    pats = {}
    #for id in pat_ids:
    # pats[id] = get_first_info(id)
    for row in data:
        # Later rows overwrite earlier ones, so this actually keeps the
        # LAST row seen per patient -- despite the comment above.
        pats[int(row['PAT_ID'])] = row
    scores = {}
    # NOTE(review): pats is keyed by int, so this raises KeyError when
    # PAT_ID arrives as a string (e.g. from a URL route) -- confirm callers
    # pass an int.
    target_pat = pats[PAT_ID]
    for id in pat_ids:
        similarity_score = 0
        if id == PAT_ID:  ## NOTE!
            # Never score the target patient against themselves.
            continue
        curr_pat = pats[id]
        for h in list(weights_Demo.keys()):
            if weights_Demo[h] == 0:
                # Attribute carries no weight; skip it entirely.
                continue
            elif not curr_pat[h] or not target_pat[h]:
                # Missing/empty value on either side: award half the weight.
                similarity_score += 0.5 * weights_Demo[h]
            elif h in categorical_data:
                similarity_score += weights_Demo[h] if target_pat[
                    h] == curr_pat[h] else 0
            elif h in numerical_data:
                # NOTE(review): the trailing comment says "within range" but
                # this is an exact-equality check -- confirm intent.
                similarity_score += weights_Demo[h] if target_pat[
                    h] == curr_pat[h] else 0  ## within range
            elif h in years:
                ## age! ## within range
                similarity_score += weights_Demo[h] if float(
                    extract_year(target_pat[h])) == float(
                        extract_year(curr_pat[h])) else 0
            else:
                similarity_score += weights_Demo[h] if target_pat[
                    h] == curr_pat[h] else 0
        scores[id] = similarity_score
    return scores
def get_info_by_functions(id):
    """Exercise every accessor of dataset *id* and return the results as JSON."""
    my_data = dt.get(id)
    payload = {
        'id': id,
        'my_data': my_data,
        'rows': my_data.rows(),
        'data': my_data.asjson(),
        'rowids': my_data.rowids(),
        'aslist': my_data.aslist(),
        'aspandas': my_data.aspandas(),
        'columns': my_data.columns,
        'CSVEntry.to_description': my_data.to_description(),
        'CSVEntry.idtypes': my_data.idtypes(),
    }
    return jsonify(payload)
def get_all_info_for_pat(dataset_id, PAT_ID):
    """Return all rows of *dataset_id* for *PAT_ID*, chronologically sorted.

    :param dataset_id: 'Demo', 'PRO', 'PT' or 'VAS' (anything else is
        returned unsorted)
    :param PAT_ID: patient id; compared as int against the PAT_ID column
    :return: list of matching row dicts
    """
    my_data = dt.get(dataset_id)
    # Hoist the accessors out of the loop -- the original re-invoked
    # aspandas()/rows()/aslist() on every single iteration.
    frame = my_data.aspandas()
    row_keys = my_data.rows()
    rows = my_data.aslist()
    pat = int(PAT_ID)

    info_rows = [rows[i] for i in range(len(row_keys))
                 if int(frame['PAT_ID'][row_keys[i]]) == pat]

    # Each dataset keeps its timestamp in a different column.
    sort_column = {
        'Demo': 'ADM_DATE',
        'PRO': 'ASSESSMENT_START_DTM',
        'PT': 'ADM_DATE',
        'VAS': 'RECORDED_TIME',
    }.get(dataset_id)
    if sort_column is not None:
        info_rows.sort(key=lambda r: to_date_time(r[sort_column]))
    return info_rows
def get_stat(dataset):
    """Histogram gender, BMI and age over the Demo table selected by
    *dataset* (resolved through ``dataset_hash['Demo']``).

    :return: JSON with the total row count and the three histograms
    """
    my_data = dt.get(dataset_hash['Demo'][dataset])
    data = my_data.aslist()
    length = 0
    gender = [0, 0]  # [female, male]
    # BMI buckets: [missing, <=18, (18,21], (21,24], (24,27], (27,30], >30]
    bmi = [0, 0, 0, 0, 0, 0, 0]
    # Age buckets: [<=10, 11-20, 21-30, ..., 91-100, >100]
    age = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    today = pd.datetime.today()
    for row in data:
        length += 1
        if row['PAT_GENDER'] == 'F':
            gender[0] += 1
        elif row['PAT_GENDER'] == 'M':
            gender[1] += 1
        if not row['BMI']:
            bmi[0] += 1
        elif row['BMI'] <= 18:
            bmi[1] += 1
        elif row['BMI'] > 30:
            bmi[6] += 1
        else:
            # ceil((BMI-15)/3) maps (18,21]->2, (21,24]->3, (24,27]->4, (27,30]->5.
            bmi[int(math.ceil((row['BMI'] - 15) / 3))] += 1
        # Age in whole years; 365.25 accounts for leap years.
        pat_age = int(
            (today - to_date_time(row['PAT_BIRTHDATE'])).days / 365.25)
        if pat_age <= 10:
            age[0] += 1
        elif pat_age > 100:
            age[10] += 1
        else:
            # floor((age-1)/10) maps 11-20 -> 1, ..., 91-100 -> 9.
            age[int(math.floor((pat_age - 1) / 10))] += 1
    return jsonify({
        'length': length,
        'GENDER': gender,
        'BMI': bmi,
        'AGE': age
    })
def get_data(id):
    """Return the raw dataset object for *id*, wrapped in a JSON response."""
    entry = dt.get(id)
    return jsonify({'data': entry})
def get_similarity_score(PAT_ID, dataset_id):
    """Score every other patient's PRO similarity to *PAT_ID*.

    Rows are split into three PROMIS form groups; within each group only
    the earliest assessment per patient is kept.  Per form, a patient
    earns the full ``weights_PRO['SCORE']`` when their score is within 10
    points of the target's, nothing when it is further away, and half the
    weight when either side has no entry for that form.

    :param PAT_ID: target patient id (int or numeric string)
    :param dataset_id: id of the PRO dataset to read via ``dt.get``
    :return: dict mapping patient id -> similarity score
    """
    data = dt.get(dataset_id).aslist()
    pat_ids = set([int(d['PAT_ID']) for d in data])
    groups = {
        'PROMIS Bank v1.2 - Physical Function': {},
        'Oswestry Index (ODI)': {},
        'PROMIS Bank v1.0 - Depression': {}
    }
    for row in data:
        promisType = row['FORM']
        if promisType not in groups:
            # Robustness: ignore forms outside the three scored groups
            # instead of raising KeyError.
            continue
        pid = int(row['PAT_ID'])
        # BUG FIX: the original tested membership with int(row['PAT_ID'])
        # but then indexed with the raw row['PAT_ID'], which raises
        # KeyError when PAT_ID is stored as a string.  Keep the earliest
        # assessment per patient.
        if pid in groups[promisType]:
            if to_date_time(groups[promisType][pid]['ASSESSMENT_START_DTM']) > to_date_time(row['ASSESSMENT_START_DTM']):
                groups[promisType][pid] = row
        else:
            groups[promisType][pid] = row

    # BUG FIX: use .get() for the target's entries -- the "if physical and
    # ..." checks below clearly expect a falsy value (not a KeyError) when
    # the target has no entry for a form.
    target = int(PAT_ID)
    physical = groups['PROMIS Bank v1.2 - Physical Function'].get(target)
    oswerstry = groups['Oswestry Index (ODI)'].get(target)
    depression = groups['PROMIS Bank v1.0 - Depression'].get(target)

    scores = {}
    for id in pat_ids:
        if id == target:
            continue
        similarity_score = 0

        if physical and id in groups['PROMIS Bank v1.2 - Physical Function']:
            other = groups['PROMIS Bank v1.2 - Physical Function'][id]
            ## within range of 10
            similarity_score += weights_PRO['SCORE'] if abs(
                int(physical['SCORE']) - int(other['SCORE'])) <= 10 else 0
        else:
            similarity_score += weights_PRO['SCORE'] * 0.5

        if oswerstry and id in groups['Oswestry Index (ODI)']:
            other = groups['Oswestry Index (ODI)'][id]
            ## within range of 10
            similarity_score += weights_PRO['SCORE'] if abs(
                int(oswerstry['SCORE']) - int(other['SCORE'])) <= 10 else 0
        else:
            similarity_score += weights_PRO['SCORE'] * 0.5

        if depression and id in groups['PROMIS Bank v1.0 - Depression']:
            other = groups['PROMIS Bank v1.0 - Depression'][id]
            ## within range of 10
            similarity_score += weights_PRO['SCORE'] if abs(
                int(depression['SCORE']) - int(other['SCORE'])) <= 10 else 0
        else:
            similarity_score += weights_PRO['SCORE'] * 0.5

        scores[id] = similarity_score
    return scores
def get_col_by_name(id, col_name):
    """Return the single column *col_name* of dataset *id* as JSON."""
    frame = dt.get(id).aspandas()
    return jsonify({'cols': frame[col_name]})
def get_col_titles(id):
    """Return the column descriptors of dataset *id* as JSON."""
    entry = dt.get(id)
    return jsonify({'cols': entry.columns})
def get_filteres_orders(dataset_id, order):
    """Return rows of *dataset_id* whose upper-cased PRIMARY_MNEMONIC
    equals *order*, newest order first.

    :param dataset_id: id of the dataset to read via ``dt.get``
    :param order: mnemonic to match (compared against the upper-cased row value)
    :return: matching rows sorted by ORDER_DTM descending
    """
    data = dt.get(dataset_id).aslist()
    filtered_data = [x for x in data if x['PRIMARY_MNEMONIC'].upper() == order]
    # BUG FIX: the original called ``to_data_time``; the rest of the module
    # sorts dates with ``to_date_time``, so the former looks like a typo
    # (would raise NameError) -- confirm only ``to_date_time`` exists.
    filtered_data.sort(key=lambda x: to_date_time(x['ORDER_DTM']), reverse=True)
    return filtered_data
def get_all_rows(dataset_id):
    """Return (at most) the first 50 rows of *dataset_id* as JSON."""
    entry = dt.get(dataset_id)
    head = entry.aslist()[:50]
    return jsonify({dataset_id: head})
def get_row_by_index(id, index):
    """Return row *index* (0-based; int or numeric string) of dataset *id*."""
    entry = dt.get(id)
    all_rows = entry.aslist()
    return jsonify({'row': all_rows[int(index)]})