def test_studies(db):
    """Smoke-test Study/Peak persistence: one study with two peaks round-trips."""
    peak_coords = [(-12, 14, 40), (22, 22, 22)]
    record = Study(pmid=345345, title='test study', authors='Jokkin, Eumast',
                   journal='Journal of Nonexistent Findings', year=2008)
    record.peaks = [Peak(x=px, y=py, z=pz) for (px, py, pz) in peak_coords]
    db.session.add(record)
    db.session.commit()
    # Cascade should have persisted both peaks alongside the single study.
    assert Peak.query.count() == 2
    assert Study.query.count() == 1
def get_studies(val=None):
    """Return studies with peaks near a coordinate, deduplicating repeated peaks.

    Args:
        val: optional packed coordinate string passed through to get_params().

    Returns:
        A Flask JSON response. With the 'dt' query arg, rows are DataTables
        rows [link, authors, journal, n_peaks]; otherwise a list of
        {'pmid': ..., 'peaks': ...} dicts.
    """
    x, y, z, radius = get_params(val)
    points = Peak.closestPeaks(radius, x, y, z)

    # Track number of peaks and study details for each found study,
    # keeping only peaks that haven't been previously seen for the current
    # study/x/y/z combination.
    seen = set()
    study_counts = defaultdict(list)
    for p in points:
        # Use the tuple itself as the key: the original hashed it, which both
        # risked hash collisions and was unnecessary for set membership.
        key = (p.pmid, round(p.x, 2), round(p.y, 2), round(p.z, 2))
        if key in seen:
            # BUG FIX: the original used a bare `next` here, which just
            # evaluates the builtin and does nothing, so duplicate peaks
            # were never actually skipped. `continue` is what was intended.
            continue
        seen.add(key)
        study_counts[p.pmid].append(p)

    if 'dt' in request.args:
        data = []
        for pmid, peaks in study_counts.items():
            s = peaks[0].study
            link = '<a href={0}>{1}</a>'.format(
                url_for('studies.show', val=pmid), s.title)
            data.append([link, s.authors, s.journal, len(peaks)])
    else:
        data = [{'pmid': pmid, 'peaks': len(peaks)}
                for pmid, peaks in study_counts.items()]
    return jsonify(data=data)
def location_api(val):
    """Serve study and image data for an underscore-delimited 'x_y_z[_radius]' string."""
    coords = [int(token) for token in val.split('_')]
    if len(coords) == 3:
        coords.append(10)  # default radius when the caller omits it
    x, y, z, radius = coords

    ### PEAKS ###
    # Limit search to 20 mm to keep things fast
    radius = min(radius, 20)
    peak_rows = Peak.closestPeaks(radius, x, y, z)
    peak_rows = peak_rows.group_by(Peak.pmid)  # prevents duplicate studies
    peak_rows = peak_rows.add_columns(
        sqlalchemy.func.count(Peak.id))  # counts duplicate peaks

    ### IMAGES ###
    location = Location.query.filter_by(x=x, y=y, z=z).first()
    images = [] if location is None else location.images
    images = [{'label': img.label, 'id': img.id}
              for img in images if img.display]

    if 'draw' in request.args:
        rows = []
        for peak, n_peaks in peak_rows:
            s = peak.study
            link = '<a href={0}>{1}</a>'.format(
                url_for('studies.show', val=s.pmid), s.title)
            rows.append([link, s.authors, s.journal, n_peaks])
        return jsonify(data=rows)

    payload = {
        'studies': [{'pmid': peak.study.pmid, 'peaks': n_peaks}
                    for peak, n_peaks in peak_rows],
        'images': images
    }
    return jsonify(data=payload)
def get_location():
    """ Retrieve location data --- tags: - locations responses: 200: description: Location data default: description: No locations found parameters: - in: query name: x description: x-coordinate required: true type: integer - in: query name: y description: y-coordinate required: true type: integer - in: query name: z description: z-coordinate required: true type: integer - in: query name: r description: Radius of sphere within which to search for study activations, in mm (default = 6, max = 20). required: false type: integer """
    x, y, z = (int(request.args[axis]) for axis in ('x', 'y', 'z'))
    # Radius: 6 mm by default, max 2 cm
    r = min(int(request.args.get('r', 6)), 20)

    # Check validity of coordinates and redirect if necessary
    check_xyz(x, y, z)

    loc = Location.query.filter_by(x=x, y=y, z=z).first()
    if loc is None:
        from nsweb.controllers.locations import make_location
        loc = make_location(x, y, z)

    # One row per study (grouped by pmid), with a per-study peak count column.
    peak_rows = (Peak.closestPeaks(r, x, y, z)
                 .group_by(Peak.pmid)
                 .add_columns(func.count(Peak.id)))
    loc.studies = [row[0].study for row in peak_rows]

    return jsonify(data=LocationSchema().dump(loc).data)
def get_studies(val=None):
    """Return studies activating near a coordinate, with per-study peak counts."""
    x, y, z, radius = get_params(val)
    query = Peak.closestPeaks(radius, x, y, z)
    query = query.group_by(Peak.pmid)  # prevents duplicate studies
    query = query.add_columns(
        sqlalchemy.func.count(Peak.id))  # counts duplicate peaks

    if 'dt' in request.args:
        rows = []
        for peak, n_peaks in query:
            study = peak.study
            link = '<a href={0}>{1}</a>'.format(
                url_for('studies.show', val=study.pmid), study.title)
            rows.append([link, study.authors, study.journal, n_peaks])
        return jsonify(data=rows)

    return jsonify(data=[{'pmid': peak.study.pmid, 'peaks': n_peaks}
                         for peak, n_peaks in query])
def add_studies(self, analyses=None, threshold=0.001, limit=None,
                reset=False):
    """ Add studies to the DB.

    Args:
        analyses: list of names of analyses to map studies onto. If None,
            use all available.
        threshold: Float or integer; minimum value in AnalysisTable data
            array for inclusion.
        limit: integer; maximum number of studies to add (order will be
            randomized).
        reset: Drop all existing records before populating.

    Notes: By default, will not create new Study records if an existing
        one matches. This ensures that we can gracefully add new analysis
        associations without mucking up the DB. To explicitly replace old
        records, pass reset=True.
    """
    if reset:
        Study.query.delete()

    # For efficiency, get all analysis data up front, so we only need to
    # densify array once
    if analyses is None:
        analyses = self._get_feature_names()
    feature_data = self.dataset.get_feature_data(features=analyses)

    study_inds = self.dataset.activations['id'].unique()
    if limit is not None:
        random.shuffle(study_inds)
        study_inds = study_inds[:limit]
    # SQL DBs generally don't like numpy dtypes
    study_inds = [int(ind) for ind in study_inds]

    all_rows = self.dataset.activations.query('id in @study_inds')
    all_rows[['doi', 'table_num']] = all_rows[['doi', 'table_num']] \
        .astype(str).replace('nan', '')

    # Create Study records
    for i, pmid in enumerate(study_inds):
        activ = all_rows.query('id == @pmid')
        study = Study.query.get(pmid)
        if study is None:
            peaks = [Peak(x=p['x'], y=p['y'], z=p['z'],
                          table=p['table_num'])
                     for (ind, p) in activ.iterrows()]
            # Track in Python to avoid issuing SQL count() queries
            n_peaks = len(peaks)
            data = activ.iloc[0, :]
            study = Study(
                pmid=int(pmid), space=data['space'], doi=data['doi'],
                title=data['title'], journal=data['journal'],
                authors=data['authors'], year=int(data['year']))
            study.peaks.extend(peaks)
            self.db.session.add(study)
        else:
            # BUG FIX: n_peaks was previously assigned only in the branch
            # above, so for a pre-existing study the loop below either
            # raised NameError or silently reused the count from an earlier
            # iteration, corrupting the per-analysis peak totals.
            # NOTE(review): assumes Study.peaks is a dynamic relationship
            # exposing count(), as used elsewhere in this file — confirm.
            n_peaks = study.peaks.count()

        # Map analyses onto studies via a Frequency join table that also
        # stores frequency info
        pmid_frequencies = feature_data.loc[pmid, :]
        to_keep = pmid_frequencies[pmid_frequencies >= threshold]
        # .items() replaces Series.iteritems(), which was removed in
        # pandas 2.0.
        for analysis_name, freq in to_keep.items():
            freq_inst = Frequency(
                study=study, analysis=self.analyses[analysis_name][0],
                frequency=freq)
            self.db.session.add(freq_inst)

            # Track number of studies and peaks so we can update
            # Analysis table more efficiently later
            self.analyses[analysis_name][1] += 1
            self.analyses[analysis_name][2] += n_peaks

        # Commit records in batches to conserve memory and speed up
        # querying.
        if (i + 1) % 100 == 0:
            print("Saving study %d..." % i)
            self.db.session.commit()

    self.db.session.commit()  # Commit any remaining studies

    # Update all analysis counts
    self._update_analysis_counts()
def add_studies(self, analyses=None, threshold=0.001, limit=None,
                reset=False):
    """ Add studies to the DB.

    Args:
        analyses: list of names of analyses to map studies onto. If None,
            use all available.
        threshold: Float or integer; minimum value in AnalysisTable data
            array for inclusion.
        limit: integer; maximum number of studies to add (order will be
            randomized).
        reset: Drop all existing records before populating.

    Notes: By default, will not create new Study records if an existing
        one matches. This ensures that we can gracefully add new analysis
        associations without mucking up the DB. To explicitly replace old
        records, pass reset=True.
    """
    if reset:
        Study.query.delete()

    # For efficiency, get all analysis data up front, so we only need to
    # densify array once
    if analyses is None:
        analyses = self._get_feature_names()
    feature_data = self.dataset.get_feature_data(features=analyses)
    analysis_names = list(feature_data.columns)

    # BUG FIX: random.shuffle() requires a mutable sequence; shuffling a
    # bare range raises TypeError on Python 3, so materialize a list.
    study_inds = list(range(len(self.dataset.mappables)))
    if limit is not None:
        random.shuffle(study_inds)
        study_inds = study_inds[:limit]

    # Create Study records
    for n_done, i in enumerate(study_inds, start=1):
        m = self.dataset.mappables[i]
        pmid = int(m.id)  # renamed from `id`, which shadowed the builtin
        study = Study.query.get(pmid)
        if study is None:
            peaks = [Peak(x=float(p.x), y=float(p.y), z=float(p.z),
                          table=str(p.table_num).replace('nan', ''))
                     for (ind, p) in m.data.iterrows()]
            data = m.data.iloc[0]
            study = Study(
                pmid=pmid,
                space=data['space'],
                doi=str(data['doi']).replace('nan', ''),
                title=data['title'],
                journal=data['journal'],
                authors=data['authors'],
                year=data['year'])
            study.peaks.extend(peaks)
            self.db.session.add(study)

        # Map analyses onto studies via a Frequency join table that also
        # stores frequency info. .loc replaces the long-removed pandas .ix
        # indexer.
        pmid_frequencies = list(feature_data.loc[m.id, :])
        for analysis_name, freq in zip(analysis_names, pmid_frequencies):
            if freq >= threshold:
                freq_inst = Frequency(
                    study=study,
                    analysis=self.analyses[analysis_name][0],
                    frequency=freq)
                self.db.session.add(freq_inst)

                # Track number of studies and peaks so we can update
                # Analysis table more efficiently later
                self.analyses[analysis_name][1] += 1
                self.analyses[analysis_name][2] += study.peaks.count()

        # Commit records in batches to conserve memory.
        # This is very slow because we're relying on the declarative base.
        # Ideally should replace this with use of SQLAlchemy core, but
        # probably not worth the trouble considering we only re-create the
        # DB once in a blue moon.
        # BUG FIX: batching previously keyed on the (shuffled) index `i`,
        # so commits fired at arbitrary, possibly never-hit points; key on
        # the number of studies processed instead.
        if n_done % 100 == 0:
            self.db.session.commit()

    self.db.session.commit()  # Commit any remaining studies

    # Update all analysis counts
    self._update_analysis_counts()