def get_sig(run_id, sig_level=.05):
    """
    Returns the significant networks at the given significance
    by Benjamini-Hochberg

    The DataForDisplay record with the largest timestamp for run_id is used.
    Its data file is downloaded from S3 and read as a tab-separated table.
    A network is reported when its value in any column named by the cutoffs
    is at or below one of the cutoffs returned for that column by
    NetworkTSV.get_fdr_cutoffs.

    run_id - key passed to DataForDisplay.query
    sig_level - significance level handed to get_fdr_cutoffs

    Returns a list of the distinct matching entries of the 'networks'
    column, in no particular order.
    Raises ValueError if the query yields no records for run_id.
    """
    latest = None
    for item in DataForDisplay.query(run_id):
        # strict '>' keeps the first record seen when timestamps tie
        if latest is None or item.timestamp > latest.timestamp:
            latest = item
    if latest is None:
        # fail with a clear message instead of an AttributeError on None
        raise ValueError(
            'no DataForDisplay records found for run_id %r' % (run_id,))

    conn = boto.connect_s3()
    bucket = conn.get_bucket(latest.data_bucket)
    s3_key = bucket.get_key(latest.data_file)
    with tempfile.TemporaryFile() as fp:
        s3_key.get_contents_to_file(fp)
        fp.seek(0)
        table = pandas.read_csv(fp, sep='\t')

    cutoffs = NetworkTSV().get_fdr_cutoffs(
        latest.identifier, latest.timestamp, [sig_level])

    # a set removes networks that pass in more than one column
    significant = set()
    for column, column_cutoffs in cutoffs.iteritems():
        for cut in column_cutoffs.itervalues():
            passing = table[table[column] <= cut]
            significant.update(passing['networks'].tolist())
    return list(significant)
def get_fdr_cutoffs(self, identifier, timestamp, alphas=None):
    """
    By benjamini-hochberg

    Looks up the DataForDisplay record for (identifier, timestamp),
    downloads its data file from S3 into a temporary file and passes that
    file to stat.get_fdr_cutoffs.

    identifier, timestamp - keys passed to DataForDisplay.get
    alphas - levels forwarded to stat.get_fdr_cutoffs; defaults to [.05]

    Returns whatever stat.get_fdr_cutoffs returns.
    """
    if alphas is None:
        # build the default per call: a list literal in the signature would
        # be one shared object that the callee could mutate across calls
        alphas = [.05]
    record = DataForDisplay.get(identifier, timestamp)
    s3 = boto.connect_s3()
    bucket = s3.get_bucket(record.data_bucket)
    s3_key = bucket.get_key(record.data_file)
    with tempfile.TemporaryFile() as fp:
        s3_key.get_contents_to_file(fp)
        # rewind so the reader starts at the beginning of the download
        fp.seek(0)
        return stat.get_fdr_cutoffs(fp, alphas=alphas)
def get_nets_for_display():
    """
    Return the attribute values of every DataForDisplay record as JSON.

    Scans DataForDisplay, converts any attribute whose value is a set into
    a list (json.dumps cannot serialize sets) and sends the resulting list
    of dicts back as an 'application/json' Response. Calls abort(400) when
    the scan result object is falsy.
    """
    from datadirac.aggregate import DataForDisplay
    import json

    res = DataForDisplay.scan()
    rlist = [record.attribute_values for record in res]
    for attrs in rlist:
        # collect the names first, then replace the values in place
        set_valued = [name for name, value in attrs.iteritems()
                      if type(value) is set]
        for name in set_valued:
            attrs[name] = list(attrs[name])

    # NOTE(review): this tests the scan result object itself (already
    # iterated above), not rlist -- confirm an empty scan is really falsy,
    # otherwise the 400 branch is never taken.
    if not res:
        return abort(400)
    return Response(json.dumps(rlist), mimetype='application/json')
def set_qval_table(self, identifier, timestamp):
    """
    Build the q-value table for one record and store it in S3.

    The data file of the DataForDisplay record (identifier, timestamp) is
    downloaded and passed to stat.get_qval_table. The result is written as
    a tab-separated file whose index column is labelled 'networks' and
    uploaded to the same bucket as 'qvals-<identifier>-<timestamp>.tsv'.
    That object name is then saved on the record as qvalue_file.

    identifier, timestamp - keys passed to DataForDisplay.get
    """
    record = DataForDisplay.get(identifier, timestamp)
    bucket = boto.connect_s3().get_bucket(record.data_bucket)
    source_key = bucket.get_key(record.data_file)
    with tempfile.TemporaryFile() as raw_fp:
        source_key.get_contents_to_file(raw_fp)
        raw_fp.seek(0)
        qvalues = stat.get_qval_table(raw_fp)
        with tempfile.TemporaryFile() as tsv_fp:
            qvalues.to_csv(tsv_fp, sep='\t', index_label='networks')
            # rewind so the upload reads the table from its first byte
            tsv_fp.seek(0)
            target_key = Key(bucket)
            qvalue_name = 'qvals-' + identifier + '-' + timestamp + '.tsv'
            target_key.key = qvalue_name
            target_key.set_contents_from_file(tsv_fp)
    # record where the table was stored
    record.qvalue_file = qvalue_name
    record.save()
def _available(self):
    """
    Map '<identifier>-<timestamp>' to the attribute values of each record
    returned by DataForDisplay.scan().

    Returns a dict; if two records produce the same key, the one scanned
    later wins.
    """
    return dict(
        (record.identifier + '-' + record.timestamp, record.attribute_values)
        for record in DataForDisplay.scan()
    )