def getFeatures(table_name, sensors=None, task=None, hand=None, order=None, limit=0, keys=None, asdict=False):
    '''Query BQ for data from table_name based on SQL query entries given above.

    - if limit is <= 0, returns all rows; otherwise limits to `limit`.
    - `order` needs to be formatted as the second part of a SQL ORDER BY query.
    - if asdict, returns {id: np.array of row values, ...}; otherwise returns
      a tuple (np.array of ids, np.array of raw values), one entry per row.
    '''
    sensorquery = qSensors(table_name, sensors, task, hand, keys, order, limit)
    print(sensorquery)  # debug: show the generated SQL before running it
    results = queryGoogle(sensorquery)
    if asdict:
        # Group rows by their first field (the id); each value is the list of
        # remaining fields converted to floats. defaultdict replaces the
        # manual "if key not in dict" dance of the original.
        hugedict = defaultdict(list)
        for row in results['rows']:
            hugedict[row['f'][0]['v']].append(
                [float(vals['v']) for vals in row['f'][1:]])
        # .items() (not py2-only .iteritems()) keeps this 2/3-compatible.
        return {k: np.array(v) for k, v in hugedict.items()}
    else:
        # Return tuple of [key, key...], [mxn raw data]
        ids = []     # the id corresponding to each timepoint
        values = []  # the raw data
        for row in results['rows']:
            ids.append(row['f'][0]['v'])
            values.append([float(vals['v']) for vals in row['f'][1:]])
        return np.array(ids), np.array(values)
def createThresholds(table_name):
    '''Build a CSV buffer of threshold values for every task/sensor in table_name.

    Each output line is: task,table_name,threshold_name,sensor,sensor_value.
    Bin indices come from the fields of `permil` (e.g. 4, 19, 979, 994), which
    index into the 1000-bin quantile query results. Returns a cStringIO buffer.
    '''
    sensors = getSensors(table_name)
    out = cStringIO.StringIO()
    for task in getTasks(table_name):
        # One quantile query (binned into 1000 bins) per sensor for this task.
        quantiles = {s: queryGoogle(qQuantiles(s, table_name, task))
                     for s in sensors}
        for threshold_name in permil._fields:
            # Row index of the bin that defines this named threshold.
            bin_idx = getattr(permil, threshold_name)
            for s in sensors:
                value = quantiles[s]['rows'][bin_idx]['f'][0]['v']
                out.write('{0},{1},{2},{3},{4}\n'.format(
                    task, table_name, threshold_name, s, value))
    #thresholds = cStringIO.StringIO(open('thresholds.csv').read()) #to hardcode thresholds from matlab
    return out
def download_codebook(date):
    '''Fetch the codebook rows for `date` from data.codebooks.

    Returns (cdbkL, cdbkR, dataset, task): the left/right codebooks decoded
    from stored JSON into np arrays, plus dataset and task from the first row.
    '''
    # BUG FIX: `date` was accepted but ignored — the query hardcoded
    # '10-OCT-2012 1:33'. Now the caller's date is actually used.
    # NOTE(review): date is interpolated directly into SQL; parameterize or
    # escape it if the value can ever come from untrusted input.
    qs = "SELECT dataset, task, json FROM data.codebooks WHERE date ='{0}'".format(date)
    data = queryGoogle(qs)
    cdbkL = np.array(json.loads(data['rows'][0]['f'][2]['v']))
    cdbkR = np.array(json.loads(data['rows'][1]['f'][2]['v']))
    dataset = str(data['rows'][0]['f'][0]['v'])
    task = str(data['rows'][0]['f'][1]['v'])
    return cdbkL, cdbkR, dataset, task
def getThresholds(table_name, dataset=None, task_type=None):
    '''Load threshold values for `dataset` (default: table_name) from data.thresholds.

    Returns a nested dict: thresholds[task][sensor][threshold_name] = float.
    If task_type is given, returns only that task's sub-dict, or the string
    'ERROR: task not found' when absent (preserved for backward compatibility).
    '''
    dataset = dataset if dataset else table_name
    thresholds = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
    qs = "SELECT task, table_name, threshold, sensor_name, sensor_value FROM data.thresholds WHERE table_name='{0}'".format(dataset)
    data = queryGoogle(qs)
    for row in data['rows']:
        cells = row['f']
        # Normalize task naming: the table stores 'PegTx' for peg transfer.
        task = 'pegtransfer' if cells[0]['v'] == 'PegTx' else cells[0]['v'].lower()
        ttype = cells[2]['v']
        sensor = cells[3]['v']
        # BUG FIX: convert to float. The innermost defaultdict(float) and the
        # sibling getFeatures (float(vals['v'])) show numeric values are
        # expected, but the BQ cell value was stored as-is (a string).
        thresholds[task][sensor][ttype] = float(cells[4]['v'])
    if task_type:
        # .get() does not trigger the defaultdict factory, so a missing task
        # correctly falls through to the error string.
        return thresholds.get(task_type, 'ERROR: task not found')
    return thresholds
def getTasks(table_name):
    '''Return the distinct task names present in data.<table_name>.'''
    query = "SELECT task from data.{0} GROUP BY task".format(table_name)
    rows = queryGoogle(query)['rows']
    return [r['f'][0]['v'] for r in rows]