def save_data():
    """Stamp the in-memory data with the current time and pickle it to disk.

    Raises:
        DataError: if the module-level ``current_data`` is still ``None``.
    """
    if current_data is None:
        raise DataError("Cannot save data; it has not yet been loaded! Please try again soon.")

    # The timestamp update and the file write both happen while holding the
    # lock, so the pickled snapshot always carries the timestamp set here.
    with lock:
        current_data["timestamp"] = time.time()
        with open("data/data.pickle", "wb") as pickle_file:
            pickle.dump(current_data, pickle_file)
def build_data_from_fields(ws, fields):
    """Turn the data rows of a worksheet into a list of dictionaries.

    The first row of ``ws`` is treated as the header. Every key in
    ``fields.required_attributes`` is located in that header, and each
    following row is converted into ``{key: cell.value}`` for those keys.

    Returns:
        A list with one dictionary per data row (header row excluded).

    Raises:
        DataError: if a required key cannot be located in the header, or if
            the first column of the worksheet is empty.
    """
    header_row = ws.rows[0]

    # Map every required attribute to its position in the header row.
    column_of = {}
    for attribute in fields.required_attributes:
        position = get_index_by_value(header_row, attribute)
        # Identity comparison: only the ``False`` sentinel counts as a miss,
        # so a position of 0 passes this check.
        if position is False:
            raise DataError('Failed to obtain header value: {}'.format(attribute))
        column_of[attribute] = position

    # The length of the first column bounds the rows that are read.
    row_count = len(ws.columns[0])
    if not row_count:
        raise DataError('Failed to find end of the ws data')

    # Start at index 1 to skip the header row; its names are already used as
    # the dictionary keys.
    return [
        {attribute: row[position].value
         for attribute, position in column_of.items()}
        for row in ws.rows[1:row_count]
    ]
def parse(self, wb):
    """Parse a workbook into tube objects appended to ``self.tubes``.

    The synthesis sheet and then the root sheet are processed into
    ``self.synthesis_data`` and ``self.root_data``. Both must end up with
    the same set of tube numbers. For each tube number a ``tube.Tube`` is
    created, its session bookkeeping is filled in, every root is inserted,
    the roots belonging to the highest session are finalized, and the
    tube's synthesis data is attached.

    Raises:
        DataError: if the tube numbers of the two tables differ.
    """
    log.debug('Parsing workbook')

    # Both sheets are looked up first; the synthesis table is then processed
    # before the root table.
    sheet_names = self.required_sheet_names
    root_sheet = wb.get_sheet_by_name(sheet_names.get('root_data'))
    synthesis_sheet = wb.get_sheet_by_name(sheet_names.get('synthesis_data'))
    self._process_synthesis_table(ws=synthesis_sheet)
    self._process_root_table(ws=root_sheet)

    root_tube_numbers = set(self.root_data.keys())
    synthesis_tube_numbers = set(self.synthesis_data.keys())
    if root_tube_numbers != synthesis_tube_numbers:
        log.error('# Root data keys [{}]'.format(len(self.root_data.keys())))
        log.error('# Syn data keys [{}]'.format(len(self.synthesis_data.keys())))
        log.error(self.synthesis_data)
        raise DataError(
            'Tube numbers from root_data does not match the tube numbers from the synthesis_data'
        )

    log.info('Processing collected data')
    for tube_number in self.root_data:
        log.info('Processing data for tube [{}]'.format(tube_number))
        current_tube = tube.Tube(tube_number)
        tube_roots = self.root_data.get(tube_number)

        # First pass: track the highest session number seen and record the
        # date of each session the first time it is encountered.
        for root_obj in tube_roots:
            session = root_obj.get('Session#')
            if session > current_tube.maxSessionCount:
                current_tube.maxSessionCount = session
                log.debug('Max session count updated to {}'.format(
                    current_tube.maxSessionCount))
            if session not in current_tube.sessionDates:
                current_tube.sessionDates[session] = root_obj.get('Date')
                log.debug('Inserted session {} - Date {}'.format(
                    session, root_obj.get('Date')))

        # Second pass: insert every root, remembering those that belong to
        # the highest session so they can be finalized afterwards.
        last_session_roots = []
        log.info('Inserting roots into tube [{}]'.format(tube_number))
        for root_obj in tube_roots:
            current_tube.insert_or_update_root(root_obj)
            if root_obj.get('Session#') == current_tube.maxSessionCount:
                last_session_roots.append(root_obj)

        log.info('Finalizing roots')
        for root_obj in last_session_roots:
            finalized = current_tube.finalize_root(
                root_obj, root_fields=self.root_fields)
            if not finalized:
                log.error('Failed to finalize root {}'.format(
                    root_obj.identity))

        log.info('Inserting synthesis data')
        # Insert the synthesis data (containing the tip stats) into the roots.
        current_tube.insert_synthesis_data(self.synthesis_data.get(tube_number))
        self.tubes.append(current_tube)
def connection(self, collectionname, dbname=None):
    """Get a cursor to a collection by name.

    Raises `DataError` on empty names and on names with unallowable
    characters: a ``..`` sequence, a ``$`` (unless the name starts with
    ``oplog.$main`` or ``$cmd``), a leading or trailing ``.``, or a null
    character.

    :Parameters:
      - `collectionname`: the name of the collection
      - `dbname`: (optional) override the default db for a connection

    :Returns:
      A `Cursor` built from the database name (``dbname`` when truthy,
      otherwise the pool's default), the collection name and the pool.
    """
    # The empty check and the ".." check are reported separately so that the
    # error message describes the actual problem with the name.
    if not collectionname:
        raise DataError("collection names cannot be empty")
    if ".." in collectionname:
        raise DataError("collection names must not "
                        "contain '..': %r" % collectionname)
    # "$" is only tolerated in names with these two prefixes.
    if "$" in collectionname and not collectionname.startswith(
            ("oplog.$main", "$cmd")):
        raise DataError("collection names must not "
                        "contain '$': %r" % collectionname)
    if collectionname.startswith(".") or collectionname.endswith("."):
        raise DataError("collection names must not start "
                        "or end with '.': %r" % collectionname)
    if "\x00" in collectionname:
        raise DataError("collection names must not contain the "
                        "null character")
    return Cursor(dbname or self._pool._dbname, collectionname, self._pool)
def _process_synthesis_table(self, ws):
    """Load the synthesis worksheet into ``self.synthesis_data``.

    Each extracted row is stored under its ``'Tube#'`` value, keyed within
    that tube by a ``root.RootIdentity`` built from the row's
    ``'RootName'``, ``'Location#'`` and ``'BirthSession'`` values.

    Returns:
        ``True`` once every row has been stored.

    Raises:
        DataError: if two rows of the same tube yield the same identity.
    """
    log.info('Extracting synthesis data')
    records = utility.build_data_from_fields(ws, self.synthesis_fields)

    for record in records:
        # Fetch the per-tube mapping, creating it the first time a tube
        # number shows up.
        tube_roots = self.synthesis_data.setdefault(record.get('Tube#'), {})

        identity = root.RootIdentity(
            rootname=record.get('RootName'),
            location=record.get('Location#'),
            birthsession=record.get('BirthSession'))
        if identity in tube_roots:
            raise DataError(
                'Duplicate root encountered in synthesis data: {}'.format(
                    identity))
        tube_roots[identity] = record

    return True
def _root_from_dict(self, d):
    """Create a ``root.Root`` from one row dictionary.

    The root's attribute map is the root fields' required attributes,
    extended with any synthesis required attributes not already present.
    A row whose ``'NumberOfTips'`` is exactly 1 is a regular root and gets
    its alive status from ``'TipLivStatus'``; any other tip count marks the
    root as an anomaly and it is treated as alive. Required and custom root
    attributes are then copied from ``d`` onto the root.

    Returns:
        The populated root object.

    Raises:
        DataError: if a single-tip row has a ``'TipLivStatus'`` that does
            not start with ``'A'``, ``'D'`` or ``'G'``. A missing value
            (``None``) is reported the same way.
    """
    attr_map = dict(self.root_fields.required_attributes)
    # Root field definitions win; synthesis fields only fill in the gaps.
    for k, v in self.synthesis_fields.required_attributes.items():
        attr_map.setdefault(k, v)

    root_obj = root.Root(attr_map=attr_map,
                         rootname=d.get('RootName'),
                         location=d.get('Location#'),
                         birthsession=d.get('BirthSession'))

    # Check for anomalous roots
    num_tips = d.get('NumberOfTips')
    tip_liv_status = d.get('TipLivStatus')
    if num_tips == 1:
        root_obj.anomaly = False
        # A missing status comes back as None; normalise it to an empty
        # string so it reaches the DataError branch below instead of failing
        # with AttributeError on ``.startswith``.
        status_text = '' if tip_liv_status is None else tip_liv_status
        if status_text.startswith('A'):
            root_obj.isAlive = 'A'  # XXX Configurable status!
        elif status_text.startswith(('D', 'G')):
            root_obj.isAlive = 'G'
        else:
            raise DataError('Unknown tip_liv_status [{}][{}]'.format(
                root_obj.identity, tip_liv_status))
    else:
        root_obj.anomaly = True
        root_obj.isAlive = 'A'

    # Set required attributes
    for k in self.root_fields.required_attributes:
        root_obj.set(k, d.get(k))

    # Check to see if the current root is gone
    # XXX Configurable value!
    if root_obj.isAlive.startswith(('D', 'G')):
        # XXX Eww hardcoded attribute access!
        root_obj.DeathSession = root_obj.get('Session#')

    # Now we add arbitrary keys to the root.
    for key in self.root_fields.custom_attributes:
        root_obj.set(key, d.get(key))

    return root_obj