def _update(self):
    '''Write this algorithm's state vectors into the shelf database

    Verifies every value in every state vector extends dawgie.Value,
    registers the target name if it is new, then stores each value into
    the primary table under a key built from (runid, target, task,
    algorithm, state vector, value name) and notifies the bot of which
    keys are new. All table writes happen while holding the update lock.

    Raises dawgie.AbortAEError when an abort has been requested and
    dawgie.NotValidImplementationError when any value fails verification.
    '''
    if self._alg().abort():
        raise dawgie.AbortAEError()
    name = '.'.join([self._tn(), self._task(), self._algn()])
    logging.getLogger(__name__ + '.Interface').info(
        "update: Acquiring for %s", name)
    lok = self._acquire('update: ' + name)
    valid = True
    try:
        # validate every value before writing anything
        for sv in self._alg().state_vectors():
            for k in sv.keys():
                if not self.__verify(sv[k]):
                    logging.getLogger(__name__).critical(
                        'offending item is ' + '.'.join(
                            [self._task(), self._algn(), sv.name(), k]))
                    valid = False
        if not valid:
            raise dawgie.NotValidImplementationError(
                'StateVector contains data that does not extend ' +
                'dawgie.Value correctly. See log for details.')
        # make sure the target is registered before writing its values
        if self._tn() not in self._keys(Table.target):
            self._set(self._tn(), Table.target, None)
        for sv in self._alg().state_vectors():
            for k in sv.keys():
                runid, tn, task = self._runid(), self._tn(), self._task()
                algn, svn, vn = self._algn(), sv.name(), k
                vname = self.__to_key(runid, tn, task, algn, svn, vn)
                isnew = self._set(vname, Table.primary, sv[k])
                self._bot().new_values((vname, isnew))
    finally:
        # fixed typo in log message: "Releaseing" -> "Releasing"
        logging.getLogger(__name__ + '.Interface').info(
            "update: Releasing for %s", name)
        self._release(lok)
    return
def _update_msv(self, msv):
    '''Write a MetricStateVector and its values into the postgres tables

    For each value in msv: persist the encoded blob, resolve the primary
    keys (target, task, algorithm), insert any missing StateVector and
    Value rows, and queue one Prime row. The Prime rows are inserted
    together after all values pass verification.

    Raises dawgie.AbortAEError when an abort has been requested and
    dawgie.NotValidImplementationError when a value does not extend
    dawgie.Value correctly.
    '''
    # pylint: disable=too-many-locals,too-many-statements
    if self._alg().abort():
        raise dawgie.AbortAEError()
    log.info("in Interface update process metrics")
    conn = dawgie.db.post._conn()
    cur = dawgie.db.post._cur(conn)
    primes = []  # deferred Prime INSERTs, executed after validation below
    valid = True
    for vn, val in msv.items():
        # persist the encoded value; keep its blob name for the Prime row
        result = dawgie.db.util.move(*dawgie.db.util.encode(val))[0]
        # Put result in primary. Make sure to get task_ID and other
        # primary keys from their respective tables
        # get the target ID
        tn_ID = self.__tn_id(conn, cur)
        # Get task id that matches task name
        cur.execute('SELECT * from TASK WHERE name = %s;', [self._task()])
        task_ID = _fetchone(cur, 'Dataset update: Could not find task ID')
        # Get Alg id
        cur.execute(
            'SELECT * from Algorithm WHERE name = %s and ' +
            'task_ID = %s and ' +
            'design = %s and bugfix = %s and implementation = %s;',
            (self._alg().name(), task_ID, self._alg().design(),
             self._alg().bugfix(), self._alg().implementation()))
        alg_ID = _fetchone(
            cur, 'Dataset update: Could not find ' + 'algorithm ID')
        # Get state vector id that matches msv name, inserting it when
        # absent; the IntegrityError path covers a concurrent insert
        args = ('SELECT * from StateVector WHERE name = %s and ' +
                'alg_ID = %s and ' +
                'design = %s and bugfix = %s and implementation = %s;',
                (msv.name(), alg_ID, msv.design(), msv.bugfix(),
                 msv.implementation()))
        cur.execute(*args)
        sv_ID = cur.fetchone()
        if sv_ID is None:
            try:
                cur.execute(
                    'INSERT into StateVector ' +
                    '(name, alg_ID, design, bugfix, ' +
                    'implementation) values (%s, %s, %s, %s, %s);',
                    (msv.name(), alg_ID, msv.design(), msv.bugfix(),
                     msv.implementation()))
                conn.commit()
            except psycopg2.IntegrityError:
                # another writer created the row first; fall through
                conn.rollback()
                pass
            # bug fix: re-read the row in BOTH paths -- previously a
            # successful INSERT left sv_ID as None and sv_ID[0] below
            # raised TypeError
            cur.execute(*args)
            sv_ID = cur.fetchone()
            pass
        # Get the value id that matches the value
        if not self.__verify(val):
            log.critical('offending item is ' + '.'.join(
                [self._task(), self._alg().name(), msv.name(), vn]))
            valid = False
            continue
        args = ('SELECT * from Value where name = %s and ' +
                'sv_ID = %s and ' +
                'design = %s and bugfix = %s and implementation = %s;',
                (vn, sv_ID[0], val.design(),
                 val.bugfix(), val.implementation()))
        cur.execute(*args)
        val_ID = cur.fetchone()
        if val_ID is None:
            try:
                cur.execute(
                    'INSERT into Value (name, sv_id, design, ' +
                    'bugfix, implementation) values ' +
                    '(%s, %s, %s, %s, %s);',
                    (vn, sv_ID[0], val.design(), val.bugfix(),
                     val.implementation()))
                conn.commit()
            except psycopg2.IntegrityError:
                conn.rollback()
                pass
            # bug fix: same unconditional re-read as for sv_ID above
            cur.execute(*args)
            val_ID = cur.fetchone()
            pass
        primes.append(('INSERT into Prime (run_ID, task_ID, tn_ID, ' +
                       'alg_ID, sv_ID, val_ID, blob_name) values ' +
                       '(%s, %s, %s, %s, %s, %s, %s);',
                       (self._runid(), task_ID, tn_ID, alg_ID,
                        sv_ID[0], val_ID[0], result)))
        pass
    if not valid:
        raise dawgie.NotValidImplementationError(
            'MetricStateVector contains data that does not extend ' +
            'dawgie.Value correctly. See log for details.')
    # commit all Prime rows as one transaction, retrying on
    # IntegrityError (NOTE(review): retries forever if the conflict is
    # persistent -- confirm that is the intended behavior)
    while primes:
        cur = dawgie.db.post._cur(conn)
        try:
            for args in primes:
                cur.execute(*args)
            conn.commit()
            primes.clear()
        except psycopg2.IntegrityError:
            conn.rollback()
        pass
    cur.close()
    conn.close()
    return
def _load(self, algref=None, err=True, ver=None):
    # Load state vectors with data from db into algorithm
    # Take highest run number row from primary table for that task,
    # algorithm, state vector if current run does not exist in
    # Primary table.
    #
    # When algref is given, delegate the load to a child interface built
    # from that reference's factory; otherwise read this algorithm's
    # state vectors (plus a metric state vector) from postgres.
    # pylint: disable=too-many-locals,too-many-statements
    if self._alg().abort():
        raise dawgie.AbortAEError()
    log.info("In Interface load")
    conn = dawgie.db.post._conn()
    cur = dawgie.db.post._cur(conn)
    if algref:
        # build the factory argument list according to the factory type;
        # analysis loads span all targets ('__all__')
        ft = dawgie.Factories.resolve(algref)
        tn = self._tn()
        if ft == dawgie.Factories.analysis:
            args = (dawgie.util.task_name(algref.factory),
                    self._bot()._ps_hint(),
                    self._bot()._runid())
            tn = '__all__'
        elif ft == dawgie.Factories.regress:
            args = (dawgie.util.task_name(algref.factory),
                    self._bot()._ps_hint(),
                    self._tn())
        elif ft == dawgie.Factories.task:
            args = (dawgie.util.task_name(algref.factory),
                    self._bot()._ps_hint(),
                    self._bot()._runid(),
                    self._tn())
        else:
            raise KeyError('Unknown factory type {}'.format(
                algref.factory.__name__))
        # recurse through a child interface bound to the referenced alg
        child = connect(algref.impl, algref.factory(*args), tn)
        child.load(err=err, ver=ver)
    else:
        # get the target
        tn_ID = self.__tn_id(conn, cur)
        # Get task id that matches task name
        cur.execute('SELECT * from TASK WHERE name = %s;', [self._task()])
        task_ID = _fetchone(cur, 'Dataset load: Could not find task ID')
        # all algorithm pks that share this name/task (any version)
        cur.execute(
            'SELECT pk FROM Algorithm WHERE name = %s AND ' +
            'task_ID = %s;', [self._alg().name(), task_ID])
        alg_ID = list(set([pk[0] for pk in cur.fetchall()]))
        # placeholder metrics (-1 everywhere) loaded alongside the real
        # state vectors
        msv = dawgie.util.MetricStateVector(
            dawgie.METRIC(-1, -1, -1, -1, -1, -1),
            dawgie.METRIC(-1, -1, -1, -1, -1, -1))
        for sv in self._alg().state_vectors() + [msv]:
            cur.execute(
                'SELECT pk FROM StateVector WHERE name = %s AND ' +
                'alg_ID = ANY(%s);', [sv.name(), alg_ID])
            sv_ID = list(set([pk[0] for pk in cur.fetchall()]))
            # every run id that has data for this sv/alg/task/target
            cur.execute(
                'SELECT run_ID FROM Prime WHERE tn_ID = %s AND ' +
                'task_ID = %s AND alg_ID = ANY(%s) AND ' +
                'sv_ID = ANY(%s);',
                [tn_ID, task_ID, alg_ID, sv_ID])
            run_ID = set([pk[0] for pk in cur.fetchall()])
            if not run_ID:
                log.info('Dataset load: Could not find any runs that ' +
                         'match given the algorithm and state vector')
                continue
            else:
                # prefer the current run id; otherwise the newest run
                run_ID = self._runid() if self._runid() in run_ID \
                         else max(run_ID)
            # narrow to the single (alg pk, sv pk) pair for that run
            cur.execute(
                'SELECT alg_ID,sv_ID FROM Prime WHERE ' +
                'run_ID = %s AND tn_ID = %s AND task_ID = %s ' +
                ' AND alg_ID = ANY(%s) and sv_ID = ANY(%s);',
                [run_ID, tn_ID, task_ID, alg_ID, sv_ID])
            narrowed = set(cur.fetchall())
            if len(narrowed) != 1:
                # warn but keep going with an arbitrary pair
                # NOTE(review): pop() on an empty set would raise
                # KeyError here -- confirm zero matches cannot occur
                log.critical(
                    'Dataset load: The postgres db is corrupt ' +
                    'because found %d IDs', len(narrowed))
                pass
            na_ID, nsv_ID = narrowed.pop()
            # __fill presumably copies the Prime values into sv --
            # defined elsewhere in this file
            self.__fill(cur, sv, na_ID, run_ID, task_ID, tn_ID, nsv_ID)
            # seal the versions actually loaded onto the alg and sv
            cur.execute(
                'SELECT design,implementation,bugfix ' +
                'FROM Algorithm WHERE pk = %s;', [na_ID])
            av = cur.fetchone()
            cur.execute(
                'SELECT design,implementation,bugfix ' +
                'FROM StateVector WHERE pk= %s;', [nsv_ID])
            svv = cur.fetchone()
            self._alg()._version_seal_ = dawgie.VERSION(*av)
            sv._version_seal_ = dawgie.VERSION(*svv)
            pass
        self.msv = msv
        pass
    conn.commit()
    cur.close()
    conn.close()
    return
def _die(signum): if signum == signal.SIGABRT: raise dawgie.AbortAEError('OS signaled an abort') return
def _load(self, algref=None, err=True, ver=None, lok=None):
    '''Load state vectors with data from the shelf database

    When algref is given, delegate the load to a child interface built
    from the referenced factory (sharing the already-held lock).
    Otherwise, for each of this algorithm's state vectors (plus a
    placeholder metric state vector), find the keys for the current run
    id -- or, when none exist, for the highest run id that has data --
    and copy the stored values into the state vector. The lock is
    acquired only by the outermost (parent) call and released by it.

    Raises dawgie.AbortAEError when an abort has been requested.
    '''
    # pylint: disable=too-many-branches,too-many-nested-blocks,too-many-locals,too-many-statements
    if self._alg().abort():
        raise dawgie.AbortAEError()
    parent = False
    if lok is None:
        # outermost call: acquire the lock and remember to release it
        name = '.'.join([self._tn(), self._task(), self._algn()])
        parent = True
        logging.getLogger(__name__ + '.Interface').info(
            "load: Acquiring for %s", name)
        lok = self._acquire('load: ' + name)
        pass
    try:
        if algref:
            # build factory args according to the factory type; analysis
            # loads span all targets ('__all__')
            ft = dawgie.Factories.resolve(algref)
            tn = self._tn()
            if ft == dawgie.Factories.analysis:
                args = (dawgie.util.task_name(algref.factory),
                        self._bot()._ps_hint(),
                        self._bot()._runid())
                tn = '__all__'
            elif ft == dawgie.Factories.regress:
                args = (dawgie.util.task_name(algref.factory),
                        self._bot()._ps_hint(), tn)
            elif ft == dawgie.Factories.task:
                args = (dawgie.util.task_name(algref.factory),
                        self._bot()._ps_hint(),
                        self._bot()._runid(), tn)
            else:
                raise KeyError('Unknown factory type {}'.format(
                    algref.factory.__name__))
            child = connect(algref.impl, algref.factory(*args), tn)
            child._load(err=err, ver=ver, lok=lok)
        else:
            # placeholder metrics (-1 everywhere) loaded alongside the
            # real state vectors
            msv = dawgie.util.MetricStateVector(
                dawgie.METRIC(-1, -1, -1, -1, -1, -1),
                dawgie.METRIC(-1, -1, -1, -1, -1, -1))
            for sv in self._alg().state_vectors() + [msv]:
                base = self.__to_key(self._bot()._runid(), self._tn(),
                                     self._task(), self._algn(),
                                     sv.name(), None) + '.'
                # prefer the current run id; fall back to the highest
                # run id that has keys for this state vector
                if not any(n.startswith(base)
                           for n in self._keys(Table.primary)):
                    base = self.__to_key(None, self._tn(), self._task(),
                                         self._algn(), sv.name(),
                                         None) + '.'
                    prev = -1
                    for k in self._keys(Table.primary):
                        # keys are '<runid>.<rest>'; compare the rest
                        runid = int(k.split('.')[0])
                        if '.'.join(k.split('.')[1:]).startswith(base):
                            prev = max(prev, runid)
                else:
                    prev = self._bot()._runid()
                base = self.__to_key(prev, self._tn(), self._task(),
                                     self._algn(), sv.name(), None) + '.'
                # copy every stored value under the chosen run prefix
                for k in filter(lambda n, b=base: n.startswith(b),
                                self._keys(Table.primary)):
                    sv[k.split('.')[-1]] = self._get(k, Table.primary)
            self.msv = msv
    finally:
        if parent:
            # fixed typo in log message: "Releaseing" -> "Releasing"
            logging.getLogger(__name__ + '.Interface').info(
                "load: Releasing for %s", name)
            self._release(lok)
    return