def remove(self):
    """Delete this classifier and the rows that reference it.

    Removes the CLASSIFIER_META and CLASSIFIER_RESOURCE rows keyed by
    ``self.uid`` and then the CLASSIFIER row itself, committing once at the
    end so the three deletes succeed or fail together.

    Raises:
        Exception: whatever the database layer raised. The transaction is
            rolled back before the error is re-raised unchanged.
    """
    db = persistence.db()
    c = db.cursor()
    try:
        # Rows that reference the classifier go first, the classifier last.
        c.execute('DELETE FROM CLASSIFIER_META WHERE classifier_uid=?',
                  [self.uid])
        c.execute('DELETE FROM CLASSIFIER_RESOURCE WHERE classifier_uid=?',
                  [self.uid])
        c.execute('DELETE FROM CLASSIFIER WHERE uid=?', [self.uid])
        db.commit()
    except Exception:
        # Without this a partial delete would stay pending on the connection
        # and could be committed by an unrelated later operation.
        db.rollback()
        raise
    logging.info('Removed classifier %s', self.uid)
def save(self):
    """Insert this classifier, its resource links and its metadata.

    Does nothing when ``self.saved`` is already true. Otherwise one row is
    written to CLASSIFIER, one row per entry of ``self.resources`` to
    CLASSIFIER_RESOURCE and one row per entry of ``self.meta`` to
    CLASSIFIER_META, all in a single transaction. ``self.saved`` is set to
    True on success.

    Raises:
        Exception: if ``Classifier.exists(self.uid)`` is true, if
            ``Resource.exists`` is false for any referenced resource, or if
            the insert fails (the transaction is rolled back first).
    """
    if self.saved:
        return
    if Classifier.exists(self.uid):
        raise Exception('Classifier uid %s exists already' % self.uid)

    # check existence of resources
    for resource_uid in self.resources.values():
        if not Resource.exists(resource_uid):
            raise Exception(
                'Required resource %s of Classifier %s does not exist locally'
                % (resource_uid, self.uid))

    # Only touch the database once all preconditions have passed.
    db = persistence.db()
    c = db.cursor()

    # insert into DB
    try:
        c.execute(
            """INSERT INTO CLASSIFIER (
                uid, type, title, enabled, language, test_accuracy,
                training_set_size, created_on, finished_on,
                local_created_on, state)
            VALUES (?,?,?,?,?,?,?,?,?,?,?)""",
            [
                self.uid, self.model_type, self.title, self.enabled,
                self.language, self.test_accuracy, self.training_set_size,
                self.created_on, self.finished_on, self.local_created_on,
                self.state
            ])
        # .items() works on both Python 2 and 3, unlike .iteritems().
        for key, resource_uid in self.resources.items():
            c.execute(
                """INSERT INTO CLASSIFIER_RESOURCE (
                    classifier_uid, key, resource_uid)
                VALUES (?,?,?)""",
                [self.uid, key, resource_uid])
        for key, value in self.meta.items():
            c.execute(
                """INSERT INTO CLASSIFIER_META (
                    classifier_uid, key, value)
                VALUES (?,?,?)""",
                [self.uid, key, value])
        db.commit()
    except Exception:
        # Record the underlying cause before it is replaced by the generic
        # error below; KeyboardInterrupt/SystemExit are no longer trapped.
        logging.exception('Insert of classifier %s failed', self.uid)
        db.rollback()
        raise Exception('Failed to insert classifier %s into DB' % self.uid)

    self.saved = True
    logging.info('Inserted new classifier %s', self.uid)
def set_enabled(self, enabled=True):
    """Persist the enabled flag of this classifier and mirror it on the object.

    Args:
        enabled: new value written to the ``enabled`` column of the
            CLASSIFIER row whose uid is ``self.uid``.

    Raises:
        Exception: if the UPDATE or the commit fails. The transaction is
            rolled back and ``self.enabled`` keeps its previous value.
    """
    db = persistence.db()
    c = db.cursor()
    try:
        c.execute("UPDATE CLASSIFIER SET enabled=? WHERE uid=?",
                  [enabled, self.uid])
        db.commit()
    except Exception:
        logging.exception('Update of enabled flag failed for classifier %s',
                          self.uid)
        db.rollback()
        raise Exception('Failed to update classifier %s as enabled=%s'
                        % (self.uid, enabled))

    # Assign only after the commit so the in-memory flag never disagrees
    # with the stored one when the write fails.
    self.enabled = enabled
    logging.info('Set classifier enabled=%s %s', enabled, self.uid)
def create(cls, classifier_uid):
    """Create, save and return a new JobContext for a classifier.

    A fresh uid is generated with ``uuid.uuid1()``. The job's directory name
    is the creation time formatted as ``%Y%m%d%H%M%S`` followed by ``-`` and
    the uid.

    Args:
        classifier_uid: passed through to ``JobContext`` as
            ``classifier_uid``.

    Returns:
        The JobContext instance after ``save()`` has been called on it.
    """
    # generate uid
    uid = str(uuid.uuid1())

    # Take the timestamp once so dir_name and created_on describe the same
    # instant.
    now = datetime.datetime.now()
    dir_name = '%s-%s' % (now.strftime('%Y%m%d%H%M%S'), uid)

    # create and fill object
    job = JobContext(uid=uid,
                     dir_name=dir_name,
                     created_on=str(now),
                     classifier_uid=classifier_uid)
    job.save()
    job.logger.info('Created job %s' % uid)
    return job
def remove(self):
    """Delete this resource's files and its RESOURCE row.

    The resource is only removed when no CLASSIFIER_RESOURCE row references
    ``self.uid``. The directory ``config.RESOURCES_PATH/self.path`` is
    deleted on a best-effort basis: a failure there is logged and does not
    prevent the database row from being removed.

    Raises:
        Exception: if a CLASSIFIER_RESOURCE row still references this
            resource, or whatever the database layer raised while deleting
            the row (the transaction is rolled back first).
    """
    db = persistence.db()
    c = db.cursor()

    c.execute("""SELECT * FROM CLASSIFIER_RESOURCE WHERE resource_uid=?""",
              [self.uid])
    if c.fetchone() is not None:
        raise Exception(
            'Cannot delete resource %s because of existing dependencies'
            % (self.uid))

    try:
        shutil.rmtree(os.path.join(config.RESOURCES_PATH, self.path))
    except Exception:
        # Still best-effort, but no longer silent, and Ctrl-C / SystemExit
        # now propagate instead of being swallowed.
        logging.warning('Could not remove files of resource %s', self.uid,
                        exc_info=True)

    try:
        c.execute('DELETE FROM RESOURCE WHERE UID=?', [self.uid])
        db.commit()
    except Exception:
        db.rollback()
        raise
    logging.info('Removed resource %s', self.uid)
def save(self):
    """Write this job to the JOB table, updating or inserting as needed.

    When ``JobContext.exists(self.uid)`` is true only ``status``,
    ``progress_percentage`` and ``progress_text`` are updated; otherwise a
    complete row is inserted.

    Raises:
        Exception: if the statement or the commit fails. The transaction is
            rolled back and the underlying error is logged.
    """
    db = persistence.db()
    c = db.cursor()

    if JobContext.exists(self.uid):
        statement = """UPDATE JOB SET
                status=?, progress_percentage=?, progress_text=?
            WHERE uid=?"""
        params = [
            self.status, self.progress_percentage, self.progress_text,
            self.uid
        ]
        failure = 'Failed to update job status of %s' % self.uid
    else:
        # insert into DB
        statement = """INSERT INTO JOB (
                uid, dir_name, created_on, classifier_uid, status,
                progress_percentage, progress_text)
            VALUES (?,?,?,?,?,?,?)"""
        params = [
            self.uid, self.dir_name, self.created_on, self.classifier_uid,
            self.status, self.progress_percentage, self.progress_text
        ]
        failure = 'Failed to insert job %s' % self.uid

    try:
        c.execute(statement, params)
        db.commit()
    except Exception:
        # Keep the real cause in the log; callers only see the generic text.
        logging.exception(failure)
        db.rollback()
        raise Exception(failure)
def train(self, job, in_csv, in_desc_col, in_res_col, in_class_col):
    """Mark this classifier as training, then as ready, in the database.

    The classifier is saved first if ``self.saved`` is false. The actual
    training step is still a TODO, so ``job``, ``in_csv``, ``in_desc_col``,
    ``in_res_col`` and ``in_class_col`` are currently unused; only ``state``
    and ``finished_on`` are updated on the object and in the CLASSIFIER row.

    Raises:
        Exception: if ``self.trained()`` is true, or if either state update
            fails (the transaction is rolled back first).
    """
    db = persistence.db()
    c = db.cursor()

    if not self.saved:
        self.save()
    if self.trained():
        raise Exception('Model %s is already trained' % self.uid)

    # set training start
    logging.info('Starting to train classifier %s', self.uid)
    self.state = 'Training'
    try:
        c.execute("UPDATE CLASSIFIER SET state=? WHERE uid=?",
                  [self.state, self.uid])
        db.commit()
    except Exception:
        logging.exception('Could not mark classifier %s as training',
                          self.uid)
        db.rollback()
        raise Exception(
            'Failed to update classifier %s as training started' % self.uid)

    # run training
    # TODO
    # modelregistry.train(type, meta, resources, in_csv, in_text_col, in_class_col)
    # (...............)

    # set training end
    self.state = 'Ready'
    self.finished_on = str(datetime.datetime.now())
    try:
        c.execute(
            "UPDATE CLASSIFIER SET finished_on=?, state=? WHERE uid=?",
            [self.finished_on, self.state, self.uid])
        db.commit()
    except Exception:
        logging.exception('Could not mark classifier %s as ready', self.uid)
        db.rollback()
        # The original reused the "training started" text here, which made
        # the two failure points indistinguishable.
        raise Exception(
            'Failed to update classifier %s as training finished' % self.uid)

    logging.info('Finished training classifier %s', self.uid)
def add(cls, uid, resource_type, title, created_on, local_created_on, path):
    """Register a new resource in the RESOURCE table and return it.

    Args:
        uid: identifier of the resource; must not be present in RESOURCE yet.
        resource_type: stored in the ``type`` column.
        title: stored in the ``title`` column.
        created_on: stored in the ``created_on`` column.
        local_created_on: stored in the ``local_created_on`` column.
        path: stored in the ``path`` column.

    Returns:
        A ``Resource`` instance populated with the given values.

    Raises:
        Exception: if a RESOURCE row with this uid already exists, or if the
            insert fails (the transaction is rolled back first).
    """
    db = persistence.db()
    c = db.cursor()

    # check existence
    c.execute('SELECT * FROM RESOURCE WHERE uid=?', [uid])
    if c.fetchone() is not None:
        raise Exception(
            'Cannot add resource %s because it already exists' % uid)

    # create and fill object
    resource = Resource()
    resource.uid = uid
    resource.resource_type = resource_type
    resource.title = title
    resource.created_on = created_on
    resource.local_created_on = local_created_on
    resource.path = path

    # insert to DB
    try:
        c.execute(
            """INSERT INTO RESOURCE (
                uid, type, title, created_on, local_created_on, path)
            VALUES (?,?,?,?,?,?)""",
            [uid, resource_type, title, created_on, local_created_on, path])
        db.commit()
    except Exception:
        logging.exception('Insert of resource %s failed', uid)
        # The original left the failed transaction open; roll it back like
        # the other write paths do.
        db.rollback()
        raise Exception('Failed to insert resource %s into DB' % uid)

    # return new object
    logging.info('Inserted new resource %s', uid)
    return resource
import tarfile import datetime import json import logging import requests from threading import Thread from werkzeug import secure_filename import logging.handlers import fnmatch from multiprocessing.dummy import Pool as ThreadPool import persistence import config import model_registry db = persistence.db() c = db.cursor() _pool = ThreadPool(config.PARALLEL_JOBS) sql = """CREATE TABLE IF NOT EXISTS CLASSIFIER ( uid TEXT PRIMARY KEY NOT NULL, type TEXT, title TEXT, enabled INT, language TEXT, test_accuracy REAL, training_set_size INT, created_on TIMESTAMP, local_created_on TIMESTAMP, finished_on TIMESTAMP, state TEXT