def statsGraphs():
    dbConn = DbConnector('wurst.db')
    opens = dbConn.getStatOpen()
    dbConn.close()

    usedNum = []
    usedDate = []
    genNum = []
    genDate = []
    for date, use in sorted(opens['used'], key=lambda tup: tup[0]):
        if len(usedNum) > 0:
            usedNum.append(usedNum[-1] + use)
        else:
            usedNum.append(use)
        usedDate.append(datetime.datetime.fromtimestamp(date))
    for date, gen in sorted(opens['generated'], key=lambda tup: tup[0]):
        if len(genNum) > 0:
            genNum.append(genNum[-1] + gen)
        else:
            genNum.append(gen)
        genDate.append(datetime.datetime.fromtimestamp(date))

    # output to static HTML file
    root_dir = os.path.dirname(os.getcwd())
    output_file(os.path.join(root_dir, 'tmp', "lines.html"))

    # create a new plot with a title and axis labels
    p = figure(title="Wurst Down Chart", x_axis_label='Date', y_axis_label='',
               x_axis_type="datetime", plot_width=1000, plot_height=600)
    p.yaxis.major_tick_line_color = None  # turn off y-axis major ticks
    p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
    p.yaxis.visible = False

    # add a line renderer with legend and line thickness
    p.line(usedDate, usedNum, legend="used Wurstchers", line_width=2, color='#be1e3c')
    p.line(genDate, genNum, legend="generated Wurstchers", line_width=2, color='#66b4d3')
    p.line(usedDate, list(600 - np.asarray(usedNum)), legend="Wurst Reserve",
           line_width=2, color='#e16d00')
    p.legend.location = "top_left"

    # show the results
    save(p)
    time.sleep(0.1)
    return send_from_directory(os.path.join(root_dir, 'tmp'), 'lines.html')
def getCode():
    dbConn = DbConnector('wurst.db')
    attr = json.loads(request.data.decode())
    try:
        code = dbConn.getCode(attr['volume'], attr['method'])
        ret = {'code': code}
    except InvalidPubMethodError as e:
        ret = {'error': e.message}
    dbConn.close()
    return json.dumps(ret)
def methodStuff(method):
    dbConn = DbConnector('wurst.db')
    try:
        if request.method == 'PUT':
            dbConn.addPubMethod(method)
            return ''
        if request.method == 'POST':
            dbConn.enablePubMethod(method)
            return ''
        if request.method == 'DELETE':
            dbConn.blackList(method)
            return ''
    finally:
        # close the connection regardless of which HTTP method was handled
        dbConn.close()
def createDbManager(args, pipeEnd):
    """ Create a listener ready to query the database.

    This method is blocking and SHOULD be used in a separate process.
    The object will listen to its pipeEnd to receive orders to query the
    database. The orders SHOULD be either Markers.END to signal that the
    process can end, or a tuple (value from Markers, args corresponding to
    the marker).

    Markers.TEXT_LIST: associated with a list of CIDs, check if they are
        already stored in the database.
    Markers.TEXT: associated with a list of parsed texts, store them in
        the database.

    Parameters
    ----------
    args : tuple
        Arguments to create a DbConnector object.
    pipeEnd : multiprocessing.connection.Connection
        This connection MUST be read/write. The easiest way to get such an
        object is to use the second return value of multiprocessing.Pipe(True).
    """
    order = pipeEnd.recv()
    with DbConnector(*args) as connector:
        prepareStatements(connector)
        while order != Markers.END:
            if order[0] == Markers.TEXT_LIST:
                _checkIfKnown(connector, pipeEnd, order[1])
            elif order[0] == Markers.TEXT:
                _storeText(connector, pipeEnd, order[1])
            order = pipeEnd.recv()
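# A minimal usage sketch for the pipe protocol described in the docstring
# above. It is illustrative only: the connector arguments are whatever
# DbConnector expects, and the assumption that _checkIfKnown replies on the
# same pipe (and what that reply contains) is not confirmed by this module.
def _exampleDbManagerUsage(dbArgs, cids):
    """Illustrative only: drive createDbManager from a parent process."""
    from multiprocessing import Process, Pipe

    parentEnd, childEnd = Pipe(True)
    worker = Process(target=createDbManager, args=(dbArgs, childEnd))
    worker.start()

    # ask the manager which CIDs are already stored
    parentEnd.send((Markers.TEXT_LIST, cids))
    reply = parentEnd.recv()  # assumed: the manager answers on the same pipe

    # shut the listener down once we are done
    parentEnd.send(Markers.END)
    worker.join()
    return reply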
from Spyder, it must be run in an external terminal. To do that:
Run > Configuration per file > Execute in an external system terminal
"""
from converter import createTextProvider, createDbManager, Middleman

if __name__ == "__main__":
    from multiprocessing import Process, Pipe

    init_db = False
    runTest = True

    import secret
    from converter import SearchFilters, Markers

    if init_db:
        print("Initialising DB")
        from dbStructure import initDb
        from dbConnector import DbConnector
        initDb(DbConnector(secret.DB_NAME, secret.DB_USER, secret.DB_PW))
        print("DB initialised")

    if runTest:
        print("Setting up query process")
        legi1, legi2 = Pipe(True)
        db1, db2 = Pipe(True)
        command1, command2 = Pipe(True)
        legiProcess = Process(target=createTextProvider,
                              args=((secret.CLIENT_ID, secret.CLIENT_SECRET), legi2))
        dbProcess = Process(target=createDbManager,
                            args=((secret.DB_NAME, secret.DB_USER, secret.DB_PW), db2))
        middleProcess = Process(target=Middleman.create,
                                args=(legi1, db1, command1))
        for query in SearchFilters:
def statsOpen():
    dbConn = DbConnector('wurst.db')
    opens = dbConn.getStatOpen()
    dbConn.close()
    return json.dumps(opens)
def checkCode(code):
    dbConn = DbConnector('wurst.db')
    valid = dbConn.checkCode(code)
    dbConn.close()
    return json.dumps({'valid': valid})
def run(): """Run the classification process Raises: SystemExit: Description """ legalPath = args.outputDir + "/legalModel.clf" labelPath = args.outputDir + "/labelModel.clf" scalePath = args.outputDir + "/scaleModel.clf" svmType = args.svmType if args.svmType is not None else os.environ[ "CLASSIFIER_SVM_TYPE"] kernelType = args.kernelType if args.kernelType is not None else os.environ[ "CLASSIFIER_KERNEL_TYPE"] cost = args.cost if args.cost is not None else literal_eval( os.environ["CLASSIFIER_COST"]) nu = args.nu if args.nu is not None else literal_eval( os.environ["CLASSIFIER_NU"]) degree = args.degree if args.degree is not None else literal_eval( os.environ["CLASSIFIER_DEGREE"]) gamma = args.gamma if args.gamma is not None else literal_eval( os.environ["CLASSIFIER_GAMMA"]) rValue = args.rValue if args.rValue is not None else literal_eval( os.environ["CLASSIFIER_R"]) kFold = args.kFold if args.kFold is not None else literal_eval( os.environ["CLASSIFIER_KFOLD"]) cacheSize = args.cacheSize if args.cacheSize is not None else literal_eval( os.environ["CLASSIFIER_CACHE_SIZE"]) shrinking = args.shrinking if args.shrinking is not None else literal_eval( os.environ["CLASSIFIER_SHRINKING"]) probability = args.probability if args.probability is not None else literal_eval( os.environ["CLASSIFIER_PROBABILITY"]) tol = args.eps if args.eps is not None else literal_eval( os.environ["CLASSIFIER_EPS"]) labelClfTrained = False legalClfTrained = False scaleModelTrained = False try: labelClf = restoreModel(labelPath, "LabelClassifier") labelClfTrained = True except Exception as e: logger.warning(str(e)) logger.warning( "Cannot restore previous label classifier - creating new one") try: labelClf = Classifier.fromParams(svmType=svmType, kernelType=kernelType, cost=cost, nu=nu, degree=degree, gamma=gamma, rValue=rValue, kFold=kFold, cacheSize=cacheSize, shrinking=shrinking, probability=probability, tol=tol, name="LabelClassifier") except Exception as e: logger.exception(str(e)) logger.error("Could not create label classifier instance") raise SystemExit(-1) try: legalClf = restoreModel(legalPath, "LegalClassifier") legalClfTrained = True except Exception as e: logger.warning(str(e)) logger.warning( "Cannot restore previous label classifier - creating new one") try: legalClf = Classifier.fromParams(svmType=svmType, kernelType=kernelType, cost=cost, nu=nu, degree=degree, gamma=gamma, rValue=rValue, kFold=kFold, cacheSize=cacheSize, shrinking=shrinking, probability=probability, tol=tol, name="LegalClassifier") except Exception as e: logger.exception(str(e)) logger.error("Could not creat legal classifier instance") raise SystemExit(-1) try: scaler = joblib.load(scalePath) scaleModelTrained = True except Exception as e: try: scaler = StandardScaler() except Exception as e: logger.exception(str(e)) logger.error("could not create scaler - giving up") raise SystemExit(-1) db = DbConnector(dbName=os.environ["TDSE_DB_NAME"], userName=os.environ["TDSE_DB_USER"], host=os.environ["DB_HOST"], port=os.environ["TDSE_DB_PORT"], password=os.environ["TDSE_DB_PASSWORD"]) labels, labelSession = db.getAllLabels() if len(labels) == 0: insertLabels(db) labels, _ = db.getAllLabels(labelSession) labelModelsByLabel = {} for label in labels: labelModelsByLabel[label.label] = label mode = args.mode if not mode: if args.datasetPath: mode = "train" elif labelClfTrained and legalClfTrained: mode = "apply" else: logger.error("Cannot apply empty models. 
Please train first") raise SystemExit(-1) else: if mode == "apply" and not labelClfTrained and not legalClfTrained: logger.error("Cannot apply empty models. Please train first") raise SystemExit(-1) language = args.language if args.language is not None else os.environ[ "CLASSIFIER_LANGUAGE"] languageId = None if language is not None and language != "all": languageModel, languageSession = db.getLanguage(language) languageId = languageModel.languageId languageSession.commit() languageSession.close() dfQuantile = args.minDocFrequency if args.minDocFrequency is not None else literal_eval( os.environ["CLASSIFIER_MIN_DF_FREQ"]) quantile = args.quantile if args.quantile is not None else literal_eval( os.environ["CLASSIFIER_QUANTILE"]) limit = args.limit if args.limit is not None else literal_eval( os.environ["CLASSIFIER_LIMIT"]) if mode == "train": dataset, trainingSession = db.getTrainingData(limit=limit, quantile=quantile, mode="bow", dfQuantile=dfQuantile, languageIds=tuple( [languageId])) X_train = [] Y_label = [] Y_legal = [] for dataEntry in dataset: X_train.append(dataEntry[0]) model = dataEntry[1] Y_legal.append(1 if model.legal else 0) Y_label.append(model.primaryLabelLabelId) X_train = np.array(X_train) Y_legal = np.array(Y_legal) Y_label = np.array(Y_label) # Init scaler if not yet done scaler.fit(X_train) X_train = scaler.transform(X_train) # store scaler (will be needed in the application phase) storeModel(scalePath, scaler) # train both classifiers labelClf.train(X_train, Y_label) legalClf.train(X_train, Y_legal) storeModel(labelPath, labelClf.clf) storeModel(legalPath, legalClf.clf) trainingSession.commit() trainingSession.close() elif mode == "apply": if not scaleModelTrained: logger.error( "Please first run a train run - Otherwise, classification is impossible" ) logger.error( "If you did a test run check the outputModels directory - does it contain a scaleModel.clf?" 
) raise ValueError("scale model has to be trained first") first = True dataset = [] while len(dataset) >= limit or first: first = False dataset, labellingSession = db.getLabellingData( limit=limit, mode="bow", dfQuantile=dfQuantile, languageIds=tuple([languageId])) X_apply = [] cleanContents = [] for dataEntry in dataset: X_apply.append(dataEntry[0]) cleanContents.append(dataEntry[1]) X_apply = np.array(X_apply) X_apply = scaler.transform(X_apply) Y_label_r = labelClf.apply(X_apply) Y_legal_r = legalClf.apply(X_apply) for idx, cleanContent in enumerate(cleanContents): label = Y_label_r[idx][0] legal = Y_legal_r[idx][0] labelCertainty = Y_label_r[idx][1] legalCertainty = Y_legal_r[idx][1] cleanContent.primaryLabelLabelId = label cleanContent.legal = legal cleanContent.labelCertainty = labelCertainty cleanContent.legalCertainty = legalCertainty labellingSession.commit() labellingSession.close() # Insertion via: Insert => on conflict do update legal&label # => this way we get bulkupdate capabilities -- if needed elif mode == "insert": # TODO: read in specified csv file, insert those labelled entries into the db # likely useful to introduce upsert behaviour if not args.datasetPath: logger.error( "Please specify a dataset with -d if --mode insert is specified" ) raise SystemExit(-1) with open(args.datasetPath, ) as datasetFile: reader = csv.DictReader(datasetFile, delimiter=";", quoting=csv.QUOTE_NONE) insertSession = db.Session() for row in reader: values = {} values["legalCertainty"] = 1.0 values["labelCertainty"] = 1.0 label = row["label"] values["legal"] = "legal" == row["legal"] try: values["primaryLabelLabelId"] = labelModelsByLabel[ label].labelId except Exception as e: logger.exception(str(e)) logger.error("Label: " + label) raise SystemExit(-1) insertSession.query(db.cleanContents).\ filter_by(cleanContentId = row["cleanContentId"]).\ update(values) insertSession.commit() insertSession.close() labelSession.commit() labelSession.close()
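# A hedged sketch of the upsert hinted at by the "insert" branch's comments
# above ("Insert => on conflict do update legal&label" for bulk update
# capabilities). It assumes the backing database is PostgreSQL, that a
# SQLAlchemy table object for the clean contents is reachable (called
# cleanContentsTable here, a hypothetical name), and that the column names
# match those used in run(); adapt them to the actual schema before use.
from sqlalchemy.dialects.postgresql import insert as pg_insert


def _upsertLabelledRows(session, cleanContentsTable, rows):
    """Illustrative only: bulk-upsert labelled rows in a single statement.

    rows is a list of dicts with keys cleanContentId, legal,
    primaryLabelLabelId, legalCertainty and labelCertainty.
    """
    stmt = pg_insert(cleanContentsTable).values(rows)
    stmt = stmt.on_conflict_do_update(
        index_elements=[cleanContentsTable.c.cleanContentId],
        set_={
            "legal": stmt.excluded.legal,
            "primaryLabelLabelId": stmt.excluded.primaryLabelLabelId,
            "legalCertainty": stmt.excluded.legalCertainty,
            "labelCertainty": stmt.excluded.labelCertainty,
        },
    )
    session.execute(stmt)
    session.commit()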