Example #1
import datetime
import os
import time

import numpy as np
from bokeh.plotting import figure, output_file, save
from flask import send_from_directory


def statsGraphs():
    dbConn = DbConnector('wurst.db')
    opens = dbConn.getStatOpen()
    dbConn.close()
    usedNum = []
    usedDate = []
    genNum = []
    genDate = []

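    # build cumulative counts over time (tuples sorted by timestamp)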
    for date, use in sorted(opens['used'], key=lambda tup: tup[0]):
        if usedNum:
            usedNum.append(usedNum[-1] + use)
        else:
            usedNum.append(use)
        usedDate.append(datetime.datetime.fromtimestamp(date))
    for date, gen in sorted(opens['generated'], key=lambda tup: tup[0]):
        if genNum:
            genNum.append(genNum[-1] + gen)
        else:
            genNum.append(gen)
        genDate.append(datetime.datetime.fromtimestamp(date))
    # output to static HTML file
    root_dir = os.path.dirname(os.getcwd())

    output_file(os.path.join(root_dir, 'tmp', "lines.html"))

    # create a new plot with a title and axis labels
    p = figure(title="Wurst Down Chart",
               x_axis_label='Date',
               y_axis_label='',
               x_axis_type="datetime",
               plot_width=1000,
               plot_height=600)

    p.yaxis.major_tick_line_color = None  # turn off y-axis major ticks
    p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
    p.yaxis.visible = False

    # add a line renderer with legend and line thickness
    p.line(usedDate,
           usedNum,
           legend="used Wurstchers",
           line_width=2,
           color='#be1e3c')
    p.line(genDate,
           genNum,
           legend="generated Wurstchers",
           line_width=2,
           color='#66b4d3')
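    # reserve = fixed total stock (600) minus the cumulative used count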
    p.line(usedDate,
           list(600 - np.asarray(usedNum)),
           legend="Wurst Reserve",
           line_width=2,
           color='#e16d00')
    p.legend.location = "top_left"

    # show the results
    save(p)
    time.sleep(0.1)
    return send_from_directory(os.path.join(root_dir, 'tmp'), 'lines.html')
Example #2
import json

from flask import request


def getCode():
    dbConn = DbConnector('wurst.db')
    attr = json.loads(request.data.decode())
    try:
        code = dbConn.getCode(attr['volume'], attr['method'])
        ret = {'code': code}
    except InvalidPubMethodError as e:
        ret = {'error': e.message}
    dbConn.close()
    return json.dumps(ret)
Example #3
from flask import request


def methodStuff(method):
    dbConn = DbConnector('wurst.db')
    try:
        if request.method == 'PUT':
            dbConn.addPubMethod(method)
            return ''
        if request.method == 'POST':
            dbConn.enablePubMethod(method)
            return ''
        if request.method == 'DELETE':
            dbConn.blackList(method)
            return ''
    finally:
        # close on every path; the original early returns skipped dbConn.close()
        dbConn.close()
Example #4
def createDbManager(args, pipeEnd):
    """
    Create a listener ready to query the database.
    
    This method is blocking and SHOULD be used in a separate process. The
    object will listen to its pipeEnd to receive orders to query the database.
    The orders SHOULD be either Markers.END to signal that the
    process can end or a tuple (value from Markers, args corresponding to
    the marker).
    
    Markers.TEXT_LIST: associated with a list of CIDs, check if they are 
    already stored in the database.
    
    Markers.TEXT: associated with a list of parsed texts, store them in the
    database.

    Parameters
    ----------
    args : tuple
        Arguments to create a LegiConnector object.
    pipeEnd : multiprocessing.connection.Connection
        This connection MUST be read/write. The easiest way to get
        such an object is to use the second return value of 
        multiprocessing.Pipe(True).
    """
    order = pipeEnd.recv()
    with DbConnector(*args) as connector:
        prepareStatements(connector)
        while order != Markers.END:
            if order[0] == Markers.TEXT_LIST:
                _checkIfKnown(connector, pipeEnd, order[1])
            elif order[0] == Markers.TEXT:
                _storeText(connector, pipeEnd, order[1])
            order = pipeEnd.recv()
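A minimal driver sketch for the order protocol described in the docstring above. The names dbArgs, cids, and parsedTexts are illustrative, and it assumes that _checkIfKnown answers back over the same pipe:

from multiprocessing import Process, Pipe

parentEnd, childEnd = Pipe(True)  # read/write pipe, as createDbManager requires
manager = Process(target=createDbManager, args=(dbArgs, childEnd))
manager.start()

parentEnd.send((Markers.TEXT_LIST, cids))    # ask which CIDs are already stored
knownStatus = parentEnd.recv()               # reply from _checkIfKnown (assumed)

parentEnd.send((Markers.TEXT, parsedTexts))  # store parsed texts
parentEnd.send(Markers.END)                  # signal that the process can end
manager.join()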
Example #5
"""Because this script uses multiprocessing, it cannot be run
from Spyder, it must be run in an external terminal. To do that:
Run > Configuration per file > Execute in an external system terminal
"""
from converter import createTextProvider, createDbManager, Middleman

if __name__ == "__main__":
    from multiprocessing import Process, Pipe
    init_db = False
    runTest = True
    import secret
    from converter import SearchFilters, Markers
    if init_db:
        print("Initialising DB")
        from dbStructure import initDb
        from dbConnector import DbConnector
        initDb(DbConnector(secret.DB_NAME, secret.DB_USER, secret.DB_PW))
        print("DB initialised")
    if runTest:
        print("Setting up query process")
        legi1, legi2 = Pipe(True)
        db1, db2 = Pipe(True)
        command1, command2 = Pipe(True)
        legiProcess = Process(target=createTextProvider,
                              args=((secret.CLIENT_ID, secret.CLIENT_SECRET),
                                    legi2))
        dbProcess = Process(target=createDbManager,
                            args=((secret.DB_NAME, secret.DB_USER,
                                   secret.DB_PW), db2))
        middleProcess = Process(target=Middleman.create,
                                args=(legi1, db1, command1))
        for query in SearchFilters:
Example #6
import json


def statsOpen():
    dbConn = DbConnector('wurst.db')
    opens = dbConn.getStatOpen()
    dbConn.close()
    return json.dumps(opens)
Example #7
import json


def checkCode(code):
    dbConn = DbConnector('wurst.db')
    valid = dbConn.checkCode(code)
    dbConn.close()
    return json.dumps({'valid': valid})
Example #8
import csv
import os
from ast import literal_eval

import joblib
import numpy as np
from sklearn.preprocessing import StandardScaler


def run():
    """Run the classification process.

    Raises:
        SystemExit: if a classifier, the scaler, or the input dataset
            cannot be set up.
    """
    legalPath = args.outputDir + "/legalModel.clf"
    labelPath = args.outputDir + "/labelModel.clf"
    scalePath = args.outputDir + "/scaleModel.clf"

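    # CLI arguments take precedence; otherwise fall back to the
    # CLASSIFIER_* environment variables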
    svmType = args.svmType if args.svmType is not None else os.environ[
        "CLASSIFIER_SVM_TYPE"]
    kernelType = args.kernelType if args.kernelType is not None else os.environ[
        "CLASSIFIER_KERNEL_TYPE"]
    cost = args.cost if args.cost is not None else literal_eval(
        os.environ["CLASSIFIER_COST"])
    nu = args.nu if args.nu is not None else literal_eval(
        os.environ["CLASSIFIER_NU"])
    degree = args.degree if args.degree is not None else literal_eval(
        os.environ["CLASSIFIER_DEGREE"])
    gamma = args.gamma if args.gamma is not None else literal_eval(
        os.environ["CLASSIFIER_GAMMA"])
    rValue = args.rValue if args.rValue is not None else literal_eval(
        os.environ["CLASSIFIER_R"])
    kFold = args.kFold if args.kFold is not None else literal_eval(
        os.environ["CLASSIFIER_KFOLD"])
    cacheSize = args.cacheSize if args.cacheSize is not None else literal_eval(
        os.environ["CLASSIFIER_CACHE_SIZE"])
    shrinking = args.shrinking if args.shrinking is not None else literal_eval(
        os.environ["CLASSIFIER_SHRINKING"])
    probability = args.probability if args.probability is not None else literal_eval(
        os.environ["CLASSIFIER_PROBABILITY"])
    tol = args.eps if args.eps is not None else literal_eval(
        os.environ["CLASSIFIER_EPS"])

    labelClfTrained = False
    legalClfTrained = False
    scaleModelTrained = False
    try:
        labelClf = restoreModel(labelPath, "LabelClassifier")
        labelClfTrained = True
    except Exception as e:
        logger.warning(str(e))
        logger.warning(
            "Cannot restore previous label classifier - creating new one")
        try:
            labelClf = Classifier.fromParams(svmType=svmType,
                                             kernelType=kernelType,
                                             cost=cost,
                                             nu=nu,
                                             degree=degree,
                                             gamma=gamma,
                                             rValue=rValue,
                                             kFold=kFold,
                                             cacheSize=cacheSize,
                                             shrinking=shrinking,
                                             probability=probability,
                                             tol=tol,
                                             name="LabelClassifier")
        except Exception as e:
            logger.exception(str(e))
            logger.error("Could not create label classifier instance")
            raise SystemExit(-1)

    try:
        legalClf = restoreModel(legalPath, "LegalClassifier")
        legalClfTrained = True
    except Exception as e:
        logger.warning(str(e))
        logger.warning(
            "Cannot restore previous legal classifier - creating new one")
        try:
            legalClf = Classifier.fromParams(svmType=svmType,
                                             kernelType=kernelType,
                                             cost=cost,
                                             nu=nu,
                                             degree=degree,
                                             gamma=gamma,
                                             rValue=rValue,
                                             kFold=kFold,
                                             cacheSize=cacheSize,
                                             shrinking=shrinking,
                                             probability=probability,
                                             tol=tol,
                                             name="LegalClassifier")
        except Exception as e:
            logger.exception(str(e))
            logger.error("Could not creat legal classifier instance")
            raise SystemExit(-1)
    try:
        scaler = joblib.load(scalePath)
        scaleModelTrained = True
    except Exception as e:
        logger.warning(str(e))
        logger.warning(
            "Cannot restore previous scale model - creating new one")
        try:
            scaler = StandardScaler()
        except Exception as e:
            logger.exception(str(e))
            logger.error("Could not create scaler - giving up")
            raise SystemExit(-1)

    db = DbConnector(dbName=os.environ["TDSE_DB_NAME"],
                     userName=os.environ["TDSE_DB_USER"],
                     host=os.environ["DB_HOST"],
                     port=os.environ["TDSE_DB_PORT"],
                     password=os.environ["TDSE_DB_PASSWORD"])

    labels, labelSession = db.getAllLabels()
    if len(labels) == 0:
        insertLabels(db)
        labels, _ = db.getAllLabels(labelSession)
    labelModelsByLabel = {}
    for label in labels:
        labelModelsByLabel[label.label] = label
    mode = args.mode
    if not mode:
        if args.datasetPath:
            mode = "train"
        elif labelClfTrained and legalClfTrained:
            mode = "apply"
        else:
            logger.error("Cannot apply empty models. Please train first")
            raise SystemExit(-1)
    else:
        if mode == "apply" and not labelClfTrained and not legalClfTrained:
            logger.error("Cannot apply empty models. Please train first")
            raise SystemExit(-1)

    language = args.language if args.language is not None else os.environ[
        "CLASSIFIER_LANGUAGE"]

    languageId = None
    if language is not None and language != "all":
        languageModel, languageSession = db.getLanguage(language)
        languageId = languageModel.languageId
        languageSession.commit()
        languageSession.close()
    dfQuantile = args.minDocFrequency if args.minDocFrequency is not None else literal_eval(
        os.environ["CLASSIFIER_MIN_DF_FREQ"])
    quantile = args.quantile if args.quantile is not None else literal_eval(
        os.environ["CLASSIFIER_QUANTILE"])
    limit = args.limit if args.limit is not None else literal_eval(
        os.environ["CLASSIFIER_LIMIT"])

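    # Train: vectorise the labelled dataset, fit the scaler and both
    # classifiers, then persist all three models for later "apply" runs.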
    if mode == "train":
        dataset, trainingSession = db.getTrainingData(limit=limit,
                                                      quantile=quantile,
                                                      mode="bow",
                                                      dfQuantile=dfQuantile,
                                                      languageIds=tuple(
                                                          [languageId]))
        X_train = []
        Y_label = []
        Y_legal = []
        for dataEntry in dataset:
            X_train.append(dataEntry[0])
            model = dataEntry[1]
            Y_legal.append(1 if model.legal else 0)
            Y_label.append(model.primaryLabelLabelId)
        X_train = np.array(X_train)
        Y_legal = np.array(Y_legal)
        Y_label = np.array(Y_label)

        # Init scaler if not yet done
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)

        # store scaler (will be needed in the application phase)
        storeModel(scalePath, scaler)

        # train both classifiers
        labelClf.train(X_train, Y_label)
        legalClf.train(X_train, Y_legal)

        storeModel(labelPath, labelClf.clf)
        storeModel(legalPath, legalClf.clf)
        trainingSession.commit()
        trainingSession.close()
    elif mode == "apply":
        if not scaleModelTrained:
            logger.error(
                "Please run a training run first - otherwise, classification is impossible"
            )
            logger.error(
                "If you did a test run, check the outputModels directory - does it contain a scaleModel.clf?"
            )
            raise ValueError("scale model has to be trained first")
        first = True
        dataset = []
        while len(dataset) >= limit or first:
            first = False
            dataset, labellingSession = db.getLabellingData(
                limit=limit,
                mode="bow",
                dfQuantile=dfQuantile,
                languageIds=tuple([languageId]))
            X_apply = []
            cleanContents = []
            for dataEntry in dataset:
                X_apply.append(dataEntry[0])
                cleanContents.append(dataEntry[1])
            X_apply = np.array(X_apply)
            X_apply = scaler.transform(X_apply)

            Y_label_r = labelClf.apply(X_apply)
            Y_legal_r = legalClf.apply(X_apply)

            for idx, cleanContent in enumerate(cleanContents):
                label = Y_label_r[idx][0]
                legal = Y_legal_r[idx][0]
                labelCertainty = Y_label_r[idx][1]
                legalCertainty = Y_legal_r[idx][1]
                cleanContent.primaryLabelLabelId = label
                cleanContent.legal = legal
                cleanContent.labelCertainty = labelCertainty
                cleanContent.legalCertainty = legalCertainty
            labellingSession.commit()
            labellingSession.close()
            # Insertion via: INSERT => on conflict do update legal & label
            # => this way we get bulk-update capabilities -- if needed
            # (see the upsert sketch after this example)
    elif mode == "insert":
        # TODO: read in specified csv file, insert those labelled entries into the db
        # likely useful to introduce upsert behaviour
        if not args.datasetPath:
            logger.error(
                "Please specify a dataset with -d if --mode insert is specified"
            )
            raise SystemExit(-1)
        with open(args.datasetPath) as datasetFile:
            reader = csv.DictReader(datasetFile,
                                    delimiter=";",
                                    quoting=csv.QUOTE_NONE)
            insertSession = db.Session()
            for row in reader:
                values = {}
                values["legalCertainty"] = 1.0
                values["labelCertainty"] = 1.0
                label = row["label"]
                values["legal"] = "legal" == row["legal"]
                try:
                    values["primaryLabelLabelId"] = labelModelsByLabel[
                        label].labelId
                except Exception as e:
                    logger.exception(str(e))
                    logger.error("Label: " + label)
                    raise SystemExit(-1)
                insertSession.query(db.cleanContents) \
                    .filter_by(cleanContentId=row["cleanContentId"]) \
                    .update(values)
            insertSession.commit()
            insertSession.close()
    labelSession.commit()
    labelSession.close()
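The comment in the "apply" branch above hints at doing the updates as bulk upserts. Below is a minimal sketch of that idea, assuming PostgreSQL and SQLAlchemy Core; the cleanContents table and its column names come from the code above, while the function name and the rows argument are illustrative:

from sqlalchemy.dialects.postgresql import insert


def bulkUpsertLabels(session, cleanContents, rows):
    # rows: list of dicts with cleanContentId, legal, primaryLabelLabelId,
    # labelCertainty, and legalCertainty keys
    stmt = insert(cleanContents).values(rows)
    # on conflict with an existing cleanContentId, update the label fields
    stmt = stmt.on_conflict_do_update(
        index_elements=["cleanContentId"],
        set_={
            "legal": stmt.excluded.legal,
            "primaryLabelLabelId": stmt.excluded.primaryLabelLabelId,
            "labelCertainty": stmt.excluded.labelCertainty,
            "legalCertainty": stmt.excluded.legalCertainty,
        })
    session.execute(stmt)
    session.commit()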