Ejemplo n.º 1
0
def processAsk():
    """
    Gets a prediction from the model.

    Required params:\n
    -action = 'ask'\n
    -stoken\n
    -datatype => text or blob\n
    -data => raw text, or one base64 blob (str) / a list of base64 blobs\n
    ('tags' is not read by this endpoint.)

    :return: (response, status) tuple. On success the payload carries the
             predicted sentiment, the suggested tags and the submitted data
             with 200. For a list of blobs, 'predictedSentiment' and
             'suggested' are lists in the same order as the input. Failures
             return an error payload with 400 (bad request / no model) or
             403 (bad token, action or datatype).
    """

    def _fail(message, code):
        # Every failure shares one envelope: error flag set, no data.
        return jsonify({
            'status': {
                'error': True,
                'message': message
            },
            'data': None
        }), code

    def _ok(sentiment, tags, text):
        return jsonify({
            'status': RETURN_SUCCESS_STATUS,
            'data': {
                'predictedSentiment': sentiment,
                'suggested': tags,
                'text': text
            }
        }), 200

    def _discard(path):
        # The helper may hand back something that is not a file on failure,
        # so only delete what actually exists on disk.
        if os.path.isfile(path):
            os.remove(path)

    request.get_data()
    if request.json is None:
        return _fail(
            "Incorrect header type. Header type should be application/json.",
            400)

    noMissingData, data = getRequiredParameters(request,
                                                stoken='',
                                                action='',
                                                datatype='',
                                                data='')

    if not noMissingData:
        # On failure the second element is used as the missing parameter name.
        return _fail('Missing required parameter \'' + data + '\'', 400)

    con = Conn()
    userID = con.checkToken(data['stoken'])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != "ask":
        return _fail('Wrong action. This endpoint is only for ask.', 403)

    if data['datatype'] not in ('text', 'blob'):
        return _fail(
            'Wrong datatype. This endpoint only accepts raw text as \'text\' and pdf or txt files as \'blob\'.',
            403)

    payload = data['data']
    if data['datatype'] == 'blob' and not isinstance(payload, (list, str)):
        return _fail(
            'Data in the \'data\' field should be sent as a string or a list of items.',
            400)

    # A prediction needs an existing model whatever the datatype is.
    modelInfo = con.getModelInfo(str(userID))
    if modelInfo == [None, None]:
        return _fail("No model created yet.", 400)
    modelID, modelName = modelInfo

    if data['datatype'] == 'text':
        # remove escape characters, if any
        txtdata = str(request.json['data']).replace("\\", "")

        # Only predict when the text resembles something the model was
        # taught before; otherwise answer "Not available".
        availTraining = con.getAvailableTraining(modelID)
        ifPrevKnowSimilar = any(
            getCosineSimilarity(txtdata, eachTraining[1]) >=
            PREVIOUS_KNOWLEDGE_SIMILARITY_RATE
            for eachTraining in availTraining)

        if not ifPrevKnowSimilar:
            return _ok("Not available", "Not available", txtdata)

        model = Classifier(makeNewModel=False, modelName=modelName)
        predSentiment, retTags = model.predict(txtdata)
        return _ok(predSentiment, retTags, txtdata)

    # datatype == 'blob': a single file is handled as a one-item batch.
    model = Classifier(makeNewModel=False, modelName=modelName)
    multiple = isinstance(payload, list)
    items = payload if multiple else [payload]

    predSentiment = []
    retTags = []
    for item in items:
        filename = str(uuid.uuid4())  # generate temp file name
        status, filepath = getFileBase64(item, filename)

        # in case file received is not a text-based (.pdf/.txt) file
        if not filepath.endswith(('pdf', 'txt')):
            _discard(filepath)
            return _fail("Wrong format for data.", 400)

        if not status:
            _discard(filepath)
            return _fail(ERRORFILEEXTEN, 400)

        try:
            textData = readTextFileContents(filepath)
        finally:
            # the temp file is no longer needed, even if reading failed
            _discard(filepath)

        sent, tag = model.predict(textData)
        predSentiment.append(sent)
        retTags.append(tag)

    if multiple:
        return _ok(predSentiment, retTags, payload)
    return _ok(predSentiment[0], retTags[0], payload)
Ejemplo n.º 2
0
def processTeach():
    """
    Teaches the model with new knowledge.

    Required params:\n
    -action = 'teach'\n
    -stoken\n
    -datatype => text or blob\n
    -data => raw text, or one base64 blob (str) / a list of base64 blobs\n
    -sentiment => must be one of the labels in _labelAll\n
    -tags => list of tags\n

    :return: (response, status) tuple. A success message with 201 once every
             submitted text/file has been trained and recorded; an error
             payload with 400 (bad request / no model) or 403 (bad token,
             action or datatype) otherwise.
    """

    def _fail(message, code):
        # Every failure shares one envelope: error flag set, no data.
        return jsonify({
            'status': {
                'error': True,
                'message': message
            },
            'data': None
        }), code

    def _discard(paths):
        # Conversion helpers may return a non-path value on failure, so only
        # delete entries that really exist on disk.
        for path in paths:
            if os.path.isfile(path):
                os.remove(path)

    request.get_data()
    if request.json is None:
        return _fail(
            "Incorrect header type. Header type should be application/json.",
            400)

    data = request.json

    # Every key below is read unconditionally further down, so reject the
    # request up front instead of failing later with a KeyError.
    for required in ('stoken', 'action', 'datatype', 'data', 'sentiment',
                     'tags'):
        if not isinstance(data, dict) or required not in data:
            return _fail('Missing required parameter \'' + required + '\'',
                         400)

    con = Conn()
    userID = con.checkToken(data['stoken'])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != "teach":
        return _fail('Wrong action. This endpoint is only for teach.', 403)

    if data['datatype'] not in ('text', 'blob'):
        return _fail(
            'Wrong datatype. This endpoint only accepts raw text as \'text\' and pdf or txt files as \'blob\'.',
            403)

    tags = data['tags']
    sentiment = data['sentiment']

    # Validate the label for text and blob alike, before anything is trained.
    if sentiment not in _labelAll:
        return _fail("Error in sentiments.", 400)

    payload = data['data']
    if data['datatype'] == 'blob' and not isinstance(payload, (list, str)):
        return _fail(
            'Data in the \'data\' field should be sent as a string or a list of items.',
            400)

    # Check for a model before writing any temp file to disk.
    modelInfo = con.getModelInfo(str(userID))
    if modelInfo == [None, None]:
        return _fail("No model created yet.", 400)
    modelID, modelName = modelInfo

    if data['datatype'] == 'text':
        # remove escape characters, if any
        texts = [str(payload).replace("\\", "")]
    else:
        # A single file is handled as a one-item batch.
        items = payload if isinstance(payload, list) else [payload]

        statuses, files = [], []
        for item in items:
            filename = str(uuid.uuid4())  # generate temp file name
            # NOTE(review): a truthy first element is treated as success and
            # the second element as the temp file path - confirm against
            # getFileBase64.
            iferr, retval = getFileBase64(item, filename, userID)

            # in case file received is not a text-based (.pdf/.txt) file
            if not retval.endswith(('pdf', 'txt')):
                _discard(files + [retval])
                return _fail("Wrong format for data.", 400)

            statuses.append(iferr)
            files.append(retval)

        if not all(statuses):  # if any error during converting the file
            _discard(files)
            return _fail(ERRORFILEEXTEN, 400)

        # Read everything first so a broken file aborts the request before
        # the model is touched; temp files are removed either way.
        try:
            texts = [
                readTextFileContents(eachFile, return_metadata=False)
                for eachFile in files
            ]
        finally:
            _discard(files)

    model = Classifier(makeNewModel=False, modelName=modelName)
    for txtdata in texts:
        processedText = model.train(text=txtdata,
                                    sentiment=sentiment,
                                    tags=tags,
                                    returnProcessedText=True)
        con.addNewTraining(modelID=modelID,
                           tags=tags,
                           sentiment=sentiment,
                           processedText=processedText,
                           rawText=txtdata)

    return jsonify({'status': RETURN_SUCCESS_STATUS, 'data': None}), 201
Ejemplo n.º 3
0
def getTrainingData():
    """
    Lists every training entry stored for the caller's model.

    Required params:\n
    -action = 'getData'\n
    -stoken\n

    :return: (response, status) tuple. 200 with the entries under
             data.availableTraining, 400 for a non-JSON or incomplete
             request, 403 for a bad token or action.
    """
    request.get_data()
    if request.json is None:
        badHeader = {
            'status': {
                'error': True,
                'message': "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }
        return jsonify(badHeader), 400

    paramsOk, data = getRequiredParameters(request, stoken='', action='')
    if not paramsOk:
        # here 'data' is used as the name of the missing parameter
        missing = {
            'status': {
                'message': "Missing required parameter '" + data + "'",
                'error': True
            },
            'data': None
        }
        return jsonify(missing), 400

    con = Conn()
    userID = con.checkToken(data["stoken"])
    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'getData':
        wrongAction = {
            'status': {
                'message': 'Wrong action. This endpoint is only for retrieving training data.',
                'error': True
            },
            'data': None
        }
        return jsonify(wrongAction), 403

    modelID, modelName = con.getModelInfo(str(userID))

    # Columns 0, 1, 3, 4 and 5 of each row are exposed; column 2 is skipped.
    formattedTraining = [
        {
            "id": row[0],
            "rawText": row[1],
            "sentiment": row[3],
            "tags": row[4],
            "dateTrained": row[5]
        }
        for row in con.getAvailableTraining(modelID)
    ]

    body = {
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'availableTraining': formattedTraining
        }
    }
    return jsonify(body), 200
Ejemplo n.º 4
0
def getOCRtext():
    """
    Gets extracted text for an image.

    Required params:\n
    -action = 'ocr'\n
    -stoken\n
    -datatype = blob\n
    -data => list (if multiple) or string (if single)\n

    :return: (response, status) tuple. 200 with data.ocrtext (a list when a
             list was submitted, a single value otherwise) and data.type
             ('list' or 'str'); 400 for a malformed request or a failed file
             conversion; 403 for a bad token, action or datatype.
    """

    def _fail(message, code):
        # Every failure shares one envelope: error flag set, no data.
        return jsonify({
            'status': {
                'error': True,
                'message': message
            },
            'data': None
        }), code

    def _discard(paths):
        # A failed conversion may not have produced a file, so only delete
        # entries that really exist on disk.
        for path in paths:
            if os.path.isfile(path):
                os.remove(path)

    request.get_data()
    if request.json is None:
        return _fail(
            "Incorrect header type. Header type should be application/json.",
            400)

    noMissingData, data = getRequiredParameters(request,
                                                stoken='',
                                                datatype='',
                                                data='',
                                                action='')

    if not noMissingData:
        # On failure the second element is used as the missing parameter name.
        return _fail('Missing required parameter \'' + data + '\'', 400)

    con = Conn()
    userID = con.checkToken(data['stoken'])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'ocr':
        return _fail(
            'Wrong action. This endpoint is only for getting OCR text.', 403)

    if data['datatype'] != 'blob':
        return _fail('Wrong request. This endpoint is only for blobs.', 403)

    if not isinstance(data['data'], (list, str)):
        return _fail(
            'Wrong data type for \'data\' field. Accepted types are str or list.',
            400)

    if isinstance(data['data'], list):  # if multiple files
        status, files = [], []
        for item in data['data']:
            filename = str(uuid.uuid4())  # generate temp file name
            iferr, retval = getFileBase64(item, filename, userID)
            status.append(iferr)
            files.append(retval)

        if not all(status):  # if any error during converting the file
            # drop the files that did convert so they are not left behind
            _discard(files)
            return _fail(ERRORFILEEXTEN, 400)

        ocrtext = []
        try:
            for eachFile in files:
                ocrtext.append(getOcr(eachFile))
        finally:
            # remove temp files even if OCR fails part-way through
            _discard(files)

        return jsonify({
            'status': RETURN_SUCCESS_STATUS,
            'data': {
                'ocrtext': ocrtext,
                'type': 'list'
            }
        }), 200

    # single file
    filename = str(uuid.uuid4())  # generate temp file name
    status, filepath = getFileBase64(request.json['data'], filename, userID)

    if not status:
        # if error during converting the file
        _discard([filepath])
        return _fail(ERRORFILEEXTEN, 400)

    try:
        ocrtext = getOcr(filepath)
    finally:
        _discard([filepath])  # remove temp file

    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'ocrtext': ocrtext,
            'type': 'str'
        }
    }), 200
Ejemplo n.º 5
0
def retrain():
    """
    Used to retrain the model.

    Removes the given training entries, builds a brand-new model from the
    entries that remain, points the user's model record at it and deletes
    the previous model file.

    Required params:\n
    -action = 'retrain'\n
    -stoken => the session token \n
    -data => a list of trainingID that will be deleted\n

    :return: (response, status) tuple. A success message and 200; 400 for a
             malformed request or when the user has no model; 403 for a bad
             token or action; 500 when the database update fails.
    """

    def _fail(message, code):
        # Every failure shares one envelope: error flag set, no data.
        return jsonify({
            'status': {
                'message': message,
                'error': True
            },
            'data': None
        }), code

    request.get_data()
    if request.json is None:
        return _fail(
            "Incorrect header type. Header type should be application/json.",
            400)

    chkMissingData, data = getRequiredParameters(request,
                                                 stoken='',
                                                 action='',
                                                 data='')

    if not chkMissingData:
        # On failure the second element is used as the missing parameter name.
        return _fail('Missing required parameter \'' + data + '\'', 400)

    con = Conn()
    userID = con.checkToken(data["stoken"])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'retrain':
        return _fail('Wrong action. This endpoint is only for retraining.',
                     403)

    if not isinstance(data['data'], list):
        return _fail(
            'Bad data. Data in the \'data\' field must be sent as list.', 400)

    # get old model info
    oldModelID, oldModelName = con.getModelInfo(str(userID))

    # Without an existing model there is nothing to retrain; stop before any
    # training row is deleted.
    if oldModelName is None:
        return _fail("No model created yet.", 400)

    if not con.removeTraining(oldModelID, data['data']):
        return _fail(
            'Internal server error encountered. Please try again.', 500)

    availableTraining = con.getAvailableTraining(oldModelID)

    # make a new model and replay what is left of the training set
    model = Classifier(makeNewModel=True)
    for eachData in availableTraining:
        model.train(text=eachData[2],
                    sentiment=eachData[3],
                    tags=eachData[4],
                    fromDB=True)

    # update db with new model info
    if not con.updateModelInfo(oldModelID, modelName=model.modelName):
        return _fail('Internal server error when updating the resource.', 500)

    if not oldModelName.endswith(PICKLE_FILE_EXTENSION):
        oldModelName += PICKLE_FILE_EXTENSION

    try:
        os.remove(
            os.path.join(os.path.dirname(__file__), 'Models', oldModelName))
    except FileNotFoundError:
        # The database already points at the new model; an old file that is
        # already gone must not turn a successful retrain into an error.
        pass

    return jsonify({
        'status': {
            'message': 'Model successfully retrained.',
            'error': False
        },
        'data': None
    }), 200