def getTrainingData():
    """
    Return every stored training entry for the caller's model.

    Required JSON params:
        action: must be 'getData'.
        stoken: the session token.

    :return: A JSON payload listing the formatted training entries and 200;
        400 when the body is not JSON or a required parameter is missing,
        403 when the session token is rejected or the action is wrong.
    """
    request.get_data()
    if request.json is None:
        badHeader = {
            'status': {
                'error': True,
                'message': "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }
        return jsonify(badHeader), 400

    allPresent, params = getRequiredParameters(request, stoken='', action='')
    if not allPresent:
        # When the check fails, the second value is spliced into the message
        # as the offending parameter name.
        missingParam = {
            'status': {
                'message': "Missing required parameter '" + params + "'",
                'error': True
            },
            'data': None
        }
        return jsonify(missingParam), 400

    db = Conn()
    userID = db.checkToken(params["stoken"])
    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if params['action'] != 'getData':
        wrongAction = {
            'status': {
                'message': 'Wrong action. This endpoint is only for retrieving training data.',
                'error': True
            },
            'data': None
        }
        return jsonify(wrongAction), 403

    modelID, _ = db.getModelInfo(str(userID))

    # Output key -> position inside each training row. Position 2 is
    # deliberately not exposed by this endpoint.
    fieldPositions = (
        ("id", 0),
        ("rawText", 1),
        ("sentiment", 3),
        ("tags", 4),
        ("dateTrained", 5),
    )
    formattedTraining = [
        {key: row[position] for key, position in fieldPositions}
        for row in db.getAvailableTraining(modelID)
    ]

    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'availableTraining': formattedTraining
        }
    }), 200
def processAsk():
    """
    Get a prediction from the caller's model.

    Required JSON params:
        action: must be 'ask'.
        stoken: the session token.
        datatype: 'text' or 'blob'.
        data: raw text for 'text'; one base64 string or a list of base64
            strings (pdf/txt files) for 'blob'.

    No 'tags' parameter is read by this endpoint.

    :return: The predicted sentiment, suggested tags and the submitted data
        with 200; 400 for a malformed request, a missing model or an
        unusable file; 403 for a rejected token, wrong action or wrong
        datatype.
    """
    request.get_data()
    if request.json is None:
        return _askError(
            "Incorrect header type. Header type should be application/json.",
            400)

    noMissingData, data = getRequiredParameters(
        request, stoken='', action='', datatype='', data='')
    if not noMissingData:
        # On failure the second value is used as the missing parameter name.
        return _askError("Missing required parameter '" + data + "'", 400)

    con = Conn()
    userID = con.checkToken(data['stoken'])
    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'ask':
        return _askError(
            'Wrong action. This endpoint is only for asking for predictions.',
            403)

    if data['datatype'] not in ('text', 'blob'):
        return _askError(
            "Wrong datatype. This endpoint only accepts raw text as 'text' "
            "and pdf or txt files as 'blob'.",
            403)

    if data['datatype'] == 'text':
        return _askFromText(con, userID, str(request.json['data']))
    return _askFromBlob(con, userID, data['data'])


def _askError(message, code):
    """Build the standard error payload used by processAsk."""
    return jsonify({
        'status': {
            'message': message,
            'error': True
        },
        'data': None
    }), code


def _askLoadModelInfo(con, userID):
    """Return (modelID, modelName), or None when the user has no model yet."""
    modelID, modelName = con.getModelInfo(str(userID))
    if modelID is None and modelName is None:
        return None
    return modelID, modelName


def _askFromText(con, userID, rawText):
    """Predict for raw text, but only if similar text was trained before."""
    # remove escape characters, if any
    txtdata = rawText.replace("\\", "")

    modelInfo = _askLoadModelInfo(con, userID)
    if modelInfo is None:
        return _askError("No model created yet.", 400)
    modelID, modelName = modelInfo

    # Element 1 of each training row is compared against the submitted text.
    hasSimilarKnowledge = any(
        getCosineSimilarity(txtdata, training[1])
        >= PREVIOUS_KNOWLEDGE_SIMILARITY_RATE
        for training in con.getAvailableTraining(modelID))

    if not hasSimilarKnowledge:
        # Nothing comparable was trained, so the model is not consulted.
        return jsonify({
            'status': RETURN_SUCCESS_STATUS,
            'data': {
                'predictedSentiment': "Not available",
                'suggested': "Not available",
                'text': txtdata
            }
        }), 200

    model = Classifier(makeNewModel=False, modelName=modelName)
    predSentiment, retTags = model.predict(txtdata)
    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'predictedSentiment': predSentiment,
            'suggested': retTags,
            'text': txtdata
        }
    }), 200


def _askFromBlob(con, userID, payload):
    """Predict for one base64 file (str) or several (list of str)."""
    if not isinstance(payload, (list, str)):
        return _askError(
            "Data in the 'data' field should be sent as a string or a list "
            "of items.",
            400)

    modelInfo = _askLoadModelInfo(con, userID)
    if modelInfo is None:
        return _askError("No model created yet.", 400)
    model = Classifier(makeNewModel=False, modelName=modelInfo[1])

    if isinstance(payload, str):
        # single file: scalar results
        prediction, error = _askPredictFile(model, payload)
        if error is not None:
            return error
        predSentiment, retTags = prediction
    else:
        # multiple files: one result per file, in submission order
        predSentiment = []
        retTags = []
        for item in payload:
            prediction, error = _askPredictFile(model, item)
            if error is not None:
                return error
            sent, tag = prediction
            predSentiment.append(sent)
            retTags.append(tag)

    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'predictedSentiment': predSentiment,
            'suggested': retTags,
            # echo the submitted payload back, for both single and multi-file
            'text': payload
        }
    }), 200


def _askPredictFile(model, encoded):
    """
    Decode one base64 file to a temp file and predict on its text.

    :return: (prediction, None) on success or (None, errorResponse) on
        failure. The temp file, if one exists, is removed in every case.
    """
    # generate temp file name
    status, filepath = getFileBase64(encoded, str(uuid.uuid4()))
    try:
        # NOTE(review): status is checked first on the assumption that
        # filepath may not be usable when decoding failed — confirm against
        # getFileBase64.
        if not status:
            return None, _askError(ERRORFILEEXTEN, 400)
        # in case file received is not a text-based (.pdf/.txt) file
        if not filepath.endswith(('pdf', 'txt')):
            return None, _askError("Wrong format for data.", 400)
        return model.predict(readTextFileContents(filepath)), None
    finally:
        if isinstance(filepath, str) and os.path.isfile(filepath):
            os.remove(filepath)
def retrain():
    """
    Rebuild the caller's model after deleting selected training entries.

    Required JSON params:
        action: must be 'retrain'.
        stoken: the session token.
        data: a list of training IDs that will be deleted.

    :return: A success message and 200; 400 for a malformed request or when
        the user has no model, 403 for a rejected token or wrong action,
        500 when removing training or updating the model record fails.
    """
    request.get_data()
    if request.json is None:
        return jsonify({
            'status': {
                'error': True,
                'message': "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }), 400

    chkMissingData, data = getRequiredParameters(
        request, stoken='', action='', data='')
    if not chkMissingData:
        # On failure the second value is used as the missing parameter name.
        return jsonify({
            'status': {
                'message': "Missing required parameter '" + data + "'",
                'error': True
            },
            'data': None
        }), 400

    con = Conn()
    userID = con.checkToken(data["stoken"])
    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'retrain':
        return jsonify({
            'status': {
                'message': 'Wrong action. This endpoint is only for retraining.',
                'error': True
            },
            'data': None
        }), 403

    if not isinstance(data['data'], list):
        return jsonify({
            'status': {
                'message': "Bad data. Data in the 'data' field must be sent as list.",
                'error': True
            },
            'data': None
        }), 400

    # get old model info
    oldModelID, oldModelName = con.getModelInfo(str(userID))
    if oldModelID is None:
        # Without an existing model there is nothing to retrain; stop before
        # touching the database or building a new model.
        return jsonify({
            'status': {
                'message': 'No model created yet.',
                'error': True
            },
            'data': None
        }), 400

    if not con.removeTraining(oldModelID, data['data']):
        return jsonify({
            'status': {
                'message': 'Internal server error encountered. Please try again.',
                'error': True
            },
            'data': None
        }), 500

    # make a new model from whatever training is left
    model = Classifier(makeNewModel=True)
    for eachData in con.getAvailableTraining(oldModelID):
        model.train(text=eachData[2],
                    sentiment=eachData[3],
                    tags=eachData[4],
                    fromDB=True)

    # update db with new model info
    if not con.updateModelInfo(oldModelID, modelName=model.modelName):
        return jsonify({
            'status': {
                'message': 'Internal server error when updating the resource.',
                'error': True
            },
            'data': None
        }), 500

    # The database now points at the new model, so deleting the old file is
    # only clean-up: a missing file must not turn a successful retrain into
    # a failure.
    if oldModelName:
        if not oldModelName.endswith(PICKLE_FILE_EXTENSION):
            oldModelName += PICKLE_FILE_EXTENSION
        stalePath = os.path.join(
            os.path.dirname(__file__), 'Models', oldModelName)
        try:
            os.remove(stalePath)
        except FileNotFoundError:
            pass

    return jsonify({
        'status': {
            'message': 'Model successfully retrained.',
            'error': False
        },
        'data': None
    }), 200