def trainFinal(currentSet, scale):
    """Train the final LR/SV/GB models for *currentSet* and pickle them.

    Loads tuned hyper-parameters via getParams, then for each algorithm
    fits a tf-idf vectorizer over the essay text and a mainModel on the
    resulting matrix.  LR and SV are only trained when scale <= 2; for
    larger scales their slots are filled with None.  The six objects
    (three vectorizers + three models) are written to
    savedModels/<currentSet>.pickle.

    :param currentSet: identifier of the essay set to train on
    :param scale: score-scale selector; picks the "*1" vs "*2" fit mode
                  and whether LR/SV are trained at all
    """
    paramsDict = getParams(currentSet)  # Loads parameters

    if scale <= 2:
        # Train LR model with loaded parameters
        modelType = "LR"
        ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
        text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter)
        modelTLG = tfidf(analyzer='word', ngram_range=(1, 1),
                         token_pattern=r'[^ ]+', min_df=2, norm=None,
                         use_idf=False, smooth_idf=False, sublinear_tf=True)
        modelTLG.fit(text)
        X = modelTLG.transform(text)
        modelLG = mainModel(param1=param1, param2=param2, param3=param3)
        if scale > 1:
            modelLG.fit(X, y, "LR2")
        else:
            modelLG.fit(X, y, "LR1")

        # Train SV model with loaded parameters
        modelType = "SV"
        ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
        text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter)
        modelTSV = tfidf(analyzer='word', ngram_range=(1, 1),
                         token_pattern=r'[^ ]+', min_df=2, norm=None,
                         use_idf=False, smooth_idf=False, sublinear_tf=True)
        modelTSV.fit(text)
        X = modelTSV.transform(text)
        modelSV = mainModel(param1=param1, param2=param2, param3=param3)
        if scale > 1:
            modelSV.fit(X, y, "SV2")
        else:
            modelSV.fit(X, y, "SV1")
    else:
        modelTLG, modelLG, modelTSV, modelSV = None, None, None, None

    # Train GB model with loaded parameters (always trained)
    modelType = "GB"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter)
    modelTGB = tfidf(analyzer='word', ngram_range=(1, 1),
                     token_pattern=r'[^ ]+', min_df=2, norm=None,
                     use_idf=False, smooth_idf=False, sublinear_tf=True)
    modelTGB.fit(text)
    X = modelTGB.transform(text)
    # GB gets param1 + 1 — mirrors trainFull, where yp3 = yL[param1] needs
    # param1 + 1 estimator stages to be available.
    modelGB = mainModel(param1=param1 + 1, param2=param2, param3=param3)
    if scale > 1:
        modelGB.fit(X, y, "GB2")
    else:
        modelGB.fit(X, y, "GB1")

    completeModel = [modelTLG, modelLG, modelTSV, modelSV, modelTGB, modelGB]
    # Save all models.  BUG FIX: pickle requires a binary-mode file handle
    # ("wb", not "w"), and the original never closed the file; the context
    # manager guarantees the pickle is flushed and the handle released.
    with open("savedModels/" + currentSet + ".pickle", "wb") as modelFile:
        pickle.dump(completeModel, modelFile)
def go(self): #self.setContentType('text/plain') print "Content-Type: " + self.content_type + "\n\n" p = serverPath.getMe() settings = siteSettings.getMe() title = settings.getVal('main_title') #title="title" o = output.getMe() self.model = mainModel() tpl = self.getTemplate() tmpl = template('templates/' + tpl) self.controller = mainController(tmpl, '') self.controller.setTitle(title) self.controller.setModel(self.model) self.controller.parseRequest() self.controller.proceed() self.controller.display()
def go(self): #self.setContentType('text/plain') print "Content-Type: "+self.content_type+"\n\n" p=serverPath.getMe() settings=siteSettings.getMe() title=settings.getVal('main_title') #title="title" o=output.getMe() self.model=mainModel() tpl=self.getTemplate() tmpl=template('templates/'+tpl) self.controller=mainController(tmpl,'') self.controller.setTitle(title) self.controller.setModel(self.model) self.controller.parseRequest() self.controller.proceed() self.controller.display()
def trainFull(currentSet, scale, listParams, shuffle=0):
    """Cross-validate the LR/SV/GB ensemble and score weight/threshold combos.

    For each of the three algorithms, loads tuned hyper-parameters, builds
    train/test splits, produces cross-validated predictions (yp1/yp2/yp3),
    then sweeps over *listParams* — each entry being blend weights plus one
    threshold (scale <= 1) or two thresholds (scale > 1) — and returns the
    kappa obtained for each entry.

    :param currentSet: identifier of the essay set
    :param scale: score-scale selector (1-class vs 2-class estimation)
    :param listParams: iterable of (coef1, coef2, coef3, threshold[, threshold2])
    :param shuffle: shuffle seed offset (20 is added before use)
    :returns: list of kappa scores, one per entry of listParams
    """
    shuffle += 20
    paramsDict = getParams(currentSet)  # Load parameters

    # Train LR model with loaded parameters
    modelType = "LR"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter,
                                             shuffle)
    XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest(text, y)
    # objectEv is module-global so evaluateParametersInd can reach it.
    global objectEv
    objectEv = evaluation(XTrain=XTrain, XTest=XTest,
                          indices_Train=indices_Train,
                          indices_Test=indices_Test,
                          y=y, y1=y1, y2=y2, scale=scale, modelType=modelType)
    modelLG = mainModel(param1=param1, param2=param2, param3=param3)
    if scale > 1:
        yProb1, yProb2 = objectEv.cv_estimateCl2(modelLG, False)
        yp1 = np.maximum(yProb1, yProb2 * 2)
    else:
        yp1 = objectEv.cv_estimateCl1(modelLG, False)

    # Train SV model with loaded parameters
    modelType = "SV"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter,
                                             shuffle)
    XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest(text, y)
    objectEv = evaluation(XTrain=XTrain, XTest=XTest,
                          indices_Train=indices_Train,
                          indices_Test=indices_Test,
                          y=y, y1=y1, y2=y2, scale=scale, modelType=modelType)
    modelSV = mainModel(param1=param1, param2=param2, param3=param3)
    if scale > 1:
        yp2 = objectEv.cv_estimateSV2(modelSV)
    else:
        yp2 = objectEv.cv_estimateCl1(modelSV, True)

    # Train GB model with loaded parameters
    modelType = "GB"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter,
                                             shuffle)
    XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest(text, y)
    objectEv = evaluation(XTrain=XTrain, XTest=XTest,
                          indices_Train=indices_Train,
                          indices_Test=indices_Test,
                          y=y, y1=y1, y2=y2, scale=scale, modelType=modelType)
    # param1 + 1 stages so that yL[param1] below is a valid index.
    modelGB = mainModel(param1=param1 + 1, param2=param2, param3=param3)
    if scale > 1:
        yL = objectEv.cv_estimateGB2(modelGB, param1 + 1)
    else:
        yL = objectEv.cv_estimateGB1(modelGB, param1 + 1)
    yp3 = yL[param1]

    # Calculates the kappas obtained by using the weights and thresholds
    # in the list of parameters.  The blended score yF is cached and only
    # recomputed when the weights change (prevCoef* tracking).
    prevCoef1 = -1
    prevCoef2 = -1
    prevCoef3 = -1
    listKappas = list()
    for cParams in listParams:
        if scale > 1:
            coef1, coef2, coef3, threshold1, threshold2 = cParams
            if prevCoef1 != coef1 or prevCoef2 != coef2 or prevCoef3 != coef3:
                yF = coef1 * yp1 + coef2 * yp2 + coef3 * yp3
            # BUG FIX: thresholding used to live inside the weight-change
            # guard, so entries with identical weights but different
            # thresholds reused a stale yPred.  Apply the (cheap)
            # thresholds on every iteration.
            yPred = (yF >= threshold1) * 1
            yPred = (yF >= threshold2) * 1 + yPred
        else:
            coef1, coef2, coef3, threshold = cParams
            if prevCoef1 != coef1 or prevCoef2 != coef2 or prevCoef3 != coef3:
                yF = coef1 * yp1 + coef2 * yp2 + coef3 * yp3
            # Same fix as above: threshold must be re-applied per entry.
            yPred = yF > threshold
        ckappa = skllMetrics.kappa(objectEv.y, yPred)
        listKappas.append(ckappa)
        prevCoef1, prevCoef2, prevCoef3 = coef1, coef2, coef3
    return listKappas
def trainFinal(currentSet, scale):
    """Fit the final tf-idf + model pairs and persist them to disk.

    Per algorithm (LR, SV, GB): read tuned hyper-parameters from
    getParams, fit a word-level tf-idf vectorizer on the set's text, and
    fit a mainModel on the transformed matrix.  LR/SV are skipped (set to
    None) when scale > 2; GB is always trained.  All six objects are
    pickled to savedModels/<currentSet>.pickle.

    :param currentSet: essay-set identifier (also the pickle file stem)
    :param scale: selects the "*1"/"*2" fit mode and whether LR/SV train
    """
    paramsDict = getParams(currentSet)  # Loads parameters

    if scale <= 2:
        # Train LR model with loaded parameters
        modelType = "LR"
        ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
        text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter)
        modelTLG = tfidf(analyzer='word', ngram_range=(1, 1),
                         token_pattern=r'[^ ]+', min_df=2, norm=None,
                         use_idf=False, smooth_idf=False, sublinear_tf=True)
        modelTLG.fit(text)
        X = modelTLG.transform(text)
        modelLG = mainModel(param1=param1, param2=param2, param3=param3)
        if scale > 1:
            modelLG.fit(X, y, "LR2")
        else:
            modelLG.fit(X, y, "LR1")

        # Train SV model with loaded parameters
        modelType = "SV"
        ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
        text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter)
        modelTSV = tfidf(analyzer='word', ngram_range=(1, 1),
                         token_pattern=r'[^ ]+', min_df=2, norm=None,
                         use_idf=False, smooth_idf=False, sublinear_tf=True)
        modelTSV.fit(text)
        X = modelTSV.transform(text)
        modelSV = mainModel(param1=param1, param2=param2, param3=param3)
        if scale > 1:
            modelSV.fit(X, y, "SV2")
        else:
            modelSV.fit(X, y, "SV1")
    else:
        modelTLG, modelLG, modelTSV, modelSV = None, None, None, None

    # Train GB model with loaded parameters
    modelType = "GB"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter)
    modelTGB = tfidf(analyzer='word', ngram_range=(1, 1),
                     token_pattern=r'[^ ]+', min_df=2, norm=None,
                     use_idf=False, smooth_idf=False, sublinear_tf=True)
    modelTGB.fit(text)
    X = modelTGB.transform(text)
    # param1 + 1 matches trainFull's indexing of the GB staged predictions.
    modelGB = mainModel(param1=param1 + 1, param2=param2, param3=param3)
    if scale > 1:
        modelGB.fit(X, y, "GB2")
    else:
        modelGB.fit(X, y, "GB1")

    completeModel = [modelTLG, modelLG, modelTSV, modelSV, modelTGB, modelGB]
    # Save all models.  FIX: the pickle stream must be written in binary
    # mode ("wb"); "w" corrupts on Windows and fails outright on Python 3.
    # The with-block also closes the handle, which the original leaked.
    outputPath = "savedModels/" + currentSet + ".pickle"
    with open(outputPath, "wb") as handle:
        pickle.dump(completeModel, handle)
def evaluateParametersInd(params):
    """Score one hyper-parameter triple against the global evaluation object.

    :param params: sequence whose first three items are param1, param2,
                   param3 for mainModel
    :returns: the evaluation score produced by objectEv.evaluate
    """
    p1, p2, p3 = params[0], params[1], params[2]
    candidate = mainModel(param1=p1, param2=p2, param3=p3)
    # objectEv is the module-global evaluation set up by trainFull.
    return objectEv.evaluate(candidate)
def trainFull(currentSet, scale, listParams, shuffle=0):
    """Produce cross-validated ensemble predictions and sweep blend params.

    Builds CV predictions yp1 (LR), yp2 (SV) and yp3 (GB) for the set,
    then evaluates every entry of *listParams* — blend weights plus one or
    two thresholds depending on *scale* — returning one kappa per entry.

    :param currentSet: essay-set identifier
    :param scale: score-scale selector (two-threshold path when > 1)
    :param listParams: iterable of (coef1, coef2, coef3, threshold[, threshold2])
    :param shuffle: shuffle seed offset (incremented by 20 internally)
    :returns: list of kappa scores aligned with listParams
    """
    shuffle += 20
    paramsDict = getParams(currentSet)  # Load parameters

    # Train LR model with loaded parameters
    modelType = "LR"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter,
                                             shuffle)
    XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest(text, y)
    # Exposed globally for evaluateParametersInd.
    global objectEv
    objectEv = evaluation(XTrain=XTrain, XTest=XTest,
                          indices_Train=indices_Train,
                          indices_Test=indices_Test,
                          y=y, y1=y1, y2=y2, scale=scale, modelType=modelType)
    modelLG = mainModel(param1=param1, param2=param2, param3=param3)
    if scale > 1:
        yProb1, yProb2 = objectEv.cv_estimateCl2(modelLG, False)
        yp1 = np.maximum(yProb1, yProb2 * 2)
    else:
        yp1 = objectEv.cv_estimateCl1(modelLG, False)

    # Train SV model with loaded parameters
    modelType = "SV"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter,
                                             shuffle)
    XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest(text, y)
    objectEv = evaluation(XTrain=XTrain, XTest=XTest,
                          indices_Train=indices_Train,
                          indices_Test=indices_Test,
                          y=y, y1=y1, y2=y2, scale=scale, modelType=modelType)
    modelSV = mainModel(param1=param1, param2=param2, param3=param3)
    if scale > 1:
        yp2 = objectEv.cv_estimateSV2(modelSV)
    else:
        yp2 = objectEv.cv_estimateCl1(modelSV, True)

    # Train GB model with loaded parameters
    modelType = "GB"
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, originalText, y, y1, y2 = get_data(currentSet, ngramMax, kInter,
                                             shuffle)
    XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest(text, y)
    objectEv = evaluation(XTrain=XTrain, XTest=XTest,
                          indices_Train=indices_Train,
                          indices_Test=indices_Test,
                          y=y, y1=y1, y2=y2, scale=scale, modelType=modelType)
    # param1 + 1 stages ensure yL[param1] is in range below.
    modelGB = mainModel(param1=param1 + 1, param2=param2, param3=param3)
    if scale > 1:
        yL = objectEv.cv_estimateGB2(modelGB, param1 + 1)
    else:
        yL = objectEv.cv_estimateGB1(modelGB, param1 + 1)
    yp3 = yL[param1]

    # Calculates the kappas obtained by using the weights and thresholds
    # in the list of parameters.  yF (the weighted blend) is cached across
    # entries that reuse the same coefficients.
    prevCoef1 = -1
    prevCoef2 = -1
    prevCoef3 = -1
    listKappas = list()
    for cParams in listParams:
        if scale > 1:
            coef1, coef2, coef3, threshold1, threshold2 = cParams
            if prevCoef1 != coef1 or prevCoef2 != coef2 or prevCoef3 != coef3:
                yF = coef1 * yp1 + coef2 * yp2 + coef3 * yp3
            # BUG FIX: the thresholds were previously applied only when the
            # coefficients changed, so same-weight/different-threshold
            # entries scored a stale yPred.  Thresholding is cheap — do it
            # unconditionally.
            yPred = (yF >= threshold1) * 1
            yPred = (yF >= threshold2) * 1 + yPred
        else:
            coef1, coef2, coef3, threshold = cParams
            if prevCoef1 != coef1 or prevCoef2 != coef2 or prevCoef3 != coef3:
                yF = coef1 * yp1 + coef2 * yp2 + coef3 * yp3
            # Same fix: re-apply the threshold for every parameter entry.
            yPred = yF > threshold
        ckappa = skllMetrics.kappa(objectEv.y, yPred)
        listKappas.append(ckappa)
        prevCoef1, prevCoef2, prevCoef3 = coef1, coef2, coef3
    return listKappas