예제 #1
0
def trainFinal( currentSet , scale ):
  """Fit the final LR, SV and GB models for one data set and pickle them.

  Parameters come from ``getParams( currentSet )``. The LR and SV models are
  only trained when ``scale <= 2``; otherwise their four slots are saved as
  ``None``. The GB model is always trained, with ``param1`` incremented by one.

  Args:
    currentSet: data set identifier. It is forwarded to ``getParams`` and
      ``get_data`` and concatenated into the output path, so it must be a
      string.
    scale: numeric scale. ``scale > 1`` selects the fit mode ending in "2"
      (e.g. "LR2"), anything else the mode ending in "1".

  Returns:
    None. The list ``[ modelTLG, modelLG, modelTSV, modelSV, modelTGB, modelGB ]``
    is written to ``savedModels/<currentSet>.pickle``.
  """
  paramsDict = getParams( currentSet ) #Loads parameters
  fitSuffix = "2" if scale > 1 else "1"

  def fitOne( modelType, incrementParam1 = False ):
    #Fits the text vectorizer and the model of one model type and returns both
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    if incrementParam1:
      #Only applied on request so that other param1 values are passed through untouched
      param1 = param1 + 1
    text, _originalText, y, _y1, _y2 = get_data ( currentSet, ngramMax, kInter )
    #NOTE(review): tfidf is presumably an alias of a TF-IDF text vectorizer - confirm at the import
    vectorizer = tfidf( analyzer = 'word', ngram_range = (1,1) , token_pattern = r'[^ ]+', min_df = 2 ,
                        norm = None, use_idf = False, smooth_idf = False, sublinear_tf = True )
    vectorizer.fit( text )
    X = vectorizer.transform( text )
    model = mainModel( param1 = param1, param2 = param2, param3 = param3 )
    model.fit( X, y , modelType + fitSuffix )
    return vectorizer, model

  if scale <= 2:
    #Train LR and SV models with loaded parameters
    modelTLG, modelLG = fitOne( "LR" )
    modelTSV, modelSV = fitOne( "SV" )
  else:
    modelTLG, modelLG, modelTSV, modelSV = None, None, None, None

  #Train GB model with loaded parameters
  modelTGB, modelGB = fitOne( "GB", incrementParam1 = True )

  completeModel = [ modelTLG, modelLG, modelTSV, modelSV, modelTGB, modelGB ]
  #Save all models; pickle needs a binary file, and the with-block closes it
  with open( "savedModels/" + currentSet + ".pickle" , "wb" ) as modelFile:
    pickle.dump( completeModel, modelFile )
예제 #2
0
 def go(self):
     """Print the Content-Type header and run the controller for this request.

     Builds the model, the template named by ``self.getTemplate()`` and the
     controller, stores model and controller on ``self``, then calls the
     controller's ``parseRequest``, ``proceed`` and ``display`` in that order.
     """
     header = "Content-Type: " + self.content_type + "\n\n"
     print(header)

     # The results of these two getMe() calls are not used in this method;
     # the calls are kept because their side effects are not visible here.
     pathInfo = serverPath.getMe()
     outputHandle = output.getMe()

     siteConfig = siteSettings.getMe()
     pageTitle = siteConfig.getVal('main_title')

     self.model = mainModel()
     templateName = self.getTemplate()
     pageTemplate = template('templates/' + templateName)
     self.controller = mainController(pageTemplate, '')
     self.controller.setTitle(pageTitle)
     self.controller.setModel(self.model)

     # Request life cycle: parse, process, render.
     self.controller.parseRequest()
     self.controller.proceed()
     self.controller.display()
예제 #3
0
 def go(self):
  """Print the Content-Type header and run the controller for this request.

  Builds the model, the template named by ``self.getTemplate()`` and the
  controller, stores model and controller on ``self``, then calls the
  controller's ``parseRequest``, ``proceed`` and ``display`` in that order.
  """
  header = "Content-Type: " + self.content_type + "\n\n"
  print(header)

  # The results of these two getMe() calls are not used in this method;
  # the calls are kept because their side effects are not visible here.
  pathInfo = serverPath.getMe()
  outputHandle = output.getMe()

  siteConfig = siteSettings.getMe()
  pageTitle = siteConfig.getVal('main_title')

  self.model = mainModel()
  templateName = self.getTemplate()
  pageTemplate = template('templates/' + templateName)
  self.controller = mainController(pageTemplate, '')
  self.controller.setTitle(pageTitle)
  self.controller.setModel(self.model)

  # Request life cycle: parse, process, render.
  self.controller.parseRequest()
  self.controller.proceed()
  self.controller.display()
예제 #4
0
def trainFull( currentSet , scale, listParams, shuffle = 0  ):
  """Cross-validate the LR, SV and GB models and score blended predictions.

  For each model type the parameters are loaded from ``getParams( currentSet )``,
  the data is fetched with ``get_data`` and an ``evaluation`` object is built.
  The three resulting prediction vectors are then combined with every set of
  weights and thresholds in ``listParams`` and a kappa is computed for each.

  Side effect: the module-level ``objectEv`` is rebound three times and is left
  pointing at the evaluation object of the GB model.

  Args:
    currentSet: data set identifier, forwarded to ``getParams`` and ``get_data``.
    scale: numeric scale. ``scale > 1`` uses the ``cv_estimateCl2`` /
      ``cv_estimateSV2`` / ``cv_estimateGB2`` estimators and two thresholds;
      otherwise the ``...1`` estimators and a single threshold are used.
    listParams: iterable of parameter entries. Each entry unpacks to
      ``(coef1, coef2, coef3, threshold1, threshold2)`` when ``scale > 1`` and
      to ``(coef1, coef2, coef3, threshold)`` otherwise.
    shuffle: value forwarded to ``get_data`` after 20 has been added to it.

  Returns:
    A list with one ``skllMetrics.kappa( objectEv.y, yPred )`` value per entry
    of ``listParams``, in the same order.
  """
  global objectEv

  shuffle += 20
  paramsDict = getParams( currentSet ) #Load parameters

  def buildEvaluator( modelType ):
    #Loads the data of one model type and wraps it in an evaluation object
    ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
    text, _originalText, y, y1, y2 = get_data ( currentSet, ngramMax, kInter , shuffle)
    XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest( text , y )
    evaluator = evaluation( XTrain = XTrain, XTest = XTest, indices_Train = indices_Train,
                            indices_Test = indices_Test, y = y, y1 = y1, y2 = y2 ,
                            scale = scale , modelType = modelType )
    return evaluator, param1, param2, param3

  #Train LR model with loaded parameters
  objectEv, param1, param2, param3 = buildEvaluator( "LR" )
  modelLG = mainModel( param1 = param1, param2 = param2, param3 = param3 )
  if scale > 1:
    yProb1, yProb2 = objectEv.cv_estimateCl2 ( modelLG , False)
    yp1 = np.maximum( yProb1, yProb2 * 2 )
  else:
    yp1 = objectEv.cv_estimateCl1( modelLG, False )

  #Train SV model with loaded parameters
  objectEv, param1, param2, param3 = buildEvaluator( "SV" )
  modelSV = mainModel( param1 = param1, param2 = param2, param3 = param3 )
  if scale > 1:
    yp2 = objectEv.cv_estimateSV2 ( modelSV )
  else:
    yp2 = objectEv.cv_estimateCl1( modelSV, True )

  #Train GB model with loaded parameters (param1 is used incremented by one)
  objectEv, param1, param2, param3 = buildEvaluator( "GB" )
  modelGB = mainModel( param1 = param1+1, param2 = param2, param3 = param3 )
  if scale > 1:
    yL = objectEv.cv_estimateGB2( modelGB , param1 + 1)
  else:
    yL = objectEv.cv_estimateGB1( modelGB , param1 + 1)
  yp3 = yL[ param1 ]

  #Calculates the kappas obtained by using the weights and thresholds
  #in the list of parameters
  #The blended score yF is reused while consecutive entries share the same
  #coefficients. The cache key starts as None, so the first entry always
  #computes yF, whatever its coefficient values are.
  prevCoefs = None
  listKappas = list()
  for cParams in listParams:
    if scale > 1:
      coef1, coef2, coef3, threshold1, threshold2 = cParams
    else:
      coef1, coef2, coef3, threshold = cParams
    coefs = ( coef1, coef2, coef3 )
    if coefs != prevCoefs:
      yF = coef1 * yp1 + coef2 * yp2 + coef3 * yp3
      prevCoefs = coefs
    if scale > 1:
      #Counts how many of the two thresholds the blended score reaches
      yPred = ( yF >= threshold1 ) * 1
      yPred = ( yF >= threshold2 ) * 1 + yPred
    else:
      yPred = yF > threshold
    ckappa = skllMetrics.kappa( objectEv.y, yPred )
    listKappas.append( ckappa )

  return listKappas
예제 #5
0
def trainFinal(currentSet, scale):
    """Fit the final LR, SV and GB models for one data set and pickle them.

    Parameters come from ``getParams(currentSet)``. The LR and SV models are
    only trained when ``scale <= 2``; otherwise their four slots are saved as
    ``None``. The GB model is always trained, with ``param1`` incremented by
    one.

    Args:
        currentSet: data set identifier. It is forwarded to ``getParams`` and
            ``get_data`` and concatenated into the output path, so it must be
            a string.
        scale: numeric scale. ``scale > 1`` selects the fit mode ending in
            "2" (e.g. "LR2"), anything else the mode ending in "1".

    Returns:
        None. The list ``[modelTLG, modelLG, modelTSV, modelSV, modelTGB,
        modelGB]`` is written to ``savedModels/<currentSet>.pickle``.
    """
    paramsDict = getParams(currentSet)  #Loads parameters
    fitSuffix = "2" if scale > 1 else "1"

    def fitOne(modelType, incrementParam1=False):
        #Fits the text vectorizer and the model of one model type
        ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
        if incrementParam1:
            #Only applied on request so that other param1 values are
            #passed through untouched
            param1 = param1 + 1
        text, _originalText, y, _y1, _y2 = get_data(currentSet, ngramMax,
                                                    kInter)
        #NOTE(review): tfidf is presumably an alias of a TF-IDF text
        #vectorizer - confirm at the import
        vectorizer = tfidf(analyzer='word',
                           ngram_range=(1, 1),
                           token_pattern=r'[^ ]+',
                           min_df=2,
                           norm=None,
                           use_idf=False,
                           smooth_idf=False,
                           sublinear_tf=True)
        vectorizer.fit(text)
        X = vectorizer.transform(text)
        model = mainModel(param1=param1, param2=param2, param3=param3)
        model.fit(X, y, modelType + fitSuffix)
        return vectorizer, model

    if scale <= 2:
        #Train LR and SV models with loaded parameters
        modelTLG, modelLG = fitOne("LR")
        modelTSV, modelSV = fitOne("SV")
    else:
        modelTLG, modelLG, modelTSV, modelSV = None, None, None, None

    #Train GB model with loaded parameters
    modelTGB, modelGB = fitOne("GB", incrementParam1=True)

    completeModel = [modelTLG, modelLG, modelTSV, modelSV, modelTGB, modelGB]
    #Save all models; pickle needs a binary file, and the with-block closes it
    with open("savedModels/" + currentSet + ".pickle", "wb") as modelFile:
        pickle.dump(completeModel, modelFile)
예제 #6
0
def evaluateParametersInd( params ):
  """Build a model from the first three entries of ``params`` and evaluate it.

  ``params[0]``, ``params[1]`` and ``params[2]`` become ``param1``, ``param2``
  and ``param3`` of a new ``mainModel``. The result of
  ``objectEv.evaluate`` on that model is returned, where ``objectEv`` is the
  module-level evaluation object that must already be set.
  """
  first, second, third = params[0], params[1], params[2]
  candidate = mainModel( param1 = first, param2 = second, param3 = third )
  score = objectEv.evaluate( candidate )
  return score
예제 #7
0
def trainFull(currentSet, scale, listParams, shuffle=0):
    """Cross-validate the LR, SV and GB models and score blended predictions.

    For each model type the parameters are loaded from
    ``getParams(currentSet)``, the data is fetched with ``get_data`` and an
    ``evaluation`` object is built. The three resulting prediction vectors
    are then combined with every set of weights and thresholds in
    ``listParams`` and a kappa is computed for each.

    Side effect: the module-level ``objectEv`` is rebound three times and is
    left pointing at the evaluation object of the GB model.

    Args:
        currentSet: data set identifier, forwarded to ``getParams`` and
            ``get_data``.
        scale: numeric scale. ``scale > 1`` uses the ``cv_estimateCl2`` /
            ``cv_estimateSV2`` / ``cv_estimateGB2`` estimators and two
            thresholds; otherwise the ``...1`` estimators and a single
            threshold are used.
        listParams: iterable of parameter entries. Each entry unpacks to
            ``(coef1, coef2, coef3, threshold1, threshold2)`` when
            ``scale > 1`` and to ``(coef1, coef2, coef3, threshold)``
            otherwise.
        shuffle: value forwarded to ``get_data`` after 20 has been added.

    Returns:
        A list with one ``skllMetrics.kappa(objectEv.y, yPred)`` value per
        entry of ``listParams``, in the same order.
    """
    global objectEv

    shuffle += 20
    paramsDict = getParams(currentSet)  #Load parameters

    def buildEvaluator(modelType):
        #Loads the data of one model type and wraps it in an evaluation object
        ngramMax, kInter, param3, param1, param2 = paramsDict[modelType]
        text, _originalText, y, y1, y2 = get_data(currentSet, ngramMax,
                                                  kInter, shuffle)
        XTrain, XTest, indices_Train, indices_Test = get_XTrain_XTest(text, y)
        evaluator = evaluation(XTrain=XTrain,
                               XTest=XTest,
                               indices_Train=indices_Train,
                               indices_Test=indices_Test,
                               y=y,
                               y1=y1,
                               y2=y2,
                               scale=scale,
                               modelType=modelType)
        return evaluator, param1, param2, param3

    #Train LR model with loaded parameters
    objectEv, param1, param2, param3 = buildEvaluator("LR")
    modelLG = mainModel(param1=param1, param2=param2, param3=param3)
    if scale > 1:
        yProb1, yProb2 = objectEv.cv_estimateCl2(modelLG, False)
        yp1 = np.maximum(yProb1, yProb2 * 2)
    else:
        yp1 = objectEv.cv_estimateCl1(modelLG, False)

    #Train SV model with loaded parameters
    objectEv, param1, param2, param3 = buildEvaluator("SV")
    modelSV = mainModel(param1=param1, param2=param2, param3=param3)
    if scale > 1:
        yp2 = objectEv.cv_estimateSV2(modelSV)
    else:
        yp2 = objectEv.cv_estimateCl1(modelSV, True)

    #Train GB model with loaded parameters (param1 is used incremented by one)
    objectEv, param1, param2, param3 = buildEvaluator("GB")
    modelGB = mainModel(param1=param1 + 1, param2=param2, param3=param3)
    if scale > 1:
        yL = objectEv.cv_estimateGB2(modelGB, param1 + 1)
    else:
        yL = objectEv.cv_estimateGB1(modelGB, param1 + 1)
    yp3 = yL[param1]

    #Calculates the kappas obtained by using the weights and thresholds
    #in the list of parameters
    #The blended score yF is reused while consecutive entries share the same
    #coefficients. The cache key starts as None, so the first entry always
    #computes yF, whatever its coefficient values are.
    prevCoefs = None
    listKappas = list()
    for cParams in listParams:
        if scale > 1:
            coef1, coef2, coef3, threshold1, threshold2 = cParams
        else:
            coef1, coef2, coef3, threshold = cParams
        coefs = (coef1, coef2, coef3)
        if coefs != prevCoefs:
            yF = coef1 * yp1 + coef2 * yp2 + coef3 * yp3
            prevCoefs = coefs
        if scale > 1:
            #Counts how many of the two thresholds the blended score reaches
            yPred = (yF >= threshold1) * 1
            yPred = (yF >= threshold2) * 1 + yPred
        else:
            yPred = yF > threshold
        ckappa = skllMetrics.kappa(objectEv.y, yPred)
        listKappas.append(ckappa)

    return listKappas