Example #1
0
    def calculateScores(self, resume):
        """Score every job in ``self.jobs`` against ``resume`` (Okapi BM25).

        The resume is normalised with ``irutils.processText`` and tokenised
        with ``self.tfgetter.getTokens``.  Each job dict must already carry
        ``"tf"`` (term -> count mapping) and ``"length"``; ``self.doc_num``
        and ``self.avgLength`` must be set.  The module-level constants
        ``K1`` and ``B`` are the BM25 tuning parameters.

        The result is written to ``job["score"]`` for each job; nothing is
        returned.  Tokens repeated in the resume contribute once per
        occurrence, exactly as before.
        """
        resume_content = irutils.processText(resume)
        resume_tokens = self.tfgetter.getTokens(resume_content)

        # A token's IDF does not depend on how often it occurs in the resume,
        # so compute it once per distinct token instead of once per occurrence.
        idfdict = {}
        for token in set(resume_tokens):
            # Number of jobs containing the token.  ``in`` replaces
            # ``dict.has_key``, which no longer exists in Python 3.
            tokenNum = sum(1 for job in self.jobs if token in job["tf"])
            idfdict[token] = math.log10(
                (self.doc_num - tokenNum + 0.5) / (tokenNum + 0.5))

        for job in self.jobs:
            tf = job["tf"]
            score = 0
            for token in resume_tokens:
                if token in tf:
                    n1 = tf[token] * (K1 + 1)
                    # float() forces true division under Python 2 even if
                    # B, the length and the average are all integers.
                    n2 = tf[token] + K1 * (
                        1 - B + B * float(job["length"]) / self.avgLength)
                    score += idfdict[token] * (n1 / n2)
            job["score"] = score
Example #2
0
 def saveJobTfIdf(self, jobcoll , idfColl):
      """Compute per-job term weights, persist them, and store the idf record.

      Pass 1 walks ``jobcoll.find()``: each job's ``"summary"`` is processed
      with ``irutils.processText`` and ``self.getTf``; the job gets ``'tf'``
      and ``'wtf'`` fields, is saved back, and its terms are folded into the
      document-frequency table via ``dfAddTf``.

      An idf record (document count, df table, idf table, collection name and
      timestamp) is then saved into ``idfColl``.

      Pass 2 walks the collection again, stores the ``getWtfIdf`` result in
      ``'wtfidf'`` and ``'length'`` on every job, and saves it.

      Returns the idf record dict that was saved to ``idfColl``.
      """
      doc_freq = {}
      total_docs = 0
      for job in jobcoll.find():
          term_freq = self.getTf(irutils.processText(job["summary"]))
          job['tf'] = term_freq
          job['wtf'] = getwtf(term_freq)
          jobcoll.save(job)
          dfAddTf(doc_freq, term_freq)
          total_docs += 1

      # idf is computed before the record is assembled, as in the original
      # statement order.
      idf = getIdf(doc_freq, total_docs)
      idfitem = {
          'doc_num': total_docs,
          'df': doc_freq,
          'idf': idf,
          'coll_name': jobcoll.name,
          'date': datetime.datetime.now(),
      }
      idfColl.save(idfitem)

      for job in jobcoll.find():
          weights, length = getWtfIdf(job['wtf'], idf)
          job['wtfidf'] = weights
          job['length'] = length
          jobcoll.save(job)

      return idfitem
Example #3
0
  def processColl(self, jobcoll):
       """Load all jobs from ``jobcoll`` and cache their term statistics.

       For every document returned by ``jobcoll.find()`` the ``"summary"``
       field is processed with ``irutils.processText`` and tokenised with
       ``self.tfgetter``.  The document is annotated with ``'tf'`` (result of
       ``getTf``) and ``'length'`` (token count) and appended to
       ``self.jobs``.

       Sets ``self.jobs``, ``self.doc_num`` and ``self.avgLength``.  The
       average is always a float; it is ``0.0`` for an empty collection
       instead of raising ``ZeroDivisionError``.
       """
       self.jobs = []
       self.doc_num = 0
       sum_length = 0
       for item in jobcoll.find():
           content = irutils.processText(item["summary"])
           tokens = self.tfgetter.getTokens(content)
           item['tf'] = self.tfgetter.getTf(tokens)
           item['length'] = len(tokens)
           self.jobs.append(item)
           self.doc_num += 1
           sum_length += item['length']

       if self.doc_num:
           # float() avoids Python 2 integer division truncating the average.
           self.avgLength = float(sum_length) / self.doc_num
       else:
           self.avgLength = 0.0
       # Single-argument call form prints identically on Python 2 and 3.
       print("self.avgLength = %s" % self.avgLength)
Example #4
0
 def getJobTfIdf(self, jobcoll ):
      """Build in-memory weights for every job in ``jobcoll``.

      Each document from ``jobcoll.find()`` has its ``"summary"`` processed
      with ``irutils.processText`` and ``self.getTf``.  It receives a
      ``'wtf'`` field from ``getwtf``, and its terms are accumulated into a
      document-frequency table with ``dfAddTf``.  Once the idf table is known
      (``getIdf``), each job also receives ``'wtfidf'`` and ``'length'`` from
      ``getWtfIdf``.  Nothing is written back to the collection.

      Returns a tuple ``(idf, jobs)``.
      """
      doc_freq = {}
      jobs = []
      for job in jobcoll.find():
          term_freq = self.getTf(irutils.processText(job["summary"]))
          job['wtf'] = getwtf(term_freq)
          jobs.append(job)
          dfAddTf(doc_freq, term_freq)

      # One job is appended per document, so len(jobs) is the document count.
      idf = getIdf(doc_freq, len(jobs))
      for job in jobs:
          job['wtfidf'], job['length'] = getWtfIdf(job['wtf'], idf)

      return idf, jobs
Example #5
0
 def calculateScores(self,resume):
     """Score every job in ``self.jobs`` against ``resume``.

     The resume is processed with ``irutils.processText`` and tokenised with
     ``self.tfgetter``.  Its relative term frequencies (count / token count)
     are compared with each job's relative frequencies
     (``job["tf"][term] / job["length"]``).

     For every term present in both, ``p_job * log(p_job / p_resume)`` is
     added to the score: a Kullback-Leibler-style sum restricted to the
     shared terms.  The result is written to ``job["score"]``; nothing is
     returned.
     """
     resume_content = irutils.processText(resume)
     resume_tokens = self.tfgetter.getTokens(resume_content)
     resumetf = self.tfgetter.getTf(resume_tokens)
     resume_len = len(resume_tokens)

     # Relative frequency of each term within the resume.
     resume_pq = {}
     for key, count in resumetf.items():
         resume_pq[key] = float(count) / resume_len

     for job in self.jobs:
         tf = job["tf"]
         job_len = job["length"]
         score = 0
         for key in resumetf:
             # ``in`` replaces ``dict.has_key``, which was removed in
             # Python 3.
             if key in tf:
                 job_p = float(tf[key]) / job_len
                 score += job_p * math.log(job_p / resume_pq[key])
         job["score"] = score
Example #6
0
    def calculateScores(self, resume):
        """Score every job in ``self.jobs`` against ``resume``.

        The resume is processed with ``irutils.processText`` and tokenised
        with ``self.tfgetter``.  Its relative term frequencies (count / token
        count) are compared with each job's relative frequencies
        (``job["tf"][term] / job["length"]``).

        For every term present in both, ``p_job * log(p_job / p_resume)`` is
        added to the score: a Kullback-Leibler-style sum restricted to the
        shared terms.  The result is written to ``job["score"]``; nothing is
        returned.
        """
        resume_content = irutils.processText(resume)
        resume_tokens = self.tfgetter.getTokens(resume_content)
        resumetf = self.tfgetter.getTf(resume_tokens)
        resume_len = len(resume_tokens)

        # Relative frequency of each term within the resume.
        resume_pq = {}
        for key, count in resumetf.items():
            resume_pq[key] = float(count) / resume_len

        for job in self.jobs:
            tf = job["tf"]
            job_len = job["length"]
            score = 0
            for key in resumetf:
                # ``in`` replaces ``dict.has_key``, which was removed in
                # Python 3.
                if key in tf:
                    job_p = float(tf[key]) / job_len
                    score += job_p * math.log(job_p / resume_pq[key])
            job["score"] = score
Example #7
0
 def calculateScores(self,resume):
     """Score every job in ``self.jobs`` against ``resume`` (Okapi BM25).

     The resume is normalised with ``irutils.processText`` and tokenised with
     ``self.tfgetter.getTokens``.  Each job dict must already carry ``"tf"``
     (term -> count mapping) and ``"length"``; ``self.doc_num`` and
     ``self.avgLength`` must be set.  The module-level constants ``K1`` and
     ``B`` are the BM25 tuning parameters.

     The result is written to ``job["score"]`` for each job; nothing is
     returned.  Tokens repeated in the resume contribute once per occurrence,
     exactly as before.
     """
     resume_content = irutils.processText(resume)
     resume_tokens = self.tfgetter.getTokens(resume_content)

     # A token's IDF does not depend on how often it occurs in the resume,
     # so compute it once per distinct token instead of once per occurrence.
     idfdict = {}
     for token in set(resume_tokens):
         # Number of jobs containing the token.  ``in`` replaces
         # ``dict.has_key``, which no longer exists in Python 3.
         tokenNum = sum(1 for job in self.jobs if token in job["tf"])
         idfdict[token] = math.log10(
             (self.doc_num - tokenNum + 0.5) / (tokenNum + 0.5))

     for job in self.jobs:
         tf = job["tf"]
         score = 0
         for token in resume_tokens:
             if token in tf:
                 n1 = tf[token] * (K1 + 1)
                 # float() forces true division under Python 2 even if B,
                 # the length and the average are all integers.
                 n2 = tf[token] + K1 * (
                     1 - B + B * float(job["length"]) / self.avgLength)
                 score += idfdict[token] * (n1 / n2)
         job["score"] = score
Example #8
0
 def getResumeWight(self, resume):
     """Return the query-side weight vector for ``resume``.

     The text goes through ``irutils.processText``, ``self.tfIdfGetter.getTf``
     and ``getwtf``.  ``getQueryWtfIdf`` then combines the result with
     ``self.jobs_idf``.  Only the first element of the returned pair is
     handed back; the second (a length) is discarded.
     """
     term_freq = self.tfIdfGetter.getTf(irutils.processText(resume))
     weights, _unused_length = getQueryWtfIdf(getwtf(term_freq), self.jobs_idf)
     return weights
Example #9
0
 def getResumeWight(self, resume):
     """Compute the weight vector of ``resume`` against ``self.jobs_idf``.

     Pipeline: ``irutils.processText`` -> ``self.tfIdfGetter.getTf`` ->
     ``getwtf`` -> ``getQueryWtfIdf``.  The last call yields a
     ``(weights, length)`` pair, of which only the weights are returned.
     """
     processed = irutils.processText(resume)
     weighted_tf = getwtf(self.tfIdfGetter.getTf(processed))
     vector, _ = getQueryWtfIdf(weighted_tf, self.jobs_idf)
     return vector