def filterByOp(self, clone):
    """Score how similar the operation strings of a clone's two ranges are.

    ``clone[1]`` and ``clone[2]`` are each unpacked as a 3-item sequence
    ``(index, start, end)``; only ``start`` and ``end`` are used here.
    For every position in the inclusive range ``start..end`` the value is
    looked up in ``self.op1_hash`` (first range) or ``self.op2_hash``
    (second range), with ``-1`` substituted for missing positions, and the
    string forms are concatenated into one operation string per range.

    Returns ``None`` unless ``self.hasChanged`` is truthy for both
    operation strings; otherwise returns the result of
    ``self.compareList`` applied to the n-gram lists of the two strings
    (n-gram size taken from ``config.NGRAM``).
    """
    _, start1, end1 = clone[1]
    _, start2, end2 = clone[2]

    opStr1 = "".join(
        str(self.op1_hash.get(pos, -1)) for pos in range(start1, end1 + 1))
    opStr2 = "".join(
        str(self.op2_hash.get(pos, -1)) for pos in range(start2, end2 + 1))

    if config.DEBUG is True:
        print("start1 = %d, end1 = %d, ops = %s" % (start1, end1, opStr1))
        print("start2 = %d, end2 = %d, ops = %s" % (start2, end2, opStr2))

    # Both sides must report a change; the second check is skipped when the
    # first one already fails.
    if not self.hasChanged(opStr1) or not self.hasChanged(opStr2):
        return None

    splitter = NGram(N=config.NGRAM)
    grams1 = list(splitter.ngrams(opStr1))
    grams2 = list(splitter.ngrams(opStr2))
    return self.compareList(grams1, grams2)
def test():
    """Print ``opFilter.compareList`` for the n-grams of two sample strings.

    Manual smoke check: splits ``"nnn+"`` and ``"nn+"`` into n-grams of size
    ``config.NGRAM`` and prints the comparison result.
    """
    op_filter = opFilter()
    splitter = NGram(N=config.NGRAM)
    grams_a = list(splitter.ngrams("nnn+"))
    grams_b = list(splitter.ngrams("nn+"))
    print(op_filter.compareList(grams_a, grams_b))
def ngram_compare(a, b, N):
    """Return the Jaccard similarity of the N-gram sets of *a* and *b*.

    Both inputs are split into n-grams of size *N*; the result is the number
    of distinct n-grams they share divided by the number of distinct n-grams
    appearing in either one.

    Args:
        a: First sequence to compare (passed to ``NGram.ngrams``).
        b: Second sequence to compare (passed to ``NGram.ngrams``).
        N: Size of the n-grams.

    Returns:
        A float in ``[0.0, 1.0]``. Returns ``0.0`` when neither input yields
        any n-gram, instead of raising ``ZeroDivisionError``.
    """
    ngram = NGram(N=N)
    A = set(ngram.ngrams(a))
    B = set(ngram.ngrams(b))
    union = A | B
    if not union:
        # No n-grams on either side: there is nothing to compare.
        return 0.0
    # Force true division; under Python 2 ``int / int`` floors, which would
    # collapse every score to 0 or 1.
    return float(len(A & B)) / len(union)
def main():
    """Suggest up to three job postings matching a question typed by the user.

    Reads one line from standard input, splits it into padded bigrams, and
    for every bigram selects the ``projects`` rows whose ``looking_for``
    column contains it. Each project is scored by how many bigrams matched
    it; the title and keywords of the three highest-scoring projects are
    printed. If nothing matched, a follow-up question is printed instead.

    The database cursor and connection are closed even when a query fails.
    """
    # Unit used for the n-gram analysis (bigrams).
    index = NGram(N=2)
    # project id -> number of bigrams that matched that project. A dict is
    # used so that any id works, not only ids 0..9.
    hits_by_id = {}

    # DB
    connector = psycopg2.connect(host="localhost", database="wantedly")
    try:
        cursor = connector.cursor()
        try:
            # Read the question.
            print("質問:")
            question = raw_input().decode("utf-8")

            for gram in index.ngrams(index.pad(question)):
                # Wrap in wildcards so every row containing the bigram hits.
                pattern = "%" + gram + "%"
                # The pattern is passed as a bound parameter so user input
                # can never be interpreted as SQL.
                cursor.execute(
                    "select id,company_id,title,description,location,keywords "
                    "from projects where looking_for like %s",
                    (pattern,))
                for row in cursor.fetchall():
                    hits_by_id[row[0]] = hits_by_id.get(row[0], 0) + 1

            # [[hit_count, id], ...], highest hit count first.
            projects_hit_count = sorted(
                ([hits, project_id]
                 for project_id, hits in hits_by_id.items()),
                reverse=True)
            print(projects_hit_count)

            if not projects_hit_count:
                # No posting matched the question.
                print("やりたい仕事がありますか?それか興味ある分野とは?")
            else:
                print("\nこんにちは、今のような募集がありますが、いかがですか?")

            # Show at most the three best matches.
            for rank, (_, project_id) in enumerate(projects_hit_count[:3], 1):
                cursor.execute(
                    "select id,company_id,title,description,location,keywords "
                    "from projects where id = %s",
                    (project_id,))
                for row in cursor.fetchall():
                    print(str(rank) + ".")
                    print(str(row[2]))
                    print(str(row[5]))

            connector.commit()
        finally:
            cursor.close()
    finally:
        connector.close()
def main():
    """Suggest up to three job postings matching a question typed by the user.

    Reads one line from standard input, splits it into padded bigrams, and
    for every bigram selects the ``projects`` rows whose ``looking_for``
    column contains it. Each project is scored by how many bigrams matched
    it; the title and keywords of the three highest-scoring projects are
    printed. If nothing matched, a follow-up question is printed instead.

    The database cursor and connection are closed even when a query fails.
    """
    # Unit used for the n-gram analysis (bigrams).
    index = NGram(N=2)
    # project id -> number of bigrams that matched that project. A dict is
    # used so that any id works, not only ids 0..9.
    hits_by_id = {}

    # DB
    connector = psycopg2.connect(host="localhost", database="wantedly")
    try:
        cursor = connector.cursor()
        try:
            # Read the question.
            print("質問:")
            question = raw_input().decode("utf-8")

            for gram in index.ngrams(index.pad(question)):
                # Wrap in wildcards so every row containing the bigram hits.
                pattern = "%" + gram + "%"
                # The pattern is passed as a bound parameter so user input
                # can never be interpreted as SQL.
                cursor.execute(
                    "select id,company_id,title,description,location,keywords "
                    "from projects where looking_for like %s",
                    (pattern,))
                for row in cursor.fetchall():
                    hits_by_id[row[0]] = hits_by_id.get(row[0], 0) + 1

            # [[hit_count, id], ...], highest hit count first.
            projects_hit_count = sorted(
                ([hits, project_id]
                 for project_id, hits in hits_by_id.items()),
                reverse=True)
            print(projects_hit_count)

            if not projects_hit_count:
                # No posting matched the question.
                print("やりたい仕事がありますか?それか興味ある分野とは?")
            else:
                print("\nこんにちは、今のような募集がありますが、いかがですか?")

            # Show at most the three best matches.
            for rank, (_, project_id) in enumerate(projects_hit_count[:3], 1):
                cursor.execute(
                    "select id,company_id,title,description,location,keywords "
                    "from projects where id = %s",
                    (project_id,))
                for row in cursor.fetchall():
                    print(str(rank) + ".")
                    print(str(row[2]))
                    print(str(row[5]))

            connector.commit()
        finally:
            cursor.close()
    finally:
        connector.close()