def get_abs(doc_text, keywords):
    """Build a short keyword-driven abstract from the ``<s>...</s>`` section.

    The text between the first ``<s>`` and the first ``</s>`` of *doc_text*
    is split into sentences with ``get_sentence``.  *keywords* is normalised
    with ``process_keywords`` and expanded to exactly three lookup keys:

    * one keyword    -> that keyword three times,
    * two keywords   -> first, second, then the first again,
    * three or more  -> the first three.

    For each key in turn, the first still-unused sentence containing the key
    is added to the abstract, so a sentence is never emitted twice.

    :param doc_text: document text containing an ``<s>...</s>`` section.
    :param keywords: raw keywords, in whatever form ``process_keywords``
        accepts.
    :returns: ``None`` when no keywords remain after processing; otherwise
        the selected sentences, UTF-8 encoded, joined with ``'... '`` and
        terminated by ``'...'`` (just ``'...'`` when nothing matched).
    """
    open_tag = '<s>'
    start_pos = doc_text.find(open_tag) + len(open_tag)
    end_pos = doc_text.find('</s>')
    # NOTE(review): if either tag is missing, find() returns -1 and the slice
    # silently trims characters instead of failing -- confirm whether callers
    # always supply both tags.
    content = doc_text[start_pos:end_pos]

    # Work on a copy so the list returned by get_sentence is never mutated.
    remaining = list(get_sentence(content))

    keywords = process_keywords(keywords)
    if not keywords:
        return None

    # Processed keywords are decoded before matching against the sentences;
    # presumably they are UTF-8 byte strings and the sentences are unicode.
    keys = [keyword.decode('utf-8', 'ignore') for keyword in keywords]
    if len(keys) == 1:
        keylist = keys * 3
    elif len(keys) == 2:
        # The original passed the return value of list.append() (always
        # None) to extend(), which raised TypeError for two keywords.
        keylist = keys + keys[:1]
    else:
        keylist = keys[:3]

    result = []
    for key in keylist:
        for index, sentence in enumerate(remaining):
            if sentence.find(key) != -1:
                result.append(sentence)
                # Drop the sentence so a later key cannot select it again.
                del remaining[index]
                break

    encoded = [sentence.encode('utf-8') for sentence in result]
    return '... '.join(encoded) + '...'
def make_query_string(keywords, site='0'):
    """Assemble a weighted ``#filreq`` search query from a keyword string.

    *keywords* is segmented with ``jieba.cut``, re-joined with spaces,
    stripped and passed through ``process_keywords``.  The result has the
    shape::

        #filreq( #band([#equals(site SITE) ]K1 K2 ...) #weight(...))

    The ``#equals(site ...)`` filter is only added when ``int(site)`` is
    non-zero.  The ``#weight`` clause scores the fields ``userid`` and
    ``author`` with 10.0, ``abs`` and ``authenticate`` with 2.0 and ``hot``
    with 1.0.  With a single keyword each field term is the bare keyword;
    with N > 1 keywords it is ``#N(K1 ... KN)``.
    (The operators look like Indri query syntax -- not verifiable from here.)

    :param keywords: raw query text to segment.
    :param site: site identifier; anything for which ``int(site) == 0``
        means "no site restriction".
    :returns: the query string, or ``None`` when no keywords remain after
        processing.
    """
    segmented = ' '.join(jieba.cut(keywords)).strip()
    keywords = process_keywords(segmented)

    # Required part: every keyword must occur, optionally limited to a site.
    if int(site) != 0:
        site_filter = '#equals(site %s)' % site
        required = '#band(%s %s)' % (
            site_filter, ' '.join(keywords).encode('utf-8', 'ignore'))
    else:
        required = '#band(%s)' % (
            ' '.join(keywords).encode('utf-8', 'ignore'))

    count = len(keywords)
    if count == 0:
        return None

    # Ranking part: results are ordered by these per-field weights.
    if count == 1:
        term = keywords[0].encode('utf-8', 'ignore')
        weights = '10.0 %s.(userid) 10.0 %s.(author) 2.0 %s.(abs) 2.0 %s.(authenticate) 1.0 %s.(hot)' % (
            (term,) * 5)
    else:
        phrase = ' '.join(
            [keyword.encode('utf-8', 'ignore') for keyword in keywords])
        weights = '10.0 #%s(%s).(userid) 10.0 #%s(%s).(author) 2.0 #%s(%s).(abs) 2.0 #%s(%s).(authenticate) 1.0 #%s(%s).(hot)' % (
            (count, phrase) * 5)

    return '#filreq( %s #weight(%s))' % (required, weights)
def make_query_string(keywords, site='0'):
    """Assemble a weighted ``#filreq`` search query from a keyword string.

    *keywords* is segmented with ``jieba.cut``, re-joined with spaces,
    stripped and passed through ``process_keywords``.  The result has the
    shape::

        #filreq( #band([#equals(site SITE) ]K1 K2 ...) #weight(...))

    The ``#equals(site ...)`` filter is only added when ``int(site)`` is
    non-zero.  The ``#weight`` clause scores the fields ``userid`` and
    ``author`` with 10.0, ``abs`` and ``authenticate`` with 2.0 and ``hot``
    with 1.0.  With a single keyword each field term is the bare keyword;
    with N > 1 keywords it is ``#N(K1 ... KN)``.
    (The operators look like Indri query syntax -- not verifiable from here.)

    :param keywords: raw query text to segment.
    :param site: site identifier; anything for which ``int(site) == 0``
        means "no site restriction".
    :returns: the query string, or ``None`` when no keywords remain after
        processing.
    """
    segmented = ' '.join(jieba.cut(keywords)).strip()
    keywords = process_keywords(segmented)

    # Required part: every keyword must occur, optionally limited to a site.
    if int(site) != 0:
        site_filter = '#equals(site %s)' % site
        required = '#band(%s %s)' % (
            site_filter, ' '.join(keywords).encode('utf-8', 'ignore'))
    else:
        required = '#band(%s)' % (
            ' '.join(keywords).encode('utf-8', 'ignore'))

    count = len(keywords)
    if count == 0:
        return None

    # Ranking part: results are ordered by these per-field weights.
    if count == 1:
        term = keywords[0].encode('utf-8', 'ignore')
        weights = '10.0 %s.(userid) 10.0 %s.(author) 2.0 %s.(abs) 2.0 %s.(authenticate) 1.0 %s.(hot)' % (
            (term,) * 5)
    else:
        phrase = ' '.join(
            [keyword.encode('utf-8', 'ignore') for keyword in keywords])
        weights = '10.0 #%s(%s).(userid) 10.0 #%s(%s).(author) 2.0 #%s(%s).(abs) 2.0 #%s(%s).(authenticate) 1.0 #%s(%s).(hot)' % (
            (count, phrase) * 5)

    return '#filreq( %s #weight(%s))' % (required, weights)