def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Merge per-provider results into one list and flag every entry.

    Each result dict is updated in place: 'title' is replaced by its
    HTML-sanitized form, 'provid' is set to the index of its provider in
    ``rawResults`` and 'ignore' is set to one of:

    * 0 - every search term equals, or is a prefix of, a title token
    * 1 - as 0, but a later result has the same sanitized title and a
      'size' differing by less than 5000000
    * 2 - rejected, either by the search terms or by ``logic_items``

    Args:
        rawResults: sequence with one list of result dicts per provider.
            The dicts are read through the keys 'title', 'size' and
            'providertitle'.
        strsearch: the search string as entered by the user.
        logic_items: optional sequence of items where item[0] is '+' or '-'
            and item[1] is a title token that must be present ('+', at
            least one of them) or must be absent ('-').
        results_stats: optional dict updated in place. It maps
            str(providertitle) to [number of raw results, number of results
            whose 'ignore' flag is not 2].

    Returns:
        The flat list of result dicts (the same objects held by
        ``rawResults``).
    """
    # ~ a fresh container per call: a mutable default would be shared (and,
    # ~ for results_stats, polluted) across calls
    if logic_items is None:
        logic_items = []
    if results_stats is None:
        results_stats = {}

    # ~ stats for each provider, keyed by the title of its first result
    for provider_results in rawResults:
        if len(provider_results):
            key = str(provider_results[0]['providertitle'])
            results_stats[key] = [len(provider_results), 0]

    # ~ all in one array
    results = []
    titles = []
    sptitle_collection = []
    for provid, provider_results in enumerate(rawResults):
        for entry in provider_results:
            # NOTE(review): entries whose title is None are dropped from the
            # merged list (they still count in the provider's raw total) -
            # confirm this is intended.
            if entry['title'] is None:
                continue
            entry['title'] = SearchModule.sanitize_html(entry['title'])
            entry['provid'] = provid
            title = SearchModule.sanitize_strings(entry['title'])
            titles.append(title)
            sptitle_collection.append(set(title.split(".")))
            results.append(entry)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = set(strsearch1.split("."))

    # ~ rcount[k] counts the results flagged with ignore == k at this stage
    rcount = [0] * 3
    for z, entry in enumerate(results):
        title_tokens = sptitle_collection[z]
        # ~ relaxed search: a term missing from the title is still accepted
        # ~ when some title token starts with it
        unmatched_terms = strsearch1_collection.difference(title_tokens)
        matched = all(
            any(token.startswith(term) for token in title_tokens)
            for term in unmatched_terms
        )
        entry['ignore'] = 0 if matched else 2

        if matched:
            # ~ same title and nearly the same size as a later result:
            # ~ keep the later one and flag this one as duplicate
            for v in range(z + 1, len(results)):
                if titles[z] == titles[v]:
                    sz1 = float(entry['size'])
                    sz2 = float(results[v]['size'])
                    if abs(sz1 - sz2) < 5000000:
                        entry['ignore'] = 1
                        break
        # ~ stats
        rcount[entry['ignore']] += 1

    # ~ logic params
    exclude_coll = set()
    include_coll = set()
    for item in logic_items:
        if item[0] == '-':
            exclude_coll.add(item[1])
        if item[0] == '+':
            include_coll.add(item[1])

    for z, entry in enumerate(results):
        if entry['ignore'] >= 2:
            continue
        title_tokens = sptitle_collection[z]
        if include_coll and include_coll.isdisjoint(title_tokens):
            entry['ignore'] = 2
        elif exclude_coll and not exclude_coll.isdisjoint(title_tokens):
            entry['ignore'] = 2

    # ~ the counters were taken before the logic filter ran
    mssg = ('Overall search stats: [' + strsearch1 + ']' + ' [' + strsearch + '] '
            + str(rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2]))
    log.info(mssg)

    for entry in results:
        if entry['ignore'] != 2:
            results_stats[str(entry['providertitle'])][1] += 1

    return results
def summary_results(rawResults, strsearch, logic_items=[]):
    """Flatten the per-provider result lists and set an 'ignore' flag on each.

    Every result dict gets its 'title' replaced by the HTML-sanitized title
    and an 'ignore' value:

    * 0 - all search terms appear among the title tokens
    * 1 - as 0, but a later result has the same sanitized title and a
      'size' differing by less than 5000000
    * 2 - rejected by the search terms or by ``logic_items``

    ``logic_items`` holds items whose first element is '+' (at least one of
    these tokens must be in the title) or '-' (none of these tokens may be
    in the title) and whose second element is the token.

    A one-line summary of the counters is printed and logged. The flat list
    of (mutated) result dicts is returned.
    """
    # ~ all in one array
    results, titles, token_sets = [], [], []
    for provider_results in rawResults:
        for entry in provider_results:
            entry["title"] = SearchModule.sanitize_html(entry["title"])
            clean_title = SearchModule.sanitize_strings(entry["title"])
            titles.append(clean_title)
            token_sets.append(Set(clean_title.split(".")))
            results.append(entry)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    wanted = Set(strsearch1.split("."))

    # ~ rcount[k] is the number of results flagged with ignore == k
    rcount = [0, 0, 0]
    total = len(results)
    for idx, entry in enumerate(results):
        common = wanted.intersection(token_sets[idx])
        if len(common) != len(wanted):
            entry["ignore"] = 2
        else:
            entry["ignore"] = 0
            # ~ look for a later result with the same title and a close size
            for later in range(idx + 1, total):
                if titles[idx] != titles[later]:
                    continue
                own_size = float(entry["size"])
                other_size = float(results[later]["size"])
                if abs(own_size - other_size) < 5000000:
                    entry["ignore"] = 1
        # ~ stats
        rcount[entry["ignore"]] += 1

    # ~ logic params
    exclude_coll = Set([])
    include_coll = Set([])
    for item in logic_items:
        if item[0] == "-":
            exclude_coll.add(item[1])
        if item[0] == "+":
            include_coll.add(item[1])

    if len(include_coll) > 0:
        for idx, entry in enumerate(results):
            if entry["ignore"] >= 2:
                continue
            if len(include_coll.intersection(token_sets[idx])) == 0:
                entry["ignore"] = 2

    if len(exclude_coll) > 0:
        for idx, entry in enumerate(results):
            if entry["ignore"] >= 2:
                continue
            if len(exclude_coll.intersection(token_sets[idx])) > 0:
                entry["ignore"] = 2

    counters = str(rcount[0]) + " " + str(rcount[1]) + " " + str(rcount[2])
    mssg = "[" + strsearch1 + "]" + " [" + strsearch + "] " + counters
    print(mssg)
    log.info(mssg)

    return results
def summary_results(rawResults, strsearch, logic_items=[]):
    """Build one list out of all provider results and classify each entry.

    The result dicts are modified in place: 'title' becomes the
    HTML-sanitized title and 'ignore' is set to

    * 0 when every search term is one of the title tokens,
    * 1 when, in addition, a later result carries the same sanitized title
      and a 'size' less than 5000000 away,
    * 2 when the search terms do not all match or when ``logic_items``
      rules the entry out.

    Items of ``logic_items`` are read as (sign, token): with '+' tokens a
    title must contain at least one of them, with '-' tokens it must
    contain none of them.

    The counters summary is printed and logged before the flat list of
    results is returned.
    """
    results = []
    titles = []
    sptitle_collection = []

    #~ all in one array
    for source in rawResults:
        for item in source:
            item['title'] = SearchModule.sanitize_html(item['title'])
            cleaned = SearchModule.sanitize_strings(item['title'])
            titles.append(cleaned)
            sptitle_collection.append(Set(cleaned.split(".")))
            results.append(item)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    query_terms = Set(strsearch1.split("."))
    needed = len(query_terms)
    how_many = len(results)

    #~ one counter per possible value of the 'ignore' flag
    rcount = [0] * 3
    for pos in range(how_many):
        current = results[pos]
        hits = query_terms.intersection(sptitle_collection[pos])
        full_match = (len(hits) == needed)
        current['ignore'] = 0 if full_match else 2

        if full_match:
            #~ scan the remaining results for same title / similar size
            nxt = pos + 1
            while nxt < how_many:
                if titles[pos] == titles[nxt]:
                    gap = float(current['size']) - float(results[nxt]['size'])
                    if abs(gap) < 5000000:
                        current['ignore'] = 1
                nxt += 1

        #~ stats
        rcount[current['ignore']] += 1

    #~ logic params
    exclude_coll = Set([])
    include_coll = Set([])
    for rule in logic_items:
        sign = rule[0]
        if sign == '-':
            exclude_coll.add(rule[1])
        if sign == '+':
            include_coll.add(rule[1])

    if len(include_coll):
        for pos in range(how_many):
            if results[pos]['ignore'] < 2:
                shared = include_coll.intersection(sptitle_collection[pos])
                if not len(shared):
                    results[pos]['ignore'] = 2

    if len(exclude_coll):
        for pos in range(how_many):
            if results[pos]['ignore'] < 2:
                shared = exclude_coll.intersection(sptitle_collection[pos])
                if len(shared):
                    results[pos]['ignore'] = 2

    mssg = '[' + strsearch1 + ']' + ' [' + strsearch + '] '
    mssg = mssg + str(rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2])
    print(mssg)
    log.info(mssg)

    return results
def _summary_matches_search(search_terms, title_terms):
    """Return True when each search term equals or prefixes some title term."""
    for term in search_terms:
        if term in title_terms:
            continue
        #~ relaxed search: accept a term that only starts a title token
        if not any(candidate.startswith(term) for candidate in title_terms):
            return False
    return True


def _summary_has_close_twin(index, results, titles, max_size_gap=5000000):
    """Return True when a later result has the same title and a close 'size'."""
    for later in range(index + 1, len(results)):
        if titles[later] != titles[index]:
            continue
        own_size = float(results[index]['size'])
        other_size = float(results[later]['size'])
        if abs(own_size - other_size) < max_size_gap:
            return True
    return False


def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Merge the provider result lists and flag each result via 'ignore'.

    Result dicts are updated in place with the HTML-sanitized 'title', the
    provider index 'provid' and an 'ignore' flag:

    * 0 - every search term equals, or is a prefix of, a title token
    * 1 - as 0, but a later result has the same sanitized title and a
      'size' differing by less than 5000000
    * 2 - rejected by the search terms or by ``logic_items``

    Args:
        rawResults: sequence with one list of result dicts per provider;
            the keys 'title', 'size' and 'providertitle' are read.
        strsearch: the search string as entered by the user.
        logic_items: optional sequence of items where item[0] is '+' or '-'
            and item[1] is a title token. With '+' items a title must hold
            at least one of the tokens, with '-' items none of them.
        results_stats: optional dict updated in place, mapping
            str(providertitle) to [number of raw results, number of results
            whose 'ignore' flag is not 2].

    Returns:
        The flat list of result dicts (same objects as in ``rawResults``).
    """
    #~ never share default containers between calls: results_stats is
    #~ written to, so a mutable default would accumulate stale providers
    if logic_items is None:
        logic_items = []
    if results_stats is None:
        results_stats = {}

    #~ stats for each provider
    for provider_results in rawResults:
        if len(provider_results):
            provider_key = str(provider_results[0]['providertitle'])
            results_stats[provider_key] = [len(provider_results), 0]

    #~ all in one array
    results = []
    titles = []
    sptitle_collection = []
    for provid, provider_results in enumerate(rawResults):
        for item in provider_results:
            # NOTE(review): a result whose title is None is left out of the
            # merged list although it is counted in the provider's raw
            # total - confirm this is intended.
            if item['title'] is None:
                continue
            item['title'] = SearchModule.sanitize_html(item['title'])
            item['provid'] = provid
            title = SearchModule.sanitize_strings(item['title'])
            titles.append(title)
            sptitle_collection.append(set(title.split(".")))
            results.append(item)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = set(strsearch1.split("."))

    #~ rcount[k]: how many results carry ignore == k before the logic filter
    rcount = [0] * 3
    for z, item in enumerate(results):
        if not _summary_matches_search(strsearch1_collection,
                                       sptitle_collection[z]):
            item['ignore'] = 2
        elif _summary_has_close_twin(z, results, titles):
            item['ignore'] = 1
        else:
            item['ignore'] = 0
        #~ stats
        rcount[item['ignore']] += 1

    #~ logic params
    exclude_coll = set()
    include_coll = set()
    for rule in logic_items:
        if rule[0] == '-':
            exclude_coll.add(rule[1])
        if rule[0] == '+':
            include_coll.add(rule[1])

    for z, item in enumerate(results):
        if item['ignore'] >= 2:
            continue
        tokens = sptitle_collection[z]
        lacks_included = bool(include_coll) and include_coll.isdisjoint(tokens)
        has_excluded = bool(exclude_coll) and not exclude_coll.isdisjoint(tokens)
        if lacks_included or has_excluded:
            item['ignore'] = 2

    mssg = ('Overall search stats: [' + strsearch1 + ']' + ' [' + strsearch + '] '
            + str(rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2]))
    log.info(mssg)

    #~ second stats slot: results of the provider that were not rejected
    for item in results:
        if item['ignore'] != 2:
            results_stats[str(item['providertitle'])][1] += 1

    return results