def generate_music_nabresponse(self):
    """Build a music search string from the newznab-style query args.

    Collects the sanitized 'artist', 'album', 'track' and 'year'
    request arguments (when present), joins the non-empty parts into a
    dot-separated search string, and stores it on the instance
    (self.searchstring / self.typesearch).

    Returns:
        str: the literal 'm' (actual search execution is still
        commented out below).
    """
    dstring = []
    for key in ('artist', 'album', 'track', 'year'):
        if key in self.args:
            dstring.append(SearchModule.sanitize_strings(self.args[key]))
    # BUG FIX: the original loop required i < len(dstring)-1, which
    # silently dropped the last collected term and produced an empty
    # search string when only one term was supplied.  Join every
    # non-empty term; '.' is the separator the other search-string
    # builders in this file use between sanitized parts.
    music_search_str = '.'.join([d for d in dstring if len(d)])
    print(music_search_str)
    #~ print movie_search_str
    self.searchstring = music_search_str
    self.typesearch = 0
    #~ compile results
    #~ results = SearchModule.performSearch(movie_search_str, self.cfg )
    #~ flatten and summarize them
    #~ cleaned_results = megasearch.summary_results(results,movie_search_str)
    #~ render XML
    #~ return self.cleanUpResultsXML(cleaned_results)
    return 'm'
def generate_music_nabresponse(self):
    """Build a dot-joined music search string from the query args.

    Looks for "artist", "album", "track" and "year" in self.args,
    sanitizes each present value, joins the non-empty ones and stores
    the result in self.searchstring (typesearch 0 = music).

    Returns:
        str: the literal "m"; the search itself is commented out.
    """
    dstring = []
    for key in ("artist", "album", "track", "year"):
        if key in self.args:
            dstring.append(SearchModule.sanitize_strings(self.args[key]))
    # BUG FIX: the original condition i < len(dstring) - 1 skipped the
    # last collected term entirely (empty output for a single term).
    # Join all non-empty terms with '.' like the other builders do.
    music_search_str = ".".join([term for term in dstring if len(term)])
    print(music_search_str)
    # ~ print movie_search_str
    self.searchstring = music_search_str
    self.typesearch = 0
    # ~ compile results
    # ~ results = SearchModule.performSearch(movie_search_str, self.cfg )
    # ~ flatten and summarize them
    # ~ cleaned_results = megasearch.summary_results(results,movie_search_str)
    # ~ render XML
    # ~ return self.cleanUpResultsXML(cleaned_results)
    return "m"
def generate_tvserie_nabresponse(self, tvrage_show):
    """Answer a newznab tv-search request for the show in tvrage_show.

    Builds a sanitized '<title>.sNN[eNN]' search string from the
    'season'/'ep' request args, runs the search over self.cfg /
    self.cfg_ds, and returns the results rendered as XML.
    """
    #~ compile string
    season_num = self.args.get('season', -1, type=int)
    # When no episode is given we match on season only, so the result
    # filtering below is relaxed (plain flatten, no summary pass).
    relaxed_seasonmatch = 0
    serie_search_str = SearchModule.sanitize_strings(tvrage_show['showtitle'])
    if(self.args.has_key('ep')):
        ep_num = self.args.get('ep', -1, type=int)
        serie_search_str = serie_search_str + '.s%02d' % season_num + 'e%02d' % ep_num
    else:
        serie_search_str = serie_search_str + '.s%02d' % season_num
        relaxed_seasonmatch = 1
    self.typesearch = 1
    self.searchstring = serie_search_str
    #~ compile results
    results = SearchModule.performSearch(serie_search_str, self.cfg, self.cfg_ds)
    cleaned_results = []
    if(relaxed_seasonmatch):
        #~ no cleaning just flatten in one array
        for provid in xrange(len(results)):
            if(results[provid] is not None):
                for z in xrange(len(results[provid])):
                    cleaned_results.append(results[provid][z])
    else:
        #~ flatten and summarize them (dedup + relevance flags)
        cleaned_results = megasearch.summary_results(results, serie_search_str)
    #~ render XML
    return self.cleanUpResultsXML(cleaned_results)
def dosearch(self, args):
    """Run a manual search for args['q'] and populate self.results.

    Recognized args keys: 'q' (query text), 'selcat' (extra category
    terms appended to the query), 'tm' (speed class).  Results may be
    served from the in-memory cache; otherwise every still-valid
    provider is queried.  Callers read self.results afterwards.
    """
    #~ restore originals
    self.cfg = copy.deepcopy(self.cfg_cpy)
    if('q' not in args):
        self.results = []
        return self.results
    nuqry = args['q'] + ' ' + self.cgen['searchaddontxt']
    # Separate the +word/-word logic operators from the plain query.
    self.logic_items = self.logic_expr.findall(nuqry)
    self.qry_nologic = self.logic_expr.sub(" ", nuqry)
    if('selcat' in args):
        if(args['selcat'] != ""):
            self.qry_nologic += " " + args['selcat']
    #~ speed class
    speed_class_sel = 1
    if('tm' in args):
        speed_class_sel = int(args['tm'])
    #~ speed class deepsearch
    self.ds.set_timeout_speedclass(speed_class_sel)
    #~ speed class Nabbased: providers above the selected speed class
    #~ are disabled for this search; the rest get the class timeout.
    for conf in self.cfg:
        if ((conf['speed_class'] <= speed_class_sel) and (conf['valid'])):
            conf['timeout'] = self.cgen['timeout_class'][speed_class_sel]
            #~ print conf['type'] + " " + str(conf['timeout'] ) + ' ' + str(speed_class_sel )
        else:
            conf['valid'] = 0
    # An empty query is allowed only together with a category filter.
    if (len(args['q']) == 0):
        if ('selcat' in args):
            if (len(args['selcat']) == 0):
                self.results = []
                return self.results
        else:
            self.results = []
            return self.results
    if (self.qry_nologic.replace(" ", "") == ""):
        self.results = []
        return self.results
    self.cleancache()
    #~ cache hit, no server report
    self.returncode_fine['code'] = 2
    self.resultsraw = self.chkforcache(self.wrp.chash64_encode(SearchModule.sanitize_strings(self.qry_nologic)), speed_class_sel)
    if (self.resultsraw is None):
        # Cache miss: really query the providers and refresh codes.
        self.resultsraw = SearchModule.performSearch(self.qry_nologic, self.cfg, self.ds)
        self.prepareretcode();
    if (self.cgen['smartsearch'] == 1):
        #~ smartsearch
        self.results = summary_results(self.resultsraw, self.qry_nologic, self.logic_items)
    else:
        #~ no cleaning just flatten in one array
        # NOTE(review): unlike the tv-serie flatten path there is no
        # None-guard on self.resultsraw[provid] here — confirm
        # performSearch never returns None buckets.
        self.results = []
        for provid in xrange(len(self.resultsraw)):
            for z in xrange(len(self.resultsraw[provid])):
                if (self.resultsraw[provid][z]['title'] != None):
                    self.results.append(self.resultsraw[provid][z])
def prepareforquery_show(self, sugg_info_raw, lastepisode, sugg_info):
    """Append one ready-to-search entry per episode to sugg_info.

    Each entry carries a sanitized '<title>.SxxEyy' search string, a
    human-readable label, and the show's tvdb URL.  Returns the
    (mutated) sugg_info list.
    """
    for episode in lastepisode:
        season = int(episode['season'])
        ep = int(episode['ep'])
        entry = {
            'searchstr': SearchModule.sanitize_strings(sugg_info_raw['title'])
                         + '.S%02d' % season + 'E%02d' % ep,
            'prettytxt': sugg_info_raw['title']
                         + ' S%02d ' % season + 'E%02d' % ep,
            'imdb_url': sugg_info_raw['tvdb_url'],
        }
        sugg_info.append(entry)
    return sugg_info
def generate_tsearch_nabresponse(self):
    """Answer a newznab free-text ('t=search') request.

    Sanitizes the 'q' argument, records it as the current search
    (typesearch 2 = free search), queries all providers, summarizes
    the flattened results and renders them as XML.
    """
    # BUG FIX: when the request carried no 'q' argument,
    # freesearch_str was never assigned and the very next statement
    # raised NameError.  Default to an empty query instead.
    freesearch_str = ''
    if(self.args.has_key('q')):
        freesearch_str = SearchModule.sanitize_strings(self.args['q'])
    self.searchstring = freesearch_str
    self.typesearch = 2
    #~ compile results
    results = SearchModule.performSearch(freesearch_str, self.cfg, self.cfg_ds)
    #~ flatten and summarize them
    cleaned_results = megasearch.summary_results(results, freesearch_str)
    #~ render XML
    return self.cleanUpResultsXML(cleaned_results)
def prepareforquery(self, sugg_info_raw):
    """Convert raw movie-suggestion dicts into query-ready entries.

    Each output entry holds a sanitized '<title>.<year>' search
    string, a display label truncated to MAX_CHAR_LEN characters, and
    the movie's IMDb URL.
    """
    sugg_info = []
    for raw in sugg_info_raw:
        pretty = raw['title']
        # Ellipsize over-long titles for display purposes only.
        if len(pretty) > MAX_CHAR_LEN:
            pretty = pretty[0:MAX_CHAR_LEN - 2] + '..'
        sugg_info.append({
            'searchstr': SearchModule.sanitize_strings(raw['title']) + '.' + raw['year'],
            'prettytxt': pretty + '(' + raw['year'] + ')',
            'imdb_url': raw['imdb_url'],
        })
    #~ print si
    #~ print 'dcdddddddddddddddd'
    return sugg_info
def generate_tvserie_nabresponse(self, tvrage_show):
    """Answer a newznab tv-search request for the given show.

    Builds a sanitized '<title>.sNN[eNN]' search string from the
    'season'/'ep' request args, searches all providers, summarizes
    the results and renders them as XML.
    """
    season_num = self.args.get('season', -1, type=int)
    serie_search_str = SearchModule.sanitize_strings(tvrage_show['showtitle'])
    # Season tag is always present; the episode tag only when 'ep'
    # was supplied with the request.
    serie_search_str += '.s%02d' % season_num
    if self.args.has_key('ep'):
        serie_search_str += 'e%02d' % self.args.get('ep', -1, type=int)
    self.typesearch = 1
    self.searchstring = serie_search_str
    #~ compile results
    results = SearchModule.performSearch(serie_search_str, self.cfg)
    #~ flatten and summarize them
    cleaned_results = megasearch.summary_results(results, serie_search_str)
    #~ render XML
    return self.cleanUpResultsXML(cleaned_results)
def prepareforquery_show(self, sugg_info_raw, lastepisode, sugg_info):
    """Extend sugg_info with one searchable entry per episode record."""
    title = sugg_info_raw['title']
    for epinfo in lastepisode:
        # Zero-padded season/episode tags shared by both strings.
        se_tag = 'S%02d' % int(epinfo['season'])
        ep_tag = 'E%02d' % int(epinfo['ep'])
        sugg_info.append({
            'searchstr': SearchModule.sanitize_strings(title) + '.' + se_tag + ep_tag,
            'prettytxt': title + ' ' + se_tag + ' ' + ep_tag,
            'imdb_url': sugg_info_raw['tvdb_url'],
        })
    return sugg_info
def prepareforquery(self, sugg_info_raw):
    """Turn raw suggestion records into search/display entries.

    Produces, per record, a sanitized '<title>.<year>' search string,
    a label capped at MAX_CHAR_LEN characters, and the IMDb URL.
    """
    sugg_info = []
    for idx in xrange(len(sugg_info_raw)):
        rec = sugg_info_raw[idx]
        label = rec['title']
        if len(label) > MAX_CHAR_LEN:
            # Trim and mark truncation with a two-dot ellipsis.
            label = label[0:MAX_CHAR_LEN - 2] + '..'
        entry = {
            'searchstr': SearchModule.sanitize_strings(rec['title']) + '.' + rec['year'],
            'prettytxt': label + '(' + rec['year'] + ')',
            'imdb_url': rec['imdb_url'],
        }
        sugg_info.append(entry)
    #~ print si
    #~ print 'dcdddddddddddddddd'
    return sugg_info
def cleanUpResults(self, params):
    """Render self.results as the main HTML results page.

    params carries the render context: 'sugg' (suggestions), 'args'
    (request args), 'ver' (version notice), 'predb' (predb entries),
    trending lists and misc page options.  Sorts and filters the
    results, formats the size/age/category columns, optionally saves
    the raw results into the in-memory cache, and returns the
    rendered 'main_page.html' template.
    """
    sugg_list = params['sugg']
    results = self.results
    svalid = self.svalid
    args = params['args']
    ver_notify = params['ver']
    niceResults = []
    existduplicates = 0
    # ~ tries to match predb entries
    self.matchpredb(results, params['predb'])
    # ~ avoids GMT problems
    # Age is stored in days; same-day posts fall back to hours divided
    # by 100 so the display code below can tell the two apart
    # (values < 1 are hours, recovered with * 100).
    for i in xrange(len(results)):
        totdays = int((time.time() - results[i]['posting_date_timestamp']) / (3600 * 24))
        if (totdays == 0):
            totdays = float((time.time() - results[i]['posting_date_timestamp']) / (3600))
            # Clock skew between hosts can make this negative.
            if (totdays < 0):
                totdays = -totdays
            totdays = totdays / 100
        results[i]['posting_date_timestamp_refined'] = float(totdays)
    # ~ sorting (default: by refined age, newest first)
    if 'order' not in args:
        results = sorted(results, key=itemgetter('posting_date_timestamp_refined'), reverse=False)
    else:
        if (args['order'] == 't'):
            results = sorted(results, key=itemgetter('title'))
        if (args['order'] == 's'):
            results = sorted(results, key=itemgetter('size'), reverse=True)
        if (args['order'] == 'p'):
            results = sorted(results, key=itemgetter('providertitle'))
        if (args['order'] == 'd'):
            results = sorted(results, key=itemgetter('posting_date_timestamp_refined'), reverse=False)
        if (args['order'] == 'x'):
            results = sorted(results, key=itemgetter('predb'), reverse=True)
        if (args['order'] == 'c'):
            results = sorted(results, key=itemgetter('categ'), reverse=True)
    # ~ do nice: build the display rows
    for i in xrange(len(results)):
        # Drop entries past the retention window and hidden entries
        # (ignore == 2); ignore == 1 marks a kept duplicate.
        if (results[i]['posting_date_timestamp_refined'] > self.cgen['daysretention']):
            continue
        if (results[i]['ignore'] == 2):
            continue
        if (results[i]['ignore'] == 1):
            existduplicates = 1
        # Convert sized to smallest SI unit (note that these are powers
        # of 10, not powers of 2, i.e. OS X file sizes rather than
        # Windows/Linux file sizes)
        szf = float(results[i]['size'] / 1000000.0)
        mgsz = ' MB '
        if (szf > 1000.0):
            szf = szf / 1000
            mgsz = ' GB '
        fsze1 = str(round(szf, 1)) + mgsz
        if (results[i]['size'] == -1):
            fsze1 = 'N/A'
        # Age column: < 1 means scaled hours (see loop above).
        totdays = results[i]['posting_date_timestamp_refined']
        if (totdays < 1):
            totdays = str(int(totdays * 100)) + "h"
        else:
            totdays = str(int(totdays)) + "d"
        # ' - '-separated list of category keys.
        category_str = ''
        keynum = len(results[i]['categ'])
        keycount = 0
        for key in results[i]['categ'].keys():
            category_str = category_str + key
            keycount = keycount + 1
            if (keycount < keynum):
                category_str = category_str + ' - '
        if (results[i]['url'] is None):
            results[i]['url'] = ""
        # Obfuscated download link served through the /warp redirector.
        qryforwarp = self.wrp.chash64_encode(results[i]['url'])
        if ('req_pwd' in results[i]):
            qryforwarp += '&m=' + results[i]['req_pwd']
        niceResults.append({
            'id': i,
            'url': results[i]['url'],
            'url_encr': 'warp?x=' + qryforwarp,
            'title': results[i]['title'],
            'filesize': fsze1,
            'cat': category_str.upper(),
            'age': totdays,
            'details': results[i]['release_comments'],
            'details_deref': 'http://www.derefer.me/?' + results[i]['release_comments'],
            'providerurl': results[i]['provider'],
            'providertitle': results[i]['providertitle'],
            'ignore': results[i]['ignore'],
            'predb': results[i]['predb'],
            'predb_lnk': results[i]['predb_lnk']
        })
    # Download buttons are shown only when the services are configured.
    send2nzbget_exist = None
    if ('nzbget_url' in self.cgen):
        if (len(self.cgen['nzbget_url'])):
            send2nzbget_exist = self.sckname
    send2sab_exist = None
    if ('sabnzbd_url' in self.cgen):
        if (len(self.cgen['sabnzbd_url'])):
            send2sab_exist = self.sckname
    speed_class_sel = 1
    if ('tm' in args):
        speed_class_sel = int(args['tm'])
    # ~ save for caching (only fresh results, bounded cache size, and
    # ~ only when this query is not already cached)
    if (self.resultsraw is not None):
        if (self.cgen['cache_active'] == 1 and len(self.resultsraw) > 0):
            if (len(self.collect_info) < self.cgen['max_cache_qty']):
                if (self.chkforcache(self.wrp.chash64_encode(SearchModule.sanitize_strings(self.qry_nologic)), speed_class_sel) is None):
                    collect_all = {}
                    collect_all['searchstr'] = self.wrp.chash64_encode(
                        SearchModule.sanitize_strings(self.qry_nologic))
                    collect_all['tstamp'] = time.time()
                    collect_all['resultsraw'] = self.resultsraw
                    collect_all['speedclass'] = speed_class_sel
                    self.collect_info.append(collect_all)
                    # ~ print 'Result added to the cache list'
    # ~ ~ ~ ~ ~ ~ ~ ~ ~
    scat = ''
    if ('selcat' in params['args']):
        scat = params['args']['selcat']
    return render_template('main_page.html',
                           results=niceResults,
                           exist=existduplicates,
                           vr=ver_notify,
                           args=args,
                           nc=svalid,
                           sugg=sugg_list,
                           speed_class_sel=speed_class_sel,
                           send2sab_exist=send2sab_exist,
                           send2nzbget_exist=send2nzbget_exist,
                           cgen=self.cgen,
                           trend_show=params['trend_show'],
                           trend_movie=params['trend_movie'],
                           debug_flag=params['debugflag'],
                           sstring=params['args']['q'],
                           scat=scat,
                           selectable_opt=params['selectable_opt'],
                           search_opt=params['search_opt'],
                           sid=params['sid'],
                           servercode_return=self.returncode_fine,
                           large_server=self.cgen['large_server'],
                           motd=params['motd'])
def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Flatten raw per-provider search results and rank their relevance.

    Arguments:
        rawResults: list of per-provider result lists (dicts with at
            least 'title', 'size', 'providertitle').
        strsearch: the original query string.
        logic_items: optional (sign, word) pairs; '+' words must appear
            among a title's tokens, '-' words must not.
        results_stats: optional dict filled with per-provider
            [total hits, kept hits] counters.

    Each kept result dict gains an 'ignore' flag: 0 = keep,
    1 = duplicate of a later entry, 2 = does not match the query.
    Returns the flattened result list.

    BUG FIX: the defaults were previously mutable ([] and {}); the
    shared default results_stats dict was *written to*, leaking stats
    across every call that omitted the argument.  None-sentinels
    restore per-call behaviour without changing the interface.
    """
    if (logic_items is None):
        logic_items = []
    if (results_stats is None):
        results_stats = {}
    results = []
    titles = []
    sptitle_collection = []
    #~ stats for each provider: [number of raw hits, number kept]
    for provid in xrange(len(rawResults)):
        if (len(rawResults[provid])):
            results_stats[str(rawResults[provid][0]['providertitle'])] = [len(rawResults[provid]), 0]
    #~ all in one array, sanitizing titles and keeping each title's
    #~ dot-split token set for the matching passes below
    for provid in xrange(len(rawResults)):
        for z in xrange(len(rawResults[provid])):
            if (rawResults[provid][z]['title'] != None):
                rawResults[provid][z]['title'] = SearchModule.sanitize_html(rawResults[provid][z]['title'])
                rawResults[provid][z]['provid'] = provid
                title = SearchModule.sanitize_strings(rawResults[provid][z]['title'])
                titles.append(title)
                sptitle_collection.append(Set(title.split(".")))
                results.append(rawResults[provid][z])
    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = Set(strsearch1.split("."))
    rcount = [0] * 3
    for z in xrange(len(results)):
        findone = 0
        results[z]['ignore'] = 0
        intrs = strsearch1_collection.intersection(sptitle_collection[z])
        if (len(intrs) == len(strsearch1_collection)):
            findone = 1
        else:
            results[z]['ignore'] = 2
            #~ relax the search: accept the title anyway when every
            #~ unmatched query term is a prefix of some title token
            unmatched_terms_search = strsearch1_collection.difference(intrs)
            unmatched_count = 0
            for mst in unmatched_terms_search:
                my_list = [i for i in sptitle_collection[z] if i.find(mst) == 0]
                if (len(my_list)):
                    unmatched_count = unmatched_count + 1
            if (unmatched_count == len(unmatched_terms_search)):
                findone = 1
                results[z]['ignore'] = 0
        if (findone and results[z]['ignore'] == 0):
            # Mark as duplicate when a *later* entry carries the same
            # cleaned title and (almost) the same size (< 5 MB apart).
            for v in xrange(z + 1, len(results)):
                if (titles[z] == titles[v]):
                    sz1 = float(results[z]['size'])
                    sz2 = float(results[v]['size'])
                    if (abs(sz1 - sz2) < 5000000):
                        results[z]['ignore'] = 1
        #~ stats: counts per ignore value (keep/dup/discard)
        rcount[results[z]['ignore']] += 1
    #~ logic params: +word / -word filters from the query
    exclude_coll = Set([])
    include_coll = Set([])
    for i in xrange(len(logic_items)):
        if (logic_items[i][0] == '-'):
            exclude_coll.add(logic_items[i][1])
        if (logic_items[i][0] == '+'):
            include_coll.add(logic_items[i][1])
    if (len(include_coll)):
        for z in xrange(len(results)):
            if (results[z]['ignore'] < 2):
                intrs_i = include_coll.intersection(sptitle_collection[z])
                if (len(intrs_i) == 0):
                    results[z]['ignore'] = 2
    if (len(exclude_coll)):
        for z in xrange(len(results)):
            if (results[z]['ignore'] < 2):
                intrs_e = exclude_coll.intersection(sptitle_collection[z])
                if (len(intrs_e) > 0):
                    results[z]['ignore'] = 2
    mssg = 'Overall search stats: [' + strsearch1 + ']' + ' [' + strsearch + '] ' + str(rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2])
    log.info(mssg)
    #~ per-provider kept counters
    for z in xrange(len(results)):
        if (results[z]['ignore'] != 2):
            results_stats[str(results[z]['providertitle'])][1] = results_stats[str(results[z]['providertitle'])][1] + 1
    return results
def cleanUpResults(self, params):
    """Build and render the main HTML results page from self.results.

    params supplies the render context: 'sugg', 'args', 'ver',
    'predb', trending lists and page options.  Sorts the results,
    formats size/age/category for display, optionally stores the raw
    results in the in-memory cache, and returns the rendered
    'main_page.html' template.
    """
    sugg_list = params['sugg']
    results = self.results
    svalid = self.svalid
    args = params['args']
    ver_notify = params['ver']
    niceResults = []
    existduplicates = 0
    #~ tries to match predb entries
    self.matchpredb(results, params['predb'])
    #~ avoids GMT problems
    # Age in days; same-day posts are stored as hours / 100 so that
    # values < 1 can be rendered as hours further down (* 100).
    for i in xrange(len(results)):
        totdays = int(
            (time.time() - results[i]['posting_date_timestamp']) / (3600 * 24))
        if (totdays == 0):
            totdays = float(
                (time.time() - results[i]['posting_date_timestamp']) / (3600))
            # Guard against negative ages from clock skew.
            if (totdays < 0):
                totdays = -totdays
            totdays = totdays / 100
        results[i]['posting_date_timestamp_refined'] = float(totdays)
    #~ sorting (default: by refined age)
    if 'order' not in args:
        results = sorted(results,
                         key=itemgetter('posting_date_timestamp_refined'),
                         reverse=False)
    else:
        if (args['order'] == 't'):
            results = sorted(results, key=itemgetter('title'))
        if (args['order'] == 's'):
            results = sorted(results, key=itemgetter('size'), reverse=True)
        if (args['order'] == 'p'):
            results = sorted(results, key=itemgetter('providertitle'))
        if (args['order'] == 'd'):
            results = sorted(
                results,
                key=itemgetter('posting_date_timestamp_refined'),
                reverse=False)
        if (args['order'] == 'x'):
            results = sorted(results, key=itemgetter('predb'), reverse=True)
        if (args['order'] == 'c'):
            results = sorted(results, key=itemgetter('categ'), reverse=True)
    #~ do nice: build the display rows
    for i in xrange(len(results)):
        # ignore == 2: hidden; ignore == 1: kept duplicate.
        if (results[i]['ignore'] == 2):
            continue
        if (results[i]['ignore'] == 1):
            existduplicates = 1
        # Convert sized to smallest SI unit (note that these are powers
        # of 10, not powers of 2, i.e. OS X file sizes rather than
        # Windows/Linux file sizes)
        szf = float(results[i]['size'] / 1000000.0)
        mgsz = ' MB '
        if (szf > 1000.0):
            szf = szf / 1000
            mgsz = ' GB '
        fsze1 = str(round(szf, 1)) + mgsz
        if (results[i]['size'] == -1):
            fsze1 = 'N/A'
        # Age column: < 1 means scaled hours (see loop above).
        totdays = results[i]['posting_date_timestamp_refined']
        if (totdays < 1):
            totdays = str(int(totdays * 100)) + "h"
        else:
            totdays = str(int(totdays)) + "d"
        # ' - '-separated list of category keys.
        category_str = ''
        keynum = len(results[i]['categ'])
        keycount = 0
        for key in results[i]['categ'].keys():
            category_str = category_str + key
            keycount = keycount + 1
            if (keycount < keynum):
                category_str = category_str + ' - '
        if (results[i]['url'] is None):
            results[i]['url'] = ""
        # Obfuscated download link served through the /warp redirector.
        qryforwarp = self.wrp.chash64_encode(results[i]['url'])
        if ('req_pwd' in results[i]):
            qryforwarp += '&m=' + results[i]['req_pwd']
        niceResults.append({
            'id': i,
            'url': results[i]['url'],
            'url_encr': 'warp?x=' + qryforwarp,
            'title': results[i]['title'],
            'filesize': fsze1,
            'cat': category_str.upper(),
            'age': totdays,
            'details': results[i]['release_comments'],
            'details_deref': 'http://www.derefer.me/?' + results[i]['release_comments'],
            'providerurl': results[i]['provider'],
            'providertitle': results[i]['providertitle'],
            'ignore': results[i]['ignore'],
            'predb': results[i]['predb'],
            'predb_lnk': results[i]['predb_lnk']
        })
    # Download buttons only when the target services are configured.
    send2nzbget_exist = None
    if ('nzbget_url' in self.cgen):
        if (len(self.cgen['nzbget_url'])):
            send2nzbget_exist = self.sckname
    send2sab_exist = None
    if ('sabnzbd_url' in self.cgen):
        if (len(self.cgen['sabnzbd_url'])):
            send2sab_exist = self.sckname
    speed_class_sel = 1
    if ('tm' in args):
        speed_class_sel = int(args['tm'])
    #~ save for caching (fresh results only, bounded cache size, and
    #~ only when this query is not already cached)
    if (self.resultsraw is not None):
        if (self.cgen['cache_active'] == 1 and len(self.resultsraw) > 0):
            if (len(self.collect_info) < self.cgen['max_cache_qty']):
                if (self.chkforcache(
                        self.wrp.chash64_encode(
                            SearchModule.sanitize_strings(
                                self.qry_nologic)), speed_class_sel) is None):
                    collect_all = {}
                    collect_all['searchstr'] = self.wrp.chash64_encode(
                        SearchModule.sanitize_strings(self.qry_nologic))
                    collect_all['tstamp'] = time.time()
                    collect_all['resultsraw'] = self.resultsraw
                    collect_all['speedclass'] = speed_class_sel
                    self.collect_info.append(collect_all)
                    #~ print 'Result added to the cache list'
    #~ ~ ~ ~ ~ ~ ~ ~ ~
    scat = ''
    if ('selcat' in params['args']):
        scat = params['args']['selcat']
    return render_template('main_page.html',
                           results=niceResults,
                           exist=existduplicates,
                           vr=ver_notify,
                           args=args,
                           nc=svalid,
                           sugg=sugg_list,
                           speed_class_sel=speed_class_sel,
                           send2sab_exist=send2sab_exist,
                           send2nzbget_exist=send2nzbget_exist,
                           cgen=self.cgen,
                           trend_show=params['trend_show'],
                           trend_movie=params['trend_movie'],
                           debug_flag=params['debugflag'],
                           sstring=params['args']['q'],
                           scat=scat,
                           selectable_opt=params['selectable_opt'],
                           search_opt=params['search_opt'],
                           sid=params['sid'],
                           servercode_return=self.returncode_fine,
                           large_server=self.cgen['large_server'],
                           motd=params['motd'])
def dosearch(self, args):
    """Run a manual search and populate self.results/self.res_results.

    Recognized args keys: 'q' (query text), 'selcat' (extra category
    terms), 'tm' (speed class).  Results may be served from the
    in-memory cache; otherwise the providers are queried.  Also seeds
    self.res_results with per-provider [total, kept] counters.
    """
    #~ restore originals
    self.cfg = copy.deepcopy(self.cfg_cpy)
    if ('q' not in args):
        self.results = []
        return self.results
    nuqry = args['q'] + ' ' + self.cgen['searchaddontxt']
    # Separate the +word/-word logic operators from the plain query.
    self.logic_items = self.logic_expr.findall(nuqry)
    self.qry_nologic = self.logic_expr.sub(" ", nuqry)
    if ('selcat' in args):
        if (args['selcat'] != ""):
            self.qry_nologic += " " + args['selcat']
    #~ speed class
    speed_class_sel = 1
    if ('tm' in args):
        speed_class_sel = int(args['tm'])
    #~ speed class deepsearch
    self.ds.set_extraopt(speed_class_sel, 'manual')
    #~ speed class Nabbased
    self.set_timeout_speedclass(speed_class_sel)
    #~ manual search Nabbased
    self.set_extraopt()
    # An empty query is allowed only together with a category filter.
    if (len(args['q']) == 0):
        if ('selcat' in args):
            if (len(args['selcat']) == 0):
                self.results = []
                return self.results
        else:
            self.results = []
            return self.results
    if (self.qry_nologic.replace(" ", "") == ""):
        self.results = []
        return self.results
    log.info('TYPE OF SEARCH: ' + str(speed_class_sel))
    self.cleancache()
    #~ cache hit, no server report
    cachehit = True
    self.returncode_fine['code'] = 2
    self.resultsraw = self.chkforcache(
        self.wrp.chash64_encode(
            SearchModule.sanitize_strings(self.qry_nologic)),
        speed_class_sel)
    if (self.resultsraw is None):
        self.resultsraw = SearchModule.performSearch(
            self.qry_nologic, self.cfg, self.ds)
        cachehit = False
    if (self.cgen['smartsearch'] == 1):
        #~ smartsearch: dedup/relevance pass fills res_results stats
        self.res_results = {}
        self.results = summary_results(self.resultsraw, self.qry_nologic,
                                       self.logic_items, self.res_results)
    else:
        #~ no cleaning just flatten in one array
        self.results = []
        self.res_results = {}
        # Seed per-provider stats: [total hits, kept hits].
        for provid in xrange(len(self.resultsraw)):
            if (len(self.resultsraw[provid])):
                self.res_results[str(
                    self.resultsraw[provid][0]['providertitle'])] = [
                        len(self.resultsraw[provid]), 0
                ]
        for provid in xrange(len(self.resultsraw)):
            for z in xrange(len(self.resultsraw[provid])):
                if (self.resultsraw[provid][z]['title'] != None):
                    self.results.append(self.resultsraw[provid][z])
    #~ server status output
    # Only refresh provider return codes on a real (non-cached) search.
    if (cachehit == False):
        self.prepareretcode()
def summary_results(rawResults, strsearch, logic_items=[]):
    """Flatten per-provider results and flag mismatches/duplicates.

    Every result dict gains an 'ignore' flag: 0 = keep, 1 = duplicate
    of a later same-title/same-size entry, 2 = its title does not
    contain every query token.  '+'/'-' pairs in logic_items force
    tokens to be present/absent.  Returns the flattened list.
    """
    results = []
    titles = []
    sptitle_collection = []
    # Flatten all provider buckets, sanitizing each title and keeping
    # its dot-split token set for the matching passes below.
    for bucket in rawResults:
        for item in bucket:
            item["title"] = SearchModule.sanitize_html(item["title"])
            cleaned = SearchModule.sanitize_strings(item["title"])
            titles.append(cleaned)
            sptitle_collection.append(set(cleaned.split(".")))
            results.append(item)
    strsearch1 = SearchModule.sanitize_strings(strsearch)
    wanted = set(strsearch1.split("."))
    rcount = [0, 0, 0]
    for z in xrange(len(results)):
        overlap = wanted.intersection(sptitle_collection[z])
        if len(overlap) == len(wanted):
            results[z]["ignore"] = 0
            # Forward scan: a later entry with the identical cleaned
            # title and a size within 5 MB makes this one a duplicate.
            for v in xrange(z + 1, len(results)):
                if titles[z] == titles[v]:
                    if abs(float(results[z]["size"]) - float(results[v]["size"])) < 5000000:
                        results[z]["ignore"] = 1
        else:
            results[z]["ignore"] = 2
        # Per-flag counters for the stats line.
        rcount[results[z]["ignore"]] += 1
    # '+' tokens must appear in a kept title, '-' tokens must not.
    exclude_coll = set()
    include_coll = set()
    for sign_word in logic_items:
        if sign_word[0] == "-":
            exclude_coll.add(sign_word[1])
        if sign_word[0] == "+":
            include_coll.add(sign_word[1])
    if include_coll:
        for z in xrange(len(results)):
            if results[z]["ignore"] < 2:
                if not include_coll.intersection(sptitle_collection[z]):
                    results[z]["ignore"] = 2
    if exclude_coll:
        for z in xrange(len(results)):
            if results[z]["ignore"] < 2:
                if exclude_coll.intersection(sptitle_collection[z]):
                    results[z]["ignore"] = 2
    mssg = ("[" + strsearch1 + "]" + " [" + strsearch + "] "
            + str(rcount[0]) + " " + str(rcount[1]) + " " + str(rcount[2]))
    print(mssg)
    log.info(mssg)
    return results
def summary_results(rawResults, strsearch, logic_items=[]):
    """Flatten per-provider results and flag mismatches/duplicates.

    Each result dict gains an 'ignore' flag: 0 = keep, 1 = duplicate
    of a later entry, 2 = title does not contain every query token.
    Returns the flattened list.

    NOTE(review): logic_items=[] is a shared mutable default — it is
    only read here, but callers should still pass their own list.
    """
    results = []
    titles = []
    sptitle_collection = []
    #~ all in one array: sanitize titles, keep dot-split token sets
    for provid in xrange(len(rawResults)):
        for z in xrange(len(rawResults[provid])):
            rawResults[provid][z]['title'] = SearchModule.sanitize_html(
                rawResults[provid][z]['title'])
            title = SearchModule.sanitize_strings(
                rawResults[provid][z]['title'])
            titles.append(title)
            sptitle_collection.append(Set(title.split(".")))
            results.append(rawResults[provid][z])
    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = Set(strsearch1.split("."))
    rcount = [0] * 3
    for z in xrange(len(results)):
        findone = 0
        results[z]['ignore'] = 0
        # Keep only titles whose token set contains every query token.
        intrs = strsearch1_collection.intersection(sptitle_collection[z])
        if (len(intrs) == len(strsearch1_collection)):
            findone = 1
        else:
            results[z]['ignore'] = 2
        #~ print strsearch1_collection
        #~ print intrs
        #~ print findone
        #~ print '------------------'
        if (findone and results[z]['ignore'] == 0):
            # A later entry with the identical cleaned title and a size
            # within 5 MB makes this entry a duplicate (ignore = 1).
            for v in xrange(z + 1, len(results)):
                if (titles[z] == titles[v]):
                    sz1 = float(results[z]['size'])
                    sz2 = float(results[v]['size'])
                    if (abs(sz1 - sz2) < 5000000):
                        results[z]['ignore'] = 1
        #~ stats: counters per ignore value (keep/dup/discard)
        rcount[results[z]['ignore']] += 1
    #~ logic params: '+' tokens must appear, '-' tokens must not
    exclude_coll = Set([])
    include_coll = Set([])
    #~ print '*'+logic_items[0][1]+'*'
    for i in xrange(len(logic_items)):
        if (logic_items[i][0] == '-'):
            exclude_coll.add(logic_items[i][1])
        if (logic_items[i][0] == '+'):
            include_coll.add(logic_items[i][1])
    if (len(include_coll)):
        for z in xrange(len(results)):
            if (results[z]['ignore'] < 2):
                intrs_i = include_coll.intersection(sptitle_collection[z])
                if (len(intrs_i) == 0):
                    results[z]['ignore'] = 2
    if (len(exclude_coll)):
        for z in xrange(len(results)):
            if (results[z]['ignore'] < 2):
                intrs_e = exclude_coll.intersection(sptitle_collection[z])
                if (len(intrs_e) > 0):
                    results[z]['ignore'] = 2
    #~ keep / duplicate / discarded totals
    mssg = '[' + strsearch1 + ']' + ' [' + strsearch + '] ' + str(
        rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2])
    print mssg
    log.info(mssg)
    return results
def dosearch(self, args):
    """Run a manual search; fill self.results and self.res_results.

    args keys: 'q' (query text), 'selcat' (extra category terms),
    'tm' (speed class).  Serves from the in-memory cache when
    possible, otherwise queries the providers.
    """
    # ~ restore originals
    self.cfg = copy.deepcopy(self.cfg_cpy)
    if ('q' not in args):
        self.results = []
        return self.results
    nuqry = args['q'] + ' ' + self.cgen['searchaddontxt']
    # Split the +word/-word logic operators away from the plain query.
    self.logic_items = self.logic_expr.findall(nuqry)
    self.qry_nologic = self.logic_expr.sub(" ", nuqry)
    if ('selcat' in args):
        if (args['selcat'] != ""):
            self.qry_nologic += " " + args['selcat']
    # ~ speed class
    speed_class_sel = 1
    if ('tm' in args):
        speed_class_sel = int(args['tm'])
    # ~ speed class deepsearch
    self.ds.set_extraopt(speed_class_sel, 'manual')
    # ~ speed class Nabbased
    self.set_timeout_speedclass(speed_class_sel)
    # ~ manual search Nabbased
    self.set_extraopt()
    # An empty query is allowed only together with a category filter.
    if (len(args['q']) == 0):
        if ('selcat' in args):
            if (len(args['selcat']) == 0):
                self.results = []
                return self.results
        else:
            self.results = []
            return self.results
    if (self.qry_nologic.replace(" ", "") == ""):
        self.results = []
        return self.results
    log.info('TYPE OF SEARCH: ' + str(speed_class_sel))
    self.cleancache()
    # ~ cache hit, no server report
    cachehit = True
    self.returncode_fine['code'] = 2
    self.resultsraw = self.chkforcache(self.wrp.chash64_encode(SearchModule.sanitize_strings(self.qry_nologic)), speed_class_sel)
    if (self.resultsraw is None):
        self.resultsraw = SearchModule.performSearch(self.qry_nologic, self.cfg, self.ds)
        cachehit = False
    if (self.cgen['smartsearch'] == 1):
        # ~ smartsearch: dedup/relevance pass fills res_results stats
        self.res_results = {}
        self.results = summary_results(self.resultsraw, self.qry_nologic, self.logic_items, self.res_results)
    else:
        # ~ no cleaning just flatten in one array
        self.results = []
        self.res_results = {}
        # Seed per-provider stats: [total hits, kept hits].
        for provid in xrange(len(self.resultsraw)):
            if (len(self.resultsraw[provid])):
                self.res_results[str(self.resultsraw[provid][0]['providertitle'])] = [len(self.resultsraw[provid]), 0]
        for provid in xrange(len(self.resultsraw)):
            for z in xrange(len(self.resultsraw[provid])):
                if (self.resultsraw[provid][z]['title'] != None):
                    self.results.append(self.resultsraw[provid][z])
    # ~ server status output
    # Only refresh provider return codes on a real (non-cached) search.
    if (cachehit == False):
        self.prepareretcode();
def ask(self, arguments):
    """Resolve the free-text movie query in arguments['q'].

    Stores the request args and a space-joined sanitized query on the
    instance, looks the title up, picks the best match and prepares
    the suggestion list (self.sugg_info).
    """
    self.args = arguments
    # Sanitizer emits dot-separated tokens; the lookup wants spaces.
    self.search_str = SearchModule.sanitize_strings(self.args['q']).replace(".", " ")
    candidates = self.imdb_titlemovieinfo()
    best_match = self.movie_bestmatch(candidates)
    self.sugg_info = self.prepareforquery(best_match)
def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Flatten per-provider results, rank relevance, collect stats.

    Arguments:
        rawResults: list of per-provider result lists (dicts with at
            least 'title', 'size', 'providertitle').
        strsearch: the original query string.
        logic_items: optional (sign, word) pairs; '+' words must
            appear among a title's tokens, '-' words must not.
        results_stats: optional dict filled with per-provider
            [total hits, kept hits] counters.

    Each kept result dict gains an 'ignore' flag: 0 = keep,
    1 = duplicate of a later entry, 2 = does not match the query.
    Returns the flattened result list.

    BUG FIX: the defaults were previously mutable ([] and {}); the
    shared default results_stats dict was written to, leaking
    per-provider counters across calls that omitted the argument.
    """
    if (logic_items is None):
        logic_items = []
    if (results_stats is None):
        results_stats = {}
    results = []
    titles = []
    sptitle_collection = []
    # ~ stats for each provider: [number of raw hits, number kept]
    for provid in xrange(len(rawResults)):
        if (len(rawResults[provid])):
            results_stats[str(rawResults[provid][0]['providertitle'])] = [len(rawResults[provid]), 0]
    # ~ all in one array: sanitize titles, tag provider id, keep each
    # ~ title's dot-split token set for the matching passes below
    for provid in xrange(len(rawResults)):
        for z in xrange(len(rawResults[provid])):
            if (rawResults[provid][z]['title'] != None):
                rawResults[provid][z]['title'] = SearchModule.sanitize_html(rawResults[provid][z]['title'])
                rawResults[provid][z]['provid'] = provid
                title = SearchModule.sanitize_strings(rawResults[provid][z]['title'])
                titles.append(title)
                sptitle_collection.append(Set(title.split(".")))
                results.append(rawResults[provid][z])
    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = Set(strsearch1.split("."))
    rcount = [0] * 3
    for z in xrange(len(results)):
        findone = 0
        results[z]['ignore'] = 0
        intrs = strsearch1_collection.intersection(sptitle_collection[z])
        if (len(intrs) == len(strsearch1_collection)):
            findone = 1
        else:
            results[z]['ignore'] = 2
            # ~ relax the search: accept the title anyway when every
            # ~ unmatched query term is a prefix of some title token
            unmatched_terms_search = strsearch1_collection.difference(intrs)
            unmatched_count = 0
            for mst in unmatched_terms_search:
                my_list = [i for i in sptitle_collection[z] if i.find(mst) == 0]
                if (len(my_list)):
                    unmatched_count = unmatched_count + 1
            if (unmatched_count == len(unmatched_terms_search)):
                findone = 1
                results[z]['ignore'] = 0
        if (findone and results[z]['ignore'] == 0):
            # A later entry with the identical cleaned title and a
            # size within 5 MB makes this one a duplicate.
            for v in xrange(z + 1, len(results)):
                if (titles[z] == titles[v]):
                    sz1 = float(results[z]['size'])
                    sz2 = float(results[v]['size'])
                    if (abs(sz1 - sz2) < 5000000):
                        results[z]['ignore'] = 1
        # ~ stats: counters per ignore value (keep/dup/discard)
        rcount[results[z]['ignore']] += 1
    # ~ logic params: '+' tokens must appear, '-' tokens must not
    exclude_coll = Set([])
    include_coll = Set([])
    for i in xrange(len(logic_items)):
        if (logic_items[i][0] == '-'):
            exclude_coll.add(logic_items[i][1])
        if (logic_items[i][0] == '+'):
            include_coll.add(logic_items[i][1])
    if (len(include_coll)):
        for z in xrange(len(results)):
            if (results[z]['ignore'] < 2):
                intrs_i = include_coll.intersection(sptitle_collection[z])
                if (len(intrs_i) == 0):
                    results[z]['ignore'] = 2
    if (len(exclude_coll)):
        for z in xrange(len(results)):
            if (results[z]['ignore'] < 2):
                intrs_e = exclude_coll.intersection(sptitle_collection[z])
                if (len(intrs_e) > 0):
                    results[z]['ignore'] = 2
    mssg = 'Overall search stats: [' + strsearch1 + ']' + ' [' + strsearch + '] ' + str(rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2])
    log.info(mssg)
    # ~ per-provider kept counters
    for z in xrange(len(results)):
        if (results[z]['ignore'] != 2):
            results_stats[str(results[z]['providertitle'])][1] = results_stats[str(results[z]['providertitle'])][1] + 1
    return results