def update_term(self, term, stream_name):
    """Associate *stream_name* with the SearchTerm entity keyed by *term*.

    Creates the entity on first sight of the term; otherwise appends the
    stream name and de-duplicates the list before saving.

    Args:
        term: the search term string, used as the ndb key id.
        stream_name: name of the stream to associate with the term.
    """
    key = ndb.Key('SearchTerm', term)
    search_term = key.get()
    if not search_term:
        search_term = SearchTerm(term=term, stream_names=[stream_name])
    else:
        # BUG FIX: list.append() returns None, so the original
        # `terms = search_term.stream_names.append(stream_name)` made the
        # subsequent list(set(terms)) raise TypeError. Mutate in place,
        # then de-duplicate (order is not preserved, same as before).
        search_term.stream_names.append(stream_name)
        search_term.stream_names = list(set(search_term.stream_names))
    search_term.put()
def store(request, q):
    """Record the search text *q* for this request.

    Only queries of three or more characters are persisted.
    """
    from stats import stats
    if len(q) > 2:
        term = SearchTerm()
        term.q = q
        term.tracking_id = stats.tracking_id(request)
        term.ip_address = request.META.get('REMOTE_ADDR')
        # Attribute the search to the logged-in user, if any.
        term.user = request.user if request.user.is_authenticated() else None
        term.save()
def get(self):
    """Autocomplete endpoint: emit JSON of stored terms containing the query."""
    needle = self.request.get('term')
    # Pull every stored term, alphabetically, then keep substring matches.
    all_terms = [entity.term
                 for entity in SearchTerm.query().order(SearchTerm.term).fetch()]
    matches = list({candidate for candidate in all_terms if needle in candidate})
    self.response.headers['Content-Type'] = 'application/json'
    self.response.out.write(json.dumps({"matches": matches}))
def store(request, q):
    """Persist search query *q* (three or more characters) in the database."""
    if len(q) <= 2:
        return  # too short to be worth tracking
    term = SearchTerm()
    term.q = q
    term.ip_address = request.META.get('REMOTE_ADDR')
    term.tracking_id = stats.tracking_id(request)
    term.user = request.user if request.user.is_authenticated() else None
    term.save()
def _bump_counter(model, name):
    """Fetch the *model* row named *name* and increment its count.

    Creates the row with count=1 when it does not exist yet.
    get_or_create closes the check-then-act window the old
    exists()/get() pair left open.
    """
    obj, created = model.objects.get_or_create(name=name,
                                               defaults={'count': 1})
    if not created:
        obj.count += 1
        obj.save()


def update_search_stats(query, browser):
    """Update the stats in the database for the SearchTerm and the browser.

    Increments (or creates with count=1) the SearchTerm row for *query*
    and the SearchBrowser row for *browser*.
    """
    # The original duplicated the create-or-increment logic for both
    # models and compared booleans with `== True` / `== False`; a single
    # helper covers both cases.
    _bump_counter(SearchTerm, query)
    _bump_counter(SearchBrowser, browser)
def store(request, q):
    """Store the search term in the DB when it is at least three chars long."""
    if len(q) > 2:
        entry = SearchTerm()
        entry.q = q
        entry.ip_address = request.META.get('REMOTE_ADDR')
        entry.tracking_id = stats.tracking_id(request)
        user = None
        if request.user.is_authenticated():
            user = request.user
        entry.user = user
        entry.save()
def store(request, q, matching):
    """Store the search text and link each matching product to it.

    Queries must be at least two characters long to be recorded.
    """
    if len(q) > 1:
        term = SearchTerm()
        term.q = q
        term.ip_address = request.META.get('REMOTE_ADDR')
        term.tracking_id = stats.__tracking_id(request)
        term.user = request.user if request.user.is_authenticated() else None
        # Save before touching the m2m field: Django raises
        # '<SearchTerm: ...> needs to have a value for field "searchterm"
        # before this many-to-many relationship can be used' otherwise.
        term.save()
        for product in matching:
            term.found_products.add(product)
def store(request, q):
    """Persist search term *q* when it is at least three characters long."""
    if len(q) <= 2:
        return
    term = SearchTerm()
    term.q = q
    term.user = request.user if request.user.is_authenticated() else None
    term.save()
def process_search():
    """Handle a search request: special '--' commands, a DuckDuckGo Python-docs
    lookup, then page and video results from the local index.

    Returns a redirect for '--forum/--cs373/--python/--searchwithpeterdotinfo'
    commands, otherwise the rendered results template.

    NOTE(review): indentation reconstructed from a collapsed source line;
    nesting of the '--showmore'/'--daverank' blocks inferred — confirm against
    the original file.
    """
    search_query = request.GET.get('search_query', '').strip()
    query = search_query.lower()
    show_daverank = False
    results = False
    number_pages = 10   # default page-result count
    number_videos = 5   # default video-result count
    #Move this stuff to its own procedure tomorrow!
    # Command prefixes: queries starting with '--' are dispatched specially.
    if query.find('--') == 0:
        if query.find('--forum') == 0:
            redirect_url = 'http://www.udacity-forums.com/cs101/search/?q=' + urllib.quote(
                query[8:])
            return redirect(redirect_url)
        if query.find('--cs373') == 0:
            redirect_url = 'http://www.udacity-forums.com/cs373/search/?q=' + urllib.quote(
                query[8:])
            return redirect(redirect_url)
        if query.find('--python') == 0:
            redirect_url = 'http://docs.python.org/search.html?q=' + urllib.quote(
                query[9:])
            return redirect(redirect_url)
        if query.find('--searchwithpeterdotinfo') == 0:
            redirect_url = 'http://searchwithpeter.info/secretplans.html?q=' + urllib.quote(
                query[25:])
            return redirect(redirect_url)
        if query.find('--showmore') == 0:
            # Strip the '--showmore ' prefix (11 chars) and widen the limits.
            query = query[11:]
            search_query = query
            number_pages = 20
            number_videos = 10
        if query.find('--daverank') == 0:
            query = query[11:]
            search_query = query
            show_daverank = True
    # Ask DuckDuckGo's instant-answer API for a Python-docs abstract.
    if query.find('python') == 0:
        pyquery = query[7:]  # drop the leading 'python ' word
    else:
        pyquery = query
    ddgurl_root = 'http://duckduckgo.com/?q=python+'
    ddgurl_suffix = urllib.quote(pyquery) + '&format=json'
    response = urllib.urlopen(ddgurl_root + ddgurl_suffix)
    response_json = response.read()
    pythonterm = json.loads(response_json)
    if pythonterm:
        pyterm_info = {}
        if pythonterm['AbstractSource'] == 'Python Documentation':
            pyterm = BeautifulSoup(pythonterm['AbstractText'])
            try:
                # First <code> element is the signature; drop the <pre> so
                # get_text() below yields only the prose description.
                pyterm_code = pyterm.find('code').string
                pyterm.pre.decompose()
                pyterm_info['code'] = pyterm_code
            except:
                # NOTE(review): bare except — presumably guards against a
                # missing <code>/<pre> (AttributeError); narrow if confirmed.
                pyterm_info['code'] = None
            pyterm_desc = pyterm.get_text()
            pyterm_info['desc'] = pyterm_desc
            pyterm_info['url'] = pythonterm['AbstractURL']
            results = True
    else:
        pyterm_info = None
    query_words = query.split()
    # NOTE(review): removing from query_words while iterating it skips the
    # element after each removal, so consecutive stopwords survive — likely a
    # latent bug (fixed in the sibling copy of this function).
    for word in query_words:
        if word in stopwords:
            query_words.remove(word)
    query_urls = []
    for term in query_words:
        # Get all SearchTerm objects that match the search_query.
        q = SearchTerm.all().filter('term =', term).get()
        if q:
            # Each term contributes the set of URLs it appears in.
            query_urls.append(set(q.urls))
    if query_urls:
        # Only URLs matching every query word survive.
        query_url_set = set.intersection(*query_urls)
        query_url_list = list(query_url_set)
        if len(query_url_list) > 0:
            results = True
        if len(query_url_list) > 30:
            # Cap the IN-filter list at 30 URLs.
            query_url_list = query_url_list[0:30]
        page_results = Page.all().filter(
            'url IN', query_url_list).order('-dave_rank').fetch(number_pages)
        page_dicts = []
        for page in page_results:
            page_info = {}
            query_index = page.text.find(query)
            if query_index != -1:
                # Start the excerpt at a word boundary ~25 chars before the hit.
                i = page.text.find(' ', query_index - 25)
                excerpt_words = page.text[i:].split(' ')
                page_info['exact_match'] = True
            else:
                excerpt_words = page.text.split(' ')
                page_info['exact_match'] = False
            excerpt = ' '.join(excerpt_words[:50])
            page_info['text'] = excerpt
            page_info['title'] = page.title
            page_info['url'] = page.url
            page_info['daverank'] = page.dave_rank
            page_info['doc'] = page.doc
            page_dicts.append(page_info)
        # Exact phrase matches float to the top.
        page_dicts.sort(key=itemgetter('exact_match'), reverse=True)
        video_results = Video.all().filter(
            'url IN', query_url_list).order('-views').fetch(number_videos)
        video_dicts = []
        for video in video_results:
            video_info = {}
            subtitles = video.text.lower()
            query_index = subtitles.find(query)
            time_string = ''
            if query_index != -1:
                subtitle_list = subtitles.splitlines()
                for phrase in subtitle_list:
                    if phrase.find(query) != -1:
                        # The line above a subtitle phrase holds its timestamp.
                        timestamp_index = subtitle_list.index(phrase) - 1
                        timestamp = subtitle_list[timestamp_index]
                        if len(timestamp) > 1:
                            # assumes 'hh:mm:ss'-style layout — TODO confirm.
                            minutes = timestamp[3:5]
                            seconds = timestamp[6:8]
                            time_string = '#t=' + minutes + 'm' + seconds + 's'
                            start = 60 * int(minutes) + int(seconds)
            if time_string:
                # Deep-link the video at the matching timestamp.
                url = video.url + time_string
                video_info['exact_match'] = True
            else:
                url = video.url
                start = 0
                video_info['exact_match'] = False
            video_info['title'] = video.title
            video_info['url'] = url
            # NOTE(review): slice [-20:query_index:20] looks suspect — verify
            # the intended excerpt behavior.
            video_info['subtitle'] = video.text[-20:query_index:20]
            video_info['id'] = video.id
            video_info['start'] = start
            video_dicts.append(video_info)
        video_dicts.sort(key=itemgetter('exact_match'), reverse=True)
    else:
        page_dicts = None
        video_dicts = None
    query_string_words = query.split()
    return template('templates/results',
                    search_query=search_query,
                    query_string_words=query_string_words,
                    page_dicts=page_dicts,
                    video_dicts=video_dicts,
                    pyterm_info=pyterm_info,
                    show_daverank=show_daverank,
                    results=results)
def process_search():
    """Handle a search request: special '--' commands, a DuckDuckGo Python-docs
    lookup, then page and video results from the local index.

    Returns a redirect for '--forum/--cs373/--python/--searchwithpeterdotinfo'
    commands, otherwise the rendered results template.
    """
    search_query = request.GET.get("search_query", "").strip()
    query = search_query.lower()
    show_daverank = False
    results = False
    number_pages = 10   # default page-result count
    number_videos = 5   # default video-result count
    # Move this stuff to its own procedure tomorrow!
    # Command prefixes: queries starting with '--' are dispatched specially.
    if query.find("--") == 0:
        if query.find("--forum") == 0:
            redirect_url = "http://www.udacity-forums.com/cs101/search/?q=" + urllib.quote(query[8:])
            return redirect(redirect_url)
        if query.find("--cs373") == 0:
            redirect_url = "http://www.udacity-forums.com/cs373/search/?q=" + urllib.quote(query[8:])
            return redirect(redirect_url)
        if query.find("--python") == 0:
            redirect_url = "http://docs.python.org/search.html?q=" + urllib.quote(query[9:])
            return redirect(redirect_url)
        if query.find("--searchwithpeterdotinfo") == 0:
            redirect_url = "http://searchwithpeter.info/secretplans.html?q=" + urllib.quote(query[25:])
            return redirect(redirect_url)
        if query.find("--showmore") == 0:
            # Strip the '--showmore ' prefix (11 chars) and widen the limits.
            query = query[11:]
            search_query = query
            number_pages = 20
            number_videos = 10
        if query.find("--daverank") == 0:
            query = query[11:]
            search_query = query
            show_daverank = True
    # Ask DuckDuckGo's instant-answer API for a Python-docs abstract.
    if query.find("python") == 0:
        pyquery = query[7:]  # drop the leading 'python ' word
    else:
        pyquery = query
    ddgurl_root = "http://duckduckgo.com/?q=python+"
    ddgurl_suffix = urllib.quote(pyquery) + "&format=json"
    response = urllib.urlopen(ddgurl_root + ddgurl_suffix)
    response_json = response.read()
    pythonterm = json.loads(response_json)
    if pythonterm:
        pyterm_info = {}
        if pythonterm["AbstractSource"] == "Python Documentation":
            pyterm = BeautifulSoup(pythonterm["AbstractText"])
            try:
                # First <code> element is the signature; drop the <pre> so
                # get_text() below yields only the prose description.
                pyterm_code = pyterm.find("code").string
                pyterm.pre.decompose()
                pyterm_info["code"] = pyterm_code
            except AttributeError:
                # BUG FIX: was a bare `except:`; only a missing <code>/<pre>
                # (find() returning None → AttributeError) should be swallowed.
                pyterm_info["code"] = None
            pyterm_desc = pyterm.get_text()
            pyterm_info["desc"] = pyterm_desc
            pyterm_info["url"] = pythonterm["AbstractURL"]
            results = True
    else:
        pyterm_info = None
    # BUG FIX: the original removed items from query_words while iterating it,
    # which skips the element after each removal (consecutive stopwords
    # survived). Filter into a new list instead.
    query_words = [word for word in query.split() if word not in stopwords]
    query_urls = []
    for term in query_words:
        # Get all SearchTerm objects that match the search_query.
        q = SearchTerm.all().filter("term =", term).get()
        if q:
            # Each term contributes the set of URLs it appears in.
            query_urls.append(set(q.urls))
    if query_urls:
        # Only URLs matching every query word survive.
        query_url_set = set.intersection(*query_urls)
        query_url_list = list(query_url_set)
        if len(query_url_list) > 0:
            results = True
        if len(query_url_list) > 30:
            # Cap the IN-filter list at 30 URLs.
            query_url_list = query_url_list[0:30]
        page_results = Page.all().filter("url IN", query_url_list).order("-dave_rank").fetch(number_pages)
        page_dicts = []
        for page in page_results:
            page_info = {}
            query_index = page.text.find(query)
            if query_index != -1:
                # Start the excerpt at a word boundary ~25 chars before the hit.
                i = page.text.find(" ", query_index - 25)
                excerpt_words = page.text[i:].split(" ")
                page_info["exact_match"] = True
            else:
                excerpt_words = page.text.split(" ")
                page_info["exact_match"] = False
            excerpt = " ".join(excerpt_words[:50])
            page_info["text"] = excerpt
            page_info["title"] = page.title
            page_info["url"] = page.url
            page_info["daverank"] = page.dave_rank
            page_info["doc"] = page.doc
            page_dicts.append(page_info)
        # Exact phrase matches float to the top.
        page_dicts.sort(key=itemgetter("exact_match"), reverse=True)
        video_results = Video.all().filter("url IN", query_url_list).order("-views").fetch(number_videos)
        video_dicts = []
        for video in video_results:
            video_info = {}
            subtitles = video.text.lower()
            query_index = subtitles.find(query)
            time_string = ""
            if query_index != -1:
                subtitle_list = subtitles.splitlines()
                for phrase in subtitle_list:
                    if phrase.find(query) != -1:
                        # The line above a subtitle phrase holds its timestamp.
                        timestamp_index = subtitle_list.index(phrase) - 1
                        timestamp = subtitle_list[timestamp_index]
                        if len(timestamp) > 1:
                            # assumes 'hh:mm:ss'-style layout — TODO confirm.
                            minutes = timestamp[3:5]
                            seconds = timestamp[6:8]
                            time_string = "#t=" + minutes + "m" + seconds + "s"
                            start = 60 * int(minutes) + int(seconds)
            if time_string:
                # Deep-link the video at the matching timestamp.
                url = video.url + time_string
                video_info["exact_match"] = True
            else:
                url = video.url
                start = 0
                video_info["exact_match"] = False
            video_info["title"] = video.title
            video_info["url"] = url
            # NOTE(review): slice [-20:query_index:20] looks suspect — kept
            # as-is; verify the intended excerpt behavior.
            video_info["subtitle"] = video.text[-20:query_index:20]
            video_info["id"] = video.id
            video_info["start"] = start
            video_dicts.append(video_info)
        video_dicts.sort(key=itemgetter("exact_match"), reverse=True)
    else:
        page_dicts = None
        video_dicts = None
    query_string_words = query.split()
    return template(
        "templates/results",
        search_query=search_query,
        query_string_words=query_string_words,
        page_dicts=page_dicts,
        video_dicts=video_dicts,
        pyterm_info=pyterm_info,
        show_daverank=show_daverank,
        results=results,
    )