def GoogleSearch(self, site_url, srch_term, srch_kywrds="", page=1):
    """Search Google within ``site_url`` and return the loosely matching hits.

    Every result returned for the requested page is checked (not only the
    first one).  A result is kept when more than 65% of the search words
    occur, as case-insensitive substrings, in its title or URL.

    Args:
        site_url: Site to restrict the query to; sent as ``site:<site_url>``.
        srch_term: Main search phrase.
        srch_kywrds: Optional extra keywords appended to the query.
        page: Page number as seen by the caller; the search object's
            ``page`` attribute is set to ``page - 1``.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, one per accepted
        result, in the order the search yielded them.
    """
    # Function-scope import, as in the original code.
    from entertainment.xgoogle.search import GoogleSearch

    search_url = "site:" + site_url + " " + srch_term + " " + srch_kywrds
    gs = GoogleSearch(search_url)
    gs.results_per_page = 20
    gs.page = page - 1

    # split() with no argument drops empty tokens.  split(" ") produced an
    # empty string whenever srch_kywrds was empty (the default) or the
    # spacing was irregular, and "" is a substring of every text, so each
    # empty token counted as a match and inflated the ratio.
    title_words = (srch_term + " " + srch_kywrds).lower().split()
    match_total = float(len(title_words))

    valid_results = []
    for result in gs.get_results():
        if title_words:
            result_title = (result.title + " " + result.url).lower()
            match_count = sum(1 for word in title_words if word in result_title)
            if match_count / match_total <= 0.65:
                continue
        # With no search words at all there is nothing to filter on, so
        # every result is kept (this is also what the old code did).
        valid_results.append({"title": result.title, "url": result.url})
    return valid_results
def GoogleSearchByTitleReturnFirstResultOnlyIfValid(
    self,
    site_url,
    title,
    srch_kywrds="",
    item_count=1,
    title_extrctr="",
    exact_match=False,
    use_site_prefix=True,
    return_dict=False,
):
    """Return the first fetched search result whose title matches ``title``.

    Results are examined in order and the first acceptable one wins.  For
    each result the lower-cased title is optionally reduced with
    ``title_extrctr`` and then validated:

    * ``exact_match``: the result title must equal the wanted title,
      either directly or after apostrophes are removed from both.
    * otherwise: one of the two titles must be a prefix of the other, and
      more than 65% of the wanted title's words must occur in the result
      title (for a two-word title, 50% is enough).

    If nothing is accepted and the query was restricted with ``site:``,
    the search is repeated once without that prefix.

    Args:
        site_url: Site name/URL placed at the start of the query.
        title: Title to look for; case and repeated whitespace are ignored
            when comparing.
        srch_kywrds: Optional extra keywords appended to the query.
        item_count: Assigned to the search object's ``results_per_page``.
        title_extrctr: Regex, or list of regexes, whose group 1 is taken
            as the result's title.  With a single regex, results it does
            not match are skipped; with a list, the first matching regex
            is used and a result matched by none keeps its full title.
        exact_match: Require title equality instead of the word heuristic.
        use_site_prefix: Prefix the query with ``site:``.
        return_dict: Return the result object itself instead of its URL.

    Returns:
        The accepted result's ``url`` (or the result object when
        ``return_dict`` is set), or ``""`` when no result qualifies.
    """
    import re
    from entertainment.xgoogle.search import GoogleSearch

    # Kept as equality tests so non-bool arguments behave exactly as before.
    site_restricted = use_site_prefix == True
    exact = exact_match == True

    search_url = "site:" if site_restricted else ""
    search_url = search_url + site_url + " " + title + " " + srch_kywrds
    gs = GoogleSearch(search_url)
    gs.results_per_page = item_count

    # Collapse whitespace once.  split(" ") used to yield empty tokens for
    # repeated spaces; "" is a substring of everything, so those tokens
    # always "matched", and the un-normalised title could never be a prefix
    # of (or equal to) a normally spaced result title.
    title_words = title.lower().split()
    title_lower = " ".join(title_words)
    title_no_apostrophes = title_lower.replace("'", "")
    match_total = float(len(title_words))
    has_extractor = str(title_extrctr) != ""

    return_url = ""
    for result in gs.get_results():
        result_title = result.title.lower()

        if has_extractor:
            if isinstance(title_extrctr, list):
                # First pattern that matches wins; if none does, the full
                # title is used unchanged.
                for pattern in title_extrctr:
                    extracted = re.search(pattern, result_title)
                    if extracted:
                        result_title = extracted.group(1)
                        break
            else:
                extracted = re.search(title_extrctr, result_title)
                if not extracted:
                    continue
                result_title = extracted.group(1)

        if exact:
            if (
                result_title == title_lower
                or result_title.replace("'", "") == title_no_apostrophes
            ):
                return_url = result if return_dict else result.url
                break
            continue

        # One title has to start with the other before words are compared.
        if not result_title.startswith(title_lower) and not title_lower.startswith(result_title):
            continue

        if title_words:
            match_count = sum(1 for word in title_words if word in result_title)
            match_fraction = match_count / match_total
        else:
            # Empty title: nothing to compare, so the prefix check alone
            # decides (the old code also accepted the result here).
            match_fraction = 1.0

        if (match_total == 2 and match_fraction >= 0.5) or match_fraction > 0.65:
            return_url = result if return_dict else result.url
            break

    if not return_url and site_restricted:
        # Nothing usable with the site: restriction; retry once without it.
        return_url = self.GoogleSearchByTitleReturnFirstResultOnlyIfValid(
            site_url,
            title,
            srch_kywrds,
            item_count,
            title_extrctr,
            exact_match,
            use_site_prefix=False,
            return_dict=return_dict,
        )
    return return_url