def search_google_(target):
    """Search Google for the exact *target* phrase and process each result.

    For every hit: if the result's domain is a known Spanish newspaper
    (data/newspaper/spain-newspaper.txt), hand the link to the newspaper
    parser; otherwise, unless the domain is blacklisted in
    ``config.BL_parserPhone``, fetch the page and run the generic parser
    over its stripped text. Network/parse errors are reported and skipped.
    """
    engine = Google()
    # Quote the target so the engine searches the exact phrase.
    results = engine.search("'" + target + "'")
    for r in results:
        print("|")
        print("|----[INFO][GOOGLE][RESULTS][>] " + r["title"])
        print("|----[INFO][GOOGLE][RESULTS][DESCRIPTION][>] " + r["text"])
        print("|----[INFO][GOOGLE][RESULTS][LINK][>] " + r["link"])
        try:
            tsd, td, tsu = extract(r["link"])
            domain = td + '.' + tsu
            # BUG FIX: original leaked the file handle; `with` closes it.
            with open("data/newspaper/spain-newspaper.txt", "r") as spain_newspaper:
                known_newspapers = {news.strip() for news in spain_newspaper}
            # BUG FIX: original compared the domain against each file line in a
            # loop and ran the else-branch fetch once per NON-matching line,
            # re-downloading the same URL many times. Decide once instead.
            if domain in known_newspapers:
                newspaper.news_parser(r["link"], target)
            elif domain not in config.BL_parserPhone:
                web = requests.get(r["link"], timeout=3)
                # BUG FIX: original used `or`, which is always True for any
                # status code; a chained comparison checks the 2xx range.
                if 200 <= web.status_code < 300:
                    TEXT = er.remove_tags(str(web.text))
                    parser.parserMAIN(TEXT)
            print("|")
        except Exception as e:
            # Best-effort: report the failing result and continue with the next.
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def search_google_(target):
    """Search Google for the exact *target* phrase and parse each result page.

    NOTE(review): this is a second definition of ``search_google_`` and
    shadows the earlier, more elaborate one — confirm which version is
    intended and delete the other.
    """
    engine = Google()
    # Quote the target so the engine searches the exact phrase.
    results = engine.search("'" + target + "'")
    for r in results:
        print("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | " + r["text"] + " | " + r["link"])
        try:
            web = requests.get(r["link"], timeout=3)
            print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
            # BUG FIX: original used `or`, which is always True for any status
            # code; a chained comparison checks the 2xx range correctly.
            if 200 <= web.status_code < 300:
                TEXT = er.remove_tags(str(web.text))
                parser.parserMAIN(TEXT)
        except Exception as e:
            # Best-effort: report the failing result and continue with the next.
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def search_DDG_DORKS(TITLE, TEXT_0):
    """Dork-search each fact-checking domain on DuckDuckGo for TITLE.

    For every non-PDF, non-blacklisted hit, the page text is compared with
    the original article text ``TEXT_0``, run through the fact-checking and
    main parsers, and the result is appended to a log. Errors on a single
    result are reported and the loop continues.
    """
    engine = Duckduckgo()
    for FC_domain in config.FC_list:
        # site: dork restricts the search to one fact-checking platform.
        results = engine.search(f"site:{FC_domain} {TITLE}")
        for r in results:
            print("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | " + r["text"] + " | " + r["link"])
            try:
                tsd, td, tsu = extract(r["link"])
                domain = td + '.' + tsu
                web = requests.get(r["link"], timeout=3)
                print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
                # BUG FIX: original used `or`, which is always True for any
                # status code; a chained comparison checks the 2xx range.
                if 200 <= web.status_code < 300:
                    # Skip PDFs (not parseable here) and blacklisted domains.
                    if ".pdf" not in r["link"] and domain not in config.BL_parserPhone:
                        TEXT = er.remove_tags(str(web.text))
                        compareTEXT(TEXT, TEXT_0)
                        parser.FC_words_in_text(TEXT)
                        parser.parserMAIN(TEXT)
                        ratio = compareTEXT(TEXT_0, TEXT)
                        print(f"|----[INFO][COMPARE TEXTS][>] Ratio: {ratio}")
                        # Save the result to a log.
                        # BUG FIX: original called generateLOG(data, target) but
                        # `target` is undefined here (NameError, silently
                        # swallowed by the except) — log under TITLE instead.
                        data = f"{r['title']} ||| {r['link']} ||| {r['text']}, ||| {ratio} \n"
                        generateLOG(data, TITLE)
                print("")
                # Throttle requests between results.
                time.sleep(2)
            except Exception as e:
                # Best-effort: report the failing result and continue.
                print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def main():
    """Interactive entry point: footprint a news URL, then optionally search
    the web, fact-checking platforms, and Twitter for related coverage."""
    # Print the main banner.
    print(config.banner)
    # Ask for the URL to analyze.
    url = input("Insert URL: ")
    # Fetch the HTML and extract title and description.
    HTML = requests.get(url)
    TITLE = footprintingWEB_TITLE(HTML)
    DESC = er.remove_tags(str(footprintingWEB_DESC(HTML)))
    print(f"|----[TARGET][>] {url}")
    print(f"|--------[TARGET][TITLE][>] {TITLE}")
    print(f"|--------[TARGET][DESCRIPTION][>] {DESC}")
    time.sleep(2)
    # Plain text of the article, used as the comparison baseline.
    TEXT_0 = er.remove_tags(str(HTML.text))
    # Look for a date embedded in the URL.
    DATE = parser.parser_EN_DATE(url)
    # Parse the article text for the different data types.
    parser.parserMAIN(TEXT_0)
    time.sleep(3)
    # Search Google and DuckDuckGo for related news.
    print("|----[INFO][>] Now let's look for other news: \n")
    m = input("Do you want to search the original web? (Y/n): ")
    if m in ("y", "Y"):
        # BUG FIX: search_google_ takes a single argument (the target
        # phrase); the original passed (TITLE, TEXT_0) -> TypeError.
        search_google_(TITLE)
        search_DDG_(TITLE, TEXT_0)
    # Search fact-checking platforms with DDG dorks.
    # NOTE(review): the original `else: exit` branches were bare name
    # references (no-ops, not calls) — removed; declining simply skips.
    m = input("Do you want to analyze in fact-checking platforms? (Y/n): ")
    if m in ("y", "Y"):
        search_DDG_DORKS(TITLE, TEXT_0)
    # Search Twitter.
    m = input("Do you want to search in Twitter? (Y/n): ")
    if m in ("y", "Y"):
        Twint.search_Twitter(url)