def search_google_(target):
    """Search Google for the exact *target* phrase and process each result.

    For every hit: if the result's domain is a known Spanish newspaper
    (data/newspaper/spain-newspaper.txt), hand the link to the newspaper
    parser; otherwise, unless the domain is blacklisted in
    ``config.BL_parserPhone``, fetch the page and run the generic parser
    over its stripped text. Network/parse errors are reported and skipped.
    """
    engine = Google()
    # Quote the target so the engine searches the exact phrase.
    results = engine.search("'" + target + "'")
    for r in results:
        print("|")
        print("|----[INFO][GOOGLE][RESULTS][>] " + r["title"])
        print("|----[INFO][GOOGLE][RESULTS][DESCRIPTION][>] " + r["text"])
        print("|----[INFO][GOOGLE][RESULTS][LINK][>] " + r["link"])
        try:
            tsd, td, tsu = extract(r["link"])
            domain = td + '.' + tsu
            # BUG FIX: original leaked the file handle; `with` closes it.
            with open("data/newspaper/spain-newspaper.txt", "r") as spain_newspaper:
                known_newspapers = {news.strip() for news in spain_newspaper}
            # BUG FIX: original compared the domain against each file line in a
            # loop and ran the else-branch fetch once per NON-matching line,
            # re-downloading the same URL many times. Decide once instead.
            if domain in known_newspapers:
                newspaper.news_parser(r["link"], target)
            elif domain not in config.BL_parserPhone:
                web = requests.get(r["link"], timeout=3)
                # BUG FIX: original used `or`, which is always True for any
                # status code; a chained comparison checks the 2xx range.
                if 200 <= web.status_code < 300:
                    TEXT = er.remove_tags(str(web.text))
                    parser.parserMAIN(TEXT)
            print("|")
        except Exception as e:
            # Best-effort: report the failing result and continue with the next.
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def search_google_(target):
    """Search Google for the exact *target* phrase and parse each result page.

    NOTE(review): this is a second definition of ``search_google_`` and
    shadows the earlier, more elaborate one — confirm which version is
    intended and delete the other.
    """
    engine = Google()
    # Quote the target so the engine searches the exact phrase.
    results = engine.search("'" + target + "'")
    for r in results:
        print("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | " + r["text"] + " | " + r["link"])
        try:
            web = requests.get(r["link"], timeout=3)
            print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
            # BUG FIX: original used `or`, which is always True for any status
            # code; a chained comparison checks the 2xx range correctly.
            if 200 <= web.status_code < 300:
                TEXT = er.remove_tags(str(web.text))
                parser.parserMAIN(TEXT)
        except Exception as e:
            # Best-effort: report the failing result and continue with the next.
            print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def search_DDG_DORKS(TITLE, TEXT_0):
    """Dork-search each fact-checking domain on DuckDuckGo for TITLE.

    For every non-PDF, non-blacklisted hit, the page text is compared with
    the original article text ``TEXT_0``, run through the fact-checking and
    main parsers, and the result is appended to a log. Errors on a single
    result are reported and the loop continues.
    """
    engine = Duckduckgo()
    for FC_domain in config.FC_list:
        # site: dork restricts the search to one fact-checking platform.
        results = engine.search(f"site:{FC_domain} {TITLE}")
        for r in results:
            print("|--[INFO][GOOGLE][RESULTS][>] " + r["title"] + " | " + r["text"] + " | " + r["link"])
            try:
                tsd, td, tsu = extract(r["link"])
                domain = td + '.' + tsu
                web = requests.get(r["link"], timeout=3)
                print("|----[INFO][WEB][HTTP CODE][>] " + str(web.status_code) + "\n")
                # BUG FIX: original used `or`, which is always True for any
                # status code; a chained comparison checks the 2xx range.
                if 200 <= web.status_code < 300:
                    # Skip PDFs (not parseable here) and blacklisted domains.
                    if ".pdf" not in r["link"] and domain not in config.BL_parserPhone:
                        TEXT = er.remove_tags(str(web.text))
                        compareTEXT(TEXT, TEXT_0)
                        parser.FC_words_in_text(TEXT)
                        parser.parserMAIN(TEXT)
                        ratio = compareTEXT(TEXT_0, TEXT)
                        print(f"|----[INFO][COMPARE TEXTS][>] Ratio: {ratio}")
                        # Save the result to a log.
                        # BUG FIX: original called generateLOG(data, target) but
                        # `target` is undefined here (NameError, silently
                        # swallowed by the except) — log under TITLE instead.
                        data = f"{r['title']} ||| {r['link']} ||| {r['text']}, ||| {ratio} \n"
                        generateLOG(data, TITLE)
                print("")
                # Throttle requests between results.
                time.sleep(2)
            except Exception as e:
                # Best-effort: report the failing result and continue.
                print("|----[ERROR][HTTP CONNECTION][>] " + str(e))
def main():
    """Interactive entry point: footprint a news URL, then optionally search
    the web, fact-checking platforms, and Twitter for related coverage."""
    # Print the main banner.
    print(config.banner)
    # Ask for the URL to analyze.
    url = input("Insert URL: ")
    # Fetch the HTML and extract title and description.
    HTML = requests.get(url)
    TITLE = footprintingWEB_TITLE(HTML)
    DESC = er.remove_tags(str(footprintingWEB_DESC(HTML)))
    print(f"|----[TARGET][>] {url}")
    print(f"|--------[TARGET][TITLE][>] {TITLE}")
    print(f"|--------[TARGET][DESCRIPTION][>] {DESC}")
    time.sleep(2)
    # Plain text of the article, used as the comparison baseline.
    TEXT_0 = er.remove_tags(str(HTML.text))
    # Look for a date embedded in the URL.
    DATE = parser.parser_EN_DATE(url)
    # Parse the article text for the different data types.
    parser.parserMAIN(TEXT_0)
    time.sleep(3)
    # Search Google and DuckDuckGo for related news.
    print("|----[INFO][>] Now let's look for other news: \n")
    m = input("Do you want to search the original web? (Y/n): ")
    if m in ("y", "Y"):
        # BUG FIX: search_google_ takes a single argument (the target
        # phrase); the original passed (TITLE, TEXT_0) -> TypeError.
        search_google_(TITLE)
        search_DDG_(TITLE, TEXT_0)
    # Search fact-checking platforms with DDG dorks.
    # NOTE(review): the original `else: exit` branches were bare name
    # references (no-ops, not calls) — removed; declining simply skips.
    m = input("Do you want to analyze in fact-checking platforms? (Y/n): ")
    if m in ("y", "Y"):
        search_DDG_DORKS(TITLE, TEXT_0)
    # Search Twitter.
    m = input("Do you want to search in Twitter? (Y/n): ")
    if m in ("y", "Y"):
        Twint.search_Twitter(url)