Example #1
0
 def search(self,
            search_term: Text) -> WebInfo:  # [(title, summary, url)]
     """Search MetaCrawler for ``search_term`` and scrape the result page.

     Args:
         search_term: Free-text query. Only spaces are escaped (as ``+``)
             before the term is appended to the query string.

     Returns:
         Whatever ``zip3`` builds from the scraped titles, summaries and
         links, or ``None`` when any step of the fetch/parse raises.
     """
     title_xpath = (
         "//div[@class='web-bing__result']//a[@class='web-bing__title']")
     summary_xpath = (
         "//div[@class='web-bing__result']"
         "//span[@class='web-bing__description']")
     try:
         # NOTE(review): reserved URL characters other than spaces
         # (e.g. '&', '#') are sent verbatim in the query string.
         html_text = Browser().getHTML(
             "https://www.metacrawler.com/serp?q=" +
             search_term.replace(' ', '+'))
         html = tree.HTML(html_text)

         titles = [
             normalize(node.xpath("string()"))
             for node in reduceXpath(html, title_xpath)
         ]
         contents = [
             normalize(node.xpath("string()"))
             for node in reduceXpath(html, summary_xpath)
         ]
         # The title anchors also carry the result URL in their href.
         links = [findurl(node.get('href')) for node in html.xpath(title_xpath)]
         # NOTE(review): dropping unresolved links here can shift the
         # remaining links relative to titles/contents before zipping.
         links = [link for link in links if link is not None]
         return zip3(titles, contents, links, '')
     except Exception:
         # Best-effort scraper: report the failure and fall through to None.
         # `Exception` (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         print("Metacrawle Fallo")
     return None
Example #2
0
 def search(self,
            search_term: Text) -> WebInfo:  # Optional[(title, content, url)]
     """Search Ask.com for ``search_term`` and scrape the result page.

     Args:
         search_term: Free-text query. Only spaces are escaped (as ``+``)
             before the term is appended to the query string.

     Returns:
         Whatever ``zip3`` builds from the scraped titles, abstracts and
         links, or ``None`` when any step of the fetch/parse raises.
     """
     item_xpath = ("//div[@class='PartialSearchResults-body']"
                   "//div[@class='PartialSearchResults-item']")
     anchor_xpath = item_xpath + "//a"
     abstract_xpath = (
         item_xpath + "//p[@class='PartialSearchResults-item-abstract']")
     try:
         html_text = Browser().getHTML("https://www.ask.com/web?q=" +
                                       search_term.replace(' ', '+'))
         html = tree.HTML(html_text)

         # `string()` takes the text of the node itself. The previous
         # `string(//a)` / `string(//p)` were absolute paths, which resolve
         # to the first <a>/<p> of the whole document for every node.
         title = [
             normalize(node.xpath("string()"))
             for node in reduceXpath(html, anchor_xpath)
         ]
         contents = [
             normalize(node.xpath("string()"))
             for node in reduceXpath(html, abstract_xpath)
         ]
         links = [
             findurl(node.get('href')) for node in html.xpath(anchor_xpath)
         ]
         # NOTE(review): dropping unresolved links here can shift the
         # remaining links relative to titles/contents before zipping.
         links = [link for link in links if link is not None]
         return zip3(title, contents, links, '')
     except Exception:
         # Best-effort scraper: report the failure and fall through to None.
         # `Exception` (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         print("Ask Fallo")
     return None
Example #3
0
def cleanTrivia(
    trivia: Trivia
) -> Optional[Tuple[Text, List[Text], List[List[Text]], bool]]:
    """Turn a raw ``(question, options)`` pair into search-ready tokens.

    Args:
        trivia: A ``(question, options)`` pair, or ``None``.

    Returns:
        ``None`` when ``trivia`` is ``None``; otherwise a 4-tuple
        ``(query, question_tokens, option_tokens, negated)`` where

        * ``query`` is the question tokens joined with single spaces,
        * ``question_tokens`` is the tokenized question without ``?`` and
          without a leading ``pregunta N`` header (N in 1..12),
        * ``option_tokens`` holds, per option, its de-duplicated tokens
          (order not preserved) minus a leading single-digit token,
        * ``negated`` is True when the raw question contains ``' no '`` or
          ``' not '`` (space-delimited, case-sensitive).
    """
    if trivia is None:
        return None

    pregunta, opciones = trivia

    # Process the question.
    words_question = normalize(pregunta).replace('?', '')
    token_question = tokenize(words_question).split(' ')

    # Drop an OCR'd "pregunta N" header. Tokens are strings, so the number
    # must be compared against string values (an int range never matches).
    # The length check avoids an IndexError on a lone "pregunta" token.
    question_numbers = {str(n) for n in range(1, 13)}
    if (len(token_question) > 1 and token_question[0] == 'pregunta'
            and token_question[1] in question_numbers):
        token_question = token_question[2:]

    # Process the options.
    digits = set('0123456789')
    token_option = []
    for opcion in opciones:
        tokens = tokenize(normalize(opcion)).split(' ')
        # Strip a leading enumeration digit ("1 paris" -> "paris").
        # NOTE(review): an option that is only a digit ends up empty.
        if tokens[0] in digits:
            tokens = tokens[1:]
        # Change this if a more complex analysis (order/multiplicity) is
        # ever needed: the set drops duplicates and ordering.
        token_option.append(list(set(tokens)))

    query = ' '.join(token_question)
    negated = ' no ' in pregunta or ' not ' in pregunta
    return (query, token_question, token_option, negated)
Example #4
0
 def search(self, search_term: Text) -> WebInfo:  # [(title, summary, url)]
     """Search Bing for ``search_term`` and scrape the result page.

     Args:
         search_term: Free-text query. Only spaces are escaped (as ``+``)
             before the term is appended to the query string.

     Returns:
         Whatever ``zip3`` builds from the scraped titles, summaries and
         links, or ``None`` when any step of the fetch/parse raises.
     """
     result_xpath = "//ol[@id='b_results']//li[@class='b_algo']"
     anchor_xpath = result_xpath + "//h2//a"
     summary_xpath = result_xpath + "//p"
     try:
         html_text = Browser().getHTML(
             "https://www.bing.com/search?q=" + search_term.replace(' ', '+'))
         html = tree.HTML(html_text)

         # `string()` takes the text of the node itself. The previous
         # `string(//a)` / `string(//p)` were absolute paths, which resolve
         # to the first <a>/<p> of the whole document for every node.
         titles = [
             normalize(node.xpath("string()"))
             for node in reduceXpath(html, anchor_xpath)
         ]
         contents = [
             normalize(node.xpath("string()"))
             for node in reduceXpath(html, summary_xpath)
         ]
         links = [
             findurl(node.get('href'))
             for node in reduceXpath(html, anchor_xpath)
         ]
         # NOTE(review): dropping unresolved links here can shift the
         # remaining links relative to titles/contents before zipping.
         links = [link for link in links if link is not None]
         return zip3(titles, contents, links, '')
     except Exception:
         # Best-effort scraper: report the failure and fall through to None.
         # `Exception` (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         print("Bing Fallo")
     return None
Example #5
0
def getTrivia(shaper: ImageShape, ocr: OCR) -> Optional[Trivia]:
    """OCR the question and options of a trivia screenshot.

    Two scratch PNG paths are built in the current working directory,
    handed to ``shaper.shapeImage`` and then read back through
    ``ocr.getText``. The scratch files are removed afterwards, even when
    cropping or OCR raises.

    Args:
        shaper: Object whose ``shapeImage(question_path, options_path)`` is
            expected to write the two image crops.
        ocr: Object whose ``getText(path)`` returns the recognized text, or
            ``None``.

    Returns:
        ``(question, options)``: the question is the first non-empty
        ``?``-delimited fragment of the normalized text, on one line, with
        a ``pregunta N`` header removed and ``?`` appended; the options are
        the non-empty lines of the normalized options text. ``None`` when
        either OCR call returns ``None`` or no question text was found.
    """
    # Randomized names to make a clash with existing files unlikely.
    cwd_prefix = str(os.getcwd()) + '/'
    file_pregunta = (cwd_prefix + str(random.randint(1, 10001)) +
                     'runtimecreationtoremove_question_file.png')
    file_opciones = (cwd_prefix + str(random.randint(1, 10001)) +
                     'runtimecreationtoremove_options_file.png')

    try:
        # Crop the image, then extract the text of each crop.
        shaper.shapeImage(file_pregunta, file_opciones)
        pre_text = ocr.getText(file_pregunta)
        opt_text = ocr.getText(file_opciones)
    finally:
        # Always remove the scratch files; a file that was never written
        # is not an error.
        for scratch_path in (file_pregunta, file_opciones):
            try:
                os.remove(scratch_path)
            except FileNotFoundError:
                pass

    if pre_text is None or opt_text is None:
        return None

    # Question: keep the first non-empty fragment before a '?'.
    fragments = [
        part for part in str(normalize(pre_text)).split('?') if part != ''
    ]
    if not fragments:
        # OCR produced no usable question text.
        return None
    pre_text = fragments[0].replace('\n', ' ') + '?'

    # In case the OCR also read the "Pregunta N" header. Try the highest
    # number first so "pregunta 12" is not cut down to a stray "2" by the
    # shorter "pregunta 1".
    for nu in range(12, 0, -1):
        prg = 'pregunta ' + str(nu)
        if prg in pre_text:
            pre_text = pre_text.replace(prg, '')
            break

    # Options: one per OCR line; tabs serve as the separator that survives
    # normalization.
    opt_text = normalize(opt_text.replace('\n', '\t'))
    opt_txt = [option for option in str(opt_text).split('\t') if option != '']

    return (pre_text, opt_txt)
Example #6
0
 def search(self, search_term: Text) -> WebInfo:
     """Search Google (google.com.ar) for ``search_term`` and scrape results.

     Args:
         search_term: Free-text query. Only spaces are escaped (as ``+``)
             before the term is appended to the query string.

     Returns:
         Whatever ``zip3`` builds from the scraped titles, snippets and
         links, or ``None`` when any step of the fetch/parse raises.
     """
     anchor_xpath = "//div[@class='g']//h3[@class='r']//a"
     snippet_xpath = "//div[@class='g']//div[@class='s']//span[@class='st']"
     try:
         html_text = Browser().getHTML(
             "https://www.google.com.ar/search?q=" +
             search_term.replace(' ', '+'))
         html = tree.HTML(html_text)

         titles = [
             normalize(node.xpath('string()'))
             for node in html.xpath(anchor_xpath)
         ]
         # Snippets can span several lines; join them before normalizing.
         contents = [
             normalize(node.xpath("string()").replace('\n', ''))
             for node in reduceXpath(html, snippet_xpath)
         ]
         links = [
             findurl(node.get('href')) for node in html.xpath(anchor_xpath)
         ]
         # NOTE(review): dropping unresolved links here can shift the
         # remaining links relative to titles/contents before zipping.
         links = [link for link in links if link is not None]
         return zip3(titles, contents, links, '')
     except Exception:
         # Best-effort scraper: report the failure and fall through to None.
         # `Exception` (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         print("Google Fallo")
     return None