Example #1
def imagenes(request):
    buscador = []
    if request.method == 'POST':
        tags_to_delete = request.POST.getlist('tags')
        tags_to_add = request.POST.getlist('Tagx')
        data_id = request.POST.getlist('id')
        buscador = request.POST.getlist('buscador')
        # Delete the selected tags, if any
        if len(tags_to_delete) > 0:
            data = ScrapedImage.objects.filter(id=data_id[0])
            existing_tags = separar(data[0].tags)
            new_tags = delete_tags_from_img(tags_to_delete, existing_tags)
            new_tags_str = juntar(new_tags)
            ScrapedImage.objects.filter(id=data_id[0]).update(tags=new_tags_str)
        # Add the submitted tag, if any
        if len(tags_to_add) > 0:
            data = ScrapedImage.objects.filter(id=data_id[0])
            existing_tags = separar(data[0].tags)
            existing_tags.append(tags_to_add[0])
            new_tags_str = juntar(existing_tags)
            ScrapedImage.objects.filter(id=data_id[0]).update(tags=new_tags_str)

    all_img = ScrapedImage.objects.all()
    tags = ScrapedImage.objects.values_list('tags', flat=False)
    keywords = []
    if buscador:
        all_img = ScrapedImage.objects.filter(Q(tags__icontains=buscador[0]))
        tags = all_img.values_list('tags', flat=False)
    for tag in tags:
        keywords.append(separar(tag[0]))
    all_info = zip(all_img, keywords)
    return render(request, 'imagenes.html', {'all_data': all_info})
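The view above leans on two string helpers, separar and juntar, that are not listed on this page. A minimal sketch of what they plausibly do, assuming tags are persisted as one delimiter-separated string (the comma delimiter is an assumption):

# Hypothetical sketch; the real separar/juntar are not shown among these examples.
TAG_DELIMITER = ','  # assumed separator, the actual one may differ

def separar(tag_string):
    # Split the stored tag string into a list of individual tags
    if not tag_string:
        return []
    return [t.strip() for t in tag_string.split(TAG_DELIMITER)]

def juntar(tag_list):
    # Join a list of tags back into the single string stored on the model
    return TAG_DELIMITER.join(tag_list)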
Example #2
def showdata(request):
    buscador = []
    if request.method == 'POST':
        if "Entrenar" in request.POST:
            registro_a_entrenar = request.POST.getlist('Entrenar')
            info = ScrapedData.objects.get(id=registro_a_entrenar[0])
            info.data_entrenamiento = True
            info.save()
            update_classifier_text(info.information, info.classification)

        tags_to_delete = request.POST.getlist('tags')
        tags_to_add = request.POST.getlist('Tagx')
        link = request.POST.getlist('Linkx')
        classification_to_change = request.POST.getlist('type')
        data_id = request.POST.getlist('id')
        buscador = request.POST.getlist('buscador')

        # Change the classification, if one was submitted
        if len(classification_to_change) > 0:
            ScrapedData.objects.filter(id=data_id[0]).update(classification=classification_to_change[0])

        # Delete the selected tags (and their metadata), if any
        if len(tags_to_delete) > 0:
            data = ScrapedData.objects.filter(id=data_id[0])
            existing_tags = separar(data[0].tags)
            existing_metadata = separar(data[0].metadata)
            new_tags, new_metadata = delete_tags_from_data(tags_to_delete, existing_tags, existing_metadata)
            new_tags_str = juntar(new_tags)
            new_metadata_str = juntar(new_metadata)
            ScrapedData.objects.filter(id=data_id[0]).update(tags=new_tags_str,
                                                             metadata=new_metadata_str)
        # Add the submitted tag (and optional link), if any
        if len(tags_to_add) > 0:
            data = ScrapedData.objects.filter(id=data_id[0])
            if len(link[0]) == 0:
                existing_tags_a, existing_metadata_a = add_tags_from_data(tags_to_add[0], "None", data[0].tags,
                                                                          data[0].metadata)
            else:
                existing_tags_a, existing_metadata_a = add_tags_from_data(tags_to_add[0], link[0], data[0].tags,
                                                                          data[0].metadata)
            ScrapedData.objects.filter(id=data_id[0]).update(tags=existing_tags_a,
                                                             metadata=existing_metadata_a)

    all_data = ScrapedData.objects.all()
    tags_metadata = ScrapedData.objects.values_list('tags', 'metadata', flat=False)
    references = []
    if buscador:
        all_data = ScrapedData.objects.filter(Q(tags__icontains=buscador[0]) | Q(classification__icontains=buscador[0]))
        tags_metadata = all_data.values_list('tags', 'metadata', flat=False)
    for tag, metadata in tags_metadata:
        references.append((separar(tag), separar(metadata)))
    all_info = zip(all_data, references)
    return render(request, 'showdata.html', {'all_data': all_info})
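showdata additionally assumes delete_tags_from_data, which drops each selected tag together with its metadata entry. A hedged sketch, assuming the tag and metadata lists are kept index-aligned (which the view implies by always updating them together):

def delete_tags_from_data(tags_to_delete, existing_tags, existing_metadata):
    # Hypothetical sketch; the real implementation is not shown on this page.
    # Assumes existing_tags[i] corresponds to existing_metadata[i].
    new_tags, new_metadata = [], []
    for tag, meta in zip(existing_tags, existing_metadata):
        if tag not in tags_to_delete:
            new_tags.append(tag)
            new_metadata.append(meta)
    return new_tags, new_metadata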
Example #3
def get_info_photo(url):
    # credentials and creation of the Vision and Translate clients
    vision_client = vision.Client()
    translate_client = translate.Client()
    urllib.request.urlretrieve(url, "temp.jpg")

    # get labels for the downloaded image, then remove the temp file
    image = vision_client.image(filename='temp.jpg')
    labels = image.detect_labels()
    remove('temp.jpg')

    # translate the labels to Spanish and join them into a single string
    tags = []
    for label in labels:
        try:
            translated_label = translate_client.translate(
                label.description,
                target_language='es',
                format_='text',
                source_language='en')
            tags.append(translated_label['translatedText'])
        except Exception:
            # fall back to the original English label if translation fails
            tags.append(label.description)

    return juntar(limpieza(tags))
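get_info_photo uses the legacy google-cloud-vision interface (vision.Client() and image.detect_labels()), which recent releases of the library no longer ship. The equivalent labeling step on the current client looks roughly like this; treat it as a sketch, not a drop-in replacement:

from google.cloud import vision

def detect_labels(path):
    # Same step as image.detect_labels() above, on the current client API
    client = vision.ImageAnnotatorClient()
    with open(path, 'rb') as f:
        image = vision.Image(content=f.read())
    response = client.label_detection(image=image)
    return [label.description for label in response.label_annotations]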
Example #4
def entities_example(request):
    form = ClassifyForm(request.POST)
    if form.is_valid():
        sentence = form.cleaned_data['sentence']
        entities, metadata = get_entities(sentence)
        urls = separar(get_urls(metadata))
        entities_fixed = separar(juntar(limpieza(entities)))

        args = {'form': form, 'entities': entities_fixed, 'urls': urls}
        return render(request, 'entities_example.html', args)
    else:
        form = ClassifyForm()
        args = {'form': form}
        return render(request, 'entities_example.html', args)
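entities_example depends on get_entities(sentence), which is not listed here. Since get_urls later pulls URLs out of the returned metadata, it plausibly wraps the Cloud Natural Language API; a hedged sketch under that assumption:

from google.cloud import language_v1

def get_entities(text):
    # Hypothetical sketch of the helper used above; the real one is not shown.
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(
        content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    response = client.analyze_entities(document=document)
    entities = [entity.name for entity in response.entities]
    # Well-known entities may carry a 'wikipedia_url' key in their metadata
    metadata = [entity.metadata.get('wikipedia_url', '') for entity in response.entities]
    return entities, metadata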
Example #5
    def parse(self, response):

        # Collect all image links (excluding header and footer)
        Imagenes = response.xpath('//img[not(ancestor::footer) and not(ancestor::header)]/@src').extract()

        # Collect all embedded video (iframe) links
        Videos = response.xpath('//iframe[not(ancestor::footer) and not(ancestor::header)]/@src').extract()

        # URL of the current page
        url = response.url

        # Combined, in-order list of paragraphs, images, videos and text lists
        Lista_completa = response.xpath('//p[not(ancestor::footer) and not(ancestor::*[contains(@class,"nav")])] | //img[not(ancestor::footer) and not(ancestor::header)]/@src | //iframe[not(ancestor::footer) and not(ancestor::header)]/@src | //ul[not(ancestor::footer) and not(@class)]').extract()


        # Page title
        titulo = response.xpath('string(//title)').extract()[0]


        Lista_videos_final = []
        Lista_imagenes_final = []
        Lista_informaciones_final = []


        # Walk the combined list in order, routing each item to its bucket
        k = 1     # running order of each item on the page
        l = 0     # index into Lista_completa
        leer = 1  # 0 means: skip the next item (it was merged into a ':'-ended paragraph)
        for item in Lista_completa:
            if leer == 1:
                if item in Imagenes:
                    link = ajuste_img_src(item,response)
                    width , height = getsizes(link)
                    Lista_imagenes_final.append([link,"imagen",k,titulo,url,width,height])
                    k = k + 1
                elif item in Videos:
                    Lista_videos_final.append([item,"video",k,titulo,url])
                    k = k + 1
                else:
                    soup = BeautifulSoup(item, 'html.parser')
                    texto = soup.get_text()
                    if texto != "":
                        # Merge a ':'-ended paragraph with the following text item (guarding against running off the end)
                        if texto.endswith(":") and (l + 1 < len(Lista_completa)) and (Lista_completa[l + 1] not in Imagenes) and (Lista_completa[l + 1] not in Videos):
                            soup2 = BeautifulSoup(Lista_completa[l + 1], 'html.parser')
                            if soup2.get_text() == "":
                                Lista_informaciones_final.append([texto,"informacion",k,titulo,url])
                            else:
                                Lista_informaciones_final.append([texto + "\n" + soup2.get_text(),"informacion",k,titulo,url])
                            leer = 0
                        else:
                            Lista_informaciones_final.append([texto,"informacion",k,titulo,url])
                        k = k + 1
            else:
                leer = 1
            l = l + 1


        # From here on, the collected paragraphs, images and videos can be used or
        # persisted; they live in Lista_imagenes_final, Lista_informaciones_final
        # and Lista_videos_final.
        # Each paragraph entry holds: [text, data type, order, title (topic), page URL]
        # Each image entry holds: [image URL, data type, order, title (topic), page URL, width, height]
        # Each video entry holds: [video URL, data type, order, title (topic), page URL]

        for img in Lista_imagenes_final:
            ScrapedImageItem(order=img[2],
                             topic=img[3],
                             url=img[4],
                             information=img[0],
                             width=img[5],
                             height=img[6],
                             tags=get_info_photo(img[0])).save()

        for info in Lista_informaciones_final:
            print(info)
            classified = classify('../TextClassifier/classifier_bayes.pickle', info[0])
            entities, meta = get_entities(info[0])
            tag = juntar(limpieza(entities))
            urls = get_urls(meta)
            ScrapedDataItem(order=info[2],
                            topic=info[3],
                            url=info[4],
                            information=info[0],
                            classification=classified,
                            tags=tag,
                            metadata=urls).save()

        for video in Lista_videos_final:
            ScrapedVideoItem(order=video[2], topic=video[3], url=video[4], information=video[0]).save()
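parse() calls two helpers that are not shown: ajuste_img_src, which resolves a possibly relative src against the page URL, and getsizes, which returns an image's dimensions. Minimal sketches, assuming Scrapy's response.urljoin and Pillow (both assumptions, since the originals are not listed):

from io import BytesIO
import urllib.request
from PIL import Image

def ajuste_img_src(src, response):
    # Hypothetical sketch: resolve a relative image src against the page URL
    return response.urljoin(src)

def getsizes(url):
    # Hypothetical sketch: download the image and return (width, height)
    with urllib.request.urlopen(url) as f:
        return Image.open(BytesIO(f.read())).size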
Example #6
def add_tags_from_data(tag_to_add, link, tags, metadata):
    # Append the new tag, and its link prefixed with 'https://', to the stored strings
    lista = separar(tags)
    lista.append(tag_to_add)
    lista2 = separar(metadata)
    lista2.append('https://' + link)
    return juntar(lista), juntar(lista2)
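A quick usage sketch of add_tags_from_data. Note that showdata passes the literal string "None" when no link was submitted, so the stored metadata entry becomes 'https://None'; the values below are illustrative:

# Illustrative call; exact output depends on the delimiter used by separar/juntar
new_tags, new_metadata = add_tags_from_data(
    'perro',                        # tag to append
    'es.wikipedia.org/wiki/Perro',  # link, stored as 'https://' + link
    data[0].tags,                   # current tags as one stored string
    data[0].metadata)               # current metadata as one stored string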