Exemple #1
0
 def parse(self, response):
     """Request every release link on a listing page, then follow pagination.

     For each anchor matched by ``/html/body/div[1]/a`` whose ``title`` is
     present and not in the skip list, the title is split with
     ``separa_titulo`` and a request for the anchor's ``href`` is yielded with
     ``parse_attr`` as callback. The request ``meta`` carries ``referer``,
     ``fecha`` (today's date), ``titulo``, ``cantante`` and ``album``.

     Increments ``self._num_pagina`` and, when an ``a.nextpostslink`` href
     exists, yields a follow-up request handled by this same method.

     Anchors that fail to produce a request are reported and skipped.
     """
     # Anchor titles that are never treated as releases.
     titulos_ignorados = {
         'Descargar', 'Ak47Full', 'iPauta', 'ElGenero', 'FlowHot'
     }
     print('\n########Pagina ' + str(self._num_pagina) + '########')
     # The date does not depend on the anchor, so compute it once per page.
     hoy = date.today().strftime("%d %B, %Y")
     for a in response.xpath('/html/body/div[1]/a'):
         titu = a.css('::attr(title)').get()
         if titu is None or titu in titulos_ignorados:
             continue
         # Build the request inside the try, but yield it outside: an
         # exception handler wrapped around a ``yield`` would also intercept
         # GeneratorExit when the generator is closed.
         try:
             ref = a.css('::attr(href)').get()
             cantante, album = separa_titulo(titu, '–')
             peticion = scrapy.Request(ref,
                                       callback=self.parse_attr,
                                       meta={
                                           'referer': ref,
                                           'fecha': hoy,
                                           'titulo': titu,
                                           'cantante': cantante,
                                           'album': album
                                       })
         except Exception as error:
             # Best effort: one broken anchor must not stop the page.
             print('No se pudo procesar el enlace "' + str(titu) + '": ' +
                   str(error))
             continue
         yield peticion
     self._num_pagina += 1
     siguiente = None
     try:
         next_page = response.css('a.nextpostslink ::attr(href)').get()
         if next_page is not None:
             siguiente = response.follow(next_page, callback=self.parse)
     except Exception:
         print('Hubo un problema al abrir la página siguiente')
     if siguiente is not None:
         yield siguiente
Exemple #2
0
 def parse(self, response):
     """Request every article on a listing page, then follow pagination.

     For each ``div.article-container > article`` the featured-image link
     supplies the target ``href`` and the ``title``; the title is split with
     ``separa_titulo`` and the date text is read from the entry meta. A
     request for the link is yielded with ``parse_attr`` as callback and
     ``fecha``, ``titulo``, ``cantante`` and ``album`` in ``meta``.

     Articles without a featured-image link are skipped. Increments
     ``self._num_pagina`` and follows the ``li.previous > a`` href when one
     exists.
     """
     print('\n########Pagina ' + str(self._num_pagina) + '########')
     for art in response.css('div.article-container > article'):
         href = art.css('div.featured-image > a ::attr(href)').get()
         if href is None:
             # scrapy.Request needs a URL; without this guard one article
             # lacking the link would abort the whole page.
             continue
         titulo = art.css('div.featured-image > a ::attr(title)').get()
         cantante, album = separa_titulo(titulo, '–')
         fecha = art.css(
             'div.below-entry-meta > span > a > time ::text').get()
         yield scrapy.Request(href,
                              callback=self.parse_attr,
                              meta={
                                  'fecha': fecha,
                                  'titulo': titulo,
                                  'cantante': cantante,
                                  'album': album
                              })
     self._num_pagina += 1
     # Build the follow-up request inside the try but yield it outside, so
     # the handler never intercepts GeneratorExit raised at a ``yield``.
     siguiente = None
     try:
         next_page = response.css('li.previous > a ::attr(href)').get()
         if next_page is not None:
             siguiente = response.follow(next_page, callback=self.parse)
     except Exception:
         print('Hubo un problema al abrir la página siguiente')
     if siguiente is not None:
         yield siguiente
Exemple #3
0
    def parse(self, response):
        """Request every article on a listing page, then follow pagination.

        For each ``div#main-content > article`` the first link supplies the
        ``title`` and ``href``; the title is split with ``separa_titulo``. A
        request for the link is yielded with ``parse_attr`` as callback and
        ``fecha``, ``referer``, ``titulo``, ``cantante`` and ``album`` in
        ``meta``.

        Articles without a link are skipped. Increments ``self._num_pagina``
        and follows the ``div.nav-previous > a`` href when one exists.
        """
        print('\n########Pagina ' + str(self._num_pagina) + '########')

        for art in response.css('div#main-content > article'):
            referer = art.css('a ::attr(href)').get()
            if referer is None:
                # scrapy.Request needs a URL; skip instead of aborting the
                # whole page.
                continue
            titulo = art.css('a ::attr(title)').get()
            # The previous selector 'span.mh-meta-date updated' contained a
            # descendant combinator and therefore searched for an <updated>
            # element inside the span. Presumably the span carries both
            # classes, which is written as a compound class selector.
            fecha = art.css('span.mh-meta-date.updated ::text').get()
            cantante, album = separa_titulo(titulo, '–')

            yield scrapy.Request(referer,
                                 callback=self.parse_attr,
                                 meta={
                                     'fecha': fecha,
                                     'referer': referer,
                                     'titulo': titulo,
                                     'cantante': cantante,
                                     'album': album
                                 })
        self._num_pagina += 1
        # Build the follow-up request inside the try but yield it outside, so
        # the handler never intercepts GeneratorExit raised at a ``yield``.
        siguiente = None
        try:
            next_page = response.css('div.nav-previous > a ::attr(href)').get()
            if next_page is not None:
                siguiente = response.follow(next_page, callback=self.parse)
        except Exception:
            print('Hubo un problema al abrir la página siguiente')
        if siguiente is not None:
            yield siguiente
Exemple #4
0
    def parse(self, response):
        """Request every post on a listing page, then follow pagination.

        For each ``h3.post-title`` heading the inner link supplies the
        ``href`` and the visible text is the title, which is split with
        ``separa_titulo`` on ``'-'``. A request for the link is yielded with
        ``parse_attr`` as callback and ``referer``, ``titulo``, ``cantante``
        and ``album`` in ``meta``.

        Headings without a link are skipped. Increments ``self._num_pagina``
        and follows the older-posts pager link when one exists.
        """
        print('\n########Pagina ' + str(self._num_pagina) + '########')

        # 'h3.post-title' already matches every 'h3.post-title.entry-title'
        # element, so the former ``or`` fallback could never add results.
        for h3 in response.css('h3.post-title'):
            referer = h3.css('a ::attr(href)').get()
            if referer is None:
                # scrapy.Request needs a URL; skip instead of aborting the
                # whole page.
                continue
            titulo = h3.css('a ::text').get()
            cantante, album = separa_titulo(titulo, '-')
            yield scrapy.Request(referer,
                                 callback=self.parse_attr,
                                 meta={
                                     'referer': referer,
                                     'titulo': titulo,
                                     'cantante': cantante,
                                     'album': album
                                 })

        self._num_pagina += 1
        # Build the follow-up request inside the try but yield it outside, so
        # the handler never intercepts GeneratorExit raised at a ``yield``.
        siguiente = None
        try:
            next_page = response.css(
                'a.blog-pager-older-link.flat-button.ripple ::attr(href)'
            ).get()
            if next_page is not None:
                siguiente = response.follow(next_page, callback=self.parse)
        except Exception:
            print('Hubo un problema al abrir la página siguiente')
        if siguiente is not None:
            yield siguiente
Exemple #5
0
 def parse(self, response):
     """Read one entry page and request the link found in its figure.

     Extracts the ``h1.entry-title`` text, the ``figure > a`` href and the
     entry date text, splits the title with ``separa_titulo`` and post-processes
     the album part with ``separa(album, ' ', 1)``. Yields a single request for
     the figure link with ``parse_attr`` as callback; ``meta`` carries
     ``fecha``, ``referer``, ``titulo``, ``cantante`` and ``album``.
     """
     encabezado = '\n########Pagina ' + str(self._num_pagina) + '########'
     print(encabezado)

     # Collect the raw fields from the page.
     texto_titulo = response.css('h1.entry-title ::text').get()
     enlace = response.css('figure > a ::attr(href)').get()
     publicado = response.css(
         'time.entry-date.published.updated ::text').get()

     # Split the title, then trim the album part.
     artista, disco = separa_titulo(texto_titulo, '–')
     disco = separa(disco, ' ', 1)

     datos = {
         'fecha': publicado,
         'referer': enlace,
         'titulo': texto_titulo,
         'cantante': artista,
         'album': disco,
     }
     yield scrapy.Request(enlace, callback=self.parse_attr, meta=datos)
Exemple #6
0
 def parse(self, response):
     """Collect titles and download links from a single post body.

     Walks the ``div`` children of the hard-coded post body and reads the
     first ``span`` text of each one:

     * text other than ``'DESCARGAR'`` that does not start with ``'0'`` is
       stored as a title and split into singer/album with ``separa_titulo``
       (first on ``'-'``, then on ``'–'`` when the first attempt returns
       ``'-'`` as singer);
     * when the link text is ``'DESCARGAR'`` the link's href is stored as
       referer and ``self.get_inf(href)`` as infringing link.

     The collected lists are handed to ``self.get_datos`` together with
     today's date.
     """
     titulo = []
     cantante = []
     album = []
     referer = []
     infringing = []
     fecha = date.today().strftime("%B %d, %Y")
     print('\n########Pagina ' + str(self._num_pagina) + '########')
     for span in response.css('div#post-body-5669324029259817671 > div > div'):
         text = span.css('span ::text').get()
         if text is None:
             continue
         # startswith() is safe on an empty string, unlike text[0].
         if text != 'DESCARGAR' and not text.startswith('0'):
             titulo.append(text)
             can, alb = separa_titulo(text, '-')
             if can == '-':
                 can, alb = separa_titulo(text, '–')
             cantante.append(can)
             album.append(alb)
         text_des = span.css('span > a ::text').get()
         text_des1 = span.css('a ::text').get()
         if text_des == 'DESCARGAR' or text_des1 == 'DESCARGAR':
             href = span.css('span > a ::attr(href)').get()
             if href is None:
                 href = span.css('a ::attr(href)').get()
             # Only record a link that was actually found. The earlier check
             # tested the list itself (never None), so a missing href either
             # raised IndexError or duplicated the previous link and put
             # referer/infringing out of step.
             if href is not None:
                 href = str(href)
                 referer.append(href)
                 infringing.append(self.get_inf(href))

     self.get_datos(titulo, fecha, cantante, album, referer, infringing)
Exemple #7
0
 def parse_attr(self, response):
     """Yield one API request per playable track listed on the page.

     Every ``li.mp3Play`` item provides a title (its ``<b>`` text) and a
     ``data-url`` attribute on ``a.b_down``. Items without that attribute are
     ignored. For the others a request to the API URL is yielded with
     ``parse_attr2`` as callback; ``meta`` carries the incoming ``referer``
     plus ``Titulo``, ``Cantante``, ``Album`` and ``Fecha`` (today's date).
     """
     plantilla = 'https://www.internet-dvr.com/api-private.js?vidID={}&token=37fb468a1118f225202b8f6be914f4406c93954f1b460d36afb40d540581508b74fefaa886db99f2fb8d3ac3bcd481a77a658c4432f801bd8df0c4da26916588'
     for elemento in response.css('li.mp3Play'):
         nombre = elemento.css('b ::text').get()
         artista, disco = separa_titulo(nombre, '-')
         hoy = date.today().strftime("%d %B, %Y")
         vid = elemento.css('a.b_down ::attr(data-url)').get()
         if vid is None:
             continue
         datos = {
             'referer': response.meta['referer'],
             'Titulo': nombre,
             'Cantante': artista,
             'Album': disco,
             'Fecha': hoy
         }
         yield Request(plantilla.format(vid),
                       meta=datos,
                       callback=self.parse_attr2)
Exemple #8
0
def extrae_categoria(driver):
    """Walk the listing pages of a category and store unseen mega links.

    For every article link on the current page the entry is opened in a new
    window, ``extrae_infringing`` collects candidate links there, and each
    one is resolved with ``get_mega``. A resolved link that contains
    ``'mega'`` and is not yet known to ``c`` is printed, inserted through
    ``c.inserta_item`` and announced through ``v.muestra_item_guardado``.
    The extra window is then closed with ``close_taps(driver, 1)``.

    After a page is done the ``a.next.page-numbers`` element is clicked and
    the global page counter ``pag`` is incremented. The loop ends when that
    click fails, after which ``close_taps(driver, 0)`` is called.

    Reads the module-level names ``fecha``, ``id_domin``, ``c`` and ``v``.
    """
    global pag
    next_page = 1
    while next_page:
        print(
            "#################################### Página {} ####################################"
            .format(pag))
        for a in driver.find_elements_by_css_selector(
                'div#blog-entries > article h2 > a'):
            titulo = a.get_attribute('title').replace('(FLAC)',
                                                      '').replace('(Mp3)', '')
            referer = a.get_attribute('href')
            cantante, album = separa_titulo(titulo, '–')
            time.sleep(2)
            driver.execute_script("window.open(arguments[0]);", referer)
            # NOTE(review): index 2 assumes two windows are already open
            # before this one - confirm against the caller.
            driver.switch_to.window(driver.window_handles[2])
            ref_inf = extrae_infringing(driver)
            if ref_inf:
                for inf in ref_inf:
                    mega_link = get_mega(inf)
                    # The comparisons against False are kept as written, so
                    # helper return values are interpreted exactly as before.
                    if (mega_link != False
                            and mega_link.find('mega') != -1
                            and c.existe_inf(mega_link) == False):
                        imprime_datos(titulo, fecha, cantante, album,
                                      referer, mega_link)
                        c.inserta_item(titulo, cantante, album, referer,
                                       mega_link, fecha, id_domin)
                        v.muestra_item_guardado(titulo)
            close_taps(driver, 1)
        try:
            next_page = driver.find_element_by_css_selector(
                'a.next.page-numbers')
            next_page.click()
        except Exception:
            # A missing or unclickable "next" link ends the crawl. Catching
            # Exception instead of everything lets Ctrl+C and SystemExit
            # propagate.
            print('Ocurrió un error al cambiar de página')
            break
        else:
            pag += 1
    close_taps(driver, 0)
Exemple #9
0
 def parse_attr(self, response):
     """Extract the download link of a post and store it if it is new.

     Reads the title (``h1 > a``), splits it with ``separa_titulo`` on
     ``'-'``, and normalises the date text with ``strip_spaces`` and
     ``separa``. The first link in the post content is taken as the
     candidate; when its URL contains ``'images'`` after the first character,
     the second link of the paragraph is used instead.

     A candidate accepted by ``veri`` is printed and, when
     ``self.c.existe_inf`` reports it as unknown, inserted with
     ``self.c.inserta_item``. Pages without a candidate link are ignored;
     failures while verifying or storing are reported and not raised.
     """
     titulo = response.css('h1 > a ::text').get()
     cantante, album = separa_titulo(titulo, '-')
     fecha = response.css('time > a ::text').get()
     fecha = strip_spaces(fecha)
     fecha = separa(fecha, '-', 0)
     infringing = response.css(
         'div.post__content > p > a ::attr(href)').get()
     if infringing is None:
         return
     if infringing.find('images') > 0:
         infringing = response.xpath(
             '//*[@id="post"]/div[2]/p/a[2]/@href').get()
     # The former test ``infringing is not None or
     # infringing.find('megaupload') > 0`` accepted every non-None link and
     # raised on None, so the megaupload part was never evaluated. The
     # explicit None check keeps that effective behaviour.
     # NOTE(review): if only megaupload links were meant to be stored, add
     # that filter here.
     if infringing is None:
         return
     try:
         if veri(infringing) == True:
             imprime_datos(titulo, fecha, cantante, album,
                           response.meta['referer'], infringing)
             if self.c.existe_inf(infringing, self.id_domin) == False:
                 self.c.inserta_item(titulo, cantante, album,
                                     response.meta['referer'],
                                     infringing, fecha, self.id_domin)
     except Exception as error:
         # Still best effort, but no longer silent.
         print('Hubo un problema al procesar "' + str(titulo) + '": ' +
               str(error))
Exemple #10
0
#####OPEN THE SELENIUM BROWSER#####
driver = webdriver.Chrome('C:\\Users\\APDIF\\Desktop\\chromedriver.exe')
driver.get(start_urls[0])
#####WAIT FOR THE PAGE TO LOAD#####
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h2 > a")))
#####REMEMBER THE MAIN WINDOW#####
main_window = driver.current_window_handle
# The date does not depend on the entry, so compute it once.
fecha = date.today().strftime("%B %d, %Y")
#####WALK EVERY PAGE#####
while True:
    #####READ THE HREF OF THE NEXT PAGE#####
    # Looked up on every page: previously it was read only once, so the loop
    # kept reloading the same URL and never ended. The plural finder returns
    # an empty list on the last page instead of raising.
    enlaces_siguiente = driver.find_elements_by_css_selector('a.sa.sa-nextpage.tip')
    next_page = enlaces_siguiente[0].get_attribute('href') if enlaces_siguiente else None
    #####COLLECT THE DATA#####
    for a in driver.find_elements_by_css_selector("h2 > a"):
        referer = a.get_attribute('href')
        titulo = a.find_element_by_css_selector('span').text
        cantante, album = separa_titulo(titulo, '-')
        #####OPEN A NEW TAB#####
        driver.execute_script("window.open(arguments[0]);", referer)
        driver.switch_to.window(driver.window_handles[1])
        try:
            infringing = driver.find_element_by_xpath('//*[@id="shell"]/section/div[1]/div[2]/article/section/center/a').get_attribute('href')
            imprime_datos(titulo, fecha, cantante, album, referer, infringing)
            if c.existe_inf(infringing, id_domin) == False and veri(infringing) == True:
                c.inserta_item(titulo, cantante, album, referer, infringing, fecha, id_domin)
        finally:
            # Always close the tab and return to the listing, so a failure
            # on one entry does not leave the driver on the wrong window.
            #####CLOSE THE TAB#####
            driver.close()
            #####SWITCH BACK TO THE MAIN PAGE#####
            driver.switch_to.window(main_window)
    # The last page has no "next" link; stop after it has been processed.
    if next_page is None:
        break
    #####OPEN THE NEXT PAGE#####
    driver.get(next_page)
    def parse(self, response):
        """Process every article of a listing page, then follow pagination.

        For each article under ``//*[@id="content"]/div[1]/article`` the
        title and date are read, the title is split with ``separa_titulo``,
        and a referer link is searched through several fallback selectors.
        The infringing link is derived with ``self.get_id`` and
        ``self.get_infr`` (or ``self.get_Mega`` when the referer contains
        ``'?'`` according to ``self.comprueba_refer``). Links accepted by
        ``veri`` and unknown to ``self.c`` are inserted with
        ``self.c.inserta_item``, and the collected data is printed with
        ``self.imprime_datos``.

        Finally ``self._num_pagina`` is incremented and the
        ``a.next.page-numbers`` href, when present, is followed with this
        same method as callback.
        """
        # NOTE(review): ``id`` shadows the builtin of the same name.
        id = '0'
        #####COMMENTS#####
        print('\n########Pagina ' + str(self._num_pagina) + '########')
        for art in response.xpath('//*[@id="content"]/div[1]/article'):
            # ``band`` stays True unless the innermost fallback loop runs;
            # it gates the database insert at the end of the iteration.
            band = True
            titulo = art.css(' h2 > a ::text').get()
            fecha = art.css('p > span > a ::text').get()
            # NOTE(review): unpacked as (album, cantante) here - confirm the
            # order against what separa_titulo returns.
            album, cantante = separa_titulo(titulo, '-')
            referer = art.css('div > div.post-entry-content > a ::attr(href)').get()
            # NOTE(review): get_id is called before the ``referer is None``
            # check below - confirm it tolerates None.
            id = self.get_id(referer,'/', 3)
            infringing = self.get_infr(id)
            if referer is None:
                # First fallback: a link wrapped in <p><strong>.
                referer = art.css('div > div > p > strong > a ::attr(href)').get()
                if referer is None:
                    #print('PATH: ' + str(art.xpath('//div/div/p[2]/a').extract()))
                    #print('A: ' + str(art.css('div.post-entry-content').get()))
                    # Second fallback: iterate the first selector's matches,
                    # or the second selector's when the first finds nothing.
                    for p in art.css('div.post-entry-content > strong > span > span > a') or art.css('div.post-entry-content > p > a'):
                        band = False
                        #print ('A: ' + str(art.css('div.post-entry-content').get()))
                        referer = p.css('::attr(href)').get()
                        id = self.get_id(referer,'/', 3)
                        infringing = self.get_infr(id)
                        #print('REF: ' + referer)
                        if self.comprueba_refer(referer, 'open') == True:
                            r = art.css('div.post-entry-content > p')
                            #print('A: ' + str(art.css('div.post-entry-content > p > strong').get()))
                            # NOTE(review): presumably always true, since
                            # .css() returns a selector list rather than
                            # None - confirm.
                            if r.css('strong') is not None:
                                r = r.css('strong > span')
                            referer = r.css('a ::attr(href)').get()
                            id = self.get_id(referer,'/', 3)
                            infringing = self.get_infr(id)
                            #####INSERT INTO THE DB#####
                            if veri(infringing) == True:
                                if self.c.existe_inf(infringing, self.id_domin) == False:
                                    self.c.inserta_item(titulo, cantante, album, referer, infringing, fecha, self.id_domin)

                        else:
                            id = self.get_id(referer,'/', 3)
                            infringing = self.get_infr(id)
                        self.imprime_datos(titulo, fecha, cantante, album, referer, infringing)
                else:
                    id = self.get_id(referer,'/', 3)
                    infringing = self.get_infr(id)
            else:
                if self.comprueba_refer(referer, '?') == True:
                    infringing = self.get_Mega(referer)

            if band == True:
                #####INSERT INTO THE DB#####
                if veri(infringing) == True:
                    if self.c.existe_inf(infringing, self.id_domin) == False:
                        self.c.inserta_item(titulo, cantante, album, referer, infringing, fecha, self.id_domin)

            # Printed for every article, in addition to the per-link print
            # inside the fallback loop above.
            self.imprime_datos(titulo, fecha, cantante, album, referer, infringing)
            #referer = None

        self._num_pagina+=1
        # NOTE(review): bare ``except`` around a ``yield`` also catches
        # GeneratorExit and KeyboardInterrupt.
        try:
            next_page = response.css('a.next.page-numbers ::attr(href)').get()
            if next_page is not None:
                yield response.follow(next_page, callback= self.parse)
        except:
             print('Hubo un problema al abrir la página siguiente')