def inserta_BD(self, response, fecha, inf):
    """Insert ``inf`` through ``self.c`` when it passes both checks.

    The row is written only when ``self.c.existe_inf(inf, self.id_domin)``
    compares equal to False and ``veri(inf)`` compares equal to True.

    Args:
        response: Response whose ``meta`` supplies ``titulo``, ``cantante``,
            ``album`` and ``referer``.
        fecha: Date value stored with the row.
        inf: Link to check and store.
    """
    # The loose equality tests are kept on purpose: a None answer from
    # either helper must not be treated as False/True.
    if not (self.c.existe_inf(inf, self.id_domin) == False):
        return
    if not (veri(inf) == True):
        return

    meta = response.meta
    self.c.inserta_item(
        meta['titulo'],
        meta['cantante'],
        meta['album'],
        meta['referer'],
        inf,
        fecha,
        self.id_domin,
    )
def parse_attr2(self, response):
    """Print the record of a page whose download link ends in ``mp3``.

    Reads the ``href`` of ``a.download-mp3-url``.  When the link ends in
    ``mp3`` and ``veri`` accepts it, the metadata stored in ``response.meta``
    is handed to ``imprime_datos``.  Pages without such a link are ignored.

    Args:
        response: Response carrying ``Titulo``, ``Fecha``, ``Cantante``,
            ``Album`` and ``referer`` in its ``meta``.
    """
    infringing = response.css('a.download-mp3-url ::attr(href)').get()

    # ``.get()`` returns None when the selector matches nothing; slicing
    # None used to raise TypeError, so such pages are skipped explicitly.
    if infringing is None or not infringing.endswith('mp3'):
        return

    if veri(infringing) == True:
        meta = response.meta
        imprime_datos(
            meta['Titulo'],
            meta['Fecha'],
            meta['Cantante'],
            meta['Album'],
            meta['referer'],
            infringing,
        )
def parse_attr(self, response):
    """Report the ``a.btn-dl`` link of a page and store it when accepted.

    Nothing happens when the page has no ``a.btn-dl`` element.  Otherwise the
    anchor's ``href`` is printed with the metadata from ``response.meta`` and
    inserted through ``self.c`` when ``veri`` compares equal to True and
    ``self.c.existe_inf`` compares equal to False.
    """
    if not response.css('a.btn-dl'):
        return

    infringing = response.css('a.btn-dl ::attr(href)').get()
    meta = response.meta
    imprime_datos(meta['titulo'], meta['fecha'], meta['cantante'],
                  meta['album'], meta['referer'], infringing)

    ##### INSERT INTO DB #####
    if veri(infringing) == True and self.c.existe_inf(infringing, self.id_domin) == False:
        self.c.inserta_item(
            meta['titulo'],
            meta['cantante'],
            meta['album'],
            meta['referer'],
            infringing,
            meta['fecha'],
            self.id_domin,
        )
def parse_attr2(self, response):
    """Report the link found in the post body and store it when accepted.

    The page's own URL is used as referer.  The link is the ``href`` of the
    anchor selected inside ``div.post-body.entry-content``; it is printed via
    ``imprime_datos`` and inserted through ``self.c`` when ``veri`` compares
    equal to True and ``self.c.existe_inf`` compares equal to False.
    """
    referer = response.url
    selector = 'div.post-body.entry-content > center > table > tbody > tr > td > center > a ::attr(href)'
    infringing = response.css(selector).get()

    meta = response.meta
    imprime_datos(meta['titulo'], meta['fecha'], meta['cantante'],
                  meta['album'], referer, infringing)

    ##### INSERT INTO DB #####
    if veri(infringing) == True and self.c.existe_inf(infringing, self.id_domin) == False:
        self.c.inserta_item(
            meta['titulo'],
            meta['cantante'],
            meta['album'],
            referer,
            infringing,
            meta['fecha'],
            self.id_domin,
        )
def parse_attr(self, response):
    """Print and store one record per listed title, all sharing one link.

    The link is taken from the ``onclick`` attribute of
    ``button#download-btn`` using ``separa``.  Each ``td.column-title`` text
    node supplies a title; for every title the record is printed and, when
    ``veri`` compares equal to True for the link, inserted via ``self.c``.
    """
    onclick = response.css('button#download-btn ::attr(onclick)').get()
    infringing = separa(onclick, "'", 1)

    for cell in response.css('td.column-title ::text'):
        titulo = cell.get()

        print('\n*****************DATOS*****************')
        print('infringing: ' + infringing)
        print('referer: ' + response.meta['referer'])
        print('titulo: ' + titulo)
        print('fecha: ' + response.meta['fecha'])
        print('cantante: ' + response.meta['cantante'])
        print('album: ' + response.meta['album'])
        print('***************************************\n')

        ##### INSERT INTO DB #####
        if not (veri(infringing) == True):
            continue
        self.c.inserta_item(
            titulo,
            response.meta['cantante'],
            response.meta['album'],
            response.meta['referer'],
            infringing,
            response.meta['fecha'],
            self.id_domin,
        )
def get_datos(self, titulo, fecha, cantante, album, referer, infringing):
    """Print and store the records held in parallel sequences.

    Args:
        titulo, cantante, album, referer, infringing: Sequences indexed in
            lockstep; position ``i`` of each one describes the same record.
        fecha: Single date string shared by every record.

    A record is inserted through ``self.c`` when ``veri`` compares equal to
    True for its link and ``self.c.existe_inf`` compares equal to False.
    """
    for idx, link in enumerate(infringing):
        print('\n*****************DATOS*****************')
        print('infringing: ' + link)
        print('referer: ' + referer[idx])
        print('titulo: ' + titulo[idx])
        print('fecha: ' + fecha)
        print('cantante: ' + cantante[idx])
        print('album: ' + album[idx])
        print('***************************************\n')

        ##### INSERT INTO DB #####
        if veri(link) == True and self.c.existe_inf(link, self.id_domin) == False:
            self.c.inserta_item(titulo[idx], cantante[idx], album[idx],
                                referer[idx], link, fecha, self.id_domin)
def parse_attr(self, response):
    """Print the link found under ``h3 > a`` and store it when accepted.

    The metadata (``referer``, ``titulo``, ``fecha``, ``cantante``,
    ``album``) comes from ``response.meta``.  The record is inserted through
    ``self.c`` when ``veri`` compares equal to True and
    ``self.c.existe_inf`` compares equal to False.  Pages without an
    ``h3 > a`` link are skipped.
    """
    infringing = response.css('h3 > a ::attr(href)').get()

    # ``.get()`` returns None when nothing matches; concatenating None to a
    # string used to raise TypeError and abort the callback.
    if infringing is None:
        return

    meta = response.meta
    print('\n*****************DATOS*****************')
    print('infringing: ' + infringing)
    print('referer: ' + meta['referer'])
    print('titulo: ' + meta['titulo'])
    print('fecha: ' + str(meta['fecha']))
    print('cantante: ' + meta['cantante'])
    print('album: ' + meta['album'])
    print('***************************************\n')

    ##### INSERT INTO DB #####
    if veri(infringing) == True:
        if self.c.existe_inf(infringing, self.id_domin) == False:
            self.c.inserta_item(meta['titulo'], meta['cantante'],
                                meta['album'], meta['referer'], infringing,
                                meta['fecha'], self.id_domin)
def parse_attr(self, response):
    """Resolve the page URL through ``open_adfly`` and store the result.

    ``open_adfly`` is called with the page URL and the element id
    ``'skip_bu2tton'``.  When it returns a link, that link is rebuilt as
    ``'mega'`` + ``self.separaLink(link)``, URL-unquoted, printed together
    with the metadata in ``response.meta`` and inserted through ``self.c``
    when ``veri`` compares equal to True and ``self.c.existe_inf`` compares
    equal to False.  Nothing happens when ``open_adfly`` returns None.
    """
    url = str(response.url)
    link_mega = open_adfly(url, 'skip_bu2tton')
    if link_mega is None:
        return

    link_mega = 'mega' + self.separaLink(link_mega)
    infringing = str(unquote(link_mega))

    meta = response.meta
    print('\n*****************DATOS*****************')
    print('infringing: ' + infringing)
    print('fecha: ' + meta['fecha'])
    print('referer: ' + meta['referer'])
    print('titulo: '+ meta['titulo'])
    print('cantante: '+ meta['cantante'])
    print('album: '+ meta['album'])
    print('***************************************\n')

    ##### INSERT INTO DB #####
    if veri(infringing) == True:
        # The duplicate check previously used a bare ``c`` that is not bound
        # in this method; the insert below uses ``self.c``, so use the same
        # controller for both calls.
        if self.c.existe_inf(infringing, self.id_domin) == False:
            self.c.inserta_item(meta['titulo'], meta['cantante'],
                                meta['album'], meta['referer'], infringing,
                                meta['fecha'], self.id_domin)
def parse_attr(self, response):
    """Print and store one record per table row, all sharing one link.

    Artist, album and date are read from fixed ``span`` positions in the
    page; the link is cut out of the ``onclick`` attribute of the fourth
    child of ``div#download-btn-div`` using ``separa``.  Each selected row
    cell supplies a song title.  A record is inserted through ``self.c`` when
    ``self.c.existe_inf`` compares equal to False and ``veri`` compares equal
    to True.
    """
    referer = response.url
    artista = response.xpath(
        '/html/body/div/div[2]/div/div[2]/span[2]/text()').extract_first()
    album = response.xpath(
        '/html/body/div/div[2]/div/div[2]/span[4]/text()').extract_first()
    fecha = response.xpath(
        '/html/body/div/div[2]/div/div[2]/span[6]/text()').extract_first()

    onclick = response.css(
        'div#download-btn-div :nth-child(4) ::attr(onclick)').get()
    infringing = separa(onclick, '"', 1)

    for celda in response.css('tbody > tr :nth-child(1)'):
        cancion = celda.css('::text').get()

        ##### PRINT INFORMATION #####
        imprime_datos(cancion, fecha, artista, album, referer, infringing)

        ##### INSERT INTO DB #####
        if self.c.existe_inf(infringing, self.id_domin) == False and veri(infringing) == True:
            self.c.inserta_item(cancion, artista, album, referer,
                                infringing, fecha, self.id_domin)
def parse_attr(self, response):
    """Extract the download link of a post and store it when accepted.

    Title, artist/album (via ``separa_titulo``) and date (via
    ``strip_spaces`` and ``separa``) are read from the page.  The link is the
    first anchor inside ``div.post__content > p``; when that link contains
    ``images`` the second anchor of the post is used instead.  A link
    accepted by ``veri`` is printed and, when ``self.c.existe_inf`` compares
    equal to False, inserted through ``self.c``.

    Pages without a usable link are skipped.  Failures while verifying or
    storing the link are reported with ``print`` instead of being silently
    discarded.
    """
    titulo = response.css('h1 > a ::text').get()
    cantante, album = separa_titulo(titulo, '-')

    fecha = response.css('time > a ::text').get()
    fecha = strip_spaces(fecha)
    fecha = separa(fecha, '-', 0)

    infringing = response.css(
        'div.post__content > p > a ::attr(href)').get()
    # Previously a missing link surfaced as an AttributeError that a bare
    # ``except`` swallowed; make the skip explicit.
    if infringing is None:
        return

    # ``in`` also catches a match at index 0, which ``find(...) > 0`` missed.
    if 'images' in infringing:
        infringing = response.xpath(
            '//*[@id="post"]/div[2]/p/a[2]/@href').get()
        if infringing is None:
            return

    # The old ``is not None or find('megaupload') > 0`` test accepted every
    # non-None link, so no extra host filter is applied here.
    try:
        if veri(infringing) == True:
            imprime_datos(titulo, fecha, cantante, album,
                          response.meta['referer'], infringing)
            if self.c.existe_inf(infringing, self.id_domin) == False:
                self.c.inserta_item(titulo, cantante, album,
                                    response.meta['referer'], infringing,
                                    fecha, self.id_domin)
    except Exception as exc:
        # Keep the best-effort behaviour (one bad page must not stop the
        # crawl) but leave a trace of what went wrong.
        print('Hubo un problema al guardar el enlace: ' + str(exc))
def parse_attr(self, response):
    """Fill the item carried in ``response.meta`` and store its link.

    Reads the post id, title and date from the page, splits the title on
    ``–`` into artist and album, and looks for a ``DOWNLOAD AUDIO`` anchor,
    falling back to ``DOWNLOAD ZIP``.  When several anchors match, the last
    one wins.  A link accepted by ``veri`` and for which
    ``c.existe_inf`` compares equal to False is inserted through the
    controller found in ``response.meta['controler']``.

    Returns:
        The item from ``response.meta['item']`` with ``titulo``, ``href``,
        ``fecha`` and, when a download anchor exists, ``infringing`` set.
    """
    item = response.meta['item']
    c = response.meta['controler']

    ##### TAKE THE PAGE ID #####
    post_id = response.xpath(
        '//*[@id="page"]/div/article/div/@id').extract()[0]

    ##### COLLECT THE PAGE DATA #####
    item['titulo'] = response.xpath(
        '//*[@id="' + post_id + '"]/div/header/h1/text()').extract()[0]
    item['href'] = response.url
    item['fecha'] = response.xpath(
        '//*[@id="' + post_id + '"]/div/header/div/text()').extract()[0]

    ##### SPLIT ARTIST AND ALBUM #####
    partes = str(item['titulo']).split('–')
    cantante = partes[0]
    # A title without the dash used to raise IndexError; fall back to an
    # empty album instead.
    album = partes[1] if len(partes) > 1 else ''

    ##### LOOK FOR THE DOWNLOAD ANCHORS #####
    enlaces = (
        response.xpath('//a[text()="DOWNLOAD AUDIO"]/@href').extract()
        or response.xpath('//a[text()="DOWNLOAD ZIP"]/@href').extract()
    )
    # Without any download anchor there is nothing to verify or store;
    # reading item['infringing'] here used to fail.
    if not enlaces:
        return item
    item['infringing'] = enlaces[-1]

    ##### INSERT INTO DB #####
    if veri(item['infringing']) == True:
        if c.existe_inf(item['infringing'], self.id_domin) == False:
            c.inserta_item(str(item['titulo']), cantante, album,
                           str(item['href']), str(item['infringing']),
                           str(item['fecha']), self.id_domin)
    return item
##### WAIT FOR THE PAGE TO LOAD #####
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "h2 > a")))

##### REMEMBER THE MAIN WINDOW #####
main_window = driver.current_window_handle

try:
    ##### WALK THROUGH EVERY PAGE #####
    # The current page is processed BEFORE looking for the next one, so the
    # last page of the listing is no longer skipped.
    while True:
        ##### COLLECT THE DATA #####
        for a in driver.find_elements_by_css_selector("h2 > a"):
            referer = a.get_attribute('href')
            titulo = a.find_element_by_css_selector('span').text
            cantante, album = separa_titulo(titulo, '-')
            fecha = date.today().strftime("%B %d, %Y")

            ##### OPEN A NEW TAB #####
            driver.execute_script("window.open(arguments[0]);", referer)
            driver.switch_to.window(driver.window_handles[1])
            try:
                # The plural finder returns an empty list instead of raising
                # when the post has no download anchor.
                enlaces = driver.find_elements_by_xpath('//*[@id="shell"]/section/div[1]/div[2]/article/section/center/a')
                if enlaces:
                    infringing = enlaces[0].get_attribute('href')
                    imprime_datos(titulo, fecha, cantante, album, referer, infringing)
                    if c.existe_inf(infringing, id_domin) == False:
                        if veri(infringing) == True:
                            c.inserta_item(titulo, cantante, album, referer, infringing, fecha, id_domin)
            finally:
                ##### CLOSE THE TAB AND GO BACK TO THE MAIN WINDOW #####
                driver.close()
                driver.switch_to.window(main_window)

        ##### TAKE THE HREF OF THE NEXT PAGE #####
        # On the last page there is no "next" anchor; the singular finder
        # would raise there, so the ``is not None`` loop test never ended
        # the loop cleanly.
        siguiente = driver.find_elements_by_css_selector('a.sa.sa-nextpage.tip')
        next_page = siguiente[0].get_attribute('href') if siguiente else None
        if next_page is None:
            break

        ##### OPEN THE NEXT PAGE #####
        driver.get(next_page)
finally:
    # Always release the browser, even when a page fails half-way.
    driver.quit()
def parse(self, response):
    """Process the articles of a listing page and follow the pagination.

    For every ``article`` node the title, date and referer are read and a
    link is derived through ``self.get_id`` / ``self.get_infr`` (or
    ``self.get_Mega``).  Links for which ``veri`` compares equal to True and
    ``self.c.existe_inf`` compares equal to False are inserted through
    ``self.c``.  Finally a request for the ``a.next.page-numbers`` link is
    yielded when that link exists.

    NOTE(review): this method arrived with its indentation flattened.  The
    nesting below, in particular which ``if`` each ``else`` belongs to, is a
    best-effort reconstruction and must be confirmed against the original
    file before relying on it.
    """
    id = '0'
    ##### PROGRESS OUTPUT #####
    print('\n########Pagina ' + str(self._num_pagina) + '########')
    for art in response.xpath('//*[@id="content"]/div[1]/article'):
        # ``band`` stays True while the common insert at the end of the
        # iteration should run; the inner loop below clears it.
        band = True
        titulo = art.css(' h2 > a ::text').get()
        fecha = art.css('p > span > a ::text').get()
        # NOTE(review): other callbacks unpack ``cantante, album`` from
        # separa_titulo; the order is reversed here — confirm it is intended.
        album, cantante = separa_titulo(titulo, '-')
        referer = art.css('div > div.post-entry-content > a ::attr(href)').get()
        # NOTE(review): get_id is called before ``referer`` is checked for
        # None — verify that get_id tolerates a None argument.
        id = self.get_id(referer,'/', 3)
        infringing = self.get_infr(id)
        if referer is None:
            # First fallback: anchor nested in ``p > strong``.
            referer = art.css('div > div > p > strong > a ::attr(href)').get()
            if referer is None:
                # Second fallback: walk every candidate anchor in the post body.
                for p in art.css('div.post-entry-content > strong > span > span > a') or art.css('div.post-entry-content > p > a'):
                    band = False
                    referer = p.css('::attr(href)').get()
                    id = self.get_id(referer,'/', 3)
                    infringing = self.get_infr(id)
                    if self.comprueba_refer(referer, 'open') == True:
                        r = art.css('div.post-entry-content > p')
                        # NOTE(review): if css() returns a selector list this
                        # comparison is presumably always true — confirm.
                        if r.css('strong') is not None:
                            r = r.css('strong > span')
                            referer = r.css('a ::attr(href)').get()
                            id = self.get_id(referer,'/', 3)
                            infringing = self.get_infr(id)
                    ##### INSERT INTO DB #####
                    if veri(infringing) == True:
                        if self.c.existe_inf(infringing, self.id_domin) == False:
                            self.c.inserta_item(titulo, cantante, album, referer, infringing, fecha, self.id_domin)
                    else:
                        # NOTE(review): attachment of this ``else`` is
                        # reconstructed; it recomputes the same values.
                        id = self.get_id(referer,'/', 3)
                        infringing = self.get_infr(id)
                    self.imprime_datos(titulo, fecha, cantante, album, referer, infringing)
            else:
                # The first fallback produced a referer: derive the link from it.
                id = self.get_id(referer,'/', 3)
                infringing = self.get_infr(id)
        else:
            # A direct referer exists; when comprueba_refer matches '?' the
            # link is obtained through get_Mega instead.
            if self.comprueba_refer(referer, '?') == True:
                infringing = self.get_Mega(referer)
        if band == True:
            ##### INSERT INTO DB #####
            if veri(infringing) == True:
                if self.c.existe_inf(infringing, self.id_domin) == False:
                    self.c.inserta_item(titulo, cantante, album, referer, infringing, fecha, self.id_domin)
            self.imprime_datos(titulo, fecha, cantante, album, referer, infringing)
    self._num_pagina+=1
    try:
        next_page = response.css('a.next.page-numbers ::attr(href)').get()
        if next_page is not None:
            yield response.follow(next_page, callback= self.parse)
    # NOTE(review): a bare ``except`` around a ``yield`` also intercepts
    # GeneratorExit when the generator is closed — consider narrowing it.
    except:
        print('Hubo un problema al abrir la página siguiente')