def get_urls_pars(html):
    """Scrape the fl.ru project listing and store parsing-related jobs.

    For every post whose title contains one of the parsing keywords, the
    project link, title, price, description and publish date/time are
    extracted (via the refind_* helpers) and upserted into the Fl model,
    keyed by the project link.

    :param html: raw HTML of the fl.ru projects-list page.
    """
    soup = BeautifulSoup(html, 'lxml')
    posts = soup.find('div', id="projects-list").find_all('div', class_="b-post")
    # Keyword variants (capitalised/lowercase) that mark a job as a
    # scraping/parsing task; matched against the raw Russian title.
    keywords = ['Спарсить', 'спарсить', 'Парсинг', 'парсинг', 'Парсер', 'парсер']
    for post in posts:
        # Single lookup of the title anchor (was queried twice).
        anchor = post.find('a', class_="b-post__link")
        show = anchor.text
        url = anchor.get('href')
        # Process each matching post exactly once; the original looped
        # over the keywords and saved/printed the item once per match.
        if not any(re.search(keyword, show) for keyword in keywords):
            continue
        link = 'https://www.fl.ru' + url
        ref_link = refind_link(link)
        ref_link_2 = refind_link_2(link)
        ref_link_3 = refind_link_3(link)
        date_p1 = refind_w(ref_link_2)
        time_p = refind_t(ref_link_2)
        date_y = refind_name_y(date_p1)
        date_m = refind_name_m(date_p1)
        date_d = refind_name_d(date_p1)
        date_p = date_y + '-' + date_m + '-' + date_d
        price = ref_link_3
        image = 'images/fl.png'
        # Upsert by link: update the existing row or create a new one.
        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            p.date_p = date_p
            p.time_p = time_p
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
        print(link)
def get_urls_pars(html):
    """Scrape the weblancer.net project listing and store parsing jobs.

    Rows missing any expected field fall back to '-' placeholders, which
    also guarantees the keyword filter skips them.  Matching projects are
    upserted into the Fl model, keyed by the project link.

    :param html: raw HTML of the weblancer.net listing page.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.find_all('div', class_="row")
    keywords = ['Спарсить', 'спарсить', 'Парсинг', 'парсинг', 'Парсер', 'парсер']
    for row in rows:
        try:
            title = row.find('a', class_="text-bold show_visited")
            show = title.text.strip()
            date = row.find('span', class_="time_ago").get('title')
            # status/status_2 are never stored, but looking them up keeps
            # the original behaviour: a row missing any of these fields
            # falls through to the placeholder values below.
            status = row.find('span', class_="text-muted").text.strip()
            status_2 = row.find(
                'div', class_="float-left float-sm-none text_field").text.strip()
            field = row.find('p', class_="text_field").text.strip()
            url = title.get('href')
        except Exception:  # was a bare except; keep the best-effort fallback
            show = date = status = status_2 = field = url = '-'
        # Process each matching row exactly once; the original looped over
        # the keywords and saved/printed the item once per match.
        if not any(re.search(keyword, show) for keyword in keywords):
            continue
        link = 'https://www.weblancer.net' + url
        ref_link = field
        date_p1 = refind_w(date)
        time_p = refind_t(date)
        date_y = refind_name_y(date_p1)
        date_m = refind_name_m(date_p1)
        date_d = refind_name_d(date_p1)
        date_p = date_y + '-' + date_m + '-' + date_d
        price = 'По договорённости'
        image = 'images/weblancer.png'
        # Upsert by link.
        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            p.date_p = date_p
            p.time_p = time_p
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
        print(link)
def get_all_links(html):
    """Scrape a youdo.com task page and upsert it into the Fl model.

    :param html: raw HTML of one task page.
    :returns: the task description text, or the placeholder string
        '**-??????????????-**' when any part of the page fails to parse.
    """
    soup = BeautifulSoup(html, 'lxml')
    try:
        tds = soup.find('div', class_="b-task-block__description").text
        name = soup.find('h1', class_="b-task-block__header__title").text
        # The brief block is looked up once (was re-queried three times).
        brief = soup.find('div', class_="js-task-item--brief")
        name_2 = brief.find('li', class_="b-task-brief__item").text
        ref_name_2 = refind_name_2(name_2)  # computed but currently unused
        name_3 = brief.find(
            'li', class_="b-task-brief__item").find_next_sibling('li').text
        name_4 = soup.find('span', class_="js-budget-text").text
        ref_name_4 = refind_name_4(name_4)
        # The value feeding the task URL lives in the 5th brief item;
        # walk the sibling chain from the first one.
        name_5 = (brief.find('li', class_="b-task-brief__item")
                  .find_next_sibling('li').find_next_sibling('li')
                  .find_next_sibling('li').find_next_sibling('li').text)
        ref_name_5 = 'https://youdo.com/t' + refind_name_5(name_5)
        link = ref_name_5
        show = name
        ref_link = tds
        now = datetime.now()
        # Same results as splitting str(now): date part and 'HH:MM'.
        date_p = now.strftime('%Y-%m-%d')
        time_p = now.strftime('%H:%M')
        price = ref_name_4
        image = 'images/youdo.png'
        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            # date_p/time_p deliberately not updated so the stamp of the
            # first sighting is preserved (matches the original code).
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
        print(link)
        return tds
    except Exception:
        # Was a bare except; any layout change / missing element lands here.
        return '**-??????????????-**'
def get_all_links(html):
    """Scrape a freelansim task page and upsert it into the Fl model.

    Each fragment is fetched independently; a missing element yields the
    '!-----!-----!' placeholder instead of aborting the whole page.

    :param html: raw HTML of one task page.
    :returns: the task description text (or its placeholder).
    """
    soup = BeautifulSoup(html, 'lxml')
    try:
        tds = soup.find('div', class_="task__description").text
    except Exception:  # narrowed from a bare except
        tds = '!-----!-----!'
    try:
        name = soup.find('h2', class_="task__title").text
    except Exception:
        name = '!-----!-----!'
    try:
        date = soup.find('div', class_="task__meta").text
    except Exception:
        date = '!-----!-----!'
    ref_date_1 = refind_date_1(date)
    ref_date_2 = refind_date_2(date)  # computed but currently unused
    ref_date_3 = refind_date_3(date)  # computed but currently unused
    try:
        prise = soup.find('div', class_="task__finance").text
    except Exception:
        prise = '!-----!-----!'
    try:
        url_l = soup.find('div', class_="dropdown__menu").find('a').get('href')
    except Exception:
        url_l = '!-----!-----!'
    try:
        # Assumes the share URL embeds the target after '+http' — the
        # split recovers 'https...' from it; TODO confirm against markup.
        ref_url_l = 'https' + url_l.split('+http')[1]
    except Exception:
        ref_url_l = '!-----!-----!'
    date_y = refind_name_y(ref_date_1)
    date_d = refind_name_d(ref_date_1)
    date_m = refind_name_m(ref_date_1)
    ref_date = date_y + '-' + date_m + '-' + date_d
    print(ref_date)
    link = ref_url_l
    show = name
    ref_link = tds
    date_p = ref_date
    time_p = refind_t(ref_date_1)
    price = prise
    image = 'images/freelansim.png'
    # Upsert by link.
    try:
        p = Fl.objects.get(link=link)
        p.show = show
        p.price = price
        p.ref_link = ref_link
        p.date_p = date_p
        p.time_p = time_p
        p.save()
    except Fl.DoesNotExist:
        Fl(
            link=link,
            show=show,
            price=price,
            ref_link=ref_link,
            date_p=date_p,
            time_p=time_p,
            image=image,
        ).save()
    print(link)
    return tds
def get_urls_pars(html):
    """Scrape the freelancehunt listing table and store every project.

    Rows whose third centered cell holds a bare day-of-month number are
    skipped; everything else is upserted into the Fl model, keyed by URL.

    :param html: raw HTML of the listing page.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.find('tbody').find_all('tr')
    # Bare day-of-month strings ('1'..'31') mark rows to skip.
    day_numbers = {str(n) for n in range(1, 32)}
    for row in rows:
        try:
            name = row.find('td', class_="left").text
        except Exception:  # narrowed from a bare except
            name = '---'
        try:
            url = row.find('td', class_="left").find('a').get('href')
        except Exception:
            url = '- - - - -'
        try:
            price = row.find('td', class_="text-center").find(
                'div', class_="text-green").text
        except Exception:
            price = 'Договорная'
        # Fix: pre-set the fallback so a short row (fewer than 3 centered
        # cells) or a scrape failure can no longer raise NameError below.
        date = '++++++'
        try:
            cells = row.find_all('td', class_="text-center")
            # Renamed loop variable: the original shadowed the row index.
            for cell_index, cell in enumerate(cells):
                if cell_index == 2:
                    try:
                        date = cell.find(
                            'div', class_="with-tooltip").find('h2').text
                    except Exception:
                        date = '++++++'
        except Exception:
            pass
        ref_link = refind_links(url)
        if date in day_numbers:
            continue
        link = url
        show = refind_name(name)
        date_p = str(datetime.now()).split(' ')[0]
        time_p = date
        image = 'images/freelancehunt.png'
        # Upsert by link.
        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            # date_p/time_p deliberately not updated (matches original).
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
        print(link)
    print('---**+++***--- && ---***+++**---')
def get_urls_pars(html):
    """Scrape the freelance.ru project feed and store parsing jobs.

    Iterates the children of the 'projects' container; each field is
    fetched best-effort with a '- - - - -' placeholder on failure.
    Items whose title matches a parsing keyword are upserted into the
    Fl model, keyed by the project link.

    :param html: raw HTML of the feed page.
    """
    soup = BeautifulSoup(html, 'lxml')
    projects = soup.find('div', class_="projects")
    keywords = ['Парсинг', 'парсинг', 'Парсер', 'парсер']
    for item in projects:
        try:
            name = item.find('div').text.strip()
        except Exception:  # narrowed from a bare except
            name = '- - - - -'
        try:
            name_2 = item.find('a').get('href')
        except Exception:
            name_2 = '- - - - -'
        try:
            name_3 = item.find('li').text
        except Exception:
            name_3 = '- - - - -'
        try:
            name_4 = item.find('ul').find('i').text
        except Exception:
            name_4 = '- - - - -'
        try:
            name_5 = item.find('ul').text
            ref_name_5 = refind_name_5(name_5)
        except Exception:
            name_5 = '- - - - -'
        try:
            name_6 = item.find('ul').text.strip()
            ref_name_6 = refind_name_6(name_6)
        except Exception:
            name_6 = '- - - - -'
        try:
            date_y = refind_name_y(name_3)
            date_m = refind_name_m(name_3)
            date_d = refind_name_d(name_3)
            ref_date = date_d + '.' + date_m + '.20' + date_y
            date_pub = '20' + date_y + '-' + date_m + '-' + date_d
        except Exception:
            ref_date = '00000000000'
            # Fix: date_pub was left unset on this path, so a matching
            # item with an unparseable date raised NameError below.
            date_pub = '00000000000'
        # Process each matching item exactly once; the original looped
        # over the keywords and saved/printed the item once per match.
        if not any(re.search(keyword, name) for keyword in keywords):
            continue
        link = 'https://freelance.ru' + name_2
        ref_link = refind_link(link)
        show = name
        date_p = date_pub
        # Current local time as 'HH:MM' (same as the original's string
        # splitting of str(datetime.now())).
        time_p = datetime.now().strftime('%H:%M')
        price = 'Договорная'
        image = 'images/freelance.png'
        # Upsert by link.
        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            p.date_p = date_p
            # time_p deliberately not updated (matches original).
            p.save()
        except Fl.DoesNotExist:
            Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
        print(link)
def get_all_links(html, url):
    """Scrape a freelance.ru project page and upsert it into the Fl model.

    ``html`` is the fetched page body; ``url`` is the page address and is
    used as the unique key for the Fl upsert.  Returns ``tds_z`` — the
    description text, or the business-account placeholder when the
    description is hidden.

    NOTE(review): several locals (td_1, td_3, td_4, tdtu, all_tds_z_u,
    tds, price, date_1, date_2) are bound only inside try-blocks or
    index-guarded branches but read unconditionally later; an unexpected
    page layout raises NameError instead of degrading gracefully —
    confirm this is acceptable before relying on new layouts.
    """
    soup = BeautifulSoup(html, 'lxml')
    try:
        # Project title and short description paragraph.
        tds = soup.find('div', class_="s_box").find('h1', class_="proj_tophead").text.strip()
        tds_z = soup.find('p', class_="txt href_me").text.strip()
        # ref_tds_z = refind_tds_z(tds_z)
    except:
        # Description is visible to business accounts only.
        tds_z = '**-Для Бизнес-аккаунтов-**'
    try:
        # Details-table rows: 0 = summary cell, 2 = section label,
        # 3 = extra description paragraph.
        tds_z_2 = soup.find('div', class_="s_box").find_all('tr')
        for index, t in enumerate(tds_z_2):
            if index == 0:
                td_1 = t.find('td').text.strip()
            if index == 2:
                td_3 = t.find('td').text.strip()
            if index == 3:
                td_4 = t.find('td').find('p', class_="txt href_me").text.strip()
    except:
        tds_z_2 = '------'
    try:
        # Row 4 presumably holds the attached-files cell; every third <a>
        # (indices 2, 5, 8, ...) appears to be a file link — collect them.
        tds_z_u = soup.find('div', class_="s_box").find_all('tr')
        all_tds_z_u = []
        for index, t in enumerate(tds_z_u):
            if index == 4:
                try:
                    tdtu = t.find('td').find('h4').text.strip()
                except:
                    tdtu = '---------1////////'
                try:
                    tdnu = t.find('td').text.strip()
                except:
                    tdnu = '---------2////////'
                try:
                    tdu_1 = t.find('td').find_all('a')
                    # NOTE(review): this inner `index` shadows the outer one.
                    for index, i in enumerate(tdu_1):
                        if index in [2, 5, 8, 11, 14, 17, 20]:
                            tdu = i.get('href')
                            all_tds_z_u.append(tdu)
                except:
                    tdu_1 = '---------3////////'
    except:
        tds_z_u = '------'
    myString = '\n'.join(all_tds_z_u)
    # Assemble the stored description: summary + description, optionally
    # followed by the extra section and the attached-file links.
    if td_3 == 'Пожаловаться':
        tds_z_3 = td_1 + '\n\n' + tds_z
    elif tdtu == 'Присоединенные файлы':
        tds_z_3 = td_1 + '\n\n' + tds_z + '\n\n' + td_3 + '\n\n' + td_4 + '\n\n' + tdtu + '\n\n' + myString
    else:
        tds_z_3 = td_1 + '\n\n' + tds_z + '\n\n' + td_3 + '\n\n' + td_4
    if tds_z != '**-Для Бизнес-аккаунтов-**':
        # Price (row 0) and two date candidates (rows 3 and 4) from the
        # sidebar table.
        prices = soup.find('div', class_="col-lg-12").find_all('tr')
        for index, p in enumerate(prices):
            if index == 0:
                price = p.find('td').find_next_sibling('td').text
            if index == 3:
                date_1 = p.find('td').find_next_sibling('td').text
            if index == 4:
                date_2 = p.find('td').find_next_sibling('td').text
        link = url
        show = tds
        # Prefer date_1, fall back to date_2; each is split on the first
        # space into a date part and a time part.
        if date_1 != '':
            date_p = date_1.split(' ')[0]
            time_p = date_1.split(' ')[1]
        else:
            date_p = date_2.split(' ')[0]
            time_p = date_2.split(' ')[1]
        price = price  # no-op kept verbatim from the original
        image = 'images/freelance.png'
        # Store the assembled description, falling back to the short one.
        if tds_z_3 != '':
            ref_link = tds_z_3
        else:
            ref_link = tds_z
        # Upsert by link: update the existing row or create a new one.
        try:
            p = Fl.objects.get(link=link)
            p.show = show
            p.price = price
            p.ref_link = ref_link
            p.date_p = date_p
            p.time_p = time_p
            p.save()
        except Fl.DoesNotExist:
            p = Fl(
                link=link,
                show=show,
                price=price,
                ref_link=ref_link,
                date_p=date_p,
                time_p=time_p,
                image=image,
            ).save()
        print('2 ', link)
    return tds_z