def parse_blog(self):
    # Crawl CampusPick activity pages by id and save one record per page.
    # Assumes requests, BeautifulSoup, and the BlogData model are already imported.
    for i in range(4300, 4752):
        req = requests.get('https://www.campuspick.com/activity/view?id=' + str(i))
        html = req.text
        soup = BeautifulSoup(html, 'html.parser')

        my_titles = soup.select('h1')
        my_image = soup.select('.poster > img')
        my_explanation = soup.select('article.description')
        my_company = soup.select('p.company')
        my_dday = soup.select('.dday + p.indent')

        # Skip ids whose page has no description (deleted or empty activities).
        if len(my_explanation) == 0:
            continue

        for title in my_titles:
            activity_title = str(title)
        for image in my_image:
            activity_image = image.get('src')  # <img> tags carry the URL in 'src', not 'href'
        for explanation in my_explanation:
            activity_explanation = str(explanation)
        for company in my_company:
            activity_company = str(company)
        for d_day in my_dday:
            activity_d_day = str(d_day)

        BlogData(title=activity_title,
                 image=activity_image,
                 explanation=activity_explanation,
                 company=activity_company,
                 d_day=activity_d_day).save()
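The fields saved above (title, image, explanation, company, d_day) imply a model along these lines. This is only a minimal sketch of what parsed_data.models.BlogData might look like for this crawler; the actual field types are assumptions, not taken from the project.

from django.db import models

class BlogData(models.Model):
    # Assumed field definitions matching the attributes used by parse_blog above.
    title = models.TextField()        # raw <h1> HTML
    image = models.URLField()         # poster image URL
    explanation = models.TextField()  # raw article.description HTML
    company = models.TextField()      # raw p.company HTML
    d_day = models.TextField()        # raw deadline paragraph HTML

    def __str__(self):
        return self.title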
def parse_blog():
    req = requests.get('http://h3njupio.pythonanywhere.com/blog/')
    html = req.text
    soup = BeautifulSoup(html, 'html.parser')

    titles = soup.select('body > div.content.container > div > div > div > h1 > a')
    recent = titles[-1]

    # Compare the newest post title with the one stored on the previous run.
    with open(os.path.join(data_dir, 'etc', 'recent.txt'), 'r') as r_file:
        latest = r_file.readline()

    if latest != recent.text:
        BlogData(title=recent.text, link=recent.get('href')).save()
        # Message text: "[알림] 새 글이 등록되었습니다." = "[Notice] A new post has been published."
        bot.send_message(chat_id=chat_id, text='[알림] 새 글이 등록되었습니다.\n' + recent.text)
        with open(os.path.join(data_dir, 'etc', 'recent.txt'), 'w+') as w_file:
            w_file.write(recent.text)
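This version of parse_blog relies on bot, chat_id, and data_dir being defined at module level. Here is a minimal sketch of that setup, assuming an older synchronous release of the python-telegram-bot package and a token kept in a local file; the file names and the way the chat id is obtained are illustrative assumptions, not taken from the original project.

import os
import telegram

data_dir = os.path.dirname(os.path.abspath(__file__))

# Hypothetical token storage; adjust to however the project keeps its secrets.
with open(os.path.join(data_dir, 'etc', 'token.txt'), 'r') as token_file:
    TELEGRAM_TOKEN = token_file.readline().strip()

bot = telegram.Bot(token=TELEGRAM_TOKEN)
# Take the chat id from the most recent message sent to the bot
# (fails if the bot has never received a message).
chat_id = bot.get_updates()[-1].message.chat.id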
import os
import requests
from bs4 import BeautifulSoup

# Point Django at the project settings so the ORM can be used from a standalone script.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "websaver.settings")

# Now import Django and set up the environment so the Django project can be used.
import django
django.setup()

# Import the BlogData model.
from parsed_data.models import BlogData


def parse_blog():
    req = requests.get('https://beomi.github.io/beomi.github.io_old/')
    html = req.text
    soup = BeautifulSoup(html, 'html.parser')

    my_titles = soup.select('h3 > a')
    data = {}
    for title in my_titles:
        data[title.text] = title.get('href')
    return data


# Run the code below only when this file is executed directly by Python, not when it is imported.
if __name__ == '__main__':
    blog_data_dict = parse_blog()
    for t, l in blog_data_dict.items():
        BlogData(title=t, link=l).save()
    # with open(os.path.join(BASE_DIR, 'result.json'), 'w+') as json_file:
    #     json.dump(data, json_file)
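After the script has run, the stored rows can be inspected from the Django shell (python manage.py shell). A quick check might look like this, using only the title and link fields saved above:

# Run inside `python manage.py shell` after executing the parser script.
from parsed_data.models import BlogData

print(BlogData.objects.count())            # how many posts were saved
for post in BlogData.objects.all()[:5]:    # peek at the first few records
    print(post.title, post.link)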