コード例 #1
0
ファイル: parser.py プロジェクト: chiheon/Mental_illness
 def parse_blog(self, start_id=4300, stop_id=4752):
     """Scrape campuspick activity pages and save each one as a ``BlogData`` row.

     Requests ``https://www.campuspick.com/activity/view?id=<n>`` for every
     ``n`` in ``range(start_id, stop_id)``. Pages without an
     ``article.description`` element are skipped. For every other page the
     last match of each selector is stored. A selector with no match yields
     an empty string (``None`` for the image) instead of an unbound name or
     a value left over from the previously scraped page.

     Args:
         start_id: First activity id to request (inclusive).
         stop_id: Activity id at which to stop (exclusive).
     """
     for activity_id in range(start_id, stop_id):
         req = requests.get('https://www.campuspick.com/activity/view?id=' + str(activity_id))
         soup = BeautifulSoup(req.text, 'html.parser')

         my_explanation = soup.select('article.description')
         if not my_explanation:
             # No description element on this page: nothing to store.
             continue

         my_titles = soup.select('h1')
         my_image = soup.select('.poster > img')
         my_company = soup.select('p.company')
         my_dday = soup.select('.dday + p.indent')

         # Every field is computed afresh for this page. The last match wins,
         # and a missing element falls back to an explicit default so data
         # from an earlier page can never leak into this row.
         activity_title = str(my_titles[-1]) if my_titles else ''
         activity_explanation = str(my_explanation[-1])
         activity_company = str(my_company[-1]) if my_company else ''
         activity_d_day = str(my_dday[-1]) if my_dday else ''

         activity_image = None
         if my_image:
             poster = my_image[-1]
             # <img> elements normally carry their URL in ``src``; ``href`` is
             # still tried first so any page that does provide it is unchanged.
             activity_image = poster.get('href') or poster.get('src')

         BlogData(
             title=activity_title,
             image=activity_image,
             explanation=activity_explanation,
             company=activity_company,
             d_day=activity_d_day,
         ).save()
コード例 #2
0
def parse_blog():
    """Check the blog index for a new post, store it and send a notification.

    The last link matched by the title selector is compared with the first
    line of ``<data_dir>/etc/recent.txt``. When they differ, the post is
    saved as a ``BlogData`` row, a message is sent through ``bot`` and the
    file is overwritten with the new title.

    Returns without doing anything when the page yields no title links. A
    missing ``recent.txt`` is treated as "no post recorded yet".
    """
    req = requests.get('http://h3njupio.pythonanywhere.com/blog/')
    soup = BeautifulSoup(req.text, 'html.parser')
    titles = soup.select(
        'body > div.content.container > div > div > div > h1 > a')
    if not titles:
        # Nothing matched (empty blog, error page, changed layout), so
        # there is no post to compare against.
        return
    recent = titles[-1]

    recent_path = os.path.join(data_dir, 'etc', 'recent.txt')
    try:
        # Read and close the marker file before it is reopened for writing.
        with open(recent_path, 'r') as r_file:
            latest = r_file.readline()
    except FileNotFoundError:
        # First run: no title has been recorded yet.
        latest = ''

    if latest == recent.text:
        return

    BlogData(title=recent.text, link=recent.get('href')).save()
    bot.send_message(chat_id=chat_id,
                     text='[알림] 새 글이 등록되었습니다.\n' + recent.text)
    # Make sure the marker directory exists so the title can be recorded;
    # otherwise the same post would be announced again on every run.
    os.makedirs(os.path.dirname(recent_path), exist_ok=True)
    with open(recent_path, 'w+') as w_file:
        w_file.write(recent.text)
コード例 #3
0
# Point Django at the settings module unless DJANGO_SETTINGS_MODULE is already set.
os.environ.setdefault("DJANGO_SETTINGS_MODULE","websaver.settings")
# Import Django and initialise it so the project can be used from this script.
import django
django.setup()
# Import the BlogData model (done after django.setup() has run).
from parsed_data.models import BlogData


def parse_blog():
	"""Fetch the archived beomi blog index and map post titles to links.

	Returns:
		dict: The text of every ``h3 > a`` element mapped to that element's
		``href`` attribute. If two links share the same text, the later
		one wins.
	"""
	response = requests.get('https://beomi.github.io/beomi.github.io_old/')
	page = BeautifulSoup(response.text, 'html.parser')
	return {anchor.text: anchor.get('href') for anchor in page.select('h3 > a')}

# Run the scrape-and-save step only when this file is executed directly
# with python, not when it is imported.
if __name__ == '__main__':
	for post_title, post_link in parse_blog().items():
		BlogData(title=post_title, link=post_link).save()
	
#with open(os.path.join(BASE_DIR,'result.json'),'w+') as json_file:
#	json.dump(data,json_file)