import django
# Initialise Django (settings and app registry) before any model is imported;
# the model import below relies on this ordering, so do not reorder.
django.setup()
from django_crawl.models import BlogData
# Absolute path of the directory that contains this file.
# NOTE(review): `os` is not imported in the visible part of this file -
# confirm it is imported elsewhere.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def parse_blog():
    """Scrape the hard-coded PKNU board page and return its posts by title.

    Fetches the listing page, locates the ``<ul id="board_list">`` element
    and reads every ``li > a`` entry inside it.

    Returns:
        dict: Maps each post title (text of the entry's ``<h4>``) to a
        two-item list ``[href, date_text]``, where ``date_text`` is the text
        of the entry's ``<span class="date">``. Empty when the page contains
        no board list.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    html_url = 'http://cms.pknu.ac.kr/pknujob/view.do?no=2342'
    # Bound the wait so a stalled server cannot hang the crawler forever.
    req = requests.get(html_url, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    req.raise_for_status()
    soup = BeautifulSoup(req.text, 'html.parser')

    # find() returns None when the element is absent; treat that as "no posts".
    board = soup.find('ul', {'id': 'board_list'})
    if board is None:
        return {}

    data = {}
    for anchor in board.select('li > a'):
        heading = anchor.find('h4')
        date = anchor.find('span', {'class': 'date'})
        if heading is None or date is None:
            # Skip entries that lack a title or a date rather than crashing
            # the whole crawl on one malformed list item.
            continue
        data[heading.text] = [anchor.get('href'), date.text]
    return data


if __name__ == '__main__':
    # Script entry point: scrape the board and store each post as a BlogData
    # row tagged 'job_general'.
    import logging

    _log = logging.getLogger(__name__)

    blog_data_dict = parse_blog()
    for title, (link, date) in blog_data_dict.items():
        try:
            BlogData(title=title, link=link, tag='job_general', date=date).save()
        except Exception:
            # Saving stays best-effort (one bad row must not stop the rest),
            # but the failure is reported instead of silently discarded, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            _log.exception("Failed to save post %r", title)
Ejemplo n.º 2
0
    # my_googles = soup.find_all('div', {'class':'BNeawe vvjwJb AP7Wnd'})
    #case incruit
    req = requests.get('http://www.pknu.ac.kr/usrBoardActn.do?p_bm_idx=5&p_boardcode=PK10000005&p_sbsidx=2')
    html = req.text
    soup = BeautifulSoup(html, 'html.parser')
    my_incruit = soup.find_all('td', {'class':'title'})

    data = {}
    for title in my_incruit:
        data[title.text] = title.get('href')
    return data

if __name__ == '__main__':
    # Script entry point: store one BlogData row per scraped title.
    # Only the dict keys (titles) are used; the mapped values are ignored.
    blog_data_dict = parse_blog()
    for post_title in blog_data_dict:
        BlogData(title=post_title).save()


# req = requests.get('https://www.google.com/search?q=%ED%81%AC%EB%A1%A4%EB%9F%AC&rlz=1C1OKWM_enKR881KR881&oq=%ED%81%AC%EB%A1%A4%EB%9F%AC&aqs=chrome..69i57j0l4j69i61l3.2578j0j7&sourceid=chrome&ie=UTF-8')
# html = req.text
# soup = BeautifulSoup(html, 'html.parser')
# my_googles = soup.find_all('div', {'class':'BNeawe vvjwJb AP7Wnd'})

def my_cron_job():
        req = requests.get('http://www.pknu.ac.kr/usrBoardActn.do?p_bm_idx=5&p_boardcode=PK10000005&p_sbsidx=2')
        html = req.text
        soup = BeautifulSoup(html, 'html.parser')
        my_incruit = soup.find_all('td', {'class':'title'})

        data = {}
        for title in my_incruit:
Ejemplo n.º 3
0
import django
# Initialise Django (settings and app registry) before any model is imported;
# the model import below relies on this ordering, so do not reorder.
django.setup()
from django_crawl.models import BlogData
# Absolute path of the directory that contains this file.
# NOTE(review): `os` is not imported in the visible part of this file -
# confirm it is imported elsewhere.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def parse_blog():
    """Scrape the hard-coded PKNU board page and return its posts by title.

    Fetches the listing page, locates the ``<ul id="board_list">`` element
    and reads every ``li > a`` entry inside it.

    Returns:
        dict: Maps each post title (text of the entry's ``<h4>``) to a
        two-item list ``[href, date_text]``, where ``date_text`` is the text
        of the entry's ``<span class="date">``. Empty when the page contains
        no board list.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    html_url = 'http://cms.pknu.ac.kr/lincplus/view.do?no=9100'
    # Bound the wait so a stalled server cannot hang the crawler forever.
    req = requests.get(html_url, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    req.raise_for_status()
    soup = BeautifulSoup(req.text, 'html.parser')

    # find() returns None when the element is absent; treat that as "no posts".
    board = soup.find('ul', {'id': 'board_list'})
    if board is None:
        return {}

    data = {}
    for anchor in board.select('li > a'):
        heading = anchor.find('h4')
        date = anchor.find('span', {'class': 'date'})
        if heading is None or date is None:
            # Skip entries that lack a title or a date rather than crashing
            # the whole crawl on one malformed list item.
            continue
        data[heading.text] = [anchor.get('href'), date.text]
    return data


if __name__ == '__main__':
    # Script entry point: scrape the board and store each post as a BlogData
    # row tagged 'link'.
    import logging

    _log = logging.getLogger(__name__)

    blog_data_dict = parse_blog()
    for title, (link, date) in blog_data_dict.items():
        try:
            BlogData(title=title, link=link, tag='link', date=date).save()
        except Exception:
            # Saving stays best-effort (one bad row must not stop the rest),
            # but the failure is reported instead of silently discarded, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            _log.exception("Failed to save post %r", title)