-
Notifications
You must be signed in to change notification settings - Fork 0
/
tasks.py
41 lines (34 loc) · 1.69 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from celery import Celery
from celery import shared_task
from bs4 import BeautifulSoup as bs4
import requests
from os.path import isfile
import csv
from utils import validate_data
# Module-level Celery application named 'tasks'. The broker URL uses the
# pyamqp transport with the 'guest' user on localhost.
app = Celery(
    'tasks',
    broker='pyamqp://guest@localhost//',
)
# Seconds to wait on the HTTP request before giving up. Without a timeout,
# requests.get() may block the worker indefinitely on a stalled server.
_REQUEST_TIMEOUT_SECONDS = 30

# HTML ids of the <div> containers to extract, listed in CSV column order.
_FIELD_CONTAINER_IDS = (
    'idMembershipLevelContainer',  # membership level
    'idContainer2075262',  # organization
    'idContainer2075260',  # first name
    'idContainer2075261',  # last name
    'idContainer2100498',  # nick name
    'idContainer2075259',  # email
    'idContainer2783240',  # street address
    'idContainer2100502',  # city
    'idContainer2100503',  # state
    'idContainer2783241',  # zip code
    'idContainer2075265',  # phone
    'idContainer2783253',  # office
    'idContainer2075270',  # job title
)


@shared_task
def scrape_data(link, file_name):
    """Fetch one page and append its extracted fields as a CSV row.

    The page at ``link`` is downloaded and parsed with BeautifulSoup. Each
    ``<div>`` whose id is listed in ``_FIELD_CONTAINER_IDS`` is looked up
    and passed through ``validate_data``; the results are appended, in that
    order, as a single row to ``all_scraped_data/<file_name>.csv``.

    Args:
        link: URL of the page to scrape.
        file_name: Base name (without ``.csv``) of the output file inside
            the ``all_scraped_data`` directory.

    Returns:
        The literal string ``'Task Done!!!'``.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with a 4xx/5xx status. No row is written then.
        OSError: If the output file cannot be opened (for example when the
            ``all_scraped_data`` directory does not exist).
    """
    response = requests.get(link, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Fail the task instead of scraping an error page into the CSV.
    response.raise_for_status()

    soup = bs4(response.content, 'html.parser')

    # soup.find() returns None for a missing container; validate_data is
    # presumably responsible for handling that case -- confirm in utils.
    row = [
        validate_data(soup.find('div', {'id': container_id}))
        for container_id in _FIELD_CONTAINER_IDS
    ]

    # newline='' lets the csv module control line endings itself.
    with open(f'all_scraped_data/{file_name}.csv', 'a', newline='') as file:
        csv.writer(file).writerow(row)

    return 'Task Done!!!'