/
tasks.py
56 lines (49 loc) · 1.98 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from celery import Celery
from sqlalchemy.orm import sessionmaker
from models import db_connect, JobInfo, create_table
from job_scrapy.settings import BROKER_URL, CELERY_RESULT_BACKEND
# Celery application for this module; broker and result backend URLs come
# from the scrapy project settings.
app = Celery(
    'tasks',
    broker=BROKER_URL,
    backend=CELERY_RESULT_BACKEND,
)
def add_filter_salary(data, instance, query, session):
    """Append ``data['filter_salary']`` to one row's ';'-separated salary list.

    Nothing is written when the row has no salary list yet
    (``instance.filter_salary`` is ``None``), when the incoming value is
    empty, or when the value is already one of the stored entries.

    Args:
        data: Mapping for the scraped item; must contain ``'filter_salary'``.
        instance: The already-loaded row to update. It is presumably attached
            to ``session`` so that the attribute assignment is persisted by
            ``session.commit()`` -- confirm for callers outside this module.
        query: Unused. Kept so existing callers that pass the originating
            query keep working.
        session: Session used to commit the change.
    """
    current = instance.filter_salary
    if current is None:
        return

    new_salary = data['filter_salary']
    if not new_salary or new_salary in current.split(';'):
        return

    # Update only this row. A bulk query.update() would append the value to
    # every row the query matches, including rows that already list it,
    # which produced duplicated entries such as "20k;20k".
    instance.filter_salary = current + ';' + new_salary
    session.commit()
@app.task
def insert_item(item):
    """Celery task: store scraped job dicts, skipping ones already present.

    For each ``data`` dict in ``item``:

    * If ``data['sponsored']`` is truthy, existing rows are looked up by
      ``title`` and ``sponsored``.
    * If ``data['sponsored']`` is ``None``, existing rows are looked up by
      ``post_url``.
    * If no row is found, a new ``JobInfo`` is inserted and committed.
      Otherwise ``add_filter_salary`` is called for every existing row.

    Args:
        item: Iterable of dicts whose keys are passed to ``JobInfo(**data)``;
            each must contain ``'sponsored'`` plus the lookup key used for
            its branch (``'title'`` or ``'post_url'``).

    Raises:
        Exception: Any error raised while querying or committing is re-raised
            after the session has been rolled back and closed.
    """
    engine = db_connect()
    create_table(engine)
    session = sessionmaker(bind=engine)()
    try:
        for data in item:
            sponsored = data['sponsored']
            if sponsored:
                query = session.query(JobInfo).filter(
                    JobInfo.title == data['title'],
                    JobInfo.sponsored == sponsored,
                )
            elif sponsored is None:
                query = session.query(JobInfo).filter(
                    JobInfo.post_url == data['post_url'],
                )
            else:
                # NOTE(review): a falsy but non-None value (e.g. False, '')
                # matched neither branch before either, so such items are
                # still skipped -- confirm this is intended.
                continue

            # Run the lookup once and reuse the rows for both the existence
            # check and the salary update.
            existing_rows = query.all()
            if not existing_rows:
                session.add(JobInfo(**data))
                session.commit()
                continue

            for instance in existing_rows:
                add_filter_salary(data, instance, query, session)
    except Exception:
        # Leave no half-applied transaction behind before propagating.
        session.rollback()
        raise
    finally:
        # Always release the connection, even when an item fails.
        session.close()