-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
77 lines (60 loc) · 2.97 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import csv
import re
import time

import requests
from bs4 import BeautifulSoup
from craigslist import CraigslistHousing
from flask import Flask, render_template, request, redirect, url_for
app = Flask(__name__)
app.config['SECRET_KEY'] = 'our very hard to guess secretfir'
def start_scrapper(cit, cat):
    """Scrape today's Craigslist housing listings into ``output.csv``.

    Parameters:
        cit: Craigslist site code for the city (e.g. ``'sfbay'``).
        cat: housing category code (e.g. ``'apa'``).

    For every listing posted today, fetches the posting page, extracts the
    description text plus any phone numbers / email addresses found in it,
    and writes one CSV row per listing. Runs synchronously and sleeps 2s
    between page fetches as a politeness delay.
    """
    print("Starting to scrape data...")

    # Compile once, outside the per-listing loop. Raw strings avoid
    # invalid-escape-sequence warnings on modern Python.
    phone_re = re.compile(
        r"(\d{3}[-\.\s]\d{3}[-\.\s]\d{4}|\(\d{3}\)\s*\d{3}[-\.\s]\d{4}|\d{3}[-\.\s]\d{4})"
    )
    email_re = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)")
    boilerplate = "QR Code Link to This Post"

    def housing(citi_code, category_code, writer):
        # Iterate today's postings, newest first, with geotag data included.
        cl_h = CraigslistHousing(site=citi_code, category=category_code,
                                 filters={'posted_today': True})
        for result in cl_h.get_results(sort_by='newest', geotagged=True):
            response = requests.get(result["url"])
            time.sleep(2)  # politeness delay between posting-page fetches
            soup = BeautifulSoup(response.text, "html.parser")
            body = soup.find('section', {'id': 'postingbody'})
            if body is None:
                # Posting was removed or the page layout changed; record an
                # empty description instead of crashing on NoneType.text.
                description = ""
            else:
                description = body.text.replace("\n", " ").strip()
                # The original used str.strip(boilerplate), which strips a
                # CHARACTER SET from both ends, not the prefix string.
                # Remove the leading boilerplate text explicitly instead.
                if description.startswith(boilerplate):
                    description = description[len(boilerplate):].strip()
            phones = "/".join(phone_re.findall(description))
            emails = "/".join(email_re.findall(description))
            row = [
                str(result["id"]),
                str(result["name"]),
                phones,
                emails,
                description,
                str(result["url"]),
                str(result["datetime"]),
                str(result["last_updated"]),
                str(result["price"]),
                str(result["where"]),
                # Preserve the original " and " rendering of the geotag pair.
                str(result["geotag"]).replace(",", " and "),
            ]
            # csv.writer handles quoting/escaping, so fields no longer need
            # their commas stripped by hand to keep the file parseable.
            writer.writerow(row)
            print(row)

    # Context manager guarantees the file is closed even if a request or
    # parse step raises mid-scrape (the original leaked the handle then).
    with open("output.csv", "w", encoding='utf-8', newline='') as out_file:
        writer = csv.writer(out_file)
        # Header kept byte-identical (including the 'Discription' spelling)
        # so any downstream consumer of the column names keeps working.
        writer.writerow(["ID", "Title", "Phone_Number", "Email", "Discription",
                         "URL", "TimePosted", "LastUpdate", "Price", "Location",
                         "GeoCodes"])
        housing(cit, cat, writer)
@app.route('/thank-you')
def thank_you():
    """Render the confirmation page shown after a scrape has been started."""
    page = render_template('thank-you.html')
    return page
# Simple form handling using raw HTML forms
@app.route('/', methods=['GET', 'POST'])
def scrapping():
    """Landing page: show the scrape form (GET) or start a scrape (POST).

    On POST, reads 'city' and 'category' from the form, runs the scraper
    synchronously, then redirects to the thank-you page. A missing or empty
    field re-renders the form with an error message instead of letting
    ``request.form['...']`` raise (which Flask turns into an HTTP 400).
    """
    error = ""
    if request.method == 'POST':
        # .get() avoids the KeyError/400 the bracket lookup produced when a
        # field was absent from the submitted form.
        city = request.form.get('city', '').strip()
        category = request.form.get('category', '').strip()
        if city and category:
            start_scrapper(city, category)
            return redirect(url_for('thank_you'))
        # Previously `error` was initialized but never assigned, so the
        # template's error message could never be shown.
        error = "Both city and category are required."
    # Render the sign-up page (GET, or POST with missing fields)
    return render_template('index.html', message=error)
# Run the development server only when this file is executed directly —
# without the guard, merely importing the module started the server.
# NOTE(review): debug=True enables the interactive debugger; never use it
# on a publicly reachable deployment.
if __name__ == '__main__':
    app.run(debug=True)