# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('indeed'); the original line breaks and indentation are lost, so the
# line does not parse as written. Visible contents, in order:
#   * module imports (Python 2 names such as Queue and HTMLParser);
#   * module-level `snappy = snappy_class.SnappyContent('indeed')`;
#   * get_contacts(): returns a dict with empty 'email' and 'phone' strings;
#   * the start of get_location(soap_object): splits the text of the
#     <span itemprop="addressLocality"> on ',' and takes the first piece as the city.
#     The remainder of get_location() is cut off in this excerpt.
# NOTE(review): the find() result is used without a None check; presumably a missing
# span would make .getText() raise - confirm against the full file.
from bs4 import BeautifulSoup from slugify import slugify import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('indeed') # grab contacts def get_contacts(): return { 'email': '', 'phone':'' } # grab location def get_location(soap_object): statecity = soap_object.find("span",{ "itemprop" : "addressLocality" }).getText().split(',') city = statecity[0]
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('jobtocareer'); line breaks and indentation are lost, and the excerpt
# may begin partway through the import list. Visible contents, in order:
#   * module imports (Python 2 names such as Queue, HTMLParser and urlparse);
#   * module-level `snappy = snappy_class.SnappyContent('jobtocareer')`;
#   * get_contacts(): returns a dict with empty 'email' and 'phone' strings;
#   * the start of get_location(soap_object): returns None when soap_object is None,
#     then initialises `zip` and `city` to ''. The rest of the function is cut off.
# NOTE(review): the local name `zip` shadows the builtin zip() inside get_location().
from slugify import slugify import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import urlparse import re import datetime from datetime import timedelta from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('jobtocareer') # grab contacts def get_contacts(): return {'email': '', 'phone': ''} # grab location def get_location(soap_object): if soap_object is None: return None zip = '' city = ''
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('ziprecruiter'); line breaks and indentation are lost, so the line
# does not parse as written. Visible contents, in order:
#   * module imports (Python 2 names such as Queue and HTMLParser);
#   * module-level `snappy = snappy_class.SnappyContent('ziprecruiter')`;
#   * the start of get_content(soap_object): when a <span itemprop="title"> is found,
#     its text is round-tripped through utf-8 encode/str/decode, passed to unidecode(),
#     printed (Python 2 print statement), and converted with html2text.html2text() into
#     `name`; otherwise `name` is "". `slug` is then slugify(name). The function is cut
#     off at the "# description" marker.
# NOTE(review): html2text is not among the imports visible in this excerpt - confirm
# it is imported elsewhere in the full file, otherwise the title branch raises NameError.
# NOTE(review): the span is looked up twice with the same find() call.
from bs4 import BeautifulSoup from slugify import slugify import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta import sys from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('ziprecruiter') # grab content def get_content(soap_object): # name if soap_object.find('span',{'itemprop':'title'}) is not None: title = unidecode( str(soap_object.find('span',{'itemprop':'title'}).getText().encode('utf-8')).decode('utf-8') ) print title name = html2text.html2text(title) else: name = "" #slug slug = slugify(name) # description
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('groovejob'); line breaks and indentation are lost, and the excerpt
# appears to begin partway through the import list. Visible contents, in order:
#   * module imports (Python 2 names such as Queue and HTMLParser);
#   * module-level `snappy = snappy_class.SnappyContent('groovejob')`;
#   * get_contacts(): returns a dict with empty 'email' and 'phone' strings;
#   * the start of get_location(soap_object): splits the text of the
#     <div class="location"> on ',', takes the first piece as the city and initialises
#     `state` to ''. The rest of the function is cut off.
# NOTE(review): `import datetime` is followed by `from datetime import datetime`, so
# the name `datetime` ends up bound to the class, not the module.
# NOTE(review): the find() result is used without a None check; presumably a missing
# div would make .getText() raise - confirm against the full file.
import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from datetime import datetime from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('groovejob') # grab contacts def get_contacts(): return {'email': '', 'phone': ''} # grab location def get_location(soap_object): statecity = soap_object.find("div", { "class": "location" }).getText().split(',') city = statecity[0] state = ''
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('care'); line breaks are lost, and the excerpt appears to begin partway
# through the import list. Visible contents, in order:
#   * module imports (the datetime/timedelta imports appear twice);
#   * module-level `snappy = snappy_class.SnappyContent('care')`;
#   * `snappy.hdr` is assigned a dict of HTTP request headers (User-Agent, Accept*,
#     Cache-Control, Cookie, Connection, Host, Pragma) targeting host www.care.com.
# The inline comment says the Cookie value is obtained by opening care.com in a browser.
# NOTE(review): the Cookie is a hard-coded capture of a browser session; presumably it
# goes stale and must be refreshed by hand - consider loading it from configuration.
# NOTE(review): how snappy.hdr is consumed is not visible here; presumably SnappyContent
# sends it with its HTTP requests - confirm in snappy_class.
from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from datetime import datetime import Queue import threading import os import datetime from datetime import timedelta from datetime import datetime import snappy_class snappy = snappy_class.SnappyContent('care') snappy.hdr = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate, sdch', 'Accept-Language': 'en-US,en;q=0.8', 'Cache-Control': 'no-cache', # open care.com in browser to get the cookie 'Cookie': 'n_tc=1191%7C1308%7C1380%7C1286%7C1234; mt.v=2.283603359.1433558150693; __utma=174140029.1052334686.1433558152.1434906449.1436926286.5; __utmz=174140029.1433558152.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __hstc=174140029.7e5a41f2ffc7f7dca4b4e49b58ae28e3.1433558153452.1434906461782.1436926287709.4; hsfirstvisit=https%3A%2F%2Fwww.care.com%2F||1433558153450; hubspotutk=7e5a41f2ffc7f7dca4b4e49b58ae28e3; __qca=P0-1588461042-1433558154545; __utmv=174140029.|3=testCellInfo=1234%7C1286%7C1380=1; lc=%7B%22syncedWithSession%22%3Atrue%2C%22geolocation%22%3Anull%2C%22overrideGeolocation%22%3Afalse%2C%22lastLocation%22%3Anull%2C%22access%22%3Afalse%7D; plvj=5649590; csc=dom-ord-prodwebapp-101436692646921; vc=834196cd-e7b6-4816-8d50-6b6c85e33a9e; ROUTE2=d; __utmb=174140029.4.10.1436926286; __utmc=174140029; __utmt=1; __hssrc=1; __hssc=174140029.4.1436926287709; everyForm=%7B%22vertical%22%3A%22Children%22%2C%22seekerProvider%22%3A%22sitter%22%2C%22l2SelectedIndex%22%3A0%2C%22serviceId%22%3A%22CHILDCARE%22%7D', 'Connection': 'keep-alive', 'Host': 'www.care.com', 'Pragma': 'no-cache' }
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('linkedin'); line breaks and indentation are lost, so the line does not
# parse as written. Visible contents, in order:
#   * module imports (Python 2 names such as Queue and HTMLParser);
#   * module-level `snappy = snappy_class.SnappyContent('linkedin')`;
#   * get_contacts(soap_object): if a <span class="company-name-text"> exists, its text
#     is returned under the 'email' key, otherwise 'email' is ''; 'phone' is always '';
#   * only the `def get_location(soap_object):` header - its body is cut off.
# NOTE(review): the value stored under 'email' is the company-name text, not an
# e-mail address - confirm this is intentional for downstream consumers.
# NOTE(review): the span is looked up twice with the same find() call.
from bs4 import BeautifulSoup from slugify import slugify import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('linkedin') # grab contacts def get_contacts(soap_object): if soap_object.find("span", {"class": "company-name-text"}) is not None: email = soap_object.find("span", { "class": "company-name-text" }).getText() else: email = '' return {'email': email, 'phone': ''} # grab location def get_location(soap_object):
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('coolworks'); line breaks and indentation are lost, and the excerpt
# appears to begin partway through the import list. Visible contents, in order:
#   * module imports, including an explicit `import _strptime` and a repeated
#     `from datetime import datetime` (which rebinds `datetime` to the class);
#   * module-level `snappy = snappy_class.SnappyContent('coolworks')`;
#   * get_contacts(): returns a dict with empty 'email' and 'phone' strings;
#   * the start of get_location(soap_object): looks up <meta itemprop="addressLocality">
#     and, when found, replaces `address` with its getText(); then looks up
#     <meta itemprop="addressRegion"> into `state`. The rest is cut off.
# NOTE(review): presumably `import _strptime` pre-loads the module before worker threads
# call strptime - confirm; the reason is not stated in the visible code.
# NOTE(review): for <meta> elements the value usually lives in the `content` attribute,
# so getText() may return an empty string - verify against real pages.
import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from datetime import datetime from unidecode import unidecode import os import _strptime from datetime import datetime import snappy_class snappy = snappy_class.SnappyContent('coolworks') # grab contacts def get_contacts(): return { 'email': '', 'phone':'' } # grab location def get_location(soap_object): address = soap_object.find("meta",{ "itemprop" : "addressLocality" }) if address is not None: address = address.getText() state = soap_object.find('meta',{'itemprop':'addressRegion'})
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('jobshiringnearme', False); line breaks and indentation are lost, and
# the excerpt may begin partway through the import list. Visible contents, in order:
#   * module imports (Python 2 names such as Queue and HTMLParser);
#   * module-level `snappy = snappy_class.SnappyContent('jobshiringnearme', False)`
#     (the meaning of the second argument is not visible here);
#   * get_contacts(): returns a dict with empty 'email' and 'phone' strings;
#   * the start of get_location(soap_object): collects all <p> elements, then begins a
#     `location_value` dict whose 'address' is the text of the second <p>. The dict
#     literal is cut off mid-way.
# NOTE(review): the bare `items[1]` expression statement has no visible effect (beyond
# raising if fewer than two <p> elements exist) - looks like leftover debugging.
from slugify import slugify import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from datetime import datetime import sys from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('jobshiringnearme', False) # grab contacts def get_contacts(): return {'email': '', 'phone': ''} # grab location def get_location(soap_object): items = soap_object.findAll('p') items[1] location_value = { 'address': items[1].getText(),
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('careerbuilder'); line breaks and indentation are lost, so the line
# does not parse as written. Visible contents, in order:
#   * module imports (Python 2 names such as Queue and HTMLParser);
#   * module-level `snappy = snappy_class.SnappyContent('careerbuilder')`;
#   * the start of get_contacts(soap_object): returns a dict whose 'email' is the text
#     of the <h4 class="job-text"> nested inside <div class="medium-3"> inside
#     <div class="job-information">, with 'phone' set to ''. The dict literal is cut off
#     before its closing brace.
# NOTE(review): the chained find() calls have no None checks; presumably any missing
# element makes the next call raise - confirm how callers handle that.
# NOTE(review): what text that <h4> holds is not visible here - confirm it is an e-mail.
from bs4 import BeautifulSoup from slugify import slugify import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('careerbuilder') # grab contacts def get_contacts(soap_object): return { 'email': soap_object.find("div", { "class": "job-information" }).find("div", { "class": "medium-3" }).find("h4", { "class": "job-text" }).getText(), 'phone': ''
# NOTE(review): whitespace-collapsed excerpt of a module that instantiates
# SnappyContent('simplyhired'); line breaks and indentation are lost, so the line does
# not parse as written. Visible contents, in order:
#   * module imports (Python 2 names such as Queue and HTMLParser);
#   * module-level `snappy = snappy_class.SnappyContent('simplyhired')`;
#   * get_contacts(): returns a dict with empty 'email' and 'phone' strings;
#   * the start of get_location(soap_object): `city` defaults to '' and is replaced by
#     the text of <span itemprop="addressLocality"> when that span exists; `state` is
#     the text of <span itemprop="addressRegion">. The rest of the function is cut off.
# NOTE(review): whether the `state` assignment sits inside the `if` cannot be determined
# from the flattened text. The addressRegion lookup itself has no None check - confirm
# against the full file.
from bs4 import BeautifulSoup from slugify import slugify import json from pprint import pprint import Queue import threading from time import gmtime, strftime from HTMLParser import HTMLParser import re import datetime from datetime import timedelta from unidecode import unidecode import os import snappy_class snappy = snappy_class.SnappyContent('simplyhired') # grab contacts def get_contacts(): return {'email': '', 'phone': ''} # grab location def get_location(soap_object): city = '' if soap_object.find("span", {"itemprop": "addressLocality"}) is not None: city = soap_object.find("span", { "itemprop": "addressLocality" }).getText() state = soap_object.find("span", {"itemprop": "addressRegion"}).getText()