Esempio n. 1
0
from bs4 import BeautifulSoup
from slugify import slugify
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from unidecode import unidecode

import os
import snappy_class
# Module-level SnappyContent instance, constructed with the string 'indeed'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('indeed')


# grab contacts
def get_contacts():
    """Return a new contacts dict whose 'email' and 'phone' values are empty strings."""
    contacts = {}
    for field in ('email', 'phone'):
        contacts[field] = ''
    return contacts


# grab location
def get_location(soap_object):
    
    statecity = soap_object.find("span",{ "itemprop" : "addressLocality" }).getText().split(',')
    city = statecity[0]
Esempio n. 2
0
from slugify import slugify
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import urlparse
import re
import datetime
from datetime import timedelta
from unidecode import unidecode
import os

import snappy_class
# Module-level SnappyContent instance, constructed with the string 'jobtocareer'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('jobtocareer')


# grab contacts
def get_contacts():
    """Return a new contacts dict with blank 'email' and 'phone' entries."""
    return dict.fromkeys(('email', 'phone'), '')


# grab location
def get_location(soap_object):

    if soap_object is None:
        return None

    zip = ''
    city = ''
Esempio n. 3
0
from bs4 import BeautifulSoup
from slugify import slugify
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
import sys
from unidecode import unidecode
import os
import snappy_class
# Module-level SnappyContent instance, constructed with the string 'ziprecruiter'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('ziprecruiter')



# grab content
def get_content(soap_object):
    # name
    if soap_object.find('span',{'itemprop':'title'}) is not None:
        title = unidecode( str(soap_object.find('span',{'itemprop':'title'}).getText().encode('utf-8')).decode('utf-8') )
        print title
        name = html2text.html2text(title)
    else:
        name = ""
    #slug
    slug = slugify(name)
    # description
Esempio n. 4
0
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from datetime import datetime
from unidecode import unidecode

import os
import snappy_class

# Module-level SnappyContent instance, constructed with the string 'groovejob'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('groovejob')


# grab contacts
def get_contacts():
    """Return a new contacts dict; both 'email' and 'phone' are empty strings."""
    contacts = dict()
    contacts['email'] = ''
    contacts['phone'] = ''
    return contacts


# grab location
def get_location(soap_object):

    statecity = soap_object.find("div", {
        "class": "location"
    }).getText().split(',')
    city = statecity[0]
    state = ''
Esempio n. 5
0
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from datetime import datetime
import Queue
import threading
import os

import datetime
from datetime import timedelta
from datetime import datetime

import snappy_class
# Module-level SnappyContent instance, constructed with the string 'care'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('care')
# Assign a fixed set of browser-like HTTP headers (desktop Chrome on OS X,
# Host www.care.com) to the instance's `hdr` attribute.
# NOTE(review): presumably SnappyContent sends these with each request —
# verify how `hdr` is consumed in snappy_class.
snappy.hdr = {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36',
    'Accept':
    'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Cache-Control': 'no-cache',
    # To obtain this value, open care.com in a browser and copy the cookie
    # from that session.
    # NOTE(review): this is a hard-coded cookie captured from one browser
    # session; it will presumably go stale and need refreshing by hand, and
    # it embeds visitor/session identifiers in source — confirm that is acceptable.
    'Cookie':
    'n_tc=1191%7C1308%7C1380%7C1286%7C1234; mt.v=2.283603359.1433558150693; __utma=174140029.1052334686.1433558152.1434906449.1436926286.5; __utmz=174140029.1433558152.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __hstc=174140029.7e5a41f2ffc7f7dca4b4e49b58ae28e3.1433558153452.1434906461782.1436926287709.4; hsfirstvisit=https%3A%2F%2Fwww.care.com%2F||1433558153450; hubspotutk=7e5a41f2ffc7f7dca4b4e49b58ae28e3; __qca=P0-1588461042-1433558154545; __utmv=174140029.|3=testCellInfo=1234%7C1286%7C1380=1; lc=%7B%22syncedWithSession%22%3Atrue%2C%22geolocation%22%3Anull%2C%22overrideGeolocation%22%3Afalse%2C%22lastLocation%22%3Anull%2C%22access%22%3Afalse%7D; plvj=5649590; csc=dom-ord-prodwebapp-101436692646921; vc=834196cd-e7b6-4816-8d50-6b6c85e33a9e; ROUTE2=d; __utmb=174140029.4.10.1436926286; __utmc=174140029; __utmt=1; __hssrc=1; __hssc=174140029.4.1436926287709; everyForm=%7B%22vertical%22%3A%22Children%22%2C%22seekerProvider%22%3A%22sitter%22%2C%22l2SelectedIndex%22%3A0%2C%22serviceId%22%3A%22CHILDCARE%22%7D',
    'Connection': 'keep-alive',
    'Host': 'www.care.com',
    'Pragma': 'no-cache'
}
Esempio n. 6
0
from bs4 import BeautifulSoup
from slugify import slugify
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from unidecode import unidecode

import os
import snappy_class
# Module-level SnappyContent instance, constructed with the string 'linkedin'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('linkedin')


# grab contacts
def get_contacts(soap_object):
    """Build the contacts dict from the page's company-name element.

    Looks up the first ``<span class="company-name-text">`` via
    ``soap_object.find`` and uses its text as the 'email' value; when no
    such element exists, 'email' is an empty string. 'phone' is always an
    empty string.

    The lookup is performed once and its result reused, instead of
    running the same search a second time to read the text.

    :param soap_object: object exposing ``find(name, attrs)`` whose result
        is either ``None`` or has ``getText()`` — presumably a
        BeautifulSoup document; verify against the caller.
    :return: dict with the keys 'email' and 'phone'.
    """
    # NOTE(review): the 'email' field is filled with the company-name text,
    # not an address — confirm this is what downstream consumers expect.
    company_tag = soap_object.find("span", {"class": "company-name-text"})
    if company_tag is None:
        email = ''
    else:
        email = company_tag.getText()
    return {'email': email, 'phone': ''}


# grab location
def get_location(soap_object):
Esempio n. 7
0
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from datetime import datetime
from unidecode import unidecode
import os
import _strptime
from datetime import datetime
import snappy_class
# Module-level SnappyContent instance, constructed with the string 'coolworks'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('coolworks')


# grab contacts
def get_contacts():
    """Return a new contacts dict mapping 'email' and 'phone' to empty strings."""
    return {key: '' for key in ('email', 'phone')}

# grab location
def get_location(soap_object):
    address = soap_object.find("meta",{ "itemprop" : "addressLocality" })
    if address is not None:
        address = address.getText()
    state = soap_object.find('meta',{'itemprop':'addressRegion'})
Esempio n. 8
0
from slugify import slugify
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from datetime import datetime
import sys
from unidecode import unidecode
import os
import snappy_class
# Module-level SnappyContent instance, constructed with the string
# 'jobshiringnearme' and a second positional argument of False.
# NOTE(review): the meaning of both arguments is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('jobshiringnearme', False)


# grab contacts
def get_contacts():
    """Return a new contacts dict in which 'email' and 'phone' are both ''."""
    pairs = [('email', ''), ('phone', '')]
    return dict(pairs)


# grab location
def get_location(soap_object):

    items = soap_object.findAll('p')
    items[1]

    location_value = {
        'address': items[1].getText(),
Esempio n. 9
0
from bs4 import BeautifulSoup
from slugify import slugify
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from unidecode import unidecode

import os
import snappy_class
# Module-level SnappyContent instance, constructed with the string 'careerbuilder'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('careerbuilder')


# grab contacts
def get_contacts(soap_object):
    return {
        'email':
        soap_object.find("div", {
            "class": "job-information"
        }).find("div", {
            "class": "medium-3"
        }).find("h4", {
            "class": "job-text"
        }).getText(),
        'phone':
        ''
Esempio n. 10
0
from bs4 import BeautifulSoup
from slugify import slugify
import json
from pprint import pprint
import Queue
import threading
from time import gmtime, strftime
from HTMLParser import HTMLParser
import re
import datetime
from datetime import timedelta
from unidecode import unidecode

import os
import snappy_class
# Module-level SnappyContent instance, constructed with the string 'simplyhired'.
# NOTE(review): what that argument selects is defined in snappy_class — confirm there.
snappy = snappy_class.SnappyContent('simplyhired')


# grab contacts
def get_contacts():
    """Return a new contacts dict; 'email' and 'phone' are left as empty strings."""
    blank = ''
    contacts = {'email': blank, 'phone': blank}
    return contacts


# grab location
def get_location(soap_object):
    city = ''
    if soap_object.find("span", {"itemprop": "addressLocality"}) is not None:
        city = soap_object.find("span", {
            "itemprop": "addressLocality"
        }).getText()
    state = soap_object.find("span", {"itemprop": "addressRegion"}).getText()