コード例 #1
0
def parse_url(url):
    """Crawl ``url`` and return the extracted tags and educations as JSON.

    Every item produced by ``LinkedinCrawler().start(url)`` that has a
    ``'label'`` key contributes that label to ``"tags"``; every item that has
    a ``'school'`` key is included whole under ``"educations"``.

    If the crawler raises ``BadFormatError``, the value returned by
    ``process_error`` for a freshly constructed ``BadFormatError`` is
    returned instead of the JSON string.
    """
    crawler = LinkedinCrawler()
    try:
        scraped = crawler.start(url)
    except BadFormatError:
        # A new exception instance is handed over, not the one that was caught.
        return process_error(BadFormatError())

    # The crawl result is walked twice, once per output field, so it has to
    # be re-iterable for both fields to be populated.
    tags = []
    for entry in scraped:
        if 'label' in entry.keys():
            tags.append(entry.get('label'))

    educations = []
    for entry in scraped:
        if 'school' in entry.keys():
            educations.append(entry)

    return json.dumps({"tags": tags, "educations": educations})
コード例 #2
0
def parse_url(url):
    """Run the LinkedIn crawler on ``url`` and serialize the result to JSON.

    The returned JSON object has two members:

    * ``"tags"``: the ``'label'`` value of each crawled item that has a
      ``'label'`` key.
    * ``"educations"``: each crawled item that has a ``'school'`` key,
      unchanged.

    When ``crawler.start`` raises ``BadFormatError``, the function returns
    ``process_error(BadFormatError())`` instead.
    """

    def having(key):
        # Predicate factory: selects the items that expose ``key``.
        return lambda item: key in item.keys()

    crawler = LinkedinCrawler()
    try:
        crawled = crawler.start(url)
    except BadFormatError:
        return process_error(BadFormatError())

    # Members are evaluated top to bottom, so the labels are fully collected
    # before the second pass over the crawl result begins.
    result = {
        "tags": [item.get('label') for item in filter(having('label'), crawled)],
        "educations": list(filter(having('school'), crawled)),
    }
    return json.dumps(result)
コード例 #3
0
def parse_url(url):
    """Return the tags and educations crawled from ``url`` as a JSON string.

    Steps, in order:

    1. If ``get_status_code(url)`` equals 404, return
       ``error(PageNotFoundError())`` without starting the crawler.
    2. Start a ``LinkedinCrawler`` on ``url``; if it raises
       ``BadFormatError``, return ``error(BadFormatError())``.
    3. Otherwise return a JSON object whose ``"tags"`` member lists the
       ``'label'`` of every item having that key and whose ``"educations"``
       member lists every item having a ``'school'`` key.
    """
    page_missing = get_status_code(url) == 404
    if page_missing:
        return error(PageNotFoundError())

    crawler = LinkedinCrawler()
    try:
        found = crawler.start(url)
    except BadFormatError:
        return error(BadFormatError())
    else:
        # Formatting happens outside the guarded call, so only the crawl
        # itself is covered by the BadFormatError handler.
        payload = {
            "tags": [
                record.get('label')
                for record in found
                if 'label' in record.keys()
            ],
            "educations": [
                record for record in found if 'school' in record.keys()
            ],
        }
        return json.dumps(payload)
コード例 #4
0
ファイル: main.py プロジェクト: junjieChen0608/GreedySpider
from crawler import LinkedinCrawler

# Only the first and last name are asked for interactively (in that order);
# the remaining fields are passed along as empty strings.
info_dict = {
    "firstName": input("Please enter first name:"),
    "lastName": input("Please enter last name:"),
    **dict.fromkeys(("schoolName", "degree", "major", "gradYear"), ""),
}

# The second constructor argument is an empty string.
# NOTE(review): its meaning is defined by LinkedinCrawler - confirm there.
myCrawler = LinkedinCrawler(info_dict, "")
myCrawler.crawl_linkedin()