def parseGoogleProfileEducation(soup):
    """Extract up to three education entries from a Google profile page.

    Args:
        soup: Parsed profile page. Presumably a BeautifulSoup tree, since
            only ``find``, ``findAll`` and ``getText`` are used on it.

    Returns:
        A flat list of 15 strings: five consecutive slots per entry
        (school, department, from, to, description) for at most three
        entries. Anything not found on the page is left as "".
    """
    maxEntries = 3
    fieldsPerEntry = 5
    educations = [""] * (maxEntries * fieldsPerEntry)

    eduModule = soup.find("div", {"class": "Ee h5a vna Jqc"})
    if eduModule is None:
        return educations

    # Only the first three entries are kept.
    eduRows = eduModule.findAll("li", {"class": "UZa"})[:maxEntries]
    for i, row in enumerate(eduRows):
        # A missing school node yields "" instead of an AttributeError.
        schoolNode = row.find("div", {"class": "PLa"})
        school = schoolNode.getText().strip() if schoolNode is not None else ""

        # The first "ija" div is handed to ut.parseTitleTime to obtain
        # department and time range; an optional second one is the
        # description.
        eduInfo = row.findAll("div", {"class": "ija"})
        if eduInfo:
            department, eduFrom, eduTo = ut.parseTitleTime(
                eduInfo[0].getText())
        else:
            # No info div at all: leave the fields blank rather than
            # failing on eduInfo[0].
            department = eduFrom = eduTo = ""

        if len(eduInfo) == 2:
            desc = eduInfo[1].getText().strip().replace("\n", " ")
        else:
            desc = ""

        start = fieldsPerEntry * i
        educations[start:start + fieldsPerEntry] = [
            school, department, eduFrom, eduTo, desc]

    return educations
def parseGoogleProfileEducation(soup):
    """Extract up to three education entries from a Google profile page.

    Args:
        soup: Parsed profile page. Presumably a BeautifulSoup tree, since
            only ``find``, ``findAll`` and ``getText`` are used on it.

    Returns:
        A flat list of 15 strings: five consecutive slots per entry
        (school, department, from, to, description) for at most three
        entries. Anything not found on the page is left as "".
    """
    maxEntries = 3
    fieldsPerEntry = 5
    educations = [""] * (maxEntries * fieldsPerEntry)

    eduModule = soup.find("div", {"class": "Ee h5a vna Jqc"})
    if eduModule is None:
        return educations

    # Only the first three entries are kept.
    eduRows = eduModule.findAll("li", {"class": "UZa"})[:maxEntries]
    for i, row in enumerate(eduRows):
        # A missing school node yields "" instead of an AttributeError.
        schoolNode = row.find("div", {"class": "PLa"})
        school = schoolNode.getText().strip() if schoolNode is not None else ""

        # The first "ija" div is handed to ut.parseTitleTime to obtain
        # department and time range; an optional second one is the
        # description.
        eduInfo = row.findAll("div", {"class": "ija"})
        if eduInfo:
            department, eduFrom, eduTo = ut.parseTitleTime(
                eduInfo[0].getText())
        else:
            # No info div at all: leave the fields blank rather than
            # failing on eduInfo[0].
            department = eduFrom = eduTo = ""

        if len(eduInfo) == 2:
            desc = eduInfo[1].getText().strip().replace("\n", " ")
        else:
            desc = ""

        start = fieldsPerEntry * i
        educations[start:start + fieldsPerEntry] = [
            school, department, eduFrom, eduTo, desc]

    return educations
def parseGoogleProfileWork(soup):
    """Extract occupation, skills and up to three jobs from a Google profile.

    Args:
        soup: Parsed profile page. Presumably a BeautifulSoup tree, since
            only ``find``, ``findAll`` and ``getText`` are used on it.

    Returns:
        A flat list of 17 strings: slot 0 is the occupation, slot 1 the
        skills, followed by five consecutive slots per job (company, job
        title, from, to, description) for at most three jobs. Anything
        not found on the page is left as "".
    """
    maxJobs = 3
    fieldsPerJob = 5
    jobsOffset = 2  # slots 0 and 1 hold occupation and skills
    works = [""] * (jobsOffset + maxJobs * fieldsPerJob)

    # Section titles are matched literally against the English labels.
    # NOTE: the original code carried a commented-out Traditional Chinese
    # variant of these labels, so a page served in another language will
    # not match "Occupation"/"Skills".
    occupationTitle = "Occupation"
    skillsTitle = "Skills"

    workModule = soup.find("div", {"class": "Ee l5a vna Tqc"})
    if workModule is None:
        return works

    for row in workModule.findAll("div", {"class": "wna"}):
        titleNode = row.find("div", {"class": "Cr"})
        title = titleNode.getText() if titleNode is not None else ""

        if title == occupationTitle:
            valueNode = row.find("div", {"class": "y4"})
            if valueNode is not None:
                works[0] = valueNode.getText()
            continue
        if title == skillsTitle:
            valueNode = row.find("div", {"class": "y4"})
            if valueNode is not None:
                works[1] = valueNode.getText()
            continue

        # Any other section is treated as the employment list
        # ("Employment" on an English page). Only the first three jobs
        # are kept.
        employRows = row.findAll("li", {"class": "UZa"})[:maxJobs]
        if not employRows:
            # Not an employment listing: skip it so it cannot blank out
            # jobs that were already parsed from another row.
            continue

        jobs = [""] * (maxJobs * fieldsPerJob)
        for i, entry in enumerate(employRows):
            # A missing company node yields "" instead of an
            # AttributeError.
            corpNode = entry.find("div", {"class": "PLa"})
            corp = corpNode.getText().strip() if corpNode is not None else ""

            # The first "ija" div is handed to ut.parseTitleTime to obtain
            # job title and time range; an optional second one is the
            # description.
            employInfo = entry.findAll("div", {"class": "ija"})
            if employInfo:
                job, jobFrom, jobTo = ut.parseTitleTime(
                    employInfo[0].getText())
            else:
                # No info div at all: leave the fields blank rather than
                # failing on employInfo[0].
                job = jobFrom = jobTo = ""

            if len(employInfo) == 2:
                description = employInfo[1].getText().strip().replace(
                    "\n", " ").replace("\t", " ")
            else:
                description = ""

            start = i * fieldsPerJob
            jobs[start:start + fieldsPerJob] = [
                corp, job, jobFrom, jobTo, description]

        works[jobsOffset:] = jobs

    return works
def parseGoogleProfileWork(soup):
    """Extract occupation, skills and up to three jobs from a Google profile.

    Args:
        soup: Parsed profile page. Presumably a BeautifulSoup tree, since
            only ``find``, ``findAll`` and ``getText`` are used on it.

    Returns:
        A flat list of 17 strings: slot 0 is the occupation, slot 1 the
        skills, followed by five consecutive slots per job (company, job
        title, from, to, description) for at most three jobs. Anything
        not found on the page is left as "".
    """
    maxJobs = 3
    fieldsPerJob = 5
    jobsOffset = 2  # slots 0 and 1 hold occupation and skills
    works = [""] * (jobsOffset + maxJobs * fieldsPerJob)

    # Section titles are matched literally against the English labels.
    # NOTE: the original code carried a commented-out Traditional Chinese
    # variant of these labels, so a page served in another language will
    # not match "Occupation"/"Skills".
    occupationTitle = "Occupation"
    skillsTitle = "Skills"

    workModule = soup.find("div", {"class": "Ee l5a vna Tqc"})
    if workModule is None:
        return works

    for row in workModule.findAll("div", {"class": "wna"}):
        titleNode = row.find("div", {"class": "Cr"})
        title = titleNode.getText() if titleNode is not None else ""

        if title == occupationTitle:
            valueNode = row.find("div", {"class": "y4"})
            if valueNode is not None:
                works[0] = valueNode.getText()
            continue
        if title == skillsTitle:
            valueNode = row.find("div", {"class": "y4"})
            if valueNode is not None:
                works[1] = valueNode.getText()
            continue

        # Any other section is treated as the employment list
        # ("Employment" on an English page). Only the first three jobs
        # are kept.
        employRows = row.findAll("li", {"class": "UZa"})[:maxJobs]
        if not employRows:
            # Not an employment listing: skip it so it cannot blank out
            # jobs that were already parsed from another row.
            continue

        jobs = [""] * (maxJobs * fieldsPerJob)
        for i, entry in enumerate(employRows):
            # A missing company node yields "" instead of an
            # AttributeError.
            corpNode = entry.find("div", {"class": "PLa"})
            corp = corpNode.getText().strip() if corpNode is not None else ""

            # The first "ija" div is handed to ut.parseTitleTime to obtain
            # job title and time range; an optional second one is the
            # description.
            employInfo = entry.findAll("div", {"class": "ija"})
            if employInfo:
                job, jobFrom, jobTo = ut.parseTitleTime(
                    employInfo[0].getText())
            else:
                # No info div at all: leave the fields blank rather than
                # failing on employInfo[0].
                job = jobFrom = jobTo = ""

            if len(employInfo) == 2:
                description = employInfo[1].getText().strip().replace(
                    "\n", " ").replace("\t", " ")
            else:
                description = ""

            start = i * fieldsPerJob
            jobs[start:start + fieldsPerJob] = [
                corp, job, jobFrom, jobTo, description]

        works[jobsOffset:] = jobs

    return works