def test_experience_parsing():
    """
    Parse experiences out of stored resume XML instead of calling the API.

    For every entry in XML_MAPS:
    1. The number of parsed experiences must equal the entry's 'experience_len'.
    2. Every parsed experience must validate against EXPERIENCE_SCHEMA.

    The GET_626a / GET_626b fixtures are then parsed and validated as well.
    """
    for fixture in XML_MAPS:
        nodes = bs4(fixture['tree_name'], 'lxml').findAll('experience')
        parsed = parse_candidate_experiences(nodes)
        assert len(parsed) == fixture['experience_len']
        for item in parsed:
            assert validate(item, EXPERIENCE_SCHEMA, format_checker=FormatChecker()) is None

    # Regression check for the KeyError reported in JIRA (GET-626).
    # Both fixtures are parsed before any of their results are validated.
    regression_batches = []
    for document in (GET_626a, GET_626b):
        regression_nodes = bs4(document, 'lxml').findAll('experience')
        regression_batches.append(parse_candidate_experiences(regression_nodes))

    for batch in regression_batches:
        for item in batch:
            assert validate(item, EXPERIENCE_SCHEMA, format_checker=FormatChecker()) is None
def test_g646_accuracy():
    """Check contact, experience and education parsing for the GET_646 fixture."""
    # ---- Contact details ----
    contact_nodes = bs4(GET_646, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)

    assert first == 'Patrick'
    assert last == 'Kaldawy'
    assert GET_646_ADDRESS in addresses

    expected_phones = (
        (u'+18583531111', 'Home'),
        (u'+18583532222', 'Mobile'),
        (u'+18583535555', 'Work'),
        (u'+18583533333', 'Work'),
        (u'+18583534444', 'Home Fax'),
        (u'+96170345340', 'Mobile'),
    )
    for number, label in expected_phones:
        assert {'value': number, 'label': label} in phones

    # ---- Experiences ----
    experience_nodes = bs4(GET_646, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for entry in experiences:
        assert len(entry['bullets']) == 1

    def first_with_org(name):
        # First parsed experience whose organization equals `name`, else None.
        return next((item for item in experiences if item["organization"] == name), None)

    # Disabled lookups (known gaps):
    #   * u'Technical Difference' - the name is currently not grabbed.
    #   * u'Avalon Digital Marketing Systems, Inc (European Division' - the
    #     org name is returned without the division in parens.
    convergence = first_with_org(u'Convergence Inc. Llc')
    avalon = first_with_org(u'Avalon Digital Marketing Systems, Inc')
    assert None not in [convergence, avalon]

    assert convergence['start_month'] == 3
    assert convergence['start_year'] == 2004
    assert convergence['end_month'] == 9
    assert convergence['end_year'] == 2004
    assert avalon['start_year'] == 2002
    assert avalon['end_year'] == 2003

    # ---- Educations ----
    education_nodes = bs4(GET_646, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)

    # Disabled lookup: u'California State University, Chico'.
    butte = next(
        (school for school in educations if school["school_name"] == u'Butte College'),
        None)
    assert butte

    expected_degree = {
        'title': u'A.A',
        'type': 'Associate of Arts',
        'start_month': 1,
        'start_year': 1995,
        'end_month': 1,
        'end_year': 1996,
        'gpa_num': None,
        'bullets': [{'major': None, 'comments': None}],
    }
    assert expected_degree in butte['degrees']
def test_g626b_accuracy():
    """Check contact, experience and education parsing for the GET_626b fixture."""
    # ---- Contact details ----
    contact_nodes = bs4(GET_626b, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)

    assert first == 'Kate'
    assert last == 'Begonia'
    assert GET_626b_ADDRESS in addresses
    # Disabled check: {'value': u'503.493.1548'} in phones

    # ---- Experiences ----
    experience_nodes = bs4(GET_626b, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for entry in experiences:
        assert len(entry['bullets']) == 1

    # Disabled lookups (organization / position):
    #   * u'Sage Software' - positions after the ',' in "Director" are not parsed:
    #       u'Director, Digital Marketing Communications'
    #       u'Director, Creative Services'
    #   * organization u'None' - a self employed consulting job.
    #   * u'Oracle Corporation':
    #       u'Senior Director, Branding'
    #       u'Senior Director, Global Advertising'
    #       u'Director, Global Advertising and Direct Marketing Programs'
    #       u'Senior Advertising Manager, Domestic Advertising'
    #       u'Marketing Manager, Industry Solutions Marketing'

    # ---- Educations ----
    education_nodes = bs4(GET_626b, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)

    sfsu = next(
        (school for school in educations
         if school["school_name"] == u'San Francisco State University'),
        None)
    assert sfsu

    expected_degree = {
        'title': u'Bachelor of Arts',
        'type': "Bachelor's",
        'start_month': None,
        'start_year': None,
        'end_month': None,
        'end_year': None,
        'gpa_num': None,
        'bullets': [{'major': u'Humanities', 'comments': None}],
    }
    assert expected_degree in sfsu['degrees']
def test_bullet_parsing():
    """
    Check the number of newlines in the first bullet description of known
    organizations parsed from the SQUARE_BULLETS fixture.

    Experiences for organizations other than the three listed below are not
    checked.
    """
    # Name the parser explicitly, as every other test in this module does, so
    # the result does not depend on which parser BeautifulSoup guesses.
    soup = bs4(SQUARE_BULLETS, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(soup)
    for experience in experiences:
        organization = experience['organization']
        if organization == u'Verizon Wireless':
            assert experience['bullets'][0]['description'].count('\n') == 9
        elif organization == u'Wal-mart':
            assert experience['bullets'][0]['description'].count('\n') == 7
        elif organization == u'Jamaica Savings Bank':
            assert experience['bullets'][0]['description'].count('\n') == 2
def test_parsing_experiences():
    """
    Parse every attribute of `job_combinations` whose name has no double
    underscore and validate each resulting experience against EXPERIENCE_SCHEMA.
    """
    for attribute_name in dir(job_combinations):
        if "__" in attribute_name:
            # Skip dunder attributes such as __name__ / __doc__.
            continue
        markup = getattr(job_combinations, attribute_name)
        nodes = bs4(markup, 'lxml').findAll('experience')
        for parsed in parse_candidate_experiences(nodes):
            assert validate(parsed, EXPERIENCE_SCHEMA, format_checker=FormatChecker()) is None
def test_pdf13_accuracy():
    """Check contact, experience and education parsing for the PDF_13 fixture."""
    # ---- Contact details ----
    contact_nodes = bs4(PDF_13, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)

    assert first == 'Bruce'
    assert last == 'Parkey'
    assert {'value': u'+16309302756', 'label': 'Other'} in phones

    # ---- Experiences ----
    experience_nodes = bs4(PDF_13, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for entry in experiences:
        assert len(entry['bullets']) == 1

    def check_fields(record, expected_pairs):
        # Compare the listed fields one by one, in the order given.
        for field, value in expected_pairs:
            assert record[field] == value

    sagamore = next(
        (job for job in experiences if job["organization"] == u'Sagamore Apps, Inc'),
        None)
    assert sagamore
    # Disabled check: position == u'Owner and Senior iOS Contract Developer'
    check_fields(sagamore, (
        ('start_month', 1),
        ('start_year', 2008),
        ('city', u'Darien'),
        ('state', u'IL'),
    ))

    rapid = next(
        (job for job in experiences
         if (job["organization"] == u'Rapid Solutions Group' and
             job['position'] == u'Vice President and Chief Information Officer')),
        None)
    assert rapid
    check_fields(rapid, (
        ('start_month', 1),
        ('start_year', 2003),
        ('end_month', 1),
        ('end_year', 2007),
        ('city', u'Mt. Prospect'),
        ('state', u'IL'),
    ))

    ams = next(
        (job for job in experiences if job["organization"] == u'Ams Direct, Inc'),
        None)
    assert ams
    # Disabled check: position == u'Vice President Information Technology'
    check_fields(ams, (
        ('start_month', 1),
        ('start_year', 1999),
        ('end_month', 1),
        ('end_year', 2003),
        ('city', u'Burr Ridge'),
        ('state', u'IL'),
    ))

    # ---- Educations ----
    education_nodes = bs4(PDF_13, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)

    purdue = next(
        (school for school in educations if school["school_name"] == u'Purdue University'),
        None)
    assert purdue

    expected_degree = {
        'title': u'Bachelor of Science',
        'type': "Bachelor's",
        'start_month': None,
        'start_year': None,
        'end_month': None,
        'end_year': None,
        'gpa_num': None,
        'bullets': [{'major': u'Information Systems', 'comments': None}],
    }
    assert expected_degree in purdue['degrees']
def test_pdf_accuracy():
    """Check contact, experience and education parsing for the PDF fixture."""
    # ---- Contact details ----
    contact_nodes = bs4(PDF, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)

    assert first == 'Mark'
    assert last == 'Greene'
    assert PDF_ADDRESS in addresses
    assert {'value': u'+17275651234', 'label': 'Other'} in phones

    # ---- Experiences ----
    experience_nodes = bs4(PDF, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for entry in experiences:
        assert len(entry['bullets']) == 1

    # Disabled lookups (organization / position):
    #   * u'SmartSource' / u'Technical Support'
    #   * u'Aerotek, Bank of America' / u'Mortgage Affiliate Services'
    #   * u'JCIII & Associates' / u'Document Reviewer'
    #     TODO: look into raw (JCIII & Associates)

    # Each row: organization, position, then the expected values for
    # `checked_fields`, in the same order.
    checked_fields = ('start_month', 'start_year', 'end_month', 'end_year', 'city', 'state')
    expected_jobs = (
        (u'CHASE', u'Sr. Loan Processor',
         (5, 2012, 10, 2012, u'Tampa', u'FL')),
        (u'CHASE', u'Business Analyst/Loss Mitigation Specialist',
         (7, 2010, 5, 2012, u'Tampa', u'FL')),
        (u'Computer Generated Solutions', u'Team Lead',
         (12, 2007, 12, 2008, u'Tampa', u'FL')),
        (u'Computer Generated Solutions', u'Desktop Support Agent',
         (9, 2006, 2, 2007, u'Tampa', u'FL')),
        (u'Advanced System Design', u'Software Analyst',
         (10, 2005, 5, 2006, u'Tallahassee', u'FL')),
        (u'Bmc Solutions', u'Desktop Deployment Technician',
         (7, 2005, 8, 2005, u'Tallahassee', u'FL')),
    )
    for organization, position, wanted_values in expected_jobs:
        match = next(
            (job for job in experiences
             if (job["organization"] == organization and job['position'] == position)),
            None)
        assert match
        for field, value in zip(checked_fields, wanted_values):
            assert match[field] == value

    # ---- Educations ----
    education_nodes = bs4(PDF, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)

    itt = next(
        (school for school in educations
         if school["school_name"] == u'ITT Technical Institute'),
        None)
    assert itt

    expected_degree = {
        'title': u'Bachelor of Science Degree',
        'type': "Bachelor's",
        'start_month': None,
        'start_year': None,
        'end_month': 1,
        'end_year': 2013,
        'gpa_num': None,
        'bullets': [{'major': u'Information Systems/Cyber Securities', 'comments': None}],
    }
    assert expected_degree in itt['degrees']
def test_g626a_accuracy():
    """Check contact, experience and education parsing for the GET_626a fixture."""
    # ---- Contact details ----
    contact_nodes = bs4(GET_626a, 'lxml').findAll('contact')
    # The name is parsed but not asserted on; the checks below are disabled:
    #   first_name == 'Yetunde', last_name == 'Laniran'
    parsed_name = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)

    assert GET_626a_ADDRESS in addresses
    assert {'value': u'+15033330350', 'label': 'Other'} in phones

    # ---- Experiences ----
    experience_nodes = bs4(GET_626a, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for entry in experiences:
        assert len(entry['bullets']) == 1

    def dated(start_month, start_year, end_month, end_year):
        # Expected start/end pairs in the order they are checked.
        return (('start_month', start_month), ('start_year', start_year),
                ('end_month', end_month), ('end_year', end_year))

    portland = (('city', u'Portland'), ('state', u'OR'))

    # Each row: organization name, then (field, expected value) pairs checked
    # in order. Disabled lookups: u'Via Training', u'Oregon Graduate Institute'.
    expected_jobs = (
        (u'Census Bureau',
         (('start_month', 9), ('start_year', 2009), ('city', u'Bothell'),
          ('state', u'WA'), ('position', u'Partnership Specialist'))),
        (u'Nw Facility Maintenance',
         (('position', u'Network Admin/Project Coordinator'),)
         + dated(1, 2007, 9, 2009) + portland),
        (u'Oregon Catholic Press',
         (('position', u'Project Manager/ Computer Support Specialist'),)
         + dated(9, 2001, 12, 2006) + portland),
        (u'Teksystems',
         (('position', u'Computer Support Specialist'),)
         + dated(7, 2000, 7, 2001) + portland),
        (u'Manpower',
         (('position', u'Computer Support Specialist'),)
         + dated(9, 1999, 7, 2000) + portland),
        (u'Portland Youth Builders',
         (('position', u'Computer Instructor/Support Specialist'),)
         + dated(4, 1999, 9, 1999) + portland),
        # Disabled check: position == u'Instructor- Linguistics'
        (u'University Of Oregon', dated(12, 1998, 3, 1999)),
        # Disabled check: position == u'Instructor - Language and Culture'
        (u'Portland State University', dated(7, 1998, 8, 1998)),
        # Disabled check: position == u'Research Associcate - Linguistics'
        (u'University Of North Carolina', dated(1, 1995, 6, 1997)),
    )
    for organization, expected_pairs in expected_jobs:
        match = next(
            (job for job in experiences if job["organization"] == organization),
            None)
        assert match
        for field, value in expected_pairs:
            assert match[field] == value

    # ---- Educations ----
    education_nodes = bs4(GET_626a, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)

    def degree(title, kind, major, end_year):
        # All expected degrees here share the same unset/start fields.
        return {
            'title': title,
            'type': kind,
            'start_month': None,
            'start_year': None,
            'end_month': 1,
            'end_year': end_year,
            'gpa_num': None,
            'bullets': [{'major': major, 'comments': None}],
        }

    expected_degrees = (
        (u'University of Phoenix',
         degree(u'Masters', "Master's", u'Information Systems/Management', 2007)),
        (u'Heald College',
         degree(u'Diploma', 'Diploma', u'Computer Technology', 1999)),
        (u'Cornell University',
         degree(u'Master of Arts Degree', "Master's", u'Linguistics', 1988)),
        (u'University of Ibadan',
         degree(u'Bachelor of Arts Degree', "Bachelor's", u'Linguistics', 1979)),
    )
    for school_name, wanted in expected_degrees:
        school = next(
            (edu for edu in educations if edu["school_name"] == school_name),
            None)
        assert school
        assert wanted in school['degrees']
def test_g642_accuracy():
    """Check contact, experience and education parsing for the GET_642 fixture."""
    # ---- Contact details ----
    contact_nodes = bs4(GET_642, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)

    assert first == u'Bobby'
    assert last == u'Breland'
    for number, label in ((u'+15137595877', 'Home'), (u'+15134773784', 'Mobile')):
        assert {'value': number, 'label': label} in phones
    assert GET_642_ADDRESS in addresses

    # ---- Experiences ----
    experience_nodes = bs4(GET_642, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for entry in experiences:
        assert len(entry['bullets']) == 1

    def tenure(start_month, start_year, end_month, end_year, city):
        # Expected (field, value) pairs in the order they are checked.
        return (('start_month', start_month), ('start_year', start_year),
                ('end_month', end_month), ('end_year', end_year), ('city', city))

    # Each row: organization name, then the pairs checked for that entry.
    expected_jobs = (
        # Disabled check: city == u'Liberty Township'
        (u'Pivotalthought Llc',
         (('start_month', 1), ('start_year', 2010), ('city', u'Liberty'))),
        (u'Gxs, Inc', tenure(8, 2008, 1, 2010, u'Gaithersburg')),
        (u'Sun Microsystems', tenure(3, 2006, 7, 2008, u'Santa Clara')),
        (u'First Consulting Group', tenure(11, 2005, 3, 2006, u'Long Beach')),
        (u'Computer Sciences Corporation Consulting Group',
         tenure(2, 2003, 11, 2005, u'Waltham')),
        (u'Seebeyond Technology Corporation', tenure(3, 2001, 2, 2003, u'Monrovia')),
        (u'Collaborex, Inc', tenure(4, 2000, 3, 2001, u'Fairfax')),
        # Disabled lookup: u'Origin Technology in Business'
        (u'Origin Technology', tenure(2, 1998, 4, 2000, u'Cincinnati')),
        (u'R.w. Johnson Pri', tenure(5, 1995, 2, 1998, u'Raritan')),
        (u'Taratec Development Corporation', tenure(12, 1991, 5, 1995, u'Bridgewater')),
        (u'H. B. Zachry', tenure(4, 1991, 12, 1991, u'San Antonio')),
        (u'Triple I', tenure(6, 1989, 4, 1991, u'Deepwater')),
    )

    # Resolve every organization first, then require that none is missing
    # before looking at individual fields.
    matches = [
        next((job for job in experiences if job["organization"] == organization), None)
        for organization, _ in expected_jobs
    ]
    assert None not in matches

    for match, (_, expected_pairs) in zip(matches, expected_jobs):
        for field, value in expected_pairs:
            assert match[field] == value

    # ---- Educations ----
    education_nodes = bs4(GET_642, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)

    nlu = next(
        (school for school in educations
         if school["school_name"] == u'Northeast Louisiana University'),
        None)
    assert nlu

    expected_degree = {
        'title': u'B.S',
        'type': 'Bachelor of Science',
        'start_month': None,
        'start_year': None,
        'end_month': 12,
        'end_year': 1988,
        'gpa_num': None,
        'bullets': [{'major': u'Computer Science', 'comments': None}],
    }
    assert expected_degree in nlu['degrees']
def test_docx_accuracy():
    """Check contact, experience and education parsing for the DOCX fixture."""
    # ---- Contact details ----
    contact_nodes = bs4(DOCX, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)

    assert DOCX_ADDRESS in addresses
    assert first == 'Veena'
    assert last == 'Nithoo'
    assert {'value': u'+12154120817', 'label': 'Other'} in phones

    # ---- Experiences ----
    experience_nodes = bs4(DOCX, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for entry in experiences:
        assert len(entry['bullets']) == 1

    def programmer_stint(start_month, start_year, end_month, end_year):
        # Expected pairs for the "Analyst Programmer" entries, in check order.
        return (('start_month', start_month), ('start_year', start_year),
                ('end_month', end_month), ('end_year', end_year),
                ('position', u'Analyst Programmer'))

    # Each row: organization name, then the pairs checked for that entry.
    expected_jobs = (
        (u'Merck & Co, Inc',
         (('start_month', 10), ('start_year', 2000),
          ('position', u'Application Services Analyst'))),
        (u'Infomc Inc', programmer_stint(9, 1999, 6, 2000)),
        (u'Datakinetics Inc', programmer_stint(1, 1998, 8, 1999)),
        (u'Harel Mallac, Mcs Development Ltd', programmer_stint(3, 1996, 5, 1997)),
        (u'Gt Management Ltd', programmer_stint(9, 1993, 9, 1994)),
    )

    # Resolve every organization first, then require that none is missing.
    matches = [
        next((job for job in experiences if job["organization"] == organization), None)
        for organization, _ in expected_jobs
    ]
    assert None not in matches

    for match, (_, expected_pairs) in zip(matches, expected_jobs):
        for field, value in expected_pairs:
            assert match[field] == value

    # ---- Educations ----
    education_nodes = bs4(DOCX, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)

    south_bank = next(
        (school for school in educations
         if school["school_name"] == u'South Bank University'),
        None)
    assert south_bank
    assert south_bank['city'] == u'London'

    expected_degree = {
        'title': u'B.Sc',
        'type': 'Bachelor of Science',
        'start_month': None,
        'start_year': None,
        'end_month': 7,
        'end_year': 1995,
        'gpa_num': None,
        'bullets': [{'major': u'Computing Studies', 'comments': None}],
    }
    assert expected_degree in south_bank['degrees']
def test_dupe_experience_bullets():
    """Every experience parsed from DUPED_EXPERIENCE must carry exactly one bullet."""
    nodes = bs4(DUPED_EXPERIENCE, 'lxml').findAll('experience')
    for parsed in parse_candidate_experiences(nodes):
        bullet_count = len(parsed['bullets'])
        assert bullet_count == 1
def test_pdf14_accuracy():
    """
    Check contact, experience and education parsing for the PDF_14 fixture.

    Phones and addresses are parsed but not asserted on; those calls only
    verify that the parsers run without raising for this fixture.
    """
    # ---- Contact details ----
    contact_xml_list = bs4(PDF_14, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_xml_list)
    # Results intentionally unused (see docstring).
    parse_candidate_phones(contact_xml_list)
    parse_candidate_addresses(contact_xml_list)

    # ---- Experiences ----
    experience_xml_list = bs4(PDF_14, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_xml_list)
    for exp in experiences:
        assert len(exp['bullets']) == 1

    # ---- Educations ----
    educations_xml_list = bs4(PDF_14, 'lxml').findAll('education')
    educations = parse_candidate_educations(educations_xml_list)

    assert first == 'Jose'
    assert last == 'Chavez'
    # Disabled check: {'value': u'604.609.0921'} in phones

    # Disabled lookups (organization / position):
    #   * u'Organization Committee Commonwelath Games 2010' /
    #     u'Games Management Systems Director'
    #   * u'Atos Origin Canada' / u'Core Games Systems Application Manager'
    #   * u'Design Maintenance Systems Inc.' /
    #     u'Software Testing Engineer/ Jr. Developer'
    #   * u'Orbital Technologies Inc.' / u'Software Testing Engineer'

    edu1 = next(
        (edu for edu in educations if edu["school_name"] == u'ITESO University'),
        None)
    assert edu1
    assert {
        'start_month': None,
        'end_month': 1,
        'start_year': None,
        'bullets': [{
            'major': u'Computer Systems Engineer',
            'comments': None
        }],
        'title': u'B. Sc',
        'gpa_num': None,
        'end_year': 1992,
        'type': 'Bachelor of Science'
    } in edu1['degrees']

    # Disabled lookup: u'ITESM University'
    edu3 = next(
        (edu for edu in educations
         if edu["school_name"] == u'British Columbia Institute of Technology'),
        None)
    # Guard the lookup like every other school check in this module, so a
    # missing school fails as an assertion rather than a TypeError on None.
    assert edu3
    assert {
        'start_month': None,
        'end_month': 1,
        'start_year': None,
        'bullets': [{
            'major': u'software Engineering',
            'comments': None
        }],
        'title': u'Software Engineering Diploma',
        'gpa_num': None,
        'end_year': 1993,
        'type': 'Diploma'
    } in edu3['degrees']