Beispiel #1
0
	def test_resume_urls_and_their_categories(self):
		# Both resume fixtures are expected to yield exactly one URL,
		# 'api.ai', which url_categories files under 'others' with an
		# 'http://' prefix.
		urls_from_ideal = get_urls(self.ideal_resume_data)
		urls_from_nameless = get_urls(self.resume_without_name)

		# Extraction: a single bare host name per fixture.
		self.assertEqual(len(urls_from_ideal), 1)
		self.assertEqual(urls_from_ideal[0], 'api.ai')

		self.assertEqual(len(urls_from_nameless), 1)
		self.assertEqual(urls_from_nameless[0], 'api.ai')

		# Categorisation: the first entry of the 'others' bucket.
		ideal_categories = url_categories(urls_from_ideal)
		nameless_categories = url_categories(urls_from_nameless)

		first_other_nameless = nameless_categories['others'][0]
		self.assertEqual(first_other_nameless, 'http://api.ai')
		first_other_ideal = ideal_categories['others'][0]
		self.assertEqual(first_other_ideal, 'http://api.ai')
    def test_website_score(self):
        # Stores the fixture's URLs on a fresh Resume, checks the e-mail
        # parsed from the fixture file, then unpacks the website scores.
        # apply_website_score is replaced by a local mock here, so the final
        # assertions only check the mock's canned (None, None, None) result.
        extracted_urls = get_urls(self.test_website_score)
        category_urls = url_categories(extracted_urls)

        # self.assertEqual('http://imgur.com/', category_urls['others'][0])
        url_response = get_url_response(category_urls)
        resume = Resume.objects.create(parse_status=0)

        # Reuse an existing Url row when one matches, otherwise create it.
        for entry in url_response:
            matches = Url.objects.filter(url=entry['name'])
            if not matches.exists():
                stored_url = Url.objects.create(url=entry['name'],
                                                category=entry['type'])
            else:
                stored_url = matches[0]
                stored_url.category = entry['type']
                stored_url.save()

            resume.urls.add(stored_url)

        fixture_path = (settings.TESTDATA_DIRS +
                        'score_calculation/test_website_score.txt')
        _, _, _, email = get_basics(fixture_path)

        self.assertEqual(email, '*****@*****.**')

        apply_website_score = mock.Mock(return_value=(None, None, None))
        scores = apply_website_score(category_urls, resume, email)
        (website_activity_score, website_reputation_score,
         website_contribution_score) = scores

        self.assertEqual(website_activity_score, None)
        self.assertEqual(website_contribution_score, None)
        self.assertEqual(website_reputation_score, None)
Beispiel #3
0
def parse_resume_internal(path, text, resume_id, file_name, hash_value,
                          post_data):
    """Fill in a ``Resume`` row from posted data, falling back to parsed basics.

    The values in ``post_data`` win; any of first name, last name, phone
    number or e-mail that is missing there is taken from the ``'basics'``
    section returned by ``extract_resume(path)``. Every URL found in ``text``
    is stored as a new ``Url`` row and attached to the resume.

    Args:
        path: Location of the resume file; passed to ``extract_resume`` and
            stored as ``resume_location``.
        text: Resume text; scanned for URLs and stored as ``content``.
        resume_id: Primary key of the ``Resume`` row to update.
        file_name: Stored as ``file_name``.
        hash_value: Stored as ``content_hash``.
        post_data: Mapping read with ``.get`` for ``'first_name'``,
            ``'last_name'``, ``'contact_no'`` and ``'email'``.

    Returns:
        The saved ``Resume`` instance, with ``parse_status`` set to
        ``Resume.STATUS.processed``.
    """
    urls = get_urls(text)
    categories_url = url_categories(urls)
    url_response = get_url_response(categories_url)
    resume_details = Resume.objects.get(id=resume_id)
    response = extract_resume(path)

    # Read the parsed basics through an empty-dict default so the fallback
    # values are always bound, even when the extractor found no 'basics'.
    basics = response.get('basics') or {}
    first_name = basics.get('first_name')
    last_name = basics.get('last_name')
    phone_number = basics.get('phone')
    email = basics.get('email')
    if email:
        # The extractor reports e-mails as a sequence; keep the first one.
        email = email[0]

    for item in url_response:
        resume_url = Url.objects.create(category=item['type'],
                                        url=item['name'])
        resume_details.urls.add(resume_url)

    # Resume: posted values first ...
    resume_details.first_name = post_data.get('first_name')
    resume_details.last_name = post_data.get('last_name')
    resume_details.phone_number = post_data.get('contact_no')
    resume_details.email = post_data.get('email')

    # ... then parsed values for whatever was not posted. Names are cut to
    # 45 characters; a missing parsed name becomes '' instead of raising.
    if not resume_details.first_name:
        resume_details.first_name = (first_name or '')[:45]
    if not resume_details.last_name:
        resume_details.last_name = (last_name or '')[:45]
    if not resume_details.phone_number:
        resume_details.phone_number = phone_number
    if not resume_details.email:
        resume_details.email = email

    resume_details.content = text
    resume_details.file_name = file_name
    resume_details.resume_location = path
    resume_details.content_hash = hash_value
    resume_details.parse_status = Resume.STATUS.processed
    resume_details.save()
    return resume_details
    def test_bitbucket_score_calculation(self):
        # Resolves the BitBucket URL from the fixture, stores the fixture's
        # URLs on a fresh Resume and checks the unpacked BitBucket scores.
        # Both get_bitbucket_username and apply_bitbucket_score are local
        # mocks, so no remote lookup happens in this test.
        urls = get_urls(self.test_bitbucket_score)
        category_urls = url_categories(urls)
        contribution_urls = category_urls['contributions']

        bitbucket_url = get_bit_bucket_url(contribution_urls)

        get_bitbucket_username = mock.Mock(return_value='Anubhav_722')
        bitbucket_username = get_bitbucket_username(bitbucket_url)

        self.assertEqual(bitbucket_username, 'Anubhav_722')

        resume = Resume.objects.create(parse_status=0)
        url_response = get_url_response(category_urls)

        # Reuse an existing Url row when one matches, otherwise create it.
        for item in url_response:
            resume_urls = Url.objects.filter(url=item['name'])
            if resume_urls.exists():
                resume_url = resume_urls[0]
                resume_url.category = item['type']
                resume_url.save()
            else:
                resume_url = Url.objects.create(url=item['name'],
                                                category=item['type'])

            resume.urls.add(resume_url)

        # The result is unpacked as (activity, reputation, contribution), so
        # the canned tuple must use that order. The previous (.2, .05, 0.0)
        # put .05 in the reputation slot and made the assertions below fail.
        apply_bitbucket_score = mock.Mock(return_value=(.2, 0.0, .05))
        activity_score, reputation_score, contribution_score = apply_bitbucket_score(
            bitbucket_username, resume)

        # user_type = 1 so activity_score = 0.2
        # no. of repos = 2 so contribution_score = .05
        # no. of followers = 0 so reputation_score = 0.0

        self.assertEqual(activity_score, .2)
        self.assertEqual(contribution_score, .05)
        self.assertEqual(reputation_score, 0.0)
    def test_blog_score(self):
        # Stores the fixture's blog URL on a fresh Resume, runs
        # apply_blog_score, then checks the averaged blog scores.
        # calculate_blog_scores is a local mock, so the last three
        # assertions only check its canned (.4, 0, 0) result.
        extracted_urls = get_urls(self.test_blog_score)
        category_urls = url_categories(extracted_urls)

        # fix for fetching blog_urls: keep only the part of the first blog
        # URL that precedes the first backslash.
        blog_urls = category_urls['blog']
        blog_urls[0] = blog_urls[0].split('\\')[0]

        resume = Resume.objects.create(parse_status=0)
        url_response = get_url_response(category_urls)

        # Reuse an existing Url row when one matches, otherwise create it.
        for entry in url_response:
            matches = Url.objects.filter(url=entry['name'])
            if not matches.exists():
                stored_url = Url.objects.create(url=entry['name'],
                                                category=entry['type'])
            else:
                stored_url = matches[0]
                stored_url.category = entry['type']
                stored_url.save()

            resume.urls.add(stored_url)

        first_stored = resume.urls.all()[0]
        self.assertEqual(first_stored.url,
                         'https://www.tumblr.com/blog/i-psychoassassin')

        apply_blog_score(category_urls, resume)

        # Canned values are unpacked as (activity, reputation, contribution):
        # avg_activity_score = 0.4
        # avg_reputation_score = 0.0
        # avg_contribution_score = 0.0
        calculate_blog_scores = mock.Mock(return_value=(.4, 0, 0))
        blog_scores = calculate_blog_scores(resume)
        (avg_blog_activity_score, avg_blog_reputation_score,
         avg_blog_contribution_score) = blog_scores

        self.assertEqual(avg_blog_activity_score, .4)
        self.assertEqual(avg_blog_reputation_score, 0)
        self.assertEqual(avg_blog_contribution_score, 0)
    def test_itunes_store_score(self):
        # Checks that the iTunes link is categorised under 'apps', stores the
        # fixture's URLs on a fresh Resume, runs apply_itunes_score, then
        # checks the averaged mobile-app scores.
        # calculate_average_mobile_contrib_score is a local mock, so the last
        # three assertions only check its canned (.3, .7, .05) result.
        category_urls = url_categories(get_urls(self.test_itunes_score))
        url_response = get_url_response(category_urls)

        expected_app_url = (
            'https://itunes.apple.com/in/app/whatsapp-messenger/id310633997?mt=8')
        self.assertIn(expected_app_url, category_urls['apps'])

        # Only the first of the four values returned by get_basics is used.
        fixture_path = (settings.TESTDATA_DIRS +
                        'score_calculation/test_itunes_score.txt')
        first_name, _, _, _ = get_basics(fixture_path)

        self.assertEqual(first_name, 'anubhav')

        resume = Resume.objects.create(parse_status=0)

        # Reuse an existing Url row when one matches, otherwise create it.
        for entry in url_response:
            matches = Url.objects.filter(url=entry['name'])
            if not matches.exists():
                stored_url = Url.objects.create(url=entry['name'],
                                                category=entry['type'])
            else:
                stored_url = matches[0]
                stored_url.category = entry['type']
                stored_url.save()

            resume.urls.add(stored_url)

        calculate_average_mobile_contrib_score = mock.Mock(
            return_value=(.3, .7, .05))
        apply_itunes_score(category_urls, first_name, resume)

        # Canned values are unpacked as (activity, reputation, contribution).
        mobile_scores = calculate_average_mobile_contrib_score(resume)
        (avg_mobile_apps_activity_score, avg_mobile_apps_reputation_score,
         avg_mobile_apps_contribution_score) = mobile_scores

        self.assertEqual(avg_mobile_apps_contribution_score, .05)
        self.assertEqual(avg_mobile_apps_reputation_score, .7)
        self.assertEqual(avg_mobile_apps_activity_score, .3)
    def test_github_score_calculation(self):
        # Stores the fixture's URLs on a fresh Resume and checks the unpacked
        # GitHub scores. Both get_github_username and apply_github_score are
        # local mocks, so no remote lookup happens in this test.
        get_github_username = mock.Mock(return_value='Anubhav722')
        github_username = get_github_username(self.test_github_score)

        resume = Resume.objects.create(parse_status=0)
        category_urls = url_categories(get_urls(self.test_github_score))
        url_response = get_url_response(category_urls)

        # Reuse an existing Url row when one matches, otherwise create it.
        for entry in url_response:
            matches = Url.objects.filter(url=entry['name'])
            if not matches.exists():
                stored_url = Url.objects.create(url=entry['name'],
                                                category=entry['type'])
            else:
                stored_url = matches[0]
                stored_url.category = entry['type']
                stored_url.save()

            resume.urls.add(stored_url)

        # Stripping anything after a backslash from the username could be
        # done inside get_github_username() in resume/utils/parser_helper.py
        # when it returns the match:
        # github_username = github_username.split('\\')
        # github_username = github_username[0]

        self.assertEqual(github_username, 'Anubhav722')

        # Canned values are unpacked as (activity, reputation, contribution):
        # no. of repos = 136 so contribution_score = .6
        # no. of followers = 6 and medium active user (user_type=2)
        #   so reputation_score = .01
        # NOTE(review): the earlier comment expected activity_score = .2 for
        # user_type = 2, but the mocked and asserted value is .1 - confirm
        # which one is intended.
        apply_github_score = mock.Mock(return_value=(.1, .01, .6))
        github_scores = apply_github_score(github_username, resume)
        activity_score, reputation_score, contribution_score = github_scores

        self.assertEqual(activity_score, .1)
        self.assertEqual(reputation_score, .01)
        self.assertEqual(contribution_score, .6)
    def test_play_store_score(self):
        # Stores the fixture's URLs on a fresh Resume, runs
        # apply_play_store_app_score, then checks the averaged mobile-app
        # scores. calculate_average_mobile_contrib_score is a local mock, so
        # the last three assertions only check its canned (.25, .44, .2)
        # result.
        category_urls = url_categories(get_urls(self.test_play_store_score))

        # Keep only the part of the first app URL before the first backslash.
        app_urls = category_urls['apps']
        app_urls[0] = app_urls[0].split('\\')[0]

        url_response = get_url_response(category_urls)
        resume = Resume.objects.create(parse_status=0)

        # Reuse an existing Url row when one matches, otherwise create it.
        for entry in url_response:
            matches = Url.objects.filter(url=entry['name'])
            if not matches.exists():
                stored_url = Url.objects.create(url=entry['name'],
                                                category=entry['type'])
            else:
                stored_url = matches[0]
                stored_url.category = entry['type']
                stored_url.save()

            resume.urls.add(stored_url)

        apply_play_store_app_score(category_urls, resume)

        # app rating is 4.4 so AVG_MOBILE_APP_REPUTATION_SCORE = .44
        # NOTE(review): the earlier comments expected a contribution score of
        # .25 (5K - 10K downloads) and an activity score of .3 (last updated
        # 2nd april 2017), but the values mocked and asserted below are
        # activity = .25 and contribution = .2 - confirm which are intended.
        calculate_average_mobile_contrib_score = mock.Mock(
            return_value=(.25, .44, .2))
        mobile_scores = calculate_average_mobile_contrib_score(resume)
        (avg_mobile_apps_activity_score, avg_mobile_apps_reputation_score,
         avg_mobile_apps_contribution_score) = mobile_scores

        self.assertEqual(avg_mobile_apps_reputation_score, 0.44)
        self.assertEqual(avg_mobile_apps_activity_score, .25)
        self.assertEqual(avg_mobile_apps_contribution_score, .2)
Beispiel #9
0
 def test_get_urls(self):
     # URLs extracted from the fixture text must equal the expected ``urls``
     # value, a name defined outside this method.
     extracted = get_urls(self.text)
     self.assertEqual(extracted, urls)
Beispiel #10
0
def benchmark(quick_mode=False):
    """Append the categorised URLs of every resume in ``resume_list`` to ``output.csv``.

    One row is written per resume. A category that is not present is written
    as a single space. A resume for which ``get_text`` returns ``None`` still
    gets a row, with every URL column left at that placeholder. The header
    row is written only when ``output.csv`` is empty, so repeated runs keep
    appending data rows under a single header.

    Args:
        quick_mode: When true, only the first five resumes are processed.

    Returns:
        ``json.dumps`` of the local ``user`` dict. Nothing is ever stored in
        it, so the result is the string ``"{}"``.
    """
    global resume_list
    user = {}
    if quick_mode:
        # NOTE(review): this rebinds the module-level list, so the truncation
        # outlives this call. Kept as-is for compatibility - confirm whether
        # any caller relies on it.
        resume_list = resume_list[:5]

    field_names = [
        'file_name', 'Personal Website', 'Blog', 'GitHub', 'LinkedIn',
        'StackOverflow', 'BitBucket', 'GitHub_Gist', 'Other urls'
    ]
    blank = ' '

    with open('output.csv', 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        # A file opened for appending is positioned at its end, so offset 0
        # means the file is empty and still needs its header.
        if csvfile.tell() == 0:
            writer.writeheader()

        for resume in resume_list:
            # The extraction result is not used for the CSV; the call is kept
            # so every resume is still run through the extractor.
            extract_resume(resume)

            # Start from an empty mapping for each resume so a missing text
            # can neither reuse the previous resume's categories nor hit an
            # unbound name on the first iteration.
            categories = {}
            text = get_text(resume)
            if text is not None:
                categories = url_categorizer(get_urls(text), text)

            websites = categories.get('Websites', {})
            personal_urls = websites.get('Personal Urls', {})
            social = categories.get('Social Websites', {})

            writer.writerow({
                'file_name': resume,
                'Personal Website': personal_urls.get('Personal Website',
                                                      blank),
                'Blog': websites.get('Blog', blank),
                'GitHub': social.get('GitHub Url', blank),
                'LinkedIn': social.get('LinkedIn Url', blank),
                'StackOverflow': social.get('StackOverflow Url', blank),
                'BitBucket': social.get('BitBucket Url', blank),
                'GitHub_Gist': social.get('GitHub Gist Url', blank),
                'Other urls': personal_urls.get('Other Urls', blank),
            })

    return json.dumps(user)
Beispiel #11
0
def parse_resume(path,
                 text,
                 resume_id,
                 skills,
                 file_name,
                 hash_value,
                 callback_client=False):
    """Parse a resume, score it and save the results on its ``Resume`` row.

    The function stores the URLs found in ``text``, applies the
    StackOverflow, GitHub, BitBucket, blog, mobile-app and website scorers,
    records coding / social / skill-matching ``Score`` rows, attaches the
    extracted locations, companies, institutions and skills, and finally
    saves the resume with ``parse_status`` set to ``Resume.STATUS.processed``.

    Args:
        path: Location of the resume file; passed to ``get_basics`` and
            stored as ``resume_location``.
        text: Resume text used for URL, skill and feature extraction.
        resume_id: Primary key of the ``Resume`` row to update.
        skills: Passed to ``get_skill_matching_score`` together with ``text``.
        file_name: Stored as ``file_name`` after ``check_file_name_length``.
        hash_value: Stored as ``content_hash``.
        callback_client: When true, ``callback_internal_client`` is called
            with the saved resume and a non-OK response is reported.

    Returns:
        The string ``"Resume Processed"``.
    """
    import re

    content_list = text.lower().split()

    # Get resume instance.
    resume_details = Resume.objects.get(id=resume_id)
    # Categorising urls
    categories_url = url_categories(get_urls(text))
    # Getting custom url response using get_url_response
    url_response = get_url_response(categories_url)

    # Get basic details.
    first_name, last_name, phone_number, email = get_basics(path)

    # Values already stored on the resume take precedence over parsed ones.
    if resume_details.email:
        email = resume_details.email
    if resume_details.first_name:
        first_name = resume_details.first_name
    if resume_details.last_name:
        last_name = resume_details.last_name
    if resume_details.phone_number:
        phone_number = resume_details.phone_number

    # Url Instance: reuse a matching row, otherwise create one.
    for item in url_response:
        resume_urls = Url.objects.filter(url=item['name'])
        if resume_urls.exists():
            resume_url = resume_urls[0]
            resume_url.category = item['type']
            resume_url.save()
        else:
            resume_url = Url.objects.create(url=item['name'],
                                            category=item['type'])

        resume_details.urls.add(resume_url)

    # Skills Matching Score
    (skill_match_score, skills_matched,
     skills_not_matched) = get_skill_matching_score(skills, text)

    # Definition of scores; each stays 0 unless its profile is found.
    github_contribution_score = 0
    github_activity_score = 0
    github_reputation_score = 0
    bit_bucket_contribution_score = 0
    bit_bucket_activity_score = 0
    bit_bucket_reputation_score = 0
    stackoverflow_contribution_score = 0
    stackoverflow_reputation_score = 0
    stackoverflow_activity_score = 0

    # StackOverflow Score
    stackoverflow_user_id = get_stackoverflow_userid(text)
    if stackoverflow_user_id:
        (stackoverflow_activity_score, stackoverflow_reputation_score,
         stackoverflow_contribution_score) = apply_stackoverflow_score(
             stackoverflow_user_id, resume_details)

    # GitHub Score
    github_username = get_github_username(text)
    if github_username:
        (github_activity_score, github_reputation_score,
         github_contribution_score) = apply_github_score(
             github_username, resume_details)

    # Blog score
    apply_blog_score(categories_url, resume_details)

    # BitBucket Score
    contribution_urls = categories_url['contributions']
    bit_bucket_url = get_bit_bucket_url(contribution_urls)
    if bit_bucket_url is not None and bit_bucket_url != 'No Url Found':
        bit_bucket_user_name = get_bitbucket_username(bit_bucket_url)
        (bit_bucket_activity_score, bit_bucket_reputation_score,
         bit_bucket_contribution_score) = apply_bitbucket_score(
             bit_bucket_user_name, resume_details)

    # MobileApp Database saving and score calculations
    # Play Store - Total Score
    apply_play_store_app_score(categories_url, resume_details)
    # ITunes - Total Score
    apply_itunes_score(categories_url, first_name, resume_details)

    # Website Score. The return value is not kept: the website scores used
    # in the totals below come from calculate_website_scores.
    apply_website_score(categories_url, resume_details, email)

    save_resume_skills(resume_details, skills_matched, skills_not_matched)

    # Work Experience
    features = FeatureExtraction()
    work_experience = features.get_work_experience(text)

    # Averaged blog, website and mobile-app scores, each unpacked as
    # (activity, reputation, contribution).
    (blog_activity_score, blog_reputation_score,
     blog_contribution_score) = calculate_blog_scores(resume_details)

    (website_activity_score, website_reputation_score,
     website_contribution_score) = calculate_website_scores(resume_details)

    (mobile_app_activity_score, mobile_app_reputation_score,
     mobile_app_contribution_score
     ) = calculate_average_mobile_contrib_score(resume_details)

    # Total Contribution Score
    coding_total_contribution_score = (github_contribution_score +
                                       bit_bucket_contribution_score +
                                       stackoverflow_contribution_score +
                                       mobile_app_contribution_score)
    social_total_contribution_score = (blog_contribution_score +
                                       website_contribution_score)

    # Total Activity score
    coding_total_activity_score = (github_activity_score +
                                   stackoverflow_activity_score +
                                   bit_bucket_activity_score +
                                   mobile_app_activity_score)
    social_total_activity_score = blog_activity_score + website_activity_score

    # Total Reputation Score
    coding_total_reputation_score = (github_reputation_score +
                                     stackoverflow_reputation_score +
                                     bit_bucket_reputation_score +
                                     mobile_app_reputation_score)
    social_total_reputation_score = (blog_reputation_score +
                                     website_reputation_score)

    # Total Coding score 2.5 out of 5
    total_coding_score = (coding_total_contribution_score +
                          coding_total_reputation_score +
                          coding_total_activity_score)
    # Total Social Score 1.5 out 5
    total_social_score = (social_total_contribution_score +
                          social_total_activity_score +
                          social_total_reputation_score)
    # Total Skill Matching Score 1 out 5
    total_skill_score = skill_match_score

    # Saving to Scores Model
    coding_score_instance = Score.objects.create(type=Score.TYPES.coding,
                                                 score=total_coding_score)
    resume_details.scores.add(coding_score_instance)

    social_score_instance = Score.objects.create(type=Score.TYPES.social,
                                                 score=total_social_score)
    resume_details.scores.add(social_score_instance)

    skill_score_instance = Score.objects.create(
        type=Score.TYPES.skill_matching, score=total_skill_score)
    resume_details.scores.add(skill_score_instance)

    total_ranking = total_coding_score + total_social_score + total_skill_score

    # Extracting Location, Company and Institution Names
    extract_features = ExtractFeatures()
    locations = extract_features.get_location(text)
    companies = extract_features.get_company_names(text)
    institutions = extract_features.get_institution_names(text)
    for location in locations:
        location_instance, _ = Location.objects.get_or_create(name=location)
        resume_details.locations.add(location_instance)
    for company in companies:
        company_instance, _ = Company.objects.get_or_create(name=company)
        resume_details.companies.add(company_instance)
    for institution in institutions:
        institution_instance, _ = Institution.objects.get_or_create(
            name=institution)
        resume_details.institutions.add(institution_instance)

    # Extract skills from provided text.
    # NOTE: As per now we're getting top 1000 tags from SO to extract skills.
    # With those skills we're getting intersection with list of content text.
    # Need to find better solution to do so [Bloom filter, et cetera]

    # Strip everything except letters, digits and dots from each token before
    # intersecting with the known skill set. The pattern is a raw string so
    # the backslash is not treated as an (invalid) string escape.
    non_skill_chars = re.compile(r'[^0-9a-zA-Z\.]+')
    cleaned_tokens = {non_skill_chars.sub('', token) for token in content_list}
    matched_skills = list(skillset.intersection(cleaned_tokens))
    for skill in matched_skills:
        skill_instance, _ = Skill.objects.get_or_create(name=skill)
        rskills = ResumeSkills(resume=resume_details, skill=skill_instance)
        rskills.save()

    # Resume
    file_name = check_file_name_length(file_name)
    resume_details.first_name = first_name
    resume_details.last_name = last_name
    resume_details.phone_number = phone_number
    resume_details.parse_status = Resume.STATUS.processed
    resume_details.file_name = file_name
    resume_details.content_hash = hash_value
    resume_details.content = text
    resume_details.email = email
    resume_details.resume_location = path
    resume_details.experience = work_experience
    resume_details.total_score = total_ranking
    resume_details.save()

    if callback_client:
        resp = callback_internal_client(resume_details)
        if resp.status_code != requests.codes.ok:
            # str.format needs a '{}' placeholder; the previous '%s' was
            # printed literally and the resume id was dropped.
            print(
                "ERROR: Unable to callback to internal client for resume: {}".
                format(str(resume_id)))

    return "Resume Processed"