import os
import tempfile

import requests
import textract
from google.cloud import firestore
from resume_parser import resumeparse

# Assumed module-level Firestore client; the original snippet references
# `client` without showing how it is created.
client = firestore.Client()


def hello_firestore(event, context):
    # Recover the collection and document path from the trigger's resource string.
    path_parts = context.resource.split('/documents/')[1].split('/')
    collection_path = path_parts[0]
    document_path = '/'.join(path_parts[1:])

    affected_doc = client.collection(collection_path).document(document_path)

    # The resume's download URL is stored in the document's "url" field.
    file_url = event["value"]['fields']["url"]['stringValue']
    affected_doc.update({
        u'checking': "checking",
        u'gotUrl': file_url
    })

    # Download the resume into a temporary file.
    response = requests.get(file_url)
    file_name = os.path.join(tempfile.gettempdir(), "metadata.pdf")
    with open(file_name, 'wb') as f:
        f.write(response.content)

    # Extract raw text and structured resume fields, then write them back.
    text = textract.process(file_name, method='pdfminer', encoding='ascii')
    data = resumeparse.read_file(file_name)
    affected_doc.update({u'resumeData': data})

    affected_doc.update({u'original': file_url})
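For reference, a minimal sketch of the event and context shapes this Firestore trigger expects; the project ID, document path, and URL below are hypothetical placeholders, not values from the original deployment.

# Hypothetical payload for local testing; the field layout mirrors what the
# function above reads (value -> fields -> url -> stringValue).
class FakeContext:
    resource = ('projects/my-project/databases/(default)/'
                'documents/resumes/abc123')


fake_event = {
    "value": {
        "fields": {
            "url": {"stringValue": "https://example.com/resume.pdf"}
        }
    }
}

# hello_firestore(fake_event, FakeContext())  # would download and parse the resume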
Example 2
def extract_fields(txt):
    # NOTE: the txt argument is currently unused; the path below is hardcoded.
    # import pdb;pdb.set_trace()
    resume_fields = resumeparse.read_file(
        '/home/ebabu/Downloads/Sanidhya_CV-converted.docx')
    return resume_fields


# def main():
# 	text = extract_text_from_docx('/home/dell/Downloads/Sanidhya_CV-converted.docx')
# 	experience = extract_experience(text)
# main()

# import pdb;pdb.set_trace()
# for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent))):
# 	# chunk = list(chunk)
# 	if hasattr(chunk, 'label') and (chunk.label() == 'ORGANIZATION' or 'PERSON'):
# 	# 		projects_list.append(sent)
# 	# 		chunk_leave = chunk.leaves()
# 	# 		# import pdb;pdb.set_trace()
# 	# 		for word,tag in chunk_leave:
# 	# 			# projects_title = []
# 	# 			# projects_title.append(word)
# 	# 			projects_title = word
# 	# 		print("projects_title",projects_title)
# 	# project_sent = sent
# 	# projects_dict[projects_title] = project_sent

# 		# while count<=3:
# 			# print(chunk.label())
# 			# pass
# 			# count += 1
# 	# print("sent",sent," ")
# import pdb;pdb.set_trace()
from flask import jsonify, request
from resume_parser import resumeparse


def index():

    if request.method == 'GET':
        return "Hello"

    # request.json is a property in Flask, so use get_json() to read the body.
    content = request.get_json()
    data = resumeparse.read_file('/content/Anubhab_Cover letter.pdf')

    result = data

    return jsonify({'solution_text': result})
#from pyresparser import ResumeParser

from resume_parser import resumeparse

if __name__ == '__main__':
    #data = ResumeParser(r'CV_AliDoggaz.pdf').get_extracted_data()
    data = resumeparse.read_file('CV_AliDoggaz.pdf')
    print(data)
Example 5
from resume_parser import resumeparse
import os
import warnings

warnings.simplefilter('ignore')
path = 'C:/Users/91884/Desktop/OCR_Resume/OCR_Resume/'
resumes = os.listdir(path)
c = 0
try:
    # Parse the resumes at indices 8 and 9 and print their extracted skills.
    for i in range(8, 10):
        print(resumes[i])
        data = resumeparse.read_file(f'{path}{resumes[i]}')
        print(data['skills'])
        c += 1

except Exception:
    pass
print(c)
# Number of resumes from index 8 onward.
print(len(resumes) + 1 - 9)
#8
Example 6
# All Resume Parser Dependencies must be installed
# Java must be running

from resume_parser import resumeparse

data = resumeparse.read_file(r"C:\Users\some_\Downloads\Profile.pdf")

# PS: First Run takes a while
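The comments above call out two prerequisites: the parser's dependencies and a working Java installation. A small pre-flight check along these lines can fail fast before parsing (a sketch, assuming a `java` binary on the PATH is what the parser's backend needs; the resume path is a placeholder):

import shutil

from resume_parser import resumeparse

# Assumption: a `java` executable on PATH is enough for the parser's backend.
if shutil.which("java") is None:
    raise RuntimeError("Java not found on PATH; install a JRE before parsing resumes.")

# data = resumeparse.read_file(r"C:\path\to\Profile.pdf")  # placeholder path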
Example 7
def home():
    filename = request.args.get("filename")
    data = resumeparse.read_file('pdfs/' + filename)

    name = data['name'].lower().title()
    email = data['email']
    phone = data['phone']
    try:
        school = data['university'][0].title()
    except Exception:
        school = ""
    skills = []
    count = 0
    for x in data['skills']:
        skills.append(x.strip())
        if count == 5:
            break
        else:
            count += 1
    skills = list(filter(None, skills))
    skillstr = ', '.join(skills)

    matchings = []
    listings = [[]]
    best_jobs = []
    concentration = []
    organization, job_type, job_title, job_description, location = [], [], [], [], []

    # Converting pdf to txt file for library

    def pdf2txt(PDFfile, TXTfile):
        # Run each page through pdfminer's TextConverter, then dump the text to TXTfile.
        res_mgr = PDFResourceManager()
        data = io.StringIO()
        TxtConverter = TextConverter(res_mgr, data, laparams=LAParams())
        interpreter = PDFPageInterpreter(res_mgr, TxtConverter)
        with open(PDFfile, 'rb') as in_file:
            for page in PDFPage.get_pages(in_file):
                interpreter.process_page(page)

        txt = data.getvalue()
        with open(TXTfile, 'w') as f:
            f.write(txt)

    # Function that searches for best match in jobs dataset

    def find_matches(user_resume):
        with open(user_resume, 'r') as resume:
            with open('small_jobs_dataset.csv', 'r') as job_listings_csv:
                # Splitting dataset rows by delimiter
                csv_reader = csv.reader(job_listings_csv)
                count = 0
                # Reading user's resume into variable
                resume_var = resume.read()
                for row in csv_reader:
                    str_row = str(row)
                    job_as_list = pp.commaSeparatedList.parseString(
                        str_row).asList()
                    # Storing job description
                    job_desc = job_as_list[4]
                    if count > 0:
                        # Feature extraction on job desc and resume
                        text = [resume_var, job_desc]
                        count_vec = CountVectorizer()
                        count_matrix = count_vec.fit_transform(text)
                        match = cosine_similarity(count_matrix)[0][1] * 100
                        matchings.append(tuple((match, count)))
                        listings.append(job_as_list)

                    count += 1
        # Sorting by jobs with highest match
        matchings.sort(reverse=True)
        # Storing jobs with highest match and user's concentration
        # Store up to the five best matches (avoids an IndexError on tiny datasets).
        for i in range(min(5, len(matchings))):
            match = matchings[i]
            job = listings[int(match[1])]
            split_string = job
            organization.append(split_string[0].strip('\"'))
            job_type.append(split_string[3].strip('\"'))
            job_title.append(split_string[7].strip('\"'))
            job_description.append(split_string[4].strip('\"'))
            location.append(split_string[6].strip('\"'))
            if i == 0:
                job_industry = split_string[3]
                job_industry = job_industry.strip('\"')
                concentration.append(job_industry)

    # Resume needs to be converted from pdf to txt

    PDFfile = 'pdfs/' + filename
    TXTfile = 'parsed_resume.txt'

    # Converting pdf resume to txt

    pdf2txt(PDFfile, TXTfile)

    # Calling find_matches function

    find_matches('parsed_resume.txt')

    # Printing results

    company_names = []
    company_desc = []
    company_match = []
    # Keep the top three matches, skipping any that are missing.
    for i in range(3):
        try:
            company_names.append(re.sub(r'[^A-Za-z0-9 ]+', '',
                                        organization[i]))
            company_desc.append(job_title[i].strip())
            matchc = round(matchings[i][0])
            company_match.append(matchc)
        except Exception:
            pass

    try:
        avg = 0
        # Scores under 80 are bumped by 20 before averaging over the top three.
        for x in company_match:
            if x < 80:
                avg += x + 20
            else:
                avg += x
        avg = round(avg / 3)
    except Exception:
        print("didn't work")

    import os
    os.remove("parsed_resume.txt")

    return render_template("results.html",
                           name=name,
                           email=email,
                           phone=phone,
                           school=school,
                           skills=skillstr,
                           company_desc=company_desc,
                           company_match=company_match,
                           company_names=company_names,
                           avg=avg)
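The matching step inside home() boils down to a bag-of-words cosine similarity between the resume text and each job description. A self-contained sketch of just that step, using made-up strings in place of the parsed resume and the CSV rows:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Toy inputs; the real code uses the converted resume text and a job description column.
resume_text = "python flask data analysis machine learning"
job_desc = "looking for a python developer with machine learning experience"

count_matrix = CountVectorizer().fit_transform([resume_text, job_desc])
match = cosine_similarity(count_matrix)[0][1] * 100
print(f"match: {match:.1f}%")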
from resume_parser import resumeparse

# nltk.download('popular')
data = resumeparse.read_file(
    '/home/ebabu/Downloads/SrishtiJain 2021-converted.docx')
print(data)

# from pyresparser import ResumeParser
# data = ResumeParser('/path/to/resume/file').get_extracted_data()

# resume_fields = ResumeParser('/home/ebabu/Downloads/Sanidhya_CV-converted.docx').get_extracted_data()
# print(resume_fields)
Example 9
# This file contains Python code to extract skills from an uploaded resume using natural language processing
import json
import re
import os
import sys
from resume_parser import resumeparse
from itertools import filterfalse
from nltk.corpus import stopwords

stopwords = set(stopwords.words('english'))
file_name = "./backend-engine/" + sys.argv[1]
data = resumeparse.read_file(file_name)
skills = data["skills"]

# remove all non-alphabet, non-number characters
# remove leading and trailing white space
for i in range(0, len(skills)):
    skills[i] = re.sub(r'[^0-9a-zA-Z\s]+', '', skills[i])
    skills[i] = skills[i].strip()

# drop skills that are longer than two words
skills[:] = filterfalse(lambda elm: len(elm.split()) > 2, skills)

# remove any stopwords
for i in range(0, len(skills)):
    skills[i] = ' '.join(
        filter(lambda w: w not in stopwords, skills[i].split()))

# remove empty string
skills[:] = filterfalse(lambda elm: len(elm) == 0, skills)
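To make the cleanup above concrete, here is the same sequence of steps run on a small made-up skills list (a sketch; the input strings are invented):

import re
from itertools import filterfalse
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))

# Invented input; mirrors the pipeline above: strip punctuation, drop skills
# longer than two words, remove stopwords, then drop empty strings.
skills = [' Python ', 'Machine Learning', 'worked with the team', 'C++!!', '']
skills = [re.sub(r'[^0-9a-zA-Z\s]+', '', s).strip() for s in skills]
skills[:] = filterfalse(lambda s: len(s.split()) > 2, skills)
skills = [' '.join(w for w in s.split() if w not in stop_words) for s in skills]
skills[:] = filterfalse(lambda s: len(s) == 0, skills)
print(skills)  # -> ['Python', 'Machine Learning', 'C']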
Example 10
def find_cv():
    """
    :param: N/A
    :return: The name of the file that is the most likely to be the candidate's resume/cv, as well as the candidate email
    adress, his phone number, and his name.
    """

    # Special case where the candidate's folder is empty or contains only one file

    if len(os.listdir('PDF_Converted_Files')) == 0:
        return
    if len(os.listdir('PDF_Converted_Files')) == 1:
        return os.listdir('PDF_Converted_Files')[0]

    # If a file name contains CV/Resume/etc., return that file's info directly
    for name in os.listdir('PDF_Converted_Files'):
        if name.lower().startswith("cv"):
            data = resumeparse.read_file('PDF_Converted_Files' + os.sep + name)
            return name, data['email'], data['phone'], data['name']

        for keyword in [
                'cv.', 'resume', 'résumé', 'curriculum vitae',
                'curriculumvitae'
        ]:
            if keyword in name.lower():
                data = resumeparse.read_file('PDF_Converted_Files' + os.sep +
                                             name)
                return name, data['email'], data['phone'], data['name']

    # Attribute a score to each file. The score will allow us to estimate the probability of
    # that file being the candidate's resume.

    # "maxi" will store the highest score reached yet.
    maxi = 0

    # loop over all files and attribute a "score" to each one.
    # If the file's score is >= to maxi, maxi = score. In this case,
    # we will also store the email, phone number, and fullname present in the file.
    for name in os.listdir('PDF_Converted_Files'):
        # Parse the file, looking for relevant info (email, skills, education, etc...)
        score = 0
        data = resumeparse.read_file('PDF_Converted_Files' + os.sep + name)

        # Increase score if we find relevant info in the file
        if data['skills']:
            score += 5  # If the file contains the candidate's skills, there are very high chances that this
            # file is the candidate's resume. So we increase its score by 5.
        if data['email']:
            score += 1
        if data['phone']:
            score += 1
        if data['degree']:
            score += 3
        if data['university']:
            score += 2
        if data['total_exp']:
            score += 2
        if score >= maxi:
            cv_name, email, phone, FullName = name, data['email'], data[
                'phone'], data['name']
            maxi = score

    # Return the file with the highest score (highest chances of being the candidate's resume)
    return cv_name, email, phone, FullName
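One usage note on find_cv(): it returns None when the folder is empty, a bare file name when there is exactly one file, and a (name, email, phone, full_name) tuple otherwise, so callers have to branch on the shape of the result. A small sketch:

result = find_cv()

if result is None:
    print("No files to inspect.")
elif isinstance(result, tuple):
    cv_name, email, phone, full_name = result
    print(f"Best candidate file: {cv_name} ({full_name}, {email}, {phone})")
else:
    # Exactly one file in the folder: only its name is returned.
    print(f"Only one file found: {result}")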