Python match_timestamp_by_regの例、resume_parser.recognize.match_timestamp.match_timestamp_by_reg Pythonの例

コード例 #1

0

ファイルを表示

def extract_traininfo(text):
    """Fill a training record from one block of resume text.

    Every line of ``text`` is tested against ``train_reg`` (header line with
    the time span and the training name) and against three labelled-field
    patterns. Later matches overwrite earlier ones.

    Args:
        text: Newline-separated text of a single training section.

    Returns:
        The record created by ``resume_struct.get_training_struct()`` with
        the matched fields filled in.
    """
    record = resume_struct.get_training_struct()

    # Label pattern -> record key; the labels read "training institution",
    # "training location" and "training description".
    labelled_fields = (
        (u"^培训机构：(.+)", "authority"),
        (u"^培训地点：(.+)", "city"),
        (u"^培训描述：(.+)", "description"),
    )

    for row in text.split('\n'):
        header = re.search(train_reg, row)
        if header:
            stamp = match_timestamp.match_timestamp_by_reg(train_reg, row)
            begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
            record["start_time"] = begin
            record["end_time"] = finish
            record["so_far"] = ongoing

            record["name"] = header.group("train").strip()
            # A tab inside the captured name separates authority from name.
            pieces = record["name"].split("\t")
            if len(pieces) == 2:
                record["authority"] = pieces[0]
                record["name"] = pieces[1]

        for pattern, key in labelled_fields:
            found = re.search(pattern, row)
            if found:
                record[key] = found.group(1).strip()

    return record

コード例 #2

0

ファイルを表示

def extract_projectinfo(text):
    """Parse one project section of a resume into a project record.

    A line matching ``project_reg`` supplies the project name and its time
    span. A "项目描述" (project description) or "责任描述" (responsibility
    description) label switches which field the current and following lines
    are appended to.

    Args:
        text: Newline-separated text of a single project section.

    Returns:
        The record created by ``resume_struct.get_project_struct()`` with
        ``ori_text`` set to ``text`` and the matched fields filled in.
    """
    project = resume_struct.get_project_struct()
    project["ori_text"] = text

    in_desc, in_resp = False, False
    # The previous line was never used, so iterate the lines directly
    # instead of pairing them through the Python-2-only ``izip``.
    for line in text.split("\n"):
        m_proj = re.search(project_reg, line)
        if m_proj:
            timestamp = match_timestamp.match_timestamp_by_reg(
                project_reg, line)
            project["name"] = m_proj.group("project").strip()
            project["start_time"], project["end_time"], project[
                "so_far"] = StringUtils.transform_timestamp(timestamp)

        # A label line starts a new section; the label itself is removed so
        # only the text after it is collected.
        if re.search(u"项目描述(:|：)", line):
            line = re.sub(u"项目描述(:|：)", "", line).strip()
            in_desc, in_resp = True, False
        if re.search(u"责任描述(:|：)", line):
            line = re.sub(u"责任描述(:|：)", "", line).strip()
            in_desc, in_resp = False, True

        # Join with a newline only when both sides are non-empty.
        if in_desc:
            sep = '\n' if project["describe"] and line else ''
            project["describe"] += sep + line
        if in_resp:
            sep = '\n' if project["responsibilities"] and line else ''
            project["responsibilities"] += sep + line
    return project

コード例 #3

0

ファイルを表示

def extract_certinfo(text):
    """Fill a certificate record from one block of resume text.

    Each line matching ``certi_reg`` sets the certificate name and the
    start time; a later matching line overwrites an earlier one.

    Args:
        text: Newline-separated text of a single certificate section.

    Returns:
        The record created by ``resume_struct.get_certificate_struct()``.
    """
    record = resume_struct.get_certificate_struct()

    for row in text.split('\n'):
        hit = re.search(certi_reg, row)
        if not hit:
            continue
        stamp = match_timestamp.match_timestamp_by_reg(certi_reg, row)
        record["name"] = hit.group("name").strip()
        # Only the first of the three transformed values is kept.
        begin, _unused_end, _unused_flag = StringUtils.transform_timestamp(
            stamp)
        record["start_time"] = begin

    return record

コード例 #4

0

ファイルを表示

def extract_eduinfo(expblock):
    """Fill an education record from one block of resume text.

    Each line matching ``edu_reg`` provides school, time span, degree and
    discipline in one go; a later matching line overwrites an earlier one.

    Args:
        expblock: Newline-separated text of a single education section.

    Returns:
        The record created by ``resume_struct.get_education_struct()`` with
        ``ori_text`` set to ``expblock``.
    """
    record = resume_struct.get_education_struct()
    record["ori_text"] = expblock

    for row in expblock.split("\n"):
        hit = re.search(edu_reg, row)
        if not hit:
            continue
        stamp = match_timestamp.match_timestamp_by_reg(edu_reg, row)
        record["school_name"] = hit.group("school").strip()
        begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
        record["start_time"] = begin
        record["end_time"] = finish
        record["so_far"] = ongoing
        raw_degree = hit.group('degree')
        record["degree"] = match_education.match_degree(raw_degree, 99)
        record["discipline_name"] = hit.group('discipline').strip()
    return record

コード例 #5

0

ファイルを表示

def extract_projectinfo(text):
    """Parse one project section of a resume into a project record.

    Layout handled: a line matching ``project_reg`` carries the time span,
    the very next line is taken verbatim as the project name, and the
    remaining lines are scanned for labelled fields (position, company) and
    for section labels (introduction, duties, achievements) whose text is
    accumulated line by line.

    Args:
        text: Newline-separated text of a single project section.

    Returns:
        The record created by ``resume_struct.get_project_struct()`` with
        ``ori_text`` set to ``text`` and the matched fields filled in.
    """
    project = resume_struct.get_project_struct()
    project["ori_text"] = text

    # Section label -> record key that collects the text following it.
    # ("achivement" is the key spelling this code has always written to.)
    section_markers = (
        (u"项目简介(:|：)", "describe"),
        (u"项目职责(:|：)", "responsibilities"),
        (u"项目业绩(:|：)", "achivement"),
    )

    time_found = False
    section = None  # key currently being appended to, if any
    for line in text.split('\n'):
        m_proj = re.search(project_reg, line)
        if m_proj:
            timestamp = match_timestamp.match_timestamp_by_reg(
                project_reg, line)
            project["start_time"], project["end_time"], project[
                "so_far"] = StringUtils.transform_timestamp(timestamp)
            time_found = True
            continue
        if time_found:
            # The line right after the time line is the project name.
            project["name"], time_found = line, False
            continue

        # "\\s" keeps the regex identical while avoiding the invalid "\s"
        # escape in a non-raw string literal.
        m_posi = re.search(u"项目职务(:|：)\\s*(?P<posi>.+)", line)
        if m_posi:
            project["position_name"] = m_posi.group("posi")
        m_corp = re.search(u"所在公司(:|：)(?P<corp>.+)", line)
        if m_corp:
            project["corporation_name"] = m_corp.group("corp")

        # Checked in order on the progressively stripped line; the last
        # label found on a line decides the active section.
        for marker, key in section_markers:
            if re.search(marker, line):
                line = re.sub(marker, "", line).strip()
                section = key

        if section:
            # Join with a newline only when both sides are non-empty.
            sep = '\n' if project[section] and line else ''
            project[section] += sep + line
    return project

コード例 #6

0

ファイルを表示

def extract_eduinfo(expblock):
    """Fill an education record, keeping unmatched lines as a description.

    Lines matching ``edu_reg`` provide school, discipline, degree and time
    span. Every other line is stripped and appended to ``discipline_desc``;
    a leading "专业描述" (discipline description) label is removed from the
    accumulated text at the end.

    Args:
        expblock: Newline-separated text of a single education section.

    Returns:
        The record created by ``resume_struct.get_education_struct()`` with
        ``ori_text`` set to ``expblock``.
    """
    record = resume_struct.get_education_struct()
    record["ori_text"] = expblock

    for row in expblock.split("\n"):
        hit = re.search(edu_reg, row)
        if not hit:
            # Newline-join the stripped line onto whatever is collected.
            if record["discipline_desc"]:
                record["discipline_desc"] += "\n" + row.strip()
            else:
                record["discipline_desc"] += row.strip()
            continue

        record["school_name"] = hit.group("school").strip()
        record["discipline_name"] = hit.group("disc").strip()
        raw_degree = hit.group("degree").strip()
        record["degree"] = match_education.match_degree(raw_degree, 99)
        stamp = match_timestamp.match_timestamp_by_reg(edu_reg, row)
        begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
        record["start_time"] = begin
        record["end_time"] = finish
        record["so_far"] = ongoing

    collected = record["discipline_desc"]
    record["discipline_desc"] = re.sub(u"^专业描述(：|:)", "",
                                       collected).strip()
    return record

コード例 #7

0

ファイルを表示

def extract_workinfo(text):
    """Parse one employment section of a resume into a work record.

    Layout handled: a line matching ``work_reg`` carries the time span, the
    next line is stored as the company name and the one after that as the
    position name. Remaining lines are scanned for a region label and for
    the "职责业绩" (duties and achievements) label, after which all text is
    accumulated into ``responsibilities``.

    Args:
        text: Newline-separated text of a single employment section.

    Returns:
        The record created by ``resume_struct.get_emplyment_struct()`` with
        ``ori_text`` set to ``text`` and the matched fields filled in.
    """
    record = resume_struct.get_emplyment_struct()
    record["ori_text"] = text

    # ``previous`` remembers what the preceding line was: "time",
    # "corp_name", or "" when no positional line is expected next.
    previous = ""
    collecting = False
    for row in text.split('\n'):
        if re.search(work_reg, row):
            stamp = match_timestamp.match_timestamp_by_reg(work_reg, row)
            begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
            record["start_time"] = begin
            record["end_time"] = finish
            record["so_far"] = ongoing
            previous = "time"
            continue

        if previous == "time":
            record["corporation_name"] = row
            previous = "corp_name"
            continue

        if previous == "corp_name":
            record["position_name"] = row
            previous = ""
            continue

        region = re.search(u"所在地区(：|:)(?P<loc>.+)", row)
        if region:
            record["city"] = region.group("loc")

        if re.search(u"职责业绩(：|:)", row):
            # Drop the label and everything before it on that line.
            row = re.sub(u".*职责业绩(：|:)", "", row)
            collecting = True

        if collecting:
            if record["responsibilities"] and row:
                record["responsibilities"] += "\n" + row
            else:
                record["responsibilities"] += row

    collected = record["responsibilities"]
    record["responsibilities"] = re.sub(u"^工作描述(:|：)", "",
                                        collected).strip()
    return record

コード例 #8

0

ファイルを表示

def extract_workinfo(text):
    """Parse one employment section of a resume into a work record.

    The lines are consumed by a small state machine:

    * "not found company": look for a ``work_reg`` line carrying the
      company name and time span.
    * "company name": the next line's text before the first "|" is the
      industry.
    * "industry": the next line, split on whitespace, gives department and
      position when it has at least two parts.
    * "position": every further line is appended to ``responsibilities``.

    A line containing the "工作描述" (work description) label jumps straight
    to the "position" state, whatever the current state is.

    Args:
        text: Newline-separated text of a single employment section.

    Returns:
        The record created by ``resume_struct.get_emplyment_struct()`` with
        ``ori_text`` set to ``text`` and the matched fields filled in.
    """
    work = resume_struct.get_emplyment_struct()
    work["ori_text"] = text

    lastline = "not found company"
    for line in text.split('\n'):
        if re.search(u"工作描述(:|：)", line):
            lastline = "position"

        if lastline == "not found company":
            m_company = re.search(work_reg, line)
            if m_company:
                timestamp = match_timestamp.match_timestamp_by_reg(
                    work_reg, line)
                work["corporation_name"] = clean_company_name(
                    m_company.group("company").strip())
                work["start_time"], work["end_time"], work[
                    "so_far"] = StringUtils.transform_timestamp(timestamp)
                lastline = "company name"
        elif lastline == "company name":
            # str.split always returns at least one item, so the first
            # field can be taken without a length check.
            work["industry_name"] = line.split("|")[0].strip()
            lastline = "industry"
        elif lastline == "industry":
            # Raw string: "\s" is not a valid escape in a normal literal.
            items = re.split(r"\s+", line)
            if len(items) > 1:
                work["architecture_name"] = items[0]
                work["position_name"] = items[1]
            lastline = "position"
        elif lastline == "position":
            work["responsibilities"] += '\n' + line if work[
                "responsibilities"] else line

    work["responsibilities"] = re.sub(u"^工作描述(:|：)", "",
                                      work["responsibilities"]).strip()
    return work

コード例 #9

0

ファイルを表示

def extract_workinfo(text):
    """Parse one employment section of a resume into a work record.

    Per line, in this order:

    * a ``work_reg`` match sets the cleaned company name and the time span;
    * a "职位名称…部门…" (position … department …) line sets position and
      department;
    * an industry label sets ``industry_name`` and ends handling of that
      line;
    * once an industry line has been seen and no position is known yet, the
      next line is read as "department<TAB>position" or just "position";
    * after that, every line is appended to ``responsibilities``.

    Args:
        text: Newline-separated text of a single employment section.

    Returns:
        The record created by ``resume_struct.get_emplyment_struct()`` with
        ``ori_text`` set to ``text`` and the matched fields filled in.
    """
    work = resume_struct.get_emplyment_struct()
    work["ori_text"] = text
    last_industry = False

    for line in text.split('\n'):
        m_company = re.search(work_reg, line)
        if m_company:
            timestamp = match_timestamp.match_timestamp_by_reg(work_reg, line)
            work["corporation_name"] = clean_company_name(
                m_company.group("company").strip())
            work["start_time"], work["end_time"], work[
                "so_far"] = StringUtils.transform_timestamp(timestamp)

        m_position = re.search(
            u"职位名称(：|:)(?P<pos>.+)部门(：|:)(?P<arc>.+)", line)
        if m_position:
            # Remove the "(part-time)" marker from the position title.
            work["position_name"] = m_position.group("pos").replace(
                u"(兼职)", "").strip()
            work["architecture_name"] = m_position.group("arc")

        # "\\s" keeps the regex identical while avoiding the invalid "\s"
        # escape in a non-raw string literal.
        m_industry = re.search(
            u"(行业|所属行业)(：|:)\\s*(?P<ind>.+?)(\\s|$)", line)
        if m_industry:
            work["industry_name"] = m_industry.group("ind").strip()
            last_industry = True
            continue

        if not work["position_name"] and last_industry:
            parts = line.split("\t")
            if len(parts) == 2:
                work["architecture_name"] = parts[0].strip()
                work["position_name"] = parts[1].strip()
            elif len(parts) == 1:
                work["position_name"] = parts[0].strip()
            continue

        if last_industry:
            work["responsibilities"] += "\n" + line if work[
                "responsibilities"] else line

    work["responsibilities"] = re.sub(u"^工作描述(:|：)", "",
                                      work["responsibilities"]).strip()
    return work

コード例 #10

0

ファイルを表示

def extract_eduinfo(expblock):
    """Parse one education section of a resume into an education record.

    The lines are consumed by a small state machine:

    * "not found school": look for an ``edu_reg`` line carrying the school
      name and time span.
    * "school": the next line is "degree|discipline"; without a "|" it is
      treated as a degree if ``match_degree`` recognises it, otherwise as
      the discipline name.
    * "degree": every further line is appended to ``discipline_desc``.

    A leading "专业描述" (discipline description) label is removed from the
    accumulated description at the end.

    Args:
        expblock: Newline-separated text of a single education section.

    Returns:
        The record created by ``resume_struct.get_education_struct()`` with
        ``ori_text`` set to ``expblock`` and the matched fields filled in.
    """
    edu = resume_struct.get_education_struct()
    edu["ori_text"] = expblock

    lastline = "not found school"
    for line in expblock.split("\n"):
        if lastline == "not found school":
            m = re.search(edu_reg, line)
            if m:
                timestamp = match_timestamp.match_timestamp_by_reg(
                    edu_reg, line)
                # Strip the "overseas experience" tag from the school name.
                edu["school_name"] = re.sub(u"海外经历", "",
                                            m.group("school")).strip()
                edu["start_time"], edu["end_time"], edu[
                    "so_far"] = StringUtils.transform_timestamp(timestamp)
                lastline = "school"
        elif lastline == "school":
            items = line.split("|")
            if len(items) >= 2:
                edu["degree"] = match_education.match_degree(items[0], 99)
                edu["degree_ori"] = items[0].strip()
                edu["discipline_name"] = items[1].strip()
            else:
                # Test the text itself, not the surrounding list: the
                # original passed ``items`` here, unlike every other call.
                degree = match_education.match_degree(items[0])
                if degree:
                    edu["degree"] = degree
                    edu["degree_ori"] = items[0].strip()
                else:
                    edu["discipline_name"] = items[0].strip()
            lastline = "degree"
        elif lastline == "degree":
            edu["discipline_desc"] += '\n' + line.strip(
            ) if edu["discipline_desc"] else line.strip()

    edu["discipline_desc"] = re.sub(u"^专业描述(：|:)", "",
                                    edu["discipline_desc"]).strip()
    return edu

コード例 #11

0

ファイルを表示

def extract_eduinfo(expblock):
    """Fill an education record from one block of resume text.

    Layout handled: a line matching ``edu_reg`` carries the time span and
    the following line is the school name. Any line other than the time
    line, including the school line itself, is also checked for the "专业"
    (discipline) and "学历" (degree) labels.

    Args:
        expblock: Newline-separated text of a single education section.

    Returns:
        The record created by ``resume_struct.get_education_struct()`` with
        ``ori_text`` set to ``expblock`` and the matched fields filled in.
    """
    record = resume_struct.get_education_struct()
    record["ori_text"] = expblock

    expect_school = False
    for row in expblock.split("\n"):
        if re.search(edu_reg, row):
            stamp = match_timestamp.match_timestamp_by_reg(edu_reg, row)
            begin, finish, ongoing = StringUtils.transform_timestamp(stamp)
            record["start_time"] = begin
            record["end_time"] = finish
            record["so_far"] = ongoing
            expect_school = True
            continue

        if expect_school:
            record["school_name"] = row.strip()
            expect_school = False

        discipline = re.search(u"专业(:|：)(?P<dis>.+)", row)
        if discipline:
            record["discipline_name"] = discipline.group("dis").strip()

        degree = re.search(u"学历(:|：)(?P<deg>.+)", row)
        if degree:
            raw_degree = degree.group("deg").strip()
            record["degree"] = match_education.match_degree(raw_degree)
    return record

コード例 #12

0

ファイルを表示

def extract_projectinfo(text):
    """Parse one project section of a resume into a project record.

    Lines are skipped until one matches ``project_reg``; that line supplies
    the project name (with a trailing "已关联" tag removed) and the time
    span. From the next line on, a "项目描述" (project description) or
    "责任描述" (responsibility description) label selects the field that the
    current and following lines are appended to.

    Args:
        text: Newline-separated text of a single project section.

    Returns:
        The record created by ``resume_struct.get_project_struct()`` with
        ``ori_text`` set to ``text`` and the matched fields filled in.
    """
    record = resume_struct.get_project_struct()
    record["ori_text"] = text

    # Section label -> record key that collects the text following it.
    section_labels = (
        (u"项目描述(:|：)", "describe"),
        (u"责任描述(:|：)", "responsibilities"),
    )

    header_seen = False
    target = None  # key currently being appended to, if any
    for row in text.split('\n'):
        if not header_seen:
            hit = re.search(project_reg, row)
            if hit:
                stamp = match_timestamp.match_timestamp_by_reg(
                    project_reg, row)
                title = hit.group("project").strip()
                record["name"] = re.sub(u"已关联$", "", title).strip()
                begin, finish, ongoing = StringUtils.transform_timestamp(
                    stamp)
                record["start_time"] = begin
                record["end_time"] = finish
                record["so_far"] = ongoing
                header_seen = True
            # The header line itself is never collected as section text.
            continue

        # Checked in order on the progressively stripped line; the last
        # label found on a line decides the active section.
        for label, key in section_labels:
            if re.search(label, row):
                row = re.sub(label, "", row).strip()
                target = key

        if target:
            if record[target]:
                record[target] += '\n' + row
            else:
                record[target] += row
    return record