def _extract_candidate_name(text, nlp):
    """Return the candidate name for a single text, with a regex fallback.

    The NLP-based extractor is tried first. If it yields the "NOT FOUND"
    sentinel, ``field_extraction.NAME_REGEX`` is searched (case-insensitively)
    in the same text and the full match is used instead. When the regex does
    not match either, the "NOT FOUND" sentinel is returned unchanged.

    :param text: raw text of one observation
    :param nlp: NLP object forwarded to the name extractor
    :return: the extracted name, or "NOT FOUND"
    """
    name = field_extraction.candidate_name_extractor(text, nlp)
    if name != "NOT FOUND":
        return name

    match = re.search(field_extraction.NAME_REGEX, text, re.IGNORECASE)
    # Keep the sentinel rather than failing on ``None[0]`` when nothing matches.
    return match[0] if match else name


def transform(observations, nlp):
    """Add extracted-field columns to ``observations`` from its 'text' column.

    Columns written: 'candidate_name', 'email' and 'phone'. The frame is then
    passed through ``field_extraction.extract_fields`` and the step's schema
    is archived via ``lib.archive_dataset_schemas``.

    :param observations: DataFrame with a 'text' column; it is modified in
        place before being handed to ``extract_fields``
    :param nlp: NLP object forwarded to the name extractor; returned untouched
    :return: tuple ``(observations, nlp)``
    """
    logging.info('Begin transform')

    # Extract candidate name. The fallback is evaluated row by row: comparing
    # the whole column to "NOT FOUND" inside an ``if`` is ambiguous for a
    # Series, and ``re.search`` only accepts a single string.
    observations['candidate_name'] = observations['text'].apply(
        lambda x: _extract_candidate_name(x, nlp))

    # Extract contact fields
    observations['email'] = observations['text'].apply(
        lambda x: lib.term_match(x, field_extraction.EMAIL_REGEX))
    observations['phone'] = observations['text'].apply(
        lambda x: lib.term_match(x, field_extraction.PHONE_REGEX))

    # Extract skills
    observations = field_extraction.extract_fields(observations)

    # Archive schema and return
    lib.archive_dataset_schemas('transform', locals(), globals())
    logging.info('End transform')
    return observations, nlp
def transform(observations, nlp):
    """Fill ``observations`` with fields extracted from its 'text' column.

    Each new column is produced by applying one extractor to every value of
    'text'. Columns written, in order: 'candidate_name', 'email', 'phone',
    'university' and 'Major'. Afterwards the frame is run through
    ``field_extraction.extract_fields`` and the step's schema is archived.

    :param observations: DataFrame with a 'text' column; modified in place
        before being handed to ``extract_fields``
    :param nlp: NLP object forwarded to the NLP-based extractors; returned
        untouched
    :return: tuple ``(observations, nlp)``
    """
    logging.info('Begin transform')

    # Candidate name (NLP-based extractor receives each text plus ``nlp``).
    observations['candidate_name'] = observations['text'].apply(
        field_extraction.candidate_name_extractor, args=(nlp,))

    # Contact details: each text is matched against the relevant pattern.
    observations['email'] = observations['text'].apply(
        lib.term_match, args=(field_extraction.EMAIL_REGEX,))
    observations['phone'] = observations['text'].apply(
        lib.term_match, args=(field_extraction.PHONE_REGEX,))

    # Education data.
    # ``count`` is not read again in this function, but it is part of the
    # ``locals()`` snapshot handed to the archiver below, so it is retained.
    count = 0
    observations['university'] = observations['text'].apply(
        field_extraction.university_extractor, args=(nlp,))
    observations['Major'] = observations['text'].apply(
        field_extraction.major_extractor, args=(nlp,))

    # Skills
    observations = field_extraction.extract_fields(observations)

    # Record the schema for this step, then hand both objects back.
    lib.archive_dataset_schemas('transform', locals(), globals())
    logging.info('End transform')
    return observations, nlp