def extract_from_date(from_date_string, to_date_string):
    """Return the start date of a work experience as a "d,m,y" string.

    A year value of 100000 is the project's sentinel for an unparseable
    date; a falsy year means the year is missing.  When the from-date is
    unusable the function falls back to the to-date, or reconstructs the
    start as (to-date minus duration) using 30-day months / 365-day years.
    """
    current_date = format_date(from_date_string)
    d, m, y = utils.formatted_date(current_date)
    if y == 100000:
        # From-date is the unparseable sentinel: use the to-date instead.
        current_date = format_date(to_date_string)
        d, m, y = utils.formatted_date(current_date)
    elif not y:
        # From-date has no year: derive start = to_date - duration.
        [issue_flag, date_diff] = utils.date_difference(from_date_string,
                                                        to_date_string)
        if not issue_flag:
            last_date = format_date(to_date_string)
            ld, lm, ly = utils.formatted_date(last_date)
            if ly and ly != 100000:
                # Flatten the to-date to days, subtract the duration...
                to_date = ld + lm * 30 + ly * 365 - date_diff
                # ...then decompose back into (d, m, y).
                # BUG FIX: this previously used `% 12` and `/ 12`, which
                # does not invert `d + m*30 + y*365`; months are 30 days.
                d, m, y = int((to_date % 365) % 30), int(
                    (to_date % 365) / 30), int(to_date / 365)
    return "{},{},{}".format(d, m, y)
def compute_median_career_time(db_utils, work_meta_info, min_conf=0, soc_level=configurator.commons.SOC_LEVEL):
    """Collect, per (title, SOC-code prefix), the time taken to reach that
    title from the start of each candidate's career.

    Walks every non-duplicate resume's work-experience slots from oldest
    (index 14) to newest (index 0) and records the date difference between
    the career start date and each subsequent valid experience.

    Returns a dict mapping (title, soc_prefix) -> list of durations.
    """
    first_index, last_index = 1, 15
    title_durations = {}
    processed = 0
    # Cursor over every resume not flagged as a duplicate.
    resumes = db_utils.fetch_data("resume", "cursor", {"Duplicate": False})
    for resume in resumes:
        processed += 1
        if processed % 10000 == 0:
            print("Processed {} resumes".format(processed))
        career_start_date = None
        # Oldest experience first: indices 14 .. 0.
        for index in reversed(range(first_index - 1, last_index)):
            meta = work_meta_info[index]
            # Skip slots that fail the validity / confidence filter.
            if not is_valid_work_experience(resume, meta, min_conf):
                continue
            current_date = extract_from_date(resume[meta['from']],
                                             resume[meta['to']])
            year = int(current_date.split(',')[2])
            # Year 100000 is the "unparseable" sentinel; 0 means missing.
            if year == 100000 or not year:
                continue
            # First valid experience marks the start of the career.
            if not career_start_date:
                career_start_date = current_date
            key = (resume[meta['title']], resume[meta['soc']][:soc_level])
            durations = title_durations.setdefault(key, [])
            issue_flag, time_to_reach = utils.date_difference(
                career_start_date, current_date)
            if not issue_flag:
                durations.append(time_to_reach)
    return title_durations
def create_major_titles_map(work_meta_info, end_title_limit, start_title_limit, majors, degrees):
    """Build a nested map of
    major -> degree -> {"valid_resumes": count,
                        "title_dict": {title: {cumulative_exp: count}}}
    from resumes whose latest education matches *majors* and *degrees*.

    NOTE(review): end_title_limit and start_title_limit are unused in this
    function body — confirm whether they are needed.
    """
    # Resume document field names.
    major_field = "latest_ed_major"
    degree_field = "latest_ed_degree"
    latest_ed_from = "latest_ed_from"
    latest_ed_to = "latest_ed_to"
    resumes = dbutils.fetch_data('resume', 'cursor', {
        'latest_ed_major': {
            '$in': majors
        },
        'latest_ed_degree': {
            '$in': degrees
        }
    })
    major_title_map = {}
    resume_count = 0
    for resume in resumes:
        degree = norm_degree(resume[degree_field])
        # Education start date; year 100000 is the "unparseable" sentinel
        # and a falsy year means the year is missing.
        date_string = format_date(resume[latest_ed_from])
        d, m, y = utils.formatted_date(date_string)
        is_valid_date_flag = True
        if y == 100000 or not y:
            is_valid_date_flag = False
        if not is_valid_date_flag:
            # Fall back to the education end date.
            date_string = format_date(resume[latest_ed_to])
            d, m, y = utils.formatted_date(date_string)
            is_valid_date_flag = True
            if y == 100000 or not y:
                is_valid_date_flag = False
        if resume[major_field] and degree and is_valid_date_flag:
            # Lazily create the major/degree buckets.
            if resume[major_field] not in major_title_map:
                major_title_map[resume[major_field]] = {}
            if degree not in major_title_map[resume[major_field]]:
                major_title_map[resume[major_field]][degree] = {}
                major_title_map[
                    resume[major_field]][degree]["valid_resumes"] = 0
                major_title_map[resume[major_field]][degree]["title_dict"] = {}
            major_title_map[resume[major_field]][degree]["valid_resumes"] += 1
            total_exp = 0
            for work_info_obj in work_meta_info:
                if is_valid_work_info(resume, work_info_obj):
                    # Work-experience end date flattened to days using
                    # 30-day months and 360-day (12 * 30) years.
                    date_string = format_date(resume[work_info_obj['to']])
                    wtd, wtm, wty = utils.formatted_date(date_string)
                    is_valid_date_flag = True
                    to_years = 0
                    from_years = 0
                    if wty == 100000 or not wty:
                        is_valid_date_flag = False
                    else:
                        to_years += (wtd + wtm * 30 + wty * 12 * 30)
                    wfy = 0
                    if not is_valid_date_flag:
                        # End date unusable: try the start date instead.
                        date_string = format_date(
                            resume[work_info_obj['from']])
                        wfd, wfm, wfy = utils.formatted_date(date_string)
                        is_valid_date_flag = True
                        if wfy == 100000 or not wfy:
                            is_valid_date_flag = False
                        else:
                            from_years += (wfd + wfm * 30 + wfy * 12 * 30)
                    # Count the experience when either date carries the
                    # sentinel year, or when it ends (or, lacking an end
                    # date, starts) on/after the education date flattened
                    # above into (d + m*30 + y*12*30).
                    if wfy == 100000 or wty == 100000 or (
                            is_valid_date_flag and (to_years > (d
                            + m * 30 + y * 12 * 30) or
                            (not to_years and from_years >=
                             (d + m * 30 + y * 12 * 30)))):
                        [issue_flag, years_of_exp] = utils.date_difference(
                            resume[work_info_obj["from"]],
                            resume[work_info_obj["to"]])
                        if not issue_flag:
                            if resume[work_info_obj[
                                    "title"]] not in major_title_map[resume[
                                        major_field]][degree]["title_dict"]:
                                major_title_map[
                                    resume[major_field]][degree]["title_dict"][
                                        resume[work_info_obj["title"]]] = {}
                            # Bucket this title by the cumulative experience
                            # accumulated so far (units per date_difference —
                            # presumably days; verify in utils).
                            exp = int(total_exp)
                            if exp not in major_title_map[
                                    resume[major_field]][degree]["title_dict"][
                                        resume[work_info_obj["title"]]]:
                                major_title_map[resume[major_field]][degree][
                                    "title_dict"][resume[
                                        work_info_obj["title"]]][exp] = 0
                            major_title_map[
                                resume[major_field]][degree]["title_dict"][
                                    resume[work_info_obj["title"]]][exp] += 1
                            total_exp += years_of_exp
        resume_count += 1
        if resume_count % 10000 == 0:
            print "{} Resumes Processed".format(resume_count)
    print "{} Resumes Processed".format(resume_count)
    return major_title_map
def create_edges_from_resume(resume, work_meta_info, min_conf, edges, nodes, major):
    """Add this resume's title-transition edges into *edges* and *nodes*.

    Walks the 15 work-experience slots from oldest (index 14) to newest
    (index 0); each adjacent pair of distinct, confidently-matched titles
    becomes a directed edge with a transition-time sample.  A second pass
    adds fractionally weighted "skip" edges between titles two or more
    hops apart on the same career path.

    Mutates *edges* (dict keyed by (from_title, to_title) tuples) and
    *nodes* (set of titles) in place; returns None.

    NOTE(review): *major* is unused in this function — confirm intent.
    """
    [d, m, y] = [
        int(x) for x in extract_from_date(resume["latest_ed_from"],
                                          resume["latest_ed_to"]).split(',')
    ]
    # Year 100000 is the "unparseable date" sentinel; falsy means missing.
    if y == 100000 or not y:
        return
    valid_index = False
    path = []
    title_time_map = {}
    for index in reversed(range(1, 15)):
        current_work_ex_meta_info = work_meta_info[index]
        next_work_ex_meta_info = work_meta_info[index - 1]
        if (current_work_ex_meta_info["title"] in resume
                and resume[current_work_ex_meta_info["title"]]
                and current_work_ex_meta_info["match"] in resume
                and resume[current_work_ex_meta_info["match"]]
                and current_work_ex_meta_info["confidence"] in resume
                and resume[current_work_ex_meta_info["confidence"]] >= min_conf
                and next_work_ex_meta_info["title"] in resume
                and resume[next_work_ex_meta_info["title"]]
                and next_work_ex_meta_info["match"] in resume
                and resume[next_work_ex_meta_info["match"]]
                and next_work_ex_meta_info["confidence"] in resume
                and resume[next_work_ex_meta_info["confidence"]] >= min_conf
                and resume[current_work_ex_meta_info["title"]] !=
                resume[next_work_ex_meta_info["title"]]):
            # Valid edge from the experience at `index` to `index - 1`.
            current_date = extract_from_date(
                resume[current_work_ex_meta_info['from']],
                resume[current_work_ex_meta_info['to']])
            [cd, cm, cy] = [int(x) for x in current_date.split(',')]
            # Only start the path once an experience begins on/after the
            # latest education date (both flattened to approximate days).
            if valid_index or (d + m * 30 + y * 365) <= (cd + cm * 30 +
                                                         cy * 365):
                from_title = resume[current_work_ex_meta_info["title"]]
                to_title = resume[next_work_ex_meta_info["title"]]
                if not valid_index:
                    # First edge on this career path.
                    valid_index = True
                    path.append(from_title)
                    title_time_map[from_title] = current_date
                path.append(to_title)
                edge = (from_title, to_title)
                if edge not in edges:
                    edges[edge] = {'count': 0, 'time_intervals': []}
                # set.add is idempotent; no membership check needed.
                nodes.add(from_title)
                nodes.add(to_title)
                edges[edge]['count'] += 1
                next_date = extract_from_date(
                    resume[next_work_ex_meta_info['from']],
                    resume[next_work_ex_meta_info['to']])
                title_time_map[to_title] = next_date
                [issue_flag, date_diff] = utils.date_difference(
                    current_date, next_date)
                if not issue_flag:
                    edges[edge]['time_intervals'].append(date_diff)
    # Second pass: weighted skip-edges between titles >= 2 hops apart.
    for i, node1 in enumerate(path):
        current_date = title_time_map.get(node1, '')
        for j, node2 in enumerate(path[i + 2:]):
            edge = (node1, node2)
            if edge not in edges:
                edges[edge] = {'count': 0, 'time_intervals': []}
            # BUG FIX: under Python 2, `1 / (j + 2)` is integer division
            # and always 0, so skip-edges never gained any weight.  Use a
            # float so the edge is weighted by 1/(hop distance).
            edges[edge]['count'] += 1.0 / (j + 2)
            next_date = title_time_map.get(node2, '')
            if current_date and next_date:
                [issue_flag, date_diff] = utils.date_difference(
                    current_date, next_date)
                if not issue_flag:
                    edges[edge]['time_intervals'].append(date_diff)
def run(self):
    """Gather inputs from the UI, compute total interest paid under two
    term-length scenarios, and write the comparison to a user-chosen CSV.

    Shows a critical message box and returns early on bad dates, invalid
    numeric input, or an unrecognised interest type.
    """
    desktop = os.path.expanduser('~/Desktop/output.csv')
    filename = qtw.QFileDialog.getSaveFileName(self, 'Save File', desktop)
    # Build the (months-at-rate, monthly-rate) schedule from the table.
    n_rows = self.ui.rate_changes_table.rowCount()
    rate_change_list = []
    previous_date = self.ui.date_input.date().toPyDate()
    # Convert annual percentage to a monthly fractional rate.
    rate_value = float(self.ui.initial_interest_input.text()) / (100 * 12)
    for i in range(n_rows):
        change_date = self.ui.rate_changes_table.item(i, 0)
        change_rate = self.ui.rate_changes_table.item(i, 1)
        if change_date is None or change_rate is None:
            continue  # ignore incomplete rows
        change_date_value = datetime.strptime(change_date.text(), '%d/%m/%Y')
        n_months = date_difference(change_date_value, previous_date)
        change_rate_value = float(change_rate.text()) / (100 * 12)
        if not n_months > 0:
            qtw.QMessageBox.critical(
                self,
                'Warning',
                'Please ensure dates are in the correct order.'
            )
            return
        rate_change_list.append((n_months, rate_value))
        rate_value = change_rate_value
        previous_date = change_date_value
    # Sentinel entry: the final rate applies for the rest of the term.
    rate_change_list.append((0, rate_value))
    if not filename[0]:
        return  # user cancelled the save dialog
    try:
        mortgage_value = int(self.ui.mortgage_input.text())
        interest_rate = float(self.ui.initial_interest_input.text()) / (100 * 12)
        term_length_base = int(self.ui.base_full_term_input.text())
        term_length_second = int(self.ui.second_scen_full_term_input.text())
        if self.ui.interest_type_box.currentText() == 'PIO':
            # Interest-only period lengths, only needed for partial IO.
            IO_term_length_base = int(self.ui.base_IIOT_input.text())
            IO_term_length_second = int(self.ui.second_scen_IIOT_input.text())
    except ValueError:
        qtw.QMessageBox.critical(
            self,
            'Warning',
            'Please ensure all information is put in correctly'
        )
        return
    interest_type = self.ui.interest_type_box.currentText()
    if interest_type == 'ANN':
        interest_paid_base = annuity_total(
            mortgage_value, interest_rate, term_length_base,
            rate_change_list) - mortgage_value
        interest_paid_second = annuity_total(
            mortgage_value, interest_rate, term_length_second,
            rate_change_list) - mortgage_value
    elif interest_type == 'IOM':
        interest_paid_base = interest_only_total(
            mortgage_value, interest_rate, term_length_base,
            rate_change_list) - mortgage_value
        interest_paid_second = interest_only_total(
            mortgage_value, interest_rate, term_length_second,
            rate_change_list) - mortgage_value
    elif interest_type == 'PIO':
        interest_paid_base = partial_interest_only_total(
            mortgage_value, interest_rate, term_length_base,
            IO_term_length_base, rate_change_list) - mortgage_value
        interest_paid_second = partial_interest_only_total(
            mortgage_value, interest_rate, term_length_second,
            IO_term_length_second, rate_change_list) - mortgage_value
    else:
        # BUG FIX: previously an unrecognised selection fell through and
        # crashed with a NameError on interest_paid_base.
        qtw.QMessageBox.critical(
            self,
            'Warning',
            'Please select a valid interest type.'
        )
        return
    base_minus_second = interest_paid_base - interest_paid_second
    labels = [
        'Mortgage Value', 'Initial Interest Rate', 'Base Term Length',
        'Second Scenario Term Length', 'Interest Paid Base',
        'Interest Paid Second', 'Difference'
    ]
    data = [
        mortgage_value,
        100 * 12 * interest_rate,  # back to annual percentage for display
        term_length_base,
        term_length_second,
        interest_paid_base,
        interest_paid_second,
        base_minus_second,
    ]
    # Echo the rate schedule (as entered) into the output file.
    new_rate_list = [(self.ui.date_input.date().toString('dd/MM/yyyy'),
                      100 * 12 * interest_rate)]
    for i in range(n_rows):
        change_date = self.ui.rate_changes_table.item(i, 0)
        change_rate = self.ui.rate_changes_table.item(i, 1)
        if change_date is not None and change_rate is not None:
            new_rate_list.append((change_date.text(), change_rate.text()))
    write_to_file(filename[0], labels, data, new_rate_list)