def break_up_jobs_file(self):
    """Split "Current Jobs.csv" into several smaller CSVs.

    The Anon analysis tool reports seem to fail when a query contains
    more than 4 or 5k jobs, so the master job list is rewritten as
    "Current Jobs_<n>.csv" files of at most 3000 jobs each.
    """
    job_file = files.FileHandler()
    # Single pass over the source file.  (The original implementation
    # read the whole file twice: once just to count rows, then again to
    # collect them; it also let the first chunk grow to 3001 jobs.)
    all_jobs = []
    for row in job_file.open_csv(self.automated_files + "Current Jobs.csv"):
        all_jobs.append(row[job_file.header.index("ANSWER VALUE")])

    chunk_size = 3000  # stay well under the ~4-5k failure point
    file_count = 1
    for start in range(0, len(all_jobs), chunk_size):
        out_path = (self.automated_files + "Current Jobs_" +
                    str(file_count) + ".csv")
        # 'wb' is required by the Python 2 csv writer.
        with open(out_path, 'wb') as W:
            writer = csv.writer(W, delimiter=',')
            # Each row carries a trailing empty column, matching the
            # original output format.
            writer.writerow(("ANSWER VALUE", ""))
            for job in all_jobs[start:start + chunk_size]:
                writer.writerow((job, ""))
        file_count += 1
def gather_data(self): logs = li.CSVLogIterator(self.start_date, self.current_date) logs.find_all_logs_survey() file_run = sorted(logs.log_files_to_use) handler = fw.FileHandler() for files in file_run: try: print "Working on " + files for row in handler.open_csv(log_file_path + files): try: final_job = row[handler.header.index("Profile job")] unprompted_1 = row[handler.header.index( 'Profile job.unprompted')] unprompted_2 = row[handler.header.index( 'Job\Job Unprompted')] final_unprompted = self.determine_unprompted( unprompted_1, unprompted_2) combined = final_job, final_unprompted if "" not in combined: self.overall_list.append(combined) self.individual_list.append(final_job) self.individual_list.append(final_unprompted) except ValueError: continue except IOError: continue print "File data completely gathered"
def soc_code_map():
    """Build a {confirmed job title: SOC code} lookup from CFM.csv."""
    handler = fw.FileHandler()
    mapping = {}
    for record in handler.open_csv(auto_path + "CFM.csv"):
        title = record[handler.header.index("All Confirmed Job Titles")]
        code = record[handler.header.index("SOC Code")]
        # Later rows win if a title appears more than once.
        mapping[title] = code
    return mapping
def extract_data(self): print "Unzipping and modifying Analysis Tool results..." fw.FileHandler().zip_file_open('Ryan Job Rollup Suggestor EAC.zip', self.temp_out_files) extract.ExtractXMLData( 'Ryan Job Rollup Suggestor EAC_0.xml').fix_xml_encoding( self.temp_out_files) count_dict = extract.ExtractXMLData(self.temp_out_files + 'Ryan Job Rollup Suggestor EAC_0.xml')\ .overall_medians_list_return() return count_dict
def temp_pull_current_jobs(self):
    """Return the unique, non-empty job titles from Current Jobs.csv."""
    handler = files.FileHandler()
    titles = []
    for record in handler.open_csv(self.automated_files + "Current Jobs.csv"):
        title = record[handler.header.index("ANSWER VALUE")]
        if title:  # drop blank answer values
            titles.append(title)
    return list(set(titles))
def pull_all_current_job_rollups(self):
    """Return the unique, non-empty rollups from Current Job Rollups.tsv."""
    handler = files.FileHandler()
    rollups = []
    for record in handler.open_tsv(
            self.automated_files + "Current Job Rollups.tsv"):
        value = record[handler.header.index("Read-only Answervalue")]
        if value:  # drop blank rollup values
            rollups.append(value)
    return list(set(rollups))
def create_full_task_dictionary(self): print "Gathering all tasks into a job to task dictionary..." handler = files.FileHandler() task_dict = {} for line in handler.open_csv(self.automated_files + 'Current Tasks.csv'): job = line[handler.header.index("JOB")] tasks = line[handler.header.index("TASK")] try: task_dict[job] += " " + tasks.lower() except KeyError: task_dict[job] = tasks.lower() return task_dict
def pull_all_current_jobs(self): """This is not currently working. Something is wrong on the C# code side me thinks""" job_list = [] reader = files.FileHandler() for row in reader.open_tsv(self.automated_files + "Current Jobs.tsv"): try: job_title = row[reader.header.index("ANSWER VALUE")] if job_title != "": job_list.append(job_title) except IndexError: print row return list(set(job_list))
def find_breadth_x_soc_codes(self, x):
    """Return the unique SOC codes in a breadth column of Onet_Breadths.csv.

    `x` selects the column: '30', '40' or '50'; any other value falls
    back to the 'All Breadth 60' column (same default as before).
    """
    column_by_breadth = {
        '30': 'All Breadth 30',
        '40': 'All Breadth 40',
        '50': 'All Breadth 50',
    }
    column = column_by_breadth.get(x, 'All Breadth 60')
    handler = fw.FileHandler()
    codes = []
    for record in handler.open_csv(self.main_file_path + "Onet_Breadths.csv"):
        codes.append(record[handler.header.index(column)])
    return list(set(codes))
def unprompted_pull(self): matching_list = [] append = matching_list.append print "Sorting through the unprompted strings file" handler = fw.FileHandler() for row in handler.open_csv(unprompted_path + self.current_unprompted): unprompted = row[1] confirmed = row[0] mutual_information = row[5] word_similarity = row[6] all = unprompted, confirmed, mutual_information, word_similarity if float(mutual_information) > 12 and float(word_similarity) < .9: append(all) else: continue return matching_list
# NOTE(review): truncated fragment. This is the tail of some helper
# (`return jaccard` -- its `def` is not in this chunk) followed by the
# start of a probability-cache merge script that is cut off mid-statement
# (the body of `if write_header is True:` is missing). Left byte-identical
# pending recovery of the full text -- do not edit until the missing
# pieces are located.
return jaccard print "Gathering probabilities..." combo_probs = {} with open("C:\\users\\ryanm\\desktop\\ProbabilityCache.csv", 'rb') as R: reader = csv.reader(R, delimiter=',') header = reader.next() for row in reader: combo = row[0].lower(), row[1].lower() combo_probs[combo] = row[2] write_header = True handler = fw.FileHandler() row_count = 0 with open( "C:\\users\\ryanm\\desktop\\truth_data_with_basesalary_comparisons_and_prob.csv", 'wb') as W: writer = csv.writer(W, lineterminator='\n') for line in handler.open_csv( "C:\\users\\ryanm\\desktop\\truth_data_with_basesalary_comparisons.csv" ): row_count += 1 if row_count % 1000 == 0: print "Processed %s rows..." % row_count if write_header is True:
def unzip_file(self, zip_file, out_file=None):
    """Extract `zip_file` into `out_file`, or self.out_files by default.

    Args:
        zip_file: path of the archive to open.
        out_file: optional destination directory; when None (the
            default) the archive is extracted to self.out_files.
    """
    # PEP 8: compare against None with `is`, not `==`; the two call
    # sites are also collapsed into one.
    destination = self.out_files if out_file is None else out_file
    files.FileHandler().zip_file_open(zip_file, destination)