Esempio n. 1
0
    def break_up_jobs_file(self):
        """Split "Current Jobs.csv" into numbered chunk files.

        The Anon analysis tool's reports seem to fail when a query holds
        more than ~4-5k jobs, so the job list is rewritten as a series of
        "Current Jobs_<n>.csv" files of at most ~3000 rows each.
        """
        job_file = files.FileHandler()
        jobs_list = []
        file_count = 1

        def _flush_chunk():
            # Write the jobs accumulated so far to the next numbered file.
            # Two-column rows (value, "") match the tool's expected layout.
            with open(
                    self.automated_files + "Current Jobs_" +
                    str(file_count) + ".csv", 'wb') as W:
                writer = csv.writer(W, delimiter=',')
                writer.writerow(("ANSWER VALUE", ""))
                for job in jobs_list:
                    writer.writerow((job, ""))

        # Single pass: the previous implementation read the whole CSV an
        # extra time just to pre-count rows so it could detect the final
        # one; flushing any leftovers after the loop makes that pass
        # unnecessary while producing identical chunk files.
        for i, row in enumerate(
                job_file.open_csv(self.automated_files + "Current Jobs.csv")):
            jobs_list.append(row[job_file.header.index("ANSWER VALUE")])
            if i % 3000 == 0 and i != 0:
                _flush_chunk()
                file_count += 1
                jobs_list = []

        if jobs_list:
            _flush_chunk()
Esempio n. 2
0
    def gather_data(self):

        logs = li.CSVLogIterator(self.start_date, self.current_date)
        logs.find_all_logs_survey()
        file_run = sorted(logs.log_files_to_use)

        handler = fw.FileHandler()

        for files in file_run:
            try:
                print "Working on " + files
                for row in handler.open_csv(log_file_path + files):
                    try:
                        final_job = row[handler.header.index("Profile job")]
                        unprompted_1 = row[handler.header.index(
                            'Profile job.unprompted')]
                        unprompted_2 = row[handler.header.index(
                            'Job\Job Unprompted')]
                        final_unprompted = self.determine_unprompted(
                            unprompted_1, unprompted_2)
                        combined = final_job, final_unprompted
                        if "" not in combined:
                            self.overall_list.append(combined)
                            self.individual_list.append(final_job)
                            self.individual_list.append(final_unprompted)
                    except ValueError:
                        continue

            except IOError:
                continue

        print "File data completely gathered"
Esempio n. 3
0
    def soc_code_map():
        """Build a {confirmed job title: SOC code} mapping from CFM.csv.

        NOTE(review): defined without ``self`` — presumably invoked as a
        plain function or intended as a staticmethod; confirm at the
        call site.
        """
        reader = fw.FileHandler()
        rows = reader.open_csv(auto_path + "CFM.csv")
        return {
            row[reader.header.index("All Confirmed Job Titles")]:
            row[reader.header.index("SOC Code")]
            for row in rows
        }
Esempio n. 4
0
    def extract_data(self):

        print "Unzipping and modifying Analysis Tool results..."
        fw.FileHandler().zip_file_open('Ryan Job Rollup Suggestor EAC.zip',
                                       self.temp_out_files)
        extract.ExtractXMLData(
            'Ryan Job Rollup Suggestor EAC_0.xml').fix_xml_encoding(
                self.temp_out_files)
        count_dict = extract.ExtractXMLData(self.temp_out_files + 'Ryan Job Rollup Suggestor EAC_0.xml')\
            .overall_medians_list_return()

        return count_dict
Esempio n. 5
0
    def temp_pull_current_jobs(self):
        """Return the unique, non-empty job titles in Current Jobs.csv."""
        reader = files.FileHandler()
        unique_titles = set()

        for record in reader.open_csv(self.automated_files +
                                      "Current Jobs.csv"):
            title = record[reader.header.index("ANSWER VALUE")]
            if title:
                unique_titles.add(title)

        return list(unique_titles)
Esempio n. 6
0
    def pull_all_current_job_rollups(self):
        """Return the unique, non-empty rollup values from the current
        job-rollups TSV export."""
        reader = files.FileHandler()
        unique_rollups = set()

        for record in reader.open_tsv(self.automated_files +
                                      "Current Job Rollups.tsv"):
            value = record[reader.header.index("Read-only Answervalue")]
            if value:
                unique_rollups.add(value)

        return list(unique_rollups)
Esempio n. 7
0
    def create_full_task_dictionary(self):
        print "Gathering all tasks into a job to task dictionary..."
        handler = files.FileHandler()
        task_dict = {}
        for line in handler.open_csv(self.automated_files +
                                     'Current Tasks.csv'):
            job = line[handler.header.index("JOB")]
            tasks = line[handler.header.index("TASK")]

            try:
                task_dict[job] += " " + tasks.lower()
            except KeyError:
                task_dict[job] = tasks.lower()

        return task_dict
Esempio n. 8
0
    def pull_all_current_jobs(self):
        """This is not currently working. Something is wrong on the C# code side me thinks"""

        job_list = []

        reader = files.FileHandler()

        for row in reader.open_tsv(self.automated_files + "Current Jobs.tsv"):
            try:
                job_title = row[reader.header.index("ANSWER VALUE")]
                if job_title != "":
                    job_list.append(job_title)
            except IndexError:
                print row

        return list(set(job_list))
Esempio n. 9
0
    def find_breadth_x_soc_codes(self, x):
        """Return the unique SOC codes from the breadth-``x`` column of
        Onet_Breadths.csv.

        ``x`` selects among '30', '40' and '50'; any other value falls
        back to the 'All Breadth 60' column.
        """
        column_by_breadth = {
            '30': 'All Breadth 30',
            '40': 'All Breadth 40',
            '50': 'All Breadth 50',
        }
        column = column_by_breadth.get(x, 'All Breadth 60')

        reader = fw.FileHandler()
        codes = set()

        for record in reader.open_csv(self.main_file_path +
                                      "Onet_Breadths.csv"):
            codes.add(record[reader.header.index(column)])

        return list(codes)
Esempio n. 10
0
    def unprompted_pull(self):

        matching_list = []

        append = matching_list.append
        print "Sorting through the unprompted strings file"

        handler = fw.FileHandler()

        for row in handler.open_csv(unprompted_path + self.current_unprompted):
            unprompted = row[1]
            confirmed = row[0]
            mutual_information = row[5]
            word_similarity = row[6]

            all = unprompted, confirmed, mutual_information, word_similarity

            if float(mutual_information) > 12 and float(word_similarity) < .9:
                append(all)

            else:
                continue

        return matching_list
Esempio n. 11
0
    return jaccard


print "Gathering probabilities..."
combo_probs = {}
with open("C:\\users\\ryanm\\desktop\\ProbabilityCache.csv", 'rb') as R:
    reader = csv.reader(R, delimiter=',')

    header = reader.next()

    for row in reader:
        combo = row[0].lower(), row[1].lower()
        combo_probs[combo] = row[2]

write_header = True
handler = fw.FileHandler()
row_count = 0
with open(
        "C:\\users\\ryanm\\desktop\\truth_data_with_basesalary_comparisons_and_prob.csv",
        'wb') as W:
    writer = csv.writer(W, lineterminator='\n')

    for line in handler.open_csv(
            "C:\\users\\ryanm\\desktop\\truth_data_with_basesalary_comparisons.csv"
    ):
        row_count += 1

        if row_count % 1000 == 0:
            print "Processed %s rows..." % row_count

        if write_header is True:
Esempio n. 12
0
    def unzip_file(self, zip_file, out_file=None):
        """Extract ``zip_file``, defaulting to the instance's standard
        output directory.

        :param zip_file: name/path of the archive to open.
        :param out_file: destination directory; ``None`` selects
            ``self.out_files``.
        """
        # `is None` rather than `== None`: identity is the correct and
        # conventional test for the None sentinel (PEP 8).
        destination = self.out_files if out_file is None else out_file
        files.FileHandler().zip_file_open(zip_file, destination)