# Assumes module-level aliases from the surrounding script: `li` for
# LogFileTools.log_iterator, `fw` for the project's file-handling module,
# and a `log_file_path` string pointing at the survey log directory.
def gather_data(self):
    """Collect (job, unprompted job) pairs from every survey log in range."""
    logs = li.CSVLogIterator(self.start_date, self.current_date)
    logs.find_all_logs_survey()
    file_run = sorted(logs.log_files_to_use)
    handler = fw.FileHandler()
    for files in file_run:
        try:
            print "Working on " + files
            for row in handler.open_csv(log_file_path + files):
                try:
                    final_job = row[handler.header.index("Profile job")]
                    unprompted_1 = row[handler.header.index(
                        'Profile job.unprompted')]
                    unprompted_2 = row[handler.header.index(
                        'Job\\Job Unprompted')]  # backslash escaped explicitly
                    final_unprompted = self.determine_unprompted(
                        unprompted_1, unprompted_2)
                    combined = final_job, final_unprompted
                    # Keep the pair only when both fields are populated.
                    if "" not in combined:
                        self.overall_list.append(combined)
                        self.individual_list.append(final_job)
                        self.individual_list.append(final_unprompted)
                except ValueError:
                    # An expected column is missing from this file's header.
                    continue
        except IOError:
            # The log file could not be opened; move on to the next one.
            continue
    print "File data completely gathered"
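# `determine_unprompted` is not shown in the original snippet. A minimal
# sketch, assuming it simply prefers whichever of the two unprompted-job
# columns is non-empty (the two headers appear to be alternate names for
# the same field across log versions):
def determine_unprompted(self, unprompted_1, unprompted_2):
    if unprompted_1:
        return unprompted_1
    return unprompted_2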
import datetime

import LogFileTools.log_iterator as li


def five_month_survey_log_time():
    """Return the survey log files covering mid-April through mid-September 2014."""
    logs = li.CSVLogIterator(datetime.datetime(2014, 4, 15),
                             datetime.datetime(2014, 9, 15))
    logs.find_all_logs_survey()
    return logs.log_files_to_use
import LogFileTools.log_iterator as li
# `dat` is assumed to be the project's date-helper module providing
# DateTimeFormats (its import is not shown in the original snippet).


def count_number_of_files_to_process(start_date):
    """Count the survey logs between start_date and the end of its month."""
    survey_files = li.CSVLogIterator(
        dat.DateTimeFormats().date_to_date_time(start_date.date()),
        dat.DateTimeFormats().return_datetime_y_m_d_last_day_month(
            start_date.date()))
    survey_files.find_all_logs_survey()
    # Equivalent to the original counting loop, written idiomatically.
    return len(survey_files.log_files_to_use)
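# Usage sketch (hypothetical date): count the survey logs queued up for
# October 2014. `start_date` must expose .date(), so a datetime works here.
import datetime

print count_number_of_files_to_process(datetime.datetime(2014, 10, 1))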
# Method of a Qt worker class (note the .emit progress signals). Assumes
# module aliases from the surrounding script: `logs` for
# LogFileTools.log_iterator, `db` and `cb` for the project's database and
# dashboard-support modules, plus `import csv`.
def run_log_file_data_extraction(self):
    log_files = logs.CSVLogIterator(
        self.start_date.date().toPyDate(),
        self.last_datetime_day_of_month(self.start_date.date()))
    log_files.find_all_logs_survey()
    for current_file_number, log_file in enumerate(
            log_files.log_files_to_use):
        self.update_string_progress.emit("Working on " + log_file + "...")
        with open(self.log_path + log_file, 'rb') as R:
            reader = csv.reader(R, delimiter=',')
            header = reader.next()
            for row in reader:
                saw_20 = row[header.index("Saw20")]
                bailed_20 = row[header.index("Bailed20")]
                saw_payscale = row[header.index("SawMyPayscale")]
                profile_job = row[header.index("Profile job")]
                try:
                    soc = self.soc_map[profile_job]
                except KeyError:
                    # Job title has no SOC code mapping.
                    soc = "N/A"
                self.log_data_to_dictionary(row, header, saw_20, bailed_20,
                                            saw_payscale, soc)
        self.update_progress.emit(current_file_number + 1)
    self.update_string_progress.emit("Log File data completely gathered")
    self.update_string_progress.emit(
        "Placing log data and Analysis Tool data into database...")
    db.QuestionDashboardData().insert_data_from_dict_to_database(
        self.denominator_dict, self.numerator_dict,
        self.start_date.date().toPyDate().strftime("%Y%m"))
    self.update_string_progress.emit(
        "Filling up the different breadth x databases...")
    cb.QuestionDashSupport().fill_all_breadth_db(
        self.start_date.date().toPyDate().strftime("%Y%m"))
    self.update_string_progress.emit(
        "Log File and Analysis Tool work completed. Feel free to query away "
        "on the currently pulled month's data.")
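# `last_datetime_day_of_month` is not shown in the original. A minimal
# sketch of the helper method, assuming it receives a plain Python date
# and returns a datetime for the final day of that date's month:
import calendar
import datetime


def last_datetime_day_of_month(self, a_date):
    last_day = calendar.monthrange(a_date.year, a_date.month)[1]
    return datetime.datetime(a_date.year, a_date.month, last_day)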
from LogFileTools import log_iterator
import datetime
import csv

csv_logs = log_iterator.CSVLogIterator(
    datetime.date(2013, 5, 1), datetime.date(2014, 5, 1),
    path_to_use="\\\\filer01\\public\\Data Dashboards\\PSP Reports\\")
csv_logs.find_all_logs_dashboard()
csv_rows = csv_logs.iterate_files_return_row()
storage_dict = {}


def pull_final_data(key, sub_key, pa):
    """Bundle the three fields into a single tuple."""
    return key, sub_key, pa


row_counter = 0
for row in csv_rows:
    # The counter is never incremented in the original snippet; added here so
    # the progress message below actually fires.
    row_counter += 1
    if row_counter % 10000 == 0 and row_counter != 0:
        print '%s rows processed' % row_counter
    account_id = row[csv_logs.header.index("Acct ID")]
    product_activity = row[csv_logs.header.index("Product Activity")]
    industry_affinity = row[csv_logs.header.index("Industry Affinity")]
    job = row[csv_logs.header.index("Job")]
from LogFileTools import log_iterator
import datetime

# Quick check: list the survey logs found for May 2013 through May 2014.
csv_logs = log_iterator.CSVLogIterator(datetime.date(2013, 5, 1),
                                       datetime.date(2014, 5, 1))
csv_logs.find_all_logs_survey()
print csv_logs.log_files_to_use
import LogFileTools.log_iterator as log
import datetime
import csv

survey_path = '\\\\psfiler01\\data\\SurveyReports\\'
iterator = log.CSVLogIterator(datetime.datetime(2014, 5, 1),
                              datetime.datetime(2014, 9, 30))
iterator.find_all_logs_survey()

# Load the set of GUIDs to match against from a local lookup file.
needed_guid = set()
with open("C:\\users\\ryanm\\desktop\\GUIDs.csv", 'rb') as R:
    reader = csv.reader(R, delimiter=',')
    for row in reader:
        needed_guid.add(row[0])

results = []
for log_files in iterator.log_files_to_use:
    try:
        print "Working on " + log_files
        with open(survey_path + log_files, 'rb') as R:
            reader = csv.reader(R, delimiter=',')
            header = reader.next()
            for row in reader:
                guid = row[header.index("ProfileGuid")]
                job_title = row[header.index("Profile job")]
                if guid in needed_guid:
                    # The original snippet is truncated here; collecting the
                    # matched pair is an assumed completion.
                    results.append((guid, job_title))
    except IOError:
        # Assumed handler for the otherwise-dangling try (snippet truncated):
        # skip log files that cannot be opened.
        continue
import datetime
import getpass
import os

import LogFileTools.log_iterator as li

guid_dict = {}
temp_in = '\\\\filer01\\public\\Data_Analytics\\AnonReports\\TempIn\\MobileKayla\\'
temp_out = '\\\\filer01\\public\\Data_Analytics\\AnonReports\\TempOut\\MobileKayla\\'
start_date = datetime.datetime(2014, 10, 1)
end_date = datetime.datetime(2014, 10, 1)
is_active = set()
mobile_question_guid = {}
non_question_guid = {}
profile_class = {}

# Log File Denominator pull
mobile_survey = li.CSVLogIterator(start_date, end_date)
mobile_survey.find_all_logs_survey()
local_path = os.path.expanduser("~" + getpass.getuser()) + "\\desktop\\"


def dict_insert(value, dictionary, key, question):
    """Increment dictionary[question][key] when value is the string "True",
    creating the nested dict and counter on first sight."""
    if value == "True":
        try:
            dictionary[question][key] += 1
        except KeyError:
            try:
                # The question exists but this key hasn't been counted yet.
                dictionary[question][key] = 1
            except KeyError:
                # First time this question has been seen at all.
                dictionary[question] = {}
                dictionary[question][key] = 1
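# Usage sketch for dict_insert (hypothetical values): tally how many rows
# answered "True" to a question, grouped by an arbitrary key.
counts = {}
dict_insert("True", counts, "ios", "SawMobileSurvey")
dict_insert("True", counts, "ios", "SawMobileSurvey")
dict_insert("False", counts, "android", "SawMobileSurvey")
print counts  # {'SawMobileSurvey': {'ios': 2}}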