def count_number_of_files_to_process(start_date):
    """Count the survey log files for start_date's month.

    Builds a CSVLogIterator spanning start_date's date through the last day
    of that month, finds all survey logs, and returns how many there are.

    :param start_date: datetime-like object; only its .date() is used
    :return: number of survey log files found
    """
    survey_files = li.CSVLogIterator(
        dat.DateTimeFormats().date_to_date_time(start_date.date()),
        dat.DateTimeFormats().return_datetime_y_m_d_last_day_month(
            start_date.date()))
    survey_files.find_all_logs_survey()
    # Idiomatic replacement for the original manual counter loop; works for
    # any iterable, not just sized containers.
    return sum(1 for _ in survey_files.log_files_to_use)
def run_iqr_queries(self, query_name, file_dim, table_dim, years): """ :param query_name: Title of the file you're going to create :param file_dim: The file dimension you want to build :param table_dim: The table dimension you want to build :param years: The number of years back you want the query to start """ main_filter = 'Location\Country\United States{match,1,0} & survey\survey\PayScale Main{match,1,0}' datage = dates.DateTimeFormats() end_date = str( datage.find_first_of_current_month().strftime("%Y-%m-%d")) start_date = str( datage.find_first_of_month_x_years_ago(years).strftime("%Y-%m-%d")) dash_reports = rq.AnonReports(self.in_files, self.out_files) dash_reports.post_analysis_tool_query_dim_list_create( 'localhost', query_name, start_date, end_date, main_filter, "Combined TCC IQR Measure", 'Median', file_dim, table_dim, )
def run_unconfirmed_data_queries(self, query_name, file_dim, table_dim,
                                 years):
    """Run a count-table report over unconfirmed profile data.

    :param query_name: Title of the file you're going to create
    :param file_dim: The file dimension you want to build
    :param table_dim: The table dimension you want to build
    :param years: The number of years back you want the query to start
    """
    # Renamed from `filter` so the local no longer shadows the builtin.
    # Selects profiles in wizard stages 3-5 while excluding a fixed list of
    # affinity IDs.
    unconfirmed_filter = \
        'profile\\profilewizardid{irange,3,0,5,5} & !(affinityid\\12883618{match,1,0} | ' \
        'affinityid\\12883619{match,1,0} | affinityid\\12883620{match,1,0} |' \
        ' affinityid\\12883621{match,1,0} | affinityid\\12883625{match,1,0} | ' \
        'affinityid\\12883626{match,1,0} | affinityid\\12883627{match,1,0} | ' \
        'affinityid\\12883628{match,1,0} | affinityid\\12883629{match,1,0} | ' \
        'affinityid\\12883630{match,1,0} | affinityid\\12883631{match,1,0} | ' \
        'affinityid\\12883632{match,1,0} | affinityid\\12883633{match,1,0} | ' \
        'affinityid\\12883634{match,1,0} | affinityid\\12883636{match,1,0} | ' \
        'affinityid\\12883638{match,1,0} | affinityid\\12883639{match,1,0} | ' \
        'affinityid\\12883640{match,1,0}){999,d,0,0}'
    datage = dates.DateTimeFormats()
    # Query window: first of the month `years` ago through first of this month.
    end_date = str(
        datage.find_first_of_current_month().strftime("%Y-%m-%d"))
    start_date = str(
        datage.find_first_of_month_x_years_ago(years).strftime("%Y-%m-%d"))
    dash_reports = rq.AnonReports(self.in_files, self.out_files)
    dash_reports.post_analysis_tool_count_table_report(
        "psbuilder02a", query_name, start_date, end_date,
        unconfirmed_filter,
        first_dimension_list=file_dim, second_dimension_list=table_dim)
def __init__(self):
    """Set up the output location, CSV header, and the run's date range."""
    self.min_date = None
    self.max_date = None
    # UNC share where the Splunk PSP metrics data lands.
    self.file_path = ('\\\\filer01\\public\\Data_Analytics\\'
                      'Splunk_PSP_Metrics\\Data\\')
    # Column order for the output CSV.
    self.header = [
        'AccountID',
        'AccountName',
        'Algorithm',
        'Search Terms',
        'Created',
        'Email',
        'Find Position',
        'Query Count',
        'More Count',
        'PayscaleTitle',
        'Queries',
        'SalesForceAccountID',
        'SubscriptionType',
    ]
    self.dates = df.DateTimeFormats()
    # Presumably fills min_date/max_date; defined elsewhere in the class.
    self.create_dates()
def run_analysis_tool(self):
    """Run the 'Ryan Job Rollup Suggestor EAC' medians query for US data."""
    fmt = di.DateTimeFormats()
    # Two-year window ending at the first of the current month.
    stop = fmt.find_first_of_current_month().strftime("%Y-%m-%d")
    begin = fmt.find_first_of_month_x_years_ago(2).strftime("%Y-%m-%d")
    reporter = rq.AnonReports(self.temp_in_files, self.temp_out_files)
    reporter.post_analysis_tool_query_dim_list_create(
        'localhost',
        'Ryan Job Rollup Suggestor EAC',
        begin,
        stop,
        'Location\\Country\\United States{match,1,0}',
        'EAC Measure',
        'Medians',
        first_dimension_list=None,
        second_dimension_list=self.pull_jobs_no_rollups(),
        string_builder_type='Job')
def __init__(self):
    """Set up dates, output file name, and accumulators for this run."""
    self.dates = di.DateTimeFormats()
    # First of the current month anchors the output file name below.
    self.current_date = self.dates.find_first_of_current_month()
    self.current_unprompted = (
        unprompted_path
        + self.current_date.strftime("%Y%m")
        + " Unprompted Strings List.csv")
    # Look back two years from the current month.
    self.years_to_run = 2
    self.start_date = self.dates.find_first_of_month_x_years_ago(
        self.years_to_run)
    self.overall_list = []
    self.individual_list = []
    self.final_dict = {}
    self.individual_dict = {}
    self.individual_length = None
    # Column order for the output CSV.
    self.header = [
        'Job Chosen', 'Unprompted String', 'Joint Count', 'CFM Count',
        'UNC Count', 'Mutual Information', 'Word Similarity Score'
    ]
def run_overall_table_queries(self, query_name, file_dim, table_dim,
                              years, filter=""):
    """Run a Profile Age count query on psstats03 over the last *years* years.

    :param query_name: Title of the file you're going to create
    :param file_dim: The file dimension you want to build
    :param table_dim: The table dimension you want to build
    :param years: The number of years back you want the query to start
    :param filter: Optional analysis-tool filter string (default: none)
    """
    fmt = dates.DateTimeFormats()
    # Query window: first of the month `years` ago through first of this month.
    window_end = str(fmt.find_first_of_current_month().strftime("%Y-%m-%d"))
    window_start = str(
        fmt.find_first_of_month_x_years_ago(years).strftime("%Y-%m-%d"))
    reporter = rq.AnonReports(self.in_files, self.out_files)
    reporter.post_analysis_tool_query_dim_list_create(
        'psstats03',
        query_name,
        window_start,
        window_end,
        filter,
        "Profile Age Measure",
        'Count',
        file_dim,
        table_dim,
    )
def build_count_table_counts_file(self):
    """Serialize a count-table AnalysisConfig XML file to self.output_file.

    Builds the document element-by-element from the instance's configured
    attributes, pretty-prints it, and writes it out.

    NOTE(review): branch extents below were reconstructed from a
    whitespace-mangled source; the SampleDefinition-only fields
    (ProfileCount through AddDimDefinition) are assumed to all belong to
    the else branch -- confirm against the original file.
    """
    root = ET.Element("AnalysisConfig")
    root.set('xmlns:xsd', "http://www.w3.org/2001/XMLSchema")
    root.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")
    index_server = ET.SubElement(root, 'IndexServer')
    index_server.text = self.index_server
    name = ET.SubElement(root, 'Name')
    name.text = self.input_file
    text_filter = ET.SubElement(root, 'Filter')
    text_filter.text = self.main_filter
    report_def = ET.SubElement(root, 'ReportDef')
    report_def.set('xsi:type', self.report_definition)
    dimension = ET.SubElement(report_def, 'Dimension')
    under_dimension = ET.SubElement(dimension, 'Dimension')
    # Fixed: compare to None with `is`, not `==` (PEP 8).
    if self.custom_dimension_tuple is None:
        under_dimension.set('Ref', self.report_measure)
    else:
        build_custom_dimension(ET, under_dimension,
                               self.custom_dimension_tuple)
    ranges = ET.SubElement(dimension, "Ranges")
    ranges.text = self.ranges
    sub_filter = ET.SubElement(dimension, 'Filter')
    sub_filter.text = self.sub_filter
    row_count = ET.SubElement(dimension, 'RowCount')
    row_count.text = self.row_count
    min_per_row = ET.SubElement(dimension, 'MinPerRow')
    min_per_row.text = self.min_per_row
    if self.report_definition != 'SampleDefinition':
        include_profile_answer = ET.SubElement(report_def,
                                               'IncludeProfileAnswer')
        include_profile_answer.text = "false"
    else:
        # Sampling-specific report settings.
        profile_count = ET.SubElement(report_def, 'ProfileCount')
        profile_count.text = self.profile_count
        sampling_method = ET.SubElement(report_def, 'SamplingMethod')
        sampling_method.text = self.sampling_method
        group_by_first_dim = ET.SubElement(report_def, 'GroupByFirstDim')
        group_by_first_dim.text = self.group_by_first_dim
        row_names = ET.SubElement(report_def, 'RowNames')
        row_names.text = self.row_names
        add_dim_def = ET.SubElement(report_def, 'AddDimDefinition')
        add_dim_def.text = self.add_dim_definition
    # Each dimension comes either from an inline list or a file reference.
    if self.first_dimension_file is None:
        dim_1_list = ET.SubElement(root, 'Dim1List')
        self.dimension_list_build(dim_1_list, 1)
    else:
        dim_1_file = ET.SubElement(root, 'Dim1File')
        dim_1_file.text = self.first_dimension_file
    if self.second_dimension_file is None:
        dim_2_list = ET.SubElement(root, 'Dim2List')
        self.dimension_list_build(dim_2_list, 2)
    else:
        dim_2_file = ET.SubElement(root, 'Dim2File')
        dim_2_file.text = self.second_dimension_file
    part_time = ET.SubElement(root, 'PartTime')
    part_time.text = self.part_time
    unique = ET.SubElement(root, 'Unique')
    unique.text = 'false'  # ATH 4-18-2016 changed to false
    no_profile_list = ET.SubElement(root, 'NoProfileLists')
    no_profile_list.text = 'true'
    pfgid = ET.SubElement(root, 'PFGID')
    pfgid.text = self.pfgid
    show_report_error = ET.SubElement(root, 'ShowReportErrors')
    show_report_error.text = 'false'
    currency = ET.SubElement(root, 'Currency')
    currency.text = 'U.S. Dollar (USD)'
    min_date = ET.SubElement(root, 'MinDate')
    min_date.text = self.min_date
    max_date = ET.SubElement(root, 'MaxDate')
    max_date.text = self.max_date
    # Timestamps are today's date with a fixed midnight time component.
    last_run = ET.SubElement(root, 'LastRunDate')
    last_run.text = str(
        df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"
    exp_date = ET.SubElement(root, 'ExpirationDate')
    exp_date.text = str(
        df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"
    max_results = ET.SubElement(root, 'MaxResults')
    max_results.text = '1000000'
    alt_search = ET.SubElement(root, 'AltSearchMode')
    alt_search.text = ""
    # Pretty-print for easier debugging of the generated config.
    xml_string = minidom.parseString(
        ET.tostring(root)).toprettyxml(indent=' ', encoding='utf-8')
    with open(self.output_file, 'w') as w:
        w.write(xml_string)
def build_xml_file(self):
    """Serialize a multivariate-view AnalysisConfig XML file to self.output_file.

    NOTE(review): the Filter is hard-coded to job "forester" and both
    dimension files are hard-coded to "&& ALL" -- confirm that is intended
    rather than leftover test values.
    """
    root = ET.Element("AnalysisConfig")
    root.set('xmlns:xsd', "http://www.w3.org/2001/XMLSchema")
    root.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")
    index_server = ET.SubElement(root, 'IndexServer')
    index_server.text = self.index_server
    name = ET.SubElement(root, 'Name')
    name.text = self.input_file
    text_filter = ET.SubElement(root, 'Filter')
    # Hard-coded job filter (see NOTE in docstring).
    text_filter.text = "job\\job\\forester{match,1,0}"
    report_def = ET.SubElement(root, 'ReportDef')
    report_def.set('xsi:type', "MVariateViewDefinition")
    # Data source: model parameters over the Combined TCC dimension.
    source = ET.SubElement(report_def, "Source")
    source.set("xsi:type", "ModelParmMVDataSource")
    first_source = ET.SubElement(source, "Source")
    first_source.text = self.country
    variable = ET.SubElement(source, "Variable")
    variable.set("Ref", "Combined TCC Dimension")
    names = ET.SubElement(source, "Names")
    names.text = "Strings"
    joint_dist = ET.SubElement(source, "JointDist")
    joint_dist.set("Ref", "Combined TCC Dimension")
    min_count = ET.SubElement(source, "MinCount")
    min_count.text = "0"
    # Single alphabetic, non-reversed data-point view.
    views = ET.SubElement(report_def, "Views")
    view = ET.SubElement(views, "View")
    view.set("xsi:type", "DataPointMVView")
    sort = ET.SubElement(view, "Sort")
    sort.text = "Alphabetic"
    reverse = ET.SubElement(view, "Reverse")
    reverse.text = "false"
    display_views = ET.SubElement(view, "DisplayViews")
    display_views.text = "false"
    row_count = ET.SubElement(view, "RowCount")
    row_count.text = "0"
    lower_view = ET.SubElement(view, "View")
    lower_view.text = "None"
    # Both dimensions fixed to the "&& ALL" wildcard.
    first_dimension = ET.SubElement(root, "Dim1File")
    first_dimension.text = "&& ALL"
    second_dimension = ET.SubElement(root, "Dim2File")
    second_dimension.text = '&& ALL'
    part_time = ET.SubElement(root, "PartTime")
    part_time.text = "false"
    unique = ET.SubElement(root, 'Unique')
    unique.text = 'true'
    no_profile_list = ET.SubElement(root, 'NoProfileLists')
    no_profile_list.text = 'true'
    pfgid = ET.SubElement(root, 'PFGID')
    pfgid.text = "false"
    show_report_error = ET.SubElement(root, 'ShowReportErrors')
    show_report_error.text = 'false'
    currency = ET.SubElement(root, 'Currency')
    currency.text = 'U.S. Dollar (USD)'
    min_date = ET.SubElement(root, 'MinDate')
    min_date.text = self.min_date
    max_date = ET.SubElement(root, 'MaxDate')
    max_date.text = self.max_date
    # Timestamps are today's date with a fixed midnight time component.
    last_run = ET.SubElement(root, 'LastRunDate')
    last_run.text = str(
        df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"
    exp_date = ET.SubElement(root, 'ExpirationDate')
    exp_date.text = str(
        df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"
    max_results = ET.SubElement(root, 'MaxResults')
    max_results.text = '1000000'
    alt_search = ET.SubElement(root, 'AltSearchMode')
    alt_search.text = ""
    # Pretty-print for easier debugging of the generated config.
    xmlString = minidom.parseString(
        ET.tostring(root)).toprettyxml(indent=' ', encoding='utf-8')
    with open(self.output_file, 'w') as w:
        w.write(xmlString)
def build_overall_table_counts_file(self):
    """Serialize an overall-table-counts AnalysisConfig XML file.

    Builds the document from the instance's configured attributes
    (filter, measure, dimensions, date range) and writes the
    pretty-printed result to self.output_file.
    """
    root = ET.Element("AnalysisConfig")
    root.set('xmlns:xsd', "http://www.w3.org/2001/XMLSchema")
    root.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")
    index_server = ET.SubElement(root, 'IndexServer')
    index_server.text = self.index_server
    name = ET.SubElement(root, 'Name')
    name.text = self.input_file
    text_filter = ET.SubElement(root, 'Filter')
    text_filter.text = self.main_filter
    report_def = ET.SubElement(root, 'ReportDef')
    report_def.set('xsi:type', self.report_definition)
    #The Name parameter is apparently not required
    #report_def.set("Name", "Average EAC Overall")
    measure = ET.SubElement(report_def, 'Measure')
    under_measure = ET.SubElement(measure, 'Measure')
    under_measure.set('Ref', self.report_measure)
    fitting = ET.SubElement(measure, "Fitting")
    fitting.text = self.fitting
    columns = ET.SubElement(report_def, 'Columns')
    columns.text = self.return_type
    # Report body depends on whether a table dimension was configured.
    if self.dimension is None:
        self.build_report_definition(report_def)
    else:
        self.build_table_dimension(report_def)
    # Summarize unless a custom dimension tuple was supplied.
    summarize = ET.SubElement(report_def, 'Summarize')
    if self.custom_dimension_tuple is None:
        summarize.text = "true"
    else:
        summarize.text = "false"
    # Each dimension comes either from an inline list or a file reference.
    if self.first_dimension_file is None:
        dim_1_list = ET.SubElement(root, 'Dim1List')
        self.dimension_list_build(dim_1_list, 1)
    else:
        dim_1_file = ET.SubElement(root, 'Dim1File')
        dim_1_file.text = self.first_dimension_file
    if self.second_dimension_file is None:
        dim_2_list = ET.SubElement(root, 'Dim2List')
        self.dimension_list_build(dim_2_list, 2)
    else:
        dim_2_file = ET.SubElement(root, 'Dim2File')
        dim_2_file.text = self.second_dimension_file
    part_time = ET.SubElement(root, 'PartTime')
    part_time.text = self.part_time
    unique = ET.SubElement(root, 'Unique')
    unique.text = 'false'  # ATH 1/22/2016 changed to false
    no_profile_list = ET.SubElement(root, 'NoProfileLists')
    # ATH 4/18/2016 adding variable here so I can control
    # no_profile_list.text = "false"
    no_profile_list.text = self.no_profile_lists
    pfgid = ET.SubElement(root, 'PFGID')
    pfgid.text = self.pfgid
    show_report_error = ET.SubElement(root, 'ShowReportErrors')
    show_report_error.text = 'false'
    currency = ET.SubElement(root, 'Currency')
    currency.text = 'U.S. Dollar (USD)'
    min_date = ET.SubElement(root, 'MinDate')
    min_date.text = self.min_date
    max_date = ET.SubElement(root, 'MaxDate')
    max_date.text = self.max_date
    # Timestamps are today's date with a fixed midnight time component.
    last_run = ET.SubElement(root, 'LastRunDate')
    last_run.text = str(
        df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"
    exp_date = ET.SubElement(root, 'ExpirationDate')
    exp_date.text = str(
        df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"
    max_results = ET.SubElement(root, 'MaxResults')
    max_results.text = '1000000000'
    alt_search = ET.SubElement(root, 'AltSearchMode')
    alt_search.text = ""
    #pretty print ftw! Can finally debug this stuff easier now....
    xmlString = minidom.parseString(
        ET.tostring(root)).toprettyxml(indent=' ', encoding='utf-8')
    with open(self.output_file, 'w') as w:
        w.write(xmlString)
import data_extract_thread as analysis
import DateFunctions.datetimes_in as dates
import csv
import gui_code_behind as cb
import question_database as db
import Constants.constant_strings as c
import os

# Question-dashboard extract script (Python 2).  Kicks off the data-extract
# thread for last month and prepares the month's output directory.
data_pull_thread = analysis.ExtractDataThread()
start_date = dates.DateTimeFormats().find_first_of_last_month()
current_month = dates.DateTimeFormats().return_last_month_as_y_m("-")
out_file_path = c.Constants().question_dashboard + current_month + "\\"
if not os.path.exists(out_file_path):
    os.makedirs(out_file_path)
# Only start the extract thread if it is not already running.
if not data_pull_thread.isRunning():
    data_pull_thread.get_date_non_gui(start_date)
    data_pull_thread.start()


def write_data_to_file():
    # Write the dashboard rows to a CSV in this month's output folder.
    # NOTE(review): in this view the body ends right after creating the
    # writer -- confirm the remaining write logic was not lost in transit.
    file_name = "Question_Dashboard.csv"
    update_line = "Writing to file at %s" % out_file_path
    print update_line
    with open(out_file_path + file_name, 'wb') as W:
        writer = csv.writer(W, lineterminator='\n')
def last_day_of_month(given_date):
    """Return the last day of given_date's month, formatted with "-"."""
    formats = dat.DateTimeFormats()
    return formats.return_y_m_d_last_day_of_month(given_date, "-")
def __init__(self):
    """Point at the Splunk PSP metrics share and set up date helpers."""
    # UNC share where the Splunk PSP metrics data lands.
    self.file_path = ('\\\\filer01\\public\\Data_Analytics\\'
                      'Splunk_PSP_Metrics\\Data\\')
    # Shared date-formatting helper.
    self.dates = df.DateTimeFormats()
def last_datetime_day_of_month(given_date):
    """Return the last day of given_date's month as a date object."""
    formats = dat.DateTimeFormats()
    month_end = formats.return_datetime_y_m_d_last_day_month(given_date)
    return month_end.date()
from PyQt4 import QtCore import AnalysisTool.deserialize_xml as my_xml import RyanBot.DataDashboard.dashboard_support as ds dash = ds.DashboardSupport() import AnalysisTool.extract_data_xml as extract import Constants.constant_strings as cs const = cs.Constants() import DateFunctions.datetimes_in as dat dates = dat.DateTimeFormats() import AnalysisTool.run_queries as rq import datetime import os import csv report_measure = 'EAC Measure' return_type = 'Medians' ranges = 'Fixed' file_dimension = { 1: 'Alumni Analytics File Dimension 1', 2: 'Alumni Analytics EducationalLevelAffiliateSchool', 3: 'Alumni Analytics File Dimension YG', 4: '&& ALL', 5: '&& ALL' } table_dimension = { 1: 'Alumni Analytics Table Dimension 1', 2: 'Alumni Analytics Table Dimension Jobs', 3: 'Alumni Analytics Table Dimension YG', 4: 'Alumni Analytics Schools',
def analyze_jobs(self):
    """Compare EAC pay between matched jobs and candidate rollup jobs,
    then write surviving suggestions to a dated CSV.

    Reads self.suggestions_list rows of (unprompted string, candidate job,
    mutual information) and uses self.look_up, self.rollup_dict and
    self.job_eac_dict to resolve jobs, rollups and pay figures.
    """
    print "Analyzing Pay"
    for row in self.suggestions_list:
        # Resolve the matched job and its EAC, preferring the rollup's EAC.
        first_job = self.look_up[row[0]]
        try:
            rollup = self.rollup_dict[first_job]
            first = self.job_eac_dict[rollup]
        except KeyError:
            rollup = 'None'
            try:
                first = self.job_eac_dict[first_job]
            except KeyError:
                first = 'N/A'
        second_job = row[1]
        mi = row[2]  # mutual information score for this pairing
        try:
            second = self.job_eac_dict[second_job]
        except KeyError:
            second = 'N/A'
        if first == 'N/A' or second == 'N/A':
            difference = 'N/A'
            diff_error = 'N/A'
        if first != 'N/A' and second != 'N/A':
            try:
                # Relative pay difference between the two jobs.
                difference = abs((float(first[0]) - float(second[0]))
                                 / float(second[0]))
                diff_error = self.calculate_difference_error(
                    first_job, second_job)
            except ValueError:
                # NOTE(review): on ValueError, `difference`/`diff_error`
                # keep their value from a previous iteration (or are
                # unbound on the first one) -- confirm this is intended.
                print first[0], second[0]
        rollup_key_check = self.rollup_dict.keys()
        if first_job in rollup_key_check:
            rollup = self.rollup_dict[first_job]
        try:
            examined_job_rollup = self.rollup_dict[second_job]
        except KeyError:
            examined_job_rollup = second_job
        # Skip pairs that already share a rollup.
        if examined_job_rollup == rollup:
            continue
        # Build the output row; round pay figures only when both are floats.
        if isinstance(first[0], float) is not True or isinstance(
                second[0], float) is not True:
            final = row[
                0], first_job, second_job, mi, rollup, examined_job_rollup, first[
                    0], second[0]
        if isinstance(first[0], float) is True and isinstance(
                second[0], float) is True:
            final = row[0], first_job, second_job, mi, rollup, examined_job_rollup, round(float(first[0]), 2),\
                round(float(second[0]), 2)
        # Drop pairs whose pay difference is both large and significant.
        # NOTE(review): the elif and else branches both append `final`,
        # so they could be collapsed -- kept as-is to preserve behavior.
        if difference > .2 and difference > diff_error:
            continue
        elif .2 < difference <= diff_error:
            self.final_list.append(final)
        else:
            self.final_list.append(final)
    print "Writing to file"
    write_date = str(di.DateTimeFormats().return_today_as_y_m("-"))
    with open(
            final_suggestions + write_date + " Job Rollup Suggestions.csv",
            'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        header = [
            'Unprompted String', 'Matched Job', 'Job Being Evaluated',
            'Mutual Information', 'Rollup for Matched Job',
            'Rollup for Job Being Evaluated', 'Matched Job EAC',
            'Job Being Evaluated EAC'
        ]
        writer.writerow(header)
        for row in self.final_list:
            final = row[0], row[1], row[2], row[3], row[4], row[5], row[
                6], row[7]
            writer.writerow(final)
import hmac import hashlib from datetime import datetime import DateFunctions.datetimes_in as df import SqlTools.payscale_replicated as pr import datetime import urllib import json """Script to find all the main site abusers by IP""" out_path = '\\\\filer01\\public\\Data_Analytics\\SiteAbusers\\' data = pr.SqlServer("SiteStats") ip_results = {} dates = df.DateTimeFormats() first_of_last = dates.return_ymd_first_of_last_month("-") last_of_last = dates.return_ymd_last_day_of_last_month().strftime("%Y-%m-%d") #Main SQl Query. Need to be authenticated to the firewall query = ''' declare @StartDate datetime declare @EndDate datetime declare @pageName varchar(255) declare @codeName varchar(255) declare @codeVal varchar(255) set @StartDate = '{0}' set @EndDate = '{1}' set @pageName = '/mypayscale.aspx' set @codeName = 'ProfileGuid'