Code Example #1
    def count_number_of_files_to_process(start_date):

        # One DateTimeFormats instance serves for both month boundaries.
        formats = dat.DateTimeFormats()
        survey_files = li.CSVLogIterator(
            formats.date_to_date_time(start_date.date()),
            formats.return_datetime_y_m_d_last_day_month(start_date.date()))

        survey_files.find_all_logs_survey()
        # The original tallied the files in a manual loop; len() of the
        # collected list is equivalent and clearer.
        return len(survey_files.log_files_to_use)
Code Example #2
    def run_iqr_queries(self, query_name, file_dim, table_dim, years):
        """
        :param query_name:
            Title of the file you're going to create
        :param file_dim:
            The file dimension you want to build
        :param table_dim:
            The table dimension you want to build
        :param years:
            The number of years back you want the query to start
        """

        # Backslashes doubled to match the escaping used in the other
        # examples (a bare \U inside a string literal is an error on Python 3).
        main_filter = 'Location\\Country\\United States{match,1,0} & survey\\survey\\PayScale Main{match,1,0}'
        datage = dates.DateTimeFormats()

        end_date = str(
            datage.find_first_of_current_month().strftime("%Y-%m-%d"))
        start_date = str(
            datage.find_first_of_month_x_years_ago(years).strftime("%Y-%m-%d"))

        dash_reports = rq.AnonReports(self.in_files, self.out_files)

        dash_reports.post_analysis_tool_query_dim_list_create(
            'localhost',
            query_name,
            start_date,
            end_date,
            main_filter,
            "Combined TCC IQR Measure",
            'Median',
            file_dim,
            table_dim,
        )
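A minimal stdlib-only sketch of the date window this method builds: the first of the current month as the end date, and the first of the month N years earlier as the start date. The project's DateTimeFormats methods are assumed to behave this way; the function below is an illustrative stand-in, not the actual implementation.

    import datetime

    def month_window_strings(years_back):
        # End boundary: first day of the current month.
        first_of_current = datetime.date.today().replace(day=1)
        # Start boundary: the same month boundary, years_back years earlier.
        first_years_ago = first_of_current.replace(
            year=first_of_current.year - years_back)
        return (first_years_ago.strftime("%Y-%m-%d"),
                first_of_current.strftime("%Y-%m-%d"))

    start, end = month_window_strings(2)  # e.g. ('2014-05-01', '2016-05-01')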
Code Example #3
    def run_unconfirmed_data_queries(self, query_name, file_dim, table_dim,
                                     years):

        # Renamed from "filter" so the local does not shadow the builtin.
        exclusion_filter = 'profile\\profilewizardid{irange,3,0,5,5} & !(affinityid\\12883618{match,1,0} | ' \
                 'affinityid\\12883619{match,1,0} | affinityid\\12883620{match,1,0} |' \
                 ' affinityid\\12883621{match,1,0} | affinityid\\12883625{match,1,0} | ' \
                 'affinityid\\12883626{match,1,0} | affinityid\\12883627{match,1,0} | ' \
                 'affinityid\\12883628{match,1,0} | affinityid\\12883629{match,1,0} | ' \
                 'affinityid\\12883630{match,1,0} | affinityid\\12883631{match,1,0} | ' \
                 'affinityid\\12883632{match,1,0} | affinityid\\12883633{match,1,0} | ' \
                 'affinityid\\12883634{match,1,0} | affinityid\\12883636{match,1,0} | ' \
                 'affinityid\\12883638{match,1,0} | affinityid\\12883639{match,1,0} | ' \
                 'affinityid\\12883640{match,1,0}){999,d,0,0}'

        datage = dates.DateTimeFormats()

        end_date = str(
            datage.find_first_of_current_month().strftime("%Y-%m-%d"))
        start_date = str(
            datage.find_first_of_month_x_years_ago(years).strftime("%Y-%m-%d"))

        dash_reports = rq.AnonReports(self.in_files, self.out_files)

        dash_reports.post_analysis_tool_count_table_report(
            "psbuilder02a",
            query_name,
            start_date,
            end_date,
            exclusion_filter,
            first_dimension_list=file_dim,
            second_dimension_list=table_dim)
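The long hand-written exclusion could instead be assembled from a plain list of IDs, which is easier to audit and extend. A sketch only, using the first five of the eighteen excluded IDs; the {irange,...}, {match,1,0}, and {999,d,0,0} suffixes are copied verbatim from the example above, not documented filter syntax.

    affinity_ids = [12883618, 12883619, 12883620, 12883621, 12883625]
    terms = ' | '.join('affinityid\\%d{match,1,0}' % i for i in affinity_ids)
    exclusion_filter = ('profile\\profilewizardid{irange,3,0,5,5} & '
                        '!(%s){999,d,0,0}' % terms)
    print(exclusion_filter)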
Code Example #4
 def __init__(self):
     self.min_date = None
     self.max_date = None
     self.file_path = '\\\\filer01\\public\\Data_Analytics\\Splunk_PSP_Metrics\\Data\\'
     self.header = [
         'AccountID', 'AccountName', 'Algorithm', 'Search Terms', 'Created',
         'Email', 'Find Position', 'Query Count', 'More Count',
         'PayscaleTitle', 'Queries', 'SalesForceAccountID',
         'SubscriptionType'
     ]
     self.dates = df.DateTimeFormats()
     self.create_dates()
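The UNC path above doubles every backslash inside an ordinary string literal; a raw string expresses the same path more readably. A stdlib-only illustration (a raw string cannot end in a backslash, so the trailing one is concatenated):

    plain = '\\\\filer01\\public\\Data_Analytics\\Splunk_PSP_Metrics\\Data\\'
    raw = r'\\filer01\public\Data_Analytics\Splunk_PSP_Metrics\Data' + '\\'
    assert plain == raw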
Code Example #5
    def run_analysis_tool(self):
        dates = di.DateTimeFormats()
        end_date = dates.find_first_of_current_month().strftime("%Y-%m-%d")
        start_date = dates.find_first_of_month_x_years_ago(2).strftime(
            "%Y-%m-%d")

        query = rq.AnonReports(self.temp_in_files, self.temp_out_files)
        query.post_analysis_tool_query_dim_list_create(
            'localhost',
            'Ryan Job Rollup Suggestor EAC',
            start_date,
            end_date,
            'Location\\Country\\United States{match,1,0}',
            'EAC Measure',
            'Medians',
            first_dimension_list=None,
            second_dimension_list=self.pull_jobs_no_rollups(),
            string_builder_type='Job')
Code Example #6
File: unprompted.py | Project: twolock/Analytics
    def __init__(self):
        self.dates = di.DateTimeFormats()
        self.current_date = self.dates.find_first_of_current_month()
        self.current_unprompted = unprompted_path + self.current_date.strftime(
            "%Y%m") + " Unprompted Strings List.csv"
        self.years_to_run = 2

        self.start_date = self.dates.find_first_of_month_x_years_ago(
            self.years_to_run)
        #self.start_date = datetime.datetime(2014, 10, 30)
        self.overall_list = []
        self.individual_list = []
        self.final_dict = {}
        self.individual_dict = {}
        self.individual_length = None
        self.header = [
            'Job Chosen', 'Unprompted String', 'Joint Count', 'CFM Count',
            'UNC Count', 'Mutual Information', 'Word Similarity Score'
        ]
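The filename construction in isolation: a YYYYMM stamp prefixed onto a fixed suffix. A stdlib sketch with a placeholder directory, since unprompted_path is defined elsewhere in the project:

    import datetime
    import os

    stamp = datetime.date.today().strftime("%Y%m")
    path = os.path.join('C:\\temp', stamp + " Unprompted Strings List.csv")
    print(path)  # e.g. C:\temp\201605 Unprompted Strings List.csv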
Code Example #7
    def run_overall_table_queries(self,
                                  query_name,
                                  file_dim,
                                  table_dim,
                                  years,
                                  filter=""):
        """
        :param query_name:
            Title of the file you're going to create
        :param file_dim:
            The file dimension you want to build
        :param table_dim:
            The table dimension you want to build
        :param years:
            The number of years back you want the query to start
        :param filter:
            Optional analysis-tool filter string; empty by default
        """
        datage = dates.DateTimeFormats()

        end_date = str(
            datage.find_first_of_current_month().strftime("%Y-%m-%d"))
        start_date = str(
            datage.find_first_of_month_x_years_ago(years).strftime("%Y-%m-%d"))

        dash_reports = rq.AnonReports(self.in_files, self.out_files)

        dash_reports.post_analysis_tool_query_dim_list_create(
            'psstats03',
            query_name,
            start_date,
            end_date,
            filter,
            "Profile Age Measure",
            'Count',
            file_dim,
            table_dim,
        )
Code Example #8
File: serialize_xml.py | Project: twolock/Analytics
    def build_count_table_counts_file(self):

        root = ET.Element("AnalysisConfig")
        root.set('xmlns:xsd', "http://www.w3.org/2001/XMLSchema")
        root.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")

        index_server = ET.SubElement(root, 'IndexServer')
        index_server.text = self.index_server

        name = ET.SubElement(root, 'Name')
        name.text = self.input_file

        text_filter = ET.SubElement(root, 'Filter')
        text_filter.text = self.main_filter

        report_def = ET.SubElement(root, 'ReportDef')
        report_def.set('xsi:type', self.report_definition)

        dimension = ET.SubElement(report_def, 'Dimension')
        under_dimension = ET.SubElement(dimension, 'Dimension')

        if self.custom_dimension_tuple is None:
            under_dimension.set('Ref', self.report_measure)
        else:
            build_custom_dimension(ET, under_dimension, self.custom_dimension_tuple)

        ranges = ET.SubElement(dimension, "Ranges")
        ranges.text = self.ranges

        sub_filter = ET.SubElement(dimension, 'Filter')
        sub_filter.text = self.sub_filter

        row_count = ET.SubElement(dimension, 'RowCount')
        row_count.text = self.row_count

        min_per_row = ET.SubElement(dimension, 'MinPerRow')
        min_per_row.text = self.min_per_row

        if self.report_definition != 'SampleDefinition':
            include_profile_answer = ET.SubElement(report_def, 'IncludeProfileAnswer')
            include_profile_answer.text = "false"

        else:
            profile_count = ET.SubElement(report_def, 'ProfileCount')
            profile_count.text = self.profile_count

            sampling_method = ET.SubElement(report_def, 'SamplingMethod')
            sampling_method.text = self.sampling_method

            group_by_first_dim = ET.SubElement(report_def, 'GroupByFirstDim')
            group_by_first_dim.text = self.group_by_first_dim

            row_names = ET.SubElement(report_def, 'RowNames')
            row_names.text = self.row_names

            add_dim_def = ET.SubElement(report_def, 'AddDimDefinition')
            add_dim_def.text = self.add_dim_definition

        if self.first_dimension_file is None:
            dim_1_list = ET.SubElement(root, 'Dim1List')
            self.dimension_list_build(dim_1_list, 1)
        else:
            dim_1_file = ET.SubElement(root, 'Dim1File')
            dim_1_file.text = self.first_dimension_file

        if self.second_dimension_file is None:
            dim_2_list = ET.SubElement(root, 'Dim2List')
            self.dimension_list_build(dim_2_list, 2)
        else:
            dim_2_file = ET.SubElement(root, 'Dim2File')
            dim_2_file.text = self.second_dimension_file

        part_time = ET.SubElement(root, 'PartTime')
        part_time.text = self.part_time

        unique = ET.SubElement(root, 'Unique')
        unique.text = 'false'  # ATH 4-18-2016 changed to false

        no_profile_list = ET.SubElement(root, 'NoProfileLists')
        no_profile_list.text = 'true'

        pfgid = ET.SubElement(root, 'PFGID')
        pfgid.text = self.pfgid

        show_report_error = ET.SubElement(root, 'ShowReportErrors')
        show_report_error.text = 'false'

        currency = ET.SubElement(root, 'Currency')
        currency.text = 'U.S. Dollar (USD)'

        min_date = ET.SubElement(root, 'MinDate')
        min_date.text = self.min_date

        max_date = ET.SubElement(root, 'MaxDate')
        max_date.text = self.max_date

        last_run = ET.SubElement(root, 'LastRunDate')
        last_run.text = str(df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"

        exp_date = ET.SubElement(root, 'ExpirationDate')
        exp_date.text = str(df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"

        max_results = ET.SubElement(root, 'MaxResults')
        max_results.text = '1000000'

        alt_search = ET.SubElement(root, 'AltSearchMode')
        alt_search.text = ""

        xmlString = minidom.parseString(ET.tostring(root)).toprettyxml(indent='  ', encoding='utf-8')
        with open(self.output_file, 'w') as w:
            w.write(xmlString)
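The closing pattern shared by all three serialize_xml builders, in miniature: construct the tree with ElementTree, then round-trip the serialized string through minidom purely for indentation. A self-contained sketch, not project code:

    import xml.etree.ElementTree as ET
    from xml.dom import minidom

    root = ET.Element("AnalysisConfig")
    root.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")
    server = ET.SubElement(root, 'IndexServer')
    server.text = 'localhost'

    # ET.tostring emits one long line; minidom re-parses and indents it.
    print(minidom.parseString(ET.tostring(root)).toprettyxml(indent='  '))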
Code Example #9
File: serialize_xml.py | Project: twolock/Analytics
    def build_xml_file(self):

        root = ET.Element("AnalysisConfig")
        root.set('xmlns:xsd', "http://www.w3.org/2001/XMLSchema")
        root.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")

        index_server = ET.SubElement(root, 'IndexServer')
        index_server.text = self.index_server

        name = ET.SubElement(root, 'Name')
        name.text = self.input_file

        text_filter = ET.SubElement(root, 'Filter')
        text_filter.text = "job\\job\\forester{match,1,0}"

        report_def = ET.SubElement(root, 'ReportDef')
        report_def.set('xsi:type', "MVariateViewDefinition")

        source = ET.SubElement(report_def, "Source")
        source.set("xsi:type", "ModelParmMVDataSource")

        first_source = ET.SubElement(source, "Source")
        first_source.text = self.country

        variable = ET.SubElement(source, "Variable")
        variable.set("Ref", "Combined TCC Dimension")

        names = ET.SubElement(source, "Names")
        names.text = "Strings"

        joint_dist = ET.SubElement(source, "JointDist")
        joint_dist.set("Ref", "Combined TCC Dimension")

        min_count = ET.SubElement(source, "MinCount")
        min_count.text = "0"

        views = ET.SubElement(report_def, "Views")

        view = ET.SubElement(views, "View")
        view.set("xsi:type", "DataPointMVView")

        sort = ET.SubElement(view, "Sort")
        sort.text = "Alphabetic"

        reverse = ET.SubElement(view, "Reverse")
        reverse.text = "false"

        display_views = ET.SubElement(view, "DisplayViews")
        display_views.text = "false"

        row_count = ET.SubElement(view, "RowCount")
        row_count.text = "0"
        lower_view = ET.SubElement(view, "View")
        lower_view.text = "None"

        first_dimension = ET.SubElement(root, "Dim1File")
        first_dimension.text = "&& ALL"

        second_dimension = ET.SubElement(root, "Dim2File")
        second_dimension.text = '&& ALL'

        part_time = ET.SubElement(root, "PartTime")
        part_time.text = "false"

        unique = ET.SubElement(root, 'Unique')
        unique.text = 'true'

        no_profile_list = ET.SubElement(root, 'NoProfileLists')
        no_profile_list.text = 'true'

        pfgid = ET.SubElement(root, 'PFGID')
        pfgid.text = "false"

        show_report_error = ET.SubElement(root, 'ShowReportErrors')
        show_report_error.text = 'false'

        currency = ET.SubElement(root, 'Currency')
        currency.text = 'U.S. Dollar (USD)'

        min_date = ET.SubElement(root, 'MinDate')
        min_date.text = self.min_date

        max_date = ET.SubElement(root, 'MaxDate')
        max_date.text = self.max_date

        last_run = ET.SubElement(root, 'LastRunDate')
        last_run.text = str(df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"

        exp_date = ET.SubElement(root, 'ExpirationDate')
        exp_date.text = str(df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"

        max_results = ET.SubElement(root, 'MaxResults')
        max_results.text = '1000000'

        alt_search = ET.SubElement(root, 'AltSearchMode')
        alt_search.text = ""

        xmlString = minidom.parseString(ET.tostring(root)).toprettyxml(indent='  ', encoding='utf-8')
        with open(self.output_file, 'w') as w:
            w.write(xmlString)
Code Example #10
File: serialize_xml.py | Project: twolock/Analytics
    def build_overall_table_counts_file(self):

        root = ET.Element("AnalysisConfig")
        root.set('xmlns:xsd', "http://www.w3.org/2001/XMLSchema")
        root.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")

        index_server = ET.SubElement(root, 'IndexServer')
        index_server.text = self.index_server

        name = ET.SubElement(root, 'Name')
        name.text = self.input_file

        text_filter = ET.SubElement(root, 'Filter')
        text_filter.text = self.main_filter

        report_def = ET.SubElement(root, 'ReportDef')
        report_def.set('xsi:type', self.report_definition)
        #The Name parameter is apparently not required
        #report_def.set("Name", "Average EAC Overall")

        measure = ET.SubElement(report_def, 'Measure')
        under_measure = ET.SubElement(measure, 'Measure')
        under_measure.set('Ref', self.report_measure)

        fitting = ET.SubElement(measure, "Fitting")
        fitting.text = self.fitting

        columns = ET.SubElement(report_def, 'Columns')
        columns.text = self.return_type

        if self.dimension is None:
            self.build_report_definition(report_def)

        else:
            self.build_table_dimension(report_def)
            summarize = ET.SubElement(report_def, 'Summarize')
            if self.custom_dimension_tuple is None:
                summarize.text = "true"
            else:
                summarize.text = "false"

        if self.first_dimension_file is None:
            dim_1_list = ET.SubElement(root, 'Dim1List')
            self.dimension_list_build(dim_1_list, 1)
        else:
            dim_1_file = ET.SubElement(root, 'Dim1File')
            dim_1_file.text = self.first_dimension_file

        if self.second_dimension_file is None:
            dim_2_list = ET.SubElement(root, 'Dim2List')
            self.dimension_list_build(dim_2_list, 2)
        else:
            dim_2_file = ET.SubElement(root, 'Dim2File')
            dim_2_file.text = self.second_dimension_file

        part_time = ET.SubElement(root, 'PartTime')
        part_time.text = self.part_time

        unique = ET.SubElement(root, 'Unique')
        unique.text = 'false'   # ATH 1/22/2016 changed to false

        no_profile_list = ET.SubElement(root, 'NoProfileLists')
        no_profile_list.text = self.no_profile_lists # ATH 4/18/2016 adding variable here so I can control
        # no_profile_list.text = "false"

        pfgid = ET.SubElement(root, 'PFGID')
        pfgid.text = self.pfgid

        show_report_error = ET.SubElement(root, 'ShowReportErrors')
        show_report_error.text = 'false'

        currency = ET.SubElement(root, 'Currency')
        currency.text = 'U.S. Dollar (USD)'

        min_date = ET.SubElement(root, 'MinDate')
        min_date.text = self.min_date

        max_date = ET.SubElement(root, 'MaxDate')
        max_date.text = self.max_date

        last_run = ET.SubElement(root, 'LastRunDate')
        last_run.text = str(df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"

        exp_date = ET.SubElement(root, 'ExpirationDate')
        exp_date.text = str(df.DateTimeFormats().return_today_as_y_m_d("-")) + "T00:00:00"

        max_results = ET.SubElement(root, 'MaxResults')
        max_results.text = '1000000000'

        alt_search = ET.SubElement(root, 'AltSearchMode')
        alt_search.text = ""

        # Pretty-print FTW! This makes the generated XML much easier to debug.
        xmlString = minidom.parseString(ET.tostring(root)).toprettyxml(indent='  ', encoding='utf-8')
        with open(self.output_file, 'w') as w:
            w.write(xmlString)
Code Example #11
File: auto_run.py | Project: twolock/Analytics
import data_extract_thread as analysis
import DateFunctions.datetimes_in as dates
import csv
import gui_code_behind as cb
import question_database as db
import Constants.constant_strings as c
import os

data_pull_thread = analysis.ExtractDataThread()
start_date = dates.DateTimeFormats().find_first_of_last_month()
current_month = dates.DateTimeFormats().return_last_month_as_y_m("-")

out_file_path = c.Constants().question_dashboard + current_month + "\\"

if not os.path.exists(out_file_path):
    os.makedirs(out_file_path)

if not data_pull_thread.isRunning():
    data_pull_thread.get_date_non_gui(start_date)
    data_pull_thread.start()


def write_data_to_file():

    file_name = "Question_Dashboard.csv"

    update_line = "Writing to file at %s" % out_file_path
    print update_line
    with open(out_file_path + file_name, 'wb') as W:
        writer = csv.writer(W, lineterminator='\n')
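A note on the open mode: this codebase is Python 2 (bare print statements, PyQt4), where csv output files are opened in binary ('wb') with an explicit lineterminator to avoid doubled blank rows on Windows. A minimal sketch of the same pattern with hypothetical columns:

    import csv

    with open('question_dashboard_sample.csv', 'wb') as f:  # 'wb' on Python 2
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['Question', 'Count'])
        writer.writerow(['How are you paid?', 1200])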
Code Example #12
    def last_day_of_month(given_date):

        return dat.DateTimeFormats().return_y_m_d_last_day_of_month(
            given_date, "-")
Code Example #13
 def __init__(self):
     self.file_path = '\\\\filer01\\public\\Data_Analytics\\Splunk_PSP_Metrics\\Data\\'
     self.dates = df.DateTimeFormats()
Code Example #14
    def last_datetime_day_of_month(given_date):

        return dat.DateTimeFormats().return_datetime_y_m_d_last_day_month(
            given_date).date()
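Code Examples #12 and #14 are both thin wrappers over "last day of the month containing given_date". A stdlib equivalent using calendar.monthrange, shown for illustration; the project's methods are assumed to match this behavior:

    import calendar
    import datetime

    def last_day_of_month(given_date):
        # monthrange returns (weekday of day 1, number of days in the month).
        days = calendar.monthrange(given_date.year, given_date.month)[1]
        return given_date.replace(day=days)

    print(last_day_of_month(datetime.date(2016, 2, 10)))  # 2016-02-29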
Code Example #15
File: get_alumni_data.py | Project: twolock/Analytics
from PyQt4 import QtCore
import AnalysisTool.deserialize_xml as my_xml
import RyanBot.DataDashboard.dashboard_support as ds
import AnalysisTool.extract_data_xml as extract
import Constants.constant_strings as cs
import DateFunctions.datetimes_in as dat
import AnalysisTool.run_queries as rq
import datetime
import os
import csv

# Module-level singletons, grouped after the imports they depend on.
dash = ds.DashboardSupport()
const = cs.Constants()
dates = dat.DateTimeFormats()

report_measure = 'EAC Measure'
return_type = 'Medians'
ranges = 'Fixed'

file_dimension = {
    1: 'Alumni Analytics File Dimension 1',
    2: 'Alumni Analytics EducationalLevelAffiliateSchool',
    3: 'Alumni Analytics File Dimension YG',
    4: '&& ALL',
    5: '&& ALL'
}

table_dimension = {
    1: 'Alumni Analytics Table Dimension 1',
    2: 'Alumni Analytics Table Dimension Jobs',
    3: 'Alumni Analytics Table Dimension YG',
    4: 'Alumni Analytics Schools',
Code Example #16
    def analyze_jobs(self):

        print "Analyzing Pay"

        for row in self.suggestions_list:

            first_job = self.look_up[row[0]]

            try:
                rollup = self.rollup_dict[first_job]
                first = self.job_eac_dict[rollup]
            except KeyError:
                rollup = 'None'

                try:
                    first = self.job_eac_dict[first_job]
                except KeyError:
                    first = 'N/A'

            second_job = row[1]
            mi = row[2]

            try:
                second = self.job_eac_dict[second_job]
            except KeyError:
                second = 'N/A'

            if first == 'N/A' or second == 'N/A':
                difference = 'N/A'
                diff_error = 'N/A'
            else:
                try:
                    difference = abs((float(first[0]) - float(second[0])) /
                                     float(second[0]))
                    diff_error = self.calculate_difference_error(
                        first_job, second_job)
                except ValueError:
                    # Without this fallback, difference/diff_error would leak
                    # in from a previous iteration (or be unbound on the first).
                    print first[0], second[0]
                    difference = 'N/A'
                    diff_error = 'N/A'

            rollup_key_check = self.rollup_dict.keys()

            if first_job in rollup_key_check:
                rollup = self.rollup_dict[first_job]

            try:
                examined_job_rollup = self.rollup_dict[second_job]
            except KeyError:
                examined_job_rollup = second_job

            if examined_job_rollup == rollup:
                continue

            if isinstance(first[0], float) and isinstance(second[0], float):
                final = (row[0], first_job, second_job, mi, rollup,
                         examined_job_rollup, round(float(first[0]), 2),
                         round(float(second[0]), 2))
            else:
                final = (row[0], first_job, second_job, mi, rollup,
                         examined_job_rollup, first[0], second[0])

            # Drop pairs whose pay gap is both large (over 20%) and larger
            # than the estimated error; the original elif and else branches
            # both appended, so they collapse to a single append.
            if difference > .2 and difference > diff_error:
                continue
            self.final_list.append(final)

        print "Writing to file"

        write_date = str(di.DateTimeFormats().return_today_as_y_m("-"))

        with open(
                final_suggestions + write_date + " Job Rollup Suggestions.csv",
                'w') as f:
            writer = csv.writer(f, lineterminator='\n')

            header = [
                'Unprompted String', 'Matched Job', 'Job Being Evaluated',
                'Mutual Information', 'Rollup for Matched Job',
                'Rollup for Job Being Evaluated', 'Matched Job EAC',
                'Job Being Evaluated EAC'
            ]

            writer.writerow(header)

            for row in self.final_list:
                # Each row is already the 8-tuple built above; write it directly.
                writer.writerow(row)
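The pay comparison at the heart of the loop, in isolation: the relative difference of two EAC values, kept only when it is small or within the estimated error. The numbers below are made up for illustration:

    first, second = 52000.0, 48000.0
    difference = abs((first - second) / second)
    print(round(difference, 3))  # 0.083: under the 0.2 cutoff, so the pair is kept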
Code Example #17
File: site_abuse.py | Project: TheBigGinge/Analytics
"""Script to find all the main site abusers by IP"""
import hmac
import hashlib
# The original had both "from datetime import datetime" and a later
# "import datetime"; the second rebinds the name, so only the module
# import is kept.
import datetime
import DateFunctions.datetimes_in as df
import SqlTools.payscale_replicated as pr
import urllib
import json
out_path = '\\\\filer01\\public\\Data_Analytics\\SiteAbusers\\'
data = pr.SqlServer("SiteStats")
ip_results = {}

dates = df.DateTimeFormats()

first_of_last = dates.return_ymd_first_of_last_month("-")
last_of_last = dates.return_ymd_last_day_of_last_month().strftime("%Y-%m-%d")
# Main SQL query. You need to be authenticated to the firewall to run it.

query = '''
declare     @StartDate datetime
declare     @EndDate datetime
declare     @pageName varchar(255)
declare     @codeName varchar(255)
declare     @codeVal varchar(255)

set @StartDate = '{0}'

set @EndDate = '{1}'
set @pageName = '/mypayscale.aspx'
set @codeName = 'ProfileGuid'