Esempio n. 1
0
    def __init__(self):
        """Store the network paths, reference date strings and monthly zip list.

        Everything is kept on the instance; nothing is returned.
        """
        # UNC locations (backslashes are escaped in the literals).
        self.report_path = "\\\\psstats03\\reports\\"
        self.temp_in_files = "\\\\filer01\\public\\Data_Analytics\\Data_Distributions\\temporary_files\\in_files\\"
        self.temp_out_files = "\\\\filer01\\public\\Data_Analytics\\Data_Distributions\\temporary_files\\out_files\\"
        # NOTE(review): this path spells the folder "Data Distributions" with
        # a space, while the two paths above use "Data_Distributions" --
        # confirm which spelling is intended.
        self.zip_path = "\\\\filer01\\public\\Data_Analytics\\Data Distributions\\temporary_files\\out_files\\"

        # The most recent Sunday is looked up as a date object and then
        # converted by datetime_to_string_y_m_d with "-" as its second
        # argument; only the converted value is kept.
        sunday = dates.DateFunctions().find_most_recent_sunday()
        self.most_recent_sunday = dates.DateFunctions(
        ).datetime_to_string_y_m_d(sunday, "-")
        self.first_of_month = dates.DateFunctions(
        ).return_ymd_first_of_the_current_month("-")

        # Pairs of (zip file name, identifier). Every plain entry is followed
        # by its "Overall" counterpart.
        self.monthly_zip = [
            ("Ryan Data Dist ONET 40.zip",
             'onet_forty'),
            ("Ryan Data Dist ONET 40 Overall.zip",
             'onet_forty_overall'),
            ("Ryan Data Dist Month by Month NAICS 40.zip",
             'naics_codes'),
            ("Ryan Data Dist Month by Month NAICS 40 Overall.zip",
             'naics_codes_overall'),
            ("Ryan Data Dist Metros.zip",
             'metro_main'),
            ("Ryan Data Dist Metros Overall.zip",
             'metro_main_overall'),
            ("Ryan Data Dist Degree Affinities.zip",
             'degree_affinities'),
            ("Ryan Data Dist Degree Affinities Overall.zip",
             'degree_affinities_overall'),
            ("Ryan Data Dist Company Sizes.zip",
             'company_size'),
            ("Ryan Data Dist Company Sizes Overall.zip",
             'company_size_overall'),
            ("Ryan Data Dist Age Buckets.zip",
             'age_ranges'),
            ("Ryan Data Dist Age Buckets Overall.zip",
             'age_ranges_overall'),
            ("Ryan Data Dist YE.zip",
             'years_experience'),
            ("Ryan Data Dist YE Overall.zip",
             'years_experience_overall'),
            ("Ryan Data Dist Rollups.zip",
             'rollups'),
            ("Ryan Data Dist Rollups Overall.zip",
             'rollups_overall'),
        ]
Esempio n. 2
0
    def find_weekly_date_values():
        """Return two ``%Y%m%d`` strings used as the weekly date window.

        The first is the most recent Sunday; the second is the date that
        ``create_prior_date_by_weeks`` returns for that Sunday and ``6``.
        """
        stamp_format = "%Y%m%d"

        sunday = dates.DateFunctions().find_most_recent_sunday()
        earlier = dates.DateFunctions().create_prior_date_by_weeks(sunday, 6)

        return (str(sunday.strftime(stamp_format)),
                str(earlier.strftime(stamp_format)))
Esempio n. 3
0
 def switchDates(self, weekly):
     """Relabel and reformat the start/end date widgets.

     When ``weekly`` is exactly ``True`` the labels show the value returned
     by ``end_week`` for each selected date and the editors display
     ``MM.dd.yyyy``. For any other value the fixed monthly labels and the
     ``MM.yyyy`` display format are used.
     """
     if weekly is not True:
         self.startDateLabel.setText('From 1st of ')
         self.startDate.setDisplayFormat('MM.yyyy')
         self.endDateLabel.setText('Through 31st of')
         self.endDate.setDisplayFormat('MM.yyyy')
         return

     # Start widget first, then end widget, each label before its format.
     start_day = self.startDate.date().toPyDate()
     start_week = df.DateFunctions().end_week(start_day)
     self.startDateLabel.setText('From Week starting ' + str(start_week))
     self.startDate.setDisplayFormat('MM.dd.yyyy')

     end_day = self.endDate.date().toPyDate()
     end_week = df.DateFunctions().end_week(end_day)
     self.endDateLabel.setText('Through Week starting ' + str(end_week))
     self.endDate.setDisplayFormat('MM.dd.yyyy')
Esempio n. 4
0
    def update_database_with_monthly_values(self,
                                            dictionary,
                                            data_table,
                                            mapping=None,
                                            rollup_metro=False):
        """Insert monthly values from a nested dictionary into ``data_table``.

        ``dictionary`` maps a date key to an inner mapping of category to
        value. Inner keys that are ``None`` are skipped.

        * ``rollup_metro is False``: each inner value becomes one
          three-column row ``(date, category, value)``. When ``mapping`` is
          given the category is written as ``mapping[category]``.
        * ``rollup_metro is True``: each inner value is iterated as a
          sequence of pairs, and every pair becomes one four-column row
          ``(date, pair[0], mapping[category], pair[1])``. ``mapping`` must
          be supplied in this mode.

        Any other ``rollup_metro`` value inserts nothing.

        The connection is closed even when an insert raises. The transaction
        is committed only after every insert has succeeded.
        """
        conn = sqlite3.connect(self.sql_data_base)
        try:
            c = conn.cursor()

            # SQLite placeholders cannot stand in for a table name, so
            # data_table is concatenated into the statement text. It must
            # come from trusted code, never from user input.
            if rollup_metro is False:
                print('Placing data into ' + data_table + ' \n')
                insert_sql = 'INSERT INTO ' + data_table + ' VALUES (?,?,?)'

                for keys in dictionary:
                    use_date = daten.DateFunctions(
                    ).return_full_year_month_day(str(keys))
                    for value in dictionary[keys]:
                        if value is None:
                            continue
                        profiles = dictionary[keys][value]
                        label = value if mapping is None else mapping[value]
                        c.execute(insert_sql, (use_date, label, profiles))

            elif rollup_metro is True:
                insert_sql = 'INSERT INTO ' + data_table + ' VALUES (?,?,?,?)'

                for keys in dictionary:
                    # Unlike the branch above, the key is passed through
                    # without str(), exactly as before.
                    use_date = daten.DateFunctions(
                    ).return_full_year_month_day(keys)
                    for value in dictionary[keys]:
                        if value is None:
                            continue
                        for pair in dictionary[keys][value]:
                            rollup = pair[0]
                            profiles = pair[1]
                            c.execute(
                                insert_sql,
                                (use_date, rollup, mapping[value], profiles))

            conn.commit()
        finally:
            # Closing without a commit discards the partial transaction.
            conn.close()
Esempio n. 5
0
    def extract_data(self, full_zip_file, file_name):
        """Extract one archive member and sort its rows into two list pairs.

        Nothing happens when ``file_name`` contains the string returned by
        ``return_year_month_as_string("")``. Otherwise the member is
        extracted into ``self.working_directory`` and read as CSV. Rows with
        fewer than 16 fields are ignored. Each remaining row goes through
        ``self.data_pull``; the result is appended to the ``is_internal_*``
        lists when its element 6 is ``False`` and to the ``is_customer_*``
        lists otherwise, together with a ``YYYY-MM`` string cut from the
        file name.
        """
        current_year_month = dates.DateFunctions().return_year_month_as_string(
            "")
        if current_year_month in file_name:
            return

        full_zip_file.extract(file_name, self.working_directory)
        print("Working on " + file_name)

        # The ten characters after the 'JobTitleMatchingV2_' prefix length
        # hold the date stamp; only its year (0-3) and month (4-5) are used.
        prefix_length = len('JobTitleMatchingV2_')
        stamp = file_name[prefix_length:prefix_length + 10]
        final_date = stamp[0:4] + '-' + stamp[4:6]

        with open(self.working_directory + "\\" + file_name) as reader:
            for row in csv.reader(reader, delimiter=',', quotechar='"'):
                # Get rid of bad rows
                if len(row) < 16:
                    continue

                data = self.data_pull(row)
                if data[6] is False:
                    self.is_internal_data.append(data)
                    self.is_internal_dates.append(final_date)
                else:
                    self.is_customer_data.append(data)
                    self.is_customer_dates.append(final_date)
Esempio n. 6
0
    def create_weekly_statistics(self, active_list, total_list):
        """Compare the week dated two Sundays ago with the other weeks.

        ``active_list`` and ``total_list`` are indexed in parallel; element 0
        of each row is compared (as a string) with the ``%Y%m%d`` stamp of
        two Sundays ago and element 1 is the count.

        Returns a 10-tuple: last Sunday, current active, current
        (total - active), current ratio, past active, past (total - active),
        past ratio, the two values returned by ``calculate_chi_square``, and
        current ratio minus past ratio.
        """
        # Only the first value is used; the second is discarded.
        last_sunday, _six_weeks_prior = self.find_weekly_date_values()
        two_sundays_ago = str(
            dates.DateFunctions().find_date_two_sundays_ago().strftime(
                "%Y%m%d"))

        current_distribution = []
        past_active = 0
        past_total = 0

        # NOTE(review): the range stops at len(active_list) - 1, so the final
        # row is never read -- confirm this is intended.
        for idx in xrange(len(active_list) - 1):
            active_count = active_list[idx][1]
            if str(active_list[idx][0]) == two_sundays_ago:
                current_distribution.append(active_count)
                current_distribution.append(total_list[idx][1] - active_count)
            else:
                past_active += active_count
                past_total += total_list[idx][1]

        current_distribution.extend([past_active, past_total - past_active])

        # NOTE(review): the current ratio divides active by (total - active),
        # while the past ratio divides active by total -- confirm the
        # asymmetry is intended.
        current_ratio = float(current_distribution[0]) / float(
            current_distribution[1])
        past_ratio = float(past_active) / float(past_total)

        stats = chi.ChiSquaredIndependence(current_distribution)
        stats.calculate_expected_values()
        final_stats = stats.calculate_chi_square()

        return (last_sunday, current_distribution[0], current_distribution[1],
                current_ratio, current_distribution[2],
                current_distribution[3], past_ratio, final_stats[0],
                final_stats[1], current_ratio - past_ratio)
Esempio n. 7
0
    def solver_alt(self):
        """Build a Solver from the current selections and show it in a popup.

        The start and end dates are passed through ``end_week`` before being
        handed to ``pull_stats.Solver``. The solver's canvas is re-parented
        to the popup and the solver's ``v_box_data`` becomes its layout.
        """
        self.answer = gui_classes.AnswerPopup()

        start_day = self.dates.startDate.date().toPyDate()
        begin = df.DateFunctions().end_week(start_day)
        end_day = self.dates.endDate.date().toPyDate()
        end = df.DateFunctions().end_week(end_day)

        selections = self.variables
        self.data = pull_stats.Solver(
            self.combo.currentIndex(),
            begin,
            end,
            selections.variable_box.currentText(),
            selections.metroCountry.currentText(),
            selections.metroSt.currentText(),
            selections.metroCity.currentText(),
        )

        popup = self.answer
        self.data.dist.canvas.setParent(popup)
        popup.setLayout(self.data.v_box_data)
        popup.show()
Esempio n. 8
0
 def __init__(self, min_date, max_date):
     """Store the connection settings, date bounds and search string.

     ``min_date`` and ``max_date`` are kept unchanged on the instance.
     """
     # Connection settings (credentials are masked in this source).
     self.host = 'digger'
     self.user = '******'
     self.password = '******'
     self.port = 8089

     self.date_format = df.DateFunctions()
     self.min_date = min_date
     self.max_date = max_date

     # One search string, split across adjacent literals only to keep the
     # lines short.
     self.search_string = (
         "search sourcetype=PSPJobMatching | table AccountID, AccountName, Algorithm, AllQueries,"
         " Created, Email, JobIndex, NumSearches, NumViewMore, PayscaleTitle, Query, "
         "SalesForceAccountID, SubscriptionType")
Esempio n. 9
0
    def update_database_with_weekly_values(self,
                                           count_dict,
                                           data_table,
                                           medians=False):
        """Insert weekly values from ``count_dict`` into ``data_table``.

        ``count_dict`` maps a date key to the value(s) for that week.

        * ``medians is False``: one two-column row ``(date, value)`` per key.
        * ``medians is True``: the value is indexed, and elements 1, 2 and 3
          (25th percentile, median, 75th percentile) are written as a
          four-column row ``(date, p25, median, p75)``.

        Any other ``medians`` value does nothing.

        The median rows go to ``data_table``. Previously they always went to
        ``eac_weekly`` regardless of the table requested. The connection is
        closed even when an insert raises, and the commit happens only after
        every insert has succeeded.
        """
        if medians is False:
            placeholders = '(?,?)'
        elif medians is True:
            placeholders = '(?,?,?,?)'
        else:
            return

        print('Placing data into ' + data_table + ' \n')
        # SQLite placeholders cannot stand in for a table name, so data_table
        # is concatenated; it must come from trusted code.
        insert_sql = 'INSERT INTO ' + data_table + ' VALUES ' + placeholders

        conn = sqlite3.connect(self.sql_data_base)
        try:
            c = conn.cursor()

            for keys in count_dict:
                use_date = daten.DateFunctions().return_full_year_month_day(
                    str(keys))

                if medians is False:
                    row = (use_date, count_dict[keys])
                else:
                    values = count_dict[keys]
                    twenty_fifth = values[1]
                    median = values[2]
                    seventy_fifth = values[3]
                    row = (use_date, twenty_fifth, median, seventy_fifth)

                c.execute(insert_sql, row)

            conn.commit()
        finally:
            # Closing without a commit discards the partial transaction.
            conn.close()
Esempio n. 10
0
    def getData(self, query, start, weekly):
        """Run ``query``, collect its points and list them in a text box.

        Each row's first column is converted with ``from_date`` and appended
        to ``self.data_points_x``; its second column is appended to
        ``self.data_points_y``. In weekly mode (``weekly is True``) rows
        whose first column starts with "9" are skipped and the step is seven
        days; otherwise the step is one month.

        All collected points are then written, tab-separated and one per
        line, into a ``QTextEdit`` added to ``self.hboxNums``.
        """
        if weekly is True:
            start = df.DateFunctions().end_week(start)
            step = relativedelta(days=+7)
            rows = self.c.execute(query)
        else:
            step = relativedelta(months=+1)
            rows = self.c.execute(str(query))

        for row in rows:
            if weekly is True and str(row[0]).startswith("9"):
                # Keeping out values from before 2010
                continue

            row_date = df.DateFunctions().from_date(row[0])
            # This gap loop only advances `start` towards the row date. It
            # appends nothing, and `start` is reassigned right after.
            while row_date != start and start < row_date:
                start = start + step

            self.data_points_x.append(row_date)
            self.data_points_y.append(row[1])
            start = row_date + step

        # puts data in box for user retrieval
        text = QtGui.QTextEdit()
        lines = []
        for position, x_value in enumerate(self.data_points_x):
            lines.append(
                str(x_value) + "\t" + str(self.data_points_y[position]) + "\n")
        text.setText("".join(lines))
        self.hboxNums.addWidget(QtGui.QLabel("Data points"))
        self.hboxNums.addWidget(text)
Esempio n. 11
0
    def __init__(self, start, end, rollup, country, state, city):
        """Query and plot the ``metro_rollups`` rows for one rollup and metro.

        The metro key is resolved with ``getMetroKey(country, state, city)``
        and stored as ``self.key``. The date bounds and ``rollup`` are kept
        on the instance after the figure has been drawn.
        """
        super(RollupMetro, self).__init__()

        self.key = self.getMetroKey(country, state, city)

        # NOTE(review): the values are spliced straight into the SQL text.
        # getData() only accepts a finished query string, so binding
        # parameters would need a change there as well.
        query = ("SELECT date, profiles FROM metro_rollups WHERE date >= "
                 + start
                 + " AND date <= " + end
                 + " AND rollup='" + rollup
                 + "' AND metro_key=" + self.key
                 + " ORDER BY date ASC")

        self.getData(query, df.DateFunctions().from_date(start), False)
        self.graph()

        title = (rollup + ' Rollup in ' + city + ', ' + state +
                 ' Metro Area Profiles')
        self.fig.suptitle(title)

        self.start = start
        self.end = end
        self.rollup = rollup
Esempio n. 12
0
    def __init__(self, start, end, country=0, state=0, city=0, metrokey=0):
        """Query and plot the ``metro_main`` rows for one metro key.

        When ``metrokey`` equals 0 the key is resolved with
        ``getMetroKey(country, state, city)``; otherwise ``metrokey`` is used
        directly. The figure title names the city and state unless ``city``
        equals 0.
        """
        super(Metro, self).__init__()

        key = (self.getMetroKey(country, state, city)
               if metrokey == 0 else metrokey)

        # NOTE(review): the values are spliced straight into the SQL text.
        # getData() only accepts a finished query string.
        query = ("SELECT date, profiles FROM metro_main WHERE date >= "
                 + start
                 + " AND date <= " + end
                 + " AND key=" + key
                 + " ORDER BY date ASC")

        self.getData(query, df.DateFunctions().from_date(start), False)
        self.graph()

        if city != 0:
            title = city + ', ' + state + ' Metro Area Profiles'
        else:
            title = 'All Metro Area Profiles'
        self.fig.suptitle(title)

        self.start = start
        self.end = end
Esempio n. 13
0
    def find_monthly_date_values():
        """Return the monthly date window as two strings.

        The second value is whatever
        ``return_ymd_first_of_the_current_month("")`` returns; its characters
        0-3 are read as the year and 4-5 as the month. The first value is the
        year and zero-padded month six months earlier, followed by "01".
        """
        first_of_month = dates.DateFunctions(
        ).return_ymd_first_of_the_current_month("")

        year = int(first_of_month[:4])
        month = int(first_of_month[4:6])

        # Step back one month at a time, wrapping from January to the
        # previous December.
        for _ in xrange(6):
            month -= 1
            if month == 0:
                month = 12
                year -= 1

        window_start = str(year) + str(month).zfill(2) + "01"
        return window_start, first_of_month
Esempio n. 14
0
    def __init__(self, index, start, end, other):
        """Query and plot one monthly series filtered by ``other``.

        ``index`` selects the table, filter column and display name from
        three parallel seven-entry lists whose first entry is index 4, so
        the supported indices are 4 to 10.

        For index 6 (``naics_codes``) and index 8 (``onet_forty``) ``other``
        is split: its leading part (3 or 10 characters) is the filter value
        and the remainder after the separator is the display name used in
        the title.

        The rollup title branch now tests ``index == 10``, the ``rollups``
        entry. It previously tested ``index == 11``, which could never be
        reached because ``tables[index - 4]`` raises ``IndexError`` first.
        """
        super(MonthlyDist, self).__init__()

        tables = [
            "company_size", "degree_affinities", "naics_codes", "age_ranges",
            "onet_forty", "years_experience", "rollups"
        ]
        columns = [
            "size_range", "level", "code", "age_range", "code",
            "experience_range", "name"
        ]
        names = [
            'Company Size', 'Degree Level', 'NAICS Code', 'Years Old',
            'ONET Code', 'Years Experience', 'Rollup'
        ]

        position = index - 4
        self.table = tables[position]
        self.column = columns[position]

        if index == 6:
            name = other[6:]
            other = other[:3]
        if index == 8:
            name = other[13:]
            other = other[:10]
        self.index = index
        self.other = other

        # NOTE(review): the values are spliced straight into the SQL text.
        # getData() only accepts a finished query string.
        query = "SELECT date, profiles FROM " + self.table + " WHERE date >= " \
                + start + " AND date <= " + end + " AND " + self.column + "='" + other + \
                "' ORDER BY date ASC"
        self.getData(query, df.DateFunctions().from_date(start), False)
        self.start = start
        self.end = end

        self.graph()
        if index == 10:
            # Rollups read better with the value first.
            self.fig.suptitle(other + ' ' + names[position] + ' Profiles')
        elif index == 6 or index == 8:
            self.fig.suptitle(other + ' ' + name + ' Profiles')
        else:
            self.fig.suptitle(names[position] + ' ' + other + ' Profiles')
Esempio n. 15
0
    def __init__(self, index, start, end):
        """Query and plot one weekly series selected by ``index``.

        ``index`` picks the table, the value column and the figure title from
        a four-entry list. The date bounds are kept on the instance.
        """
        super(WeeklyDist, self).__init__()

        # (table, value column, figure title) per index.
        series = [
            ("active_profiles", "profiles", 'All Active Profiles'),
            ("overall_profiles", "profiles", 'All Profiles'),
            ("eac_weekly", "median", 'Median EAC'),
            ("combined_salary_weekly", "median", 'Median Combined Salary'),
        ]
        self.table, self.column, title = series[index]

        # NOTE(review): the date bounds are spliced straight into the SQL
        # text. getData() only accepts a finished query string.
        query = ("SELECT date, " + self.column + " FROM " + self.table
                 + " WHERE date >= " + start
                 + " AND date <= " + end
                 + " ORDER BY date ASC")
        self.getData(query, df.DateFunctions().from_date(start), True)
        self.start = start
        self.end = end

        self.graph()
        self.fig.suptitle(title)
import data_distributions as data_dist
import DateFunctions.date_functions as dates
import data_dist_database as db

# Network (UNC) path of the temporary "in_files" folder.
analytics_file_path = '\\\\filer01\\public\\Data_Analytics\\Data_Distributions\\temporary_files\\in_files\\'

# The week-by-week data pulls use weeks that end on Sunday, so the most
# recent Sunday is resolved once at import time and converted by
# datetime_to_string_y_m_d with "-" as its second argument.
most_recent_sunday = dates.DateFunctions().datetime_to_string_y_m_d(
    dates.DateFunctions().find_most_recent_sunday(), "-")


def find_weekly_query_start_date(table):
    """Return the day after the table's most recent date as ``YYYY-MM-DD``.

    The most recent date is read through
    ``db.DatabaseWork().pull_most_recent_date_value(table)``, and the first
    eight characters of its string form are parsed as ``YYYYMMDD``.

    The next day is computed with real date arithmetic. Adding 1 to the
    ``YYYYMMDD`` number gave invalid dates whenever the stored date was the
    last day of a month (e.g. 20150531 became "2015-05-32").

    Raises ``ValueError`` when the stored value is not a valid ``YYYYMMDD``
    date.
    """
    import datetime

    last_run_date = str(db.DatabaseWork().pull_most_recent_date_value(table))

    last_run_day = datetime.datetime.strptime(last_run_date[:8], "%Y%m%d")
    next_day = last_run_day + datetime.timedelta(days=1)

    return next_day.strftime("%Y-%m-%d")


def find_monthly_query_start_date(table):
    last_run_date = str(db.DatabaseWork().pull_most_recent_date_value(table))

    year = int(last_run_date[:4])
    month = int(last_run_date[4:6]) + 1

    if month == 13:
        month = 1
Esempio n. 17
0
    def create_monthly_statistics(self, active_list, total_list, category):
        """Yield one comparison row per key: last month against earlier months.

        Rows of ``active_list`` and ``total_list`` are routed by their first
        element. Rows whose string form contains last month's ``YYYYMM``
        stamp feed the "current" dictionaries and all others feed the "past"
        dictionaries, both through ``create_monthly_values_dictionary``.

        For every key present in the past totals, an 11-tuple is yielded:
        ``category``, key, current active, current total, current ratio,
        past active, past total, past ratio, the two values returned by
        ``calculate_chi_square`` (or ``"N/A"``, ``"N/A"``), and current ratio
        minus past ratio.

        The chi-square test is run only when every one of the four counts is
        greater than 5. The earlier check was a chained comparison
        (``number in distribution_list <= 5``) that never looked at the
        counts. A ratio whose denominator is zero is reported as 0.
        """
        last_month = dates.DateFunctions().return_ymd_first_of_last_month(
            "")[:6]

        past_actives = {}
        past_totals = {}
        current_actives = {}
        current_total = {}

        for item in active_list:
            if last_month in str(item[0]):
                self.create_monthly_values_dictionary(item, current_actives)
            else:
                self.create_monthly_values_dictionary(item, past_actives)

        for item in total_list:
            if last_month in str(item[0]):
                self.create_monthly_values_dictionary(item, current_total)
            else:
                self.create_monthly_values_dictionary(item, past_totals)

        for keys in past_totals:
            current_active_count = current_actives.get(keys, 0)

            # Without a current total for this key, the inactive part is
            # reported as 0, as before.
            if keys in current_total:
                current_inactive_count = (current_total[keys] -
                                          current_active_count)
            else:
                current_inactive_count = 0

            try:
                current_ratio = float(current_active_count) / float(
                    current_active_count + current_inactive_count)
            except ZeroDivisionError:
                current_ratio = 0

            past_active_count = past_actives.get(keys, 0)
            past_inactive_count = past_totals[keys] - past_active_count

            try:
                past_ratio = float(past_active_count) / float(
                    past_totals[keys])
            except ZeroDivisionError:
                past_ratio = 0

            # Cell order: current active, current inactive, past active,
            # past inactive.
            distribution_list = [
                current_active_count,
                current_inactive_count,
                past_active_count,
                past_inactive_count,
            ]

            if all(number > 5 for number in distribution_list):
                stats = chi.ChiSquaredIndependence(distribution_list)
                stats.calculate_expected_values()
                final_stats = stats.calculate_chi_square()
            else:
                final_stats = ("N/A", "N/A")

            yield (category, keys, current_active_count,
                   current_inactive_count + current_active_count,
                   current_ratio, past_active_count, past_totals[keys],
                   past_ratio, final_stats[0], final_stats[1],
                   current_ratio - past_ratio)
Esempio n. 18
0
    def __init__(self,
                 index,
                 start,
                 end,
                 other_var=0,
                 country=0,
                 state=0,
                 city=0):
        """Build the distribution object that matches ``index`` and lay it out.

        * ``index < 4``: ``WeeklyDist``, with the dates passed through
          ``return_full_year_month_day``.
        * ``4 <= index < 11``: ``MonthlyDist``
        * ``index == 11``: ``RollupMetro``
        * ``index == 12``: ``Metro``

        For the last three the dates first go through ``start_month``. No
        distribution is created for any other index, so the ``self.dist``
        access that follows fails.
        """
        self.v_box_data = None

        def full_date(value):
            # Convert through return_full_year_month_day.
            return df.DateFunctions().return_full_year_month_day(value)

        def month_date(value):
            # Pass through start_month first, then convert as above.
            return full_date(df.DateFunctions().start_month(value))

        if index < 4:
            self.dist = WeeklyDist(index, full_date(start), full_date(end))
        elif index < 11:
            self.dist = MonthlyDist(index, month_date(start), month_date(end),
                                    other_var)
        elif index == 11:
            self.dist = RollupMetro(month_date(start), month_date(end),
                                    other_var, country, state, city)
        elif index == 12:
            self.dist = Metro(month_date(start), month_date(end), country,
                              state, city)

        self.dist.special()
        self.layout()
Esempio n. 19
0
import sys
sys.path.append("C:\\hg\\payscale\\users\\ryanm\\PayScaleAnalytics\\")
import datetime
import csv
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import DataDistributions.data_dist_database as db
import DateFunctions.date_functions as df
import DataDistributions.data_distributions as dd

# Network (UNC) path of the "Distributions_Alert_Report" folder.
alert_writing_path = '\\\\filer01\\public\\Data_Analytics\\Data_Distributions\\Distributions_Alert_Report\\'
today = datetime.datetime.today()
string_today = df.DateFunctions().datetime_to_string_y_m_d(today, "-")

# The weekly queries and database update run on every execution.
print("Running weekly queries...")
dd.DataDistributions().run_weekly_analysis_tool_queries()
dd.DataDistributions().update_weekly_database()

# The monthly queries and database update run only on days 1 to 8.
if today.day <= 8:
    print("Running monthly queries...")
    dd.DataDistributions().run_monthly_analysis_tool_queries()
    dd.DataDistributions().update_monthly_database()

monthly_reports = [
    'onet_forty', 'onet_forty_overall', 'naics_codes', 'naics_codes_overall',
    'metro_main', 'metro_main_overall', 'degree_affinities',
    'degree_affinities_overall', 'company_size', 'company_size_overall',
    'age_ranges', 'age_ranges_overall', 'years_experience',
Esempio n. 20
0
import csv

import DateFunctions.date_functions as df

# Copy out.txt to fixed_dates.txt, rewriting the "ActivityDateTime" column of
# every data row through DateFunctions.rob_date_return. The header row is
# copied unchanged.
with open("C:\\users\\ryanm\\big_fancy_file\\out.txt", 'rb') as R:
    with open("C:\\users\\ryanm\\big_fancy_file\\fixed_dates.txt", 'wb') as W:

        reader = csv.reader(R, delimiter=',')
        writer = csv.writer(W, delimiter=',', lineterminator='\n')

        # The built-in next() replaces the iterator's .next() method, which
        # exists only on Python 2.
        header = next(reader)
        writer.writerow(header)

        # The column position is the same for every row, so it is looked up
        # once. A missing column now raises ValueError before any data row
        # is processed.
        row_index = header.index("ActivityDateTime")

        for row in reader:
            row[row_index] = df.DateFunctions().rob_date_return(
                row[row_index])
            writer.writerow(row)