Beispiel #1
0
    def StageCodeBookFile(self):

        try:
            self.logger.info("Starting staging codebook file")

            codebookHeaderRow = self.config.get(
                'CodeBook',
                str(self.year) + self.responseClass + "header")
            codebookSheetName = self.config.get(
                'CodeBook',
                str(self.year) + self.responseClass + "sheet")
            codebookDict = pd.read_excel(self.codeBookFile,
                                         index_col=None,
                                         header=int(codebookHeaderRow),
                                         sheet_name=codebookSheetName)
            codebookDF = pd.DataFrame.from_dict(codebookDict)

            self.logger.info("Starting transformation of codebook.")
            codebookDF = self.AddCodesForMissingData(codebookDF)
            codebookDF = codebookDF.drop_duplicates()
            self.cbdf = codebookDF

            self.logger.info(
                "Starting insertion of codebook file data into staging table.")
            with SurveyDatabase.surveyDatabase() as db:
                db.createStagingTableFromDF(
                    self.cbdf,
                    'codebook_' + self.responseClass + str(self.year))

            self.logger.info(
                "Finished insertion of codebook file data into staging table.")

        except Exception as e:
            self.logger.error(e.args[0])
            raise
Beispiel #2
0
def initLogging():
    config = SurveyConfigReader.surveyConfig()
    LogToDB = config.get('Logging', 'LogToDB')

    #DEBUG - Detailed information, typically of interest only when diagnosing problems.
    #INFO - Confirmation that things are working as expected.
    #WARNING - An indication that something unexpected happened, or indicative of some problem in the near future (e.g. ‘disk space low’). The software is still working as expected.
    #ERROR - Due to a more serious problem, the software has not been able to perform some function.
    #CRITICAL - A serious error, indicating that the program itself may be unable to continue running.
    loggingLevel = logging.INFO
    for logger_name in ['surveyLogger']:
        logger = logging.getLogger(logger_name)
        logger.setLevel(loggingLevel)

        if LogToDB == 'True':
            sqlConn = SurveyDatabase.surveyDatabase().sql_conn
            cursor = sqlConn.cursor()
            ch = LogDBHandler(sqlConn, cursor, "stg.log")
        else:
            path = config.get('Logging', 'LoggingDirectory')
            ch = logging.FileHandler(
                path + 'Logging_' +
                datetime.datetime.now().strftime("%Y%m%d") + '.log')
            ch.setFormatter(
                logging.Formatter(
                    '%(asctime)s - %(threadName)s %(filename)s:%(lineno)d - %(funcName)s() - %(levelname)s - %(message)s'
                ))

        ch.setLevel(loggingLevel)
        logger.addHandler(ch)
Beispiel #3
0
    def TransformResponseAndCodeTable(self, rfdf, cbdf):
        #Foreach column in cbdf
        for header in list(rfdf.columns.values):
            rfdf = self.AddParseColumns(header, rfdf, cbdf)

        with SurveyDatabase.surveyDatabase() as db:
            db.createStagingTableFromDF(rfdf,'response_and_code_' + self.responseClass +'_'+str(self.year))
Beispiel #4
0
    def StageResponseFile(self):
        try:
            self.logger.info("Starting staging response file")
            with SurveyDatabase.surveyDatabase() as db:

                #read in where the header row starts
                header_row = int(
                    self.config.get(
                        'Response',
                        str(self.year) + self.responseClass + "header"))
                self.logger.info("Setting header row to: " + str(header_row))
                sheet_name = self.config.get(
                    'Response',
                    str(self.year) + self.responseClass + "sheet")
                self.logger.info("Setting sheet name to: " + sheet_name)

                self.logger.info("Reading in file format for " +
                                 str(self.year))
                readFormat = self.config.get(
                    'Response',
                    str(self.year) + self.responseClass + "format")

                if readFormat == 'excel':
                    response_sheet_name = self.config.get(
                        'Response',
                        str(self.year) + self.responseClass + "sheet")
                    self.logger.info("Setting sheet name to: " +
                                     response_sheet_name)
                    self.logger.info('Reading in survey excel file from: ' +
                                     self.responseFile)
                    rfdf = pd.read_excel(self.responseFile,
                                         index_col=None,
                                         header=header_row,
                                         sheet_name=response_sheet_name)
                elif readFormat == 'database':
                    self.logger.info('Reading in survey from database table')
                    rfdf = db.executeAndPandas("SELECT * FROM [stg].[" +
                                               self.responseFile + "]")
                elif readFormat == 'csv':
                    self.logger.info('Reading in survey csv file from: ' +
                                     self.responseFile)
                    rfdf = pd.read_csv(self.responseFile,
                                       index_col=None,
                                       header=header_row)
                else:
                    raise Exception('Unknown Format Type')

                self.logger.info(
                    "Starting insertion of response file data into staging table."
                )
                db.createStagingTableFromDF(rfdf,
                                            'Survey_file_' + str(self.year))
                self.logger.info(
                    "Finished insertion of response file data into staging table."
                )

                self.rfdf = rfdf
        except Exception as e:
            self.logger.error(e.args[0])
            raise
Beispiel #5
0
 def ProcessHouseholdFactTable(self):
     try:
         with SurveyDatabase.surveyDatabase() as db:
             db.execute("exec HHSurvey.mergeHouseholdFact" + str(self.year))
         return True
     except Exception as e:
         self.logger.error(e.args[0])
         raise
Beispiel #6
0
 def ProcessTripDim(self):
     try:
         with SurveyDatabase.surveyDatabase() as db:
             db.execute("exec HHSurvey.merge_trip_dim_" + str(self.year))
             #upsert logic instead of sql logic
     except Exception as e:
         self.logger.error(e.args[0])
         raise
Beispiel #7
0
 def ProcessFactTable(self):
     try:
         with SurveyDatabase.surveyDatabase() as db:
             db.execute("exec HHSurvey.merge_" + self.responseClass +
                        "_fact_" + str(self.year))
         return True
     except Exception as e:
         self.logger.error(e.args[0])
         raise
Beispiel #8
0
 def LoadDims(self):
     try:
         self.logger.info("Starting loading {}_dim".format(self.responseClass))
         sproc_name = 'HHSurvey.merge_{}_dim_{}'.format(self.responseClass, str(self.year))
         with SurveyDatabase.surveyDatabase() as db:
             db.execute("exec {}".format(sproc_name))
         self.logger.info("Finished loading {}_dim".format(self.responseClass))
     except Exception as e:
         self.logger.error(e.args[0])
         raise
Beispiel #9
0
    def ProcessMapping(self, df):
        with SurveyDatabase.surveyDatabase() as db:
            self.logger.info("Pulling in the mapping table")
            mappingDF = db.pullMappingTable("File_"+ str(self.year))

            #Convert to Dictionary
            mappingDict = mappingDF.set_index("Orginal_Names")["Master_Names"].to_dict()

            self.logger.info("Renaming column to master names")
            df = df.rename(index=str, columns=mappingDict)
        return True
Beispiel #10
0
    def TransformResponseAndCodeTable(self, rfdf, cbdf):
        try:
            #Foreach column in cbdf
            for header in list(rfdf.columns.values):
                rfdf = self.AddParseColumns(header, rfdf, cbdf)

            with SurveyDatabase.surveyDatabase() as db:
                db.createStagingTableFromDF(rfdf,'response_and_code_'+self.responseClass+'_'+str(self.year))

        except Exception as e:
            self.logger.error('header=' + header + e.args[0])
            raise
Beispiel #11
0
 def ProcessHouseHoldDim(self):
     try:
         with SurveyDatabase.surveyDatabase() as db:
             db.execute("exec HHSurvey.merge_household_dim_" + str(self.year))
             #upsert logic instead of sql logic
             #TODO: delete existing year data
             #TODO format df to HouseholdDim structure
             #TODO: append data to dim table
             #appendTableFromDF('dbo',df, 'HouseholdDim'):
         return True
     except Exception as e:
         self.logger.error(e.args[0])
         raise
Beispiel #12
0
    def StageCodeBookFile(self):

        try:
            self.logger.info("Starting staging codebook file")

            with SurveyDatabase.surveyDatabase() as db:
                codebookHeaderRow = self.config.get(
                    'CodeBook',
                    str(self.year) + self.responseClass + "header")
                codebookSheetName = self.config.get(
                    'CodeBook',
                    str(self.year) + self.responseClass + "sheet")

                codebookDict = pd.read_excel(self.codeBookFile,
                                             index_col=None,
                                             header=int(codebookHeaderRow),
                                             sheet_name=codebookSheetName)
                codebookDF = pd.DataFrame.from_dict(codebookDict)

                self.logger.info(
                    "Starting insertion of codebook file data into staging table."
                )
                db.createStagingTableFromDF(
                    codebookDF,
                    'codebook_' + self.responseClass + str(self.year))
                self.logger.info(
                    "Finished insertion of codebook file data into staging table."
                )

                self.logger.info("Starting transformation of codebook.")
                codebookDF = codebookDF.replace('Valid Values', pd.np.nan)
                codebookDF = codebookDF.replace('Labeled Values', pd.np.nan)
                codebookDF[['order',
                            'Field']] = codebookDF[['order', 'Field'
                                                    ]].fillna(method='ffill')
                if (self.year == '2017' and self.responseClass == 'trip'):
                    codebookDF = self.AddSpecialCodeBookRows(codebookDF)
                self.cbdf = codebookDF[['Field', 'Variable', 'Value']]
                self.logger.info("Finished transformation of codebook.")
        except Exception as e:
            self.logger.error(e.args[0])
            raise
Beispiel #13
0
 def ProcessPersonDim(self):
     with SurveyDatabase.surveyDatabase() as db:
         db.execute("exec HHSurvey.merge_person_dim_" + str(self.year))