def map_task(self, task):
    """Rename DB-provided keys in *task* to the application's config key names.

    E.g. 'DATABASE_NAME' -> 'database_name', 'USERID' -> 'user_name'.
    *task* is mutated in place.

    :param task: dict holding one server record as read from the database
    :returns: None on success, 0 when a key is missing (mapping error)
    """
    try:
        task['server_name'] = task.pop('SERVER')
        task['database_name'] = task.pop('DATABASE_NAME')
        task['user_name'] = task.pop('USERID')
        task['password'] = task.pop('PWD')
        task['sid'] = task.pop('SID')
        task['port'] = task.pop('PORT')
    except Exception as e:
        # A missing key means the DB schema and this mapping are out of
        # sync; log it, tell the user, and signal failure so the caller
        # can re-run after fixing the mapping.
        Log.writeLog('Exception in Dbua.map_task module. Msg : ' + str(e), strs.ERRO)
        Helpers.displayText('Exception in Dbua.map_task module. Msg : ' + str(e))
        return 0
def uploadResultset(self, db_con, input_resulset):
    """Insert every record of *input_resulset* into the dbua audit report table.

    Failed inserts are logged and appended to the invalid-entries CSV
    instead of aborting the whole upload.

    :param db_con: open DB-API connection; committed after each insert so a
                   late failure does not roll back earlier rows
    :param input_resulset: iterable of records with at least 6 fields each
    :returns: number of records that failed to insert
    """
    cursor = db_con.cursor()
    columns = Helpers.getColumnList(input_resulset)
    error_count = 0
    for record in input_resulset:
        # NOTE(review): the query is built by string formatting -- safe only
        # while the audited values cannot contain quotes; consider bind
        # parameters if that assumption ever breaks.
        insert_string = strs.insert_query_to_dbua_audit_report_table.format(
            record[0], record[1], record[2], record[3], record[4], record[5])
        try:
            cursor.execute(insert_string)
            db_con.commit()
        except Exception as e:
            Log.writeLog(
                'Error in executing : ' + insert_string + ' Msgs : ' + str(e),
                strs.ERRO)
            Helpers.writeToFile(strs.invalid_entries_csv,
                                '|'.join(map(str, record)))
            error_count += 1
    return error_count
def scramblePwd(cls, mode, plain_pwd, enc_pwd):
    """Encrypt or decrypt a password via the external scramble.out binary.

    :param mode: strs.encrypt or strs.decrypt
    :param plain_pwd: plain-text password; also the fallback result when
                      scramble.out produces no output in decrypt mode
    :param enc_pwd: encrypted password to decode when mode is strs.decrypt
    :returns: decrypted password string for strs.decrypt, otherwise None
              (encryption is currently disabled)
    """
    if mode == strs.encrypt:
        # Encryption was deliberately disabled; callers get None back.
        Log.writeLog('encryption module is disabled.', strs.INFO)
    elif mode == strs.decrypt:
        cmd_string = './bin/scramble.out "{0:s}" "{1:s}" "{2:s}"'.format(
            " ", enc_pwd.strip(), " ")
        scramble_cmd = subprocess.Popen(shlex.split(cmd_string),
                                        stdout=subprocess.PIPE)
        # scramble.out prefixes each output line; the password payload
        # starts at column 10. The last line wins.
        for line in scramble_cmd.stdout:
            plain_pwd = line[10:]
        scramble_cmd.communicate()
        return plain_pwd.replace('\n', '').strip()
    else:
        # Unknown mode: nothing to do.
        return None
def command(cls, args_list):
    """Run *args_list* as a system command and capture its output.

    :param args_list: command and arguments as a list of strings
    :returns: (returncode, stdout, stderr) tuple
    """
    Log.info('Running system command: {0}'.format(' '.join(args_list)))
    process = subprocess.Popen(args_list,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    captured_out, captured_err = process.communicate()
    return process.returncode, captured_out, captured_err
def put(cls, file, path):
    """Upload *file* to *path* on HDFS via ``hdfs dfs -put``.

    :param file: local file path to upload
    :param path: destination HDFS path
    """
    (ret, out, err) = cls.command(['hdfs', 'dfs', '-put', file, path])
    Log.info("return: {}".format(ret))
    Log.info("output: {}".format(out))
    # Any non-zero exit code is a failure, not just 1.
    if ret != 0:
        Log.error("Error while uploading the file to HDFS: ")
        Log.error(err)
    else:
        Log.info("File successfully uploaded to HDFS")
def createFolder(cls):
    """Create the local data directory used to stage earthquake history files.

    Best-effort: failures are logged, never raised.
    """
    import errno  # local import: only needed to classify mkdir failures
    path = "../../data/earthquakes-history/"
    try:
        os.mkdir(path)
    except OSError as error:
        # Only "already exists" is benign; anything else (permissions,
        # missing parent dir) must not be reported as 'already exist'.
        if error.errno == errno.EEXIST:
            Log.info(
                "Creation of the data directory %s failed, already exist"
                % path)
        else:
            Log.error(
                "Creation of the data directory %s failed: %s"
                % (path, error))
    else:
        Log.info("Successfully created data directory %s " % path)
def pathValidation(cls, path):
    """Verify *path* exists on HDFS; exit the application when it does not.

    Also caches the validated path on the class as ``cls.path``.

    :param path: HDFS path to check with ``hdfs dfs -ls``
    """
    cls.path = path
    Log.info("HDFS path validation:")
    (ret, out, err) = cls.command(['hdfs', 'dfs', '-ls', path])
    # Treat every non-zero exit code as failure (hdfs may return codes > 1).
    if ret != 0:
        Log.error("HDFS path Error. Exiting the Application..")
        Log.error(err)
        Log.exit()
    else:
        Log.info("Valid HDFS path")
def toFile(cls, eq_list, year, d, magnitudeOver):
    """Append earthquake rows to the per-year/magnitude history CSV file.

    :param eq_list: list of earthquake records (each a list of strings)
    :param year: year used in the target file name
    :param d: date label used only for logging
    :param magnitudeOver: magnitude threshold used in the target file name
    """
    target = '../../data/earthquakes-history/earthquakes{}mag{}.csv'.format(
        year, magnitudeOver)
    count = 0
    with open(target, 'a') as writer:
        for count, eq in enumerate(eq_list, start=1):
            writer.write("%s\r\n" % (",".join(eq)))
    Log.info("Earthquakes for {} stored to file, records: {}".format(
        d, count))
def loadEarthquakesData(cls, file):
    """Load one earthquakes CSV file into hive via load-earthquakes.hql.

    :param file: path of the CSV file, handed to hive as the ${path} hivevar
    """
    hivevar = "path='" + file + "'"
    Log.info("Loading earthquakes data to hive:")
    Log.info(file)
    hive_args = ['hive', '-hivevar', hivevar, '-f',
                 '../hive_ql/load-earthquakes.hql']
    ret, out, err = System.command(hive_args)
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def executeQuery(self, query_string, conn):
    """Execute *query_string* on *conn*, storing the cursor on ``self.cursor``.

    :param query_string: SQL text to execute
    :param conn: open DB-API connection
    :returns: 0 when execution fails (error is logged), otherwise None
    """
    self.cursor = conn.cursor()
    try:
        self.cursor.execute(query_string)
    except Exception as e:
        Log.writeLog('Exception occured in query.executeQuery module. Msg : ' + str(e) + \
                     '\n Query : ' + query_string, strs.ERRO)
        return 0
def closeDbConnection(self, db_conn):
    """Close the given database connection.

    :param db_conn: open DB-API connection
    :returns: True on success, False when close() raised (error is logged)
    """
    try:
        db_conn.close()
        return True
    except Exception as e:
        # Include the reason -- the original bound `e` but never logged it.
        Log.writeLog('Exception while closing connection. Msg : ' + str(e),
                     strs.ERRO)
        return False
def Read(cls):
    """Load earthquakes-application.yaml and evaluate it into argument sets.

    :returns: (history_args, hive_args) tuple produced by ``cls.Evaluate``
    :note: exits the application when the file cannot be opened
    """
    config_path = r'../../conf/earthquakes-application.yaml'
    try:
        # `stream` instead of `file` -- avoid shadowing the builtin.
        with open(config_path) as stream:
            configuration = yaml.load(stream, Loader=yaml.FullLoader)
            Log.info("Loading configuration from earthquakes-application.yaml")
            Log.info("values: {}".format(configuration))
            return cls.Evaluate(configuration)
    except EnvironmentError as error:
        Log.error("Configuration can not be loaded.")
        Log.error(error)
        Log.exit()
def distanceAllToCities(cls):
    """Run the hive script computing each earthquake's distance to every city."""
    Log.info("Calculating earthquakes distance to all cities..")
    result = System.command(
        ['hive', '-f', '../hive_ql/distance-to-cities.hql'])
    ret, out, err = result
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def createEarthquakesTables(cls):
    """Create the hive tables used by the earthquakes ETL pipeline."""
    Log.info("Creating hive tables:")
    cmd = ['hive', '-f', '../hive_ql/create-earthquakes-tables.hql']
    ret, out, err = System.command(cmd)
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def distanceToClosestCity(cls):
    """Run the hive script computing each earthquake's closest city."""
    Log.info("Calculating earthquakes distance to closest city..")
    hql = '../hive_ql/distance-to-city-closest.hql'
    ret, out, err = System.command(['hive', '-f', hql])
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def __init__(self, output_formate, input_list):
    """Dispatch *input_list* to a CSV or Excel dump based on *output_formate*.

    :param output_formate: strs.output_formate_csv or strs.output_formate_excel
    :param input_list: rows to be written out
    """
    self.input_list = input_list
    self.output_formate = output_formate
    fmt = self.output_formate
    if fmt == strs.output_formate_csv:
        self.dumpToCsv(self.input_list)
    elif fmt == strs.output_formate_excel:
        self.dumpToExcel(self.input_list)
    else:
        # Unsupported format: log and do nothing.
        Log.writeLog(
            'File format not recognized. Formate: ' +
            str(self.output_formate) + ' : ' + str(output_formate),
            strs.ERRO)
def dropProcedure(self, conn):
    """Drop the inventory stored procedure created by executeQuery.

    :param conn: open DB-API connection to the MSSQL server
    :returns: 0 when the drop fails (error is logged), otherwise None
    """
    self.cursor = conn.cursor()
    try:
        dropprocedure = strs.drop_inventry
        self.cursor.execute(dropprocedure)
    except Exception as e:
        Log.writeLog(
            'Exception in MssqlQuery.dropProcedure module. Msg : ' + str(e),
            strs.ERRO)
        return 0
def clearEarthquakesTables(cls):
    """Drop all pipeline data from hive by running clear-tables.hql."""
    Log.warning(
        "Option 'drop-tables' is enabled. All data will be removed.")
    ret, out, err = System.command(
        ['hive', '-f', '../hive_ql/clear-tables.hql'])
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def distanceToAllSeismographicStations(cls):
    """Run the hive script computing distances to every seismographic station."""
    Log.info(
        "Calculating earthquakes distance to all seismographic stations..")
    script = '../hive_ql/distance-to-stations.hql'
    ret, out, err = System.command(['hive', '-f', script])
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def getColumnList(cls, input_resultset):
    """Return the column names of a DB-API result set.

    :param input_resultset: cursor-like object exposing ``description``
    :returns: list of column-name strings; empty list when the description
              cannot be read (the error is logged)
    """
    col_list = []
    try:
        # DB-API (PEP 249): description is a sequence of 7-item tuples whose
        # first element is the column name. The original computed the count
        # but never filled the list; return the names as the docstring
        # promises.
        col_list = [col[0] for col in input_resultset.description]
    except Exception as e:
        Log.writeLog(
            'Exception in Helpers.getColumnList module. Msg : ' + str(e),
            strs.ERRO)
    return col_list
class ConfigReader(object):
    """Reads application settings from the dbu configuration file."""

    @classmethod
    def getLogLevel(cls):
        """Read the log level from the config file.

        :returns: log level as a numeric string, or 0 when the option
                  cannot be read (the error is logged)
        :raises SystemExit: when the config file itself cannot be read
        """
        config = ConfigParser.ConfigParser()
        try:
            config.read(strs.dbu_conf_file)
        except Exception as e:
            Log.writeLog(
                'Error reading dbu_conf_file.cfg file. Msg : ' + str(e),
                strs.ERRO)
            raise SystemExit('Error reading dbu_conf_file.cfg file. Msg : ' +
                             str(e))
        try:
            return config.get(strs.dbu_conf_file_section_dbu_log,
                              'log_level')
        except Exception as e:
            Log.writeLog(
                'Exception occured while reading log level. Msg : ' + str(e),
                strs.ERRO)
            return 0
def executeQuery(self, query_string, conn):
    """(Re)create the MSSQL inventory stored procedure on *conn*.

    NOTE(review): *query_string* is accepted but not used here; the
    procedure text comes from strs.create_mssql_proc_inventry. Kept for
    interface compatibility with the other executeQuery implementations.

    :param conn: open DB-API connection to the MSSQL server
    :returns: 0 when creation fails (error is logged), otherwise None
    """
    self.cursor = conn.cursor()
    try:
        # Drop any stale copy first so CREATE PROCEDURE cannot collide.
        procedurecreate = strs.create_mssql_proc_inventry
        self.cursor.execute(strs.drop_inventry_if_exists)
        self.cursor.execute(procedurecreate)
    except Exception as e:
        Log.writeLog(
            'Exception in MssqlQuery.executeQuery module. Msg : ' + str(e),
            strs.ERRO)
        return 0
def Request(cls, start, end, magnitude_over):
    """Download earthquakes from the USGS FDSN service as a list of CSV rows.

    :param start: start time for the query
    :param end: end time for the query
    :param magnitude_over: minimum magnitude filter
    :returns: list of CSV rows (each a list of strings), or None on error
    """
    eventlet.monkey_patch()
    url = ("https://earthquake.usgs.gov/fdsnws/event/1/query?format=csv"
           "&starttime={}&endtime={}&minmagnitude={}")
    # Hard cap the whole request at 3 minutes.
    with eventlet.Timeout(180):
        try:
            with requests.Session() as session:
                response = session.get(
                    url.format(start, end, str(magnitude_over)))
                decoded = response.content.decode('utf-8')
                return list(csv.reader(decoded.splitlines(), delimiter=','))
        except Exception as error:
            Log.error("Request Error:")
            Log.error(error)
def getMetaSvrAccessPara(cls):
    """Read the metadata-server access parameter file.

    rtype: dictionary
    NOTE(review): as written this returns None after a successful read,
    despite the docstring promising a dictionary -- confirm against the
    original module whether a trailing return was lost.

    :raises SystemExit: when the config file cannot be read
    """
    config = ConfigParser.ConfigParser()
    try:
        config.read(strs.meta_svr_conf_file)
    except Exception as e:
        Helpers.displayText(
            'Error reading meta_svr_conf_file.cfg file. Msg : ' + str(e))
        Log.writeLog(
            'Error reading meta_svr_conf_file.cfg file. Msg : ' + str(e),
            strs.ERRO)
        raise SystemExit(
            'Error reading meta_svr_conf_file.cfg file. Msg : ' + str(e))
def createDB(cls, path):
    """Create the 'earthquakes' hive database under *path*.

    :param path: base path; the database lives at <path>/earthquakes.db
    """
    hivevar = "path='" + path + "/earthquakes.db'"
    Log.info("Creating hive database: 'earthquakes'")
    args = ['hive', '-hivevar', hivevar, '-f',
            '../hive_ql/create-database.hql']
    ret, out, err = System.command(args)
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def produceOutputSeismographs(cls):
    """Join earthquakes with closest city/station and emit seismograph output."""
    Log.info(
        "ETL pipeline Output: Join earthquakes with closest city,station and produce seismograph.."
    )
    hql = '../hive_ql/output-seismograph.hql'
    ret, out, err = System.command(['hive', '-f', hql])
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
def CreateDB(cls):
    """Record (once) in TinyDB whether the hive database must be created.

    :returns: True when the hive database still needs to be created,
              False when a 'created' record already exists
    """
    db = TinyDB('../../data/hive-etl-pipeline/pipeline_db.json')
    now = str(datetime.utcnow())
    record = db.search(Query().hiveDB == 'created')
    if not record:
        # First run: remember that the hive DB is being created.
        db.insert({'hiveDB': 'created', 'date': now})
        Log.info(
            "Database updated with record 'hiveDB': Creating application database to Hive"
        )
        return True
    Log.info(
        "Database record exists 'hiveDB': application database already exist"
    )
    return False
def dumpToCsv(self, input_list):
    """Write *input_list* to the CSV output file (strs.output_file_csv).

    NOTE(review): ``writerows`` (plural) is called once per *row*; the
    original comment says this was "modified specially for this class", so
    each element of *input_list* is presumably itself an iterable of rows
    -- confirm against callers before changing to ``writerow``.

    :param input_list: data to dump
    :returns: 1 on success, 0 on failure (the error is logged)
    """
    try:
        # 'wb' is the correct csv mode for the Python 2 csv module.
        with open(strs.output_file_csv, "wb") as f:
            writer = csv.writer(f)
            for row in input_list:
                writer.writerows(row)
        return 1
    except Exception as e:
        Log.writeLog('Exception while writing to ' + strs.output_file_csv,
                     strs.ERRO)
        return 0
def startScript(cls):
    """Entry point: verify the configuration, then launch the dbua client."""
    # Deferred imports: these modules pull in the rest of the application.
    from configuration import VerifyConfiguration
    from dbuaclient import DbuaClient

    Helpers.displayText('script started')
    Log.writeLog('script started', strs.INFO)

    # Fail fast on a broken configuration before the client runs.
    VerifyConfiguration()
    Helpers.displayText('configuration verified')
    Log.writeLog('configuration verified', strs.INFO)

    # Hand control over to the client program.
    DbuaClient()
def getLogLevel(cls):
    """Read the log level from the config file.

    rtype: numeric string
    NOTE(review): on a successful read this version returns None -- the
    option lookup/return present in ConfigReader.getLogLevel appears to be
    missing here; confirm before relying on the return value.

    :raises SystemExit: when the config file cannot be read
    """
    config = ConfigParser.ConfigParser()
    try:
        config.read(strs.dbu_conf_file)
    except Exception as e:
        Log.writeLog(
            'Error reading dbu_conf_file.cfg file. Msg : ' + str(e),
            strs.ERRO)
        raise SystemExit('Error reading dbu_conf_file.cfg file. Msg : ' +
                         str(e))