def pathValidation(cls, path):
    """Validate that *path* exists in HDFS by listing it.

    Stores the path on the class, runs ``hdfs dfs -ls``, and exits the
    application via Log.exit() when the listing fails.
    """
    cls.path = path
    Log.info("HDFS path validation:")
    (ret, out, err) = cls.command(['hdfs', 'dfs', '-ls', path])
    # Treat any non-zero exit code as failure; the original checked only
    # for 1, silently accepting other error codes as a "valid" path.
    if ret != 0:
        Log.error("HDFS path Error. Exiting the Application..")
        Log.error(err)
        Log.exit()
    else:
        Log.info("Valid HDFS path")
def validateMagnitude(cls, arg):
    """Parse *arg* as a float magnitude and validate it is within [0.0, 8.0].

    Returns the parsed float on success; logs an error and exits the
    application on any invalid input (non-numeric or out of range).
    """
    try:
        magnitude = float(arg)  # fixed typo: was 'magnutide'
        if 0 <= magnitude <= 8:
            return magnitude
        # Out-of-range used to call Log.exit() silently; route it through
        # the handler below so the user sees the error message too.
        raise ValueError(arg)
    except (TypeError, ValueError):
        # fixed runtime-string typo: "Exciting" -> "Exiting"
        Log.error(
            "invalid magnitude input, value: '{}'. You can only pass magnitude values from '0.0' to '8.0'. Exiting the application..".format(
                arg))
        Log.exit()
def Read(cls):
    """Load the application configuration from earthquakes-application.yaml.

    Returns (history_args, hive_args) as produced by cls.Evaluate; logs an
    error and exits the application if the file cannot be read.
    """
    try:
        with open(r'../../conf/earthquakes-application.yaml', encoding='utf-8') as file:
            # safe_load: a plain config file never needs the arbitrary
            # object construction that FullLoader permits.
            configuration = yaml.safe_load(file)
        Log.info("Loading configuration from earthquakes-application.yaml")
        Log.info("values: {}".format(configuration))
        history_args, hive_args = cls.Evaluate(configuration)
        return history_args, hive_args
    except EnvironmentError as error:
        Log.error("Configuration can not be loaded.")
        Log.error(error)
        Log.exit()
def validateYear(cls, arg):
    """Parse *arg* as an int year and validate it is within [1900, current year].

    Returns the parsed int on success; logs an error and exits the
    application on any invalid input (non-integer or out of range).
    """
    currentYear = datetime.utcnow().year
    try:
        year = int(arg)
        if 1900 <= year <= currentYear:
            return year
        # Out-of-range used to call Log.exit() silently; route it through
        # the handler below so the user sees the error message too.
        raise ValueError(arg)
    except (TypeError, ValueError):
        # fixed runtime-string typo: "Exciting" -> "Exiting"
        Log.error(
            "invalid year input, value: '{}'. You can only pass year values from '1900' to '{}'. Exiting the application..".format(
                arg, currentYear))
        Log.exit()
def filesInPath(cls, path):
    """Validate *path* in HDFS and collect the .csv files listed there into cls.files.

    Exits the application via Log.exit() when the HDFS listing fails.
    """
    cls.path = path
    Log.info("HDFS path validation:")
    (ret, out, err) = cls.command(['hdfs', 'dfs', '-ls', path])
    # Any non-zero exit code is a failure, not just 1.
    if ret != 0:
        Log.error("HDFS path Error. Exiting the Application..")
        Log.error(err)
        Log.exit()
    else:
        Log.info("Valid HDFS path")
        for line in out.splitlines():
            # split() collapses the space-padding hdfs uses to align
            # columns; split(' ') would produce empty fields.  The last
            # field of each listing line is the file path.
            fields = line.split()
            # endswith('.csv') replaces re.search('.*csv$'), which also
            # matched names merely ending in 'csv' (e.g. 'data.xcsv').
            if fields and fields[-1].endswith('.csv'):
                cls.files.append(fields[-1])
def toList(cls, args1, args2):
    """Build a sorted list of validated, de-duplicated years.

    When *args2* is None, *args1* is a comma-separated list of years.
    Otherwise *args1*/*args2* form an inclusive from-year/to-year range;
    the application exits if the range is inverted.
    """
    if args2 is None:
        unique_years = []
        for token in str(args1).split(","):
            validated = cls.validateYear(token)
            if validated not in unique_years:
                unique_years.append(validated)
        return sorted(unique_years)
    start = cls.validateYear(args1)
    end = cls.validateYear(args2)
    if start > end:
        Log.error("Input Error. 'from-year' value must be less that 'to-year' value. Exiting the application..")
        Log.exit()
    return list(range(start, end + 1))
def Evaluate(cls, configuration):
    """Translate the YAML configuration dict into CLI-style argument dicts.

    Returns (history_args, hive_args) keyed by long-option strings.  Logs
    an error and exits on a missing HDFS path, a malformed year group, or
    conflicting year options.
    """
    history_args = {}
    hive_args = {}
    for config, values in configuration.items():
        if config == 'hdfs-path':
            history_args['--hdfs-path='] = values
            hive_args['--hdfs-path='] = values
        elif config == 'download-list-of-years':
            if values is not None:
                # join replaces the manual comma-accumulation loop
                list_of_years = ",".join(str(value) for value in values)
                # fixed: original used `is not ""` (identity comparison
                # against a literal -- SyntaxWarning, fragile)
                if list_of_years != "":
                    history_args['--year='] = list_of_years
        elif config == 'download-group-of-years':
            if values is not None:
                if len(values) == 2:
                    history_args['--from-year='] = str(values[0])
                    history_args['--to-year='] = str(values[1])
                else:
                    Log.error("Property 'download-group-of-years' can take only two values, from-year, to-year")
                    Log.exit()
        elif config == 'download-magnitude-over':
            history_args['--magnitude-over='] = str(values)
        elif config == 'download-again-historical-data':
            if values is True:
                history_args['--download-again'] = ""
        elif config == 'hive-drop-all-tables':
            hive_args['--drop-tables'] = values
    if '--hdfs-path=' in history_args:
        if history_args['--hdfs-path='] is None:
            Log.error("You must specify an HDFS path for the data to be stored.")
            Log.exit()
    else:
        Log.error("You must specify an HDFS path for the data to be stored.")
        Log.exit()
    if '--year=' in history_args and (
            '--from-year=' in history_args or '--to-year=' in history_args):
        Log.error(
            "You can not pass values for both 'download-list-of-years' and 'download-group-of-years'. Chose one of this options.")
        Log.exit()
    return history_args, hive_args
def getValues(cls, inputArgs):
    """Parse the hive-loader command-line arguments.

    Recognises -p/--hdfs-path= (required; its files are collected via
    HDFS.filesInPath) and -d/--drop-tables.  Returns the drop-tables flag;
    duplicated options or a missing path terminate the application.
    """
    Log.info("input arguments: {}".format(inputArgs))
    try:
        opts, args = getopt.getopt(inputArgs, "p:d", ["hdfs-path=", "drop-tables"])
    except getopt.GetoptError as err:
        Log.error(err)
        Log.exit()
    hdfsPathArg = None
    seenPath = False
    seenDrop = False
    for opt, arg in opts:
        Log.info("processing option: {} with arguments: {}".format(
            opt, arg))
        if opt in ("-p", "--hdfs-path"):
            if seenPath:
                cls.notUniqueArg()
            else:
                seenPath = True
                hdfsPathArg = arg
        elif opt in ("-d", "--drop-tables"):
            if seenDrop:
                cls.notUniqueArg()
            else:
                seenDrop = True
    if not seenPath:
        Log.error(
            "Input Error. You must specify a valid HDFS path. Exiting the application.."
        )
        Log.exit()
    else:
        HDFS.filesInPath(hdfsPathArg)
    return seenDrop
def notUniqueArg(cls):
    """Report a duplicated command-line argument and terminate the application."""
    message = "Input Error. Can't pass one argument twice. Exiting the application.."
    Log.error(message)
    Log.exit()
def getValues(cls, inputArgs):
    """Parse the history-downloader command-line arguments.

    Recognises -p/--hdfs-path= (required), -y/--year= (comma-separated
    list), -f/--from-year= with -t/--to-year= (inclusive range),
    -m/--magnitude-over= (defaults to 0) and -d/--download-again.
    Returns (yearsList, magnitudeOver, overwriteFlag); logs an error and
    exits on duplicated, missing, or conflicting options.
    """
    Log.info("input arguments: {}".format(inputArgs))
    options = "y:f:t:m:p:d"
    longOptions = ["year=", "from-year=", "to-year=", "magnitude-over=",
                   "download-again", "hdfs-path="]
    try:
        opts, args = getopt.getopt(inputArgs, options, longOptions)
    except getopt.GetoptError as err:
        Log.error(err)
        Log.exit()
    yearFlag = False
    yearArg = None
    fromYearFlag = False
    fromYearArg = None
    toYearFlag = False
    toYearArg = None
    magnOverFlag = False
    magnOverArg = None
    overwriteFlag = False
    hdfsPathFlag = False
    hdfsPathArg = None
    for opt, arg in opts:
        Log.info("processing option: {} with arguments: {}".format(opt, arg))
        if opt in ("-p", "--hdfs-path"):
            if hdfsPathFlag:
                cls.notUniqueArg()
            else:
                hdfsPathFlag = True
                hdfsPathArg = arg
        elif opt in ("-y", "--year"):
            if yearFlag:
                cls.notUniqueArg()
            else:
                yearFlag = True
                yearArg = arg
        elif opt in ("-f", "--from-year"):
            if fromYearFlag:
                cls.notUniqueArg()
            else:
                fromYearFlag = True
                fromYearArg = arg
        elif opt in ("-t", "--to-year"):
            if toYearFlag:
                cls.notUniqueArg()
            else:
                toYearFlag = True
                toYearArg = arg
        elif opt in ("-m", "--magnitude-over"):
            if magnOverFlag:
                cls.notUniqueArg()
            else:
                magnOverFlag = True
                magnOverArg = arg
        elif opt in ("-d", "--download-again"):
            if overwriteFlag:
                cls.notUniqueArg()
            else:
                overwriteFlag = True
    if hdfsPathFlag is False:
        # fixed: this string literal was broken across a physical line in
        # the original (a syntax error); message now matches the identical
        # check in the hive-loader getValues.
        Log.error("Input Error. You must specify a valid HDFS path. Exiting the application..")
        Log.exit()
    else:
        HDFS.pathValidation(hdfsPathArg)
    fromToOption = False
    yearOption = False
    if fromYearFlag and toYearFlag and not yearFlag:
        fromToOption = True
    elif not fromYearFlag and not toYearFlag and yearFlag:
        yearOption = True
    else:
        Log.error("Input Parameters Error.\r\n" \
                  "You must pass parameters in one of the following formats:\r\n" \
                  "Example with a range of values: '--from-year=2010 --to-year=2020'\r\n" \
                  "Example with a list of unique values: '--year=2010,2011,2012'\r\n" \
                  "Exiting the application..")
        Log.exit()
    if fromToOption:
        fromYearInt = cls.validateYear(fromYearArg)
        toYearInt = cls.validateYear(toYearArg)
        yearsList = cls.toList(fromYearInt, toYearInt)
    elif yearOption:
        yearsList = cls.toList(yearArg, None)
    # No -m option given: default the threshold to 0 (no filtering).
    if magnOverArg is None:
        magnOverArg = 0
    magnitudeOver = cls.validateMagnitude(magnOverArg)
    return yearsList, magnitudeOver, overwriteFlag