Esempio n. 1
0
 def pathValidation(cls, path):
     """Check that ``path`` can be listed on HDFS and remember it on the class.

     Stores ``path`` in ``cls.path`` and runs ``hdfs dfs -ls <path>`` through
     ``cls.command``. On failure the captured error output is logged and
     ``Log.exit()`` is called.

     Args:
         path: HDFS path to validate.
     """
     cls.path = path
     Log.info("HDFS path validation:")
     ret, _out, err = cls.command(['hdfs', 'dfs', '-ls', path])
     # Only a zero return code means success. Testing for exactly 1 would
     # report every other failure code as a valid path.
     if ret != 0:
         Log.error("HDFS path Error. Exiting the Application..")
         Log.error(err)
         Log.exit()
     else:
         Log.info("Valid HDFS path")
Esempio n. 2
0
 def validateMagnitude(cls, arg):
     """Convert ``arg`` to a magnitude and check it lies between 0 and 8.

     Args:
         arg: Value to convert with ``float()``.

     Returns:
         The magnitude as a ``float`` when it is within the inclusive range
         0..8. Otherwise an error is logged and ``Log.exit()`` is called;
         ``None`` is returned if that call returns.
     """
     try:
         magnitude = float(arg)
     except (TypeError, ValueError, OverflowError):
         # Catch conversion failures only. A bare ``except`` would also
         # intercept SystemExit and KeyboardInterrupt.
         magnitude = None

     if magnitude is not None and 0 <= magnitude <= 8:
         return magnitude

     # Unparsable and out-of-range input are reported the same way, so the
     # user always sees why the application stops.
     Log.error(
         "invalid magnitude input, value: '{}'. You can only pass magnitude values from '0.0' to '8.0'. Exciting the application..".format(
             arg))
     Log.exit()
     return None
 def Read(cls):
     """Load ``earthquakes-application.yaml`` and convert it to argument dicts.

     The file is opened at the relative path
     ``../../conf/earthquakes-application.yaml``, parsed with
     ``yaml.FullLoader`` and handed to ``cls.Evaluate``.

     Returns:
         The ``(history_args, hive_args)`` pair produced by ``cls.Evaluate``.
         If the file cannot be read or parsed, the error is logged and
         ``Log.exit()`` is called; ``None`` is returned if that call returns.
     """
     try:
         with open(r'../../conf/earthquakes-application.yaml') as file:
             configuration = yaml.load(file, Loader=yaml.FullLoader)
     except (EnvironmentError, yaml.YAMLError) as error:
         # A malformed YAML document is a load failure too, so it is
         # reported the same way as a missing or unreadable file.
         Log.error("Configuration can not be loaded.")
         Log.error(error)
         Log.exit()
         return None

     Log.info("Loading configuration from earthquakes-application.yaml")
     Log.info("values: {}".format(configuration))
     history_args, hive_args = cls.Evaluate(configuration)
     return history_args, hive_args
Esempio n. 4
0
 def validateYear(cls, arg):
     """Convert ``arg`` to a year between 1900 and the current UTC year.

     Args:
         arg: Value to convert with ``int()``.

     Returns:
         The year as an ``int`` when it is within the inclusive range
         1900..current year. Otherwise an error is logged and ``Log.exit()``
         is called; ``None`` is returned if that call returns.
     """
     currentYear = datetime.utcnow().year
     try:
         year = int(arg)
     except (TypeError, ValueError, OverflowError):
         # Catch conversion failures only. A bare ``except`` would also
         # intercept SystemExit and KeyboardInterrupt.
         year = None

     if year is not None and 1900 <= year <= currentYear:
         return year

     # Unparsable and out-of-range input are reported the same way, so the
     # user always sees why the application stops.
     Log.error(
         "invalid year input, value: '{}'. You can only pass year values from '1900' to '{}'. Exciting the application..".format(
             arg, currentYear))
     Log.exit()
     return None
Esempio n. 5
0
 def filesInPath(cls, path):
     """List ``path`` on HDFS and collect the entries whose name ends in ``csv``.

     Stores ``path`` in ``cls.path`` and runs ``hdfs dfs -ls <path>`` through
     ``cls.command``. On failure the captured error output is logged and
     ``Log.exit()`` is called. On success the last space-separated field of
     every output line is inspected, and those ending in ``csv`` are appended
     to ``cls.files``.

     Args:
         path: HDFS path to list.
     """
     cls.path = path
     Log.info("HDFS path validation:")
     ret, out, err = cls.command(['hdfs', 'dfs', '-ls', path])
     # Only a zero return code means success. Testing for exactly 1 would
     # treat every other failure code as a valid listing.
     if ret != 0:
         Log.error("HDFS path Error. Exiting the Application..")
         Log.error(err)
         Log.exit()
         return

     Log.info("Valid HDFS path")
     for line in out.splitlines():
         # The entry path is the last space-separated field of a listing line.
         entry = line.split(' ')[-1]
         # NOTE: this matches any name ending in "csv" (no dot required), the
         # same as the previous '.*csv$' pattern.
         if entry.endswith('csv'):
             cls.files.append(entry)
Esempio n. 6
0
 def toList(cls, args1, args2):
     """Build the list of years described by the two arguments.

     When ``args2`` is ``None``, ``args1`` is read as a comma-separated list.
     Each item goes through ``cls.validateYear``, duplicates are dropped and
     the result is sorted ascending.

     Otherwise ``args1`` and ``args2`` are validated as the first and last
     year of an inclusive range. If the first year is greater than the last,
     an error is logged, ``Log.exit()`` is called and an empty list is
     returned should that call return.

     Args:
         args1: Comma-separated years, or the first year of a range.
         args2: Last year of the range, or ``None`` for list mode.

     Returns:
         A list of years.
     """
     if args2 is None:
         distinct = set()
         for token in str(args1).split(","):
             distinct.add(cls.validateYear(token))
         return sorted(distinct)

     first = cls.validateYear(args1)
     last = cls.validateYear(args2)
     if first > last:
         Log.error("Input Error. 'from-year' value must be less that 'to-year' value. Exiting the application..")
         Log.exit()
         return []
     return list(range(first, last + 1))
    def Evaluate(cls, configuration):
        """Translate the configuration mapping into two argument dictionaries.

        Recognised keys are ``hdfs-path``, ``download-list-of-years``,
        ``download-group-of-years``, ``download-magnitude-over``,
        ``download-again-historical-data`` and ``hive-drop-all-tables``.
        Other keys are ignored.

        Args:
            configuration: Mapping of configuration keys to their values.

        Returns:
            A ``(history_args, hive_args)`` tuple of dictionaries keyed by
            option strings such as ``'--hdfs-path='``.

        An error is logged and ``Log.exit()`` is called when the HDFS path is
        missing or ``None``, when ``download-group-of-years`` does not hold
        exactly two values, or when both year styles are supplied.
        """
        history_args = {}
        hive_args = {}
        for config, values in configuration.items():
            if config == 'hdfs-path':
                history_args['--hdfs-path='] = values
                hive_args['--hdfs-path='] = values
            elif config == 'download-list-of-years':
                if values is not None:
                    list_of_years = ",".join(str(value) for value in values)
                    # Compare by value: an identity test against a string
                    # literal is implementation-dependent.
                    if list_of_years:
                        history_args['--year='] = list_of_years
            elif config == 'download-group-of-years':
                if values is not None:
                    if len(values) == 2:
                        history_args['--from-year='] = str(values[0])
                        history_args['--to-year='] = str(values[1])
                    else:
                        Log.error("Property 'download-group-of-years' can take only two values, from-year, to-year")
                        Log.exit()
            elif config == 'download-magnitude-over':
                history_args['--magnitude-over='] = str(values)
            elif config == 'download-again-historical-data':
                # Only a literal True enables the flag.
                if values is True:
                    history_args['--download-again'] = ""
            elif config == 'hive-drop-all-tables':
                hive_args['--drop-tables'] = values

        # A missing key and an explicit None are both "no path given".
        if history_args.get('--hdfs-path=') is None:
            Log.error("You must specify an HDFS path for the data to be stored.")
            Log.exit()

        if '--year=' in history_args and (
                '--from-year=' in history_args or '--to-year=' in history_args):
            Log.error(
                "You can not pass values for both 'download-list-of-years' and 'download-group-of-years'. Chose one of this options.")
            Log.exit()

        return history_args, hive_args
Esempio n. 8
0
    def getValues(cls, inputArgs):
        """Parse the ``hdfs-path`` / ``drop-tables`` options.

        Accepts ``-p``/``--hdfs-path`` (takes a value) and
        ``-d``/``--drop-tables`` (no value). Passing an option a second time
        calls ``cls.notUniqueArg()`` and keeps the first occurrence. When an
        HDFS path was given it is handed to ``HDFS.filesInPath``; otherwise
        an error is logged and ``Log.exit()`` is called.

        Args:
            inputArgs: Argument list to parse with ``getopt``.

        Returns:
            ``True`` when the drop-tables option was supplied, else ``False``.
        """
        Log.info("input arguments: {}".format(inputArgs))
        try:
            opts, _ = getopt.getopt(inputArgs, "p:d", ["hdfs-path=", "drop-tables"])
        except getopt.GetoptError as err:
            Log.error(err)
            Log.exit()

        path_given = False
        hdfs_path = None
        drop_tables = False

        for opt, arg in opts:
            Log.info("processing option: {} with arguments: {}".format(
                opt, arg))
            if opt in ("-p", "--hdfs-path"):
                if path_given:
                    cls.notUniqueArg()
                    continue
                path_given = True
                hdfs_path = arg
            elif opt in ("-d", "--drop-tables"):
                if drop_tables:
                    cls.notUniqueArg()
                    continue
                drop_tables = True

        if path_given:
            HDFS.filesInPath(hdfs_path)
        else:
            Log.error(
                "Input Error. You must specify a valid HDFS path. Exiting the application.."
            )
            Log.exit()

        return drop_tables
Esempio n. 9
0
 def notUniqueArg(cls):
     """Log that an argument was passed more than once, then call ``Log.exit()``."""
     message = "Input Error. Can't pass one argument twice. Exiting the application.."
     Log.error(message)
     Log.exit()
Esempio n. 10
0
    def getValues(cls, inputArgs):
        """Parse the download options and return the validated settings.

        Accepted options: ``-y``/``--year``, ``-f``/``--from-year``,
        ``-t``/``--to-year``, ``-m``/``--magnitude-over``,
        ``-p``/``--hdfs-path`` (all take a value) and
        ``-d``/``--download-again`` (no value). Passing an option a second
        time calls ``cls.notUniqueArg()`` and keeps the first occurrence.

        The HDFS path is mandatory and is checked with
        ``HDFS.pathValidation``. Years must be given either as a from/to pair
        or as a ``--year`` list, never both. A missing magnitude defaults to
        ``0``. Violations are logged and followed by ``Log.exit()``.

        Args:
            inputArgs: Argument list to parse with ``getopt``.

        Returns:
            A tuple ``(yearsList, magnitudeOver, overwriteFlag)``.
        """
        Log.info("input arguments: {}".format(inputArgs))
        short_spec = "y:f:t:m:p:d"
        long_spec = ["year=", "from-year=", "to-year=", "magnitude-over=", "download-again","hdfs-path="]
        try:
            opts, _ = getopt.getopt(inputArgs, short_spec, long_spec)
        except getopt.GetoptError as err:
            Log.error(err)
            Log.exit()

        # Every accepted spelling maps to one canonical key.
        canonical = {
            "-p": "hdfs-path", "--hdfs-path": "hdfs-path",
            "-y": "year", "--year": "year",
            "-f": "from-year", "--from-year": "from-year",
            "-t": "to-year", "--to-year": "to-year",
            "-m": "magnitude-over", "--magnitude-over": "magnitude-over",
            "-d": "download-again", "--download-again": "download-again",
        }

        # First value seen per canonical key; presence of the key is the flag.
        given = {}
        for opt, arg in opts:
            Log.info("processing option: {} with arguments: {}".format(opt,arg))
            key = canonical.get(opt)
            if key is None:
                continue
            if key in given:
                cls.notUniqueArg()
            else:
                given[key] = arg

        if "hdfs-path" in given:
            HDFS.pathValidation(given["hdfs-path"])
        else:
            Log.error("Input Error. You must specify a valid HDFS path. Exiting the application..")
            Log.exit()

        has_year = "year" in given
        has_from = "from-year" in given
        has_to = "to-year" in given
        use_range = has_from and has_to and not has_year
        use_list = has_year and not has_from and not has_to
        if not (use_range or use_list):
            Log.error("Input Parameters Error.\r\n"
                      "You must pass parameters in one of the following formats:\r\n"
                      "Example with a range of values:       '--from-year=2010 --to-year=2020'\r\n"
                      "Example with a list of unique values: '--year=2010,2011,2012'\r\n"
                      "Exiting the application..")
            Log.exit()

        if use_range:
            range_start = cls.validateYear(given["from-year"])
            range_end = cls.validateYear(given["to-year"])
            yearsList = cls.toList(range_start, range_end)
        elif use_list:
            yearsList = cls.toList(given["year"], None)

        raw_magnitude = given.get("magnitude-over")
        if raw_magnitude is None:
            raw_magnitude = 0
        magnitudeOver = cls.validateMagnitude(raw_magnitude)

        return yearsList, magnitudeOver, "download-again" in given