예제 #1
0
def GenerateColumnAttributesReportCMDLineArgs():
    """
    * Get and validate command line arguments for the
    GenerateColumnAttributesReport.py script.
    Positional arguments:
    * datapath: Path to folder containing data. Must exist and must not
    contain a '.' (a '.' is treated as a sign the path points to a file).
    * filedatereg: Regular expression string for file dates.
    * reportpath: Output path for report. Must contain a '.' and must not
    already exist.
    Optional arguments:
    * --filenamereg: Regular expression for files in datapath.
    Output:
    * Returns the parsed argparse namespace (datapath has any 'R:\\' prefix
    replaced with the UNC share path).
    Raises:
    * Exception listing every validation failure, one per line.
    """
    parser = ArgumentParser()
    # Mandatory positional arguments:
    parser.add_argument("datapath",
                        type=str,
                        help="Path to folder containing data.")
    parser.add_argument("filedatereg",
                        type=str,
                        help="Regular expression string for file dates.")
    parser.add_argument("reportpath", type=str, help="Output path for report.")
    # Optional arguments:
    parser.add_argument(
        "--filenamereg",
        type=str,
        help="(Optional) Regular expression for files in datapath.")
    # parser.add_argument("--gjsonattr", action="store_true", help="(Optional) Put if want to generate skeleton file for DyETL with column attributes filled in.")

    ###############################
    # Check arg validity:
    ###############################
    args = parser.parse_args()
    errs = []
    # Swap the R: drive prefix for the equivalent UNC path
    # (presumably so the script works where R: is not mapped -- confirm).
    args.datapath = args.datapath.replace('R:\\',
                                          '\\\\wanlink.us\\dfsroot\\APPS\\')
    if '.' in args.datapath:
        errs.append('datapath must point to folder.')
    elif not os.path.exists(args.datapath):
        errs.append(' '.join(['(datapath)', args.datapath, 'does not exist.']))

    if '.' not in args.reportpath:
        errs.append('reportpath must point to a file.')
    elif os.path.exists(args.reportpath):
        errs.append(' '.join(
            ['(reportpath)', args.reportpath, 'already exists.']))
    if not IsRegex(args.filedatereg):
        errs.append(' '.join([
            '(filedatereg)', args.filedatereg,
            'is not a valid regular expression string.'
        ]))

    # argparse always defines the attribute (None when the option is
    # omitted), so test for None rather than using hasattr().
    if args.filenamereg is not None and not IsRegex(args.filenamereg):
        errs.append(' '.join([
            '(--filenamereg)', args.filenamereg,
            'is not a valid regular expression.'
        ]))

    if errs:
        raise Exception("\n".join(errs))

    return args
예제 #2
0
 def GetAllFolderPaths(cls, headpath, folderreg):
     """
     * Collect the full paths of every folder directly inside
     headpath whose name matches folderreg.
     Inputs:
     * headpath: String path of the directory to search.
     * folderreg: Regex string or compiled regex object; matched
     against each folder name (from the start of the name).
     Output:
     * Returns list of full paths to the matching folders.
     Raises:
     * Exception listing every invalid argument, one per line.
     """
     problems = []
     # Validate the search root:
     if isinstance(headpath, str):
         if not os.path.isdir(headpath):
             problems.append('headpath does not point to valid directory.')
     else:
         problems.append('headpath must be a string.')
     # Validate the pattern, compiling it when given as a string:
     allowedTypes = (str, FileConverter.__reObj)
     if not isinstance(folderreg, allowedTypes):
         problems.append('folderreg must be a string or regular expression.')
     elif isinstance(folderreg, str):
         if IsRegex(folderreg):
             folderreg = re.compile(folderreg)
         else:
             problems.append('folderreg must be a valid regular expression.')
     if problems:
         raise Exception('\n'.join(problems))
     # Keep only entries that are directories and whose name matches:
     matched = []
     for entry in os.listdir(headpath):
         candidate = os.path.join(headpath, entry)
         if not os.path.isdir(candidate):
             continue
         if folderreg.match(entry):
             matched.append(candidate)
     return matched
예제 #3
0
 def GetAllFilePaths(cls, folderpath, filereg, subdirs=False):
     """
     * Get all full paths for files matching regular expression
     at folderpath.
     Inputs:
     * folderpath: String path to folder. Must exist.
     * filereg: Regular expression string or compiled regex object for
     file names (matched from the start of the name). If None/falsy,
     every file is returned.
     Optional:
     * subdirs: If True, also search all subdirectories of folderpath.
     Output:
     * Returns { FileName -> Path }. With subdirs=True, if the same
     file name occurs in several folders the first one found in a
     top-down, alphabetically ordered walk is kept.
     Raises:
     * Exception listing every invalid argument, one per line.
     """
     errs = []
     if not isinstance(folderpath, str):
         errs.append('folderpath must be a string.')
     # Test existence before isdir, otherwise a missing path is always
     # reported as "must point to a folder".
     elif not os.path.exists(folderpath):
         errs.append('folderpath does not exist.')
     elif not os.path.isdir(folderpath):
         errs.append('folderpath must point to a folder.')
     if isinstance(filereg, str):
         if IsRegex(filereg):
             filereg = re.compile(filereg)
         else:
             errs.append(
                 'filereg must be a valid regular expression string or object.')
     if subdirs is not None and not isinstance(subdirs, bool):
         errs.append('subdirs must be a boolean.')
     if errs:
         raise Exception('\n'.join(errs))

     filePaths = {}
     if not subdirs:
         # Only the files directly inside folderpath:
         for name in os.listdir(folderpath):
             fullpath = os.path.join(folderpath, name)
             if os.path.isfile(fullpath) and (not filereg
                                              or filereg.match(name)):
                 filePaths[name] = fullpath
     else:
         # Walk folderpath and every folder below it:
         for root, dirs, files in os.walk(folderpath):
             # Sorting in place makes os.walk visit folders in a
             # deterministic order, so name clashes resolve predictably.
             dirs.sort()
             for name in sorted(files):
                 fullpath = os.path.join(root, name)
                 if os.path.isfile(fullpath) and (not filereg
                                                  or filereg.match(name)):
                     # The result is keyed by bare file name; keep the
                     # first (shallowest) hit rather than overwriting it.
                     filePaths.setdefault(name, fullpath)
     return filePaths
예제 #4
0
def GenerateFileTransferConfigJsonArgs():
    """
    * Load the arguments for the GenerateFileTransferConfig script
    from ScriptArgs\\GenerateFileTransferConfig.json (layered on top of
    ETLDashboardJsonArgs()) and validate them.
    Output:
    * Returns the argument mapping. 'etlfilepaths' has been passed through
    FillEnvironmentVariables for 'PROD' and 'outputfolder' ends with a
    backslash.
    Raises:
    * Exception if the json file is missing or unreadable, if a required
    argument is absent, or if any argument fails validation.
    """
    path = 'ScriptArgs\\GenerateFileTransferConfig.json'
    if not os.path.exists(path):
        raise Exception('%s does not exist.' % path)
    try:
        args = ETLDashboardJsonArgs()
        args.update(LoadJsonFile(path))
    except Exception as ex:
        raise Exception('\n'.join(
            ['Failed to read %s' % path,
             'Reason: %s' % str(ex)]))

    absent = set(['groupregex', 'outputfolder']) - set(args)
    if absent:
        raise Exception('The following required arguments are missing: %s' %
                        ','.join(absent))

    # Fill environment variables in 'etlfilepaths':
    args['etlfilepaths'] = FillEnvironmentVariables(args['etlfilepaths'],
                                                    args['config'], 'PROD')

    ############################
    # Required arguments:
    ############################
    problems = []
    # groupregex:
    groupregex = args['groupregex']
    if isinstance(groupregex, str):
        if not IsRegex(groupregex):
            problems.append('(groupregex) Invalid regular expression.')
    else:
        problems.append('(groupregex) Must be a string.')
    # outputfolder:
    outputfolder = args['outputfolder']
    if not isinstance(outputfolder, str):
        problems.append('(outputfolder) Must be a string.')
    elif not os.path.isdir(outputfolder):
        problems.append('(outputfolder) Does not point to valid directory.')
    elif not outputfolder.endswith('\\'):
        # Normalise so the folder always ends with a trailing backslash.
        args['outputfolder'] = outputfolder + '\\'
    if problems:
        raise Exception('\n'.join(problems))
    return args
예제 #5
0
 def __Validate(ftsurl, etlpathsjson, chromedriverpath, groupregex):
     """
     * Check the constructor parameters and report every problem at once.
     Inputs:
     * ftsurl: must be a string.
     * etlpathsjson: must be a dictionary.
     * chromedriverpath: must be a string ending in '.exe'.
     * groupregex: must be a valid regex string or a regex object.
     Raises:
     * Exception listing every invalid parameter, one per line.
     """
     problems = []
     report = problems.append
     if not isinstance(ftsurl, str):
         report('ftsurl must be a string.')
     if not isinstance(etlpathsjson, dict):
         report('etlpathsjson must be a json dictionary.')
     if isinstance(chromedriverpath, str):
         if not chromedriverpath.endswith('.exe'):
             report('chromedriverpath must point to an executable.')
     else:
         report('chromedriverpath must be a string.')
     regexTypes = (str, FileTransferServiceAggregator.__reType)
     if isinstance(groupregex, regexTypes):
         # Only string patterns need a syntax check; compiled ones are
         # accepted as they are.
         if isinstance(groupregex, str) and not IsRegex(groupregex):
             report('groupregex must be a valid regular expression string.')
     else:
         report('groupregex must be a string or regular expression object.')
     if problems:
         raise Exception('\n'.join(problems))
 def PostAllFiles(self,
                  etlname,
                  datafolder,
                  fileregex,
                  testmode="LOCAL",
                  waitseconds=200):
     """
     * Open DynamicETL.WebAPI, post all files located in datapath folder matching fileregex, 
     and run DynamicETL.Service to load data into local tables.
     Inputs:
     * etlname: String etl name. Must be configured in serviceappsettings
     and in etlfilepaths.json.
     * datafolder: String folder containing all data files for ETL.
     * fileregex: Regular expression string or regex object to match files in datafolder.
     Optional:
     * testmode: String mode name (case insensitive), must be one of the
     class' valid modes. "LOCAL" posts the files to the WebAPI and runs
     the Service; any other valid mode drops the files via __DropAllFiles.
     * waitseconds: Number of seconds to wait before closing DynamicETL.Service and WebAPI.
     Output:
     * Returns False if no files matched, True otherwise.
     Raises:
     * Exception listing every invalid argument, one per line.
     """
     errs = []
     if not isinstance(etlname, str):
         errs.append('etlname must be a string.')
     else:
         if etlname not in self.__serviceappsettings['Etls']:
             errs.append(
                 '%s is not configured in the DynamicETL.Service appsettings.json file.'
                 % etlname)
         # Ensure etl is configured in etlfilepaths.json (case insensitive):
         isAvailable = any(elem['subject'].lower() == etlname.lower()
                           for elem in self.__etlpaths['files'])
         if not isAvailable:
             # The original message never interpolated etlname and
             # emitted a literal '%s'.
             errs.append('%s ETL is not configured in etlfilepaths.json' %
                         etlname)
     if not isinstance(datafolder, str):
         errs.append('datafolder must be a string.')
     elif not os.path.isdir(datafolder):
         errs.append('datafolder does not point to a valid folder.')
     if isinstance(fileregex, str):
         if not IsRegex(fileregex):
             errs.append(
                 'fileregex is not a valid regular expression string.')
         else:
             fileregex = re.compile(fileregex)
     elif not isinstance(fileregex, LocalLargeDataJobPoster.__reType):
         errs.append(
             'fileregex must be a regular expression string or regular expression object.'
         )
     if not isinstance(testmode, str):
         errs.append('testmode must be a string.')
     elif testmode.upper() not in LocalLargeDataJobPoster.__validModes:
         errs.append('testmode must be one of %s (case insensitive).' %
                     ', '.join(LocalLargeDataJobPoster.__validModes))
     if not isinstance(waitseconds, (float, int)):
         errs.append('waitseconds must be numeric.')
     elif waitseconds <= 0:
         errs.append('waitseconds must be positive.')
     if errs:
         raise Exception('\n'.join(errs))
     testmode = testmode.upper()
     files = self.__GetAllMatchingFiles(datafolder, fileregex)
     if len(files) == 0:
         # Exit immediately if no matching files were found:
         return False
     # Resolve environment variables for the chosen mode on deep copies,
     # so the stored settings are not modified.
     self.__modeserviceappsettings = FillEnvironmentVariables(
         copy.deepcopy(self.__serviceappsettings),
         copy.deepcopy(self.__config), testmode)
     self.__modeetlpaths = FillEnvironmentVariables(
         copy.deepcopy(self.__etlpaths), copy.deepcopy(self.__config),
         testmode)
     if testmode == "LOCAL":
         self.__webapiurl = self.__modeserviceappsettings["EtlJobsUrl"]
         # Post all matching files to WebAPI and run Service locally:
         self.__OpenWebAPI()
         self.__PostAllJobs(etlname, files)
         self.__OpenService()
         self.__CloseAllInstances(waitseconds)
     else:
         # Drop all matching files to target location to be picked up by ETL:
         self.__DropAllFiles(files, etlname)
     return True
예제 #7
0
    def __CheckArgs(args):
        """
        * Check argument validity, throw exception if any are invalid.
        Inputs:
        * args: Dictionary of script arguments. 'outputfolder' is
        modified in place so that it ends with a backslash.
        Raises:
        * Exception naming the missing top-level required arguments, or
        listing every invalid / missing sub-argument, one per line.
        """
        errs = []
        req = Arguments.__reqArgs.copy()
        missing = req - set(args)
        if missing:
            raise Exception(' '.join([
                'The following required arguments are missing:',
                ', '.join(missing)
            ]))
        # Required sub-arguments found to be absent. Kept in a list of its
        # own: 'missing' above is a set, which has no append().
        missingsub = []

        #############################
        # Required Arguments:
        #############################
        # "etlname":
        if not isinstance(args['etlname'], str):
            errs.append('etlname must be a string.')

        # "data":
        data = args['data']
        if 'path' not in data:
            missingsub.append('data::path')
        # Test existence before isdir, otherwise a missing path is always
        # reported as "must point to folder".
        elif not os.path.exists(data['path']):
            errs.append(' '.join(
                ['(data::path)', data['path'], ' does not exist.']))
        elif not os.path.isdir(data['path']):
            errs.append('data::path must point to folder.')
        if 'sheets' in data and not isinstance(data['sheets'], list):
            errs.append('data::sheets must be a list.')
        if 'delim' in data and not isinstance(data['delim'], str):
            errs.append('data::delim must be a string.')

        # "filedatereg":
        if 'Regex' not in args['filedatereg']:
            errs.append('filedatereg requires "Regex" argument.')
        elif not IsRegex(args['filedatereg']['Regex']):
            errs.append(' '.join([
                '(filedatereg)', args['filedatereg']['Regex'],
                'Not a valid regular expression.'
            ]))

        # "outputfolder":
        if not isinstance(args['outputfolder'], str):
            errs.append('(outputfolder) Must be a string.')
        elif not os.path.isdir(args['outputfolder']):
            errs.append('(outputfolder) Folder does not exist.')
        elif not args['outputfolder'].endswith('\\'):
            args['outputfolder'] = args['outputfolder'] + '\\'

        # "filenamereg":
        if 'filenamereg' in args and not IsRegex(args['filenamereg']):
            errs.append(' '.join([
                '(filenamereg) ', str(args['filenamereg']),
                ' is not a valid regular expression.'
            ]))

        #####################
        # Optional:
        #####################
        # "allnull" arguments:
        if 'allnull' in args:
            allnull = args['allnull']
            # Report a non-string value as invalid instead of crashing on
            # the missing .lower().
            if not (isinstance(allnull, str)
                    and allnull.lower() in ('true', 'false')):
                errs.append('allnull must be "true"/"false".')

        # "convert":
        if 'convert' in args:
            convert = args['convert']
            if not ('convertpath' in convert and 'toextension' in convert):
                errs.append(
                    'convert requires "toextension" and "convertpath" as attributes.'
                )
            elif '.' not in convert['toextension']:
                errs.append('%s is invalid conversion extension.' %
                            convert['toextension'])
            if 'convertpath' in convert and not os.path.exists(
                    convert['convertpath']):
                errs.append('convertpath does not exist.')

        if missingsub:
            errs.append(
                'The following required subarguments are missing: {%s}' %
                ', '.join(missingsub))
        if errs:
            raise Exception("\n".join(errs))
예제 #8
0
def EvaluateDataJsonArgs():
    """
    * Get arguments used in EvaluateData.py script from
    ScriptArgs\\EvaluateData.json, validate them and convert them to
    usable types.
    Output:
    * Returns the argument dictionary with:
      - data::sheets / data::delim defaulted to None when absent,
      - filenamereg compiled to a regex object,
      - outputfolder ending with a backslash,
      - recursive / allnull converted to booleans (allnull defaults to False),
      - filedatereg compiled to a regex object, or None when absent.
    Raises:
    * Exception if the json file is missing, if a required argument is
    absent, or listing every invalid argument, one per line.
    """
    req = set([
        'data', 'filenamereg', 'outputfolder', 'processname', 'recursive',
        'tablename'
    ])
    path = 'ScriptArgs\\EvaluateData.json'
    if not os.path.exists(path):
        raise Exception('%s is missing.' % path)
    # Context manager so the file handle is closed after parsing.
    with open(path, 'rb') as jsonfile:
        args = json.load(jsonfile)
    missing = req - set(args)
    if missing:
        raise Exception('The following required arguments are missing: %s' %
                        ','.join(missing))
    errs = []
    # Required sub-arguments found to be absent. Kept in a list of its
    # own: 'missing' above is a set, which has no append().
    missingsub = []
    tf = {'true': True, 'false': False}
    #############################
    # Required Arguments:
    #############################
    # "processname":
    if not isinstance(args['processname'], str):
        errs.append('(processname) Must be a string.')

    # "data":
    data = args['data']
    if 'path' not in data:
        missingsub.append('data::path')
    # Test existence before isdir, otherwise a missing path is always
    # reported as "must point to folder".
    elif not os.path.exists(data['path']):
        errs.append(' '.join(
            ['(data::path)', data['path'], ' does not exist.']))
    elif not os.path.isdir(data['path']):
        errs.append('data::path must point to folder.')
    if 'sheets' in data:
        if not isinstance(data['sheets'], list):
            errs.append('data::sheets must be a list.')
    else:
        data['sheets'] = None
    # Default delim to None only when it is absent; a valid
    # user-supplied delimiter must be kept.
    if 'delim' in data:
        if not isinstance(data['delim'], str):
            errs.append('data::delim must be a string.')
    else:
        data['delim'] = None

    if 'rowstart' in data:
        if not isinstance(data['rowstart'], int):
            errs.append('data::rowstart must be an integer.')
        elif not data['rowstart'] >= 0:
            errs.append('data::rowstart must be nonnegative integer.')

    # "filenamereg":
    if not IsRegex(args['filenamereg']):
        errs.append(' '.join([
            '(filenamereg) ', str(args['filenamereg']),
            ' is not a valid regular expression.'
        ]))
    else:
        args['filenamereg'] = re.compile(args['filenamereg'])

    # "outputfolder":
    if not isinstance(args['outputfolder'], str):
        errs.append('(outputfolder) Must be a string.')
    elif not os.path.isdir(args['outputfolder']):
        errs.append('(outputfolder) Folder does not exist.')
    elif not args['outputfolder'].endswith('\\'):
        args['outputfolder'] = args['outputfolder'] + '\\'

    # "recursive": accept a json boolean as well as "true"/"false" strings.
    recursive = args['recursive']
    if isinstance(recursive, bool):
        pass
    elif isinstance(recursive, str) and recursive.lower() in tf:
        args['recursive'] = tf[recursive.lower()]
    else:
        errs.append('(recursive) Must be true/false.')

    #####################
    # Optional:
    #####################
    # "allnull" arguments: same accepted forms as "recursive".
    if 'allnull' in args:
        allnull = args['allnull']
        if isinstance(allnull, bool):
            pass
        elif isinstance(allnull, str) and allnull.lower() in tf:
            args['allnull'] = tf[allnull.lower()]
        else:
            errs.append('allnull must be "true"/"false".')
    else:
        args['allnull'] = False

    # "filedatereg":
    if 'filedatereg' in args:
        if 'Regex' not in args['filedatereg']:
            errs.append('filedatereg requires "Regex" argument.')
        elif not IsRegex(args['filedatereg']['Regex']):
            errs.append(' '.join([
                '(filedatereg)', args['filedatereg']['Regex'],
                'Not a valid regular expression.'
            ]))
        else:
            args['filedatereg'] = re.compile(args['filedatereg']['Regex'])
    else:
        # Pull all files in folder:
        args['filedatereg'] = None

    if missingsub:
        errs.append('The following required subarguments are missing: {%s}' %
                    ', '.join(missingsub))
    if errs:
        raise Exception("\n".join(errs))

    return args
예제 #9
0
    def ConvertAllFilePaths(cls,
                            outfolder,
                            new_ext,
                            folderpath=None,
                            filereg=None,
                            paths=None,
                            subdirs=False):
        """
        * Copy all files listed in folderpath/paths into outfolder, giving
        each copy the extension new_ext. File contents are copied as-is;
        only the name changes. Existing target files are not overwritten.
        Inputs:
        * outfolder: string output folder for converted files. Must exist.
        * new_ext: extension to convert files to (must contain a '.').
        Mutually Exclusive Inputs:
        * folderpath, filereg: put string folder containing files matching filereg regular expression.
        * paths: list of filepaths to convert.
        Optional Inputs:
        * subdirs: Search all subdirectories of folderpath for matching files.
        Outputs:
        * Dictionary mapping { convertedfilename -> path }.
        Raises:
        * Exception listing every invalid argument, one per line.
        """
        errs = []
        if not isinstance(outfolder, str):
            errs.append('outfolder must be a string.')
        # Test existence before isdir, otherwise a missing path is always
        # reported as "must be point to a folder".
        elif not os.path.exists(outfolder):
            errs.append('outfolder does not exist.')
        elif not os.path.isdir(outfolder):
            errs.append('outfolder must be point to a folder.')
        if not isinstance(new_ext, str):
            errs.append('extension must be a string.')
        elif '.' not in new_ext:
            errs.append('extension is invalid.')
        hasFolderArgs = folderpath is not None and filereg is not None
        hasPaths = paths is not None
        if folderpath is None and filereg is None and paths is None:
            errs.append('One of folderpath, filereg and paths must be passed.')
        elif hasFolderArgs == hasPaths:
            # Both modes requested, or the folder mode is only half specified.
            errs.append(
                'folderpath AND filereg or (exclusive) paths must be passed.')
        elif folderpath is not None:
            if not isinstance(folderpath, str):
                errs.append('folderpath must be a string.')
            elif not os.path.exists(folderpath):
                errs.append('folderpath does not exist.')
            elif not os.path.isdir(folderpath):
                errs.append('folderpath must point to a folder.')
            if not IsRegex(filereg):
                errs.append('filereg must be a valid regular expression.')
        elif paths is not None:
            if not hasattr(paths, '__iter__'):
                errs.append('paths must be an iterable.')
        if errs:
            raise Exception('\n'.join(errs))

        def _anchor(path):
            # Resolve a local (relative) folder against the current working
            # directory for use with shutil. Paths holding a drive colon or
            # starting with a backslash are left alone, as before.
            if ":" in path or path.startswith("\\") or os.path.isabs(path):
                return path
            return os.path.join(os.getcwd(), path)

        outfolder = _anchor(outfolder)
        # Get all full file paths if not provided. folderpath is None in
        # "paths" mode and must not be touched then.
        if folderpath is not None:
            folderpath = _anchor(folderpath)
            paths = list(
                FileConverter.GetAllFilePaths(folderpath, filereg,
                                              subdirs).values())

        convertedpaths = {}
        for filepath in paths:
            filename = os.path.basename(filepath)
            # Everything before the first '.', or the whole name when it
            # has no extension.
            newfilename = "%s%s" % (filename.partition('.')[0], new_ext)
            convertedpath = os.path.join(outfolder, newfilename)
            if not os.path.exists(convertedpath):
                shutil.copyfile(filepath, convertedpath)
            convertedpaths[newfilename] = convertedpath

        return convertedpaths