def __init__(self, config, archive_path, stage_path, files=None):
        """ Initialize with args """
        # Avoid the shared-mutable-default pitfall: default to a fresh dict
        self.filenames = files if files is not None else {}
        self.config = config
        self.begin = self.config['begin']
        self.end = self.config['end']
        self.quiet = self.config['quiet']

        self.source = self.config['source']
        self.stage = self.config['stage']
        self.job = self.config['job']
        self.archive_path = dir_pattern().format(self.source, archive_path)
        self.stage_path = dir_pattern(4).format(self.stage, self.job,
                                                'collection', stage_path)

        self.cwd = os.getcwd()

        temp = self.stage_path.split('/')
        self.local = dir_pattern().format(temp[-2], temp[-1])

        # List of tar files
        self.files = None

        # duplicate file manipulation
        self.file_names = []
        self.st_files = []
        self.members = []
        self.duplicates = {}
        self.dups = None
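
Every example here builds paths through a dir_pattern() helper that is never shown. Judging only from its call sites (dir_pattern() joining two parts, dir_pattern(3), dir_pattern(4) and dir_pattern(5) joining more), a minimal sketch consistent with that usage could be:

def dir_pattern(n=2):
    """ Assumed helper: a '/'-joined format string with n placeholders """
    return '/'.join(['{}'] * n)

# dir_pattern().format('stage', 'job')                -> 'stage/job'
# dir_pattern(4).format('s', 'j', 'collection', 'p')  -> 's/j/collection/p'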
Example #2
    def __init__(self, config):
        """ Initialize with config """
        if config == {}:
            return
        self.config = config
        self.path = dir_pattern().format(self.config['stage'],
                                         self.config['job'])
        self.job = dir_pattern().format(self.path, 'collection')
        self.ops = dir_pattern(3).format(self.path, 'conf', 'ops')
        self.file = dir_pattern().format(self.ops, 'vapmgr.conf')
        if not os.path.exists(self.ops):
            os.mkdir(self.ops)

        if not os.path.exists(self.file):
            os.symlink('/data/conf/ops/vapmgr.conf', self.file)
Example #3
    def add_to_env(self):
        ext = get_shell()
        if ext == 'bash':
            ext = 'sh'
        elif ext != 'csh':
            exit(
                "Unable to determine shell. Please run again from a Bash or CSH shell."
            )

        db = DB(self.config)
        site = self.config['site']
        ins = self.config['instrument']
        fac = self.config['facility']
        output = '\n'

        # Construct the vapmgr command
        output += "/apps/ds/bin/vapmgr -setup "
        if site:
            output += "-r {}".format(site)
            if fac:
                output += ".{}".format(fac)
            # Space-separate the -r value from the instrument name
            output += " "
        output += ins

        env = 'env.{}'.format(ext)
        env = dir_pattern().format(self.path, env)
        with open(env, 'a') as fp:
            fp.write(output)

        return
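
For illustration, with hypothetical values site='sgp', fac='C1', ins='mfrsr', the line appended to env.sh is built like this:

site, fac, ins = 'sgp', 'C1', 'mfrsr'   # hypothetical values
output = "/apps/ds/bin/vapmgr -setup "
if site:
    output += "-r {}".format(site)
    if fac:
        output += ".{}".format(fac)
    output += " "
output += ins
print(output)  # /apps/ds/bin/vapmgr -setup -r sgp.C1 mfrsr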
Example #4
    def __init__(self, tar, tar_file):
        self.error = False
        self.result = None
        self.tar = tar
        self.file = tar_file
        self.config = self.tar.config
        # Split the path into components before taking site/process,
        # matching the pattern used in the other constructors
        temp = self.tar.stage_path.split('/')
        self.local = dir_pattern().format(temp[-2], temp[-1])
        threading.Thread.__init__(self)
Example #5
    def email_del_list(self, del_file):
        global DEVEL
        # Get the directory where the job and deletion file are stored
        job_dir = dir_pattern().format(self.config['stage'],
                                       self.config['job'])

        # Get the contents of the deletion list
        cwd = os.getcwd()
        os.chdir(job_dir)
        with open(del_file, 'r') as f:
            attach_file = f.read()
        os.chdir(cwd)

        # Setup the email variables
        email_from = '*****@*****.**'
        email_to = ['*****@*****.**']
        # email_to = ['*****@*****.**']

        if DEVEL:
            email_to.append('*****@*****.**')

        text = [
            'The deletion list for %s is attached.' % self.config['job'],
            'Once the files have been deleted, run the following commands:',
            '',
            'cd %s' % job_dir,
            'apm archive -j %s' % self.config['job'],
            'Attachment:',
            del_file,
        ]
        text = '\n'.join(text)

        ############################################################
        # Create a message
        msg = MIMEMultipart()
        msg['From'] = email_from
        msg['To'] = ', '.join(email_to)
        msg['Subject'] = "APM: %s deletion list" % self.config['job']

        # Add the body of the message
        msgText = MIMEText(text)
        msg.attach(msgText)

        # Add the attachment
        attachment = MIMEText(attach_file)
        attachment.add_header('Content-Disposition',
                              'attachment',
                              filename=del_file)
        msg.attach(attachment)

        # Send the message
        s = smtplib.SMTP('localhost')
        s.sendmail(email_from, email_to, msg.as_string())
        s.quit()
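
The attachment handling is standard email.mime usage: the deletion list rides along as a text part with a Content-Disposition header. A self-contained sketch with hypothetical addresses and contents (printed rather than sent):

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

msg = MIMEMultipart()
msg['From'] = 'sender@example.com'       # hypothetical address
msg['To'] = 'operator@example.com'       # hypothetical address
msg['Subject'] = 'APM: myjob deletion list'
msg.attach(MIMEText('The deletion list for myjob is attached.'))

attachment = MIMEText('file1.tar abc123\nfile2.tar def456\n')
attachment.add_header('Content-Disposition', 'attachment',
                      filename='myjob.deletion-list.txt')
msg.attach(attachment)

print(msg.as_string())   # inspect instead of handing to smtplib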
Example #6
    def run(self):
        manager = PluginManager()
        config = self.config
        f = Files(config)
        cwd = os.getcwd()
        stage = config['stage']
        collection = dir_pattern(3).format(stage, config['job'], 'collection')

        # Make sure files are supposed to be renamed
        if not config['rename']:
            return config, self.files

        # Verify there are no file collisions
        if self.check_for_collisions():
            return config, self.files

        # Strip the ARM prefix from all of the files
        print("\nStripping ARM prefix from files... ", end="")
        sys.stdout.flush()

        manager.callPluginCommand('hook_rename_preprocess', {'config': config})

        os.chdir(collection)
        sites = set(os.listdir('.'))
        for site in sites:
            os.chdir(site)
            instruments = set(os.listdir('.'))
            for ins in instruments:
                os.chdir(ins)
                files = set(os.listdir('.'))
                for i in files:
                    new_name = f.rename_file(i)
                    if new_name is not None:
                        if i != new_name:
                            self.files[site][ins][new_name] = self.files[site][
                                ins][i]
                            self.files[site][ins].pop(i)

                        self.files[site][ins][new_name][
                            'current_name'] = new_name
                        self.files[site][ins][new_name][
                            'stripped_name'] = new_name

                os.chdir('..')

            os.chdir('..')

        manager.callPluginCommand('hook_renamed_files_alter',
                                  {'config': config})

        print("Done\n")
        sys.stdout.flush()

        return config, self.files
Example #7
    def get_rename_raw_process_list(self):
        path = binpath
        binary = "rename_raw"
        command = dir_pattern().format(path, binary)

        if not os.path.exists(command):
            print("Cannot find Rename_raw")
            return False

        helptext = None
        try:
            ps = Popen([command, '-h'], stdout=PIPE, stderr=PIPE,
                       universal_newlines=True)
            (output, error) = ps.communicate()
            returncode = ps.returncode
            helptext = output
        except OSError:
            # Popen raises OSError if the binary cannot be executed;
            # CalledProcessError never occurs here, and output/error
            # would be unbound in that case
            helptext = None

        if helptext is None:
            print("Unable to get rename_raw help text")
            return False

        helptext = helptext.split("\n\n")

        processes = None

        for k, v in enumerate(helptext):
            if v == "SUPPORTED PROCESSES:" and (k + 1) < len(helptext):
                processes = helptext[k + 1]

        if processes is None:
            print("Unable to get list of processes from rename_raw")
            return False

        processes = processes.split("\n")

        for k, v in enumerate(processes):
            v = re.sub(r'\s+', ' ', v).strip().split(' ')
            if v[0] == "*":
                processes[k] = v[1].lower()
            else:
                processes[k] = v[0].lower()

        processes = list(set(processes))
        processes.sort()

        return processes
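
To see what get_rename_raw_process_list() extracts, here is the same parsing applied to a made-up fragment of `rename_raw -h` output (the real help text may differ):

import re

helptext = ("USAGE: rename_raw ...\n\n"
            "SUPPORTED PROCESSES:\n\n"
            "  * MFRSR   multifilter radiometer\n"
            "    SKYRAD  sky radiometer")

sections = helptext.split("\n\n")
processes = None
for k, v in enumerate(sections):
    if v == "SUPPORTED PROCESSES:" and (k + 1) < len(sections):
        processes = sections[k + 1]

names = processes.split("\n")
for k, v in enumerate(names):
    v = re.sub(r'\s+', ' ', v).strip().split(' ')
    names[k] = v[1].lower() if v[0] == "*" else v[0].lower()

print(sorted(set(names)))  # ['mfrsr', 'skyrad']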
Example #8
    def get_tar_structure(self, path):
        """
            Return the structure of the files contained within the tar files at the given location
            The given path should be the where a site directory is located
            Example: datastream should be provided
                job/datastream/site/process/list_of_tar_files
        """
        global DEVEL
        file_list = {}
        for site in os.listdir(path):
            file_list[site] = {}
            for process_path in glob(
                    dir_pattern(3).format(path, site, '*.00')):
                process = process_path.split('/')[-1]
                file_list[site][process] = {}
                tar_file_list = os.listdir(process_path)

                if DEVEL:
                    print("Retrieving file list for %s" % process)
                    pbar = UI()
                    pbar.progress(0)
                    count = len(tar_file_list)
                    i = 1

                for tar_file in tar_file_list:
                    if not os.path.isdir(dir_pattern().format(
                            process_path, tar_file)):
                        file_list[site][process][
                            tar_file] = self.get_all_files_from_tar(
                                tar_file, process_path)
                    if DEVEL:
                        pbar.progress(int((float(i) / float(count)) * 100))
                        i += 1

                if DEVEL:
                    print("")
        return file_list
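
The returned mapping is keyed site -> raw process directory -> tar file, with each leaf holding whatever get_all_files_from_tar() yields (presumably the member names). Its shape, with hypothetical values:

file_list = {
    'sgp': {
        'sgpmfrsrC1.00': {
            'sgpmfrsrC1.00.20120101.000000.raw.tar':
                ['sgpmfrsrC1.00.20120101.000000.raw.dat'],
        },
    },
}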
Example #9
    def check_for_collisions(self):
        """ Check all unpacked files for file naming collisions """
        print("Checking for file naming collisions...", end="")
        sys.stdout.flush()

        config = self.config
        f = Files(config, self.files)
        cwd = os.getcwd()
        collection = dir_pattern(3).format(config['stage'], config['job'],
                                           'collection')
        os.chdir(collection)

        sites = os.listdir('.')
        for site in sites:
            os.chdir(site)
            instruments = set(os.listdir('.'))
            for ins in instruments:
                os.chdir(ins)
                files = set(os.listdir('.'))
                names = self.files[site][ins]

                # Mark files as deleted
                for k, v in names.items():
                    if k not in files:
                        names[k]['deleted'] = True

                # Check for duplicates
                for k, v in names.items():
                    if len(v['duplicate_files']) > 0 and not v['deleted']:
                        for i in v['duplicate_files']:
                            name = f.get_file_by_uuid(i)
                            if (names[name]['uuid'] == i
                                    and not names[name]['deleted']):
                                config['duplicates'] = True
                                print("Fail")
                                print(
                                    "Files with naming collisions still exist.\nPlease resolve these issues before continuing.\n"
                                )
                                return True

                os.chdir('..')

            os.chdir('..')

        os.chdir(cwd)
        config['duplicates'] = False
        print("Done")
        sys.stdout.flush()
        return False
Example #10
    def bundle_raw_data(self, datastreams):
        """ Bundle the raw data in <job>/datastream/<site>/<site><instrument><facility>.00 """
        if isinstance(datastreams, str):
            datastreams = [datastreams]

        if not isinstance(datastreams, list):
            return False

        # Update env variables so bundle_data will tar the right files and put the tar files in the right place
        p = Process(self.config, self.files)
        print("\nUpdating environment variables...", end="")
        if update_env(dir_pattern().format(self.config['stage'],
                                           self.config['job'])):
            print("Done")
        else:
            print("Failed")
            return False

        # Validate the bundle alias exists
        home = os.path.expanduser('~')
        db_file = dir_pattern().format(home, ".db_connect")
        p.setup_alias(db_file, 'bundle')

        # Run this process for each of the passed streams
        print("Bundling raw data...", end="")
        sys.stdout.flush()
        for stream in datastreams:
            # split the stream string to get the needed information
            stream = stream.split('.')[0]

            for i, e in reversed(list(enumerate(stream))):
                if not is_number(e):
                    fac = i
                    break
            else:
                print("Failed: Could not separate facility from %s" % stream)
                return False

            s = stream[0:3]
            i = stream[3:fac]
            f = stream[fac:]

            # Build the command
            command = ['bundle_data', '-e', '-s', s, '-f', f, i]

            # Run the command
            try:
                ps = Popen(command, stdout=PIPE, stderr=PIPE)
                ps.communicate()
                returncode = ps.returncode
                if returncode != 0:
                    print("Bad Return...", end="")
                    print("Failed")
                    return False
            except CalledProcessError as e:
                print("Called Process Error...", end="")
                print("Failed")
                return False
            except Exception as e:
                raise e

        print("Done")
        return True
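
The for/else above walks the datastream name backwards to the last non-digit character, which marks where the facility starts. The same logic in isolation (is_number() is an assumed helper that tests whether a string is numeric):

def is_number(s):
    """ Assumed helper: True if s parses as a number """
    try:
        float(s)
        return True
    except ValueError:
        return False

stream = 'sgpmfrsrC1.00'.split('.')[0]   # hypothetical datastream name
for i, e in reversed(list(enumerate(stream))):
    if not is_number(e):
        fac = i
        break

print(stream[0:3], stream[3:fac], stream[fac:])  # sgp mfrsr C1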
Example #11
    def run(self):
        self.times['ingest']['start'] = datetime.now().replace(microsecond=0)

        self.command = []
        self.command.append(self.options['ingest'])
        self.command.append("-a")
        if not self.config['alias']:
            self.command.append('apm')
        else:
            self.command.append(self.config['alias'])

        # Add the site to the command
        self.command.append('-s')
        self.command.append(self.site)

        # Add the facility to the command
        self.command.append('-f')
        self.command.append(self.facility)

        # If multiple add -n and the instrument to the command
        if self.options['multiple']:
            self.command.append('-n')
            self.command.append(self.instrument)

        # Add the final option
        self.command.append('-R')

        # Add additional user specified flags
        if self.flags is not None and isinstance(self.flags, list):
            for i in self.flags:
                self.command.append('-%s' % i)

        ps = Popen(self.command, stdout=PIPE, stderr=PIPE)
        (output, error) = ps.communicate()
        returncode = ps.returncode

        self.times['ingest']['end'] = datetime.now().replace(microsecond=0)

        if returncode != 0:

            self.error = "Error running ingest (%s)" % ' '.join(self.command)
            self.result = None
            return
        else:
            self.stdout = output
            self.stderr = error

        ##################################################
        # Parse the log file.
        ##################################################
        if not os.path.exists(self.logfile):
            self.error = "ERROR: Unable to find log file"
            return

        with open(self.logfile, 'r') as log:
            text = log.readlines()

        parse = False
        names = {}

        for k, line in enumerate(text):
            if line.startswith('**** OPENED: '):
                timeformat = "**** OPENED: %Y-%m-%d %X\n"
                linedate = datetime.strptime(line, timeformat)

                if self.times['ingest']['start'] <= linedate <= self.times['ingest']['end']:
                    parse = True

            elif parse and line.startswith('**** CLOSED: '):
                timeformat = "**** CLOSED: %Y-%m-%d %X\n"
                linedate = datetime.strptime(line, timeformat)

                if self.times['ingest']['start'] <= linedate <= self.times['ingest']['end']:
                    parse = False


            elif parse and line.startswith("Renaming:   "):
                old_path = line.replace('Renaming:   ', '').replace("\n", '')
                new_path = text[k + 1].replace(' -> to:     ', '').replace("\n", '')

                parts = old_path.split('/')
                site = parts[-3]
                sif = parts[-2]

                old_name = parts[-1]
                new_name = new_path.split('/')[-1]

                if site not in names:
                    names[site] = {}

                if sif not in names[site]:
                    names[site][sif] = {}

                names[site][sif][old_name] = new_name

        folder = dir_pattern(5).format(
            self.config['stage'], self.config['job'], 'collection',
            self.config['site'],
            '{}{}{}.00'.format(self.config['site'], self.config['instrument'],
                               self.config['facility']))
        listdir = os.listdir(folder)


        if len(listdir) > 0:
            ##################################################
            # Run Rename Raw
            ##################################################
            if self.config['instrument'] in self.get_rename_raw_process_list():
                self.times['rename']['start'] = datetime.now().replace(microsecond=0)
                command = ['%s/rename_raw' % binpath,
                           '-s', self.config['site'],
                           '-f', self.config['facility'],
                           self.config['instrument']]
                ps = Popen(command, stdout=PIPE, stderr=PIPE)
                (output, error) = ps.communicate()
                returncode = ps.returncode
                self.times['rename']['end'] = datetime.now().replace(microsecond=0)

                if returncode != 0:
                    self.result = names
                    self.error = error
                    return

                ##################################################
                # Parse rename_raw log file
                ##################################################
                if not os.path.exists(self.renamelog):
                    self.result = names
                    self.error = "renamelog does not exist"
                    return

                with open(self.renamelog, 'r') as lf:
                    logs = lf.readlines()

                parse = False

                for k, line in enumerate(logs):
                    if line.startswith("****OPEN"):
                        i = k + 1
                        timeline = logs[i]
                        timeformat = "Time: %a %b %d %X %Y\n"
                        opentime = datetime.strptime(timeline, timeformat)

                        if self.times['rename']['start'] <= opentime <= self.times['rename']['end']:
                            parse = True

                    elif parse and line.startswith("Close time: "):
                        i = k
                        timeformat = "Close time: %a %b %d %X %Y\n"
                        closetime = datetime.strptime(line, timeformat)

                        if self.times['rename']['start'] <= closetime <= self.times['rename']['end']:
                            parse = False

                    elif parse and line.startswith("Renamed: "):
                        old_path = line.replace("Renamed: ", "").replace("\n", '').split(' (')[0]
                        new_path = logs[k + 1].replace(" ->      ", '').replace("\n", '')

                        parts = old_path.split('/')
                        site = parts[-3]
                        sif = parts[-2]

                        old_name = parts[-1]
                        new_name = new_path.split('/')[-1]

                        if site not in names:
                            names[site] = {}

                        if sif not in names[site]:
                            names[site][sif] = {}

                        names[site][sif][old_name] = new_name


                ##################################################
                # Check for additional files
                ##################################################
                listdir = os.listdir(folder)
                if len(listdir) > 0:
                    self.result = names
                    self.error = "rename_raw did not move all the files in {}".format(folder)
                    return

        self.result = names
        return
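
Both log parsers above gate on timestamped open/close markers; the strptime formats they rely on can be sanity-checked in isolation (sample lines made up, and %X assumes the default C locale):

from datetime import datetime

line = "**** OPENED: 2013-05-01 12:34:56\n"        # ingest log marker
print(datetime.strptime(line, "**** OPENED: %Y-%m-%d %X\n"))

timeline = "Time: Wed May 01 12:34:56 2013\n"      # rename_raw log marker
print(datetime.strptime(timeline, "Time: %a %b %d %X %Y\n"))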
Example #12
    def handle_duplicate_files(self):
        # Handle duplicates
        f = Files(self.config)
        dup_list = {}
        duplicates = {}

        files = self.file_names
        dups = self.duplicates

        if len(dups) > 0:
            for i, n in dups.items():
                for j, v in enumerate(n):
                    folder = 'dup_{}'.format(j + 1)
                    # Identical content: delete the copy; different
                    # content: keep it under a versioned name
                    delete = f.is_same_file(
                        dir_pattern().format(self.stage_path, i),
                        dir_pattern(3).format(self.stage_path, folder, v))
                    move = not delete

                    if delete:
                        os.remove(
                            dir_pattern(3).format(self.stage_path, folder, v))
                    elif move:
                        if i not in dup_list:
                            name = '{}.v1'.format(i)
                            dup_list[i] = [name]
                            src = dir_pattern().format(self.stage_path, i)
                            dst = dir_pattern().format(self.stage_path, name)
                            try:
                                os.rename(src, dst)
                            except OSError:
                                shutil.move(src, dst)

                        num = len(dup_list[i]) + 1
                        name = '{}.v{}'.format(v, num)
                        dup_list[i].append(name)
                        src = dir_pattern(3).format(self.stage_path, folder, v)
                        dst = dir_pattern().format(self.stage_path, name)
                        try:
                            os.rename(src, dst)
                        except OSError:
                            shutil.move(src, dst)

            for i in dup_list:
                if len(dup_list[i]) > 1:
                    key = dir_pattern().format(self.local, i)
                    duplicates[key] = []
                    for j in dup_list[i]:
                        duplicates[key].append(dir_pattern().format(
                            self.local, j))

            self.dups = duplicates

            # Delete directory if now empty
            dupdirs = glob('{}/dup_*'.format(self.stage_path))
            for i in dupdirs:
                f.empty_dir(i)
                os.rmdir(i)

        return False if duplicates == {} else duplicates
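
The return value maps each colliding file, keyed as site/process/name, to its renamed .vN versions. For a single collision on abc.dat it would look like this (paths hypothetical):

duplicates = {
    'sgp/sgpmfrsrC1.00/abc.dat': [
        'sgp/sgpmfrsrC1.00/abc.dat.v1',   # the original, renamed
        'sgp/sgpmfrsrC1.00/abc.dat.v2',   # the copy moved out of dup_1/
    ],
}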
Example #13
    def find_ingest_exec(self, process):
        """ Find the appropriate ingest executable """
        # Does the ingest run for multiple processes and require the -n option?
        multiple = False

        cwd = os.getcwd()
        base_path = binpath
        os.chdir(base_path)

        executable = {}

        # Get a list of ingest executables
        ingest = glob.glob('*_ingest')

        skip = [
        # 	'xsapr_ingest',
        # 	'wacrspec_ingest',
        # 	'wacr_ingest',
        # 	'mwacrspec_ingest',
        ]

        # Loop over the executables and get the process names for each
        for i in ingest:
            multiple = False
            if i not in skip:  # Remove this line for production; it suppresses an error on Copper
                # Get the help text
                help_text = ""
                try:
                    ps = Popen([i, '-h'], stdout=PIPE, stderr=PIPE,
                               universal_newlines=True)
                    (output, error) = ps.communicate()
                    returncode = ps.returncode
                    help_text = output

                except OSError:
                    # Popen raises OSError if the executable cannot be
                    # run; output/error would be unbound in that case
                    help_text = ""
                    if DEVEL:
                        print("\nERROR: UNABLE TO GET HELP TEXT\n")

                # Check for process names
                help_text = help_text.split("VALID PROCESS NAMES")

                # If process names exist
                if len(help_text) == 2:
                    names = help_text[1].strip()
                    names = names.split('\n')
                    # Add each of the valid process names to the dict
                    for n in names:
                        n = n.strip()
                        executable[n] = {
                            'executable': i,
                            'multiple': True
                        }


                elif len(help_text) < 2:
                    name = i.split('_')
                    executable[name[0]] = {
                        'executable': i,
                        'multiple': False
                    }

        if process in executable:
            return dir_pattern().format(base_path, executable[process]['executable']), executable[process]['multiple']
        else:
            return None, None
Example #14
    def run(self):
        """ Run the archive portion of the cleanup phase """
        if not self.config['cleanup_status']['remove']['status']:
            print(self.config['cleanup_status']['remove']['status'])
            print('')
            print(
                "Data files must be requested for deletion before the files can be archived."
            )
            self.config['exit'] = True
            return self.config, self.files

        # Setup vars
        stage = self.config['stage']
        job = self.config['job']

        ############################################################
        # Check to see if the current user is `dsmgr`
        ############################################################
        # Verify current user is authenticated to run this command
        if not self.authenticate():
            self.config['exit'] = True
            return self.config, self.files

        # Do this if the files have not yet been verified as deleted from the archive
        if not self.config['cleanup_status']['archive']['files_deleted']:
            print("Verifying all files have been deleted from the archive...",
                  end="")
            ############################################################
            # Update the local archive database
            ############################################################
            # Setup the datastreams to update
            datastreams = []
            datastream_path = dir_pattern(3).format(stage, job, 'datastream')
            for site in os.listdir(datastream_path):
                path = dir_pattern().format(datastream_path, site)
                for folder in os.listdir(path):
                    abs_folder = dir_pattern().format(path, folder)
                    if os.path.isdir(
                            abs_folder) and not os.path.islink(abs_folder):
                        datastreams.append(folder)

            # Update the local copy of the archive db
            if not DEVEL:
                update_archive(datastreams)

            ############################################################
            # Load the list of files to be removed from the archive
            ############################################################
            deleted_files = []
            deletion_file = dir_pattern(3).format(stage, job,
                                                  "%s.deletion-list.txt" % job)
            if not os.path.exists(deletion_file):
                print("Failed")
                print(
                    "Deletion list does not exist. Please create it and try again."
                )
                self.config['exit'] = True
                return self.config, self.files

            with open(deletion_file, 'r') as fp:
                deletion_text = fp.readlines()

            for line in deletion_text:
                if line.endswith("\r\n"):
                    line = line[:-2]

                tar = {}
                parts, tar['md5'] = line.split(' ')
                parts = parts.split('.')
                tar['version'] = parts[-1][1:]
                tar['name'] = '.'.join(parts[:-1])
                deleted_files.append(tar)

                del tar, parts

            if 'line' in locals():
                del line

            ############################################################
            # Verify all files have been removed from the archive
            ############################################################
            # Get a list of files that are currently at the archive
            archive_files = {}
            db_file = '/apps/ds/conf/datainv/.db_connect'
            alias = 'inv_read'

            db = DB(self.config, db_file=db_file, alias=alias)

            # Store the query
            query = "SELECT * FROM get_remote_files_by_tag('%s') WHERE file_stamp >= %d AND file_stamp <= %d AND file_active = true ORDER BY file_stamp, file_version;"

            # List the column names so the values can be mapped in a dictionary
            cols = [
                'file_tag', 'file_name', 'file_version', 'file_size',
                'file_stored', 'file_md5', 'file_stamp', 'file_checked',
                'file_active'
            ]

            # convert the start and end dates to a unix timestamp
            start = convert_date_to_timestamp(self.config['begin'])
            end = convert_date_to_timestamp(self.config['end'])

            # Query the database for each of the datastreams
            for k, v in enumerate(datastreams):
                args = (v, start, end)
                result = db.query(query % args, columns=cols)

                if len(result) > 0:
                    archive_files[v] = result
                else:
                    print("Failed")
                    print("No results for %s" % v)

            # Store the list of what is currently in the archive and their versions to file
            current_archive = dir_pattern(3).format(stage, job,
                                                    'current_archive.json')
            with open(current_archive, 'w') as fp:
                fp.write(
                    json.dumps(archive_files,
                               indent=2,
                               sort_keys=False,
                               separators=(',', ': ')))

            if DEVEL:
                file_path = dir_pattern(3).format(stage, job,
                                                  '%s.archive.json' % job)
                if os.path.exists(file_path):
                    with open(file_path, 'r') as fp:
                        archive_files = json.loads(fp.read())

                    del file_path

            # Check to see if any of the "deleted_files" are in the list
            # If yes, quit
            # If no, proceed
            all_files_deleted = None

            if len(deleted_files) > 0:
                # Check the list of files from the archive to see if the current file has been deleted
                for f in deleted_files:
                    process = '.'.join(f['name'].split('.')[0:2])
                    name = f['name']

                    if any(d['file_name'] == name
                           for d in archive_files[process]):
                        all_files_deleted = False
                        print("Failed")
                        print(
                            "Not all files have been deleted from the archive."
                        )
                        print("Please try again later.")
                        self.config['exit'] = True
                        return self.config, self.files

                else:
                    # for-else: reached only if no archived copy was found
                    all_files_deleted = True

            else:
                all_files_deleted = True

            if 'f' in locals():
                del f
            if 'process' in locals():
                del process

            if not all_files_deleted:
                print("Failed")
                print("Not all files have been removed from the archive.")
                print(
                    "Run this again once all files have been removed from the archive."
                )
                self.config['exit'] = True
                return self.config, self.files

            # Files have been deleted
            self.config['cleanup_status']['archive']['files_deleted'] = True
            print("Done")

        ############################################################
        # Move any files not being archived to subdirectories
        #
        # Processed files:
        # This includes any processed files outside the
        # 	date range specified
        # Raw/Tar files:
        # This includes any files that do not need to be rearchived
        ############################################################
        if not self.config['cleanup_status']['archive']['move_files']:
            print("Moving files that should not be archived...", end="")

            cwd = os.getcwd()
            datastream = dir_pattern(3).format(stage, job, 'datastream')

            # Load the list of tar files that need to be archived
            os.chdir(dir_pattern().format(stage, job))
            with open('archive.json', 'r') as fp:
                contents = json.loads(fp.read())
            tar_archive = {}
            for k, v in enumerate(contents):
                s = v['site']
                p = v['instrument']
                if s not in tar_archive:
                    tar_archive[s] = {}
                if p not in tar_archive[s]:
                    tar_archive[s][p] = []

                tar_archive[s][p].append(v['file_name'])

            if len(contents) > 0:
                del s, p, k, v

            os.chdir(datastream)
            sites = os.listdir(datastream)
            for i, s in enumerate(sites):
                os.chdir(s)
                processes = os.listdir('.')
                for j, p in enumerate(processes):
                    no_archive = dir_pattern(4).format(datastream, s, p,
                                                       'no_archive')
                    os.chdir(p)

                    if p.split('.')[-1] == '00':
                        # This is a raw datastream
                        # Don't include directories

                        # Get a list of non-tar files from the raw datastreams
                        # Move all of these files to a sub-directory
                        rawfiles = [
                            x for x in os.listdir('.')
                            if not x.endswith('tar') and not os.path.isdir(x)
                        ]

                        # Get a list of all tar files from the raw datastreams
                        # Retrieve the list of tar files that need to be archived
                        # Move all of the files not in the list to a sub-directory
                        tarfiles = [
                            x for x in glob("*.tar") if not os.path.isdir(x)
                        ]

                        for x in rawfiles:
                            if not os.path.exists(no_archive):
                                os.mkdir(no_archive)
                            elif not os.path.isdir(no_archive):
                                print("Failed")
                                print(
                                    "There is a file called 'no_archive' in %s."
                                    % dir_pattern(3).format(datastream, s, p))
                                print(
                                    "This file must be removed before proceeding."
                                )
                                self.config['exit'] = True
                                return self.config, self.files

                            src = dir_pattern(4).format(datastream, s, p, x)
                            try:
                                os.rename(src, no_archive)
                            except OSError:
                                shutil.move(src, no_archive)

                        for x in tarfiles:
                            if not os.path.exists(no_archive):
                                os.mkdir(no_archive)
                            elif not os.path.isdir(no_archive):
                                print("Failed")
                                print(
                                    "There is a file called 'no_archive' in %s."
                                    % dir_pattern(3).format(datastream, s, p))
                                print(
                                    "This file must be removed before proceeding."
                                )
                                self.config['exit'] = True
                                return self.config, self.files

                            if s not in tar_archive or p not in tar_archive[
                                    s] or x not in tar_archive[s][p]:
                                src = dir_pattern(4).format(
                                    datastream, s, p, x)
                                try:
                                    os.rename(src, no_archive)
                                except OSError:
                                    shutil.move(src, no_archive)

                    else:
                        # For each processed datastream
                        # Get a list of all the files
                        # Move any files that fall outside the specified date range to a sub-directory
                        if not os.path.exists(no_archive):
                            os.mkdir(no_archive)
                        elif not os.path.isdir(no_archive):
                            print("Failed")
                            print("There is a file called 'no_archive' in %s.")
                            print(
                                "This file must be removed before proceeding.")
                            self.config['exit'] = True
                            return self.config, self.files

                        # Don't include directories
                        files = [
                            x for x in os.listdir('.') if not os.path.isdir(x)
                        ]

                        timeformat = "%Y%m%d"
                        begin = datetime.strptime(str(self.config['begin']),
                                                  timeformat)
                        end = datetime.strptime(str(self.config['end']),
                                                timeformat)

                        for x in files:
                            date = x.split('.')[2]
                            filedate = datetime.strptime(date, timeformat)

                            if not (begin <= filedate <= end):
                                src = dir_pattern(4).format(
                                    datastream, s, p, x)
                                try:
                                    os.rename(src, no_archive)
                                except OSError:
                                    shutil.move(src, no_archive)

                    os.chdir('..')
                os.chdir('..')
            os.chdir(cwd)

            print("Done")
            self.config['cleanup_status']['archive']['move_files'] = True
        ############################################################
        # Read environment variables
        ############################################################
        print("Updating environment variables...", end="")

        env_path = dir_pattern().format(stage, job)

        if not update_env(env_path):
            f = Files(self.config)
            shell = f.get_shell()
            if shell == "bash":
                ext = 'sh'
            else:
                ext = 'csh'

            print("Failed")
            exit("Error: Unable to locate env.%s." % ext)

        print("Done")  # Updating Env Vars

        ############################################################
        # Ensure `DBCONNECT_PATH` does not point to job `.db_connect` file
        ############################################################
        if 'DBCONNECT_PATH' in os.environ:
            del os.environ['DBCONNECT_PATH']

        # The command should be complete up to this point,
        # however I'm waiting on a response to verify the exact name
        # of this environment variable

        ############################################################
        # Run `release_data`
        ############################################################
        print("Running release_data...", end="")

        #############################################
        # Need to change this so it supports both
        #  `sif` data and `datastream` data
        #############################################
        db = DB(self.config)

        data_paths = db.get_data_paths()

        commands = []

        for d in data_paths:
            output = d['output']
            (site, temp) = output.split('/')
            temp = temp.split('.')[0][3:]
            for i, e in reversed(list(enumerate(temp))):
                if not is_number(e):
                    fac = i
                    break
            else:
                print("Could not separate facility from %s" % temp)
                self.config['exit'] = True
                return self.config, self.files

            facility = temp[fac:]
            process = temp[:fac]
            command = ['release_data', '-s', site, '-f', facility, process]
            # Check to see if a plugin needs to modify the command
            command = self.manager.callPluginCommand(
                'hook_release_data_command_alter', command)
            commands.append(command)

        # Code to run a shell command, copied from another part of APM;
        # needs to be modified to work here

        # Run the command
        for command in commands:
            try:
                if not DEVEL:
                    ps = Popen(command, stdout=PIPE, stderr=PIPE)
                    ps.communicate()
                    returncode = ps.returncode
                    if returncode != 0:
                        print("Failed")
                        self.config['exit'] = True
                        return self.config, self.files
            except CalledProcessError as e:
                print("Failed")
                self.config['exit'] = True
                return self.config, self.files
            except Exception as e:
                raise e

        print("Done")

        # Files have been released
        self.config['cleanup_status']['archive']['files_released'] = True

        # Archive is complete
        self.config['cleanup_status']['archive']['status'] = True

        return self.config, self.files
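
Each deletion-list line is '<name>.v<version> <md5>'; the parsing above can be checked in isolation (sample line hypothetical):

line = 'sgpmfrsrC1.00.20120101.000000.raw.tar.v2 d41d8cd98f00b204e9800998ecf8427e'

tar = {}
parts, tar['md5'] = line.split(' ')
parts = parts.split('.')
tar['version'] = parts[-1][1:]          # '2'
tar['name'] = '.'.join(parts[:-1])      # name without the .v2 suffix
print(tar)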
Example #15
    def run(self):
        config = self.config
        manager = self.manager
        # Collect naming collisions found while unpacking; referenced
        # throughout the method, so it must start empty
        duplicates = {}

        if config['ingest']:
            # If staging for Ingest

            # Make sure collection does not have any files that might get overwritten
            empty = self.check_collection_empty()
            if not empty:
                print(
                    "\nFiles currently exist in your collection directory.\nPlease empty {}/{}/collection and try again.\n"
                    .format(config['stage'], config['job']))
                config['exit'] = True
                return config, self.files

            # cd to the stage directory
            os.chdir(config['stage'])

            # Check to see if a plugin needs to modify the datastream
            temp = manager.callPluginCommand('hook_datastream_alter',
                                             {'config': config})
            config = temp if temp is not None else config

            # Check to see if a plugin needs to modify the SIF data
            temp = manager.callPluginCommand('hook_sif_alter',
                                             {'config': config})
            config = temp if temp is not None else config

            # Establish a database connection
            db = DB(config)

            # Get the data_paths
            data_paths = db.get_data_paths()

            # Check to see if a plugin needs to modify the data_paths
            temp = manager.callPluginCommand('hook_data_paths_alter', {
                'config': config,
                'data_paths': data_paths
            })
            data_paths = temp if temp is not None else data_paths

            # for each instrument
            for k, v in enumerate(data_paths):
                archive_path = v['output']
                stage_path = v['input']

                # Set tar_path and check for plugin modifications
                tar_path = '{}/{}'.format(config['source'], archive_path)
                temp = manager.callPluginCommand('hook_tar_path_alter', {
                    'config': config,
                    'tar_path': tar_path
                })
                tar_path = temp if temp is not None else tar_path

                if os.path.exists(tar_path):
                    # Get a list of tar files that match specified dates
                    tar = UnPack(config, archive_path, stage_path)
                    tar_files = tar.get_tar_files()

                    temp = manager.callPluginCommand('hook_tar_files_alter',
                                                     {'config': config})
                    tar_files = temp if temp is not None else tar_files

                    if tar_files:
                        # compare_path = '{}/{}/.compare/{}'.format(config['stage'], config['job'], stage_path)
                        compare_path = dir_pattern(5).format(
                            config['stage'], config['job'], 'file_comparison',
                            'raw', stage_path)
                        tar_backup = dir_pattern(5).format(
                            config['stage'], config['job'], 'file_comparison',
                            'tar', stage_path)
                        collection_path = '{}/{}/collection/{}'.format(
                            config['stage'], config['job'], stage_path)

                        # Make the above paths if they don't already exist
                        if not os.path.exists(compare_path):
                            os.makedirs(compare_path)

                        if not os.path.exists(tar_backup):
                            os.makedirs(tar_backup)

                        if not os.path.exists(collection_path):
                            os.makedirs(collection_path)

                        # Copy the tar files to the backup location
                        if not tar.copy_files(tar_files, tar_backup):
                            print("Unable to copy tar files")

                        # Unpack the tar files
                        tar.extract_tar_files(tar_files)
                        has_dups = tar.handle_duplicate_files()
                        if has_dups:
                            config['duplicates'] = True

                            for i in has_dups:
                                duplicates[i] = has_dups[i]

                    else:
                        temp = tar_path.split('/')
                        if not config['quiet']:
                            print(
                                '\nData not available for {} using the dates specified'
                                .format(temp[-1]))

                else:
                    temp = tar_path.split('/')
                    if not config['quiet']:
                        print('\nData for {} does not exist.'.format(temp[-1]))

                site, process = stage_path.split('/')

                if self.files is None:
                    self.files = {}

                if site not in self.files:
                    self.files[site] = {}

                site = self.files[site]
                if process not in site:
                    site[process] = {}

                process = site[process]

                if os.path.exists(
                        dir_pattern(4).format(self.config['stage'],
                                              self.config['job'], 'collection',
                                              stage_path)):
                    files = os.listdir(
                        dir_pattern(4).format(self.config['stage'],
                                              self.config['job'], 'collection',
                                              stage_path))
                    dup_uuid = {}
                    for i in files:
                        original_name = i
                        temp = i.split('.')
                        if temp[-1][0] == 'v':
                            try:
                                int(temp[-1][1:])
                                original_name = '.'.join(temp[:-1])
                            except ValueError:
                                pass

                        process[i] = {
                            "uuid": str(uuid.uuid4()),
                            "current_name": i,
                            "original_name": original_name,
                            "stripped_name": None,
                            "processed_name": None,
                            "unpacked_name": i,
                            "duplicate_files": [],
                            "deleted": False,
                        }
                        if original_name != i:
                            dup_uuid[i] = process[i]['uuid']

                    for i in duplicates:
                        if i.startswith(data_paths[k]['input']):
                            for j in duplicates[i]:
                                site, process, name = j.split('/')
                                for l in duplicates[i]:
                                    temp = l.split('/')
                                    if j != l:
                                        self.files[site][process][name][
                                            'duplicate_files'].append(
                                                dup_uuid[temp[2]])

                    # Copy the config files from /data/conf to /<stage>/<job>/conf
                    conf_path = "/data/conf/{0}/{0}{1}{2}".format(
                        self.config['site'], self.config['instrument'],
                        self.config['facility'])
                    conf_dest = "{0}/{1}/conf/{2}".format(
                        self.config['stage'], self.config['job'],
                        self.config['site'])
                    dest_folder = "{}{}{}".format(self.config['site'],
                                                  self.config['instrument'],
                                                  self.config['facility'])
                    if not os.path.exists(conf_path):
                        conf_path = "/data/conf/{0}/{1}{2}".format(
                            self.config['site'], self.config['instrument'],
                            self.config['facility'])
                        conf_dest = "{0}/{1}/conf/{2}".format(
                            self.config['stage'], self.config['job'],
                            self.config['site'])
                        dest_folder = "{}{}".format(self.config['instrument'],
                                                    self.config['facility'])

                    if os.path.exists(conf_path):
                        if not os.path.exists(conf_dest):
                            os.makedirs(conf_dest)

                        if os.path.exists(dir_pattern().format(
                                conf_dest, dest_folder)):
                            try:
                                os.rmdir(dir_pattern().format(
                                    conf_dest, dest_folder))
                            except OSError as e:
                                if e.errno == errno.ENOTEMPTY:
                                    exit(
                                        "Unable to copy config files to {}. Destination is not empty."
                                        .format(dir_pattern().format(
                                            conf_dest, dest_folder)))
                                else:
                                    raise e

                        shutil.copytree(
                            conf_path,
                            dir_pattern().format(conf_dest, dest_folder))

            f = Files(self.config)
            src = dir_pattern(3).format(config['stage'], config['job'],
                                        'collection')
            # dst = dir_pattern(3).format(config['stage'], config['job'], '.compare')
            dst = dir_pattern(4).format(config['stage'], config['job'],
                                        'file_comparison', 'raw')
            if os.path.exists(dst):
                f.empty_dir(dst)
                os.rmdir(dst)

            shutil.copytree(src, dst)

            if len(duplicates) > 0:
                print('')
                print(
                    'The following files had naming collisions when unpacked.\nPlease verify the contents and keep only the appropriate file(s).'
                )
                print(
                    'Please do not rename files, simply delete any unwanted files.'
                )
                for i in duplicates:
                    print('')
                    for j in duplicates[i]:
                        print(j)
                print('')

            f.save_env()

        elif config['vap']:
            f = Files(self.config)
            f.save_env()

            vap = VapMgr(self.config)
            vap.add_to_env()

        return config, self.files
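
Each unpacked file gets a metadata record, and a trailing '.vN' suffix is stripped to recover the original name. That suffix handling in isolation (filename hypothetical):

import uuid

i = 'abc.dat.v2'
original_name = i
temp = i.split('.')
if temp[-1][0] == 'v':
    try:
        int(temp[-1][1:])
        original_name = '.'.join(temp[:-1])
    except ValueError:
        pass

record = {
    'uuid': str(uuid.uuid4()),
    'current_name': i,
    'original_name': original_name,   # 'abc.dat'
    'duplicate_files': [],
    'deleted': False,
}
print(record['original_name'])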
Example #16
    def run(self):
        """ Unpack the tar file """
        # Setup Vars
        st_files = self.tar.st_files
        file_names = self.tar.file_names
        # files = self.tar.members
        # temp = self.config['']

        # One bucket of members per bucket of stripped names
        files = [[] for _ in range(len(st_files))]

        # Open the tar file
        tar = tarfile.open(
            dir_pattern().format(self.tar.archive_path, self.file), 'r')

        # Get the content of the tar file and check for duplicate file names
        members = tar.getmembers()

        f = Files(self.config)

        # Iterate over each tar file
        for i, m in enumerate(members):

            # Make sure arrays are not 0 length
            if len(file_names) == 0:
                file_names.append([])
            if len(files) == 0:
                files.append([])
            if len(st_files) == 0:
                st_files.append([])

            # Iterate over each entry in file_names
            # Add the file name to the correct array
            for k, v in enumerate(file_names):
                sf_names = st_files[k]
                sn = f.strip_name(m.name)
                if sn is None or sn in ('orig', 'bad'):
                    sn = m.name

                if not (m.name in v or sn in sf_names):
                    file_names[k].append(m.name)
                    files[k].append(m)
                    st_files[k].append(sn)
                    break

            else:
                file_names.append([m.name])
                files.append([m])
                st_files.append([sn])

        duplicates = {}
        stripped = st_files[0]
        full_names = file_names[0]

        for i in range(1, len(file_names)):
            for k, v in enumerate(file_names[i]):
                try:
                    # list.index raises ValueError (not IndexError)
                    # when the stripped name is missing
                    myIndex = stripped.index(st_files[i][k])
                except ValueError:
                    print("\nOOPS\n")
                    print("\nI: {}\nK: {}".format(i, k))
                    continue
                try:
                    key = full_names[myIndex]
                except IndexError:
                    print("\nOOPS 2\n")
                    continue

                if key not in duplicates:
                    duplicates[key] = []
                duplicates[key].append(v)

        # Extract all files
        for i in range(len(files)):
            path = None
            if i > 0:
                path = 'dup_{}'.format(i)
            else:
                path = ''

            tar.extractall(path=path, members=files[i])

        tar.close()

        self.tar.duplicates = duplicates

        return
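
The unpack loop buckets tar members so that two entries with the same stripped name never land in the same extraction pass; later buckets are extracted into dup_N directories. The bucketing logic on plain strings (strip_name() is an assumed stand-in for Files.strip_name()):

def strip_name(name):
    """ Assumed stand-in: drop a trailing .orig/.bad marker so
        variants of one file compare equal """
    for suffix in ('.orig', '.bad'):
        if name.endswith(suffix):
            return name[:-len(suffix)]
    return name

members = ['abc.dat', 'abc.dat.orig', 'xyz.dat']
file_names, st_files = [], []

for name in members:
    sn = strip_name(name)
    for k, v in enumerate(file_names):
        if name not in v and sn not in st_files[k]:
            v.append(name)
            st_files[k].append(sn)
            break
    else:
        file_names.append([name])
        st_files.append([sn])

print(file_names)  # [['abc.dat', 'xyz.dat'], ['abc.dat.orig']]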
Example #17
    def run(self):
        """ Run the remove portion of the cleanup phase """
        self.start_time = datetime.now()
        if not self.config['cleanup_status']['review']['status']:
            print(
                "\nData must be reviewed before it can be removed from the archive."
            )
            self.config['exit'] = True
            return self.config, self.files

        stage = self.config['stage']
        job = self.config['job']

        del_file = '%s.deletion-list.txt' % job
        job_folder = dir_pattern().format(stage, job)

        exists = False
        replace = False

        # Check to see if deletion file exists
        if os.path.exists(dir_pattern().format(job_folder, del_file)):
            exists = True
            ui = UI()
            replace = ui.yn_choice(
                '%s already exists.\n Would you like to overwrite this file?' %
                del_file, 'n')

        if exists and not replace:
            return self.config, self.files

        # Either file doesn't exist or user has chosen to overwrite it
        # Create <job>.deletion-list.txt file

        # Reset statuses for this run
        for k in self.config['cleanup_status']['remove']:
            self.config['cleanup_status']['remove'][k] = False

        contents = []

        ##################################################
        # Get list of files from datastream folder
        ##################################################
        datastreams = []
        p_files = {}  # Processed files, keyed by datastream
        datastream_path = dir_pattern(3).format(stage, job, 'datastream')
        for site in os.listdir(datastream_path):
            path = dir_pattern().format(datastream_path, site)
            for folder in os.listdir(path):
                abs_folder = dir_pattern().format(path, folder)
                if os.path.isdir(
                        abs_folder) and not os.path.islink(abs_folder):
                    datastreams.append(folder)
                    # List the datastream's files under the site currently
                    # in scope
                    p_files[folder] = os.listdir(abs_folder)

        ##################################################
        # Update the local copy of the archive db
        ##################################################
        # print("\nUpdating list of files stored at the archive..."
        # if not DEVEL:
        # 	update_archive(datastreams)
        # print("Done"
        ##################################################
        # Get list of files from archive db
        ##################################################
        print("\nRetrieving list of relevant files stored at the archive...",
              end="")
        # Connect to the database
        archive_files = {}
        db_file = '/apps/ds/conf/datainv/.db_connect'
        alias = 'inv_read'

        if not os.path.exists(db_file):
            print(
                "\nUnable to connect to the archive database. Please try again later."
            )
            self.config['exit'] = True
            return self.config, self.files

        db = DB(self.config, db_file=db_file, alias=alias)

        # Store the query
        query = "SELECT * FROM get_remote_files_by_tag('%s') WHERE file_stamp >= %d AND file_stamp <= %d AND file_active = true ORDER BY file_stamp, file_version;"

        # List the column names so the values can be mapped in a dictionary
        cols = [
            'file_tag', 'file_name', 'file_version', 'file_size',
            'file_stored', 'file_md5', 'file_stamp', 'file_checked',
            'file_active'
        ]

        # convert the start and end dates to a unix timestamp
        start = convert_date_to_timestamp(self.config['begin'])
        end = convert_date_to_timestamp(self.config['end'])

        # Query the database for each of the datastreams
        for k, v in enumerate(datastreams):
            args = (v, start, end)
            result = db.query(query % args, columns=cols)

            if len(result) > 0:
                archive_files[v] = result
            else:
                print("\nNo results for %s" % v)

        # Unset loop variables
        if len(datastreams) > 0:
            del k, v, args, result

        print("Done")

        print("Map original tar bundle structure...", end="")
        self.maps['orig']['tar'] = self.get_tar_structure(
            dir_pattern(3).format(stage, job, "file_comparison/tar"))
        print("Done")

        if self.config['ingest']:
            # Add files to the list that should be removed from the archive
            print("\nGenerating list of files to remove from the archive...")
            sys.stdout.flush()
            ##################################################
            # Compare raw files to see if they changed
            ##################################################

            # Setup variables for the following code to use

            # List of files as they have traveled from the tar file through
            # the ingest, mapped by their current name
            file_history = self.files

            # The datastreams that contain the raw files (e.g. sgpmfrsrC1.00)
            raw_streams = []

            # Setup the paths for the ingested and untarred raw files
            new_folder = dir_pattern(3).format(stage, job, 'datastream')
            old_folder = dir_pattern(3).format(stage, job,
                                               'file_comparison/raw')

            # Mapping of raw files in the <job>/datastream folder
            raw_files = {}

            # List of tar files at the archive
            archive_tars = {}

            # Does the raw data in "datastream" need to be re-bundled?
            bundle_data = False

            # Get a list of the sites in "datastream"
            for site in os.listdir(new_folder):
                raw_files[site] = {}

            # Establish a structure for the raw files in "datastream"
            #  This structure follows the same pattern as 'file_history'
            for site in raw_files:
                for instrument in glob(
                        dir_pattern(3).format(new_folder, site, '*.00')):
                    instrument = instrument.split('/')[-1]
                    raw_files[site][instrument] = {}
                    raw_streams.append(instrument)
                    for f in os.listdir(
                            dir_pattern(3).format(new_folder, site,
                                                  instrument)):
                        raw_files[site][instrument][f] = {}

            # Compare all of the existing files
            #  By comparing existing files instead of files that were unpacked
            #  we make sure to include all files and can check for files that
            #  are not being tracked (this should never happen)
            c = Files(self.config)
            for i, s in raw_files.items():  # i = key, s = site
                for j, p in s.items():  # j = key, p = process/instrument
                    pbar = UI()
                    percent = 0
                    pbar.progress(percent)
                    count = len(p)
                    l = 1
                    for k, f in p.items():  # k = key, f = file
                        # Compare the file in 'datastream' with its counterpart in 'file_comparison/raw'
                        if k not in file_history[i][
                                j]:  # This if statement should never evaluate "True"
                            # File is not being tracked
                            # Raw files in datastream need to be rebundled
                            bundle_data = True

                            # Tar file with this raw file needs to be added to the archive
                            # Make sure the site is in the dict
                            if i not in self.archive['add']['raw']:
                                self.archive['add']['raw'][i] = {j: {}}

                            # Make sure the process is in the dict
                            if j not in self.archive['add']['raw'][i]:
                                self.archive['add']['raw'][i][j] = {}

                            # Add the file to the dict
                            self.archive['add']['raw'][i][j][k] = {}

                            continue  # Go to the next iteration of the loop (file cannot be compared because there is no counterpart)

                        # Compare the ingested raw file with the unpacked raw file
                        file_path = dir_pattern(5).format(
                            stage, job, '%s', i, j)
                        file_1 = dir_pattern().format(file_path % 'datastream',
                                                      k)
                        file_2 = dir_pattern().format(
                            file_path % 'file_comparison/raw',
                            file_history[i][j][k]['original_name'])
                        if not c.is_same_file(file_1, file_2):
                            # The files are not the same. Raw files in datastream need to be rebundled
                            bundle_data = True

                            # Ensure self.archive['remove']['raw'] has the proper structure
                            if i not in self.archive['remove']['raw']:
                                self.archive['remove']['raw'][i] = {j: []}

                            if j not in self.archive['remove']['raw'][i]:
                                self.archive['remove']['raw'][i][j] = []

                            self.archive['remove']['raw'][i][j].append(k)

                            # Make self.archive['remove']['raw'][i][j] a unique list
                            self.archive['remove']['raw'][i][j] = list(
                                set(self.archive['remove']['raw'][i][j]))

                        percent = int((float(l) / float(count)) * 100)
                        pbar.progress(percent)
                        l = l + 1

                    if percent < 100:
                        pbar.progress(100)
                    print("")
                    sys.stdout.flush()

            # Unset loop variables
            if len(raw_files) > 0:
                del i, j, k, s, p, f, c

            if bundle_data:
                # Fill self.maps['orig']['history'] and bundle the data
                for site in file_history:
                    if site not in self.maps['orig']['history']:
                        self.maps['orig']['history'][site] = {}

                    for process in file_history[site]:
                        if process not in self.maps['orig']['history'][site]:
                            self.maps['orig']['history'][site][process] = {}

                        for f, d in file_history[site][process].items():
                            if d['original_name'] not in self.maps['orig'][
                                    'history'][site][process]:
                                self.maps['orig']['history'][site][process][
                                    d['original_name']] = d

                # Find any orig/bad files and copy them over (correcting names as necessary)
                other_files_path = dir_pattern(3).format(
                    stage, job, 'file_comparison/raw/%s/%s/%s')
                for i, s in self.maps['orig']['history'].items():
                    for j, p in s.items():
                        bad_files = glob(other_files_path % (i, j, '*.bad.*'))
                        orig_files = glob(other_files_path %
                                          (i, j, '*.orig.*'))
                        edit_files = glob(other_files_path %
                                          (i, j, '*.edit*.*'))

                        # if len(orig_files) > 0:
                        # 	pbar = UI()
                        # 	count = len(orig_files)
                        # 	pbar.progress(0)

                        for k, of in enumerate(orig_files):
                            oFile = of.split('/')[-1]
                            if oFile in p:
                                key = oFile.replace('orig', 'raw')
                                if key in p:
                                    filename = p[key]['current_name'].replace(
                                        '.raw.', '.orig.')
                                    filename = dir_pattern(6).format(
                                        stage, job, 'datastream', i, j,
                                        filename)
                                    shutil.copy(of, filename)


                        # print(""
                        # sys.stdout.flush()

                        # if len(bad_files) > 0:
                        # 	pbar = UI()
                        # 	count = len(bad_files)
                        # 	pbar.progress(0)
                        for k, bf in enumerate(bad_files):
                            bFile = bf.split('/')[-1]
                            if bFile in p:
                                key = bFile.replace('bad', 'raw')

                                if key in p:
                                    filename = p[key]['current_name'].replace(
                                        '.raw.', '.bad.')
                                else:
                                    filename = bFile

                                filename = dir_pattern(6).format(
                                    stage, job, 'datastream', i, j, filename)
                                shutil.copy(bf, filename)

                            # # Update progress bar
                            # pbar.progress(int((float(k + 1) / float(count)) * 100))


                        # print(""
                        # sys.stdout.flush()

                        # if len(edit_files) > 0:
                        # 	pbar = UI()
                        # 	count = len(edit_files)
                        # 	pbar.progress(0)
                        for k, ef in enumerate(edit_files):
                            eFile = ef.split('/')[-1]
                            temp = eFile.split('.')
                            edit = None
                            for t in temp:
                                if t.startswith('edit'):
                                    edit = t
                                    break

                            if edit and eFile in p:
                                key = eFile.replace(edit, 'raw')
                                if key in p:
                                    filename = p[key]['current_name'].replace(
                                        '.raw.', ".%s." % edit)
                                    filename = dir_pattern(6).format(
                                        stage, job, 'datastream', i, j,
                                        filename)
                                    shutil.copy(ef, filename)

                            # # Update progress bar
                            # pbar.progress(int((float(k + 1) / float(count)) * 100))

                        # print(""
                        # sys.stdout.flush()

                        del j, p
                    del i, s

                # Create any needed orig files
                print("Create needed orig files...")
                sys.stdout.flush()

                for i, s in self.archive['remove']['raw'].items():
                    for j, p in s.items():
                        path = dir_pattern(5).format(stage, job, "datastream",
                                                     i, j)
                        pbar = UI()
                        percent = 0
                        pbar.progress(percent)
                        k = 0
                        count = len(p)
                        for f in p:
                            orig = f.replace('.raw.', '.orig.')
                            if not os.path.exists(dir_pattern().format(
                                    path, orig)):
                                src = dir_pattern(6).format(
                                    stage, job, "file_comparison/raw", i, j,
                                    file_history[i][j][f]['unpacked_name'])
                                dst = dir_pattern().format(path, orig)
                                shutil.copy(src, dst)
                            k = k + 1
                            percent = int((float(k) / float(count)) * 100)
                            pbar.progress(percent)

                        if percent < 100:
                            pbar.progress(100)
                        print("")

                    # Unset loop variables
                    # del i, s, j, p, path, f, orig, src, dst

                print("Done")

                # Bundle the data
                self.bundle_raw_data(raw_streams)
                self.config['cleanup_status']['remove']['files_bundled'] = True

                print("Map new tar bundle structure...", end="")
                self.maps['new']['tar'] = self.get_tar_structure(
                    dir_pattern(3).format(stage, job, "datastream"))
                print("Done")

                print("")
                print("Mapping raw structure from original tar files...",
                      end="")
                self.maps['orig']['raw'] = self.map_raw_structure(
                    self.maps['orig']['tar'])
                print("Done")

                print("Mapping raw structure from new tar files...", end="")
                self.maps['new']['raw'] = self.map_raw_structure(
                    self.maps['new']['tar'])
                print("Done")

                ##################################################
                # Find all of the tar files that need
                #   to be removed from the archive
                ##################################################
                print("")
                print(
                    "Generating list of tar files to be removed from the archive..."
                )
                sys.stdout.flush()

                # Find all of the tar files that need to be removed from the archive
                for i, s in self.archive['remove']['raw'].items():
                    percent = 0
                    for j, p in s.items():
                        pbar = UI()
                        count = len(p)
                        pbar.progress(percent)
                        k = 1
                        for raw_file in p:
                            tar_files = self.find_original_tar_bundle(
                                file_history[i][j][raw_file]['original_name'],
                                i, j)
                            for f in tar_files:
                                if f not in self.archive['remove']['tar']:
                                    tar = {
                                        'site': i,
                                        'instrument': j,
                                        'file_name': f
                                    }
                                    self.archive['remove']['tar'].append(tar)
                            percent = int((float(k) / float(count)) * 100)
                            pbar.progress(percent)
                            k = k + 1

                        if percent < 100:
                            pbar.progress(100)
                        print("")
                        sys.stdout.flush()

                # Unset loop variables
                if len(self.archive['remove']['raw']) > 0:
                    del i, s, j, p, raw_file, tar_files, f, tar

                print("Done")

                ##################################################
                # Find all of the tar files that need
                #   to be added to the archive
                ##################################################
                print("")
                print(
                    "Generating list of tar files to be added to the archive..."
                )
                pbar = UI()
                pbar.progress(0)
                count = len(self.archive['remove']['tar'])
                percent = 0
                i = 1

                # Find all of the tar files that need to be added to the archive
                for tar_file in self.archive['remove']['tar']:
                    files = self.find_all_files_from_original_tar(
                        tar_file['file_name'], tar_file['site'],
                        tar_file['instrument'])
                    for f in files:
                        temp = f
                        if not any(d['file_name'] == temp
                                   for d in self.archive['add']['tar']):
                            tar = {
                                'site': tar_file['site'],
                                'instrument': tar_file['instrument'],
                                'file_name': f
                            }

                            self.archive['add']['tar'].append(tar)
                    percent = int((float(i) / float(count)) * 100)
                    pbar.progress(percent)
                    i = i + 1

                if percent < 100:
                    pbar.progress(100)
                print("")
                sys.stdout.flush()

                # Unset loop variables
                if len(self.archive['remove']['tar']) > 0:
                    del tar_file, files, f

                for i, s in self.archive['add']['raw'].items():
                    for j, p in s.items():
                        pbar = UI()
                        pbar.progress(0)
                        percent = 0
                        count = len(p)
                        n = 1
                        for raw_file, info in p.items():
                            tar_files = self.find_original_tar_bundle(
                                raw_file, i, j)
                            for f in tar_files:
                                if not any(
                                        d['file_name'] == f
                                        for d in self.archive['add']['tar']):
                                    tar = {
                                        'site': i,
                                        'instrument': j,
                                        'file_name': f
                                    }
                                    self.archive['add']['tar'].append(tar)
                            percent = int((float(n) / float(count)) * 100)
                            pbar.progress(percent)
                            n = n + 1

                        if percent < 100:
                            pbar.progress(100)
                        print("")
                        sys.stdout.flush()

                # Unset loop variables
                if len(self.archive['add']['raw']) > 0:
                    del i, s, j, p, raw_file, info, tar_files

                    if 'f' in locals():
                        del f
                    if 'tar' in locals():
                        del tar

                ##################################################
                # Update archive db for raw datastream
                ##################################################
                if not DEVEL:
                    update_archive(raw_streams)

                # Get list of tar files from the archive
                for k, v in enumerate(raw_streams):
                    stream = dir_pattern(5).format(stage, job,
                                                   'file_comparison/tar', site,
                                                   v)
                    files = os.listdir(stream)
                    files = "','".join(files)
                    args = (v, files)
                    query = "SELECT * FROM get_remote_files_by_tag('%s') WHERE file_active = true and file_name in ('%s')"
                    result = db.query(query % args, columns=cols)

                    if len(result) > 0:
                        archive_tars[v] = result
                    else:
                        print("\nNo results for %s" % v)

                # Unset loop variables
                if len(raw_streams) > 0:
                    del k, v, args, result

                print("Done generating tar file list")

                # Find data on tar files in list and add it to 'contents'
                print("")
                print("Adding tar files to deletion list...", end="")

                for f in self.archive['remove']['tar']:
                    files = archive_tars[f['instrument']]
                    for k, v in enumerate(files):
                        if v['file_name'] == f['file_name']:
                            index = k
                            break
                    else:
                        print("\nUnable to find %s in archive db" %
                              f['file_name'])
                        self.config['exit'] = True
                        return self.config, self.files

                    temp = f['file_name']
                    if not any(d['filename'] == temp for d in contents):
                        contents.append({
                            'datastream': f['instrument'],
                            'filename': f['file_name'],
                            'hash': files[index]['file_md5'],
                            'version': files[index]['file_version']
                        })

                if len(self.archive['remove']['tar']) > 0:
                    del f, files, k, v, index

                print("Done")

        # Set proper file names in deletion list
        print("Setting proper file names in deletion list...", end="")
        for k, v in archive_files.items():
            if k.split('.')[-1] != '00':
                for key, f in enumerate(v):
                    if f['file_name'] not in p_files[k]:
                        temp = f['file_name']
                        if not any(d['filename'] == temp for d in contents):
                            contents.append({
                                'datastream': k,
                                'filename': f['file_name'],
                                'hash': f['file_md5'],
                                'version': f['file_version']
                            })

        print("Done")

        # Store the list of files that need to be archived to file
        archive_json_file = dir_pattern(3).format(stage, job, 'archive.json')
        fp = open(archive_json_file, 'w')
        fp.write(
            json.dumps(self.archive['add']['tar'],
                       indent=2,
                       sort_keys=False,
                       separators=(',', ': ')))
        fp.close()
        del fp

        # Update the saved status
        self.config['cleanup_status']['remove']['archive_list'] = True

        ##################################################
        # Write the results to file
        # (Use '\r\n' for Windows line endings)
        ##################################################
        print("\nEmailing deletion list...", end="")
        sys.stdout.flush()
        file_contents = []

        contents = sorted(contents, key=self.get_sort_key)

        for line in contents:
            entry = "%s.v%s %s" % (line['filename'], line['version'],
                                   line['hash'])
            file_contents.append(entry)

        fp = open(dir_pattern().format(job_folder, del_file), 'w')
        fp.write("\r\n".join(file_contents))
        fp.close()
        del fp

        # Update the saved status
        self.config['cleanup_status']['remove']['deletion_list'] = True

        # Send the deletion list to the appropriate place (currently email, may be upload at a later time)
        self.email_del_list("%s.deletion-list.txt" % self.config['job'])
        # self.upload_del_list()

        print("Done")

        # Update the saved status
        self.config['cleanup_status']['remove']['status'] = True

        duration = datetime.now() - self.start_time
        print(duration)

        return self.config, self.files
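The archive queries above rely on convert_date_to_timestamp, which is imported elsewhere in this project. A plausible minimal implementation for YYYYMMDD-style dates (an assumption about the helper, not the project's actual code):

import calendar
from datetime import datetime

def convert_date_to_timestamp(date):
    """ Convert a YYYYMMDD date (int or str) to a UTC Unix timestamp
        (illustrative sketch only) """
    dt = datetime.strptime(str(date), '%Y%m%d')
    return calendar.timegm(dt.timetuple())

# e.g. convert_date_to_timestamp(20150101) -> 1420070400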
Example #18
    def setup_alias(self, db_file, alias=None, level=1):
        """ Make sure proper alias exists. Create apm alias if it doesn't exist. """
        # Fall back to the configured alias, then to 'apm'
        als = alias or self.config['alias'] or 'apm'
        del alias

        ######################################################################
        # Remove this code when vapmgr accepts -a argument
        ######################################################################
        if self.config['vap']:
            als = 'vapmgr'
        ######################################################################

        fp = open(db_file, 'r')
        contents = fp.read()
        fp.close()

        lines = contents.split('\n')

        for line in lines:
            words = line.split()
            if len(words) == 5 and not words[0].startswith('#'):
                (alias, host, database, user, password) = words
                if alias == als:
                    break
        else:
            # Doesn't have the specified alias
            if als == 'apm':
                # Doesn't have alias 'apm'
                alias = 'apm'
                host = 'pgdb.dmf.arm.gov'
                database = 'dsdb_reproc'
                user = '******'

                # Ask user for password
                if level == 1:
                    print("Alias '{}' does not exist. Please enter a password for user '{}'".format(alias, user))

                password = getpass.getpass()

                if not password:
                    if level < 3:
                        print("Error: unable to create alias '{}' without password.\nPlease enter a password".format(alias))
                        return self.setup_alias(db_file, alias=alias, level=level+1)

                    else:
                        print("Error: unable to create alias '{}' without password. Please try again.".format(alias))
                        return False

                else:
                    # validate provided password
                    # Write temp file
                    tmp_db_file = dir_pattern(3).format(self.config['stage'], self.config['job'], '.db_connect.tmp')
                    fp = open(tmp_db_file, 'w')
                    fp.write('{}  {}  {}  {}  {}'.format(alias, host, database, user, password))
                    fp.close()

                    if self.validate_alias(alias, tmp_db_file):
                        os.remove(tmp_db_file)
                    else:
                        if level < 3:
                            os.remove(tmp_db_file)
                            print("Error: invalid password provided.\nPlease enter a password")
                            return self.setup_alias(db_file, alias=alias, level=level+1)

                        else:
                            os.remove(tmp_db_file)
                            print("Error: invalid password provided, please try again")
                            return False

                # Write alias to file
                db_creds = '{}  {}  {}  {}  {}\n'.format(alias, host, database, user, password)

                fp = open(db_file, 'a')
                fp.write(db_creds)
                fp.close()

            else:
                # Alias is specified but is not 'apm'
                print("Unable to find alias '{}'. Please update .db_connect and try again.".format(als))
                return False

        return True
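The alias scan above expects .db_connect lines of five whitespace-separated fields: alias, host, database, user, password. A standalone parser illustrating that format (a hypothetical helper mirroring the loop above, not part of the original source):

def read_db_connect(path):
    """ Yield (alias, host, database, user, password) tuples, skipping
        comments and malformed lines (illustrative sketch only) """
    with open(path) as fp:
        for line in fp:
            words = line.split()
            if len(words) == 5 and not words[0].startswith('#'):
                yield tuple(words)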
    def run(self):
        """ Run the cleanup portion of the cleanup phase """
        if not self.config['cleanup_status']['archive']['status']:
            print("Data files must be archived before they can be cleaned up.")
            self.config['exit'] = True
            return self.config, self.files

        stage = self.config['stage']
        job = self.config['job']

        ################################################################################
        # Update local archive database
        ################################################################################
        if not self.config['cleanup_status']['cleanup']['files_archived']:
            print("Updating local copy of the archive...", end="")
            # Setup the datastreams to update
            datastreams = []
            datastream_path = dir_pattern(3).format(stage, job, 'datastream')
            for site in os.listdir(datastream_path):
                path = dir_pattern().format(datastream_path, site)
                for folder in os.listdir(path):
                    abs_folder = dir_pattern().format(path, folder)
                    if os.path.isdir(
                            abs_folder) and not os.path.islink(abs_folder):
                        datastreams.append(folder)

            # Update the local copy of the archive db
            if not DEVEL:
                update_archive(datastreams)

            print("Done")
            ################################################################################
            # Verify that all files to be added to the archive, were added
            ################################################################################
            print(
                "Verifying processed and bundled files have been archived...",
                end="")
            cwd = os.getcwd()

            archive_files = {}
            db_file = '/apps/ds/conf/datainv/.db_connect'
            alias = 'inv_read'

            if not os.path.exists(db_file):
                print("Failed")
                print(
                    "Unable to connect to the archive database. Please try again later."
                )
                self.config['exit'] = True
                return self.config, self.files

            db = DB(self.config, db_file=db_file, alias=alias)

            # Store the query
            query = "SELECT * FROM get_remote_files_by_tag('%s') WHERE file_stamp >= %d AND file_stamp <= %d AND file_active = true ORDER BY file_stamp, file_version;"

            # List the column names so the values can be mapped in a dictionary
            cols = [
                'file_tag', 'file_name', 'file_version', 'file_size',
                'file_stored', 'file_md5', 'file_stamp', 'file_checked',
                'file_active'
            ]

            # convert the start and end dates to a unix timestamp
            start = convert_date_to_timestamp(self.config['begin'])
            end = convert_date_to_timestamp(self.config['end'])

            archive_file = dir_pattern(3).format(stage, job,
                                                 'current_archive.json')
            fp = open(archive_file, 'r')
            oArch = json.loads(fp.read())
            fp.close()
            del fp

            os.chdir(datastream_path)
            for site in os.listdir('.'):
                path = dir_pattern().format(datastream_path, site)
                os.chdir(site)

                for folder in os.listdir('.'):
                    os.chdir(folder)

                    args = (folder, start, end)
                    result = db.query(query % args, columns=cols)

                    for f in os.listdir('.'):
                        if not os.path.isdir(dir_pattern().format(
                                os.getcwd(), f)):
                            try:
                                new_version = next(d['file_version']
                                                   for d in result
                                                   if d['file_name'] == f)
                                old_version = next(o['file_version']
                                                   for o in oArch[folder]
                                                   if o['file_name'] == f)
                                if not new_version > old_version:
                                    print("Failed")
                                    print(
                                        "Not all files have been successfully archived. Please try again later."
                                    )
                                    self.config['exit'] = True
                                    return self.config, self.files
                            except StopIteration:
                                pass

                    os.chdir('..')
                os.chdir('..')

            os.chdir(cwd)
            self.config['cleanup_status']['cleanup']['files_archived'] = True
            print("Done")

        ################################################################################
        # Remove all files from `<job>/datastream`
        ################################################################################
        if not self.config['cleanup_status']['cleanup']['files_cleaned_up']:
            print("Cleaning up project files...", end="")
            # Remove archive.json
            # Remove current_archive.json
            # Remove <job>.deletion-list.txt

            f = Files(self.config)
            path = dir_pattern().format(stage, job)
            delete = [
                "datastream",
                "collection",
                "file_comparison/raw",
                "file_comparison/tar",
                'archive.json',
                'current_archive.json',
                '%s.deletion-list.txt' % job,
            ]

            try:
                for i in delete:
                    item = dir_pattern().format(path, i)
                    if os.path.exists(item):
                        if os.path.isdir(item):
                            f.empty_dir(item)
                        elif os.path.isfile(item):
                            os.remove(item)

            except Exception:
                print("Failed")
                print(
                    "Unable to cleanup all files. Please try again, or cleanup project manually."
                )
                self.config['exit'] = True
                return self.config, self.files

            print("Done")
            self.config['cleanup_status']['cleanup']['files_cleaned_up'] = True

        self.config['cleanup_status']['cleanup']['status'] = True
        return self.config, self.files
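Files.empty_dir is defined elsewhere in the project; the cleanup above only needs it to clear a directory's contents in place. A minimal sketch of such a helper (an assumption about its behavior, not the project's actual code):

import os
import shutil

def empty_dir(path):
    """ Remove everything inside `path` but keep the directory itself
        (illustrative sketch only) """
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if os.path.isdir(full) and not os.path.islink(full):
            shutil.rmtree(full)
        else:
            os.remove(full)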
Example #20
    def run(self):
        """ Run the process phase """
        home = os.path.expanduser('~')

        # Update env variables
        print("\nUpdating environment vars...", end="")
        sys.stdout.flush()

        if not update_env(dir_pattern().format(self.config['stage'], self.config['job'])):
            shell = get_shell()
            if shell == "bash":
                ext = 'sh'
            else:
                ext = 'csh'

            print("Fail")
            exit("Error: Unable to locate env.%s." % ext)
        print("Done") # Updating Env Vars

        # Check for .db_connect file
        print("\nLocating .db_connect...", end="")
        sys.stdout.flush()

        db_file = dir_pattern().format(home, ".db_connect")

        if not os.path.exists(db_file):
            fp = open(db_file, 'w')
            fp.close()

        # Check for apm or user specified alias
        if not self.setup_alias(db_file):
            exit()
        print("Done")

        if self.config['ingest']:
            ##################################################
            # START INGEST PROCESSING
            ##################################################

            # Find the ingest executable for each process
            print("\nLocating ingest executable...", end="")
            sys.stdout.flush()

            processes = self.db.get_data_paths()

            for k, v in enumerate(processes):
                ingest, multiple = self.find_ingest_exec(v['proc'])

                if not ingest:
                    print("Fail")
                    exit("Unable to find Ingest executable for {}".format(v['proc']))
                else:
                    processes[k]['ingest'] = ingest
                    processes[k]['multiple'] = multiple

            # Add a plugin spot to update the ingest as necessary
            # Then create the IRT ingest plugin
            # Check to see if a plugin needs to modify the datastream
            temp = self.manager.callPluginCommand('hook_ingest_alter', {'processes': processes})
            processes = temp if temp != None else processes

            print("Done")
            print("")
            db_commands = []
            # Update the database

            if self.config['db_up'] != False:
                print("\nUpdating the database...", end="")
                sys.stdout.flush()

                for process in processes:
                    if not self.update_db(process):
                        print("Fail")
                        exit("ERROR: Unable to update database for {}".format(process['proc']))

                print("Done")

            print("\nExecuting ingest processes...", end="")
            sys.stdout.flush()

            # Execute an Ingest process
            threads = {}
            status = {}

            done = False


            while not done:
                done = True
                for k,v in enumerate(processes):
                    # Make sure all needed keys exist
                    if 'complete' not in v:
                        processes[k]['complete'] = False
                        v['complete'] = False

                    key = v['ingest'].split('/')[-1].split('_')[0]
                    if (key not in threads or status[key] == True) and v['complete'] == False:
                        done = False
                        status[key] = False
                        threads[key] = Ingest(v, self.config, k)
                        if not threads[key]:
                            exit("ERROR: Ingest object not created")
                        threads[key].start()
                    elif threads[key].is_alive():
                        done = False
                    elif (not threads[key].is_alive()) and (v['complete'] == False or status[key] == False):
                        status[key] = True
                        processes[threads[key].key]['complete'] = True
                        processes[threads[key].key]['result'] = threads[key].result
                        result = processes[threads[key].key]['result']
                        # Notify the user if there was an error, that way they can correct
                        # 	the error and run again or run manually if needed
                        if threads[key].error != False:
                            print("There was an error: ", end="")
                            print(threads[key].error)

                        if self.files and result:
                            for i, site in result.items():
                                for j, sif in site.items():
                                    # Track each file under its processed name
                                    for old_name, name in sif.items():
                                        if old_name in self.files[i][j]:
                                            self.files[i][j][name] = self.files[i][j].pop(old_name)
                                            self.files[i][j][name]['processed_name'] = name
                                            self.files[i][j][name]['current_name'] = name

                    elif v['complete'] == True:
                        pass
                    else:
                        print("OOPS forgot a status")


            print("Done")

            ##################################################
            # END INGEST PROCESSING
            ##################################################
        elif self.config['vap']:
            ##################################################
            # START VAP PROCESSING
            ##################################################
            print('Running vapmgr...', end="")
            sys.stdout.flush()

            starttime = None
            endtime = None
            is_success = None

            # Make sure vappath is in the path env variable
            syspath = os.environ.get('PATH')
            syspath = syspath.split(':')
            if binpath not in syspath:
                syspath.append(binpath)
                syspath = ':'.join(syspath)
                os.environ['PATH'] = syspath

            ###############################################
            # Remove this code for production
            ###############################################

            vaphome = binpath.split('/')[:-1]
            vaphome = '/'.join(vaphome)
            os.environ['VAP_HOME'] = vaphome

            ###############################################

            # Run vapmgr setup to create any needed aliases
            setup = [
                '%s/vapmgr' % vappath,
                '-setup',
                '-r',
                '%s.%s' % (self.config['site'], self.config['facility']),
                self.config['instrument']
            ]
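            # With illustrative config values (site 'sgp', facility 'C1',
            # instrument 'mfrsr'), this renders as:
            #   ['<vappath>/vapmgr', '-setup', '-r', 'sgp.C1', 'mfrsr']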

            ps = Popen(setup, stdout=PIPE, stderr=PIPE)
            (output, error) = ps.communicate()
            returncode = ps.returncode


            if returncode != 0:
                print("ERROR: Unable to setup vapmgr")
                print("")
                print(error)
                exit()

            # Run vapmgr to process the vaps
            starttime = datetime.now().replace(microsecond=0)

            command = [
                '%s/vapmgr' % vappath,
                '-r',
                '%s.%s' % (self.config['site'], self.config['facility']),
                '-start',
                str(self.config['begin']),
                '-end',
                str(self.config['end']),
                '-force',
                self.config['instrument']
            ]

            ps = Popen(command, stdout=PIPE, stderr=PIPE)
            (output, error) = ps.communicate()
            returncode = ps.returncode
            endtime = datetime.now().replace(microsecond=0)

            if returncode != 0:
                print("ERROR: Error running vapmgr")
                print("")
                print(error)
                exit()


            # vapmgr ran successfully
            # Find out which log files need to be parsed
            path = dir_pattern(5).format(self.config['stage'], self.config['job'], 'logs', self.config['site'], '%s_logs')
            proc_path = path % 'proc'
            instr_path = path % 'instr'

            vaplogs = []
            vapmgrlogs = []
            vapmgrqclogs = []

            year = str(starttime.year).zfill(4)
            month = str(starttime.month).zfill(2)

            regex_log_file_pattern = r'%s.*%s.*%s\.%s%s00\.000000\.%s'

            proc = regex_log_file_pattern % (self.config['site'], self.config['instrument'], self.config['facility'], year, month, 'VAP')
            instr = regex_log_file_pattern % (self.config['site'], self.config['instrument'], self.config['facility'], year, month, 'vapmgrlog')
            instrqc = regex_log_file_pattern % (self.config['site'], self.config['instrument'], self.config['facility'], year, month, 'vapmgrqclog')
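            # With, e.g., site='sgp', instrument='mfrsr', facility='C1',
            # year='2015', month='01' (illustrative values), `proc` becomes
            # r'sgp.*mfrsr.*C1\.20150100\.000000\.VAP' and matches log names
            # like 'sgpmfrsrC1.20150100.000000.VAP'.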

            # vap logs don't always exist. Need to check to make sure they do before trying to access them
            if os.path.exists(proc_path):
                vaplog_dirs = os.listdir(proc_path)
                for d in vaplog_dirs:
                    tmp = os.listdir(dir_pattern().format(proc_path, d))
                    for i in tmp:
                        if re.search(proc, i):
                            vaplogs.append(dir_pattern(3).format(proc_path, d, i))



            if not os.path.exists(instr_path):
                exit("Unable to find vapmgr log files")

            vapmgrlog_dirs = os.listdir(instr_path)
            for d in vapmgrlog_dirs:
                tmp = os.listdir(dir_pattern().format(instr_path, d))
                for i in tmp:
                    if re.search(instr, i):
                        vapmgrlogs.append(dir_pattern(3).format(instr_path, d, i))
                    elif re.search(instrqc, i):
                        vapmgrqclogs.append(dir_pattern(3).format(instr_path, d, i))


            logs = {}

            # Parse VAP log files
            if len(vaplogs) > 0:
                logs['vap'] = []
                for log in vaplogs:
                    temp = self.parse_vap_log(log, starttime, endtime)
                    for i in temp:
                        i['log_file'] = log
                        logs['vap'].append(i)


            # Parse vapmgr log files
            if len(vapmgrlogs) > 0:
                logs['vapmgr'] = []
                for log in vapmgrlogs:
                    temp = self.parse_vapmgr_log(log, starttime, endtime)
                    for i in temp:
                        i['log_file'] = log
                        logs['vapmgr'].append(i)

            # Parse vapmgr QC log files
            if len(vapmgrqclogs) > 0:
                logs['vapmgrqc'] = []
                for log in vapmgrqclogs:
                    temp = self.parse_vapmgr_log(log, starttime, endtime, qc=True)
                    for i in temp:
                        i['log_file'] = log
                        logs['vapmgrqc'].append(i)

            print('Done')


            if 'vap' in logs and len(logs['vap']) > 0:
                print('')
                print("VAP Results")

                for k,log in enumerate(logs['vap']):
                    print("Running: %s for %s..." % (log['process'], log['dates']), end="")
                    if log['status']:
                        print(log['message'])
                    else:
                        print("ERROR")
                        print("\tFor more information see the log entry starting on line %d of the following log file:\n\t %s" % (log['line_number'], log['log_file']))

            elif 'vapmgr' in logs and len(logs['vapmgr']) > 0:
                print('')
                print("VapMGR Results")

                for log in logs['vapmgr']:
                    print(self.vapmgr_log_results(log, 'output'))

            if 'vapmgr' in logs and len(logs['vapmgr']) > 0:
                print('')
                print("VapMGR Quicklooks Results")

                for log in logs['vapmgr']:
                    print(self.vapmgr_log_results(log, 'quicklooks'))


            if 'vapmgrqc' in logs and len(logs['vapmgrqc']) > 0:
                print('')
                print("VapMGRQC Results")

                for log in logs['vapmgrqc']:
                    print(self.vapmgr_log_results(log, 'output'))

            print('')

            ##################################################
            # END VAP PROCESSING
            ##################################################

        return self.config, self.files
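The ingest while-loop above implements a simple per-executable scheduler: processes that share an ingest executable run one at a time, while distinct executables run in parallel threads. A condensed, self-contained sketch of that scheduling pattern (hypothetical names, not the original code):

import threading
import time

def run_grouped(tasks):
    """ tasks: list of (key, func) pairs. At most one thread per key runs
        at a time; distinct keys run in parallel (illustrative sketch) """
    pending = {}
    for key, func in tasks:
        pending.setdefault(key, []).append(func)
    running = {}

    while pending or running:
        # Reap finished threads so their key becomes available again
        for key in list(running):
            if not running[key].is_alive():
                del running[key]

        # Start the next queued task for every idle key
        for key in list(pending):
            if key not in running:
                thread = threading.Thread(target=pending[key].pop(0))
                if not pending[key]:
                    del pending[key]
                running[key] = thread
                thread.start()

        time.sleep(0.1)

# e.g. run_grouped([('mfrsr', job1), ('mfrsr', job2), ('sirs', job3)])
# runs job1 and job3 in parallel, then job2 once job1 finishes.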