def scan_archive_struct_stninfo(self, rootdir):
    """Walk *rootdir* collecting station information (``.info``) files.

    Same traversal as ``scan_archive_struct`` but looks for station info
    files instead of RINEX; macOS junk files (``.DS_Store``, ``._*``)
    found along the way are deleted best-effort.

    :param rootdir: archive root directory to scan (stored on
        ``self.archiveroot`` as a side effect, matching the original)
    :return: tuple ``(stninfo, path2stninfo)`` where ``stninfo`` holds
        paths relative to *rootdir* and ``path2stninfo`` the full paths
    """
    self.archiveroot = rootdir

    stninfo = []
    path2stninfo = []

    for path, dirs, files in scandir.walk(rootdir):
        for file in files:
            file_path = os.path.join(path, file)
            if file.endswith(".info"):
                # only add valid station info files
                # BUGFIX: the original used file_path.rsplit(rootdir + '/')[1],
                # which splits on the LAST occurrence of the rootdir string
                # inside the path (wrong result when the rootdir fragment
                # repeats, e.g. /data/data/x.info under /data) and breaks if
                # rootdir carries a trailing slash. os.path.relpath handles
                # both cases correctly.
                stninfo.append(os.path.relpath(file_path, rootdir))
                path2stninfo.append(file_path)
            elif file.endswith('DS_Store') or file.startswith('._'):
                # delete the stupid mac files
                file_try_remove(file_path)

    return stninfo, path2stninfo
def cleanup(self):
    """Best-effort removal of the clock file, unless cleanup is disabled."""
    # Nothing to do when no clock file was produced, or when the caller
    # asked to keep intermediate files around.
    if not self.clk_path or self.no_cleanup:
        return
    file_try_remove(self.clk_path)
def main():
    """Entry point for the archive-service driver.

    Parses CLI options, (optionally) purges lock entries and '?'-prefixed
    networks/stations, moves previously-failed files from data_in_retry back
    into data_in, then scans data_in for CRINEZ files not present in the
    locks table and submits each one to the parallel job server for
    processing.
    """
    # put connection and config in global variable to use inside callback_handle
    global cnn
    global repository_data_in

    # NOTE(review): `cnn` is declared global but never assigned anywhere in
    # this view — presumably it is initialized at module level (e.g. a
    # dbConnection.Cnn('gnss_data.cfg')) outside this chunk; confirm.

    # bind to the repository directory
    parser = argparse.ArgumentParser(description='Archive operations Main Program')

    parser.add_argument('-purge', '--purge_locks', action='store_true',
                        help="Delete any network starting with '?' from the stations table and purge the contents of "
                             "the locks table, deleting the associated files from data_in.")

    parser.add_argument('-np', '--noparallel', action='store_true',
                        help="Execute command without parallelization.")

    args = parser.parse_args()

    Config = pyOptions.ReadOptions('gnss_data.cfg')
    repository_data_in = Config.repository_data_in

    # the repository root must exist before anything else runs
    if not os.path.isdir(Config.repository):
        print("the provided repository path in gnss_data.cfg is not a folder")
        exit()

    JobServer = pyJobServer.JobServer(Config,
                                      run_parallel=not args.noparallel,
                                      software_sync=[Config.options['ppp_remote_local']])  # type: pyJobServer.JobServer

    # create the execution log
    cnn.insert('executions', script='ArchiveService.py')

    # set the data_xx directories
    data_in       = os.path.join(Config.repository, 'data_in')
    data_in_retry = os.path.join(Config.repository, 'data_in_retry')
    data_reject   = os.path.join(Config.repository, 'data_rejected')

    # make sure the subdirs exist, creating them if needed
    for path in (data_in, data_in_retry, data_reject):
        if not os.path.isdir(path):
            os.makedirs(path)

    # delete any locks with a NetworkCode != '?%'
    cnn.query('DELETE FROM locks WHERE "NetworkCode" NOT LIKE \'?%\'')

    # get the locks to avoid reprocessing files that had no metadata in the database
    locks = cnn.query('SELECT * FROM locks').dictresult()

    if args.purge_locks:
        # first, delete all associated files
        for lock in tqdm(locks, ncols=160, unit='crz', desc='%-30s' % ' >> Purging locks', disable=None):
            file_try_remove(os.path.join(Config.repository_data_in, lock['filename']))

        # purge the contents of stations. This will automatically purge the locks table
        cnn.query('delete from stations where "NetworkCode" like \'?%\'')

        # purge the networks
        cnn.query('delete from networks where "NetworkCode" like \'?%\'')

        # purge the locks already taken care of (just in case)
        cnn.query('delete from locks where "NetworkCode" not like \'?%\'')

        # get the locks to avoid reprocessing files that had no metadata in the database
        locks = cnn.query('SELECT * FROM locks').dictresult()

    # look for data in the data_in_retry and move it to data_in
    archive = pyArchiveStruct.RinexStruct(cnn)

    pbar = tqdm(desc='%-30s' % ' >> Scanning data_in_retry', ncols=160, unit='crz', disable=None)
    # NOTE(review): here the tuple is unpacked as (rfiles, paths, _) but the
    # later call on data_in unpacks (rpaths, _, files) — the two orderings
    # disagree; verify against scan_archive_struct's actual return signature.
    rfiles, paths, _ = archive.scan_archive_struct(data_in_retry, pbar)
    pbar.close()

    pbar = tqdm(desc='%-30s' % ' -- Moving files to data_in', total=len(rfiles), ncols=160, unit='crz', disable=None)

    for rfile, path in zip(rfiles, paths):
        dest_file = os.path.join(data_in, rfile)
        # move the file into the folder
        Utils.move(path, dest_file)
        pbar.set_postfix(crinez=rfile)
        pbar.update()
        # remove folder from data_in_retry (also removes the log file)
        # remove the log file that accompanies this CRINEZ file
        file_try_remove(pyRinexName.RinexNameFormat(path).filename_no_ext() + '.log')

    pbar.close()
    tqdm.write(' -- Cleaning data_in_retry')
    remove_empty_folders(data_in_retry)

    # take a break to allow the FS to finish the task
    time.sleep(5)

    files_list = []

    pbar = tqdm(desc='%-30s' % ' >> Repository CRINEZ scan', ncols=160, disable=None)
    rpaths, _, files = archive.scan_archive_struct(data_in, pbar)
    pbar.close()

    pbar = tqdm(desc='%-30s' % ' -- Checking the locks table', total=len(files), ncols=130, unit='crz', disable=None)

    # membership tests below are O(1) against this set of locked filenames
    locks = set(lock['filename'] for lock in locks)
    for file, path in zip(files, rpaths):
        pbar.set_postfix(crinez=file)
        pbar.update()
        # only queue files that are not locked (i.e. had metadata problems before)
        if path not in locks:
            files_list.append((path, file))

    pbar.close()

    tqdm.write(" -- Found %i files in the lock list..." % len(locks))
    tqdm.write(" -- Found %i files (matching RINEX 2/3 format) to process..." % len(files_list))

    pbar = tqdm(desc='%-30s' % ' >> Processing repository', total=len(files_list), ncols=160, unit='crz', disable=None)

    # dependency functions shipped to the workers alongside the task callable
    depfuncs = (check_rinex_timespan_int, write_error, error_handle, insert_data, verify_rinex_multiday,
                file_append, file_try_remove, file_open)

    # import modules needed on the worker side
    JobServer.create_cluster(process_crinex_file, depfuncs, callback_handle, pbar,
                             modules=('pyRinex', 'pyArchiveStruct', 'pyOTL', 'pyPPP', 'pyStationInfo',
                                      'dbConnection', 'Utils', 'os', 'uuid', 'datetime', 'pyDate', 'numpy',
                                      'traceback', 'platform', 'pyBrdc', 'pyProducts', 'pyOptions', 'pyEvents',
                                      'pyRinexName'))

    for file_to_process, sfile in files_list:
        JobServer.submit(file_to_process, sfile, data_reject, data_in_retry)

    JobServer.wait()
    pbar.close()
    JobServer.close_cluster()

    print_archive_service_summary()

    # iterate to delete empty folders
    remove_empty_folders(data_in)