def scan_archive_struct_stninfo(self, rootdir):

    # same as scan_archive_struct but looks for station information files
    self.archiveroot = rootdir

    stninfo = []
    path2stninfo = []
    for path, dirs, files in scandir.walk(rootdir):
        for file in files:
            file_path = os.path.join(path, file)
            if file.endswith(".info"):
                # only add station information (.info) files
                stninfo.append(file_path.rsplit(rootdir + '/')[1])
                path2stninfo.append(file_path)
            elif file.endswith('DS_Store') or file.startswith('._'):
                # remove the macOS metadata files
                file_try_remove(file_path)

    return stninfo, path2stninfo
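For context, a hypothetical call site for this method, assuming it is defined on the RinexStruct class used in Example #3 (the archive root below is illustrative):

# hypothetical usage; cnn is an open database connection, as in Example #3
archive = pyArchiveStruct.RinexStruct(cnn)
stninfo, path2stninfo = archive.scan_archive_struct_stninfo('/data/archive')
for rel_path, full_path in zip(stninfo, path2stninfo):
    print('%-40s -> %s' % (rel_path, full_path))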
Example #2
def cleanup(self):
    if self.clk_path and not self.no_cleanup:
        # delete the clock file
        file_try_remove(self.clk_path)
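file_try_remove appears throughout these examples but is not shown; a minimal sketch of the behavior the calls above assume (best-effort deletion that swallows filesystem errors):

import os

def file_try_remove(file_path):
    # best-effort delete: return False instead of raising
    # if the file is missing or cannot be removed
    try:
        os.remove(file_path)
        return True
    except OSError:
        return False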
Example #3
def main():

    # keep the connection and config in global variables for use inside callback_handle
    global cnn
    global repository_data_in

    # parse the command line arguments
    parser = argparse.ArgumentParser(
        description='Archive operations Main Program')

    parser.add_argument(
        '-purge',
        '--purge_locks',
        action='store_true',
        help=
        "Delete any network starting with '?' from the stations table and purge the contents of "
        "the locks table, deleting the associated files from data_in.")

    parser.add_argument('-np',
                        '--noparallel',
                        action='store_true',
                        help="Execute command without parallelization.")

    args = parser.parse_args()

    Config = pyOptions.ReadOptions('gnss_data.cfg')

    repository_data_in = Config.repository_data_in

    if not os.path.isdir(Config.repository):
        print("The repository path provided in gnss_data.cfg is not a directory")
        exit()

    JobServer = pyJobServer.JobServer(Config,
                                      run_parallel=not args.noparallel,
                                      software_sync=[
                                          Config.options['ppp_remote_local']
                                      ])  # type: pyJobServer.JobServer

    # open the database connection (assumed to use the dbConnection module listed below)
    cnn = dbConnection.Cnn('gnss_data.cfg')

    # create the execution log
    cnn.insert('executions', script='ArchiveService.py')

    # set the data_xx directories
    data_in = os.path.join(Config.repository, 'data_in')
    data_in_retry = os.path.join(Config.repository, 'data_in_retry')
    data_reject = os.path.join(Config.repository, 'data_rejected')

    # create the subdirectories if they do not exist
    for path in (data_in, data_in_retry, data_reject):
        if not os.path.isdir(path):
            os.makedirs(path)

    # delete any locks whose NetworkCode does not start with '?'
    cnn.query('DELETE FROM locks WHERE "NetworkCode" NOT LIKE \'?%\'')
    # get the locks to avoid reprocessing files that had no metadata in the database
    locks = cnn.query('SELECT * FROM locks').dictresult()

    if args.purge_locks:
        # first, delete all associated files
        for lock in tqdm(locks,
                         ncols=160,
                         unit='crz',
                         desc='%-30s' % ' >> Purging locks',
                         disable=None):
            file_try_remove(
                os.path.join(Config.repository_data_in, lock['filename']))

        # purge the stations in networks starting with '?'; this automatically purges the locks table
        cnn.query('delete from stations where "NetworkCode" like \'?%\'')
        # purge the corresponding networks
        cnn.query('delete from networks where "NetworkCode" like \'?%\'')
        # purge any remaining locks (just in case)
        cnn.query('delete from locks where "NetworkCode" not like \'?%\'')
        # reload the locks table now that it has been purged
        locks = cnn.query('SELECT * FROM locks').dictresult()

    # look for data in data_in_retry and move it to data_in

    archive = pyArchiveStruct.RinexStruct(cnn)

    pbar = tqdm(desc='%-30s' % ' >> Scanning data_in_retry',
                ncols=160,
                unit='crz',
                disable=None)

    rfiles, paths, _ = archive.scan_archive_struct(data_in_retry, pbar)

    pbar.close()

    pbar = tqdm(desc='%-30s' % ' -- Moving files to data_in',
                total=len(rfiles),
                ncols=160,
                unit='crz',
                disable=None)

    for rfile, path in zip(rfiles, paths):

        dest_file = os.path.join(data_in, rfile)

        # move the file into the folder
        Utils.move(path, dest_file)

        pbar.set_postfix(crinez=rfile)
        pbar.update()

        # remove the log file that accompanies this CRINEZ file
        file_try_remove(
            pyRinexName.RinexNameFormat(path).filename_no_ext() + '.log')

    pbar.close()
    tqdm.write(' -- Cleaning data_in_retry')
    remove_empty_folders(data_in_retry)

    # pause briefly to give the filesystem time to finish pending operations
    time.sleep(5)

    files_list = []

    pbar = tqdm(desc='%-30s' % ' >> Repository CRINEZ scan',
                ncols=160,
                disable=None)

    rpaths, _, files = archive.scan_archive_struct(data_in, pbar)

    pbar.close()

    pbar = tqdm(desc='%-30s' % ' -- Checking the locks table',
                total=len(files),
                ncols=130,
                unit='crz',
                disable=None)

    locks = set(lock['filename'] for lock in locks)
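    # keep only the files whose relative path is not present in the locks table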
    for file, path in zip(files, rpaths):
        pbar.set_postfix(crinez=file)
        pbar.update()
        if path not in locks:
            files_list.append((path, file))

    pbar.close()

    tqdm.write(" -- Found %i files in the lock list..." % len(locks))
    tqdm.write(" -- Found %i files (matching RINEX 2/3 format) to process..." %
               len(files_list))

    pbar = tqdm(desc='%-30s' % ' >> Processing repository',
                total=len(files_list),
                ncols=160,
                unit='crz',
                disable=None)

    # dependency functions
    depfuncs = (check_rinex_timespan_int, write_error, error_handle,
                insert_data, verify_rinex_multiday, file_append,
                file_try_remove, file_open)
    # create the cluster; the listed modules are imported in each worker
    JobServer.create_cluster(process_crinex_file,
                             depfuncs,
                             callback_handle,
                             pbar,
                             modules=('pyRinex', 'pyArchiveStruct', 'pyOTL',
                                      'pyPPP', 'pyStationInfo', 'dbConnection',
                                      'Utils', 'os', 'uuid', 'datetime',
                                      'pyDate', 'numpy', 'traceback',
                                      'platform', 'pyBrdc', 'pyProducts',
                                      'pyOptions', 'pyEvents', 'pyRinexName'))

    for file_to_process, sfile in files_list:
        JobServer.submit(file_to_process, sfile, data_reject, data_in_retry)

    JobServer.wait()

    pbar.close()

    JobServer.close_cluster()

    print_archive_service_summary()

    # iterate to delete empty folders
    remove_empty_folders(data_in)
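
remove_empty_folders is likewise not shown; a minimal sketch under the assumption that it prunes empty subdirectories bottom-up, which matches how it is used above:

import os

def remove_empty_folders(root):
    # walk bottom-up so that directories emptied on a previous
    # iteration can themselves be removed
    for path, dirs, files in os.walk(root, topdown=False):
        if path != root and not os.listdir(path):
            os.rmdir(path)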