Example #1
    def log_job_error(msg):
        tqdm.write(' -- There were unhandled errors during this batch. '
                   'Please check errors_pyArchiveService.log for details')

        # log any errors that are encountered during parallel execution
        file_append(
            'errors_pyArchiveService.log',
            'ON ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') +
            ' an unhandled error occurred:\n' + msg + '\n' +
            'END OF ERROR =================== \n\n')
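
All of these examples funnel their messages through a file_append helper whose definition is not shown here. A minimal sketch of such a helper, assuming it does nothing more than open the target file in append mode and write the text, could look like this:

def file_append(filename, text):
    # assumed behavior: open in append mode, write the text, close on exit
    with open(filename, 'a') as f:
        f.write(text)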
Example #2
def gamit_callback(job):

    result = job.result

    if result is not None:
        msg = []
        if 'error' not in result:
            if result['nrms'] > 1:
                msg.append('    > NRMS > 1.0 (%.3f)' % result['nrms'])

            if result['wl'] < 60:
                msg.append('    > WL fixed < 60 (%.1f)' % result['wl'])

            if result['missing']:
                msg.append('    > Missing sites in solution: ' +
                           ', '.join(result['missing']))

            # DDG: only show sessions with problems to facilitate debugging.
            if result['success']:
                if len(msg) > 0:
                    tqdm.write(
                        ' -- %s Done processing: %s -> WARNINGS:\n%s' %
                        (print_datetime(), result['session'], '\n'.join(msg)))

                # insert information in gamit_stats
                try:
                    cnn = dbConnection.Cnn(
                        'gnss_data.cfg')  # type: dbConnection.Cnn
                    cnn.insert('gamit_stats', result)
                    cnn.close()
                except dbConnection.dbErrInsert as e:
                    tqdm.write(
                        ' -- %s Error while inserting GAMIT stat for %s: %s' %
                        (print_datetime(), result['session'], str(e)))

            else:
                tqdm.write(' -- %s Done processing: %s -> FATAL:\n'
                           '    > Failed to complete. Check monitor.log:\n%s' %
                           (print_datetime(), result['session'],
                            indent('\n'.join(result['fatals']), 4)))
                # write FATAL to file
                file_append(
                    'FATAL.log',
                    'ON %s session %s -> FATAL: Failed to complete. Check monitor.log\n%s\n'
                    % (print_datetime(), result['session'],
                       indent('\n'.join(result['fatals']), 4)))
        else:
            tqdm.write(
                ' -- %s Error in session %s message from node follows -> \n%s'
                % (print_datetime(), result['session'], result['error']))

    else:
        tqdm.write(
            ' -- %s Fatal error on node %s message from node follows -> \n%s' %
            (print_datetime(), job.ip_addr, job.exception))
Example #3
def write_error(folder, filename, msg):
    # @todo why are retries used?
    # append with retries, just in case...
    count = 0
    while True:
        try:
            file_append(os.path.join(folder, filename), msg)
            return
        except IOError as e:
            if count < 3:
                count += 1
            else:
                raise IOError(str(e) + ' after 3 retries')
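
The retry loop above gives up after three additional attempts but retries immediately, which rarely helps if the IOError is caused by a briefly unavailable file system (e.g. a network share). A hedged variant, assuming a short fixed delay between attempts is acceptable, might look like this:

import os
import time

def write_error_with_backoff(folder, filename, msg, retries=3, delay=1.0):
    # illustrative variant of write_error: pause between attempts instead of retrying immediately
    for attempt in range(retries + 1):
        try:
            file_append(os.path.join(folder, filename), msg)
            return
        except IOError as e:
            if attempt == retries:
                raise IOError('%s after %d retries' % (str(e), retries))
            time.sleep(delay)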
Example #4
def output_handle(callback):

    messages = [outmsg.errors for outmsg in callback]

    if any(messages):
        tqdm.write(
            ' >> There were unhandled errors during this batch. Please check errors_pyScanArchive.log for details'
        )

    # log any errors that are encountered during parallel execution
    for msg in messages:
        if msg:
            file_append(
                'errors_amend.log',
                'ON ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') +
                ' an unhandled error occurred:\n' + msg + '\n' +
                'END OF ERROR =================== \n\n')
    return []
Example #5
    def finish(self):
        try:
            # delete everything inside the processing dir
            shutil.rmtree(self.pwd_brdc)
            shutil.rmtree(self.pwd_igs)

            # remove files in tables
            for ftype in ('*.grid', '*.dat', '*.apr'):
                for ff in glob.glob(os.path.join(self.pwd_tables, ftype)):
                    os.remove(ff)

            # remove processing files
            for ftype in ('b*', 'cfmrg*', 'DPH.*', 'eq_rename.*', 'g*', 'k*', 'p*', 'rcvant.*', 'y*'):
                for ff in glob.glob(os.path.join(os.path.join(self.pwd, self.date.ddd()), ftype)):
                    os.remove(ff)

            try:
                if not os.path.exists(os.path.dirname(self.solution_pwd)):
                    os.makedirs(os.path.dirname(self.solution_pwd))
            except OSError:
                # race condition: several processes may try to create the same folder
                # if an OSError occurs, ignore and continue
                pass

            # the solution folder exists because it was created by GamitSession to start the processing.
            # erase it to upload the result
            if os.path.exists(self.solution_pwd):
                shutil.rmtree(self.solution_pwd)

            # execute final step: copy to self.solution_pwd
            shutil.copytree(self.pwd, self.solution_pwd, symlinks=True)
            # remove the remote pwd
            shutil.rmtree(self.pwd)

        except:
            msg = traceback.format_exc() + '\nProcessing %s date %s on node %s' \
                  % (self.params['NetName'], self.date.yyyyddd(), platform.node())

            file_append(os.path.join(self.pwd, 'monitor.log'), 
                        now_str() +
                        ' -> ERROR in pyGamitTask.finish()\n%s' % msg)
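
The try/except OSError guard used before os.makedirs in this example (and again in Example #8) exists only to tolerate the race where several processes create the same folder at once. On Python 3.2+ the same effect is available through the exist_ok flag; a sketch of that simplification, assuming a Python 3 interpreter:

import os

def ensure_dir(path):
    # exist_ok=True silently accepts a folder created first by another process,
    # replacing the try/except OSError pattern shown above
    os.makedirs(path, exist_ok=True)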
Example #6
def output_handle(job):

    if job.result is not None:
        apr = job.result[0]
        stninfo = job.result[1]
        log = job.result[2]
        metafile = job.result[3]

        # write the APR and sigmas
        # written in ENU, not NEU, as specified by Abel
        meta_path = './' + metafile
        if apr is not None:
            file_append(meta_path + '.apr', apr + '\n')

        if stninfo is not None:
            file_append(meta_path + '.info', stninfo + '\n')

        # write a log line for debugging
        if log is not None:
            file_append(meta_path + '.log', log + '\n')

    elif job.exception:
        tqdm.write(' -- There were unhandled errors during this batch: ' +
                   job.exception)
Example #7
def pull_rinex(cnn, date, Config, JobServer):

    # Before starting the sync, determine if there were any station code changes that will require file deletions
    # in AWS. Join aws_sync with stations: if an entry in aws_sync has no record in stations, the station was
    # renamed and needs to be deleted. It will be resent in this run.
    rs = cnn.query(
        'SELECT a."NetworkCode", a."StationCode", a."StationAlias" FROM aws_sync as a '
        'LEFT JOIN stations as s on '
        'a."NetworkCode" = s."NetworkCode" and '
        'a."StationCode" = s."StationCode" '
        'WHERE "Year" = %i AND "DOY" = %i AND s."StationCode" IS NULL' %
        (date.year, date.doy))

    deletes = rs.dictresult()

    for stn in deletes:
        # produce a single file with the deletions that need to occur in the AWS
        file_append(
            'file_ops.log',
            'rm %s/%s* # %s.%s not found in stations table with net.stn code declared in aws_sync\n'
            % (date.yyyyddd().replace(' ', '/'), stn['StationAlias'],
               stn['NetworkCode'], stn['StationCode']))

        # delete the records from aws_sync
        cnn.query(
            'DELETE FROM aws_sync WHERE "Year" = %i AND "DOY" = %i AND "NetworkCode" = \'%s\' AND '
            '"StationCode" = \'%s\'' %
            (date.year, date.doy, stn['NetworkCode'], stn['StationCode']))

    # Join aws_sync with stationalias (stationalias is FK-ed to stations).
    # If an entry in aws_sync with StationCode <> StationAlias has no record in stationalias, OR
    # the alias declared in stationalias differs from the alias in aws_sync, delete it from AWS.
    # It will be resent in this batch.
    rs = cnn.query(
        'SELECT a."NetworkCode", a."StationCode", a."StationAlias" FROM aws_sync as a '
        'LEFT JOIN stationalias as sa on '
        'a."NetworkCode" = sa."NetworkCode" and '
        'a."StationCode" = sa."StationCode" '
        'WHERE "Year" = %i AND "DOY" = %i AND '
        'a."StationAlias" <> sa."StationAlias" OR '
        '(sa."StationAlias" IS NULL AND a."StationCode" <> a."StationAlias")' %
        (date.year, date.doy))

    deletes = rs.dictresult()

    for stn in deletes:
        # produce a single file with the deletions that need to occur in the AWS
        file_append(
            'file_ops.log',
            'rm %s/%s* # alias declared in aws_sync for %s.%s does not match alias in stationalias table\n'
            % (date.yyyyddd().replace(' ', '/'), stn['StationAlias'],
               stn['NetworkCode'], stn['StationCode']))

        # delete the records from aws_sync
        cnn.query(
            'DELETE FROM aws_sync WHERE "Year" = %i AND "DOY" = %i AND "NetworkCode" = \'%s\' AND '
            '"StationCode" = \'%s\'' %
            (date.year, date.doy, stn['NetworkCode'], stn['StationCode']))

    # check the individual files for this day. All files reported as uploaded should have a match in the rinex_proc
    # table, otherwise this could be a station split or deletion. If that's the case, order their deletion from the AWS
    rs = cnn.query(
        'SELECT a."NetworkCode", a."StationCode", a."StationAlias" FROM aws_sync as a '
        'LEFT JOIN rinex_proc as rx on '
        'a."NetworkCode" = rx."NetworkCode" and '
        'a."StationCode" = rx."StationCode" and '
        'a."Year"        = rx."ObservationYear" and '
        'a."DOY"         = rx."ObservationDOY" '
        'WHERE "Year" = %i AND "DOY" = %i AND '
        'rx."StationCode" IS NULL ' % (date.year, date.doy))

    deletes = rs.dictresult()

    for stn in deletes:
        # produce a single file with the deletions that need to occur in the AWS
        file_append(
            'file_ops.log',
            'rm %s/%s* # rinex file for %s.%s could not be found in the rinex_proc table\n'
            % (date.yyyyddd().replace(' ', '/'), stn['StationAlias'],
               stn['NetworkCode'], stn['StationCode']))

        # delete the records from aws_sync
        cnn.query(
            'DELETE FROM aws_sync WHERE "Year" = %i AND "DOY" = %i AND "NetworkCode" = \'%s\' AND '
            '"StationCode" = \'%s\'' %
            (date.year, date.doy, stn['NetworkCode'], stn['StationCode']))

    ####################################################################################################################
    # continue with sync of files
    ####################################################################################################################

    # behavior requested by Abel: ALWAYS output the metadata but don't output a RINEX if already synced.
    rs = cnn.query(
        'SELECT rinex_proc.* FROM rinex_proc '
        'WHERE "ObservationYear" = %i AND "ObservationDOY" = %i AND "Completion" >= 0.3'
        % (date.year, date.doy))

    rinex = rs.dictresult()

    pbar = tqdm(total=len(rinex), ncols=80)

    metafile = date.yyyy() + '/' + date.ddd() + '/' + date.yyyyddd().replace(
        ' ', '-')

    date_subpath = date.yyyy() + '/' + date.ddd()
    date_path = './' + date_subpath
    # following Abel's request, make a subdir for the files
    lele_path = '/media/leleiona/aws-files/' + date_subpath

    for p in (date_path, lele_path):
        if not os.path.isdir(p):
            os.makedirs(p)

    # write the header to the .info file
    file_write(
        './' + metafile + '.info',
        '*SITE  Station Name      Session Start      Session Stop       Ant Ht   HtCod  Ant N    Ant E    '
        'Receiver Type         Vers                  SwVer  Receiver SN           Antenna Type     Dome   '
        'Antenna SN          \n')

    modules = ('dbConnection', 'pyETM', 'pyDate', 'pyRinex', 'pyStationInfo',
               'pyOptions', 'pyArchiveStruct', 'os', 'numpy', 'traceback',
               'platform', 'Utils', 'shutil')

    depfuncs = (window_rinex, sigmas_neu2xyz)

    JobServer.create_cluster(rinex_task,
                             depfuncs,
                             output_handle,
                             pbar,
                             modules=modules)

    for rnx in rinex:
        JobServer.submit(rnx['NetworkCode'], rnx['StationCode'], date,
                         rnx['ObservationFYear'], metafile)

    JobServer.wait()

    pbar.close()

    JobServer.close_cluster()

    print('Done, chau!')
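
The DELETE statements in this example are assembled by interpolating values directly into the SQL string, which depends on every value being trusted and correctly quoted. Purely as an illustration (the dbConnection.Cnn wrapper used here may not accept bound parameters), the same statement issued through a raw psycopg2 connection with server-side parameters would look like this:

def delete_aws_sync_record(conn, year, doy, network_code, station_code):
    # hypothetical sketch: psycopg2 bound parameters instead of string interpolation;
    # conn is assumed to be an open psycopg2 connection
    with conn, conn.cursor() as cur:
        cur.execute(
            'DELETE FROM aws_sync WHERE "Year" = %s AND "DOY" = %s AND '
            '"NetworkCode" = %s AND "StationCode" = %s',
            (year, doy, network_code, station_code))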
Example #8
    def start(self, dirname, year, doy, dry_run=False):
        monitor_open = False

        try:
            # copy the folder created by GamitSession in the solution_pwd to the remote_pwd (pwd)
            try:
                if not os.path.exists(os.path.dirname(self.pwd)):
                    os.makedirs(os.path.dirname(self.pwd))
            except OSError:
                # race condition: several processes may try to create the same folder
                # if an OSError occurs, ignore and continue
                pass

            # if the local folder exists (due to previous incomplete processing), erase it
            if os.path.exists(self.pwd):
                shutil.rmtree(self.pwd)

            # ready to copy the shared solution_dir to pwd
            shutil.copytree(self.solution_pwd, self.pwd, symlinks=True)

            with file_open(os.path.join(self.pwd, 'monitor.log'), 'a') as monitor:
                monitor_open = True

                def log(s):
                    monitor.write(now_str() + ' -> ' + s + '\n')

                log('%s %i %i executing on %s' % (dirname, year, doy, platform.node()))
                log('fetching orbits')

                try:
                    Sp3 = pySp3.GetSp3Orbits(self.orbits['sp3_path'], self.date, self.orbits['sp3types'],
                                             self.pwd_igs, True)  # type: pySp3.GetSp3Orbits

                except pySp3.pySp3Exception:
                    log('could not find principal orbits, fetching alternative')

                    # try alternative orbits
                    if self.options['sp3altrn']:
                        Sp3 = pySp3.GetSp3Orbits(self.orbits['sp3_path'], self.date, self.orbits['sp3altrn'],
                                                 self.pwd_igs, True)  # type: pySp3.GetSp3Orbits
                    else:
                        raise

                if Sp3.type != 'igs':
                    # rename file
                    shutil.copyfile(Sp3.file_path, Sp3.file_path.replace(Sp3.type, 'igs'))

                log('fetching broadcast orbits')

                pyBrdc.GetBrdcOrbits(self.orbits['brdc_path'], self.date, self.pwd_brdc,
                                     no_cleanup=True)  # type: pyBrdc.GetBrdcOrbits

                for rinex in self.params['rinex']:

                    log('fetching rinex for %s %s %s %s'
                        % (stationID(rinex), rinex['StationAlias'],
                           '{:10.6f} {:11.6f}'.format(rinex['lat'], rinex['lon']), 'tie' if rinex['is_tie'] else ''))

                    try:
                        with pyRinex.ReadRinex(rinex['NetworkCode'],
                                               rinex['StationCode'],
                                               rinex['source'], False) as Rinex:  # type: pyRinex.ReadRinex

                            # WARNING! Some multiday RINEX files were generating conflicts: the RINEX has a name,
                            # say, tuc12302.10o, and the program wants to rename it to tuc12030.10o, but because it
                            # is a multiday file it was already split and renamed to tuc12300.10o during __init__,
                            # and additional folders were generated with the information for each file. Therefore,
                            # find the rinex that corresponds to the date being processed and use that one instead
                            # of the original file. Such files are not allowed by pyArchiveService, but the "start
                            # point" of the database (i.e. the files already in the folders read by pyScanArchive)
                            # has these problems.

                            # figure out if this station has been affected by an earthquake
                            # if so, window the data
                            if rinex['jump'] is not None:
                                monitor.write(
                                    '                    -> RINEX file has been windowed: ETM detected jump on ' +
                                    rinex['jump'].datetime().strftime('%Y-%m-%d %H:%M:%S') + '\n')

                            if Rinex.multiday:
                                # find the rinex that corresponds to the session being processed
                                for Rnx in Rinex.multiday_rnx_list:
                                    if Rnx.date == self.date:
                                        Rnx.rename(rinex['destiny'])

                                        if rinex['jump'] is not None:
                                            self.window_rinex(Rnx, rinex['jump'])
                                        # before creating local copy, decimate file
                                        Rnx.decimate(30)
                                        Rnx.purge_comments()
                                        Rnx.compress_local_copyto(self.pwd_rinex)
                                        break
                            else:
                                Rinex.rename(rinex['destiny'])

                                if rinex['jump'] is not None:
                                    self.window_rinex(Rinex, rinex['jump'])
                                # before creating local copy, decimate file
                                Rinex.decimate(30)
                                Rinex.purge_comments()
                                Rinex.compress_local_copyto(self.pwd_rinex)

                    except (OSError, IOError):
                        log('An error occurred while trying to copy ' +
                            rinex['source'] + ' to ' + rinex['destiny'] + ': File skipped.')

                    except (pyRinex.pyRinexException, Exception) as e:
                        log('An error occurred while trying to copy ' +
                            rinex['source'] + ': ' + str(e))

                log('executing GAMIT')

                # create the run script
                self.create_replace_links()
                self.create_run_script()
                self.create_finish_script()

            # run the script to replace the links of the tables directory
            self.p = subprocess.Popen('find ./tables ! -name "otl.grid" -type l -exec ./replace_links.sh {} +',
                                      shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.pwd)
            _, _ = self.p.communicate()

            # now execute the run script
            if not dry_run:
                self.p = subprocess.Popen('./run.sh', shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                          cwd=self.pwd)

                self.stdout, self.stderr = self.p.communicate()

                self.p = subprocess.Popen('./finish.sh', shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                          cwd=self.pwd)

                self.stdout, self.stderr = self.p.communicate()

                # check for any fatals
                self.p = subprocess.Popen('grep -q \'FATAL\' monitor.log', shell=True, stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE, cwd=self.pwd)

                _, _ = self.p.communicate()

                self.success = (self.p.returncode != 0)

            # output statistics to the parent to display
            result = self.parse_monitor(self.success)

            file_append(os.path.join(self.pwd, 'monitor.log'),
                        now_str() + ' -> return to Parallel.GAMIT\n')

            # no matter the result of the processing, move folder to final destination
            if not dry_run:
                self.finish()

            return result

        except:

            msg = traceback.format_exc() + '\nProcessing %s date %s on node %s' \
                  % (self.params['NetName'], self.date.yyyyddd(), platform.node())

            # DDG: do not attempt to write to monitor.log or do any file operations (maybe permission problem)
            # problem might occur during copytree or rmtree or some other operation before opening monitor.log
            if monitor_open:
                file_append(os.path.join(self.pwd, 'monitor.log'),
                            now_str() +
                            ' -> ERROR in pyGamitTask.start()\n%s' % msg)

                # the solution folder exists because it was created by GamitSession to start the processing.
                # erase it to upload the result
                if os.path.exists(self.solution_pwd):
                    shutil.rmtree(self.solution_pwd)

                # execute final error step: copy to self.solution_pwd
                shutil.copytree(self.pwd, self.solution_pwd, symlinks=True)
                # remove the remote pwd
                shutil.rmtree(self.pwd)

                # output statistics to the parent to display
                result = self.parse_monitor(False)
            else:
                result = {'session'             : '%s %s' % (self.date.yyyyddd(), self.params['DirName']),
                          'Project'             : self.params['NetName'],
                          'subnet'              : self.params['subnet'],
                          'Year'                : self.date.year,
                          'DOY'                 : self.date.doy,
                          'FYear'               : self.date.fyear,
                          'wl'                  : 0,
                          'nl'                  : 0,
                          'nrms'                : 0,
                          'relaxed_constrains'  : '',
                          'max_overconstrained' : '',
                          'node'                : platform.node(),
                          'execution_time'      : 0,
                          'execution_date'      : 0,
                          'missing'             : '',
                          'success'             : False,
                          'fatals'              : []
                          }

            result['error'] = msg

            # return useful information to the main node
            return result
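
To detect fatal runs, the script above shells out to grep -q and then inverts the return code (grep exits 0 when it finds 'FATAL', so a non-zero return code means the log is clean). The same check can be performed without a subprocess; a small sketch, assuming monitor.log comfortably fits in memory:

import os

def gamit_run_succeeded(pwd):
    # True when no FATAL entry appears in monitor.log (mirrors the inverted grep -q return code)
    with open(os.path.join(pwd, 'monitor.log')) as f:
        return 'FATAL' not in f.read()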
Example #9
def debug(s):
    if DEBUG:
        file_append('/tmp/db.log', "DB: %s\n" % s)
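
The debug helper above appends straight to /tmp/db.log whenever the DEBUG flag is set. An alternative sketch using the standard logging module (not what this code base does, just an illustration of the same behavior):

import logging

logging.basicConfig(filename='/tmp/db.log', level=logging.DEBUG,
                    format='DB: %(message)s')
logger = logging.getLogger('db')

def debug(s):
    # routed through the logging module instead of appending to the file by hand
    logger.debug(s)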