import logging
from time import sleep

from globus_sdk import TransferData

# format_debug is a project-local helper that formats exception tracebacks


def transfer(client, remote_uuid, local_uuid, file_list, event=None):
    """
    Setup a file transfer between two endpoints
    
    Parameters:
        remote_uuid (str): the globus uuid of the source endpoint
        local_uuid (str): the globus uuid of the destination endpoint
        file_list (list): a list of dictionaries with keys remote_path, local_path
        event (Threadding.Event): a kill event for running inside a thread
    """

    # create the transfer object
    try:
        task_label = 'Processflow auto transfer'
        transfer_task = TransferData(client,
                                     remote_uuid,
                                     local_uuid,
                                     sync_level='checksum',
                                     label=task_label)
    except Exception as e:
        logging.error('Error creating transfer task')
        logging.error(format_debug(e))
        return False, None

    # add in our transfer items
    for datafile in file_list:
        transfer_task.add_item(source_path=datafile['remote_path'],
                               destination_path=datafile['local_path'],
                               recursive=False)

    # Start the transfer
    task_id = None
    result = None
    try:
        result = client.submit_transfer(transfer_task)
        task_id = result["task_id"]
        logging.info('starting transfer with task id %s', task_id)
    except Exception as e:
        if result:
            logging.error("result: %s", str(result))
        logging.error("Could not submit the transfer")
        logging.error(format_debug(e))
        return False, None

    # loop until transfer is complete
    while True:
        status = client.get_task(task_id)
        if status['status'] == 'SUCCEEDED':
            return True, None
        elif status['status'] == 'FAILED':
            return False, status.get('nice_status_details')
        if event and event.is_set():
            client.cancel_task(task_id)
            return None, None
        sleep(10)
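
A minimal usage sketch for transfer(), assuming a Globus transfer token has
already been obtained out of band; the token, endpoint UUIDs, and paths are
placeholders:

import globus_sdk
from threading import Event

authorizer = globus_sdk.AccessTokenAuthorizer('<transfer-token>')
transfer_client = globus_sdk.TransferClient(authorizer=authorizer)

kill_event = Event()
files = [{'remote_path': '/remote/run/case.cam.h0.1850-01.nc',
          'local_path': '/local/input/case.cam.h0.1850-01.nc'}]
success, error = transfer(transfer_client,
                          remote_uuid='<source-endpoint-uuid>',
                          local_uuid='<destination-endpoint-uuid>',
                          file_list=files,
                          event=kill_event)
if not success:
    print('transfer failed: {}'.format(error))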
    def run(self):
        """
        Load the requested handler modules and run each one in a process pool
        """
        handlers = os.listdir(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         self._handlers_path))
        self._handlers = list()
        for handler in handlers:
            if not handler.endswith('.py'):
                continue
            if handler == "__init__.py":
                continue

            module, _ = handler.rsplit('.', 1)
            if module not in self._var_list and self._var_list[0] != 'all':
                continue
            module_path = '.'.join([self._handlers_path, module])
            mod = import_module(module_path)
            met = getattr(mod, 'handle')
            self._handlers.append({module: met})

        print('--- printing handlers ---')
        for handler in self._handlers:
            for key, val in handler.items():
                # each handler, called with no arguments, reports its name
                print('\t' + val())

        print('\n--- calling handlers ---')
        print('--- running with {} processes ---'.format(self._nproc))
        self._pool = Pool(self._nproc)
        self._pool_res = list()

        for handler in self._handlers:
            for key, val in handler.items():
                kwds = {
                    'infile':
                    os.path.join(self._input_path,
                                 self._caseid + '.' + key + '.nc'),
                    'tables_dir':
                    self._handlers_path
                }
                if not os.path.exists(kwds['infile']):
                    print('File not found: {}'.format(kwds['infile']))
                    continue
                self._pool_res.append(
                    self._pool.apply_async(val, args=(), kwds=kwds))

        for res in self._pool_res:
            try:
                res.get()
            except Exception as e:
                print(format_debug(e))
        self._pool.close()
        self._pool.join()
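
run() only requires that each module in self._handlers_path exposes a callable
named handle; a hypothetical minimal handler module satisfying both call sites
above (called with no arguments for the name listing, and with infile and
tables_dir keywords for the real work):

# hypothetical handlers/tas.py
def handle(infile=None, tables_dir=None):
    """Process one variable; return a printable status string."""
    if infile is None:
        # the name-listing loop above calls handle() with no arguments
        return 'tas'
    # ... the real conversion of `infile` would happen here ...
    return 'tas: done'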
Example #3
import logging
from datetime import datetime
from random import uniform
from subprocess import PIPE, Popen
from time import sleep

# format_debug and print_message are project-local helpers


def _split_one(var, file_list, outfile):
    """
    Split a single variable from a list of self._file_list into the outfile

    Parameters:
        var (str): the name of the variable to extract
        inself._file_list (list): a list of strings that are the paths to history self._file_list to extract from
        outfile (str): a path to where the output file should be stored
    Returns:
        out (str): the stdout output returned from ncrcat
        err (str): the stderr output from ncrcat
    """
    start_time = datetime.now()
    # sleep to avoid printing errors
    # sleep(uniform(0.01, 0.1))
    # print_message(f'Starting {var}', 'ok')
    cmd = ['ncrcat', '-O', '-cv', var] + file_list + [outfile]
    msg = f'starting {var}'
    logging.info(msg)
    while True:
        try:
            proc = Popen(cmd, stderr=PIPE, stdout=PIPE)
            try:
                out, err = proc.communicate()
                if err:
                    return out, err
            except KeyboardInterrupt:
                sleep(uniform(0.01, 0.1))
                msg = f'  - killing {var}'
                proc.terminate()
                print_message(msg)
                return None, None
        except Exception as e:
            logging.error(format_debug(e))
            msg = "can't start process, retrying"
            print_message(msg)
            sleep(uniform(0.01, 0.1))
        else:
            break
    end_time = datetime.now()
    tdelta = end_time - start_time
    msg = f'finished {var} in {tdelta.seconds}.{tdelta.microseconds // 1000:03d} seconds'
    logging.info(msg)
    return msg, None
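
A usage sketch for _split_one, assuming the ncrcat tool from NCO is on the
PATH; the variable name and paths are placeholders:

import logging
from glob import glob

history_files = sorted(glob('/input/case.cam.h0.*.nc'))
out, err = _split_one('PRECC', history_files, '/output/PRECC.nc')
if err:
    logging.error(err)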
Example #4
import logging
from random import uniform
from subprocess import PIPE, Popen
from time import sleep

# format_debug is a project-local helper


def split_one(var, infiles, outfile):
    """
    Split a single variable from a list of files into the outfile
    """
    cmd = ['ncrcat', '-O', '-cv', var] + infiles + [outfile]
    msg = 'starting {}'.format(var)
    logging.info(msg)
    while True:
        try:
            proc = Popen(cmd, stderr=PIPE, stdout=PIPE)
            out, err = proc.communicate()
        except Exception as e:
            logging.error(format_debug(e))
            sleep(uniform(0.1, 0.5))
        else:
            break
    msg = 'finished {}'.format(var)
    logging.info(msg)
    return out, err
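
split_one is written as a per-variable worker, so a natural driver is a
multiprocessing Pool; a sketch under the assumption that each variable gets
its own output file (variable names and paths are placeholders):

from glob import glob
from multiprocessing import Pool

variables = ['PRECC', 'PRECL', 'TS']
infiles = sorted(glob('/input/case.cam.h0.*.nc'))
pool = Pool(len(variables))
results = [pool.apply_async(split_one, (var, infiles, '/output/{}.nc'.format(var)))
           for var in variables]
for res in results:
    out, err = res.get()
pool.close()
pool.join()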
Example #5
    def run(self):
        """
        run all the requested CMOR handlers
        """
        handlers = os.listdir(self._handlers_path)
        self._handlers = list()
        for handler in handlers:
            if not handler.endswith('.py'):
                continue
            if handler == "__init__.py":
                continue

            module, _ = handler.rsplit('.', 1)
            # ignore handlers for variables that weren't requested
            if module not in self._var_list and self._var_list[0] != 'all':
                continue
            module_path = '.'.join([self._handlers_path, module])
            # load the module, and extract the "handle" method
            try:
                mod = import_module(module_path)
                met = getattr(mod, 'handle')
            except Exception as e:
                msg = format_debug(e)
                print_message(f'Error loading handler for {module_path}')
                print_message(msg)
                logging.error(msg)
                continue
            else:
                msg = f'Loaded {mod}'
                if self._debug: print_message(msg, 'debug')
                logging.info(msg)
            self._handlers.append({module: met})
        
        # Setup the number of processes that will exist in the pool
        len_handlers = len(self._handlers)
        if self._proc_vars:
            ncpu = cpu_count()
            if len_handlers >= 100:
                self._nproc = 100 if ncpu > 100 else ncpu - 1
            else:
                self._nproc = len_handlers

        # only make as many processes as needed
        self._nproc = len_handlers if self._nproc > len_handlers else self._nproc
        if self._nproc == 0:
            msg = 'No handlers found'
            print_message(msg)
            logging.error(msg)
            sys.exit(1)

        if self._debug: print_message(f'running with {self._nproc} processes', 'debug')
        self._pool = Pool(self._nproc)
        self._pool_res = list()

        for handler in self._handlers:
            for key, val in handler.items():
                
                var_file = self.find_variable_file(key, self._input_path)
                if var_file is None:
                    continue
                var_path = os.path.join(
                    self._input_path,
                    var_file)
                kwds = {
                    'infile': var_path,
                    'tables': self._tables_path,
                    'user_input_path': self._user_input_path
                }

                _args = (kwds['infile'], kwds['tables'], kwds['user_input_path'])
                self._pool_res.append(
                    self._pool.apply_async(
                        val, args=_args, kwds={}))
        
        for idx, res in enumerate(self._pool_res):
            try:
                out = res.get(9999999)
                msg = f'Finished {out}, {idx + 1}/{len(self._pool_res)} jobs complete'
                print_message(msg, 'ok')
                logging.info(msg)
            except Exception as e:
                print(format_debug(e))
                logging.error(e)
        self.terminate()
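
find_variable_file() is referenced above but not shown; a plausible sketch of
such a method, under the assumption that input files are named after the
variable they contain:

import os

def find_variable_file(self, var, path):
    """Return the first .nc file in `path` whose name starts with `var`, else None."""
    for name in sorted(os.listdir(path)):
        if name.startswith(var) and name.endswith('.nc'):
            return name
    return None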
    def execute(self, event):
        """
        Start the transfer

        Parameters:
            event (threading.Event): event to trigger job cancellation
        """
        # reject the job if it isn't in a valid state
        self.prevalidate()
        if self.status != JobStatus.VALID:
            logging.error('Transfer job in invalid state')
            logging.error(str(self))
            return
        if not check_logged_in():
            self.status = JobStatus.INVALID
            logging.error('Transfer failed, not logged into globus')
            return
        self.start_time = datetime.now()
        # Get source and destination UUIDs
        srcendpoint = self.config.get('source_endpoint')
        dstendpoint = self.config.get('destination_endpoint')
        message = 'Starting setup for transfer job from {src} to {dst}'.format(
            src=srcendpoint, dst=dstendpoint)
        logging.info(message)

        # Log into globus and activate endpoints
        endpoints = [srcendpoint, dstendpoint]
        setup_globus(endpoints=endpoints,
                     event_list=self.event_list,
                     no_ui=not self.config.get('ui', True),
                     src=self.config.get('source_email'),
                     # 'destination_email' is an assumed config key
                     dst=self.config.get('destination_email'),
                     display_event=self.config.get('display_event'))
        client = get_client()
        # task_label = "{start} to {end}".format(
        #     start=self.file_list[0]['name'],
        #     end=self.file_list[-1]['name'])
        task_label = 'Autotransfer of {number} files at {time}'.format(
            number=len(self.file_list), time=time.strftime("%I-%M"))
        try:
            transfer_task = TransferData(client,
                                         srcendpoint,
                                         dstendpoint,
                                         sync_level='checksum',
                                         label=task_label)
        except Exception as e:
            logging.error('Error creating transfer task')
            logging.error(format_debug(e))
            self.status = JobStatus.FAILED
            return

        if not self.config['file_list']:
            logging.error('Unable to transfer files without a source list')
            self.status = JobStatus.FAILED
            return

        for datafile in self.config['file_list']:
            transfer_task.add_item(source_path=datafile['remote_path'],
                                   destination_path=datafile['local_path'],
                                   recursive=False)

        # Start the transfer
        task_id = None
        result = None
        try:
            result = client.submit_transfer(transfer_task)
            task_id = result["task_id"]
            logging.info('starting transfer with task id %s', task_id)
        except Exception as e:
            if result:
                logging.error("result: %s", str(result))
            logging.error("Could not submit the transfer")
            logging.error(format_debug(e))
            self.status = JobStatus.FAILED
            return

        # Poll the status of the transfer every 5 seconds
        number_transferred = -1
        while True:
            try:
                # retry transient API errors once per second
                while True:
                    try:
                        status = client.get_task(task_id)
                    except Exception:
                        time.sleep(1)
                    else:
                        break
                if status['status'] == 'SUCCEEDED':
                    logging.info('progress %d/%d', status['files_transferred'],
                                 status['files'])
                    percent_complete = 100.0
                    self.display_status(
                        percent_complete=percent_complete,
                        task_id=task_id,
                        num_completed=int(status['files_transferred']) +
                        int(status['files_skipped']),
                        num_total=status['files'])
                    message = 'Transfer job completed'
                    self.status = JobStatus.COMPLETED
                    return
                elif status['status'] == 'FAILED':
                    logging.error('Error transfering files %s',
                                  status.get('nice_status_details'))
                    self.status = JobStatus.FAILED
                    return
                elif status['status'] == 'ACTIVE':
                    if number_transferred < status['files_transferred']:
                        number_transferred = status['files_transferred']
                        logging.info('progress %d/%d',
                                     status['files_transferred'],
                                     status['files'])
                        percent_complete = (
                            float(status['files_transferred'] +
                                  status['files_skipped']) /
                            float(status['files'])) * 100
                        self.display_status(
                            percent_complete=percent_complete,
                            task_id=task_id,
                            num_completed=int(status['files_transferred']) +
                            int(status['files_skipped']),
                            num_total=status['files'])
                    self.status = JobStatus.RUNNING
                if event and event.is_set():
                    client.cancel_task(task_id)
                    # self.error_cleanup()
                    return
            except Exception as e:
                logging.error(format_debug(e))
                client.cancel_task(task_id)
                # self.error_cleanup()
                return
            time.sleep(5)
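
execute() blocks until the transfer finishes or fails, so it is typically
driven from a worker thread with an Event for cancellation; a minimal sketch,
assuming transfer_job is an instance of the class that owns execute():

from threading import Event, Thread

kill_event = Event()
worker = Thread(target=transfer_job.execute, args=(kill_event,))
worker.start()
# ... later, to cancel the transfer from the main thread:
# kill_event.set()
worker.join()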