def save_dates_from_task(task_name, savefile='/home/apmechev/.prefactor_v3.0_CI.pkl', **context):
    """Merge the xcom data of task_name into the pickled history file,
    stamping every key pushed by the task with the current date."""
    task_data = get_task_instance(context, task_name)
    with open(savefile, 'rb') as prev_file:
        prev_data = pickle.load(prev_file)
    merged_data = _merge_two_dicts(prev_data, task_data)
    for key in task_data.keys():
        merged_data[key]['saved_date'] = datetime.datetime.now()
    save_dates(merged_data, savefile)
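# A minimal sketch of the _merge_two_dicts helper assumed above: a copy of the
# first dict updated by the second, so the task's data overrides the saved
# history. The real helper may differ.
def _merge_two_dicts(first, second):
    merged = first.copy()
    merged.update(second)
    return merged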
def get_staged_srms(self, context):
    """Fetch the srmfile location from the staging task's xcom and parse it."""
    task_name = self.staging_task['name']
    task_parent_dag = self.staging_task['parent_dag']
    srm_xcom = get_task_instance(context, task_name, task_parent_dag)
    srmfile = srm_xcom['srmfile']
    if srmfile is None:
        raise RuntimeError("Could not get the srm list from the " +
                           str(self.staging_task) + " task")
    logging.info("Srmfile is " + srmfile)
    return self.get_list_from_files(srmfile)
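# A minimal sketch of what get_list_from_files might do, assuming the srmfile
# holds one SURL per line and that srmlist.srmlist behaves like a list; the
# real implementation may differ.
def get_list_from_files(self, srmfile):
    surls = srmlist.srmlist()
    with open(srmfile, 'r') as listfile:
        for line in listfile:
            if line.strip():
                surls.append(line.strip())
    return surls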
def execute(self, context):
    """Executes the staging command for the list of srms requested."""
    if isinstance(self.srmfile, dict):
        # The srmfile location lives in the xcom of an upstream task.
        task_name = self.srmfile['name']
        task_parent_dag = self.srmfile['parent_dag']
        sbx_xcom = get_task_instance(context, task_name, task_parent_dag)
        self.srmfile = sbx_xcom[self.srmkey]
    elif not os.path.isfile(self.srmfile) and not hasattr(self.srms, '__iter__'):
        # Fall back to an Airflow Variable holding the srmfile location.
        self.srmfile = Variable.get(self.srmfile)
    if not os.path.isfile(self.srmfile):
        self.status = State.UPSTREAM_FAILED
        raise AirflowException(
            "Input srmfile doesn't exist and srm list is not a list")
    self.progress = {'Percent done': 0}
    surl_list = srmlist.srmlist()  # holds all the srms (both from file and list argument)
    self.surl_list = self.build_srm_list(surl_list)
    try:
        self.stage_ID = stager_access.stage(list(self.surl_list))
    except xmlrpclib.Fault:
        # The stager occasionally faults; retry once after a minute.
        sleep(60)
        self.stage_ID = stager_access.stage(list(self.surl_list))
    logging.info("Successfully sent staging command for " +
                 stager_access.get_progress()[str(self.stage_ID)]['File count'] +
                 " files.")
    logging.info("StageID= " + str(self.stage_ID))
    self.state = State.RUNNING
    sleep(120)
    try:
        self.progress = stager_access.get_progress()[str(self.stage_ID)]
    except Exception:
        pass
    self.started = False
    # Hand the surl list downstream as a temporary file.
    f = NamedTemporaryFile(delete=False)
    for surl in surl_list:
        f.write(bytes(surl, encoding='utf8'))
    f.close()
    while self.still_running():
        sleep(120)
    if self.state == State.SUCCESS:
        return {'srmfile': str(f.name)}
    self.state = State.FAILED
    return {'srmfile': str(f.name)}
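# A hedged sketch of the still_running() poll used above; the completion check
# via 'Percent done' mirrors the initialisation of self.progress, but the exact
# fields returned by stager_access.get_progress() are assumptions.
def still_running(self):
    try:
        self.progress = stager_access.get_progress()[str(self.stage_ID)]
    except KeyError:
        # The stage ID has dropped out of the progress report; assume done.
        self.state = State.SUCCESS
        return False
    if float(self.progress.get('Percent done', 0)) >= 100:
        self.state = State.SUCCESS
        return False
    return True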
def modify_fields(self, context):
    """If the append_task exists, this method adds all key:value pairs in its
    xcom to every token. This is used, for example, to take the results of the
    calibration task and name them 'CAL_RESULTS'."""
    logging.info(self.append_task)
    append_xcom = get_task_instance(
        context, self.append_task['name'],
        parent_dag=self.append_task['parent_dag'])
    for key in append_xcom:
        for token in self.token_list:
            token[key] = append_xcom[key]
    self.token_list.save()
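# For illustration (hypothetical xcom): if the append_task pushed
# {'CAL_RESULTS': 'gsiftp://example/cal_solutions'}, every token in token_list
# gains a 'CAL_RESULTS' key with that value before the list is saved.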
def find_upload_file(self, context):
    """Checks whether the file exists (if fed a filename as a parameter).
    Otherwise it looks up the xcom of the parset task, takes the value keyed
    by self.upload_file.split('/')[-1] (i.e. the filename), checks that it
    exists, and returns it."""
    if not self.parset_task and os.path.exists(self.upload_file):
        return self.upload_file  # no parset_task, just use the file parameter
    parset_xcom = get_task_instance(context, self.parset_task, self.parent_dag)
    parset_filename = self.upload_file.split('/')[-1]
    parset_file_loc = parset_xcom[parset_filename]
    if os.path.exists(parset_file_loc):
        return parset_file_loc
    raise Exception("Cannot find the parset file")
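# For illustration (hypothetical values): with upload_file set to
# '/parsets/Pre-Facet-Calibrator.parset' and a parset task that pushed
# {'Pre-Facet-Calibrator.parset': '/tmp/tmpXYZ/Pre-Facet-Calibrator.parset'},
# find_upload_file returns the /tmp path, provided it exists on disk.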
def execute(self, context):
    logging.info(self.srmkey)
    if isinstance(self.srmfile, dict):
        task_name = self.srmfile['name']
        task_parent_dag = self.srmfile['parent_dag']
        sbx_xcom = get_task_instance(context, task_name, task_parent_dag)
        self.srmfile = sbx_xcom[self.srmkey]
    # Report 'staged': False unconditionally until gfal2 works on py3+;
    # the staging check below is unreachable until this early return goes.
    return {'staged': False, 'srmfile': str(self.srmfile)}
    staging_statuses = state_all.main(self.srmfile, verbose=False)
    logging.info(staging_statuses)
    if state_all.percent_staged(staging_statuses) > self.threshold:
        return {'staged': True, 'srmfile': str(self.srmfile)}
    return {'staged': False, 'srmfile': str(self.srmfile)}
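# A hedged sketch of how a downstream branch might consume this operator's
# xcom, in the style of exit_if_no_CI_tests below; the task-id arguments and
# the helper name are hypothetical.
def branch_on_staged(check_task, staged_task, stage_task, **context):
    xcom = get_task_instance(context, check_task)
    return staged_task if xcom['staged'] else stage_task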
def execute(self, context):
    """Builds a PiCaS token for every (group of) staged subband(s), uploads
    the tokens to the tokens database, and returns a summary of the result."""
    srms = self.get_staged_srms(context)
    if not srms:
        logging.warning("Could not get the list of staged srms!")
    logging.info("The list of staged srms is {0}".format(srms))
    pc = get_picas_credentials.picas_cred()
    if self.pc_database:
        pc.database = self.pc_database
    if self.fields_task:
        task_name = self.fields_task['name']
        task_parent_dag = self.fields_task['parent_dag']
        try:
            app = get_task_instance(
                context, task_name, task_parent_dag)['sanitized_field_name']
        except KeyError:
            app = get_task_instance(
                context, task_name, task_parent_dag)['field_name']
    else:
        app = srms.obsid
    self.t_type = self.t_type + app
    with open(self.tok_config, 'rb') as config_file:
        tok_settings = json.load(config_file)
    token_variables = tok_settings['Job']['variables']
    client = CouchDB(pc.user, pc.password,
                     url='https://picas-lofar.grid.surfsara.nl:6984',
                     connect=True)
    self.db = client[pc.database]
    pipe_type = token_variables['PIPELINE_STEP']
    self.token_list = TokenList(database=self.db, token_type=self.t_type)
    if self.files_per_token != 1:
        d = slice_dicts(
            srms.sbn_dict(pref=self.subband_prefix, suff=self.subband_suffix),
            self.files_per_token)
    else:
        d = {}
        for i in srms.sbn_dict(pref=self.subband_prefix,
                               suff=self.subband_suffix):
            d[i[0]] = i[1]
    for token_file in d:
        logging.info("Token file is {}".format(token_file))
        with NamedTemporaryFile(delete=False) as savefile:
            for line in d[token_file]:
                savefile.write("{}\n".format(line).encode('utf-8'))
        # Token ids follow pref3_$FIELDNAME_$OBSID_$PIPELINE_SB$SBNUMBER
        pipeline_step = pipe_type.split('_')[1]
        if 'cal' in pipe_type:
            token_id = "{}_{}_{}".format(self.t_type, srms.obsid, pipeline_step)
        elif 'targ' in pipe_type:
            token_id = "{}_{}_{}_SB{}".format(self.t_type, srms.obsid,
                                              pipeline_step, token_file)
        else:
            token_id = "fields_$FIELDNAME_$OBSID_$PIPELINE: {}_{}_{}_{}_{}".format(
                self.t_type, token_file, srms.obsid, pipe_type.split('_')[1],
                time.time())
        logging.info(token_id)
        self.token_list.append(
            self.build_token(token_id,
                             attachment={'name': 'srm.txt',
                                         'location': savefile.name}))
        self.token_list[-1]['STARTSB'] = token_file
        os.remove(savefile.name)
    self.token_list.add_token_views()
    if self.append_task:
        logging.info(self.append_task)
        logging.info(context)
        self.modify_fields(context)
    for token in self.token_list:
        token["OBSID"] = srms.obsid
        token['RESULTS_DIR'] += "/" + str(srms.obsid)
    token_variables['OBSID'] = srms.obsid
    token_variables['RESULTS_DIR'] += "/" + str(srms.obsid)
    # Create the result directory if it does not exist.
    create_gsiftp_directory(token_variables['RESULTS_DIR'])
    logging.info('Token type is ' + self.t_type)
    logging.info('Tokens are available at '
                 'https://picas-lofar.grid.surfsara.nl:6984/_utils/database.html?'
                 + pc.database + '/_design/' + self.t_type +
                 '/_view/overview_total')
    logging.info("Token settings are:")
    for i in token_variables.items():
        logging.info(str(i))
    logging.debug(srms)
    self.token_list.save()
    results = dict()
    results['num_jobs'] = len(d)
    results['output_dir'] = token_variables['RESULTS_DIR']
    logging.info("Output directory is {}".format(results['output_dir']))
    results['token_type'] = str(self.t_type)
    results['view'] = pipe_type
    results['OBSID'] = token_variables['OBSID']
    results['token_ids'] = [i['_id'] for i in self.token_list]
    return results
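# A minimal sketch of the slice_dicts helper used above, assuming it groups the
# (subband, srm) pairs yielded by sbn_dict() into lists of at most slice_size
# srms, keyed by the first subband of each group; the real helper may differ.
def slice_dicts(pairs, slice_size=10):
    sliced = {}
    group_key, group = None, []
    for subband, srm in pairs:
        if group_key is None:
            group_key = subband
        group.append(srm)
        if len(group) == slice_size:
            sliced[group_key] = group
            group_key, group = None, []
    if group:
        sliced[group_key] = group
    return sliced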
def exit_if_no_CI_tests(ci_dates_task, continue_task, exit_task, **context):
    """Branch callable: return the id of the task to follow, depending on
    whether the CI dates task pushed any data to its xcom."""
    task_data = get_task_instance(context, ci_dates_task)
    if task_data:
        return continue_task
    return exit_task
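# A hedged sketch of wiring this callable into a DAG with a BranchPythonOperator;
# the task ids, the `dag` object, and the Airflow 1.x-style import and
# provide_context flag (matching the **context signature) are assumptions.
from airflow.operators.python_operator import BranchPythonOperator

branch_on_ci = BranchPythonOperator(
    task_id='branch_on_CI_tests',
    python_callable=exit_if_no_CI_tests,
    op_args=['CI_dates', 'run_CI_tests', 'exit_no_CI_tests'],
    provide_context=True,  # Airflow 1.x: pass the context as **kwargs
    dag=dag,  # assumed to exist in the surrounding DAG file
)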