# Imports assumed by the snippets in this section; the GRID_LRT module paths
# follow the package layout at the time of writing and may differ between
# versions.
import re
import subprocess

from cloudant.client import CouchDB
from GRID_LRT.auth import get_picas_credentials
from GRID_LRT.auth.get_picas_credentials import picas_cred
from GRID_LRT.Staging import srmlist
from GRID_LRT.token import caToken, TokenJsonBuilder, TokenList


def check_folder_for_files_from_tokens(task_id, dummy, number, **context):
    """Airflow callable: check that every token created by `task_id` is in
    status 'done' and that each Results_location exists on grid storage."""
    xcom_results = context['ti'].xcom_pull(task_ids=task_id)
    tokens = list(xcom_results['token_ids'])
    token_type = xcom_results['token_type']
    pc = picas_cred()
    client = CouchDB(pc.user,
                     pc.password,
                     url='https://picas-lofar.grid.surfsara.nl:6984',
                     connect=True)
    db = client[pc.database]
    tokenlist = TokenList(token_type=token_type, database=db)
    for token_id in tokens:
        tokenlist.append(
            caToken(database=db, token_type=token_type, token_id=token_id))
    tokenlist.fetch()
    for t in tokenlist:
        if t['status'] != 'done':
            raise RuntimeError(
                "Token {} is not in status 'done' but in status {}".format(
                    t['_id'], t['status']))
    print("All jobs in status 'done'")
    locations = [t['Results_location'] for t in tokenlist]
    if len(locations) < number:
        print("Only {} of {} files found!".format(len(locations), number))
    for output_file in locations:
        # uberftp -ls returns an empty listing if the file does not exist.
        c = subprocess.Popen(['uberftp', '-ls', output_file],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out = c.communicate()
        if not out[0].decode().strip().split('\n')[0]:
            print("{} not found".format(output_file))
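# A minimal sketch of wiring the callable above into an Airflow 1.x DAG.
# The DAG id, schedule, upstream task id 'token_creator', and the expected
# file count are illustrative assumptions, not part of the original code.
from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

with DAG('check_results_example',
         start_date=datetime(2020, 1, 1),
         schedule_interval=None) as dag:
    check_files = PythonOperator(
        task_id='check_files',
        python_callable=check_folder_for_files_from_tokens,
        # upstream task id, unused placeholder arg, expected number of files
        op_args=['token_creator', None, 244],
        provide_context=True)  # Airflow 1.x: inject ti into **context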
def upload(self, token_ids, token_type, file_name):
    """Attach the file at `file_name` to every token in `token_ids`."""
    pc = get_picas_credentials.picas_cred()
    if self.pc_database:
        pc.database = self.pc_database
    client = CouchDB(pc.user,
                     pc.password,
                     url='https://picas-lofar.grid.surfsara.nl:6984',
                     connect=True)
    db = client[pc.database]
    tl = TokenList(token_type=token_type, database=db)
    for t_id in token_ids:
        tl.append(caToken(database=db, token_type=token_type, token_id=t_id))
    tl.fetch()
    # Attach the file to every token in the list, named after its basename.
    tl.add_attachment(file_name, file_name.split('/')[-1])
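# Hypothetical call site for the method above, assuming it lives on a
# TokenUploader-style operator instance; the token ids and file path are
# invented for illustration.
uploader.upload(
    token_ids=['test_targ1_L654321_SB000', 'test_targ1_L654321_SB001'],
    token_type='test_targ1_L654321',
    file_name='/path/to/pipeline.parset')  # attached as 'pipeline.parset'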
def get_result_files_from_tokenlist(token_type, token_ids,
                                    key="Results_location", **kwargs):
    """Return the value of `key` for each token in `token_ids`. A bare
    value is returned when there is exactly one match, otherwise a list."""
    pc = picas_cred()
    client = CouchDB(pc.user,
                     pc.password,
                     url='https://picas-lofar.grid.surfsara.nl:6984',
                     connect=True)
    db = client[pc.database]
    tokenlist = TokenList(token_type=token_type, database=db)
    for token_id in token_ids:
        tokenlist.append(
            caToken(token_id=token_id, token_type=token_type, database=db))
    tokenlist.fetch()
    results = [t[key] for t in tokenlist if key in t]
    if len(results) == 1:
        return results[0]
    return results
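# Example use of the helper above; the token id is invented. With exactly
# one match the bare value is returned, otherwise a list of values.
loc = get_result_files_from_tokenlist(
    token_type='test_targ1_L654321',
    token_ids=['test_targ1_L654321_SB000'],
    key='Results_location')
print(loc)  # a single gsiftp URL here, or a list for multiple tokens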
# Script fragment: `client`, `srms`, `tok_type`, `cal_obsid`, and `jsonfile`
# are defined earlier in the original script.
db = client[pc.database]
print('Connected to database ' + pc.database)

print('2) Creating a list of paths to the files.')
s = srmlist.srmlist()
with open(srms, 'r') as f:
    for line in f.readlines():
        s.append(line.strip())

print('3) Slicing list into groups.')
g = s.sbn_dict(pref='SB', suff='_')
d = srmlist.slice_dicts(g, 1)
print(d)

print('4) Building token list.')
tl = TokenList(token_type=tok_type, database=db)
for k, v in d.items():
    print(v)
    tok = caToken(database=db,
                  token_type=tok_type,
                  token_id=tok_type + '_sub6asec_' + str(cal_obsid) + '_SB' + str(k))
    # Write the group's srms to a temporary file and attach it to the token.
    with open('temp_srm.txt', 'w') as f:
        f.write('\n'.join(v))
    tok.build(TokenJsonBuilder(jsonfile))
    tok.save()
    tok.add_attachment(attachment_name='srm.txt', filename='temp_srm.txt')
    tl.append(tok)
tl.save()
# Script fragment: as above, but grouping on the 't_<freq>MHz' pattern and
# attaching the 1asec imaging parset to each token.
print('2) Creating a list of paths to the files.')
s = srmlist.srmlist()
with open(srms, 'r') as f:
    for line in f.readlines():
        s.append(line.strip())

print('3) Slicing list into groups.')
# g = s.sbn_dict(pref='SB', suff='_')
# Temporary hack: group on the frequency-based naming instead.
g = s.sbn_dict(pref='t_', suff='MHz')
d = srmlist.slice_dicts(g, 999)
print(d)

print('4) Building token list.')
tl = TokenList(token_type=tok_type, database=db)
for k, v in d.items():
    tok = caToken(database=db,
                  token_type=tok_type,
                  token_id=tok_type + '_1asec' + str(cal_obsid) + '_SB' + str(k))
    with open('temp_srm.txt', 'w') as f:
        f.write('\n'.join(v))
    tok.build(TokenJsonBuilder(jsonfile))
    tok.save()
    tok.add_attachment(attachment_name='srm.txt', filename='temp_srm.txt')
    tok.add_attachment(
        attachment_name='step4_1asec_image.parset',
        filename='/project/sksp/Software/lofar-highres-widefield/testdir/'
                 'test_with_GRID_LRT/step4_1asec_image.parset')
    tl.append(tok)
tl.save()

for tok in tl:
    tok['OBSID'] = obsid
    tok['PIPELINE_STEP'] = 'lb_1asec_cal1'
tl.save()  # save again so the field updates above are not lost
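# A small sketch of what the grouping steps above produce, assuming
# srmlist accepts gsiftp links and that sbn_dict yields (key, srms) pairs
# keyed on the text between `pref` and `suff`. The links are invented.
demo = srmlist.srmlist()
demo.append('gsiftp://gridftp.grid.sara.nl/data/L654321_SB000_uv.MS.tar')
demo.append('gsiftp://gridftp.grid.sara.nl/data/L654321_SB001_uv.MS.tar')
pairs = demo.sbn_dict(pref='SB', suff='_')
groups = srmlist.slice_dicts(pairs, 1)  # group size 1: one subband per token
print(groups)  # e.g. {'000': ['...SB000...'], '001': ['...SB001...']}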
# Script fragment: the code below continues a loop over srm entries that
# groups them per source. The loop header is not in the original fragment,
# so the `for v in ...` line and the `match` assignment are reconstructed
# assumptions; `d`, `source`, and `sources` are initialized earlier.
for v in srm_entries:  # hypothetical name for the iterable of srm strings
    match = re.search(r'S\d{1,4}', v)
    if not source:
        source = match.group(0)
    # Check if we switched to a new source.
    if match.group(0) != source:
        d[source] = sources
        sources = [v]
    else:
        sources.append(v)
    source = match.group(0)
else:
    # No further entry triggers the if statement above, so add the final
    # source after the loop finishes.
    d[source] = sources
del match, source, sources

print('4) Building token list.')
tl = TokenList(token_type=tok_type, database=db)
tokens = tl.list_view_tokens('step3_selfcal_cals')
token_ids = [token['_id'] for token in tokens]
for k, v in d.items():
    match = re.search(r'S\d{1,4}', v[0])
    if not match:
        raise ValueError('No sourcename extracted!')
    source = match.group(0)
    # Only build tokens that do not already exist in the view.
    if (tok_type + '_scal_' + source + str(cal_obsid)) not in token_ids:
        tok = caToken(database=db,
                      token_type=tok_type,
                      token_id=tok_type + '_scal_' + source + str(cal_obsid))
        with open('temp_srm_{:s}.txt'.format(source), 'w') as f:
            f.write('\n'.join(v))
        tok.build(TokenJsonBuilder(jsonfile))
        # The original fragment is truncated here; by analogy with the
        # snippets above, the token is presumably saved, given its srm
        # attachment, and appended to the list:
        tok.save()
        tok.add_attachment(attachment_name='srm.txt',
                           filename='temp_srm_{:s}.txt'.format(source))
        tl.append(tok)
tl.save()
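# Quick check of the source-name extraction used above; the regex comes
# from the snippet, the srm string is an invented example.
m = re.search(r'S\d{1,4}',
              'gsiftp://gridftp.grid.sara.nl/data/S1234_L654321_uv.MS.tar')
print(m.group(0) if m else None)  # -> 'S1234'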
# Imports for the operator below (a separate module in the original repo).
# get_task_instance and create_gsiftp_directory are AGLOW-internal helpers;
# their exact module paths depend on the AGLOW package layout.
import json
import logging
import os
import signal
import time
from tempfile import NamedTemporaryFile

from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.utils.state import State
from cloudant.client import CouchDB
from GRID_LRT.auth import get_picas_credentials
from GRID_LRT.Staging.srmlist import slice_dicts, srmlist
from GRID_LRT.token import caToken, TokenJsonBuilder, TokenList


class TokenCreator(BaseOperator):
    """
    Using a Token template as input, this operator creates the PiCaS tokens
    for a LOFAR job. The tokens are a set of documents that contain the
    metadata for each processing job, as well as the job's progress and
    step completion times.

    :param tok_config: path to the JSON token template
    :type tok_config: string
    :param staging_task: reference ({'name', 'parent_dag'}) to the task whose
        XCom holds the staged srms
    :type staging_task: dict
    :param token_type: prefix used for the token type of this job
    :type token_type: string
    :param files_per_token: number of srm files grouped into each token
    :type files_per_token: int
    :param output_encoding: output encoding of the bash command
    :type output_encoding: string
    """
    template_fields = ()
    template_ext = ()
    ui_color = '#f3f92c'

    @apply_defaults
    def __init__(self,
                 tok_config,
                 staging_task,
                 append_task=None,
                 fields_task=None,
                 pc_database=None,
                 subband_prefix=None,
                 subband_suffix=None,
                 token_type='test_',
                 files_per_token=10,
                 output_encoding='utf-8',
                 *args,
                 **kwargs):
        super(TokenCreator, self).__init__(*args, **kwargs)
        self.pc_database = pc_database
        self.tok_config = tok_config
        self.fields_task = fields_task
        # Default to LOFAR-style subband naming (e.g. ..._SB123_...).
        self.subband_prefix = subband_prefix if subband_prefix else "SB"
        self.subband_suffix = subband_suffix if subband_suffix else "_"
        self.staging_task = staging_task
        self.append_task = append_task
        self.files_per_token = files_per_token
        self.output_encoding = output_encoding
        self.t_type = token_type
        self.state = State.QUEUED

    def execute(self, context):
        """Build one token per group of staged srms, upload the tokens
        (with their srm.txt attachments) to PiCaS, and return a summary
        dict for downstream tasks."""
        srms = self.get_staged_srms(context)
        if not srms:
            logging.warning("Could not get the list of staged srms!")
        logging.info("The list of staged srms is {0}".format(srms))
        pc = get_picas_credentials.picas_cred()
        if self.pc_database:
            pc.database = self.pc_database
        # Suffix the token type with the (sanitized) field name if a
        # fields_task is given, otherwise with the obsid.
        if self.fields_task:
            task_name = self.fields_task['name']
            task_parent_dag = self.fields_task['parent_dag']
            try:
                app = get_task_instance(
                    context, task_name, task_parent_dag)['sanitized_field_name']
            except KeyError:
                app = get_task_instance(
                    context, task_name, task_parent_dag)['field_name']
        else:
            app = srms.obsid
        self.t_type = self.t_type + app
        with open(self.tok_config, 'r') as config_file:
            tok_settings = json.load(config_file)
        token_variables = tok_settings['Job']['variables']
        client = CouchDB(pc.user,
                         pc.password,
                         url='https://picas-lofar.grid.surfsara.nl:6984',
                         connect=True)
        self.db = client[pc.database]
        pipe_type = token_variables['PIPELINE_STEP']
        self.token_list = TokenList(database=self.db, token_type=self.t_type)
        # Group the srms by subband, files_per_token subbands per group.
        if self.files_per_token != 1:
            d = slice_dicts(
                srms.sbn_dict(pref=self.subband_prefix,
                              suff=self.subband_suffix),
                self.files_per_token)
        else:
            d = {}
            for i in srms.sbn_dict(pref=self.subband_prefix,
                                   suff=self.subband_suffix):
                d[i[0]] = i[1]
        for token_file in d:
            logging.info("Token file is {}".format(token_file))
            with NamedTemporaryFile(delete=False) as savefile:
                for line in d[token_file]:
                    savefile.write("{}\n".format(line).encode('utf-8'))
            # Token ids follow pref3_$FIELDNAME_$OBSID_$PIPELINE_SB$SBNUMBER
            pipeline_step = pipe_type.split('_')[1]
            if 'cal' in pipe_type:
                token_id = "{}_{}_{}".format(self.t_type, srms.obsid,
                                             pipeline_step)
            elif 'targ' in pipe_type:
                token_id = "{}_{}_{}_SB{}".format(self.t_type, srms.obsid,
                                                  pipeline_step, token_file)
            else:
                token_id = "fields_$FIELDNAME_$OBSID_$PIPELINE: {}_{}_{}_{}_{}".format(
                    self.t_type, token_file, srms.obsid,
                    pipe_type.split('_')[1], time.time())
            logging.info(token_id)
            self.token_list.append(
                self.build_token(token_id,
                                 attachment={
                                     'name': 'srm.txt',
                                     'location': savefile.name
                                 }))
            self.token_list[-1]['STARTSB'] = token_file
            os.remove(savefile.name)
        self.token_list.add_token_views()
        if self.append_task:
            logging.info(self.append_task)
            logging.info(context)
            self.modify_fields(context)
        for token in self.token_list:
            token["OBSID"] = srms.obsid
            token['RESULTS_DIR'] += "/" + str(srms.obsid)
        token_variables['OBSID'] = srms.obsid
        token_variables['RESULTS_DIR'] += "/" + str(srms.obsid)
        # Create the result directory if it does not exist.
        create_gsiftp_directory(token_variables['RESULTS_DIR'])
        logging.info('Token type is ' + self.t_type)
        logging.info(
            'Tokens are available at https://picas-lofar.grid.surfsara.nl:6984/_utils/database.html?'
            + pc.database + '/_design/' + self.t_type + '/_view/overview_total')
        logging.info("Token settings are:")
        for i in token_variables.items():
            logging.info(str(i))
        logging.debug(srms)
        self.token_list.save()
        results = dict()
        results['num_jobs'] = len(d)
        results['output_dir'] = token_variables['RESULTS_DIR']
        logging.info("Output directory is {}".format(results['output_dir']))
        results['token_type'] = str(self.t_type)
        results['view'] = pipe_type
        results['OBSID'] = token_variables['OBSID']
        results['token_ids'] = [i['_id'] for i in self.token_list]
        return results

    def upload_tokens(self, tokens):
        pass

    def upload_attachments(self, attachment):
        pass

    def build_token(self, token_id, attachment=None):
        t1 = caToken(database=self.db, token_type=self.t_type,
                     token_id=token_id)
        t1.build(TokenJsonBuilder(self.tok_config))
        logging.info(self.tok_config)
        t1.save()
        if attachment:
            t1.add_attachment(attachment_name=attachment['name'],
                              filename=attachment['location'])
        return t1

    def modify_fields(self, context):
        """If the append_task exists, this method adds every key:value pair
        from its XCom to each token. This is used, for example, to take the
        results of the calibration task and store them as 'CAL_RESULTS'."""
        print(self.append_task)
        append_xcom = get_task_instance(
            context, self.append_task['name'],
            parent_dag=self.append_task['parent_dag'])
        if append_xcom is None:
            logging.info("No calibration results found!")
            return
        for k in append_xcom:
            for token in self.token_list:
                token[k] = append_xcom[k]
        self.token_list.save()

    def get_staged_srms(self, context):
        task_name = self.staging_task['name']
        task_parent_dag = self.staging_task['parent_dag']
        srm_xcom = get_task_instance(context, task_name, task_parent_dag)
        srmfile = srm_xcom['srmfile']
        if srmfile is None:
            raise RuntimeError("Could not get the srm list from the "
                               + str(self.staging_task) + " task")
        logging.info("Srmfile is " + srmfile)
        return self.get_list_from_files(srmfile)

    def get_list_from_files(self, filename):
        loaded_srmlist = srmlist()
        with open(filename, 'rb') as f:
            for link in f.readlines():
                loaded_srmlist.append(link.decode('utf-8').strip('\n'))
        return loaded_srmlist

    def success(self):
        self.state = State.SUCCESS
        logging.info("Successfully uploaded "
                     + str(self.progress['Percent done'])
                     + " % of the tokens.")

    def on_kill(self):
        logging.warning('Sending SIGTERM signal to staging group')
        self.state = State.SHUTDOWN
        os.killpg(os.getpgid(self.sp.pid), signal.SIGTERM)
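# A minimal sketch of instantiating the operator above inside an Airflow
# 1.x DAG (`dag` as in the sketch near the top). The template path, staging
# task reference, and parent_dag value are assumptions for illustration.
create_tokens = TokenCreator(
    task_id='token_creator',
    tok_config='/path/to/token_template.json',
    staging_task={'name': 'stage', 'parent_dag': False},
    token_type='test_',
    files_per_token=10,
    dag=dag)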