Example 1
# Imports assumed from the surrounding project (GRID_LRT and the cloudant
# CouchDB client); exact module paths may differ.
import subprocess

from cloudant.client import CouchDB
from GRID_LRT.token import TokenList, caToken
from GRID_LRT.auth.get_picas_credentials import picas_cred


def check_folder_for_files_from_tokens(task_id, dummy, number, **context):
    """Pull token ids from an upstream task's xcom, verify every token is in
    status 'done', and check that each result file exists on the grid."""
    xcom_results = context['ti'].xcom_pull(task_ids=task_id)
    tokens = list(xcom_results['token_ids'])
    token_type = xcom_results['token_type']
    pc = picas_cred()
    client = CouchDB(pc.user,
                     pc.password,
                     url='https://picas-lofar.grid.surfsara.nl:6984',
                     connect=True)
    db = client[pc.database]
    tokenlist = TokenList(token_type=token_type, database=db)
    for token_id in tokens:
        tokenlist.append(
            caToken(database=db, token_type=token_type, token_id=token_id))
    tokenlist.fetch()
    for t in tokenlist:
        if t['status'] != 'done':
            raise RuntimeError(
                "Token {} is not in status 'done' but in status {}".format(
                    t['_id'], t['status']))
    print("All jobs in status 'done' ")
    locations = [t['Results_location'] for t in tokenlist]
    if len(locations) < number:
        print("Only {} files found!".format(len(locations)))
    for output_file in locations:
        c = subprocess.Popen(['uberftp', '-ls', output_file],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, _ = c.communicate()
        # An empty first line of the listing means the file is missing.
        if not out.decode().strip().split('\n')[0]:
            print("{} not found".format(output_file))
Example 2
    def upload(self, token_ids, token_type, file_name):
        """Attach the file at file_name to every token in token_ids, using
        the file's basename as the attachment name."""
        pc = get_picas_credentials.picas_cred()
        if self.pc_database:
            pc.database = self.pc_database
        client = CouchDB(pc.user,
                         pc.password,
                         url='https://picas-lofar.grid.surfsara.nl:6984',
                         connect=True)
        db = client[pc.database]

        tl = TokenList(token_type=token_type, database=db)
        for t_id in token_ids:
            tl.append(
                caToken(database=db, token_type=token_type, token_id=t_id))
        tl.fetch()
        tl.add_attachment(file_name, file_name.split('/')[-1])
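
A hedged usage sketch: `self` above belongs to an operator that exposes an optional `pc_database` override; a call might look like this, with all ids and paths made up.

# Hypothetical call; token ids and the file path are placeholders.
op.upload(token_ids=['AAA_tokens_SB000', 'AAA_tokens_SB001'],
          token_type='AAA_tokens',
          file_name='/path/to/pipeline.parset')
# Each token ends up with the file attached under its basename,
# 'pipeline.parset'.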
Example 3
def get_result_files_from_tokenlist(token_type,
                                    token_ids,
                                    key="Results_location",
                                    **kwargs):
    """Fetch the given tokens and return the value of `key` for each token
    that defines it; a single result is returned unwrapped."""
    pc = picas_cred()
    client = CouchDB(pc.user,
                     pc.password,
                     url='https://picas-lofar.grid.surfsara.nl:6984',
                     connect=True)
    db = client[pc.database]
    tokenlist = TokenList(token_type=token_type, database=db)
    for token_id in token_ids:
        tokenlist.append(
            caToken(token_id=token_id, token_type=token_type, database=db))
    tokenlist.fetch()
    results = [i[key] for i in tokenlist if key in i]
    if len(results) == 1:
        return results[0]
    return results
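
A usage sketch under the same assumptions (placeholder token ids):

# Hypothetical call; with a single matching token this returns one path
# string, otherwise a list of paths.
locations = get_result_files_from_tokenlist(
    token_type='AAA_tokens',
    token_ids=['AAA_tokens_SB000', 'AAA_tokens_SB001'],
    key='Results_location')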
Example 4

# `client`, `pc`, `srms`, `srmlist`, `tok_type`, `cal_obsid` and `jsonfile`
# are defined earlier in the original script.
db = client[pc.database]
print('Connected to database ' + pc.database)

print('2) Creating a list of paths to the files.')
s = srmlist.srmlist()
with open(srms, 'r') as f:
    for line in f:
        s.append(line.strip())

print('3) Slicing list into groups.')
g = s.sbn_dict(pref='SB', suff='_')
d = srmlist.slice_dicts(g, 1)
print(d)

print('4) Building token list.')
tl = TokenList(token_type=tok_type, database=db)
for k, v in d.items():
    print(v)

    tok = caToken(database=db,
                  token_type=tok_type,
                  token_id=tok_type + '_sub6asec_' + str(cal_obsid) + '_SB' +
                  str(k))
    with open('temp_srm.txt', 'w') as f:
        f.write('\n'.join(v))
    tok.build(TokenJsonBuilder(jsonfile))
    tok.save()
    tok.add_attachment(attachment_name='srm.txt', filename='temp_srm.txt')
    tl.append(tok)

tl.save()
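
The grouping step above relies on srmlist's `sbn_dict` and `slice_dicts`; judging from the calls, they pair each srm with the subband number found between `pref` and `suff` and then bucket the pairs into fixed-size chunks. A standalone sketch of that idea (not srmlist's actual implementation):

# Standalone illustration of the grouping idea; group_by_subband is a
# hypothetical helper, not part of GRID_LRT.
import re
from collections import OrderedDict

def group_by_subband(urls, pref='SB', suff='_', chunk=1):
    groups = OrderedDict()
    for url in urls:
        m = re.search(re.escape(pref) + r'(\d+)' + re.escape(suff), url)
        if m:
            # Bucket subbands into chunks of `chunk`, keyed by the first
            # subband number of each chunk.
            key = int(m.group(1)) // chunk * chunk
            groups.setdefault(str(key), []).append(url)
    return groups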
Example 5

print('2) Creating a list of paths to the files.')
s = srmlist.srmlist()
with open(srms, 'r') as f:
    for line in f:
        s.append(line.strip())

print('3) Slicing list into groups.')
# g = s.sbn_dict(pref='SB', suff='_')
# Temporary hack
g = s.sbn_dict(pref='t_', suff='MHz')
d = srmlist.slice_dicts(g, 999)
print(d)

print('4) Building token list.')
tl = TokenList(token_type=tok_type, database=db)
for k, v in d.items():
    tok = caToken(database=db,
                  token_type=tok_type,
                  token_id=tok_type + '_1asec' + str(cal_obsid) + '_SB' +
                  str(k))
    with open('temp_srm.txt', 'w') as f:
        f.write('\n'.join(v))
    tok.build(TokenJsonBuilder(jsonfile))
    tok.save()
    tok.add_attachment(attachment_name='srm.txt', filename='temp_srm.txt')
    tok.add_attachment(
        attachment_name='step4_1asec_image.parset',
        filename='/project/sksp/Software/lofar-highres-widefield/testdir/'
                 'test_with_GRID_LRT/step4_1asec_image.parset')
    tl.append(tok)

tl.save()

for tok in tl:
    tok['OBSID'] = obsid
    tok['PIPELINE_STEP'] = 'lb_1asec_cal1'
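
For orientation, this is the rough shape a single token document might have after the loop above; the real fields come from the JSON template plus PiCaS bookkeeping, so every key below is an assumption.

# Illustrative token document; all values are placeholders.
token_doc = {
    '_id': 'AAA_1asecL654321_SB0',  # tok_type + '_1asec' + cal_obsid + '_SB' + k
    'OBSID': 'L654322',
    'PIPELINE_STEP': 'lb_1asec_cal1',
    'status': 'todo',  # assumed PiCaS state field, set to 'done' when finished
    '_attachments': {'srm.txt': '...', 'step4_1asec_image.parset': '...'},
}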
Example 6

# The fragment below begins inside a loop that groups srm entries by source
# name; a self-contained reconstruction follows this example.
    if not source:
        source = match.group(0)
    # Check if we switch to a new source
    if match.group(0) != source:
        d[source] = sources
        sources = [v]
    else:
        sources.append(v)
    source = match.group(0)
else:
    # No additional source to trigger the if statement, add the final source after the loop finishes.
    d[source] = sources
    del match, source, sources

print('4) Building token list.')
tl = TokenList(token_type=tok_type, database=db)
tokens = tl.list_view_tokens('step3_selfcal_cals')
token_ids = [token['_id'] for token in tokens]
for k, v in d.items():
    match = re.search(r'S\d{1,4}', v[0])
    if not match:
        raise ValueError('No sourcename extracted!')
    else:
        source = match.group(0)
    if (tok_type + '_scal_' + source + str(cal_obsid)) not in token_ids:
        tok = caToken(database=db,
                      token_type=tok_type,
                      token_id=tok_type + '_scal_' + source + str(cal_obsid))
        with open('temp_srm_{:s}.txt'.format(source), 'w') as f:
            f.write('\n'.join(v))
        tok.build(TokenJsonBuilder(jsonfile))
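
The fragment in this example starts inside a loop and ends mid-token-build. A self-contained reconstruction of the grouping loop it was cut from might look like the following; `srm_entries` and the surrounding control flow are assumptions, only the loop body mirrors the fragment.

import re

# Hypothetical reconstruction: group consecutive srm entries by the source
# name (e.g. 'S123') embedded in each entry; the for/else flushes the final
# group once the loop completes without a new source appearing.
srm_entries = ['srm://host/path_S1_a', 'srm://host/path_S1_b',
               'srm://host/path_S2_a']
d = {}
sources = []
source = None
for v in srm_entries:
    match = re.search(r'S\d{1,4}', v)  # assumes every entry names a source
    if not source:
        source = match.group(0)
    if match.group(0) != source:
        # Switched to a new source: flush the previous group.
        d[source] = sources
        sources = [v]
    else:
        sources.append(v)
    source = match.group(0)
else:
    # No further source to trigger the if; add the final group.
    d[source] = sources
    del match, source, sources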
Example 7
# Imports assumed from the surrounding AGLOW/GRID_LRT project; exact module
# paths may differ. `get_task_instance`, `create_gsiftp_directory`,
# `slice_dicts` and `srmlist` are project helpers imported from its utils.
import json
import logging
import os
import signal
import time
from tempfile import NamedTemporaryFile

from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.utils.state import State
from cloudant.client import CouchDB
from GRID_LRT.auth import get_picas_credentials
from GRID_LRT.token import TokenList, caToken, TokenJsonBuilder


class TokenCreator(BaseOperator):
    """
    Using a Token template input, this class creates the tokens for a LOFAR job
    The tokens are a set of documents that contain the metadata for each processing
    job as well as the job's progress, step completion times, and etc. 

    :type sbx_task: string
    :param srms: a list of the srms that need to be staged
    :type srms: list
    :param stageID: In case staging was already done
    # 
    # 
    # 
    # 
    # 
    :type stageID: string
    :type output_encoding: output encoding of bash command
    """
    template_fields = ()
    template_ext = ()
    ui_color = '#f3f92c'

    @apply_defaults
    def __init__(self,
                 tok_config,
                 staging_task,
                 append_task=None,
                 fields_task=None,
                 pc_database=None,
                 subband_prefix=None,
                 subband_suffix=None,
                 token_type='test_',
                 files_per_token=10,
                 output_encoding='utf-8',
                 *args,
                 **kwargs):

        super(TokenCreator, self).__init__(*args, **kwargs)
        self.pc_database = pc_database
        self.tok_config = tok_config
        self.fields_task = fields_task
        self.subband_prefix = subband_prefix or "SB"
        self.subband_suffix = subband_suffix or "_"
        self.staging_task = staging_task
        self.append_task = append_task
        self.files_per_token = files_per_token
        self.output_encoding = output_encoding
        self.t_type = token_type
        self.state = State.QUEUED

    def execute(self, context):
        """
        Execute the bash command in a temporary directory
        which will be cleaned afterwards
        """
        srms = self.get_staged_srms(context)
        if not srms:
            logging.warn("Could not get the list of staged srms!")
        logging.info("the list of staged srms is {0}".format(srms))
        pc = get_picas_credentials.picas_cred()
        if self.pc_database:
            pc.database = self.pc_database
        if self.fields_task:
            task_name = self.fields_task['name']
            task_parent_dag = self.fields_task['parent_dag']
            try:
                app = get_task_instance(
                    context, task_name,
                    task_parent_dag)['sanitized_field_name']
            except KeyError:
                app = get_task_instance(context, task_name,
                                        task_parent_dag)['field_name']
        else:
            app = srms.obsid
        self.t_type = self.t_type + app
        with open(self.tok_config) as config_file:
            tok_settings = json.load(config_file)
        token_variables = tok_settings['Job']['variables']
        client = CouchDB(pc.user,
                         pc.password,
                         url='https://picas-lofar.grid.surfsara.nl:6984',
                         connect=True)
        self.db = client[pc.database]

        pipe_type = token_variables['PIPELINE_STEP']
        self.token_list = TokenList(database=self.db, token_type=self.t_type)

        if self.files_per_token != 1:
            d = slice_dicts(
                srms.sbn_dict(pref=self.subband_prefix,
                              suff=self.subband_suffix), self.files_per_token)
        else:
            d = {}
            for i in srms.sbn_dict(pref=self.subband_prefix,
                                   suff=self.subband_suffix):
                d[i[0]] = i[1]

        for token_file in d:
            logging.info("Token file is  {}".format(token_file))
            with NamedTemporaryFile(delete=False) as savefile:
                for line in d[token_file]:
                    savefile.write("{}\n".format(line).encode('utf-8'))
            # pref3_$FIELDNAME_$OBSID_$PIPELINE_SB$SBNUMBER
            pipeline_step = pipe_type.split('_')[1]
            # logging.info("Pipeline step is {}, type pipe_type is {}.".format(pipe_type, type(pipe_type)))
            if 'cal' in pipe_type:
                token_id = "{}_{}_{}".format(self.t_type, srms.obsid,
                                             pipeline_step)
            elif 'targ' in pipe_type:
                token_id = "{}_{}_{}_SB{}".format(self.t_type, srms.obsid,
                                                  pipeline_step, token_file)
            else:
                token_id = "fields_$FIELDNAME_$OBSID_$PIPELINE: {}_{}_{}_{}_{}".format(
                    self.t_type, token_file, srms.obsid,
                    pipe_type.split('_')[1], time.time())

            logging.info(token_id)
            self.token_list.append(
                self.build_token(token_id,
                                 attachment={
                                     'name': 'srm.txt',
                                     'location': savefile.name
                                 }))
            self.token_list[-1]['STARTSB'] = token_file
            os.remove(savefile.name)
        self.token_list.add_token_views()

        if self.append_task:
            logging.info(self.append_task)
            logging.info(context)
            self.modify_fields(context)

        for token in self.token_list:
            token["OBSID"] = srms.obsid
            token['RESULTS_DIR'] += "/" + str(srms.obsid)

        token_variables['OBSID'] = srms.obsid
        token_variables['RESULTS_DIR'] += "/" + str(srms.obsid)

        # create result directory if not exist
        create_gsiftp_directory(token_variables['RESULTS_DIR'])

        logging.info('Token type is ' + self.t_type)
        logging.info(
            'Tokens are available at https://picas-lofar.grid.surfsara.nl:6984/_utils/database.html?'
            + pc.database + '/_design/' + self.t_type +
            '/_view/overview_total')
        logging.info("Token settings are :")
        for i in token_variables.items():
            logging.info(str(i))
        logging.debug(srms)

        self.token_list.save()
        results = dict()
        results['num_jobs'] = len(d.keys())
        results['output_dir'] = token_variables['RESULTS_DIR']
        logging.info("output directory is {}".format(results['output_dir']))
        results['token_type'] = str(self.t_type)
        results['view'] = pipe_type
        results['OBSID'] = token_variables['OBSID']
        results['token_ids'] = [i['_id'] for i in self.token_list]
        return results

    def upload_tokens(self, tokens):
        pass

    def upload_attachments(self, attachment):
        pass

    def build_token(self, token_id, attachment=None):
        t1 = caToken(database=self.db,
                     token_type=self.t_type,
                     token_id=token_id)
        t1.build(TokenJsonBuilder(self.tok_config))
        logging.info(self.tok_config)
        t1.save()
        if attachment:
            t1.add_attachment(attachment_name=attachment['name'],
                              filename=attachment['location'])
        return t1

    def modify_fields(self, context):
        """If the append_task exists, this moethod will add all key:value pairs
        in its xcom to every token. This is used to for example take the results
        of the calibration taks and name it 'CAL_RESULTS'"""
        print(self.append_task)
        append_xcom = get_task_instance(
            context,
            self.append_task['name'],
            parent_dag=self.append_task['parent_dag'])
        if append_xcom is None:
            logging.info("No calibration results found!")
            return

        for k in append_xcom:
            for token in self.token_list:
                token[k] = append_xcom[k]
        self.token_list.save()

    def get_staged_srms(self, context):
        task_name = self.staging_task['name']
        task_parent_dag = self.staging_task['parent_dag']
        srm_xcom = get_task_instance(context, task_name, task_parent_dag)
        srmfile = srm_xcom['srmfile']
        if srmfile is None:
            raise RuntimeError("Could not get the srm list from the " +
                               str(self.staging_task) + " task")
        logging.info("Srmfile is " + srmfile)
        return self.get_list_from_files(srmfile)

    def get_list_from_files(self, filename):
        loaded_srmlist = srmlist()
        with open(filename, 'rb') as srm_file:
            for link in srm_file:
                loaded_srmlist.append(link.decode('utf-8').strip('\n'))
        return loaded_srmlist

    def success(self):
        self.state = State.SUCCESS
        logging.info("Successfully uploaded " +
                     str(self.progress['Percent done']) + " % of the tokens.")

    def on_kill(self):
        logging.warning('Sending SIGTERM signal to staging group')
        self.state = State.SHUTDOWN
        os.killpg(os.getpgid(self.sp.pid), signal.SIGTERM)
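
A hedged sketch of instantiating the operator in a DAG; the config path, task references, and `dag` object are placeholders, not taken from the original.

# Hypothetical instantiation; the staging_task/fields_task dicts follow the
# {'name': ..., 'parent_dag': ...} shape the operator looks up via xcom.
create_tokens = TokenCreator(
    task_id='create_tokens',
    tok_config='/path/to/token_template.json',
    staging_task={'name': 'stage_files', 'parent_dag': None},
    fields_task={'name': 'parse_field_name', 'parent_dag': None},
    token_type='AAA_prefactor_',
    files_per_token=10,
    dag=dag)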