Example #1
def save_dates_from_task(task_name,
                         savefile='/home/apmechev/.prefactor_v3.0_CI.pkl',
                         **context):
    task_data = get_task_instance(context, task_name)
    with open(savefile, 'rb') as save_handle:  # close the file after loading
        prev_data = pickle.load(save_handle)
    merged_data = _merge_two_dicts(prev_data, task_data)
    for key in task_data:
        merged_data[key]['saved_date'] = datetime.datetime.now()
    save_dates(merged_data, savefile)
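
A callable like this is typically wired into a DAG as a PythonOperator. Below is a minimal sketch, assuming the Airflow 1.x API (provide_context=True passes the template context into **context); the task id and upstream task name are hypothetical:

from airflow.operators.python_operator import PythonOperator

save_dates_task = PythonOperator(
    task_id='save_dates',                 # hypothetical task id
    python_callable=save_dates_from_task,
    op_kwargs={'task_name': 'CI_dates'},  # hypothetical upstream task name
    provide_context=True,                 # Airflow 1.x: injects **context
    dag=dag,
)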
Example #2
    def get_staged_srms(self, context):
        task_name = self.staging_task['name']
        task_parent_dag = self.staging_task['parent_dag']
        srm_xcom = get_task_instance(context, task_name, task_parent_dag)
        srmfile = srm_xcom['srmfile']
        if srmfile is None:  # check before logging: concatenating None raises TypeError
            raise RuntimeError("Could not get the srm list from the " +
                               str(self.staging_task) + " task")
        logging.info("Srmfile is " + srmfile)
        return self.get_list_from_files(srmfile)
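
The get_task_instance helper shared by these examples is not defined on this page. A minimal sketch of what such a helper could look like, assuming it simply pulls the return-value XCom of the named task, optionally from another DAG, via Airflow's xcom_pull:

def get_task_instance(context, task_name, parent_dag=None):
    # Pull the XCom that `task_name` returned; dag_id allows reading
    # XComs pushed in a different (e.g. parent) DAG.
    return context['ti'].xcom_pull(task_ids=task_name, dag_id=parent_dag)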
Example #3
    def execute(self, context):
        """
        Executes the staging command from the list of srms requested.
        """
        if isinstance(self.srmfile, dict):
            task_name = self.srmfile['name']
            task_parent_dag = self.srmfile['parent_dag']
            sbx_xcom = get_task_instance(context, task_name, task_parent_dag)
            self.srmfile = sbx_xcom[self.srmkey]

        elif not os.path.isfile(self.srmfile) and not hasattr(
                self.srms, '__iter__'):
            self.srmfile = Variable.get(self.srmfile)
            if not os.path.isfile(self.srmfile):
                self.status = State.UPSTREAM_FAILED
                raise AirflowException(
                    "Input srmfile doesn't exist and srm list not a list")
        self.progress = {'Percent done': 0}

        # holds all the srms (both from the file and the list argument)
        surl_list = srmlist.srmlist()
        self.surl_list = self.build_srm_list(surl_list)
        try:
            self.stage_ID = stager_access.stage(list(self.surl_list))
        except xmlrpclib.Fault:
            sleep(60)
            self.stage_ID = stager_access.stage(list(self.surl_list))
        logging.info(
            "Successfully sent staging command for {} files.".format(
                stager_access.get_progress()[str(self.stage_ID)]['File count']))
        logging.info("StageID= " + str(self.stage_ID))

        self.state = State.RUNNING
        sleep(120)
        try:
            self.progress = stager_access.get_progress()[str(self.stage_ID)]
        except Exception:  # progress may not be reported yet; keep the default
            pass
        self.started = False
        f = NamedTemporaryFile(delete=False)
        for surl in surl_list:
            f.write(bytes(surl, encoding='utf8'))
        f.close()
        while self.still_running():
            sleep(120)
        if self.state == State.SUCCESS:
            return {'srmfile': str(f.name)}
        self.state = State.FAILED
        return {'srmfile': str(f.name)}
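
The returned dictionary becomes this task's XCom, so a downstream callable can recover the path of the temporary srm file. A minimal sketch, assuming a hypothetical upstream task id of 'stage':

def read_staged_srms(**context):
    # 'stage' is a hypothetical task id for the staging operator above
    stage_xcom = context['ti'].xcom_pull(task_ids='stage')
    with open(stage_xcom['srmfile']) as srm_file:
        return srm_file.read().splitlines()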
Example #4
    def modify_fields(self, context):
        """If the append_task exists, this method will add all key:value pairs
        in its xcom to every token. This is used, for example, to take the
        results of the calibration task and name them 'CAL_RESULTS'."""
        logging.info(self.append_task)
        append_xcom = get_task_instance(
            context,
            self.append_task['name'],
            parent_dag=self.append_task['parent_dag'])
        for key in append_xcom:
            for token in self.token_list:
                token[key] = append_xcom[key]
        self.token_list.save()
Example #5
    def find_upload_file(self, context):
        """Checks whether the file exists (if fed a filename as a parameter).
        Otherwise it looks in the xcom of the parset task for the key named
        after self.upload_file.split('/')[-1] (i.e. the filename), checks
        whether that file exists, and returns it.
        """
        if not self.parset_task and os.path.exists(self.upload_file):
            return self.upload_file  # no parset_task, just use the file parameter
        parset_xcom = get_task_instance(context, self.parset_task,
                                        self.parent_dag)
        parset_filename = self.upload_file.split('/')[-1]
        parset_file_loc = parset_xcom[parset_filename]
        if os.path.exists(parset_file_loc):
            return parset_file_loc
        raise Exception("Cannot find the parset file")
Example #6
    def execute(self, context):
        logging.info(self.srmkey)
        if isinstance(self.srmfile, dict):
            task_name = self.srmfile['name']
            task_parent_dag = self.srmfile['parent_dag']
            sbx_xcom = get_task_instance(context, task_name, task_parent_dag)
            self.srmfile = sbx_xcom[self.srmkey]
        # Return 'staged': False until gfal2 works on py3+;
        # the status check below is unreachable until then.
        return {'staged': False, 'srmfile': str(self.srmfile)}
        staging_statuses = state_all.main(self.srmfile, verbose=False)
        logging.info(staging_statuses)
        if state_all.percent_staged(staging_statuses) > self.threshold:
            return {'staged': True, 'srmfile': str(self.srmfile)}
        return {'staged': False, 'srmfile': str(self.srmfile)}
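
The dict-valued srmfile argument follows the same convention as the other examples: it names an upstream task (and optionally its parent DAG) whose XCom holds the actual file path. A hypothetical instantiation sketch; the operator class and task ids are assumptions, not the project's real names:

check_staged = StagingStatusOperator(   # hypothetical operator class
    task_id='check_staged',
    srmfile={'name': 'stage', 'parent_dag': None},  # read the path from the 'stage' task's XCom
    srmkey='srmfile',
    dag=dag,
)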
Example #7
    def execute(self, context):
        """
        Execute the bash command in a temporary directory
        which will be cleaned afterwards
        """
        srms = self.get_staged_srms(context)
        if not srms:
            logging.warning("Could not get the list of staged srms!")
        logging.info("the list of staged srms is {0}".format(srms))
        pc = get_picas_credentials.picas_cred()
        if self.pc_database:
            pc.database = self.pc_database
        if self.fields_task:
            task_name = self.fields_task['name']
            task_parent_dag = self.fields_task['parent_dag']
            try:
                app = get_task_instance(
                    context, task_name,
                    task_parent_dag)['sanitized_field_name']
            except KeyError:
                app = get_task_instance(context, task_name,
                                        task_parent_dag)['field_name']
        else:
            app = srms.obsid
        self.t_type = self.t_type + app
        with open(self.tok_config, 'rb') as tok_config_file:
            tok_settings = json.load(tok_config_file)
        token_variables = tok_settings['Job']['variables']
        client = CouchDB(pc.user,
                         pc.password,
                         url='https://picas-lofar.grid.surfsara.nl:6984',
                         connect=True)
        self.db = client[pc.database]

        pipe_type = token_variables['PIPELINE_STEP']
        self.token_list = TokenList(database=self.db, token_type=self.t_type)

        if self.files_per_token != 1:
            d = slice_dicts(
                srms.sbn_dict(pref=self.subband_prefix,
                              suff=self.subband_suffix), self.files_per_token)
        else:
            d = {}
            for i in srms.sbn_dict(pref=self.subband_prefix,
                                   suff=self.subband_suffix):
                d[i[0]] = i[1]

        for token_file in d:
            logging.info("Token file is  {}".format(token_file))
            with NamedTemporaryFile(delete=False) as savefile:
                for line in d[token_file]:
                    savefile.write("{}\n".format(line).encode('utf-8'))
            # pref3_$FIELDNAME_$OBSID_$PIPELINE_SB$SBNUMBER
            pipeline_step = pipe_type.split('_')[1]
            # logging.info("Pipeline step is {}, type pipe_type is {}.".format(pipe_type, type(pipe_type)))
            if 'cal' in pipe_type:
                token_id = "{}_{}_{}".format(self.t_type, srms.obsid,
                                             pipeline_step)
            elif 'targ' in pipe_type:
                token_id = "{}_{}_{}_SB{}".format(self.t_type, srms.obsid,
                                                  pipeline_step, token_file)
            else:
                token_id = "fields_$FIELDNAME_$OBSID_$PIPELINE: {}_{}_{}_{}_{}".format(
                    self.t_type, token_file, srms.obsid,
                    pipe_type.split('_')[1], time.time())

            logging.info(token_id)
            self.token_list.append(
                self.build_token(token_id,
                                 attachment={
                                     'name': 'srm.txt',
                                     'location': savefile.name
                                 }))
            self.token_list[-1]['STARTSB'] = token_file
            os.remove(savefile.name)
        self.token_list.add_token_views()

        if self.append_task:
            logging.info(self.append_task)
            logging.info(context)
            self.modify_fields(context)

        for token in self.token_list:
            token["OBSID"] = srms.obsid
            token['RESULTS_DIR'] += "/" + str(srms.obsid)

        token_variables['OBSID'] = srms.obsid
        token_variables['RESULTS_DIR'] += "/" + str(srms.obsid)

        # create result directory if not exist
        create_gsiftp_directory(token_variables['RESULTS_DIR'])

        logging.info('Token type is ' + self.t_type)
        logging.info(
            'Tokens are available at https://picas-lofar.grid.surfsara.nl:6984/_utils/database.html?'
            + pc.database + '/_design/' + self.t_type +
            '/_view/overview_total')
        logging.info("Token settings are :")
        for i in token_variables.items():
            logging.info(str(i))
        logging.debug(srms)

        self.token_list.save()
        results = dict()
        results['num_jobs'] = len(d)
        results['output_dir'] = token_variables['RESULTS_DIR']
        logging.info("output directory is {}".format(results['output_dir']))
        results['token_type'] = str(self.t_type)
        results['view'] = pipe_type
        results['OBSID'] = token_variables['OBSID']
        results['token_ids'] = [i['_id'] for i in self.token_list]
        return results
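
The slice_dicts helper is external to this snippet. Judging only by how d is consumed above (keys become token suffixes, values are iterated as srm lines), a compatible implementation could look like the following sketch; this is an assumption about its behavior, not the project's actual code:

def slice_dicts(pairs, chunk_size):
    """Group an iterable of (subband, srm) pairs into lists of
    chunk_size srms, keyed by the first subband of each chunk."""
    sliced = {}
    chunk, first_key = [], None
    for key, value in pairs:
        if not chunk:
            first_key = key
        chunk.append(value)
        if len(chunk) == chunk_size:
            sliced[first_key] = chunk
            chunk = []
    if chunk:  # keep any remainder as a final, smaller group
        sliced[first_key] = chunk
    return sliced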
Example #8
def exit_if_no_CI_tests(ci_dates_task, continue_task, exit_task, **context):
    task_data = get_task_instance(context, ci_dates_task)
    if task_data:
        return continue_task
    return exit_task
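
Because the callable returns a task id, it is meant to drive a BranchPythonOperator. A minimal wiring sketch, assuming the Airflow 1.x API; the continue/exit task ids are hypothetical:

from airflow.operators.python_operator import BranchPythonOperator

branch = BranchPythonOperator(
    task_id='check_ci_tests',  # hypothetical task id
    python_callable=exit_if_no_CI_tests,
    op_kwargs={'ci_dates_task': 'CI_dates',
               'continue_task': 'run_ci_tests',  # hypothetical downstream task
               'exit_task': 'exit_dag'},         # hypothetical downstream task
    provide_context=True,
    dag=dag,
)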