def start_job(self, name, data):
    """Starts a new job through the job_factory.

    Depending on the container the application is running in ('deavi'
    or 'gavip', as reported by the global configuration warehouse) a
    different factory method is used to build the job.

    Args:
        self: The object pointer.
        name: The name of the job to be started.
        data: The data to be passed as parameter to the job.

    Returns:
        None if the job could not be created, otherwise the result of
        starting the job.
    """
    container = wh_global_config().get().CONTAINER_NAME
    new_job = None
    if container == 'deavi':
        self.log.info('Starting deavi job')
        new_job = job_factory().get_deavi(name)
    elif container == 'gavip':
        self.log.info('Starting avi job')
        new_job = job_factory().get_avi(name)
    if not new_job:
        self.log.error('Error while initializing the job.')
        return None
    return new_job.start(data=data)
def init(self):
    """Initializes the algorithms in the database.

    Removes any leftover temporal algorithms from the database and then
    refreshes it from both the installed and the uploaded algorithm
    directories via update_database().

    Args:
        self: The object pointer.

    See:
        wh_global_config: avi.warehouse.wh_global_config
    """
    # Purge temporal algorithms left over from a previous run.
    for alg in algorithm_info_model.objects.all():
        if alg.algorithm_type == "temporal":
            alg.delete()
    cfg = wh_global_config().get()
    self.update_database(cfg.ALGORITHM_PATH, "installed")
    self.log.info(cfg.UPLOADED_ALGORITHM_PATH)
    self.update_database(cfg.UPLOADED_ALGORITHM_PATH, "uploaded")
def __init__(self):
    """Constructor

    Initializes the log, the warehouses, the configuration and the
    interface_manager.

    Args:
        self: The object pointer
    """
    # Configuration files live in ../config relative to this module.
    ipath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         '..', 'config')
    #wh_global_config().get().INPUT_PATH
    self.str_log_config_file = os.path.join(ipath, 'log_config.xml')
    self.str_config_file = os.path.join(ipath, 'config.xml')
    self.str_global_config_file = os.path.join(ipath, 'global_config.xml')
    # Configure logging first so every later step can report errors.
    from avi.utils.config_manager import logger_configuration
    log_cfg = logger_configuration()
    log_cfg.load(self.str_log_config_file)
    # NOTE(review): self.str_log_header is not assigned in this method;
    # presumably it is a class attribute -- confirm.
    self.log = logger().get_log(self.str_log_header)
    self.log.info("Initialization of RISEA...")
    self.log.info("RISEA ptr : %s",self.get_ptr())
    # loading cfg
    from avi.utils.config_manager import configuration_manager
    self.cfg = configuration_manager()
    if not self.cfg.load(self.str_config_file):
        self.log.error("Failed to load the configuration %s",
                       self.str_config_file)
        return
    # Initialization of the warehouses: the global configuration is
    # loaded with a fresh configuration_manager instance.
    cfg = configuration_manager()
    if not cfg.load(self.str_global_config_file):
        self.log.error("Failed to load the configuration %s",
                       self.str_global_config_file)
        return
    wh_global_config().get().load(cfg.get("global"))
    wh_frontend_config().get().load(cfg.get("frontend"))
    # Both the current and the home path start at the resources path.
    wh_frontend_config().get().\
        CURRENT_PATH = os.path.\
        normpath("".join(wh_global_config().get().RESOURCES_PATH))
    wh_frontend_config().get().\
        HOME_PATH = os.path.\
        normpath("".join(wh_global_config().get().RESOURCES_PATH))
    self.log.info("current id : %s\n resources id : %s",
                  id(wh_frontend_config().get().CURRENT_PATH),
                  id(wh_global_config().get().RESOURCES_PATH))
    # Initialization of the interface manager
    from .interface.interface_manager import interface_manager
    self.interface_manager = interface_manager()
    self.interface_manager.init(self.cfg)
    # Initialization of the resources
    from avi.utils.resources_manager import resources_manager
    resources_manager().init()
def save_tar_file(self, source, name, path = None, id = "",
                  task_name = "", date = ""):
    """Compresses a directory into a gzipped tar file and registers it.

    The source directory is archived as <path>/<name> and a
    resource_model entry is created with the file information. If the
    database update fails, the file information is appended to a
    temporal file so the database can be updated later.

    Args:
        self: The object pointer
        source: The source directory to be saved
        name: The name of the file
        path: The path of the file; defaults to RESULTS_PATH
        id: The job id associated with the task that created the file
        task_name: The name of the task that created this file
        date: The date of creation

    Returns:
        The absolute path to the created file

    See:
        resource_model: avi.models.resource_model
    """
    if not path:
        self.log.info("Using default RESULTS_PATH")
        path = wh_global_config().get().RESULTS_PATH
    target = os.path.join(path, name)
    with tarfile.open(target, "w:gz") as tar:
        tar.add(source, arcname=os.path.basename(source))
    abs_path = os.path.abspath(target)
    self.log.info("Data saved in: %s", target)
    try:
        from avi.models import resource_model
        resource_model(sort_name=name, name=name, path=path,
                       file_type=task_name, job_id=id,
                       date=date).save()
    except Exception:
        # Best effort: record the entry in a temporal file keyed by
        # the job id so the database can be updated later.
        self.log.warning("Something went wrong while updating the db"
                         " saving tmp file to update the db later")
        tmp_dir = wh_global_config().get().TMP_PATH
        with open(os.path.join(tmp_dir, str(id)), "a") as tmp:
            tmp.write("%s;%s;%s;%s;%s"%(name, path, task_name,
                                        str(id), str(date)))
    return abs_path
def _download_image(self, obs_urn, obsid, level, instrument):
    """Downloads the fits images referenced by a context HDU.

    Retrieves the context fits for the given processing level, parses
    it for band-key URNs and downloads one fits file per band key into
    the download path.

    Args:
        self: The object pointer
        obs_urn: Mapping of processing level to URN
        obsid: The observation id (bytes; decoded as utf-8)
        level: The processing level to retrieve
        instrument: The instrument name
    """
    self.log.info("Downloading fits image...")
    self.log.info("Retrieving %s with urn %s", level, obs_urn[level])
    hdu = self._get_fits(obs_urn[level])
    # TODO: check obsid001, minobsid, fix hs header
    self.log.debug("hdu bandkey")
    bandkey_urn = self._parse_context_hdu(hdu)
    if not bandkey_urn:
        self.log.info("Skipping level %s", level)
        return
    for band in bandkey_urn:
        self.log.info("Downloading herschel fits, " + \
                      " obsid=%s, level=%s, bandkey=%s, instrument=%s", \
                      obsid, level, band, instrument)
        # TODO: check path and create sub dirs, proper name, etc
        name = "%s_%s_%s_%s_%s.fits" % (obsid.decode('utf-8'),
                                        instrument, level, band,
                                        self._files_name)
        # Expand the configured file-name template for the hsa mission.
        full_name = wh_global_config().get().SOURCES_FMT % {
            "mission": "hsa",
            "date": str(round(time.time())),
            "name": name}
        file_name = os.path.join(self._download_path, full_name)
        hdulist = self._get_fits(bandkey_urn[band], file_name, True)
        hdulist.close()
def start(self, data):
    """Runs the relaunch_algorithm job.

    Parses the 'pk' entry of the data parameter (a string
    representation of a dictionary) with literal_eval, looks up the
    algorithm model by name to attach its primary key, and strips the
    Gaia/HSA source directories from the corresponding file parameters
    before handing the parameters back for relaunching.

    Args:
        self: The object pointer.
        data: A dictionary containing the input data for the job; must
            contain the key 'pk' with the serialized algorithm data.

    Returns:
        The job_data attribute holding the algorithm parameters to be
        relaunched.
    """
    wh = wh_frontend_config().get()
    gwh = wh_global_config().get()
    log = logger().get_log("relaunch")
    log.info("inside relaunch_algorithm job")
    # data['pk'] is a string; literal_eval turns it into a dictionary.
    qparams = literal_eval(data['pk'])
    algorithm = qparams['algorithm']
    # Attach the primary key of the algorithm model to the parameters.
    model = algorithm_info_model.objects.get(name=algorithm['name'])
    algorithm['pk'] = model.pk
    params = algorithm['params']
    # Strip the source directories from file parameters so only the
    # bare file names are relaunched.
    if "Gaia" in params.keys():
        prefix = str(gwh.SOURCES_PATH) + "/gaia/"
        params['Gaia'] = params['Gaia'].replace(prefix, '')
        log.info(params['Gaia'])
    if "HSA" in params.keys():
        prefix = str(gwh.SOURCES_PATH) + "/hsa/"
        params['HSA'] = params['HSA'].replace(prefix, '')
        log.info(params['HSA'])
    data = {}
    self.job_data.data = algorithm
    log.info("params " + str(algorithm))
    return self.job_data
def init(self, cfg):
    """Initializes the herschel interface.

    Stores the herschel archive URLs from the given configuration and
    points both the download and the temporal path at the configured
    HSA path, then delegates to the parent init to finish the
    initialization.

    Args:
        self: The object pointer
        cfg: The configuration to be loaded.

    Returns:
        True if the configuration is loaded correctly, False otherwise

    See:
        archive_interface:
            avi.core.interface.archive_interface.archive_interface
    """
    self._metadata_url = cfg['metadata_url']
    self._product_url = cfg['product_url']
    self._data_url = cfg['data']
    hsa_path = wh_global_config().get().HSA_PATH
    self._download_path = hsa_path
    self._tmp_path = hsa_path
    return super(herschel, self).init(cfg)
def get_algorithm_info(self, data):
    """Deprecated?

    Builds the run parameters of an algorithm from posted form data.

    Loads the JSON definition of the algorithm named in
    data['algorithm'], validates it with __check_json and converts each
    posted input value to the type declared in the definition.

    Args:
        self: The object pointer
        data: Dictionary of posted data; must contain the key
            'algorithm' and one '<algorithm>_<input>' entry per
            declared input.

    Returns:
        A dictionary {'algorithm': {'name': ..., 'params': {...}}}, or
        None if the data or the JSON definition is invalid.
    """
    self.log.debug(str(data))
    if "algorithm" not in data:
        self.log.error("Not algorithm")
        return None
    path = wh_global_config().get().ALGORITHM_PATH
    jm = json_manager()
    alg_name = str(data['algorithm'][0])
    self.log.debug("Algoritm name %s", alg_name)
    # NOTE(review): plain concatenation assumes ALGORITHM_PATH ends
    # with a separator -- kept for consistency with the other readers.
    json_file = path + alg_name + ".json"
    if not os.path.isfile(json_file):
        self.log.error("Not algorithm 1")
        return None
    jm.load(json_file)
    if not self.__check_json(jm.json_root):
        self.log.error("Not algorithm 2")
        return None
    ret = {"algorithm": {"name": alg_name, "params": {}}}
    params = ret['algorithm']['params']
    # Convert every declared input from its posted string form to the
    # type declared in the JSON definition.
    for key, spec in jm.json_root['algorithm']['input'].items():
        param_name = spec['name']
        post_name = alg_name + "_" + key
        self.log.debug("param_name %s post_name %s data %s",
                       param_name, post_name, "")
        params[param_name] = self.__get_data(data[post_name][0],
                                             spec['type'])
    return ret
def save_file_info(self, name, file_name, id, task_name, date):
    """Saves the information of a given file.

    Creates and stores a resource_model describing the given file. If
    the database update fails, the file information is appended to a
    temporal file so the database can be updated later.

    Args:
        self: The object pointer
        name: The name of the file
        file_name: The full name of the file
        id: The job id associated with the task that created this file
        task_name: Name of the task that created this file
        date: The date of creation

    Returns:
        The created resource_model, or None when the database update
        failed.

    See:
        resource_model: avi.models.resource_model
    """
    base_name = os.path.basename(file_name)
    dir_name = os.path.dirname(file_name)
    try:
        from avi.models import resource_model
        model = resource_model(sort_name=name, name=base_name,
                               path=dir_name, file_type=task_name,
                               job_id=id, date=date)
        model.save()
        return model
    except Exception:
        # Best effort: record the entry in a temporal file keyed by
        # the job id so the database can be updated later.
        self.log.warning("Something went wrong while updating the db"
                         " saving tmp file to update the db later")
        self.log.warning(traceback.format_exc())
        tmp_dir = wh_global_config().get().TMP_PATH
        with open(os.path.join(tmp_dir, str(id)), "a") as tmp:
            tmp.write("%s;%s;%s;%s;%s"%(name, dir_name, task_name,
                                        str(id), str(date)))
def start(self, data):
    """This method runs the get_files job.

    This method will retrieve all the allowed files and directories to
    be shown in the user interface.

    It uses the resources_manager to get the path information and then
    uses the discard_files to discard the ones that should not be
    shown.

    After that it will paginate the results with the current page
    retrieved from the wh_frontend_config warehouse.

    Args:
        self: The object pointer.
        data: A dictionary containing the input data for the job.

    Returns:
        The job_data attribute. The ok attribute provides the pages
        information.

    @see resources_manager
    @link avi.utils.resources_manager.resources_manager
    @see wh_frontend_config
    @link avi.warehouse.wh_frontend_config
    """
    log = logger().get_log("views")
    wh = wh_frontend_config().get()
    gwh = wh_global_config().get()
    #dirs = data[0]
    #files = data[1]
    # A leading '-' marks descending order; strip it for the
    # comparisons below.
    sorting_wh = wh.SORTING_RESOURCES_BY
    if sorting_wh[0] == '-':
        sorting_wh = sorting_wh[1:]
    order_by = ''
    all_files = self.discard_files(resources_manager().get_list(
        wh.CURRENT_PATH))
    #-------------------------------------------------------------------
    # Per-category listings shown by the file manager.
    # NOTE(review): these paths are hard coded -- presumably they mirror
    # the warehouse *_PATH settings; confirm.
    #log.info("Current path!!" + str(wh.CURRENT_PATH))
    #log.info("all filess: " + str(all_files))
    gaia_files = self.discard_files(
        resources_manager().get_list("/data/output/sources/gaia"))
    #log.info("gaia filess: " + str(gaia_files))
    gaia = resources_manager().get_info(gaia_files,
                                        "/data/output/sources/gaia")
    #log.info("gaia data: " + str(gaia))
    hsa_files = self.discard_files(
        resources_manager().get_list("/data/output/sources/hsa"))
    #log.info("hsa filess: " + str(hsa_files))
    hsa = resources_manager().get_info(hsa_files,
                                       "/data/output/sources/hsa")
    #log.info("hsa data: " + str(hsa))
    sim_files = self.discard_files(
        resources_manager().get_list("/data/output/sources/sim"))
    sim = resources_manager().get_info(sim_files,
                                       "/data/output/sources/sim")
    results_files = self.discard_files(
        resources_manager().get_list("/data/output/results"))
    #log.info("results filess: " + str(results_files))
    results_data = resources_manager().get_info(results_files,
                                                "/data/output/results")
    #log.info("results data: " + str(results_data))
    user_files = self.discard_files(
        resources_manager().get_list("/data/output/user"))
    #log.info("user filess: " + str(user_files))
    user_data = resources_manager().get_info(user_files,
                                             "/data/output/user")
    #log.info("user data: " + str(user_data))
    #-------------------------------------------------------------------
    all_files.sort()
    # Paginate and clamp the current page (stored in the warehouse) to
    # the valid range before fetching the page.
    pg = Paginator(all_files, wh.MAX_RESOURCES_PER_PAGE)
    page = wh.CURRENT_RESOURCES_PAGE
    if page < 1:
        wh.CURRENT_RESOURCES_PAGE = 1
    elif page > pg.num_pages:
        wh.CURRENT_RESOURCES_PAGE = pg.num_pages
    files = pg.page(wh.CURRENT_RESOURCES_PAGE)
    f, d = resources_manager().get_info(files, wh.CURRENT_PATH)
    log.info(f)
    log.info(sorting_wh)
    # Sort files (f) and directories (d) by size (dict value) or by
    # name (dict key).
    if sorting_wh == 'size':
        f = collections.OrderedDict(sorted(f.items(),
                                           key=lambda x: x[1]))
        d = collections.OrderedDict(sorted(d.items(),
                                           key=lambda x: x[1]))
    elif sorting_wh == 'name':
        f = collections.OrderedDict(sorted(f.items(),
                                           key=lambda x: x[0]))
        d = collections.OrderedDict(sorted(d.items(),
                                           key=lambda x: x[0]))
    #Parse for the filemanager breadcrumb
    p = wh.CURRENT_PATH
    path_to_eliminate = gwh.RESOURCES_PATH
    #path_to_eliminate = re.sub("/results", '', path_to_eliminate) fail
    #p = gwh.RESULTS_PATH
    p = re.sub(path_to_eliminate, '', p)
    #p = path_to_eliminate
    #End parse for the filemanager breadcrumb
    p = p.split("/")
    self.job_data.data = [f, d, p, gaia, hsa, sim, results_data,
                          user_data]
    # ok carries: total pages, current page, next page, previous page.
    self.job_data.ok = (pg.num_pages, wh.CURRENT_RESOURCES_PAGE, \
                        wh.CURRENT_RESOURCES_PAGE + 1,
                        wh.CURRENT_RESOURCES_PAGE - 1)
    return self.job_data
def init(self):
    """Initialization method

    Creates the needed directories to store the resources.

    Args:
        self: The object pointer
    """
    gconf = wh_global_config().get()
    # TODO: exception management
    # Each directory is created best-effort: any OSError (already
    # exists, permissions, ...) is ignored.
    if not self.dir_exists(gconf.SOURCES_PATH):
        try:
            os.makedirs(gconf.SOURCES_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.GAIA_PATH):
        try:
            os.makedirs(gconf.GAIA_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.HSA_PATH):
        try:
            os.makedirs(gconf.HSA_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.SIM_PATH):
        try:
            os.makedirs(gconf.SIM_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.RESULTS_PATH):
        try:
            os.makedirs(gconf.RESULTS_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.TMP_PATH):
        try:
            os.makedirs(gconf.TMP_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.UPLOADED_ALGORITHM_PATH):
        try:
            os.makedirs(gconf.UPLOADED_ALGORITHM_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.TMP_ALGORITHM_PATH):
        try:
            os.makedirs(gconf.TMP_ALGORITHM_PATH)
        except OSError:
            pass
    if not self.dir_exists(gconf.USER_PATH):
        try:
            os.makedirs(gconf.USER_PATH)
        except OSError:
            pass
    else:
        # NOTE(review): when USER_PATH already exists the temporal
        # algorithm directory is purged; it looks like this 'else' was
        # meant for the TMP_ALGORITHM_PATH check above -- confirm.
        import shutil
        for f in os.listdir(gconf.TMP_ALGORITHM_PATH):
            full_path = os.path.join(gconf.TMP_ALGORITHM_PATH, f)
            try:
                if os.path.isfile(full_path):
                    os.unlink(full_path)
                elif os.path.isdir(full_path):
                    shutil.rmtree(full_path)
            except Exception:
                pass
        pass
def start(self, data):
    """This method runs the launch job.

    This method will launch the asynchronous job provided in the data
    parameter.

    The data parameter must have the key 'id' containing the primary
    key of the query and the mission to be launched, and the key
    'mission' unset.

    It will first split the key 'id' by '-' and save the primary key
    into data['id'] and the mission into data['mission']. Then it will
    get all the algorithms and take the valid algorithms for the query
    by comparing data['mission'] with the inputs of the algorithms.
    Finally it will set all the parameters to be returned into the
    'ret' variable.

    Args:
        self: The object pointer.
        data: A dictionary containing the input data for the job.

    Returns:
        The job_data attribute whose data holds the mission, the valid
        algorithms and the files of the query.
    """
    wh = wh_frontend_config().get()
    gwh = wh_global_config().get()
    log = logger().get_log("launch")
    log.info("inside launch job")
    ret={}
    cont = 0
    ids = []
    # 'id' arrives as '<pk>-<mission>'; split it into its two parts.
    data['mission'] = data['id'].split('-')[1]
    data['id'] = data['id'].split('-')[0]
    ret['mission'] = data['mission']
    ret['algorithms'] = {}
    pk={}
    #Take valid alorithms for the query
    # NOTE(review): get_algorithms.start is called unbound with this
    # job's self -- presumably the job classes share a compatible
    # interface; confirm.
    m = get_algorithms.start(self, None)
    #for i in range(len(m.data['algorithms'])):
    #    ids.append(m.data['algorithms'][i][0])
    log.info(m.data)
    # Collect the primary keys of every algorithm in every group.
    for group in m.data['algorithms']:
        for i in group['algorithms']:
            ids.append(i[0])
    # Keep only the algorithms that accept the query's mission as one
    # of their input types.
    for j in ids:
        pk['id'] = j
        model = get_algorithm_info.start(self, pk)
        for a in list(model.data['algorithm']['input'].keys()):
            #if ret['mission'].lower() == model.data['algorithm']['input'][a]['view_name'].lower():
            if ret['mission'].lower() in model.data['algorithm']['input'][a]['type'].lower():
                ret['algorithms'][cont] = {}
                ret['algorithms'][cont]['pk'] = pk['id']
                ret['algorithms'][cont]['view_name'] = model.data['algorithm']['view_name']
                log.info(cont)
                cont = cont + 1
                break
    #Set all the return data
    query_info = get_query_info.start(self,data)
    query_info_data = dict(query_info.data)
    if 'files' in query_info_data:
        ret['files'] = query_info_data['files']
    else:
        ret['files'] = 'No data found'
    log.info("return of job_launch: "+str(ret))
    self.job_data.data = ret
    return self.job_data
def save_file_binary_data(self, data, name, path = None, id = "",
                          task_name = "", date = ""):
    """Saves a binary data file (Deprecated)

    Saves the given data into a file with the given name and creates a
    resource_model with the file information. If the database update
    fails, the file information is appended to a temporal file so the
    database can be updated later.

    Args:
        self: The object pointer
        data: The binary data to be saved
        name: The name of the file
        path: The path of the file; defaults to RESULTS_PATH
        id: The job id associated with the task that created the file
        task_name: The name of the task that created this file
        date: The date of creation

    Returns:
        The path to the created file

    See:
        resource_model: avi.models.resource_model
    """
    # Fix: the body referenced path/id/task_name/date without them
    # being parameters (NameError at runtime); they are now keyword
    # parameters with defaults, mirroring save_tar_file's signature.
    if not path:
        self.log.info("Using default RESULTS_PATH")
        path = wh_global_config().get().RESULTS_PATH
    output_file_name = os.path.join(path, name)
    # 'with' guarantees the handle is closed even if the write fails.
    with open(output_file_name, "wb") as output_file:
        output_file.write(data)
    res = os.path.abspath(output_file_name)
    self.log.info("Data saved in: %s", output_file_name)
    try:
        from avi.models import resource_model
        model = resource_model(sort_name = name, name = name,
                               path = path,
                               file_type = task_name,
                               job_id = id, date = date)
        model.save()
    except Exception:
        # Best effort: record the entry in a temporal file keyed by
        # the job id so the database can be updated later.
        self.log.warning("Something went wrong while updating the db"
                         " saving tmp file to update the db later")
        tmp_path = wh_global_config().get().TMP_PATH
        full_tmp_path = os.path.join(tmp_path, str(id))
        with open(full_tmp_path, "a") as tmp:
            tmp.write("%s;%s;%s;%s;%s"%(name, path, task_name,
                                        str(id), str(date)))
    return res
def get_algorithm_list(self):
    """Deprecated

    Returns the algorithm list.

    Scans the algorithm directory for JSON definition files that have
    a matching .py implementation, validates each definition with
    __check_json and collects them into a dictionary keyed by
    algorithm name.

    Args:
        self: The object pointer.

    Returns:
        A dictionary {'alg_data': {<name>: <definition>, ...}} with
        the information of all the algorithms.
    """
    # Fix: removed dead code -- the unused pagination reads, the unused
    # 'data2' sample literal and the unreachable second return.
    path = wh_global_config().get().ALGORITHM_PATH
    self.log.info("Reading algorithms data from %s", path)
    data = {}
    jm = json_manager()
    for f in os.listdir(path):
        if not f.endswith(".json"):
            continue
        self.log.info("File %s", f)
        name, fext = os.path.splitext(f)
        name = name + ".py"
        self.log.info("Checking file %s", name)
        # Only list algorithms whose implementation file exists.
        if not os.path.isfile(path + name):
            continue
        self.log.info("Algorithm file found, reading data file now")
        jm.load(path + f)
        self.log.info("Data loaded: %s", jm.json_root)
        if not self.__check_json(jm.json_root):
            continue
        self.log.info("JSON checked correctly")
        alg_name = jm.json_root['algorithm']['name']
        data[alg_name] = jm.json_root['algorithm']
    return {"alg_data": data}