def test_run_outlier_only(mk_tmp_dirs):
    """Test a basic run"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')
    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_default.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.outlier_detection.save_results=true',
        '--steps.resample_spec.skip=true',
        '--steps.cube_build.skip=true',
        '--steps.extract_1d.skip=true',
    ]
    Step.from_cmdline(args)

    assert False
def test_run_full(mk_tmp_dirs):
    """Test a full run"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')
    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_default.cfg'),
        asn_path,
    ]
    Step.from_cmdline(args)

    assert False
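# Sketch of the imports these spec2/spec3 pipeline tests appear to rely on.
# This block is an assumption, not part of the original file: the exact module
# paths may differ, and DATAPATH, SCRIPT_DATA_PATH and the mk_tmp_dirs fixture
# are defined elsewhere in the test suite.
from glob import glob
from os import path
from shutil import copy as file_copy

from jwst.associations import load_asn  # assumed module path
from jwst.stpipe import Step            # assumed module path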
def test_run_msaflagging(mk_tmp_dirs, caplog):
    """Test msa flagging operation"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy msa config files from DATAPATH to
    # current working directory
    file_copy(path.join(DATAPATH, 'jw95065006001_0_msa_twoslit.fits'), '.')

    asn_path = update_asn_basedir(
        path.join(DATAPATH, 'mos_udf_g235m_twoslit_spec2_asn.json'),
        root=path.join(DATAPATH, 'level2a_twoslit')
    )
    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'),
        asn_path,
        '--steps.msa_flagging.skip=false'
    ]
    Step.from_cmdline(args)

    assert 'Step msa_flagging running with args' in caplog.text
    assert 'Step msa_flagging done' in caplog.text

    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        prod_name = product['name'] + '_cal.fits'
        assert path.isfile(prod_name)
def install_my_kernel_spec(user=False):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        path_of_file = dirname(abspath(__file__))
        file_copy(path_of_file + "/mikrokosmoskernel/kernel.js", td)
        file_copy(path_of_file + "/mikrokosmoskernel/kernel.json", td)
        print('Installing Jupyter kernel spec')
        install_kernel_spec(td, 'IMikrokosmos', user=user, replace=True)
def upgrade_thread():
    global in_progress
    global file_count, manifest_checksum, manifest_size, manifest_url, version
    try:
        common.log("Firmware upgrade start")
        gc.collect()
        manifest_pathname = "manifest"
        shutil.path_remove(manifest_pathname)
        # TODO: Remove all "*_new" and "*_old"
        aiko.web_client.http_get_file(manifest_url, manifest_pathname)
        # TODO: Verify "manifest_pathname" actual file size versus "manifest_size"
        # TODO: Verify "manifest_pathname" checksum

        top_level_files = []
        url_prefix = manifest_url.rpartition("/")[0]

        with open(manifest_pathname, "r") as manifest_file:
            file_index = 0
            for line in manifest_file.readlines():
                file_index += 1
                file_checksum, file_size, filepath = line.split()
                url_suffix = filepath.partition("/")[-1]
                file_url = "/".join([url_prefix, url_suffix])
                pathname = url_suffix.partition("/")
                if not pathname[0] in top_level_files:
                    top_level_files.append(pathname[0])
                pathname = "".join([pathname[0] + "_new"] + list(pathname[1:]))
                print(file_url + " --> " + pathname)
                common.log("Firmware get ... %d of %d" % (file_index, file_count))
                aiko.web_client.http_get_file(file_url, pathname)
                # TODO: Verify actual file size versus size stated in the "manifest"
                # TODO: Verify actual file checksum

        shutil.path_remove(manifest_pathname)
        shutil.file_copy("configuration/net.py", "configuration_new/net.py")
        shutil.file_copy("configuration/keys.db", "configuration_new/keys.db")

        common.log("Firmware install")
        for file in top_level_files:
            try:
                print("Rename %s to %s" % (file + "_new", file))
                shutil.path_remove(file)
                os.rename(file + "_new", file)
            except OSError:
                print("OSError")

        common.log("Firmware upgrade success !")
        common.log("Please reboot :)")
    except Exception as exception:
        common.log("Firmware upgrade failed :(")
        import sys
        sys.print_exception(exception)
    finally:
        in_progress = False
        version = None
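# Illustrative sketch (not part of the original source): the parser above assumes
# each manifest line holds three whitespace-separated fields,
#
#     <checksum> <size> <filepath>
#
# The checksum, size, and path below are hypothetical example values.
example_manifest_line = "d41d8cd98f 1024 aiko/examples/main.py"
file_checksum, file_size, filepath = example_manifest_line.split()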
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        path_of_file = dirname(abspath(__file__)) + "/resources/"
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing IPython kernel spec')
        install_kernel_spec(td, 'Singular', user=user, replace=True)
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        # TODO: Copy resources once they're specified
        path_of_file = dirname(abspath(__file__)) + "/resources/"
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing Jupyter kernel spec from')
        install_kernel_spec(td, 'gap', user=user, replace=True)
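# Illustrative sketch (assumption, not from the original source): the `kernel_json`
# dict serialized by the installers above typically follows the standard Jupyter
# kernel-spec layout. The module name "some_kernel" and the display values below
# are hypothetical placeholders.
import sys

kernel_json = {
    "argv": [sys.executable, "-m", "some_kernel", "-f", "{connection_file}"],
    "display_name": "Some Kernel",
    "language": "some-language",
}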
def test_run_extract_1d_resample_mock(mk_tmp_dirs):
    """Test only the extraction step. Should produce nothing
    because extraction requires resampling
    """
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')
    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_mock.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.outlier_detection.skip=true',
        '--steps.cube_build.skip=true',
    ]
    Step.from_cmdline(args)

    # Though the calibration is not run, the conversion to
    # source base has occurred. Check.
    with open(asn_path) as fd:
        asn = load_asn(fd)
    product_name_template = asn['products'][0]['name']
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_cal.fits'
    assert len(glob(product_name_glob)) == 2

    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_s2d.fits'
    assert len(glob(product_name_glob)) == 2

    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_x1d.fits'
    assert len(glob(product_name_glob)) == 2
def test_msa_missing_nofail(mk_tmp_dirs, caplog):
    """Test MSA missing failure"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    input_file = 'F170LP-G235M_MOS_observation-6-c0e0_001_DN_NRS1_mod.fits'
    file_copy(path.join(DATAPATH, 'level2a_twoslit', input_file),
              tmp_data_path)

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'),
        path.join(tmp_data_path, input_file),
        '--fail_on_exception=false'
    ]
    Step.from_cmdline(args)

    assert 'Unable to open MSA FITS file (MSAMETFL)' in caplog.text
def test_msa_missing_skip(mk_tmp_dirs, caplog):
    """Test MSA missing failure"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    input_file = 'F170LP-G235M_MOS_observation-6-c0e0_001_DN_NRS1_mod.fits'
    file_copy(path.join(DATAPATH, 'level2a_twoslit', input_file),
              tmp_data_path)

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'),
        path.join(tmp_data_path, input_file),
        '--steps.assign_wcs.skip=true'
    ]
    Step.from_cmdline(args)

    assert 'Aborting remaining processing for this exposure.' in caplog.text
def test_run_nosteps(mk_tmp_dirs):
    """Test where no steps execute"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')
    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_default.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.outlier_detection.skip=true',
        '--steps.resample_spec.skip=true',
        '--steps.cube_build.skip=true',
        '--steps.extract_1d.skip=true',
    ]
    Step.from_cmdline(args)

    # Check for the Source-based cal name.
    with open(asn_path) as fp:
        asn = load_asn(fp)
    product_name_template = asn['products'][0]['name']
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_cal.fits'
    assert len(glob(product_name_glob)) == 2

    # Check that no other products were built
    files = glob('*s3d*')
    files.extend(glob('*s2d*'))
    files.extend(glob('*x1d*'))
    assert not files
def test_run_outlier_only_mock(mk_tmp_dirs):
    """Test a basic run"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(
        path.join(DATAPATH, 'two_member_spec3_asn.json'),
        tmp_data_path
    )
    asn_path = path.join(tmp_data_path, 'two_member_spec3_asn.json')
    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2b_twoslit', member['expname']),
                tmp_data_path
            )

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec3_mock.cfg'),
        asn_path,
        '--steps.mrs_imatch.skip=true',
        '--steps.resample_spec.skip=true',
        '--steps.cube_build.skip=true',
        '--steps.extract_1d.skip=true',
    ]
    Step.from_cmdline(args)

    # Check for the Source-based cal name.
    with open(asn_path) as fp:
        asn = load_asn(fp)
    product_name_template = asn['products'][0]['name']
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_cal.fits'
    assert len(glob(product_name_glob)) == 2

    # Check for the outlier results
    product_name_glob = product_name_template.format(
        source_id='s0000[14]',
    ) + '_crj.fits'
    assert len(glob(product_name_glob)) == 2
def _store_output_data(
    data_object_path, data_directory, output_to_store, results_by_id,
):
    """Collects all of the simulation data to store, and saves it into a
    directory whose path will be passed to the storage backend to process.

    Parameters
    ----------
    data_object_path: str
        The file path to serialize the data object to.
    data_directory: str
        The path of the directory to store ancillary data in.
    output_to_store: BaseStoredData
        An object which contains `ProtocolPath`s pointing to the data to store.
    results_by_id: dict of ProtocolPath and any
        The results of the protocols which formed the property estimation
        workflow.
    """
    makedirs(data_directory, exist_ok=True)

    for attribute_name in output_to_store.get_attributes(StorageAttribute):
        attribute = getattr(output_to_store.__class__, attribute_name)
        attribute_value = getattr(output_to_store, attribute_name)

        if not isinstance(attribute_value, ProtocolPath):
            continue

        attribute_value = results_by_id[attribute_value]

        if issubclass(attribute.type_hint, FilePath):
            file_copy(attribute_value, data_directory)
            attribute_value = path.basename(attribute_value)

        setattr(output_to_store, attribute_name, attribute_value)

    with open(data_object_path, "w") as file:
        json.dump(output_to_store, file, cls=TypedJSONEncoder)
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        path_of_file = dirname(abspath(__file__)) + "/resources/"
        filenames = ["Detector.js", "three.js", "kernel.js"]
        filenames_renderer = ["CanvasRenderer.js", "Projector.js"]
        filenames_control = ["TrackballControls.js"]
        for i in filenames:
            file_copy(path_of_file + i, td)
        os.mkdir(os.path.join(td, "renderers"), mode=0o755)
        for i in filenames_renderer:
            file_copy(path_of_file + "renderers/" + i,
                      os.path.join(td, "renderers"))
        os.mkdir(os.path.join(td, "controls"), mode=0o755)
        for i in filenames_control:
            file_copy(path_of_file + "controls/" + i,
                      os.path.join(td, "controls"))
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing IPython kernel spec')
        install_kernel_spec(td, 'polymake', user=user, replace=True)
def test_run_msaflagging(mk_tmp_dirs, caplog):
    """Test msa flagging operation"""
    tmp_current_path, tmp_data_path, tmp_config_path = mk_tmp_dirs

    # Copy necessary data to the tmp_data_path
    file_copy(path.join(DATAPATH, 'jw95065006001_0_msa_twoslit.fits'),
              tmp_data_path)
    file_copy(path.join(DATAPATH, 'mos_udf_g235m_twoslit_spec2_asn.json'),
              tmp_data_path)
    asn_path = path.join(tmp_data_path, 'mos_udf_g235m_twoslit_spec2_asn.json')
    with open(asn_path) as fp:
        asn = load_asn(fp)
    for product in asn['products']:
        for member in product['members']:
            file_copy(
                path.join(DATAPATH, 'level2a_twoslit', member['expname']),
                tmp_data_path)

    args = [
        path.join(SCRIPT_DATA_PATH, 'calwebb_spec2_basic.cfg'),
        asn_path,
        '--steps.msa_flagging.skip=false'
    ]
    Step.from_cmdline(args)

    assert 'Step msa_flagging running with args' in caplog.text
    assert 'Step msa_flagging done' in caplog.text

    for product in asn['products']:
        prod_name = product['name'] + '_cal.fits'
        assert path.isfile(prod_name)
def add_song(file, artist, song_name, date, *tags):
    try:
        fileDetails = file.split(".")
        file_name = fileDetails[0]
        type = fileDetails[1].lower()
        id = str(uuid4())

        if type not in accepted_types:
            raise Exception("Unrecognized file format!")

        # Format params
        artist = artist.replace("_", " ")
        song_name = song_name.replace("_", " ")

        # Check date
        if match(r'(\d{2})[./-](\d{2})[./-](\d{4})$', date) is None:
            raise Exception("Invalid Date")

        # Copy file and rename it to ID.type
        file_copy(file, "Storage")
        os.rename(f"Storage/{file}", f"Storage/{id}.{type}")

        data = {
            "ID": id,
            "file name": file_name,
            "type": type,
            "artist": artist,
            "song name": song_name,
            "date": date,
            "tags": tags
        }
        db.insert(data)
        return f"Success! Song id: {id}"
    except Exception as error:
        return f"Failure! {error}"
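# Illustrative usage sketch (assumption, not from the original source): assumes an
# "mp3" entry in `accepted_types`, an existing "Storage/" directory, and a
# TinyDB-like `db`. The file name, artist, song name, and tags are hypothetical.
result = add_song("my_track.mp3", "Some_Artist", "Some_Song", "01.02.2024",
                  "rock", "demo")
print(result)  # "Success! Song id: <uuid>" on success, "Failure! <reason>" otherwise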
def run(self):
    step = 0
    size = len(self.data)
    while step < size:
        print('.', end='')
        # Strip the newline characters
        path = self.data[step].replace('\n', '')
        filename = path.split('\\')[-1]
        try:
            npath = os.path.join(self.folder, filename)
            file_copy(path, npath)
            self.gauge.step()
            self.blist.insert(tk.END, "[%s] %s" % (str(step + 1).zfill(3), filename))
        except:
            time = datetime.datetime.now().strftime("[%d-%m-%Y %H:%M:%S]")
            text = "%s - ERROR AL COPIAR: %s \n" % (time, path)
            self.log.write(text)
            self.blist.insert(tk.END, "[%s] ERROR %s" % (str(step + 1).zfill(3), filename))
        # time.sleep(10)  # put the thread to sleep
        step += 1
    self.is_run = False
def install_my_kernel_spec(user=True):
    with TemporaryDirectory() as td:
        os.chmod(td, 0o755)  # Starts off as 700, not user readable
        with open(os.path.join(td, 'kernel.json'), 'w') as f:
            json.dump(kernel_json, f, sort_keys=True)
        path_of_file = dirname(abspath(__file__)) + "/jupyter_kernel_polymake/resources/"
        filenames = [
            "three.js",
            "Detector.js",
            "controls/TrackballControls.js",
            "renderers/SVGRenderer.js",
            "renderers/CanvasRenderer.js",
            "renderers/Projector.js",
            "menu.svg",
            "close.svg"
        ]
        for i in filenames:
            file_copy(path_of_file + i, td)
        file_copy(path_of_file + "kernel.js", td)
        file_copy(path_of_file + "logo-32x32.png", td)
        file_copy(path_of_file + "logo-64x64.png", td)
        print('Installing jupyter kernel spec for polymake')
        install_kernel_spec(td, 'polymake', user=user, replace=True)
def tag_learn_test(**kwargs):  # 90201
    # tag_files args:
    input_path: str = kwargs['input_parses']
    output_path: str = kwargs['output_grammar']  # ['out_path'] ?
    corpus: str = kwargs['output_grammar']
    cp: str
    log = {}

    if 'input_grammar' not in kwargs:
        rulez, re01 = learn(**kwargs)
        log.update(re01)
        new_dict_path = re01['grammar_file']
    else:  # tag and learn
        # print('else: tag and learn')
        #?kwargs['out_path'] = kwargs['output_grammar']  # used in tag_files only
        # if 'out_path' in kwargs:
        #     out_path: str = kwargs['out_path']
        #     del kwargs['out_path']  # tag_files uses kwargs['output_grammar'] instead
        key_dict_path: str = kwargs['input_grammar']  # dict for tagging
        re02 = tag_files(**kwargs)
        log.update(re02)
        #-kwargs['input_parses'] = re1['tagger_output_path'] + '/tagged_ull'
        kwargs['input_parses'] = output_path + '/tagged_ull'
        check_dir(kwargs['input_parses'], False, 'max')
        #-kwargs['output_grammar'] = kwargs['out_path']
        rulez, re03 = learn(**kwargs)  # rulez: dict FIXME: return
        log.update(re03)

        # Decode .dict:
        new_dict_path = re03['grammar_file']
        with open(new_dict_path, 'r') as f:
            d: list = f.read().splitlines()  # TODO? split at dict2list?
        tagged_dict_path = file_copy(new_dict_path, new_dict_path + '.tagged')
        with open(key_dict_path, 'r') as f:
            kd: list = f.read().splitlines()  # TODO? split at dict2list?
        clusters: dict = dict2lists(kd, **kwargs)
        with open(new_dict_path, 'w') as f:
            f.write(decode_dict(d, clusters))
        # TODO: single def to decode dict, input -- 2*strings:
        # with open(key_dict_path, 'r') as f: kd = f.read()  # string
        # with open(new_dict_path, 'r') as f: d = f.read()  # string
        # decoded_dict: str = decode_dict_new(d, kd)  # decoded
        #-check:
        #-with open(new_dict_path, 'r') as f: tmp = f.read().splitlines()
        #-print(tmp[-7:])

        # TODO: decode cat_tree.txt
        cat_tree_file = re03['cat_tree_file']
        with open(cat_tree_file, 'r') as f:
            tree = f.read()
        tagged_cat_tree_path = file_copy(cat_tree_file, cat_tree_file + '.tagged')
        with open(cat_tree_file, 'w') as f:
            f.write(decode_cat_tree(tree, kd, **kwargs))

    # TODO: Test Grammar with decoded .dict
    # pa, f1, p, pq: parse-ability, F-measure, precision, parse quality
    pa, f1, p, pq = pqa_meter(new_dict_path, '', '', '', **kwargs)
    # op, cp, rp = '' » use kwargs['out_path'], corpus_path, reference_path
    # TODO: log.update(a, f1, p, q)
    # print('pa, f1, p, pq:', pa, f1, p, pq)
    # TODO: replace pqa_meter with a local function: re = pqa(**kwargs)

    # TODO: decode & return rulez? return .dict converted to a string?
    # TODO: return line []?
    return log['grammar_rules'], pa, f1, log  # rulez, log
def _store_output_data(
    data_object_path, data_directory, output_to_store, results_by_id,
):
    """Collects all of the simulation data to store, and saves it into a
    directory whose path will be passed to the storage backend to process.

    Parameters
    ----------
    data_object_path: str
        The file path to serialize the data object to.
    data_directory: str
        The path of the directory to store ancillary data in.
    output_to_store: BaseStoredData
        An object which contains `ProtocolPath`s pointing to the data to store.
    results_by_id: dict of ProtocolPath and any
        The results of the protocols which formed the property estimation
        workflow.
    """
    makedirs(data_directory, exist_ok=True)

    for attribute_name in output_to_store.get_attributes(StorageAttribute):
        attribute = getattr(output_to_store.__class__, attribute_name)
        attribute_value = getattr(output_to_store, attribute_name)

        if isinstance(attribute_value, ProtocolPath):
            # Strip any nested attribute accessors before retrieving the result
            property_name = attribute_value.property_name.split(".")[0].split("[")[0]

            result_path = ProtocolPath(property_name, *attribute_value.protocol_ids)
            result = results_by_id[result_path]

            if result_path != attribute_value:
                result = get_nested_attribute(
                    {property_name: result}, attribute_value.property_name
                )

            attribute_value = result

            # Do not store gradient information for observables as this
            # information is very workflow / context specific.
            if isinstance(
                attribute_value, (Observable, ObservableArray, ObservableFrame)
            ):
                attribute_value.clear_gradients()

        if issubclass(attribute.type_hint, FilePath):
            file_copy(attribute_value, data_directory)
            attribute_value = path.basename(attribute_value)

        setattr(output_to_store, attribute_name, attribute_value)

    with open(data_object_path, "w") as file:
        json.dump(output_to_store, file, cls=TypedJSONEncoder)
def install_by_version(cls, gadgets, kata_runtime_type, http_proxy=None,
                       https_proxy=None, no_proxy=None, verbose=False):
    """Install Kata-containers with specified version.

    Args:
        gadgets: Kata-containers gadgets (e.g. kata-containers).
        kata_runtime_type: Runtime of Kata (e.g. qemu/clh/...).
        http_proxy: HTTP proxy.
        https_proxy: HTTPS proxy.
        no_proxy: Domains which should be visited without proxy.
        verbose: Verbose or not.

    Returns:
        Boolean indicating whether Kata-containers is successfully installed or not.
    """
    stdout, stderr = verbose_func.verbose_output(verbose)

    kata_static_tar_file = config.kata_static_tar_file % gadgets[0]['version']
    kata_static_save_path = config.runtime_data_dir + kata_static_tar_file
    kata_static_tar = Path(kata_static_save_path)

    # 1. download kata tar if necessary
    if not kata_static_tar.exists():
        color_print.debug(
            '{kata_tar} is going to be downloaded'.format(
                kata_tar=kata_static_tar_file))
        kata_static_url = (
            config.kata_static_url_prefix % gadgets[0]['version']) + kata_static_tar_file
        proxies = {
            'http': http_proxy,
            'https': https_proxy,
            'no_proxy': no_proxy,
        }
        cls.download_file(
            url=kata_static_url,
            save_path=kata_static_save_path,
            proxies=proxies)
    else:
        color_print.debug(
            '{kata_tar} has been downloaded'.format(
                kata_tar=kata_static_tar_file))

    # 2. decompress
    color_print.debug(
        'decompressing files into {dest}'.format(
            dest=config.kata_tar_decompress_dest))
    rmtree(path=config.kata_tar_decompress_dest, ignore_errors=True)
    system_func.mkdir_if_not_exist(config.kata_tar_decompress_dest)
    # use --strip-components=3 because `opt/kata/` path from tar are not needed
    # also, we should not just decompress files into `/` root path
    # which might cause risks
    temp_cmd = 'tar xf {file} -C {dest} --strip-components=3'.format(
        file=kata_static_save_path,
        dest=config.kata_tar_decompress_dest)
    try:
        subprocess.run(
            temp_cmd.split(), stdout=stdout, stderr=stderr, check=True)
    except subprocess.CalledProcessError:
        color_print.error(
            'failed to decompress {kata_tar}'.format(
                kata_tar=kata_static_tar_file))
        return False

    # 3. copy files
    color_print.debug(
        'copying files to {kata_config_dir}'.format(
            kata_config_dir=config.kata_config_dir))
    rmtree(path=config.kata_config_dir, ignore_errors=True)
    system_func.mkdir_if_not_exist(config.kata_config_dir)
    for file in glob.glob(
            config.kata_tar_decompress_dest + 'share/defaults/kata-containers/*'):
        file_copy(src=file, dst=config.kata_config_dir, follow_symlinks=False)

    # 4. configure runtime type
    color_print.debug(
        'configuring kata runtime (type: {runtime_type})'.format(
            runtime_type=kata_runtime_type))
    kata_configuration_file = Path(
        '{kata_config_dir}/configuration.toml'.format(
            kata_config_dir=config.kata_config_dir))
    if kata_configuration_file.exists():
        kata_configuration_file.unlink()
    kata_configuration_file.symlink_to(
        '{kata_config_dir}/configuration-{runtime_type}.toml'.format(
            kata_config_dir=config.kata_config_dir,
            runtime_type=kata_runtime_type))

    # [5]. if docker is installed,
    # modify docker's configuration and restart docker
    # currently, metarget only supports docker
    # in the future more CRIs will be supported
    # see
    # https://github.com/kata-containers/documentation/blob/master/how-to/run-kata-with-k8s.md
    color_print.debug('configuring docker with kata-containers')
    if not cls._configure_docker_with_kata(
            base_dir=config.kata_tar_decompress_dest):
        color_print.error(
            'failed to configure docker with kata-containers')
        return False

    return cls.reload_and_restart_docker(verbose=verbose)
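# Hypothetical invocation sketch (assumption, not from the original source): the
# class name, gadget entry, and version string below are made up; as shown above,
# install_by_version only reads gadgets[0]['version'] from each gadget dict.
KataContainersInstaller.install_by_version(
    gadgets=[{'name': 'kata-containers', 'version': '1.11.2'}],
    kata_runtime_type='qemu',
    verbose=True,
)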
def generate_train_test_set_helper(sample_dir, target_dir, cross_val_num=5, seed=42,
                                   has_just_feature_matrix=False, dataset_name=""):
    sample_dir_path = Path(sample_dir)
    target_dir_path = Path(target_dir)
    if not dataset_name:
        dataset_name = sample_dir_path.stem

    # guess class label file
    class_label_file = MccImsAnalysis.guess_class_label_extension(sample_dir_path)
    class_labels = MccImsAnalysis.parse_class_labels(class_label_file)

    available_raw_files = sorted(sample_dir_path.glob("*_ims.csv"))
    available_preprocessed_files = sorted(sample_dir_path.glob("*_ims_preprocessed.csv"))
    available_pdrs = sorted(sample_dir_path.glob("*_peak_detection_result.csv"))

    # make sure we know which files we're missing so we can get them - too many files
    # is not a problem - a subset is allowed - or if we need to edit class labels
    not_available_raw_files = []
    if available_raw_files:
        raw_files = []
        for arw in available_raw_files:
            raw_files.append(arw.name)
        raw_file_set = set(raw_files)
        for fn in class_labels.keys():
            if fn not in raw_file_set:
                not_available_raw_files.append(fn)
        print(f"Missing raw files: {not_available_raw_files}")

    not_available_preproc_files = []
    if available_preprocessed_files:
        preproc_files = []
        for apf in available_preprocessed_files:
            raw_name = apf.name.split("_ims.csv_")[0]
            raw_name += "_ims.csv"
            preproc_files.append(raw_name)
        preproc_file_set = set(preproc_files)
        for fn in class_labels:
            if fn not in preproc_file_set:
                not_available_preproc_files.append(f"{fn[:-4]}_preprocessed.csv")
        print(f"Missing preprocessed files: {not_available_preproc_files}")

    # approximate search, don't want to spell out all pdr_names
    not_available_pdr_files = []
    if available_pdrs:
        av_pdrs = []
        for apdr in available_pdrs:
            raw_name = apdr.name.split("_ims.csv_")[0]
            raw_name += "_ims.csv"
            av_pdrs.append(raw_name)
        av_pdr_set = set(av_pdrs)
        for fn in class_labels:
            if fn not in av_pdr_set:
                not_available_pdr_files.append(f"{fn[:-4]}_peak_detection_result.csv")
        print(f"Missing peak detection result: {not_available_pdr_files}")

    if not_available_raw_files or not_available_preproc_files or not_available_pdr_files:
        raise ValueError("Class labels need to be adjusted or missing files added.")

    # check if we have a layer_file
    potential_layers = [
        str(filename) for filename in sample_dir_path.glob("*")
        if (str.endswith(str(filename), "layer.csv")
            or str.endswith(str(filename), "layer.xls"))
    ]

    print(f"Preparing dataset for {Counter(class_labels.values())} using "
          f"{cross_val_num}-fold cross validation splits.")

    X = [k for k in class_labels.keys()]
    y = [v for v in class_labels.values()]  # class_labels[m.filename]

    test_fraction = 1. / cross_val_num
    train_df, test_df = split_labels_ratio(class_labels,
                                           train_val_fraction=1 - test_fraction,
                                           seed=seed)

    train_dir = str(target_dir_path) + "/" + f"train_{dataset_name}/"
    test_dir = str(target_dir_path) + "/" + f"test_{dataset_name}/"

    print(f"Deleting {train_dir} and {test_dir}")
    # delete train and test dir if already existent
    if Path(train_dir).exists():
        rmtree(train_dir, ignore_errors=True)
    if Path(test_dir).exists():
        rmtree(test_dir, ignore_errors=True)
    # TODO also remove existing results, such as peak_detection_results, feature matrices

    print(f"Creating {train_dir} and {test_dir}")
    Path(train_dir).mkdir(parents=True)
    Path(test_dir).mkdir(parents=True)

    tr_class_label_fn = Path(train_dir) / Path(class_label_file).name
    te_class_label_fn = Path(test_dir) / Path(class_label_file).name
    train_df[['name', 'label']].to_csv(tr_class_label_fn, sep=",", index=False)
    test_df[['name', 'label']].to_csv(te_class_label_fn, sep=",", index=False)

    # check if it has peak detection results
    pdrs = sorted(sample_dir_path.glob("*_peak_detection_result.csv"))

    # distribute into train and test list
    train_name_set = set(train_df['name'].values)
    test_name_set = set(test_df['name'].values)

    cannot_copy = []
    for pdr in pdrs:
        raw_fn_pre = pdr.name.split("_ims.csv")[0]
        raw_fn = raw_fn_pre + "_ims.csv"
        new_fn = ""
        if raw_fn in train_name_set:
            new_fn = Path(train_dir) / pdr.name
        elif raw_fn in test_name_set:
            new_fn = Path(test_dir) / pdr.name
        else:
            cannot_copy.append(pdr)
        # copy to destination
        if new_fn:
            file_copy(pdr, new_fn)
    if cannot_copy:
        print(f"{len(cannot_copy)} PDRs not in either index.", f"{cannot_copy}")

    if has_just_feature_matrix:
        # write feature matrix
        potential_feature_matrices = sample_dir_path.glob("*_feature_matrix.csv")
        for fn in potential_feature_matrices:
            try:
                fm = pd.read_csv(fn, index_col=0)
                tr_fm = fm.loc[fm.index.intersection(train_df['name'])]
                te_fm = fm.loc[fm.index.intersection(test_df['name'])]

                tr_fm_fn = Path(train_dir) / "train_feature_matrix.csv"
                te_fm_fn = Path(test_dir) / "test_feature_matrix.csv"
                tr_fm.to_csv(tr_fm_fn)
                te_fm.to_csv(te_fm_fn)
                print(f"Created feature matrices {tr_fm_fn} and {te_fm_fn}")

                # also implement for other branches - pdr and preprocessed
                for t_dir, t_fm, t_cl in zip([train_dir, test_dir],
                                             [tr_fm_fn, te_fm_fn],
                                             [tr_class_label_fn, te_class_label_fn]):
                    t_dir_path = Path(t_dir)
                    t_dir_name = t_dir_path.stem
                    zip_path_tr = t_dir_path / f"{t_dir_name}.zip"
                    with ZipFile(zip_path_tr, 'w', ZIP_DEFLATED) as trzip:
                        # needs to exist as file object on disk to write to zip
                        trzip.write(t_fm, t_fm.name)
                        trzip.write(t_cl, t_cl.name)
            except ValueError:
                # go until no more potential candidates, which should be just one anyways
                pass
    else:
        # copy files to target dirs - only works if raw files are actually there,
        # not always the case - eg if there's just results
        raw_files_not_copied = []
        for fn in train_df.name:
            file_path = Path(sample_dir_path) / fn
            new_path = Path(train_dir) / fn
            if file_path.exists():
                file_copy(file_path, dst=new_path)
            else:
                raw_files_not_copied.append(file_path)
        # same for test set
        for fn in test_df.name:
            file_path = Path(sample_dir_path) / fn
            new_path = Path(test_dir) / fn
            if file_path.exists():
                file_copy(file_path, dst=new_path)
            else:
                raw_files_not_copied.append(file_path)
        if raw_files_not_copied:
            print(f"Didn't copy {len(raw_files_not_copied)} raw files - "
                  f"as not found in source directory.")

    # also consider featureXML files
    feature_xmls_fns = filter_feature_xmls(sample_dir_path)
    cannot_copy = []
    for feature_xml_fn_ in feature_xmls_fns:
        feature_xml_fn = Path(feature_xml_fn_)
        # split_fn - so we can compare with class labels
        # need to get file ending of original raw file to match with class labels
        # could be MZML or MZXML
        raw_fn_pre = feature_xml_fn.name.split("ML_output.featureXML")[0]
        raw_fn = raw_fn_pre + "ML"
        new_fn = ""
        if raw_fn in train_name_set:
            new_fn = Path(train_dir) / feature_xml_fn.name
        elif raw_fn in test_name_set:
            new_fn = Path(test_dir) / feature_xml_fn.name
        else:
            cannot_copy.append(feature_xml_fn)
        # copy to destination
        if new_fn:
            file_copy(feature_xml_fn, new_fn)
    if feature_xmls_fns:
        print(f"Copied {len(feature_xmls_fns) - len(cannot_copy)}/"
              f"{len(feature_xmls_fns)} featureXML files.")
    if cannot_copy:
        print(f"{len(cannot_copy)} featureXML not in either index.", f"{cannot_copy}")

    # guess layer file and copy to target dir too
    if potential_layers:
        potential_layer_file = potential_layers[0]
        layer_name = Path(potential_layer_file).stem + Path(potential_layer_file).suffix
        file_copy(potential_layers[0], dst=str(train_dir) + "/" + layer_name)
        file_copy(potential_layers[0], dst=str(test_dir) + "/" + layer_name)

    print(f"{'|' * 40}\nFinished preparation of {dataset_name}\n")
    return train_df, test_df
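# Illustrative call sketch (assumption, not from the original source): the directory
# names below are hypothetical. The helper writes the split files into
# <target_dir>/train_<dataset_name>/ and <target_dir>/test_<dataset_name>/ and
# returns the train/test label DataFrames.
train_df, test_df = generate_train_test_set_helper(
    "data/candy_raw_samples", "data/candy_splits", cross_val_num=5, seed=42)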
def generate_full_candy_classes(plot_params, file_params, preprocessing_steps,
                                evaluation_params_dict):
    all_files = glob.glob(file_params['data_dir'] + "*_ims.csv")
    class_labels = {
        r[0]: r[1]
        for r in np.loadtxt(file_params['data_dir'] + "class_labels.csv",
                            delimiter=",", dtype=str, skiprows=1)
    }
    from collections import OrderedDict
    class_labels = OrderedDict(class_labels)

    analysis = MccImsAnalysis(
        [MccImsMeasurement(f) for f in all_files],
        preprocessing_steps,
        [],
        performance_measure_parameters=evaluation_params_dict,
        class_label_file=file_params['label_filename'],
        dataset_name='full_candy',
        dir_level="")

    for m in analysis.measurements:
        class_label = class_labels.get(m.filename)
        m.set_class_label(class_label)

    from sklearn.model_selection import train_test_split
    from shutil import copy as file_copy
    from shutil import rmtree
    from pathlib import Path

    X = [k for k in class_labels.keys()]
    y = [v for v in class_labels.values()]  # class_labels[m.filename]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20,
                                                        random_state=42)

    train_df = pd.DataFrame({"name": X_train, "candy": y_train})
    test_df = pd.DataFrame({"name": X_test, "candy": y_test})

    train_dir = file_params['data_dir'] + "train_full_candy/"
    test_dir = file_params['data_dir'] + "test_full_candy/"

    # delete train and test dir
    if Path(train_dir).exists():
        rmtree(train_dir, ignore_errors=True)
    if Path(test_dir).exists():
        rmtree(test_dir, ignore_errors=True)

    # create directory
    Path(train_dir).mkdir()
    Path(test_dir).mkdir()

    # create class_label file
    train_df[['name', 'candy']].to_csv(train_dir + "class_labels.csv", sep=",")
    test_df[['name', 'candy']].to_csv(test_dir + "class_labels.csv", sep=",")

    # copy files to target dirs
    for fn in train_df.name:
        file_path = file_params['data_dir'] + fn
        new_path = train_dir + fn
        file_copy(file_path, dst=new_path)
    # same for test set
    for fn in test_df.name:
        file_path = file_params['data_dir'] + fn
        new_path = test_dir + fn
        file_copy(file_path, dst=new_path)
def main():
    '''Main function to get a fresh scope.'''
    args = parse_arguments()

    # Setup functions for dry run if necessary, else start logging
    if args.dry:
        print('Performing dry run!')
        setup_dry_run()
    else:
        # Create root directory for all the results
        os.mkdir(args.outdir)
        # Setup logging
        logging.basicConfig(filename=os.path.join(args.outdir, 'stats.log'),
                            level=logging.INFO)

    # Models we are applying
    models, models2 = load_models(args.modeldir, args.powerset)
    if len(models) == 0 and not args.none:
        print('Warning, no model is being used! Use --none to force')
        return

    # Save some interesting data in global stats
    write_stats(args, models, args.powerset)

    logger_stats = logging.getLogger('stats')
    logger_other = logging.getLogger('other')

    # If provided, copy configuration file
    # TODO also, read values from the config file instead of using cli arguments
    if args.config is not None:
        cfg = os.path.join(args.outdir, 'configuration.ini')
        file_copy(args.config, cfg)
        args.config = cfg  # Replace old choice
        if not args.noask:
            print(f'This is the time to review your configuration file in {cfg}')
            print(f'Generation counts will be {args.shortg} (short) and {args.longg} (long)')
            input(f'Press Enter when ready to go.')
        # Save config file to stats, for reference
        with zopen(cfg, 'rt') as cfgfp:
            global_stats['configuration.ini'] = cfgfp.read()

    # TODO
    # To run the time analyses, we need an average of the semantics over all runs.
    # To avoid retrieving the semantics at data-analysis time, it is better to
    # compute the average here and produce an averaged output file that is easy
    # to use in the analyses.

    for r in range(args.runs):
        print(f'Performing run {r}')
        # Prepare output directory for this run
        outdir = get_run_path(args.outdir, r)  # somepath/sim += /sim{r}
        os.mkdir(outdir)

        # Prepare dataset in somepath/sim/sim{r}/dataset
        # This single run will have the datafile partitioned in k folds
        dataset = Dataset(args.datafile, args.k_folds, outdir)
        cons, cons_msg = dataset.is_consistent()
        if not cons:
            # Emit a signal if K does not evenly partition the dataset
            print('Warning! Selected K cannot produce consistent semantics!')
            print(cons_msg)
            logi('run.dataset', cons_msg)
        dataset.generate_folds(True)

        # Model selection
        if args.all:
            bm = len(models2) - 1  # Last combination
            t_tot = 0  # No time spent
        elif args.none:
            bm = 0
            t_tot = 0  # No time spent
        else:
            if args.powerset:
                bm, t_tot = run_powerset(args, outdir, models2, dataset)
            else:
                bm, t_tot = run_set(args, outdir, models2, dataset)
        # Get actual
        best_models = models2[bm]

        # Save selection time
        global_stats['sel_time'] = global_stats.get('sel_time', 0) + t_tot
        global_stats.setdefault('sel_times', []).append(t_tot)
        logi('stats.selection.walltimes', f'Time for running selection: {t_tot}')
        # Increment best model usage
        global_stats['best_models'] = global_stats.get(
            'best_models', Counter()) + Counter({str(bm): 1})
        # Save combination
        global_stats.setdefault('bm_hist', []).append(bm)
        logi('stats.selection.models.best', f'{bm} {best_models}')

        print('Performing long run with best models', models2[bm])
        # Prepare simulation, storing data in somepath/sim/sim{r}/longrun
        forrest = Forrest(f'longrun', args.algorithm, models2[bm], dataset,
                          args.k_folds, outdir, args.bindir, args.config)
        # Run simulation
        k_fits, k_timing, avg_sem_train, avg_sem_test = forrest.run(args.longg)
        # Write average semantic data
        forrest.save_files(avg_sem_train, avg_sem_test)

        # Save logs and stats
        logi('stats.longrun.cv.fitness.average', f'Average CV: {row_average(k_fits)}')
        t_tot = sum(k_timing)  # Total time for executing K-fold CV
        logi('stats.longrun.walltimes', f'Total time for longruns: {t_tot}')
        global_stats['lon_time'] = global_stats.get('lon_time', 0) + t_tot
        global_stats.setdefault('lon_times', []).append(t_tot)

    logi('stats.selection.models.frequency', f'{global_stats["best_models"]}')

    with zopen(os.path.join(args.outdir, 'stats.json'), 'wt') as statfile:
        # for k, v in global_stats.items():
        #     print(f'Writing stat {k} = {v}')
        json.dump(global_stats, statfile, indent=4, sort_keys=True)