def __init__(self, save_path: Optional[Union[str, Path]], load_path: Optional[Union[str, Path]] = None,
             mode: str = 'infer', *args, **kwargs) -> None:
    if save_path:
        self.save_path = expand_path(save_path)
        self.save_path.parent.mkdir(parents=True, exist_ok=True)
    else:
        self.save_path = None

    if load_path:
        self.load_path = expand_path(load_path)
        if mode != 'train' and self.save_path and self.load_path != self.save_path:
            log.warning("Load path '{}' differs from save path '{}' in '{}' mode for {}."
                        .format(self.load_path, self.save_path, mode, self.__class__.__name__))
    elif mode != 'train' and self.save_path:
        self.load_path = self.save_path
        log.warning("No load path is set for {} in '{}' mode. Using save path instead"
                    .format(self.__class__.__name__, mode))
    else:
        self.load_path = None
        log.warning("No load path is set for {}!".format(self.__class__.__name__))

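# Usage sketch (hypothetical subclass; the base class is called Serializable here
# purely for illustration, it is not named in this listing): a component forwards
# its save/load paths to the constructor above and then reads/writes through
# self.save_path / self.load_path, which are pathlib.Path objects after expand_path().
#
#     class SimpleVocab(Serializable):
#         def __init__(self, save_path, load_path=None, mode='infer', **kwargs):
#             super().__init__(save_path=save_path, load_path=load_path,
#                              mode=mode, **kwargs)
#             self.tokens = []
#
#         def save(self):
#             self.save_path.write_text('\n'.join(self.tokens))
#
#         def load(self):
#             if self.load_path and self.load_path.exists():
#                 self.tokens = self.load_path.read_text().splitlines()
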
def main(unused_argv):
    absl.flags.FLAGS.alsologtostderr = True
    # Set hyperparams from json args and defaults
    flags = lib_flags.Flags()
    # Config hparams
    if FLAGS.config:
        config_module = importlib.import_module(
            'magenta.models.gansynth.configs.{}'.format(FLAGS.config))  # TODO: Magenta not needed
        flags.load(config_module.hparams)
    # Command line hparams
    flags.load_json(FLAGS.hparams)
    # Set default flags
    lib_model.set_flags(flags)
    print('Flags:')
    flags.print_values()
    # Create training directory
    flags['train_root_dir'] = util.expand_path(flags['train_root_dir'])
    if not tf.gfile.Exists(flags['train_root_dir']):
        tf.gfile.MakeDirs(flags['train_root_dir'])
    # Save the flags to help with loading the model later
    fname = os.path.join(flags['train_root_dir'], 'experiment.json')
    with tf.gfile.Open(fname, 'w') as f:
        json.dump(flags, f)  # pytype: disable=wrong-arg-types
    # Run training
    run(flags)

@classmethod
def load_from_path(cls, path, flags=None):
    """Instantiate a Model for eval using flags and weights from a saved model.

    Currently only supports models trained by the experiment runner, since
    Model itself doesn't save flags (so we rely on the runner's experiment.json).

    Args:
        path: Path to model directory (which contains stage folders).
        flags: Additional flags for loading the model.

    Raises:
        ValueError: If folder of path contains no stage folders.

    Returns:
        model: Instantiated model with saved weights.
    """
    # Read the flags from the experiment.json file
    # experiment.json is in the folder above
    # Remove last '/' if present
    path = path[:-1] if path.endswith('/') else path
    path = util.expand_path(path)
    if flags is None:
        flags = lib_flags.Flags()
    flags['train_root_dir'] = path
    experiment_json_path = os.path.join(path, 'experiment.json')
    try:
        # Read json to dict
        with tf.gfile.GFile(experiment_json_path, 'r') as f:
            experiment_json = json.load(f)
        # Load dict as a Flags() object
        flags.load(experiment_json)
    except Exception as e:  # pylint: disable=broad-except
        print("Warning! Couldn't load model flags from experiment.json")
        print(e)
    # Set default flags
    set_flags(flags)
    flags.print_values()
    # Get the list of stage directories
    train_sub_dirs = sorted([
        sub_dir for sub_dir in tf.gfile.ListDirectory(path)
        if sub_dir.startswith('stage_')
    ])
    if not train_sub_dirs:
        raise ValueError('No stage folders found, is %s the correct model path?' % path)
    # Get last checkpoint
    last_stage_dir = train_sub_dirs[-1]
    stage_id = int(last_stage_dir.split('_')[-1])
    weights_dir = os.path.join(path, last_stage_dir)
    ckpt = tf.train.latest_checkpoint(weights_dir)
    print('Load model from {}'.format(ckpt))
    # Load the model, use eval_batch_size if present
    batch_size = flags.get('eval_batch_size',
                           train_util.get_batch_size(stage_id, **flags))
    model = cls(stage_id, batch_size, flags)
    model.saver.restore(model.sess, ckpt)
    return model

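# Usage sketch (the directory path is illustrative): restore a trained model for
# sampling. load_from_path() reads experiment.json from the training root, finds
# the newest 'stage_*' folder, and restores the latest checkpoint from it.
#
#     flags = lib_flags.Flags({'eval_batch_size': 8})
#     model = lib_model.Model.load_from_path('~/gansynth/train_root_dir', flags)
#     # model now wraps a session with the restored weights for the last stage.
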
def load_midi(midi_path, min_pitch=36, max_pitch=84):
    """Load midi as a notesequence."""
    midi_path = util.expand_path(midi_path)
    ns = note_seq.midi_file_to_sequence_proto(midi_path)
    pitches = np.array([n.pitch for n in ns.notes])
    velocities = np.array([n.velocity for n in ns.notes])
    start_times = np.array([n.start_time for n in ns.notes])
    end_times = np.array([n.end_time for n in ns.notes])
    valid = np.logical_and(pitches >= min_pitch, pitches <= max_pitch)
    notes = {'pitches': pitches[valid],
             'velocities': velocities[valid],
             'start_times': start_times[valid],
             'end_times': end_times[valid]}
    return ns, notes

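# Usage sketch (the file path is illustrative): load a MIDI file and keep only
# notes whose pitch lies within [min_pitch, max_pitch].
#
#     ns, notes = load_midi('~/midi/riff.mid', min_pitch=36, max_pitch=84)
#     print('{} of {} notes kept'.format(len(notes['pitches']), len(ns.notes)))
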
def main(unused_argv):
    absl.flags.FLAGS.alsologtostderr = True
    # Load the model
    flags = lib_flags.Flags({'batch_size_schedule': [FLAGS.batch_size]})
    model = lib_model.Model.load_from_path(FLAGS.ckpt_dir, flags)

    # Make an output directory if it doesn't exist
    output_dir = util.expand_path(FLAGS.output_dir)
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    if FLAGS.midi_file:
        # If a MIDI file is provided, synthesize interpolations across the clip
        unused_ns, notes = gu.load_midi(FLAGS.midi_file)
        # Distribute latent vectors linearly in time
        z_instruments, t_instruments = gu.get_random_instruments(
            model,
            notes['end_times'][-1],
            secs_per_instrument=FLAGS.secs_per_instrument)
        # Get latent vectors for each note
        z_notes = gu.get_z_notes(notes['start_times'], z_instruments, t_instruments)
        # Generate audio for each note
        print('Generating {} samples...'.format(len(z_notes)))
        audio_notes = model.generate_samples_from_z(z_notes, notes['pitches'])
        # Make a single audio clip
        audio_clip = gu.combine_notes(audio_notes,
                                      notes['start_times'],
                                      notes['end_times'],
                                      notes['velocities'])
        # Write the wave files
        fname = os.path.join(output_dir, 'generated_clip.wav')
        gu.save_wav(audio_clip, fname)
    else:
        # Otherwise, just generate a batch of random sounds
        waves = model.generate_samples(FLAGS.batch_size)
        # Write the wave files
        for i in range(len(waves)):
            fname = os.path.join(output_dir, 'generated_{}.wav'.format(i))
            gu.save_wav(waves[i], fname)

def run(preprocessed_dir, invalid_thresh, invalid_user_thresh, relative_diff_thresh,
        data_split, no_interactions, negative, max_snps, model_id, cross_validation,
        output_dir):
    """
    Builds a model to predict phenotype

    :param preprocessed_dir: The directory containing the preprocessed data
    :param invalid_thresh: The acceptable percentage of missing data before a SNP is discarded
    :param invalid_user_thresh: The acceptable percentage of missing data before a user is discarded
    :param relative_diff_thresh: The relative difference in mutation percent, calculated as a
        percent of the larger mutation percent value
    :param data_split: The percent of data used for testing
    :param no_interactions: If True, the model will not contain interactions
    :param negative: The negative phenotype label
    :param max_snps: The maximum number of SNPs to include in the model
    :param model_id: The id of the model to use
    :param cross_validation: The number of folds for cross validation
    :param output_dir: The directory to write the model to
    """
    # Expand file paths
    preprocessed_dir = expand_path(preprocessed_dir)

    # Make sure output directory exists before doing work
    clean_output(output_dir)
    setup_logger(output_dir, model_id + "_model")

    # Get the model builder
    build_model = MODELS.get(model_id)
    if not build_model:
        raise ValueError('Model Id "{}" is not valid'.format(model_id))

    phenotypes = timed_invoke('reading the preprocessed files',
                              lambda: __read_phenotype_input(preprocessed_dir))
    data_set = timed_invoke('creating model data set',
                            lambda: mutation_difference.create_dataset(
                                phenotypes, invalid_thresh, invalid_user_thresh,
                                relative_diff_thresh))
    timed_invoke('building model',
                 lambda: build_model(data_set, data_split, no_interactions, negative,
                                     max_snps, cross_validation, output_dir))
    logger.info('Output written to "{}"'.format(output_dir))

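# Sketch (illustrative only; the builder name is hypothetical): MODELS is expected
# to map a model id to a builder callable with the signature used above, e.g.
#
#     MODELS = {
#         'logistic': build_logistic_model,
#     }
#
# where build_logistic_model(data_set, data_split, no_interactions, negative,
# max_snps, cross_validation, output_dir) fits the model and writes it to output_dir.
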
def launch_app(self, app_name, log_filepath=None, extra_args_list=None, override_cwd=None):
    if not self.active_env:
        # TODO: provide error message.
        return
    if not self.active_app_cfg:
        # TODO: provide error message.
        return
    if app_name not in self.active_app_cfg:
        # TODO: provide error message.
        return

    # Per-platform launch info for the requested app
    app_info = self.active_app_cfg.get(app_name, {}).get(sys.platform, {})
    if not app_info:
        # TODO: provide error message.
        return

    log_fp = None
    if log_filepath:
        try:
            log_fp = open(log_filepath, 'w')
        except (IOError, OSError):
            logging.error("Unable to open app launch log file located here: {}".format(log_filepath))
            logging.error("Aborting launch of app named '{}'.".format(app_name))
            return

    # Temporarily apply the active environment so expand_path() can resolve
    # variables defined by the active config, then restore the original one.
    env_to_restore = os.environ.copy()
    os.environ.clear()
    os.environ.update(self.active_env)
    exe_path = util.expand_path(app_info.get("path", ""))
    # restore environment
    os.environ.clear()
    os.environ.update(env_to_restore)

    cmd_and_args = [exe_path] + app_info.get("fixed_args", []) + (extra_args_list or [])

    set_cwd = app_info.get("set_cwd", None)
    if override_cwd and os.path.isdir(override_cwd):
        set_cwd = override_cwd

    creation_flags = None
    cf_str = app_info.get("creation_flags", "")
    if cf_str:
        # Evaluate the configured flags expression (eval is used instead of exec
        # so the result can be bound to this local variable).
        creation_flags = eval(cf_str)

    pid = 0
    if creation_flags:
        if log_fp:
            pid = subprocess.Popen(cmd_and_args, env=self.active_env.copy(),
                                   creationflags=creation_flags, cwd=set_cwd,
                                   stdout=log_fp, stderr=log_fp).pid
        else:
            pid = subprocess.Popen(cmd_and_args, env=self.active_env.copy(),
                                   creationflags=creation_flags, cwd=set_cwd).pid
    else:
        if log_fp:
            pid = subprocess.Popen(cmd_and_args, env=self.active_env.copy(),
                                   cwd=set_cwd, stdout=log_fp, stderr=log_fp).pid
        else:
            pid = subprocess.Popen(cmd_and_args, env=self.active_env.copy(),
                                   cwd=set_cwd).pid
    return pid

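# Sketch of the per-app config entry launch_app() reads (key names come from the
# .get() calls above; the app name and values are illustrative). Each app maps a
# sys.platform value to its executable path plus optional launch options:
#
#     active_app_cfg = {
#         "image_editor": {
#             "linux": {
#                 "path": "$TOOLS_ROOT/bin/editor",  # resolved via util.expand_path
#                 "fixed_args": ["--no-splash"],
#                 "set_cwd": "~/projects",
#                 "creation_flags": ""               # evaluated only when non-empty
#             }
#         }
#     }
#
#     pid = env_manager.launch_app("image_editor", extra_args_list=["scene.png"])
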
def apply_env_config(self, config_key_name_to_apply):
    if config_key_name_to_apply not in self.env_config_path_by_name:
        # TODO: provide error message.
        return
    json_filepath = self.env_config_path_by_name.get(config_key_name_to_apply, "")
    if not os.path.isfile(json_filepath):
        # TODO: provide error message.
        return
    try:
        with open(json_filepath, "r") as f:
            env_info_list = json.loads(f.read())
    except Exception:
        logging.error(">>> EnvManager.apply_env_config() EXCEPTION ...")
        logging.error(traceback.format_exc())
        return

    env_to_restore = os.environ.copy()
    os.environ.clear()

    # Always start from baseline environment
    os.environ.update(self.env_baseline)

    # Apply environment ...
    for env_info in env_info_list:
        if not env_info:
            continue
        e_var = str(env_info.get("var", ""))
        if not e_var:
            continue
        e_value = env_info.get("value", "")
        e_type = env_info.get("type", "")

        if e_value is None:
            # An explicit null value means remove the entry from the environment
            if e_var in os.environ:
                del os.environ[e_var]
            continue

        if not e_type:
            e_type = "string"
        # A type may carry an operation suffix, e.g. "path_list.prepend"
        operation = None
        if '.' in e_type:
            bits = e_type.split('.')
            e_type = bits[0]
            operation = bits[1]

        if e_type == "dir_path":
            assign_path = util.expand_path(str(e_value))
            if assign_path:
                os.environ[e_var] = assign_path
        elif e_type == "path_list":
            exist_path_list = os.environ.get(e_var, "").split(os.path.pathsep)
            assign_path_list = []
            for path in e_value:
                assign_path = util.expand_path(path)
                if not assign_path:
                    continue
                if assign_path in exist_path_list or assign_path in assign_path_list:
                    continue
                assign_path_list.append(assign_path)
            assign_path_list_str = os.path.pathsep.join(assign_path_list)
            if operation == "prepend":
                os.environ[e_var] = "{}{}{}".format(assign_path_list_str, os.path.pathsep,
                                                    os.getenv(e_var, ""))
            elif operation == "replace":
                os.environ[e_var] = assign_path_list_str
            else:
                # default behaviour is to append
                os.environ[e_var] = "{}{}{}".format(os.getenv(e_var, ""), os.path.pathsep,
                                                    assign_path_list_str)
        else:
            assign_value = str(e_value)
            if operation != "NO_EXPAND":
                assign_value = os.path.expandvars(assign_value)
            os.environ[e_var] = assign_value

    self.active_env = os.environ.copy()
    self.active_env_name = config_key_name_to_apply
    self.active_env["PWUI_ACTIVE_ENV"] = self.active_env_name

    # restore environment
    os.environ.clear()
    os.environ.update(env_to_restore)

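# Sketch of the JSON layout apply_env_config() parses (field names come from the
# code above; the concrete variables and paths are illustrative). A null "value"
# removes the variable, "type" selects the handling, and a ".prepend"/".replace"
# suffix on "type" selects the path_list operation:
#
#     [
#         {"var": "PROJECT_ROOT", "value": "~/projects/demo", "type": "dir_path"},
#         {"var": "PATH", "value": ["~/tools/bin"], "type": "path_list.prepend"},
#         {"var": "RAW_TEMPLATE", "value": "$HOME/%s", "type": "string.NO_EXPAND"},
#         {"var": "OBSOLETE_VAR", "value": null, "type": "string"}
#     ]
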
def run(users_dir, init_dir, model_dir, output_dir):
    """
    Predicts phenotype for users

    :param users_dir: The directory containing the user genotype files
    :param init_dir: The directory containing the preprocessed files
    :param model_dir: The directory containing the model files
    :param output_dir: The directory to write the predictions to
    """
    users_dir = expand_path(users_dir)
    init_dir = expand_path(init_dir)
    model_dir = expand_path(model_dir)
    output_dir = expand_path(output_dir)

    # Make sure output directory exists before doing work
    clean_output(output_dir)
    # Setup console and file loggers
    setup_logger(output_dir, "predict")

    # Read SNP data
    snp_details = pd.read_csv(os.path.join(init_dir, 'snp_database.csv.gz'),
                              compression='gzip')
    # Read model config (keys used below: 'snps', 'imputer', 'no_interactions',
    # 'model' and 'pheno_map')
    with open(os.path.join(model_dir, 'model_config.pkl'), 'rb') as f:
        model_config = pickle.load(f)

    # Filter snps to only include selected snps
    snp_columns = model_config['snps']
    selected_rsids = [extract_rsid(column) for column in snp_columns]
    snp_details = snp_details[snp_details['Rsid'].isin(selected_rsids)]

    # Predict for each user
    imputer = model_config['imputer']
    model_desc = build_model_desc(snp_columns, model_config['no_interactions'])
    model = model_config['model']
    pheno_map = model_config['pheno_map']
    users = []
    predictions = []

    def calc_mutations(user):
        mutations = user.allele_transformation(snp_details, how='right')
        mutations['Rsid'] = mutations['Rsid'].apply(format_snps, args=(snp_details,))
        mutations.set_index('Rsid', inplace=True)
        mutations = mutations.transpose()
        return mutations

    def predict_pheno(mutations):
        # Impute missing values
        x = imputer.transform(mutations)
        # Create model feature set
        x = dmatrix(model_desc, pd.DataFrame(x, columns=mutations.columns))
        # Predict
        return model.predict(x)[0]

    count = 0
    user_files = UserPhenotypes.get_user_geno_files(users_dir)
    for user_file in user_files:
        user = User(users_dir, user_file)
        count += 1
        # Calculate mutations
        mutations = timed_invoke(
            'calculating mutations for user {} ({}/{})'.format(user.id, count, len(user_files)),
            lambda: calc_mutations(user))
        # Predict phenotype
        pheno_id = timed_invoke(
            'predicting phenotype for user {} ({}/{})'.format(user.id, count, len(user_files)),
            lambda: predict_pheno(mutations))
        users.append(user.id)
        predictions.append(pheno_map[pheno_id])

    pd.DataFrame({'user_id': users, 'prediction': predictions})\
        .to_csv(os.path.join(output_dir, 'predictions.csv'), index=False,
                columns=['user_id', 'prediction'])
    print('Output written to "{}"'.format(output_dir))

def __init__(self, config):
    self._train_data_path = util.expand_path(config['train_data_path'])