def predict(saved_model, song):
    model = compile_and_load(saved_model)
    if os.path.isdir(song):
        # batch mode: separate every .wav file in the directory
        os.mkdir(song + '/predictions')
        L = os.listdir(song)
        for l in L:
            if l.split('.')[-1] == 'wav':
                separate_song(model, song + '/' + l, song + '/predictions')
    else:
        # single-file mode
        separate_song(model, song, 'predictions')
def apply_model(model_dir, datagen, train=True, output_write_path='test/predictions'):
    model = tf.keras.models.load_model(model_dir, compile=False)
    model = compile_model(model)
    if train:
        for x, y in datagen:
            model.fit(x, y)
        model.save(model_dir)
    else:
        if not os.path.isdir(output_write_path):
            os.mkdir(output_write_path)
        for x in datagen:
            y = model.predict(x)
            Y =
def create_list_of_files(abs_path, file_list=None):
    if file_list is None:
        file_list = []
    directory_files = os.listdir(abs_path)
    for file_name in directory_files:
        file_path = os.path.join(abs_path, file_name)
        # if file_path is not a directory, record it
        if not os.path.isdir(file_path):
            file_list.append(file_path)
        else:
            # file_path is a directory; recurse into it
            create_list_of_files(file_path, file_list)
    return file_list
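# Hedged usage sketch (not part of the original source): lists every file under
# a hypothetical 'data' directory; the path is illustrative only. os.walk would
# be an idiomatic alternative if explicit recursion is not required.
def _example_list_files():
    for path in create_list_of_files(os.path.abspath('data')):
        print(path)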
def restore(self, snapshotName, overwrite=False):
    # Get the configuration of the snapshot
    snapshot = self.configManager.getSnapshot(snapshotName)
    if snapshot:
        # See if we already have an existing instance with this name
        matchingInstances = [
            i for i in self.configManager.instances()
            if i['name'] == snapshot['instance-name']
        ]
        if len(matchingInstances) == 0 or (len(matchingInstances) == 1 and overwrite):
            # In case the instance already exists, wipe it
            if len(matchingInstances) > 0:
                self.instanceManager.remove(snapshot['instance-name'])
            # Create a new instance
            self.instanceManager.create(snapshot['instance-name'], snapshot['version'])
            # Remove unused files / folders
            for file in snapshot['removed-elements']:
                if os.path.isdir(file):
                    os.rmdir(file)
                else:
                    os.remove(file)
            # Unzip the backup that we had
            shutil.unpack_archive(
                snapshot.getPath(),
                self.instanceManager.getInstancePath(snapshot['instance-name']))
        else:
            self.logger.error(
                'An instance with the name [{}] already exists, aborting ...'
                .format(snapshot['instance-name']))
    else:
        self.logger.error(
            'The snapshot name [{}] is invalid'.format(snapshotName))
def save(self, path):
    if os.path.isdir(path):
        pass
def make_plots(outputdir, dirs, loss_delta):
    colors = plot.get_separate_colors(len(dirs))
    dirs.sort()
    count_plot = plot.Plotter(colors)
    select_and_train_plot = plot.Plotter(colors)
    time_plot = plot.Plotter(colors)
    mae_plot = plot.Plotter(colors)
    ymax = float('-inf')
    for d in dirs:
        curdir = os.path.join(outputdir, d)
        data = np.array(get_data(curdir))
        # for the first document, read off the first dimension (the labeled set counts)
        counts = data[0, 0, :]
        # set up a 2D matrix with each experiment on its own row and each
        # experiment's pR^2 results in columns
        ys_mat = data[:, -1, :]
        ys_medians, ys_means, ys_errs_minus, ys_errs_plus = get_stats(ys_mat)
        ys_errs_plus_max = max(ys_errs_plus + ys_means)
        if ys_errs_plus_max > ymax:
            ymax = ys_errs_plus_max
        # set up a 2D matrix with each experiment on its own row and each
        # experiment's time results in columns
        times_mat = data[:, 1, :]
        times_medians, times_means, times_errs_minus, times_errs_plus = \
            get_stats(times_mat)
        count_plot.plot(
            counts, ys_means, d, ys_medians, [ys_errs_minus, ys_errs_plus])
        time_plot.plot(
            times_means, ys_means, d, ys_medians,
            [ys_errs_minus, ys_errs_plus],
            times_medians, [times_errs_minus, times_errs_plus])
        select_and_train_mat = data[:, 2, :]
        sandt_medians, sandt_means, sandt_errs_minus, sandt_errs_plus = \
            get_stats(select_and_train_mat)
        select_and_train_plot.plot(
            counts, sandt_means, d, sandt_medians,
            [sandt_errs_minus, sandt_errs_plus])
        # get mae results
        losses = []
        for maedir in os.listdir(curdir):
            curmaedir = os.path.join(curdir, maedir)
            if os.path.isdir(curmaedir):
                losses.append([])
                for i in range(len(counts)):
                    maedata = np.loadtxt(os.path.join(curmaedir, str(i)))
                    # generalized zero-one loss
                    losses[-1].append(np.sum(maedata < loss_delta) / len(maedata))
        losses = np.array(losses)
        mae_medians, mae_means, mae_errs_minus, mae_errs_plus = \
            get_stats(losses)
        mae_plot.plot(
            counts, mae_means, d, mae_medians, [mae_errs_minus, mae_errs_plus])
    corpus = os.path.basename(outputdir)
    count_plot.set_xlabel('Number of Labeled Documents')
    count_plot.set_ylabel('pR$^2$')
    count_plot.set_ylim([-0.05, ymax])
    count_plot.savefig(os.path.join(outputdir, corpus + '.counts.pdf'))
    time_plot.set_xlabel('Time elapsed (seconds)')
    time_plot.set_ylabel('pR$^2$')
    time_plot.set_ylim([-0.05, ymax])
    time_plot.savefig(os.path.join(outputdir, corpus + '.times.pdf'))
    select_and_train_plot.set_xlabel('Number of Labeled Documents')
    select_and_train_plot.set_ylabel('Time to select and train')
    select_and_train_plot.savefig(
        os.path.join(outputdir, corpus + '.select_and_train.pdf'))
def create_pem_bundle(destination, urls=None, resource_dir=None, set_env_var=True):
    """create a PEM formatted certificate bundle from the specified resources

    Args:
        destination(str, required): pathname for created pem bundle file
        urls(iterable, optional, default=None): passed to `download_resources`
            if specified, else the existing contents of `resource_dir` are
            processed; `urls` and/or `resource_dir` must be specified
        resource_dir(str, optional, default=None): location of resources to
            process; passed to `download_resources` along with `urls` if both
            specified, else a temporary location is utilized
        set_env_var(bool, optional, default=True): determines whether the
            `DOD_CA_CERTS_PEM_PATH` environment variable is set to the
            pathname of the created pem bundle

    Returns:
        pathname of created pem bundle file
    """
    if resource_dir is not None:
        assert os.path.exists(resource_dir)
        assert os.path.isdir(resource_dir)
    else:
        assert urls is not None  # `urls` or `resource_dir` must be specified
    if urls is not None:
        resource_dir = download_resources(urls, resource_dir)
    # start the bundle bytes with a creation-timestamp header
    pem_bundle = "# Bundle Created: {} \n".format(datetime.now()).encode()
    # get file list
    files = sorted(os.listdir(resource_dir))
    # process CAs first, then roots
    for type in ['ca', 'root']:
        for file in files:
            if any([file.endswith(ext) for ext in cert_exts]):
                fpath = os.path.join(resource_dir, file)
                if file.lower().find(type) > -1 and os.path.isfile(fpath):
                    with open(fpath, 'rb') as f:
                        contents = f.read()
                    try:
                        cert = load_der_x509_certificate(
                            contents, backend=default_backend())
                    except ValueError:
                        try:
                            cert = load_pem_x509_certificate(
                                contents, backend=default_backend())
                        except ValueError:
                            log.warning(
                                'Unable to load public key from: {}'.format(file))
                            continue
                    # add the cert's info and public key in PEM format to the bundle
                    pem_bundle += describe_cert(cert).encode()
                    pem_bundle += cert.public_bytes(Encoding.PEM)
    destination = os.path.abspath(destination)
    with open(destination, 'wb') as f:
        f.write(pem_bundle)
    if set_env_var:
        os.environ['DOD_CA_CERTS_PEM_PATH'] = destination
        log.info('Set DOD_CA_CERTS_PEM_PATH environment variable')
    return destination
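# Hedged usage sketch (not part of the original module): the output filename,
# certificate directory, and URL below are illustrative placeholders only.
def _example_build_bundle():
    # From certificates already present on disk:
    bundle_path = create_pem_bundle('dod_bundle.pem', resource_dir='certs/')
    print('Bundle written to', bundle_path)
    # Or download resources first (placeholder URL, not a real endpoint):
    # create_pem_bundle('dod_bundle.pem', urls=['https://example.com/certs.zip'])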
import os
import sys

from scipy.io.wavfile import write as wavwrite
import tensorflow as tf

batch_size = 8
_WINDOW_LEN = 16384

tfrecord_fp, out_dir = sys.argv[1:]

if not os.path.isdir(out_dir):
    os.makedirs(out_dir)


def _mapper(example_proto):
    features = {
        'id': tf.FixedLenSequenceFeature([], tf.string, allow_missing=True),
        'label': tf.FixedLenSequenceFeature([], tf.string, allow_missing=True),
        'slice': tf.FixedLenFeature([], tf.int64),
        'samples': tf.FixedLenSequenceFeature([1], tf.float32, allow_missing=True)
    }
    example = tf.parse_single_example(example_proto, features)

    wav_id = tf.reduce_join(example['id'], 0)
    wav_label = tf.reduce_join(example['label'], 0)
    wav_slice = example['slice']
    wav = example['samples']
    wav_len = tf.shape(wav)[0]
def process_xyz_files(data, process_file_fn, file_ext=None, file_idx_list=None, stack=True):
    """
    Take a set of datafiles and apply a predefined data processing script to each
    one. Data can be stored in a directory, tarfile, or zipfile. An optional
    file extension can be added.

    Parameters
    ----------
    data : str
        Complete path to datafiles. Files must be in a directory, tarball, or zip archive.
    process_file_fn : callable
        Function to process files. Can be defined externally.
        Must input a file, and output a dictionary of properties, each of which
        is a torch.tensor. Dictionary must contain at least three properties:
        {'num_elements', 'charges', 'positions'}
    file_ext : str, optional
        Optionally add a file extension if multiple types of files exist.
    file_idx_list : iterable of int, optional
        Optionally add a file filter to check a file index is in a predefined
        list, for example, when constructing a train/valid/test split.
    stack : bool, optional
        If True, pad each property across molecules and stack the results into
        batched tensors.
    """
    logging.info('Processing data file: {}'.format(data))
    if tarfile.is_tarfile(data):
        tardata = tarfile.open(data, 'r')
        files = tardata.getmembers()
        readfile = lambda data_pt: tardata.extractfile(data_pt)
    elif os.path.isdir(data):
        files = os.listdir(data)
        files = [os.path.join(data, file) for file in files]
        readfile = lambda data_pt: open(data_pt, 'r')
    else:
        raise ValueError('Can only read from directory or tarball archive!')

    # Use only files that end with specified extension.
    if file_ext is not None:
        files = [file for file in files if file.endswith(file_ext)]

    # Use only files that match desired filter.
    if file_idx_list is not None:
        files = [file for idx, file in enumerate(files) if idx in file_idx_list]

    # Now loop over files using readfile function defined above
    # Process each file accordingly using process_file_fn
    molecules = []
    for file in files:
        with readfile(file) as openfile:
            molecules.append(process_file_fn(openfile))

    # Check that all molecules have the same set of items in their dictionary:
    props = molecules[0].keys()
    assert all(props == mol.keys() for mol in molecules), \
        'All molecules must have same set of properties/keys!'

    # Convert list-of-dicts to dict-of-lists
    molecules = {prop: [mol[prop] for mol in molecules] for prop in props}

    # If stacking is desirable, pad and then stack.
    if stack:
        molecules = {
            key: pad_sequence(val, batch_first=True) if val[0].dim() > 0 else torch.stack(val)
            for key, val in molecules.items()
        }

    return molecules
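# Hedged usage sketch (illustrative, not from the source): a toy process_file_fn
# that parses a plain-text XYZ file into the tensors the loader expects, then
# processes a hypothetical 'molecules/' directory of .xyz files. The directory
# name, element table, and helper names are placeholders.
import torch


def _toy_xyz_parser(openfile):
    lines = openfile.read().splitlines()
    num_atoms = int(lines[0])
    # minimal element -> nuclear charge lookup for the illustration
    charge_of = {'H': 1, 'C': 6, 'N': 7, 'O': 8}
    charges, positions = [], []
    for line in lines[2:2 + num_atoms]:
        element, x, y, z = line.split()[:4]
        charges.append(charge_of.get(element, 0))
        positions.append([float(x), float(y), float(z)])
    return {'num_elements': torch.tensor(num_atoms),
            'charges': torch.tensor(charges),
            'positions': torch.tensor(positions)}


def _example_process_xyz():
    # 'molecules' is a placeholder directory of .xyz files
    return process_xyz_files('molecules', _toy_xyz_parser, file_ext='.xyz')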