def run(cfg):
    model_config = cfg['model_config']
    chorales = Datasets.getSynthesizedChorales(model_config["chorales_path"],
                                               model_config["score_informed"])
    # The separator shapes do not depend on the chorale, so compute the padding once
    input_shape, output_shape = Utils.get_separator_shapes(model_config)
    pad_frames = (input_shape[1] - output_shape[1]) // 2
    for chorale_number in chorales_to_check:
        chorale = get_chorale(chorales, chorale_number)
        Datasets.process_sample(chorale, pad_frames, model_config)
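# Hedged sketch: `chorales_to_check` and `get_chorale` are referenced above but
# not defined in this snippet. One minimal, assumption-based reading of the
# call sites (the numbers and the lookup rule are made up):
chorales_to_check = [0, 1, 2]  # hypothetical chorale indices to inspect

def get_chorale(chorales, chorale_number):
    # Assumption: chorale_number simply indexes the list returned by
    # Datasets.getSynthesizedChorales
    return chorales[chorale_number]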
def CF_Boosted_Trees(model, Data, config, name, location, states=3):
    """
    Input: Data ... bunch whose H_alpha signals and labels are lists-of-lists,
                    split into K cross-validation folds below
    """
    train_data, test_data = dat.PrepareCrossFold(Data.H_alpha)
    train_labels, test_labels = dat.PrepareCrossFold(Data.labels)
    K = len(train_data)
    score_tab = Scoring.ScoringTable(location=location, name=name,
                                     n_states=states)
    feat = FE.Features(config=config, normalize=True)

    start = time()
    for cross in range(K):
        clf = copy(model)
        train_matrix = feat.fit_transform(Data=train_data[cross])
        test_matrix = feat.fit_transform_batch(Data=test_data[cross])
        target = dat.merge_labels(train_labels[cross])
        # print(np.shape(train_matrix), np.shape(target))
        # print(np.shape(test_matrix), np.shape(test_labels[cross]))
        pred = train_and_predict(model=clf, train=train_matrix,
                                 test=test_matrix, labels=target,
                                 unsupervised=False)
        score = Scoring.score(states=pred, results=test_labels[cross],
                              unsupervised=False, pocet_stavu=states)
        # print("score", score)
        score_tab.add(scores=score)
        print("{} section done. Time taken from start {}".format(
            cross + 1, time() - start))

    score_tab.save_table()
    config["n_estimators"] = model.n_estimators
    with open(location + name + '_config.pickle', 'wb') as f:
        pickle.dump(config, f)
    return score_tab.return_table()
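# Hedged usage sketch: `model` can be any classifier exposing n_estimators;
# sklearn's GradientBoostingClassifier is one option. The Data bunch and the
# feature config come from this project and are assumed to exist already.
from sklearn.ensemble import GradientBoostingClassifier

table = CF_Boosted_Trees(model=GradientBoostingClassifier(n_estimators=100),
                         Data=Data,        # bunch with .H_alpha and .labels
                         config=config,    # feature-extraction config dict
                         name='boosted_trees',
                         location='./results/',
                         states=3)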
def load_dataset(params):
    print("Loading the dataset...")
    if params['nyu_dataset']:
        dataset = NYUDataset("../data/nyudepthv2/train", split='train')
        test_dataset = NYUDataset("../data/nyudepthv2/val", split='val')
    else:
        dataset = Datasets.FastDepthDataset(
            params["training_dataset_paths"],
            split='train',
            depth_min=params["depth_min"],
            depth_max=params["depth_max"],
            input_shape_model=(224, 224),
            disparity=params["predict_disparity"],
            random_crop=params["random_crop"])
        test_dataset = Datasets.FastDepthDataset(
            params["test_dataset_paths"],
            split='val',
            depth_min=params["depth_min"],
            depth_max=params["depth_max"],
            input_shape_model=(224, 224),
            disparity=params["predict_disparity"],
            random_crop=False)

    # Make training/validation split
    train_val_split_lengths = utils.get_train_val_split_lengths(
        params["train_val_split"], len(dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, train_val_split_lengths)
    params["num_training_examples"] = len(train_dataset)
    params["num_validation_examples"] = len(val_dataset)

    # DataLoaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params["batch_size"],
        shuffle=True,
        num_workers=params["num_workers"],
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=params["batch_size"],
        shuffle=True,
        num_workers=params["num_workers"],
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=params["batch_size"],
        shuffle=False,
        num_workers=params["num_workers"],
        pin_memory=True)
    return train_loader, val_loader, test_loader
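# Hedged usage sketch: a minimal `params` dict for the non-NYU branch. The
# keys are taken from the function body; the paths and values are made up.
params = {
    "nyu_dataset": False,
    "training_dataset_paths": ["../data/my_train_set"],  # hypothetical path
    "test_dataset_paths": ["../data/my_test_set"],       # hypothetical path
    "depth_min": 0.1,
    "depth_max": 10.0,
    "predict_disparity": False,
    "random_crop": True,
    "train_val_split": [0.9, 0.1],
    "batch_size": 8,
    "num_workers": 4,
}
train_loader, val_loader, test_loader = load_dataset(params)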
def dsd_100_experiment(model_config):
    print("SCRIPT START")
    # Create subfolders if they do not exist to save results
    for directory in [model_config["model_base_dir"], model_config["log_dir"]]:
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Set up data input
    if os.path.exists('dataset.pkl'):
        with open('dataset.pkl', 'rb') as file:
            dataset = pickle.load(file)
        print("Loaded dataset from pickle!")
    else:
        dsd_train, dsd_test = Datasets.getMUSDB(model_config["musdb_path"])
        ccm = Datasets.getCCMixter("CCMixter.xml")

        # Pick 5 random songs for validation from the MUSDB train set (this is
        # always the same selection each time since we fix the random seed!)
        val_idx = np.random.choice(len(dsd_train), size=5, replace=False)
        train_idx = [i for i in range(len(dsd_train)) if i not in val_idx]
        print("Validation with MUSDB training songs no. " + str(val_idx))

        # Draw randomly from datasets
        dataset = dict()
        dataset["train_sup"] = [dsd_train[i] for i in train_idx] + ccm
        dataset["train_unsup"] = list()  # [dsd_train[0][25:], dsd_train[1][25:], dsd_train[2][25:]] #[fma, list(), looperman]
        dataset["valid"] = [dsd_train[i] for i in val_idx]
        dataset["test"] = dsd_test

        with open('dataset.pkl', 'wb') as file:
            pickle.dump(dataset, file)
        print("Created dataset structure")

    # Set up the dataset depending on the task. Each MUSDB entry contains
    # sources in the order (mix, acc, bass, drums, other, vocal)
    if model_config["task"] == "voice":
        # The first 25 train_sup entries (presumably the MUSDB part; CCMixter
        # entries follow) carry all six sources; reduce them to (mix, acc, vocal)
        for i in range(25):
            dataset["train_sup"][i] = (dataset["train_sup"][i][0],
                                       dataset["train_sup"][i][1],
                                       dataset["train_sup"][i][5])
        for subset in ["valid", "test"]:
            for i in range(len(dataset[subset])):
                dataset[subset][i] = (dataset[subset][i][0],
                                      dataset[subset][i][1],
                                      dataset[subset][i][5])

    # Optimize in a supervised fashion until validation loss worsens
    sup_model_path, sup_loss = optimise(dataset=dataset)
    print("Supervised training finished! Saved model at " + sup_model_path +
          ". Performance: " + str(sup_loss))
    Evaluate.produce_source_estimates(model_config, sup_model_path,
                                      model_config["musdb_path"],
                                      model_config["estimates_path"], "train")
def preprocess(model_config, dataset):
    # Determine input and output shapes
    disc_input_shape = [model_config["batch_size"],
                        model_config["num_frames"], 0]  # Shape of input
    separator_class = Models.UnetAudioSeparator.UnetAudioSeparator(model_config)
    sep_input_shape, sep_output_shape = separator_class.get_padding(
        np.array(disc_input_shape))
    tiny = 'tiny' in dataset
    Datasets.preprocess_dataset(model_config, sep_input_shape,
                                sep_output_shape, tiny)
def test_PrepareCrossFold(Data, vypis=False):
    train, test = Datasets.PrepareCrossFold(Data.H_alpha)
    tmp1, tmp2 = Datasets.PrepareCrossFold(Data.data)
    shodujiSe = True  # "do they all match"
    for m, n in enumerate(train):
        for i, j in enumerate(n):
            YN = np.allclose(j, tmp1[m][i][1])
            if not YN:
                shodujiSe = False
            if vypis:  # verbose output
                print(f'{m} Does signal {i + 1} match? ', YN)
    print("Do they all match? ", shodujiSe)
def CalculatePriceChanges():
    distribution = DiscountDistribution()
    priceMap = {}
    # data = ds.ZooplaMatchedDaily(2000000)  # during rising housing market
    data = ds.ZooplaMatchedDaily()  # at bottom of housing market
    chunk = data.read(500000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'},
                 inplace=True)
    filteredchunk = chunk[chunk["MARKET"] == "SALE"][[
        'LISTING ID', 'DAY', 'PRICE'
    ]][chunk['PRICE'] > 0]
    for row in filteredchunk.values:
        # row: LISTING ID, DAY, PRICE
        listingid = row[0]
        if listingid in priceMap:
            lastRecord = priceMap[listingid]
            oldPrice = lastRecord.currentprice
            startDay, endDay, percent = lastRecord.add(row[1], row[2])
            if oldPrice == row[2]:
                # no price change
                distribution.noChange(startDay / 30, endDay / 30)
            else:
                # price has changed
                distribution.addChange(startDay / 30, endDay / 30, percent)
        else:
            priceMap[listingid] = PriceCalc(row[1], row[2])
    return distribution
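# Hedged sketch of the PriceCalc helper used above and in ZooplaPriceChanges
# below; its definition is not part of this snippet. The interface is inferred
# from the call sites: it tracks one listing, and add(day, price) reports the
# (startDay, endDay, percent) change since the previous record.
class PriceCalc(object):
    def __init__(self, day, price):
        self.initialmarketprice = price
        self.currentprice = price
        self.lastday = day

    def add(self, day, price):
        # Percent change relative to the previously recorded price
        percent = (price - self.currentprice) * 100.0 / self.currentprice
        result = (self.lastday, day, percent)
        self.lastday = day
        self.currentprice = price
        return result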
def __init__(self):
    rawData = ds.EHSinterview()
    # Filter for fields of interest
    incomeRent = rawData.loc[:, [self.incomeField, "rentwkx", "bedrqx"]]
    # Filter out non renters
    self.renterData = incomeRent[incomeRent["rentwkx"] > 0]
    # self.renterData = self.renterData[self.renterData["bedrqx"]==4]  # only consider one-bed
    # Split the data into 2D histogram data
    self.population, self.xbins, self.ybins = np.histogram2d(
        np.log(self.renterData["rentwkx"].values),
        np.log(self.renterData[self.incomeField].values),
        bins=[40, 30])
    self.xaxis = (np.array(self.xbins[1:]) + np.array(self.xbins[:-1])) / 2.0
    self.yaxis = (np.array(self.ybins[1:]) + np.array(self.ybins[:-1])) / 2.0
    self.popdf = pd.DataFrame(
        data=np.zeros(((len(self.xbins) - 1) * (len(self.ybins) - 1), 3)),
        columns=['rental price', 'income', 'p'])
    i = 0
    totalPop = self.population.sum()
    for param in range(1, len(self.ybins)):
        for out in range(1, len(self.xbins)):
            self.popdf.iloc[i, 0] = (self.xbins[out] + self.xbins[out - 1]) / 2.0
            self.popdf.iloc[i, 1] = (self.ybins[param] + self.ybins[param - 1]) / 2.0
            self.popdf.iloc[i, 2] = self.population[out - 1, param - 1] * 1.0 / totalPop
            i += 1
def ThreadingFix(self, database):
    if FileInput.BrowseWindow.NullError:
        QtWidgets.QMessageBox.warning(
            self, "Error",
            "Some format error occurred. Are there perhaps two spaces next to "
            "each other in the file? In a tsv these can be seen as two columns.")
    if len(Ui_MainWindow.Nulvalues) > 0:
        QtWidgets.QMessageBox.warning(
            self, "Warning",
            "There were metrics that did not have values in them: " +
            str(Ui_MainWindow.Nulvalues))
        Ui_MainWindow.EnableAnalysisButtons(self)
    if type(database) == Datasets.Datasets:
        Ui_MainWindow.metrics = database.metrics
        Ui_MainWindow.NumericMetrics = database.NumericMetrics
        Ui_MainWindow.EnableAnalysisButtons(self)
    elif type(database) == bool:
        Ui_MainWindow.Message(
            self,
            "An error occurred. Please check that the input files are either "
            "mzQC, tsv or csv quality files. Multiple files of the same type "
            "are allowed.")
        database = Datasets.Datasets()
        Ui_MainWindow.UploadProgress.setValue(0)
        self.onBrowseClicked(database)
def extract_features_CUHK03(model, scale_image_size, data,
                            extract_features_folder, logger, batch_size=128,
                            workers=4, is_tencrop=False, normalize=None):
    logger.info('Begin extract features')
    if normalize is None:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    if is_tencrop:
        logger.info('==> Using TenCrop')
        tencrop = transforms.Compose([
            transforms.Resize([int(x * 1.125) for x in scale_image_size]),
            transforms.TenCrop(scale_image_size)])
    else:
        tencrop = None
    transform = transforms.Compose([
        transforms.Resize(scale_image_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_data_folder = data
    logger.info('Begin load train data from ' + train_data_folder)
    train_dataloader = torch.utils.data.DataLoader(
        Datasets.CUHK03EvaluateDataset(folder=train_data_folder,
                                       transform=transform,
                                       tencrop=tencrop),
        batch_size=batch_size, shuffle=False, num_workers=workers,
        pin_memory=True)
    train_features = extract_features(model, train_dataloader, is_tencrop)

    if not os.path.isdir(extract_features_folder):
        os.makedirs(extract_features_folder)
    sio.savemat(os.path.join(extract_features_folder, 'train_features.mat'),
                {'feature_train_new': train_features})
def createFilteredDataset(self, num_allowed_docs):
    newDataset = Datasets.Datasets()
    oldNumQueries, oldNumDocuments, oldNumFeatures = numpy.shape(
        self.dataset.features)
    newDataset.relevances = -1 * numpy.ones(
        (oldNumQueries, num_allowed_docs), dtype=numpy.int32)
    newDataset.features = numpy.nan * numpy.ones(
        (oldNumQueries, num_allowed_docs, oldNumFeatures), dtype=numpy.float32)
    newDataset.docsPerQuery = numpy.clip(self.dataset.docsPerQuery, 0,
                                         num_allowed_docs)
    for i in range(oldNumQueries):
        producedRanking = self.predict(i, num_allowed_docs)
        allowedDocs = self.dataset.docsPerQuery[i]
        validDocs = min(num_allowed_docs, allowedDocs)
        newDataset.relevances[i, 0:validDocs] = self.dataset.relevances[
            i, producedRanking[0:validDocs]]
        newDataset.features[i, 0:validDocs, :] = self.dataset.features[
            i, producedRanking[0:validDocs], :]
    newDataset.name = self.name() + '_' + str(num_allowed_docs)
    print("DeterministicPolicy:createFilteredDataset [INFO] %s MaxNumDocs %d" %
          (newDataset.name, num_allowed_docs), flush=True)
    return newDataset
def train():
    model = lstm_model(config.seq_len, config.cell_num)

    # callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    model_checkpoint = ModelCheckpoint(config.ckpt_path, save_best_only=True,
                                       save_weights_only=True)
    metrics = Metrics()

    dataset = Datasets.Dataset()
    hist = model.fit(dataset.train_data, dataset.train_label,
                     batch_size=config.batch_size,
                     epochs=config.epoch_size,
                     shuffle=True,
                     validation_split=config.val_split,
                     callbacks=[early_stopping, model_checkpoint, metrics])
    with open(config.log_path, 'w') as f:
        f.write(str(hist.history) + '\n')
        f.write(str(metrics.val_f1s) + '\n')
        f.write(str(metrics.val_precisions) + '\n')
        f.write(str(metrics.val_recalls) + '\n')
    score, acc = model.evaluate(dataset.test_data, dataset.test_label,
                                batch_size=config.batch_size)
    print(score, acc)
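# Hedged sketch of the Metrics callback used above; its definition is not in
# this snippet. Judging by the attributes read after training, it plausibly
# records validation F1/precision/recall per epoch. Note: self.validation_data
# is only populated by older Keras versions; with validation_split set, as
# above, it holds the held-out inputs and labels.
from keras.callbacks import Callback
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np

class Metrics(Callback):
    def on_train_begin(self, logs=None):
        self.val_f1s, self.val_precisions, self.val_recalls = [], [], []

    def on_epoch_end(self, epoch, logs=None):
        x_val, y_val = self.validation_data[0], self.validation_data[1]
        y_pred = (np.asarray(self.model.predict(x_val)) > 0.5).astype(int)
        self.val_f1s.append(f1_score(y_val, y_pred))
        self.val_precisions.append(precision_score(y_val, y_pred))
        self.val_recalls.append(recall_score(y_val, y_pred))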
def predict_cases_sldata():
    Df_dataset = Datasets.SLDataPreprocess()
    new1 = Df_dataset[["Date_Added", "Detected_Prefecture"]]
    # Total number of cases recorded per date
    a = new1.groupby("Date_Added").size().values
    # One row per date, tagged with that date's total count
    df1 = new1.drop_duplicates(subset="Date_Added").assign(Count=a)
    # Pivot into a date x prefecture table of daily counts
    dfnew = df1.pivot_table('Count', ['Date_Added'], 'Detected_Prefecture')
    dfnew.fillna(0, inplace=True)
    return dfnew
def run(cfg):
    model_config = cfg["model_config"]
    print("SCRIPT START")
    # Create subfolders if they do not exist to save results
    for directory in [model_config["model_base_dir"], model_config["log_dir"]]:
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Set up data input
    pickle_file = "dataset.pkl"
    if os.path.exists(pickle_file):
        # Our dataset file is already there, so load it
        with open(pickle_file, 'rb') as file:
            dataset = pickle.load(file)
        print("Loaded dataset from pickle!")
    else:
        # Otherwise create the dataset pickle
        print("Preparing dataset! This could take a while...")
        # The dataset is described by an XML tracklist; each track element,
        # with its 3 sources [speech.wav, noise.wav, mix.wav] and their
        # relevant metadata, is parsed using etree in Datasets.getAudioData
        dataset_train = Datasets.getAudioData(
            "../noisy_trainset_28spk_wav_16000",
            "../clean_trainset_28spk_wav_16000")

        # Pick 10 random items for validation from the train set (this is
        # always the same selection each time since the random seed is fixed)
        val_idx = np.random.choice(len(dataset_train), size=10, replace=False)
        train_idx = [i for i in range(len(dataset_train)) if i not in val_idx]
        print("Validation with training items no. " + str(val_idx))

        # Draw randomly from datasets, keeping validation items out of training
        dataset = dict()
        dataset["train"] = [dataset_train[i] for i in train_idx]
        dataset["valid"] = [dataset_train[i] for i in val_idx]

        # Now create the dataset for the source separation task for speech
        # enhancement
        assert model_config["task"] == "speech"
        for subset in ["train", "valid"]:
            for i in range(len(dataset[subset])):
                dataset[subset][i] = (dataset[subset][i][0],
                                      dataset[subset][i][1])

        # Save dataset
        with open(pickle_file, 'wb') as file:
            pickle.dump(dataset, file)
        print("Wrote source separation for speech enhancement dataset!")

    print("LOADED DATASET")
    # The dataset structure is a dictionary with "train" and "valid" keys,
    # whose entries are lists, where each element represents one noisy speech
    # file as a tuple of its sources (reduced to two entries above for the
    # speech enhancement task).

    # Optimize in a supervised fashion until validation loss worsens
    sup_model_path, sup_loss = optimise(dataset=dataset)
    print("Supervised training finished! Saved model at " + sup_model_path +
          ". Performance: " + str(sup_loss))
def extract_features_MARS(model, scale_image_size, info_folder, data,
                          extract_features_folder, logger, batch_size=128,
                          workers=4, is_tencrop=False):
    logger.info('Begin extract features')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if is_tencrop:
        logger.info('==> Using TenCrop')
        tencrop = transforms.Compose([
            transforms.Resize([int(x * 1.125) for x in scale_image_size]),
            transforms.TenCrop(scale_image_size)])
    else:
        tencrop = None
    transform = transforms.Compose([
        transforms.Resize(scale_image_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_name_path = os.path.join(info_folder, 'train_name.txt')
    test_name_path = os.path.join(info_folder, 'test_name.txt')
    train_data_folder = os.path.join(data, 'bbox_train')
    test_data_folder = os.path.join(data, 'bbox_test')
    logger.info('Train data folder: ' + train_data_folder)
    logger.info('Test data folder: ' + test_data_folder)

    logger.info('Begin load train data')
    train_dataloader = torch.utils.data.DataLoader(
        Datasets.MARSEvalDataset(folder=train_data_folder,
                                 image_name_file=train_name_path,
                                 transform=transform,
                                 tencrop=tencrop),
        batch_size=batch_size, shuffle=False, num_workers=workers,
        pin_memory=True)
    logger.info('Begin load test data')
    test_dataloader = torch.utils.data.DataLoader(
        Datasets.MARSEvalDataset(folder=test_data_folder,
                                 image_name_file=test_name_path,
                                 transform=transform,
                                 tencrop=tencrop),
        batch_size=batch_size, shuffle=False, num_workers=workers,
        pin_memory=True)

    train_features = extract_features(model, train_dataloader, is_tencrop)
    test_features = extract_features(model, test_dataloader, is_tencrop)

    if not os.path.isdir(extract_features_folder):
        os.makedirs(extract_features_folder)
    sio.savemat(os.path.join(extract_features_folder, 'train_features.mat'),
                {'feature_train_new': train_features})
    sio.savemat(os.path.join(extract_features_folder, 'test_features.mat'),
                {'feature_test_new': test_features})
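# Hedged sketch of the shared extract_features helper used by both extractors
# above (not shown in this snippet). The usual TenCrop pattern is to fold the
# crop dimension into the batch, run the model, and average the ten crop
# features; the (batch, ncrops, C, H, W) layout is an assumption.
import torch
import numpy as np

def extract_features(model, dataloader, is_tencrop=False):
    model.eval()
    feats = []
    with torch.no_grad():
        for images in dataloader:
            if isinstance(images, (list, tuple)):
                images = images[0]  # drop labels if the dataset returns them
            if is_tencrop:
                b, ncrops, c, h, w = images.size()
                out = model(images.view(-1, c, h, w))
                out = out.view(b, ncrops, -1).mean(dim=1)  # average over crops
            else:
                out = model(images)
            feats.append(out.cpu().numpy())
    return np.concatenate(feats, axis=0)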
def main(args):
    print("Loading config file: ", args.config)
    params = utils.load_config_file(args.config)
    params["dataset_paths"] = utils.format_dataset_path(
        params["dataset_paths"])
    if "nyu" not in params:
        params["nyu"] = False

    # Data loading code
    print("Creating data loaders...")
    if params["nyu"]:
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(params["dataset_paths"], split='val')
    else:
        val_dataset = Datasets.FastDepthDataset(params["dataset_paths"],
                                                split='val',
                                                depth_min=params["depth_min"],
                                                depth_max=params["depth_max"],
                                                input_shape_model=(224, 224),
                                                random_crop=False)

    # Set batch size to 1 for validation
    data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=params["num_workers"],
        pin_memory=True)

    # Set GPU
    params["device"] = torch.device(
        "cuda:{}".format(params["device"])
        if params["device"] >= 0 and torch.cuda.is_available() else "cpu")
    print("Using device", params["device"])

    print("Loading models...")
    models = []
    model_names = []
    for model_dict in params["models"]:
        model_names.append(Path(model_dict["model_path"]).stem)
        model, _ = utils.load_model(model_dict, model_dict["model_path"],
                                    params["device"])
        model.to(params["device"])
        models.append(model)

    # Create output directory
    output_directory = os.path.join(params["save_folder"],
                                    ".".join(model_names))
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    params["output_directory"] = output_directory
    print("Saving results to " + output_directory)

    compare_models(params, data_loader, models)
def train_teacher(args):
    result_file = '_'.join([str(v) for v in args]) + '.pickle'
    result_file = os.path.join(Conf.OUTPUT_DIR, result_file)
    if os.path.exists(result_file):
        # Return the cached result if this configuration was already trained
        with open(result_file, 'rb') as fid:
            result = pickle.load(fid)
        return result

    prefix = args[0]
    dataset = args[1]
    weight_decay = args[2]

    data = Datasets.get_cifar100(batch_size=Conf.DENSENET_BS)
    train_loader, val_loader, test_loader = data
    nb_class = get_class_count(dataset)
    model = DenseNet.DenseNet121(nb_class)

    logfile = '_'.join([str(v) for v in args]) + '.pickle'
    logfile = os.path.join(Conf.LOG_DIR, logfile)
    compute = 'gpu' if torch.cuda.is_available() else 'cpu'
    if Conf.STAGE == 'test':
        LR = Conf.T_TEST_LR
        Epochs = Conf.T_TEST_EPOCH
        verbose = True
    else:
        LR = Conf.T_LR
        Epochs = Conf.T_EPOCH
        verbose = False
    outputs = Utility.train_classifier(logfile, model, compute, train_loader,
                                       val_loader, test_loader, LR, Epochs,
                                       weight_decay, verbose)
    return outputs
def ZooplaPriceChanges():
    total = 0
    pSame = 0
    priceMap = {}
    distribution = DiscountDistribution()
    data = ds.ZooplaMatchedDaily()
    # store = pd.HDFStore('rawDaily.hd5',mode='w')
    # for chunk in data.parser:
    chunk = data.read(1000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'},
                 inplace=True)
    filteredchunk = chunk[chunk["MARKET"] == "SALE"][[
        'LISTING ID', 'DAY', 'PRICE'
    ]][chunk['PRICE'] > 0]
    for row in filteredchunk.values:
        currentState = priceMap.get(row[0])
        if currentState is None:
            priceMap[row[0]] = PriceCalc(row[1], row[2])
        else:
            startDay, endDay, percent = currentState.add(row[1], row[2])
            distribution.add(startDay, endDay, percent)

    # now get deletion dates
    delData = ds.ZooplaMatchedCollated()
    # for chunk in delData.parser:
    chunk = delData.read(1000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'},
                 inplace=True)
    filteredchunk = chunk[chunk["MARKET"] == "SALE"][['LISTING ID', 'DELETED']]
    for row in filteredchunk.values:
        currentState = priceMap.get(row[0])
        if currentState is not None:
            if currentState.currentprice == currentState.initialmarketprice:
                pSame += 1
            total += 1
            startDay, endDay, percent = currentState.add(row[1], 0)
            distribution.add(startDay, endDay, percent)
            priceMap.pop(row[0])
    print(len(priceMap))
    print(pSame, total, pSame * 1.0 / total)
    plotProbability(distribution.dist)
def read_scores(model_config, audio_length, score_filenames, pad_time_frames):
    scores = {
        source: Datasets.read_score(score_filenames[source], audio_length,
                                    model_config['expected_sr'])
        for source in model_config['separator_source_names']
    }
    padded_scores = {
        source: np.pad(score, [(pad_time_frames, pad_time_frames)],
                       mode="constant", constant_values=0.0)
        for source, score in scores.items()
    }
    return padded_scores
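# Hedged usage sketch: pad_time_frames is presumably half the input/output
# length difference of the separator, mirroring run() above; audio_length and
# score_filenames (a source-name -> score-file mapping) are hypothetical.
input_shape, output_shape = Utils.get_separator_shapes(model_config)
pad_time_frames = (input_shape[1] - output_shape[1]) // 2
padded_scores = read_scores(model_config,
                            audio_length=audio_length,
                            score_filenames=score_filenames,
                            pad_time_frames=pad_time_frames)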
def main(cfg, model_path, output_path, chorales_path):
    model_config = cfg["model_config"]
    print(f'Running prediction for model: {model_path}')
    print(f'Reading chorale mixtures from {chorales_path}')
    chorales = Datasets.getSynthesizedChorales(chorales_path,
                                               model_config["score_informed"])
    # Datasets.preprocess_dataset divides the chorales into partitions; the
    # test partition starts after 320 chorales.
    test_chorales = chorales[320:]
    print('Saving outputs to: %s' % output_path)
    for i, chorale in enumerate(test_chorales):
        print(f'Predicting chorale {i + 1} out of {len(test_chorales)}...')
        predict_chorale(model_config, chorale, model_path, output_path)
def __init__(self, opts):
    self.opts = opts
    self.model_path = opts.model_path
    self.output_dir = opts.output_dir
    self.gpu_id = opts.gpu_id
    self.disp_module = opts.disp_module
    self.dataset = opts.dataset
    self.batch_size = opts.batch_size
    self.train = opts.train

    # Getting the data loader ready
    if self.dataset == 'kitti':
        dataset = Datasets.KittiDataset(self.opts)
    elif self.dataset == 'nyu':
        dataset = Datasets.NYUDataset(self.opts)
    else:
        raise NameError('Dataset not found')
    self.DataLoader = data.DataLoader(dataset,
                                      batch_size=self.batch_size,
                                      shuffle=False,
                                      num_workers=8)
    print('Data loader made')

    # Loading the model
    disp_module = importlib.import_module(self.disp_module)
    self.DispNet = disp_module.DispResNet()
    self.DispNet.load_state_dict(torch.load(self.model_path))
    if self.gpu_id is not None:
        self.device = torch.device('cuda:' + str(self.gpu_id[0]))
        self.DispNet = self.DispNet.to(self.device)
        if len(self.gpu_id) > 1:
            self.DispNet = nn.DataParallel(self.DispNet, self.gpu_id)
    else:
        self.device = torch.device('cpu')
    self.DispNet.eval()
    print('Model Loaded')

    self.start_test()
def noisy(model_config, dataset):
    csv_file_name = (model_config["estimates_path"] + os.path.sep + "noisy" +
                     "_" + dataset + ".csv")
    with open(csv_file_name, 'w') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',')
        csv_writer.writerow(
            ["target_file", "input_file", "pesq", "lsd", "ssnr", "audio_len"])

        # Get test set
        if dataset == "VCTK":
            _, test = Datasets.get_VCTK(
                model_config["preprocessed_data_path"] + "/VCTK_8k_DBE",
                model_config["input"], model_config["target"])
        elif dataset == "DAPS":
            _, test = Datasets.get_DAPS(
                model_config["preprocessed_data_path"] + "/DAPS_8k_DBE",
                model_config["input"], model_config["target"])

        for sample in test:
            # Evaluate
            input_file_name = sample[model_config["input"]]
            print("Test file " + input_file_name)
            target_file_name = sample[model_config["target"]]
            pesq, lsd, ssnr, audio_len = Metrics.Eval(
                target_file_name, input_file_name,
                model_config['expected_sr'])
            csv_writer.writerow([
                target_file_name, input_file_name, pesq, lsd, ssnr, audio_len
            ])
            print('PESQ:{:.3f} LSD:{:.3f} SSNR:{:.3f}'.format(pesq, lsd, ssnr))

    # Report length-weighted averages over the whole test set
    results_df = pd.read_csv(csv_file_name,
                             usecols=["ssnr", "lsd", "pesq", "audio_len"])
    pesq, lsd, ssnr = (results_df[["pesq", "lsd", "ssnr"]].multiply(
        results_df["audio_len"], axis=0)).sum() / sum(results_df["audio_len"])
    print('Results -> PESQ:{:.3f} LSD:{:.3f} SSNR:{:.3f}'.format(
        pesq, lsd, ssnr))
def plotBias(self):
    # Plot the biased data set
    isInB = np.array([
        int(elem) for elem in
        [self.X_train[i] in self.X_b_train for i in range(len(self.X_train))]
    ])
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(2, 2))
    plt.xticks([])
    plt.yticks([])
    # plt.title("Biased Dataset")
    plt.scatter(x=self.X_train[np.where(isInB == 0)[0]][:, 0],
                y=self.X_train[np.where(isInB == 0)[0]][:, 1],
                c=Datasets.colormap(self.Y_train[np.where(isInB == 0)[0]]),
                alpha=0.05, s=1)
    plt.scatter(x=self.X_train[np.nonzero(isInB)][:, 0],
                y=self.X_train[np.nonzero(isInB)][:, 1],
                c=Datasets.colormap(self.Y_train[np.nonzero(isInB)]),
                alpha=0.3, s=2)
    plt.show()
    return fig
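# Hedged sketch of Datasets.colormap as called above (definition not shown):
# it appears to map integer class labels to one plot color per point, e.g. for
# a two-class problem:
def colormap(labels):
    palette = np.array(['tab:blue', 'tab:orange'])  # hypothetical palette
    return palette[np.asarray(labels, dtype=int)]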
def check_partition(partition, model_config):
    records_dir = Path(model_config['data_path']) / partition
    records_files = sorted(records_dir.glob('*.tfrecords'))
    # Materialise as a list of strings: a bare map object would be exhausted
    # by the join below before it reaches TFRecordDataset
    records_files = [str(f) for f in records_files]
    print('Partition: ' + partition)
    print('Parsing record files:\n\t' + '\n\t'.join(records_files))
    dataset = tf.data.TFRecordDataset(records_files)
    input_shape, _output_shape = Utils.get_separator_shapes(model_config)
    dataset = dataset.map(
        lambda x: Datasets.parse_record(x, input_shape[1:], model_config))
    num_songs = 0
    for i, song in enumerate(dataset):
        num_songs = i + 1
        print('\tSong %s: %s samples' % (num_songs, int(song['length'])))
    print('Total in %s partition: %s songs' % (partition, num_songs))
def __init__(self):
    # datasets
    self.dataset = Datasets.Dataset(
        '/datasets/UCF101/jpegs_256',
        '/datasets/UCF101/UCF_list',
        '04',
        'spatial_dmd',
        30,
        24,
    )
    self.train_loader = self.dataset.get_loader('train')
    self.test_loader = self.dataset.get_loader('test')

    # networks
    self.net = Networks.Network(in_channels=13,
                                out_classes=10,
                                pretrain_path='./zoo/spatial_pretrain.pth')
def extract_batches(partition, model_config):
    output_dir = tempfile.mkdtemp()
    print(f'Output to {output_dir}')
    input_shape, _output_shape = Utils.get_separator_shapes(model_config)
    dataset = Datasets.get_dataset(model_config, input_shape, _output_shape,
                                   partition)
    dataset = dataset.take(2)
    dataset = dataset.apply(tf.data.experimental.unbatch())
    for snippet_index, snippet in enumerate(dataset):
        print('\tSnippet %s' % (snippet_index + 1))
        snippet_dir = Path(output_dir) / str(snippet_index)
        os.makedirs(snippet_dir)
        write_audio('mix', model_config, snippet_dir, snippet)
        for source in model_config['separator_source_names']:
            write_audio(source, model_config, snippet_dir, snippet)
            if model_config['score_informed']:
                write_score(source, model_config, snippet_dir, snippet)
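# Hedged sketch of the write_audio helper called above (its definition is not
# part of this snippet): a minimal version writes one source of the snippet
# dict to a wav file. soundfile is an assumption; any wav writer would do.
import soundfile as sf

def write_audio(source, model_config, snippet_dir, snippet):
    audio = tf.squeeze(snippet[source]).numpy()
    sf.write(str(Path(snippet_dir) / (source + '.wav')), audio,
             model_config['expected_sr'])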
def CreateDataFrame(Data, config):
    """
    Input:
        Data ... format from load_dataset (a Bunch)
        config ... configuration
    Output:
        df ... dataframe with all the features
    """
    feat = FE.Features(config=config, normalize=True)
    X = feat.fit_transform(Data=Data.H_alpha)
    lab = dat.merge_labels(labels=Data.labels)
    X = np.hstack((X, lab.reshape(lab.shape[0], 1)))
    nm = feat.get_names(labels=True)
    df = pd.DataFrame(data=X, columns=nm)
    return df
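# Hedged usage sketch, reusing the same Data bunch and feature config assumed
# in CF_Boosted_Trees above:
df = CreateDataFrame(Data=Data, config=config)
print(df.describe())  # quick sanity check of the extracted feature columns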
def extract_partition(partition, model_config):
    output_dir = tempfile.mkdtemp()
    records_dir = Path(model_config['data_path']) / partition
    records_paths = sorted(records_dir.glob('*.tfrecords'))
    # Materialise as a list of strings: a bare map object would be exhausted
    # by the join below before it reaches TFRecordDataset
    records_files = [str(p) for p in records_paths]
    print('Partition: ' + partition)
    print('Parsing record files:\n\t' + '\n\t'.join(records_files))
    dataset = tf.data.TFRecordDataset(records_files)
    input_shape, _output_shape = Utils.get_separator_shapes(model_config)
    dataset = dataset.map(
        lambda x: Datasets.parse_record(x, input_shape[1:], model_config))
    dataset = dataset.take(10)
    for song_index, song in enumerate(dataset):
        print('\tSong %s: %s samples' % (song_index + 1, int(song['length'])))
        os.makedirs('%s/%s' % (output_dir, song_index))
        for source in model_config['source_names'] + ['mix']:
            librosa.output.write_wav(
                '%s/%s/%s.wav' % (output_dir, song_index, source),
                tf.squeeze(song[source]).numpy(),
                model_config['expected_sr'])
def __init__(self):
    # Bring raw data from the Datasets class reader for the English Housing
    # Survey data
    raw_data = ds.EHSinterview()
    # Filter for the income field and rentwkx, total weekly rent payable
    # (rent plus housing benefit)
    income_rent = raw_data.loc[:, [self.incomeField, "rentwkx"]]
    # Filter out non renters and unreasonably large weekly rent values
    self.renterData = income_rent[(income_rent["rentwkx"] > 0)
                                  & (income_rent["rentwkx"] < 50000)]
    # Filter out strings in the rentwkx column
    self.renterData = self.renterData[self.renterData["rentwkx"].apply(
        lambda x: not isinstance(x, str))]
    # Cast rentwkx column values to numpy float64 type
    self.renterData = self.renterData.astype({"rentwkx": np.float64})
    # Split the data into a 2D histogram with logarithmic bins (no
    # normalisation here as we want column normalisation, to be introduced
    # when plotting)
    self.population, self.xbins, self.ybins = np.histogram2d(
        np.log(self.renterData[self.incomeField].values),
        np.log(self.renterData["rentwkx"].values),
        bins=[30, 30])
    # Transpose the matrix as histogram2d returns a list of columns instead
    # of a list of rows
    self.population = self.population.T
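# Hedged sketch of the column normalisation mentioned in the comment above:
# divide each income column by its own total before plotting, so every column
# sums to one. The matplotlib plotting is an assumption, not part of the
# original class.
import matplotlib.pyplot as plt

def plot_column_normalised(population, xbins, ybins):
    # After the transpose above, rows are rent bins and columns are income bins
    col_totals = population.sum(axis=0, keepdims=True)
    normalised = population / np.where(col_totals > 0, col_totals, 1.0)
    plt.pcolormesh(xbins, ybins, normalised)
    plt.xlabel('log income')
    plt.ylabel('log weekly rent')
    plt.colorbar(label='P(rent | income)')
    plt.show()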