def run(cfg):
    model_config = cfg['model_config']
    chorales = Datasets.getSynthesizedChorales(model_config["chorales_path"],
                                               model_config["score_informed"])
    # The separator shapes do not depend on the chorale, so compute the padding once
    input_shape, output_shape = Utils.get_separator_shapes(model_config)
    pad_frames = (input_shape[1] - output_shape[1]) // 2
    for chorale_number in chorales_to_check:
        chorale = get_chorale(chorales, chorale_number)
        Datasets.process_sample(chorale, pad_frames, model_config)
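# Hedged sketch: `chorales_to_check` and `get_chorale` are referenced above but
# not defined in this snippet. One minimal, assumption-based reading of the
# call sites (the numbers and the lookup rule are made up):
chorales_to_check = [0, 1, 2]  # hypothetical chorale indices to inspect

def get_chorale(chorales, chorale_number):
    # Assumption: chorale_number simply indexes the list returned by
    # Datasets.getSynthesizedChorales
    return chorales[chorale_number]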
def CF_Boosted_Trees(model, Data, config, name, location, states=3):
    """
    Input: Data ... bunch whose H_alpha signals and labels are lists-of-lists,
                    split into K cross-validation folds below
    """
    train_data, test_data = dat.PrepareCrossFold(Data.H_alpha)
    train_labels, test_labels = dat.PrepareCrossFold(Data.labels)
    K = len(train_data)
    score_tab = Scoring.ScoringTable(location=location, name=name,
                                     n_states=states)
    feat = FE.Features(config=config, normalize=True)

    start = time()
    for cross in range(K):
        clf = copy(model)
        train_matrix = feat.fit_transform(Data=train_data[cross])
        test_matrix = feat.fit_transform_batch(Data=test_data[cross])
        target = dat.merge_labels(train_labels[cross])
        # print(np.shape(train_matrix), np.shape(target))
        # print(np.shape(test_matrix), np.shape(test_labels[cross]))
        pred = train_and_predict(model=clf, train=train_matrix,
                                 test=test_matrix, labels=target,
                                 unsupervised=False)
        score = Scoring.score(states=pred, results=test_labels[cross],
                              unsupervised=False, pocet_stavu=states)
        # print("score", score)
        score_tab.add(scores=score)
        print("{} section done. Time taken from start {}".format(
            cross + 1, time() - start))

    score_tab.save_table()
    config["n_estimators"] = model.n_estimators
    with open(location + name + '_config.pickle', 'wb') as f:
        pickle.dump(config, f)
    return score_tab.return_table()
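# Hedged usage sketch: `model` can be any classifier exposing n_estimators;
# sklearn's GradientBoostingClassifier is one option. The Data bunch and the
# feature config come from this project and are assumed to exist already.
from sklearn.ensemble import GradientBoostingClassifier

table = CF_Boosted_Trees(model=GradientBoostingClassifier(n_estimators=100),
                         Data=Data,        # bunch with .H_alpha and .labels
                         config=config,    # feature-extraction config dict
                         name='boosted_trees',
                         location='./results/',
                         states=3)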
def load_dataset(params):
    print("Loading the dataset...")
    if params['nyu_dataset']:
        dataset = NYUDataset("../data/nyudepthv2/train", split='train')
        test_dataset = NYUDataset("../data/nyudepthv2/val", split='val')
    else:
        dataset = Datasets.FastDepthDataset(
            params["training_dataset_paths"],
            split='train',
            depth_min=params["depth_min"],
            depth_max=params["depth_max"],
            input_shape_model=(224, 224),
            disparity=params["predict_disparity"],
            random_crop=params["random_crop"])
        test_dataset = Datasets.FastDepthDataset(
            params["test_dataset_paths"],
            split='val',
            depth_min=params["depth_min"],
            depth_max=params["depth_max"],
            input_shape_model=(224, 224),
            disparity=params["predict_disparity"],
            random_crop=False)

    # Make training/validation split
    train_val_split_lengths = utils.get_train_val_split_lengths(
        params["train_val_split"], len(dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, train_val_split_lengths)
    params["num_training_examples"] = len(train_dataset)
    params["num_validation_examples"] = len(val_dataset)

    # DataLoaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=params["batch_size"],
        shuffle=True,
        num_workers=params["num_workers"],
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=params["batch_size"],
        shuffle=True,
        num_workers=params["num_workers"],
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=params["batch_size"],
        shuffle=False,
        num_workers=params["num_workers"],
        pin_memory=True)
    return train_loader, val_loader, test_loader
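# Hedged usage sketch: a minimal `params` dict for the non-NYU branch. The
# keys are taken from the function body; the paths and values are made up.
params = {
    "nyu_dataset": False,
    "training_dataset_paths": ["../data/my_train_set"],  # hypothetical path
    "test_dataset_paths": ["../data/my_test_set"],       # hypothetical path
    "depth_min": 0.1,
    "depth_max": 10.0,
    "predict_disparity": False,
    "random_crop": True,
    "train_val_split": [0.9, 0.1],
    "batch_size": 8,
    "num_workers": 4,
}
train_loader, val_loader, test_loader = load_dataset(params)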
def dsd_100_experiment(model_config):
    print("SCRIPT START")
    # Create subfolders if they do not exist to save results
    for directory in [model_config["model_base_dir"], model_config["log_dir"]]:
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Set up data input
    if os.path.exists('dataset.pkl'):
        with open('dataset.pkl', 'rb') as file:
            dataset = pickle.load(file)
        print("Loaded dataset from pickle!")
    else:
        dsd_train, dsd_test = Datasets.getMUSDB(model_config["musdb_path"])
        ccm = Datasets.getCCMixter("CCMixter.xml")

        # Pick 5 random songs for validation from the MUSDB train set (this is
        # always the same selection each time since we fix the random seed!)
        val_idx = np.random.choice(len(dsd_train), size=5, replace=False)
        train_idx = [i for i in range(len(dsd_train)) if i not in val_idx]
        print("Validation with MUSDB training songs no. " + str(val_idx))

        # Draw randomly from datasets
        dataset = dict()
        dataset["train_sup"] = [dsd_train[i] for i in train_idx] + ccm
        dataset["train_unsup"] = list()  # [dsd_train[0][25:], dsd_train[1][25:], dsd_train[2][25:]] #[fma, list(), looperman]
        dataset["valid"] = [dsd_train[i] for i in val_idx]
        dataset["test"] = dsd_test

        with open('dataset.pkl', 'wb') as file:
            pickle.dump(dataset, file)
        print("Created dataset structure")

    # Set up the dataset depending on the task. Each MUSDB entry contains
    # sources in the order (mix, acc, bass, drums, other, vocal)
    if model_config["task"] == "voice":
        # The first 25 train_sup entries (presumably the MUSDB part; CCMixter
        # entries follow) carry all six sources; reduce them to (mix, acc, vocal)
        for i in range(25):
            dataset["train_sup"][i] = (dataset["train_sup"][i][0],
                                       dataset["train_sup"][i][1],
                                       dataset["train_sup"][i][5])
        for subset in ["valid", "test"]:
            for i in range(len(dataset[subset])):
                dataset[subset][i] = (dataset[subset][i][0],
                                      dataset[subset][i][1],
                                      dataset[subset][i][5])

    # Optimize in a supervised fashion until validation loss worsens
    sup_model_path, sup_loss = optimise(dataset=dataset)
    print("Supervised training finished! Saved model at " + sup_model_path +
          ". Performance: " + str(sup_loss))
    Evaluate.produce_source_estimates(model_config, sup_model_path,
                                      model_config["musdb_path"],
                                      model_config["estimates_path"], "train")
def preprocess(model_config, dataset):
    # Determine input and output shapes
    disc_input_shape = [model_config["batch_size"],
                        model_config["num_frames"], 0]  # Shape of input
    separator_class = Models.UnetAudioSeparator.UnetAudioSeparator(model_config)
    sep_input_shape, sep_output_shape = separator_class.get_padding(
        np.array(disc_input_shape))
    tiny = 'tiny' in dataset
    Datasets.preprocess_dataset(model_config, sep_input_shape,
                                sep_output_shape, tiny)
def test_PrepareCrossFold(Data, vypis=False):
    train, test = Datasets.PrepareCrossFold(Data.H_alpha)
    tmp1, tmp2 = Datasets.PrepareCrossFold(Data.data)
    shodujiSe = True  # "do they all match"
    for m, n in enumerate(train):
        for i, j in enumerate(n):
            YN = np.allclose(j, tmp1[m][i][1])
            if not YN:
                shodujiSe = False
            if vypis:  # verbose output
                print(f'{m} Does signal {i + 1} match? ', YN)
    print("Do they all match? ", shodujiSe)
def CalculatePriceChanges():
    distribution = DiscountDistribution()
    priceMap = {}
    # data = ds.ZooplaMatchedDaily(2000000)  # during rising housing market
    data = ds.ZooplaMatchedDaily()  # at bottom of housing market
    chunk = data.read(500000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'},
                 inplace=True)
    filteredchunk = chunk[chunk["MARKET"] == "SALE"][[
        'LISTING ID', 'DAY', 'PRICE'
    ]][chunk['PRICE'] > 0]
    for row in filteredchunk.values:
        # row: LISTING ID, DAY, PRICE
        listingid = row[0]
        if listingid in priceMap:
            lastRecord = priceMap[listingid]
            oldPrice = lastRecord.currentprice
            startDay, endDay, percent = lastRecord.add(row[1], row[2])
            if oldPrice == row[2]:
                # no price change
                distribution.noChange(startDay / 30, endDay / 30)
            else:
                # price has changed
                distribution.addChange(startDay / 30, endDay / 30, percent)
        else:
            priceMap[listingid] = PriceCalc(row[1], row[2])
    return distribution
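# Hedged sketch of the PriceCalc helper used above and in ZooplaPriceChanges
# below; its definition is not part of this snippet. The interface is inferred
# from the call sites: it tracks one listing, and add(day, price) reports the
# (startDay, endDay, percent) change since the previous record.
class PriceCalc(object):
    def __init__(self, day, price):
        self.initialmarketprice = price
        self.currentprice = price
        self.lastday = day

    def add(self, day, price):
        # Percent change relative to the previously recorded price
        percent = (price - self.currentprice) * 100.0 / self.currentprice
        result = (self.lastday, day, percent)
        self.lastday = day
        self.currentprice = price
        return result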
def __init__(self):
    rawData = ds.EHSinterview()
    # Filter for fields of interest
    incomeRent = rawData.loc[:, [self.incomeField, "rentwkx", "bedrqx"]]
    # Filter out non renters
    self.renterData = incomeRent[incomeRent["rentwkx"] > 0]
    # self.renterData = self.renterData[self.renterData["bedrqx"]==4]  # only consider one-bed
    # Split the data into 2D histogram data
    self.population, self.xbins, self.ybins = np.histogram2d(
        np.log(self.renterData["rentwkx"].values),
        np.log(self.renterData[self.incomeField].values),
        bins=[40, 30])
    self.xaxis = (np.array(self.xbins[1:]) + np.array(self.xbins[:-1])) / 2.0
    self.yaxis = (np.array(self.ybins[1:]) + np.array(self.ybins[:-1])) / 2.0
    self.popdf = pd.DataFrame(
        data=np.zeros(((len(self.xbins) - 1) * (len(self.ybins) - 1), 3)),
        columns=['rental price', 'income', 'p'])
    i = 0
    totalPop = self.population.sum()
    for param in range(1, len(self.ybins)):
        for out in range(1, len(self.xbins)):
            self.popdf.iloc[i, 0] = (self.xbins[out] + self.xbins[out - 1]) / 2.0
            self.popdf.iloc[i, 1] = (self.ybins[param] + self.ybins[param - 1]) / 2.0
            self.popdf.iloc[i, 2] = self.population[out - 1, param - 1] * 1.0 / totalPop
            i += 1
def ThreadingFix(self, database):
    if FileInput.BrowseWindow.NullError:
        QtWidgets.QMessageBox.warning(
            self, "Error",
            "Some format error occurred. Are there perhaps two spaces next to "
            "each other in the file? In a tsv these can be seen as two columns.")
    if len(Ui_MainWindow.Nulvalues) > 0:
        QtWidgets.QMessageBox.warning(
            self, "Warning",
            "There were metrics that did not have values in them: " +
            str(Ui_MainWindow.Nulvalues))
        Ui_MainWindow.EnableAnalysisButtons(self)
    if type(database) == Datasets.Datasets:
        Ui_MainWindow.metrics = database.metrics
        Ui_MainWindow.NumericMetrics = database.NumericMetrics
        Ui_MainWindow.EnableAnalysisButtons(self)
    elif type(database) == bool:
        Ui_MainWindow.Message(
            self,
            "An error occurred. Please check that the input files are either "
            "mzQC, tsv or csv quality files. Multiple files of the same type "
            "are allowed.")
        database = Datasets.Datasets()
        Ui_MainWindow.UploadProgress.setValue(0)
        self.onBrowseClicked(database)
def extract_features_CUHK03(model, scale_image_size, data,
                            extract_features_folder, logger, batch_size=128,
                            workers=4, is_tencrop=False, normalize=None):
    logger.info('Begin extract features')
    if normalize is None:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    if is_tencrop:
        logger.info('==> Using TenCrop')
        tencrop = transforms.Compose([
            transforms.Resize([int(x * 1.125) for x in scale_image_size]),
            transforms.TenCrop(scale_image_size)])
    else:
        tencrop = None
    transform = transforms.Compose([
        transforms.Resize(scale_image_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_data_folder = data
    logger.info('Begin load train data from ' + train_data_folder)
    train_dataloader = torch.utils.data.DataLoader(
        Datasets.CUHK03EvaluateDataset(folder=train_data_folder,
                                       transform=transform,
                                       tencrop=tencrop),
        batch_size=batch_size, shuffle=False, num_workers=workers,
        pin_memory=True)
    train_features = extract_features(model, train_dataloader, is_tencrop)

    if not os.path.isdir(extract_features_folder):
        os.makedirs(extract_features_folder)
    sio.savemat(os.path.join(extract_features_folder, 'train_features.mat'),
                {'feature_train_new': train_features})
def createFilteredDataset(self, num_allowed_docs):
    newDataset = Datasets.Datasets()
    oldNumQueries, oldNumDocuments, oldNumFeatures = numpy.shape(
        self.dataset.features)
    newDataset.relevances = -1 * numpy.ones(
        (oldNumQueries, num_allowed_docs), dtype=numpy.int32)
    newDataset.features = numpy.nan * numpy.ones(
        (oldNumQueries, num_allowed_docs, oldNumFeatures), dtype=numpy.float32)
    newDataset.docsPerQuery = numpy.clip(self.dataset.docsPerQuery, 0,
                                         num_allowed_docs)
    for i in range(oldNumQueries):
        producedRanking = self.predict(i, num_allowed_docs)
        allowedDocs = self.dataset.docsPerQuery[i]
        validDocs = min(num_allowed_docs, allowedDocs)
        newDataset.relevances[i, 0:validDocs] = self.dataset.relevances[
            i, producedRanking[0:validDocs]]
        newDataset.features[i, 0:validDocs, :] = self.dataset.features[
            i, producedRanking[0:validDocs], :]
    newDataset.name = self.name() + '_' + str(num_allowed_docs)
    print("DeterministicPolicy:createFilteredDataset [INFO] %s MaxNumDocs %d" %
          (newDataset.name, num_allowed_docs), flush=True)
    return newDataset
def train():
    model = lstm_model(config.seq_len, config.cell_num)

    # callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    model_checkpoint = ModelCheckpoint(config.ckpt_path, save_best_only=True,
                                       save_weights_only=True)
    metrics = Metrics()

    dataset = Datasets.Dataset()
    hist = model.fit(dataset.train_data, dataset.train_label,
                     batch_size=config.batch_size,
                     epochs=config.epoch_size,
                     shuffle=True,
                     validation_split=config.val_split,
                     callbacks=[early_stopping, model_checkpoint, metrics])
    with open(config.log_path, 'w') as f:
        f.write(str(hist.history) + '\n')
        f.write(str(metrics.val_f1s) + '\n')
        f.write(str(metrics.val_precisions) + '\n')
        f.write(str(metrics.val_recalls) + '\n')
    score, acc = model.evaluate(dataset.test_data, dataset.test_label,
                                batch_size=config.batch_size)
    print(score, acc)
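# Hedged sketch of the Metrics callback used above; its definition is not in
# this snippet. Judging by the attributes read after training, it plausibly
# records validation F1/precision/recall per epoch. Note: self.validation_data
# is only populated by older Keras versions; with validation_split set, as
# above, it holds the held-out inputs and labels.
from keras.callbacks import Callback
from sklearn.metrics import f1_score, precision_score, recall_score
import numpy as np

class Metrics(Callback):
    def on_train_begin(self, logs=None):
        self.val_f1s, self.val_precisions, self.val_recalls = [], [], []

    def on_epoch_end(self, epoch, logs=None):
        x_val, y_val = self.validation_data[0], self.validation_data[1]
        y_pred = (np.asarray(self.model.predict(x_val)) > 0.5).astype(int)
        self.val_f1s.append(f1_score(y_val, y_pred))
        self.val_precisions.append(precision_score(y_val, y_pred))
        self.val_recalls.append(recall_score(y_val, y_pred))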
def predict_cases_sldata():
    Df_dataset = Datasets.SLDataPreprocess()
    new1 = Df_dataset[["Date_Added", "Detected_Prefecture"]]
    # Total number of cases recorded per date
    a = new1.groupby("Date_Added").size().values
    # One row per date, tagged with that date's total count
    df1 = new1.drop_duplicates(subset="Date_Added").assign(Count=a)
    # Pivot into a date x prefecture table of daily counts
    dfnew = df1.pivot_table('Count', ['Date_Added'], 'Detected_Prefecture')
    dfnew.fillna(0, inplace=True)
    return dfnew
def run(cfg):
    model_config = cfg["model_config"]
    print("SCRIPT START")
    # Create subfolders if they do not exist to save results
    for directory in [model_config["model_base_dir"], model_config["log_dir"]]:
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Set up data input
    pickle_file = "dataset.pkl"
    if os.path.exists(pickle_file):
        # Our dataset file is already there, so load it
        with open(pickle_file, 'rb') as file:
            dataset = pickle.load(file)
        print("Loaded dataset from pickle!")
    else:
        # Otherwise create the dataset pickle
        print("Preparing dataset! This could take a while...")
        # The dataset is described by an XML tracklist; each track element,
        # with its 3 sources [speech.wav, noise.wav, mix.wav] and their
        # relevant metadata, is parsed using etree in Datasets.getAudioData
        dataset_train = Datasets.getAudioData(
            "../noisy_trainset_28spk_wav_16000",
            "../clean_trainset_28spk_wav_16000")

        # Pick 10 random items for validation from the train set (this is
        # always the same selection each time since the random seed is fixed)
        val_idx = np.random.choice(len(dataset_train), size=10, replace=False)
        train_idx = [i for i in range(len(dataset_train)) if i not in val_idx]
        print("Validation with training items no. " + str(val_idx))

        # Draw randomly from datasets, keeping validation items out of training
        dataset = dict()
        dataset["train"] = [dataset_train[i] for i in train_idx]
        dataset["valid"] = [dataset_train[i] for i in val_idx]

        # Now create the dataset for the source separation task for speech
        # enhancement
        assert model_config["task"] == "speech"
        for subset in ["train", "valid"]:
            for i in range(len(dataset[subset])):
                dataset[subset][i] = (dataset[subset][i][0],
                                      dataset[subset][i][1])

        # Save dataset
        with open(pickle_file, 'wb') as file:
            pickle.dump(dataset, file)
        print("Wrote source separation for speech enhancement dataset!")

    print("LOADED DATASET")
    # The dataset structure is a dictionary with "train" and "valid" keys,
    # whose entries are lists, where each element represents one noisy speech
    # file as a tuple of its sources (reduced to two entries above for the
    # speech enhancement task).

    # Optimize in a supervised fashion until validation loss worsens
    sup_model_path, sup_loss = optimise(dataset=dataset)
    print("Supervised training finished! Saved model at " + sup_model_path +
          ". Performance: " + str(sup_loss))
def extract_features_MARS(model, scale_image_size, info_folder, data,
                          extract_features_folder, logger, batch_size=128,
                          workers=4, is_tencrop=False):
    logger.info('Begin extract features')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if is_tencrop:
        logger.info('==> Using TenCrop')
        tencrop = transforms.Compose([
            transforms.Resize([int(x * 1.125) for x in scale_image_size]),
            transforms.TenCrop(scale_image_size)])
    else:
        tencrop = None
    transform = transforms.Compose([
        transforms.Resize(scale_image_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_name_path = os.path.join(info_folder, 'train_name.txt')
    test_name_path = os.path.join(info_folder, 'test_name.txt')
    train_data_folder = os.path.join(data, 'bbox_train')
    test_data_folder = os.path.join(data, 'bbox_test')
    logger.info('Train data folder: ' + train_data_folder)
    logger.info('Test data folder: ' + test_data_folder)

    logger.info('Begin load train data')
    train_dataloader = torch.utils.data.DataLoader(
        Datasets.MARSEvalDataset(folder=train_data_folder,
                                 image_name_file=train_name_path,
                                 transform=transform,
                                 tencrop=tencrop),
        batch_size=batch_size, shuffle=False, num_workers=workers,
        pin_memory=True)
    logger.info('Begin load test data')
    test_dataloader = torch.utils.data.DataLoader(
        Datasets.MARSEvalDataset(folder=test_data_folder,
                                 image_name_file=test_name_path,
                                 transform=transform,
                                 tencrop=tencrop),
        batch_size=batch_size, shuffle=False, num_workers=workers,
        pin_memory=True)

    train_features = extract_features(model, train_dataloader, is_tencrop)
    test_features = extract_features(model, test_dataloader, is_tencrop)

    if not os.path.isdir(extract_features_folder):
        os.makedirs(extract_features_folder)
    sio.savemat(os.path.join(extract_features_folder, 'train_features.mat'),
                {'feature_train_new': train_features})
    sio.savemat(os.path.join(extract_features_folder, 'test_features.mat'),
                {'feature_test_new': test_features})
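# Hedged sketch of the shared extract_features helper used by both extractors
# above (not shown in this snippet). The usual TenCrop pattern is to fold the
# crop dimension into the batch, run the model, and average the ten crop
# features; the (batch, ncrops, C, H, W) layout is an assumption.
import torch
import numpy as np

def extract_features(model, dataloader, is_tencrop=False):
    model.eval()
    feats = []
    with torch.no_grad():
        for images in dataloader:
            if isinstance(images, (list, tuple)):
                images = images[0]  # drop labels if the dataset returns them
            if is_tencrop:
                b, ncrops, c, h, w = images.size()
                out = model(images.view(-1, c, h, w))
                out = out.view(b, ncrops, -1).mean(dim=1)  # average over crops
            else:
                out = model(images)
            feats.append(out.cpu().numpy())
    return np.concatenate(feats, axis=0)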
def main(args):
    print("Loading config file: ", args.config)
    params = utils.load_config_file(args.config)
    params["dataset_paths"] = utils.format_dataset_path(
        params["dataset_paths"])
    if "nyu" not in params:
        params["nyu"] = False

    # Data loading code
    print("Creating data loaders...")
    if params["nyu"]:
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(params["dataset_paths"], split='val')
    else:
        val_dataset = Datasets.FastDepthDataset(params["dataset_paths"],
                                                split='val',
                                                depth_min=params["depth_min"],
                                                depth_max=params["depth_max"],
                                                input_shape_model=(224, 224),
                                                random_crop=False)

    # Set batch size to 1 for validation
    data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=params["num_workers"],
        pin_memory=True)

    # Set GPU
    params["device"] = torch.device(
        "cuda:{}".format(params["device"])
        if params["device"] >= 0 and torch.cuda.is_available() else "cpu")
    print("Using device", params["device"])

    print("Loading models...")
    models = []
    model_names = []
    for model_dict in params["models"]:
        model_names.append(Path(model_dict["model_path"]).stem)
        model, _ = utils.load_model(model_dict, model_dict["model_path"],
                                    params["device"])
        model.to(params["device"])
        models.append(model)

    # Create output directory
    output_directory = os.path.join(params["save_folder"],
                                    ".".join(model_names))
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    params["output_directory"] = output_directory
    print("Saving results to " + output_directory)

    compare_models(params, data_loader, models)
def train_teacher(args):
    result_file = '_'.join([str(v) for v in args]) + '.pickle'
    result_file = os.path.join(Conf.OUTPUT_DIR, result_file)
    if os.path.exists(result_file):
        # Return the cached result if this configuration was already trained
        with open(result_file, 'rb') as fid:
            result = pickle.load(fid)
        return result

    prefix = args[0]
    dataset = args[1]
    weight_decay = args[2]

    data = Datasets.get_cifar100(batch_size=Conf.DENSENET_BS)
    train_loader, val_loader, test_loader = data
    nb_class = get_class_count(dataset)
    model = DenseNet.DenseNet121(nb_class)

    logfile = '_'.join([str(v) for v in args]) + '.pickle'
    logfile = os.path.join(Conf.LOG_DIR, logfile)
    compute = 'gpu' if torch.cuda.is_available() else 'cpu'
    if Conf.STAGE == 'test':
        LR = Conf.T_TEST_LR
        Epochs = Conf.T_TEST_EPOCH
        verbose = True
    else:
        LR = Conf.T_LR
        Epochs = Conf.T_EPOCH
        verbose = False
    outputs = Utility.train_classifier(logfile, model, compute, train_loader,
                                       val_loader, test_loader, LR, Epochs,
                                       weight_decay, verbose)
    return outputs
def ZooplaPriceChanges():
    total = 0
    pSame = 0
    priceMap = {}
    distribution = DiscountDistribution()
    data = ds.ZooplaMatchedDaily()
    # store = pd.HDFStore('rawDaily.hd5',mode='w')
    # for chunk in data.parser:
    chunk = data.read(1000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'},
                 inplace=True)
    filteredchunk = chunk[chunk["MARKET"] == "SALE"][[
        'LISTING ID', 'DAY', 'PRICE'
    ]][chunk['PRICE'] > 0]
    for row in filteredchunk.values:
        currentState = priceMap.get(row[0])
        if currentState is None:
            priceMap[row[0]] = PriceCalc(row[1], row[2])
        else:
            startDay, endDay, percent = currentState.add(row[1], row[2])
            distribution.add(startDay, endDay, percent)

    # now get deletion dates
    delData = ds.ZooplaMatchedCollated()
    # for chunk in delData.parser:
    chunk = delData.read(1000)
    chunk.rename(columns={'\xef\xbb\xbfLISTING ID': 'LISTING ID'},
                 inplace=True)
    filteredchunk = chunk[chunk["MARKET"] == "SALE"][['LISTING ID', 'DELETED']]
    for row in filteredchunk.values:
        currentState = priceMap.get(row[0])
        if currentState is not None:
            if currentState.currentprice == currentState.initialmarketprice:
                pSame += 1
            total += 1
            startDay, endDay, percent = currentState.add(row[1], 0)
            distribution.add(startDay, endDay, percent)
            priceMap.pop(row[0])
    print(len(priceMap))
    print(pSame, total, pSame * 1.0 / total)
    plotProbability(distribution.dist)
def read_scores(model_config, audio_length, score_filenames, pad_time_frames):
    scores = {
        source: Datasets.read_score(score_filenames[source], audio_length,
                                    model_config['expected_sr'])
        for source in model_config['separator_source_names']
    }
    padded_scores = {
        source: np.pad(score, [(pad_time_frames, pad_time_frames)],
                       mode="constant", constant_values=0.0)
        for source, score in scores.items()
    }
    return padded_scores
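# Hedged usage sketch: pad_time_frames is presumably half the input/output
# length difference of the separator, mirroring run() above; audio_length and
# score_filenames (a source-name -> score-file mapping) are hypothetical.
input_shape, output_shape = Utils.get_separator_shapes(model_config)
pad_time_frames = (input_shape[1] - output_shape[1]) // 2
padded_scores = read_scores(model_config,
                            audio_length=audio_length,
                            score_filenames=score_filenames,
                            pad_time_frames=pad_time_frames)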
def main(cfg, model_path, output_path, chorales_path):
    model_config = cfg["model_config"]
    print(f'Running prediction for model: {model_path}')
    print(f'Reading chorale mixtures from {chorales_path}')
    chorales = Datasets.getSynthesizedChorales(chorales_path,
                                               model_config["score_informed"])
    # Datasets.preprocess_dataset divides the chorales into partitions; the
    # test partition starts after 320 chorales.
    test_chorales = chorales[320:]
    print('Saving outputs to: %s' % output_path)
    for i, chorale in enumerate(test_chorales):
        print(f'Predicting chorale {i + 1} out of {len(test_chorales)}...')
        predict_chorale(model_config, chorale, model_path, output_path)
def __init__(self, opts):
    self.opts = opts
    self.model_path = opts.model_path
    self.output_dir = opts.output_dir
    self.gpu_id = opts.gpu_id
    self.disp_module = opts.disp_module
    self.dataset = opts.dataset
    self.batch_size = opts.batch_size
    self.train = opts.train

    # Getting the data loader ready
    if self.dataset == 'kitti':
        dataset = Datasets.KittiDataset(self.opts)
    elif self.dataset == 'nyu':
        dataset = Datasets.NYUDataset(self.opts)
    else:
        raise NameError('Dataset not found')
    self.DataLoader = data.DataLoader(dataset,
                                      batch_size=self.batch_size,
                                      shuffle=False,
                                      num_workers=8)
    print('Data loader made')

    # Loading the model
    disp_module = importlib.import_module(self.disp_module)
    self.DispNet = disp_module.DispResNet()
    self.DispNet.load_state_dict(torch.load(self.model_path))
    if self.gpu_id is not None:
        self.device = torch.device('cuda:' + str(self.gpu_id[0]))
        self.DispNet = self.DispNet.to(self.device)
        if len(self.gpu_id) > 1:
            self.DispNet = nn.DataParallel(self.DispNet, self.gpu_id)
    else:
        self.device = torch.device('cpu')
    self.DispNet.eval()
    print('Model Loaded')

    self.start_test()
def noisy(model_config, dataset):
    csv_file_name = (model_config["estimates_path"] + os.path.sep + "noisy" +
                     "_" + dataset + ".csv")
    with open(csv_file_name, 'w') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',')
        csv_writer.writerow(
            ["target_file", "input_file", "pesq", "lsd", "ssnr", "audio_len"])

        # Get test set
        if dataset == "VCTK":
            _, test = Datasets.get_VCTK(
                model_config["preprocessed_data_path"] + "/VCTK_8k_DBE",
                model_config["input"], model_config["target"])
        elif dataset == "DAPS":
            _, test = Datasets.get_DAPS(
                model_config["preprocessed_data_path"] + "/DAPS_8k_DBE",
                model_config["input"], model_config["target"])

        for sample in test:
            # Evaluate
            input_file_name = sample[model_config["input"]]
            print("Test file " + input_file_name)
            target_file_name = sample[model_config["target"]]
            pesq, lsd, ssnr, audio_len = Metrics.Eval(
                target_file_name, input_file_name,
                model_config['expected_sr'])
            csv_writer.writerow([
                target_file_name, input_file_name, pesq, lsd, ssnr, audio_len
            ])
            print('PESQ:{:.3f} LSD:{:.3f} SSNR:{:.3f}'.format(pesq, lsd, ssnr))

    # Report length-weighted averages over the whole test set
    results_df = pd.read_csv(csv_file_name,
                             usecols=["ssnr", "lsd", "pesq", "audio_len"])
    pesq, lsd, ssnr = (results_df[["pesq", "lsd", "ssnr"]].multiply(
        results_df["audio_len"], axis=0)).sum() / sum(results_df["audio_len"])
    print('Results -> PESQ:{:.3f} LSD:{:.3f} SSNR:{:.3f}'.format(
        pesq, lsd, ssnr))
def plotBias(self):
    # Plot the biased data set
    isInB = np.array([
        int(elem) for elem in
        [self.X_train[i] in self.X_b_train for i in range(len(self.X_train))]
    ])
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(2, 2))
    plt.xticks([])
    plt.yticks([])
    # plt.title("Biased Dataset")
    plt.scatter(x=self.X_train[np.where(isInB == 0)[0]][:, 0],
                y=self.X_train[np.where(isInB == 0)[0]][:, 1],
                c=Datasets.colormap(self.Y_train[np.where(isInB == 0)[0]]),
                alpha=0.05, s=1)
    plt.scatter(x=self.X_train[np.nonzero(isInB)][:, 0],
                y=self.X_train[np.nonzero(isInB)][:, 1],
                c=Datasets.colormap(self.Y_train[np.nonzero(isInB)]),
                alpha=0.3, s=2)
    plt.show()
    return fig
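# Hedged sketch of Datasets.colormap as called above (definition not shown):
# it appears to map integer class labels to one plot color per point, e.g. for
# a two-class problem:
def colormap(labels):
    palette = np.array(['tab:blue', 'tab:orange'])  # hypothetical palette
    return palette[np.asarray(labels, dtype=int)]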
def check_partition(partition, model_config):
    records_dir = Path(model_config['data_path']) / partition
    records_files = sorted(records_dir.glob('*.tfrecords'))
    # Materialise as a list of strings: a bare map object would be exhausted
    # by the join below before it reaches TFRecordDataset
    records_files = [str(f) for f in records_files]
    print('Partition: ' + partition)
    print('Parsing record files:\n\t' + '\n\t'.join(records_files))
    dataset = tf.data.TFRecordDataset(records_files)
    input_shape, _output_shape = Utils.get_separator_shapes(model_config)
    dataset = dataset.map(
        lambda x: Datasets.parse_record(x, input_shape[1:], model_config))
    num_songs = 0
    for i, song in enumerate(dataset):
        num_songs = i + 1
        print('\tSong %s: %s samples' % (num_songs, int(song['length'])))
    print('Total in %s partition: %s songs' % (partition, num_songs))
def __init__(self):
    # datasets
    self.dataset = Datasets.Dataset(
        '/datasets/UCF101/jpegs_256',
        '/datasets/UCF101/UCF_list',
        '04',
        'spatial_dmd',
        30,
        24,
    )
    self.train_loader = self.dataset.get_loader('train')
    self.test_loader = self.dataset.get_loader('test')

    # networks
    self.net = Networks.Network(in_channels=13,
                                out_classes=10,
                                pretrain_path='./zoo/spatial_pretrain.pth')
def extract_batches(partition, model_config):
    output_dir = tempfile.mkdtemp()
    print(f'Output to {output_dir}')
    input_shape, _output_shape = Utils.get_separator_shapes(model_config)
    dataset = Datasets.get_dataset(model_config, input_shape, _output_shape,
                                   partition)
    dataset = dataset.take(2)
    dataset = dataset.apply(tf.data.experimental.unbatch())
    for snippet_index, snippet in enumerate(dataset):
        print('\tSnippet %s' % (snippet_index + 1))
        snippet_dir = Path(output_dir) / str(snippet_index)
        os.makedirs(snippet_dir)
        write_audio('mix', model_config, snippet_dir, snippet)
        for source in model_config['separator_source_names']:
            write_audio(source, model_config, snippet_dir, snippet)
            if model_config['score_informed']:
                write_score(source, model_config, snippet_dir, snippet)
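# Hedged sketch of the write_audio helper called above (its definition is not
# part of this snippet): a minimal version writes one source of the snippet
# dict to a wav file. soundfile is an assumption; any wav writer would do.
import soundfile as sf

def write_audio(source, model_config, snippet_dir, snippet):
    audio = tf.squeeze(snippet[source]).numpy()
    sf.write(str(Path(snippet_dir) / (source + '.wav')), audio,
             model_config['expected_sr'])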
def CreateDataFrame(Data, config):
    """
    Input:
        Data ... format from load_dataset (a Bunch)
        config ... configuration
    Output:
        df ... dataframe with all the features
    """
    feat = FE.Features(config=config, normalize=True)
    X = feat.fit_transform(Data=Data.H_alpha)
    lab = dat.merge_labels(labels=Data.labels)
    X = np.hstack((X, lab.reshape(lab.shape[0], 1)))
    nm = feat.get_names(labels=True)
    df = pd.DataFrame(data=X, columns=nm)
    return df
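# Hedged usage sketch, reusing the same Data bunch and feature config assumed
# in CF_Boosted_Trees above:
df = CreateDataFrame(Data=Data, config=config)
print(df.describe())  # quick sanity check of the extracted feature columns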
def extract_partition(partition, model_config):
    output_dir = tempfile.mkdtemp()
    records_dir = Path(model_config['data_path']) / partition
    records_paths = sorted(records_dir.glob('*.tfrecords'))
    # Materialise as a list of strings: a bare map object would be exhausted
    # by the join below before it reaches TFRecordDataset
    records_files = [str(p) for p in records_paths]
    print('Partition: ' + partition)
    print('Parsing record files:\n\t' + '\n\t'.join(records_files))
    dataset = tf.data.TFRecordDataset(records_files)
    input_shape, _output_shape = Utils.get_separator_shapes(model_config)
    dataset = dataset.map(
        lambda x: Datasets.parse_record(x, input_shape[1:], model_config))
    dataset = dataset.take(10)
    for song_index, song in enumerate(dataset):
        print('\tSong %s: %s samples' % (song_index + 1, int(song['length'])))
        os.makedirs('%s/%s' % (output_dir, song_index))
        for source in model_config['source_names'] + ['mix']:
            librosa.output.write_wav(
                '%s/%s/%s.wav' % (output_dir, song_index, source),
                tf.squeeze(song[source]).numpy(),
                model_config['expected_sr'])
def __init__(self):
    # Bring raw data from the Datasets class reader for the English Housing
    # Survey data
    raw_data = ds.EHSinterview()
    # Filter for the income field and rentwkx, total weekly rent payable
    # (rent plus housing benefit)
    income_rent = raw_data.loc[:, [self.incomeField, "rentwkx"]]
    # Filter out non renters and unreasonably large weekly rent values
    self.renterData = income_rent[(income_rent["rentwkx"] > 0)
                                  & (income_rent["rentwkx"] < 50000)]
    # Filter out strings in the rentwkx column
    self.renterData = self.renterData[self.renterData["rentwkx"].apply(
        lambda x: not isinstance(x, str))]
    # Cast rentwkx column values to numpy float64 type
    self.renterData = self.renterData.astype({"rentwkx": np.float64})
    # Split the data into a 2D histogram with logarithmic bins (no
    # normalisation here as we want column normalisation, to be introduced
    # when plotting)
    self.population, self.xbins, self.ybins = np.histogram2d(
        np.log(self.renterData[self.incomeField].values),
        np.log(self.renterData["rentwkx"].values),
        bins=[30, 30])
    # Transpose the matrix as histogram2d returns a list of columns instead
    # of a list of rows
    self.population = self.population.T
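# Hedged sketch of the column normalisation mentioned in the comment above:
# divide each income column by its own total before plotting, so every column
# sums to one. The matplotlib plotting is an assumption, not part of the
# original class.
import matplotlib.pyplot as plt

def plot_column_normalised(population, xbins, ybins):
    # After the transpose above, rows are rent bins and columns are income bins
    col_totals = population.sum(axis=0, keepdims=True)
    normalised = population / np.where(col_totals > 0, col_totals, 1.0)
    plt.pcolormesh(xbins, ybins, normalised)
    plt.xlabel('log income')
    plt.ylabel('log weekly rent')
    plt.colorbar(label='P(rent | income)')
    plt.show()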