Example #1
    def predict(arg):
        img_meta, img = arg
        h, w = img.shape[:2]
        s = patch_size
        # step = s // 2 - 32
        step = s - 64
        xs = list(range(0, w - s, step)) + [w - s]
        ys = list(range(0, h - s, step)) + [h - s]
        all_xy = [(x, y) for x in xs for y in ys]
        pred_img = np.zeros((utils.N_CLASSES + 1, h, w), dtype=np.float32)
        pred_count = np.zeros((h, w), dtype=np.int32)

        def make_batch(xy_batch_):
            return (xy_batch_, torch.stack([
                utils.img_transform(img[y: y + s, x: x + s]) for x, y in xy_batch_]))

        for xy_batch, inputs in utils.imap_fixed_output_buffer(
                make_batch, tqdm.tqdm(list(utils.batches(all_xy, batch_size))),
                threads=1):
            outputs = model(utils.variable(inputs, volatile=True))
            outputs_data = np.exp(outputs.data.cpu().numpy())
            for (x, y), pred in zip(xy_batch, outputs_data):
                pred_img[:, y: y + s, x: x + s] += pred
                pred_count[y: y + s, x: x + s] += 1
        pred_img /= np.maximum(pred_count, 1)
        return img_meta, pred_img
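The `batches` helper these examples call is not shown on this page, and its signature varies between projects. For call sites that pass a sequence plus a chunk size, as in Example #1 above and in Examples #2, #6 and #8 below, a minimal chunking sketch could look like the following; the implementation is an assumption inferred from the call sites, not the original code.

# Hypothetical sketch of a chunking `batches` helper, inferred from calls such as
# `utils.batches(all_xy, batch_size)` and `batches(ids, size=150)`.
def batches(items, size):
    """Yield consecutive chunks of at most `size` elements from `items`."""
    items = list(items)  # also accepts generators and dict views
    for start in range(0, len(items), size):
        yield items[start:start + size]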
Example #2
    def run(self, no_threads=1):
        logging.info('parsing xml...')
        self.parse_dict()
        entries_per_thread = (len(self.raw_dict) / no_threads) + 1
        self.thread_states = {}
        # may turn out to be less than "no_threads" with small input
        started_threads = 0
        for i, batch in enumerate(
                batches(self.raw_dict.keys(), entries_per_thread)):

            t = threading.Thread(target=self.process_entries_thread,
                                 args=(i, batch))
            t.start()
            started_threads += 1
        logging.info("started {0} threads".format(started_threads))
        while True:
            if len(self.thread_states) < started_threads:
                time.sleep(1)
                continue
            elif all(self.thread_states.values()):
                logging.info(
                    "{0} threads finished successfully".format(no_threads))
                break
            else:
                raise Exception("some threads failed")
Example #3
def train_step(sess, model, train):
    if len(train) % parameters.batch_size == 0:
        batch_num = int(len(train) / parameters.batch_size)
    else:
        batch_num = int(len(train) / parameters.batch_size) + 1

    step = 0
    loss = .0

    batches = utils.batches(train, parameters.batch_size)
    for batch in batches:
        input_x, hamds, intervals, drugs, targets = batch
        _loss, _ = sess.run(
            [model.loss, model.optimizer],
            feed_dict={
                model.input_x: input_x,
                model.input_hamd: hamds,
                model.input_interval: intervals,
                model.input_drug: drugs,
                model.input_target: targets,
                model.dropout_rate: parameters.dropout_rate
            })
        loss += _loss
        step += 1
        sys.stdout.write("\033[F")
        sys.stdout.write("\033[K")
        print("Process Training Batch: [{}/{}]".format(step, batch_num))

    return loss
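In Examples #3 and #4 the helper evidently yields each batch already split into parallel field arrays (`input_x, hamds, intervals, drugs, targets = batch`). A hedged sketch of such a variant, assuming `train` is a sequence of 5-field records, could be:

# Hypothetical sketch of a field-unpacking `batches` variant; assumes each record in
# `records` is an (input_x, hamd, interval, drug, target) tuple.
import numpy as np

def batches(records, batch_size):
    for start in range(0, len(records), batch_size):
        chunk = records[start:start + batch_size]
        # transpose the list of records into one array per field
        yield tuple(np.asarray(field) for field in zip(*chunk))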
Example #4
def task_1(sess, model, test):
    step = 0
    if len(test) % parameters.batch_size == 0:
        batch_num = int(len(test) / parameters.batch_size)
    else:
        batch_num = int(len(test) / parameters.batch_size) + 1

    targets = []
    inferences = []

    batches = utils.batches(test, parameters.batch_size)
    for batch in batches:
        input_x, hamds, intervals, drugs, _targets = batch
        inference = sess.run(model.inference,
                             feed_dict={
                                 model.input_x: input_x,
                                 model.input_hamd: hamds,
                                 model.input_interval: intervals,
                                 model.input_drug: drugs,
                                 model.dropout_rate: 1.0
                             })
        targets.append(_targets)
        inferences.append(inference)

    targets = np.concatenate(targets, 0)
    inferences = np.concatenate(inferences, 0)

    rms = math.sqrt(sum((targets - inferences)**2) / len(targets))

    return rms
Example #5
def validation_loss():
    with torch.no_grad():
        val_losses = []
        val_h = net.blank_hidden(batch_size)
        for x, y in batches(X_val, Y_val, batch_size, seq_size):
            out_val, val_h = net(x, val_h)
            val_loss = F.cross_entropy(out_val.transpose(1,2), y)
            val_losses.append(val_loss)
        val_losses = torch.stack(val_losses)
    return val_losses
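Examples #5, #7 and #10 call `batches(X, Y, batch_size, seq_size)` inside an RNN training loop, so the helper there presumably splits the data into `batch_size` parallel streams and yields `seq_size`-long windows. A minimal sketch under that assumption (the real generator is not shown on this page):

# Hypothetical sketch of the RNN batch generator: splits X and Y into `batch_size`
# parallel streams and yields windows of at most `seq_size` steps.
import torch

def batches(X, Y, batch_size, seq_size):
    n = (len(X) // batch_size) * batch_size  # drop the tail that does not fill a row
    X = torch.as_tensor(X[:n]).view(batch_size, -1)
    Y = torch.as_tensor(Y[:n]).view(batch_size, -1)
    for start in range(0, X.size(1), seq_size):
        yield X[:, start:start + seq_size], Y[:, start:start + seq_size]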
Example #6
def get_track_features(tracks, throttle=False):
    all_tracks = []

    for track_batch in utils.batches(tracks, 50):
        track_ids = [track["id"] for track in track_batch]
        print(track_ids[0])  # just print something to know stuff is happening
        track_features = sp.audio_features(track_ids)
        all_tracks += track_features

        if throttle:
            sleep(1.0)

    return all_tracks
Example #7
def train(epochs=20):
    '''
    Train the RNN and save the model

    Inputs
    ------
    epochs: the number of rounds we train on the whole dataset
    '''
    iters = 0
    for epoch in range(epochs):

        batch_num = 0
        losses = []
        h = net.blank_hidden(batch_size)
        for x, y in batches(X, Y, batch_size, seq_size):

            # use network predictions to compute loss
            h = tuple([state.detach() for state in h])
            out, h = net(x, h)
            loss = F.cross_entropy(out.transpose(1, 2), y)

            # optimization step
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), grad_norm)
            opt.step()

            # print training progress
            progress(batch_num, num_batches, iters, epochs * num_batches,
                     epoch + 1)

            # bookkeeping
            losses.append(loss)
            batch_num += 1
            iters += 1

        # plot loss after every epoch
        plot(epoch + 1,
             torch.stack(losses),
             'Loss',
             'Training',
             '#5DE58D',
             refresh=False)
        plot(epoch + 1, validation_loss(), 'Loss', 'Validation', '#4AD2FF')

        # save the model occasionally
        if epoch % save_iter == save_iter - 1:
            save_model(net, filename, epoch + 1)

    # save model at the end of training
    save_model(net, filename, 'final')
Example #8
def batch_lookup(ids, country="us"):
    """
    Look up many iTunes tracks by IDs.
    """

    session = requests.Session()

    for ids_batch in batches(ids, size=150):
        results = {}

        for result in lookup(ids_batch, country=country, session=session):
            type = result["wrapperType"]
            id = result.get(type + "Id") or result["trackId"]
            results[id] = result

        for id in ids_batch:
            yield (id, results.get(id))
Example #9
def collect_artists(genre, num_artists=SEARCH_LIMIT):
    sp = utils.get_spotipy_instance()
    artists = []

    for batch in utils.batches(range(num_artists), SEARCH_LIMIT):
        search_results = sp.search(
            q=f"genre:{genre}",
            type="artist",
            offset=batch[0],
            limit=len(batch)
        )

        artists += search_results["artists"]["items"]
    
    return sorted(
        artists,
        reverse=True, 
        key=lambda artist: artist["popularity"]
    )
Example #10
def train(epochs=20):
    reals, total, js, iters = 0, 0, 0, 0

    for epoch in range(epochs):

        batch_num = 0
        losses = []
        h = net.blank_hidden(batch_size)
        for x, y in batches(X, Y, batch_size, seq_size):

            # use network predictions to compute loss
            h = tuple([state.detach() for state in h])
            out, h = net(x, h)
            loss = F.cross_entropy(out.transpose(1,2), y)

            # optimization step
            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), grad_norm)
            opt.step()

            # print training progress
            progress(batch_num, num_batches, iters, epochs * num_batches, epoch)

            # bookkeeping
            losses.append(loss)
            batch_num += 1
            iters += 1

        # plot loss after every epoch
        plot(epoch, torch.stack(losses), 'Loss', 'Training', '#5DE58D', refresh=False)
        plot(epoch, validation_loss(), 'Loss', 'Validation', '#4AD2FF')

        plot(epoch, math.log(total+1), 'Words', '\'words\'', '#52d737')
        plot(epoch, math.log(reals+1), 'Words', 'real words', '#d437d7')
        plot(epoch, js, 'Zionism', 'instances', '#c12b2b')
        print(colored('\n\n' + smpl + '\n', 'cyan')) # print sample
Example #11
    def run(self, no_threads=1):
        logging.info('parsing xml...')
        self.parse_dict()
        # print "\n".join(["\n".join(["{0}\t{1}".format(
        #                       w, d['definition']) for d in s['senses']])
        #                  for w, s in self.raw_dict.items()])
        # print self.raw_dict
        # sys.exit(-1)
        entries_per_thread = (len(self.raw_dict) / no_threads) + 1
        self.thread_states = {}
        # may turn out to be less than "no_threads" with small input
        started_threads = 0
        if ONE_BY_ONE:
            logging.warning('running threads one by one!')
        for i, batch in enumerate(
                batches(self.raw_dict.keys(), entries_per_thread)):

            if ONE_BY_ONE:
                logging.warning('running batch #{0}'.format(i))
                self.process_entries_thread(i, batch)
            else:
                t = threading.Thread(target=self.process_entries_thread,
                                     args=(i, batch))
                t.start()
            started_threads += 1
        logging.info("started {0} threads".format(started_threads))
        while True:
            if len(self.thread_states) < started_threads:
                time.sleep(1)
                continue
            elif all(self.thread_states.values()):
                logging.info(
                    "{0} threads finished successfully".format(no_threads))
                break
            else:
                raise Exception("some threads failed")
Example #12
def _predict(arg, model, patch_size, batch_size, blobs_by_img_id,
             blob_scale_by_img_id):
    (path, scale), img = arg
    img_id = int(path.stem)
    h, w = img.shape[:2]
    s = patch_size // 2
    cls_blobs = blobs_by_img_id.get(img_id)
    if not cls_blobs or not any(cls_blobs):
        return (path, scale), None
    blob_scale = blob_scale_by_img_id[img_id]
    all_xy = [(cls, i, int(round(x * blob_scale * scale)),
               int(round(y * blob_scale * scale)))
              for cls, blobs in enumerate(cls_blobs)
              for i, (x, y, _) in enumerate(blobs)]

    def make_batch(xy_batch_):
        indices, patches = [], []
        for cls, i, x, y in xy_batch_:
            patch = img[max(0, y - s):y + s, max(0, x - s):x + s]
            if patch.shape[:2] == (patch_size, patch_size):
                patches.append(utils.img_transform(patch))
                indices.append((cls, i))
        patches = torch.stack(patches) if patches else None
        return indices, patches

    all_indices, all_outputs = [], []
    for indices, inputs in utils.imap_fixed_output_buffer(
            make_batch,
            tqdm.tqdm(list(utils.batches(all_xy, batch_size))),
            threads=1):
        if inputs is not None:
            outputs = model(utils.variable(inputs, volatile=True))
            outputs = F.softmax(outputs).data.cpu().numpy()
            all_indices.extend(indices)
            all_outputs.extend(outputs)
    return img_id, (all_indices, all_outputs)
Example #13
    def run(self, no_threads=1):
        logging.info('parsing xml...')
        self.parse_dict()
        # print "\n".join(["\n".join(["{0}\t{1}".format(
        #                       w, d['definition']) for d in s['senses']])
        #                  for w, s in self.raw_dict.items()])
        # print self.raw_dict
        # sys.exit(-1)
        entries_per_thread = (len(self.raw_dict) / no_threads) + 1
        self.thread_states = {}
        # may turn out to be less than "no_threads" with small input
        started_threads = 0
        if ONE_BY_ONE:
            logging.warning('running threads one by one!')
        for i, batch in enumerate(batches(self.raw_dict.keys(),
                                  entries_per_thread)):

            if ONE_BY_ONE:
                logging.warning('running batch #{0}'.format(i))
                self.process_entries_thread(i, batch)
            else:
                t = threading.Thread(
                    target=self.process_entries_thread, args=(i, batch))
                t.start()
            started_threads += 1
        logging.info("started {0} threads".format(started_threads))
        while True:
            if len(self.thread_states) < started_threads:
                time.sleep(1)
                continue
            elif all(self.thread_states.values()):
                logging.info(
                    "{0} threads finished successfully".format(no_threads))
                break
            else:
                raise Exception("some threads failed")
Example #14
                                            num_outputs=1200,
                                            activation_fn=tf.nn.tanh)

layer_2 = tf.contrib.layers.fully_connected(inputs=layer_1,
                                            num_outputs=600,
                                            activation_fn=tf.nn.tanh)

_out = tf.contrib.layers.fully_connected(inputs=layer_2,
                                         num_outputs=_y.shape[1],
                                         activation_fn=tf.nn.tanh)

cost = tf.reduce_mean(tf.pow(_out - y, 2))

optimizer = tf.train.AdamOptimizer().minimize(cost)

init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)

    for i in range(100):
        for batch_x, batch_y in utils.batches(_x, _y, 1000):
            _, c = sess.run([optimizer, cost],
                            feed_dict={
                                x: batch_x,
                                y: batch_y
                            })
            print(c)
        c = sess.run(cost, feed_dict={x: _x_t, y: _y_t})
        print('test: ', c)
Example #15
def fit_quantiles(X,
                  y,
                  quantiles=0.5,
                  lossfn='marginal',
                  nepochs=100,
                  val_pct=0.1,
                  batch_size=None,
                  target_batch_pct=0.01,
                  min_batch_size=20,
                  max_batch_size=100,
                  verbose=False,
                  lr=1e-1,
                  weight_decay=0.0,
                  patience=5,
                  init_model=None,
                  splits=None,
                  file_checkpoints=True,
                  clip_gradients=False,
                  **kwargs):
    if file_checkpoints:
        import uuid
        tmp_file = '/tmp/tmp_file_' + str(uuid.uuid4())

    if batch_size is None:
        batch_size = min(
            X.shape[0],
            max(
                min_batch_size,
                min(max_batch_size,
                    int(np.round(X.shape[0] * target_batch_pct)))))
        if verbose:
            print('Auto batch size chosen to be {}'.format(batch_size))

    # Standardize the features and response (helps with gradient propagation)
    Xmean = X.mean(axis=0, keepdims=True)
    Xstd = X.std(axis=0, keepdims=True)
    Xstd[Xstd == 0] = 1  # Handle constant features
    ymean, ystd = y.mean(axis=0, keepdims=True), y.std(axis=0, keepdims=True)
    tX = autograd.Variable(torch.FloatTensor((X - Xmean) / Xstd),
                           requires_grad=False)
    tY = autograd.Variable(torch.FloatTensor((y - ymean) / ystd),
                           requires_grad=False)

    # Create train/validate splits
    if splits is None:
        indices = np.arange(X.shape[0], dtype=int)
        np.random.shuffle(indices)
        train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
        train_indices = indices[:train_cutoff]
        validate_indices = indices[train_cutoff:]
    else:
        train_indices, validate_indices = splits

    if np.isscalar(quantiles):
        quantiles = np.array([quantiles])
    if lossfn == 'geometric':
        quantiles = 2 * quantiles - 1
    tquantiles = autograd.Variable(torch.FloatTensor(quantiles),
                                   requires_grad=False)

    # Initialize the model
    model = QuantileNetworkModule(
        Xmean, Xstd, ymean, ystd,
        quantiles.shape[0]) if init_model is None else init_model

    # Save the model to file
    if file_checkpoints:
        torch.save(model, tmp_file)
    else:
        import pickle
        model_str = pickle.dumps(model)

    # Setup the SGD method
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          weight_decay=weight_decay,
                          nesterov=True,
                          momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)

    # Track progress
    train_losses, val_losses, best_loss = np.zeros(nepochs), np.zeros(
        nepochs), None
    num_bad_epochs = 0

    if verbose:
        print('ymax and min:', tY.max(), tY.min())

    # Univariate quantile loss
    def quantile_loss(yhat, tidx):
        z = tY[tidx, None] - yhat
        return torch.max(tquantiles[None] * z, (tquantiles[None] - 1) * z)

    # Marginal quantile loss for multivariate response
    def marginal_loss(yhat, tidx):
        z = tY[tidx, :, None] - yhat
        return torch.max(tquantiles[None, None] * z,
                         (tquantiles[None, None] - 1) * z)

    # Geometric quantile loss -- uses a Euclidean unit ball definition of multivariate quantiles
    def geometric_loss(yhat, tidx):
        z = tY[tidx, :, None] - yhat
        return torch.norm(z, dim=1) + (z * tquantiles[None, None]).sum(dim=1)

    # Create the quantile loss function
    if len(tY.shape) == 1 or tY.shape[1] == 1:
        lossfn = quantile_loss
    elif lossfn == 'marginal':
        print('Using marginal loss')
        lossfn = marginal_loss
    elif lossfn == 'geometric':
        print('Using geometric loss')
        lossfn = geometric_loss

    # Train the model
    for epoch in range(nepochs):
        if verbose:
            print('\t\tEpoch {}'.format(epoch + 1))
            sys.stdout.flush()

        # Track the loss curves
        train_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(
                batches(train_indices, batch_size, shuffle=True)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tBatch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch),
                                     requires_grad=False)

            # Set the model to training mode
            model.train()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the predicted quantiles
            yhat = model(tX[tidx])

            # Loss for all quantiles
            loss = lossfn(yhat, tidx).mean()

            # Calculate gradients
            loss.backward()

            # Clip the gradients
            if clip_gradients:
                clip_gradient(model)

            # Apply the update
            # [p for p in model.parameters() if p.requires_grad]
            optimizer.step()

            # Track the loss
            train_loss += loss.data

            if np.isnan(loss.data.numpy()):
                import warnings
                warnings.warn('NaNs encountered in training model.')
                break

        validate_loss = torch.Tensor([0])
        for batch_idx, batch in enumerate(
                batches(validate_indices, batch_size, shuffle=False)):
            if verbose and (batch_idx % 100 == 0):
                print('\t\t\tValidation Batch {}'.format(batch_idx))
            tidx = autograd.Variable(torch.LongTensor(batch),
                                     requires_grad=False)

            # Set the model to test mode
            model.eval()

            # Reset the gradient
            model.zero_grad()

            # Run the model and get the conditional mixture weights
            yhat = model(tX[tidx])

            # Track the loss
            validate_loss += lossfn(yhat, tidx).sum()

        train_losses[epoch] = train_loss.data.numpy() / float(
            len(train_indices))
        val_losses[epoch] = validate_loss.data.numpy() / float(
            len(validate_indices))

        # Adjust the learning rate down if the validation performance is bad
        if num_bad_epochs > patience:
            if verbose:
                print('Decreasing learning rate to {}'.format(lr * 0.5))
            scheduler.step(val_losses[epoch])
            lr *= 0.5
            num_bad_epochs = 0

        # If the model blew up and gave us NaNs, adjust the learning rate down and restart
        if np.isnan(val_losses[epoch]):
            if verbose:
                print(
                    'Network went to NaN. Readjusting learning rate down by 50%'
                )
            if file_checkpoints:
                os.remove(tmp_file)
            return fit_quantiles(X,
                                 y,
                                 quantiles=quantiles,
                                 lossfn=lossfn,
                                 nepochs=nepochs,
                                 val_pct=val_pct,
                                 batch_size=batch_size,
                                 target_batch_pct=target_batch_pct,
                                 min_batch_size=min_batch_size,
                                 max_batch_size=max_batch_size,
                                 verbose=verbose,
                                 lr=lr * 0.5,
                                 weight_decay=weight_decay,
                                 patience=patience,
                                 init_model=init_model,
                                 splits=splits,
                                 file_checkpoints=file_checkpoints,
                                 **kwargs)

        # Check if we currently have the best held-out log-likelihood
        if epoch == 0 or val_losses[epoch] <= best_loss:
            if verbose:
                print(
                    '\t\t\tSaving test set results.      <----- New high water mark on epoch {}'
                    .format(epoch + 1))
            # If so, use the current model on the test set
            best_loss = val_losses[epoch]
            if file_checkpoints:
                torch.save(model, tmp_file)
            else:
                import pickle
                model_str = pickle.dumps(model)
        else:
            num_bad_epochs += 1

        if verbose:
            print('Validation loss: {} Best: {}'.format(
                val_losses[epoch], best_loss))

    # Load the best model and clean up the checkpoints
    if file_checkpoints:
        model = torch.load(tmp_file)
        os.remove(tmp_file)
    else:
        import pickle
        model = pickle.loads(model_str)

    # Return the conditional density model that marginalizes out the grid
    return model
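Examples #15 and #16 iterate over `batches(train_indices, batch_size, shuffle=True)`, i.e. they batch an index array with an optional shuffle. A minimal sketch consistent with that usage, again an assumption rather than the original helper:

# Hypothetical sketch of an index-batching `batches` variant with an optional shuffle,
# matching calls like `batches(train_indices, batch_size, shuffle=True)`.
import numpy as np

def batches(indices, batch_size, shuffle=True):
    order = np.array(indices)  # copy so the caller's array is left untouched
    if shuffle:
        np.random.shuffle(order)
    for start in range(0, len(order), batch_size):
        yield order[start:start + batch_size]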
Example #16
    def train(self,
              model_fn=None,
              lasso=0.,
              l2=1e-4,
              lr=3e-4,
              num_epochs=250,
              batch_size=None,
              num_folds=3,
              val_pct=0.1,
              verbose=False,
              folds=None,
              weight_decay=0.01,
              random_restarts=1,
              save_dir='/tmp/',
              momentum=0.9,
              patience=3,
              clip_gradients=None):
        # Make sure we have a model of the prior
        if model_fn is None:
            model_fn = lambda nfeatures: DeepAdaptiveFDRModeler(nfeatures)

        # Lasso penalty (if any)
        lasso = autograd.Variable(torch.FloatTensor([lasso]),
                                  requires_grad=False)
        l2 = autograd.Variable(torch.FloatTensor([l2]), requires_grad=False)

        if batch_size is None:
            batch_size = int(
                max(10, min(100, np.round(self.X.shape[0] / 100.))))
            print('Batch size: {}'.format(batch_size))

        # Discrete approximation of a beta PDF support
        tbeta_grid = autograd.Variable(torch.FloatTensor(self.beta_grid),
                                       requires_grad=False)
        sys.stdout.flush()
        # Split the data into a bunch of cross-validation folds
        if folds is None:
            if verbose:
                print('\tCreating {} folds'.format(num_folds))
                sys.stdout.flush()
            folds = create_folds(self.X, k=num_folds)
        self.priors = np.zeros((self.nsamples, 2), dtype=float)
        self.models = []
        train_losses, val_losses = np.zeros(
            (len(folds), random_restarts, num_epochs)), np.zeros(
                (len(folds), random_restarts, num_epochs))
        epochs_per_fold = np.zeros(len(folds))
        for fold_idx, test_indices in enumerate(folds):
            # Create train/validate splits
            mask = np.ones(self.nsamples, dtype=bool)
            mask[test_indices] = False
            indices = np.arange(self.nsamples, dtype=int)[mask]
            np.random.shuffle(indices)
            train_cutoff = int(np.round(len(indices) * (1 - val_pct)))
            train_indices = indices[:train_cutoff]
            validate_indices = indices[train_cutoff:]
            torch_test_indices = autograd.Variable(
                torch.LongTensor(test_indices), requires_grad=False)
            best_loss = None

            # Try re-initializing a few times
            for restart in range(random_restarts):
                model = model_fn(self.nfeatures)

                # Setup the optimizers
                # optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=momentum)
                # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=patience)
                optimizer = optim.RMSprop(model.parameters(),
                                          lr=lr,
                                          weight_decay=weight_decay)
                # optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
                # Train the model
                for epoch in range(num_epochs):
                    if verbose:
                        print('\t\tRestart {} Fold {} Epoch {}'.format(
                            restart + 1, fold_idx + 1, epoch + 1))
                        sys.stdout.flush()

                    train_loss = torch.Tensor([0])
                    for batch_idx, batch in enumerate(
                            batches(train_indices, batch_size, shuffle=False)):
                        if verbose and (batch_idx % 100 == 0):
                            print('\t\t\tBatch {}'.format(batch_idx))
                        tidx = autograd.Variable(torch.LongTensor(batch),
                                                 requires_grad=False)

                        # Set the model to training mode
                        model.train()

                        # Reset the gradient
                        model.zero_grad()

                        # Run the model and get the prior predictions
                        concentrations = model(self.tX[tidx])

                        # Calculate the loss as the negative log-likelihood of the data
                        # Use a beta prior for the treatment effect
                        prior_dist = torch.distributions.Beta(
                            concentrations[:, 0:1], concentrations[:, 1:2])

                        # Discretize the (0,1) interval to approximate the beta PDF
                        prior_probs = prior_dist.log_prob(tbeta_grid).exp()
                        prior_probs = prior_probs / prior_probs.sum(
                            dim=1, keepdim=True)

                        # Calculate the loss
                        posterior_probs = (((1 - tbeta_grid) * self.tP0[tidx] +
                                            tbeta_grid * self.tP1[tidx]) *
                                           prior_probs).sum(dim=1)
                        loss = -posterior_probs.log().mean()

                        # L1 penalty to shrink c and be more conservative
                        regularized_loss = loss + lasso * concentrations.mean(
                        ) + l2 * (concentrations**2).mean()

                        # Update the model with gradient clipping for stability
                        regularized_loss.backward()

                        # Clip the gradients if need-be
                        if clip_gradients is not None:
                            torch.nn.utils.clip_grad_norm(
                                model.parameters(), clip_gradients)

                        # Apply the update
                        [p for p in model.parameters() if p.requires_grad]
                        optimizer.step()

                        # Track the loss
                        train_loss += loss.data

                    validate_loss = torch.Tensor([0])
                    for batch_idx, batch in enumerate(
                            batches(validate_indices, batch_size)):
                        if verbose and (batch_idx % 100 == 0):
                            print(
                                '\t\t\tValidation Batch {}'.format(batch_idx))
                        tidx = autograd.Variable(torch.LongTensor(batch),
                                                 requires_grad=False)

                        # Set the model to test mode
                        model.eval()

                        # Reset the gradient
                        model.zero_grad()

                        # Run the model and get the prior predictions
                        concentrations = model(self.tX[tidx])

                        # Calculate the loss as the negative log-likelihood of the data
                        # Use a beta prior for the treatment effect
                        prior_dist = torch.distributions.Beta(
                            concentrations[:, 0:1], concentrations[:, 1:2])

                        # Discretize the (0,1) interval to approximate the beta PDF
                        prior_probs = prior_dist.log_prob(tbeta_grid).exp()
                        prior_probs = (prior_probs / prior_probs.sum(
                            dim=1, keepdim=True)).clamp(1e-8, 1 - 1e-8)

                        # Calculate the loss
                        posterior_probs = (((1 - tbeta_grid) * self.tP0[tidx] +
                                            tbeta_grid * self.tP1[tidx]) *
                                           prior_probs).sum(dim=1).clamp(
                                               1e-8, 1 - 1e-8)
                        loss = -posterior_probs.log().sum()

                        # Track the loss
                        validate_loss += loss.data

                    train_losses[fold_idx, restart,
                                 epoch] = train_loss.numpy() / float(
                                     len(train_indices))
                    val_losses[fold_idx, restart,
                               epoch] = validate_loss.numpy() / float(
                                   len(validate_indices))

                    # # Adjust the learning rate down if the validation performance is bad
                    # scheduler.step(val_losses[fold_idx, epoch])

                    # Check if we currently have the best held-out log-likelihood
                    if verbose:
                        print('Validation loss: {} Best: {}'.format(
                            val_losses[fold_idx, restart, epoch], best_loss))
                    if (restart == 0 and epoch == 0
                        ) or val_losses[fold_idx, restart, epoch] <= best_loss:
                        if verbose:
                            print(
                                '\t\t\tSaving test set results.      <----- New high water mark for fold {} on epoch {}'
                                .format(fold_idx + 1, epoch + 1))
                        # If so, use the current model on the test set
                        best_loss = val_losses[fold_idx, restart, epoch]
                        epochs_per_fold[fold_idx] = epoch + 1
                        self.priors[test_indices] = model(
                            self.tX[torch_test_indices]).data.numpy()
                        torch.save(model,
                                   save_dir + '_fold{}.pt'.format(fold_idx))

                    if verbose:
                        means = self.priors[test_indices,
                                            0] / self.priors[test_indices].sum(
                                                axis=1)
                        print('Prior range: [{},{}]'.format(
                            means.min(), means.max()))
                        print('First 3:')
                        print(self.priors[test_indices][:3])

            # Reload the best model
            self.models.append(
                torch.load(save_dir + '_fold{}.pt'.format(fold_idx)))

        # Calculate the posterior probabilities
        if verbose:
            print('Calculating posteriors.')
            sys.stdout.flush()
        prior_grid = beta.pdf(self.beta_grid, self.priors[:, 0:1],
                              self.priors[:, 1:2])
        prior_grid /= prior_grid.sum(axis=1, keepdims=True)
        post0 = self.P0 * (1 - self.beta_grid)
        post1 = self.P1 * self.beta_grid
        self.posteriors = ((post1 / (post0 + post1)) * prior_grid).sum(axis=1)
        self.posteriors = self.posteriors.clip(1e-8, 1 - 1e-8)

        if verbose:
            print('Calculating predictions at a {:.2f}% FDR threshold'.format(
                self.fdr * 100))
            sys.stdout.flush()
        self.predictions = calc_fdr(self.posteriors, self.fdr)

        if verbose:
            print('Finished training.')
            sys.stdout.flush()

        self.folds = folds

        return {
            'train_losses': train_losses,
            'validation_losses': val_losses,
            'priors': self.priors,
            'posteriors': self.posteriors,
            'predictions': self.predictions,
            'models': self.models,
            'folds': folds
        }
Example #17
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        writer = tf.summary.FileWriter(logdir + '/train', sess.graph)

        if restore:
            chkpt = tf.train.latest_checkpoint(checkpointdir)
            if chkpt:
                print('restoring checkpoint: {}'.format(chkpt))
                saver.restore(sess, chkpt)

        prints('Training Begins!')

        for epochNum in range(epochs):
            prints('Starting epoch {}'.format(epochNum + 1))
            batchNum = 0

            for batch, labels, denseLabels in utils.batches():
                batchNum += 1
                iteration = (batchSize * (epochNum)) + batchNum

                feed = {
                    x: batch,
                    y: labels,
                    yDense: denseLabels,
                    lr: learningRate,
                    pkeep: pkeepi,
                    pkeepConv: pkeepConvi,
                    pkeepLSTM: pkeepLSTMi,
                    tst: False
                }
                feedema = {
                    x: batch,
Example #18
    def train(self, model_fn,
                    bandwidth=2., kernel_scale=0.35, variance=0.02,
                    mvn_train_samples=5, mvn_validate_samples=105,
                    validation_samples=1000,
                    validation_burn=1000,
                    validation_mcmc_samples=1000,
                    validation_thin=1,
                    lr=3e-4, num_epochs=10, batch_size=100,
                    val_pct=0.1, nfolds=5, folds=None,
                    learning_rate_decay=0.9, weight_decay=0.,
                    clip=None, group_lasso_penalty=0.,
                    save_dir='tmp/',
                    checkpoint=False,
                    target_fold=None):
        print('\tFitting model using {} folds and training for {} epochs each'.format(nfolds, num_epochs))
        torch_Y = autograd.Variable(torch.FloatTensor(self.Y), requires_grad=False)
        torch_lam_grid = autograd.Variable(torch.FloatTensor(self.lam_grid), requires_grad=False)
        torch_lam_weights = autograd.Variable(torch.FloatTensor(self.lam_weights), requires_grad=False)
        torch_c = autograd.Variable(torch.FloatTensor(self.c[:,np.newaxis,np.newaxis]), requires_grad=False)
        torch_obs = autograd.Variable(torch.FloatTensor(self.obs_mask), requires_grad=False)
        torch_dose_idxs = [autograd.Variable(torch.LongTensor(
                                np.arange(d+(d**2 - d)//2, (d+1)+((d+1)**2 - (d+1))//2)), requires_grad=False)
                                for d in range(self.ndoses)]

        # Use a fixed kernel
        Sigma = np.array([kernel_scale*(np.exp(-0.5*(i - np.arange(self.ndoses))**2 / bandwidth**2)) for i in np.arange(self.ndoses)]) + variance*np.eye(self.ndoses) # squared exponential kernel
        L = np.linalg.cholesky(Sigma)[np.newaxis,np.newaxis,:,:]

        # Use a fixed set of noise draws for validation
        Z = np.random.normal(size=(self.Y_shape[0], mvn_validate_samples, self.ndoses, 1))
        validate_noise = autograd.Variable(torch.FloatTensor(np.matmul(L, Z)[:,:,:,0]), requires_grad=False)

        self.folds = folds if folds is not None else create_folds(self.Y_shape[0], nfolds)
        nfolds = len(self.folds)
        self.fold_validation_indices = []
        self.prior_mu = np.full(self.Y_shape, np.nan, dtype=float)
        self.prior_Sigma = np.zeros((nfolds, self.ndoses, self.ndoses))
        self.train_losses, self.val_losses = np.zeros((nfolds,num_epochs)), np.zeros((nfolds,num_epochs))
        self.epochs_per_fold = np.zeros(nfolds, dtype=int)
        self.models = [None for _ in range(nfolds)]
        for fold_idx, test_indices in enumerate(self.folds):
            # Create train/validate splits
            mask = np.ones(self.Y_shape[0], dtype=bool)
            mask[test_indices] = False
            indices = np.arange(self.Y_shape[0], dtype=int)[mask]
            np.random.shuffle(indices)
            train_cutoff = int(np.round(len(indices)*(1-val_pct)))
            train_indices = indices[:train_cutoff]
            validate_indices = indices[train_cutoff:]
            torch_test_indices = autograd.Variable(torch.LongTensor(test_indices), requires_grad=False)
            self.fold_validation_indices.append(validate_indices)

            # If we are only training one specific fold, skip all the rest
            if target_fold is not None and target_fold != fold_idx:
                continue

            if checkpoint:
                self.load_checkpoint(save_dir, fold_idx)

            if self.models[fold_idx] is None:
                self.models[fold_idx] = model_fn()

            model = self.models[fold_idx]

            # Setup the optimizers
            # optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay, momentum=0.9)
            optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)
            for epoch in range(self.epochs_per_fold[fold_idx], num_epochs):
                print('\t\tFold {} Epoch {}'.format(fold_idx+1,epoch+1))
                train_loss = torch.Tensor([0])
                for batch_idx, batch in enumerate(batches(train_indices, batch_size)):
                    if batch_idx % 100 == 0:
                        print('\t\t\tBatch {}'.format(batch_idx))
                        sys.stdout.flush()

                    tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)
                    Z = np.random.normal(size=(len(batch), mvn_train_samples, self.ndoses, 1))
                    noise = autograd.Variable(torch.FloatTensor(np.matmul(L, Z)[:,:,:,0]), requires_grad=False)

                    # Set the model to training mode
                    model.train()

                    # Reset the gradient
                    model.zero_grad()

                    # Run the model and get the prior predictions
                    mu = model(batch, tidx)

                    #### Calculate the loss as the negative log-likelihood of the data ####
                    # Get the MVN draw as mu + L.T.dot(Z)
                    beta = mu.view(-1,1,self.ndoses) + noise

                    # Logistic transform on the log-odds prior sample
                    tau = 1 / (1. + (-beta).exp())

                    # Poisson noise model for observations
                    rates = tau[:,:,:,None] * torch_lam_grid[tidx,None,:,:] + torch_c[tidx,None,:,:]
                    likelihoods = torch.distributions.Poisson(rates)

                    # Get log probabilities of the data and filter out the missing observations
                    loss = -(logsumexp(likelihoods.log_prob(torch_Y[tidx][:,None,:,None]) + torch_lam_weights[tidx][:,None,:,:], dim=-1).mean(dim=1) * torch_obs[tidx]).mean()

                    if group_lasso_penalty > 0:
                        loss += group_lasso_penalty * torch.norm(model.cell_line_features.weight, 2, 0).mean()

                    # Update the model
                    loss.backward()
                    if clip is not None:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
                        for p in model.parameters():
                            p.data.add_(-lr, p.grad.data)
                    else:
                        optimizer.step()

                    train_loss += loss.data

                validate_loss = torch.Tensor([0])
                for batch_idx, batch in enumerate(batches(validate_indices, batch_size, shuffle=False)):
                    if batch_idx % 100 == 0:
                        print('\t\t\tValidation Batch {}'.format(batch_idx))
                        sys.stdout.flush()
                    
                    tidx = autograd.Variable(torch.LongTensor(batch), requires_grad=False)
                    noise = validate_noise[tidx]

                    # Set the model to evaluation mode
                    model.eval()

                    # Reset the gradient
                    model.zero_grad()

                    # Run the model and get the prior predictions
                    mu = model(batch, tidx)

                    #### Calculate the loss as the negative log-likelihood of the data ####
                    # Get the MVN draw as mu + L.T.dot(Z)
                    beta = mu.view(-1,1,self.ndoses) + noise

                    # Logistic transform on the log-odds prior sample
                    tau = 1 / (1. + (-beta).exp())

                    # Poisson noise model for observations
                    rates = tau[:,:,:,None] * torch_lam_grid[tidx,None,:,:] + torch_c[tidx,None,:,:]
                    likelihoods = torch.distributions.Poisson(rates)

                    # Get log probabilities of the data and filter out the missing observations
                    loss = -(logsumexp(likelihoods.log_prob(torch_Y[tidx][:,None,:,None]) + torch_lam_weights[tidx][:,None,:,:], dim=-1).mean(dim=1) * torch_obs[tidx]).sum()

                    validate_loss += loss.data

                self.train_losses[fold_idx, epoch] = train_loss.numpy() / float(len(train_indices))
                self.val_losses[fold_idx, epoch] = validate_loss.numpy() / float(len(validate_indices))

                # Adjust the learning rate down if the validation performance is bad
                scheduler.step(self.val_losses[fold_idx, epoch])

                # Check if we currently have the best held-out log-likelihood
                if epoch == 0 or np.argmin(self.val_losses[fold_idx, :epoch+1]) == epoch:
                    print('\t\t\tNew best score: {}'.format(self.val_losses[fold_idx,epoch]))
                    print('\t\t\tSaving test set results.')
                    # If so, use the current model on the test set
                    mu = model(test_indices, torch_test_indices)
                    self.prior_mu[test_indices] = mu.data.numpy()
                    self.save_fold(save_dir, fold_idx)
                
                cur_mu = self.prior_mu[test_indices]
                print('First 10 data points: {}'.format(test_indices[:10]))
                print('First 10 prior means:')
                print(pretty_str(ilogit(cur_mu[:10])))
                print('Prior mean ranges:')
                for dose in range(self.ndoses):
                    print('{}: {} [{}, {}]'.format(dose,
                                                   ilogit(cur_mu[:,dose].mean()),
                                                   np.percentile(ilogit(cur_mu[:,dose]), 5),
                                                   np.percentile(ilogit(cur_mu[:,dose]), 95)))
                print('Best model score: {} (epoch {})'.format(np.min(self.val_losses[fold_idx,:epoch+1]), np.argmin(self.val_losses[fold_idx, :epoch+1])+1))
                print('Current score: {}'.format(self.val_losses[fold_idx, epoch]))
                print('')

                self.epochs_per_fold[fold_idx] += 1
                
                # Update the save point if needed
                if checkpoint:
                    self.save_checkpoint(save_dir, fold_idx, model)
                    sys.stdout.flush()
                
            
            # Reload the best model
            tmp = model.cell_features
            self.load_fold(save_dir, fold_idx)
            self.models[fold_idx].cell_features = tmp

            print('Finished fold {}. Estimating covariance matrix using elliptical slice sampler with max {} samples.'.format(fold_idx+1, validation_samples))
            validate_subset = np.random.choice(validate_indices, validation_samples, replace=False) if len(validate_indices) > validation_samples else validate_indices
            tidx = autograd.Variable(torch.LongTensor(validate_subset), requires_grad=False)
                        
            # Set the model to evaluation mode
            self.models[fold_idx].eval()

            # Reset the gradient
            self.models[fold_idx].zero_grad()

            # Run the model and get the prior predictions
            mu_validate = self.models[fold_idx](validate_subset, tidx).data.numpy()
            
            # Run the slice sampler to get the covariance and data log-likelihoods
            Y_validate = self.Y[validate_subset].astype(int)
            Y_validate[self.obs_mask[validate_subset] == 0] = -1
            (Beta_samples,
                Sigma_samples,
                Loglikelihood_samples) = posterior_ess_Sigma(Y_validate,
                                                             mu_validate,
                                                             self.a[validate_subset],
                                                             self.b[validate_subset],
                                                             self.c[validate_subset],
                                                             Sigma=Sigma,
                                                             nburn=validation_burn,
                                                             nsamples=validation_mcmc_samples,
                                                             nthin=validation_thin,
                                                             print_freq=1)

            # Save the result
            self.prior_Sigma[fold_idx] = Sigma_samples.mean(axis=0)
            print('Last sample:')
            print(pretty_str(Sigma_samples[-1]))
            print('Mean:')
            print(pretty_str(self.prior_Sigma[fold_idx]))

            if checkpoint:
                self.clean_checkpoint(save_dir, fold_idx)

        print('Finished training.')
        
        return {'train_losses': self.train_losses,
                'validation_losses': self.val_losses,
                'mu': self.prior_mu,
                'Sigma': self.prior_Sigma,
                'models': self.models}
Example #19
    model.sess.run(tf.assign(model.lr, learning_rate * lr_decay**i))
    print "learning_rate:{}".format(model.lr.eval())
    c0, c1, c2 = model.istate_cell0.c.eval(), model.istate_cell1.c.eval(
    ), model.istate_cell2.c.eval()
    h0, h1, h2 = model.istate_cell0.h.eval(), model.istate_cell1.h.eval(
    ), model.istate_cell2.h.eval()
    kappa = np.zeros((model.batch_size, model.mixture_comps, 1))
    for b in range(global_step % no_batches, no_batches):
        a = i * no_batches + b
        if global_step != 0:
            a += 1
            global_step = 0
        if a % save_batches == 0 and (a > 0):
            model.saver.save(model.sess, save_path, global_step=a)

        x, y, s, c = batches(model.batch_size, training_data, Y, sentences,
                             model.char_steps, model.alphabets)
        my_feed_dict = {
            model.input_data: x,
            model.target_Data: y,
            model.char_sequence: c,
            model.init_kappa: kappa,
            model.istate_cell0.c: c0,
            model.istate_cell1.c: c1,
            model.istate_cell2.c: c2,
            model.istate_cell0.h: h0,
            model.istate_cell1.h: h1,
            model.istate_cell2.h: h2
        }

        [training_loss, _] = model.sess.run([model.cost, model.train_ops],
                                            my_feed_dict)
Example #20
drcf = model.DRCF(EMBEDDING_DIM, RNN_STEP, len(user2id), len(venue2id),
                  SAMPLE_NUM)
drcf = nn.DataParallel(drcf).cuda()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, drcf.parameters()),
                       lr=LEARNING_RATE)
criterion = nn.LogSigmoid()

for i in xrange(EPOCHS):
    # Training

    drcf.train()
    step = 0
    loss = .0
    batch_num = int(len(train) / BATCH_SIZE) + 1

    batches = utils.batches(train, BATCH_SIZE, SAMPLE_NUM, venue_frequency)
    for batch in batches:
        user, candidate, checkins, samples = batch
        input_user = Variable(torch.cuda.LongTensor(user))
        input_candidate = Variable(torch.cuda.LongTensor(candidate))
        input_checkins = Variable(torch.cuda.LongTensor(checkins))
        input_samples = Variable(torch.cuda.LongTensor(samples))

        # Optimizing
        optimizer.zero_grad()
        _loss = -criterion(
            drcf(input_user, input_candidate, input_checkins,
                 input_samples)).sum()
        _loss.backward()
        optimizer.step()
        loss += _loss.cpu().data.numpy()[0]