Example #1
def main():
    """ Execution logic """
    x_0, y_0 = 0.5, 2.2
    segment_end = 5.5
    step = 0.1

    x_nodes_list, y_nodes_list = [], []
    for accuracy in range(2, 5):
        x_nodes, y_nodes = runge_kutta_interpolation(given_function, x_0, y_0,
                                                     segment_end, step,
                                                     accuracy)
        x_nodes_list.append(x_nodes)
        y_nodes_list.append(y_nodes)

    display_plot_async(x_nodes_list, y_nodes_list)

    print('Runge-Kutta interpolation results')
    print(tp.header(['X', 'Y', 'Y2', 'Y3', 'Y4']))
    for x, y, y2, y3, y4 in zip(
            x_nodes_list[0], [explicit_solution(x) for x in x_nodes_list[0]],
            y_nodes_list[0], y_nodes_list[1], y_nodes_list[2]):
        print(tp.row([
            x,
            y,
            y2,
            y3,
            y4,
        ]))
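The header/row pattern above recurs throughout these examples. A minimal, self-contained sketch of the same idiom, assuming tableprint is installed and imported as tp and using its default column width:

import tableprint as tp

# header(), row() and bottom() each return a formatted string; printing
# them in sequence builds the table line by line.
print(tp.header(['X', 'Y']))
for x in range(3):
    print(tp.row([x, x ** 2]))
print(tp.bottom(2))  # closing rule for a two-column table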
Example #2
        def minimize(self, f_df, x0, display=sys.stdout, maxiter=1e3):

            self.display = display
            self.theta = x0

            # setup
            xk = self.algorithm.send(destruct(x0).copy())
            store = defaultdict(list)
            runtimes = []
            if len(self.operators) == 0:
                self.operators = [proxops.identity()]

            # wrap the objective/gradient and build the parameter transform
            obj, grad = wrap(f_df, x0)
            transform = compose(destruct, *reversed(self.operators), self.restruct)

            self.optional_print(tp.header(['Iteration', 'Objective', '||Grad||', 'Runtime']))
            try:
                for k in count():

                    # take one optimization step and time it
                    tstart = perf_counter()
                    f = obj(xk)
                    df = grad(xk)
                    xk = transform(self.algorithm.send(df))
                    runtimes.append(perf_counter() - tstart)
                    store['f'].append(f)

                    # Update display
                    self.optional_print(tp.row([k,
                                                f,
                                                np.linalg.norm(destruct(df)),
                                                tp.humantime(runtimes[-1])]))

                    if k >= maxiter:
                        break

            except KeyboardInterrupt:
                pass

            self.optional_print(tp.bottom(4))

            # summary
            self.optional_print(u'\u279b Final objective: {}'.format(store['f'][-1]))
            self.optional_print(u'\u279b Total runtime: {}'.format(tp.humantime(sum(runtimes))))
            self.optional_print(u'\u279b Per iteration runtime: {} +/- {}'.format(
                tp.humantime(np.mean(runtimes)),
                tp.humantime(np.std(runtimes)),
            ))

            # result
            return OptimizeResult({
                'x': self.restruct(xk),
                'f': f,
                'df': self.restruct(df),
                'k': k,
                'obj': np.array(store['f']),
            })
Example #3
    def update_display(self, iteration, disp_level, col_width=12):
        """
        Prints information about the optimization procedure to standard output

        Parameters
        ----------
        iteration : int
            The current iteration. Must be either a positive integer or -1, which indicates the end of the algorithm

        disp_level : int
            An integer which controls how much information to display, ranging from 0 (nothing) to 3 (lots of stuff)

        col_width : int
            The width of each column in the data table, used if disp_level > 1

        """

        # exit and print nothing if disp_level is zero
        if disp_level == 0:
            return

        else:

            # simple update, no table
            if disp_level == 1 and iteration >= 0:
                print('[Iteration %i]' % iteration)

            # fancy table updates
            if disp_level > 1:

                # get the metadata from this iteration
                data = self.metadata.tail(1).iloc[0].to_dict()

                # choose what keys to use
                keys = ['Time (s)', 'Primal resid', 'Dual resid']
                if disp_level > 2:
                    keys += ['Momentum (rho)', 'Primal runtimes']

                # initial update. print out table headers
                if iteration == 1:
                    hr = tableprint.hr(len(keys), column_width=col_width)
                    print('\n' + hr)
                    print(tableprint.header(keys, column_width=col_width))
                    print(hr)

                # print data
                tabledata = map(lambda d: float(d) if d.size == 1 else ', '.join(map(lambda i: '{:.2f}'.format(i), d)),
                                [np.array(data[key]) for key in keys])
                print(tableprint.row(tabledata, column_width=col_width, precision='4g'))

                if iteration == -1:
                    print(tableprint.hr(len(keys), column_width=col_width) + '\n')

            # print convergence statement
            if iteration == -1 and self.converged:
                print('Converged after %i iterations!' % len(self.metadata))
Example #4
def sample(data_path: str,
           encoder_path: str,
           vocab_path: str,
           sample_length: int = 30,
           output: str = None):
    dump = torch.load(encoder_path, map_location=lambda storage, loc: storage)
    encodermodel = dump['encodermodel']
    decodermodel = dump['decodermodel']
    # Some scaler (sklearn standardscaler)
    scaler = dump['scaler']
    # Also load previous training config
    config_parameters = dump['config']

    vocab = torch.load(vocab_path)
    print(encodermodel)
    print(decodermodel)
    # Move models to the target device and set them to eval mode
    encodermodel = encodermodel.to(DEVICE).eval()
    decodermodel = decodermodel.to(DEVICE).eval()

    kaldi_string = parsecopyfeats(data_path,
                                  **config_parameters['feature_args'])
    width_length = sample_length * 4
    with stdout_or_file(output) as writer:
        writer.write(
            tp.header(["InputUtterance", "Output Sentence"],
                      style='grid',
                      width=width_length))
        writer.write('\n')
        for k, features in kaldi_io.read_mat_ark(kaldi_string):
            features = scaler.transform(features)
            # Add single batch dimension
            features = torch.from_numpy(features).to(DEVICE).unsqueeze(0)
            # Generate a caption embedding
            encoded_feature, hiddens = encodermodel(features)
            sampled_ids = decodermodel.sample(encoded_feature,
                                              states=hiddens,
                                              maxlength=sample_length)
            # (1, max_seq_length) -> (max_seq_length)
            sampled_ids = sampled_ids[0].cpu().numpy()

            # Convert word_ids to words
            sampled_caption = []
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                sampled_caption.append(word)
                if word == '<end>':
                    break
            sentence = ''.join(sampled_caption)

            # Print out the image and the generated caption
            writer.write(
                tp.row([k, sentence], style='grid', width=width_length))
            writer.write('\n')
            writer.flush()
        writer.write(tp.bottom(2, style='grid', width=width_length))
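Example #4 routes the table through a generic writer instead of print, appending explicit newlines because tp.header, tp.row and tp.bottom return strings without a trailing newline. A minimal sketch of that pattern writing a grid-style table to a plain file (the file name, values and column width are placeholders):

import tableprint as tp

with open('captions.txt', 'w') as writer:
    # each call returns a string; the caller adds the newline
    writer.write(tp.header(['Utterance', 'Sentence'], style='grid', width=20) + '\n')
    writer.write(tp.row(['utt1', 'a generated caption'], style='grid', width=20) + '\n')
    writer.write(tp.bottom(2, style='grid', width=20) + '\n')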
Example #5
    def update_display(self,
                       iteration,
                       disp_level,
                       col_width=12):  # pragma: no cover
        """
        Prints information about the optimization procedure to standard output

        Parameters
        ----------
        iteration : int
            The current iteration. Must be either a positive integer or -1, which indicates the end of the algorithm

        disp_level : int
            An integer which controls how much information to display, ranging from 0 (nothing) to 3 (lots of stuff)

        col_width : int
            The width of each column in the data table, used if disp_level > 1
        """

        # exit and print nothing if disp_level is zero
        if disp_level == 0:
            return

        else:

            # simple update, no table
            if disp_level == 1 and iteration >= 0:
                print('[Iteration %i]' % iteration)

            # fancy table updates
            if disp_level > 1:

                # get the metadata from this iteration
                data = valmap(last, self.metadata)

                # choose what keys to use
                keys = ['Time (s)', 'Primal resid', 'Dual resid', 'rho']

                # initial update. print out table headers
                if iteration == 1:
                    print(tableprint.header(keys, width=col_width))

                # print data
                print(
                    tableprint.row([data[k] for k in keys],
                                   width=col_width,
                                   format_spec='4g'))

                if iteration == -1:
                    print(tableprint.bottom(len(keys), width=col_width) + '\n')

            # print convergence statement
            if iteration == -1 and self.converged:
                print('Converged after %i iterations!' %
                      len(self.metadata['Primal resid']))
Example #6
def buildTable(a, b, c, d):
    n = len(a)
    print(tp.header(['i', 'ai', 'bi', 'ci', 'di'], 10))
    num = 1
    for i in range(1, n - 1):
        print(tp.row([num, "{:.3f}".format(a[i]), "{:.3f}".format(b[i]), \
            "{:.3f}".format(c[i]), "{:.3f}".format(d[i-1])], 10))
        num += 1
    print(tp.row([num, "{:.3f}".format(a[n-1]), "{:.3f}".format(b[0]), \
        "{:.3f}".format(c[n-1]), "{:.3f}".format(d[n-1])], 10))
    print(tp.bottom(5, 10))
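Example #6 assembles the table row by row with explicit float formatting. When the data already sit in a 2-D array, tableprint also exposes a one-call table() helper that prints the header, every row, and the closing rule together; a short sketch (the coefficient array below is only a stand-in):

import numpy as np
import tableprint as tp

coeffs = np.random.randn(4, 4)  # stand-in for the a/b/c/d coefficients
tp.table(coeffs, headers=['ai', 'bi', 'ci', 'di'], width=10)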
Example #7
def _display_result(sample_mean, sample_variance, standard_deviation):
    print('\n')
    print(tp.header([
        'sample mean',
        'sample variance',
        'standard deviation',
    ]))
    print(tp.row([
        sample_mean,
        sample_variance,
        standard_deviation,
    ]))
Example #8
def _printfields(fields, print_header=True):
    names, vals = [], []
    for name, val, _ in fields:
        names.append(name)
        if val is None:
            # Display None as empty
            vals.append('')
        else:
            vals.append(val)
    # Print header
    if print_header:
        print(tableprint.header(names, width=11))
    # Print value row
    print(tableprint.row(vals, width=11))
Example #9
def geocode_crime_data(data_entries):
    """ Geocodes a list of crime data entries """

    print(tableprint.header(['City Name', 'Lat', 'Long'], width=30))
    for entry in data_entries:
        coords = geocode_city_name(entry['city_name'])
        entry['coords'] = coords

        # Print the geocoded row (assuming coords is a (lat, lon) pair)
        print(tableprint.row([entry['city_name'], coords[0], coords[1]], width=30))

        # Add a delay for API limiting
        time.sleep(0.1)

    print(tableprint.bottom(3, width=30))

    return data_entries
Example #10
def run_explicit_euler_method():
    """ Run implicit euler interpolation calculating """
    step = 0.05
    x_0, y_0 = 0, 1
    segment_end = 1

    x_nodes, y_nodes = explicit_euler_interpolation(given_function,
                                                    x_0,
                                                    y_0,
                                                    segment_start=x_0,
                                                    segment_end=segment_end,
                                                    step=step)
    display_plot_async(x_nodes, y_nodes, "Explicit Eulerian interpolation")

    print('\nExplicit Euler method results')
    print(tp.header([
        'X',
        'Y',
        'Interpolated',
    ]))
    for x, y in zip(x_nodes, y_nodes):
        print(tp.row([x, onhand_solution(x), y]))
Example #11
def _display_result(observable_chi, critical_chi):
    print('\nhypothesis of a normal distribution')
    print(tp.header(['Observable chi', 'Critical chi', ]))
    print(tp.row([observable_chi, critical_chi, ]))
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "serial_number",
        type=int,
        help=
        "the 10-digit serial number found under the magnetic backplate of your Wave Plus",
    )
    parser.add_argument(
        "--sample-period",
        type=int,
        default=300,
        help=
        "the number of seconds between reading the current values. Default: %(default)s",
    )
    parser.add_argument("--pipe",
                        action="store_true",
                        help="pipe the results to a file")
    parser.add_argument(
        "--statusbar",
        action="store_true",
        help="print air quality status suitable for statusbar",
    )
    parser.add_argument(
        "--mac-addr",
        help="the MAC address of the Wave Plus device",
    )
    args = parser.parse_args()

    if len(str(args.serial_number)) != 10:
        print("ERROR: Invalid SN format.")
        parser.print_usage()
        sys.exit(1)

    if args.sample_period <= 0:
        print("ERROR: Invalid SAMPLE-PERIOD. Must be larger than zero.")
        parser.print_usage()
        sys.exit(1)

    try:
        waveplus = WavePlus(args.serial_number, args.mac_addr)

        header = [
            "Humidity",
            "Radon ST avg",
            "Radon LT avg",
            "Temperature",
            "Pressure",
            "CO2 level",
            "VOC level",
        ]

        if args.pipe:
            print(*header, sep=",")
        elif not args.statusbar:
            print(tableprint.header(header, width=TABLEPRINT_WIDTH))

        while True:
            waveplus.connect()
            sensors = waveplus.read()

            data = {var: sensors.get_variable(var) for var in VARIABLES}
            if args.statusbar:
                statusbar_print(data)
                sys.exit(0)

            if args.pipe:
                print(*data.values(), sep=",")
            else:
                print(
                    tableprint.row(list(map(str, data.values())),
                                   width=TABLEPRINT_WIDTH))

            waveplus.disconnect()

            time.sleep(args.sample_period)

    finally:
        waveplus.disconnect()
Example #13
    def sample(self,
               experiment_path: str,
               feature_file: str,
               feature_scp: str,
               output: str = "output_word.txt",
               **kwargs):
        """Generate captions given experiment model"""
        """kwargs: {'max_length': int, 'method': str, 'beam_size': int}"""
        import tableprint as tp

        dump = torch.load(os.path.join(experiment_path, "saved.pth"),
                          map_location="cpu")
        # Load previous training config
        config = dump["config"]

        vocab_size = len(torch.load(config["vocab_file"]))
        model = self._get_model(config, vocab_size)
        model.load_state_dict(dump["model"])
        # Some scaler (sklearn standardscaler)
        scaler = dump["scaler"]
        vocabulary = torch.load(config["vocab_file"])
        zh = config["zh"]
        model = model.to(self.device)
        dataset = SJTUDatasetEval(feature=feature_file,
                                  eval_scp=feature_scp,
                                  transform=scaler.transform)
        dataloader = torch.utils.data.DataLoader(dataset,
                                                 shuffle=False,
                                                 collate_fn=collate_fn((1, )),
                                                 batch_size=16,
                                                 num_workers=0)

        width_length = 80
        pbar = ProgressBar(persist=False, ascii=True)
        writer = open(os.path.join(experiment_path, output), "w")
        writer.write(
            tp.header(["InputUtterance", "Output Sentence"],
                      width=[len("InputUtterance"), width_length]))
        writer.write('\n')

        sentences = []

        def _sample(engine, batch):
            # batch: [keys, feats, feat_lens]
            with torch.no_grad():
                model.eval()
                keys = batch[0]
                output = self._forward(model, batch, mode="sample", **kwargs)
                seqs = output["seqs"].cpu().numpy()
                for idx, seq in enumerate(seqs):
                    caption = self._convert_idx2sentence(seq,
                                                         vocabulary,
                                                         zh=zh)
                    if zh:
                        sentence = " ".join(caption)
                    else:
                        sentence = caption
                    writer.write(
                        tp.row([keys[idx], sentence],
                               width=[len("InputUtterance"), width_length]) +
                        "\n")
                    sentences.append(sentence)

        sample_engine = Engine(_sample)
        pbar.attach(sample_engine)
        sample_engine.run(dataloader)
        writer.write(
            tp.bottom(2, width=[len("InputUtterance"), width_length]) + "\n")
        writer.write("Unique sentence number: {}\n".format(len(
            set(sentences))))
        writer.close()
Example #14
    for serial in device_serials:
        waveplus_devices.append(WavePlus(int(serial)))

    if (Mode == 'terminal'):
        print("\nPress ctrl+C to exit program\n")

    # print "Device serial number: %s" %(SerialNumber)

    header = [
        'Timestamp', 'Sensor Serial Number', 'Humidity - %rH',
        'Radon ST avg - Bq/m3', 'Radon LT avg - Bq/m3', 'T - degC', 'P - hPa',
        'CO2 - ppm', 'VOC - ppb'
    ]

    if (Mode == 'terminal'):
        print(tableprint.header(header, width=12))
    elif (Mode == 'pipe'):
        print(header)
    elif (Mode == 'file'):
        file = open(outfile, 'a+')
        file.write(','.join(header) + "\n")
        file.close()

    while True:
        for waveplus in waveplus_devices:
            tries = 0
            while tries <= MaxRetries:
                try:
                    tries = tries + 1
                    connected = waveplus.connect()
                    if not connected:
Example #15
def on_training_started(engine, outputfun=sys.stdout.write, header=[]):
    outputfun("<== Training Started ==>")
    for line in tp.header(header, style="grid").split("\n"):
        outputfun(line)
Example #16
def main(config='config/train.yaml', **kwargs):
    """Trains a model on the given features and vocab.

    :config: A training configuration. Note that all parameters in the config can also be manually adjusted with --ARG VALUE
    :returns: None
    """

    config_parameters = parse_config_or_kwargs(config, **kwargs)
    outputdir = os.path.join(
        config_parameters['outputpath'],
        config_parameters['model'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f'))
    try:
        os.makedirs(outputdir)
    except IOError:
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)

    # seed setting
    seed = config_parameters['seed']  # 1~5
    np.random.seed(seed)
    torch.manual_seed(seed)

    kaldi_string = parsecopyfeats(
        config_parameters['features'], **config_parameters['feature_args'])

    scaler = getattr(pre, config_parameters['scaler'])(**config_parameters['scaler_args'])
    logger.info("<== Estimating Scaler ({}) ==>".format(scaler.__class__.__name__))
    inputdim = -1
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    logger.info(
        "Features: {} Input dimension: {}".format(
            config_parameters['features'],
            inputdim))
    
    logger.info("<== Labels ==>")
    # Can be label, DAT, DADA ... default is 'label'
    target_label_name = config_parameters.get('label_type', 'label')
    if target_label_name == 'label':
        label_df = pd.read_csv(config_parameters['labels'], sep=' ', names=['speaker', 'filename', 'physical', 'system', 'label'])
    else: # 'DAT' or 'DADA'
        label_df = pd.read_csv(config_parameters['labels'], sep=' ', names=['speaker', 'filename', 'physical', 'system', 'label', 'domain'])
    label_encoder = pre.LabelEncoder()
    if target_label_name == 'label':
        label_encoder.fit(label_df[target_label_name].values.astype(str))
        # Labelencoder needs an iterable to work, so just put a list around it and fetch again the 0-th element ( just the encoded string )
        label_df['class_encoded'] = label_df[target_label_name].apply(lambda x: label_encoder.transform([x])[0])
        train_labels = label_df[['filename', 'class_encoded']].set_index('filename').loc[:, 'class_encoded'].to_dict()
    else: # 'DAT' or 'DADA'
        label_encoder_sub = pre.LabelEncoder()
        label_encoder.fit(label_df['label'].values.astype(str))
        label_df['lab_encoded'] = label_df['label'].apply(lambda x: label_encoder.transform([x])[0])
        label_encoder_sub.fit(label_df['domain'].values.astype(str))
        label_df['domain_encoded'] = label_df['domain'].apply(lambda x: label_encoder_sub.transform([x])[0])
        train_labels = label_df[['filename', 'lab_encoded', 'domain_encoded']].set_index('filename').to_dict('index')
        train_labels = {k:np.array(list(v.values())) for k, v in train_labels.items()}
        # outdomain
        outdomain = config_parameters['outdomain']
        outdomain_label = label_encoder_sub.transform([outdomain])[0]
        logger.info("Outdomain: {}, Outdomain label: {}".format(outdomain, outdomain_label))
    
    if target_label_name == 'label':
        train_dataloader, cv_dataloader = create_dataloader_train_cv(kaldi_string, train_labels, transform=scaler.transform, target_label_name=target_label_name, **config_parameters['dataloader_args'])
    else: #'DAT' or 'DADA' 
        outdomain_train_dataloader, indomain_train_dataloader, cv_dataloader = create_dataloader_train_cv(kaldi_string, train_labels, transform=scaler.transform, target_label_name=target_label_name, outdomain_label=outdomain_label, **config_parameters['dataloader_args'])

    if target_label_name == 'label':
        model = getattr(models, config_parameters['model'])(inputdim=inputdim, outputdim=len(label_encoder.classes_), **config_parameters['model_args'])
    else: # 'DAT' or 'DADA'
        model = getattr(models, config_parameters['model'])(inputdim=inputdim, outputdim1=len(label_encoder.classes_), outputdim2=len(label_encoder_sub.classes_), **config_parameters['model_args'])
    logger.info("<== Model ==>")
    for line in pformat(model).split('\n'):
        logger.info(line)
    optimizer = getattr(torch.optim, config_parameters['optimizer'])(model.parameters(), **config_parameters['optimizer_args'])

    scheduler = getattr(torch.optim.lr_scheduler, config_parameters['scheduler'])(optimizer, **config_parameters['scheduler_args'])
    criterion = getattr(loss, config_parameters['loss'])(**config_parameters['loss_args'])
    trainedmodelpath = os.path.join(outputdir, 'model.th')

    model = model.to(device)
    criterion_improved = criterion_improver(config_parameters['improvecriterion'])
    header = [
        'Epoch',
        'Lr',
        'Loss(T)',
        'Loss(CV)',
        "Acc(T)",
        "Acc(CV)",
    ]
    for line in tp.header(header, style='grid').split('\n'):
        logger.info(line)

    for epoch in range(1, config_parameters['epochs']+1):
        if target_label_name == 'label':
            train_loss, train_acc = runepoch(train_dataloader, None, model, criterion, target_label_name, optimizer, dotrain=True, epoch=epoch)
        else: # 'DAT' or 'DADA'
            train_loss, train_acc = runepoch(outdomain_train_dataloader, indomain_train_dataloader, model, criterion, target_label_name, optimizer, dotrain=True, epoch=epoch)
        cv_loss, cv_acc = runepoch(cv_dataloader, None, model, criterion, target_label_name, dotrain=False, epoch=epoch)
        logger.info(
            tp.row(
                (epoch,) + (optimizer.param_groups[0]['lr'],) +
                (str(train_loss), str(cv_loss), str(train_acc), str(cv_acc)),
                style='grid'))
        epoch_meanloss = cv_loss[0] if isinstance(cv_loss, tuple) else cv_loss
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': label_encoder,
                        'config': config_parameters},
                        os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceOnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        if criterion_improved(epoch_meanloss):
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': label_encoder,
                        'config': config_parameters},
                        trainedmodelpath)
        if optimizer.param_groups[0]['lr'] < 1e-7:
            break
    logger.info(tp.bottom(len(header), style='grid'))
    logger.info("Results are in: {}".format(outputdir))
Example #17
    def on_epoch_begin(self, epoch, logs={}):
        tp.banner(f"Epoch {epoch}")
        print(tp.header(['iter', 'loss']))
Example #18
def train(model, experiment, monitor, num_epochs, augment=False):
    """Train the given network against the given data

    Parameters
    ----------
    model : keras.models.Model or glms.GLM
        A GLM or Keras Model object

    experiment : experiments.Experiment
        An Experiment object

    monitor : io.Monitor
        Saves the model parameters and plots of performance progress

    num_epochs : int
        Number of epochs to train for

    augment : bool, optional
        Whether to augment the training data (default: False)

    """
    assert isinstance(model, (Model, GLM)), "'model' must be a GLM or Keras model"

    # initialize training iteration
    iteration = 0
    train_start = time()

    # loop over epochs
    try:
        for epoch in range(num_epochs):
            tp.banner('Epoch #{} of {}'.format(epoch + 1, num_epochs))
            print(tp.header(["Iteration", "Loss", "Runtime"]), flush=True)

            # loop over data batches for this epoch
            for X, y in experiment.train(shuffle=True):

                # update on save_every, assuming it is positive
                if (monitor is not None) and (iteration % monitor.save_every == 0):

                    # performs validation, updates performance plots, saves results to dropbox
                    monitor.save(epoch, iteration, X, y, model.predict)

                # train on the batch
                tstart = time()
                loss = model.train_on_batch({'stim':X, 'loss':y})[0]
                elapsed_time = time() - tstart

                # update
                iteration += 1
                print(tp.row([iteration, float(loss), tp.humantime(elapsed_time)]), flush=True)

            print(tp.bottom(3))

    except KeyboardInterrupt:
        print('\nCleaning up')

    # allows the monitor to perform any post-training visualization
    if monitor is not None:
        elapsed_time = time() - train_start
        monitor.cleanup(iteration, elapsed_time)

    tp.banner('Training complete!')
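Examples #2 and #18 both time each iteration and format the elapsed seconds with tp.humantime. A minimal standalone sketch of that timing-table pattern (the sleep is a stand-in for real work):

import time
import tableprint as tp

print(tp.header(['Iteration', 'Runtime']))
for k in range(3):
    tstart = time.perf_counter()
    time.sleep(0.01)  # stand-in for a training step
    elapsed = time.perf_counter() - tstart
    # humantime turns a float number of seconds into a readable string
    print(tp.row([k, tp.humantime(elapsed)]))
print(tp.bottom(2))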
Example #19
def check_grad(f_df, xref, stepsize=1e-6, tol=1e-6, width=15, style='round', out=sys.stdout):
    """
    Compares the numerical gradient to the analytic gradient

    Parameters
    ----------
    f_df : function
        The analytic objective and gradient function to check

    xref : array_like
        Parameter values to check the gradient at

    stepsize : float, optional
        Stepsize for the numerical gradient. Too big and this will poorly estimate the gradient.
        Too small and you will run into precision issues (default: 1e-6)

    tol : float, optional
        Tolerance to use when coloring correct/incorrect gradients (default: 1e-6)

    width : int, optional
        Width of the table columns (default: 15)

    style : string, optional
        Style of the printed table, see tableprint for a list of styles (default: 'round')
    """
    CORRECT = u'\x1b[32m\N{CHECK MARK}\x1b[0m'
    INCORRECT = u'\x1b[31m\N{BALLOT X}\x1b[0m'

    obj, grad = wrap(f_df, xref, size=0)
    x0 = destruct(xref)
    df = grad(x0)

    # header
    out.write(tp.header(["Numerical", "Analytic", "Error"], width=width, style=style) + "\n")
    out.flush()

    # helper function to parse a number
    def parse_error(error):

        # colors
        failure = "\033[91m"
        passing = "\033[92m"
        warning = "\033[93m"
        end = "\033[0m"
        base = "{}{:0.3e}{}"

        # correct
        if error < 0.1 * tol:
            return base.format(passing, error, end)

        # warning
        elif error < tol:
            return base.format(warning, error, end)

        # failure
        else:
            return base.format(failure, error, end)

    # check each dimension
    num_errors = 0
    for j in range(x0.size):

        # take a small step in one dimension
        dx = np.zeros(x0.size)
        dx[j] = stepsize

        # compute the centered difference formula
        df_approx = (obj(x0 + dx) - obj(x0 - dx)) / (2 * stepsize)
        df_analytic = df[j]

        # absolute error
        abs_error = np.linalg.norm(df_approx - df_analytic)

        # relative error
        error = abs_error if np.allclose(abs_error, 0) else abs_error / \
            (np.linalg.norm(df_analytic) + np.linalg.norm(df_approx))

        num_errors += error >= tol
        errstr = CORRECT if error < tol else INCORRECT
        out.write(tp.row([df_approx, df_analytic, parse_error(error) + ' ' + errstr],
                         width=width, style=style) + "\n")
        out.flush()

    out.write(tp.bottom(3, width=width, style=style) + "\n")
    return num_errors
Example #20
def main(features: str, vocab_file: str,
         config='config/trainconfig.yaml', **kwargs):
    """Trains a model on the given features and vocab.

    :features: str: Input features. Needs to be kaldi formatted file
    :vocab_file:str: Vocabulary generated by using build_vocab.py
    :config: A training configuration. Note that all parameters in the config can also be manually adjusted with --ARG=VALUE
    :returns: None
    """

    config_parameters = parse_config_or_kwargs(config, **kwargs)
    outputdir = os.path.join(
        config_parameters['encodermodel'] + '_' +
        config_parameters['decodermodel'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
    try:
        os.makedirs(outputdir)
    except IOError:
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)

    kaldi_string = parsecopyfeats(
        features, **config_parameters['feature_args'])

    scaler = getattr(
        pre, config_parameters['scaler'])(
        **config_parameters['scaler_args'])
    inputdim = -1
    logger.info(
        "<== Estimating Scaler ({}) ==>".format(
            scaler.__class__.__name__))
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    vocabulary = torch.load(vocab_file)
    vocab_size = len(vocabulary)
    logger.info(
        "Features: {} Input dimension: {} Vocab Size: {}".format(
            features, inputdim, vocab_size))
    if 'load_pretrained' in config_parameters and config_parameters['load_pretrained']:
        encodermodeldump = torch.load(
            config_parameters['load_pretrained'],
            map_location=lambda storage, loc: storage)
        pretrainedmodel = encodermodeldump['encodermodel']
        encodermodel = models.PreTrainedCNN(
            inputdim=inputdim, pretrained_model=pretrainedmodel, **
            config_parameters['encodermodel_args'])
    else:
        encodermodel = getattr(
            models, config_parameters['encodermodel'])(
            inputdim=inputdim, **config_parameters['encodermodel_args'])
    decodermodel = getattr(
        models, config_parameters['decodermodel'])(
        vocab_size=vocab_size, **config_parameters['decodermodel_args'])
    logger.info("<== EncoderModel ==>")
    for line in pformat(encodermodel).split('\n'):
        logger.info(line)
    logger.info("<== DecoderModel ==>")
    for line in pformat(decodermodel).split('\n'):
        logger.info(line)

    params = list(encodermodel.parameters()) + list(decodermodel.parameters())

    train_dataloader, cv_dataloader = create_dataloader_train_cv(
        kaldi_string,
        config_parameters['captions_file'],
        vocab_file,
        transform=scaler.transform,
        **config_parameters['dataloader_args'])
    optimizer = getattr(
        torch.optim, config_parameters['optimizer'])(
        params,
        **config_parameters['optimizer_args'])

    scheduler = getattr(
        torch.optim.lr_scheduler,
        config_parameters['scheduler'])(
        optimizer,
        **config_parameters['scheduler_args'])
    criterion = torch.nn.CrossEntropyLoss()
    trainedmodelpath = os.path.join(outputdir, 'model.th')

    encodermodel = encodermodel.to(device)
    decodermodel = decodermodel.to(device)

    criterion_improved = criterion_improver(
        config_parameters['improvecriterion'])
    for line in tp.header(
        ['Epoch', 'MeanLoss(T)', 'StdLoss(T)', 'Loss(CV)', 'StdLoss(CV)',
         "Acc(T)", "Acc(CV)", "Forcing?"],
            style='grid').split('\n'):
        logger.info(line)
    teacher_forcing_ratio = config_parameters['teacher_forcing_ratio']
    for epoch in range(1, config_parameters['epochs']+1):
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        train_loss_mean_std, train_acc = trainepoch(
            train_dataloader, encodermodel, decodermodel, criterion, optimizer,
            vocabulary, use_teacher_forcing)
        cv_loss_mean_std, cv_acc = sample_cv(
            cv_dataloader, encodermodel, decodermodel, criterion)
        logger.info(
            tp.row(
                (epoch,) + train_loss_mean_std + cv_loss_mean_std +
                (train_acc, cv_acc, use_teacher_forcing),
                style='grid'))
        epoch_meanloss = cv_loss_mean_std[0]
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'encodermodel': encodermodel,
                        'decodermodel': decodermodel, 'scaler': scaler,
                        'config': config_parameters},
                       os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceOnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        if criterion_improved(epoch_meanloss):
            torch.save({'encodermodel': encodermodel,
                        'decodermodel': decodermodel, 'scaler': scaler,
                        'config': config_parameters},
                       trainedmodelpath)
        else:
            dump = torch.load(trainedmodelpath)
            encodermodel.load_state_dict(dump['encodermodel'].state_dict())
            decodermodel.load_state_dict(dump['decodermodel'].state_dict())
        if optimizer.param_groups[0]['lr'] < 1e-6:
            break
    logger.info(tp.bottom(8, style='grid'))
    # Sample results
    from sample import sample

    sample(
        data_path=features,
        encoder_path=trainedmodelpath,
        vocab_path=vocab_file,
        output=os.path.join(
            outputdir,
            'output_word.txt'))
Example #21
try:
    #---- Connect to device ----#
    wave = Wave(SerialNumber)
    wave.scan()

    if (Mode == 'terminal'):
        print "\nPress ctrl-C to exit program\n"

    print "Device serial number: %s" % (SerialNumber)

    header = [
        'Datetime', 'Humidity', 'Temperature', 'Radon ST avg', 'Radon LT avg'
    ]

    if (Mode == 'terminal'):
        print(tableprint.header(header, width=20))
    elif (Mode == 'pipe'):
        print(header)

    while True:
        wave.connect()
        # read current values
        date_time = wave.read(SENSOR_IDX_DATETIME)
        humidity = wave.read(SENSOR_IDX_HUMIDITY)
        temperature = wave.read(SENSOR_IDX_TEMPERATURE)
        radon_st_avg = wave.read(SENSOR_IDX_RADON_ST_AVG)
        radon_lt_avg = wave.read(SENSOR_IDX_RADON_LT_AVG)

        data = [date_time, humidity, temperature, radon_st_avg, radon_lt_avg]

        # Print data
Example #22
    waveplus = WavePlus(SerialNumber)
    ledController = LedController(LedMode)

    if (Mode == 'terminal'):
        print("\nPress ctrl+C to exit program\n")

    print(f"Device serial number: {SerialNumber}")

    header = [
        'Date, time', 'Humidity', 'Radon ST avg', 'Radon LT avg',
        'Temperature', 'Pressure', 'CO2 level', 'VOC level'
    ]
    COLUMN_WIDTH = 12

    if (Mode == 'terminal'):
        print(tableprint.header(header, width=COLUMN_WIDTH))
    elif (Mode == 'pipe'):
        print(header)

    MAX_FAILURES = 5
    failure_count = 0

    while True:
        ledController.OnCommsStart()

        try:
            waveplus.connect()
            failure_count = 0
        except Exception:
            ledController.OnCommsEnd()
            if failure_count < MAX_FAILURES:
Example #23
def check_grad(f_df,
               xref,
               stepsize=1e-6,
               tol=1e-6,
               width=15,
               style='round',
               out=sys.stdout):
    """
    Compares the numerical gradient to the analytic gradient

    Parameters
    ----------
    f_df : function
        The analytic objective and gradient function to check

    xref : array_like
        Parameter values to check the gradient at

    stepsize : float, optional
        Stepsize for the numerical gradient. Too big and this will poorly estimate the gradient.
        Too small and you will run into precision issues (default: 1e-6)

    tol : float, optional
        Tolerance to use when coloring correct/incorrect gradients (default: 1e-6)

    width : int, optional
        Width of the table columns (default: 15)

    style : string, optional
        Style of the printed table, see tableprint for a list of styles (default: 'round')
    """
    CORRECT = u'\x1b[32m\N{CHECK MARK}\x1b[0m'
    INCORRECT = u'\x1b[31m\N{BALLOT X}\x1b[0m'

    obj, grad = wrap(f_df, xref, size=0)
    x0 = destruct(xref)
    df = grad(x0)

    # header
    out.write(
        tp.header(["Numerical", "Analytic", "Error"], width=width, style=style)
        + "\n")
    out.flush()

    # helper function to parse a number
    def parse_error(error):

        # colors
        failure = "\033[91m"
        passing = "\033[92m"
        warning = "\033[93m"
        end = "\033[0m"
        base = "{}{:0.3e}{}"

        # correct
        if error < 0.1 * tol:
            return base.format(passing, error, end)

        # warning
        elif error < tol:
            return base.format(warning, error, end)

        # failure
        else:
            return base.format(failure, error, end)

    # check each dimension
    num_errors = 0
    for j in range(x0.size):

        # take a small step in one dimension
        dx = np.zeros(x0.size)
        dx[j] = stepsize

        # compute the centered difference formula
        df_approx = (obj(x0 + dx) - obj(x0 - dx)) / (2 * stepsize)
        df_analytic = df[j]

        # absolute error
        abs_error = np.linalg.norm(df_approx - df_analytic)

        # relative error
        error = abs_error if np.allclose(abs_error, 0) else abs_error / \
            (np.linalg.norm(df_analytic) + np.linalg.norm(df_approx))

        num_errors += error >= tol
        errstr = CORRECT if error < tol else INCORRECT
        out.write(
            tp.row([df_approx, df_analytic,
                    parse_error(error) + ' ' + errstr],
                   width=width,
                   style=style) + "\n")
        out.flush()

    out.write(tp.bottom(3, width=width, style=style) + "\n")
    return num_errors
Example #24
        # shift = self.mean * (now - self.last_computed_sys_time)
        # tti[0] = tti[0] + shift
        # tti[1] = tti[1] + shift
        # tti[2] = tti[1] + shift
        tti.append(now)
        return tti


samples = NTPMonitor(5, sys.argv[1], 3, 300, .999999999)

width = 22
print(
    tableprint.header([
        'Last Offset (us)', 'Last Delay', 'Mean Offset (us)', 'RMS Offset',
        'Offset St. Dev. (us)', 'Interval Width (us)', 'Interval Lower',
        'Interval Upper'
    ],
                      style='clean',
                      width=width))

print(
    tableprint.row([
        samples.last.offset * 1e+6,
        np.average([x.offset for x in samples.data[-30:]]) * 1e6,
        samples.mean * 1e+6, samples.rms_offset * 1e6, samples.stdev * 1e+6,
        samples.tt_interval()[3] * 1e6,
        samples.tt_interval()[0],
        samples.tt_interval()[1]
    ],
                   width=width,
                   style='clean',
Example #25
try:
    #---- Initialize ----#
    waveplus = WavePlus(SerialNumber)

    if (Mode == 'terminal'):
        print "\nPress ctrl+C to exit program\n"

    print "Device serial number: %s" % (SerialNumber)

    header = [
        'Humidity', 'Radon ST avg', 'Radon LT avg', 'Temperature', 'Pressure',
        'CO2 level', 'VOC level'
    ]

    if (Mode == 'terminal'):
        print(tableprint.header(header, width=12))
    elif (Mode == 'pipe'):
        print(header)

    while True:

        waveplus.connect()

        # read values
        sensors = waveplus.read()

        # extract
        humidity = str(sensors.getValue(SENSOR_IDX_HUMIDITY)) + " " + str(
            sensors.getUnit(SENSOR_IDX_HUMIDITY))
        radon_st_avg = str(
            sensors.getValue(SENSOR_IDX_RADON_SHORT_TERM_AVG)) + " " + str(
Example #26
    def start(self):
        print('\n'.join((self.hr,
                         tp.header(self.column_names, self.width),
                         self.hr)), flush=True)
Example #27
    def table_header(self, header, *args, **kwargs):
        self.write(tableprint.header(header, *args, **kwargs))
Example #28
def main(config='config/ReLU/0Pool/crnn_maxpool.yaml', **kwargs):
    """Trains a model on the given features and vocab.

    :features: str: Input features. Needs to be kaldi formatted file
    :config: A training configuration. Note that all parameters in the config can also be manually adjusted with --ARG=VALUE
    :returns: None
    """

    config_parameters = parse_config_or_kwargs(config, **kwargs)
    outputdir = os.path.join(
        config_parameters['outputpath'],
        config_parameters['model'],
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%f'))
    try:
        os.makedirs(outputdir)
    except IOError:
        pass
    logger = genlogger(outputdir, 'train.log')
    logger.info("Storing data at: {}".format(outputdir))
    logger.info("<== Passed Arguments ==>")
    # Print arguments into logs
    for line in pformat(config_parameters).split('\n'):
        logger.info(line)

    kaldi_string = parsecopyfeats(
        config_parameters['features'], **config_parameters['feature_args'])

    scaler = getattr(
        pre, config_parameters['scaler'])(
        **config_parameters['scaler_args'])
    inputdim = -1
    logger.info(
        "<== Estimating Scaler ({}) ==>".format(
            scaler.__class__.__name__))
    for kid, feat in kaldi_io.read_mat_ark(kaldi_string):
        scaler.partial_fit(feat)
        inputdim = feat.shape[-1]
    assert inputdim > 0, "Reading inputstream failed"
    logger.info(
        "Features: {} Input dimension: {}".format(
            config_parameters['features'],
            inputdim))
    logger.info("<== Labels ==>")
    label_df = pd.read_csv(config_parameters['labels'], sep='\t')
    label_df.event_labels = label_df.event_labels.str.split(',')
    label_df = label_df.set_index('filename')
    uniquelabels = list(np.unique(
        [item
         for row in label_df.event_labels.values
         for item in row]))
    many_hot_encoder = ManyHotEncoder(
        label_list=uniquelabels,
        time_resolution=1
    )
    label_df['manyhot'] = label_df['event_labels'].apply(
        lambda x: many_hot_encoder.encode(x, 1).data.flatten())

    utt_labels = label_df.loc[:, 'manyhot'].to_dict()

    train_dataloader, cv_dataloader = create_dataloader_train_cv(
        kaldi_string,
        utt_labels,
        transform=scaler.transform,
        **config_parameters['dataloader_args'])
    model = getattr(
        models,
        config_parameters['model'])(
        inputdim=inputdim,
        output_size=len(uniquelabels),
        **config_parameters['model_args'])
    logger.info("<== Model ==>")
    for line in pformat(model).split('\n'):
        logger.info(line)
    optimizer = getattr(
        torch.optim, config_parameters['optimizer'])(
        model.parameters(),
        **config_parameters['optimizer_args'])

    scheduler = getattr(
        torch.optim.lr_scheduler,
        config_parameters['scheduler'])(
        optimizer,
        **config_parameters['scheduler_args'])
    criterion = getattr(losses, config_parameters['loss'])(
        **config_parameters['loss_args'])

    trainedmodelpath = os.path.join(outputdir, 'model.th')

    model = model.to(device)
    criterion_improved = criterion_improver(
        config_parameters['improvecriterion'])
    header = [
        'Epoch',
        'UttLoss(T)',
        'UttLoss(CV)',
        "UttAcc(T)",
        "UttAcc(CV)",
        "mAUC(CV)"]
    for line in tp.header(
        header,
            style='grid').split('\n'):
        logger.info(line)

    poolingfunction_name = config_parameters['poolingfunction']
    pooling_function = parse_poolingfunction(poolingfunction_name)
    for epoch in range(1, config_parameters['epochs']+1):
        train_utt_loss_mean_std, train_utt_acc, train_auc_utt = runepoch(
            train_dataloader, model, criterion, optimizer, dotrain=True, poolfun=pooling_function)
        cv_utt_loss_mean_std, cv_utt_acc, cv_auc_utt = runepoch(
            cv_dataloader, model,  criterion, dotrain=False, poolfun=pooling_function)
        logger.info(
            tp.row(
                (epoch,) +
                (train_utt_loss_mean_std[0],
                 cv_utt_loss_mean_std[0],
                 train_utt_acc, cv_utt_acc, cv_auc_utt),
                style='grid'))
        epoch_meanloss = cv_utt_loss_mean_std[0]
        if epoch % config_parameters['saveinterval'] == 0:
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': many_hot_encoder,
                        'config': config_parameters},
                       os.path.join(outputdir, 'model_{}.th'.format(epoch)))
        # ReduceOnPlateau needs a value to work
        schedarg = epoch_meanloss if scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None
        scheduler.step(schedarg)
        if criterion_improved(epoch_meanloss):
            torch.save({'model': model,
                        'scaler': scaler,
                        'encoder': many_hot_encoder,
                        'config': config_parameters},
                       trainedmodelpath)
        if optimizer.param_groups[0]['lr'] < 1e-7:
            break
    logger.info(tp.bottom(len(header), style='grid'))
    logger.info("Results are in: {}".format(outputdir))
    return outputdir