Example #1
def get_variables_with_prefix(prefix):
    var_list = ops.trainable_variables()
    new_list = []

    for var in var_list:
        if var.name.startswith(prefix):
            new_list.append(var)

    return new_list
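
A minimal usage sketch (assuming the project's ops module is importable and parameters were created under a scope such as "rnnsearch/decoder"; the prefix below is hypothetical):

# collect only the decoder parameters by their scope prefix
decoder_vars = get_variables_with_prefix("rnnsearch/decoder")
print "decoder parameters:", [v.name for v in decoder_vars]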
Example #2
def evaluate(args):
    option, params = load_model(args.model)
    model = build_model(**option)
    var_list = ops.trainable_variables()
    set_variables(var_list, params)

    if args.sntcost:
        for costs in evaluate_snt_cost(model, option, args.source, args.target,
                                       args.batch, args.normalize):
            for cost in costs:
                sys.stdout.write("{}\n".format(cost))
    else:
        # use the first model
        svocabs, tvocabs = model.option["vocabulary"]
        unk_symbol = model.option["unk"]
        eos_symbol = model.option["eos"]

        svocab, isvocab = svocabs
        tvocab, itvocab = tvocabs
        if args.align:
            inputs = [args.source, args.target[0], args.align]
        else:
            inputs = [args.source, args.target[0]]

        reader = textreader(inputs, False)
        stream = textiterator(reader, [args.batch, args.batch])

        for data in stream:
            xdata, xmask = convert_data(data[0], svocab, unk_symbol,
                                        eos_symbol)
            ydata, ymask = convert_data(data[1], tvocab, unk_symbol,
                                        eos_symbol)

            if not args.align:
                align = None
            else:
                align = convert_align(data[0], data[1], data[2])

            cost = evaluate_model(model,
                                  xdata,
                                  xmask,
                                  ydata,
                                  ymask,
                                  align,
                                  verbose=args.verbose)

            for i in range(len(cost)):
                if args.verbose:
                    sys.stdout.write("src: %s\n" % data[0][i])
                    sys.stdout.write("tgt: %s\n" % data[1][i])
                sys.stdout.write("cost: %f\n" % cost[i])

        stream.close()
Example #3
def decode(args):
    option, params = load_model(args.model)
    model = rnnsearch(**option)
    set_variables(ops.trainable_variables(), params)

    # use the first model
    svocabs, tvocabs = model.option["vocabulary"]
    unk_sym = model.option["unk"]
    eos_sym = model.option["eos"]

    count = 0

    svocab, isvocab = svocabs
    tvocab, itvocab = tvocabs

    option = {}
    option["maxlen"] = args.maxlen
    option["minlen"] = args.minlen
    option["beamsize"] = args.beamsize
    option["normalize"] = args.normalize

    while True:
        line = sys.stdin.readline()

        if line == "":
            break

        data = [line]
        seq, mask = convert_data(data, svocab, unk_sym, eos_sym)
        t1 = time.time()
        tlist = beamsearch(model, seq, **option)
        t2 = time.time()

        if len(tlist) == 0:
            translation = ""
            score = -10000.0
        else:
            best, score = tlist[0]
            translation = " ".join(best[:-1])

        sys.stdout.write(translation)
        sys.stdout.write("\n")

        count = count + 1
        sys.stderr.write(str(count) + " ")
        sys.stderr.write(str(score) + " " + str(t2 - t1) + "\n")
Example #4
def sample(args):
    option, values = load_model(args.model)
    model = build_model(**option)
    set_variables(ops.trainable_variables(), values)

    svocabs, tvocabs = model.option["vocabulary"]
    unk_symbol = model.option["unk"]
    eos_symbol = model.option["eos"]

    svocab, isvocab = svocabs
    tvocab, itvocab = tvocabs

    count = 0

    batch = args.batch

    while True:
        line = sys.stdin.readline()

        if line == "":
            break

        data = [line]
        seq, mask = convert_data(data, svocab, unk_symbol, eos_symbol)
        t1 = time.time()
        seq = numpy.repeat(seq, batch, 1)
        mask = numpy.repeat(mask, batch, 1)
        tlist = batchsample(model, seq, mask, maxlen=args.maxlen)
        t2 = time.time()

        count = count + 1

        if len(tlist) == 0:
            sys.stdout.write("\n")
        else:
            for i in range(min(args.batch, len(tlist))):
                example = tlist[i]
                sys.stdout.write(" ".join(example))
                sys.stdout.write("\n")

        sys.stderr.write(str(count) + " " + str(t2 - t1) + "\n")
Example #5
def serialize(name, option, progress=None):
    # write to a temporary file first so an existing dump is not corrupted if saving fails
    tfd, tname = tempfile.mkstemp()
    fd = open(tname, "wb")
    params = ops.trainable_variables()
    names = [p.name for p in params]
    vals = dict([(p.name, p.get_value()) for p in params])

    if progress:
        option["#progress"] = progress

    cPickle.dump(option, fd, cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(names, fd, cPickle.HIGHEST_PROTOCOL)
    # pack parameter values into an .npz archive (numpy.savez does not compress)
    numpy.savez(fd, **vals)

    fd.close()
    os.close(tfd)
    if progress:
        del option["#progress"]
    shutil.move(tname, name)
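
The dump written above is a pickle of the options, a pickle of the parameter names, and a numpy archive of the values, concatenated in a single file. A minimal reading sketch under that assumption (the project itself uses its own load_model; deserialize below is a hypothetical helper):

import cPickle
import numpy

def deserialize(name):
    # read back in the same order: option pickle, names pickle, value archive
    with open(name, "rb") as fd:
        option = cPickle.load(fd)
        names = cPickle.load(fd)
        archive = numpy.load(fd)
        values = [archive[n] for n in names]
    return option, values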
Example #6
def serialize(name, option):
    fd = open(name, "wb")
    params = ops.trainable_variables()
    names = [p.name for p in params]
    vals = dict([(p.name, p.get_value()) for p in params])

    if option["indices"] != None:
        indices = option["indices"]
        vals["indices"] = indices
        option["indices"] = None
    else:
        indices = None

    cPickle.dump(option, fd)
    cPickle.dump(names, fd)
    # pack parameter values into an .npz archive (numpy.savez does not compress)
    numpy.savez(fd, **vals)

    # restore
    if indices is not None:
        option["indices"] = indices

    fd.close()
Example #7
    def __init__(self, inputs, outputs, cost, scopes, **option):
        """

        :param model:
        :param option:
        """
        if "variables" not in option or not option["variables"]:
            # not fine-tuning

            params = [
                param for scope in scopes
                for param in ops.trainable_variables(scope)
            ]
            # regularization_loss = ops.get_regularization_loss(scopes)
            # if regularization_loss:
            #     cost += regularization_loss
            # if option["l2_scale"]:
            #     get_l2 = ops.l2_regularizer(option["l2_scale"])
            #     cost += reduce(T.add, [get_l2(param) for param in params])

        else:
            # fine-tuning: train only the caller-specified variables
            # _logger.debug("loading specified params")
            params = option["variables"]

        grads = theano.grad(cost, params)

        gradsref = grads

        vec = [theano.shared(numpy.zeros_like(p.get_value())) for p in params]

        if "algorithm" not in option:
            option["algorithm"] = "sgd"

        if "variant" not in option:
            option["variant"] = None

        if "constraint" not in option:
            option["constraint"] = None

        if "momentum" not in option:
            option["momentum"] = False

        if "norm" not in option:
            option["norm"] = True

        if "nesterov" not in option:
            option["nesterov"] = False

        if "initialize" not in option:
            option["initialize"] = False

        if "nanguard" not in option:
            option["nanguard"] = False

        algorithm = option["algorithm"]
        variant = option["variant"]
        variant = [variant] if variant is not None else []

        if option["norm"]:
            normval = constraint.global_norm(grads)
            outputs = outputs[:]
            outputs.append(normval)

        if option["constraint"]:
            method, value = option["constraint"]
            if method == "value":
                grads = constraint.clip_by_value(grads, value[0], value[1])
            if method == "norm":
                grads = constraint.clip_by_global_norm(grads, value)

        if option["nanguard"]:
            gnorm = constraint.global_norm(gradsref)
            isnan = theano.tensor.isnan(gnorm)
            isinf = theano.tensor.isinf(gnorm)
            notfinite = theano.tensor.or_(isnan, isinf)
            newgrads = []
            for p, g in zip(params, grads):
                newgrads.append(theano.tensor.switch(notfinite, 0.1 * p, g))
            grads = newgrads

        if option["nesterov"]:
            option["momentum"] = False

        gup = []
        scan_updates = ops.get_updates()

        # append update rules
        if isinstance(scan_updates, OrderedDict):
            for key, value in scan_updates.iteritems():
                gup.append((key, value))
        else:
            gup.extend(scan_updates)

        for v, g in zip(vec, grads):
            gup.append((v, g))

        if algorithm == "sgd":
            alpha = theano.tensor.scalar()
            hparams = [alpha]
            defaults = [("alpha", 1.0)]
            svar, pup = updates.sgd_updates(params, vec, *hparams)
        elif algorithm == "adagrad":
            alpha = theano.tensor.scalar()
            epsilon = theano.tensor.scalar()
            hparams = [alpha, epsilon]
            defaults = [("alpha", 1.0), ("epsilon", 1e-6)]
            svar, pup = updates.adagrad_updates(params, vec, *hparams)
        elif algorithm == "rmsprop":
            alpha = theano.tensor.scalar()
            rho = theano.tensor.scalar()
            epsilon = theano.tensor.scalar()
            hparams = [alpha, rho, epsilon]
            defaults = [("alpha", 1e-2), ("rho", 0.99), ("epsilon", 1e-8)]
            rmsparam = hparams + variant
            svar, pup = updates.rmsprop_updates(params, vec, *rmsparam)
        elif algorithm == "rmsprop_momentum":
            alpha = theano.tensor.scalar()
            rho = theano.tensor.scalar()
            epsilon = theano.tensor.scalar()
            momentum = theano.tensor.scalar()
            hparams = [alpha, rho, epsilon, momentum]
            defaults = [("alpha", 1e-4), ("rho", 0.95), ("epsilon", 1e-4)]
            defaults.append(("moment", 0.9))
            svar, pup = updates.rmsprop_momentum_updates(params, vec, *hparams)
        elif algorithm == "adadelta":
            alpha = theano.tensor.scalar()
            rho = theano.tensor.scalar()
            epsilon = theano.tensor.scalar()
            hparams = [alpha, rho, epsilon]
            defaults = [("alpha", 1.0), ("rho", 0.95), ("epsilon", 1e-6)]
            svar, pup = updates.adadelta_updates(params, vec, *hparams)
        elif algorithm == "adam":
            alpha = theano.tensor.scalar()
            beta1 = theano.tensor.scalar()
            beta2 = theano.tensor.scalar()
            epsilon = theano.tensor.scalar()
            hparams = [alpha, beta1, beta2, epsilon]
            defaults = [("alpha", 0.001), ("beta1", 0.9), ("beta2", 0.999)]
            defaults.append(("epsilon", 1e-8))
            svar, pup = updates.adam_updates(params, vec, *hparams)
        else:
            raise ValueError("Error: " + algorithm + " is not supported")

        # restore variables used by optimizer
        if option["initialize"]:
            values = option["initialize"]
            for v1, v2 in zip(svar, values):
                v1.set_value(v2)

        if option["momentum"]:
            momentum = theano.tensor.scalar()
            hparams.append(momentum)
            defaults.append(("momentum", 0.9))
            pup = updates.apply_momentum(pup, params, momentum)

        if option["nesterov"]:
            momentum = theano.tensor.scalar()
            hparams.append(momentum)
            defaults.append(("momentum", 0.9))
            pup = updates.apply_momentum(pup, params, momentum)

        optimize = theano.function(inputs,
                                   outputs,
                                   updates=gup,
                                   on_unused_input='warn')
        update = theano.function(hparams, [],
                                 updates=pup,
                                 on_unused_input='warn')

        def wrapper(**option):
            values = []
            for item in defaults:
                name = item[0]
                val = item[1]
                if name not in option:
                    option[name] = val
                values.append(option[name])
            return update(*values)

        self.optimize = optimize
        self.update = wrapper
        self.option = option
        self.algorithm = algorithm
        self.parameter = svar
Example #8
def train(args):
    option = default_option()

    # predefined model names
    pathname, basename = os.path.split(args.model)
    modelname = get_filename(basename)
    autoname_format = os.path.join(pathname,
                                   modelname + ".iter{epoch}-{batch}.pkl")
    bestname = os.path.join(pathname, modelname + ".best.pkl")

    # load models
    if os.path.exists(args.model):
        opt, params = load_model(args.model)
        override(option, opt)
        init = False
    else:
        init = True

    if args.initialize:
        pretrain_params = load_model(args.initialize)
        pretrain_params = pretrain_params[1]
        pretrain = True
    else:
        pretrain = False

    override(option, args_to_dict(args))

    # check external validation script
    ext_val_script = option['ext_val_script']
    if not os.path.exists(ext_val_script):
        raise ValueError('File doesn\'t exist: %s' % ext_val_script)
    elif not os.access(ext_val_script, os.X_OK):
        raise ValueError('File is not executable: %s' % ext_val_script)
    # check references format
    ref_stem = None
    if option['validation'] and option['references']:
        ref_stem = misc.infer_ref_stem([option['validation']],
                                       option['references'])
        ref_stem = ref_stem[0]

    # .yaml for ultimate options
    yaml_name = "%s.settings.yaml" % modelname
    if init or not os.path.exists(yaml_name):
        with open(yaml_name, "w") as w:
            _opt = args.__dict__.copy()
            for k, v in _opt.iteritems():
                if k in option:
                    _opt[k] = option[k]
            yaml.dump(_opt, w, default_flow_style=False)
            del _opt

    print_option(option)

    # reader
    batch = option["batch"]
    sortk = option["sort"]
    shuffle = option["shuffle"]
    reader = textreader(option["corpus"], shuffle)
    processor = [data_length, data_length]

    stream = textiterator(reader, [batch, batch * sortk], processor,
                          option["limit"], option["sort"])

    # progress
    # initialize before building model
    progress = Progress(option["delay_val"], stream, option["seed"])

    # create model
    regularizer = []

    if option["l1_scale"]:
        regularizer.append(ops.l1_regularizer(option["l1_scale"]))

    if option["l2_scale"]:
        regularizer.append(ops.l2_regularizer(option["l2_scale"]))

    scale = option["scale"]
    initializer = ops.random_uniform_initializer(-scale, scale)
    regularizer = ops.sum_regularizer(regularizer)

    option["scope"] = "rnnsearch"

    model = build_model(initializer=initializer,
                        regularizer=regularizer,
                        **option)

    variables = None

    if pretrain:
        print "using pretrain"
        _pp1 = {}
        for name, val in pretrain_params:
            names = name.split('/')[1:]
            if "embedding" in names[0]:
                _pp1['/'.join(names)] = val
            else:
                _pp1['/'.join(names[1:])] = val
        matched = []
        not_matched = []
        for var in ops.trainable_variables():
            names = var.name.split('/')[1:]
            if "decoder2" in var.name:
                not_matched.append((var.name, var.get_value().size))
                continue

            if "embedding" in names[0]:
                match_name = '/'.join(names)
                var.set_value(_pp1[match_name])
            else:
                match_name = '/'.join(names[1:])
                var.set_value(_pp1[match_name])
            matched.append((var.name, var.get_value().size))
        print "------------------- matched -------------------"
        for name, size in matched:
            print name, size
        print "------------------- not matched -------------------"
        for name, size in not_matched:
            print name, size
        print "------------------- end -------------------\n"

    if not init:
        set_variables(ops.trainable_variables(), params)

    print "parameters: %d\n" % count_parameters(ops.trainable_variables())

    # tuning option
    tune_opt = {}
    tune_opt["algorithm"] = option["optimizer"]
    tune_opt["constraint"] = ("norm", option["norm"])
    tune_opt["norm"] = True
    tune_opt["variables"] = variables

    # create optimizer
    scopes = [".*"]

    trainer = optimizer(model.inputs, model.outputs, model.cost, scopes,
                        **tune_opt)

    # vocabulary and special symbol
    svocabs, tvocabs = option["vocabulary"]
    svocab, isvocab = svocabs
    tvocab, itvocab = tvocabs
    unk_sym = option["unk"]
    eos_sym = option["eos"]

    alpha = option["alpha"]

    maxepoch = option["maxepoch"]

    # restore right before training so that resuming does not alter the random state
    if not args.reset:
        if "#progress" in option:
            print 'Restore progress >>'
            progress = option["#progress"]
            stream = progress.iterator
            stream.set_processor(processor)
            for ttt in progress.task_manager.tasks:
                ttt.status = 4
                ttt.result = 0.0
        else:
            print 'New progress >>'
    else:
        print 'Discard progress >>'

    # setup progress
    progress.oldname = args.model
    progress.serializer = serialize

    stream = progress.iterator
    overwrite = not args.no_overwrite

    if progress.task_manager:
        print progress.task_manager

    try:
        while progress.epoch < maxepoch:
            epc = progress.epoch
            for data in stream:
                progress.tic()
                if progress.failed():
                    raise RuntimeError("progress failure")
                xdata, xmask = convert_data(data[0], svocab, unk_sym, eos_sym)
                ydata, ymask = convert_data(data[1], tvocab, unk_sym, eos_sym)
                bydata, _ = convert_data(data[1], tvocab, unk_sym, eos_sym,
                                         True)

                t1 = time.time()
                tot_cost, soft_cost, true_cost, norm = trainer.optimize(
                    xdata, xmask, ydata, ymask, bydata)
                trainer.update(alpha=alpha)
                t2 = time.time()

                # per word cost
                w_cost = true_cost * ymask.shape[1] / ymask.sum()

                progress.batch_count += 1
                progress.batch_total += 1
                progress.loss_hist.append(w_cost)

                count = progress.batch_count

                if not args.pfreq or count % args.pfreq == 0:
                    print epc + 1, progress.batch_count, w_cost, tot_cost, soft_cost, true_cost, norm, t2 - t1

                if count % option["vfreq"] == 0 and not should_skip_val(
                        args.skip_val, option["vfreq"], epc,
                        progress.batch_total):
                    if option["validation"] and option["references"]:
                        progress.add_valid(option['scope'],
                                           option['validation'], ref_stem,
                                           ext_val_script, __file__, option,
                                           modelname, bestname, serialize)

                # save after validation
                progress.toc()
                if count % option["freq"] == 0:
                    progress.save(option, autoname_format, overwrite)

                progress.tic()
                if count % option["sfreq"] == 0:
                    n = len(data[0])
                    ind = numpy.random.randint(0, n)
                    sdata = data[0][ind]
                    tdata = data[1][ind]
                    xdata = xdata[:, ind:ind + 1]
                    xmask = xmask[:, ind:ind + 1]
                    hls = beamsearch(model, xdata, xmask)
                    best, score = hls[0]
                    print "--", sdata
                    print "--", tdata
                    print "--", " ".join(best[:-1])
                progress.toc()
            print "--------------------------------------------------"
            progress.tic()
            if option["validation"] and option["references"]:
                progress.add_valid(option['scope'], option['validation'],
                                   ref_stem, ext_val_script, __file__, option,
                                   modelname, bestname, serialize)
            print "--------------------------------------------------"

            progress.toc()
            # decay the learning rate after the early-stopping epoch
            if epc + 1 >= option["stop"]:
                alpha = alpha * option["decay"]

            stream.reset()

            progress.epoch += 1
            progress.batch_count = 0
            # update autosave
            option["alpha"] = alpha
            progress.save(option, autoname_format, overwrite)

        stream.close()

        progress.tic()
        print "syncing ..."
        progress.barrier()  # hangup and wait
        progress.toc()

        best_valid = max(progress.valid_hist, key=lambda item: item[1])
        (epc, count), score = best_valid

        print "best bleu {}-{}: {:.4f}".format(epc + 1, count, score)

        if progress.delay_val:
            task_elapse = sum(
                [task.elapse for task in progress.task_manager.tasks])
            print "training finished in {}({})".format(
                datetime.timedelta(seconds=int(progress.elapse)),
                datetime.timedelta(seconds=int(progress.elapse + task_elapse)))
        else:
            print "training finished in {}".format(
                datetime.timedelta(seconds=int(progress.elapse)))
        progress.save(option, autoname_format, overwrite)

    except KeyboardInterrupt:
        traceback.print_exc()
        progress.terminate()
        sys.exit(1)
    except Exception:
        traceback.print_exc()
        progress.terminate()
        sys.exit(1)
Example #9
def train(args):
    option = default_option()

    # predefined model names
    pathname, basename = os.path.split(args.model)
    modelname = get_filename(basename)
    autoname_format = os.path.join(pathname, modelname + ".iter{epoch}-{batch}.pkl")
    bestname = os.path.join(pathname, modelname + ".best.pkl")

    # load models
    if os.path.exists(args.model):
        opt, params = load_model(args.model)
        override(option, opt)
        init = False
    else:
        init = True

    if args.initialize:
        print "initialize:", args.initialize
        pretrain_params = load_model(args.initialize)
        pretrain_params = pretrain_params[1]
        pretrain = True
    else:
        pretrain = False

    override(option, args_to_dict(args))

    # check external validation script
    ext_val_script = option['ext_val_script']
    if not os.path.exists(ext_val_script):
        raise ValueError('File doesn\'t exist: %s' % ext_val_script)
    elif not os.access(ext_val_script, os.X_OK):
        raise ValueError('File is not executable: %s' % ext_val_script)

    # check references format
    ref_stem = option['references']
    if option['validation'] and option['references']:
        ref_stem = misc.infer_ref_stem([option['validation']], option['references'])
        ref_stem = ref_stem[0]

    # .yaml for ultimate options
    yaml_name = "%s.settings.yaml" % modelname
    if init or not os.path.exists(yaml_name):
        with open(yaml_name, "w") as w:
            _opt = args.__dict__.copy()
            for k, v in _opt.iteritems():
                if k in option:
                    _opt[k] = option[k]
            yaml.dump(_opt, w,
                      default_flow_style=False)
            del _opt

    print_option(option)

    # reader
    batch = option["batch"]
    sortk = option["sort"]
    shuffle = option["shuffle"]
    reader = textreader(option["corpus"][:3], shuffle)
    processor = [data_length, data_length, data_length]
    stream = textiterator(reader, [batch, batch * sortk], processor,
                          option["limit"], option["sort"])

    reader = textreader(option["corpus"][3:], shuffle)
    processor = [data_length, data_length, data_length]
    dstream = textiterator(reader, [batch, batch * sortk], processor,
                           None, option["sort"])

    # progress
    # initialize before building model
    progress = Progress(option["delay_val"], stream, option["seed"])

    # create model
    regularizer = []

    if option["l1_scale"]:
        regularizer.append(ops.l1_regularizer(option["l1_scale"]))

    if option["l2_scale"]:
        regularizer.append(ops.l2_regularizer(option["l2_scale"]))

    scale = option["scale"]
    initializer = ops.random_uniform_initializer(-scale, scale)
    regularizer = ops.sum_regularizer(regularizer)

    option["scope"] = "rnnsearch"

    model = build_model(initializer=initializer, regularizer=regularizer,
                        **option)

    variables = None

    if pretrain:
        matched, not_matched = match_variables(ops.trainable_variables(),
                                               pretrain_params)
        if args.finetune:
            variables = not_matched
            if not variables:
                raise RuntimeError("no variables to finetune")

    if pretrain:
        restore_variables(matched, not_matched)

    if not init:
        set_variables(ops.trainable_variables(), params)

    print "parameters: %d\n" % count_parameters(ops.trainable_variables())

    # tuning option
    tune_opt = {}
    tune_opt["algorithm"] = option["optimizer"]
    tune_opt["constraint"] = ("norm", option["norm"])
    tune_opt["norm"] = True
    tune_opt["variables"] = variables

    # create optimizer
    scopes = ["((?!Shared).)*$"]
    trainer = optimizer(model.inputs, model.outputs, model.cost, scopes, **tune_opt)
    clascopes = [".*(Shared).*"]
    clatrainer = optimizer(model.inputs_cla, model.outputs_cla, model.cost_cla, clascopes, **tune_opt)

    #scopes = [".*(DSAenc).*"]
    #domain_trainer = optimizer(model.inputs, model.toutputs, model.domaincost, scopes, **tune_opt)

    # vocabulary and special symbol
    svocabs, tvocabs = option["vocabulary"]
    svocab, isvocab = svocabs
    tvocab, itvocab = tvocabs
    unk_sym = option["unk"]
    eos_sym = option["eos"]

    alpha = option["alpha"]

    maxepoch = option["maxepoch"]

    # restore right before training so that resuming does not alter the random state
    if not args.reset:
        if "#progress" in option:
            print 'Restore progress >>'
            progress = option["#progress"]
            stream = progress.iterator
            stream.set_processor(processor)
        else:
            print 'New progress >>'
    else:
        print 'Discard progress >>'

    if args.drop_tasks:
        print 'drop tasks'
        progress.drop_tasks()

    # setup progress
    progress.oldname = args.model
    progress.serializer = serialize

    stream = progress.iterator
    overwrite = not args.no_overwrite

    if progress.task_manager:
        print progress.task_manager

    register_killer()

    tagvocab = {}
    for idx, d in enumerate(option["dvocab"]):
        tagvocab[d] = idx

    if len(tagvocab) != option["dnum"]:
        raise ValueError('length of domain vocab %d not equal to domain num %d!'
                         % (len(tagvocab), option["dnum"]))

    try:
        while progress.epoch < maxepoch:
            epc = progress.epoch
            for data in stream:
                progress.tic()
                if progress.failed():
                    raise RuntimeError("progress failure")
                # data = _stream.next()
                xdata, xmask = convert_data(data[0], svocab, unk_sym, eos_sym)
                ydata, ymask = convert_data(data[1], tvocab, unk_sym, eos_sym)
                tag = convert_tag(data[2], tagvocab)

                t1 = time.time()
                cost, dcost, scost, tdcost, norm = trainer.optimize(xdata, xmask, ydata, ymask, tag)
                clacost, _ = clatrainer.optimize(xdata, xmask, tag)
                trainer.update(alpha=alpha)
                clatrainer.update(alpha=alpha)

                t2 = time.time()

                # per word cost
                w_cost = cost * ymask.shape[1] / ymask.sum()

                progress.batch_count += 1
                progress.batch_total += 1
                progress.loss_hist.append(w_cost)

                count = progress.batch_count

                if not args.pfreq or count % args.pfreq == 0:
                    print epc + 1, progress.batch_count, w_cost, dcost, tdcost, scost, clacost, norm, t2 - t1

                if count % option["sfreq"] == 0:
                    dright = 0.0
                    sright = 0.0
                    tdright = 0.0
                    total = 0.0
                    for ddata in dstream:
                        txdata, txmask = convert_data(ddata[0], svocab, unk_sym, eos_sym)
                        tydata, tymask = convert_data(ddata[1], tvocab, unk_sym, eos_sym)
                        txtag = convert_tag(ddata[2], tagvocab)
                        dtag_pred, stag_pred = model.tag_predict(txdata, txmask)
                        txtag = txtag[0]
                        dpretag = []
                        for i in dtag_pred:
                            dpretag.append(int(i))

                        spretag = []
                        for i in stag_pred:
                            spretag.append(int(i))

                        tdtag_pred = model.tgt_tag_predict(txdata, txmask, tydata, tymask)
                        tdpretag = []
                        for i in tdtag_pred[0]:
                            tdpretag.append(int(i))

                        dright = dright + sum([m == n for m, n in zip(txtag, dpretag)])
                        sright = sright + sum([m == n for m, n in zip(txtag, spretag)])
                        tdright = tdright + sum([m == n for m, n in zip(txtag, tdpretag)])
                        total = total + len(dpretag)
                    dstream.reset()
                    dacc = dright * 1.0 / total
                    sacc = sright * 1.0 / total
                    tdacc = tdright * 1.0 / total
                    print "dacc:", dright, dacc
                    print "sacc", sright, sacc
                    print "tdacc", tdright, tdacc

                if count % option["vfreq"] == 0 and not should_skip_val(args.skip_val, option["vfreq"], epc,
                                                                        progress.batch_total):
                    if option["validation"] and option["references"]:
                        progress.add_valid(option['scope'], option['validation'], ref_stem, ext_val_script, __file__,
                                           option, modelname, bestname, serialize)

                # save after validation
                progress.toc()

                if count % option["freq"] == 0:
                    progress.save(option, autoname_format, overwrite)

                progress.tic()

                if count % option["sfreq"] == 0:
                    n = len(data[0])
                    ind = numpy.random.randint(0, n)
                    sdata = data[0][ind]
                    tdata = data[1][ind]
                    xdata = xdata[:, ind: ind + 1]
                    xmask = xmask[:, ind: ind + 1]

                    hls = beamsearch(model, xdata, xmask)
                    best, score = hls[0]

                    print "--", sdata
                    print "--", tdata
                    print "--", " ".join(best[:-1])
                progress.toc()
            print "--------------------------------------------------"
            progress.tic()
            if option["validation"] and option["references"]:
                progress.add_valid(option['scope'], option['validation'], ref_stem, ext_val_script, __file__, option,
                                   modelname, bestname, serialize)
            print "--------------------------------------------------"

            progress.toc()

            print "epoch cost {}".format(numpy.mean(progress.loss_hist))
            progress.loss_hist = []

            # decay the learning rate after the early-stopping epoch
            if epc + 1 >= option["stop"]:
                alpha = alpha * option["decay"]

            stream.reset()

            progress.epoch += 1
            progress.batch_count = 0
            # update autosave
            option["alpha"] = alpha
            progress.save(option, autoname_format, overwrite)

        stream.close()

        progress.tic()
        print "syncing ..."
        progress.barrier()  # hangup and wait
        progress.toc()

        best_valid = max(progress.valid_hist, key=lambda item: item[1])
        (epc, count), score = best_valid

        print "best bleu {}-{}: {:.4f}".format(epc + 1, count, score)

        if progress.delay_val:
            task_elapse = sum([task.elapse for task in progress.task_manager.tasks])
            print "training finished in {}({})".format(datetime.timedelta(seconds=int(progress.elapse)),
                                                       datetime.timedelta(seconds=int(progress.elapse + task_elapse)))
        else:
            print "training finished in {}".format(datetime.timedelta(seconds=int(progress.elapse)))
        progress.save(option, autoname_format, overwrite)


    except KeyboardInterrupt:
        traceback.print_exc()
        progress.terminate()
        sys.exit(1)
    except Exception:
        traceback.print_exc()
        progress.terminate()
        sys.exit(1)
Example #10
    def _define_net(self):
        self.prune_rate = tf.placeholder(dtype='float',
                                         shape=[18, 1],
                                         name='prune_rate')

        self.x = tf.placeholder('float', shape=[None, 3072])
        self.y = tf.placeholder(tf.int64, shape=[None])

        x_image = tf.reshape(self.x, [-1, 3, 32, 32])

        x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])

        #------------------------ Net Begin ------------------------#

        w0 = trainable_variables(shape=[7, 7, 3, 16], name='w0')
        b0 = trainable_variables(shape=[16], name='b0')
        conv0_out = conv_layers(inpt=x_image,
                                kernel=pruned_weights(w0, self.prune_rate[0]),
                                bias=b0,
                                strides=[1, 1, 1, 1])

        pooling0 = tf.layers.max_pooling2d(conv0_out, (2, 2), (2, 2),
                                           padding='same',
                                           name='pooling0')

        w1_1 = trainable_variables(shape=[3, 3, 16, 16], name='w1_1')
        b1_1 = trainable_variables(shape=[16], name='b1_1')
        conv1_1 = conv_layers(pooling0,
                              pruned_weights(w1_1, self.prune_rate[1]),
                              bias=b1_1,
                              strides=[1, 1, 1, 1])

        w1_2 = trainable_variables(shape=[3, 3, 16, 16], name='w1_2')
        b1_2 = trainable_variables(shape=[16], name='b1_2')
        conv1_2 = conv_layers(conv1_1,
                              pruned_weights(w1_2, self.prune_rate[2]),
                              bias=b1_2,
                              strides=[1, 1, 1, 1])

        res1_1 = conv1_2 + pooling0

        w1_3 = trainable_variables(shape=[3, 3, 16, 16], name='w1_3')
        b1_3 = trainable_variables(shape=[16], name='b1_3')
        conv1_3 = conv_layers(conv1_2,
                              pruned_weights(w1_3, self.prune_rate[3]),
                              bias=b1_3,
                              strides=[1, 1, 1, 1])

        w1_4 = trainable_variables(shape=[3, 3, 16, 16], name='w1_4')
        b1_4 = trainable_variables(shape=[16], name='b1_4')
        conv1_4 = conv_layers(conv1_3,
                              pruned_weights(w1_4, self.prune_rate[4]),
                              bias=b1_4,
                              strides=[1, 1, 1, 1])

        res1_2 = res1_1 + conv1_4

        pooling1 = tf.layers.max_pooling2d(res1_2, (2, 2), (2, 2),
                                           padding='same',
                                           name='pooling1')

        w2_1 = trainable_variables(shape=[3, 3, 16, 32], name='w2_1')
        b2_1 = trainable_variables(shape=[32], name='b2_1')
        conv2_1 = conv_layers(pooling1,
                              pruned_weights(w2_1, self.prune_rate[5]),
                              bias=b2_1,
                              strides=[1, 1, 1, 1])

        w2_2 = trainable_variables(shape=[3, 3, 32, 32], name='w2_2')
        b2_2 = trainable_variables(shape=[32], name='b2_2')
        conv2_2 = conv_layers(conv2_1,
                              pruned_weights(w2_2, self.prune_rate[6]),
                              bias=b2_2,
                              strides=[1, 1, 1, 1])

        res2_1 = tf.pad(pooling1, [[0, 0], [0, 0], [0, 0], [8, 8]]) + conv2_2

        w2_3 = trainable_variables(shape=[3, 3, 32, 32], name='w2_3')
        b2_3 = trainable_variables(shape=[32], name='b2_3')
        conv2_3 = conv_layers(conv2_2,
                              pruned_weights(w2_3, self.prune_rate[7]),
                              bias=b2_3,
                              strides=[1, 1, 1, 1])

        w2_4 = trainable_variables(shape=[3, 3, 32, 32], name='w2_4')
        b2_4 = trainable_variables(shape=[32], name='b2_4')
        conv2_4 = conv_layers(conv2_3,
                              pruned_weights(w2_4, self.prune_rate[8]),
                              bias=b2_4,
                              strides=[1, 1, 1, 1])

        res2_2 = res2_1 + conv2_4

        pooling2 = tf.layers.max_pooling2d(res2_2, (2, 2), (2, 2),
                                           padding='same',
                                           name='pooling2')

        w3_1 = trainable_variables(shape=[3, 3, 32, 64], name='w3_1')
        b3_1 = trainable_variables(shape=[64], name='b3_1')
        conv3_1 = conv_layers(pooling2,
                              pruned_weights(w3_1, self.prune_rate[9]),
                              bias=b3_1,
                              strides=[1, 1, 1, 1])

        w3_2 = trainable_variables(shape=[3, 3, 64, 64], name='w3_2')
        b3_2 = trainable_variables(shape=[64], name='b3_2')
        conv3_2 = conv_layers(conv3_1,
                              pruned_weights(w3_2, self.prune_rate[10]),
                              bias=b3_2,
                              strides=[1, 1, 1, 1])

        res3_1 = tf.pad(pooling2, [[0, 0], [0, 0], [0, 0], [16, 16]]) + conv3_2

        w3_3 = trainable_variables(shape=[3, 3, 64, 64], name='w3_3')
        b3_3 = trainable_variables(shape=[64], name='b3_3')
        conv3_3 = conv_layers(conv3_2,
                              pruned_weights(w3_3, self.prune_rate[11]),
                              bias=b3_3,
                              strides=[1, 1, 1, 1])

        w3_4 = trainable_variables(shape=[3, 3, 64, 64], name='w3_4')
        b3_4 = trainable_variables(shape=[64], name='b3_4')
        conv3_4 = conv_layers(conv3_3,
                              pruned_weights(w3_4, self.prune_rate[12]),
                              bias=b3_4,
                              strides=[1, 1, 1, 1])

        res3_2 = res3_1 + conv3_4

        pooling3 = tf.layers.max_pooling2d(res3_2, (2, 2), (2, 2),
                                           padding='same',
                                           name='pooling3')

        w4_1 = trainable_variables(shape=[3, 3, 64, 128], name='w4_1')
        b4_1 = trainable_variables(shape=[128], name='b4_1')
        conv4_1 = conv_layers(pooling3,
                              pruned_weights(w4_1, self.prune_rate[13]),
                              bias=b4_1,
                              strides=[1, 1, 1, 1])

        w4_2 = trainable_variables(shape=[3, 3, 128, 128], name='w4_2')
        b4_2 = trainable_variables(shape=[128], name='b4_2')
        conv4_2 = conv_layers(conv4_1,
                              pruned_weights(w4_2, self.prune_rate[14]),
                              bias=b4_2,
                              strides=[1, 1, 1, 1])

        res4_1 = tf.pad(pooling3, [[0, 0], [0, 0], [0, 0], [32, 32]]) + conv4_2

        w4_3 = trainable_variables(shape=[3, 3, 128, 128], name='w4_3')
        b4_3 = trainable_variables(shape=[128], name='b4_3')
        conv4_3 = conv_layers(conv4_2,
                              pruned_weights(w4_3, self.prune_rate[15]),
                              bias=b4_3,
                              strides=[1, 1, 1, 1])

        w4_4 = trainable_variables(shape=[3, 3, 128, 128], name='w4_4')
        b4_4 = trainable_variables(shape=[128], name='b4_4')
        conv4_4 = conv_layers(conv4_3,
                              pruned_weights(w4_4, self.prune_rate[16]),
                              bias=b4_4,
                              strides=[1, 1, 1, 1])

        res4_2 = res4_1 + conv4_4

        global_average_pooling = tf.reduce_mean(res4_2, axis=[1, 2])

        w_fc = trainable_variables(shape=[128, 10], name='w_fc')
        b_fc = trainable_variables(shape=[10], name='b_fc')
        y_ = tf.matmul(global_average_pooling,
                       pruned_weights(w_fc, self.prune_rate[17])) + b_fc
        #------------------------ Net End ------------------------#

        predict = tf.argmax(y_, 1)

        correct_prediction = tf.equal(predict, self.y)

        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

        saver = tf.train.Saver({
            'w0': w0,
            'b0': b0,
            'w1_1': w1_1,
            'b1_1': b1_1,
            'w1_2': w1_2,
            'b1_2': b1_2,
            'w1_3': w1_3,
            'b1_3': b1_3,
            'w1_4': w1_4,
            'b1_4': b1_4,
            'w2_1': w2_1,
            'b2_1': b2_1,
            'w2_2': w2_2,
            'b2_2': b2_2,
            'w2_3': w2_3,
            'b2_3': b2_3,
            'w2_4': w2_4,
            'b2_4': b2_4,
            'w3_1': w3_1,
            'b3_1': b3_1,
            'w3_2': w3_2,
            'b3_2': b3_2,
            'w3_3': w3_3,
            'b3_3': b3_3,
            'w3_4': w3_4,
            'b3_4': b3_4,
            'w4_1': w4_1,
            'b4_1': b4_1,
            'w4_2': w4_2,
            'b4_2': b4_2,
            'w4_3': w4_3,
            'b4_3': b4_3,
            'w4_4': w4_4,
            'b4_4': b4_4,
            'w_fc': w_fc,
            'b_fc': b_fc
        })
        saver.restore(self.sess, model_path)

        self.acc = accuracy
Example #11
def train(args):
    option = default_option()

    # predefined model names
    pathname, basename = os.path.split(args.model)
    modelname = get_filename(basename)
    autoname = os.path.join(pathname, modelname + ".autosave.pkl")
    bestname = os.path.join(pathname, modelname + ".best.pkl")

    # load models
    if os.path.exists(args.model):
        opt, params = load_model(args.model)
        option = opt
        init = False
    else:
        init = True

    if args.initialize:
        init_params = load_model(args.initialize)
        init_params = init_params[1]
        restore = True
    else:
        restore = False

    override(option, args_to_dict(args))
    print_option(option)

    # load references
    if option["references"]:
        references = load_references(option["references"])
    else:
        references = None

    if args.skip_val:
        references = None

    criterion = option["criterion"]

    if criterion == "mrt":
        sys.stderr.write("warning: In MRT mode, batch is set to 1\n")

    # input corpus
    batch = option["batch"] if criterion == "mle" else 1
    sortk = (option["sort"] or 1) if criterion == "mle" else 1
    shuffle = option["seed"] if option["shuffle"] else None
    reader = textreader(option["corpus"], shuffle)
    processor = [data_length, data_length]
    stream = textiterator(reader, [batch, batch * sortk], processor,
                          option["limit"], option["sort"])

    if shuffle and option["indices"] is not None:
        reader.set_indices(option["indices"])

    if args.reset:
        option["count"] = [0, 0]
        option["epoch"] = 0
        option["cost"] = 0.0

    skip_stream(reader, option["count"][1])
    epoch = option["epoch"]
    maxepoch = option["maxepoch"]

    # create model
    regularizer = []

    if option["l1_scale"]:
        regularizer.append(ops.l1_regularizer(option["l1_scale"]))

    if option["l2_scale"]:
        regularizer.append(ops.l2_regularizer(option["l2_scale"]))

    scale = option["scale"]
    initializer = ops.random_uniform_initializer(-scale, scale)
    regularizer = ops.sum_regularizer(regularizer)
    # set seed
    numpy.random.seed(option["seed"])
    model = rnnsearch(initializer=initializer, regularizer=regularizer,
                      **option)

    variables = None

    if restore:
        matched, not_matched = match_variables(ops.trainable_variables(),
                                               init_params)
        if args.finetune:
            variables = not_matched
            if not variables:
                raise RuntimeError("no variables to finetune")

    if not init:
        set_variables(ops.trainable_variables(), params)

    if restore:
        restore_variables(matched, not_matched)

    print "parameters:", count_parameters(ops.trainable_variables())

    # tuning option
    tune_opt = {}
    tune_opt["algorithm"] = option["optimizer"]
    tune_opt["constraint"] = ("norm", option["norm"])
    tune_opt["norm"] = True
    tune_opt["variables"] = variables

    # create optimizer
    trainer = optimizer(model, **tune_opt)

    # beamsearch option
    search_opt = {}
    search_opt["beamsize"] = option["beamsize"]
    search_opt["normalize"] = option["normalize"]
    search_opt["maxlen"] = option["maxlen"]
    search_opt["minlen"] = option["minlen"]

    # vocabulary and special symbol
    svocabs, tvocabs = option["vocabulary"]
    svocab, isvocab = svocabs
    tvocab, itvocab = tvocabs
    unk_sym = option["unk"]
    eos_sym = option["eos"]

    # summary
    count = option["count"][0]
    totcost = option["cost"]
    best_score = option["bleu"]
    alpha = option["alpha"]
    sharp = option["sharp"]

    for i in range(epoch, maxepoch):
        for data in stream:
            xdata, xmask = convert_data(data[0], svocab, unk_sym, eos_sym)
            ydata, ymask = convert_data(data[1], tvocab, unk_sym, eos_sym)

            if criterion == "mrt":
                refs = []

                for item in data[1]:
                    item = item.split()
                    item = [unk_sym if word not in tvocab else word
                            for word in item]
                    refs.append(" ".join(item))

                t1 = time.time()

                # sample from model
                nsample = option["sample"] - len(refs)
                xdata = numpy.repeat(xdata, nsample, 1)
                xmask = numpy.repeat(xmask, nsample, 1)
                maxlen = int(1.5 * len(ydata))
                examples = batchsample(model, xdata, xmask, maxlen)
                space = build_sample_space(refs, examples)
                score = numpy.zeros((len(space),), "float32")

                refs = [ref.split() for ref in refs]

                for j in range(len(space)):
                    example = space[j].split()
                    score[j] = 1.0 - bleu([example], [refs], smoothing=True)

                ydata, ymask = convert_data(space, tvocab, unk_sym, eos_sym)
                cost, norm = trainer.optimize(xdata[:, 0:1], xmask[:, 0:1],
                                              ydata, ymask, score, sharp)
                trainer.update(alpha=alpha)
                t2 = time.time()

                totcost += cost
                count += 1
                t = t2 - t1
                ac = totcost / count
                print i + 1, count, len(space), cost, norm, ac, t
            else:
                t1 = time.time()
                cost, norm = trainer.optimize(xdata, xmask, ydata, ymask)
                trainer.update(alpha=alpha)
                t2 = time.time()

                count += 1
                cost = cost * ymask.shape[1] / ymask.sum()
                totcost += cost / math.log(2)
                print i + 1, count, cost, norm, t2 - t1

            # autosave
            if count % option["freq"] == 0:
                option["indices"] = reader.get_indices()
                option["bleu"] = best_score
                option["cost"] = totcost
                option["count"] = [count, reader.count]
                serialize(autoname, option)

            if count % option["vfreq"] == 0:
                if option["validation"] and references:
                    trans = translate(model, option["validation"],
                                      **search_opt)
                    bleu_score = bleu(trans, references)
                    print "bleu: %2.4f" % bleu_score
                    if bleu_score > best_score:
                        best_score = bleu_score
                        option["indices"] = reader.get_indices()
                        option["bleu"] = best_score
                        option["cost"] = totcost
                        option["count"] = [count, reader.count]
                        serialize(bestname, option)

            if count % option["sfreq"] == 0:
                n = len(data[0])
                ind = numpy.random.randint(0, n)
                sdata = data[0][ind]
                tdata = data[1][ind]
                xdata = xdata[:, ind:ind + 1]
                xmask = xmask[:, ind:ind + 1]
                hls = beamsearch(model, xdata, xmask)
                best, score = hls[0]
                print sdata
                print tdata
                print " ".join(best[:-1])


        print "--------------------------------------------------"

        if option["validation"] and references:
            trans = translate(model, option["validation"], **search_opt)
            bleu_score = bleu(trans, references)
            print "iter: %d, bleu: %2.4f" % (i + 1, bleu_score)
            if bleu_score > best_score:
                best_score = bleu_score
                option["indices"] = reader.get_indices()
                option["bleu"] = best_score
                option["cost"] = totcost
                option["count"] = [count, reader.count]
                serialize(bestname, option)

        print "averaged cost: ", totcost / count
        print "--------------------------------------------------"

        # decay the learning rate after the early-stopping epoch
        if i + 1 >= option["stop"]:
            alpha = alpha * option["decay"]

        count = 0
        totcost = 0.0
        stream.reset()

        # update autosave
        option["epoch"] = i + 1
        option["alpha"] = alpha
        option["indices"] = reader.get_indices()
        option["bleu"] = best_score
        option["cost"] = totcost
        option["count"] = [0, 0]
        serialize(autoname, option)

    print "best(bleu): %2.4f" % best_score

    stream.close()