Example #1
def onDecodingFinish(data, output):
    # `op`, `TRG`, `UF`, and `PPL` are module-level names defined by the
    # driver script (see Example #7).
    if op == "gen":
        # Print the generated sentences in source-id order.
        for src_id, (inp, out) in sorted(output.items(), key=lambda x: x[0]):
            print(TRG.str_rpr(out))
    elif op == "cppl":
        UF.trace("Corpus PPL:", PPL(output))
        print(PPL(output))
Example #2
def train(self, x_data, y_data, learn=True, *args, **kwargs):
    # Forward pass; the third positional argument tells _train to run in
    # evaluation mode when learn is False.
    accum_loss, output = self._train(x_data, y_data, not learn, *args, **kwargs)
    if learn:
        if not math.isnan(float(accum_loss.data)):
            # Standard Chainer update: clear gradients, backprop, step.
            self._model.zerograds()
            accum_loss.backward()
            self._opt.update()
        else:
            UF.trace("Warning: LOSS is nan, ignoring!")
    return accum_loss.data, output
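A minimal sketch of how this wrapper might be driven; `trainer` stands for any object exposing the method above, and the variable names and minibatch slicing are illustrative, mirroring Example #3:

for ep in range(epoch_total):
    epoch_loss = 0.0
    for i in range(0, len(train_x), batch_size):
        loss, output = trainer.train(train_x[i:i+batch_size],
                                     train_y[i:i+batch_size])
        epoch_loss += float(loss)
    # learn=False runs the forward pass only, so no weights are updated.
    dev_loss, _ = trainer.train(dev_x, dev_y, learn=False)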
Example #3
def main():
    args = parse_args()
    init_program_state(args)
    
    # Variable
    batch_size = args.batch
    epoch_total = args.epoch

    # data
    UF.trace("Loading corpus + dictionary")
    train, label, X, Y = load_data()

    # Setup model
    UF.trace("Setting up classifier")
    model = L.Classifier(load_model(args, X, Y))
    if not args.use_cpu: model = model.to_gpu()
    opt   = optimizers.SGD()
    opt.setup(model)
    
    # Begin training
    UF.trace("Begin training MLP")
    for ep in range(epoch_total):
        UF.trace("Epoch %d" % (ep+1))
        accum_loss = 0
        for i in range(0, len(train), batch_size):
            x_data = Variable(train[i:i+batch_size])
            y_data = Variable(label[i:i+batch_size])
            model.zerograds()
            loss = model(x_data, y_data)
            # Accumulate a plain float so past batches' computation graphs
            # are not kept alive.
            accum_loss += float(loss.data)
            loss.backward()
            opt.update()
        print("Loss:", accum_loss, file=sys.stderr)
        # model.accuracy holds the accuracy of the last minibatch only.
        print("Accuracy:", model.accuracy.data, file=sys.stderr)

    # Classifying training data
    # for i in range(0, len(train), batch_size):
    #     x_data = Variable(train[i:i+batch_size])
    #     y_data = model.predictor(x_data)
    #     UF.print_classification(y_data.data, Y)

    UF.trace("Saving model....")
    with ModelFile(open(args.model_out, "w")) as model_out:
        model.predictor.save(model_out)
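parse_args() itself is not shown. A hypothetical sketch covering exactly the options this script reads (args.batch, args.epoch, args.use_cpu, args.model_out); the flag names and defaults are assumptions:

import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch", type=int, default=64)
    parser.add_argument("--epoch", type=int, default=10)
    parser.add_argument("--use_cpu", action="store_true")
    parser.add_argument("--model_out", type=str, required=True)
    return parser.parse_args()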
Example #4
def main():
    args = parse_args()
    init_program_state(args)
    
    # Variable
    batch_size = args.batch

    # Setup model
    UF.trace("Setting up classifier")
    model = load_model(args)
    if not args.use_cpu: model = model.to_gpu()

    # data
    UF.trace("Loading corpus + dictionary")
    test = load_data(sys.stdin, model._feat, model._input)
        
    # Classify the test data in minibatches and print the predictions.
    for i in range(0, len(test), batch_size):
        x_data = Variable(test[i:i+batch_size])
        y_data = model(x_data)
        UF.print_classification(y_data.data, model._trg)
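One caveat when model.to_gpu() is used: the minibatch arrays must be copied to the same device as the model before they are wrapped in Variables. A minimal sketch of the loop body with that transfer added, using Chainer's cuda.to_gpu helper; `test` is assumed to hold NumPy arrays:

from chainer import cuda

for i in range(0, len(test), batch_size):
    batch = test[i:i+batch_size]
    if not args.use_cpu:
        # Move the minibatch to the GPU to match the model's device.
        batch = cuda.to_gpu(batch)
    y_data = model(Variable(batch))
    UF.print_classification(y_data.data, model._trg)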
Example #5
def onDecodingStart():
    if op == "gen":
        UF.trace("Sentence generation started.")
    elif op == "cppl":
        UF.trace("Corpus PPL calculation started.")
    elif op == "sppl":
        UF.trace("Sentence PPL calculation started.")
Example #6
# The snippet begins mid-script; an argument guard like the following is
# assumed before the data loading below.
if len(sys.argv) != 2:
    print("Usage: %s DATA_FILE" % sys.argv[0], file=sys.stderr)
    sys.exit(1)

# Loading data
x_all, y_all = load_data(sys.argv[1])
x_train, x_test = np.split(x_all, [60000])
y_train, y_test = np.split(y_all, [60000])
train_size = len(x_train)
test_size  = len(x_test)

# Init model
model = L.Classifier(MLP())
optimizer = optimizers.SGD()
optimizer.setup(model)

# Training begins here
UF.trace("Begin training")
for epoch in range(EPOCH):
    UF.trace("Epoch %d" % (epoch+1))
    indexes = np.random.permutation(train_size)
    for i in range(0, train_size, BATCH):
        x = Variable(x_train[indexes[i:i+BATCH]])
        t = Variable(y_train[indexes[i:i+BATCH]])
        model.zerograds()
        loss = model(x,t)
        loss.backward()
        optimizer.update()

# Testing begins here
UF.trace("Begin testing")
sum_loss, sum_accuracy = 0, 0
for i in range(0, test_size, BATCH):
    # The original snippet is cut off at this loop; a standard Chainer
    # evaluation body is assumed below.
    x = Variable(x_test[i:i+BATCH])
    t = Variable(y_test[i:i+BATCH])
    loss = model(x, t)
    sum_loss += float(loss.data) * len(x.data)
    sum_accuracy += float(model.accuracy.data) * len(x.data)
UF.trace("Mean loss: %f, accuracy: %f" % (sum_loss / test_size, sum_accuracy / test_size))
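The MLP chain wrapped by L.Classifier above is not shown. A minimal sketch in the same Chainer 1.x style as the surrounding code (keyword-link Chain constructor, zerograds-era API); the layer sizes are illustrative, MNIST-like:

import chainer
import chainer.functions as F
import chainer.links as L

class MLP(chainer.Chain):
    def __init__(self):
        # Layer sizes assumed here: 784 -> 100 -> 10.
        super(MLP, self).__init__(
            l1=L.Linear(784, 100),
            l2=L.Linear(100, 10),
        )

    def __call__(self, x):
        h = F.relu(self.l1(x))
        return self.l2(h)

L.Classifier then attaches a softmax cross-entropy loss and an accuracy metric on top of this predictor, which is what model(x, t) and model.accuracy rely on above.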
Example #7
parser.add_argument("--operation", choices=["sppl", "cppl", "gen"], help="sppl: Sentence-wise ppl\ncppl: Corpus-wise ppl\ngen: Read input, start generating random words.", default="sppl")
parser.add_argument("--use_cpu", action="store_true")
parser.add_argument("--gpu", type=int, default=-1, help="Which GPU to use (Negative for cpu).")
parser.add_argument("--verbose", action="store_true")
parser.add_argument("--gen_limit", type=positive, default=50)
parser.add_argument("--eos_disc", type=float, default=0.0, help="Give fraction positive discount to output longer sentence.")
args = parser.parse_args()
op   = args.operation

if op == "sppl" and args.batch != 1:
    raise ValueError("Currently sentence based perplexity not supports multi batching.")
if args.use_cpu:
    args.gpu = -1

# Loading model
UF.trace("Setting up classifier")
model  = LanguageModel(args, use_gpu=args.gpu, collect_output=True)
VOC, _ = model.get_vocabularies()
decoding_options = {"gen_limit": args.gen_limit, "eos_disc": args.eos_disc}

# Testing callbacks
def PPL(loss):
    # `loss` may be a chainer Variable (with a .data field) or a plain float.
    try:
        return math.exp(loss.data)
    except AttributeError:
        return math.exp(loss)

def onDecodingStart():
    if op == "gen":
        UF.trace("Sentence generation started.")
    elif op == "cppl":
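The decoding driver itself is not part of the excerpt. Judging from the callbacks in Examples #1, #5, and #8, it plausibly has roughly this shape; model.decode and its arguments are assumed names, not a confirmed API:

onDecodingStart()
# Assumed decoder entry point; decoding_options is built in the setup above.
output = model.decode(data, **decoding_options)
onDecodingFinish(data, output)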
Example #8
def onDecodingStart():
    UF.trace("Decoding started.")