# Excerpt of a prediction script: writes predictions batch by batch using a
# generator chosen from config['batch_maker'].  Every name used here
# (comstok, comstart, seqdata, refcoms, model, ...) is defined outside this
# excerpt.

# Look up the '<s>' entry of the comment tokenizer and store it in the first
# slot of comstart (presumably the start-of-sequence token id -- confirm).
stk = comstok.w2i['<s>']
comstart[0] = stk

# Output path is built from the part of `outfile` before its first '.'.
outfn = outdir+"/predictions/predict-{}-cfw.txt".format(outfile.split('.')[0])
# NOTE(review): no matching close() for `outf` is visible in this excerpt.
outf = open(outfn, 'w')
print("writing to file: " + outfn)

# Consecutive slices of allfids, each at most `batchsize` ids long.
batch_sets = [allfids[i:i+batchsize] for i in range(0, len(allfids), batchsize)]

prep("computing predictions...\n")
for c, fid_set in enumerate(batch_sets):
    # `c` is the batch index; `st` looks like a start timestamp for timing
    # the batch -- its use is outside this excerpt.
    st = timer()

    for fid in fid_set:
        # Keep the existing entry as the reference, then overwrite it with
        # comstart.  Every fid is assigned the same `comstart` object, not a
        # copy.
        refcoms[fid] = seqdata['c'+testval][fid]
        seqdata['c'+testval][fid] = comstart #np.asarray([stk])

    bg = batch_gen(seqdata, testval, config, training=False)
    batch = bg.make_batch(fid_set)

    # Dispatch on the configured batch maker.  All visible branches pass the
    # same argument list and always use strat='greedy'.
    if config['batch_maker'] == 'datsonly':
        batch_results = gendescr_2inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
    elif config['batch_maker'] == 'ast':
        batch_results = gendescr_3inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
    elif config['batch_maker'] == 'ast_threed':
        batch_results = gendescr_4inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
    elif config['batch_maker'] == 'threed':
        batch_results = gendescr_threed(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
    elif config['batch_maker'] == 'graphast':
        batch_results = gendescr_graphast(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
    elif config['batch_maker'] == 'graphast_threed':
        batch_results = gendescr_5inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
    # NOTE: the excerpt is cut off here; the body of this branch is not
    # visible.
    elif config['batch_maker'] == 'pathast_threed':
# Excerpt of a training script: fills in the model configuration, builds the
# model, sets up generators and callbacks, and starts training.  All names
# read here (seqdata, multigpu, batch_size, modeltype, outdir, timestart,
# steps, epochs, ...) are defined outside this excerpt.

# Sequence lengths are taken from the first entry of each training
# dictionary; sdatlen is copied from the configuration stored with the data.
config['tdatlen'] = len(list(seqdata['dttrain'].values())[0])
config['sdatlen'] = seqdata['config']['sdatlen']
config['comlen'] = len(list(seqdata['ctrain'].values())[0])
config['smllen'] = len(list(seqdata['strain'].values())[0])

config['multigpu'] = multigpu
config['batch_size'] = batch_size

prep('creating model... ')
# create_model returns a config as well as the model; `config` is rebound to
# the returned one.
config, model = create_model(modeltype, config)
drop()

# NOTE(review): if `model` is a Keras model, summary() prints the table
# itself and returns None, so this also prints "None" -- confirm intended.
print(model.summary())

gen = batch_gen(seqdata, 'train', modeltype, config)

# Checkpoint file name contains the model type, a two-digit epoch number
# (filled in through the '{epoch:02d}' placeholder) and the start timestamp.
checkpoint = ModelCheckpoint(outdir + '/models/' + modeltype + '_E{epoch:02d}_' + str(timestart) + '.h5')
savehist = HistoryCallback()
savehist.setCatchExit(outdir, modeltype, timestart, config)

valgen = batch_gen(seqdata, 'val', modeltype, config)
callbacks = [checkpoint, savehist]

try:
    # NOTE: the excerpt is cut off inside this call; the remaining arguments
    # and the handler(s) of this `try` are not visible.
    # NOTE(review): fit_generator is deprecated in recent Keras/TensorFlow
    # releases in favour of fit() -- check the pinned version.
    history = model.fit_generator(gen, steps_per_epoch=steps, epochs=epochs, verbose=1, max_queue_size=3,
# cmd = "grep {} /nfs/projects/attn-to-fc/data/standard/output/sdats.test".format(fid) # print('sdats for {}'.format(fid)) # os.system(cmd) # print() #print(modeltypewsdats, configwsdats['batch_maker']) #print(modeltypewosdats, configwosdats['batch_maker']) #sys.exit() prep("computing predictions...\n") for c, fid_set in enumerate(batch_sets): st = timer() for fid in fid_set: seqdata['ctest'][fid] = comstart #np.asarray([stk]) bg = batch_gen(seqdata, 'test', configwsdats, training=False) batch = bg.make_batch(fid_set) if configwsdats['batch_maker'] == 'datsonly': batch_resultswsdats = gendescr_2inp(modelwsdats, batch, comstok, comlen, batchsize, configwsdats, strat, beamwidth, outfilewsdats, stopword) elif configwsdats['batch_maker'] == 'ast': batch_resultswsdats = gendescr_3inp(modelwsdats, batch, comstok, comlen, batchsize, configwsdats, strat, beamwidth, outfilewsdats, stopword, outdir) elif configwsdats['batch_maker'] == 'ast_threed': batch_resultswsdats = gendescr_4inp(modelwsdats, batch, comstok,
# Excerpt of a training script: fills in the model configuration, builds the
# model, and sets up generators and callbacks.  All names read here are
# defined outside this excerpt.
#
# NOTE: the excerpt begins inside a `try` block whose `try:` line is not
# visible.  The three assignments below belong to that block's body and are
# shown without its indentation.
config['tdatlen'] = len(list(seqdata['dttrain'].values())[0])
config['sdatlen'] = seqdata['config']['sdatlen']
config['smllen'] = len(list(seqdata['strain'].values())[0])
except KeyError:
    # A missing key aborts the rest of the try body, so any assignments after
    # the failing one are skipped as well.
    pass # some configurations do not have all data, which is fine

config['multigpu'] = multigpu
config['batch_size'] = batch_size

prep('creating model... ')
# create_model returns a config as well as the model; `config` is rebound to
# the returned one.
config, model = create_model(modeltype, config)
drop()

# NOTE(review): if `model` is a Keras model, summary() prints the table
# itself and returns None, so this also prints "None" -- confirm intended.
print(model.summary())

gen = batch_gen(seqdata, 'train', config)

#checkpoint = ModelCheckpoint(outdir+'/'+modeltype+'_E{epoch:02d}_TA{acc:.2f}_VA{val_acc:.2f}_VB{val_bleu:}.h5', monitor='val_loss')
# Checkpoint file name contains the model type, a two-digit epoch number
# (filled in through the '{epoch:02d}' placeholder) and the start timestamp.
checkpoint = ModelCheckpoint(outdir + '/models/' + modeltype + '_E{epoch:02d}_' + str(timestart) + '.h5')
savehist = HistoryCallback()
savehist.setCatchExit(outdir, modeltype, timestart, config)

valgen = batch_gen(seqdata, 'val', config)

# If you want it to calculate BLEU Score after each epoch use callback_valgen and test_cb
#####
#callback_valgen = batch_gen_train_bleu(seqdata, comvocabsize, 'val', modeltype, batch_size=batch_size)
#test_cb = mycallback(callback_valgen, steps)
#####
callbacks = [checkpoint, savehist]
# Excerpt of a prediction script: loads a saved model, opens the prediction
# file and generates results batch by batch.  All names used here
# (modelfile, allfids, seqdata, firstwords, ...) are defined outside this
# excerpt.

prep('loading model... ')
# custom_objects lets load_model resolve the non-built-in names that the
# saved model refers to.
model = keras.models.load_model(modelfile, custom_objects={"tf":tf, "keras":keras, "OurCustomGraphLayer":OurCustomGraphLayer, "SeqSelfAttention":SeqSelfAttention})
# NOTE(review): Keras Model.summary() prints the table itself and returns
# None, so this also prints "None" -- confirm intended.
print(model.summary())
drop()

# Consecutive slices of allfids, each at most `batchsize` ids long.
batch_sets = [allfids[i:i+batchsize] for i in range(0, len(allfids), batchsize)]
refs = list()
preds = list()

# NOTE(review): no matching close() for `predf` is visible in this excerpt.
predf = open('{}/predictions/{}_{}_{}.tsv'.format(outdir, modeltype, mid, timestart), 'w')

prep("computing predictions...\n")
for c, fid_set in enumerate(batch_sets):
    # `c` is the batch index; `st` looks like a start timestamp for timing
    # the batch -- its use is outside this excerpt.
    st = timer()

    bg = batch_gen(seqdata, testval, config, training=False, firstwords=firstwords)
    batch = bg.make_batch(fid_set)

    # Dispatch on the configured batch maker.  All visible branches pass the
    # same four arguments.
    if config['batch_maker'] == 'fw_datsonly':
        batch_results = gendescr_datsonly(model, batch, batchsize, config)
    elif config['batch_maker'] == 'fw_datsfc':
        batch_results = gendescr_datsfc(model, batch, batchsize, config)
    elif config['batch_maker'] == 'fw_datspc':
        batch_results = gendescr_datspc(model, batch, batchsize, config)
    elif config['batch_maker'] == 'fw_datsast':
        batch_results = gendescr_datsast(model, batch, batchsize, config)
    elif config['batch_maker'] == 'fw_datsastfc':
        batch_results = gendescr_datsastfc(model, batch, batchsize, config)
    elif config['batch_maker'] == 'fw_datsastpc':
        batch_results = gendescr_datsastpc(model, batch, batchsize, config)
    # NOTE: the excerpt is cut off here; the body of this branch is not
    # visible.
    elif config['batch_maker'] == 'fw_graphast':
# Excerpt of a training script: builds the model, pickles the configuration,
# sets up generators and the checkpoint callback, and starts training.  All
# names read here (multigpu, batch_size, modeltype, outdir, datrim,
# timestart, fwfile, firstwords, steps, epochs, ...) are defined outside
# this excerpt.

config['multigpu'] = multigpu
config['batch_size'] = batch_size

prep('creating model... ')
# create_model returns a config as well as the model; `config` is rebound to
# the returned one, and that returned config is what gets pickled below.
config, model = create_model(modeltype, config)
drop()

# NOTE(review): if `model` is a Keras model, summary() prints the table
# itself and returns None, so this also prints "None" -- confirm intended.
print(model.summary())

fn = outdir + '/histories/' + modeltype + '_conf_' + datrim + str(
    timestart) + '.pkl'  #start with saved config
# NOTE(review): no matching close() for `confoutfd` is visible in this
# excerpt.
confoutfd = open(fn, 'wb')
pickle.dump(config, confoutfd)
print('saved config to: ' + fn)

# The firstwords argument is only passed to the generators when a
# first-words file was given.
if fwfile is not None:
    gen = batch_gen(seqdata, 'train', config, firstwords=firstwords)
else:
    gen = batch_gen(seqdata, 'train', config)

# Checkpoint file name contains the model type, a two-digit epoch number
# (filled in through the '{epoch:02d}' placeholder), `datrim` and the start
# timestamp.
checkpoint = ModelCheckpoint(outdir + '/models/' + modeltype + '_E{epoch:02d}_' + datrim + str(timestart) + '.h5')

if fwfile is not None:
    valgen = batch_gen(seqdata, 'val', config, firstwords=firstwords)
else:
    valgen = batch_gen(seqdata, 'val', config)

callbacks = [checkpoint]

# NOTE: the excerpt is cut off inside this call; the remaining arguments are
# not visible.
# NOTE(review): fit_generator is deprecated in recent Keras/TensorFlow
# releases in favour of fit() -- check the pinned version.
model.fit_generator(gen, steps_per_epoch=steps, epochs=epochs, verbose=1,