def buildModels(output, tasks, connection, jobConnection, extra, dummy=False):
    """
    Build the release models.

    Submits one "train.py" job per task through the batch system. This
    function should be run on the cluster.

    output -- forwarded to batch() as its "output" argument
    tasks -- iterable of task names; "GE11" and "GE09" are passed to
             train.py with ".2" appended
    connection -- connection used by the batch system itself
    jobConnection -- connection string placed after "-c" on the train.py
                     command line
    extra -- additional command line text appended to every command, or
             None for no additions
    dummy -- forwarded to batch() as its "dummy" argument
    """
    global mainTEESDir
    from batch import batch
    for task in tasks:
        taskName = task
        if task in ["GE11", "GE09"]:
            taskName += ".2"
        command = "python " + os.path.join(mainTEESDir, "train.py") + " -t " + taskName + " -o %o/%j -c " + jobConnection + " --clearAll"
        # Identity test: None is a singleton, and "!=" could be overridden.
        if extra is not None:
            command += " " + extra
        # The unmodified task name (without ".2") is used as the job tag.
        batch(command, input=None, connection=connection, jobTag=task, output=output, debug=True, dummy=dummy)
def buildDDI13(output, connection, dummy=False, numFolds=10):
    """
    Submit one DDI13 "train.py" job per cross-validation fold.

    For fold i the test set is "train<i>", the devel set is the next two
    folds and the train set is the seven folds after those, wrapping around
    past the last fold.

    output -- forwarded to batch() as its "output" argument
    connection -- used both for the batch system and after "-c" on the
                  train.py command line
    dummy -- forwarded to batch() as its "dummy" argument
    numFolds -- number of folds to submit jobs for
    """
    global mainTEESDir
    from batch import batch
    # The fold list is doubled so the slices below can wrap around. list() is
    # required on Python 3, where range objects cannot be concatenated.
    wrappedFolds = [str(x) for x in list(range(numFolds)) * 2]
    for fold in range(numFolds):
        # NOTE(review): the slice widths (2 devel + 7 train folds) are hard-coded
        # and only partition the data cleanly when numFolds == 10.
        develFolds = wrappedFolds[fold + 1:fold + 3]
        trainFolds = wrappedFolds[fold + 3:fold + 10]
        foldParameter = " --folds test=train" + str(fold) + ":devel=train" + ",train".join(develFolds) + ":train=train" + ",train".join(trainFolds)
        command = "python " + os.path.join(mainTEESDir, "train.py") + " -t DDI13 -o %o/%j -c " + connection + " --clearAll" + foldParameter
        batch(command, input=None, connection=connection, jobTag="DDI13-fold" + str(fold), output=output, debug=True, dummy=dummy)
def generate_model(training_set_path=TRAINING_SET_PATH,
                   validation_set_path=VALIDATION_SET_PATH,
                   training_dictionary_path=TRAINING_DICTIONARY_PATH,
                   validation_dictionary_path=VALIDATION_DICTIONARY_PATH,
                   json_path=JSON_PATH,
                   h5_path=H5_PATH):
    """Train the stacked-LSTM classifier and export it to disk.

    The training and validation batches are loaded with batch.batch() from
    the given set/dictionary paths. The network is three LSTM layers, a
    dropout layer and a softmax Dense layer with `classes` outputs. After
    fitting for `epochs` epochs the architecture is written to `json_path`
    as JSON and the weights to `h5_path`.

    Returns the fitted model.
    """
    # Seed NumPy from the wall clock: every run gets a different seed, so
    # results are NOT reproducible between runs.
    np.random.seed(int(time.time()))

    # Load the training batch first, then the validation batch.
    train_inputs, train_labels = batch.batch(training_set_path,
                                             training_dictionary_path)
    val_inputs, val_labels = batch.batch(validation_set_path,
                                         validation_dictionary_path)
    print("- Training and validation sets imported." + str(train_inputs.shape))

    # Batch size is left open (None); the other two dimensions are taken
    # from the training inputs.
    input_shape = (None, train_inputs.shape[1], train_inputs.shape[2])

    network = Sequential()
    network.add(LSTM(units_first_layer,
                     return_sequences=True,
                     stateful=False,
                     batch_input_shape=input_shape))
    network.add(LSTM(units_second_layer, return_sequences=True, stateful=False))
    network.add(LSTM(units_third_layer, stateful=False))
    # Dropout to control overfitting.
    network.add(Dropout(.25))
    # Squash the output onto the classes in probability space.
    network.add(Dense(classes, activation='softmax'))

    network.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

    # Fit, then print whatever fit() returned.
    fit_result = network.fit(train_inputs,
                             train_labels,
                             epochs=epochs,
                             validation_data=(val_inputs, val_labels))
    print(fit_result)

    # Export: architecture as JSON, weights as HDF5.
    architecture = network.to_json()
    with open(json_path, "w") as json_file:
        json_file.write(architecture)
    network.save_weights(h5_path)
    print("- Model exported to disk.")

    return network
def buildModels(output, tasks, connection, dummy=False):
    """
    Build the release models by submitting one "train.py" job per task.

    This function should be run on the cluster, so the connection argument
    is the same for both the batch system and the train-program it runs.

    output -- forwarded to batch() as its "output" argument
    tasks -- iterable of task names; "GE" and "GE09" are passed to train.py
             with ".2" appended
    connection -- connection for batch() and for train.py's "-c" option
    dummy -- forwarded to batch() as its "dummy" argument
    """
    global mainTEESDir
    from batch import batch
    for task in tasks:
        # Only these two tasks are trained under a ".2" task name.
        suffix = ".2" if task in ("GE", "GE09") else ""
        trainScript = os.path.join(mainTEESDir, "train.py")
        command = "python " + trainScript
        command += " -t " + (task + suffix)
        command += " -o %o/%j -c " + connection
        command += " --clearAll"
        # The job tag is the task name without the ".2" suffix.
        batch(command,
              input=None,
              connection=connection,
              jobTag=task,
              output=output,
              debug=True,
              dummy=dummy)
def evaluateWordSort(accuracyStats, model, epoch):
    """Evaluate the model on one batch after a training epoch.

    Draws a batch of 8 from `sentenceData`, runs the model with teacher
    forcing disabled and hands the result to compareBatchAccuracy() together
    with `accuracyStats`, the input text and the reference output.
    """
    banner = 'Epoch [{}] -- Evaluate'.format(epoch)
    print(banner)

    inputs, reference, sentences = batch(sentenceData, 8)

    # The second value returned by the model is not needed here.
    predicted, _ = model(inputs, reference, teacher_force_ratio=0.)

    # Swap the two axes before comparing against the reference.
    compareBatchAccuracy(accuracyStats, sentences, reference,
                         predicted.permute(1, 0))
def buildDDI13(output, connection, dummy=False, numFolds=10, extraParameters="", testPath=""):
    """
    Submit the DDI13 "train.py" jobs: one per cross-validation fold and,
    optionally, two more for the test files.

    For fold i the test set is "train<i>", the devel set is the next two
    folds and the train set is the seven folds after those, wrapping around
    past the last fold.

    output -- forwarded to batch() as its "output" argument
    connection -- used both for the batch system and after "-c" on the
                  train.py command line
    dummy -- forwarded to batch() as its "dummy" argument
    numFolds -- number of folds to submit jobs for
    extraParameters -- text appended to every train.py command line
    testPath -- directory holding "DDI13-test-task9.1.xml" and
                "DDI13-test-task9.2.xml"; when not empty, one extra job per
                test file is submitted
    """
    global mainTEESDir
    from batch import batch
    commandBase = "python " + os.path.join(mainTEESDir, "train.py") + " -t DDI13 -o %o/%j -c " + connection + " --clearAll"
    # The fold list is doubled so the slices below can wrap around. list() is
    # required on Python 3, where range objects cannot be concatenated.
    wrappedFolds = [str(x) for x in list(range(numFolds)) * 2]
    for fold in range(numFolds):
        # NOTE(review): the slice widths (2 devel + 7 train folds) are hard-coded
        # and only partition the data cleanly when numFolds == 10.
        develFolds = wrappedFolds[fold + 1:fold + 3]
        trainFolds = wrappedFolds[fold + 3:fold + 10]
        foldParameter = " --folds test=train" + str(fold) + ":devel=train" + ",train".join(develFolds) + ":train=train" + ",train".join(trainFolds)
        command = commandBase + foldParameter + " " + extraParameters
        # strip() removes the trailing space left when extraParameters is empty.
        batch(command.strip(), input=None, connection=connection, jobTag="DDI13-fold" + str(fold), output=output, debug=True, dummy=dummy)
    if testPath != "":
        # The test jobs use a fixed split of the ten training folds.
        testFolds = " --folds devel=train0,train1,train2,train3,train4:train=train5,train6,train7,train8,train9"
        for subTask in ("9.1", "9.2"):
            testFile = os.path.join(testPath, "DDI13-test-task" + subTask + ".xml")
            testCommand = commandBase + testFolds + " --testFile " + testFile + " " + extraParameters
            batch(testCommand.strip(), input=None, connection=connection, jobTag="DDI13-test" + subTask, output=output, debug=True, dummy=dummy)
def evaluateWordSort(model, epoch):
    """Print the model's output for one batch after a training epoch.

    Draws a batch of 8 from `sentenceData`, runs the model with teacher
    forcing disabled and, for every row of the output, prints the reference,
    the output, their difference, the original text and the text re-ordered
    by the output indices.
    """
    print('Epoch [{}] -- Evaluate'.format(epoch))

    inputs, reference, sentences = batch(sentenceData, 8)
    predicted, _ = model(inputs, reference, teacher_force_ratio=0.)
    # Swap the two axes so that rows can be indexed alongside `reference`.
    predicted = predicted.permute(1, 0)

    for row in range(predicted.size(0)):
        print("=============================================")
        print("yref", reference[row], predicted[row],
              reference[row] - predicted[row])
        print("orig", sentences[row])

        # Use the output row as indices into the original text.
        order = predicted[row].cpu().numpy()
        reordered = "".join(sentences[row][position] + " "
                            for position in order)
        print("[" + reordered + "]")
def train(pNet, optimizer, epoch, clip=1.):
    """Train `pNet` for a single epoch.

    Runs STEPS_PER_EPOCH optimisation steps, each on a fresh batch of
    BATCH_SIZE drawn from `sentenceData`. Gradients are clipped to norm
    `clip` before every optimizer step. The loss of the latest step and the
    time taken by the last ten steps are printed after every tenth step.
    """
    print('Epoch [{}] -- Train'.format(epoch))

    tick = time.time()
    for completed in range(1, STEPS_PER_EPOCH + 1):
        optimizer.zero_grad()
        inputs, targets, _ = batch(sentenceData, BATCH_SIZE)

        # Forward pass: only the loss is used here.
        _, loss = pNet(inputs, targets)

        # Backward pass with gradient clipping.
        loss.backward()
        nn.utils.clip_grad_norm_(pNet.parameters(), clip)
        optimizer.step()

        if completed % 10:
            continue

        # Report every tenth step, then restart the timer.
        elapsed = time.time() - tick
        print('Epoch [{}] loss: {} time:{:.2f}'.format(
            epoch, loss.item(), elapsed))
        tick = time.time()
def __init__(self, width, height, libtcod):
    """Initialise the libtcod consoles and the starting game state.

    width, height -- size used for the root console and for the "game",
                     "title" and "controls" consoles
    libtcod -- the libtcod module/object; stored on the instance and passed
               on to the batch and player objects
    """
    self.width, self.height = width, height
    self.libtcod = libtcod

    # The font has to be configured before the root console is created.
    font_flags = libtcod.FONT_TYPE_GREYSCALE | libtcod.FONT_LAYOUT_TCOD
    libtcod.console_set_custom_font('arial12x12.png', font_flags)
    libtcod.console_init_root(self.width, self.height, 'HALF SLIME 3', False)

    # Three further consoles of the same size, created in this order.
    for console_name in ('game', 'title', 'controls'):
        setattr(self, console_name,
                libtcod.console_new(self.width, self.height))
    self.batch = batch(self.game, libtcod)

    # Plain starting values.
    self.numWalls = 30
    self.numSlime = 20
    self.gamestate = "Title"
    self.itemType = ['yellow', 'green']
    self.items = []
    self.red = 0

    # The player character is the first entry of the object list.
    self.char = player(0, 0, '@', self.libtcod.amber, libtcod)
    self.objs = [self.char]

    self.loadcontent()
def buildDDI13(output, connection, dummy=False, numFolds=10, extraParameters="", testPath=""):
    """
    Submit the DDI13 "train.py" jobs: one per cross-validation fold and,
    optionally, two more for the test files.

    For fold i the test set is "train<i>", the devel set is the next two
    folds and the train set is the seven folds after those, wrapping around
    past the last fold.

    output -- forwarded to batch() as its "output" argument
    connection -- used both for the batch system and after "-c" on the
                  train.py command line
    dummy -- forwarded to batch() as its "dummy" argument
    numFolds -- number of folds to submit jobs for
    extraParameters -- text appended to every train.py command line
    testPath -- directory holding "DDI13-test-task9.1.xml" and
                "DDI13-test-task9.2.xml"; when not empty, one extra job per
                test file is submitted
    """
    global mainTEESDir
    from batch import batch
    commandBase = "python " + os.path.join(mainTEESDir, "train.py") + " -t DDI13 -o %o/%j -c " + connection + " --clearAll"
    # Two back-to-back copies of the fold ids, so a slice starting near the end
    # wraps around to fold 0. Built with list() because Python 3 range objects
    # do not support "+".
    foldIds = [str(x) for x in list(range(numFolds)) * 2]
    for fold in range(numFolds):
        # NOTE(review): 2 devel folds and 7 train folds are hard-coded, which
        # only partitions the data cleanly when numFolds == 10.
        develFolds = foldIds[fold + 1:fold + 3]
        trainFolds = foldIds[fold + 3:fold + 10]
        foldParameter = " --folds test=train" + str(fold) + ":devel=train" + ",train".join(develFolds) + ":train=train" + ",train".join(trainFolds)
        command = commandBase + foldParameter + " " + extraParameters
        # strip() removes the trailing space left when extraParameters is empty.
        batch(command.strip(), input=None, connection=connection, jobTag="DDI13-fold" + str(fold), output=output, debug=True, dummy=dummy)
    if testPath != "":
        # The test jobs use a fixed split of the ten training folds.
        testFolds = " --folds devel=train0,train1,train2,train3,train4:train=train5,train6,train7,train8,train9"
        for subTask in ("9.1", "9.2"):
            testFile = os.path.join(testPath, "DDI13-test-task" + subTask + ".xml")
            testCommand = commandBase + testFolds + " --testFile " + testFile + " " + extraParameters
            batch(testCommand.strip(), input=None, connection=connection, jobTag="DDI13-test" + subTask, output=output, debug=True, dummy=dummy)
def word2vec(data, num_skips, skip_window, batch_size, hidden_size, vocab_size,
             learning_rate, n_epochs, num_sampled):
    """Train skip-gram embeddings with an NCE loss and return the embedding matrix.

    data          -- iterable of sentences; each sentence is a non-empty,
                     indexable sequence of word ids (stored as int32)
    num_skips     -- number of (center, label) pairs generated per center
                     word; must not exceed 2 * skip_window
    skip_window   -- number of context words considered on each side
    batch_size    -- number of pairs fed per training step
    hidden_size   -- embedding dimension
    vocab_size    -- number of rows of the embedding matrix
    learning_rate -- learning rate of the Adagrad optimizer
    n_epochs      -- number of training steps (one batch per step)
    num_sampled   -- number of negative samples for the NCE loss

    Returns the value of the embedding matrix after training, with shape
    (vocab_size, hidden_size). The graph is also written to './my_graph'.
    """

    # generate target-label pairs for specific sentence, 'data' -> 'one sentence'
    def generate_skip(data):
        assert num_skips <= 2 * skip_window
        data_index = 0
        size = 2 * skip_window * len(data)
        # Pairs are produced in whole groups of num_skips. The arrays are
        # sized to exactly what gets filled, because np.ndarray memory is
        # uninitialised and any unfilled tail would hold garbage word ids.
        n_centers = size // num_skips
        n_pairs = n_centers * num_skips
        centers = np.ndarray(shape=(n_pairs,), dtype=np.int32)
        labels = np.ndarray(shape=(n_pairs, 1), dtype=np.int32)
        span = 2 * skip_window + 1  # [structure: skip_window, target , skip_window ]
        buffer = collections.deque(maxlen=span)
        # Fill the sliding window; the index wraps around short sentences.
        for _ in range(span):
            buffer.append(data[data_index])
            data_index = (data_index + 1) % len(data)
        for i in range(n_centers):
            target = skip_window  # target label at the center of the buffer
            targets_to_avoid = [skip_window]
            for j in range(num_skips):
                # Draw a window position that has not been used for this center.
                while target in targets_to_avoid:
                    target = random.randint(0, span - 1)
                targets_to_avoid.append(target)
                centers[i * num_skips + j] = buffer[skip_window]
                labels[i * num_skips + j, 0] = buffer[target]
            buffer.append(data[data_index])
            data_index = (data_index + 1) % len(data)  # Move one forward
        return centers, labels

    # produce skip pairs for each sentence then concatenate
    centers = []
    targets = []
    for sequence in data:
        centers_element, targets_element = generate_skip(sequence)
        centers.append(centers_element)
        targets.append(targets_element)
    centers = list(np.concatenate(centers))
    targets = list(np.concatenate(targets))

    # Tensorflow Infrastructure

    # Step 1: define the placeholders for input and output
    with tf.name_scope("batch_data"):
        center_words = tf.placeholder(tf.int32, shape=[batch_size], name='center_words')
        target_words = tf.placeholder(tf.int32, shape=[batch_size, 1], name='target_words')

    # Step 2: define weights. In word2vec, embed matrix is the weight matrix
    with tf.name_scope("embed"):
        embed_matrix = tf.Variable(tf.random_uniform([vocab_size, hidden_size], -1.0, 1.0),
                                   name="embed_matrix")

    # Step 3+4: define inference + loss function
    with tf.name_scope("loss"):
        embed = tf.nn.embedding_lookup(embed_matrix, center_words, name="embed")
        nce_weight = tf.Variable(tf.truncated_normal([vocab_size, hidden_size],
                                                     stddev=1.0 / math.sqrt(hidden_size)),
                                 name="nce_weight")
        nce_bias = tf.Variable(tf.zeros([vocab_size]), name="bias")
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight,
                                             biases=nce_bias,
                                             labels=target_words,
                                             inputs=embed,
                                             num_sampled=num_sampled,
                                             num_classes=vocab_size,
                                             name="loss"))

    # Step 5: define optimizer
    with tf.name_scope("optimizer"):
        optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(loss)

    # Step 6: run the graph
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print('Initialized, now begin Word2Vec!')
        writer = tf.summary.FileWriter('./my_graph', sess.graph)
        try:
            # NOTE(review): batch(...).next_(step) is defined elsewhere; it is
            # assumed to return (centers_batch, targets_batch) matching the
            # placeholder shapes above.
            batch_ = batch(centers, targets, batch_size)
            for current_epoch in range(n_epochs):
                centers_batch, targets_batch = batch_.next_(current_epoch)
                feed = {center_words: centers_batch, target_words: targets_batch}
                loss_, _ = sess.run([loss, optimizer], feed_dict=feed)
                if (current_epoch + 1) % 10000 == 0:
                    print('The loss for {} iteration is {}'.format(current_epoch, loss_))
            # Fetching a variable needs no feed; this also keeps the call valid
            # when n_epochs == 0 and no batch was ever built.
            embedding = sess.run(embed_matrix)
        finally:
            # Close the summary writer even if training fails.
            writer.close()
    return embedding
def newBatch(self, batchID, batchName="", dataPath="", iniRtList=None):
    """Create a new batch object and append it to self.batchList.

    batchID   -- identifier passed to batch.batch()
    batchName -- name passed to batch.batch()
    dataPath  -- data path passed to batch.batch()
    iniRtList -- list passed as the last argument of batch.batch(); a new
                 empty list is used when omitted, so batches never share
                 one default list
    """
    if iniRtList is None:
        iniRtList = []
    # The original passed the undefined name "initRtList" here, which raised
    # NameError on every call.
    self.batchList.append(
        batch.batch(batchID, self, batchName, dataPath, iniRtList))
allEnsembles if options.ensembles=='all' else options.ensembles.split(',')) ensSlice = ((None,None) if not options.ensSlice else tuple(int(i) for i in options.ensSlice.split(':')) if ':' in options.ensSlice else (int(options.ensSlice),1+int(options.ensSlice))) templates = ((0,0) if not options.templates else tuple(int(i) for i in options.templates.split(':')) if ':' in options.templates else (int(options.templates),1+int(options.templates))) if options.batch: chunksize = 10 stack = [] for part in partitions: syschunks = chunk(systs, chunksize) enschunks = chunktuple(ensSlice, chunksize) tmpchunks = chunktuple(templates, chunksize) if syschunks: stack.extend(["./start.py --partition %s --systematics %s"%(part, ','.join(s)) for s in syschunks]) if templates: stack.extend(["./start.py --partition %s --templates %d:%d"%((part,)+t) for t in tmpchunks]) if enschunks: stack.extend(["./start.py --partition %s --ensembles %s --ensSlice %d:%d"%((part, e)+s) for s in enschunks for e in ensembles]) if not any([syschunks,tmpchunks,enschunks]): stack.append("./start.py --partition %s"%part) batch.batch(stack, site=options.site) else: for part in partitions: for tID in ([None] if templates[0]==templates[1] else range(channel_data.nTemplates)[slice(*templates)]): mp = systematics.measurement_pars(partition=part) mp.update({'doVis':options.visualize, 'evalSystematics':systs if tID == None else [], 'ensembles':ensembles if tID == None else [], 'ensSlice':ensSlice, 'templateID':tID}) print mp measurement(**mp)
file=file): if file: link.link_file(sub_dir, card_dir, file, max_backup=max_backup) else: link.link_files(sub_dir, card_dir, max_backup=max_backup) elif args.subcommand == 'unlink': if file: link.unlink_file(file) else: function = link.unlink_file elif args.subcommand == 'backup': if file: backup.backup(file, max_backup=max_backup) else: function = backup.backup else: # elif args.subcommand == 'restore': backup.restore(file, args.number) try: if args.batch: batch.batch(function, card_dir, max_backup=max_backup) elif args.batch_region: batch.batch_region(function, base_dir, region, max_backup=max_backup) else: batch.batch_all(function, base_dir, max_backup=max_backup) except AttributeError: pass