Example #1
def buildModels(output, tasks, connection, jobConnection, extra, dummy=False):
    """
    Build the release models.
    
    This function should be run on the cluster, so the connection argument is the
    same for both the batch system and the train-program it runs.
    """
    global mainTEESDir
    from batch import batch
    for task in tasks:
        taskName = task
        if task in ["GE11", "GE09"]:
            taskName += ".2"
        command = "python " + os.path.join(
            mainTEESDir, "train.py"
        ) + " -t " + taskName + " -o %o/%j -c " + jobConnection + " --clearAll"
        if extra is not None:
            command += " " + extra
        batch(command,
              input=None,
              connection=connection,
              jobTag=task,
              output=output,
              debug=True,
              dummy=dummy)
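A minimal dry-run invocation might look like the sketch below; the output path and connection value are hypothetical placeholders, and dummy=True is assumed (as the name suggests) to prepare jobs without submitting them.

# Hypothetical usage; "myCluster" stands in for whatever connection
# value TEES expects, and the output path is a placeholder.
buildModels(output="/wrk/models",
            tasks=["GE11", "GE09"],
            connection="myCluster",     # batch-system connection
            jobConnection="myCluster",  # forwarded to train.py via -c
            extra=None,
            dummy=True)                 # assumed dry run: build jobs, don't submit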
Example #2
def buildDDI13(output, connection, dummy=False, numFolds=10):
    global mainTEESDir
    from batch import batch
    for fold in range(numFolds):
        # Doubling the fold list lets the slices wrap past the end:
        # fold itself is the test set, the next two folds are devel,
        # and the seven after those are train (Python 2 list semantics).
        develFolds = [
            str(x)
            for x in (range(numFolds) + range(numFolds))[fold + 1:fold + 2 + 1]
        ]
        trainFolds = [
            str(x)
            for x in (range(numFolds) + range(numFolds))[fold + 3:fold + 9 + 1]
        ]
        foldParameter = " --folds test=train" + str(
            fold) + ":devel=train" + ",train".join(
                develFolds) + ":train=train" + ",train".join(trainFolds)
        command = "python " + os.path.join(
            mainTEESDir, "train.py"
        ) + " -t DDI13 -o %o/%j -c " + connection + " --clearAll" + foldParameter
        batch(command,
              input=None,
              connection=connection,
              jobTag="DDI13-fold" + str(fold),
              output=output,
              debug=True,
              dummy=dummy)
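To see why the doubled range works, here is a quick worked sketch of the wrap-around case (Python 2, where range() returns a list):

# numFolds = 10, fold = 9: the devel/train windows wrap past the end.
numFolds, fold = 10, 9
doubled = range(numFolds) + range(numFolds)  # [0..9, 0..9]
develFolds = doubled[fold + 1:fold + 3]   # [0, 1]
trainFolds = doubled[fold + 3:fold + 10]  # [2, 3, 4, 5, 6, 7, 8]
# together with test fold 9, every fold is used exactly once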
Example #3
def generate_model(training_set_path=TRAINING_SET_PATH,
                   validation_set_path=VALIDATION_SET_PATH,
                   training_dictionary_path=TRAINING_DICTIONARY_PATH,
                   validation_dictionary_path=VALIDATION_DICTIONARY_PATH,
                   json_path=JSON_PATH, h5_path=H5_PATH):
    # seed the RNG from the clock; note: seeding with the current time is
    # not reproducible across runs (use a fixed constant for that)
    np.random.seed(int(time.time()))
    # get training batch
    x_t, y_t = batch.batch(training_set_path, training_dictionary_path)
    # get validation batch
    x_v, y_v = batch.batch(validation_set_path, validation_dictionary_path)
    print("- Training and validation sets imported." +  str(x_t.shape))
    # create model
    model = Sequential()
    model.add(LSTM(units_first_layer, return_sequences=True, stateful=False,
                   batch_input_shape=(None, x_t.shape[1], x_t.shape[2])))
    model.add(LSTM(units_second_layer, return_sequences=True, stateful=False))
    model.add(LSTM(units_third_layer, stateful=False))
    # add dropout to control for overfitting
    model.add(Dropout(.25))
    # squash output onto number of classes in probability space
    model.add(Dense(classes, activation='softmax'))
    # compile the model
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    # fit the model (model.fit returns a History object, whose repr is printed)
    print(model.fit(x_t, y_t, epochs=epochs, validation_data=(x_v, y_v)))
    # export model
    # serialize model to JSON
    model_json = model.to_json()
    with open(json_path, "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights(h5_path)
    print("- Model exported to disk.")
    return model
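The matching load step, for completeness, is the standard Keras JSON/HDF5 round-trip; a minimal sketch assuming only the json_path and h5_path used above:

from keras.models import model_from_json

def load_model_sketch(json_path=JSON_PATH, h5_path=H5_PATH):
    # rebuild the architecture from JSON, then restore the trained weights
    with open(json_path) as json_file:
        model = model_from_json(json_file.read())
    model.load_weights(h5_path)
    # recompile before evaluating or resuming training
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model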
Example #4
File: Release.py Project: ninjin/TEES
def buildModels(output, tasks, connection, dummy=False):
    """
    Build the release models.
    
    This function should be run on the cluster, so the connection argument is the
    same for both the batch system and the train-program it runs.
    """
    global mainTEESDir
    from batch import batch
    for task in tasks:
        taskName = task
        if task in ["GE", "GE09"]:
            taskName += ".2"
        command = "python " + os.path.join(mainTEESDir, "train.py") + " -t " + taskName + " -o %o/%j -c " + connection + " --clearAll"
        batch(command, input=None, connection=connection, jobTag=task, output=output, debug=True, dummy=dummy)
Example #5
def evaluateWordSort(accuracyStats, model, epoch):
    """Evaluate after a train epoch"""
    print('Epoch [{}] -- Evaluate'.format(epoch))

    x_val, y_ref, text_in = batch(sentenceData, 8)
    y_out, _ = model(x_val, y_ref, teacher_force_ratio=0.)
    y_out = y_out.permute(1, 0)
    compareBatchAccuracy(accuracyStats, text_in, y_ref, y_out)
Example #6
def buildDDI13(output,
               connection,
               dummy=False,
               numFolds=10,
               extraParameters="",
               testPath=""):
    global mainTEESDir
    from batch import batch

    commandBase = "python " + os.path.join(
        mainTEESDir,
        "train.py") + " -t DDI13 -o %o/%j -c " + connection + " --clearAll"
    for fold in range(numFolds):
        # wrap-around split: fold is test, the next two folds are devel,
        # the seven after those are train (see the sketch under Example #2)
        develFolds = [
            str(x)
            for x in (range(numFolds) + range(numFolds))[fold + 1:fold + 2 + 1]
        ]
        trainFolds = [
            str(x)
            for x in (range(numFolds) + range(numFolds))[fold + 3:fold + 9 + 1]
        ]
        foldParameter = " --folds test=train" + str(
            fold) + ":devel=train" + ",train".join(
                develFolds) + ":train=train" + ",train".join(trainFolds)
        command = commandBase + foldParameter + " " + extraParameters
        batch(command.strip(),
              input=None,
              connection=connection,
              jobTag="DDI13-fold" + str(fold),
              output=output,
              debug=True,
              dummy=dummy)

    if testPath != "":
        testFolds = " --folds devel=train0,train1,train2,train3,train4:train=train5,train6,train7,train8,train9"
        testCommand = commandBase + testFolds + " --testFile " + os.path.join(
            testPath, "DDI13-test-task9.1.xml") + " " + extraParameters
        batch(testCommand.strip(),
              input=None,
              connection=connection,
              jobTag="DDI13-test9.1",
              output=output,
              debug=True,
              dummy=dummy)
        testCommand = commandBase + testFolds + " --testFile " + os.path.join(
            testPath, "DDI13-test-task9.2.xml") + " " + extraParameters
        batch(testCommand.strip(),
              input=None,
              connection=connection,
              jobTag="DDI13-test9.2",
              output=output,
              debug=True,
              dummy=dummy)
Example #7
def evaluateWordSort(model, epoch):
    """Evaluate after a train epoch"""
    print('Epoch [{}] -- Evaluate'.format(epoch))

    x_val, y_val, text_val = batch(sentenceData, 8)
    out, _ = model(x_val, y_val, teacher_force_ratio=0.)
    out = out.permute(1, 0)

    for i in range(out.size(0)):
        print("=============================================")
        print("yref", y_val[i], out[i], y_val[i] - out[i])

        print("orig", text_val[i])
        v = out[i].cpu().numpy()  # predicted ordering as a numpy array
        print("[", end="")
        for index in v:
            print(text_val[i][int(index)] + " ", end="")

        print("]")
Example #8
def train(pNet, optimizer, epoch, clip=1.):
    """Train single epoch"""
    print('Epoch [{}] -- Train'.format(epoch))
    # x, y, t = batch(BATCH_SIZE)
    start = time.time()
    for step in range(STEPS_PER_EPOCH):
        optimizer.zero_grad()
        x, y, t = batch(sentenceData, BATCH_SIZE)

        # Forward
        out, loss = pNet(x, y)
        # Backward
        loss.backward()
        nn.utils.clip_grad_norm_(pNet.parameters(), clip)
        optimizer.step()
        if (step + 1) % 10 == 0:
            duration = time.time() - start
            print('Epoch [{}] loss: {}  time:{:.2f}'.format(
                epoch, loss.item(), duration))
            start = time.time()
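Neither snippet shows the surrounding driver, but the signatures suggest an outer loop along these lines; PointerNet, EPOCHS, and the Adam learning rate are hypothetical placeholders, not part of the original code:

import torch.optim as optim

EPOCHS = 30  # placeholder

pNet = PointerNet()  # stand-in for whatever model these snippets train
optimizer = optim.Adam(pNet.parameters(), lr=1e-3)
for epoch in range(EPOCHS):
    train(pNet, optimizer, epoch)
    evaluateWordSort(pNet, epoch)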
Example #9
File: window.py Project: nonsix/u18
def __init__(self, width, height, libtcod):
    self.width = width
    self.height = height
    self.libtcod = libtcod
    libtcod.console_set_custom_font('arial12x12.png',
                                    libtcod.FONT_TYPE_GREYSCALE | libtcod.FONT_LAYOUT_TCOD)
    libtcod.console_init_root(self.width, self.height, 'HALF SLIME 3', False)
    self.game = libtcod.console_new(self.width, self.height)
    self.title = libtcod.console_new(self.width, self.height)
    self.controls = libtcod.console_new(self.width, self.height)
    self.batch = batch(self.game, libtcod)
    self.numWalls = 30
    self.numSlime = 20
    self.gamestate = "Title"
    self.itemType = ['yellow', 'green']
    self.objs = []
    self.char = player(0, 0, '@', self.libtcod.amber, libtcod)
    self.items = []
    self.red = 0
    self.objs.append(self.char)
    self.loadcontent()
Example #10
File: Release.py Project: DUT-LiuYang/TEES
def buildDDI13(output, connection, dummy=False, numFolds=10, extraParameters="", testPath=""):
    global mainTEESDir
    from batch import batch
    
    commandBase = "python " + os.path.join(mainTEESDir, "train.py") + " -t DDI13 -o %o/%j -c " + connection + " --clearAll"
    for fold in range(numFolds):
        develFolds = [str(x) for x in (range(numFolds) + range(numFolds))[fold+1:fold+2+1]]
        trainFolds = [str(x) for x in (range(numFolds) + range(numFolds))[fold+3:fold+9+1]]
        foldParameter = " --folds test=train" + str(fold) + ":devel=train" + ",train".join(develFolds) + ":train=train" + ",train".join(trainFolds)
        command = commandBase + foldParameter + " " + extraParameters
        batch(command.strip(), input=None, connection=connection, jobTag="DDI13-fold" + str(fold), output=output, debug=True, dummy=dummy)
    
    if testPath != "":
        testFolds = " --folds devel=train0,train1,train2,train3,train4:train=train5,train6,train7,train8,train9"
        testCommand = commandBase + testFolds + " --testFile " + os.path.join(testPath, "DDI13-test-task9.1.xml") + " " + extraParameters
        batch(testCommand.strip(), input=None, connection=connection, jobTag="DDI13-test9.1", output=output, debug=True, dummy=dummy)
        testCommand = commandBase + testFolds + " --testFile " + os.path.join(testPath, "DDI13-test-task9.2.xml") + " " + extraParameters
        batch(testCommand.strip(), input=None, connection=connection, jobTag="DDI13-test9.2", output=output, debug=True, dummy=dummy)
Example #11
def word2vec(data, num_skips, skip_window,
             batch_size, hidden_size, vocab_size,
             learning_rate, n_epochs, num_sampled):

    # generate (center, label) pairs for one sentence; 'data' here is a single sentence
    def generate_skip(data):
        data_index = 0
        size = 2 * skip_window * len(data)
        assert num_skips <= 2 * skip_window
        centers = np.ndarray(shape=(size), dtype=np.int32)
        labels = np.ndarray(shape=(size, 1), dtype=np.int32)
        span = 2 * skip_window + 1  # structure: [skip_window, target, skip_window]
        buffer = collections.deque(maxlen=span)
        for _ in range(span):
            buffer.append(data[data_index])
            data_index = (data_index + 1) % len(data)
        for i in range(size // num_skips):
            target = skip_window  # target label at the center of the buffer
            targets_to_avoid = [skip_window]
            for j in range(num_skips):
                while target in targets_to_avoid:
                    target = random.randint(0, span - 1)
                targets_to_avoid.append(target)
                centers[i * num_skips + j] = buffer[skip_window]
                labels[i * num_skips + j, 0] = buffer[target]
            buffer.append(data[data_index])
            data_index = (data_index + 1) % len(data)  # move one word forward
        return centers, labels

    # produce skip pairs for each sentence, then concatenate
    centers = []
    targets = []
    for sequence in data:
        centers_element, targets_element = generate_skip(sequence)
        centers.append(centers_element)
        targets.append(targets_element)
    centers = list(np.concatenate(centers))
    targets = list(np.concatenate(targets))

    # --- TensorFlow infrastructure ---

    # Step 1: define the placeholders for input and output
    with tf.name_scope("batch_data"):
        center_words = tf.placeholder(tf.int32, shape=[batch_size], name='center_words')
        target_words = tf.placeholder(tf.int32, shape=[batch_size, 1], name='target_words')

    # Step 2: define the weights; in word2vec the embedding matrix is the weight matrix
    with tf.name_scope("embed"):
        embed_matrix = tf.Variable(tf.random_uniform([vocab_size, hidden_size], -1.0, 1.0),
                                   name="embed_matrix")

    # Steps 3+4: define inference and the loss function
    with tf.name_scope("loss"):
        embed = tf.nn.embedding_lookup(embed_matrix, center_words, name="embed")
        nce_weight = tf.Variable(tf.truncated_normal([vocab_size, hidden_size],
                                                     stddev=1.0 / math.sqrt(hidden_size)),
                                 name="nce_weight")
        nce_bias = tf.Variable(tf.zeros([vocab_size]), name="bias")
        loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight, biases=nce_bias,
                                             labels=target_words, inputs=embed,
                                             num_sampled=num_sampled, num_classes=vocab_size,
                                             name="loss"))

    # Step 5: define the optimizer
    with tf.name_scope("optimizer"):
        optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(loss)

    # Step 6: run the graph
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print('Initialized, now begin Word2Vec!')
        writer = tf.summary.FileWriter('./my_graph', sess.graph)
        batch_ = batch(centers, targets, batch_size)
        for current_epoch in range(n_epochs):
            centers_batch, targets_batch = batch_.next_(current_epoch)
            feed = {center_words: centers_batch, target_words: targets_batch}
            loss_, _ = sess.run([loss, optimizer], feed_dict=feed)
            if (current_epoch + 1) % 10000 == 0:
                print('Loss at iteration {}: {}'.format(current_epoch, loss_))
        embedding = sess.run(embed_matrix, feed_dict=feed)
        writer.close()

    return embedding
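The batch object above is constructed as batch(centers, targets, batch_size) and queried with next_(step); the helper itself is not shown. A minimal stand-in consistent with those two calls (the real class may differ):

import numpy as np

class batch(object):
    def __init__(self, centers, targets, batch_size):
        self.centers = np.asarray(centers)
        self.targets = np.asarray(targets)
        self.batch_size = batch_size
        self.n = len(self.centers)

    def next_(self, step):
        # slide a fixed-size window over the data, wrapping at the end
        idx = (step * self.batch_size + np.arange(self.batch_size)) % self.n
        return self.centers[idx], self.targets[idx]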
Example #12
def newBatch(self, batchID, batchName="", dataPath="", initRtList=list()):
    self.batchList.append(
        batch.batch(batchID, self, batchName, dataPath, initRtList))
Example #13
File: start.py Project: gerbaudo/statsTA
                 allEnsembles if options.ensembles=='all' else
                 options.ensembles.split(','))

    ensSlice = ((None, None) if not options.ensSlice else
                tuple(int(i) for i in options.ensSlice.split(':')) if ':' in options.ensSlice else
                (int(options.ensSlice), 1 + int(options.ensSlice)))
    templates = ((0, 0) if not options.templates else
                 tuple(int(i) for i in options.templates.split(':')) if ':' in options.templates else
                 (int(options.templates), 1 + int(options.templates)))
    
    if options.batch:
        chunksize = 10
        stack = []
        for part in partitions:
            syschunks = chunk(systs, chunksize)
            enschunks = chunktuple(ensSlice, chunksize)
            tmpchunks = chunktuple(templates, chunksize)
            if syschunks:
                stack.extend(["./start.py --partition %s --systematics %s" % (part, ','.join(s))
                              for s in syschunks])
            if tmpchunks:
                stack.extend(["./start.py --partition %s --templates %d:%d" % ((part,) + t)
                              for t in tmpchunks])
            if enschunks:
                stack.extend(["./start.py --partition %s --ensembles %s --ensSlice %d:%d" % ((part, e) + s)
                              for s in enschunks for e in ensembles])
            if not any([syschunks, tmpchunks, enschunks]):
                stack.append("./start.py --partition %s" % part)
        batch.batch(stack, site=options.site)
    else:
        for part in partitions:
            for tID in ([None] if templates[0] == templates[1] else
                        range(channel_data.nTemplates)[slice(*templates)]):
                mp = systematics.measurement_pars(partition=part)
                mp.update({'doVis': options.visualize,
                           'evalSystematics': systs if tID is None else [],
                           'ensembles': ensembles if tID is None else [],
                           'ensSlice': ensSlice,
                           'templateID': tID})
                print mp
                measurement(**mp)
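chunk and chunktuple are statsTA helpers not shown in this excerpt; a plausible minimal reading of chunk, consistent with how it is called above (the real implementation may differ):

# split a list into consecutive pieces of at most `size` elements
def chunk(items, size):
    return [items[i:i + size] for i in range(0, len(items), size)]

chunk(['a', 'b', 'c'], 2)  # [['a', 'b'], ['c']]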
Example #14
                                file=file):
            if file:
                link.link_file(sub_dir, card_dir, file, max_backup=max_backup)
            else:
                link.link_files(sub_dir, card_dir, max_backup=max_backup)
    elif args.subcommand == 'unlink':
        if file:
            link.unlink_file(file)
        else:
            function = link.unlink_file
    elif args.subcommand == 'backup':
        if file:
            backup.backup(file, max_backup=max_backup)
        else:
            function = backup.backup
    else:  # elif args.subcommand == 'restore':
        backup.restore(file, args.number)

    try:
        if args.batch:
            batch.batch(function, card_dir, max_backup=max_backup)
        elif args.batch_region:
            batch.batch_region(function,
                               base_dir,
                               region,
                               max_backup=max_backup)
        else:
            batch.batch_all(function, base_dir, max_backup=max_backup)
    except AttributeError:
        pass
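The excerpt dispatches by storing the chosen operation in `function` and handing it to batch.batch to apply across a directory. A rough sketch of the shape such a helper could take (hypothetical; the real batch module's signature may differ):

import os

def batch_sketch(function, directory, max_backup=None):
    # apply the chosen operation to every entry in the directory
    for name in sorted(os.listdir(directory)):
        function(os.path.join(directory, name), max_backup=max_backup)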