예제 #1
0
파일: main.py 프로젝트: mchorton/vsrseg
def main(args):
    """Parse command-line arguments and run the model in the requested mode.

    Arguments:
    args -- list of argument strings (e.g. sys.argv[1:]).

    Modes:
      'train' -- build an md.CtxBB model, load real (or fake) data, train it.
      'test'  -- load the checkpoint given by --net and evaluate on vcoco_val.
      anything else -- log an error and exit with status 1.
    """
    parser = argparse.ArgumentParser(
        description=("Run deep models for visual semantic role segmentation "
                     "(or detection)"))
    parser.add_argument("mode", help="Mode to run model in (e.g. 'train')")
    # Timestamped default keeps repeated runs from overwriting each other.
    parser.add_argument("-s",
                        "--save_dir",
                        help="directory for saving the model",
                        default="saved_models/%s" %
                        dt.datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
    parser.add_argument("-e",
                        "--epochs",
                        help="number of epochs for training",
                        type=int,
                        default=50)
    parser.add_argument("-p",
                        "--save_per",
                        help="epochs to wait before saving",
                        type=int,
                        default=5)
    parser.add_argument("-l",
                        "--learn_rate",
                        help="learning rate",
                        type=float,
                        default=0.001)
    parser.add_argument("-c",
                        "--cuda",
                        type=int,
                        nargs="+",
                        help="ids of gpus to use during training",
                        default=[])
    # Fix: the two help fragments previously concatenated as
    # "(fordevelopment purposes)" -- a space was missing.
    parser.add_argument("-f",
                        "--fake",
                        action="store_true",
                        help=("flag to use fake data that loads quickly (for "
                              "development purposes)"))
    parser.add_argument(
        "--net",
        help="file in which model is stored. Used in test mode.",
        default=None)
    cfg = parser.parse_args(args)

    if cfg.mode == 'train':
        model = md.CtxBB()
        if cfg.fake:
            dataloader = get_fake_loader()
        else:
            dataloader = ld.get_loader("vcoco_train", ld.COCO_IMGDIR)
        # Forward every CLI option to the trainer as keyword arguments.
        trainer = md.BasicTrainer(model, dataloader, **vars(cfg))
        logging.getLogger(__name__).info("Beginning Training...")
        trainer.train(cfg.epochs)
    elif cfg.mode == 'test':
        checkpoint = torch.load(cfg.net)
        model = checkpoint["model"]
        evaluator = ev.Evaluator(**vars(cfg))
        ev.do_eval(evaluator, model, "vcoco_val", cfg.save_dir)

    else:
        logging.getLogger(__name__).error("Invalid mode '%s'" % str(cfg.mode))
        sys.exit(1)
예제 #2
0
    def trainClassifier(self):
        """Pre-train the unfolded multimodal network as a classifier.

        Loads per-modality train/validation/test arrays, builds DataHandlers
        for the inputs and the labels, then runs max_pt_epoch epochs of
        trainClassifierOneBatch, applying the per-modality gradients (and the
        joint-network gradients when self.has_joint) via updateParams.
        """
        print '-------------------------------'
        outputPrefix = self.readField(self.config, self.name,
                                      "output_directory")
        outputDir = os.path.join(outputPrefix, self.name)
        if not os.path.exists(outputDir):
            os.mkdir(outputDir)
        maxEpoch = int(self.readField(self.config, self.name, "max_pt_epoch"))
        trainSize = int(
            self.readField(self.config, self.name, "classifier_train_size"))
        # Whole batches per epoch; any remainder examples are dropped.
        numBatch = int(trainSize / (self.batchsize))

        #         self.jsae.addAE(pretrain='mse')
        trainData = []
        valData = []
        testData = []
        trainDH = []
        # output=None: DataHandlers are read-only here, nothing is written.
        output = None

        # Load train/validation arrays and a train DataHandler per modality.
        for i in xrange(self.modalsCnt):
            n = self.names[i]
            s = self.saes[i]

            t = self.readField(s.ae[1].config, s.ae[1].name, "train_data")
            trainData.append(gp.garray(np.load(t)))

            t = self.readField(s.ae[1].config, s.ae[1].name, "validation_data")
            valData.append(gp.garray(np.load(t)))

            t = self.readField(s.ae[1].config, s.ae[1].name, "train_data")
            trainDH.append(
                DataHandler(t, output, s.ae[1].vDim, s.ae[-1].hDim,
                            self.batchsize, numBatch))

        # Labels are streamed in the same batch order as the inputs above.
        t = self.readField(self.config, self.name, "train_label")
        cat_cnt = int(self.readField(self.config, self.name, "cat_cnt"))
        labelDH = DataHandler(t, output, cat_cnt, cat_cnt, self.batchsize,
                              numBatch)

        evalFreq = int(self.readField(self.config, self.name, "eval_freq"))

        if evalFreq != 0:
            qsize = int(self.readField(self.config, self.name, "query_size"))
            labelPath = self.readField(self.config, self.name, "val_label")
            label = np.load(labelPath)
            print "path: ", labelPath
            trainLabelPath = self.readField(self.config, self.name,
                                            "train_label")
            trainLabel = np.load(trainLabelPath)
            queryPath = self.readField(self.config, self.name, "query")
            validation = evaluate.Evaluator(queryPath,
                                            label,
                                            os.path.join(outputDir, 'perf'),
                                            self.name,
                                            query_size=qsize,
                                            verbose=self.verbose)
            validation.setTrainLabel(trainLabel)

        testlabelPath = self.readField(self.config, self.name, "test_label")
        testlabel = np.load(testlabelPath)
        print "path: ", testlabelPath
        for i in xrange(self.modalsCnt):
            n = self.names[i]
            s = self.saes[i]

            t = self.readField(s.ae[1].config, s.ae[1].name, "test_data")
            testData.append(gp.garray(np.load(t)))
        # NOTE(review): queryPath, qsize and trainLabel are only bound inside
        # the evalFreq != 0 branch above; this Evaluator construction raises
        # NameError when eval_freq is 0 -- confirm configs always set it.
        test = evaluate.Evaluator(queryPath,
                                  testlabel,
                                  os.path.join(outputDir, 'perf'),
                                  self.name,
                                  query_size=qsize,
                                  verbose=self.verbose)
        test.setTrainLabel(trainLabel)

        print '>>>>>>>>>>>>>>>>>>>>>>pre-training the unfolded network<<<<<<<<<<<<<<<<<<<<'
        # Fixed cost weights used for the whole pre-training phase.
        diff_cost = 0
        rec_cost = 0.1
        for epoch in range(maxEpoch):
            print 'depth is: ', self.jdepth - 1
            #             perf=np.zeros( nMetric)
            perf = 0
            # Rewind all data streams so each epoch replays the same batches.
            for i in xrange(self.modalsCnt):
                trainDH[i].reset()
            labelDH.reset()

            print "epoch: ", epoch
            for i in range(numBatch):

                trainbatch = []

                for m in xrange(self.modalsCnt):
                    trainbatch.append(trainDH[m].getOneBatch())
                labelbatch = labelDH.getOneBatch()

                #                 for m in xrange(self.modalsCnt):
                #                     print trainbatch[m].shape
                #                 print labelbatch

                #use imgcost and txt cost
                curr, g, jg = self.trainClassifierOneBatch(trainbatch,
                                                           labelbatch,
                                                           epoch,
                                                           diff_cost=diff_cost,
                                                           recf=rec_cost)
                perf += curr

                # Apply per-modality gradients, then the joint-network ones.
                for m in xrange(self.modalsCnt):
                    self.saes[m].updateParams(epoch,
                                              g[m],
                                              self.saes[m].ae,
                                              backprop=True)
                if self.has_joint:
                    self.jsae.updateParams(epoch,
                                           jg,
                                           self.jsae.ae,
                                           backprop=True)


#                 perf=self.aggregatePerf(perf, curr)

#             print 'perf is: ', perf
#             if evalFreq!=0 and (1+epoch) % evalFreq == 0:
#                 ele=self.getMMReps(valData)
#                 validation.evalClassification(ele, label, epoch, self.name, metric='euclidean')
#         print 'test:'
#         ele=self.getMMReps(testData)
#         test.evalClassification(ele, testlabel, epoch, self.name, metric='euclidean')
예제 #3
0
 def test_do_eval(self):
     """Smoke-test ev.do_eval with a stub model on the vcoco_val split."""
     model = md.TestCtxBB()
     self.model = model
     evaluator = ev.Evaluator(cuda=[])
     ev.do_eval(evaluator, model, "vcoco_val", self.test_dir)
예제 #4
0
    def train(self):
        """Train per-modality stacked autoencoders with similar/dissimilar
        pair supervision, sweeping the parameter grid from getNextParams.

        For each parameter setting: pre-train via trainClassifier, then for
        each joint depth run maxEpoch epochs over sim/dis pair batches,
        summing the gradients of the similar and dissimilar passes before
        updating the per-modality (and optional joint) networks.
        """
        outputPrefix = self.readField(self.config, self.name,
                                      "output_directory")
        outputDir = os.path.join(outputPrefix, self.name)
        if not os.path.exists(outputDir):
            os.mkdir(outputDir)
        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))
        trainSize = int(self.readField(self.config, self.name, "train_size"))
        print "train size is: ", trainSize
        numBatch = int(trainSize / self.batchsize)

        normalizeImg = self.str2bool(
            self.readField(self.config, self.name, "normalize"))

        # sim*/dis* hold the two halves of similar/dissimilar example pairs,
        # one DataHandler per modality.
        sim1 = []
        sim2 = []
        dis1 = []
        dis2 = []
        trainData = []
        valData = []
        queryData = []
        testData = []

        for i in xrange(self.modalsCnt):
            n = self.names[i]
            s = self.saes[i]

            #             if self.readField(self.config, self.name,"extract_reps")=="True":
            #                 output = self.readField(s.ae[-1].config, s.ae[-1].name, "train_reps")
            #             else:
            output = None

            t = self.readField(self.config, self.name, "sim" + n + "1")
            sim1.append(
                DataHandler(t, output, s.ae[1].vDim, s.ae[-1].hDim,
                            self.batchsize, numBatch))

            t = self.readField(self.config, self.name, "sim" + n + "2")
            sim2.append(
                DataHandler(t, output, s.ae[1].vDim, s.ae[-1].hDim,
                            self.batchsize, numBatch))

            t = self.readField(self.config, self.name, "dis" + n + "1")
            dis1.append(
                DataHandler(t, output, s.ae[1].vDim, s.ae[-1].hDim,
                            self.batchsize, numBatch))

            t = self.readField(self.config, self.name, "dis" + n + "2")
            dis2.append(
                DataHandler(t, output, s.ae[1].vDim, s.ae[-1].hDim,
                            self.batchsize, numBatch))

        # Directories for periodic visualisation dumps.
        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        if showFreq > 0:
            visDir = os.path.join(outputDir, "vis")
            if not os.path.exists(visDir):
                os.makedirs(visDir)
            simpath = os.path.join(outputDir, "tmpVis", "sim")
            if not os.path.exists(simpath):
                os.makedirs(simpath)
            dispath = os.path.join(outputDir, "tmpVis", "dis")
            if not os.path.exists(dispath):
                os.makedirs(dispath)

        evalFreq = int(self.readField(self.config, self.name, "eval_freq"))
        if evalFreq != 0:
            qsize = int(self.readField(self.config, self.name, "query_size"))
            labelPath = self.readField(self.config, self.name, "val_label")
            label = np.load(labelPath)
            print "path: ", labelPath
            trainLabelPath = self.readField(self.config, self.name,
                                            "train_label")
            trainLabel = np.load(trainLabelPath)
            queryPath = self.readField(self.config, self.name, "query")

            for i in xrange(self.modalsCnt):
                n = self.names[i]
                s = self.saes[i]

                t = self.readField(s.ae[1].config, s.ae[1].name, "train_data")
                trainData.append(gp.garray(np.load(t)))

                t = self.readField(s.ae[1].config, s.ae[1].name,
                                   "validation_data")
                valData.append(gp.garray(np.load(t)))

        vallabelPath = self.readField(self.config, self.name, "val_label")
        vallabel = np.load(vallabelPath)
        testlabelPath = self.readField(self.config, self.name, "test_label")
        testlabel = np.load(testlabelPath)
        querylabelPath = self.readField(self.config, self.name, "query_label")
        querylabel = np.load(querylabelPath)

        for i in xrange(self.modalsCnt):
            n = self.names[i]
            s = self.saes[i]

            t = self.readField(s.ae[1].config, s.ae[1].name, "query_data")
            queryData.append(gp.garray(np.load(t)))
            t = self.readField(s.ae[1].config, s.ae[1].name, "test_data")
            testData.append(gp.garray(np.load(t)))

#         else:
#             print "Warning: no evaluation setting!"

        nCommon, nMetric, title = self.getDisplayFields()
        if self.verbose:
            print title

        # First pass only prints the parameter grid; paramInd is then reset
        # so the training loop below restarts from the first setting.
        print "params: ", len(self.params)
        rem, sdc, ddc, rc = self.getNextParams()
        while rem > 0:
            print rem, sdc, ddc, rc
            rem, sdc, ddc, rc = self.getNextParams()
        self.paramInd = 0
        rem, sim_diffcost, dis_diffcost, reccost = self.getNextParams()

        while rem > 0:
            # NOTE(review): queryPath and qsize are bound only when
            # evalFreq != 0 above; the Evaluator constructions below raise
            # NameError otherwise -- confirm eval_freq is always nonzero.
            if evalFreq != 0:
                validation = evaluate.Evaluator(queryPath,
                                                vallabel,
                                                os.path.join(
                                                    outputDir, 'perf', 'val'),
                                                self.name,
                                                query_size=qsize,
                                                verbose=self.verbose)
                validation.setTrainLabel(vallabel)

            test = evaluate.Evaluator(queryPath,
                                      querylabel,
                                      os.path.join(outputDir, 'perf', 'test'),
                                      self.name,
                                      query_size=qsize,
                                      verbose=self.verbose)
            test.setTrainLabel(testlabel)

            # Reset joint depth and sparsity before the pre-training phase.
            self.jdepth = self.ijdepth
            self.sparsityFactor = 0
            #pretrain
            self.trainClassifier()

            print 'testing pretrained model with parameters:', sim_diffcost, dis_diffcost, reccost
            ele = self.getMMReps(queryData)
            ele2 = self.getMMReps(testData)
            test.evalSingleModal2(ele,
                                  ele2,
                                  maxEpoch,
                                  self.name,
                                  metric='euclidean')
            test.saveTarget(ele, ele2, metric='euclidean')

            # Grow the joint network one depth at a time, training maxEpoch
            # epochs at each depth.
            for self.jdepth in xrange(self.ijdepth, self.max_jdepth + 1):

                self.sparsityFactor = 0
                for epoch in range(maxEpoch):
                    print 'depth is: ', self.jdepth - 1
                    for i in xrange(self.modalsCnt):
                        sim1[i].reset()
                        sim2[i].reset()
                        dis1[i].reset()
                        dis2[i].reset()
                    print "epoch: ", epoch
                    for i in range(numBatch):

                        sim1batch = []
                        sim2batch = []
                        dis1batch = []
                        dis2batch = []

                        for m in xrange(self.modalsCnt):
                            sim1batch.append(sim1[m].getOneBatch())
                            sim2batch.append(sim2[m].getOneBatch())
                            dis1batch.append(dis1[m].getOneBatch())
                            dis2batch.append(dis2[m].getOneBatch())

                        #use imgcost and txt cost
                        curr, gs, jgs = self.trainOnePair(
                            sim1batch, sim2batch, True, epoch, reccost,
                            sim_diffcost, dis_diffcost)
                        curr2, gd, jgd = self.trainOnePair(
                            dis1batch, dis2batch, False, epoch, reccost,
                            sim_diffcost, dis_diffcost)

                        # Sum the gradients from the similar and dissimilar
                        # passes element-wise before a single update.
                        g = [[] for x in gs]
                        for m in xrange(self.modalsCnt):
                            g[m] = [[] for x in gs[m]]
                            for i in xrange(len(gs[m])):
                                g[m][i] = gs[m][i] + gd[m][i]

                        if self.has_joint:
                            jg = [[] for x in jgs]
                            for i in xrange(len(jgs)):
                                jg[i] = jgs[i] + jgd[i]

                        for m in xrange(self.modalsCnt):
                            self.saes[m].updateParams(epoch, g[m],
                                                      self.saes[m].ae)
                        if self.has_joint:
                            self.jsae.updateParams(epoch, jg, self.jsae.ae)
    #                     perf=self.aggregatePerf(perf, curr)

                    if evalFreq != 0 and (1 + epoch) % evalFreq == 0:
                        ele = self.getMMReps(valData)
                        ele2 = self.getMMReps(valData)
                        validation.evalSingleModal2(ele,
                                                    ele2,
                                                    epoch,
                                                    self.name,
                                                    metric='euclidean')

                if self.has_joint and self.jdepth < self.max_jdepth:
                    self.jsae.addAE()

#             if evalFreq != 0:
#                 test.saveMaps("maps-%d-%.3f-%.3f.npy" % (self.paramInd, diffcost, reccost))

            print 'testing model with parameters:', sim_diffcost, dis_diffcost, reccost
            ele = self.getMMReps(queryData)
            ele2 = self.getMMReps(testData)
            test.evalSingleModal2(ele,
                                  ele2,
                                  maxEpoch,
                                  self.name,
                                  metric='euclidean')
            test.saveTarget(ele, ele2, metric='euclidean')

            # NOTE(review): 'validation' is defined only when evalFreq != 0;
            # this unconditional use would raise NameError otherwise.
            ele = self.getMMReps(valData)
            ele2 = self.getMMReps(valData)
            validation.evalSingleModal2(ele,
                                        ele2,
                                        epoch,
                                        self.name,
                                        metric='euclidean')
            validation.saveTarget(ele, ele2, metric='euclidean')
            #
            #             self.initsaes()

            rem, sim_diffcost, dis_diffcost, reccost = self.getNextParams()
            print "ind is: ", self.paramInd

#         if self.readField(self.config, self.name, "checkpoint")=="True":
#             self.doCheckpoint(outputDir)

#         if self.readField(self.config, self.name,"extract_reps")=="True":
#             if evalFreq!=0:
#                 self.extractValidationReps(validateImgData, validateTxtData, "validation_data","validation_reps")
#Uncomment this with new datahandlers
#             self.extractTrainReps(imgTrainDH, txtTrainDH, numBatch)

        self.saveConfig(outputDir)
예제 #5
0
    def train(self):
        outputPrefix = self.readField(self.config, self.name,
                                      "output_directory")
        outputDir = os.path.join(outputPrefix, self.name)
        if not os.path.exists(outputDir):
            os.mkdir(outputDir)

        imageinput = self.readField(self.isae.ae[1].config,
                                    self.isae.ae[1].name, "train_data")
        textinput = self.readField(self.tsae.ae[1].config,
                                   self.tsae.ae[1].name, "train_data")

        if self.readField(self.config, self.name, "extract_reps") == "True":
            imageoutput = self.readField(self.isae.ae[-1].config,
                                         self.isae.ae[-1].name, "train_reps")
            textoutput = self.readField(self.tsae.ae[-1].config,
                                        self.tsae.ae[-1].name, "train_reps")
        else:
            imageoutput = None
            textoutput = None

        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))
        trainSize = int(self.readField(self.config, self.name, "train_size"))
        numBatch = int(trainSize / self.batchsize)

        normalizeImg = self.str2bool(
            self.readField(self.config, self.name, "normalize"))
        imgTrainDH = DataHandler(imageinput, imageoutput, self.isae.ae[1].vDim,
                                 self.isae.ae[-1].hDim, self.batchsize,
                                 numBatch, normalizeImg)
        txtTrainDH = DataHandler(textinput, textoutput, self.tsae.ae[1].vDim,
                                 self.tsae.ae[-1].hDim, self.batchsize,
                                 numBatch)

        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        if showFreq > 0:
            visDir = os.path.join(outputDir, "vis")
            if not os.path.exists(visDir):
                os.makedirs(visDir)

        evalFreq = int(self.readField(self.config, self.name, "eval_freq"))
        if evalFreq != 0:
            qsize = int(self.readField(self.config, self.name, "query_size"))
            labelPath = self.readField(self.config, self.name, "label")
            label = np.load(labelPath)
            queryPath = self.readField(self.config, self.name, "query")
            validation = evaluate.Evaluator(queryPath,
                                            label,
                                            os.path.join(outputDir, 'perf'),
                                            self.name,
                                            query_size=qsize,
                                            verbose=self.verbose)
            validateImagepath = self.readField(self.isae.ae[1].config,
                                               self.isae.ae[1].name,
                                               "validation_data")
            validateTextpath = self.readField(self.tsae.ae[1].config,
                                              self.tsae.ae[1].name,
                                              "validation_data")
            validateImgData = gp.garray(np.load(validateImagepath))
            if normalizeImg:
                validateImgData = imgTrainDH.doNormalization(validateImgData)
            validateTxtData = gp.garray(np.load(validateTextpath))
        else:
            print "Warning: no evluation setting!"

        nCommon, nMetric, title = self.getDisplayFields()
        if self.verbose:
            print title

        for epoch in range(maxEpoch):
            perf = np.zeros(nMetric)
            epoch1, imgcost, txtcost, diffcost = self.checkPath(epoch)
            imgTrainDH.reset()
            txtTrainDH.reset()
            for i in range(numBatch):
                img = imgTrainDH.getOneBatch()
                txt = txtTrainDH.getOneBatch()
                curr = self.trainOneBatch(img, txt, epoch1, imgcost, txtcost,
                                          diffcost)
                perf = self.aggregatePerf(perf, curr)

            if evalFreq != 0 and (1 + epoch) % evalFreq == 0:
                imgcode, txtcode = self.getReps(validateImgData,
                                                validateTxtData)
                validation.evalCrossModal(imgcode, txtcode, epoch, 'V')

            if showFreq != 0 and (1 + epoch) % showFreq == 0:
                imgcode, txtcode = self.getReps(validateImgData,
                                                validateTxtData)
                np.save(
                    os.path.join(visDir, '%simg' % str(
                        (epoch + 1) / showFreq)), imgcode)
                np.save(
                    os.path.join(visDir, '%stxt' % str(
                        (epoch + 1) / showFreq)), txtcode)

            if self.verbose:
                self.printEpochInfo(epoch, perf, nCommon)

        if self.readField(self.config, self.name, "checkpoint") == "True":
            self.doCheckpoint(outputDir)

        if self.readField(self.config, self.name, "extract_reps") == "True":
            if evalFreq != 0:
                self.extractValidationReps(validateImgData, validateTxtData,
                                           "validation_data",
                                           "validation_reps")
            self.extractTrainReps(imgTrainDH, txtTrainDH, numBatch)

        self.saveConfig(outputDir)
예제 #6
0
def train():
    """Custom training loop for LPRNet with CTC loss.

    Reads all settings from the module-level ``args`` dict, iterates
    ``train_epochs`` epochs of batched gradient steps, runs validation every
    25 epochs (saving the best weights), and saves the final model at the end.
    """

    #Initiate the Neural Network
    net = LPRNet(NUM_CLASS)

    #get the trainn and validation batch size from argument parser
    batch_size = args["batch_size"]
    val_batch_size = args["val_batch_size"]

    #initialize the custom data generator
    #Defined in utils.py
    train_gen = utils.DataIterator(img_dir=args["train_dir"],
                                   batch_size=batch_size)
    val_gen = utils.DataIterator(img_dir=args["val_dir"],
                                 batch_size=val_batch_size)

    #variable intialization used for custom training loop
    # Dataset sizes are taken from the number of files in each directory.
    train_len = len(next(os.walk(args["train_dir"]))[2])
    val_len = len(next(os.walk(args["val_dir"]))[2])
    print("Train Len is", train_len)
    # BATCH_PER_EPOCH = None
    if batch_size == 1:
        BATCH_PER_EPOCH = train_len
    else:
        BATCH_PER_EPOCH = int(math.ceil(train_len / batch_size))

    #initialize tensorboard
    tensorboard = keras.callbacks.TensorBoard(log_dir='tmp/my_tf_logs',
                                              histogram_freq=0,
                                              batch_size=batch_size,
                                              write_graph=True)

    val_batch_len = int(math.floor(val_len / val_batch_size))
    evaluator = evaluate.Evaluator(val_gen, net, CHARS, val_batch_len,
                                   val_batch_size)
    best_val_loss = float("inf")

    #if a pretrained model is available, load weights from it
    if args["pretrained"]:
        net.load_weights(args["pretrained"])

    model = net.model
    tensorboard.set_model(model)

    #initialize the learning rate
    learning_rate = keras.optimizers.schedules.ExponentialDecay(
        args["lr"],
        decay_steps=args["decay_steps"],
        decay_rate=args["decay_rate"],
        staircase=args["staircase"])

    #define training optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    print('Training ...')
    train_loss = 0

    #starting the training loop
    for epoch in range(args["train_epochs"]):

        print("Start of epoch {} / {}".format(epoch, args["train_epochs"]))

        #zero out the train_loss and val_loss at the beginning of every loop
        #This helps us track the loss value for every epoch
        train_loss = 0
        val_loss = 0
        start_time = time.time()

        for batch in range(BATCH_PER_EPOCH):
            # print("batch {}/{}".format(batch, BATCH_PER_EPOCH))
            #get a batch of images/labels
            #the labels have to be put into sparse tensor to feed into tf.nn.ctc_loss
            train_inputs, train_targets, train_labels = train_gen.next_batch()
            train_inputs = train_inputs.astype('float32')

            train_targets = tf.SparseTensor(train_targets[0], train_targets[1],
                                            train_targets[2])

            # Open a GradientTape to record the operations run
            # during the forward pass, which enables auto-differentiation.
            with tf.GradientTape() as tape:

                #get model outputs
                logits = model(train_inputs, training=True)

                #next we pass the model outputs into the ctc loss function
                logits = tf.reduce_mean(logits, axis=1)
                logits_shape = tf.shape(logits)
                cur_batch_size = logits_shape[0]
                timesteps = logits_shape[1]
                # Every example is given the same (full) sequence length.
                seq_len = tf.fill([cur_batch_size], timesteps)
                # Transposed to time-major layout before the loss call.
                logits = tf.transpose(logits, (1, 0, 2))
                ctc_loss = tf.nn.ctc_loss(labels=train_targets,
                                          inputs=logits,
                                          sequence_length=seq_len)
                loss_value = tf.reduce_mean(ctc_loss)

            #Calculate Gradients and Update it
            # NOTE(review): gradients are taken of the per-example ctc_loss
            # tensor, not the reduced loss_value above -- confirm intended.
            grads = tape.gradient(
                ctc_loss,
                model.trainable_weights,
                unconnected_gradients=tf.UnconnectedGradients.NONE)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            train_loss += float(loss_value)

        tim = time.time() - start_time

        print("Train loss {}, time {} \n".format(
            float(train_loss / BATCH_PER_EPOCH), tim))
        #run a validation loop in every 25 epoch
        if epoch != 0 and epoch % 25 == 0:
            val_loss = evaluator.evaluate()
            #if the validation loss is less the previous best validation loss, update the saved model
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                net.save_weights(
                    os.path.join(args["saved_dir"], "new_out_model_best.pb"))
                print("Weights updated in {}/{}".format(
                    args["saved_dir"], "new_out_model_best.pb"))

            else:
                print("Validation loss is greater than best_val_loss ")

            # if epoch % 500 == 0:
            #     net.save(os.path.join(args["saved_dir"], f"new_out_model_last_{epoch}.pb"))

    net.save(os.path.join(args["saved_dir"], "new_out_model_last.pb"))
    print("Final Weights saved in {}/{}".format(args["saved_dir"],
                                                "new_out_model_last.pb"))
    tensorboard.on_train_end(None)
예제 #7
0
    intervals2425 = joblib.load(os.path.join(probes2425, path))

# Normalize each probe's interval dictionary before combining them.
intervals45 = evaluate.merge(intervals45)
intervals89 = evaluate.merge(intervals89)
intervals2425 = evaluate.merge(intervals2425)

# Majority-style combination: merge_all is called once per probe ordering,
# then the three results are union-ed and merged again.
merged1 = evaluate.merge_all(0, intervals45, intervals89, intervals2425)
merged2 = evaluate.merge_all(1, intervals89, intervals45, intervals2425)
merged3 = evaluate.merge_all(2, intervals2425, intervals89, intervals45)
merged = evaluate.merge({**merged1, **merged2, **merged3})

# Persist the combined intervals.
path_output = "merged-intervals.joblib"
joblib.dump(merged, path_output)

# Plain union of all three probes' intervals, merged once more.
merged_union = evaluate.merge({**intervals45, **intervals89, **intervals2425})
merged_union = evaluate.merge(merged_union)
path_output_union = "merged-intervals-union.joblib"
joblib.dump(merged_union, path_output_union)

# Strip the frequency component: keep only the interval value per timestamp.
merged_without_frequencies_union = {
    t: [v]
    for (t, (_, v)) in merged_union.items()
}
# merged_without_frequencies = {t:[v] for (t,(_,v)) in merged.items()}
# Score the union intervals against the known attack list, then re-save with
# the frequency-based evaluation folded in.
_, attack, attack_freq = evaluate.load_attacks()
e = evaluate.Evaluator(attack, attack_freq)
e.evaluate(merged_without_frequencies_union)
# e.evaluate(merged_without_frequencies)
merged_union = e.evaluate_freq(merged_union)
joblib.dump(merged_union, path_output_union)
예제 #8
0
파일: main.py 프로젝트: ardaboluk/BaseML
#extractMnist.show(data[:,96], 28, 28)

# create folds for k-fold cross validation
print("Creating folds for cross validation...")
print()
numFolds = 10
# Split data/labels into numFolds folds for cross-validation.
folded_data, folded_labels = folds.generateFolds(data,
                                                 labels,
                                                 numFolds,
                                                 verbose=True)

# cross validation
print("Evaluating the model using cross-validation")
print()
# One shared optimizer/evaluator pair is reused across all folds; ROC and
# precision-recall points are accumulated per fold.
gradientDescent = bgd.BGD(options)
evaluator = evaluate.Evaluator()
roc_points = []
pr_points = []
for i in range(len(folded_labels)):

    print("Fold", i, "as test")

    # split train and test sets
    if i == 0:
        train_data = np.hstack(folded_data[1:numFolds + 1])
        train_labels = np.hstack(folded_labels[1:numFolds + 1])
    elif i == numFolds - 1:
        train_data = np.hstack(folded_data[0:numFolds])
        train_labels = np.hstack(folded_labels[0:numFolds])
    else:
        train_data = np.concatenate((np.hstack(
예제 #9
0
#TOP LEVEL EVALUATE FILE
# Top-level driver (Python 2 syntax): scores sentiment predictions against a
# gold-standard file and prints per-class and averaged precision / recall /
# F-measure for the SemEval Twitter task.
import sys
sys.path.append("/Users/Prerna/Desktop/Prerna/NTU/Courses-Year4-Sem1/NLP/SemEval15/code");

import evaluate
E = evaluate.Evaluator();
# Prediction file produced by the SVM run, and the gold-standard TSV.
pPath = "UnigramPosSVM_C500.0.txt";
#gPath = "../rawdata/test/gold/twitter-dev-gold-B_rmnotav.tsv";
gPath = "../rawdata/test/AB_SemEval2013_task2_test_fixed/gold/twitter-test-gold-B_500.tsv";
# Column index of the label inside the prediction / gold files respectively.
PIdx = 0;
gIdx = 2;
E.evalPrediction(pPath, PIdx, gPath, gIdx);

# Averaged metrics across classes.
print "Average Pos & Neg F-measure = ", E.getAvgPosNegFmeasure();
print "Average F-measure = ", E.getAvgFmeasure();
print "Average Precision = ", E.getAvgPrecision();
print "Average Recall = ", E.getAvgRecall();

# Per-class F-measure.
print "Positive F-measure = ", E.getFmeasure_pos();
print "Negative F-measure = ", E.getFmeasure_neg();
print "Neutral F-measure = ", E.getFmeasure_neu();

# Per-class precision.
print "Positive Precision = ", E.getPrecision_pos();
print "Negative Precision = ", E.getPrecision_neg();
print "Neutral Precision = ", E.getPrecision_neu();

# Per-class recall.
print "Positive Recall = ", E.getRecall_pos();
print "Negative Recall = ", E.getRecall_neg();
print "Neutral Recall = ", E.getRecall_neu();


예제 #10
0
def train(arg_parser):
    '''Create an instance of model with the command line arguments and train it.

    Parses the command line, seeds the RNGs, creates the output directory,
    writes a python-syntax log file, builds the dataset / auxiliary model /
    model, runs the training loop, and records any fatal error in
    `<output>/err` before exiting with status 2.

    Arguments:
    arg_parser -- An `argparse.ArgumentParser`.
    '''

    args = arg_parser.parse_args()

    # Without EM, exactly one sample per data point is used.
    if not args.em:
        args.num_samples = 1
    if args.model == 'DistMult':
        Model = distmult_model.DistMultModel
    elif args.model == 'ComplEx':
        Model = complex_model.ComplExModel
    elif args.model in ['supervised', 'supervised_nce']:
        Model = supervised_model.SupervisedModel
    else:
        # BUGFIX: an unrecognized --model previously left `Model` unbound and
        # failed much later with an opaque NameError; fail fast instead.
        raise ValueError('ERROR: unknown --model %r.' % args.model)

    # BUGFIX: these used to `raise` plain strings, which is itself a
    # TypeError in Python 3 ("exceptions must derive from BaseException"),
    # so the intended message was never shown.
    if args.aux_model is not None and args.neg_samples is None:
        raise ValueError(
            "ERROR: --aux_model provided but --neg_samples not set.")

    if args.aux_model is not None and args.num_samples != 1:
        raise ValueError(
            "ERROR: --aux_model currently only implemented for --num_samples 1.")

    # Get random seed from system if the user did not specify a random seed.
    if args.rng_seed is None:
        args.rng_seed = int.from_bytes(os.urandom(4), byteorder='little')
    rng = random.Random(args.rng_seed)
    tf.set_random_seed(rng.randint(0, 2**32 - 1))

    # Create the output directory; refuse to reuse an existing one unless
    # --force is given, to prevent accidental data loss.
    try:
        os.mkdir(args.output)
    except OSError:
        if not args.force:
            sys.stderr.write(
                'ERROR: Cannot create output directory %s\n' % args.output)
            sys.stderr.write(
                'HINT: Does the directory already exist? To prevent accidental data loss this\n'
                '      script, by default, does not write to an existing output directory.\n'
                '      Specify a non-existing output directory or use the `--force`.\n')
            exit(1)
    else:
        print('Writing output into directory `%s`.' % args.output)

    try:
        with open(os.path.join(args.output, 'log'), 'w') as log_file:
            # We write log files in the form of python scripts. This way, log files are both human
            # readable and very easy to parse by different python scripts. We begin log files with
            # a shebang (`#!/usr/bin/python`) so that text editors turn on syntax highlighting.
            log_file.write('#!/usr/bin/python\n')
            log_file.write('\n')

            # Log information about the executing environment to make experiments reproducible.
            log_file.write('program = "%s"\n' % arg_parser.prog)
            log_file.write(
                'args = {\n %s\n}\n\n' % pprint.pformat(vars(args), indent=4)[1:-1])
            try:
                git_revision = subprocess.check_output(
                    ['git', 'rev-parse', 'HEAD']).decode('utf-8').strip()
                log_file.write('git_revision = "%s"\n' % git_revision)
            except Exception:
                # Not running from a git checkout (or git unavailable); the
                # revision is merely informational, so skip it.
                # (Narrowed from a bare `except:`.)
                pass

            log_file.write('host_name = "%s"\n' % socket.gethostname())
            log_file.write('start_time = "%s"\n' %
                           str(datetime.datetime.now()))
            log_file.write('\n')

            if args.model in ['supervised', 'supervised_nce']:
                # NOTE(review): 512 looks like the default of --embedding_dim;
                # passing emb_dim=None then lets the dataset choose its own
                # dimension — confirm against the argument parser.
                dat = SupervisedDataset(
                    args.input, args.validation_points, log_file=log_file,
                    emb_dim=None if args.embedding_dim == 512 else args.embedding_dim)
            else:
                dat = Dataset(
                    args.input, binary_files=args.binary_dataset, log_file=log_file)

            # Optional auxiliary model for negative sampling; any value other
            # than 'uniform' / 'frequency' is treated as a decision-tree path.
            if args.aux_model is None:
                aux_model = None
            elif args.aux_model == 'uniform':
                aux_model = UniformAuxModel(
                    dat, log_file=log_file, supervised=args.model in ['supervised', 'supervised_nce'])
            elif args.aux_model == 'frequency':
                aux_model = FrequencyAuxModel(
                    dat, log_file=log_file, supervised=args.model in [
                        'supervised', 'supervised_nce'],
                    exponent=args.aux_frequency_exponent)
            elif args.model in ['supervised', 'supervised_nce']:
                aux_model = SupervisedDecisionTreeModel(
                    args.aux_model, dat, log_file=log_file)
            else:
                aux_model = DecisionTreeModel(
                    args.aux_model, dat, log_file=log_file)

            model = Model(args, dat, rng, aux_model=aux_model,
                          log_file=log_file)

            session_config = tf.ConfigProto(
                log_device_placement=True)  # TODO: remove
            # session_config = tf.ConfigProto()
            # NOTE(review): allow_growth gated on --trace looks accidental —
            # the two options are unrelated; confirm the intent.
            if args.trace:
                session_config.gpu_options.allow_growth = True
            session = tf.Session(config=session_config)

            # Feed the training features only for variable initialization and
            # release them right afterwards to reduce memory pressure.
            init_fd = {}
            if args.model in ['supervised', 'supervised_nce']:
                init_fd[model.feed_train_features] = dat.features['train']
            session.run(tf.initializers.global_variables(),
                        feed_dict=init_fd,
                        options=tf.RunOptions(report_tensor_allocations_upon_oom=True))
            del init_fd
            if args.model in ['supervised', 'supervised_nce']:
                del dat.features['train']
            if args.initialize_from is not None:
                load_checkpoint(model, session, args.initialize_from,
                                log_file=log_file)

            if args.model in ['supervised', 'supervised_nce']:
                evaluator = evaluate.SupervisedEvaluator(
                    model, dat, args, log_file=log_file)
            else:
                evaluator = evaluate.Evaluator(
                    model, dat, args, log_file=log_file)

            training_loop(args, model, session, dat, rng, evaluator,
                          log_file=log_file)

            log_file.write('\n')
            log_file.write('end_time = "%s"\n' %
                           str(datetime.datetime.now()))
    except Exception:
        # BUGFIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. Record the traceback for post-mortem debugging and
        # signal failure via the exit status.
        with open(os.path.join(args.output, 'err'), 'w') as err_file:
            traceback.print_exc(file=err_file)
        exit(2)
예제 #11
0
    prefix_result = ""
    folder_file = config.get_config("test_folders")
# NOTE(review): `folder_file` comes from truncated lines above this excerpt
# (read from the "test_folders" config entry).
with open(folder_file) as f:
    folders = f.readlines()

# One test directory per line; strip trailing newlines/whitespace.
directories = [x.strip() for x in folders]

colors, attack, attack_freq = evaluate.load_attacks()

print(attack)
scores_ex = None
# evaluators = [Evaluator(attack, i) for i in identifiers]
# Single evaluator over all attacks with their frequencies; the commented
# variants built one evaluator per identifier / per named attack.
evaluators = []
# if not name_attack:
# evaluators = [Evaluator(attack, i) for i in identifiers]
evaluators.append(evaluate.Evaluator(attack, attack_freq))
# else:
# for n in name_attack:
# evaluators.append(Evaluator(attack, n))
# nb_features = sum(config.get_config_eval("features_number"))
# nb_features_macro = config.get_config_eval("nb_features_macro")
# Detection thresholds and section prefix read from the project config.
prefix = config.get_config("section")
threshold_autoencoder_number = config.get_config_eval("threshold_autoencoder")
# threshold_macro = config.get_config_eval("threshold_macro")
# threshold_micro = config.get_config_eval("threshold_micro")
# chargement du jeu de données de test micro
# (loading of the micro test dataset)

# modèle micro (micro model)

# if use_micro:
#     models = MultiModels()
예제 #12
0
def validate(
    val_loader,
    square_width,
    modality,
    output_directory,
    print_freq,
    test_csv,
    model: torch.nn.Module,
    epoch: int,
    write_to_file: bool = True,
) -> typing.Tuple[Result, Result, Result, np.ndarray, typing.List[MaskedResult],
                  evaluate.Evaluator]:
    """Run one validation pass over ``val_loader`` and aggregate depth metrics.

    Computes overall / inside-square / outside-square averages, periodically
    saves a comparison image row and an evaluation plot, and optionally
    appends the averaged metrics to ``test_csv``.

    Returns (avg, avg_inside, avg_outside, img_merge, results, evaluator).
    """
    # BUGFIX notes relative to the original:
    # * `np.array` (a function) was used as the return annotation; it is now
    #   the type `np.ndarray`.
    # * `img_merge` is initialized before the loop so an empty loader cannot
    #   cause a NameError at the return statement.
    # * the dead `gpu_time = average.gpu_time` after the loop was removed (the
    #   value was never used, and `average` is undefined for an empty loader).
    # * the running average is only computed when it is actually printed.
    average_meter = AverageMeter()
    inside_average_meter = AverageMeter()
    outside_average_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()
    evaluator = evaluate.Evaluator(val_loader.dataset.output_shape,
                                   square_width)
    end = time.time()
    results = []
    img_merge = None
    for i, (input, target) in enumerate(val_loader):
        input, target = input.cuda(), target.cuda()
        input_var = torch.autograd.Variable(input)
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute output
        end = time.time()
        depth_pred = model(input_var)
        torch.cuda.synchronize()
        gpu_time = time.time() - end
        # measure accuracy and record loss; channel 0 holds the depth map
        output1 = torch.index_select(depth_pred.data, 1,
                                     torch.cuda.LongTensor([0]))
        evaluator.add_results(output1, target)
        #assume all squares are of same size
        result = MaskedResult(val_loader.dataset.mask_inside_square)
        result.evaluate(output1, target)
        results.append(result)
        average_meter.update(result.result, gpu_time, data_time, input.size(0))
        inside_average_meter.update(result.result_inside, gpu_time, data_time,
                                    input.size(0))
        outside_average_meter.update(result.result_outside, gpu_time,
                                     data_time, input.size(0))
        end = time.time()
        # save 8 images for visualization, one every `skip` batches
        skip = 50
        if modality == 'd':
            img_merge = None
        else:
            if i == 0:
                img_merge = utils.merge_ims_into_row(
                    [input, target, depth_pred], rgbd_action="both")
            elif (i < 8 * skip) and (i % skip == 0):
                row = utils.merge_ims_into_row([input, target, depth_pred],
                                               rgbd_action="both")
                img_merge = utils.add_row(img_merge, row)
            elif i == 8 * skip:
                filename = output_directory + '/comparison_' + str(
                    epoch) + '.png'
                utils.save_image(img_merge, filename)
        if (i + 1) % print_freq == 0:
            # PERF: only materialize the running average when it is printed.
            average = average_meter.average()

            #print('=> output: {}'.format(output_directory))
            def print_result(result, result_name):
                stdout.write(
                    f'Validation Epoch: {epoch} [{i + 1}/{len(val_loader)}]\t'
                    f"{result_name}: "
                    #f't_Data={data_time:.3f}({average.data_time:.3f}) '
                    #f't_GPU={gpu_time:.3f}({average.gpu_time:.3f}) '
                    f'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                    f'MAE={result.mae:.2f}({average.mae:.2f}) '
                    f'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                    #f'REL={result.absrel:.3f}({average.absrel:.3f}) '
                    #f'Lg10={result.lg10:.3f}({average.lg10:.3f}) \n'
                    '\n')

            print_result(result.result, "result")

    avg = average_meter.average()
    avg_inside = inside_average_meter.average()
    avg_outside = outside_average_meter.average()
    avg.name = "average"
    avg_inside.name = "average inside"
    avg_outside.name = "average outside"

    print(f'\n*\n' + str(avg) + "\n" + str(avg_inside) + "\n" +
          str(avg_outside))

    if write_to_file:
        # `fieldnames` is a module-level constant shared with the CSV header.
        with open(test_csv, 'a') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow({
                'mse': avg.mse,
                'rmse': avg.rmse,
                'rmse inside': avg_inside.rmse,
                'rmse outside': avg_outside.rmse,
                'absrel': avg.absrel,
                'absrel inside': avg_inside.absrel,
                'absrel outside': avg_outside.absrel,
                'lg10': avg.lg10,
                'mae': avg.mae,
                'mae inside': avg_inside.mae,
                'mae outside': avg_outside.mae,
                'delta1': avg.delta1,
                'delta1 inside': avg_inside.delta1,
                'delta1 outside': avg_outside.delta1,
                'delta2': avg.delta2,
                'delta3': avg.delta3,
                'gpu_time': avg.gpu_time,
                'data_time': avg.data_time
            })
    evaluator.save_plot(
        os.path.join(output_directory, f"evaluation_epoch{epoch}.png"))

    return avg, avg_inside, avg_outside, img_merge, results, evaluator
예제 #13
0
파일: main.py 프로젝트: romaad/msae
            util.printPerfForAll(args.p[0], 0)
        elif args.p[1] == "recall":
            util.printPerfForAll(args.p[0], 1)
        elif args.p[1] == "precrecall":
            util.printPerfForAll(args.p[0], 2)
        else:
            print "wrong metric, should be 'map' or 'precrecall' or 'recall'"

    #search with real-valued/binary latent features
    if args.s:
        # args.s layout (positional): query_path, label_path, feature file(s),
        # then metric and query size at the end — TODO confirm against the
        # argument parser definition.
        qpath = args.s[0]
        label = np.load(args.s[1])
        metric = args.s[-2]  #'hamming' or 'euclidean'
        qsize = int(args.s[-1])
        #query file will be created if not exists
        searcher = evaluate.Evaluator(qpath, label, "tmp", query_size=qsize)

        # Latent features either come bundled in one .mat file (fields
        # Bx_te / By_te) or as two separate .npy files (image, then text).
        if args.s[2].endswith(".mat"):
            dat = scipy.io.loadmat(args.s[2])
            img = dat['Bx_te']
            txt = dat['By_te']
        else:
            img = np.load(args.s[2])
            txt = np.load(args.s[3])
        assert (img.shape == txt.shape)
        #transpose if necessary
        # Heuristic: assumes more samples than feature dims, so a wide matrix
        # is sample-major after transposing — TODO confirm.
        if img.shape[0] < img.shape[1]:
            img = img.T
            txt = txt.T
        searcher.evalCrossModal(img, txt, '', 'T', metric=metric)