Example #1
def predict(file_name):

    num = []

    cap = cv2.VideoCapture(file_name)

    file_name = (file_name.split(".mp4"))[0]
    ## Create a folder to hold the 100 frames extracted from the video
    try:
        os.makedirs("ImageData/testingData/" + file_name)
    except OSError:
        print("Error: could not create directory for frame data")

    ## Limit processing to 100 frames. Setting CAP_PROP_FRAME_COUNT has no
    ## effect on most backends, so the loop counter below enforces the limit.
    cap.set(cv2.CAP_PROP_FRAME_COUNT, 101)
    length = 101
    count = 0
    ## Loop over the frames and save each one into the created folder
    while cap.isOpened():
        count += 1
        if length == count:
            break
        _, frame = cap.read()
        if frame is None:
            continue

        ## Resize to 256x256 to save disk space and keep a model-friendly size
        frame = cv2.resize(frame, (256, 256), interpolation=cv2.INTER_CUBIC)
        # Saves image of the current frame in jpg file
        name = ("ImageData/testingData/" + str(file_name) + "/frame" +
                str(count) + ".jpg")
        cv2.imwrite(name, frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    def load_image(addr):
        # Note: Image.ANTIALIAS was renamed to Image.LANCZOS in newer Pillow releases
        img = np.array(Image.open(addr).resize((224, 224), Image.ANTIALIAS))
        img = img.astype(np.uint8)
        return img

    def _float_feature(value):
        return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    addrs = []

    filelist = glob.glob("ImageData/testingData/" + str(file_name) + "/*.jpg")
    addrs += filelist

    train_addrs = addrs
    train_filename = "test.tfrecords"  # address to save the TFRecords file
    writer = tf.python_io.TFRecordWriter(train_filename)
    for i in range(len(train_addrs)):
        # Load the image
        img = load_image(train_addrs[i])
        feature = {
            "test/image": _bytes_feature(tf.compat.as_bytes(img.tostring()))
        }
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())

    writer.close()
    sys.stdout.flush()

    BATCH_SIZE = 20
    REG_PENALTY = 0
    NUM_IMAGES = 100
    N_EPOCHS = 1

    imgs = tf.placeholder("float", [None, 224, 224, 3],
                          name="image_placeholder")
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8,
                                allow_growth=True)
    config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)

    with tf.Session(config=config) as sess:

        model = DAN(imgs, REG_PENALTY=REG_PENALTY, preprocess="vggface")
        tr_reader = tf.TFRecordReader()
        tr_filename_queue = tf.train.string_input_producer(["test.tfrecords"],
                                                           num_epochs=N_EPOCHS)
        _, tr_serialized_example = tr_reader.read(tr_filename_queue)
        tr_feature = {"test/image": tf.FixedLenFeature([], tf.string)}
        tr_features = tf.parse_single_example(tr_serialized_example,
                                              features=tr_feature)

        tr_image = tf.decode_raw(tr_features["test/image"], tf.uint8)
        tr_image = tf.reshape(tr_image, [224, 224, 3])
        tr_images = tf.train.shuffle_batch(
            [tr_image],
            batch_size=BATCH_SIZE,
            capacity=100,
            min_after_dequeue=BATCH_SIZE,
            allow_smaller_final_batch=True,
        )
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        file_list = ["param1.pkl", "param2.pkl"]
        epoch = 0
        for pickle_file in file_list:
            error = 0
            model.load_trained_model(pickle_file, sess)
            i = 0
            while i < NUM_IMAGES:
                i += BATCH_SIZE
                try:
                    epoch_x = sess.run(tr_images)
                except tf.errors.OutOfRangeError:
                    # Input queue exhausted; tolerate a few misses before stopping
                    if error >= 5:
                        break
                    error += 1
                    continue
                output = sess.run([model.output],
                                  feed_dict={imgs: epoch_x.astype(np.float32)})
                num.append(output[0])
            epoch += 1
        coord.request_stop()
        # Wait for threads to stop
        coord.join(threads)
    a = np.round(np.mean(np.concatenate(num), axis=0), 3)
    a_json = {
        "Extraversion": a[0],
        "Neuroticism": a[1],
        "Agreeableness": a[2],
        "Conscientiousness": a[3],
        "Openness": a[4],
    }
    return a_json
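
A minimal, hypothetical driver for the predict function above (cv2, numpy, TensorFlow 1.x, and the DAN class are assumed to be importable exactly as in the example; the video path is only a placeholder):

if __name__ == "__main__":
    # Hypothetical usage: extract frames, score them with both parameter files,
    # and print the averaged Big Five trait predictions
    traits = predict("sample_video.mp4")
    for trait, score in traits.items():
        print(trait, score)
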
Example #2
import torch
import torch.nn as nn
import torch.optim as optim

from dan import DAN

def train(epochs, model, opt, loss_fn, device='cuda:0'):
    device = torch.device(device)
    model = model.to(device)
    dummy_data = [(torch.randn(64, 100), torch.randn(64, 1)) for i in range(100)]
    for i in range(epochs):
        for dd in dummy_data:
            x, y = dd
            x = x.to(device)
            y = y.to(device)
            out = model(x)
            loss = loss_fn(out, y)
            opt.zero_grad()
            loss.backward()
            opt.step()
            
        print(f'Epoch: {i+1}/{epochs}, loss: {loss.item()}')
        
def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    
if __name__ == '__main__':
    seed_everything(42)

    model = DAN(100, 1, act=nn.ReLU())
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.MSELoss()

    train(100, model, optimizer, loss_fn)
Example #3
    def init_config(self, config):
        self.model_path = config.get("model_path", None)
        self.model_list = config.get("model_list", None)
        self.model_config_path = config.get("model_config_path", None)
        if self.model_config_path is not None:
            if sys.version_info < (3, ):
                self.model_config = pkl.load(open(self.model_config_path, "rb"))
            else:
                self.model_config = pkl.load(open(self.model_config_path, "rb"), encoding="iso-8859-1")
            print("----succeeded in reading model config pkl-----")
        else:

            self.model_config = {
                # "embed_path":self.model_path,
                # "SiameseLSTM_l1":{
                #     "model":SiameseLSTM(),
                #     "model_config_path":os.path.join(self.model_path, "siamese_lstm_l1_contrastive_metric"),
                #     "embed_path":os.path.join(self.model_path, "siamese_lstm_l1_contrastive_metric"),
                #     "updated_config":{"gpu_ratio":0.9, 
                #                     "device":"/gpu:0",
                #                     "model_id":"18000"}
                # },
                "SiameseLSTM_l2":{
                    "model":SiameseLSTM(),
                    "model_config_path":os.path.join(self.model_path, "siamese_lstm_l2_contrastive_metric"),
                    "embed_path":os.path.join(self.model_path, "siamese_lstm_l2_contrastive_metric"),
                    "updated_config":{"gpu_ratio":0.9, 
                                    "device":"/gpu:0",
                                    "model_id":"21000"}
                },
                "SiameseCNN":{
                    "model":SiameseCNN(),
                    "model_config_path":os.path.join(self.model_path, "siamese_cnn"),
                    "embed_path":os.path.join(self.model_path, "siamese_cnn"),
                    "updated_config":{"gpu_ratio":0.9, 
                                    "device":"/gpu:0",
                                    "model_id":"1200"}
                },
                # "BIMPM":{
                #     "model":BIMPM(),
                #     "model_config_path":os.path.join(self.model_path, "bimpm"),
                #     "embed_path":os.path.join(self.model_path, "bimpm"),
                #     "updated_config":{"gpu_ratio":0.9, 
                #                     "device":"/gpu:0",
                #                     "model_id":"4800"}
                # },
                # "BIMPM_focal_loss":{
                #     "model":BIMPM(),
                #     "model_config_path":os.path.join(self.model_path, "bimpm_focal_loss"),
                #     "embed_path":os.path.join(self.model_path, "bimpm_focal_loss"),
                #     "updated_config":{"gpu_ratio":0.9, 
                #                     "device":"/gpu:0"}
                # },
                "MatchPyramid":{
                    "model":MatchPyramid(),
                    "model_config_path":os.path.join(self.model_path, "match_pyramid"),
                    "embed_path":os.path.join(self.model_path, "match_pyramid"),
                    "updated_config":{"gpu_ratio":0.9, 
                                    "device":"/gpu:0",
                                    "model_id":"18000"}
                },
                # "MatchPyramid_focal_loss":{
                #     "model":MatchPyramid(),
                #     "model_config_path":os.path.join(self.model_path, "match_pyramid_focal_loss"),
                #     "embed_path":os.path.join(self.model_path, "match_pyramid_focal_loss"),
                #     "updated_config":{"gpu_ratio":0.9, 
                #                     "device":"/gpu:0"}
                # },
                # "DiSAN":{
                #     "model":DiSAN(),
                #     "model_config_path":os.path.join(self.model_path, "disan"),
                #     "embed_path":os.path.join(self.model_path, "disan"),
                #     "updated_config":{"gpu_ratio":0.9, 
                #                     "device":"/gpu:0",
                #                     "model_id":"72000"}
                # },
                # "BIMPM_NEW":{
                #     "model":BIMPM_NEW(),
                #     "model_config_path":os.path.join(self.model_path, "bimpm_new"),
                #     "embed_path":os.path.join(self.model_path, "bimpm_new"),
                #     "updated_config":{"gpu_ratio":0.9, 
                #                     "device":"/gpu:2",
                #                     "model_id":"9000"}
                # },
                "QACNN":{
                    "model":QACNN(),
                    "model_config_path":os.path.join(self.model_path, "qacnn"),
                    "embed_path":os.path.join(self.model_path, "qacnn"),
                    "updated_config":{"gpu_ratio":0.9, 
                                    "device":"/gpu:2",
                                    "model_id":"14400"}
                },
                "DAN":{
                    "model":DAN(),
                    "model_config_path":os.path.join(self.model_path, "dan"),
                    "embed_path":os.path.join(self.model_path, "dan"),
                    "updated_config":{"gpu_ratio":0.9, 
                                    "device":"/gpu:2",
                                    "model_id":"14400"}
                },
                "BiBLOSA":{
                    "model":BiBLOSA(),
                    "model_config_path":os.path.join(self.model_path, "biblosa"),
                    "embed_path":os.path.join(self.model_path, "biblosa"),
                    "updated_config":{"gpu_ratio":0.9, 
                                    "device":"/gpu:2",
                                    "model_id":"14400"}
                },
                "BiBLOSA_DiSAN":{
                    "model":BiBLOSA(),
                    "model_config_path":os.path.join(self.model_path, "biblosa_disa"),
                    "embed_path":os.path.join(self.model_path, "biblosa_disa"),
                    "updated_config":{"gpu_ratio":0.9, 
                                    "device":"/gpu:2",
                                    "model_id":"14400"}
                },
                "TransformerEncoder":{
                    "model":TransformerEncoder(),
                    "model_config_path":os.path.join(self.model_path, "transformer_encoder"),
                    "embed_path":os.path.join(self.model_path, "transformer_encoder"),
                    "updated_config":{
                        "gpu_ratio":0.9,
                        "device":"/gpu:2",
                        "model_id":"14000"
                    }
                },
                "DANFast":{
                    "model":DANFast(),
                    "model_config_path":os.path.join(self.model_path, "dan_fast"),
                    "embed_path":os.path.join(self.model_path, "dan_fast"),
                    "updated_config":{
                        "gpu_ratio":0.9,
                        "device":"/gpu:2",
                        "model_id":"14000"
                    }
                },
                "LSTMMatchPyramid":{
                    "model":LSTMMatchPyramid(),
                    "model_config_path":os.path.join(self.model_path, "lstm_match_pyramid"),
                    "embed_path":os.path.join(self.model_path, "lstm_match_pyramid"),
                    "updated_config":{
                        "gpu_ratio":0.9,
                        "device":"/gpu:2",
                        "model_id":"14000"
                    }
                }
            }
            pkl.dump(self.model_config, open(os.path.join(self.model_path, "semantic_model_config_new.pkl"), "wb"))
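
For reference, a minimal sketch of loading back the registry that init_config dumps above (pkl, os, and the semantic_model_config_new.pkl file name come from the method; the model_path value is only a placeholder, and unpickling requires the model classes such as SiameseLSTM and DAN to be importable):

import os
import pickle as pkl

model_path = "/path/to/duplicate_models"  # placeholder path
# Read the dumped registry and list the registered model names
with open(os.path.join(model_path, "semantic_model_config_new.pkl"), "rb") as f:
    model_config = pkl.load(f)
print(sorted(model_config.keys()))
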
Example #4
# NUM_VID = 500
NUM_IMAGES = 599900
NUM_TEST_IMAGES = 199900
# NUM_IMAGES = 10000
# NUM_TEST_IMAGES = 4000
N_EPOCHS = 1

imgs = tf.placeholder('float', [None, 224, 224, 3], name="image_placeholder")
values = tf.placeholder('float', [None, 5], name="value_placeholder")

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.8
with tf.Session(config=config) as sess:

    model = DAN(imgs, REG_PENALTY=REG_PENALTY, preprocess='vggface')
    # output = model.output

    tr_reader = tf.TFRecordReader()
    tr_filename_queue = tf.train.string_input_producer(
        ['train_full.tfrecords'], num_epochs=N_EPOCHS)
    _, tr_serialized_example = tr_reader.read(tr_filename_queue)
    # Decode the record read by the reader
    tr_feature = {
        'train/image': tf.FixedLenFeature([], tf.string),
        'train/label': tf.FixedLenFeature([], tf.string)
    }
    tr_features = tf.parse_single_example(tr_serialized_example,
                                          features=tr_feature)
    # Convert the image data from string back to the numbers
    tr_image = tf.decode_raw(tr_features['train/image'], tf.uint8)
Example #5
def visualize_loop(args, val_loader):

    image_feature_size = 512
    lidar_feature_size = 1024

    if args.model_type == 'SAN':
        question_feat_size = 512
        model = SAN(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MCB':
        question_feat_size = 512
        model = MCB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MFB':
        question_feat_size = 512
        # image_feature_size=512
        model = MFB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MLB':
        question_feat_size = 1024
        image_feature_size = 512
        model = MLB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MUTAN':
        question_feat_size = 1024
        image_feature_size = 512
        model = MUTAN(args,
                      question_feat_size,
                      image_feature_size,
                      lidar_feature_size,
                      num_classes=34,
                      qa=None,
                      encoder=args.encoder_type,
                      method='hierarchical')
    if args.model_type == 'DAN':
        question_feat_size = 512
        model = DAN(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')

    data = load_weights(args, model, optimizer=None)
    if type(data) == list:
        model, optimizer, start_epoch, loss, accuracy = data
        print("Loaded  weights")
        print("Epoch: %d, loss: %.3f, Accuracy: %.4f " %
              (start_epoch, loss, accuracy),
              flush=True)
    else:
        print(" error occured while loading model training freshly")
        model = data
        return

    # Optional: wrap the model for multi-GPU use
    # if torch.cuda.device_count() > 1:
    #     print("Using ", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model)

    model.to(device=args.device)
    model.eval()

    import argoverse
    from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
    from argoverse.utils.json_utils import read_json_file
    from argoverse.map_representation.map_api import ArgoverseMap

    vocab = load_vocab(os.path.join(args.input_base, args.vocab))
    argoverse_loader = ArgoverseTrackingLoader(
        '../../../Data/train/argoverse-tracking')

    k = 1
    with torch.no_grad():
        for data in tqdm(val_loader):
            question, image_feature, ques_lengths, point_set, answer, image_name = data
            question = question.to(device=args.device)
            ques_lengths = ques_lengths.to(device=args.device)
            image_feature = image_feature.to(device=args.device)
            point_set = point_set.to(device=args.device)

            pred, wgt, energies = model(question, image_feature, ques_lengths,
                                        point_set)

            question = question.cpu().data.numpy()
            answer = answer.cpu().data.numpy()
            pred = F.softmax(pred, dim=1)
            pred = torch.argmax(pred, dim=1)
            pred = np.asarray(pred.cpu().data)
            wgt = wgt.cpu().data.numpy()
            energies = energies.squeeze(1).cpu().data.numpy()
            ques_lengths = ques_lengths.cpu().data.numpy()
            pat = re.compile(r'(.*)@(.*)')
            _, keep = np.where([answer == pred])
            temp_batch_size = question.shape[0]
            for b in range(temp_batch_size):
                q = get_ques(question[b], ques_lengths[b], vocab)
                ans = get_ans(answer[b])
                pred_ans = get_ans(pred[b])
                # print(q,ans)
                c = list(re.findall(pat, image_name[b]))[0]
                log_id = c[0]
                idx = int(c[1])
                print(k)
                argoverse_data = argoverse_loader.get(log_id)
                if args.model_type == 'SAN':
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MCB':
                    plot_att(argoverse_data, idx, wgt[b], energies[b], q, ans,
                             args.save_dir, k, pred_ans)
                if args.model_type == 'MFB':
                    plot_att(argoverse_data, idx, wgt[b, :, :, 1], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MLB':
                    plot_att(argoverse_data, idx, wgt[b, :, 3, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MUTAN':  #only two glimpses
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'DAN':  #only two memory
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)

                k = k + 1
Example #6
            "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan/emb_mat.pkl",
            "wb"))

    import json
    json.dump(
        config,
        open(
            "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan/config.json",
            "w"))

    api = ModelAPI(
        "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan",
        "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan")

    api.load_config()
    model = DAN()
    api.build_graph(model)

    api.train_step([
        train_anchor_matrix, train_check_matrix, train_label_matrix,
        train_anchor_len_matrix, train_check_len_matrix
    ], [
        dev_anchor_matrix, dev_check_matrix, dev_label_matrix,
        dev_anchor_len_matrix, dev_check_len_matrix
    ])

elif model_type == "transformer_encoder":
    os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    config = {
        "vocab_size": vocab_size,
        "max_length": 200,