def _extract_frames(video_path, frame_dir, max_frames=100):
    """Save up to ``max_frames`` frames of ``video_path`` as 256x256 JPEGs.

    Frames are written to ``<frame_dir>/frame<N>.jpg`` where ``N`` is the
    1-based read attempt, so a failed read leaves a gap in the numbering.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # The frame limit is enforced by this counter; CAP_PROP_FRAME_COUNT
        # cannot be used to truncate a video file.
        for count in range(1, max_frames + 1):
            if not cap.isOpened():
                break
            _, frame = cap.read()
            if frame is None:
                continue
            # Resize to 256*256 to save disk space before feeding the model.
            frame = cv2.resize(frame, (256, 256),
                               interpolation=cv2.INTER_CUBIC)
            cv2.imwrite(frame_dir + "/frame" + str(count) + ".jpg", frame)
    finally:
        # Always give the decoder / file handle back, even on failure.
        cap.release()


def _write_tfrecords(image_paths, record_path):
    """Serialize each image (resized to 224x224, uint8) into a TFRecords file."""
    writer = tf.python_io.TFRecordWriter(record_path)
    try:
        for path in image_paths:
            with Image.open(path) as pil_img:
                # LANCZOS is the filter formerly exposed as Image.ANTIALIAS.
                img = np.array(pil_img.resize((224, 224), Image.LANCZOS))
            img = img.astype(np.uint8)
            feature = {
                "test/image": tf.train.Feature(
                    bytes_list=tf.train.BytesList(
                        value=[tf.compat.as_bytes(img.tobytes())]))
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
    finally:
        writer.close()


def predict(file_name):
    """Predict the five personality scores for one video.

    The first 100 frames of ``file_name`` are dumped to
    ``ImageData/testingData/<name without .mp4>/``, packed into
    ``test.tfrecords`` and pushed through the ``DAN`` network once for each
    of the checkpoints ``param1.pkl`` and ``param2.pkl``. All per-frame
    outputs are averaged and rounded to three decimals.

    Args:
        file_name: path of the input video (``.mp4``).

    Returns:
        dict mapping ``"Extraversion"``, ``"Neuroticism"``,
        ``"Agreeableness"``, ``"Conscientiousness"`` and ``"Openness"`` to
        the averaged model outputs at indices 0-4.

    Raises:
        ValueError: if no frame could be extracted from the video.
    """
    num = []
    video_stem = file_name.split(".mp4")[0]
    frame_dir = "ImageData/testingData/" + video_stem

    ## Creating folder to save all the 100 frames from the video
    try:
        os.makedirs(frame_dir)
    except OSError:
        # An already existing folder is fine; only report real failures.
        if not os.path.isdir(frame_dir):
            print("Error: Creating directory of data")

    _extract_frames(file_name, frame_dir, max_frames=100)

    frame_paths = glob.glob("ImageData/testingData/" + str(video_stem) + "/*.jpg")
    if not frame_paths:
        raise ValueError("no frames could be extracted from " + str(file_name))

    _write_tfrecords(frame_paths, "test.tfrecords")
    sys.stdout.flush()

    BATCH_SIZE = 20
    REG_PENALTY = 0
    NUM_IMAGES = 100

    imgs = tf.placeholder("float", [None, 224, 224, 3],
                          name="image_placeholder")
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8,
                                allow_growth=True)
    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=gpu_options)

    with tf.Session(config=config) as sess:
        model = DAN(imgs, REG_PENALTY=REG_PENALTY, preprocess="vggface")

        tr_reader = tf.TFRecordReader()
        tr_filename_queue = tf.train.string_input_producer(
            ["test.tfrecords"], num_epochs=1)
        _, tr_serialized_example = tr_reader.read(tr_filename_queue)
        tr_feature = {"test/image": tf.FixedLenFeature([], tf.string)}
        tr_features = tf.parse_single_example(tr_serialized_example,
                                              features=tr_feature)
        tr_image = tf.decode_raw(tr_features["test/image"], tf.uint8)
        tr_image = tf.reshape(tr_image, [224, 224, 3])
        tr_images = tf.train.shuffle_batch(
            [tr_image],
            batch_size=BATCH_SIZE,
            capacity=100,
            min_after_dequeue=BATCH_SIZE,
            allow_smaller_final_batch=True,
        )

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            # The queue yields every frame exactly once. Drain it up front so
            # that *each* checkpoint sees all frames; reading inside the
            # per-checkpoint loop would leave nothing for the second one.
            batches = []
            for _ in range(0, NUM_IMAGES, BATCH_SIZE):
                try:
                    batches.append(sess.run(tr_images))
                except tf.errors.OutOfRangeError:
                    # Fewer frames than NUM_IMAGES: the queue is exhausted.
                    break

            for pickle_file in ("param1.pkl", "param2.pkl"):
                model.load_trained_model(pickle_file, sess)
                for batch in batches:
                    output = sess.run(
                        [model.output],
                        feed_dict={imgs: batch.astype(np.float32)})
                    num.append(output[0])
        finally:
            coord.request_stop()
            # Wait for threads to stop
            coord.join(threads)

    a = np.round(np.mean(np.concatenate(num), axis=0), 3)
    a_json = {
        "Extraversion": a[0],
        "Neuroticism": a[1],
        "Agreeableness": a[2],
        "Conscientiousness": a[3],
        "Openness": a[4],
    }
    return a_json
from dan import DAN


def train(epochs, model, opt, loss_fn, device='cuda:0'):
    """Run a toy training loop over randomly generated regression batches.

    The model is moved to ``device`` and 100 batches of 64 samples
    (100 input features, 1 target) are sampled once and reused for every
    epoch. After each epoch the loss of the last batch is printed.

    Args:
        epochs: number of passes over the synthetic batches.
        model: module to optimise.
        opt: optimiser bound to ``model``'s parameters.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar.
        device: device string handed to ``torch.device``.
    """
    device = torch.device(device)
    model = model.to(device)

    # Inputs are drawn before targets for each batch, keeping the RNG stream
    # in a fixed order.
    dummy_data = []
    for _ in range(100):
        features = torch.randn(64, 100)
        targets = torch.randn(64, 1)
        dummy_data.append((features, targets))

    for epoch_idx in range(epochs):
        for features, targets in dummy_data:
            features = features.to(device)
            targets = targets.to(device)
            prediction = model(features)
            loss = loss_fn(prediction, targets)
            opt.zero_grad()
            loss.backward()
            opt.step()
        print(f'Epoch: {epoch_idx+1}/{epochs}, loss: {loss.item()}')


def seed_everything(seed):
    """Seed the torch CPU generator and the current CUDA generator."""
    for seeder in (torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)


if __name__ == '__main__':
    seed_everything(42)
    net = DAN(100, 1, act=nn.ReLU())
    adam = optim.Adam(net.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    train(100, net, adam, criterion)
def init_config(self, config):
    """Read paths from ``config`` and load or build ``self.model_config``.

    ``config`` may provide ``"model_path"``, ``"model_list"`` and
    ``"model_config_path"``; each missing key is stored as ``None``.

    * If ``model_config_path`` is given, ``self.model_config`` is unpickled
      from that file.
    * Otherwise a default table is built (one entry per enabled model, each
      holding a fresh model instance, its directory under ``model_path`` and
      an ``updated_config`` dict) and dumped to
      ``<model_path>/semantic_model_config_new.pkl``.
    """
    self.model_path = config.get("model_path", None)
    self.model_list = config.get("model_list", None)
    self.model_config_path = config.get("model_config_path", None)

    if self.model_config_path is not None:
        # NOTE(review): unpickling runs arbitrary code; model_config_path
        # must only ever point at a trusted file.
        with open(self.model_config_path, "rb") as config_file:
            if sys.version_info < (3, ):
                self.model_config = pkl.load(config_file)
            else:
                # Lets Python 3 read pickles written by Python 2.
                self.model_config = pkl.load(config_file,
                                             encoding="iso-8859-1")
        print("----succeeded in reading model config pkl-----")
    else:
        # (config key, model class, directory under model_path, device,
        #  model_id). Commented rows are models that are currently disabled.
        model_specs = [
            # ("SiameseLSTM_l1", SiameseLSTM, "siamese_lstm_l1_contrastive_metric", "/gpu:0", "18000"),
            ("SiameseLSTM_l2", SiameseLSTM, "siamese_lstm_l2_contrastive_metric", "/gpu:0", "21000"),
            ("SiameseCNN", SiameseCNN, "siamese_cnn", "/gpu:0", "1200"),
            # ("BIMPM", BIMPM, "bimpm", "/gpu:0", "4800"),
            # ("BIMPM_focal_loss", BIMPM, "bimpm_focal_loss", "/gpu:0", <no model_id>),
            ("MatchPyramid", MatchPyramid, "match_pyramid", "/gpu:0", "18000"),
            # ("MatchPyramid_focal_loss", MatchPyramid, "match_pyramid_focal_loss", "/gpu:0", <no model_id>),
            # ("DiSAN", DiSAN, "disan", "/gpu:0", "72000"),
            # ("BIMPM_NEW", BIMPM_NEW, "bimpm_new", "/gpu:2", "9000"),
            ("QACNN", QACNN, "qacnn", "/gpu:2", "14400"),
            ("DAN", DAN, "dan", "/gpu:2", "14400"),
            ("BiBLOSA", BiBLOSA, "biblosa", "/gpu:2", "14400"),
            ("BiBLOSA_DiSAN", BiBLOSA, "biblosa_disa", "/gpu:2", "14400"),
            ("TransformerEncoder", TransformerEncoder, "transformer_encoder", "/gpu:2", "14000"),
            ("DANFast", DANFast, "dan_fast", "/gpu:2", "14000"),
            ("LSTMMatchPyramid", LSTMMatchPyramid, "lstm_match_pyramid", "/gpu:2", "14000"),
        ]

        model_config = {}
        for name, model_cls, subdir, device, model_id in model_specs:
            model_dir = os.path.join(self.model_path, subdir)
            model_config[name] = {
                "model": model_cls(),
                "model_config_path": model_dir,
                "embed_path": model_dir,
                "updated_config": {
                    "gpu_ratio": 0.9,
                    "device": device,
                    "model_id": model_id,
                },
            }
        self.model_config = model_config

        # Persist the freshly built table; the context manager guarantees
        # the file is flushed and closed.
        dump_path = os.path.join(self.model_path,
                                 "semantic_model_config_new.pkl")
        with open(dump_path, "wb") as dump_file:
            pkl.dump(self.model_config, dump_file)
# NUM_VID = 500 NUM_IMAGES = 599900 NUM_TEST_IMAGES = 199900 # NUM_IMAGES = 10000 # NUM_TEST_IMAGES = 4000 N_EPOCHS = 1 imgs = tf.placeholder('float', [None, 224, 224, 3], name="image_placeholder") values = tf.placeholder('float', [None, 5], name="value_placeholder") config = tf.ConfigProto() config.gpu_options.allow_growth = True config.gpu_options.per_process_gpu_memory_fraction = 0.8 with tf.Session(config=config) as sess: model = DAN(imgs, REG_PENALTY=REG_PENALTY, preprocess='vggface') # output = model.output tr_reader = tf.TFRecordReader() tr_filename_queue = tf.train.string_input_producer( ['train_full.tfrecords'], num_epochs=N_EPOCHS) _, tr_serialized_example = tr_reader.read(tr_filename_queue) # Decode the record read by the reader tr_feature = { 'train/image': tf.FixedLenFeature([], tf.string), 'train/label': tf.FixedLenFeature([], tf.string) } tr_features = tf.parse_single_example(tr_serialized_example, features=tr_feature) # Convert the image data from string back to the numbers tr_image = tf.decode_raw(tr_features['train/image'], tf.uint8)
def visualize_loop(args, val_loader):
    """Run a trained model over ``val_loader`` and plot one figure per sample.

    The model class is chosen by ``args.model_type`` (one of ``SAN``,
    ``MCB``, ``MFB``, ``MLB``, ``MUTAN``, ``DAN``), weights are restored via
    ``load_weights`` and, for every sample of every batch, ``plot_att`` is
    called with a model-specific slice of the second model output.

    Args:
        args: options object; this function reads ``model_type``,
            ``encoder_type``, ``device``, ``input_base``, ``vocab`` and
            ``save_dir`` (``load_weights`` may read more).
        val_loader: iterable yielding ``(question, image_feature,
            ques_lengths, point_set, answer, image_name)`` batches, where
            each ``image_name`` has the form ``<log_id>@<frame index>``.

    Returns:
        None. Returns early, without plotting, when ``load_weights`` does
        not hand back a list (i.e. no checkpoint could be restored).

    Raises:
        ValueError: if ``args.model_type`` is not a supported model.
    """
    image_feature_size = 512
    lidar_feature_size = 1024

    # Question feature width expected by each model.
    question_feat_sizes = {
        'SAN': 512,
        'MCB': 512,
        'MFB': 512,
        'MLB': 1024,
        'MUTAN': 1024,
        'DAN': 512,
    }
    # Index applied to a single sample's `wgt` entry before plotting.
    everything = slice(None)
    attention_index = {
        'SAN': (everything, 1, everything),
        'MCB': (),
        'MFB': (everything, everything, 1),
        'MLB': (everything, 3, everything),
        'MUTAN': (everything, 1, everything),  # only two glimpses
        'DAN': (everything, 1, everything),    # only two memory
    }

    model_type = args.model_type
    if model_type not in question_feat_sizes:
        # Previously this fell through and failed later with a NameError.
        raise ValueError(
            "unsupported model_type %r; expected one of %s"
            % (model_type, sorted(question_feat_sizes)))

    model_classes = {
        'SAN': SAN,
        'MCB': MCB,
        'MFB': MFB,
        'MLB': MLB,
        'MUTAN': MUTAN,
        'DAN': DAN,
    }
    model = model_classes[model_type](
        args, question_feat_sizes[model_type], image_feature_size,
        lidar_feature_size, num_classes=34, qa=None,
        encoder=args.encoder_type, method='hierarchical')

    data = load_weights(args, model, optimizer=None)
    if not isinstance(data, list):
        # Nothing to visualise without trained weights.
        print(" error occured while loading model training freshly")
        return
    model, _, start_epoch, loss, accuracy = data
    print("Loaded weights")
    print("Epoch: %d, loss: %.3f, Accuracy: %.4f " %
          (start_epoch, loss, accuracy), flush=True)

    ###########################################################################multiple GPU use#
    # if torch.cuda.device_count() > 1:
    #     print("Using ", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model)
    model.to(device=args.device)
    model.eval()

    # Imported lazily so the module can be loaded without argoverse.
    from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader

    vocab = load_vocab(os.path.join(args.input_base, args.vocab))
    argoverse_loader = ArgoverseTrackingLoader(
        '../../../Data/train/argoverse-tracking')

    # image_name is "<log_id>@<frame index>"; compile once for all batches.
    pat = re.compile(r'(.*)@(.*)')
    att_idx = attention_index[model_type]
    k = 1  # running sample counter handed to plot_att
    with torch.no_grad():
        for data in tqdm(val_loader):
            question, image_feature, ques_lengths, point_set, answer, image_name = data
            question = question.to(device=args.device)
            ques_lengths = ques_lengths.to(device=args.device)
            image_feature = image_feature.to(device=args.device)
            point_set = point_set.to(device=args.device)

            pred, wgt, energies = model(question, image_feature,
                                        ques_lengths, point_set)

            question = question.cpu().data.numpy()
            answer = answer.cpu().data.numpy()
            pred = F.softmax(pred, dim=1)
            pred = torch.argmax(pred, dim=1)
            pred = np.asarray(pred.cpu().data)
            wgt = wgt.cpu().data.numpy()
            energies = energies.squeeze(1).cpu().data.numpy()
            ques_lengths = ques_lengths.cpu().data.numpy()

            for b in range(question.shape[0]):
                q = get_ques(question[b], ques_lengths[b], vocab)
                ans = get_ans(answer[b])
                pred_ans = get_ans(pred[b])

                log_id, frame_idx = pat.findall(image_name[b])[0]
                idx = int(frame_idx)
                print(k)
                argoverse_data = argoverse_loader.get(log_id)

                plot_att(argoverse_data, idx, wgt[(b,) + att_idx],
                         energies[b], q, ans, args.save_dir, k, pred_ans)
                k = k + 1
"/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan/emb_mat.pkl", "wb")) import json json.dump( config, open( "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan/config.json", "w")) api = ModelAPI( "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan", "/data/xuht/guoxin/poc/duplicate_sentence_model/duplicate_models/dan") api.load_config() model = DAN() api.build_graph(model) api.train_step([ train_anchor_matrix, train_check_matrix, train_label_matrix, train_anchor_len_matrix, train_check_len_matrix ], [ dev_anchor_matrix, dev_check_matrix, dev_label_matrix, dev_anchor_len_matrix, dev_check_len_matrix ]) elif model_type == "transformer_encoder": os.environ["CUDA_VISIBLE_DEVICES"] = "3" config = { "vocab_size": vocab_size, "max_length": 200,