def train(params):
    assert params["mode"].lower() == "train", "change training mode to 'train'"

    print("Creating the vocab from:", params["vocab_path"])
    vocab = Vocab(params["vocab_path"], params["vocab_size"])

    print("Creating the embedding_matrix from:", params["vector_path"])
    embeddings_matrix = get_embedding(params["vocab_size"], params["embed_size"],
                                      vocab, params["vector_path"])

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params, embeddings_matrix)

    print("Creating the batcher ...")
    b = batcher(params["data_dir"], vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = params["checkpoint_dir"]
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=11)

    ckpt.restore(ckpt_manager.latest_checkpoint)
    if ckpt_manager.latest_checkpoint:
        print("Restored from {}".format(ckpt_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    tf.compat.v1.logging.info("Starting the training ...")
    train_model(model, b, params, ckpt, ckpt_manager, "output.txt")
def test(params):
    assert params["mode"].lower() in ["test", "eval"], \
        "change training mode to 'test' or 'eval'"
    print(params["beam_size"], params["batch_size"])
    assert params["beam_size"] == params["batch_size"], \
        "Beam size must be equal to batch_size, change the params"

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    embeddings_matrix = get_embedding(params["vocab_size"], params["embed_size"],
                                      vocab, params["vector_path"])

    tf.compat.v1.logging.info("Building the model ...")
    model = PGN(params, embeddings_matrix)

    print("Creating the batcher ...")
    b = batcher(params["data_dir"], vocab, params)

    print("Creating the checkpoint manager")
    checkpoint_dir = params["checkpoint_dir"]
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), PGN=model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=11)

    path = params["model_path"] if params["model_path"] else ckpt_manager.latest_checkpoint
    ckpt.restore(path)
    print("Model restored")

    for batch in b:
        yield beam_decode(model, batch, vocab, params)
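# A minimal sketch of the params dict the two drivers above expect. The keys are
# inferred from the lookups in train()/test(); the values shown here are
# assumptions for illustration, not the project's defaults.
params = {
    "mode": "train",                   # or "test" / "eval"
    "vocab_path": "data/vocab.txt",    # hypothetical path
    "vocab_size": 50000,
    "embed_size": 256,
    "vector_path": "data/vectors.txt", # hypothetical path
    "data_dir": "data/",
    "checkpoint_dir": "checkpoints/",
    "model_path": "",                  # empty -> fall back to latest checkpoint
    "beam_size": 4,
    "batch_size": 4,                   # test() asserts beam_size == batch_size
}
train(params)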
def get_embeddings():
    auth_header = request.headers.get("Authorization", " ")
    access_token = auth_header.split(" ")[1]
    if authorizeToken(access_token):
        logger.info('Getting emb results')
        json_ = json.loads(request.data)

        pet_type = json_.get('pet_type', "")
        if pet_type == '' or pet_type not in ['cat', 'dog']:
            # use the named logger consistently (the original mixed logger/logging)
            logger.debug('No pet type in request, allowed pet types are "cat" and "dog"')
            return 'No pet type in request, allowed pet types are "cat" and "dog"', 400

        img_list = json_.get('image', list())
        if len(img_list) == 0:
            logger.debug('No image part')
            return "please provide image", 400

        logger.debug("got image and pet type")
        img_np = np.array(img_list)
        img_np = img_np[np.newaxis, ...]

        emb_model = catface_model if pet_type == 'cat' else dogface_model
        emb = get_embedding(img_np, emb_model)
        logger.info("got embeddings")

        response = jsonify({'emb': emb.tolist(), 'pet_type': pet_type})
        return response
    else:
        return jsonify({"message": "Authentication failed"}), 401
async def register_face(name: str, refImage: UploadFile = File(...)):
    face = utils.extract_face(await refImage.read())
    mongodb.embeddings.insert_one({
        "faceName": name,
        "embedding": Binary(
            pickle.dumps(utils.get_embedding(embedder.model, face), protocol=2),
            subtype=128)
    })
    return {"Success": "Face is registered successfully"}
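# A hedged client-side sketch for exercising the endpoint above, assuming it is
# mounted at POST /register_face with the name passed as a query parameter; the
# URL, port, and parameter wiring are assumptions, not taken from the source.
import requests

with open("alice.jpg", "rb") as f:  # hypothetical reference image
    resp = requests.post(
        "http://localhost:8000/register_face",
        params={"name": "alice"},
        files={"refImage": f},
    )
print(resp.json())  # expected: {"Success": "Face is registered successfully"}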
def process_pet_face():
    auth_header = request.headers.get("Authorization", " ")
    access_token = auth_header.split(" ")[1]
    if authorizeToken(access_token):
        logger.info('Processing image results')
        json_ = json.loads(request.data)

        pet_type = json_.get('pet_type', [])
        if len(pet_type) == 0:
            logger.debug('No pet type in request')
            return 'No pet type in request', 400
        if any(x not in ['cat', 'dog'] for x in pet_type):
            logger.debug('allowed pet types are "cat" and "dog"')
            logger.debug(pet_type)
            return 'allowed pet types are "cat" and "dog"', 400

        file = json_.get('image', "")
        is_base64 = json_.get("is_base64", False)
        if file == "" or not is_base64:
            logger.debug('No image part')
            return "please provide image", 400

        img_b64 = base64.b64decode(file)
        img_pil = Image.open(io.BytesIO(img_b64))
        img_np = np.array(img_pil)
        logger.debug("got image and pet type")

        boxes, labels, kpts = predict_od_kpt_helper(img_np, img_pil)
        logger.info("calculated boxes and kpts")
        faces, labels = preprocess4embedding(img_np, labels, pet_type, kpts)
        logger.debug("preprocessed and aligned faces")

        embs = np.empty((0, 1, 32))
        for i, label in enumerate(labels):
            if label == 'cat':
                emb_model = catface_model
                logger.debug("using cat model")
            else:
                emb_model = dogface_model
                logger.debug("using dog model")  # was "using cat model" in both branches
            logger.debug(faces[i][0][0])
            emb = get_embedding(faces[i][np.newaxis, ...], emb_model)
            embs = np.append(embs, [emb], axis=0)
        logger.info("embeddings calculated")

        response = jsonify({
            "embs": embs.tolist(),
            "pet_types": labels,
            'kpts': kpts.tolist(),
            'boxes': boxes.tolist()
        })
        return response
    else:
        return jsonify({"message": "Authentication failed"}), 401
def train_model(self, X, y, labels, word_index, MAX_SEQUENCE_LENGTH,
                model_save_directory='./models/'):
    """Train deep learning model"""
    embedding_matrix, nb_words = get_embedding('glove', word_index)

    input1 = Input(shape=(MAX_SEQUENCE_LENGTH,))
    embedding = Embedding(input_dim=len(embedding_matrix),
                          output_dim=self.embedding_dim,
                          weights=[embedding_matrix],
                          input_length=MAX_SEQUENCE_LENGTH,
                          trainable=False)(input1)
    # embedding = Dropout(self.drop_rate_embedding)(embedding)
    model = Bidirectional(LSTM(units=self.num_lstm_units,
                               return_sequences=True,
                               recurrent_dropout=self.drop_rate_lstm))(embedding)
    model = TimeDistributed(Dense(units=self.num_lstm_units,
                                  activation=self.activation_function))(model)
    crf = CRF(units=len(labels))
    output1 = crf(model)

    model = Model(input1, output1)
    model.compile(optimizer='rmsprop',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    print(model.summary())

    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    STAMP = 'lstm_%f_%.2f' % (self.num_lstm_units, self.drop_rate_lstm)
    checkpoint_dir = model_save_directory + 'checkpoints/' + str(int(time())) + '/'
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # bst_model_path was referenced but never defined in the original snippet;
    # deriving it from checkpoint_dir + STAMP is an assumption.
    bst_model_path = checkpoint_dir + STAMP

    with open(bst_model_path + ".json", "w") as json_file:
        json_file.write(model.to_json())
def search():
    description = request.args.get('description')
    address = request.args.get('address')
    salary = request.args.get('salary')

    try:
        limit = int(request.args.get('limit', 20))
        offset = int(request.args.get('offset', 0))
    except (TypeError, ValueError):  # avoid a bare except
        limit = 20
        offset = 0

    total = 1000
    result = {'jobs': [], 'total': total}
    if offset > 1000:
        return jsonify(result)

    try:
        salary = int(salary)
    except (TypeError, ValueError):
        salary = None

    skills = []
    levels = []
    if description is not None:
        skills, levels = parse_description(description)
    print(skills, levels)

    embedding = get_embedding(skills, levels, salary)

    candidate_df = jobs_df
    if isinstance(address, str):
        address = address.lower().strip()
        if address in ADDRESSES:
            candidate_df = candidate_df[candidate_df['address'] == address].copy()

    candidate_embeddings = np.asarray(list(candidate_df['embedding']))
    candidate_distance = np.sum(
        np.sqrt(((candidate_embeddings - embedding) * PROPERTY_WEIGHTS)**2),
        axis=1) / np.sum(candidate_embeddings, axis=1)
    candidate_df['distance'] = candidate_distance
    candidate_df = candidate_df.sort_values(by='distance')
    candidate_df = candidate_df[SERIALIZE_PROPERTIES]
    candidate_df = candidate_df.iloc[offset:offset + limit]

    candidate_result = candidate_df.to_dict('records')
    result = {'jobs': candidate_result, 'total': total}
    return jsonify(result)
async def recognize_Face(mainImage: UploadFile = File(...)):
    # Extract face
    face = utils.extract_face(await mainImage.read())
    # Generate embedding
    embedding = utils.get_embedding(embedder.model, face)
    # Compare against the registered (original) embeddings
    distance, identityIndex = faiss_utils.searchEmbedding(
        np.expand_dims(embedding, axis=0))
    if distance < 0.8:
        prediction = mongodb.embeddings.find()[identityIndex]["faceName"]
    else:
        prediction = "Unknown"
    print(prediction)
    print(distance)
    return {"Prediction": str(prediction), "L2Distance": float(distance)}
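# A minimal sketch of what faiss_utils.searchEmbedding could look like, assuming
# a flat L2 index over the stored embeddings. The helper name matches the call
# above, but this body is an assumption, not the project's implementation.
import faiss
import numpy as np

def searchEmbedding(query: np.ndarray, index: faiss.IndexFlatL2):
    # query has shape (1, d); return the nearest neighbour's distance and index.
    # Note: IndexFlatL2 reports *squared* L2 distances.
    distances, indices = index.search(query.astype(np.float32), 1)
    return float(distances[0][0]), int(indices[0][0])

# Building the index (d = embedding dimension, stored = (n, d) float32 array):
#   index = faiss.IndexFlatL2(d); index.add(stored)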
def main(config=None, lang='eng'):
    # create instance of config
    if config is None:
        config = Config()

    train, word_to_idx = parse_dataset_muse(config.filename_train,
                                            config.label_to_idx,
                                            pos_target=config.pos_target)
    dev, word_to_idx = parse_dataset_muse(config.filename_dev,
                                          config.label_to_idx, word_to_idx,
                                          pos_target=config.pos_target)

    lang_map = {'eng': 'en', 'ger': 'de', 'esp': 'es', 'ned': 'nl'}
    vectors = FastText(aligned=True, cache='.word_vectors_cache',
                       language=lang_map[lang])
    # vectors = FastText(aligned=True, cache='.word_vectors_cache')

    embed_table = get_embedding(vectors, word_to_idx)
    embedder_muse = MUSEEmbedder(word_to_idx, embed_table)

    fit(config, embedder_muse, train, dev)
def main(train_data_file, test_data_file, vocab_file, target_file, emb_file,
         model_save_dir, num_passes=10, batch_size=32):
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    word_dict = load_dict(vocab_file)
    label_dict = load_dict(target_file)
    word_vector_values = get_embedding(emb_file)

    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)

    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    crf_cost, crf_dec, target = ner_net(word_dict_len, label_dict_len)
    evaluator.sum(name="error", input=crf_dec)
    evaluator.chunk(
        name="ner_chunk",
        input=crf_dec,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=(label_dict_len - 1) // 2)  # integer division for Python 3

    # create parameters
    parameters = paddle.parameters.create(crf_cost)
    parameters.set("emb", word_vector_values)

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-4,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        gradient_clipping_threshold=25,
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000))

    trainer = paddle.trainer.SGD(
        cost=crf_cost,
        parameters=parameters,
        update_equation=optimizer,
        extra_layers=crf_dec)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            reader.data_reader(train_data_file, word_dict, label_dict),
            buf_size=1000),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(
            reader.data_reader(test_data_file, word_dict, label_dict),
            buf_size=1000),
        batch_size=batch_size)

    feeding = {"word": 0, "mark": 1, "target": 2}

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 1 == 0:
                logger.info("Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))
            if event.batch_id % 1 == 0:
                result = trainer.test(reader=test_reader, feeding=feeding)
                logger.info("\nTest with Pass %d, Batch %d, %s" % (
                    event.pass_id, event.batch_id, result.metrics))
        if isinstance(event, paddle.event.EndPass):
            # save parameters
            with gzip.open(
                    os.path.join(model_save_dir,
                                 "params_pass_%d.tar.gz" % event.pass_id),
                    "w") as f:
                parameters.to_tar(f)
            result = trainer.test(reader=test_reader, feeding=feeding)
            logger.info("\nTest with Pass %d, %s" % (event.pass_id, result.metrics))

    trainer.train(
        reader=train_reader,
        event_handler=event_handler,
        num_passes=num_passes,
        feeding=feeding)
print("[INFO] Loading Keras Facenet model...") model = facenet_keras_model("model/facenet_keras.h5") model.summary() else: model = face_model((3, 96, 96)) model.summary() print('[INFO] Loading model weights...') load_weights_from_FaceNet(model) # Convert each face image into embedding print("[INFO] Converting faces into embedding...") newTrainX = list() for face_pixels in trainX: embedding = get_embedding(face_pixels, model, args["channels_first"], mode=args["scale"]) newTrainX.append(embedding) newTrainX = np.asarray(newTrainX) print("[INFO] newTrainX shape: {}".format(newTrainX.shape)) newTestX = list() for face_pixels in testX: embedding = get_embedding(face_pixels, model, args["channels_first"], mode=args["scale"]) newTestX.append(embedding)
def main(_):
    # Load the configuration file.
    with open(FLAGS.config, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)  # explicit Loader avoids the PyYAML warning
    print('**********', config['experiment_name'], '**********')

    """ Cuda Check """
    if torch.cuda.is_available():
        print('Using GPU!')
    else:
        print('No GPU!')

    """ Data Preprocessing """
    if config['data_preprocessing']:
        print('Pre-processing Original Data ...')
        data_preprocessing()
        print('Data Pre-processing Done!')

    """ Read Data & Get Embedding """
    train_data = pd.read_csv('input/cleaned_train.csv')
    test_data = pd.read_csv('input/cleaned_test.csv')

    # split dataset
    msk = np.random.rand(len(train_data)) < 0.8
    train = train_data[msk]
    valid = train_data[~msk]
    all_sents = (train_data['s1'].tolist() + train_data['s2'].tolist() +
                 test_data['s1'].tolist() + test_data['s2'].tolist())

    # dataset
    trainDS = myDS(train, all_sents)
    validDS = myDS(valid, all_sents)
    print('Data size:', train_data.shape[0], test_data.shape[0])

    full_embed_path = config['embedding']['full_embedding_path']
    cur_embed_path = config['embedding']['cur_embedding_path']

    if os.path.exists(cur_embed_path) and not config['make_dict']:
        embed_dict = load_embed(cur_embed_path)
        print('Loaded existing embedding.')
    else:
        print('Making embedding...')
        embed_dict = get_embedding(trainDS.vocab._id2word, full_embed_path)
        save_embed(embed_dict, cur_embed_path)
        print('Saved generated embedding.')

    vocab_size = len(embed_dict)

    # initialize nn embedding
    embedding = nn.Embedding(vocab_size, config['model']['embed_size'])
    embed_list = []
    for word in trainDS.vocab._id2word:
        embed_list.append(embed_dict[word])
    weight_matrix = np.array(embed_list)
    # pass weights to nn embedding
    embedding.weight = nn.Parameter(
        torch.from_numpy(weight_matrix).type(torch.FloatTensor),
        requires_grad=False)

    """ Model Preparation """
    # embedding
    config['embedding_matrix'] = embedding
    config['vocab_size'] = len(embed_dict)
    # model
    siamese = Siamese_lstm(config)
    print(siamese)

    # loss func
    loss_weights = Variable(torch.FloatTensor([1, 3]))
    if torch.cuda.is_available():
        loss_weights = loss_weights.cuda()
    criterion = torch.nn.CrossEntropyLoss(loss_weights)

    # optimizer
    learning_rate = config['training']['learning_rate']
    if config['training']['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(
            filter(lambda x: x.requires_grad, siamese.parameters()), lr=learning_rate)
    elif config['training']['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(
            filter(lambda x: x.requires_grad, siamese.parameters()), lr=learning_rate)
    elif config['training']['optimizer'] == 'adadelta':
        optimizer = torch.optim.Adadelta(
            filter(lambda x: x.requires_grad, siamese.parameters()), lr=learning_rate)
    elif config['training']['optimizer'] == 'rmsprop':
        optimizer = torch.optim.RMSprop(
            filter(lambda x: x.requires_grad, siamese.parameters()), lr=learning_rate)
    print('Optimizer:', config['training']['optimizer'])
    print('Learning rate:', config['training']['learning_rate'])

    # log info
    train_log_string = '%s :: Epoch %i :: Iter %i / %i :: train loss: %0.4f'
    valid_log_string = '%s :: Epoch %i :: valid loss: %0.4f\n'

    # Restore saved model (if one exists).
    ckpt_path = os.path.join(config['ckpt_dir'], config['experiment_name'] + '.pt')
    if os.path.exists(ckpt_path):
        print('Loading checkpoint: %s' % ckpt_path)
        ckpt = torch.load(ckpt_path)
        epoch = ckpt['epoch']
        siamese.load_state_dict(ckpt['siamese'])
        optimizer.load_state_dict(ckpt['optimizer'])
    else:
        epoch = 1
        print('Fresh start!\n')

    if torch.cuda.is_available():
        criterion = criterion.cuda()
        siamese = siamese.cuda()

    """ Train """
    if config['task'] == 'train':
        # save every epoch for visualization
        train_loss_record = []
        valid_loss_record = []
        best_record = 10.0

        # training
        print('Experiment: {}\n'.format(config['experiment_name']))
        while epoch < config['training']['num_epochs']:
            print('Start Epoch {} Training...'.format(epoch))

            # loss
            train_loss = []
            train_loss_sum = []
            # dataloader
            train_dataloader = DataLoader(dataset=trainDS, shuffle=True,
                                          num_workers=2, batch_size=1)
            for idx, data in enumerate(train_dataloader, 0):
                # get data
                s1, s2, label = data
                # clear gradients
                optimizer.zero_grad()
                # forward pass
                output = siamese(s1, s2)
                output = output.squeeze(0)
                # label cuda
                label = Variable(label)
                if torch.cuda.is_available():
                    label = label.cuda()
                # loss backward
                loss = criterion(output, label)
                loss.backward()
                optimizer.step()
                train_loss.append(loss.data.cpu())
                train_loss_sum.append(loss.data.cpu())

                # Every once in a while, check on the loss
                if ((idx + 1) % 5000) == 0:
                    print(train_log_string % (datetime.now(), epoch, idx + 1,
                                              len(train), np.mean(train_loss)))
                    train_loss = []

            # Record at every epoch
            print('Train Loss at epoch {}: {}\n'.format(epoch, np.mean(train_loss_sum)))
            train_loss_record.append(np.mean(train_loss_sum))

            # Valid
            print('Epoch {} Validating...'.format(epoch))
            valid_loss = []
            valid_dataloader = DataLoader(dataset=validDS, shuffle=True,
                                          num_workers=2, batch_size=1)
            for idx, data in enumerate(valid_dataloader, 0):
                s1, s2, label = data
                output = siamese(s1, s2)
                output = output.squeeze(0)
                label = Variable(label)
                if torch.cuda.is_available():
                    label = label.cuda()
                loss = criterion(output, label)
                valid_loss.append(loss.data.cpu())

            print(valid_log_string % (datetime.now(), epoch, np.mean(valid_loss)))
            # Record
            valid_loss_record.append(np.mean(valid_loss))
            epoch += 1

            if np.mean(valid_loss) - np.mean(train_loss_sum) > 0.02:
                print("Early Stopping!")
                break

            # Keep track of best record
            if np.mean(valid_loss) < best_record:
                best_record = np.mean(valid_loss)
                # save the best model
                state_dict = {
                    'epoch': epoch,
                    'siamese': siamese.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state_dict, ckpt_path)
                print('Model saved!\n')

    """ Inference """
    if config['task'] == 'inference':
        testDS = mytestDS(test_data, all_sents)
        # Do not shuffle here
        test_dataloader = DataLoader(dataset=testDS, num_workers=2, batch_size=1)

        result = []
        for idx, data in enumerate(test_dataloader, 0):
            # get data
            s1, s2 = data
            # forward pass
            output = siamese(s1, s2)
            output = output.squeeze(0)
            # feed output into softmax to get probability prediction
            sm = nn.Softmax(dim=1)
            res = sm(output.data)[:, 1]
            result += res.data.tolist()

        result = pd.DataFrame(result)
        print(result.shape)
        print('Inference Done.')
        res_path = os.path.join(config['result']['filepath'], config['result']['filename'])
        result.to_csv(res_path, header=False, index=False)
        print('Result has been written to', res_path, ', Good Luck!')
import utils
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Similarity matrix
    triangle_2d = np.ones((3, 3))

    # Embeddings in different dimensions
    embedding_1 = utils.get_embedding(triangle_2d, 1)
    embedding_2 = utils.get_embedding(triangle_2d, 2)
    embedding_3 = utils.get_embedding(triangle_2d, 3)
    print(embedding_1, embedding_2, embedding_3)

    utils.visualize(embedding_1, 1)
    utils.visualize(embedding_2, 2)
    utils.visualize(embedding_3, 3)
# -*- coding: utf-8 -*-
"""
Created on 2020-09-01 16:57

@Author : Justin Jiang
@Email  : [email protected]
"""

from model import LSTMclf
from utils import read_data, get_embedding
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
import numpy as np

if __name__ == '__main__':
    embedding_dict = get_embedding()

    print('Preprocessing data...')
    training_path = '../data/cnews.train.txt'
    valid_path = '../data/cnews.val.txt'

    train_labels, train_texts = read_data(training_path)
    train_x = pad_sequences(train_texts, maxlen=600)
    train_y = to_categorical(np.asarray(train_labels))

    valid_labels, valid_texts = read_data(valid_path)
    valid_x = pad_sequences(valid_texts, maxlen=600)
    valid_y = to_categorical(np.asarray(valid_labels))

    print("Building model...")
    lstm_clf = LSTMclf()
    clf_model = lstm_clf.model()
if __name__ == '__main__':
    # load saved face images
    faces = []
    labels = []
    class_id = classes[name]  # renamed from `id` to avoid shadowing the builtin
    img_dir = "./img-save/" + name + "/"
    for filename in os.listdir(img_dir):
        image = cv2.imread(img_dir + filename)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (160, 160))
        face_array = np.asarray(image)
        faces.append(face_array)
        labels.append(int(class_id))
    faces = np.array(faces)
    print(faces.shape)

    # embedding
    x_train = []
    for face in faces:
        x = get_embedding(model, face)
        x_train.append(x)
    x_train = np.array(x_train)
    y_train = np.array(labels)
    print(y_train)
    print(x_train.shape)
    print(y_train.shape)

    # save embeddings (the original comment said "save model")
    np.savez_compressed('./data-embedded/' + embedded_data, x_train, y_train)
    print("Done!")
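# A small follow-up sketch: np.savez_compressed with positional arrays stores
# them under the default keys 'arr_0' and 'arr_1', so they can be read back as
# below. This assumes embedded_data already ends in '.npz'; NumPy appends the
# extension on save otherwise.
data = np.load('./data-embedded/' + embedded_data)
x_train, y_train = data['arr_0'], data['arr_1']
print(x_train.shape, y_train.shape)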
def __init__(self, segment_model, dim_info, config, init_checkpoint, tokenizer,
             learning_rate, init_embedding=None):
    uni_embedding = None
    bi_embedding = None
    if init_embedding is not None:
        uni_embedding = utils.get_embedding(init_embedding, tokenizer.vocab,
                                            config.embedding_size)
        if "bigram_vocab" in tokenizer.__dict__:
            bi_embedding = utils.get_embedding(init_embedding,
                                               tokenizer.bigram_vocab,
                                               config.embedding_size)

    self.input_ids = tf.placeholder(
        dtype=tf.int64,
        shape=[None, None, dim_info.feature_dims['input_ids']],
        name='input_ids')
    self.input_dicts = tf.placeholder(
        dtype=tf.int64,
        shape=[None, None, dim_info.feature_dims['input_dicts']],
        name='input_dicts')
    if dim_info.label_dim == 1:
        self.label_ids = tf.placeholder(dtype=tf.int64, shape=[None, None],
                                        name='label_ids')
    else:
        self.label_ids = tf.placeholder(
            dtype=tf.int64, shape=[None, None, dim_info.label_dim],
            name='label_ids')
    self.seq_length = tf.placeholder(dtype=tf.int64, shape=[None],
                                     name='seq_length')
    self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                            name='dropout_keep_prob')

    self.learning_rate = tf.Variable(learning_rate, trainable=False)
    self.new_learning_rate = tf.placeholder(tf.float32, shape=[],
                                            name="new_learning_rate")

    features = {
        "input_ids": self.input_ids,
        "input_dicts": self.input_dicts,
        "label_ids": self.label_ids,
        "seq_length": self.seq_length
    }

    self.model = segment_model(config, features, self.dropout_keep_prob,
                               init_embeddings={
                                   "uni_embedding": uni_embedding,
                                   "bi_embedding": bi_embedding
                               })

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = model_utils.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        utils.variable_summaries(var)
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    (loss, label_ids, prediction, seq_length) = self.model.get_all_results()

    l2_reg_lamda = config.l2_reg_lamda
    clip = 5
    with tf.variable_scope('train_op'):
        self.lr_update = tf.assign(self.learning_rate, self.new_learning_rate)
        global_step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        if l2_reg_lamda > 0:
            l2_loss = tf.add_n([
                tf.nn.l2_loss(v) for v in tvars
                if (v.get_shape().ndims > 1 and "rate" not in v.name)
            ])
            tf.logging.info("**** L2 Loss Variables ****")
            for var in tvars:
                if var.get_shape().ndims > 1 and "rate" not in var.name:
                    tf.logging.info("  name = %s, shape = %s", var.name, var.shape)
            total_loss = loss + l2_reg_lamda * l2_loss
        else:
            total_loss = loss
        if config.clip_grad:
            grads, _ = tf.clip_by_global_norm(tf.gradients(total_loss, tvars), clip)
            train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                 global_step=global_step)
        else:
            train_op = optimizer.minimize(total_loss, global_step=global_step)

    self.loss = loss
    self.total_loss = total_loss
    self.seq_length = seq_length
    self.prediction = prediction
    self.train_op = train_op
d_action = 4
is_embdist = not is_truedist
is_shapedreward = not is_binaryreward

first_task, last_task = 0, 49
for i_task, (start, goal) in enumerate(test_tasks):
    if i_task < first_task or i_task > last_task:
        continue
    print("\n\n### Task %d ###" % i_task)

    """ Set up """
    if is_embdist:
        env.reset(goal)
        o_goal = env.render(mode='rgb_array')
        s_goal = get_embedding(o_goal, model).cpu().numpy()
        kwargs["emb_goal"] = s_goal

    policy_net = DQN(d_state, d_action).cuda()
    target_net = DQN(d_state, d_action).cuda()
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()
    optimizer = optim.RMSprop(policy_net.parameters())
    memory = ReplayMemory(len(transitions))

    if not os.path.exists(os.path.join(save_path, "%d" % i_task)):
        os.makedirs(os.path.join(save_path, "%d" % i_task))

    """ Push data into memory """
from main import verification
from model import build_model
from utils import capture, get_embedding

if __name__ == "__main__":
    model = build_model()
    # capture()
    employees = get_embedding(model)
    verification(deepface=model, employees=employees)
parser.add_argument('--seed', type=int, default=233,
                    help="random seed for initialization")
args = parser.parse_args()
logger.info('Args: {}'.format(args))

config = utils.Config(args.model_dir)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
utils.set_seed(args.seed)

processor = utils.LcqmcProcessor(args.data_dir)
label2id = processor.get_label2id()
tokenizer = utils.CharTokenizer(args.model_dir)
config.vocab_size = len(tokenizer.vocab)

embedding = utils.get_embedding(
    tokenizer.vocab, config.embedding_path, config.embedding_dim)
model = models.Esim(config, torch.FloatTensor(embedding)).to(device)
logger.info(model)
loss_fn = nn.CrossEntropyLoss()

if args.do_train:
    train_examples = processor.get_train_examples()
    dev_examples = processor.get_dev_examples()
    train_features = utils.examples_to_ids(
        train_examples, label2id, tokenizer, max_len=args.max_len, verbose=True)
    dev_features = utils.examples_to_ids(
        dev_examples, label2id, tokenizer, max_len=args.max_len, verbose=True)
    train_dataloader = DataLoader(utils.MyDataset(train_features),
                                  batch_size=args.train_batch_size,
def get_embedding_from_row(row):
    embedding = get_embedding(row['skills'], [row['level']],
                              (row['salary_min'] + row['salary_max']) // 2)
    return embedding
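# A hedged usage sketch: materializing one embedding per job row with pandas'
# row-wise apply. The jobs_df column names match the lookups above, but building
# the 'embedding' column this way is an assumption about how search() gets its data.
jobs_df['embedding'] = jobs_df.apply(get_embedding_from_row, axis=1)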
def model_fn_builder(segmentModel, config, init_checkpoint, tokenizer,
                     learning_rate, num_train_steps, num_warmup_steps,
                     init_embedding=None):
    """Returns `model_fn` closure for TPUEstimator."""
    embedding = None
    if init_embedding is not None:
        embedding = utils.get_embedding(init_embedding, tokenizer.vocab,
                                        config.embedding_size)

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = segmentModel(config, is_training, features, embedding)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = model_utils.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            utils.variable_summaries(var)
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            (total_loss, per_example_loss, label_ids, prediction,
             seq_length) = model.get_all_results()
            weight = tf.sequence_mask(seq_length, dtype=tf.int64)
            accuracy = tf.metrics.accuracy(label_ids, prediction, weights=weight)
            tf.summary.scalar('accuracy', accuracy[1])

            l2_reg_lamda = config.l2_reg_lamda
            clip = 5
            with tf.variable_scope('train_op'):
                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                total_loss = total_loss + l2_reg_lamda * l2_loss
                grads, _ = tf.clip_by_global_norm(tf.gradients(total_loss, tvars), clip)
                global_step = tf.train.get_or_create_global_step()
                train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                     global_step=global_step)
            logging_hook = tf.train.LoggingTensorHook({"accuracy": accuracy[1]},
                                                      every_n_iter=100)
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op,
                                                     training_hooks=[logging_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:
            (total_loss, per_example_loss, label_ids, prediction,
             seq_length) = model.get_all_results()
            loss = tf.metrics.mean(per_example_loss)
            weight = tf.sequence_mask(seq_length, dtype=tf.int64)
            accuracy = tf.metrics.accuracy(label_ids, prediction, weights=weight)
            metrics = {"eval_loss": loss, "eval_accuracy": accuracy}
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     eval_metric_ops=metrics)
        else:
            input_ids = features["input_ids"]
            label_ids = features["label_ids"]
            (_, _, _, prediction, seq_length) = model.get_all_results()
            predictions = {
                "input_ids": input_ids,
                "prediction": prediction,
                "ground_truths": label_ids,
                "length": seq_length
            }
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=predictions)
        return output_spec

    return model_fn
embeddings = np.zeros([len(paths), 512])

# arcface = ArcFace(classnum=10).to('cuda')
# backbone = arcface.backbone
# backbone.load_state_dict(torch.load('resnet50.pth'))
mobileFacenet = MobileFaceNet(512).to('cuda')
mobileFacenet.load_state_dict(torch.load('mobilefacenet.pth'))
mobileFacenet.eval()

with torch.no_grad():
    for idx, path in enumerate(paths):
        print('process image no:', idx)
        img = Image.open(path)
        embedding = get_embedding(mobileFacenet, img, tta=False, device='cuda')
        embeddings[idx] = embedding.cpu().numpy()

# np.save('temp2.npy', embeddings)
# embeddings = np.load('temp.npy')
tpr, fpr, auc, accuracy, best_threshold = lfw.evaluate(embeddings, actual_issame)
print('tpr:', tpr)
print('fpr:', fpr)
print('auc:', auc)
print('acc:', accuracy)
print('best_threshold:', best_threshold)
def test(args):
    setup_seed(2333)
    import warnings
    warnings.filterwarnings('ignore')

    if args.dataset == 'cub':
        num_classes = 100
    elif args.dataset == 'tieredimagenet':
        num_classes = 351
    else:
        num_classes = 64

    if args.resume is not None:
        from models.resnet12 import resnet12
        model = resnet12(num_classes).to(args.device)
        state_dict = torch.load(args.resume)
        model.load_state_dict(state_dict)
    model.to(args.device)
    model.eval()

    ici = ICI(classifier=args.classifier, num_class=args.num_test_ways,
              step=args.step, reduce=args.embed, d=args.dim)

    data_root = os.path.join(args.folder, args.dataset)
    dataset = DataSet(data_root, 'test', args.img_size)
    sampler = CategoriesSampler(dataset.label, args.num_batches,
                                args.num_test_ways,
                                (args.num_shots, 15, args.unlabel))
    testloader = DataLoader(dataset, batch_sampler=sampler, shuffle=False,
                            num_workers=0, pin_memory=True)
    k = args.num_shots * args.num_test_ways
    loader = tqdm(testloader, ncols=0)

    iterations = (math.ceil(args.unlabel / args.step) + 2
                  if args.unlabel != 0 else math.ceil(15 / args.step) + 2)
    acc_list = [[] for _ in range(iterations)]

    for data, indicator in loader:
        targets = torch.arange(args.num_test_ways).repeat(
            args.num_shots + 15 + args.unlabel).long()[
                indicator[:args.num_test_ways *
                          (args.num_shots + 15 + args.unlabel)] != 0]
        data = data[indicator != 0].to(args.device)

        train_inputs = data[:k]
        train_targets = targets[:k].cpu().numpy()
        test_inputs = data[k:k + 15 * args.num_test_ways]
        test_targets = targets[k:k + 15 * args.num_test_ways].cpu().numpy()

        train_embeddings = get_embedding(model, train_inputs, args.device)
        ici.fit(train_embeddings, train_targets)
        test_embeddings = get_embedding(model, test_inputs, args.device)

        if args.unlabel != 0:
            unlabel_inputs = data[k + 15 * args.num_test_ways:]
            unlabel_embeddings = get_embedding(model, unlabel_inputs, args.device)
        else:
            unlabel_embeddings = None

        acc = ici.predict(test_embeddings, unlabel_embeddings, True, test_targets)
        for i in range(min(iterations - 1, len(acc))):
            acc_list[i].append(acc[i])
        acc_list[-1].append(acc[-1])

    mean_list = []
    ci_list = []
    for item in acc_list:
        mean, ci = mean_confidence_interval(item)
        mean_list.append(mean)
        ci_list.append(ci)
    print("Test Acc Mean{}".format(' '.join(
        [str(i * 100)[:5] for i in mean_list])))
    print("Test Acc ci{}".format(' '.join([str(i * 100)[:5] for i in ci_list])))
status = dict(
    epoch=0,
    train_loss=0.0,
    val_loss=-1,
)
pbar.set_postfix(status)

for epoch in range(args.epochs):
    status['epoch'] = epoch
    sample_seen = 0.0
    running_loss = 0.0
    running_acc = 0.0

    if args.animation:
        net.eval()
        plots.plot_embedding(
            *utils.get_embedding(net, original_test_loader),
            filename=add_prefix('gif/epoch-%02d.png' % (epoch)),
            title='Latent Space at Epoch %02d' % (epoch),
            no_label_and_legend=args.animation)

    # train
    net.train()
    for i, data in enumerate(train_loader):
        samples_in_batch = data[0].shape[0]
        optimizer.zero_grad()
        data = map(lambda x: x.to(utils.device), data)
        res = net(*data)
        loss = net.loss(*res)
        loss.backward()
def main():
    parser = argparse.ArgumentParser()

    # Load existing configuration?
    parser.add_argument('--load_from_cfg',
                        type=lambda x: bool(distutils.util.strtobool(x)),
                        default=False, help="Load from config?")
    parser.add_argument('--cfg_file', type=str,
                        help="Experiment configuration file",
                        default="config/digits/dann.yml")

    # Experiment identifier
    parser.add_argument('--id', type=str, help="Experiment identifier")
    parser.add_argument('--use_cuda', help="Use GPU?")

    # Source and target domain
    parser.add_argument('--source', help="Source dataset")
    parser.add_argument('--target', help="Target dataset")
    parser.add_argument('--img_dir', type=str, default="data/",
                        help="Data directory where images are stored")
    parser.add_argument('--LDS_type', type=str, default="natural",
                        help="Label Distribution Shift type")

    # CNN parameters
    parser.add_argument('--cnn', type=str, help="CNN architecture")
    parser.add_argument('--load_source',
                        type=lambda x: bool(distutils.util.strtobool(x)),
                        default=True, help="Load source checkpoint?")
    parser.add_argument('--l2_normalize',
                        type=lambda x: bool(distutils.util.strtobool(x)),
                        help="L2 normalize features?")
    parser.add_argument('--temperature', type=float,
                        help="CNN softmax temperature")

    # Class balancing parameters
    parser.add_argument('--class_balance_source',
                        type=lambda x: bool(distutils.util.strtobool(x)),
                        help="Class-balance source?")
    parser.add_argument('--pseudo_balance_target',
                        type=lambda x: bool(distutils.util.strtobool(x)),
                        help="Pseudo class-balance target?")

    # DA details
    parser.add_argument('--da_strat', type=str, help="DA strategy")
    parser.add_argument('--load_da',
                        type=lambda x: bool(distutils.util.strtobool(x)),
                        help="Load saved DA checkpoint?")

    # Training details
    parser.add_argument('--optimizer', type=str, help="Optimizer")
    parser.add_argument('--batch_size', type=int, help="Batch size")
    parser.add_argument('--lr', type=float, help="Learning rate")
    parser.add_argument('--wd', type=float, help="Weight decay")
    parser.add_argument('--num_epochs', type=int, help="Number of epochs")
    parser.add_argument('--da_lr', type=float,
                        help="Unsupervised DA learning rate")
    parser.add_argument('--da_num_epochs', type=int, help="DA number of epochs")

    # Loss weights
    parser.add_argument('--src_sup_wt', type=float,
                        help="Source supervised XE loss weight")
    parser.add_argument('--tgt_sup_wt', type=float,
                        help="Target self-training XE loss weight")
    parser.add_argument('--unsup_wt', type=float,
                        help="Target unsupervised loss weight")
    parser.add_argument('--cent_wt', type=float,
                        help="Target entropy minimization loss weight")

    args_cmd = parser.parse_args()

    if args_cmd.load_from_cfg:
        args_cfg = dict(OmegaConf.load(args_cmd.cfg_file))
        args_cmd = vars(args_cmd)
        for k in args_cmd.keys():
            if args_cmd[k] is not None:
                args_cfg[k] = args_cmd[k]
        args = OmegaConf.create(args_cfg)
    else:
        args = args_cmd

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(args)
    device = torch.device("cuda") if args.use_cuda else torch.device("cpu")

    ################################################################################################################
    #### Setup source data loaders
    ################################################################################################################
    print('Loading {} dataset'.format(args.source))
    src_dset = UDADataset(args.source, args.LDS_type, is_target=False,
                          img_dir=args.img_dir, batch_size=args.batch_size)
    src_train_dset, _, _ = src_dset.get_dsets()
    src_train_loader, src_val_loader, src_test_loader, src_train_idx = src_dset.get_loaders(
        class_balance_train=args.class_balance_source)
    num_classes = src_dset.get_num_classes()
    args.num_classes = num_classes
    print('Number of classes: {}'.format(num_classes))

    ################################################################################################################
    #### Train / load a source model
    ################################################################################################################
    source_model = get_model(args.cnn, num_cls=num_classes,
                             l2_normalize=args.l2_normalize,
                             temperature=args.temperature)
    source_file = '{}_{}_source.pth'.format(args.source, args.cnn)
    source_path = os.path.join('checkpoints', 'source', source_file)

    if args.load_source and os.path.exists(source_path):
        print('\nFound source checkpoint at {}'.format(source_path))
        source_model.load_state_dict(torch.load(source_path, map_location=device))
        best_source_model = source_model
    else:
        print('\nSource checkpoint not found, training...')
        best_source_model = utils.train_source_model(source_model,
                                                     src_train_loader,
                                                     src_val_loader,
                                                     num_classes, args, device)

    print('Evaluating source checkpoint on {} test set...'.format(args.source))
    _, cm_source = utils.test(best_source_model, device, src_test_loader,
                              split="test", num_classes=num_classes)
    per_class_acc_source = cm_source.diagonal().numpy() / cm_source.sum(axis=1).numpy()
    per_class_acc_source = per_class_acc_source.mean() * 100
    out_str = '{} Avg. acc.: {:.2f}%'.format(args.source, per_class_acc_source)
    print(out_str)

    model = copy.deepcopy(best_source_model)

    ################################################################################################################
    #### Setup target data loaders
    ################################################################################################################
    print('\nLoading {} dataset'.format(args.target))
    target_dset = UDADataset(args.target, args.LDS_type, is_target=True,
                             img_dir=args.img_dir, valid_ratio=0,
                             batch_size=args.batch_size)
    target_dset.get_dsets()

    # Manually long-tail the target training set for SVHN->MNIST-LT adaptation
    if args.LDS_type in ['IF1', 'IF20', 'IF50', 'IF100']:
        target_dset.long_tail_train('{}_ixs_{}'.format(args.target, args.LDS_type))

    print('Evaluating source checkpoint on {} test set...'.format(args.target))
    target_train_loader, target_val_loader, target_test_loader, tgt_train_idx = target_dset.get_loaders()
    acc_before, cm_before = utils.test(model, device, target_test_loader,
                                       split="test", num_classes=num_classes)
    per_class_acc_before = cm_before.diagonal().numpy() / cm_before.sum(axis=1).numpy()
    per_class_acc_before = per_class_acc_before.mean() * 100
    out_str = '{}->{}-LT ({}), Before {}:\t Avg. acc={:.2f}%\tAgg. acc={:.2f}%'.format(
        args.source, args.target, args.LDS_type,
        args.da_strat, per_class_acc_before, acc_before)
    print(out_str)
    ################################################################################################################
    #### Unsupervised adaptation of source model to target
    ################################################################################################################
    da_file = '{:s}_{:s}_{}_{}_net_{:s}_{:s}_{:s}.pth'.format(
        args.id, args.da_strat, args.da_lr, args.cnn,
        args.source, args.target, args.LDS_type)
    outdir = 'checkpoints'
    os.makedirs(os.path.join(outdir, args.da_strat), exist_ok=True)
    outfile = os.path.join(outdir, args.da_strat, da_file)
    model_name = 'AdaptNet'

    if args.load_da and os.path.exists(outfile):
        print('Trained {} checkpoint found: {}, loading...\n'.format(args.da_strat, outfile))
        net = get_model(model_name, num_cls=num_classes, weights_init=outfile,
                        model=args.cnn, l2_normalize=args.l2_normalize,
                        temperature=args.temperature)
        source_model_adapt = net.tgt_net
    else:
        net = get_model(model_name, model=args.cnn, num_cls=num_classes,
                        src_weights_init=source_path,
                        l2_normalize=args.l2_normalize,
                        temperature=args.temperature).to(device)
        print(net)
        print('Training {} {} model for {}->{}-LT ({})\n'.format(
            args.da_strat, args.cnn, args.source, args.target, args.LDS_type))
        opt_net = utils.generate_optimizer(net.tgt_net, args, mode='da')
        solver = get_solver(args.da_strat, net.tgt_net, src_train_loader,
                            target_train_loader, tgt_train_idx, opt_net,
                            device, num_classes, args)

        for epoch in range(args.da_num_epochs):
            if args.pseudo_balance_target:
                print('\nEpoch {}: Re-estimating probabilities for pseudo-balancing...'.format(epoch))
                # Approximately class-balance the target dataloader using
                # pseudolabels at the start of each epoch
                target_dset_copy = copy.deepcopy(target_dset)
                src_train_dset_copy = copy.deepcopy(src_train_loader.dataset)
                _, gtlabels, plabels = utils.get_embedding(
                    solver.net, target_train_loader, device, num_classes, args)
                # Create a backup of the actual labels
                target_dset_copy.train_dataset.targets_copy = copy.deepcopy(
                    target_dset_copy.train_dataset.targets)
                target_dset_copy.train_dataset.targets = plabels
                tgt_train_loader_pbalanced, _, _, _ = target_dset_copy.get_loaders(
                    class_balance_train=True)
                tgt_train_loader_pbalanced.dataset.targets_copy = \
                    target_dset_copy.train_dataset.targets_copy
                solver.tgt_loader = tgt_train_loader_pbalanced

            if args.da_strat == 'dann':
                opt_dis = utils.generate_optimizer(net.discriminator, args, mode='da')
                solver.solve(epoch, net.discriminator, opt_dis)
            else:
                solver.solve(epoch)

        print('Saving to', outfile)
        net.save(outfile)
        source_model_adapt = net.tgt_net

    # Evaluate the adapted model
    print('\nEvaluating adapted model on {} test set'.format(args.target))
    acc_after, cm_after = utils.test(source_model_adapt, device,
                                     target_test_loader, split="test",
                                     num_classes=num_classes)
    per_class_acc_after = cm_after.diagonal().numpy() / cm_after.sum(axis=1).numpy()
    per_class_acc_after = per_class_acc_after.mean() * 100

    print('###################################')
    out_str = '{}->{}-LT ({}), Before {}:\t Avg. acc={:.2f}%\tAgg. acc={:.2f}%'.format(
        args.source, args.target, args.LDS_type,
        args.da_strat, per_class_acc_before, acc_before)
    out_str += '\n\t\t\tAfter {}:\t Avg. acc={:.2f}%\tAgg. acc={:.2f}%'.format(
        args.da_strat, per_class_acc_after, acc_after)
    print(out_str)

    utils.plot_accuracy_statistics(cm_before, cm_after, num_classes, args,
                                   target_train_loader)