def __init__(self, configs):
    BaseModel.__init__(self, configs)
    self.encoder = TransformerEncoder(configs)
    self.pair_scorer = ScoreModule(self.get_pair_embs_size(),
                                   [configs['ffnn_size']] * configs['ffnn_depth'],
                                   configs['dropout_rate'])

    # GENE embeddings (if use_gene_features is enabled)
    if configs['use_gene_features']:
        self.gene_dim = GENE2DIM.get(self.configs['gene_variant'], GENE_DIM)
        self.event2emb = get_event2geneemb(configs['gene_variant'])
        for e in self.event2emb:
            self.event2emb[e] = self.event2emb[e].to(self.device)
        self.defaultgene = nn.Embedding(1, self.gene_dim)

    # Re-initialize all non-transformer embedding weights uniformly
    for name, param in self.named_parameters():
        if 'transformer' not in name.lower() and 'embedding' in name.lower():
            print('Re-initialize embedding {}'.format(name))
            param.data.uniform_(-0.1, 0.1)

    # Move model to device
    self.to(self.device)
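# A minimal instantiation sketch (the config keys are the ones read above;
# the concrete values and the subclass name are assumptions for illustration):
#
# configs = {
#     'ffnn_size': 512,
#     'ffnn_depth': 2,
#     'dropout_rate': 0.3,
#     'use_gene_features': False,  # set True only if gene embeddings are available
# }
# model = EventPairModel(configs)  # hypothetical subclass of BaseModel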
def post(self):
    data = request.get_json()
    isbn = data['isbn']
    name = data['name']
    description = data['description']
    price = data['price']
    writer = data['writer']
    handle = data['handle']
    password = data['password']

    # Only the admin may insert new records
    if handle != utils.admin_handle or password != utils.admin_password:
        return utils.return_response(message='authentication error')

    model = BaseModel(isbn=isbn, name=name, description=description,
                      price=price, writer=writer)
    try:
        db.session.add(model)
        db.session.commit()
    except SQLAlchemyError as e:
        current_app.logger.error(e)
        db.session.rollback()
        return utils.return_response(message='Error in Database')
    else:
        return utils.return_response(message='Data inserted Ok')
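# A hypothetical client-side sketch (the endpoint path and host are
# assumptions; the payload fields mirror the keys the handler reads):
#
# import requests
# payload = {
#     'isbn': '978-0132350884', 'name': 'Clean Code',
#     'description': 'A handbook of agile software craftsmanship',
#     'price': 30, 'writer': 'Robert C. Martin',
#     'handle': '<admin handle>', 'password': '<admin password>',
# }
# r = requests.post('http://localhost:5000/books', json=payload)
# print(r.json())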
def get_classification_report_for_evaluation(base_model: BaseModel) -> Dict:
    y_pred, y_test = base_model.test_model()
    report = classification_report(y_test, y_pred,
                                   target_names=_CLASS_NAMES,
                                   output_dict=True)
    return report
def predict_emotion(model: BaseModel, lyrics: str) -> str:
    result = model.predict(lyrics)
    if result is not None:
        encoded_label, probabilities = result
        label = label_encoder.inverse_transform(encoded_label)
        return label[0]
    else:
        return 'Prediction did not succeed'
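# A minimal usage sketch (assumes a trained model and a fitted
# `label_encoder` are in scope, as the function itself does):
#
# emotion = predict_emotion(model, "I'm walking on sunshine")
# print(emotion)  # one of _CLASS_NAMES, e.g. 'happy'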
def evaluate_model(base_model: BaseModel) -> None:
    y_pred, y_test = base_model.test_model()
    print(classification_report(y_test, y_pred, target_names=_CLASS_NAMES))
    cm = confusion_matrix(y_test, y_pred)
    df_cm = pd.DataFrame(cm, index=_CLASS_NAMES, columns=_CLASS_NAMES)
    show_confusion_matrix(df_cm)
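# `show_confusion_matrix` is not defined in this snippet; a common
# implementation (an assumption, sketched with seaborn) looks like this:
import seaborn as sns
import matplotlib.pyplot as plt

def show_confusion_matrix(df_cm):
    # Heatmap with the raw counts annotated in each cell
    ax = sns.heatmap(df_cm, annot=True, fmt='d', cmap='Blues')
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    plt.show()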
def meetings(user_id):
    output = dict()
    try:
        loaded = BaseModel.load_many_by_user(user_id, "Meeting")
        output['meetings'] = []
        for data in loaded:
            output['meetings'].append(data)
            if config.is_dev_environment():
                print(f"DEBUG: Loading the following from DB: {data}")
        status = 200
        if len(output['meetings']) == 0:
            status = 204  # No Content: the user has no meetings
    except Exception as e:
        # TODO: Make the exception handling less broad
        output['error'] = f'{e}'
        status = 400
    return jsonify(output), status
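# A hypothetical usage sketch (assumes this view is registered on a Flask
# `app` under /meetings/<user_id>; the route is an assumption):
#
# with app.test_client() as client:
#     resp = client.get('/meetings/42')
#     print(resp.status_code, resp.get_json())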
def create_model(opt):
    # Construct only the requested model, rather than building a BaseModel
    # up front and discarding it when another branch matches.
    if opt.model == 'Base':
        model = BaseModel(opt)
    elif opt.model == 'PATN':
        model = PATNTransferModel(opt)
    elif opt.model == 'CTPS':
        model = CTPSModel(opt)
    elif opt.model == 'CAN':
        model = CANModel(opt)
    elif opt.model == 'DCGAN':
        model = DCGANModel(opt)
    elif opt.model == "AdaIN":
        model = AdaIN(opt)
    else:
        raise ValueError("Model [%s] not recognized." % opt.model)
    print("=> model [{}] was created".format(model.name))
    return model
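# A minimal usage sketch (assumes `opt` comes from the project's option
# parser; only the field this factory reads is shown):
#
# from argparse import Namespace
# opt = Namespace(model='PATN')  # plus whatever fields PATNTransferModel needs
# model = create_model(opt)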
def __init__(self, name, tokenizer, optimizer):
    BaseModel.__init__(self, name, tokenizer, optimizer)

    # Vectorize the data.
    self.input_texts = []
    self.target_texts = []
    self.input_characters = set()
    self.target_characters = set()
    for ch in self.CHARS_BASIC:
        self.input_characters.add(ch)
        self.target_characters.add(ch)
    lines = data.load_clean_sentences('both')
    for line in lines:
        input_text = line[1]   # Swedish
        target_text = line[0]  # English
        # We use "tab" as the "start sequence" character
        # for the targets, and "\n" as "end sequence" character.
        target_text = self.CH_START + target_text + self.CH_END
        self.input_texts.append(input_text)
        self.target_texts.append(target_text)
        for char in input_text:
            if char not in self.input_characters:
                self.input_characters.add(char)
        for char in target_text:
            if char not in self.target_characters:
                self.target_characters.add(char)

    self.input_characters = sorted(list(self.input_characters))
    self.target_characters = sorted(list(self.target_characters))
    self.num_encoder_tokens = len(self.input_characters)
    self.num_decoder_tokens = len(self.target_characters)
    self.max_encoder_seq_length = max(len(txt) for txt in self.input_texts)
    self.max_decoder_seq_length = max(len(txt) for txt in self.target_texts)

    print('Number of samples:', len(self.input_texts))
    print('Number of unique input tokens:', self.num_encoder_tokens)
    print('Number of unique output tokens:', self.num_decoder_tokens)
    print('Max sequence length for inputs:', self.max_encoder_seq_length)
    print('Max sequence length for outputs:', self.max_decoder_seq_length)

    self.input_token_index = dict(
        (char, i) for i, char in enumerate(self.input_characters))
    self.target_token_index = dict(
        (char, i) for i, char in enumerate(self.target_characters))

    # One-hot encoded encoder/decoder tensors
    self.encoder_input_data = np.zeros(
        (len(self.input_texts), self.max_encoder_seq_length,
         self.num_encoder_tokens), dtype='float32')
    self.decoder_input_data = np.zeros(
        (len(self.input_texts), self.max_decoder_seq_length,
         self.num_decoder_tokens), dtype='float32')
    self.decoder_target_data = np.zeros(
        (len(self.input_texts), self.max_decoder_seq_length,
         self.num_decoder_tokens), dtype='float32')

    for i, (input_text, target_text) in enumerate(
            zip(self.input_texts, self.target_texts)):
        for t, char in enumerate(input_text):
            self.encoder_input_data[i, t, self.input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            # decoder_target_data is ahead of decoder_input_data by one timestep
            self.decoder_input_data[i, t, self.target_token_index[char]] = 1.
            if t > 0:
                # decoder_target_data will be ahead by one timestep
                # and will not include the start character.
                self.decoder_target_data[
                    i, t - 1, self.target_token_index[char]] = 1.

    # Reverse-lookup token index to decode sequences back to
    # something readable.
    self.reverse_input_char_index = dict(
        (i, char) for char, i in self.input_token_index.items())
    self.reverse_target_char_index = dict(
        (i, char) for char, i in self.target_token_index.items())
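# A sanity-check sketch (assumes `m` is an instance of this class): recover
# the first training sample from the one-hot tensors via the reverse indices.
def decode_one_hot(one_hot_seq, reverse_index):
    chars = []
    for row in one_hot_seq:
        if row.sum() == 0:  # an all-zero row is padding; stop here
            break
        chars.append(reverse_index[int(row.argmax())])
    return ''.join(chars)

# print(decode_one_hot(m.encoder_input_data[0], m.reverse_input_char_index))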
def create_model(self, params=None):
    """Create the input-output model."""
    self.model = BaseModel(self.p)
def __init__(self, name, tokenizer, optimizer, include_dropout=False,
             latent_dim=256, reverse_order=False, bidi=False):
    """
    :param reverse_order: If True, reverse the order of input tokens to ease training
    """
    BaseModel.__init__(self, name, tokenizer, optimizer)

    self.include_dropout = include_dropout
    self.latent_dim = latent_dim
    self.reverse_order = reverse_order
    self.bidi = bidi  # If True, use a Bidirectional wrapper around the encoder LSTM
    self.other_tokens = set()  # input
    self.eng_tokens = {self.CH_START, self.CH_END}  # target

    # Collect all tokens across all input lines
    for idx, line in enumerate(self.eng_texts):
        self.eng_texts[idx] = self.CH_START + self.eng_texts[idx] + self.CH_END
        self.eng_tokenized[idx] = ([self.CH_START] + self.eng_tokenized[idx]
                                   + [self.CH_END])
        for token in self.other_tokenized[idx]:
            self.other_tokens.add(token)
        for token in self.eng_tokenized[idx]:
            self.eng_tokens.add(token)

    self.other_tokens = sorted(list(self.other_tokens))
    self.eng_tokens = sorted(list(self.eng_tokens))
    self.num_encoder_tokens = len(self.other_tokens)
    self.num_decoder_tokens = len(self.eng_tokens)
    self.max_encoder_seq_length = max(len(txt) for txt in self.other_tokenized)
    self.max_decoder_seq_length = max(len(txt) for txt in self.eng_tokenized)

    print('Number of samples:', self.num_samples)
    print('Number of unique input tokens:', self.num_encoder_tokens)
    print('Number of unique output tokens:', self.num_decoder_tokens)
    print('Max sequence length for inputs:', self.max_encoder_seq_length)
    print('Max sequence length for outputs:', self.max_decoder_seq_length)

    self.input_token_index = dict(
        (token, i) for i, token in enumerate(self.other_tokens))
    self.target_token_index = dict(
        (token, i) for i, token in enumerate(self.eng_tokens))

    self.encoder_input_data = numpy.zeros(
        (self.num_samples, self.max_encoder_seq_length,
         self.num_encoder_tokens), dtype='uint8')
    self.decoder_input_data = numpy.zeros(
        (self.num_samples, self.max_decoder_seq_length,
         self.num_decoder_tokens), dtype='uint8')
    self.decoder_target_data = numpy.zeros(
        (self.num_samples, self.max_decoder_seq_length,
         self.num_decoder_tokens), dtype='uint8')

    # Create one-hot encoded values directly
    for i, (input_text, target_text) in enumerate(
            zip(self.other_tokenized, self.eng_tokenized)):
        for t, token in enumerate(input_text):
            self.encoder_input_data[i, t, self.input_token_index[token]] = 1
        for t, token in enumerate(target_text):
            # decoder_target_data is ahead of decoder_input_data by one timestep
            self.decoder_input_data[i, t, self.target_token_index[token]] = 1
            if t > 0:
                # decoder_target_data will be ahead by one timestep
                # and will not include the start character.
                self.decoder_target_data[
                    i, t - 1, self.target_token_index[token]] = 1

    # Flip once, after all samples are filled in. Flipping inside the sample
    # loop would reverse the whole batch once per sample, leaving each sample
    # flipped a different number of times.
    if reverse_order:
        self.encoder_input_data = numpy.flip(self.encoder_input_data, 1)

    # Reverse-lookup token index to decode sequences back to something readable.
    self.reverse_input_token_index = dict(
        (i, token) for token, i in self.input_token_index.items())
    self.reverse_target_token_index = dict(
        (i, token) for token, i in self.target_token_index.items())
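# A tiny standalone illustration of what numpy.flip along axis 1 does to a
# padded one-hot batch (shapes are made up for the example): the real tokens
# move to the end of the time axis, padding to the front.
import numpy
batch = numpy.zeros((1, 4, 2), dtype='uint8')
batch[0, 0, 1] = 1  # one token at t=0; t=1..3 are padding
flipped = numpy.flip(batch, 1)
print(flipped[0])  # the token now sits at the last timestep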
def __init__(self):
    BaseModel.__init__(self)
    self.name = map_collections["scope"]
def validate(self):
    BaseModel.validate(self)
    self.validate_existance('user_id', User)
#!/usr/bin/python3
from models import storage
from models.base import BaseModel

all_objs = storage.all()
print("-- Reloaded objects --")
for obj_id in all_objs.keys():
    obj = all_objs[obj_id]
    print(obj)

print("-- Create a new object --")
my_model = BaseModel()
my_model.name = "Holberton"
my_model.my_number = 89
my_model.save()
print(my_model)
parser.add_argument("--epochs", type=int, default=50, help="train epochs") parser.add_argument("--batch_size", type=int, default=16, help="batch size") parser.add_argument("--emb_drop_rate", type=float, default=0.2, help="dropout rate for embeddings") parser.add_argument("--rnn_drop_rate", type=float, default=0.5, help="dropout rate for embeddings") parser.add_argument("--max_to_keep", type=int, default=1, help="maximum trained model to be saved") parser.add_argument("--no_imprv_tolerance", type=int, default=None, help="no improvement tolerance") config = Configurations(parser.parse_args()) # os environment os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3" os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu_idx # if dataset is not prepared, then build it if not os.path.exists(config.save_path) or not os.listdir(config.save_path): process_base(config) print("load dataset...") train_ratio = int(config.train_ratio) if float(config.train_ratio) > 1.0 else float(config.train_ratio) dataset = Dataset(config.train_set, config.dev_set, config.test_set, batch_size=config.batch_size, train_rate=train_ratio, shuffle=True) print("build model and train...") model = BaseModel(config) if config.restore_model: model.restore_last_session() if config.train: model.train(dataset) model.restore_last_session() model.evaluate_data(dataset.get_data_batches("test"), name="test") model.close_session()
def __init__(self, name, tokenizer, optimizer):
    BaseModel.__init__(self, name, tokenizer, optimizer)
def __call__(self, hparams, mode, iterator, **kwargs):
    BaseModel.__call__(self, hparams, mode, iterator, **kwargs)
    return self
def validate(self):
    BaseModel.validate(self)
    self.validate_existance('movement_id', Movement)
def train(model: BaseModel, config, train_dataset, val_dataset, step=0):
    train_iterator = DataIterator(train_dataset,
                                  batch_size=config.batch_size,
                                  num_workers=config.data.num_workers,
                                  sampler=InfiniteRandomSampler(train_dataset))

    # Prepare the summary writer and log the full config
    writer = SummaryWriter(config.log_dir)
    config_str = yaml.dump(namedtuple_to_dict(config))
    writer.add_text('config', config_str)

    train_sampler = SubsetSequentialSampler(train_dataset, config.summary.train_samples)
    val_sampler = SubsetSequentialSampler(val_dataset, config.summary.val_samples)
    train_sample_iterator = DataIterator(train_dataset.for_summary(),
                                         sampler=train_sampler, num_workers=2)
    val_sample_iterator = DataIterator(val_dataset.for_summary(),
                                       sampler=val_sampler, num_workers=2)

    # Training loop
    start_time = time.time()
    start_step = step
    while True:
        step += 1
        save_summary = step % config.summary_step == 0
        d_summary, g_summary, p_summary = None, None, None

        if config.mode == MODE_PRED:
            # Predictor-only training
            if model.lr_sched_p is not None:
                model.lr_sched_p.step()
            x, y = next(train_iterator)
            p_summary = model.optimize_p(x, y, step=step, summarize=save_summary)
        else:
            # Adversarial training: discriminator update(s) first ...
            if model.lr_sched_d is not None:
                model.lr_sched_d.step()
            x, y = next(train_iterator)
            summarize_d = save_summary and config.d_updates_per_step == 1
            d_summary = model.optimize_d(x, y, step=step, summarize=summarize_d)
            for i in range(config.d_updates_per_step - 1):
                x, y = next(train_iterator)
                summarize_d = save_summary and (i == config.d_updates_per_step - 2)
                d_summary = model.optimize_d(x, y, step=step, summarize=summarize_d)

            # ... then generator update(s)
            if model.lr_sched_g is not None:
                model.lr_sched_g.step()
            summarize_g = save_summary and config.g_updates_per_step == 1
            g_summary = model.optimize_g(x, y, step=step, summarize=summarize_g)
            for i in range(config.g_updates_per_step - 1):
                x, y = next(train_iterator)
                summarize_g = save_summary and (i == config.g_updates_per_step - 2)
                g_summary = model.optimize_g(x, y, step=step, summarize=summarize_g)

        # Print status
        elapsed_time = time.time() - start_time
        elapsed_step = step - start_step
        print('\r[Step %d] %s' % (step, time.strftime('%H:%M:%S', time.gmtime(elapsed_time))), end='')
        if elapsed_time > elapsed_step:
            print(' | %.2f s/it' % (elapsed_time / elapsed_step), end='')
        else:
            print(' | %.2f it/s' % (elapsed_step / elapsed_time), end='')

        if step % config.ckpt_step == 0:
            model.save(step)

        if save_summary:
            # Save summaries from the optimization process
            for summary in [p_summary, d_summary, g_summary]:
                if summary is None:
                    continue
                model.write_summary(writer, summary, step)

            # Summarize learning rates and gradients per parameter group
            for component, optimizer in [
                ('d', model.optim_d),
                ('g', model.optim_g),
                ('p', model.optim_p),
            ]:
                if optimizer is None:
                    continue
                for i, group in enumerate(optimizer.param_groups):
                    writer.add_scalar('lr/%s/%d' % (component, i), group['lr'], step)
                    grads = []
                    for param in group['params']:
                        if param.grad is not None:
                            grads.append(param.grad.data.view([-1]))
                    if grads:
                        grads = torch.cat(grads, 0)
                        writer.add_histogram('grad/%s/%d' % (component, i), grads, step)

            # Custom summaries
            model.summarize(writer, step, train_sample_iterator, val_sample_iterator)
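# `InfiniteRandomSampler` is not shown above; a minimal sketch of the usual
# pattern (an assumption, not necessarily this project's implementation).
# It yields random indices forever, so `next(train_iterator)` never raises
# StopIteration inside the `while True` loop.
import torch
from torch.utils.data import Sampler

class InfiniteRandomSampler(Sampler):
    def __init__(self, dataset):
        self.n = len(dataset)

    def __iter__(self):
        while True:
            # One full random permutation per epoch-equivalent, repeated forever
            yield from torch.randperm(self.n).tolist()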
def __init__(self):
    BaseModel.__init__(self)
    self.name = map_collections["rol_relation"]