def handle_command(c):
    """Dispatch one parsed RESP command array and return the reply bytes.

    `c` is a list of byte strings: command name first, arguments after.
    Unknown commands yield a RESP error reply.
    """
    print(c[0])
    print(c)
    cmd = c[0]
    if cmd == b'COMMAND':
        return NULL
    if cmd == b'SET':
        model.insert(c[1], c[2], epochs=13)
        return NULL
    if cmd == b'GET':
        value = model.get(c[1])
        # Bulk-string reply: $<len>\r\n<payload>\r\n
        return b'$%b\r\n%b\r\n' % (str(len(value)).encode("ascii"), value)
    if cmd in (b'INCR', b'DECR', b'INCRBY'):
        current = int(model.get(c[1]).decode("ascii"))
        if cmd == b'INCR':
            delta = 1
        elif cmd == b'DECR':
            delta = -1
        else:
            delta = int(c[2].decode("ascii"))
        model.insert(c[1], str(current + delta).encode("ascii"), epochs=10)
        return NULL
    return b'-ERROR: Unsupported command\r\n'
def test_get(self):
    """get() reads live _data, returns None for missing keys; attribute
    access raises AttributeError for keys that were never set."""
    instance = self.ModelTest()
    instance.set(a=1)
    self.assertEqual(instance.get('a'), 1)
    self.assertEqual(instance.a, 1)
    # get() must reflect direct mutation of the backing dict.
    instance._data['a'] = 2
    self.assertEqual(instance.get('a'), 2)
    self.assertIs(instance.get('b'), None)
    with self.assertRaises(AttributeError):
        instance.b
def delete(self, ttype, name):
    """delete an object and its children

    Checks model-level permission first, deletes the row named `name`
    from table `ttype`, then deletes all child rows registered in
    _parent_child. Missing tables are silently ignored.
    NOTE(review): table names are interpolated into SQL directly — safe
    only if `ttype` comes from trusted code, not user input.
    """
    try:
        # permission check: raises if 'delete' is not allowed on this object
        model.get({"type":ttype, "name":name}).check_allow('delete')
        self.db.sql("""delete from `%s` where name=%s""" % (ttype, '%s'), name)
        # delete children
        for child_tab in self.db.sql("select child from _parent_child where parent=%s", (ttype,)):
            self.db.sql("""delete from `%s` where parent=%s and parent_type=%s""" \
                % (child_tab['child'],'%s','%s'), (name, ttype))
    except MySQLdb.Error, e:
        # a missing table just means there is nothing to delete
        if e.args[0] == ER.NO_SUCH_TABLE:
            return
        else:
            raise e
def load_model(pkl_path):
    """Build the network and load weights from a pickled checkpoint.

    :param pkl_path: path to a pickle file mapping layer keys to numpy arrays.
    :return: the network in eval mode with weights loaded.

    Conv weights are copied as-is; fully-connected weights are transposed
    (the checkpoint presumably stores fc weights in (in, out) order —
    matches the original code's behavior).
    """
    net = get()
    net.eval()
    # `with` guarantees the handle is closed even if unpickling raises
    # (the original leaked the handle on error).
    with open(pkl_path, 'rb') as f:
        ckpt = pickle.load(f)
    # Checkpoint keys in the same order torch yields net.parameters().
    param_order = [
        'conv1:conv1:conv:W', 'conv1:conv1:conv:b',
        'conv2:conv2:conv:W', 'conv2:conv2:conv:b',
        'conv3:conv3:conv:W', 'conv3:conv3:conv:b',
        'conv4:conv4:conv:W', 'conv4:conv4:conv:b',
        'fc1:fc1:fc:W', 'fc1:fc1:fc:b',
        'fct:fct:fc:W', 'fct:fct:fc:b',
    ]
    for i, param in enumerate(net.parameters()):
        key = param_order[i]
        weight = ckpt[key]
        if 'fc' in key:
            # transpose fc matrices; np.transpose is a no-op on 1-D biases
            weight = np.transpose(weight)
        param.data = torch.from_numpy(weight).float()
    return net
def test_model_numel():
    """Print the total number of parameters of the model."""
    from model import get
    net, _ = get()
    total = sum(p.numel() for p in net.parameters())
    print(total)
def main():
    """Parse CLI args, load the translation model and translate a text file."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-ckpt', required=True)
    parser.add_argument('-k', type=int, default=5)
    parser.add_argument('-max_len', type=int, default=250)
    # BUG FIX: was type=int with a float default (1.5) — any user-supplied
    # ratio would have been truncated to an integer.
    parser.add_argument('-max_ratio', type=float, default=1.5)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-text', type=str, required=True)
    parser.add_argument('-lp', '--length_penalty', type=float, default=0.7)
    args = parser.parse_args()
    args.use_cuda = (not args.no_cuda) and torch.cuda.is_available()
    assert args.k > 0
    assert args.max_len > 10
    net, _ = model.get()
    net, src_vocab, tgt_vocab = load_model(args.ckpt, net)
    if args.use_cuda:
        net = net.cuda()
    fpath = args.text
    try:
        # `with` closes the handle; narrowed from a bare except so real
        # bugs (e.g. NameError) are no longer swallowed.
        with open(fpath, encoding='utf-8') as f:
            args.text = f.read().split('\n')
    except (OSError, UnicodeDecodeError):
        print("error opening or reading text file")
    translate(args, net, src_vocab, tgt_vocab)
def delete(self, ttype, name):
    """delete an object and its children

    sqlite3 port of the MySQL version: placeholders are `?`, and a
    missing table is detected by matching the OperationalError message.
    NOTE(review): table names are interpolated into SQL directly — safe
    only if `ttype` comes from trusted code, not user input.
    """
    import sqlite3
    try:
        # permission check: raises if 'delete' is not allowed on this object
        model.get({"type":ttype, "name":name}).check_allow('delete')
        self.sql("""delete from `%s` where name=?""" % ttype, (name,))
        # delete children
        for child_tab in self.sql("select child from _parent_child where parent=?", (ttype,)):
            self.sql("""delete from `%s` where parent=? and parent_type=?""" \
                % child_tab[0], (name, ttype), as_dict=1)
    except sqlite3.OperationalError, e:
        # a missing table just means there is nothing to delete
        if 'no such table' in e.args[0]:
            return
        else:
            raise e
def test_set(self):
    """set() rejects empty calls and accepts both a dict and kwargs,
    merging into the existing data."""
    first = self.ModelTest()
    with self.assertRaises(ValueError):
        first.set()
    first.set({'a': 1})
    second = self.ModelTest()
    second.set(a=1)
    # dict form and kwargs form must produce identical data
    self.assertEqual(first._data, second._data)
    first.set({'a': 1, 'b': 2})
    self.assertEqual(first.get('a'), 1)
    self.assertEqual(first._data['b'], 2)
    self.assertEqual(first._data, {'a': 1, 'b': 2})
    first.set(c=3, d=4)
    self.assertEqual(first.get('c'), 3)
    self.assertEqual(first.d, 4)
def run():
    """Train (or resume training) a skip-gram embedding model on the
    zhwiki corpus, checkpointing the best model and feeding a live
    dashboard."""
    # set initial time
    start_time = time.time()
    # dump hyper parameters settings
    print(time.strftime('%Y-%m-%d %H:%M:%S') + ' Hyper-parameters setting')
    # get configuration
    args = config.get()
    # checkpoint callback: only keeps the best model seen so far
    save_best_model = ModelCheckpoint(
        filepath=args.model_file_train, verbose=1,
        save_best_only=True)  # , save_best_only=True
    if os.path.isfile(args.model_file_train):
        # resume from an existing checkpoint
        print(time.strftime('%Y-%m-%d %H:%M:%S') + ' Load model from file...')
        skip_gram_model = load_model(args.model_file_train)
    else:
        # get embedding model
        print(time.strftime('%Y-%m-%d %H:%M:%S') + ' Build model...')
        skip_gram_model = model.get(args)
    # dashboard
    watch_board = Dashboard(folder=config.FOLDER, dump_file="dashboard.dump",
                            statistic_file="statistic.txt",
                            model=skip_gram_model, show_board=True)
    # begin training
    print(time.strftime('%Y-%m-%d %H:%M:%S') + " Begin training..")
    # Train the model each generation and show predictions against the
    # validation dataset
    # NOTE(review): range(1, ...) runs args.iterations - 1 iterations —
    # confirm this off-by-one is intended.
    for iteration in range(1, args.iterations):
        print(
            time.strftime('%Y-%m-%d %H:%M:%S') + ' Iteration %d ' % iteration)
        skip_gram_model.fit_generator(
            zhwiki_corpus.skip_gram_generator(batch_size=args.batch_size,
                                              context_window_size=5,
                                              negative_samples=10),
            steps_per_epoch=args.steps_per_epoch,
            epochs=args.epochs,
            callbacks=[save_best_model, watch_board],
            # callbacks=[save_best_model],
            validation_data=zhwiki_corpus.skip_gram_generator(
                batch_size=args.batch_size, context_window_size=5,
                negative_samples=10),
            validation_steps=100,
            verbose=0)
    # close_board windows
    watch_board.close_board()
    print("task took %.3fs" % (float(time.time()) - start_time))
def momo():
    """Webhook endpoint: decide (probabilistically) whether to fake an
    engagement for the incoming message and record the outcome.

    Returns an empty string (200 OK body).
    """
    subject = request.json[0]['msys']['relay_message']['content']['subject']
    m = model.get(subject)
    if m:
        engage = m['engage_percent'] >= random.random()
        if engage:
            util.fakeEngagement(
                request.json[0]['msys']['relay_message']['content']['html'])
        # BUG FIX: previously this re-sampled random.random(), so the
        # recorded engagement could disagree with whether fakeEngagement
        # was actually performed. Reuse the original decision instead.
        model.incr(subject, engage)
    else:
        model.incr(subject, 0)
    return ''
def setRelations(items, *relations):
    """Attach related model objects to each item dict.

    Each relation is either "key" (look up via key+'id', store under key)
    or "key1 key2" (look up via key1+'id', store under key2).
    `items` may be a single dict or a list of dicts; dicts are mutated
    in place.
    """
    for relation in relations:
        if ' ' in relation:
            k1, k2 = relation.split(' ')
        else:
            k1 = k2 = relation
        model = getModel(k1)
        assert model is not None
        if not isinstance(items, list):
            items = [items]
        for item in items:
            # BUG FIX: dict.has_key() was removed in Python 3 — use `in`
            # (identical semantics, also valid in Python 2).
            if item is not None and (k1 + 'id') in item:
                item[k2] = model.get(item[k1 + 'id'])
def main():
    """Parse CLI args, load the model, and translate a text file against
    a reference file."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-ckpt', required=True)
    parser.add_argument('-k', '--beam_size', type=int, default=4)
    parser.add_argument('-lp', '--length_penalty', type=float, default=0.7)
    parser.add_argument('--early_stopping', action="store_true")
    parser.add_argument('-max_len', type=int, default=250)
    # BUG FIX: was type=int with a float default (1.5) — any user-supplied
    # ratio would have been truncated to an integer.
    parser.add_argument('-max_ratio', type=float, default=1.5)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-text', type=str, required=True)
    parser.add_argument('-ref_text', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=None)
    parser.add_argument('--max_batch_size', type=int, default=None)
    parser.add_argument('--tokens_per_batch', type=int, default=None)
    parser.add_argument('--greedy', action='store_true')
    parser.add_argument('--src_lan', type=str, default="en")
    parser.add_argument('--tgt_lan', type=str, default="de")
    parser.add_argument('--gen_a', type=float, default=1.3)
    parser.add_argument('--gen_b', type=int, default=5)
    args = parser.parse_args()
    args.use_cuda = (not args.no_cuda) and torch.cuda.is_available()
    assert args.beam_size > 0
    assert args.max_len > 10
    net, _ = model.get()
    net, src_vocab, tgt_vocab = load_model(args.ckpt, net)
    if args.use_cuda:
        net = net.cuda()

    def _read_lines(fpath):
        # Read non-empty lines; narrowed from a bare except so real bugs
        # are no longer swallowed. Returns None on I/O failure.
        try:
            with open(fpath, encoding='utf-8') as f:
                return f.read().split('\n')[:-1]
        except (OSError, UnicodeDecodeError):
            print("error opening or reading text file")
            return None

    lines = _read_lines(args.text)
    if lines is not None:
        args.text = lines
    lines = _read_lines(args.ref_text)
    if lines is not None:
        args.ref_text = lines
    translate(args, net, src_vocab, tgt_vocab)
def load_model(checkpoint_path):
    """Reload a checkpoint and rebuild the network from it.

    :param checkpoint_path: path to a torch checkpoint file.
    :return: (net, src_vocab, tgt_vocab)
    :raises FileNotFoundError: if the checkpoint does not exist.
    """
    # Explicit raise instead of `assert` — asserts vanish under `python -O`.
    if not os.path.isfile(checkpoint_path):
        raise FileNotFoundError("checkpoint not found: %s" % checkpoint_path)
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    # Vocabulary sizes must be set before model.get() builds the embeddings.
    config.src_n_vocab = ckpt['net']['module.src_emb.0.emb.weight'].size(0)
    config.tgt_n_vocab = ckpt['net']['module.tgt_emb.0.emb.weight'].size(0)
    net, _ = model.get()
    # Strip the "module." prefix DataParallel added when the model was saved.
    s_dict = {k[7:]: v for k, v in ckpt["net"].items()}
    net.load_state_dict(s_dict)
    return net, ckpt["src_vocab"], ckpt["tgt_vocab"]
def post(self, obj, constraint={}):
    """post a vector object, the property name is the type. see test case for example

    Saves `obj` (deleting any existing row with the same type/name first),
    then recursively posts child dicts/lists keyed by type, stamping each
    child with parent/parent_type. Model hooks (before_post / validate /
    after_post) run only for top-level objects (no parent_type).
    NOTE(review): mutable default `constraint={}` is shared across calls —
    safe only if post_single never mutates it; confirm.
    """
    # model hooks only apply to top-level (non-child) objects
    modelobj = (not obj.get('parent_type')) and model.get(obj) or None
    # delete if exists
    if obj.get("type") and obj.get("name"):
        self.delete(obj["type"], obj["name"])
    modelobj and modelobj.before_post()
    modelobj and modelobj.validate()
    obj_single, is_vector = self._get_single(obj)
    # save the parent
    self.post_single(obj_single, constraint)
    if is_vector:
        for k in obj:
            # parent linkage stamped onto every child
            d = {"type":k, "parent":obj["name"], "parent_type":obj["type"]}
            # dict, one child only
            if type(obj[k]) is dict:
                obj[k].update(d)
                self.post(obj[k])
            # multiple children
            if type(obj[k]) in (list, tuple):
                for child in obj[k]:
                    # child is a dict
                    if type(child) is dict:
                        child.update(d)
                        self.post(child)
                    # child is literal (only names)
                    elif type(child) in (str, int, float):
                        c = {"value":child}
                        c.update(d)
                        self.post_single(c)
                    else:
                        raise Exception, "child %s must be dict or literal" % str(child)
    modelobj and modelobj.after_post()
def view_lyric(_id): """ View a lyric by _id. """ # Search query used to find this lyric. query = request.args.get('query', 'you beat the system, collect 10 points') back_url = '/?' + urllib.urlencode({'query': query}) response = model.get(_id) lyric = response['lyric'] artist = response['artist'] album = response['album'] title = response['title'] mxit_ga.track_event(request) track('view_lyric', request, album=album, title=title, artist=artist, lyric_id=_id) return render_template('lyric.html', lyric=Markup(lyric), artist=artist, title=title, album=album, back_url=back_url)
def _apply_model(image: np.ndarray) -> np.ndarray:
    """Apply the model to a grayscale image of any size.

    Exactly model-sized inputs are predicted directly (values scaled
    0-255 <-> 0-1 around the model). Larger images are padded to a grid
    of model-sized tiles, each tile processed recursively, and the tile
    seams patched. Smaller images are resized up to the model shape.
    NOTE(review): assumes `image` is a 2-D uint8-like array in [0, 255] —
    confirm against callers.
    """
    model_height, model_width = config.MODEL_INPUT_SHAPE
    image_height, image_width = image.shape
    # apply model to a single slice of the image
    if image.shape == config.MODEL_INPUT_SHAPE:
        model_input = image.reshape((1, model_height, model_width, 1)) / 255.0
        return (
            model.get().predict(model_input).reshape((model_height, model_width))
            * 255
        )
    # recursively apply model to the whole image
    elif image_height >= model_height and image_width >= model_width:
        n_vertical = math.ceil(image_height / model_height)
        n_horizontal = math.ceil(image_width / model_width)
        # create overflow buffer (padded with white = 255)
        result = np.full(
            (n_vertical * model_height, n_horizontal * model_width), fill_value=255
        )
        result[:image_height, :image_width] = image
        # keep the unprocessed copy for seam patching below
        buffer = result.copy()
        # iterate over all image slices
        for row, col in itertools.product(range(n_vertical), range(n_horizontal)):
            v_slice = slice(row * model_height, (row + 1) * model_height)
            h_slice = slice(col * model_width, (col + 1) * model_width)
            result[v_slice, h_slice] = _apply_model(result[v_slice, h_slice])
        # dispose of the grid artifact at tile boundaries
        _patch_joints(
            buffer, model_height, model_width, n_horizontal, n_vertical, result
        )
        # crop to original image shape
        return result[:image_height, :image_width]
    else:
        # too small for even one tile: upscale to model shape
        # (cv2.resize takes (width, height))
        return _apply_model(cv2.resize(image, (model_width, model_height)))
def test_id_on_inserted_model(self):
    """After upsert(), the model exposes its Mongo _id via get(),
    attribute access, and to_dict(); undeclared fields (bla) are dropped."""
    class Person(Model):
        collection_name = 'person_test'
        structure = {
            'name': unicode,
            'desc': 'dynamic',
            'age': int,
        }
    Person.connect(Testing)
    # start from an empty collection so the upsert is a real insert
    Person.collection.remove({})
    model = Person(name='John Doe', age=35, desc='no one', bla='ble')
    _id = model.upsert()
    self.assertTrue(model.get('_id'))
    self.assertEqual(model._id, _id)
    self.assertIn('_id', model.to_dict())
    # 'bla' is not in structure, so it must not survive
    self.assertEqual(model.to_dict(), dict(
        name=u'John Doe',
        age=35,
        desc='no one',
        _id=_id
    ))
def GET(self, id):
    """Look up the record for `id` and send it, or a not-found response."""
    record = model.get(id)
    return response.send_if_found(record)
def __init__(self, params):
    """ Initialize trainer.

    Sets up tensorboard writers, builds the network/criterion and data
    iterators, optionally wraps the net in DistributedDataParallel,
    creates the optimizer, and initializes validation-metric tracking
    and training statistics before reloading any existing checkpoint.
    """
    self.params = params
    # Initialize tensorboard writer
    train_log = SummaryWriter(
        os.path.join(config.tensorboard_log_path, "log", "train"))
    valid_log = SummaryWriter(
        os.path.join(config.tensorboard_log_path, "log", "valid"))
    self._tensorboard = TensorboardWriter(train_log, valid_log)
    # epoch / iteration size
    assert isinstance(config.epoch_size, int)
    assert config.epoch_size >= 1
    self.epoch_size = config.epoch_size
    # network and criterion
    net, criterion = model.get()
    self.net = net
    self.criterion = criterion
    # data iterators
    self.iterators = {}
    train_iter, valid_iter, SRC_TEXT, TGT_TEXT = dataset.get()
    self.iterators["train"] = train_iter
    self.iterators["valid"] = valid_iter
    self.num_train = len(train_iter)
    self.SRC_TEXT = SRC_TEXT
    self.TGT_TEXT = TGT_TEXT
    # Multi-GPU
    if config.multi_gpu:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        self.net = nn.parallel.DistributedDataParallel(
            self.net, device_ids=[params.local_rank],
            output_device=params.local_rank)
        """
        self.criterion = nn.parallel.DistributedDataParallel(
            self.criterion, device_ids=[params.local_rank],
            output_device=params.local_rank
        )
        """
    # set optimizers
    self.opt = optimizer.get(self.net)
    # validation metrics: each entry is [best value so far, sign factor]
    self.best_metrics = {}
    for k in config.valid_metrics.keys():
        factor = config.valid_metrics[k]
        self.best_metrics[k] = [config.init_metric * factor, factor]
    # training statistics
    self.epoch = 0
    self.n_iter = 0
    self.n_total_iter = 0
    self.n_sentences = 0
    self.stats = OrderedDict(
        [('processed_s', 0), ('processed_w', 0)] +
        [('MT-%s-%s-loss' % (config.SRC_LAN, config.TGT_LAN), [])] +
        [('MT-%s-%s-ppl' % (config.SRC_LAN, config.TGT_LAN), [])])
    self.last_time = time.time()
    # reload potential checkpoints
    self.reload_checkpoint()
def main(): parser = argparse.ArgumentParser() # default xpu0 for non-brain++, all gpus for brain++ default_devices = '*' if os.environ.get('RLAUNCH_WORKER') else '0' parser.add_argument('-d', '--device', default=default_devices) parser.add_argument('--fast-run', action='store_true', default=False) parser.add_argument('--local', action='store_true', default=True) parser.add_argument('-c', '--continue', dest='continue_path', required=False) args = parser.parse_args() mgb.config.set_default_device(parse_devices(args.device)[0]) # XXX load network *********************************************** net = model.get() #***************************************************************** # create session sess = Session(config, args.device, net=net) # The loggers worklog = WorklogLogger(os.path.join(sess.log_dir, 'worklog.txt')) # create tensorboard loggers train_tb, val_tb = sess.tensorboards("train.events", "val.events") # The training and validation functions train_func = sess.make_func(loss_var=net.loss_var, fast_run=args.fast_run, train_state=True) val_func = sess.make_func( # you might wanna disable fast_run for validation fast_run=args.fast_run, train_state=False) opt = megskull.optimizer.AdamV8(learning_rate=10) opt(train_func) # The datasets datasets = sess.get_datasets("train", "validation", use_local=args.local) train_ds = datasets['train'] val_ds_iter = get_inf_iter_from_dataset(datasets['validation']) # vars to monitor sess.monitor_param_histogram(train_tb, worklog, interval=40) monitor_vars = list( net.extra.get("extra_config", {}).get('monitor_vars', [])) outspec = {'loss': net.loss_var} outspec.update(net.extra.get("extra_outputs", {})) # from IPython import embed # embed() # after done all decorations, compile the function train_func.compile(outspec) val_func.compile(outspec) # restore checkpoint if args.continue_path: sess.load_checkpoint(args.continue_path) # Now start train clock = sess.clock sess.start() if not args.continue_path: train_tb.put_graph(net) 
log_output = log_rate_limited(min_interval=0.5)(worklog.put_line) # from IPaccuracy while True: if clock.epoch >= config.nr_epoch: break opt.learning_rate = config.lr train_tb.put_scalar('learning_rate', opt.learning_rate, clock.step) time_epoch_start = tstart = time.time() for minibatch in train_ds.get_epoch_minibatch_iter(): tdata = time.time() - tstart out = train_func(**minibatch.get_kvmap()) # from IPython import embed # embed() cur_time = time.time() ttrain = cur_time - tstart time_passed = cur_time - time_epoch_start time_expected = time_passed / (clock.minibatch + 1) * train_ds.nr_minibatch_in_epoch eta = time_expected - time_passed outputs = [ "e:{},{}/{}".format(clock.epoch, clock.minibatch, train_ds.nr_minibatch_in_epoch), "{:.2g} mb/s".format(1. / ttrain), ] + [ 'passed:{:.2f}'.format(time_passed), 'eta:{:.2f}'.format(eta), ] + ["{}:{:.2g}".format(k, float(out[k])) for k in monitor_vars] if tdata / ttrain > .05: outputs += ["dp/tot: {:.2g}".format(tdata / ttrain)] log_output(' '.join(outputs)) for k, v in out.items(): if k in monitor_vars: train_tb.put_scalar(k, v, clock.step) if clock.minibatch % 5 == 0: vb = next(val_ds_iter) val_out = val_func(**vb.get_kvmap()) val_monitor_vars = [(k, float(v)) for k, v in val_out.items() if k in monitor_vars] for k, v in val_monitor_vars: val_tb.put_scalar(k, v, clock.step) log_output("Val: " + " ".join( ["{}={:.2g}".format(k, v) for k, v in val_monitor_vars])) if clock.step % 100 == 0: train_tb.flush() val_tb.flush() clock.tick() tstart = time.time() train_tb.flush() val_tb.flush() clock.tock() if clock.epoch % 5 == 0: sess.save_checkpoint('epoch_{}'.format(clock.epoch)) sess.save_checkpoint('latest') logger.info("Training is done, exit.") os._exit(0)
# Script entry: import the project's model package and build/fetch the
# default model (called for its side effects; the result is discarded).
import model

model.get()
def __init__(self, params):
    """ Initialize trainer.

    Distributed/fp16 variant: builds the network, data iterators and
    optimizer, wraps the net in torch or apex DistributedDataParallel
    depending on the fp16 setting, and sets up validation-metric and
    early-stopping state before reloading any checkpoint. Uses
    torch.distributed.barrier() to keep ranks in lockstep.
    """
    self.params = params
    # epoch / iteration size
    assert isinstance(config.epoch_size, int)
    assert config.epoch_size >= 1
    self.epoch_size = config.epoch_size
    # network and criterion
    net, criterion = model.get()
    self.net = net
    self.criterion = criterion
    # data iterators
    self.iterators = {}
    train_iter, valid_iter, SRC_TEXT, TGT_TEXT = dataset.load()
    torch.distributed.barrier()
    print("Process {}, dataset loaded.".format(params.local_rank))
    self.iterators["train"] = train_iter
    self.iterators["valid"] = valid_iter
    self.num_train = len(train_iter)
    self.SRC_TEXT = SRC_TEXT
    self.TGT_TEXT = TGT_TEXT
    torch.distributed.barrier()
    # Multi-GPU: fp16 uses the apex wrapper below instead of torch DDP
    assert config.amp >= 1 or not config.fp16
    if config.multi_gpu and config.fp16 == False:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        self.net = nn.parallel.DistributedDataParallel(
            self.net, device_ids=[params.local_rank],
            output_device=params.local_rank)
    # set optimizers
    self.opt = optimizer.get(self.net)
    torch.distributed.barrier()
    # Float16 / distributed
    if config.fp16:
        self.init_amp()
        if config.multi_gpu:
            logger.info("Using apex.parallel.DistributedDataParallel ...")
            self.net = apex.parallel.DistributedDataParallel(
                self.net, delay_allreduce=True)
    # validation metrics: each entry is [best value so far, sign factor]
    self.best_metrics = {}
    for k in config.valid_metrics.keys():
        factor = config.valid_metrics[k]
        self.best_metrics[k] = [config.init_metric * factor, factor]
    # early stopping metrics
    # NOTE(review): this shares the same list objects with best_metrics
    # (no copy) — confirm that aliasing is intended.
    self.early_stopping_metrics = {}
    for k in self.best_metrics:
        self.early_stopping_metrics[k] = self.best_metrics[k]
    self.decrease_counts = 0
    self.decrease_counts_max = config.decrease_counts_max
    self.stopping_criterion = config.stopping_criterion
    # termination flag must be a cuda tensor so it can be broadcast
    # across ranks in the multi-GPU case
    if config.multi_gpu:
        self.should_terminate = torch.tensor(0).byte()
        self.should_terminate = self.should_terminate.cuda()
    else:
        self.should_terminate = False
    assert (self.stopping_criterion in self.best_metrics) or \
        (self.stopping_criterion is None)
    # training statistics
    self.epoch = 0
    self.n_iter = 0
    self.n_total_iter = 0
    self.n_sentences = 0
    self.stats = OrderedDict(
        [('processed_s', 0), ('processed_w', 0)] +
        [('MT-%s-%s-loss' % (config.SRC_LAN, config.TGT_LAN), [])] +
        [('MT-%s-%s-ppl' % (config.SRC_LAN, config.TGT_LAN), [])])
    self.last_time = time.time()
    # reload potential checkpoints
    self.reload_checkpoint(network_only=config.reload_network_only)
    print("Process {}, trainer initialized.".format(params.local_rank))
def test_model_instance_access_data_from_dot_notation(self):
    """Data passed as a positional dict or as kwargs is reachable both
    via attribute access and via get()."""
    instance = self.ModelTest({'a': 1}, b='bbbbbb')
    self.assertEqual(instance.a, 1)
    self.assertEqual(instance.b, 'bbbbbb')
    self.assertEqual(instance.get('a'), instance.a)
    self.assertEqual(instance.get('b'), 'bbbbbb')
def query_instances(args, unlabeled_dataset, oracle, active_func="random"):
    """Rank unlabeled sentences for active learning and print each one in
    the S/H/T/V/I format (source, hypothesis, target, value, id info).

    Returns nothing; output goes to stdout.
    """
    # lc stands for least confident
    # te stands for token entropy
    # tte stands for total token entropy
    assert active_func in [
        "random", "longest", "shortest", "lc", "margin", "te", "tte"
    ]
    # lengths represents number of tokens, so BPE should be removed
    lengths = np.array([
        len(remove_special_tok(remove_bpe(s)).split())
        for s in unlabeled_dataset
    ])
    # Preparations before querying instances
    # Reloading network parameters
    args.use_cuda = (args.no_cuda == False) and torch.cuda.is_available()
    net, _ = model.get()
    assert os.path.exists(args.checkpoint)
    net, src_vocab, tgt_vocab = load_model(args.checkpoint, net)
    if args.use_cuda:
        net = net.cuda()
    # Initialize inference dataset (Unlabeled dataset)
    infer_dataset = Dataset(unlabeled_dataset, src_vocab)
    if args.batch_size is not None:
        infer_dataset.BATCH_SIZE = args.batch_size
    if args.max_batch_size is not None:
        infer_dataset.max_batch_size = args.max_batch_size
    if args.tokens_per_batch is not None:
        infer_dataset.tokens_per_batch = args.tokens_per_batch
    infer_dataiter = iter(
        infer_dataset.get_iterator(shuffle=True,
                                   group_by_size=True,
                                   include_indices=True))
    # Start ranking unlabeled dataset
    indices = np.arange(len(unlabeled_dataset))
    if active_func == "random":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        random.shuffle(result)
        # each result item is (value, dataset_index, hypothesis)
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        # NOTE(review): `result[idx]` indexes the shuffled result list by
        # *dataset* index, so H:/V: may not correspond to S:/T: — confirm
        # whether this mismatch is intended.
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference, idx)
    elif active_func == "longest":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        # re-score each item by its (de-BPE'd) token count
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: -item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        # NOTE(review): same dataset-index-into-sorted-list concern as above.
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", -result[idx][0])
            print("I:", args.input, args.reference, idx)
    elif active_func == "shortest":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference, idx)
        indices = indices[np.argsort(lengths[indices])]
    elif active_func in ["lc", "margin", "te", "tte"]:
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        # here the loop runs over ranks, so result[idx] is consistent
        for idx in range(len(result)):
            print("S:", unlabeled_dataset[result[idx][1]])
            print("H:", result[idx][2])
            print("T:", oracle[result[idx][1]])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference, result[idx][1])
def get(tname):
    """Prompt for a filter, show matching rows of `tname`, then return to
    the table menu."""
    query = reader.multiple_input(tname, 'Enter requested fields:', empty=True)
    rows = model.get(tname, query)
    view.print_entities(tname, rows)
    reader.press_enter()
    show_table_menu(tname)
def f2():
    """Lazily import the model package and return its default model."""
    import model as _model
    return _model.get()
def get_all(tname):
    """Show every row of `tname`, then return to the table menu."""
    rows = model.get(tname)
    view.print_entities(tname, rows)
    reader.press_enter()
    show_table_menu(tname)
def main():
    """Active-learning driver with two sub-commands.

    get: split a scored active-learning file into oracle / pseudo / others
    outputs under a token budget.
    translate: back-translate an S/H/T/V/I formatted file with a trained
    model and write the results.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(help='two modes, get or translate')
    parser_get = subparsers.add_parser(
        'get', help='Get texts that needs to be labeled or translated')
    parser_get.add_argument(
        '-AO', '--active_out', type=str, default=None,
        help="Output file generated by active.py score mode")
    parser_get.add_argument('-tb', '--tok_budget', type=int,
                            help="Token budget")
    parser_get.add_argument('-bttb', '--back_translation_tok_budget',
                            type=int,
                            help="Back translation token budget")
    parser_get.add_argument('--sort', action="store_true",
                            help="Whether to sort active out by value")
    parser_get.add_argument('-o', '--output', type=str,
                            help="Output filepath")
    parser_get.add_argument('-on', '--output_num', type=int, default=1,
                            help="Output filepath")
    parser_trans = subparsers.add_parser('translate',
                                         help='Translate sentences')
    parser_trans.add_argument('-i', '--input', type=str, help='Input file')
    parser_trans.add_argument('-o', '--output', type=str, help="Output file")
    parser_trans.add_argument('--ckpt', required=True)
    parser_trans.add_argument('--max_len', type=int, default=250)
    parser_trans.add_argument('--gen_a', type=float, default=1.3)
    parser_trans.add_argument('--gen_b', type=int, default=5)
    parser_trans.add_argument('--no_cuda', action='store_true')
    parser_trans.add_argument('--batch_size', type=int, default=None)
    parser_trans.add_argument('--max_batch_size', type=int, default=None)
    parser_trans.add_argument('--tokens_per_batch', type=int, default=None)
    args = parser.parse_args()
    # the chosen subparser is identified by which arguments exist
    args.mode = "get" if hasattr(args, 'active_out') else "translate"
    if args.mode == 'translate':
        args.use_cuda = (not args.no_cuda) and torch.cuda.is_available()
    if args.mode == "get":
        with open(args.active_out, 'r') as f:
            lines = f.read().split('\n')[:-1]
        # file is groups of 4 lines: S, T, V ("V: <float>"), I
        assert len(lines) % 4 == 0
        active_out = [(lines[idx], lines[idx + 1],
                       float(lines[idx + 2].split(' ')[-1]), lines[idx + 3])
                      for idx in range(0, len(lines), 4)]
        if args.sort:
            active_out = sorted(active_out, key=lambda item: item[2])
        indices = np.arange(len(active_out))
        lengths = np.array([
            len(remove_special_tok(remove_bpe(
                item[0][len("S: "):])).split(' '))
            for item in active_out
        ])
        # oracle set fills the token budget; pseudo set fills the extra
        # back-translation budget; everything else goes to "others"
        include_oracle = np.cumsum(lengths) <= args.tok_budget
        include_pseudo = np.cumsum(lengths) <= (
            args.tok_budget + args.back_translation_tok_budget)
        include_pseudo = np.logical_xor(include_pseudo, include_oracle)
        include_pseudo = indices[include_pseudo]
        include_oracle = indices[include_oracle]
        others = [
            idx for idx in indices
            if (idx not in include_pseudo) and (idx not in include_oracle)
        ]
        # Output oracle and others (dedup: this dump was copy-pasted
        # four times in the original)
        _dump_entries(args.output + '_oracle', active_out, include_oracle)
        _dump_entries(args.output + '_others', active_out, others)
        # Output pseudo, optionally sharded into output_num files
        if args.output_num > 1:
            n_lines = len(include_pseudo) // args.output_num + 1
            for n in range(args.output_num):
                _dump_entries(
                    args.output + '_pseudo_' + str(n), active_out,
                    include_pseudo[n * n_lines:(n + 1) * n_lines])
        else:
            assert args.output_num == 1
            _dump_entries(args.output + '_pseudo', active_out, include_pseudo)
    elif args.mode == 'translate':
        assert args.max_len > 10
        net, _ = model.get()
        net, src_vocab, tgt_vocab = load_model(args.ckpt, net)
        if args.use_cuda:
            net = net.cuda()
        fpath = args.input
        try:
            # input file is groups of 5 lines: S, H, T, V, I
            with open(fpath, 'r') as f:
                lines = f.read().split('\n')[:-1]
            active_out = [(lines[idx], lines[idx + 1], lines[idx + 2],
                           float(lines[idx + 3].split(' ')[-1]),
                           lines[idx + 4])
                          for idx in range(0, len(lines), 5)]
            args.text = [a[0][len('S: '):].strip() for a in active_out]
            args.ref_text = [a[2][len('T: '):].strip() for a in active_out]
        except Exception:
            # narrowed from a bare except: at least no longer swallows
            # KeyboardInterrupt/SystemExit
            print("error opening or reading text file")
        out = translate(args, net, src_vocab, tgt_vocab, active_out)
        with open(args.output, 'w') as f:
            f.write('\n'.join(out) + '\n')


def _dump_entries(path, active_out, indices):
    """Write the selected entries to `path` in the 5-line S/H/T/V/I format,
    copying each oracle target into the hypothesis slot."""
    out = []
    for idx in indices:
        src, tgt, value, info = active_out[idx]
        out.append(src)
        out.append('H: ' + tgt[len('T: '):])
        out.append('T: ' + tgt[len('T: '):])
        out.append('V: ' + str(value))
        out.append(info)
    with open(path, 'w') as f:
        f.write('\n'.join(out) + '\n')
def __init__(self, cell, transformation=None):
    """Build an error measure for `cell`: compute experimental summary
    statistics and their scale factors, load the Myokit IKr model, and
    prepare one analytical HH simulation per protocol (2-5).

    `transformation` defaults to the identity (NullTransformation).
    """
    # Store cell
    self.cell = cell
    # Store transformation object
    if transformation is None:
        transformation = transformations.NullTransformation()
    self.transformation = transformation
    # Calculate experimental summary statistics
    print('Calculating summary statistics for cell ' + str(cell))
    stats = sumstat.all_summary_statistics(cell)
    # Unpack: each stats entry is (x, y); only the y component is kept
    self.ta1 = stats[0][1]
    self.tr1 = stats[1][1]
    self.ai1 = stats[2][1]
    self.ri1 = stats[3][1]
    self.iv1 = stats[4][1]
    # Scale factors for error: n* normalize by count, z* by magnitude
    self.nta = 1 / len(self.ta1)
    self.ntr = 1 / len(self.tr1)
    self.nai = 1 / len(self.ai1)
    self.niv = 1 / len(self.iv1)
    self.zta = 1 / np.max(self.ta1)
    self.ztr = 1 / np.max(self.tr1)
    self.zai = 1
    self.ziv = 1 / (np.max(self.iv1) - np.min(self.iv1))
    assert (self.zta > 0)
    assert (self.ztr > 0)
    assert (self.ziv > 0)
    # Load Myokit model
    model = data.load_myokit_model()
    model.get('membrane.V').set_label('membrane_potential')
    # cell-specific reversal potential from the recording temperature
    model.get('nernst.EK').set_rhs(
        cells.reversal_potential(cells.temperature(cell)))
    # Start at steady-state for -80mV
    # print('Updating model to steady-state.')
    # model.get('membrane.V').promote()
    # ai = model.get('ikr.O_Kr').pyfunc()(-80)
    # model.get('membrane.V').demote()
    # model.get('ikr.O_Kr').set_state_value(ai)
    # Create analytical model
    m = myokit.lib.hh.HHModel.from_component(
        model.get('ikr'),
        parameters=[
            'ikr.p1', 'ikr.p2', 'ikr.p3', 'ikr.p4', 'ikr.p5',
            'ikr.p6', 'ikr.p7', 'ikr.p8', 'ikr.p9', 'ikr.p10',
            'ikr.p11', 'ikr.p12', 'ikr.p13', 'ikr.p14', 'ikr.p15',
            'ikr.p16', 'ikr.p17', 'ikr.p18', 'ikr.p19', 'ikr.p20',
            'ikr.p21', 'ikr.p22', 'ikr.p23', 'ikr.p24', 'ikr.p25',
        ],
    )
    # Load protocols, create simulations and times arrays
    self.simulations = []
    self.times = []
    for i in (2, 3, 4, 5):
        # cells 7 and 8 use a variant of protocol 2
        variant = (i == 2 and cell in (7, 8))
        p = data.load_myokit_protocol(i, variant=variant)
        self.simulations.append(myokit.lib.hh.AnalyticalSimulation(m, p))
        # times with capacitance artefacts (0.1 ms around steps) removed
        self.times.append(
            data.capacitance(p, 0.1,
                             np.arange(0, p.characteristic_time(), 0.1))[0])
def main(args):
    """Evaluation entry point: wait for a checkpoint, build the data
    pipeline and model, then stream validation metrics over the dataset.

    NOTE(review): ``summary_dir`` is only assigned when ``args.noise`` or
    ``args.events`` is set; with neither flag the FileWriter call below
    raises NameError -- confirm the CLI enforces exactly one of them.
    """
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir,"noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir,"events")
    # Block until a checkpoint exists; eval_interval < 0 skips the wait.
    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print ('Evaluating model')
            break
        print ('Waiting for training job to save a checkpoint')
        time.sleep(args.eval_interval)
    cfg = config.Config()
    cfg.batch_size = 1  # evaluate one window at a time
    cfg.n_epochs = 1
    cfg.add = 1
    coord = tf.train.Coordinator()
    while True:
        try:
            # data pipeline
            data_pipeline = da.data_pipeline(args.dataset, config=cfg,
                                             is_training=False)
            samples = {
                'data': data_pipeline.samples,
                'label': data_pipeline.labels,
            }
            model = md.get(args.model, samples,
                           checkpoint_dir=args.checkpoint_dir,
                           is_training=False, reuse=False)
            metrics = model.validation_metrics()
            summary_writer = tf.summary.FileWriter(summary_dir, None)
            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                #sess.run(tf.global_variables_initializer())
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                model.load(sess, args.step)
                print ('Evaluating at step {}'.format(sess.run(model.global_step)))
                step = tf.train.global_step(sess, model.global_step)
                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0
                n = 0  # windows seen
                m = 0  # correctly classified windows
                pred = np.empty(2)
                true_labels = np.empty(1)
                print("7777777777777777777777777777777")
                # Consume the queue until it is exhausted (OutOfRangeError)
                # or the user interrupts.
                while True:
                    try:
                        to_fetch = [metrics,
                                    model.layers['class_prediction'],
                                    model.layers['class_prob'],
                                    samples["label"]]
                        metrics_, batch_prelabel, batch_pred, batch_true_label = sess.run(to_fetch)
                        pred = np.append(pred, batch_pred)
                        print(batch_prelabel, batch_true_label)
                        print(batch_pred)
                        trace = samples["data"]
                        if batch_prelabel == batch_true_label:
                            m += 1
                        else:
                            # Misclassified window: dump the raw trace for
                            # inspection.
                            print(sess.run(trace[0]))
                            print(batch_pred)
                            #plt.plot(sess.run(trace[0]))
                            #plt.show()
                        true_labels = np.append(true_labels, batch_true_label)
                        # Accumulate batch-weighted metric sums.
                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size*metrics_[key]
                        n += cfg.batch_size
                        mess = model.validation_metrics_message(metrics_)
                        print ('{:03d} | '.format(n)+mess)
                    except KeyboardInterrupt:
                        print ('stopping evaluation')
                        break
                    except tf.errors.OutOfRangeError:
                        # Input queue drained: one full pass finished.
                        print ('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
                        print ("{} windows seen".format(n))
                        break
                print('true = {} | det_accuracy = {}'.format(m, m/n))
                break
            #tf.reset_default_graph()
            #print ('Sleeping for {}s'.format(args.eval_interval))
            #time.sleep(args.eval_interval)
        finally:
            print ('joining data threads')
            coord.request_stop()
def test_data_property_getter(self):
    """Positional dict, kwargs, and set() all merge into _data, and values
    are reachable via get() and attribute access."""
    instance = self.ModelTest({'a': 1}, b=2)
    instance.set({'c': 3})
    expected = {'a': 1, 'b': 2, 'c': 3}
    self.assertEqual(instance._data, expected)
    self.assertEqual(instance.get('a'), 1)
    self.assertEqual(instance.b, 2)
print("========================================") # seed args.cuda = torch.cuda.is_available() torch.manual_seed(args.seed) print("args seed:{},cuda:{}".format(args.seed, args.cuda)) if args.cuda: torch.cuda.manual_seed(args.seed) # data loader train_loader, test_loader = dataset.get(batch_size=args.batch_size, data_root=args.data_root, num_workers=8) # model model = model.get(args.model_name, '../pretrained_models') model = torch.nn.DataParallel(model, device_ids=range(args.ngpu)) if args.cuda: model.cuda() # optimizer optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.wd, momentum=0.9) decreasing_lr = list(map(int, args.decreasing_lr.split(','))) print('decreasing_lr: ' + str(decreasing_lr)) best_acc, old_file = 0, None t_begin = time.time() try: # ready to go
def test_incremental():
    """Debug harness: check that incremental (cached, token-by-token)
    decoding produces the same logits as a full-sequence decode.

    Loads the best checkpoint, builds a synthetic batch, decodes it both
    ways, and prints per-step diffs between the two logit streams.
    """
    from common import config
    import model
    from utils import get_batch
    net, _ = model.get()
    net.eval()
    ckpt = torch.load("checkpoints/checkpoint_best_ppl.pth",
                      map_location='cpu')
    # reload model parameters
    # Strip the first 7 chars of every key -- presumably the
    # DataParallel "module." prefix; verify against training code.
    s_dict = {}
    for k in ckpt["net"]:
        new_k = k[7:]
        s_dict[new_k] = ckpt["net"][k]
    net.load_state_dict(s_dict)
    import dataset
    train_iter, _, SRC_TEXT, TGT_TEXT = dataset.get()
    #data_iter = iter(train_iter.get_iterator(True, True))
    #raw_batch = next(data_iter)
    # Synthetic token ids (offset 4, past the special tokens presumably):
    # 80 sentences, src len 25, tgt len 30.
    src = np.arange(4, 4+2000).reshape(80, 25)
    tgt = np.arange(4, 4+2400).reshape(80, 30)
    raw_batch = dataset.Batch(
        torch.from_numpy(src).long(),
        torch.from_numpy(tgt).long()
    )
    batch = get_batch(
        raw_batch.src, raw_batch.tgt, SRC_TEXT.vocab, TGT_TEXT.vocab
    )
    # Dump tensor shapes; non-tensor entries have no .size().
    for k, v in batch.items():
        try:
            print(k, v.size())
        except AttributeError:
            pass
    with torch.no_grad():
        enc_out = net.encode(src=batch['src'], src_mask=batch['src_mask'])
        # No incremental: decode the whole target sequence at once.
        logits1 = net.decode(enc_out, batch['src_mask'], batch['tgt'],
                             batch['tgt_mask'])
        logits1 = net.generator(logits1, log_prob=True)
        # Incremental: feed one target token at a time with a decode cache.
        print("Incremental encoding finished!")
        tlen = batch['tgt'].size(1)
        cache = {'cur_len': 0}
        logits2 = []
        for i in range(tlen):
            x = batch['tgt'][:, i].unsqueeze(-1)
            # Mask row i restricted to the first i+1 positions.
            logit = net.decode(
                enc_out, batch['src_mask'], x,
                batch['tgt_mask'][:, i, :(i+1)].unsqueeze(-2),
                cache
            )
            logit = net.generator(logit, log_prob=True)
            if i >= 0:
                # Compare this step's logits against the full decode.
                ref = logits1[:, i, :]
                sys = logit.squeeze()
                ref_words = torch.topk(ref, 1)[1].squeeze()
                sys_words = torch.topk(sys, 1)[1].squeeze()
                print("Diff = {}".format(torch.sum(ref - sys).item()))
                print("Logits sys size : {}, Logits sys : {}".format(sys.size(), sys.sum().item()))
                print("Logits ref size : {}, Logits ref : {}".format(ref.size(), ref.sum().item()))
                # Argmax disagreement means the cache path diverged.
                if (ref_words == sys_words).all() == False:
                    print("F**k!")
                print("\n")
            logits2.append(logit)
            cache['cur_len'] = i + 1
        logits2 = torch.cat(logits2, dim=1).contiguous()
        print("Logits1: {}".format(torch.sum(logits1).item()))
        print("Logits2: {}".format(torch.sum(logits2).item()))
def select(table):
    """Prompt for the fields to show, fetch matching rows from *table*,
    display them, then drop back into the table's secondary menu."""
    requested_fields = view.multiple_input(table, 'Enter requested fields:')
    rows = model.get(table, requested_fields)
    view.print_entities(table, rows)
    view.press_enter()
    display_secondary_menu(table)
def query_instances(args, unlabeled_dataset, active_func="random", tok_budget=None):
    """Select a token-budgeted batch of sentences for active learning.

    Ranks ``unlabeled_dataset`` by ``active_func`` and returns the longest
    prefix of the ranking whose cumulative token count fits ``tok_budget``.

    Args:
        args: namespace with no_cuda, checkpoint, batch_size,
            max_batch_size, tokens_per_batch (only used by model-based
            criteria).
        unlabeled_dataset: list of BPE-encoded source sentences.
        active_func: "random", "longest", "shortest", "lc" (least
            confident), "margin", "te" (token entropy), or "tte" (total
            token entropy).
        tok_budget: token budget for the selection.  NOTE(review): the
            default ``None`` always fails the isinstance assert below, so
            callers must pass an explicit int -- confirm this is intended.

    Returns:
        (selected_sentences, selected_indices): the chosen sentences and a
        numpy int array of their indices into ``unlabeled_dataset``.
    """
    # lc stands for least confident
    # te stands for token entropy
    # tte stands for total token entropy
    assert active_func in [
        "random", "longest", "shortest", "lc", "margin", "te", "tte"
    ]
    assert isinstance(tok_budget, int)
    # lengths represents number of tokens, so BPE should be removed
    lengths = np.array([
        len(remove_special_tok(remove_bpe(s)).split())
        for s in unlabeled_dataset
    ])
    total_num = sum(lengths)
    # Never ask for more tokens than the pool contains.
    if total_num < tok_budget:
        tok_budget = total_num
    # Preparations before querying instances
    if active_func in ["lc", "margin", "te", "tte"]:
        # Reloading network parameters
        args.use_cuda = (args.no_cuda == False) and torch.cuda.is_available()
        net, _ = model.get()
        assert os.path.exists(args.checkpoint)
        net, src_vocab, tgt_vocab = load_model(args.checkpoint, net)
        if args.use_cuda:
            net = net.cuda()
        # Initialize inference dataset (Unlabeled dataset)
        infer_dataset = Dataset(unlabeled_dataset, src_vocab)
        if args.batch_size is not None:
            infer_dataset.BATCH_SIZE = args.batch_size
        if args.max_batch_size is not None:
            infer_dataset.max_batch_size = args.max_batch_size
        if args.tokens_per_batch is not None:
            infer_dataset.tokens_per_batch = args.tokens_per_batch
        infer_dataiter = iter(
            infer_dataset.get_iterator(shuffle=True,
                                       group_by_size=True,
                                       include_indices=True))
    # Start ranking unlabeled dataset
    indices = np.arange(len(unlabeled_dataset))
    if active_func == "random":
        np.random.shuffle(indices)
    elif active_func == "longest":
        indices = indices[np.argsort(-lengths[indices])]
    elif active_func == "shortest":
        indices = indices[np.argsort(lengths[indices])]
    elif active_func in ["lc", "margin", "te", "tte"]:
        # get_scores items look like (score, dataset_index, ...); sorting
        # ascending by score puts the most informative sentences first.
        result = get_scores(args, net, active_func, infer_dataiter,
                            src_vocab, tgt_vocab)
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
    # Greedy prefix of the ranking whose cumulative length fits the budget
    # (cumsum is monotone since lengths are non-negative).
    include = np.cumsum(lengths[indices]) <= tok_budget
    include = indices[include]
    return [unlabeled_dataset[idx] for idx in include], include
def query_instances(args, unlabeled_dataset, oracle, active_func="random", labeled_dataset=None):
    """Score every unlabeled sentence under an active-learning criterion
    and print the ranked records, most informative first.

    For each sentence a record of lines is printed:
      S: source sentence (BPE form), H: model hypothesis, T: oracle
      reference, V: ranking score, I: provenance (input path, reference
      path, global sentence index).

    Args:
        args: namespace with no_cuda, checkpoint, batch_size,
            max_batch_size, tokens_per_batch, input, reference,
            previous_num_sents.
        unlabeled_dataset: list of BPE-encoded source sentences.
        oracle: reference translations aligned with unlabeled_dataset.
        active_func: "random", "longest", "shortest", "lc" (least
            confident), "margin", "te" (token entropy), "tte" (total token
            entropy), or "dden" (data density).
        labeled_dataset: (sources, targets) pair of already-labeled data;
            required by the "dden" criterion only.

    Bug fixed: the random/longest/shortest branches previously indexed
    ``result`` by *dataset* index instead of its position in the sorted
    list, attaching H:/V: lines to the wrong sentences; they now enumerate
    positions like the lc/margin/te/tte branch.
    """
    # lc stands for least confident
    # te stands for token entropy
    # tte stands for total token entropy
    assert active_func in [
        "random", "longest", "shortest", "lc", "margin", "te", "tte", "dden"
    ]
    # lengths represents number of tokens, so BPE should be removed.
    lengths = np.array([
        len(remove_special_tok(remove_bpe(s)).split())
        for s in unlabeled_dataset
    ])
    # Reload network parameters (hypotheses are printed for every branch).
    args.use_cuda = (args.no_cuda == False) and torch.cuda.is_available()
    net, _ = model.get()
    assert os.path.exists(args.checkpoint)
    net, src_vocab, tgt_vocab = load_model(args.checkpoint, net)
    if args.use_cuda:
        net = net.cuda()
    # Initialize inference dataset (unlabeled pool).
    infer_dataset = Dataset(unlabeled_dataset, src_vocab)
    if args.batch_size is not None:
        infer_dataset.BATCH_SIZE = args.batch_size
    if args.max_batch_size is not None:
        infer_dataset.max_batch_size = args.max_batch_size
    if args.tokens_per_batch is not None:
        infer_dataset.tokens_per_batch = args.tokens_per_batch
    infer_dataiter = iter(
        infer_dataset.get_iterator(shuffle=True,
                                   group_by_size=True,
                                   include_indices=True))
    # Start ranking the unlabeled dataset.  Each ``result`` item is
    # (score, dataset_index, hypothesis).
    indices = np.arange(len(unlabeled_dataset))
    if active_func == "random":
        result = get_scores(args, net, active_func, infer_dataiter,
                            src_vocab, tgt_vocab)
        random.shuffle(result)
        indices = np.array([item[1] for item in result]).astype('int')
        # ``pos`` walks the shuffled list, ``idx`` is the dataset index.
        for pos, idx in enumerate(indices):
            print("S:", unlabeled_dataset[idx])
            print("H:", result[pos][2])
            print("T:", oracle[idx])
            print("V:", result[pos][0])
            print("I:", args.input, args.reference,
                  idx + args.previous_num_sents)
    elif active_func == "longest":
        result = get_scores(args, net, active_func, infer_dataiter,
                            src_vocab, tgt_vocab)
        # Re-score each item by its de-BPE'd token count.
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: -item[0])
        indices = np.array([item[1] for item in result]).astype('int')
        for pos, idx in enumerate(indices):
            print("S:", unlabeled_dataset[idx])
            print("H:", result[pos][2])
            print("T:", oracle[idx])
            # Negated so larger (longer) sentences print smaller V values,
            # matching the "lower V = pick first" convention.
            print("V:", -result[pos][0])
            print("I:", args.input, args.reference,
                  idx + args.previous_num_sents)
    elif active_func == "shortest":
        result = get_scores(args, net, active_func, infer_dataiter,
                            src_vocab, tgt_vocab)
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: item[0])
        indices = np.array([item[1] for item in result]).astype('int')
        for pos, idx in enumerate(indices):
            print("S:", unlabeled_dataset[idx])
            print("H:", result[pos][2])
            print("T:", oracle[idx])
            print("V:", result[pos][0])
            print("I:", args.input, args.reference,
                  idx + args.previous_num_sents)
        # (A redundant re-sort of ``indices`` by length was removed here:
        # the list is already length-sorted and nothing reads it after.)
    elif active_func in ["lc", "margin", "te", "tte"]:
        result = get_scores(args, net, active_func, infer_dataiter,
                            src_vocab, tgt_vocab)
        # Ascending score: most informative sentences first.
        result = sorted(result, key=lambda item: item[0])
        indices = np.array([item[1] for item in result]).astype('int')
        for pos in range(len(result)):
            print("S:", unlabeled_dataset[result[pos][1]])
            print("H:", result[pos][2])
            print("T:", oracle[result[pos][1]])
            print("V:", result[pos][0])
            print("I:", args.input, args.reference,
                  result[pos][1] + args.previous_num_sents)
    elif active_func == "dden":
        # Data-density criterion: prefer sentences whose vocabulary is
        # frequent in the unlabeled pool but rare in the labeled data and
        # in the batch selected so far.  Punctuation tokens are ignored.
        punc = [
            ".", ",", "?", "!", "'", "<", ">", ":", ";", "(", ")", "{", "}",
            "[", "]", "-", "..", "...", "...."
        ]
        lamb1 = 1  # decay for counts in the labeled data
        lamb2 = 1  # decay for counts in the current batch
        p_u = {}
        unlabeled_dataset_without_bpe = []
        labeled_dataset_without_bpe = [[], []]
        for s in unlabeled_dataset:
            unlabeled_dataset_without_bpe.append(
                remove_special_tok(remove_bpe(s)))
        for s in labeled_dataset[0]:
            labeled_dataset_without_bpe[0].append(
                remove_special_tok(remove_bpe(s)))
        for s in labeled_dataset[1]:
            labeled_dataset_without_bpe[1].append(
                remove_special_tok(remove_bpe(s)))
        # Raw token counts over the unlabeled pool.
        for s in unlabeled_dataset_without_bpe:
            sentence = s.split()
            for token in sentence:
                if token not in punc:
                    if token in p_u.keys():
                        p_u[token] += 1
                    else:
                        p_u[token] = 1
        # Log-damped counts, normalised into a distribution.
        total_dden = 0
        for token in p_u.keys():
            p_u[token] = math.log(p_u[token] + 1)
            total_dden += p_u[token]
        for token in p_u.keys():
            p_u[token] /= total_dden
        # Token counts over the labeled source side.
        count_l = {}
        for s in labeled_dataset_without_bpe[0]:
            sentence = s.split()
            for token in sentence:
                if token not in punc:
                    if token in count_l.keys():
                        count_l[token] += 1
                    else:
                        count_l[token] = 1
        # First pass: density of each sentence vs. the labeled data only.
        dden = []
        for s in unlabeled_dataset_without_bpe:
            sentence = s.split()
            len_for_sentence = 0
            sum_for_sentence = 0
            for token in sentence:
                if token not in punc:
                    if token in count_l.keys():
                        sum_for_sentence += p_u[token] * math.exp(
                            -lamb1 * count_l[token])
                    else:
                        sum_for_sentence += p_u[token]
                    len_for_sentence += 1
            if len_for_sentence != 0:
                sum_for_sentence /= len_for_sentence
            dden.append(sum_for_sentence)
        unlabeled_with_index = []
        for i in range((len(unlabeled_dataset))):
            unlabeled_with_index.append((dden[i], i))
        unlabeled_with_index.sort(key=lambda x: x[0], reverse=True)
        # Second pass, in first-pass order: re-score with a batch-diversity
        # penalty that grows as tokens are "consumed" by earlier picks.
        count_batch = {}
        dden_new = []
        for _, i in unlabeled_with_index:
            sentence = unlabeled_dataset_without_bpe[i].split()
            len_for_sentence = 0
            sum_for_sentence = 0
            for token in sentence:
                if token not in punc:
                    p_tmp = p_u[token]
                    if token in count_batch.keys():
                        # NOTE(review): zeroing then multiplying leaves
                        # p_tmp == 0, making the exp factor dead; this may
                        # have been meant as p_tmp *= exp(...) only --
                        # behavior preserved, confirm intent.
                        p_tmp = 0
                        p_tmp *= math.exp(-lamb2 * count_batch[token])
                    if token in count_l.keys():
                        p_tmp *= math.exp(-lamb1 * count_l[token])
                    sum_for_sentence += p_tmp
                    len_for_sentence += 1
            # Mark this sentence's tokens as used by the batch.
            for token in sentence:
                if token not in punc:
                    if token in count_batch.keys():
                        count_batch[token] += 1
                    else:
                        count_batch[token] = 1
            if len_for_sentence != 0:
                sum_for_sentence /= len_for_sentence
            dden_new.append((sum_for_sentence, i))
        # Re-key the scores by dataset index so ddens[i] is sentence i's score.
        dden_new.sort(key=lambda x: x[1])
        dden_sort = []
        for dden_num, _ in dden_new:
            dden_sort.append(dden_num)
        ddens = np.array(dden_sort)
        indices = indices[np.argsort(-ddens)]
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("T:", oracle[idx])
            print("V:", -ddens[idx])
            # NOTE(review): unlike the other branches this omits
            # ``+ args.previous_num_sents`` -- confirm whether that offset
            # is wanted here too.
            print("I:", args.input, args.reference, idx)
class_mode='categorical', color_mode="rgb", # seed=42 ) checkpoint_dir = os.path.dirname(checkpoint_path) # Create a callback that saves the model's weights cp_callback = tf.keras.callbacks.ModelCheckpoint( filepath=checkpoint_path, save_weights_only=True, verbose=1, save_freq=5 # Save every 5 epoch ) model = model.get() model.save_weights(checkpoint_path.format(epoch=0)) STEP_SIZE_TRAIN = train_data_gen.n // train_data_gen.batch_size STEP_SIZE_VALID = val_data_gen.n // val_data_gen.batch_size print("steps_per_epoch : {:d} ".format(STEP_SIZE_TRAIN)) print("validation_steps : {:d} ".format(STEP_SIZE_VALID)) history = model.fit_generator(generator=train_data_gen, steps_per_epoch=STEP_SIZE_TRAIN, validation_data=val_data_gen, validation_steps=STEP_SIZE_VALID, epochs=epochs)