Example #1
def handle_command(c):
    print(c[0])  # debug: log the command name
    print(c)     # debug: log the full parsed command
    if c[0] == b'COMMAND':
        return NULL
    elif c[0] == b'SET':
        key = c[1]
        val = c[2]
        model.insert(key, val, epochs=13)
        return NULL
    elif c[0] == b'GET':
        key = c[1]
        v = model.get(key)
        return b'$%b\r\n%b\r\n' % (str(len(v)).encode("ascii"), v)
    elif c[0] == b'INCR':
        key = c[1]
        v = model.get(key)
        i = int(v.decode("ascii"))
        model.insert(key, str(i + 1).encode("ascii"), epochs=10)
        return NULL
    elif c[0] == b'DECR':
        key = c[1]
        v = model.get(key)
        i = int(v.decode("ascii"))
        model.insert(key, str(i - 1).encode("ascii"), epochs=10)
        return NULL
    elif c[0] == b'INCRBY':
        key = c[1]
        v = model.get(key)
        i = int(v.decode("ascii"))
        model.insert(key, str(i + int(c[2].decode("ascii"))).encode("ascii"), epochs=10)
        return NULL
    else:
        return b'-ERROR: Unsupported command\r\n'
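Note: the handler above assumes a module-level NULL reply constant and frames its GET reply as a RESP-style bulk string ($<length>\r\n<payload>\r\n). A minimal sketch of those two pieces, with the constant's value assumed rather than taken from the original project:

NULL = b'$-1\r\n'  # assumed value: the RESP "null bulk string" reply

def encode_bulk(v):
    # RESP bulk string framing: $<length>\r\n<payload>\r\n
    return b'$%b\r\n%b\r\n' % (str(len(v)).encode("ascii"), v)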
Example #2
    def test_get(self):
        model = self.ModelTest()
        model.set(a=1)
        self.assertEqual(model.get('a'), 1)
        self.assertEqual(model.a, 1)

        model._data['a'] = 2
        self.assertEqual(model.get('a'), 2)

        self.assertIs(model.get('b'), None)
        with self.assertRaises(AttributeError):
            model.b
Example #3
	def delete(self, ttype, name):
		"""delete an object and its children"""
		try:
			model.get({"type":ttype, "name":name}).check_allow('delete')
			self.db.sql("""delete from `%s` where name=%s""" % (ttype, '%s'), name)

			# delete children
			for child_tab in self.db.sql("select child from _parent_child where parent=%s", (ttype,)):
				self.db.sql("""delete from `%s` where parent=%s and parent_type=%s""" \
					% (child_tab['child'],'%s','%s'), (name, ttype))
		except MySQLdb.Error as e:
			if e.args[0] == ER.NO_SUCH_TABLE:
				return
			else:
				raise
Example #4
def load_model(pkl_path):
    net = get()
    net.eval()
    f = open(pkl_path, 'rb')
    ckpt = pickle.load(f)
    f.close()

    param_order = [
        'conv1:conv1:conv:W',
        'conv1:conv1:conv:b',
        'conv2:conv2:conv:W',
        'conv2:conv2:conv:b',
        'conv3:conv3:conv:W',
        'conv3:conv3:conv:b',
        'conv4:conv4:conv:W',
        'conv4:conv4:conv:b',
        'fc1:fc1:fc:W',
        'fc1:fc1:fc:b',
        'fct:fct:fc:W',
        'fct:fct:fc:b',
    ]
    for i, param in enumerate(net.parameters()):
        if 'fc' not in param_order[i]:
            param.data = torch.from_numpy(ckpt[param_order[i]]).float()
        else:
            param.data = torch.from_numpy(np.transpose(
                ckpt[param_order[i]])).float()

    return net
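The transpose in the fc branch accounts for a weight-layout difference: PyTorch's nn.Linear stores weights as (out_features, in_features), while the pickled checkpoint evidently stores fully connected weights the other way around. A hedged helper (the name copy_param is hypothetical) that verifies shapes before assigning:

import numpy as np
import torch

def copy_param(param, arr, transpose=False):
    # Convert a numpy checkpoint array to a float tensor, optionally
    # transposing it, and check the shape before overwriting the parameter.
    t = torch.from_numpy(np.transpose(arr) if transpose else arr).float()
    assert t.shape == param.data.shape, (t.shape, param.data.shape)
    param.data = t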
Example #5
def test_model_numel():
    from model import get
    net, _ = get()
    n = 0
    for p in net.parameters():
        n += p.numel()
    print(n)
Example #6
def main():
    "done"

    parser = argparse.ArgumentParser()
    parser.add_argument('-ckpt', required=True)
    parser.add_argument('-k', type=int, default=5)
    parser.add_argument('-max_len', type=int, default=250)
    parser.add_argument('-max_ratio', type=float, default=1.5)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-text', type=str, required=True)
    parser.add_argument('-lp', '--length_penalty', type=float, default=0.7)

    args = parser.parse_args()
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()

    assert args.k > 0
    assert args.max_len > 10

    net, _ = model.get()
    net, src_vocab, tgt_vocab = load_model(args.ckpt, net)

    if args.use_cuda:
        net = net.cuda()

    fpath = args.text
    try:
        with open(fpath, encoding='utf-8') as f:
            args.text = f.read().split('\n')
    except OSError:
        print("error opening or reading text file")
        return

    translate(args, net, src_vocab, tgt_vocab)
Example #7
	def delete(self, ttype, name):
		"""delete an object and its children"""
		import sqlite3

		try:
			model.get({"type":ttype, "name":name}).check_allow('delete')
			self.sql("""delete from `%s` where name=?""" % ttype, (name,))

			# delete children
			for child_tab in self.sql("select child from _parent_child where parent=?", (ttype,)):
				self.sql("""delete from `%s` where parent=? and parent_type=?""" \
					% child_tab[0], (name, ttype), as_dict=1)
		except sqlite3.OperationalError, e:
			if 'no such table' in e.args[0]:
				return
			else:
				raise e			
Example #8
    def test_set(self):
        model = self.ModelTest()
        with self.assertRaises(ValueError):
            model.set()
        model.set({'a': 1})

        model2 = self.ModelTest()
        model2.set(a=1)

        self.assertEqual(model._data, model2._data)

        model.set({'a': 1, 'b': 2})
        self.assertEqual(model.get('a'), 1)
        self.assertEqual(model._data['b'], 2)
        self.assertEqual(model._data, {'a': 1, 'b': 2})

        model.set(c=3, d=4)
        self.assertEqual(model.get('c'), 3)
        self.assertEqual(model.d, 4)
Example #9
def run():
    # set initial time
    start_time = time.time()

    # dump hyper parameters settings
    print(time.strftime('%Y-%m-%d %H:%M:%S') + ' Hyper-parameters setting')
    # get configuration
    args = config.get()

    # checkpoint callback that keeps only the best model seen so far
    save_best_model = ModelCheckpoint(
        filepath=args.model_file_train, verbose=1,
        save_best_only=True)

    if os.path.isfile(args.model_file_train):
        print(time.strftime('%Y-%m-%d %H:%M:%S') + ' Load model from file...')
        skip_gram_model = load_model(args.model_file_train)
    else:
        # get embedding model
        print(time.strftime('%Y-%m-%d %H:%M:%S') + ' Build model...')
        skip_gram_model = model.get(args)

    # dashboard
    watch_board = Dashboard(folder=config.FOLDER,
                            dump_file="dashboard.dump",
                            statistic_file="statistic.txt",
                            model=skip_gram_model,
                            show_board=True)

    # begin training
    print(time.strftime('%Y-%m-%d %H:%M:%S') + " Begin training..")
    # Train the model each generation and show predictions against the
    # validation dataset
    for iteration in range(1, args.iterations):
        print(
            time.strftime('%Y-%m-%d %H:%M:%S') + ' Iteration %d ' % iteration)
        skip_gram_model.fit_generator(
            zhwiki_corpus.skip_gram_generator(batch_size=args.batch_size,
                                              context_window_size=5,
                                              negative_samples=10),
            steps_per_epoch=args.steps_per_epoch,
            epochs=args.epochs,
            callbacks=[save_best_model, watch_board],
            # callbacks=[save_best_model],
            validation_data=zhwiki_corpus.skip_gram_generator(
                batch_size=args.batch_size,
                context_window_size=5,
                negative_samples=10),
            validation_steps=100,
            verbose=0)

    # close_board windows
    watch_board.close_board()
    print("task took %.3fs" % (float(time.time()) - start_time))
Example #10
def momo():
    subject = request.json[0]['msys']['relay_message']['content']['subject']
    m = model.get(subject)
    if m:
        engage = m['engage_percent'] >= random.random()
        if engage:
            util.fakeEngagement(
                request.json[0]['msys']['relay_message']['content']['html'])
        # record the same draw that decided the fake engagement,
        # rather than sampling a second, independent one
        model.incr(subject, engage)
    else:
        model.incr(subject, 0)
    return ''
Example #11
def setRelations(items, *relations):
    for relation in relations:
        if ' ' in relation:
            k1, k2 = relation.split(' ')
        else:
            k1 = k2 = relation

        model = getModel(k1)
        assert model is not None
        if not isinstance(items, list):
            items = [items]
        for item in items:
            if item is not None and (k1 + 'id') in item:
                item[k2] = model.get(item[k1 + 'id'])
Example #12
def main():
    "done"

    parser = argparse.ArgumentParser()
    parser.add_argument('-ckpt', required=True)
    parser.add_argument('-k', '--beam_size', type=int, default=4)
    parser.add_argument('-lp', '--length_penalty', type=float, default=0.7)
    parser.add_argument('--early_stopping', action="store_true")
    parser.add_argument('-max_len', type=int, default=250)
    parser.add_argument('-max_ratio', type=float, default=1.5)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-text', type=str, required=True)
    parser.add_argument('-ref_text', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=None)
    parser.add_argument('--max_batch_size', type=int, default=None)
    parser.add_argument('--tokens_per_batch', type=int, default=None)
    parser.add_argument('--greedy', action='store_true')
    parser.add_argument('--src_lan', type=str, default="en")
    parser.add_argument('--tgt_lan', type=str, default="de")
    parser.add_argument('--gen_a', type=float, default=1.3)
    parser.add_argument('--gen_b', type=int, default=5)

    args = parser.parse_args()
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()

    assert args.beam_size > 0
    assert args.max_len > 10

    net, _ = model.get()
    net, src_vocab, tgt_vocab = load_model(args.ckpt, net)

    if args.use_cuda:
        net = net.cuda()

    fpath = args.text
    try:
        with open(fpath, encoding='utf-8') as f:
            args.text = f.read().split('\n')[:-1]
    except OSError:
        print("error opening or reading text file")
        return

    fpath = args.ref_text
    try:
        with open(fpath, encoding='utf-8') as f:
            args.ref_text = f.read().split('\n')[:-1]
    except OSError:
        print("error opening or reading text file")
        return

    translate(args, net, src_vocab, tgt_vocab)
Example #13
def load_model(checkpoint_path):
    """
    Reload a checkpoint if we find one.
    """
    assert os.path.isfile(checkpoint_path)
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    config.src_n_vocab = ckpt['net']['module.src_emb.0.emb.weight'].size(0)
    config.tgt_n_vocab = ckpt['net']['module.tgt_emb.0.emb.weight'].size(0)
    net, _ = model.get()

    # reload model parameters
    s_dict = {}
    for k in ckpt["net"]:
        new_k = k[7:]  # strip the leading "module." prefix (7 chars) added by DataParallel
        s_dict[new_k] = ckpt["net"][k]

    net.load_state_dict(s_dict)

    src_vocab = ckpt["src_vocab"]
    tgt_vocab = ckpt["tgt_vocab"]
    return net, src_vocab, tgt_vocab
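The k[7:] slice drops the leading "module." prefix that nn.DataParallel and DistributedDataParallel prepend to parameter names when a wrapped model is saved. A slightly more defensive variant, offered as a sketch rather than the project's code, strips the prefix only when it is present:

def strip_module_prefix(state_dict):
    # Remove a leading "module." (added by (Distributed)DataParallel)
    # from each key; keys without the prefix pass through unchanged.
    prefix = "module."
    return {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in state_dict.items()
    }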
Example #14
	def post(self, obj, constraint={}):
		"""post a vector object, the property name is the type. see test case for example"""

		modelobj = model.get(obj) if not obj.get('parent_type') else None
		
		# delete if exists
		if obj.get("type") and obj.get("name"):
			self.delete(obj["type"], obj["name"])

		modelobj and modelobj.before_post()
		modelobj and modelobj.validate()
				
		obj_single, is_vector = self._get_single(obj)
		# save the parent
		self.post_single(obj_single, constraint)
		if is_vector:			
			for k in obj:
				d = {"type":k, "parent":obj["name"], "parent_type":obj["type"]}
				# dict, one child only
				if type(obj[k]) is dict:
					obj[k].update(d)
					self.post(obj[k])
				
				# multiple children
				if type(obj[k]) in (list, tuple):
					for child in obj[k]:
						
						# child is a dict
						if type(child) is dict:
							child.update(d)
							self.post(child)
							
						# child is literal (only names)
						elif type(child) in (str, int, float):
							c = {"value":child}
							c.update(d)
							self.post_single(c)
						else:
							raise Exception("child %s must be dict or literal" % str(child))
		modelobj and modelobj.after_post()
Example #15
def view_lyric(_id):
    """ View a lyric by _id.
    """
    # Search query used to find this lyric.
    query = request.args.get('query', 'you beat the system, collect 10 points') 
    back_url = '/?' + urllib.urlencode({'query': query})

    response = model.get(_id)
    lyric = response['lyric']
    artist = response['artist']
    album = response['album']
    title = response['title']

    mxit_ga.track_event(request)
    track('view_lyric', request,
            album=album,
            title=title,
            artist=artist,
            lyric_id=_id)


    return render_template('lyric.html', lyric=Markup(lyric), artist=artist, title=title, album=album, back_url=back_url)
Example #16
def _apply_model(image: np.ndarray) -> np.ndarray:
    model_height, model_width = config.MODEL_INPUT_SHAPE
    image_height, image_width = image.shape

    # apply model to a single slice of the image
    if image.shape == config.MODEL_INPUT_SHAPE:
        model_input = image.reshape((1, model_height, model_width, 1)) / 255.0
        return (
            model.get().predict(model_input).reshape((model_height, model_width)) * 255
        )

    # recursively apply model to the whole image
    elif image_height >= model_height and image_width >= model_width:
        n_vertical = math.ceil(image_height / model_height)
        n_horizontal = math.ceil(image_width / model_width)

        # create overflow buffer
        result = np.full(
            (n_vertical * model_height, n_horizontal * model_width), fill_value=255
        )
        result[:image_height, :image_width] = image
        buffer = result.copy()

        # iterate over all image slices
        for row, col in itertools.product(range(n_vertical), range(n_horizontal)):
            v_slice = slice(row * model_height, (row + 1) * model_height)
            h_slice = slice(col * model_width, (col + 1) * model_width)

            result[v_slice, h_slice] = _apply_model(result[v_slice, h_slice])

        # dispose of the grid artifact
        _patch_joints(
            buffer, model_height, model_width, n_horizontal, n_vertical, result
        )

        # crop to original image shape
        return result[:image_height, :image_width]
    else:
        return _apply_model(cv2.resize(image, (model_width, model_height)))
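The recursive branch pads the image up to a whole number of model-sized tiles, runs the model tile by tile, patches the tile joints, then crops back to the original shape. A tiny illustration of the tiling arithmetic, with sizes assumed for the example:

import math

image_h, image_w = 50, 70   # assumed input size
model_h, model_w = 25, 25   # assumed model input size

n_vertical = math.ceil(image_h / model_h)    # 2 rows of tiles
n_horizontal = math.ceil(image_w / model_w)  # 3 columns of tiles
# The padded buffer is (n_vertical * model_h, n_horizontal * model_w) == (50, 75);
# the result is cropped back to (50, 70) at the end.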
Example #17
    def test_id_on_inserted_model(self):
        class Person(Model):
            collection_name = 'person_test'
            structure = {
                'name': unicode,
                'desc': 'dynamic',
                'age': int,
            }
        Person.connect(Testing)
        Person.collection.remove({})

        model = Person(name='John Doe', age=35, desc='no one', bla='ble')
        _id = model.upsert()
        self.assertTrue(model.get('_id'))
        self.assertEqual(model._id, _id)

        self.assertIn('_id', model.to_dict())
        self.assertEqual(model.to_dict(), dict(
            name=u'John Doe',
            age=35,
            desc='no one',
            _id=_id
        ))
Example #18
    def GET(self, id):
        return response.send_if_found(model.get(id))
Example #19
    def __init__(self, params):
        """
        Initialize trainer.
        """
        self.params = params

        # Initialize tensorboard writer
        train_log = SummaryWriter(
            os.path.join(config.tensorboard_log_path, "log", "train"))
        valid_log = SummaryWriter(
            os.path.join(config.tensorboard_log_path, "log", "valid"))
        self._tensorboard = TensorboardWriter(train_log, valid_log)

        # epoch / iteration size
        assert isinstance(config.epoch_size, int)
        assert config.epoch_size >= 1
        self.epoch_size = config.epoch_size

        # network and criterion
        net, criterion = model.get()
        self.net = net
        self.criterion = criterion

        # data iterators
        self.iterators = {}
        train_iter, valid_iter, SRC_TEXT, TGT_TEXT = dataset.get()
        self.iterators["train"] = train_iter
        self.iterators["valid"] = valid_iter
        self.num_train = len(train_iter)
        self.SRC_TEXT = SRC_TEXT
        self.TGT_TEXT = TGT_TEXT

        # Multi-GPU
        if config.multi_gpu:
            logger.info("Using nn.parallel.DistributedDataParallel ...")
            self.net = nn.parallel.DistributedDataParallel(
                self.net,
                device_ids=[params.local_rank],
                output_device=params.local_rank)
            """
            self.criterion = nn.parallel.DistributedDataParallel(
                    self.criterion, device_ids=[params.local_rank], output_device=params.local_rank
                    )
            """

        # set optimizers
        self.opt = optimizer.get(self.net)

        # validation metrics
        self.best_metrics = {}
        for k in config.valid_metrics.keys():
            factor = config.valid_metrics[k]
            self.best_metrics[k] = [config.init_metric * factor, factor]

        # training statistics
        self.epoch = 0
        self.n_iter = 0
        self.n_total_iter = 0
        self.n_sentences = 0
        self.stats = OrderedDict([('processed_s', 0), ('processed_w', 0)] +
                                 [('MT-%s-%s-loss' %
                                   (config.SRC_LAN, config.TGT_LAN), [])] +
                                 [('MT-%s-%s-ppl' %
                                   (config.SRC_LAN, config.TGT_LAN), [])])
        self.last_time = time.time()

        # reload potential checkpoints
        self.reload_checkpoint()
Example #20
def main():

    parser = argparse.ArgumentParser()
    # default xpu0 for non-brain++, all gpus for brain++
    default_devices = '*' if os.environ.get('RLAUNCH_WORKER') else '0'
    parser.add_argument('-d', '--device', default=default_devices)
    parser.add_argument('--fast-run', action='store_true', default=False)
    parser.add_argument('--local', action='store_true', default=True)
    parser.add_argument('-c',
                        '--continue',
                        dest='continue_path',
                        required=False)
    args = parser.parse_args()

    mgb.config.set_default_device(parse_devices(args.device)[0])

    # XXX load network ***********************************************
    net = model.get()
    #*****************************************************************
    # create session
    sess = Session(config, args.device, net=net)

    # The loggers
    worklog = WorklogLogger(os.path.join(sess.log_dir, 'worklog.txt'))
    # create tensorboard loggers
    train_tb, val_tb = sess.tensorboards("train.events", "val.events")

    # The training and validation functions
    train_func = sess.make_func(loss_var=net.loss_var,
                                fast_run=args.fast_run,
                                train_state=True)
    val_func = sess.make_func(
        # you might wanna disable fast_run for validation
        fast_run=args.fast_run,
        train_state=False)

    opt = megskull.optimizer.AdamV8(learning_rate=10)
    opt(train_func)

    # The datasets
    datasets = sess.get_datasets("train", "validation", use_local=args.local)
    train_ds = datasets['train']
    val_ds_iter = get_inf_iter_from_dataset(datasets['validation'])

    # vars to monitor
    sess.monitor_param_histogram(train_tb, worklog, interval=40)
    monitor_vars = list(
        net.extra.get("extra_config", {}).get('monitor_vars', []))

    outspec = {'loss': net.loss_var}
    outspec.update(net.extra.get("extra_outputs", {}))
    # after all decorations are done, compile the functions
    train_func.compile(outspec)
    val_func.compile(outspec)

    # restore checkpoint
    if args.continue_path:
        sess.load_checkpoint(args.continue_path)

    # Now start train
    clock = sess.clock
    sess.start()

    if not args.continue_path:
        train_tb.put_graph(net)

    log_output = log_rate_limited(min_interval=0.5)(worklog.put_line)

    while True:
        if clock.epoch >= config.nr_epoch:
            break
        opt.learning_rate = config.lr
        train_tb.put_scalar('learning_rate', opt.learning_rate, clock.step)

        time_epoch_start = tstart = time.time()
        for minibatch in train_ds.get_epoch_minibatch_iter():
            tdata = time.time() - tstart

            out = train_func(**minibatch.get_kvmap())

            cur_time = time.time()
            ttrain = cur_time - tstart
            time_passed = cur_time - time_epoch_start

            time_expected = time_passed / (clock.minibatch +
                                           1) * train_ds.nr_minibatch_in_epoch
            eta = time_expected - time_passed

            outputs = [
                "e:{},{}/{}".format(clock.epoch, clock.minibatch,
                                    train_ds.nr_minibatch_in_epoch),
                "{:.2g} mb/s".format(1. / ttrain),
            ] + [
                'passed:{:.2f}'.format(time_passed),
                'eta:{:.2f}'.format(eta),
            ] + ["{}:{:.2g}".format(k, float(out[k])) for k in monitor_vars]

            if tdata / ttrain > .05:
                outputs += ["dp/tot: {:.2g}".format(tdata / ttrain)]
            log_output(' '.join(outputs))

            for k, v in out.items():
                if k in monitor_vars:
                    train_tb.put_scalar(k, v, clock.step)

            if clock.minibatch % 5 == 0:
                vb = next(val_ds_iter)
                val_out = val_func(**vb.get_kvmap())
                val_monitor_vars = [(k, float(v)) for k, v in val_out.items()
                                    if k in monitor_vars]

                for k, v in val_monitor_vars:
                    val_tb.put_scalar(k, v, clock.step)

                log_output("Val: " + " ".join(
                    ["{}={:.2g}".format(k, v) for k, v in val_monitor_vars]))

            if clock.step % 100 == 0:
                train_tb.flush()
                val_tb.flush()

            clock.tick()
            tstart = time.time()

        train_tb.flush()
        val_tb.flush()

        clock.tock()

        if clock.epoch % 5 == 0:
            sess.save_checkpoint('epoch_{}'.format(clock.epoch))
        sess.save_checkpoint('latest')

    logger.info("Training is done, exit.")
    os._exit(0)
Example #22
import model
model.get()

Example #23
    def __init__(self, params):
        """
        Initialize trainer.
        """
        self.params = params

        # epoch / iteration size
        assert isinstance(config.epoch_size, int)
        assert config.epoch_size >= 1
        self.epoch_size = config.epoch_size

        # network and criterion
        net, criterion = model.get()
        self.net = net
        self.criterion = criterion

        # data iterators
        self.iterators = {}
        train_iter, valid_iter, SRC_TEXT, TGT_TEXT = dataset.load()
        torch.distributed.barrier()
        print("Process {}, dataset loaded.".format(params.local_rank))
        self.iterators["train"] = train_iter
        self.iterators["valid"] = valid_iter
        self.num_train = len(train_iter)
        self.SRC_TEXT = SRC_TEXT
        self.TGT_TEXT = TGT_TEXT

        torch.distributed.barrier()

        # Multi-GPU
        assert config.amp >= 1 or not config.fp16
        if config.multi_gpu and not config.fp16:
            logger.info("Using nn.parallel.DistributedDataParallel ...")
            self.net = nn.parallel.DistributedDataParallel(
                self.net,
                device_ids=[params.local_rank],
                output_device=params.local_rank)

        # set optimizers
        self.opt = optimizer.get(self.net)

        torch.distributed.barrier()
        # Float16 / distributed
        if config.fp16:
            self.init_amp()
            if config.multi_gpu:
                logger.info("Using apex.parallel.DistributedDataParallel ...")
                self.net = apex.parallel.DistributedDataParallel(
                    self.net, delay_allreduce=True)

        # validation metrics
        self.best_metrics = {}
        for k in config.valid_metrics.keys():
            factor = config.valid_metrics[k]
            self.best_metrics[k] = [config.init_metric * factor, factor]

        # early stopping metrics
        self.early_stopping_metrics = {}
        for k in self.best_metrics:
            self.early_stopping_metrics[k] = self.best_metrics[k]

        self.decrease_counts = 0
        self.decrease_counts_max = config.decrease_counts_max
        self.stopping_criterion = config.stopping_criterion
        if config.multi_gpu:
            self.should_terminate = torch.tensor(0).byte()
            self.should_terminate = self.should_terminate.cuda()
        else:
            self.should_terminate = False
        assert (self.stopping_criterion
                in self.best_metrics) or (self.stopping_criterion is None)

        # training statistics
        self.epoch = 0
        self.n_iter = 0
        self.n_total_iter = 0
        self.n_sentences = 0
        self.stats = OrderedDict([('processed_s', 0), ('processed_w', 0)] +
                                 [('MT-%s-%s-loss' %
                                   (config.SRC_LAN, config.TGT_LAN), [])] +
                                 [('MT-%s-%s-ppl' %
                                   (config.SRC_LAN, config.TGT_LAN), [])])
        self.last_time = time.time()

        # reload potential checkpoints
        self.reload_checkpoint(network_only=config.reload_network_only)
        print("Process {}, trainer initialized.".format(params.local_rank))
Example #24
    def test_model_instance_access_data_from_dot_notation(self):
        model = self.ModelTest({'a': 1}, b='bbbbbb')
        self.assertEqual(model.a, 1)
        self.assertEqual(model.b, 'bbbbbb')
        self.assertEqual(model.get('a'), model.a)
        self.assertEqual(model.get('b'), 'bbbbbb')
Example #25
def query_instances(args, unlabeled_dataset, oracle, active_func="random"):
    # lc stands for least confident
    # te stands for token entropy
    # tte stands for total token entropy
    assert active_func in [
        "random", "longest", "shortest", "lc", "margin", "te", "tte"
    ]

    # lengths represents number of tokens, so BPE should be removed
    lengths = np.array([
        len(remove_special_tok(remove_bpe(s)).split())
        for s in unlabeled_dataset
    ])

    # Preparations before querying instances
    # Reloading network parameters
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    net, _ = model.get()

    assert os.path.exists(args.checkpoint)
    net, src_vocab, tgt_vocab = load_model(args.checkpoint, net)

    if args.use_cuda:
        net = net.cuda()

    # Initialize inference dataset (Unlabeled dataset)
    infer_dataset = Dataset(unlabeled_dataset, src_vocab)
    if args.batch_size is not None:
        infer_dataset.BATCH_SIZE = args.batch_size
    if args.max_batch_size is not None:
        infer_dataset.max_batch_size = args.max_batch_size
    if args.tokens_per_batch is not None:
        infer_dataset.tokens_per_batch = args.tokens_per_batch

    infer_dataiter = iter(
        infer_dataset.get_iterator(shuffle=True,
                                   group_by_size=True,
                                   include_indices=True))

    # Start ranking unlabeled dataset
    indices = np.arange(len(unlabeled_dataset))
    if active_func == "random":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        random.shuffle(result)
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference, idx)
    elif active_func == "longest":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: -item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", -result[idx][0])
            print("I:", args.input, args.reference, idx)
    elif active_func == "shortest":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference, idx)
        indices = indices[np.argsort(lengths[indices])]
    elif active_func in ["lc", "margin", "te", "tte"]:
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')

        for idx in range(len(result)):
            print("S:", unlabeled_dataset[result[idx][1]])
            print("H:", result[idx][2])
            print("T:", oracle[result[idx][1]])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference, result[idx][1])
Example #26
def get(tname):
    query = reader.multiple_input(tname, 'Enter requested fields:', empty=True)
    data = model.get(tname, query)
    view.print_entities(tname, data)
    reader.press_enter()
    show_table_menu(tname)
Example #27
File: app.py Project: tshev/Archive
def f2():
	import model
	return model.get()	
Example #28
def get_all(tname):
    data = model.get(tname)
    view.print_entities(tname, data)
    reader.press_enter()
    show_table_menu(tname)
Example #29
def main():
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(help='two modes, get or translate')

    parser_get = subparsers.add_parser(
        'get', help='Get texts that needs to be labeled or translated')
    parser_get.add_argument(
        '-AO',
        '--active_out',
        type=str,
        default=None,
        help="Output file generated by active.py score mode")
    parser_get.add_argument('-tb',
                            '--tok_budget',
                            type=int,
                            help="Token budget")
    parser_get.add_argument('-bttb',
                            '--back_translation_tok_budget',
                            type=int,
                            help="Back translation token budget")
    parser_get.add_argument('--sort',
                            action="store_true",
                            help="Whether to sort active out by value")
    parser_get.add_argument('-o', '--output', type=str, help="Output filepath")
    parser_get.add_argument('-on',
                            '--output_num',
                            type=int,
                            default=1,
                            help="Output filepath")

    parser_trans = subparsers.add_parser('translate',
                                         help='Translate sentences')
    parser_trans.add_argument('-i', '--input', type=str, help='Input file')
    parser_trans.add_argument('-o', '--output', type=str, help="Output file")
    parser_trans.add_argument('--ckpt', required=True)
    parser_trans.add_argument('--max_len', type=int, default=250)
    parser_trans.add_argument('--gen_a', type=float, default=1.3)
    parser_trans.add_argument('--gen_b', type=int, default=5)
    parser_trans.add_argument('--no_cuda', action='store_true')
    parser_trans.add_argument('--batch_size', type=int, default=None)
    parser_trans.add_argument('--max_batch_size', type=int, default=None)
    parser_trans.add_argument('--tokens_per_batch', type=int, default=None)

    args = parser.parse_args()
    args.mode = "get" if hasattr(args, 'active_out') else "translate"
    if args.mode == 'translate':
        args.use_cuda = not args.no_cuda and torch.cuda.is_available()

    if args.mode == "get":
        f = open(args.active_out, 'r')
        lines = f.read().split('\n')[:-1]
        f.close()

        assert len(lines) % 4 == 0
        active_out = [(lines[idx], lines[idx + 1],
                       float(lines[idx + 2].split(' ')[-1]), lines[idx + 3])
                      for idx in range(0, len(lines), 4)]
        if args.sort:
            active_out = sorted(active_out, key=lambda item: item[2])

        indices = np.arange(len(active_out))
        lengths = np.array([
            len(
                remove_special_tok(remove_bpe(
                    item[0][len("S: "):])).split(' ')) for item in active_out
        ])
        include_oracle = np.cumsum(lengths) <= args.tok_budget
        include_pseudo = np.cumsum(lengths) <= (
            args.tok_budget + args.back_translation_tok_budget)
        include_pseudo = np.logical_xor(include_pseudo, include_oracle)
        include_pseudo = indices[include_pseudo]
        include_oracle = indices[include_oracle]
        others = [
            idx for idx in indices
            if (idx not in include_pseudo) and (idx not in include_oracle)
        ]

        # Output oracle and others
        output_oracle = args.output + '_oracle'
        f = open(output_oracle, 'w')
        out = []
        for idx in include_oracle:
            item = []
            item.append(active_out[idx][0])
            item.append('H: ' + active_out[idx][1][len('T: '):])
            item.append('T: ' + active_out[idx][1][len('T: '):])
            item.append('V: ' + str(active_out[idx][2]))
            item.append(active_out[idx][3])
            out.extend(item)

        f.write('\n'.join(out) + '\n')
        f.close()

        output_others = args.output + '_others'
        f = open(output_others, 'w')
        out = []
        for idx in others:
            item = []
            item.append(active_out[idx][0])
            item.append('H: ' + active_out[idx][1][len('T: '):])
            item.append('T: ' + active_out[idx][1][len('T: '):])
            item.append('V: ' + str(active_out[idx][2]))
            item.append(active_out[idx][3])
            out.extend(item)

        f.write('\n'.join(out) + '\n')
        f.close()

        # Output pseudo
        if args.output_num > 1:
            n_lines = len(include_pseudo) // args.output_num + 1
            for n in range(args.output_num):
                output_pseudo = args.output + '_pseudo_' + str(n)
                f = open(output_pseudo, 'w')
                out = []

                for idx in include_pseudo[n * n_lines:(n + 1) * n_lines]:
                    item = []
                    item.append(active_out[idx][0])
                    item.append('H: ' + active_out[idx][1][len('T: '):])
                    item.append('T: ' + active_out[idx][1][len('T: '):])
                    item.append('V: ' + str(active_out[idx][2]))
                    item.append(active_out[idx][3])
                    out.extend(item)

                f.write('\n'.join(out) + '\n')
                f.close()
        else:
            assert args.output_num == 1
            output_pseudo = args.output + '_pseudo'
            f = open(output_pseudo, 'w')
            out = []

            for idx in include_pseudo:
                item = []
                item.append(active_out[idx][0])
                item.append('H: ' + active_out[idx][1][len('T: '):])
                item.append('T: ' + active_out[idx][1][len('T: '):])
                item.append('V: ' + str(active_out[idx][2]))
                item.append(active_out[idx][3])
                out.extend(item)

            f.write('\n'.join(out) + '\n')
            f.close()
    elif args.mode == 'translate':

        assert args.max_len > 10

        net, _ = model.get()
        net, src_vocab, tgt_vocab = load_model(args.ckpt, net)

        if args.use_cuda:
            net = net.cuda()

        fpath = args.input
        try:
            lines = open(fpath, 'r').read().split('\n')[:-1]
            active_out = [(lines[idx], lines[idx + 1], lines[idx + 2],
                           float(lines[idx + 3].split(' ')[-1]),
                           lines[idx + 4]) for idx in range(0, len(lines), 5)]
            args.text = [a[0][len('S: '):].strip() for a in active_out]
            args.ref_text = [a[2][len('T: '):].strip() for a in active_out]
        except:
            print("error opening or reading text file")

        out = translate(args, net, src_vocab, tgt_vocab, active_out)

        f = open(args.output, 'w')
        f.write('\n'.join(out) + '\n')
        f.close()
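Both modes rely on a fixed multi-line record layout (S:/H:/T:/V:/I: lines) and rebuild records from the flat file with a fixed stride. A minimal sketch of that grouping, using made-up record contents:

# Four lines per record, in the "get" mode's layout (contents invented).
lines = [
    "S: source sentence",
    "T: target sentence",
    "V: score 0.42",
    "I: input ref 0",
]
records = [
    (lines[i], lines[i + 1], float(lines[i + 2].split(' ')[-1]), lines[i + 3])
    for i in range(0, len(lines), 4)
]
# records == [('S: source sentence', 'T: target sentence', 0.42, 'I: input ref 0')]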
Example #30
    def __init__(self, cell, transformation=None):

        # Store cell
        self.cell = cell

        # Store transformation object
        if transformation is None:
            transformation = transformations.NullTransformation()
        self.transformation = transformation

        # Calculate experimental summary statistics
        print('Calculating summary statistics for cell ' + str(cell))
        stats = sumstat.all_summary_statistics(cell)

        # Unpack
        self.ta1 = stats[0][1]
        self.tr1 = stats[1][1]
        self.ai1 = stats[2][1]
        self.ri1 = stats[3][1]
        self.iv1 = stats[4][1]

        # Scale factors for error
        self.nta = 1 / len(self.ta1)
        self.ntr = 1 / len(self.tr1)
        self.nai = 1 / len(self.ai1)
        self.niv = 1 / len(self.iv1)
        self.zta = 1 / np.max(self.ta1)
        self.ztr = 1 / np.max(self.tr1)
        self.zai = 1
        self.ziv = 1 / (np.max(self.iv1) - np.min(self.iv1))
        assert (self.zta > 0)
        assert (self.ztr > 0)
        assert (self.ziv > 0)

        # Load Myokit model
        model = data.load_myokit_model()
        model.get('membrane.V').set_label('membrane_potential')
        model.get('nernst.EK').set_rhs(
            cells.reversal_potential(cells.temperature(cell)))

        # Start at steady-state for -80mV
        # print('Updating model to steady-state.')
        # model.get('membrane.V').promote()
        # ai = model.get('ikr.O_Kr').pyfunc()(-80)
        # model.get('membrane.V').demote()
        # model.get('ikr.O_Kr').set_state_value(ai)

        # Create analytical model
        m = myokit.lib.hh.HHModel.from_component(
            model.get('ikr'),
            parameters=[
                'ikr.p1',
                'ikr.p2',
                'ikr.p3',
                'ikr.p4',
                'ikr.p5',
                'ikr.p6',
                'ikr.p7',
                'ikr.p8',
                'ikr.p9',
                'ikr.p10',
                'ikr.p11',
                'ikr.p12',
                'ikr.p13',
                'ikr.p14',
                'ikr.p15',
                'ikr.p16',
                'ikr.p17',
                'ikr.p18',
                'ikr.p19',
                'ikr.p20',
                'ikr.p21',
                'ikr.p22',
                'ikr.p23',
                'ikr.p24',
                'ikr.p25',
            ],
        )

        # Load protocols, create simulations and times arrays
        self.simulations = []
        self.times = []
        for i in (2, 3, 4, 5):
            variant = (i == 2 and cell in (7, 8))
            p = data.load_myokit_protocol(i, variant=variant)
            self.simulations.append(myokit.lib.hh.AnalyticalSimulation(m, p))
            self.times.append(
                data.capacitance(p, 0.1,
                                 np.arange(0, p.characteristic_time(),
                                           0.1))[0])
Example #31
def main(args):
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir, "noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir, "events")
    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print ('Evaluating model')
            break
        print  ('Waiting for training job to save a checkpoint')
        time.sleep(args.eval_interval)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1
    coord = tf.train.Coordinator()
    while True:
        try:
            # data pipeline
            data_pipeline = da.data_pipeline(args.dataset, config=cfg,
                                            is_training=False)
            samples = {
              'data': data_pipeline.samples,
              'label': data_pipeline.labels,
               }
            model = md.get(args.model, samples,
                           checkpoint_dir=args.checkpoint_dir,
                           is_training=False, reuse=False)
            metrics = model.validation_metrics()
            summary_writer = tf.summary.FileWriter(summary_dir, None)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                #sess.run(tf.global_variables_initializer())
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                model.load(sess,args.step)
                print  ('Evaluating at step {}'.format(sess.run(model.global_step)))

                step = tf.train.global_step(sess, model.global_step)
                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0
                n = 0
                m = 0
                pred = np.empty(2)
                true_labels = np.empty(1)
                print("7777777777777777777777777777777")
                while True:
                    try:
                        to_fetch = [metrics,
                                    model.layers['class_prediction'],
                                    model.layers['class_prob'],
                                    samples["label"]]
                        metrics_, batch_prelabel, batch_pred, batch_true_label = sess.run(to_fetch)
                        pred = np.append(pred, batch_pred)
                        print(batch_prelabel, batch_true_label)
                        print(batch_pred)
                        trace = samples["data"]
                        if batch_prelabel == batch_true_label:
                            m += 1
                        else:
                            print(sess.run(trace[0]))
                            print(batch_pred)
                            #plt.plot(sess.run(trace[0]))
                            #plt.show()
                        true_labels = np.append(true_labels, batch_true_label)

                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size * metrics_[key]
                        n += cfg.batch_size

                        mess = model.validation_metrics_message(metrics_)
                        print('{:03d} | '.format(n) + mess)
                    except KeyboardInterrupt:
                        print('stopping evaluation')
                        break

                    except tf.errors.OutOfRangeError:
                        print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
                        print("{} windows seen".format(n))
                        break
            print('true = {} | det_accuracy = {}'.format(m, m / n))
            break
            #tf.reset_default_graph()
            #print ('Sleeping for {}s'.format(args.eval_interval))
            #time.sleep(args.eval_interval)
        finally:
             print ('joining data threads')
             coord.request_stop()
Example #32
    def test_data_property_getter(self):
        model = self.ModelTest({'a': 1}, b=2)
        model.set({'c': 3})
        self.assertEqual(model._data, {'a': 1, 'b': 2, 'c': 3})
        self.assertEqual(model.get('a'), 1)
        self.assertEqual(model.b, 2)
Example #33
print("========================================")

# seed
args.cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)
print("args seed:{},cuda:{}".format(args.seed, args.cuda))
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# data loader
train_loader, test_loader = dataset.get(batch_size=args.batch_size,
                                        data_root=args.data_root,
                                        num_workers=8)

# model
model = model.get(args.model_name, '../pretrained_models')
model = torch.nn.DataParallel(model, device_ids=range(args.ngpu))
if args.cuda:
    model.cuda()

# optimizer
optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      weight_decay=args.wd,
                      momentum=0.9)
decreasing_lr = list(map(int, args.decreasing_lr.split(',')))
print('decreasing_lr: ' + str(decreasing_lr))
best_acc, old_file = 0, None
t_begin = time.time()
try:
    # ready to go
Example #34
def test_incremental():
    from common import config
    import model
    from utils import get_batch
    net, _ = model.get()
    net.eval()
    
    ckpt = torch.load("checkpoints/checkpoint_best_ppl.pth", map_location='cpu')

    # reload model parameters
    s_dict = {}
    for k in ckpt["net"]:
        new_k = k[7:]
        s_dict[new_k] = ckpt["net"][k]

    net.load_state_dict(s_dict)
    
    import dataset
    train_iter, _, SRC_TEXT, TGT_TEXT = dataset.get()
    #data_iter = iter(train_iter.get_iterator(True, True))
    #raw_batch = next(data_iter)
    src = np.arange(4, 4+2000).reshape(80, 25)
    tgt = np.arange(4, 4+2400).reshape(80, 30)
    raw_batch = dataset.Batch(
            torch.from_numpy(src).long(),
            torch.from_numpy(tgt).long()
            )

    batch = get_batch(
            raw_batch.src, raw_batch.tgt,
            SRC_TEXT.vocab, TGT_TEXT.vocab
            )
    for k, v in batch.items():
        try:
            print(k, v.size())
        except AttributeError:
            pass

    with torch.no_grad():
        enc_out = net.encode(src=batch['src'], src_mask=batch['src_mask'])
        # No incremental
        logits1 = net.decode(enc_out, batch['src_mask'], batch['tgt'], batch['tgt_mask'])
        logits1 = net.generator(logits1, log_prob=True)

        # Incremental
        print("Incremental encoding finished!")
        tlen = batch['tgt'].size(1)
        cache = {'cur_len':0}
        logits2 = []
        for i in range(tlen):
            x = batch['tgt'][:, i].unsqueeze(-1)

            logit = net.decode(
                    enc_out, batch['src_mask'], x,
                    batch['tgt_mask'][:, i, :(i+1)].unsqueeze(-2), cache
                    )

            logit = net.generator(logit, log_prob=True)
            
            if i >= 0:
                ref = logits1[:, i, :]
                sys = logit.squeeze()
                
                ref_words = torch.topk(ref, 1)[1].squeeze()
                sys_words = torch.topk(sys, 1)[1].squeeze()

                print("Diff  = {}".format(torch.sum(ref - sys).item()))
                print("Logits sys size : {}, Logits sys : {}".format(sys.size(), sys.sum().item()))
                print("Logits ref size : {}, Logits ref : {}".format(ref.size(), ref.sum().item()))
                if not (ref_words == sys_words).all():
                    print("Mismatch between incremental and full decoding!")
                print("\n")
            
            logits2.append(logit)
            cache['cur_len'] = i + 1
        logits2 = torch.cat(logits2, dim=1).contiguous()

        print("Logits1: {}".format(torch.sum(logits1).item()))
        print("Logits2: {}".format(torch.sum(logits2).item()))
Example #35
def select(table):
    query = view.multiple_input(table, 'Enter requested fields:')
    data = model.get(table, query)
    view.print_entities(table, data)
    view.press_enter()
    display_secondary_menu(table)
Example #36
def query_instances(args,
                    unlabeled_dataset,
                    active_func="random",
                    tok_budget=None):
    # lc stands for least confident
    # te stands for token entropy
    # tte stands for total token entropy
    assert active_func in [
        "random", "longest", "shortest", "lc", "margin", "te", "tte"
    ]
    assert isinstance(tok_budget, int)

    # lengths represents number of tokens, so BPE should be removed
    lengths = np.array([
        len(remove_special_tok(remove_bpe(s)).split())
        for s in unlabeled_dataset
    ])
    total_num = sum(lengths)
    if total_num < tok_budget:
        tok_budget = total_num

    # Preparations before querying instances
    if active_func in ["lc", "margin", "te", "tte"]:
        # Reloading network parameters
        args.use_cuda = not args.no_cuda and torch.cuda.is_available()
        net, _ = model.get()

        assert os.path.exists(args.checkpoint)
        net, src_vocab, tgt_vocab = load_model(args.checkpoint, net)

        if args.use_cuda:
            net = net.cuda()

        # Initialize inference dataset (Unlabeled dataset)
        infer_dataset = Dataset(unlabeled_dataset, src_vocab)
        if args.batch_size is not None:
            infer_dataset.BATCH_SIZE = args.batch_size
        if args.max_batch_size is not None:
            infer_dataset.max_batch_size = args.max_batch_size
        if args.tokens_per_batch is not None:
            infer_dataset.tokens_per_batch = args.tokens_per_batch

        infer_dataiter = iter(
            infer_dataset.get_iterator(shuffle=True,
                                       group_by_size=True,
                                       include_indices=True))

    # Start ranking unlabeled dataset
    indices = np.arange(len(unlabeled_dataset))
    if active_func == "random":
        np.random.shuffle(indices)
    elif active_func == "longest":
        indices = indices[np.argsort(-lengths[indices])]
    elif active_func == "shortest":
        indices = indices[np.argsort(lengths[indices])]
    elif active_func in ["lc", "margin", "te", "tte"]:
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')

    include = np.cumsum(lengths[indices]) <= tok_budget
    include = indices[include]
    return [unlabeled_dataset[idx] for idx in include], include
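The final selection is a cumulative-sum mask over sentence lengths in ranked order: sentences are kept until the token budget is exhausted. A tiny worked example with assumed lengths:

import numpy as np

lengths = np.array([5, 3, 9, 2])         # tokens per ranked sentence (assumed)
tok_budget = 10
mask = np.cumsum(lengths) <= tok_budget  # cumsum is [5, 8, 17, 19]
# mask == [True, True, False, False]: only the first two sentences fit the budget.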
Example #37
def query_instances(args,
                    unlabeled_dataset,
                    oracle,
                    active_func="random",
                    labeled_dataset=None):
    # lc stands for least confident
    # te stands for token entropy
    # tte stands for total token entropy
    assert active_func in [
        "random", "longest", "shortest", "lc", "margin", "te", "tte", "dden"
    ]

    # lengths represents number of tokens, so BPE should be removed
    lengths = np.array([
        len(remove_special_tok(remove_bpe(s)).split())
        for s in unlabeled_dataset
    ])

    # Preparations before querying instances
    # Reloading network parameters
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    net, _ = model.get()

    assert os.path.exists(args.checkpoint)
    net, src_vocab, tgt_vocab = load_model(args.checkpoint, net)

    if args.use_cuda:
        net = net.cuda()

    # Initialize inference dataset (Unlabeled dataset)
    infer_dataset = Dataset(unlabeled_dataset, src_vocab)
    if args.batch_size is not None:
        infer_dataset.BATCH_SIZE = args.batch_size
    if args.max_batch_size is not None:
        infer_dataset.max_batch_size = args.max_batch_size
    if args.tokens_per_batch is not None:
        infer_dataset.tokens_per_batch = args.tokens_per_batch

    infer_dataiter = iter(
        infer_dataset.get_iterator(shuffle=True,
                                   group_by_size=True,
                                   include_indices=True))

    # Start ranking unlabeled dataset
    indices = np.arange(len(unlabeled_dataset))
    if active_func == "random":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        random.shuffle(result)
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference,
                  idx + args.previous_num_sents)
    elif active_func == "longest":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: -item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", -result[idx][0])
            print("I:", args.input, args.reference,
                  idx + args.previous_num_sents)
    elif active_func == "shortest":
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = [(len(
            remove_special_tok(remove_bpe(
                unlabeled_dataset[item[1]])).split(' ')), item[1], item[2])
                  for item in result]
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("H:", result[idx][2])
            print("T:", oracle[idx])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference,
                  idx + args.previous_num_sents)
        indices = indices[np.argsort(lengths[indices])]
    elif active_func in ["lc", "margin", "te", "tte"]:
        result = get_scores(args, net, active_func, infer_dataiter, src_vocab,
                            tgt_vocab)
        result = sorted(result, key=lambda item: item[0])
        indices = [item[1] for item in result]
        indices = np.array(indices).astype('int')

        for idx in range(len(result)):
            print("S:", unlabeled_dataset[result[idx][1]])
            print("H:", result[idx][2])
            print("T:", oracle[result[idx][1]])
            print("V:", result[idx][0])
            print("I:", args.input, args.reference,
                  result[idx][1] + args.previous_num_sents)
    elif active_func == "dden":
        punc = [
            ".", ",", "?", "!", "'", "<", ">", ":", ";", "(", ")", "{", "}",
            "[", "]", "-", "..", "...", "...."
        ]
        lamb1 = 1
        lamb2 = 1
        p_u = {}
        unlabeled_dataset_without_bpe = []
        labeled_dataset_without_bpe = [[], []]
        for s in unlabeled_dataset:
            unlabeled_dataset_without_bpe.append(
                remove_special_tok(remove_bpe(s)))
        for s in labeled_dataset[0]:
            labeled_dataset_without_bpe[0].append(
                remove_special_tok(remove_bpe(s)))
        for s in labeled_dataset[1]:
            labeled_dataset_without_bpe[1].append(
                remove_special_tok(remove_bpe(s)))
        for s in unlabeled_dataset_without_bpe:
            sentence = s.split()
            for token in sentence:
                if token not in punc:
                    if token in p_u.keys():
                        p_u[token] += 1
                    else:
                        p_u[token] = 1
        total_dden = 0
        for token in p_u.keys():
            p_u[token] = math.log(p_u[token] + 1)
            total_dden += p_u[token]
        for token in p_u.keys():
            p_u[token] /= total_dden
        count_l = {}
        for s in labeled_dataset_without_bpe[0]:
            sentence = s.split()
            for token in sentence:
                if token not in punc:
                    if token in count_l.keys():
                        count_l[token] += 1
                    else:
                        count_l[token] = 1
        dden = []
        for s in unlabeled_dataset_without_bpe:
            sentence = s.split()
            len_for_sentence = 0
            sum_for_sentence = 0
            for token in sentence:
                if token not in punc:
                    if token in count_l.keys():
                        sum_for_sentence += p_u[token] * math.exp(
                            -lamb1 * count_l[token])
                    else:
                        sum_for_sentence += p_u[token]
                len_for_sentence += 1
            if len_for_sentence != 0:
                sum_for_sentence /= len_for_sentence
            dden.append(sum_for_sentence)
        unlabeled_with_index = []
        for i in range((len(unlabeled_dataset))):
            unlabeled_with_index.append((dden[i], i))
        unlabeled_with_index.sort(key=lambda x: x[0], reverse=True)
        count_batch = {}
        dden_new = []
        for _, i in unlabeled_with_index:
            sentence = unlabeled_dataset_without_bpe[i].split()
            len_for_sentence = 0
            sum_for_sentence = 0
            for token in sentence:
                if token not in punc:
                    p_tmp = p_u[token]
                    if token in count_batch.keys():
                        p_tmp = 0
                        p_tmp *= math.exp(-lamb2 * count_batch[token])
                    if token in count_l.keys():
                        p_tmp *= math.exp(-lamb1 * count_l[token])
                    sum_for_sentence += p_tmp
                len_for_sentence += 1
            for token in sentence:
                if token not in punc:
                    if token in count_batch.keys():
                        count_batch[token] += 1
                    else:
                        count_batch[token] = 1
            if len_for_sentence != 0:
                sum_for_sentence /= len_for_sentence
            dden_new.append((sum_for_sentence, i))
        dden_new.sort(key=lambda x: x[1])
        dden_sort = []
        for dden_num, _ in dden_new:
            dden_sort.append(dden_num)
        ddens = np.array(dden_sort)
        indices = indices[np.argsort(-ddens)]
        for idx in indices:
            print("S:", unlabeled_dataset[idx])
            print("T:", oracle[idx])
            print("V:", -ddens[idx])
            print("I:", args.input, args.reference, idx)
Example #38
    class_mode='categorical',
    color_mode="rgb",
    # seed=42
)

checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
    save_freq=5  # NOTE: an integer save_freq saves every 5 batches, not epochs
)

model = model.get()  # note: rebinds the name 'model', shadowing the imported module

model.save_weights(checkpoint_path.format(epoch=0))

STEP_SIZE_TRAIN = train_data_gen.n // train_data_gen.batch_size
STEP_SIZE_VALID = val_data_gen.n // val_data_gen.batch_size

print("steps_per_epoch  : {:d} ".format(STEP_SIZE_TRAIN))
print("validation_steps : {:d} ".format(STEP_SIZE_VALID))

history = model.fit_generator(generator=train_data_gen,
                              steps_per_epoch=STEP_SIZE_TRAIN,
                              validation_data=val_data_gen,
                              validation_steps=STEP_SIZE_VALID,
                              epochs=epochs)