Example #1
    def Setup(self):
        utils.say("Oooo 'ello, I'm Mrs. Premise!")
        self.Params = utils.read_params()

        try:
            self.Semaphore = sysv_ipc.Semaphore(self.Params["KEY"],
                                                sysv_ipc.IPC_CREX)
        except sysv_ipc.ExistentialError as err:
            self.Logger.debug(err)
            # One of my peers created the semaphore already
            self.Semaphore = sysv_ipc.Semaphore(self.Params["KEY"])
            # Waiting for that peer to do the first acquire or release
            while not self.Semaphore.o_time:
                time.sleep(.1)
        else:
            # Initializing sem.o_time to nonzero value
            self.Semaphore.release()
            # Now the semaphore is safe to use.

        try:
            self.Memory = sysv_ipc.SharedMemory(self.Params["KEY"],
                                                sysv_ipc.IPC_CREX)
        except sysv_ipc.ExistentialError as err:
            self.Logger.debug(err)
            self.Memory = sysv_ipc.SharedMemory(self.Params["KEY"])
        else:
            self.Memory.release()

        self.Logger.debug("Setup done")
        return True
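A hedged teardown counterpart to Setup above (not in the original source; it assumes the same self.Params / self.Semaphore / self.Memory attributes and uses sysv_ipc's documented detach()/remove() calls):

    def Teardown(self):
        utils.say("Removing the semaphore and shared memory")
        try:
            self.Memory.detach()
            self.Memory.remove()     # mark the segment for destruction
            self.Semaphore.remove()  # destroy the semaphore
        except sysv_ipc.ExistentialError as err:
            # A peer already removed them.
            self.Logger.debug(err)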
Example #2
def p_challenge_answer():
    print("------ 挑战答题得分任务 ------")  # "Challenge quiz scoring task"
    # "Finish the challenge quiz; type x to quit the answer lookup"
    p = say("完成挑战答题,输入 x 退出查询")
    p.wait()
    while True:
        p = say("请输入关键字:")  # "Please enter a keyword:"
        keyWord = input()
        p.kill()
        if keyWord == 'x':
            break
        searchRet = tiaozhanSearch(keyWord)
        answerList = []
        # "Friendly reminder: similar questions, note the differences"
        remove_str = "温馨提示:相似题目,注意区分"
        for sr in searchRet:
            for s in sr.split('· '):
                s = s.replace(remove_str, '')
                s = s.replace("\r\n\r\n", '')
                s = s.replace("【", "\033[1;31m【")  # highlight brackets in red
                s = s.replace("】", "】\033[0m")
                answerList.append(s)
        answerList = list(filter(None, answerList))
        p = say("搜索到" + str(len(answerList)) + "个结果")  # "Found N results"
        for i in range(0, len(answerList)):
            print('\033[1;36m <' + str(i + 1) + '>\033[0m ' + answerList[i])
        p.wait()
    beep('coin')
    p = say("恭喜已完成该任务")  # "Congratulations, task completed"
    p.wait()
Example #3
    def __init__(self, n_d, vocab, oov="<unk>", embs=None, fix_init_embs=True):

        if embs is not None:
            lst_words = [ ]
            vocab_map = {}
            emb_vals = [ ]
            for word, vector in embs:
                assert word not in vocab_map, "Duplicate words in initial embeddings"
                vocab_map[word] = len(vocab_map)
                emb_vals.append(vector)
                lst_words.append(word)

            self.init_end = len(emb_vals) if fix_init_embs else -1
            if n_d != len(emb_vals[0]):
                say("WARNING: n_d ({}) != init word vector size ({}). Use {} instead.\n".format(
                        n_d, len(emb_vals[0]), len(emb_vals[0])
                    ))
                n_d = len(emb_vals[0])

            say("{} pre-trained embeddings loaded.\n".format(len(emb_vals)))

            for word in vocab:
                if word not in vocab_map:
                    vocab_map[word] = len(vocab_map)
                    emb_vals.append(random_init((n_d,))*(0.001 if word != oov else 0.0))
                    lst_words.append(word)

            emb_vals = np.vstack(emb_vals).astype(theano.config.floatX)
            self.vocab_map = vocab_map
            self.lst_words = lst_words
        else:
            lst_words = [ ]
            vocab_map = {}
            for word in vocab:
                if word not in vocab_map:
                    vocab_map[word] = len(vocab_map)
                    lst_words.append(word)

            self.lst_words = lst_words
            self.vocab_map = vocab_map
            emb_vals = random_init((len(self.vocab_map), n_d))
            self.init_end = -1

        if oov is not None and oov is not False:
            assert oov in self.vocab_map, "oov {} not in vocab".format(oov)
            self.oov_tok = oov
            self.oov_id = self.vocab_map[oov]
        else:
            self.oov_tok = None
            self.oov_id = -1

        self.embeddings = create_shared(emb_vals)
        if self.init_end > -1:
            self.embeddings_trainable = self.embeddings[self.init_end:]
        else:
            self.embeddings_trainable = self.embeddings

        self.n_V = len(self.vocab_map)
        self.n_d = n_d
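A hedged usage sketch for this layer; load_embedding_iterator is borrowed from the surrounding codebase and assumed to yield (word, vector) pairs, and the file path is a placeholder:

layer = EmbeddingLayer(n_d=100,
                       vocab=["<unk>", "the", "cat"],
                       embs=load_embedding_iterator("vectors.txt"))
say("n_V={} n_d={}\n".format(layer.n_V, layer.n_d))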
Example #4
 def evaluate_data(self, batches, eval_func):
     tot_loss,  tot_acc, tot = 0.0, 0.0, 0.0
     n_c = self.nclasses
     tot_pred, tot_gold, tot_corr = [0.0]*n_c, [0.0]*n_c, [0.0]*n_c
     tot_rel_loss, tot_dom_loss, tot_adv_loss = 0.0, 0.0, 0.0
     tot_recon_loss = 0.0
     for b in batches:
         bx, by, brel, bid = b
         prob, lab_loss, rel_loss, dom_loss, adv_loss, recon_loss = eval_func(bx, by, brel, bid)
         tot_loss += lab_loss
         tot_rel_loss += rel_loss
         tot_dom_loss += dom_loss
         tot_adv_loss += adv_loss
         tot_recon_loss += recon_loss
         for gold_y, p in zip(by, prob):
             pred = np.argmax(p)
             tot += 1
             tot_pred[pred] += 1
             tot_gold[gold_y] += 1
             if pred == gold_y: 
                 tot_acc += 1
                 tot_corr[pred] += 1
     n = len(batches)
     f1 = []
     for p, g, c in zip(tot_pred, tot_gold, tot_corr):
         pre = c / p if p > 0 else 0.0
         rec = c / g if g > 0 else 0.0
         f1.append((2*pre*rec)/(pre+rec+1e-8))
         
     tot_data = sum([len(b[1]) for b in batches])
     say(("\tEvaluate data: {}\n").format(str(tot_data)))
     return tot_loss/n, tot_rel_loss/n, tot_dom_loss/n, tot_adv_loss/n/self.rho.get_value(), tot_recon_loss/n, tot_acc/tot, f1
Example #6
 def join_game(self, player_name):
     if player_name not in self.players:
         new_player = Player(player_name)
         new_player.workers[0].position = self.world.random_coords()
         self.players.append(new_player)
         say(player_name, '%s: now playing' % player_name)
     else:
         say(player_name, '%s: already playing' % player_name)
Example #7
    def ready(self, args, train):
        # len * batch
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(n_d=self.n_d,
                                         vocab=set(w for w in train))
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(n_in=self.n_d, n_out=self.n_d, activation=activation)

        output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape((self.idxs.shape[0], self.idxs.shape[1], self.n_d))

        # len * batch * (n_d+n_d)
        h = rnn_layer.forward_all(x, self.init_state, return_c=True)

        self.last_state = h[-1]
        h = h[:, :, self.n_d:]
        h = apply_dropout(h, self.dropout)

        self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])
        #self.nll = T.nnet.categorical_crossentropy(
        #                self.p_y_given_x,
        #                idys
        #            )

        self.layers = [embedding_layer, rnn_layer, output_layer]
        #self.params = [ x_flat ] + rnn_layer.params + output_layer.params
        self.params = embedding_layer.params + rnn_layer.params + output_layer.params
        self.num_params = sum(
            len(x.get_value(borrow=True).ravel()) for l in self.layers
            for x in l.params)
        say("# of params in total: {}\n".format(self.num_params))
Example #8
 def new_round(self):
     self.slow = False
     self.new_challenge()
     self.text = self.word['challenge']
     self.txt.set_text(self.text)
     self.input_enabled = True
     utils.say(self.word['speak'],
               self.slow,
               audio_options=self.audio_options)
Example #9
 def settle(self, player_name, group_name):
     player = self.get_player(player_name)
     # The original snippet used `group` without defining it; a lookup by
     # name on the player is assumed here (hypothetical helper).
     group = player.get_group(group_name)
     # Enough resources? The original compared lists with `>`, which is
     # lexicographic in Python; an element-wise check is what was intended.
     if all(resource >= 1000 for resource in group.resources):
         group.resources = [resource - 1000 for resource in group.resources]
         self.world.grid[group.position[0]][group.position[1]] = Town(player.name)
         say(player, 'You have settled in position %s.' % group.position)
     else:
         say(player, 'Could not settle in position %s. Make sure your worker group'
                ' has enough resources.' % group.position)
Example #10
    def handle_input(self, key):
        # The original also tested type(key) == 'unicode', which compares a
        # type object to a string literal and is always False.
        if isinstance(key, str):
            if key == 'enter':
                if len(self.text) > 0:
                    utils.say(self.text, audio_options=self.audio_options)
                    self.text = ""
            elif key == 'backspace':
                self.text = self.text[:-1]
            else:
                self.text = self.text + key

        self.txt.set_text(self.text.upper())
Example #11
def main():
    args = get_args()
    # set defaults
    if args.out is None:
        args.out = args.fasta + ".annotated"
    # translate fasta?
    query = args.fasta
    if args.seqtype == "cds":
        query = os.path.split(query)[1]
        query = os.path.join(args.temp, query)
        query = query + ".translated"
        say("Translating input fasta to:\n ", query)
        translate_fasta(args.fasta, query)
        args.seqtype = "prot"
    # perform uniref90 search
    uniref90hits = uniref_search(
        diamond=args.diamond,
        database=args.uniref90db,
        query=query,
        seqtype=args.seqtype,
        temp=args.temp,
        diamond_options=args.diamond_options,
        force_search=args.force_search,
    )
    uniref90map = parse_results(uniref90hits)
    # perform uniref50 search
    uniref50hits = uniref_search(
        diamond=args.diamond,
        database=args.uniref50db,
        query=query,
        seqtype=args.seqtype,
        temp=args.temp,
        diamond_options=args.diamond_options,
        force_search=args.force_search,
    )
    uniref50map = parse_results(uniref50hits)
    # override mappings?
    overrides = {}
    if args.transitive_map is not None:
        overrides = trans_mapping(uniref90map, args.transitive_map)
    # reannotate the fasta
    reannotate(
        query=args.fasta,
        out=args.out,
        uniref90map=uniref90map,
        uniref50map=uniref50map,
        overrides=overrides,
    )
    # done
    say("Finished successfully.")
Example #12
def destroy_shm_semaphore(semaphore, mapfile):
    params = utils.read_params()

    utils.say("Destroying semaphore and shared memory.")
    mapfile.close()
    # I could call memory.unlink() here but in order to demonstrate
    # unlinking at the module level I'll do it that way.
    posix_ipc.unlink_shared_memory(params["SHARED_MEMORY_NAME"])

    semaphore.release()

    # I could also unlink the semaphore by calling
    # posix_ipc.unlink_semaphore() but I'll do it this way instead.
    semaphore.unlink()
Example #13
def p_watch_video(times=6, ts=60):
    print("------ 视听学习得分任务 ------")  # "Audio/video learning scoring task"
    # "Watch {times} new videos, {ts} seconds each; pick a video you have not watched yet"
    goon("观看" + str(times) + "个新视频,每个观看" + str(ts) + "秒, \n请选择一个未观看过的视频")
    t = 1
    while True:
        say("观看第" + str(t) + "个视频")  # "Watching video number t"
        time.sleep(int(ts))
        t = t + 1
        if t > times:
            break
        goon("请选择下一个视频")  # "Please pick the next video"
    beep('coin')
    p = say("恭喜已完成该任务")  # "Congratulations, task completed"
    p.wait()
Example #14
def trans_mapping(uniref90map, p_trans_map):
    say("Loading transitive mapping file:\n ", p_trans_map)
    check_path(p_trans_map)
    overrides = {}
    uniref90map_r = {}
    for header, uniref90 in uniref90map.items():
        uniref90map_r.setdefault(uniref90, set()).add(header)
    with try_open(p_trans_map) as fh:
        for row in csv.reader(fh, csv.excel_tab):
            uniref90, uniref50 = row
            headers = uniref90map_r.get(uniref90, set())
            for h in headers:
                overrides[h] = uniref50
    return overrides
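A hedged sketch of the transitive mapping file trans_mapping expects: one UniRef90 ID and one UniRef50 ID per row, tab-separated (the IDs below are invented):

# uniref90-to-uniref50.tsv (hypothetical contents; columns are tab-separated)
# UniRef90_EXAMPLE1	UniRef50_EXAMPLE9
# UniRef90_EXAMPLE2	UniRef50_EXAMPLE9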
Example #15
def p_read_article(times=6, ts=60):
    print("------ 文章学习得分任务 ------")  # "Article reading scoring task"
    # "Read {times} new articles, {ts} seconds each; pick an article you have not read yet"
    goon("阅读" + str(times) + "篇新文章,每篇阅读" + str(ts) + "秒,\n请选择一篇未阅读过的文章")
    t = 1
    while True:
        say("阅读第" + str(t) + "篇文章")  # "Reading article number t"
        time.sleep(int(ts))
        t = t + 1
        if t > times:
            break
        goon("请选择下一篇文章")  # "Please pick the next article"
    beep('coin')
    p = say("恭喜已完成该任务")  # "Congratulations, task completed"
    p.wait()
Example #16
def parse_results(results):
    say("Parsing results file:\n ", results)
    check_path(results)
    mapping = {}
    mode = get_mode(results)
    min_pident = float(mode.replace("uniref", ""))
    with try_open(results) as fh:
        for row in csv.reader(fh, csv.excel_tab):
            h = Hit(row, config=c_output_format)
            if h.qseqid not in mapping:
                if h.pident >= min_pident and h.mcov >= c_min_coverage:
                    uniref = h.sseqid.split("|")[0]
                    mapping[h.qseqid] = uniref
    return mapping
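A hedged illustration of the naming convention parse_results leans on: get_mode recovers the mode from the results path, and the identity threshold follows from it:

# "query.uniref90.hits" -> mode "uniref90" -> min_pident = 90.0
# "query.uniref50.hits" -> mode "uniref50" -> min_pident = 50.0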
Example #17
def read_annotations(path):
    data_x, data_y = [], []
    fopen = gzip.open if path.endswith(".gz") else open
    with fopen(path) as fin:
        for line in fin:
            y, sep, x = line.partition("\t")
            x, y = x.split(), y.split()
            if len(x) == 0: continue
            y = np.asarray([float(v) for v in y], dtype=theano.config.floatX)
            data_x.append(x)
            data_y.append(y)
    say("{} examples loaded from {}\n".format(len(data_x), path))
    say("max text length: {}\n".format(max(len(x) for x in data_x)))
    return data_x, data_y
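A hedged sketch of the input format read_annotations expects, one "<labels><TAB><tokens>" example per line (the file name and contents are hypothetical):

with open("toy_annotations.txt", "w") as out:
    out.write("0.9 0.1\tgreat battery , poor screen\n")
data_x, data_y = read_annotations("toy_annotations.txt")
# data_x == [["great", "battery", ",", "poor", "screen"]]
# data_y == [array([0.9, 0.1], dtype=floatX)]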
Example #18
    def parse(self, command):
        # Play music
        if command[0] == 'music':
            say('Playing music.')
            self.startPlayback()
            sv.playing = True

        # Play playlist ...
        elif command[0] == 'playlist':
            searchterm = ' '.join(command[1:])
            say('Playing ' + searchterm)
            self.searchPlaylist(searchterm)
            sv.playing = True

        # Play ... playlist
        elif command[-1] == 'playlist':
            searchterm = ' '.join(command[:-1])
            say('Playing ' + searchterm)
            self.searchPlaylist(searchterm)
            sv.playing = True

        # Play ...
        else:
            searchterm = ' '.join(command)
            say('Playing ' + searchterm)
            self.searchTrack(searchterm)
            sv.playing = True
Example #19
def main(args):
    assert args.train, "Training set required"
    assert args.dev, "Dev set required"
    assert args.test, "Test set required"
    assert args.emb, "Pre-trained word embeddings required."
    assert args.aspect_seeds, "Aspect seeds required."

    print args

    seeds = load_lis(args.aspect_seeds)
    say("loaded {} aspect seeds\n".format(len(seeds)))

    embedding_layer = EmbeddingLayer(
                n_d = 100,
                vocab = [ "<unk>" ],
                pre_embs = load_embedding_iterator(args.emb),
            )

    seeds_id = np.array(map(lambda seed: embedding_layer.map_to_ids(seed.strip().split()).tolist(), seeds), dtype = np.int32)

    if args.train:
        train_x, train_y = load_doc_corpus(embedding_layer, args.train)

    if args.dev:
        dev_x, dev_y = load_doc_corpus(embedding_layer, args.dev)

    if args.test:
        test_x, test_y = load_doc_corpus(embedding_layer, args.test)

    if args.train:
        model = Model(
                    args = args,
                    embedding_layer = embedding_layer,
                    num_aspects = len(seeds_id),
                    query = seeds_id
            )
        if args.load:
            print 'loading model...'
            model.load_model(args.load)
        else:
            model.ready()

        print 'training...'
        model.train(
                (train_x, train_y),
                (dev_x, dev_y) if args.dev else None,
                (test_x, test_y) if args.test else None
            )
Example #20
def p_likesome():
    print("------ 评论、转发、订阅得分任务 ------")  # "Comment, share, subscribe scoring task"
    print("提示:强国号多订阅无效")  # "Note: subscribing to extra Qiangguo accounts earns no points"
    # "Pick an article, comment once, share it twice, subscribe to 2 new accounts; continue when done"
    goon("选择一篇文章,评论 1 次,转发 2 次, 选择订阅 2 个新的强国号,  \n请完成后")
    beep('coin')
    p = say("恭喜已完成该任务")  # "Congratulations, task completed"
    p.wait()
Example #21
    def __init__(self,
                 vocab,
                 n_d=300,
                 oov="<unk>",
                 pre_embs=None,
                 fix_init_embs=False):
        # print vocab
        if pre_embs is not None:
            t_word = ''
            t_vector = []
            for word, vector in pre_embs:
                if n_d != len(vector):
                    say("WARNING: n_d ({}) != init word vector size ({}). Use {} instead.\n"
                        .format(n_d, len(vector), len(vector)))
                n_d = len(vector)
                t_word = word
                t_vector = vector
                break

            embs = random_init((len(vocab) + 1, n_d)) * 0.01
            cnt = 0
            t_word = t_word.decode('utf8')

            if t_word in vocab:
                embs[vocab[t_word]] = t_vector
                cnt = 1

            for word, vector in pre_embs:
                uword = word.decode('utf8', 'ignore')
                if len(vector) != n_d:
                    continue
                if uword in vocab:
                    if vocab[uword] >= len(vocab):
                        continue
                    embs[vocab[uword]] = vector
                    cnt += 1

            say("{} pre-trained embeddings loaded.\n".format(cnt))
            embs[len(vocab)] = random_init((n_d, )) * 0.0  # for oov embs
            emb_vals = embs  # np.vstack(embs).astype(theano.config.floatX)
        else:
            emb_vals = random_init((len(vocab) + 1, n_d)) * 0.5

        self.init_end = len(vocab) if fix_init_embs else -1
        self.embs = create_shared(emb_vals)
        self.n_d = n_d
Example #23
def p_weekend_answer():
    print("------ 每周答题得分任务 ------")  # "Weekly quiz scoring task"
    print("提示:本系统默认设置为每周一完成每周答题任务")  # "Note: by default the weekly quiz is done on Mondays"
    wd = datetime.datetime.now().weekday()
    if wd == 0:  # Monday
        # "Finish the weekly quiz; be careful not to slip and answer wrong. Continue when done"
        goon("完成每周答题,要小心别手残答错哟,  \n请完成后")
        beep('coin')
        p = say("恭喜已完成该任务")  # "Congratulations, task completed"
        p.wait()
Example #24
 def handle_input(self, key):
     if self.input_enabled and isinstance(key, str) and len(key) == 1:
         self.input_enabled = False
         self.text = self.text.replace(Game.MISSING_LETTER, key.upper(), 1)
         self.txt.set_text(self.text)
         if Game.MISSING_LETTER not in self.text:
             if self.word['screen'] == self.text:
                 self.txt.set_text(self.text_decorator(
                     self.text, "success"))
                 self.loop.set_alarm_in(1, self.alarm_new_round)
             else:
                 self.txt.set_text(self.text_decorator(self.text, "error"))
                 self.loop.set_alarm_in(2, self.alarm_reset_text)
                 utils.say(self.word['speak'],
                           self.toggle_slow(),
                           audio_options=self.audio_options)
         else:
             self.input_enabled = True
Example #25
def wake_me_up():
    """
    function to execute to wake up. reads a wikipedia article then gives useful information
    """

    t = get_date()

    # Getting an article
    page = get_rand_wiki_page()
    while len(page["text"]) < LENGTH:
        print("too short")
        page = get_rand_wiki_page()

    article = page["title"] + ". " + remove_extra_whitespace(
        soft_cut_string(page["text"], LENGTH).replace("\n", " "))

    # Loading parameters
    d = get_json("wake_up.json")

    # Constants for now
    temp = "25"
    conditions = "sunny"
    planning = "shopping, going to school"

    # Non-constant info
    weekday = get_weekday_name(get_weekday_number(t))

    intro = random.choice(d["intro"])
    outro = random.choice(d["outro"])
    salute = random.choice(d["salute"]).format(d["user_name"])
    weather = random.choice(d["weather"]).format(temp, d["temp_scale"],
                                                 conditions)
    day = random.choice(d["day"]).format(weekday, planning)

    text = " ".join([intro, article, outro, salute, weather, day])

    print(text)
    say(text)

    end_time = get_date()
    total_time = end_time - t
    print(total_time)
Example #26
def uniref_search(diamond=None,
                  database=None,
                  query=None,
                  seqtype=None,
                  temp=None,
                  diamond_options=None,
                  force_search=False):
    if which(diamond) is None:
        die("<diamond> is not executable as: {}".format(diamond))
    for path in [database, query, temp]:
        check_path(path)
    binary = {"nuc": "blastx", "prot": "blastp"}[seqtype]
    mode = get_mode(database)
    results = os.path.split(query)[1]
    results = os.path.join(temp, results)
    results = ".".join([results, mode, "hits"])
    command = [
        diamond,
        binary,
        "--db",
        database,
        "--query",
        query,
        "--outfmt",
        c_output_format,
        "--tmpdir",
        temp,
        "--out",
        results,
        "--id",
        get_mode(results).replace("uniref", ""),
        c_diamond_filters,
    ]
    command = " ".join([str(k) for k in command])
    command += (" " + diamond_options) if diamond_options is not None else ""
    if force_search or not os.path.exists(results):
        say("Executing:\n ", command)
        os.system(command)
    else:
        say("Using existing results file:\n ", results)
    return results
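A hedged invocation sketch; the paths are placeholders, and a DIAMOND binary must be on PATH for the command to run:

hits = uniref_search(
    diamond="diamond",
    database="uniref90.dmnd",  # hypothetical database path
    query="proteins.faa",      # hypothetical query fasta
    seqtype="prot",
    temp="/tmp",
)
uniref90map = parse_results(hits)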
Example #27
def _register():
    say('登録を始めます')  # "Starting registration"
    say('正面に立ってラズベリーマークを見てください')  # "Stand facing the camera and look at the raspberry mark"

    with picamera.PiCamera() as camera:
        camera.resolution = (1024, 768)
        camera.start_preview()
        time.sleep(2)  # Must sleep at least 2 seconds so the sensor can adjust brightness
        camera.capture('front.jpg')
        # "Next, slowly turn your face from side to side, taking about 5 seconds"
        say('次にゆっくりと左右に顔を振ってください。5秒かけて左右にふるイメージです。')
        for i in range(10):
            camera.capture(f"side_{i}.jpg")
            time.sleep(0.5)
        say('撮影が完了しました。')  # "Capture complete."
        camera.stop_preview()
Example #28
def p_guide_all():
    print("------ 视听学习得分任务 ------")  # "Audio/video learning scoring task"
    # "Open the radio tab, pick an unplayed album with more than 6 tracks, and play it in the background"
    goon("请打开电台,选择一个未听过的大于6首歌曲的专辑,并后台播放")

    p_daily_answer()
    p_weekend_answer()
    p_specific_answer()
    p_challenge_answer()
    p_likesome()

    p_read_article_by_num(5)  # Only 5 articles here; the remaining one is covered by the "like" task
    p_read_article_by_time()
    p_watch_local_video()

    while True:
        # Prompts: "check whether the watch-count task is done" /
        # "enter the missing points, or press Enter if all done"
        r = goon("请检查视听学习次数任务是否完成", "请输入未得分数,全部完成输入回车键")
        if r:
            try:
                ri = int(r)
            except ValueError:  # the original used a bare except
                goon("输入错误,请重新输入")  # "Invalid input, please retry"
            else:
                if ri <= 0 or ri > 6:
                    goon("输入错误,请重新输入")
                    continue
                p_watch_video_by_num(ri)
                break
        else:
            break

    while True:
        # Prompts: "check whether the watch-time task is done" / same input convention
        r = goon("请检查视听学习时间任务是否完成", "请输入未得分数,全部完成输入回车键")
        if r:
            try:
                ri = int(r)
            except ValueError:
                goon("输入错误,请重新输入")
            else:
                if ri < 0 or ri > 6:
                    goon("输入错误,请重新输入")
                    continue
                p_watch_video_by_time(ri)
                break
        else:
            break

    beep('coin')
    p = say("恭喜已完成全部任务")  # "Congratulations, all tasks completed"
    p.wait()
Example #29
def main(semaphore, mapfile, what_i_wrote=""):
    vehicle = connection_uav()
    semaphore.release()
    semaphore.acquire()
    
    s = utils.read_from_memory(mapfile)
    print s

    # I keep checking the shared memory until something new has
    # been written.
    while s == what_i_wrote:
        # Nothing new; give Mrs. Conclusion another chance to respond.
        utils.say("Releasing the semaphore")
        semaphore.release()

        utils.say("Waiting to acquire the semaphore")
        semaphore.acquire()

        s = utils.read_from_memory(mapfile)

    what_i_wrote = vehicle.attitude
    utils.write_to_memory(mapfile, what_i_wrote)

    return what_i_wrote
Example #30
    def ready(self):
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

        # len*batch
        x = self.x = T.imatrix()

        z = self.z = T.bmatrix()
        z = z.dimshuffle((0,1,"x"))

        # batch*nclasses
        y = self.y = T.fmatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        depth = args.depth
        layer_type = args.layer.lower()
        for i in xrange(depth):
            if layer_type == "rcnn":
                l = ExtRCNN(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order
                    )
            elif layer_type == "lstm":
                l = ExtLSTM(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            layers.append(l)

        # len * batch * 1
        masks = T.cast(T.neq(x, padding_id).dimshuffle((0,1,"x")) * z, theano.config.floatX)
        # batch * 1
        cnt_non_padding = T.sum(masks, axis=0) + 1e-8

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)

        pooling = args.pooling
        lst_states = [ ]
        h_prev = embs
        for l in layers:
            # len*batch*n_d
            h_next = l.forward_all(h_prev, z)
            if pooling:
                # batch * n_d
                masked_sum = T.sum(h_next * masks, axis=0)
                lst_states.append(masked_sum/cnt_non_padding) # mean pooling
            else:
                lst_states.append(h_next[-1]) # last state
            h_prev = apply_dropout(h_next, dropout)

        if args.use_all:
            size = depth * n_d
            # batch * size (i.e. n_d*depth)
            h_final = T.concatenate(lst_states, axis=1)
        else:
            size = n_d
            h_final = lst_states[-1]
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = self.nclasses,
                activation = sigmoid
            )

        # batch * nclasses
        preds = self.preds = output_layer.forward(h_final)

        # batch * nclasses
        loss_mat = self.loss_mat = (preds-y)**2
        loss = self.loss = T.mean(loss_mat)

        pred_diff = self.pred_diff = T.mean(T.max(preds, axis=1) - T.min(preds, axis=1))

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost

        cost = self.cost = loss * 10 + l2_cost
Example #31
# hashlib is only available in Python >= 2.5. I still want to support 
# older Pythons so I import md5 if hashlib is not available. Fortunately
# md5 can masquerade as hashlib for my purposes.
try:
    import hashlib
except ImportError:
    import md5 as hashlib
    
# mmap is used below to map the shared memory but was missing from the
# original snippet.
import mmap

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils


utils.say("Oooo 'ello, I'm Mrs. Premise!")

params = utils.read_params()

# Create the shared memory and the semaphore.
memory = posix_ipc.SharedMemory(params["SHARED_MEMORY_NAME"], posix_ipc.O_CREX,
                                size=params["SHM_SIZE"])
semaphore = posix_ipc.Semaphore(params["SEMAPHORE_NAME"], posix_ipc.O_CREX)

# MMap the shared memory
mapfile = mmap.mmap(memory.fd, memory.size)

# Once I've mmapped the file descriptor, I can close it without 
# interfering with the mmap. 
memory.close_fd()
Example #32
def run_command(message_data):
    sender = message_data['sender']
    said = message_data['said']
    # '#channel' if room, 'sender' if private message
    current_channel = message_data['current_channel']
    params = message_data['params']

    # Get title from web pages
    if 'http://' in said:
        url = extract_url(said)
        title = get_title(url)
        if title:
            say(current_channel, 'Title: %s' % title)

    # Get link to Wikipedia article
    if '[[' in said:
        for article_name in extract_article(said):
            say(current_channel, get_link(article_name))

    # Reply to mention with a random quote
    if nickname in said:
        say(current_channel, random_quote(sender))

    ## IRC commands ##
    search_term = '+'.join(params)
    
    # List all commands
    if said.find('@help') == 0:
        say(sender, 'Search engines: google, wa, ddg, drae, dpd, en, es')
        say(sender, 'Misc: random [list], conv (unit) to (unit), fetch (wikipedia_article), link <start|get|check|stop>, calc (expression)')

    # Google
    elif said.find('@google') == 0:
        say(current_channel, 'https://www.google.com/search?q=%s' % search_term)

    # Wolfram Alpha
    elif said.find('@wa') == 0:
        say(current_channel, 'http://www.wolframalpha.com/input/?i=%s' % search_term)

    # DuckDuckGo
    elif said.find('@ddg') == 0:
        say(current_channel, 'http://duckduckgo.com/?q=%s' % search_term)

    # DRAE
    elif said.find('@drae') == 0:
        say(current_channel, 'http://lema.rae.es/drae/?val=%s' % search_term)

    # DPD
    elif said.find('@dpd') == 0:
        say(current_channel, 'http://lema.rae.es/dpd/?key=%s' % search_term)

    # Jisho kanji lookup
    elif said.find('@kan') == 0:
        escaped_term = urllib2.quote(search_term)
        say(current_channel, 'http://jisho.org/kanji/details/%s' % escaped_term)

    # EN > JP
    elif said.find('@ei') == 0:
        say(current_channel, 'http://jisho.org/words?jap=&eng=%s&dict=edict' % search_term)

    # JP > EN
    elif said.find('@ni') == 0:
        escaped_term = urllib2.quote(search_term)
        say(current_channel, 'http://jisho.org/words?jap=%s&eng=&dict=edict' % escaped_term)

    # EN > ES
    elif said.find('@en') == 0:
        say(current_channel, 'http://www.wordreference.com/es/translation.asp?tranword=%s' % search_term)

    # ES > EN
    elif said.find('@es') == 0:
        say(current_channel, 'http://www.wordreference.com/es/en/translation.asp?spen=%s' % search_term)

    # Random choice
    elif said.find('@random') == 0:
        if len(params) == 1:
            say(current_channel, 'f****t')
        elif len(params) > 1:
            say(current_channel, random.choice([s.strip() for s in said.split(',')]))
        else:
            say(current_channel, random.choice([0, 1]))

    # Unit converter
    elif said.find('@conv') == 0:
        if 'to' not in params:
            return
        index = params.index('to')
        amount = params[0]
        unit_from = params[1:index]
        unit_from = urllib2.quote(' '.join(unit_from))
        # 'to' == params[index]
        unit_to = params[index + 1:]
        unit_to = urllib2.quote(' '.join(unit_to))

        conversion_url = 'http://www.google.com/ig/calculator?hl=en&q='

        conversion = fetch_url(conversion_url + amount + unit_from + '=?' + unit_to).read()
        parsed_conversion = conversion.split('"')

        # Check for errors
        if len(parsed_conversion[5]) == 0:
            unit_result = urllib2.unquote(unit_to)
            say(current_channel, '%s %s' % (parsed_conversion[3].split()[0], unit_result))

    # Linkrace module
    elif said.find('@link') == 0:
        # Get race links
        if params[0] == 'get':
            url = 'http://es.wikipedia.org/wiki/%s'
            start, end = random_pair()
            starturl = url % urllib2.quote(start)
            endurl = url % urllib2.quote(end)
            say(current_channel, 'Start article is %s' % starturl)
            say(current_channel, 'Goal article is %s' % endurl)

        # Check if chain is valid
        elif params[0] == 'check':
            chain = ' '.join(params[1:])
            broken_links = check_chain(chain)
            if not broken_links:
                say(current_channel, 'The chain is valid.')
            else:
                error_list = ' | '.join(broken_links)
                say(current_channel, error_list)
                say(current_channel, 'The chain is not valid.')

    # Calculator
    elif said.find('@calc') == 0:
        expression = ''.join(params)
        result = str(calculate(expression))
        say(current_channel, result)

    # Wikipedia fetch
    elif said.find('@fetch') == 0:
        article_name = ' '.join(params)
        extract = fetch(article_name)
        say(current_channel, extract)

    # Text game
    elif said.find('@dicks') == 0:
        global game
        # Commands available for everyone
        if params[0] == 'join':
            game.join_game(sender)
        elif params[0] == 'players':
            say(current_channel, [player.name for player in game.players])
        # Commands available for players
        if sender in [player.name for player in game.players]:
            if params[0] == 'panel':
                panel_url = sprunge(game.panel(sender))
                say(sender, '[i] Uploading panel')
                say(sender, panel_url)
            elif params[0] == 'settle':
                group = params[1]
                game.settle(sender, group)
            elif params[0] == 'move':
                troop = params[1]
                new_position = [params[2], params[3]]
                game.move(sender, troop, new_position)
    
    ## Owner commands ##
    if sender == owner:
        # Disconnect
        if said == '.quit':
            execute('QUIT')
            sys.exit(0)
        
        # Send message from bot
        elif said.find('.say') == 0:
            if len(params) > 1:
                say(params[0], ' '.join(params[1:]))

        # Print userlist
        elif said.find('.users') == 0:
            say(current_channel, str(users))

        # Bot joins
        elif said.find('.join') == 0:
            channel = params[0]
            execute('JOIN %s' % channel)

        # Bot parts
        elif said.find('.part') == 0:
            execute('PART %s' % current_channel)
            del users[current_channel]

        # Bot kicks
        elif said.find('.kick') == 0:
            user = params[0]
            reason = ' '.join(params[1:])
            if not reason:
                reason = 'huh'
            bot_kick(current_channel, user, reason)
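A hedged sketch of the message_data dict run_command expects, based on the keys read at the top of the function (all values are invented):

message_data = {
    'sender': 'alice',
    'said': '@google python mmap',
    'current_channel': '#demo',    # '#channel' if room, sender if private message
    'params': ['python', 'mmap'],  # arguments after the command word
}
run_command(message_data)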
Example #33
    def train(self):
        args = self.args
        train_x, train_y = self.train_set
        dev_x, dev_y = self.dev_set
        test_x, test_y = self.test_set

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                rho = args.rho,
                beta1 = args.beta1,
                beta2 = args.beta2,
                momentum = args.momentum,
                gamma = args.gamma,
                method = args.learning
            )[:3]

        batch = args.batch
        index = self.index
        x = self.x
        y = self.y

        train_func = theano.function(
                inputs = [ index ],
                outputs = [ self.cost, gnorm ],
                givens = {
                    x: train_x[index*batch:(index+1)*batch],
                    y: train_y[index*batch:(index+1)*batch]
                },
                updates = updates
            )

        dev_func = theano.function(
                inputs = [ index ],
                outputs = [ self.err, self.loss ],
                givens = {
                    x: dev_x[index*batch:(index+1)*batch],
                    y: dev_y[index*batch:(index+1)*batch]
                }
            )

        test_func = theano.function(
                inputs = [ index ],
                outputs = [ self.err, self.loss ],
                givens = {
                    x: test_x[index*batch:(index+1)*batch],
                    y: test_y[index*batch:(index+1)*batch]
                }
            )

        decay_lr = args.decay_lr and args.learning.lower() != "adadelta" and \
                        args.learning.lower() != "adagrad"
        lr_0 = args.learning_rate
        iter_cnt = 0

        N = train_x.get_value(borrow=True).shape[0]
        num_batches = (N-1)/batch + 1
        processed = 0
        period = args.eval_period

        best_dev_err = 1.0

        max_epochs = args.max_epochs
        for epoch in xrange(max_epochs):
            start_time = time.time()
            tot_cost = 0
            for i in xrange(num_batches):
                iter_cnt += 1
                if decay_lr:
                    lr.set_value(np.float32(lr_0/iter_cnt**0.5))
                cost, grad_norm = train_func(i)
                tot_cost += cost

                if math.isnan(cost):
                    say("NaN !!\n")
                    return

                ed = min(N, (i+1)*batch)
                prev = processed/period
                processed += ed-i*batch

                if (i == num_batches-1) or (processed/period > prev):
                    say("Epoch={:.1f} Sample={} cost={:.4f} |g|={:.2f}\t[{:.1f}m]\n".format(
                            epoch + (i+1.0)/num_batches,
                            processed,
                            tot_cost/(i+1),
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                        ))
                    dev_err, dev_loss = self.evaluate(dev_func, dev_x)
                    best_dev_err = min(best_dev_err, dev_err)
                    say("\tdev_err={:.4f} dev_loss={:.4f} best_dev={:.4f}\n".format(
                            dev_err, dev_loss, best_dev_err))
                    if dev_err == best_dev_err:
                        test_err, test_loss = self.evaluate(test_func, test_x)
                        say("\ttest_err={:.4f} test_loss={:.4f}\n".format(
                                test_err, test_loss))
                    say("\n")
Example #34
import sys

# sysv_ipc and PY_MAJOR_VERSION are used below but were missing from the
# original snippet; both are assumed here.
import sysv_ipc
PY_MAJOR_VERSION = sys.version_info[0]

import utils
if PY_MAJOR_VERSION > 2:
    import utils_for_3 as flex_utils
else:
    import utils_for_2 as flex_utils

params = utils.read_params()

# Mrs. Premise has already created the message queue. I just need a handle
# to it.
mq = sysv_ipc.MessageQueue(params["KEY"])

what_i_sent = ""

for i in range(0, params["ITERATIONS"]):
    utils.say("iteration %d" % i)

    s, _ = mq.receive()
    s = s.decode()
    utils.say("Received %s" % s)

    while s == what_i_sent:
        # Nothing new; give Mrs. Premise another chance to respond.
        mq.send(s)
        
        s, _ = mq.receive()
        s = s.decode()
        utils.say("Received %s" % s)

    if what_i_sent:
        if PY_MAJOR_VERSION > 2:
Example #35
# hashlib is only available in Python >= 2.5. I still want to support 
# older Pythons so I import md5 if hashlib is not available. Fortunately
# md5 can masquerade as hashlib for my purposes.
try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils


utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

params = utils.read_params()

# Mrs. Premise has already created the message queue. I just need a handle
# to it.
mq = posix_ipc.MessageQueue(params["MESSAGE_QUEUE_NAME"])

what_i_sent = ""

for i in range(0, params["ITERATIONS"]):
    utils.say("iteration %d" % i)

    s, _ = mq.receive()
    s = s.decode()
    utils.say("Received %s" % s)
Example #36
    def ready(self):
        generator = self.generator
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = generator.dropout

        # len*batch
        x = generator.x
        z = generator.z_pred
        z = z.dimshuffle((0,1,"x"))

        # batch*nclasses
        y = self.y = T.fmatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        depth = args.depth
        layer_type = args.layer.lower()
        for i in xrange(depth):
            if layer_type == "rcnn":
                l = ExtRCNN(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order
                    )
            elif layer_type == "lstm":
                l = ExtLSTM(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            layers.append(l)

        # len * batch * 1
        masks = T.cast(T.neq(x, padding_id).dimshuffle((0,1,"x")) * z, theano.config.floatX)
        # batch * 1
        cnt_non_padding = T.sum(masks, axis=0) + 1e-8

        # len*batch*n_e
        embs = generator.word_embs

        pooling = args.pooling
        lst_states = [ ]
        h_prev = embs
        for l in layers:
            # len*batch*n_d
            h_next = l.forward_all(h_prev, z)
            if pooling:
                # batch * n_d
                masked_sum = T.sum(h_next * masks, axis=0)
                lst_states.append(masked_sum/cnt_non_padding) # mean pooling
            else:
                lst_states.append(h_next[-1]) # last state
            h_prev = apply_dropout(h_next, dropout)

        if args.use_all:
            size = depth * n_d
            # batch * size (i.e. n_d*depth)
            h_final = T.concatenate(lst_states, axis=1)
        else:
            size = n_d
            h_final = lst_states[-1]
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = self.nclasses,
                activation = sigmoid
            )

        # batch * nclasses
        preds = self.preds = output_layer.forward(h_final)

        # batch * nclasses
        loss_mat = self.loss_mat = (preds-y)**2

        pred_diff = self.pred_diff = T.mean(T.max(preds, axis=1) - T.min(preds, axis=1))

        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:,args.aspect]
        self.loss_vec = loss_vec

        zsum = generator.zsum
        zdiff = generator.zdiff
        logpz = generator.logpz

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost

        self.cost_g = cost_logpz * 10 + generator.l2_cost
        self.cost_e = loss * 10 + l2_cost
Example #37
    def ready(self):
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        layer_type = args.layer.lower()
        for i in xrange(2):
            if layer_type == "rcnn":
                l = RCNN(
                        n_in = n_e,
                        n_out = n_d,
                        activation = activation,
                        order = args.order
                    )
            elif layer_type == "lstm":
                l = LSTM(
                        n_in = n_e,
                        n_out = n_d,
                        activation = activation
                    )
            layers.append(l)

        # len * batch
        masks = T.cast(T.neq(x, padding_id), theano.config.floatX)

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs

        flipped_embs = embs[::-1]

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        output_layer = self.output_layer = ZLayer(
                n_in = size,
                n_hidden = args.hidden_dimension2,
                activation = activation
            )

        # sample z given text (i.e. x)
        z_pred, sample_updates = output_layer.sample_all(h_final)

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        self.sample_updates = sample_updates
        print "z_pred", z_pred.ndim

        probs = output_layer.forward_all(h_final, z_pred)
        print "probs", probs.ndim

        logpz = - T.nnet.binary_crossentropy(probs, z_pred) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:]-z[:-1]), axis=0, dtype=theano.config.floatX)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
Example #38
    def train(self, train, dev, test):
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                            dev[0], dev[1], args.batch, padding_id
                        )
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                            test[0], test[1], args.batch, padding_id
                        )

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )
        say("{:.2f}s to create training batches\n\n".format(
                time.time()-start_time
            ))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
                               cost = self.encoder.cost_e,
                               params = self.encoder.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]


        updates_g, lr_g, gnorm_g = create_optimization_updates(
                               cost = self.encoder.cost_g,
                               params = self.generator.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]

        sample_generator = theano.function(
                inputs = [ self.x ],
                outputs = self.z
            )

        get_loss_and_pred = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.loss_vec, self.encoder.preds, self.z ]
            )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                                self.encoder.sparsity_cost, self.z, gnorm_e, gnorm_g ],
                updates = updates_e.items() + updates_g.items(),
            )

        eval_func = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.z, self.encoder.obj, self.true_pos, self.tot_pos, self.tot_true ]
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        last_train_avg_cost = None
        last_dev_avg_cost = None
        tolerance = 0.10 + 1e-3
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 50: return

            train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )

            more = True
            if args.decay_lr:
                param_bak = [ p.get_value(borrow=False) for p in self.params ]

            while more:
                processed = 0
                train_cost = 0.0
                train_loss = 0.0
                train_sparsity_cost = 0.0
                p1 = 0.0
                start_time = time.time()

                N = len(train_batches_x)
                for i in xrange(N):
                    if (i+1) % 100 == 0:
                        say("\r{}/{} {:.2f}       ".format(i+1,N,p1/(i+1)))

                    bx, by = train_batches_x[i], train_batches_y[i]
                    mask = bx != padding_id

                    cost, loss, sparsity_cost, bz, gl2_e, gl2_g = train_generator(bx, by)

                    k = len(by)
                    processed += k
                    train_cost += cost
                    train_loss += loss
                    train_sparsity_cost += sparsity_cost
                    p1 += np.sum(bz*mask) / (np.sum(mask)+1e-8)

                cur_train_avg_cost = train_cost / N

                if dev:
                    self.dropout.set_value(0.0)
                    dev_obj, dev_prec, dev_recall, dev_f1, dev_p1 = self.evaluate_data(
                            dev_batches_x, dev_batches_y, eval_func)
                    self.dropout.set_value(dropout_prob)
                    cur_dev_avg_cost = dev_obj

                more = False
                if args.decay_lr and last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost*(1+tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                                last_train_avg_cost, cur_train_avg_cost
                            ))
                    if dev and cur_dev_avg_cost > last_dev_avg_cost*(1+tolerance):
                        more = True
                        say("\nDev cost {} --> {}\n".format(
                                last_dev_avg_cost, cur_dev_avg_cost
                            ))

                if more:
                    lr_val = lr_g.get_value()*0.5
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost
                if dev: last_dev_avg_cost = cur_dev_avg_cost

                say("\n")
                say(("Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  " +
                    "p[1]={:.3f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                        epoch+(i+1.0)/N,
                        train_cost / N,
                        train_sparsity_cost / N,
                        train_loss / N,
                        p1 / N,
                        float(gl2_e),
                        float(gl2_g),
                        (time.time()-start_time)/60.0,
                        (time.time()-start_time)/60.0/(i+1)*N
                    ))
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.encoder.params ])+"\n")
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.generator.params ])+"\n")

                if dev:
                    if dev_obj < best_dev:
                        best_dev = dev_obj
                        unchanged = 0
                        if args.dump and test:
                            self.dump_rationales(args.dump, test_batches_x, test_batches_y,
                                        get_loss_and_pred, sample_generator)

                    say(("\tdevg={:.4f}  f1g={:.4f}  preg={:.4f}  recg={:.4f}" +
                                "  p[1]g={:.3f}  best_dev={:.4f}\n").format(
                        dev_obj,
                        dev_f1,
                        dev_prec,
                        dev_recall,
                        dev_p1,
                        best_dev
                    ))

                    if test is not None:
                        self.dropout.set_value(0.0)
                        test_obj, test_prec, test_recall, test_f1, test_p1 = self.evaluate_data(
                            test_batches_x, test_batches_y, eval_func)
                        self.dropout.set_value(dropout_prob)
                        say(("\ttestt={:.4f}  f1t={:.4f}  pret={:.4f}  rect={:.4f}" +
                                    "  p[1]t={:.3f}\n").format(
                            test_obj,
                            test_f1,
                            test_prec,
                            test_recall,
                            test_p1
                        ))
Example #39
0
    def __init__(self,
                 n_d,
                 vocab,
                 oov="<unk>",
                 pre_embs=None,
                 fix_init_embs=False):
        if pre_embs is not None:
            vocab_map = {}
            words = []
            embs = []
            for word, vector in pre_embs:
                if word in vocab_map:
                    continue
                vocab_map[word] = len(vocab_map)
                embs.append(vector)
                words.append(word)

            self.init_end = len(embs) if fix_init_embs else -1

            if n_d != len(embs[0]):
                say("WARNING: n_d ({}) != init word vector size ({}). Use {} instead.\n"
                    .format(n_d, len(embs[0]), len(embs[0])))
                n_d = len(embs[0])

            say("{} pre-trained embeddings loaded.\n".format(len(embs)))

            for word in vocab:
                if word not in vocab_map:
                    vocab_map[word] = len(vocab_map)
                    embs.append(
                        random_init((n_d, )) * (0.01 if word != oov else 0))
                    words.append(word)

            self.emb_vals = np.vstack(embs).astype(theano.config.floatX)
            self.vocab_map = vocab_map
            self.words = words
        else:
            words = []
            vocab_map = {}
            for word in vocab:
                if word not in vocab_map:
                    vocab_map[word] = len(vocab_map)
                    words.append(word)

            self.words = words
            self.vocab_map = vocab_map
            self.emb_vals = random_init((len(self.vocab_map), n_d)) * 0.01
            self.init_end = -1

        if oov is not None and oov is not False:
            assert oov in self.vocab_map, "oov {} not in vocab".format(oov)
            self.oov_tok = oov
            self.oov_id = self.vocab_map[oov]
        else:
            self.oov_tok = None
            self.oov_id = -1

        self.embs = create_shared(self.emb_vals)
        if self.init_end > -1:
            self.embs_trainable = self.embs[self.init_end:]
        else:
            self.embs_trainable = self.embs

        self.n_vocab = len(self.vocab_map)
        self.n_d = n_d
Example #40
0
    def ready(self):
        args = self.args
        embedding_layer = self.embedding_layer
        self.n_hidden = args.hidden_dim
        self.n_in = embedding_layer.n_d
        dropout = self.dropout = theano.shared(
                np.float64(args.dropout_rate).astype(theano.config.floatX)
            )

        # x is length * batch_size
        # y is batch_size
        self.x = T.imatrix('x')
        self.y = T.ivector('y')

        x = self.x
        y = self.y
        n_hidden = self.n_hidden
        n_in = self.n_in

        # fetch word embeddings
        # (len * batch_size) * n_in
        slices  = embedding_layer.forward(x.ravel())
        self.slices = slices

        # 3-d tensor, len * batch_size * n_in
        slices = slices.reshape( (x.shape[0], x.shape[1], n_in) )

        # stacking the feature extraction layers
        pooling = args.pooling
        depth = args.depth
        layers = self.layers = [ ]
        prev_output = slices
        prev_output = apply_dropout(prev_output, dropout, v2=True)
        size = 0
        softmax_inputs = [ ]
        activation = get_activation_by_name(args.act)
        for i in range(depth):
            if args.layer.lower() == "lstm":
                layer = LSTM(
                            n_in = n_hidden if i > 0 else n_in,
                            n_out = n_hidden
                        )
            elif args.layer.lower() == "strcnn":
                layer = StrCNN(
                            n_in = n_hidden if i > 0 else n_in,
                            n_out = n_hidden,
                            activation = activation,
                            decay = args.decay,
                            order = args.order
                        )
            elif args.layer.lower() == "rcnn":
                layer = RCNN(
                            n_in = n_hidden if i > 0 else n_in,
                            n_out = n_hidden,
                            activation = activation,
                            order = args.order,
                            mode = args.mode
                        )
            else:
                raise Exception("unknown layer type: {}".format(args.layer))

            layers.append(layer)
            prev_output = layer.forward_all(prev_output)
            if pooling:
                softmax_inputs.append(T.sum(prev_output, axis=0)) # summing over columns
            else:
                softmax_inputs.append(prev_output[-1])
            prev_output = apply_dropout(prev_output, dropout)
            size += n_hidden

        # final feature representation is the concatenation of all extraction layers
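        # (when pooling, dividing the summed states by the sequence length
        # x.shape[0] turns the sum into a mean over time steps)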
        if pooling:
            softmax_input = T.concatenate(softmax_inputs, axis=1) / x.shape[0]
        else:
            softmax_input = T.concatenate(softmax_inputs, axis=1)
        softmax_input = apply_dropout(softmax_input, dropout, v2=True)

        # feed the feature repr. to the softmax output layer
        layers.append( Layer(
                n_in = size,
                n_out = self.nclasses,
                activation = softmax,
                has_bias = False
        ) )

        for l,i in zip(layers, range(len(layers))):
            say("layer {}: n_in={}\tn_out={}\n".format(
                i, l.n_in, l.n_out
            ))

        # unnormalized score of y given x
        self.p_y_given_x = layers[-1].forward(softmax_input)
        self.pred = T.argmax(self.p_y_given_x, axis=1)
        self.nll_loss = T.mean( T.nnet.categorical_crossentropy(
                                    self.p_y_given_x,
                                    y
                            ))

        # adding regularizations
        self.l2_sqr = None
        self.params = [ ]
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))
Example #41
0
# Python modules
import mmap
import os
import sys
import hashlib

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils

PY_MAJOR_VERSION = sys.version_info[0]

utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

params = utils.read_params()

# Mrs. Premise has already created the semaphore and shared memory.
# I just need to get handles to them.
memory = posix_ipc.SharedMemory(params["SHARED_MEMORY_NAME"])
semaphore = posix_ipc.Semaphore(params["SEMAPHORE_NAME"])

# MMap the shared memory
mapfile = mmap.mmap(memory.fd, memory.size)

# Once I've mmapped the file descriptor, I can close it without
# interfering with the mmap. This also demonstrates that os.close() is a
# perfectly legitimate alternative to the SharedMemory's close_fd() method.
os.close(memory.fd)
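
A minimal sketch (assuming the mapfile object created above; not part of the
original demo) of why closing the descriptor is safe: the mapping itself stays
usable until mapfile.close() is called.

    mapfile.seek(0)
    data = mapfile.read(16)   # still works after os.close(memory.fd)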
Example #42
0
    def train(self, train, dev, test, rationale_data):
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                            dev[0], dev[1], args.batch, padding_id
                        )
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                            test[0], test[1], args.batch, padding_id
                        )
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                    [ u["xids"] for u in rationale_data ],
                    [ u["y"] for u in rationale_data ],
                    args.batch,
                    padding_id,
                    sort = False
                )

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )
        say("{:.2f}s to create training batches\n\n".format(
                time.time()-start_time
            ))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
                               cost = self.generator.cost_e,
                               params = self.encoder.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]


        updates_g, lr_g, gnorm_g = create_optimization_updates(
                               cost = self.generator.cost,
                               params = self.generator.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]

        sample_generator = theano.function(
                inputs = [ self.x ],
                outputs = self.z_pred,
                #updates = self.generator.sample_updates
                #allow_input_downcast = True
            )

        get_loss_and_pred = theano.function(
                inputs = [ self.x, self.z, self.y ],
                outputs = [ self.generator.loss_vec, self.encoder.preds ]
            )

        eval_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.z, self.generator.obj, self.generator.loss,
                                self.encoder.pred_diff ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = self.generator.sample_updates,
                #no_default_updates = True
            )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.generator.obj, self.generator.loss, \
                                self.generator.sparsity_cost, self.z, gnorm_g, gnorm_e ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = updates_g,
                updates = updates_g.items() + updates_e.items() #+ self.generator.sample_updates,
                #no_default_updates = True
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 10: return

            train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )

            processed = 0
            train_cost = 0.0
            train_loss = 0.0
            train_sparsity_cost = 0.0
            p1 = 0.0
            start_time = time.time()

            N = len(train_batches_x)
            for i in xrange(N):
                if (i+1) % 100 == 0:
                    say("\r{}/{}     ".format(i+1,N))

                bx, by = train_batches_x[i], train_batches_y[i]
                mask = bx != padding_id

                cost, loss, sparsity_cost, bz, gl2_g, gl2_e = train_generator(bx, by)

                k = len(by)
                processed += k
                train_cost += cost
                train_loss += loss
                train_sparsity_cost += sparsity_cost
                p1 += np.sum(bz*mask) / (np.sum(mask)+1e-8)

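                # evaluate at the end of the epoch, or whenever another
                # eval_period examples have been processed since the last check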
                if (i == N-1) or (eval_period > 0 and processed/eval_period >
                                    (processed-k)/eval_period):
                    say("\n")
                    say(("Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  " +
                        "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                            epoch+(i+1.0)/N,
                            train_cost / (i+1),
                            train_sparsity_cost / (i+1),
                            train_loss / (i+1),
                            p1 / (i+1),
                            float(gl2_g),
                            float(gl2_e),
                            (time.time()-start_time)/60.0,
                            (time.time()-start_time)/60.0/(i+1)*N
                        ))
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.encoder.params ])+"\n")
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.generator.params ])+"\n")

                    if dev:
                        self.dropout.set_value(0.0)
                        dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                                dev_batches_x, dev_batches_y, eval_generator, sampling=True)

                        if dev_obj < best_dev:
                            best_dev = dev_obj
                            unchanged = 0
                            if args.dump and rationale_data:
                                self.dump_rationales(args.dump, valid_batches_x, valid_batches_y,
                                            get_loss_and_pred, sample_generator)

                            if args.save_model:
                                self.save_model(args.save_model, args)

                        say(("\tsampling devg={:.4f}  mseg={:.4f}  avg_diffg={:.4f}" +
                                    "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                            dev_obj,
                            dev_loss,
                            dev_diff,
                            dev_p1,
                            best_dev
                        ))

                        if rationale_data is not None:
                            r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                                    rationale_data, valid_batches_x,
                                    valid_batches_y, eval_generator)
                            say(("\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}" +
                                        "  prec2={:.4f}\n").format(
                                    r_mse,
                                    r_p1,
                                    r_prec1,
                                    r_prec2
                            ))

                        self.dropout.set_value(dropout_prob)
Example #43
0
def main():
    print args
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(
                        args.embedding
                    )

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in train_x ]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in dev_x ]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(
                    args = args,
                    embedding_layer = embedding_layer,
                    nclasses = len(train_y[0])
                )
        model.ready()

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
                (train_x, train_y),
                (dev_x, dev_y) if args.dev else None,
                None, #(test_x, test_y),
                rationale_data if args.load_rationale else None
            )

    if args.load_model and args.dev and not args.train:
        model = Model(
                    args = None,
                    embedding_layer = embedding_layer,
                    nclasses = -1
                )
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(
                inputs = [ model.x, model.y ],
                outputs = [ model.z, model.encoder.obj, model.encoder.loss,
                                model.encoder.pred_diff ],
                updates = model.generator.sample_updates
            )

        # compile a predictor function
        pred_func = theano.function(
                inputs = [ model.x ],
                outputs = [ model.z, model.encoder.preds ],
                updates = model.generator.sample_updates
            )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
                        dev_x, dev_y, args.batch, padding_id
                    )

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
                dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
Example #44
0
    def train(self, args, train, dev, test=None):
        embedding_layer = self.layers[0]

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        batch_size = args["batch_size"]
        unroll_size = args["unroll_size"]

        train = create_batches(train, embedding_layer.map_to_ids, batch_size)

        dev = create_batches(dev, embedding_layer.map_to_ids, batch_size)

        if test is not None:
            test = create_batches(test, embedding_layer.map_to_ids, batch_size)

        cost = T.sum(self.nll) / self.idxs.shape[1]
        updates, lr, gnorm = create_optimization_updates(
                cost = cost,
                params = self.params,
                lr = args["learning_rate"],
                beta1 = args["beta1"],
                beta2 = args["beta2"],
                rho = args["rho"],
                momentum = args["momentum"],
                gamma = args["gamma"],
                eps = args["eps"],
                method = args["learning"]
            )[:3]
        #if args["learning"] == "adadelta":
        #    lr.set_value(args["learning_rate"])

        train_func = theano.function(
                inputs = [ self.idxs, self.idys, self.init_state ],
                outputs = [cost, self.last_state, gnorm ],
                updates = updates
            )
        eval_func = theano.function(
                inputs = [ self.idxs, self.idys, self.init_state ],
                outputs = [self.nll, self.last_state ]
            )

        N = (len(train[0])-1)/unroll_size + 1
        say(" train: {} tokens, {} mini-batches\n".format(
                len(train[0].ravel()), N
            ))
        say(" dev: {} tokens\n".format(len(dev[0].ravel())))

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        decay_lr = args["decay_lr"] and args["learning"].lower() != "adadelta" and \
                    args["learning"].lower() != "adagrad"
        lr_0 = args["learning_rate"]
        iter_cnt = 0

        unchanged = 0
        best_dev = 1e+10
        start_time = 0
        max_epoch = args["max_epoch"]
        for epoch in xrange(max_epoch):
            if unchanged > 5: break
            start_time = time.time()

            prev_state = np.zeros((batch_size, self.n_d*2),
                            dtype=theano.config.floatX)

            train_loss = 0.0
            for i in xrange(N):
                # get current batch
                x = train[0][i*unroll_size:(i+1)*unroll_size]
                y = train[1][i*unroll_size:(i+1)*unroll_size]

                iter_cnt += 1
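                # inverse-square-root decay: lr = lr_0 / sqrt(iter_cnt)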
                if decay_lr:
                    lr.set_value(np.float32(lr_0/iter_cnt**0.5))
                cur_loss, prev_state, grad_norm = train_func(x, y, prev_state)
                train_loss += cur_loss/len(x)

                if math.isnan(cur_loss) or math.isnan(grad_norm):
                    say("\nNaN !!\n")
                    return

                if i % 10 == 0:
                    say("\r{}".format(i))

                if i == N-1:
                    self.dropout.set_value(0.0)
                    dev_preds = self.evaluate(eval_func, dev, batch_size, unroll_size)
                    dev_loss = evaluate_average(
                            predictions = dev_preds,
                            masks = None
                        )
                    dev_ppl = np.exp(dev_loss)
                    self.dropout.set_value(dropout_prob)

                    say("\r\n")
                    say( ( "Epoch={}  lr={:.3f}  train_loss={:.3f}  train_ppl={:.1f}  " \
                        +"dev_loss={:.3f}  dev_ppl={:.1f}\t|g|={:.3f}\t[{:.1f}m]\n" ).format(
                            epoch,
                            float(lr.get_value(borrow=True)),
                            train_loss/N,
                            np.exp(train_loss/N),
                            dev_loss,
                            dev_ppl,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                        ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    # halve the learning rate
                    #if args["learning"] == "sgd" and dev_ppl > best_dev-1:
                    #    lr.set_value(np.max([lr.get_value()/2.0, np.float32(0.0001)]))

                    if dev_ppl < best_dev:
                        best_dev = dev_ppl
                        if test is None: continue
                        self.dropout.set_value(0.0)
                        test_preds = self.evaluate(eval_func, test, batch_size, unroll_size)
                        test_loss = evaluate_average(
                                predictions = test_preds,
                                masks = None
                            )
                        test_ppl = np.exp(test_loss)
                        self.dropout.set_value(dropout_prob)
                        say("\tbest_dev={:.1f}  test_loss={:.3f}  test_ppl={:.1f}\n".format(
                                best_dev, test_loss, test_ppl))
                    if best_dev > 200: unchanged += 1

        say("\n")
Example #45
0
    def print_norms(self, lst=None):
        if lst is None: lst = self.params
        say("\t{}\n".format(
                [ "{:.4f}".format(np.linalg.norm(p.get_value(borrow=True))) \
                        for p in lst ]
            ))
Example #46
0
    def ready(self, args, train):
        # len * batch
        self.idxs = T.imatrix()
        self.idys = T.imatrix()
        self.init_state = T.matrix(dtype=theano.config.floatX)

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        self.dropout = theano.shared(dropout_prob)

        self.n_d = args["hidden_dim"]

        embedding_layer = EmbeddingLayer(
                n_d = self.n_d,
                vocab = set(w for w in train)
            )
        self.n_V = embedding_layer.n_V

        say("Vocab size: {}\tHidden dim: {}\n".format(
                self.n_V, self.n_d
            ))

        activation = get_activation_by_name(args["activation"])

        rnn_layer = LSTM(
                 n_in = self.n_d,
                 n_out = self.n_d,
                 activation = activation
            )

        output_layer = Layer(
                n_in = self.n_d,
                n_out = self.n_V,
                activation = T.nnet.softmax,
            )

        # (len*batch) * n_d
        x_flat = embedding_layer.forward(self.idxs.ravel())

        # len * batch * n_d
        x = apply_dropout(x_flat, self.dropout)
        x = x.reshape( (self.idxs.shape[0], self.idxs.shape[1], self.n_d) )

        # len * batch * (n_d+n_d)
        h = rnn_layer.forward_all(x, self.init_state, return_c=True)

        self.last_state = h[-1]
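        # keep only the last n_d dimensions (the hidden states); with
        # return_c=True the first half of h presumably carries the cell state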
        h = h[:,:,self.n_d:]
        h = apply_dropout(h, self.dropout)

        self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

        idys = self.idys.ravel()
        self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])
        #self.nll = T.nnet.categorical_crossentropy(
        #                self.p_y_given_x,
        #                idys
        #            )

        self.layers = [ embedding_layer, rnn_layer, output_layer ]
        #self.params = [ x_flat ] + rnn_layer.params + output_layer.params
        self.params = embedding_layer.params + rnn_layer.params + output_layer.params
        self.num_params = sum(len(x.get_value(borrow=True).ravel())
                                for l in self.layers for x in l.params)
        say("# of params in total: {}\n".format(self.num_params))
Example #47
0
    def train(self, train, dev, test):
        args = self.args
        trainx, trainy = train
        batch_size = args.batch

        if dev:
            dev_batches_x, dev_batches_y = create_batches(
                    range(len(dev[0])),
                    dev[0],
                    dev[1],
                    batch_size
            )

        if test:
            test_batches_x, test_batches_y = create_batches(
                    range(len(test[0])),
                    test[0],
                    test[1],
                    batch_size
            )

        cost = self.nll_loss + self.l2_sqr

        updates, lr, gnorm = create_optimization_updates(
                cost = cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_model = theano.function(
             inputs = [self.x, self.y],
             outputs = [ cost, gnorm ],
             updates = updates,
             allow_input_downcast = True
        )

        eval_acc = theano.function(
             inputs = [self.x],
             outputs = self.pred,
             allow_input_downcast = True
        )

        unchanged = 0
        best_dev = 0.0
        dropout_prob = np.float64(args.dropout_rate).astype(theano.config.floatX)

        start_time = time.time()
        eval_period = args.eval_period

        perm = range(len(trainx))

        say(str([ "%.2f" % np.linalg.norm(x.get_value(borrow=True)) for x in self.params ])+"\n")
        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 20: return
            train_loss = 0.0

            random.shuffle(perm)
            batches_x, batches_y = create_batches(perm, trainx, trainy, batch_size)

            N = len(batches_x)
            for i in xrange(N):

                if i % 100 == 0:
                    sys.stdout.write("\r%d" % i)
                    sys.stdout.flush()

                x = batches_x[i]
                y = batches_y[i]

                va, grad_norm = train_model(x, y)
                train_loss += va

                # debug
                if math.isnan(va):
                    print ""
                    print i-1, i
                    print x
                    print y
                    return

                if (i == N-1) or (eval_period > 0 and (i+1) % eval_period == 0):
                    self.dropout.set_value(0.0)

                    say( "\n" )
                    say( "Epoch %.1f\tloss=%.4f\t|g|=%s  [%.2fm]\n" % (
                            epoch + (i+1)/(N+0.0),
                            train_loss / (i+1),
                            float(grad_norm),
                            (time.time()-start_time) / 60.0
                    ))
                    say(str([ "%.2f" % np.linalg.norm(x.get_value(borrow=True)) for x in self.params ])+"\n")

                    if dev:
                        preds = [ eval_acc(x) for x in dev_batches_x ]
                        nowf_dev = self.eval_accuracy(preds, dev_batches_y)
                        if nowf_dev > best_dev:
                            unchanged = 0
                            best_dev = nowf_dev
                            if args.save:
                                self.save_model(args.save, args)

                        say("\tdev accuracy=%.4f\tbest=%.4f\n" % (
                                nowf_dev,
                                best_dev
                        ))
                        if args.test and nowf_dev == best_dev:
                            preds = [ eval_acc(x) for x in test_batches_x ]
                            nowf_test = self.eval_accuracy(preds, test_batches_y)
                            say("\ttest accuracy=%.4f\n" % (
                                    nowf_test,
                            ))

                        if best_dev > nowf_dev + 0.05:
                            return

                    self.dropout.set_value(dropout_prob)

                    start_time = time.time()
Example #48
0
    def finish_movement():
        troop.position = new_position
        say(player_name, "The troop %s is now at position %s." % (troop_name, new_position))
Example #49
0
    def ready(self):
        generator = self.generator
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]
        unk_id = embedding_layer.vocab_map["<unk>"]
        unk_vec = embedding_layer.embeddings[unk_id]

        dropout = generator.dropout

        # len*batch
        x = generator.x
        z = generator.z_pred
        z = z.dimshuffle((0,1,"x"))

        # batch*nclasses
        y = self.y = T.fmatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        depth = args.depth
        layer_type = args.layer.lower()
        for i in xrange(depth):
            l = CNN(
                    n_in = n_e if i == 0 else n_d,
                    n_out = n_d,
                    activation = activation,
                    order = args.order
                )
            layers.append(l)

        # len * batch * 1
        masks = T.cast(T.neq(x, padding_id).dimshuffle((0,1,"x")) * z, theano.config.floatX)
        # batch * 1
        cnt_non_padding = T.sum(masks, axis=0) + 1e-8

        # len*batch*n_e
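        # positions the generator dropped (z == 0) are replaced by the <unk>
        # vector rather than removed, keeping the tensor shape fixed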
        embs = generator.word_embs*z + unk_vec.dimshuffle(('x','x',0))*(1-z)

        pooling = args.pooling
        lst_states = [ ]
        h_prev = embs
        for l in layers:
            # len*batch*n_d
            h_next = l.forward_all(h_prev)
            if pooling:
                # batch * n_d
                masked_sum = T.sum(h_next * masks, axis=0)
                lst_states.append(masked_sum/cnt_non_padding) # mean pooling
            else:
                lst_states.append(T.max(h_next, axis=0))
            h_prev = apply_dropout(h_next, dropout)

        if args.use_all:
            size = depth * n_d
            # batch * size (i.e. n_d*depth)
            h_final = T.concatenate(lst_states, axis=1)
        else:
            size = n_d
            h_final = lst_states[-1]
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = self.nclasses,
                activation = sigmoid
            )

        # batch * nclasses
        p_y_given_x = self.p_y_given_x = output_layer.forward(h_final)
        preds = self.preds = p_y_given_x > 0.5
        print preds, preds.dtype
        print self.nclasses

        # batch
        loss_mat = T.nnet.binary_crossentropy(p_y_given_x, y)

        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:,args.aspect]
        self.loss_vec = loss_vec

        self.true_pos = T.sum(preds*y)
        self.tot_pos = T.sum(preds)
        self.tot_true = T.sum(y)

        zsum = generator.zsum
        zdiff = generator.zdiff
        logpz = generator.logpz

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        if not args.fix_emb:
            params += embedding_layer.params
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost

        self.cost_g = cost_logpz + generator.l2_cost
        self.cost_e = loss + l2_cost
Example #50
0
# Python modules
import mmap
import os
import sys

# hashlib is only available in Python >= 2.5. I still want to support
# older Pythons so I import md5 if hashlib is not available. Fortunately
# md5 can masquerade as hashlib for my purposes.
try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils


PY_MAJOR_VERSION = sys.version_info[0]

utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

params = utils.read_params()

# Mrs. Premise has already created the semaphore and shared memory. 
# I just need to get handles to them.
memory = posix_ipc.SharedMemory(params["SHARED_MEMORY_NAME"])
semaphore = posix_ipc.Semaphore(params["SEMAPHORE_NAME"])

# MMap the shared memory
mapfile = mmap.mmap(memory.fd, memory.size)

# Once I've mmapped the file descriptor, I can close it without 
# interfering with the mmap. This also demonstrates that os.close() is a
# perfectly legitimate alternative to the SharedMemory's close_fd() method.
os.close(memory.fd)
Example #51
0
    def ready(self):
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        layer_type = args.layer.lower()
        for i in xrange(1):
            l = CNN(
                    n_in = n_e,
                    n_out = n_d,
                    activation = activation,
                    order = args.order
                )
            layers.append(l)

        # len * batch
        masks = T.cast(T.neq(x, padding_id), "int8").dimshuffle((0,1,'x'))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h_final = h1
        size = n_d
        h_final = apply_dropout(h_final, dropout)

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = 1,
                activation = sigmoid
            )

        # len*batch*1
        probs = output_layer.forward(h_final)

        # len*batch
        self.MRG_rng = MRG_RandomStreams()
        z_pred_dim3 = self.MRG_rng.binomial(size=probs.shape, p=probs, dtype="int8")
        z_pred = z_pred_dim3.reshape(x.shape)

        # we compute an approximate gradient by sampling z, so the sampled z
        # must be marked as excluded from the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        print "z_pred", z_pred.ndim

        #logpz = - T.nnet.binary_crossentropy(probs, z_pred_dim3) * masks
        logpz = - T.nnet.binary_crossentropy(probs, z_pred_dim3)
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
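        # zsum counts selected words per example (feeds the sparsity cost);
        # zdiff counts 0/1 transitions (feeds the coherence cost)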
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:]-z[:-1]), axis=0, dtype=theano.config.floatX)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
Example #52
0
import sys

PY_MAJOR_VERSION = sys.version_info[0]

try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import sysv_ipc

# Utils for this demo
import utils
if PY_MAJOR_VERSION > 2:
    import utils_for_3 as flex_utils
else:
    import utils_for_2 as flex_utils

utils.say("Oooo 'ello, I'm Mrs. Premise!")

params = utils.read_params()

# Create the semaphore & shared memory. I read somewhere that semaphores 
# and shared memory have separate key spaces, so one can safely use the 
# same key for each. This seems to be true in my experience.

# For purposes of simplicity, this demo code makes no allowance for the 
# failure of the semaphore or memory constructors. This is unrealistic 
# because one can never predict whether or not a given key will be available,
# so your code must *always* be prepared for these functions to fail. 

semaphore = sysv_ipc.Semaphore(params["KEY"], sysv_ipc.IPC_CREX)
memory = sysv_ipc.SharedMemory(params["KEY"], sysv_ipc.IPC_CREX)
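
A minimal defensive sketch (assuming the same params and utils as above; the
demo deliberately omits this) of handling the constructor failures the
comments warn about:

    try:
        semaphore = sysv_ipc.Semaphore(params["KEY"], sysv_ipc.IPC_CREX)
        memory = sysv_ipc.SharedMemory(params["KEY"], sysv_ipc.IPC_CREX)
    except sysv_ipc.ExistentialError:
        utils.say("Key %s is already in use; bailing out." % params["KEY"])
        sys.exit(1)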
Example #53
0
    def ready(self):
        encoder = self.encoder
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = encoder.dropout

        # len*batch
        x = self.x = encoder.x
        z = self.z = encoder.z

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        layer_type = args.layer.lower()
        for i in xrange(2):
            if layer_type == "rcnn":
                l = RCNN(
                        n_in = n_e,# if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order
                    )
            elif layer_type == "lstm":
                l = LSTM(
                        n_in = n_e,# if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            layers.append(l)

        # len * batch
        #masks = T.cast(T.neq(x, padding_id), theano.config.floatX)
        masks = T.cast(T.neq(x, padding_id), "int8").dimshuffle((0,1,"x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)

        flipped_embs = embs[::-1]

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        output_layer = self.output_layer = Layer(
                n_in = size,
                n_out = 1,
                activation = sigmoid
            )

        # len*batch*1
        probs = output_layer.forward(h_final)

        # len*batch
        probs2 = probs.reshape(x.shape)
        self.MRG_rng = MRG_RandomStreams()
        z_pred = self.z_pred = T.cast(self.MRG_rng.binomial(size=probs2.shape, p=probs2), "int8")

        # we compute an approximate gradient by sampling z, so the sampled z
        # must be marked as excluded from the gradient propagation path
        #
        self.z_pred = theano.gradient.disconnected_grad(z_pred)

        z2 = z.dimshuffle((0,1,"x"))
        logpz = - T.nnet.binary_crossentropy(probs, z2) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        zdiff = T.sum(T.abs_(z[1:]-z[:-1]), axis=0, dtype=theano.config.floatX)

        loss_mat = encoder.loss_mat
        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:,args.aspect]
        self.loss_vec = loss_vec

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        cost = self.cost = cost_logpz * 10 + l2_cost
        print "cost.dtype", cost.dtype

        self.cost_e = loss * 10 + encoder.l2_cost
Example #54
0
import time

# hashlib is only available in Python >= 2.5. I still want to support
# older Pythons so I import md5 if hashlib is not available. Fortunately
# md5 can masquerade as hashlib for my purposes.
try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils


utils.say("Oooo 'ello, I'm Mrs. Premise!")

params = utils.read_params()

# Create the message queue.
mq = posix_ipc.MessageQueue(params["MESSAGE_QUEUE_NAME"], posix_ipc.O_CREX)

# The first message is a random string (the current time).
s = time.asctime()
utils.say("Sending %s" % s)
mq.send(s)
what_i_sent = s

for i in range(0, params["ITERATIONS"]):
    utils.say("iteration %d" % i)
    
Example #55
0
import sys

PY_MAJOR_VERSION = sys.version_info[0]
# hashlib is only available in Python >= 2.5. I still want to support
# older Pythons so I import md5 if hashlib is not available. Fortunately
# md5 can masquerade as hashlib for my purposes.
try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import posix_ipc

# Utils for this demo
import utils

utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

params = utils.read_params()

# Mrs. Premise has already created the message queue. I just need a handle
# to it.
mq = posix_ipc.MessageQueue(params["MESSAGE_QUEUE_NAME"])

what_i_sent = ""

for i in range(0, params["ITERATIONS"]):
    utils.say("iteration %d" % i)

    s, _ = mq.receive()
    s = s.decode()
    utils.say("Received %s" % s)
Example #56
0
import sys

PY_MAJOR_VERSION = sys.version_info[0]

try:
    import hashlib
except ImportError:
    import md5 as hashlib

# 3rd party modules
import sysv_ipc

# Utils for this demo
import utils
if PY_MAJOR_VERSION > 2:
    import utils_for_3 as flex_utils
else:
    import utils_for_2 as flex_utils

utils.say("Oooo 'ello, I'm Mrs. Conclusion!")

params = utils.read_params()

semaphore = sysv_ipc.Semaphore(params["KEY"])
memory = sysv_ipc.SharedMemory(params["KEY"])

utils.say("memory attached at %d" % memory.address)

what_i_wrote = ""
s = ""

for i in range(0, params["ITERATIONS"]):
    utils.say("i = %d" % i)
    if not params["LIVE_DANGEROUSLY"]:
        # Wait for Mrs. Premise to free up the semaphore.