Code Example #1
 def train(data, label, epoch):
     assert data.shape[0] == label.shape[0]
     datasize = label.shape[0]
     indexes = np.random.permutation(datasize)
     updatetime = (datasize + config.classifier_batch_size - 1) // config.classifier_batch_size  # ceil division; "// ... + 1" produced an empty extra batch when datasize divided evenly
     total_loss = 0
     if config.show_progress:
         bar = ProgressBar('Classifier Train epoch {} / {}'.format(
             epoch, config.classifier_epochs),
                           max=updatetime)
     for i in range(updatetime):
         pos = i * config.classifier_batch_size
         ids = indexes[pos:pos + config.classifier_batch_size]  # slicing clamps at the end of the array, so no explicit bound check is needed
         current_batch_size = len(ids)
         batch_data = Variable(torch.from_numpy(data[ids]))
         batch_label = Variable(torch.from_numpy(label[ids]))
         if config.cuda:
             batch_data = batch_data.cuda()
             batch_label = batch_label.cuda()
         output = model(batch_data)
         assert output.size(0) == current_batch_size
         loss = F.cross_entropy(output, batch_label)
         total_loss += loss.data
         optimizer.zero_grad()
         loss.backward()
         nn.utils.clip_grad_norm(model.parameters(),
                                 config.classifier_grad_norm_clip)
         optimizer.step()
         if config.show_progress:
             bar.next()
     if config.show_progress:
         bar.finish()
     return total_loss[0]
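
Note: the ProgressBar used in most of these snippets matches the API of the PyPI "progress" package (a label plus max=, then next()/finish()); a minimal standalone sketch, assuming that package is what `from utils import ProgressBar` wraps (the Prophet/ARIMA wrappers below use a different bar with an update() method):

# sketch only; assumes the `progress` package (pip install progress)
from progress.bar import Bar as ProgressBar

bar = ProgressBar('Train', max=10)   # label and total step count
for _ in range(10):
    bar.next()                       # advance one step
bar.finish()                         # finalize and clear the bar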
Code Example #2
 def fit(self, train_df, regressors=None):
     print("Fitting...")
     progress_bar = ProgressBar(len(train_df.columns))
     for item in train_df.columns:
         self.models[item] = Prophet(
             yearly_seasonality=self.yearly_seasonality,
             weekly_seasonality=self.weekly_seasonality,
             daily_seasonality=self.daily_seasonality,
             **self.prophet_config)
         target = train_df[item].dropna()
         if self.use_boxcox:
             idx = target.index
             target, self.lmbda_boxcox[item] = boxcox(target)
             target = pd.Series(target, index=idx)
         target.index.name = "ds"
         target.name = "y"
         if self.country_holidays is not None:
             self.models[item].add_country_holidays(country_name=self.country_holidays)
         if regressors is not None:
             target = pd.merge(target, regressors, left_index=True, right_index=True, how="left")
             for reg in regressors.columns:
                 self.models[item].add_regressor(reg)
         target = target.reset_index()
         self.models[item].fit(target)
         progress_bar.update()
     progress_bar.finish()
     return self.models
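
For orientation, the wrapper above reduces to one Prophet call per column; a minimal, self-contained sketch of what happens inside the loop for a single series (synthetic data; no Box-Cox, holidays, or regressors):

import numpy as np
import pandas as pd
from prophet import Prophet  # older installs use: from fbprophet import Prophet

dates = pd.date_range("2020-01-01", periods=100, freq="D")
target = pd.Series(np.random.rand(100) + 1.0, index=dates)
target.index.name = "ds"
target.name = "y"

model = Prophet()
model.fit(target.reset_index())  # Prophet expects the two columns ds / y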
Code Example #3
    def train(self):
        avg_G_loss = 0
        avg_D_loss = 0
        avg_Q_loss = 0
        iterations = int(self.mnist.train.num_examples / self.batch_size)

        if self.show_progress:
            bar = ProgressBar('Train', max=iterations)

        for i in range(iterations):

            if self.show_progress:
                bar.next()

            batch_xs, _ = self.mnist.train.next_batch(self.batch_size)
            feed_dict = {self.X: batch_xs,
                         self.z: self.z_sampler(self.batch_size),
                         self.c_i: self.c_cat_sampler(self.batch_size),
                         self.c_j: self.c_cont_sampler(self.batch_size),
                         self.training: True}

            for _ in range(self.d_update):
                _, D_loss = self.sess.run([self.D_optim, self.D_loss], feed_dict=feed_dict)
            _, G_loss = self.sess.run([self.G_optim, self.G_loss], feed_dict=feed_dict)
            _, Q_loss = self.sess.run([self.Q_optim, self.Q_loss], feed_dict=feed_dict)

            avg_G_loss += G_loss / iterations
            avg_D_loss += D_loss / iterations
            avg_Q_loss += Q_loss / iterations

        if self.show_progress:
            bar.finish()

        return avg_G_loss, avg_D_loss, avg_Q_loss
Code Example #4
File: nnlm.py Project: shaoyx/learn
    def train(self, data):
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(self.logdir + "/train", self.sess.graph)

        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.win_size], dtype=np.float32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        m = self.win_size
        clean_data = np.concatenate((np.zeros(self.win_size, dtype=np.int32), data))  # pad head
        clean_data = np.concatenate((clean_data, np.zeros(self.batch_size, dtype=np.int32)))  # pad tail
        for idx in xrange(N):  # iterations
            if self.show: bar.next()
            target.fill(0)
            for b in xrange(self.batch_size):
                target[b][clean_data[m]] = 1 #one-batch, one example
                x[b] = clean_data[m-self.win_size : m] # we need padding here!
                m += 1

            summary, _, loss = self.sess.run([merged, self.optim, self.loss], feed_dict={
                                                        self.input: x,
                                                        self.targets: target})
            cost += np.sum(loss)
            train_writer.add_summary(summary, idx)

        train_writer.close()
        if self.show: bar.finish()
        return cost / N 
Code Example #5
 def predict(self, steps):
     print("Forecasting...")
     progress_bar = ProgressBar(len(self.models.items()))
     self.fcst_ds = pd.date_range(
         start=self.train_ds.min(),
         freq="D",
         periods=len(self.train_ds) + steps)[-steps:]  # keep exactly the forecast horizon; the hard-coded [-365:] broke whenever steps != 365
     for item, model in self.models.items():
         pred = model.predict(
             exogenous=fourier(
                 steps, 
                 seasonality=self.seasonality, 
                 n_terms=self.n_fourier_terms),
             n_periods=steps, 
             return_conf_int=True,
             alpha=(1.0 - self.confidence_interval))
         self.fcst[item] = pd.DataFrame(
             {"yhat":pred[0],
              "yhat_lower":pred[1][:,0],
              "yhat_upper":pred[1][:,1]},
             index=self.fcst_ds)
         if self.use_boxcox:
             self.fcst[item] = inv_boxcox(
                 self.fcst[item], 
                 self.lmbda_boxcox[item])
         progress_bar.update()
     progress_bar.finish()
     return pd.concat(self.fcst, axis=1)
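
The fourier(...) helper is not shown anywhere in these snippets; a plausible reconstruction (hypothetical names, standard sin/cos seasonal features of the kind passed to auto_arima as exogenous regressors):

import numpy as np
import pandas as pd

def fourier(n_periods, seasonality, n_terms):
    # hypothetical reconstruction: 2 * n_terms Fourier seasonal features
    t = np.arange(1, n_periods + 1)
    feats = {}
    for k in range(1, n_terms + 1):
        feats["sin_%d" % k] = np.sin(2.0 * np.pi * k * t / seasonality)
        feats["cos_%d" % k] = np.cos(2.0 * np.pi * k * t / seasonality)
    return pd.DataFrame(feats)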
Code Example #6
async def quote_many(num_quotes=1, conn_limit=20, progress=None, step=10):
    if progress is None:
        progress = ProgressBar()
        progress.max = num_quotes // step

    logger.info('Process total %d quotes with max %d concurrent connections'
                % (num_quotes, conn_limit))
    logger.debug('... progress bar increment step size: %d coroutines' % step)

    semaphore = asyncio.Semaphore(conn_limit)

    coro_to_fut = asyncio.ensure_future
    futures = [
        coro_to_fut(quote_with_lock(semaphore))
        for i in range(num_quotes)
    ]

    t_start = datetime.today()
    for ith, fut in enumerate(asyncio.as_completed(futures), 1):
        if ith % step == 0:
            progress.next()
        await fut
    t_end = datetime.today()
    progress.finish()

    logger.info('All coroutines complete in {:.2f} seconds'.format(
        (t_end - t_start).total_seconds()
    ))
    quotes = [fut.result() for fut in futures]
    return quotes
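
A minimal driver sketch for quote_many; quote_with_lock and logger live in the surrounding module, so only the event-loop plumbing is shown (pre-3.7 style, matching the snippet's era):

import asyncio

loop = asyncio.get_event_loop()
quotes = loop.run_until_complete(quote_many(num_quotes=100, conn_limit=20))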
Code Example #7
File: model.py Project: liangkai/MemN2N-tensorflow
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in xrange(self.mem_size):
            time[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Training', max=N)

        for idx in xrange(N):
            if self.show: bar.next()
            for b in xrange(self.batch_size):
                m = random.randrange(self.mem_size, len(data))
                target[b][data[m]] = 1
                context[b] = data[m - self.mem_size:m]

            # self.optim must be fetched as well; as originally written, this
            # "train" loop only evaluated the loss and never updated parameters
            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.input: x,
                                                    self.time: time,
                                                    self.target: target,
                                                    self.context: context})
            cost += loss

        if self.show: bar.finish()
        return cost/N/self.batch_size
Code Example #8
    def train(self, cnsldted_data):
        (data, trgt_aspect, trgt_Y, cntxt_mask) = cnsldted_data
        # `data` above is the context, not the input; trgt_aspect is the input (x below)
        # these data structures assume consistent, sequential storage
        N = int(math.ceil(len(data) / self.batch_size))
        # many sentences are repeated due to multiple aspects in a single
        # sentence.
        cost = 0
        accurate = 0

        #x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        x = np.ndarray([self.batch_size], dtype=np.int32)
        # the above is input array
        target = np.zeros([self.batch_size, self.nlabels]) # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])
        mask = np.ndarray([self.batch_size, self.mem_size])

        #x.fill(self.init_hid)
        #for t in xrange(self.mem_size):
        #    time[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in xrange(N):
            # We are doing sequential batching here.
            if self.show: bar.next()
            target.fill(0)
            curr_indx = idx*self.batch_size
            for b in xrange(self.batch_size):
                senti_labl = trgt_Y[curr_indx+b]
                target[b][self.labels_dict[senti_labl]] = 1
                context[b] = data[curr_indx+b]
                mask[b] = cntxt_mask[curr_indx+b]
                x[b] = trgt_aspect[curr_indx+b]
                # above assumes each element of the list `data` is a list of
                # words in the original sentence

            _, loss, self.step, num_accurate = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step,
                                                self.num_accurate],
                                                feed_dict={
                                                    self.input: x,
                                                    #self.time: time,
                                                    self.target: target,
                                                    self.context: context,
                                                    self.mask: mask})
            cost += np.sum(loss)
            accurate += num_accurate

        if self.show: bar.finish()
        return cost/N/self.batch_size, accurate*1./N/self.batch_size
Code Example #9
    def train(self, data):
        source_data, source_loc_data, target_data, target_label = data
        N = int(np.ceil(len(source_data) / self.batch_size))
        cost = 0
        if 'dl_elec.ckpt.meta' in listdir('./dl_model/'):
            restorer = tf.train.Saver()
            restorer.restore(self.sess, './dl_model/dl_elec.ckpt')
        else:
            x = np.ndarray([self.batch_size, 1], dtype=np.int32)
            time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
            target = np.zeros([self.batch_size], dtype=np.int32)
            context = np.ndarray([self.batch_size, self.mem_size],
                                 dtype=np.int32)
            mask = np.ndarray([self.batch_size, self.mem_size])

            if self.show:
                from utils import ProgressBar
                bar = ProgressBar('Train', max=N)

            rand_idx, cur = np.random.permutation(len(source_data)), 0
            for idx in range(N):
                if self.show: bar.next()

                context.fill(self.pad_idx)
                time.fill(self.mem_size)
                target.fill(0)
                mask.fill(-1.0 * np.inf)

                for b in range(self.batch_size):
                    if cur >= len(rand_idx):
                        break  # guard against running past the permutation on the final batch, as in Code Example #13
                    m = rand_idx[cur]
                    x[b][0] = target_data[m]
                    target[b] = target_label[m]
                    time[b, :len(source_loc_data[m])] = source_loc_data[m]
                    context[b, :len(source_data[m])] = source_data[m]
                    mask[b, :len(source_data[m])].fill(0)
                    cur = cur + 1

                z, a, loss, self.step = self.sess.run(
                    [self.z, self.optim, self.loss, self.global_step],
                    feed_dict={
                        self.input: x,
                        self.time: time,
                        self.target: target,
                        self.context: context,
                        self.mask: mask
                    })

                if idx % 500 == 0:
                    print("loss - ", loss)

                cost += np.sum(loss)

            if self.show: bar.finish()
        _, train_acc, train_prec, train_rec, train_f1 = self.test(data)
        return cost / N / self.batch_size, train_acc, train_prec, train_rec, train_f1
Code Example #10
    def our_test(self, data, word2idx, label='Test'):
        N = int(math.floor(len(data['answers']) / self.batch_size))
        cost = 0

        context = np.ndarray([self.batch_size, self.mem_size])
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)

        x = np.zeros([self.batch_size, self.nwords],
                     dtype=np.float32)  # bag-of-word to encode a query
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded

        for t in xrange(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar(label, max=N)

        random_perm = np.random.permutation(len(data['answers']))
        for idx in xrange(N):
            if self.show: bar.next()
            target.fill(0)
            x.fill(0)
            context.fill(word2idx[''])  # reset the whole batch to padding once; the original filled inside the per-example loop below, wiping previously built rows
            # constructing training examples for this batch
            for b in xrange(self.batch_size):
                # find which training example to use
                i = random_perm[idx * self.batch_size + b]

                # one-hot of target
                target[b][data['answers'][i]] = 1

                # context(only pick last part if len(context) is too long)
                raw_context = data['contexts'][i]
                raw_context = [word for sent in raw_context for word in sent]
                n_pick = min(self.mem_size, len(raw_context))
                context[b][:n_pick] = raw_context[-n_pick:]

                # x (bag-of-word of query)
                for word_id in data['querys'][i]:
                    x[b][word_id] += 1

            loss = self.sess.run(
                [self.loss],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context
                })
            cost += np.sum(loss)

        if self.show: bar.finish()
        return cost / N / self.batch_size
Code Example #11
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        # set value by column
        for t in xrange(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in xrange(N):
            if self.show: bar.next()
            target.fill(0)
            for b in xrange(self.batch_size):
                m = random.randrange(self.mem_size, len(data))
                target[b][data[m]] = 1
                context[b] = data[m - self.mem_size:m]

            _, loss, self.step = self.sess.run(
                [self.optim, self.loss, self.global_step],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context
                })

            cost += np.sum(loss)

            summary = self.sess.run(self.merged,
                                    feed_dict={
                                        self.input: x,
                                        self.time: time,
                                        self.target: target,
                                        self.context: context
                                    })
            self.writer.add_summary(summary,
                                    global_step=self.sess.run(
                                        self.global_step))

        if self.show: bar.finish()
        return cost / N / self.batch_size
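
This version runs the graph twice per step: once for the update and once more just to fetch the summary. Assuming self.merged is an ordinary merged-summary op, a single combined call is the usual idiom (not byte-identical: the original logged post-update values from a second forward pass):

            # single-pass variant (sketch)
            _, loss, self.step, summary = self.sess.run(
                [self.optim, self.loss, self.global_step, self.merged],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context
                })
            self.writer.add_summary(summary, global_step=self.step)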
Code Example #12
File: mm_model.py Project: zwytop/deep_learning
    def train(self, data):
        n_batch = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        u = np.ndarray([self.batch_size, self.edim],
                       dtype=np.float32)  # (N, 150) Will fill with 0.1
        T = np.ndarray([self.batch_size, self.mem_size],
                       dtype=np.int32)  # (N, 100) Will fill with 0..99
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded
        sentences = np.ndarray([self.batch_size, self.mem_size])

        u.fill(self.init_u)  # (N, 150) Fill with 0.1 since we do not need a query in the language model.
        for t in range(self.mem_size):  # (N, 100) 100 memory cells with time sequence 0..99.
            T[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=n_batch)

        for idx in range(n_batch):
            if self.show:
                bar.next()
            target.fill(0)  # (128, 10000)
            for b in range(self.batch_size):
                # Randomly pick a word in the data; that word is what the language model must predict.
                m = random.randrange(self.mem_size, len(data))
                target[b][data[m]] = 1  # Set the one-hot entry for the target word to 1.

                # (N, 100). Say we pick word 1000: we fill the memory with words 900 ... 999.
                # Each memory slot Xi holds a single word, in the original word order.
                sentences[b] = data[m - self.mem_size:m]

            _, loss, self.step = self.sess.run(
                [self.optim, self.loss, self.global_step],
                feed_dict={
                    self.u: u,
                    self.T: T,
                    self.target: target,
                    self.sentences: sentences
                })
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost / n_batch / self.batch_size
Code Example #13
    def train(self, data):
        source_data, source_loc_data, target_data, target_label = data
        N = int(math.ceil(len(source_data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, 1], dtype=np.int32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size], dtype=np.int32)
        context = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        mask = np.ndarray([self.batch_size, self.mem_size])

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        rand_idx, cur = np.random.permutation(len(source_data)), 0
        for idx in xrange(N):
            if self.show: bar.next()

            context.fill(self.pad_idx)
            time.fill(self.mem_size)
            target.fill(0)
            mask.fill(-1.0 * np.inf)

            for b in xrange(self.batch_size):
                if cur >= len(rand_idx): break
                m = rand_idx[cur]
                x[b][0] = target_data[m]
                target[b] = target_label[m]
                time[b, :len(source_loc_data[m])] = source_loc_data[m]
                context[b, :len(source_data[m])] = source_data[m]
                mask[b, :len(source_data[m])].fill(0)
                cur = cur + 1

            z, _, loss, self.step = self.sess.run([self.z, self.optim,
                                                   self.loss,
                                                   self.global_step],
                                                  feed_dict={
                                                      self.input: x,
                                                      self.time: time,
                                                      self.target: target,
                                                      self.context: context,
                                                      self.mask: mask})

            cost += np.sum(loss)

        if self.show: bar.finish()
        _, train_acc, _, _, _, _ = self.test(data, True)
        return cost / N / self.batch_size, train_acc
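
The mask.fill(-1.0 * np.inf) pattern above (also in Code Example #9) implements additive attention masking: adding negative infinity to the logits of padded positions drives their softmax weights to exactly zero. A tiny self-contained sketch:

import numpy as np

logits = np.array([1.0, 2.0, 3.0])
mask = np.array([0.0, 0.0, -np.inf])  # the last position is padding
w = np.exp(logits + mask)
w /= w.sum()                          # softmax; w[2] == 0.0 exactly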
Code Example #14
    def test(self, cnsldted_data, label='Test'):
        (data, trgt_aspect, trgt_Y, cntxt_mask) = cnsldted_data
        N = int(math.ceil(len(data) / self.batch_size)) - 1
        cost = 0
        accurate = 0

        x = np.ndarray([self.batch_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nlabels])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])
        mask = np.ndarray([self.batch_size, self.mem_size])

        #x.fill(self.init_hid)
        #for t in xrange(self.mem_size):
        #    time[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar(label, max=N)

        for idx in xrange(N):
            if self.show: bar.next()
            target.fill(0)
            curr_indx = idx * self.batch_size
            for b in xrange(self.batch_size):
                senti_labl = trgt_Y[curr_indx + b]
                target[b][self.labels_dict[senti_labl]] = 1
                context[b] = data[curr_indx + b]
                mask[b] = cntxt_mask[curr_indx + b]
                x[b] = trgt_aspect[curr_indx + b]
                # above assumes each element of the list `data` is a list of
                # words in the original sentence

            loss, num_accurate = self.sess.run(
                [self.loss, self.num_accurate],
                feed_dict={
                    self.input: x,
                    #self.time: time,
                    self.target: target,
                    self.context: context,
                    self.mask: mask
                })
            cost += np.sum(loss)
            accurate += num_accurate

        if self.show: bar.finish()
        return cost / N / self.batch_size, accurate * 1. / N / self.batch_size
Code Example #15
File: rg.py Project: leVirve/ResearchGate.py
async def save_profiles(names):
    conn = aiohttp.TCPConnector(limit=50, verify_ssl=False)
    with aiohttp.ClientSession(connector=conn) as session:
        ps = [Profile(name, session) for name in names]
        futures = [asyncio.ensure_future(p.get_info()) for p in ps]
        futures += [asyncio.ensure_future(p.get_publications()) for p in ps]

        progress, step = ProgressBar(), 10
        progress.max = len(futures) // step

        for i, future in enumerate(asyncio.as_completed(futures), 1):
            if i % step == 0:
                progress.next()
            await future
        progress.finish()

    return [future.result() for future in futures]
Code Example #16
    def test(self, data, label='Test'):
        try:
            N = int(math.ceil(len(data) / self.batch_size))
            cost = 0

            x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
            time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
            target = np.zeros([self.batch_size,
                               self.nwords])  # one-hot-encoded
            context = np.ndarray([self.batch_size, self.mem_size])

            x.fill(self.init_hid)
            for t in range(self.mem_size):
                time[:, t].fill(t)

            if self.show:
                from utils import ProgressBar
                bar = ProgressBar(label, max=N)

            m = self.mem_size
            for idx in range(N):
                if self.show: bar.next()
                target.fill(0)
                for b in range(self.batch_size):
                    target[b][data[m]] = 1
                    context[b] = data[m - self.mem_size:m]
                    m += 1

                    if m >= len(data):
                        m = self.mem_size

                loss = self.sess.run(
                    [self.loss],
                    feed_dict={
                        self.input: x,
                        self.time: time,
                        self.target: target,
                        self.context: context
                    })
                cost += np.sum(loss)

            if self.show: bar.finish()
            return cost / N / self.batch_size
        except Exception as e:
            print(e)
Code Example #17
File: model.py Project: ZeweiChu/MemN2N-tensorflow
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32) # batch_size * internal_state_dimension
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size]) # 128 * 100

        x.fill(self.init_hid)
        for t in range(self.mem_size):
            time[:,t].fill(t)
        '''
        time = array([[ 0,  1,  2, ..., 97, 98, 99],
                      ...,
                      [ 0,  1,  2, ..., 97, 98, 99]], dtype=int32)   # 128 * 100
        '''

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in range(N):
            if self.show: bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                # generate a random index from mem_size (100) up to the length of the data
                m = random.randrange(self.mem_size, len(data))
                # for batch row b, the one-hot entry for word data[m] is set to one
                target[b][data[m]] = 1
                # the context is the range from (m - self.mem_size) to m
                context[b] = data[m - self.mem_size:m]

            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.input: x,
                                                    self.time: time,
                                                    self.target: target,
                                                    self.context: context})
            cost += np.sum(loss)

        if self.show: bar.finish()
        return cost/N/self.batch_size
Code Example #18
 def predict(self, steps, freq="D", regressors=None):
     print("Forecasting...")
     progress_bar = ProgressBar(len(self.models.items()))
     for item, model in self.models.items():
         future = model.make_future_dataframe(steps, freq=freq).set_index("ds")
         if regressors is not None:
             future = pd.merge(future, regressors, left_index=True, right_index=True, how="left")
         pred = model.predict(future.reset_index()).set_index("ds")
         pred = pred[["yhat", "yhat_lower", "yhat_upper"]]
         self.fcst[item] = pred
         if self.use_boxcox:
             self.fcst[item] = inv_boxcox(
                 self.fcst[item], 
                 self.lmbda_boxcox[item])
         progress_bar.update()
     progress_bar.finish()
     fcst_df = pd.concat(self.fcst, axis=1).sort_index(axis=1)
     return fcst_df
Code Example #19
File: mm_model.py Project: lzqkean/deep_learning
    def train(self, data):
        n_batch = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        u = np.ndarray([self.batch_size, self.edim], dtype=np.float32)      # (N, 150) Will fill with 0.1
        T = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)    # (N, 100) Will fill with 0..99
        target = np.zeros([self.batch_size, self.nwords])                   # one-hot-encoded
        sentences = np.ndarray([self.batch_size, self.mem_size])

        u.fill(self.init_u)   # (N, 150) Fill with 0.1 since we do not need query in the language model.
        for t in range(self.mem_size):   # (N, 100) 100 memory cell with 0 to 99 time sequence.
            T[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=n_batch)

        for idx in range(n_batch):
            if self.show:
                bar.next()
            target.fill(0)      # (128, 10,000)
            for b in range(self.batch_size):
                # Randomly pick a word in the data; that word is what the language model must predict.
                m = random.randrange(self.mem_size, len(data))
                target[b][data[m]] = 1                       # Set the one-hot entry for the target word to 1.

                # (N, 100). Say we pick word 1000: we fill the memory with words 900 ... 999.
                # Each memory slot Xi holds a single word, in the original word order.
                sentences[b] = data[m - self.mem_size:m]

            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.u: u,
                                                    self.T: T,
                                                    self.target: target,
                                                    self.sentences: sentences})
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost/n_batch/self.batch_size
Code Example #20
File: mm_model.py Project: zwytop/deep_learning
    def test(self, data, label='Test'):
        n_batch = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        u = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        T = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded
        sentences = np.ndarray([self.batch_size, self.mem_size])

        u.fill(self.init_u)
        for t in range(self.mem_size):
            T[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar(label, max=n_batch)

        m = self.mem_size
        for idx in range(n_batch):
            if self.show:
                bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                target[b][data[m]] = 1
                sentences[b] = data[m - self.mem_size:m]
                m += 1

                if m >= len(data):
                    m = self.mem_size

            loss = self.sess.run(
                [self.loss],
                feed_dict={
                    self.u: u,
                    self.T: T,
                    self.target: target,
                    self.sentences: sentences
                })
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost / n_batch / self.batch_size
Code Example #21
File: main.py Project: fanfeifan/MemN2N-Mxnet-Gluon
def train(model, trainer, data, label):
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    # data_iter = PTBDataIter(data,
    #                       nwords=args.nwords,
    #                       batch_size=args.batch_size,
    #                       edim=args.edim,
    #                       mem_size=args.mem_size,
    #                       init_hid=args.init_hid,
    #                       is_test_data=False)
    N = int(math.ceil(len(data) / args.batch_size))
    cost = 0.0
    if args.show:
        from utils import ProgressBar
        bar = ProgressBar(label, max=N)
    x = nd.zeros((args.batch_size, args.edim),ctx=ctx)
    x[:,:] = args.init_hid
    time = nd.zeros((args.batch_size, args.mem_size),ctx=ctx)
    for t in xrange(args.mem_size):
        time[:,t] = t
    target = nd.zeros((args.batch_size,),ctx=ctx)
    context = nd.zeros((args.batch_size, args.mem_size),ctx=ctx)
    for idx in range(N):
        if args.show: bar.next()
        target[:] = 0
        for b in xrange(args.batch_size):
            m = random.randrange(args.mem_size, len(data))
            target[b] = data[m]
            context[b] = data[m - args.mem_size:m]
    #for batch in data_iter:
        #if args.show: bar.next()
        with autograd.record():
            out = model(x, time, context)
            loss = softmax_cross_entropy(out, target)
        loss.backward()  # backward outside the record scope: the usual Gluon pattern

        grads = [i.grad() for i in model.collect_params().values()]
        gluon.utils.clip_global_norm(grads, args.max_grad_norm)
        trainer.step(args.batch_size)
        cost += nd.sum(loss).asscalar()

    if args.show: bar.finish()
    return cost/N/args.batch_size
Code Example #22
def run_GloVe(config, model):
    if config.show_progress:
        print("Glove model parameters")
        for p in model.parameters():
            print(p.size())
    optimizer = optim.Adagrad(model.parameters(), config.glove_lr)
    if config.show_progress:
        print("Train start")
        print('Timestamp: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))
    for epoch in range(config.glove_epochs):
        N = ((config.unique_word_size*config.unique_word_size) // config.glove_batch_size)
        losses = []
        if config.show_progress:
            bar = ProgressBar('Glove Train epoch {} / {}'.format(epoch+1,config.glove_epochs), max=N)
        for i in range(N):
            if config.cuda and config.gpu_num > 1:
                word_u_variable, word_v_variable, words_co_occurences, words_weights = model.module.next_batch()
            else:
                word_u_variable, word_v_variable, words_co_occurences, words_weights = model.next_batch()
            forward_output = model(word_u_variable, word_v_variable)
            loss = (torch.pow((forward_output - torch.log(words_co_occurences)), 2) * words_weights).sum()
            losses.append(loss.data[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if config.show_progress:
                bar.next()
        if config.show_progress:
            bar.finish()
            print('Timestamp: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))
        print('Train Epoch: {} \t Loss: {:.6f}'.format(epoch + 1, np.mean(losses)))
        if config.cuda and config.gpu_num > 1:
            np.savez('./checkpoints/word_embedding_{}.npz'.format(config.word_edim), 
                word_embedding_array=model.module.embedding(), 
                dictionary=model.module.dictionary
            )
        else:
            np.savez('./checkpoints/word_embedding_{}.npz'.format(config.word_edim), 
                word_embedding_array=model.embedding(), 
                dictionary=model.dictionary
            )
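
The loss in run_GloVe is the GloVe weighted least-squares objective, sum over (i, j) of f(X_ij) * (score_ij - log X_ij)^2, with words_weights presumably holding f(X_ij). The snippet does not show how those weights are built; a sketch of the standard weighting function from the GloVe paper:

import numpy as np

def glove_weight(x, x_max=100.0, alpha=0.75):
    # f(x) from the GloVe paper: (x / x_max)^alpha, capped at 1
    return np.minimum(np.power(x / x_max, alpha), 1.0)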
Code Example #23
    def train(self, data):
        try:
            N = int(math.ceil(len(data) / self.batch_size))
            cost = 0

            x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
            time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
            target = np.zeros([self.batch_size,
                               self.nwords])  # one-hot-encoded
            context = np.ndarray([self.batch_size, self.mem_size])

            x.fill(self.init_hid)
            for t in range(self.mem_size):
                time[:, t].fill(t)

            if self.show:
                from utils import ProgressBar
                bar = ProgressBar('Train', max=N)

            for idx in range(N):
                if self.show: bar.next()
                target.fill(0)
                for b in range(self.batch_size):
                    m = random.randrange(self.mem_size, len(data))
                    target[b][data[m]] = 1
                    context[b] = data[m - self.mem_size:m]

                _, loss, self.step = self.sess.run(
                    [self.optim, self.loss, self.global_step],
                    feed_dict={
                        self.input: x,
                        self.time: time,
                        self.target: target,
                        self.context: context
                    })
                cost += np.sum(loss)
                print("idx:{0} , cost :{1}".format(idx, cost))
            if self.show: bar.finish()
            return cost / N / self.batch_size
        except Exception as e:
            print(e)
Code Example #24
    def train(self, cnsldted_data):
        (data, trgt_aspect, trgt_Y, cntxt_mask) = cnsldted_data
        N = int(math.ceil(len(data) / self.batch_size)) - 1
        cost = 0
        accurate = 0

        x = np.ndarray([self.batch_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nlabels])
        context = np.ndarray([self.batch_size, self.mem_size])
        mask = np.ndarray([self.batch_size, self.mem_size])

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in range(N):
            if self.show:
                bar.next()
            target.fill(0)
            curr_indx = idx * self.batch_size
            for b in range(self.batch_size):
                senti_labl = trgt_Y[curr_indx + b]
                target[b][self.labels_dict[senti_labl]] = 1
                context[b] = data[curr_indx + b]
                mask[b] = cntxt_mask[curr_indx + b]
                x[b] = trgt_aspect[curr_indx + b]
            _, loss, self.step, num_accurate = self.sess.run(
                [self.optim, self.loss, self.global_step, self.num_accurate],
                feed_dict={
                    self.input: x,
                    #self.time: time,
                    self.target: target,
                    self.context: context,
                    self.mask: mask
                })
            cost += np.sum(loss)
            accurate += num_accurate

        if self.show: bar.finish()
        return cost / N / self.batch_size, accurate * 1. / N / self.batch_size
Code Example #25
File: main.py Project: fanfeifan/MemN2N-Mxnet-Gluon
def test(model, data, label):
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    #data_iter = PTBDataIter(data,
                          # nwords=args.nwords,
                          # batch_size=args.batch_size,
                          # edim=args.edim,
                          # mem_size=args.mem_size,
                          # init_hid=args.init_hid,
                          # is_test_data=True)
    N = int(math.ceil(len(data) / args.batch_size))
    cost = 0.0
    if args.show:
        from utils import ProgressBar
        bar = ProgressBar(label, max=N)
    x = nd.zeros((args.batch_size, args.edim),ctx=ctx)
    x[:,:] = args.init_hid
    time = nd.zeros((args.batch_size, args.mem_size),ctx=ctx)
    for t in xrange(args.mem_size):
        time[:,t] = t
    target = nd.zeros((args.batch_size,),ctx=ctx)
    context = nd.zeros((args.batch_size, args.mem_size),ctx=ctx)
    m = args.mem_size
    for idx in range(N):
        target[:] = 0
        for b in xrange(args.batch_size):
            #m = random.randrange(args.mem_size, len(data))
            target[b] = data[m]
            context[b] = data[m - args.mem_size:m]
            m += 1
            if m >= len(data):
                m = args.mem_size  # wrap around rather than break; a bare break left stale rows and an out-of-range index on the next batch

    #for batch in data_iter:
        if args.show: bar.next()
        out = model(x, time, context)
        loss = softmax_cross_entropy(out, target)
        cost += nd.sum(loss).asscalar()

    if args.show: bar.finish()
    return cost/N/args.batch_size
Code Example #26
def quote_many(num_quotes=1, conn_limit=20, progress=None, step=10):
    if progress is None:
        progress = ProgressBar()
        progress.max = num_quotes // step

    logger.info('Process total %d quotes with max %d concurrent connections'
                % (num_quotes, conn_limit))
    logger.debug('... progress bar increment step size: %d coroutines' % step)

    semaphore = asyncio.Semaphore(conn_limit)

    # wrap coroutines with future
    # For Python 3.4.4+, asyncio.ensure_future(...)
    # will wrap coro as Task and keep input the same
    # if it is already Future.
    try:
        coro_to_fut = asyncio.ensure_future
    except AttributeError:
        logger.warning('asyncio.ensure_future requires Python 3.4.4+. '
                       'Fall back to asyncio.async')
        coro_to_fut = getattr(asyncio, 'async')  # via getattr: `async` is a reserved keyword on Python 3.7+, so the bare attribute no longer parses
    futures = [
        coro_to_fut(quote_with_lock(semaphore))
        for i in range(num_quotes)
    ]

    t_start = datetime.today()
    for ith, fut in enumerate(asyncio.as_completed(futures), 1):
        if ith % step == 0:
            progress.next()
        yield from fut
    t_end = datetime.today()
    progress.finish()

    logger.info('All coroutines complete in {:.2f} seconds'.format(
        (t_end - t_start).total_seconds()
    ))
    quotes = [fut.result() for fut in futures]
    return quotes
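
For reference, the same bounded fan-out on Python 3.7+, where asyncio.run replaces the manual loop and the asyncio.async fallback is no longer relevant; fetch_one is a placeholder for quote_with_lock:

import asyncio

async def fetch_one(sem):
    async with sem:
        await asyncio.sleep(0)  # placeholder for the real request
        return 'quote'

async def main(num_quotes=100, conn_limit=20):
    sem = asyncio.Semaphore(conn_limit)
    return await asyncio.gather(*(fetch_one(sem) for _ in range(num_quotes)))

quotes = asyncio.run(main())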
Code Example #27
File: mm_model.py Project: lzqkean/deep_learning
    def test(self, data, label='Test'):
        n_batch = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        u = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        T = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        sentences = np.ndarray([self.batch_size, self.mem_size])

        u.fill(self.init_u)
        for t in range(self.mem_size):
            T[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar(label, max=n_batch)

        m = self.mem_size
        for idx in range(n_batch):
            if self.show:
                bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                target[b][data[m]] = 1
                sentences[b] = data[m - self.mem_size:m]
                m += 1

                if m >= len(data):
                    m = self.mem_size

            loss = self.sess.run([self.loss], feed_dict={self.u: u,
                                                         self.T: T,
                                                         self.target: target,
                                                         self.sentences: sentences})
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost/n_batch/self.batch_size
Code Example #28
    def gen_words(self, data, dummy_idx, N):
        # N = int(math.ceil(len(data) / self.batch_size))
        data = data.copy()
        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in range(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Generating', max=N)

        for n in range(N):
            if self.show:
                bar.next()  # advance the bar inside the generation loop; the original spun it to completion in a separate, empty loop first
            context = np.zeros([self.batch_size, self.mem_size]) + dummy_idx

            min_len = min(len(data), self.mem_size)
            context[:, -min_len:] = data[-min_len:]

            prediction = self.sess.run(self.output, feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context})

            predicted_word_index = np.argmax(prediction[0])
            print(predicted_word_index)
            data = np.append(data, predicted_word_index)

        if self.show:
            bar.finish()
        return data
Code Example #29
    def test(self, data, label='Test'):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in range(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar(label, max=N)

        m = self.mem_size
        for idx in range(N):
            if self.show:
                bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                target[b][data[m]] = 1
                context[b] = data[m - self.mem_size:m]
                m += 1

                if m >= len(data):
                    m = self.mem_size

            loss = self.sess.run([self.loss], feed_dict={self.input: x,
                                                         self.time: time,
                                                         self.target: target,
                                                         self.context: context})
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost/N/self.batch_size
Code Example #30
 def fit(self, train_df):
     self.train_ds = train_df.index
     print("Fitting...")
     progress_bar = ProgressBar(len(train_df.columns))
     for item in train_df.columns:
         target = train_df[item].interpolate().bfill()
         if self.use_boxcox:
             idx = target.index
             target, self.lmbda_boxcox[item] = boxcox(target)
             target = pd.Series(target, index=idx)
         self.models[item] = pm.auto_arima(
             target,
             seasonal=False,
             exogenous=fourier(
                 len(target), 
                 seasonality=self.seasonality, 
                 n_terms=self.n_fourier_terms), 
             method="bfgs",
             suppress_warnings=True,
             **self.arima_config)
         progress_bar.update()
     progress_bar.finish()
     return self.models
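
The Box-Cox handling here pairs with inv_boxcox in the matching predict methods (Code Examples #5 and #18): transform before fitting, store the per-series lambda, invert on the forecast scale. A minimal round-trip sketch:

import numpy as np
from scipy.stats import boxcox
from scipy.special import inv_boxcox

y = np.array([1.0, 2.0, 4.0, 8.0])
y_t, lmbda = boxcox(y)           # transform; lmbda is what fit() stores per series
y_back = inv_boxcox(y_t, lmbda)  # what predict() applies to the forecast
assert np.allclose(y, y_back)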
Code Example #31
File: model.py Project: eunsook-coding/TF_dsksd
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size)) # math.ceil : returns smallest integer not less than x.
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid) # Initialization: unlike the QA task there is no question, so the input is fixed to a constant 0.1 vector (no embedding either)
        for t in range(self.mem_size):
            time[:,t].fill(t) # [[0,1,2,3,4,...,mem_size] ... ]

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in range(N):
            if self.show: bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                m = random.randrange(self.mem_size, len(data)) # pick one position between 100 and the data length
                target[b][data[m]] = 1 # the target word is chosen at random
                context[b] = data[m - self.mem_size:m] # the 100 words preceding that word serve as the context

            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.input: x, # vector fixed at 0.1
                                                    self.time: time, # memory-slot lookup for the temporal encoding
                                                    self.target: target, # one-hot: the 101st word, the one to predict
                                                    self.context: context}) # the preceding 100 words
            cost += np.sum(loss)

        if self.show: bar.finish()
        return cost/N/self.batch_size
Code Example #32
    def test(self, inputs):
        if self.debug:
            test_size = 100
        else:
            test_size = inputs.epoch
        batchList = np.random.randint(
            inputs.epoch - 1,
            size=test_size)  # sample batch indices from [0, inputs.epoch - 1); the last batch is never chosen
        n = 0
        cost = 0
        accuracy_total = 0
        mae_total = 0
        mse_total = 0

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('test ', max=test_size)

        for i in batchList:
            # Load next batch into Dataset class instance: inputs
            # inputs.gene_batch(i)
            indx = i

            nextBatchData = np.array(inputs.docs[indx]).astype(
                np.int32).transpose()
            # Make the labels one-hot
            labels_temp = inputs.label[indx]
            nextBatchLabels = np.array(np.eye(self.classes)[labels_temp],
                                       dtype=np.float32)
            nextWordMask = np.array(inputs.wordmask[indx]).astype(
                np.float32).transpose()
            nextSentenceMask = np.array(inputs.sentencemask[indx]).astype(
                np.float32).transpose()
            nextUsrDocs = inputs.gene_usr_context(inputs.usr[indx],
                                                  self.mem_size)
            nextPrdDocs = inputs.gene_prd_context(inputs.prd[indx],
                                                  self.mem_size)
            nextMainDocs = self.doc_emb_test[i * self.batch_size:(i + 1) *
                                             self.batch_size, :]

            _loss, _accuracy, _correctPred, _prediction, _pred1, _label1, _mse, _doc_representation = self.sess.run(
                [
                    self.loss, self.accuracy, self.correctPred,
                    self.prediction, self.temp1, self.temp2, self.mse,
                    self.doc_representation
                ],
                feed_dict={
                    self.input_data: nextBatchData,
                    self.word_vecs: self.word_vectors,
                    self.labels: nextBatchLabels,
                    self.wordmask: nextWordMask,
                    self.sentencemask: nextSentenceMask,
                    self.main_docs: nextMainDocs,
                    self.usr_docs: nextUsrDocs,
                    self.prd_docs: nextPrdDocs
                })
            if self.doc_emb_method != 'preload_no_update':  #update document representation
                self.doc_emb_test[i * self.batch_size:(i + 1) *
                                  self.batch_size, :] = _doc_representation
            # save final prediction result here to self.pred_test from self.temp1
            self.pred_test[i * self.batch_size:(i + 1) *
                           self.batch_size] = _pred1
            mae = np.sum(
                np.absolute(_pred1.astype("float") -
                            _label1.astype("float"))) / self.batch_size
            cost += _loss
            accuracy_total += _accuracy
            mse_total += _mse
            mae_total += mae

            n += 1
            if self.show: bar.next()
        if self.show: bar.finish()
        return cost / test_size, accuracy_total / test_size, mae_total / test_size, np.sqrt(
            mse_total / test_size)
Code Example #33
File: model.py Project: alanjia163/aspect_sentiment
    def train(self, data):
        source_data, source_loc_data, target_data, target_label, _ = data
        N = int(math.ceil(len(source_data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, 1], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, 3])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        rand_idx, cur = np.random.permutation(len(source_data)), 0
        for idx in xrange(N):
            if self.show: bar.next()

            context.fill(self.pad_idx)
            time.fill(self.mem_size)
            target.fill(0)
            '''
            Initialize all the padding vectors to 0 before backprop.
            TODO: Code is 5x slower due to the following initialization.
            '''
            emb_a = self.A.eval()
            emb_a[self.pad_idx, :] = 0
            emb_b = self.B.eval()
            emb_b[self.pad_idx, :] = 0
            emb_c = self.C.eval()
            emb_c[self.pad_idx, :] = 0
            emb_ta = self.T_A.eval()
            emb_ta[self.mem_size, :] = 0
            emb_tb = self.T_B.eval()
            emb_tb[self.mem_size, :] = 0
            self.sess.run(self.A.assign(emb_a))
            self.sess.run(self.B.assign(emb_b))
            self.sess.run(self.C.assign(emb_c))
            self.sess.run(self.T_A.assign(emb_ta))
            self.sess.run(self.T_B.assign(emb_tb))

            for b in xrange(self.batch_size):
                m = rand_idx[cur]
                x[b][0] = target_data[m]
                target[b][target_label[m]] = 1
                time[b, :len(source_loc_data[m])] = source_loc_data[m]
                context[b, :len(source_data[m])] = source_data[m]
                cur = cur + 1

            a, loss, self.step = self.sess.run(
                [self.optim, self.loss, self.global_step],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context
                })
            cost += np.sum(loss)

        if self.show: bar.finish()
        _, train_acc = self.test(data)
        return cost / N / self.batch_size, train_acc
Code Example #34
    def train(self, data):
        ''' Function to train the model on the provided data '''

        source_data, source_loc_data, target_data, target_label, orig_sent_data, delta_inv_data, W_ma_data = data
        N = int(math.ceil(len(source_data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, 1], dtype=np.int32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size], dtype=np.int32)
        context = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        mask = np.ndarray([self.batch_size, self.mem_size])
        delta_inv = np.ndarray([self.batch_size, self.mem_size, self.mem_size],
                               dtype=np.float32)
        W_ma = np.ndarray([self.batch_size, self.mem_size], dtype=np.float32)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        rand_idx, cur = np.random.permutation(len(source_data)), 0
        for idx in xrange(N):
            if self.show: bar.next()

            context.fill(self.pad_idx)
            time.fill(self.mem_size)
            target.fill(0)
            mask.fill(0)

            for b in xrange(self.batch_size):
                m = rand_idx[cur]
                x[b][0] = target_data[m]
                target[b] = target_label[m]
                time[b, :len(source_loc_data[m])] = source_loc_data[m]
                context[b, :len(source_data[m])] = source_data[m]
                mask[b, :len(source_data[m])].fill(1)

                crt_delta = delta_inv_data[m]
                delta_inv[b] = np.pad(
                    crt_delta, [(0, self.mem_size - len(crt_delta[0]))] * 2,
                    'constant',
                    constant_values=0)
                crt_wma = W_ma_data[m]
                crt_wma = crt_wma.reshape(crt_wma.shape[0])
                W_ma[b] = np.pad(crt_wma, [(0, self.mem_size - len(crt_wma))],
                                 'constant',
                                 constant_values=0)
                cur = cur + 1

            dinv, dout, do, kout, kinc, kin, aspin, C0, z, a, loss, self.step = self.sess.run(
                [
                    self.delta_inv, self.Dout, self.dropped_out, self.A,
                    self.Ain_c, self.Ain, self.ASPin, self.C0, self.z,
                    self.optim, self.loss, self.global_step
                ],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context,
                    self.mask: mask,
                    self.delta_inv: delta_inv,
                    self.W_ma: W_ma,
                    self.A: self.pre_trained_context_wt,
                    self.ASP: self.pre_trained_target_wt,
                    self.LSTM_inp_dout: 0.5,
                    self.LSTM_out_dout: 0.7,
                    self.Final_dout: 0.7
                })

            if idx == 0:
                # The extra tensors fetched above (dinv, dout, do, kout,
                # kinc, kin, aspin, C0, z) are only used for ad-hoc debug
                # printing on the first batch; the prints are disabled.
                pass
            cost += np.sum(loss)

        if self.show: bar.finish()
        _, train_acc = self.test(data)
        return cost / N / self.batch_size, train_acc
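
The delta_inv padding above applies the same (before, after) pair to both axes of a square matrix. A tiny self-contained illustration with made-up sizes:

import numpy as np

mem_size = 5
crt_delta = np.arange(9, dtype=np.float32).reshape(3, 3)  # toy square matrix

# [(0, mem_size - 3)] * 2 pads rows and columns on the bottom/right only,
# embedding the 3x3 block in the top-left corner of a 5x5 zero matrix.
padded = np.pad(crt_delta, [(0, mem_size - crt_delta.shape[0])] * 2,
                'constant', constant_values=0)
print(padded.shape)  # (5, 5)
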
Code example #35
0
File: model.py  Project: mehak0503/Q-A-Chatbot
    def test(self, test_stories, test_questions, label='Test'):
        N = int(math.ceil(len(test_questions) / self.batch_size))
        cost = 0

        if self.show_progress:
            bar = ProgressBar(label, max=N)

        for idx in range(N):

            if self.show_progress:
                bar.next()

            if idx == N - 1:
                iterations = len(test_questions) - (N - 1) * self.batch_size
            else:
                iterations = self.batch_size

            query = np.ndarray([iterations, self.max_words], dtype=np.int32)
            time = np.zeros([iterations, self.mem_size], dtype=np.int32)
            target = np.zeros([iterations, self.nwords], dtype=np.float32)
            context = np.ndarray([iterations, self.mem_size, self.max_words],
                                 dtype=np.int32)

            for b in range(iterations):
                m = idx * self.batch_size + b

                curr_q = test_questions[m]
                q_text = curr_q['question']
                story_ind = curr_q['story_index']
                sent_ind = curr_q['sentence_index']
                answer = curr_q['answer'][0]

                curr_s = test_stories[story_ind]
                curr_c = curr_s[:sent_ind + 1]

                if len(curr_c) >= self.mem_size:
                    curr_c = curr_c[-self.mem_size:]

                    for t in range(self.mem_size):
                        time[b, t] = t
                else:

                    for t in range(len(curr_c)):
                        time[b, t] = t

                    while len(curr_c) < self.mem_size:
                        curr_c.append([0] * self.max_words)

                query[b, :] = q_text
                target[b, answer] = 1
                context[b, :, :] = curr_c
            # Evaluate the loss only; the training op must not run at test time.
            loss = self.sess.run(
                self.loss,
                feed_dict={
                    self.query: query,
                    self.time: time,
                    self.target: target,
                    self.context: context
                })
            cost += np.sum(loss)

        if self.show_progress:
            bar.finish()

        return cost / len(test_questions)
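
The per-element time assignment in the loop above can also be written as a single vectorized slice assignment; a small sketch with made-up sizes:

import numpy as np

batch_size, mem_size = 2, 4
time = np.zeros([batch_size, mem_size], dtype=np.int32)

# Equivalent to "for t in range(curr_len): time[b, t] = t" in one shot.
b, curr_len = 0, 3
time[b, :curr_len] = np.arange(curr_len)
print(time)  # [[0 1 2 0]
             #  [0 0 0 0]]
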
Code example #36
0
def train(net, params, q_data, qa_data, label):
    N = int(math.floor(len(q_data) / params.batch_size))
    q_data = q_data.T  # Shape: (200,3633)
    qa_data = qa_data.T  # Shape: (200,3633)
    # Shuffle the data
    shuffled_ind = np.arange(q_data.shape[1])
    np.random.shuffle(shuffled_ind)
    q_data = q_data[:, shuffled_ind]
    qa_data = qa_data[:, shuffled_ind]

    pred_list = []
    target_list = []

    if params.show:
        from utils import ProgressBar
        bar = ProgressBar(label, max=N)

    # init_memory_value = np.random.normal(0.0, params.init_std, ())
    for idx in range(N):
        if params.show: bar.next()

        q_one_seq = q_data[:, idx * params.batch_size:(idx + 1) *
                           params.batch_size]
        input_q = q_one_seq[:, :]  # Shape (seqlen, batch_size)
        qa_one_seq = qa_data[:, idx * params.batch_size:(idx + 1) *
                             params.batch_size]
        input_qa = qa_one_seq[:, :]  # Shape (seqlen, batch_size)

        # Convert the qa index into a correctness label:
        # correct -> 1.0, wrong -> 0.0, padding -> -1.0
        target = qa_one_seq[:, :]
        target = np.floor((target - 1) / params.n_question)

        input_q = mx.nd.array(input_q)
        input_qa = mx.nd.array(input_qa)
        target = mx.nd.array(target)

        data_batch = mx.io.DataBatch(data=[input_q, input_qa], label=[target])
        net.forward(data_batch, is_train=True)
        pred = net.get_outputs()[0].asnumpy()  #(seqlen * batch_size, 1)
        net.backward()

        norm_clipping(net._exec_group.grad_arrays, params.maxgradnorm)
        net.update()

        target = target.asnumpy().reshape(
            (-1, ))  # correct: 1.0; wrong 0.0; padding -1.0

        nopadding_index = np.flatnonzero(target != -1.0)
        nopadding_index = nopadding_index.tolist()
        pred_nopadding = pred[nopadding_index]
        target_nopadding = target[nopadding_index]

        pred_list.append(pred_nopadding)
        target_list.append(target_nopadding)

    if params.show: bar.finish()

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    loss = binaryEntropy(all_target, all_pred)
    print("all_target", all_target)
    print("all_pred", all_pred)
    auc = compute_auc(all_target, all_pred)
    accuracy = compute_accuracy(all_target, all_pred)

    return loss, accuracy, auc
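
The target transformation relies on the encoding qa = q + answer * n_question, with 0 reserved for padding, so floor((qa - 1) / n_question) recovers the correctness label. A worked example with an assumed n_question of 100:

import numpy as np

n_question = 100
qa = np.array([0, 7, 107])  # padding, wrong answer to q7, correct answer to q7

# floor((qa - 1) / n_question): padding -> -1.0, wrong -> 0.0, correct -> 1.0
target = np.floor((qa - 1) / n_question)
print(target)  # [-1.  0.  1.]
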
Code example #37
0
def test(net, params, q_data, qa_data, label):
    # dataArray: [ array([[],[],..])] Shape: (3633, 200)
    N = int(math.ceil(float(len(q_data)) / float(params.batch_size)))
    q_data = q_data.T  # Shape: (200,3633)
    qa_data = qa_data.T  # Shape: (200,3633)
    seq_num = q_data.shape[1]
    pred_list = []
    target_list = []
    if params.show:
        from utils import ProgressBar
        bar = ProgressBar(label, max=N)

    count = 0
    element_count = 0
    for idx in range(N):
        if params.show: bar.next()

        inds = np.arange(idx * params.batch_size,
                         (idx + 1) * params.batch_size)
        q_one_seq = q_data.take(inds, axis=1, mode='wrap')
        qa_one_seq = qa_data.take(inds, axis=1, mode='wrap')

        input_q = q_one_seq[:, :]  # Shape (seqlen, batch_size)
        input_qa = qa_one_seq[:, :]  # Shape (seqlen, batch_size)
        # Convert the qa index into a correctness label:
        # correct -> 1.0, wrong -> 0.0, padding -> -1.0
        target = qa_one_seq[:, :]
        target = np.floor((target - 1) / params.n_question)

        input_q = mx.nd.array(input_q)
        input_qa = mx.nd.array(input_qa)
        target = mx.nd.array(target)

        data_batch = mx.io.DataBatch(data=[input_q, input_qa], label=[])
        net.forward(data_batch, is_train=False)
        pred = net.get_outputs()[0].asnumpy()
        target = target.asnumpy()
        if (idx + 1) * params.batch_size > seq_num:
            real_batch_size = seq_num - idx * params.batch_size
            target = target[:, :real_batch_size]
            pred = pred.reshape(
                (params.seqlen, params.batch_size))[:, :real_batch_size]
            pred = pred.reshape((-1, ))
            count += real_batch_size
        else:
            count += params.batch_size

        target = target.reshape(
            (-1, ))  # correct: 1.0; wrong 0.0; padding -1.0
        nopadding_index = np.flatnonzero(target != -1.0)
        nopadding_index = nopadding_index.tolist()
        pred_nopadding = pred[nopadding_index]
        target_nopadding = target[nopadding_index]

        element_count += pred_nopadding.shape[0]
        pred_list.append(pred_nopadding)
        target_list.append(target_nopadding)

    if params.show: bar.finish()
    assert count == seq_num

    all_pred = np.concatenate(pred_list, axis=0)
    all_target = np.concatenate(target_list, axis=0)

    loss = binaryEntropy(all_target, all_pred)
    auc = compute_auc(all_target, all_pred)
    accuracy = compute_accuracy(all_target, all_pred)

    return loss, accuracy, auc
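
np.take with mode='wrap' is what lets the test loop above request a full batch of column indices even when the last batch runs past seq_num; the out-of-range indices wrap to the start and are trimmed later via real_batch_size. A toy illustration:

import numpy as np

seqlen, seq_num = 3, 5
q_data = np.arange(seqlen * seq_num).reshape(seqlen, seq_num)

# The last batch asks for columns [4, 5]; index 5 wraps around to
# column 0 instead of raising IndexError.
inds = np.arange(4, 6)
print(q_data.take(inds, axis=1, mode='wrap'))  # columns 4 and 0
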
Code example #38
0
File: model_backup.py  Project: yjhong89/ITS
    def train(self, train_q_data, train_qa_data, valid_q_data, valid_qa_data):
        # q_data, qa_data : [samples, seq_len]
        shuffle_index = np.random.permutation(train_q_data.shape[0])
        q_data_shuffled = train_q_data[shuffle_index]
        qa_data_shuffled = train_qa_data[shuffle_index]

        training_step = train_q_data.shape[0] // self.args.batch_size
        self.sess.run(tf.global_variables_initializer())
        
        if self.args.show:
            from utils import ProgressBar
            # bar.next() is called once per epoch, so max is the epoch count
            bar = ProgressBar('Train', max=self.args.num_epochs)

        self.train_count = 0
        if self.args.init_from:
            if self.load():
                print('Checkpoint_loaded')
            else:
                print('No checkpoint')
        else:
            if os.path.exists(os.path.join(self.args.checkpoint_dir, self.model_dir)):
                try:
                    shutil.rmtree(os.path.join(self.args.checkpoint_dir, self.model_dir))
                    shutil.rmtree(os.path.join(self.args.log_dir, self.model_dir+'.csv'))
                except(FileNotFoundError, IOError) as e:
                    print('[Delete Error] %s - %s' % (e.filename, e.strerror))
        
        best_valid_auc = 0
        best_epoch = 0

        # Training
        for epoch in range(0, self.args.num_epochs):
            if self.args.show:
                bar.next()

            pred_list = list()
            target_list = list()        
            epoch_loss = 0
            # NOTE: this decayed schedule is rebuilt every epoch and its value
            # is never fed; the feed_dict below uses the constant initial LR.
            learning_rate = tf.train.exponential_decay(self.args.initial_lr, global_step=self.global_step, decay_steps=self.args.anneal_interval*training_step, decay_rate=0.667, staircase=True)

            #print('Epoch %d starts with learning rate : %3.5f' % (epoch+1, self.sess.run(learning_rate)))
            for steps in range(training_step):
                # [batch size, seq_len]
                q_batch_seq = q_data_shuffled[steps*self.args.batch_size:(steps+1)*self.args.batch_size, :]
                qa_batch_seq = qa_data_shuffled[steps*self.args.batch_size:(steps+1)*self.args.batch_size, :]
    
                # qa : exercise index + answer(0 or 1) * number_of_exercises
                # right : 1, wrong : 0, padding : -1
                target = qa_batch_seq[:,:]
                # Cast to integer to compute the correctness target
                target = target.astype(np.int64)
                target_batch = (target - 1) // self.args.n_questions
                target_batch = target_batch.astype(np.float64)

                feed_dict = {self.q_data_seq:q_batch_seq, self.qa_data_seq:qa_batch_seq, self.target_seq:target_batch, self.lr:self.args.initial_lr}
                #self.lr:self.sess.run(learning_rate)
                loss_, pred_, _, = self.sess.run([self.loss, self.pred, self.train_op], feed_dict=feed_dict)
                # Flatten targets/predictions to [batch_size * seq_len, 1]
                right_target = np.asarray(target_batch).reshape(-1, 1)
                right_pred = np.asarray(pred_).reshape(-1, 1)
                # np.flatnonzero returns the indices of non-padding entries
                right_index = np.flatnonzero(right_target != -1.).tolist()
                # Collect one [num_valid_entries, 1] array per training step
                pred_list.append(right_pred[right_index])
                target_list.append(right_target[right_index])

                epoch_loss += loss_
                #print('Epoch %d/%d, steps %d/%d, loss : %3.5f' % (epoch+1, self.args.num_epochs, steps+1, training_step, loss_))
                

            if self.args.show:
                bar.finish()        
            
            all_pred = np.concatenate(pred_list, axis=0)
            all_target = np.concatenate(target_list, axis=0)

            # Compute metrics
            self.auc = metrics.roc_auc_score(all_target, all_pred)
            # Extract elements with boolean index
            # Make '1' for elements higher than 0.5
            # Make '0' for elements lower than 0.5
            all_pred[all_pred > 0.5] = 1
            all_pred[all_pred <= 0.5] = 0
            self.accuracy = metrics.accuracy_score(all_target, all_pred)

            epoch_loss = epoch_loss / training_step    
            print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (epoch+1, self.args.num_epochs, epoch_loss, self.auc, self.accuracy))
            self.write_log(epoch=epoch+1, auc=self.auc, accuracy=self.accuracy, loss=epoch_loss, name='training_')

            valid_steps = valid_q_data.shape[0] // self.args.batch_size
            valid_pred_list = list()
            valid_target_list = list()
            for s in range(valid_steps):
                # Validation
                valid_q = valid_q_data[s*self.args.batch_size:(s+1)*self.args.batch_size, :]
                valid_qa = valid_qa_data[s*self.args.batch_size:(s+1)*self.args.batch_size, :]
                # right : 1, wrong : 0, padding : -1
                valid_target = (valid_qa - 1) // self.args.n_questions
                valid_feed_dict = {self.q_data_seq : valid_q, self.qa_data_seq : valid_qa, self.target_seq : valid_target}
                valid_loss, valid_pred = self.sess.run([self.loss, self.pred], feed_dict=valid_feed_dict)
                # Same with training set
                valid_right_target = np.asarray(valid_target).reshape(-1,)
                valid_right_pred = np.asarray(valid_pred).reshape(-1,)
                valid_right_index = np.flatnonzero(valid_right_target != -1).tolist()    
                valid_target_list.append(valid_right_target[valid_right_index])
                valid_pred_list.append(valid_right_pred[valid_right_index])
            
            all_valid_pred = np.concatenate(valid_pred_list, axis=0)
            all_valid_target = np.concatenate(valid_target_list, axis=0)

            valid_auc = metrics.roc_auc_score(all_valid_target, all_valid_pred)
            # For validation accuracy
            all_valid_pred[all_valid_pred > 0.5] = 1
            all_valid_pred[all_valid_pred <= 0.5] = 0
            valid_accuracy = metrics.accuracy_score(all_valid_target, all_valid_pred)
            print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' %(epoch+1, self.args.num_epochs, valid_auc, valid_accuracy))
            # Valid log
            self.write_log(epoch=epoch+1, auc=valid_auc, accuracy=valid_accuracy, loss=valid_loss, name='valid_')
            if valid_auc > best_valid_auc:
                print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
                best_valid_auc = valid_auc
                best_epoch = epoch + 1
                self.save(best_epoch)

        return best_epoch    
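
As the NOTE above points out, the exponential_decay op is rebuilt every epoch and its value is never fed. A minimal TF1 sketch, with assumed hyperparameters, of defining the schedule once and feeding its current value where self.lr is consumed:

import tensorflow as tf

global_step = tf.Variable(0, trainable=False)
initial_lr, decay_steps = 0.05, 1000             # assumed hyperparameters

# Defined once, outside the epoch loop; reading it later is cheap.
learning_rate = tf.train.exponential_decay(
    initial_lr, global_step=global_step,
    decay_steps=decay_steps, decay_rate=0.667, staircase=True)

lr_ph = tf.placeholder(tf.float32, [])           # stands in for self.lr

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    current_lr = sess.run(learning_rate)         # evaluate the schedule
    # feed_dict={self.lr: current_lr} would go into the training run call
    print(current_lr)
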
Code example #39
0
    def train(self, data):
        source_data, source_loc_data, target_data, target_label, orig_sent_data, W_rm_data = data

        N = int(math.ceil(len(source_data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, 1], dtype=np.int32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size], dtype=np.int32)
        context = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        mask = np.ndarray([self.batch_size, self.mem_size])
        W_rm = np.ndarray([self.batch_size, self.rules_dim, self.mem_size],
                          dtype=np.float32)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        rand_idx, cur = np.random.permutation(len(source_data)), 0
        for idx in range(N):
            if self.show: bar.next()

            context.fill(self.pad_idx)
            time.fill(self.mem_size)
            target.fill(0)
            mask.fill(-np.inf)

            for b in range(self.batch_size):
                m = rand_idx[cur]
                x[b][0] = target_data[m]
                target[b] = target_label[m]
                time[b, :len(source_loc_data[m])] = source_loc_data[m]
                context[b, :len(source_data[m])] = source_data[m]

                crt_wrm = W_rm_data[m]  # rules_dim * sen_len
                W_rm[b] = np.pad(crt_wrm,
                                 [(0, 0),
                                  (0, self.mem_size - crt_wrm.shape[1])],
                                 'constant',
                                 constant_values=0)
                cur = cur + 1

            _, loss, self.step = self.sess.run(
                [self.optim, self.loss, self.global_step],
                feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context,
                    # self.mask: mask,
                    self.W_rm: W_rm,
                    self.A: self.pre_trained_context_wt,
                    self.ASP: self.pre_trained_target_wt
                })

            if idx == 0:
                # Ad-hoc debug printing for the first batch (disabled).
                print(idx)

            cost += np.sum(loss)

        if self.show: bar.finish()
        _, train_acc = self.test(data)
        return cost / N / self.batch_size, train_acc
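
mask.fill(-np.inf) prepares an additive attention mask: added to pre-softmax scores, -inf drives the padded positions to exactly zero probability. A small numpy illustration:

import numpy as np

scores = np.array([2.0, 1.0, 0.5])
mask = np.array([0.0, 0.0, -np.inf])   # last position is padding

masked = scores + mask
e = np.exp(masked - masked.max())      # numerically stable softmax
print(e / e.sum())                     # padding gets exactly 0 probability
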