Example No. 1
def generate_query_embeddings(model, instances_list):
    batch_size = 100

    embeddings_list = list()

    print('Computing embeddings for %s questions' % len(instances_list))
    slices = zip(*(iter(instances_list), ) * batch_size)
    num_batches = int(len(instances_list) / batch_size)
    for n, s in enumerate(slices):
        output.clear(output_tags='progress')
        with output.use_tags('progress'):
            print('Processing batch %s of %s' % (n + 1, num_batches))

        question_batch = list(
            [question_dict["question"] for question_dict in s])
        encodings = model.signatures['question_encoder'](
            tf.constant(question_batch))
        for i in range(len(question_batch)):
            embeddings_list.append(np.array(encodings['outputs'][i]))

    if num_batches * batch_size < len(instances_list):
        question_batch = list([
            question_dict["question"]
            for question_dict in instances_list[num_batches * batch_size:]
        ])
        encodings = model.signatures['question_encoder'](
            tf.constant(question_batch))
        for i in range(len(question_batch)):
            embeddings_list.append(np.array(encodings['outputs'][i]))

    return np.array(embeddings_list)
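A minimal usage sketch for the helper above, assuming the USE-QA model is loaded from TF Hub and the questions arrive as a list of dicts. The model URL and the sample questions are assumptions; the function also relies on numpy, tensorflow, and google.colab.output being imported in its module, so it only runs as-is in a Colab runtime.

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from google.colab import output  # used by generate_query_embeddings for tagged progress output

# Illustrative inputs; the model URL and questions are assumptions.
model = hub.load('https://tfhub.dev/google/universal-sentence-encoder-qa/3')
instances_list = [{"question": "What is the capital of France?"},
                  {"question": "Who wrote Hamlet?"}]
query_embeddings = generate_query_embeddings(model, instances_list)
print(query_embeddings.shape)  # (num_questions, embedding_dim)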
Example No. 2
  def GetCustomFiles(self, value):
    output.clear()
    self.from_to = (int(value[:value.find('-')]),int(value[value.find('-')+1:]))
    print(f'Custom Vocab list created from word[{self.from_to[0]}] to word[{self.from_to[1]}]')  
    print('''
--------------------------------------------------------------------------------
              Vocab file loaded! Please continue below!''')            
Example No. 3
  def GetFiles(self, a):
    output.clear()
    self.choice = a
    #if not os.path.isfile(a): os.popen(f'gsutil cp {file_dict[a]} .')
    #assert os.path.isfile(a), 'Vocab file not found!'
    self.string_ = self.GoogleBucket.get_blob(f'Vocab_txt/SAT-400/{a}').download_as_string().decode('ASCII') 
    if a != '999_vocab_SAT_ALL.txt':
      print('''
--------------------------------------------------------------------------------
              Vocab file loaded! Please continue below!''')
    else: 
      print('Please input your custom word range:')
      display(ReceiveInput('Input format: [start]-[end]', self.GetCustomFiles))
Example No. 4
 def render(self, mode='human'):
     output.clear()
     vertical_var = np.full((self.env_img.shape[0], 10, 3),
                            128,
                            dtype=np.uint8)
     # cv2_imshow(data_img) #img_bgr[yv, xv])
     print('\t\t Environment \t\t\t Drone View ')
     cv2_imshow(np.hstack([self.env_img, vertical_var, self.drone_map]))
     # print('\t\t Coverage \t\t\t Obstacles ')
     # cv2_imshow(np.hstack([self.coverage, 255*self.obstacle_map]))
     print('\t\t Coverage \t\t\t Drone Coverage ')
     cv2_imshow(
         np.hstack([
             self.coverage, vertical_var[:, :, 0], self.drone_map[:, :, 1]
         ]))
Example No. 5
def generate_document_embeddings(model, response_list, sent_list, doc_list):
    '''

    :param model: the use-QA model
    :param response_list: a list of (sent_id, doc_id) tuples
    :param sent_list: a list of strings
    :param doc_list: a list of strings
    :return:
    '''

    batch_size = 100

    embeddings_list = list()

    print('Computing embeddings for %s sentences' % len(response_list))
    slices = zip(*(iter(response_list), ) * batch_size)
    num_batches = int(len(response_list) / batch_size)
    for n, s in enumerate(slices):
        output.clear(output_tags='progress')
        with output.use_tags('progress'):
            print('Processing batch %s of %s' % (n + 1, num_batches))

        response_batch = list(
            [sent_list[int(sent_id)] for sent_id, doc_id in s])
        context_batch = list([doc_list[int(doc_id)] for sent_id, doc_id in s])
        encodings = model.signatures['response_encoder'](
            input=tf.constant(response_batch),
            context=tf.constant(context_batch))
        for i in range(len(response_batch)):
            embeddings_list.append(np.array(encodings['outputs'][i]))

    if batch_size * num_batches < len(response_list):
        response_batch = list([
            sent_list[int(sent_id)]
            for sent_id, doc_id in response_list[num_batches * batch_size:]
        ])
        context_batch = list([
            doc_list[int(doc_id)]
            for sent_id, doc_id in response_list[num_batches * batch_size:]
        ])
        encodings = model.signatures['response_encoder'](
            input=tf.constant(response_batch),
            context=tf.constant(context_batch))
        for i in range(len(response_batch)):
            embeddings_list.append(np.array(encodings['outputs'][i]))

    return np.array(embeddings_list)
Example No. 6
    def fit(self):
        for epoch in range(self.epoch, self.config.SOLVER.MAX_EPOCHS):
            if epoch < self.config.SOLVER.WARMUP_EPOCHS:
                lr_scale = min(
                    1.,
                    float(epoch + 1) / float(self.config.SOLVER.WARMUP_EPOCHS))
                for pg in self.optimizer.param_groups:
                    pg['lr'] = lr_scale * self.config.SOLVER.BASE_LR
                self.do_scheduler = False
            else:
                self.do_scheduler = True
            if self.config.VERBOSE:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.utcnow().isoformat()
                self.logger.info(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            summary_loss = self.train_one_epoch()

            self.logger.info(
                f'[RESULT]: Train. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}'
            )
            self.save(f'{self.base_dir}/last-checkpoint.bin')

            t = time.time()
            best_score_threshold, best_final_score = self.validation()

            self.logger.info(
                f'[RESULT]: Val. Epoch: {self.epoch}, Best Score Threshold: {best_score_threshold:.2f}, Best Score: {best_final_score:.5f}, time: {(time.time() - t):.5f}'
            )
            if best_final_score > self.best_final_score:
                self.best_final_score = best_final_score
                self.best_score_threshold = best_score_threshold
                self.model.eval()
                self.save(f'{self.base_dir}/best-checkpoint.bin')
                self.save_model(f'{self.base_dir}/best-model.bin')
                self.save_predictions(f'{self.base_dir}/all_predictions.csv')

            self.early_stop(best_final_score)
            if self.early_stop_epochs > self.config.SOLVER.EARLY_STOP_PATIENCE:
                self.logger.info('Early Stopping!')
                break

            if self.epoch % self.config.SOLVER.CLEAR_OUTPUT == 0:
                output.clear()

            self.epoch += 1
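The warmup branch above linearly ramps the learning rate towards BASE_LR over the first WARMUP_EPOCHS epochs and only hands control to the scheduler afterwards. A standalone sketch of that schedule, with illustrative values standing in for the config (WARMUP_EPOCHS and BASE_LR here are assumptions):

WARMUP_EPOCHS = 5    # stands in for config.SOLVER.WARMUP_EPOCHS
BASE_LR = 1e-3       # stands in for config.SOLVER.BASE_LR

for epoch in range(8):
    # Same formula as in fit(): linear ramp, capped at 1.0 once warmup is over.
    lr_scale = min(1.0, float(epoch + 1) / float(WARMUP_EPOCHS))
    print(f'epoch {epoch}: lr = {lr_scale * BASE_LR:.2e}')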
Example No. 7
    def render(self, env):
        self.screen.fill(WHITE)
        self.render_robot(env.robot)
        self.render_walls(env.walls)
        self.render_pois(env.pois)
        self.render_goal(env.goal)
        #self.render_aoi()
        #pygame.display.update()

        if colab_use:
            #convert image so it can be displayed in OpenCV
            view = pygame.surfarray.array3d(self.screen)
            # convert from (width, height, channel) to (height, width, channel)
            view = view.transpose([1, 0, 2])
            # convert from rgb to bgr
            img_bgr = cv2.cvtColor(view, cv2.COLOR_RGB2BGR)
            #Display image, clear cell every 0.5 seconds
            cv2_imshow(img_bgr)
            time.sleep(0.5)
            output.clear()
Example No. 8
    def _clear_component(self, component_id=None, wait=False):
        """Clears component.

    If component_id is None, it will clear currently active component,
    otherwise it will clear one with given id.

    NOTE FOR SUBCLASS IMPLEMENTERS:

    When _clear_output is called it will remove all outputs that were created
    within context of _active_component.

    This might produce subtle errors when the user clears the component
    they are currently producing output for, as it will destroy any output
    that is in the context of _active_component. Therefore, if your widget
    needs JavaScript to set up the component for output, it should always
    be produced by overloading _prepare_component_for_output.

    Args:
      component_id: which component to clear.
      wait: if True, the output won't be cleared until the next user output.
      See colab.output.clear for full details.

    Raises:
      WidgetException: if component_id is None and no active component is
      selected.
    """
        _util.flush_all()
        if component_id is None:
            if self._current_component is None:
                raise WidgetException('No active component selected')
            component_id = self._current_component
        if component_id == self._current_component:
            # Do not clear the part that sets current active element.
            # If we did, this would have made all consecutive output to stream
            # to wrong outputarea on reload.
            output.clear(wait, output_tags=[component_id] + ['user_output'])
        else:
            output.clear(wait, output_tags=[component_id])
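A minimal sketch of the tag-based clearing the widget builds on, assuming a Colab runtime where google.colab.output is available; the tag name is illustrative:

import time
from google.colab import output

with output.use_tags('my_component'):
    print('temporary status line')        # output recorded under the tag

time.sleep(1)
# Clears only the output produced under the tag, leaving everything else intact.
output.clear(output_tags=['my_component'])
print('tagged output cleared')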
Example No. 9
def generate_document_embeddings_no_context(model, response_list):
    '''
    :param model: the use-QA model
    :param response_list: a list of strings
    :return:
    '''

    batch_size = 100

    embeddings_list = list()

    print('Computing embeddings for %s sentences' % len(response_list))
    slices = zip(*(iter(response_list), ) * batch_size)
    num_batches = int(len(response_list) / batch_size)
    for n, s in enumerate(slices):
        output.clear(output_tags='progress')
        with output.use_tags('progress'):
            print('Processing batch %s of %s' % (n + 1, num_batches))

        # according to https://tfhub.dev/google/universal-sentence-encoder-qa/3, we should repeat answer_batch if there is no context.
        answer_batch = list([sent for sent in s])
        #context_batch = list([" "])
        encodings = model.signatures['response_encoder'](
            input=tf.constant(answer_batch), context=tf.constant(answer_batch))
        for i in range(len(answer_batch)):
            embeddings_list.append(np.array(encodings['outputs'][i]))

    if batch_size * num_batches < len(response_list):
        answer_batch = list(
            [sent for sent in response_list[num_batches * batch_size:]])
        encodings = model.signatures['response_encoder'](
            input=tf.constant(answer_batch), context=tf.constant(answer_batch))
        for i in range(len(answer_batch)):
            embeddings_list.append(np.array(encodings['outputs'][i]))

    return np.array(embeddings_list)
Example No. 10
    def _clear_component(self, component_id=None, wait=False):
        """Clears currently active component.

    Args:
      component_id: which component to clear.
      wait: if True, the output won't be cleared until the next user output.
      See colab.output.clear for full details.

    Raises:
      WidgetException: if component_id is None and no active component is
      selected.
    """
        _util.flush_all()
        if component_id is None:
            if self._current_component is None:
                raise WidgetException('No active component selected')
            component_id = self._current_component
        if component_id == self._current_component:
            # Do not clear the part that sets current active element.
            # If we did, this would have made all consecutive output to stream
            # to wrong outputarea on reload.
            output.clear(wait, output_tags=[component_id] + ['user_output'])
        else:
            output.clear(wait, output_tags=[component_id])
Example No. 11
def run_predictions(eval_set, out_filename, n_examples=None, padding=False):
  """
  Automatic prediction script with options for checkpointing, error counting,
  padding of incomplete experiments, and dumping results to a file.
  """
  predictions = dict()
  checkpoints = []
  error_count = 0
  for k in range(1000, 11000, 1000):
    # checkpoints.append(k)
    pass

  if not n_examples:
    for i, example in enumerate(eval_set):
      try:
        predictions[example.qas_id] = answer_question(example.question_text, example.context_text)
      except:
        predictions[example.qas_id] = ""
        error_count += 1
      output.clear(output_tags='status_text')
      with output.use_tags('status_text'):
        print(f"Loaded {i+1}/{len(eval_set)} Failed predictions: {error_count}")
      if i in checkpoints:
        with open('/content/gdrive/My Drive/squad/'+str(int(i/1000))+'k_'+out_filename, 'w') as fp:
          json.dump(predictions, fp)
        print(f"Saved checkpoint {i/1000}k")
  else:
    for i in range(n_examples):
      example = eval_set[i]
      predictions[example.qas_id] = answer_question(example.question_text, example.context_text)
      output.clear(output_tags='status_text')
      with output.use_tags('status_text'):
        print(f"Loaded {i+1}/{n_examples}")
        
    if padding:
      for i in range(n_examples, len(eval_set), 1):
        example = eval_set[i]
        predictions[example.qas_id] = ""
        output.clear(output_tags='status_text')
        with output.use_tags('status_text'):
          print(f"Loaded {i+1}/{len(eval_set)}")

  output.clear(output_tags='status_text')

  with open('/content/gdrive/My Drive/squad/'+out_filename, 'w') as fp:
      json.dump(predictions, fp)
Example No. 12
def setup(
    ngrok_region=None,
    check_gpu_available=True,
    tunnel="ngrok",
    public_key=None,
    ngrok_key=None,
    secret_key=None,
    vncserver=False,
):
    print("[!] Setup process started")
    stat, msg = _setupSSHDMain(
        public_key,
        tunnel,
        ngrok_region,
        check_gpu_available,
        True,
        ngrok_key,
        secret_key,
    )
    if stat:
        if vncserver:
            _setupVNC(secret_key)
    output.clear()
    print(msg)
Example No. 13
Original file is located at
    https://colab.research.google.com/drive/1nQcQVYyrcPsd6q_qW3WUAkpv618oIuc4

##**Gender-speech-duration-calculator**
Here you will find a tool to calculate the percentage of female and male speech time in a video or movie. You can either paste a YouTube link or upload your own video. Install the program, choose one option, and calculate the percentage of female and male speech.

##**Step 1: installation**
"""

#@title Install gender-speech-duration-calculator 
!apt-get install ffmpeg
!pip install inaSpeechSegmenter
!pip install youtube_dl
!pip install -qU ddsp
from google.colab import output
output.clear()

"""##**Step 2: upload/paste video**"""

#@title Option 1: paste your youtube link here
import youtube_dl
youtube_link= 'https://www.youtube.com/watch?v=UG_X_7g63rY&t=23s' #@param {type:"string"}
ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    'outtmpl': 'audio.%(ext)s',
    'quiet': False,
}
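The segmentation step itself is not part of this excerpt; below is a minimal sketch of how the female/male percentages described in the intro could be computed, assuming inaSpeechSegmenter's Segmenter returns (label, start, end) tuples with 'male' and 'female' labels and that the audio has already been extracted to audio.mp3:

from inaSpeechSegmenter import Segmenter

seg = Segmenter()
segments = seg('audio.mp3')                    # [(label, start, end), ...]
durations = {'male': 0.0, 'female': 0.0}
for label, start, end in segments:
    if label in durations:
        durations[label] += end - start
speech_total = sum(durations.values()) or 1.0  # avoid division by zero
for label, dur in durations.items():
    print(f'{label}: {100 * dur / speech_total:.1f}% of detected speech time')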
Example No. 14
    if not wrote_to_fifo:
        with open(fifo, 'w') as fifo_file:
            fifo_file.write('ignored\n')
    filtered_logfile = _timeouts_path()
    d.sendline('fuser -kw "{f}" ; rm -rf "{f}"'.format(f=filtered_logfile))
    d.expect(prompt)
    filter_script = _os.path.join(drive_dir, 'drive-filter.py')
    filter_cmd = (
        """nohup bash -c 'tail -n +0 -F "{}" | """
        """python3 {} > "{}" ' < /dev/null > /dev/null 2>&1 &""").format(
            dfs_log, filter_script, filtered_logfile)
    d.sendline(filter_cmd)
    d.expect(prompt)
    if 'ENABLE_DIRECTORYPREFETCHER' in _os.environ:
        d.sendline(
            """nohup bash -c '{d}/directoryprefetcher_binary -mountpoint={mnt}' """
            """>> {log} 2>&1 &""".format(d=drive_dir,
                                         mnt=mountpoint,
                                         log=_os.path.join(
                                             _logs_dir(), 'dpb.txt')))
        d.expect(prompt)
    d.sendline('disown -a')
    d.expect(prompt)
    d.sendline('exit')
    assert d.wait() == 0
    _output.clear(wait=True, output_tags='dfs-auth-dance')
    print('Mounted at {}'.format(mountpoint))


mount._DEBUG = False  # pylint:disable=protected-access
Example No. 15
def main():

    window_size = 6
    batch_size = 16
    model_name = 'skipgram-win12'
    ckpt_dir = '/content/gdrive/My Drive/files/{0}/{0}'.format(model_name)
    ckpt_dir = ckpt_dir + '-ep{}.ckpt'

    text, dictionary = util.nltk2data(brown, save_dict=False, remove_punc=True)
    train_inps, train_labels, eval_inps, eval_labels = util.data_dicer(
        text,
        dictionary,
        window_size,
        batch_size,
        chopoff=True,
        train_eval_split=0.8)
    # TODO: placeholders for input and output (label)
    vocab_size = len(dictionary.keys())
    embed_dim = 512
    num_sampled = 10
    num_true = 2 * window_size
    epochs = 1
    current_epoch = 1

    x_ph = tf.placeholder(tf.int32, shape=(None, ), name='x_ph')
    target_ph = tf.placeholder(tf.int32,
                               shape=(None, num_true),
                               name='target_ph')

    # TODO: construct embedding_layers USING VARIABLE SCOPE!
    with tf.variable_scope('skipgram', reuse=tf.AUTO_REUSE):
        embedding_layer = tf.get_variable('embedding_layer',
                                          shape=(vocab_size, embed_dim),
                                          dtype=tf.float32,
                                          initializer=tf.constant_initializer(
                                              np.random.randn(
                                                  vocab_size, embed_dim)))

        output_weights = tf.get_variable('output_weight',
                                         shape=(vocab_size, embed_dim),
                                         dtype=tf.float32,
                                         initializer=tf.constant_initializer(
                                             np.random.randn(
                                                 vocab_size, embed_dim)))

        output_biases = tf.get_variable('output_bias',
                                        shape=(vocab_size, ),
                                        dtype=tf.float32,
                                        initializer=tf.zeros_initializer())

    # TODO: map the word using tf.nn.lookup
    # center_word shape = [batch_size, embed_dim]
    center_word = tf.nn.embedding_lookup(embedding_layer, x_ph)

    # TODO: Check which mode, either training or eval
    # TODO: Training, calculate NCE loss
    train_loss = tf.nn.nce_loss(weights=output_weights,
                                biases=output_biases,
                                labels=target_ph,
                                inputs=center_word,
                                num_true=num_true,
                                num_sampled=num_sampled,
                                num_classes=vocab_size)
    train_batch_loss = tf.reduce_mean(train_loss)
    opt = tf.train.AdamOptimizer().minimize(train_batch_loss)

    # TODO: eval, calculate manually? refers to tensorflow guide.
    # output_weights.shape = [vocab_size, dim]
    # center_word.shape = [batch_size, dim]
    # matmul, output_weights

    # projections = tf.matmul(center_word, tf.transpose(output_weights)) + output_biases
    # eval_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=projections, labels=tf.squeeze(target_ph))
    # eval_batch_loss = tf.reduce_mean(eval_loss)

    saver = tf.train.Saver(tf.trainable_variables())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if current_epoch > 1:
            saver.restore(sess, ckpt_dir.format(current_epoch - 1))
        start_time = time.time()
        print("=" * 80)
        print("=" * 80)
        print("Start time: {}".format(
            time.strftime('%Y/%m/%d, %H:%M:%S', time.localtime(start_time))))
        print("=" * 80)
        print("=" * 80)
        for i in range(epochs):
            epoch_start_time = time.time()
            print("Epoch: ", i + 1)

            epoch_loss = []
            # Do it over batches
            # 1. stats
            batch_number = 0
            seconds_per_batch = 0
            total_batch = len(train_inps)
            for batch_inps, batch_labels in zip(train_inps, train_labels):
                batch_start_time = time.time()
                batch_number = batch_number + 1
                left_over_time = round(
                    (total_batch + 1 - batch_number) * seconds_per_batch, 3)
                output.clear(output_tags=('batch_print'), wait=True)
                with output.use_tags('batch_print'):
                    print(
                        "Current batch:{}/{}\tSeconds per batch: {}s, {}s left."
                        .format(batch_number, total_batch + 1,
                                seconds_per_batch, left_over_time))

                feed_dict = {x_ph: batch_inps, target_ph: batch_labels}
                _, batch_loss_v = sess.run([opt, train_batch_loss],
                                           feed_dict=feed_dict)
                epoch_loss.append(batch_loss_v)
                batch_end_time = time.time()
                seconds_per_batch = round(batch_end_time - batch_start_time, 3)
                # TODO: averaging epoch_loss
            feed_dict = {x_ph: eval_inps, target_ph: eval_labels}
            [eval_loss_v] = sess.run([train_batch_loss], feed_dict=feed_dict)

            epoch_loss = np.mean(epoch_loss)
            # TODO: print train loss, print eval loss
            epoch_end_time = time.time()
            epoch_duration = epoch_end_time - epoch_start_time
            print("Epochs took: {}s".format(round(epoch_duration, 3)))
            print("Train Loss: {}\tEval Loss: {}".format(
                round(epoch_loss, 3), round(eval_loss_v, 3)))

        end_time = time.time()
        duration = end_time - start_time
        print("=" * 80)
        print("=" * 80)
        print("End time: {}".format(
            time.strftime('%Y/%m/%d, %H:%M:%S', time.localtime(end_time))))
        print("Duration: {}s".format(round(duration, 2)))
        print("=" * 80)
        print("=" * 80)
        saver.save(sess, ckpt_dir.format(current_epoch))
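Once training finishes, only the checkpoint is saved; a hedged sketch of how the learned embeddings could be inspected with a cosine-similarity lookup, assuming the embedding matrix has been exported (for example via sess.run of embedding_layer) and that dictionary maps words to integer ids. embedding_matrix and nearest_words are illustrative names, not part of the snippet above.

import numpy as np

def nearest_words(word, embedding_matrix, dictionary, k=5):
    # dictionary: word -> id; invert it for id -> word lookups.
    id_to_word = {idx: w for w, idx in dictionary.items()}
    vec = embedding_matrix[dictionary[word]]
    sims = embedding_matrix @ vec / (
        np.linalg.norm(embedding_matrix, axis=1) * np.linalg.norm(vec) + 1e-9)
    best = np.argsort(-sims)[1:k + 1]   # rank by similarity, skip the query word itself
    return [id_to_word[i] for i in best]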
Example No. 16
def main(data_dir, query_condition, ex_list=None):
    dem_dir = os.path.join(data_dir, "dem")
    dembz_dir = os.path.join(data_dir, "dembz")
    log_dir = os.path.join(data_dir, "log")
    csv_dir = os.path.join(data_dir, "csv")

    last_match_id = ""
    if ex_list is None:
        exist_list = [fn.name.split("_")[0] for fn in list(Path(csv_dir).glob("*.csv"))]
    else:
        exist_list = ex_list
    while True:
        matches_json = get_match_id(last_match_id)
        if len(matches_json)<=0:
            last_match_id = ""
            continue
        df_matches_all = pd.DataFrame(matches_json)
        last_match_id = df_matches_all['match_id'].values[-1]
        # matches_file = os.path.join(data_dir, "data_5236503318_8w.csv")
        # df_matches_all = pd.read_csv(matches_file)[20000:]
        df_matches = df_matches_all.query(query_condition).reset_index()
        
        from google.colab import output
        output.clear()
        
        for i in range(df_matches.shape[0]):
            print(i, "/",df_matches.shape[0])

            match_id = df_matches['match_id'][i]
            replay_json = get_replay_salt(match_id)
            cluster = replay_json[0]["cluster"]
            replay_salt = replay_json[0]["replay_salt"]
            done, filename = download_replay(match_id, cluster, replay_salt)
                
            if done:
                try:
                    t = time.time()
                    os.system('curl localhost:5600 --data-binary "@%s.dem" > %s.json | exit 1'%(filename, filename))
                    os.system("python ParseJsonRecord.py %s.json %s.csv"%(filename, filename))
                    dur = time.time()-t
                    print("P", dur)
                    df_matches.loc[i].to_json("%s.info"%filename)
                    
                    assert os.path.exists(filename+".dem.bz2")
                    assert os.path.exists(filename+".dem")
                    assert os.path.exists(filename+".json")
                    assert os.path.exists(filename+".csv")
                    assert os.path.exists(filename+".info")

                    # shutil.move(filename+".dem.bz2", os.path.join(dembz_dir, filename+".dem.bz2"))
                    # shutil.move(filename+".dem", os.path.join(dem_dir, filename+".dem"))
                    # shutil.move(filename+".json", os.path.join(log_dir, filename+".json"))
                    
                    os.remove(filename+".dem.bz2")
                    os.remove(filename+".dem")
                    os.remove(filename+".json")

                    shutil.move(filename+".info", os.path.join(csv_dir, filename+".info"))
                    shutil.move(filename+".csv", os.path.join(csv_dir, filename+".csv"))
                    exist_list.append(match_id)
                    print("saved", filename)
                except:
                    exist_list.append(match_id)
                    print("save failed", filename)
                    raise
            val = random.random()
            if val > 1 - SKIP_PERCANTAGE:
                continue

Example No. 17
    # Get the tokens and remove words that are too short
    tokens = []
    try:
        tokens = row['cleaned_Data_Content'].split(' ')
        tokens = [token for token in tokens if len(token) > 3]
    except:
        print('A book failed')

    books.append(tokens)
    genres.append(genre)

    output.clear(output_tags='status_text')
    with output.use_tags('status_text'):
        print('Books loaded: ' + str(count))
    count += 1

genres = np.array(genres)
books = np.array(books)

# Additional data processing to optimize the topics generated

commonWords = []

with open("/content/drive/My Drive/ATIML/common.txt") as common:
    for word in common:
        commonWords.append(word[:-1])
Example No. 18
  def train(self, fine_tuning = False):
    self._check_required_models()
    start_train_wall_time = datetime.datetime.now()
    for callback in self._tensorboard_callback_dict.values():
      callback.on_train_begin()
    gpus = tf.config.experimental.list_physical_devices('GPU')
    n_gpus = len(gpus)
    if self._verbose: print('This machine has %i GPUs.' % n_gpus)

    # containers for losses
    lc = Container(); self.lc = lc
    if self._load_model_at_path is not None:
      # Load model
      self.load( self._load_model_at_path )
      # Load loss record
      self._fill_training_container( self._load_model_at_path, lc )
      if self._load_model_at_path != self._save_model_at_path:
        # Save a model copy with previous best validation so that if this
        # training session is useless, we can recover previous best model
        self.save( overwrite = True, val = True )
    else:
      lc.surrogate_loss_record = {k : [] for k in self._surrogate_lkeys}
      lc.train_perf_record = {}; lc.val_perf_record = {}
      lc.best_epoch = lc.best_step = lc.last_progress_step = 0; lc.best_val_reco = np.finfo( dtype = np.float32 ).max
      lc.p_sample_period = -np.inf; lc._history_cur_batch_samples = 0;
      lc.step = lc.epoch = 0
      lc.prev_train_time = datetime.timedelta()
      lc.total_performance_measure_time = datetime.timedelta()
    last_print_cycle = -1; last_save_cycle = 0
    skipFinalPerfEval = is_new_print_cycle = False;
    exc_type = exc_val =  None
    # When fine tuning, we need to reset the validation dataset statistics
    if fine_tuning:
      lc.best_val_reco = np.finfo( dtype = np.float32 ).max

    train_perf_dict = {}
    val_perf_dict = {}
    lc.first_step = True
    lc.session_performance_measure_time = datetime.timedelta()
    lc.last_performance_measure_time = datetime.timedelta()
    lc.first_step_measure_time = datetime.timedelta()
    lc.session_step = 0
    lc.session_epoch = 0
    n_measurements = 0

    try:
      while (lc.epoch < self._max_epoches if self._max_epoches else True):
        alreadyPrintedEpoch = alreadySavedEpoch = False
        for sample_batch in self.data_sampler.training_sampler:
          with DelayedKeyboardInterrupt():
            if self._log_models_in_tensorboard:
              for callback in self._tensorboard_callback_dict.values():
                callback.on_epoch_begin(epoch = lc.step)
                callback.on_train_batch_begin(batch = lc.step)
            evaluatedPerf = False
            # TODO To measure performance on purely initialize sample, simply run
            # below without running self._train_base and without incrementing
            # lc.step
            if self.sample_parser_fcn is not None:
              #self.data_sampler.plot(data_samples = sample_batch, do_display = True)
              sample_batch = self.sample_parser_fcn(sample_batch)
            #print("Running first train step")
            surrogate_loss_dict = self._train_base(lc.epoch, lc.step, sample_batch)
            #print("Finished computing and updating one train step")
            lc.step += 1; lc.session_step += 1
            start_performance_measure = datetime.datetime.now()
            surrogate_loss_dict = self._parse_surrogate_loss( surrogate_loss_dict )
            surrogate_loss_dict['step'] = lc.step
            # Keep track of training record:
            # TODO This should be integrated in the meters, i.e. compute loss only if passing constraint below
            c_sample_period = np.log10( lc.step ) // self._log_sampling_period
            if c_sample_period  > lc.p_sample_period:
              lc.p_sample_period = c_sample_period
              lc._history_cur_batch_samples = 0
            if lc._history_cur_batch_samples < self._history_max_batch_samples:
              # NOTE handle_new_loss_step keeps track of what is plot/logged
              # during training
              # TODO This should be integrated with the meter framework
              #print("Keeping track of surrogate loss")
              with self._surrogate_summary_writer.as_default(step = lc.step) as writer:
                self._handle_new_loss_step( lc.surrogate_loss_record, surrogate_loss_dict
                                          , keys = self._surrogate_lkeys )
                for fcn in self._other_surrogate_logging_fcns:
                  fcn(surrogate_loss_dict)
            # Compute efficiency
            if ( not(lc.step % self._n_performance_measure_steps) or lc.step == 1):
              n_measurements += 1
              #print("Computing train dataset performance")
              #if lc._history_cur_batch_samples < self._history_max_batch_samples:
              train_perf_dict = self.performance_measure_fcn(
                  sampler_ds = self.data_sampler.evaluation_sampler_from_train_ds,
                  meters = self._train_perf_meters)
              train_perf_dict['step'] = lc.step
              self.lc.last_train_perf_step = lc.step
              with self._train_perf_summary_writer.as_default(step = lc.step) as writer:
                self._handle_new_loss_step(lc.train_perf_record, train_perf_dict)
                for fcn in self._other_train_perf_logging_fcns:
                  fcn(train_perf_dict)
              # Compute performance for validation dataset (when available)
              if self.data_sampler.has_val_ds:
                #print("Computing val dataset performance")
                val_perf_dict = self.performance_measure_fcn(
                    sampler_ds = self.data_sampler.evaluation_sampler_from_val_ds,
                    meters = self._val_perf_meters )
                val_perf_dict['step'] = lc.step
                self.lc.last_val_perf_step = lc.step
                evaluatedPerf = True
                with self._val_perf_summary_writer.as_default(step = lc.step) as writer:
                  # TODO Reminder handle new loss keeps track of what is plot/logged
                  # during training. Should be integrated with meter framework
                  self._handle_new_loss_step(lc.val_perf_record, val_perf_dict )
                  for fcn in self._other_val_perf_logging_fcns:
                    fcn(val_perf_dict)
                # Early stopping algo: Keep track of best model so far
                #print("Computing early stopping")
                if bool(self.early_stopping_key) and val_perf_dict[self.early_stopping_key] < lc.best_val_reco:
                  if lc.best_val_reco - val_perf_dict[self.early_stopping_key] > self._min_progress:
                    lc.last_progress_step = lc.step
                  lc.best_val_reco = val_perf_dict[self.early_stopping_key]
                  lc.best_step = lc.step; lc.best_epoch = lc.epoch 
                  self.save( overwrite = True, val = True )
                # Check whether to break due to too many steps without progress (early stopping).
                if ( lc.step - lc.last_progress_step ) >= self._max_fail:
                  raise BreakDueToMaxFail()
            # End of efficiency computation
            # Performed one model update step
            # Compute training time
            lc.session_train_time = datetime.datetime.now() - start_train_wall_time
            print_cycle = int( lc.session_train_time / self._print_interval_wall_time ) if self._print_interval_wall_time is not None else 0
            is_new_print_cycle = print_cycle > last_print_cycle
            # Compute performance measurement time:
            stop_performance_measure = datetime.datetime.now()
            this_step_performance_measure_time = stop_performance_measure - start_performance_measure
            if evaluatedPerf:
              lc.total_performance_measure_time += this_step_performance_measure_time
              lc.session_performance_measure_time += this_step_performance_measure_time
              lc.last_performance_measure_time = this_step_performance_measure_time
              if lc.first_step:
                lc.first_step_measure_time = this_step_performance_measure_time
            # Print/plot loss
            if ((self._verbose or self._online_train_plot) and 
                  (
                    (not(lc.step % self._print_interval_steps) if self._print_interval_steps is not None else False) 
                    or ((not(lc.epoch % self._print_interval_epoches)  if self._print_interval_epoches is not None else False) and not(alreadyPrintedEpoch) )
                    or is_new_print_cycle
                  )
                ):
              #print("Proceeding to printing")
              last_improvement = { 'best_val_reco' : lc.best_val_reco
                                 , 'best_step' : lc.best_step
                                 , 'last_progress_step' : lc.last_progress_step } if val_perf_dict else {}
              if self._online_train_plot:
                try:
                  from google.colab import output
                  output.clear()
                except ImportError:
                  from IPython.display import clear_output
                  clear_output(wait = True)
                plt.close('all')
                self.plot_surrogate_progress( do_display = True )
                self.plot_performance_progress( do_display = True )
                for fcn in self._online_plot_fcns:
                  fcn()
              if self._verbose: 
                self._replace_nans_with_last_report( surrogate_loss_dict, lc.surrogate_loss_record )
                self._print_progress( lc.epoch, lc.session_epoch, lc.step, lc.session_step
                                    , lc.prev_train_time, lc.session_train_time, lc.session_performance_measure_time
                                    , lc.last_performance_measure_time
                                    , lc.first_step_measure_time
                                    , n_measurements
                                    , surrogate_loss_dict, train_perf_dict, val_perf_dict, last_improvement )
              if not(lc.epoch % self._print_interval_epoches) if self._print_interval_epoches is not None else False:
                alreadyPrintedEpoch = True
              if is_new_print_cycle:
                last_print_cycle = print_cycle
                is_new_print_cycle = False
            # Finished printing
            # Check whether we have finished training
            if self._max_steps is not None and (lc.step > self._max_steps):
              raise BreakDueToUpdates()
            if self._max_train_wall_time is not None and (lc.session_train_time > self._max_train_wall_time):
              raise BreakDueToWallTime()
            # No halt requested. Increment counters.
            if lc._history_cur_batch_samples < self._history_max_batch_samples:
              lc._history_cur_batch_samples += 1
            save_cycle = int( lc.session_train_time / self._save_interval_wall_time ) if self._save_interval_wall_time is not None else 0
            is_new_save_cycle = save_cycle > last_save_cycle
            # Save progress. Note that this save is not due to early stopping,
            # but rather to allow recovering current weights regardless of
            # training status
            if (
                (not(lc.step % self._save_interval_steps) if self._save_interval_steps is not None else False) 
                or ((not(lc.epoch % self._save_interval_epoches)  if self._save_interval_epoches is not None else False) and not(alreadySavedEpoch) )
                or is_new_save_cycle
               ):
              #print("Saving progress")
              loss_data = { 'surrogate_loss_record' : lc.surrogate_loss_record
                          , 'train_perf_record' : lc.train_perf_record
                          , 'val_perf_record' : lc.val_perf_record }
              self.save( overwrite = True
                  , loss_data = loss_data
                  , locals_data = lc )
              if not(lc.epoch % self._save_interval_epoches) if self._save_interval_epoches is not None else False:
                alreadySavedEpoch = True
              if is_new_save_cycle:
                last_save_cycle = save_cycle
                is_new_save_cycle = False
            if self._log_models_in_tensorboard:
              for callback in self._tensorboard_callback_dict.values():
                callback.on_epoch_end(epoch = lc.step)
                callback.on_train_batch_end(batch = lc.step, logs = val_perf_dict)
            if ( not(lc.step % self._n_performance_measure_steps) or lc.step == 1):
              self._update_writer_file(self._surrogate_summary_writer)
              self._update_writer_file(self._train_perf_summary_writer)
              self._update_writer_file(self._val_perf_summary_writer)
            lc.first_step = False
          # end of step, send delayed keyboard interrupt and allow interruptions
        lc.epoch += 1; lc.session_epoch += 1
        # Performed a full pass through training dataset
      raise BreakDueToEpoches
    except BaseException as e:
      exc_type, exc_val = sys.exc_info()[:2]
    finally:
      if isinstance( exc_val, InterruptTraining):
        print('Training finished!')
        interruptTraining = True
        if isinstance( exc_val, BreakDueToMaxFail ):
          # Recover best validation result
          print('Reason: early stopping.')
          print('Recovering Best Validation Performance @ (Epoch %i, Step %i).' % (lc.best_epoch, lc.best_step,))
          print('Reco_loss: %.3f.' % lc.best_val_reco)
          self.load( self._save_model_at_path, val = True )
          skipFinalPerfEval = True
        elif isinstance( exc_val, BreakDueToUpdates ):
          print('Reason: max steps.')
        elif isinstance( exc_val, BreakDueToEpoches ):
          print('Reason: max epoches.')
        elif isinstance( exc_val, BreakDueToWallTime):
          print('Reason: reached wall time limit.')
      # Other non-critical interruptions
      elif isinstance( exc_val, KeyboardInterrupt):
        interruptTraining = True
        print('Training finished!')
        print('Reason: user interrupted training.')
      # Critical interruptions
      elif isinstance( exc_val, (TrainingCriticalAbort, BaseException) ):
        print('ERROR: Training aborted!')
        if isinstance( exc_val, BreakDueToNonFinite ):
          print('Reason: found non-finite value!!')
        raise exc_val
      if self.data_sampler.has_val_ds:
        if not skipFinalPerfEval:
          if not evaluatedPerf:
            with self._train_perf_summary_writer.as_default(step = lc.step) as writer:
              train_perf_dict = self.performance_measure_fcn( 
                  sampler_ds = self.data_sampler.evaluation_sampler_from_train_ds,
                  meters = self._train_perf_meters
              )
              train_perf_dict['step'] = lc.step
              self._handle_new_loss_step(lc.train_perf_record, train_perf_dict )
            self._update_writer_file(self._train_perf_summary_writer)
            with self._val_perf_summary_writer.as_default(step = lc.step) as writer:
              val_perf_dict = self.performance_measure_fcn( 
                  sampler_ds = self.data_sampler.evaluation_sampler_from_val_ds,
                  meters = self._val_perf_meters
              )
              val_perf_dict['step'] = lc.step
              self._handle_new_loss_step(lc.val_perf_record, val_perf_dict )
            self._update_writer_file(self._val_perf_summary_writer)
          if bool(self.early_stopping_key): 
            if ( lc.step == lc.best_step or val_perf_dict[self.early_stopping_key] < lc.best_val_reco ):
              lc.best_val_reco = val_perf_dict[self.early_stopping_key]
              lc.best_step = lc.step; lc.best_epoch = lc.epoch 
            else:
              print('Validation Performance @ (Epoch %i, Step %i): %f.' % (lc.epoch, lc.step, val_perf_dict[self.early_stopping_key]))
              print('Recovering Best Validation Performance @ (Epoch %i, Step %i).' % (lc.best_epoch, lc.best_step,))
              print('Reco_loss: %.3f.' % (lc.best_val_reco))
              self.load(  self._save_model_at_path, val = True )
    self.save( overwrite = True, locals_data = lc )
    for callback in self._tensorboard_callback_dict.values():
      callback.on_train_end()
    # Compute final performance:
    final_performance = {}
    final_performance['train'] = self.performance_measure_fcn(
        sampler_ds = self.data_sampler.evaluation_sampler_from_train_ds,
        meters = self._train_perf_meters )
    if self.data_sampler.has_val_ds:
      final_performance['val'] = self.performance_measure_fcn(
          sampler_ds = self.data_sampler.evaluation_sampler_from_val_ds,
          meters = self._val_perf_meters )
      final_performance['val']['best_step'] = lc.best_step
      final_performance['val']['best_epoch'] = lc.best_epoch
    else:
      final_performance['val'] = dict()
    loss_data = { 'surrogate_loss_record' : lc.surrogate_loss_record
                , 'train_perf_record' : lc.train_perf_record
                , 'val_perf_record' : lc.val_perf_record
                , 'final_performance' : final_performance }
    self.save( save_models_and_optimizers = False
             , loss_data = loss_data)
    return loss_data
Example No. 19
IS_TPU = False
if os.environ.get('IS_COLAB', False):
    from google.colab import output
    try:
        with output.use_tags('setup'):
            #  !pip install convokit
            #  !python3 -m spacy download en
            tpu = tf.distribute.cluster_resolver.TPUClusterResolver(
            )  # TPU detection
            print('Running on TPU ',
                  tpu.cluster_spec().as_dict()['worker'])
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
            IS_TPU = True
        output.clear(output_tags='setup')

    except ValueError:
        logging.info('Not connected to a TPU runtime')


def make_model(tokenizer=None,
               num_layers=2,
               units=512,
               d_model=256,
               num_heads=8,
               dropout=0.1,
               max_length=32,
               warmup_steps=4000):

    logging.info('Compiling model.')

"""#✒️ Tagged Outputs"""

import sys,time

print('The process is starting')
with output.use_tags('tagged_outputs'):
    for el in ['working \n','=> => => \n','still working \n']:
        sys.stdout.write(el)
        sys.stdout.flush(); time.sleep(5)
output.clear(output_tags='tagged_outputs')
print('Outputs have cleared')

"""#✒️ Linked Outputs"""

# Commented out IPython magic to ensure Python compatibility.
# %%javascript
# const listener=new BroadcastChannel('channel1');
# listener.onmessage=(msg)=>{
#   const div=document.createElement('div');
#   div.textContent=msg.data;
#   div.style.border='double white';
#   div.style.width='20%'; 
#   div.style.padding='20px';
#   document.body.appendChild(div); };