Example #1
    def _observe_iteration_results(self,
                                   num_it: int,
                                   epoch: int,
                                   loss,
                                   mix,
                                   est_data,
                                   gt_data,
                                   aux_data: AuxData,
                                   aux_loss=None):

        est_masks, est_labels = self._get_estimations(est_data)
        f1 = None  # multi-task legacy

        # global step: batch index plus batches-per-epoch times the epoch index
        iteration = num_it + (len(self.loader.dataset) //
                              self.loader.batch_size) * epoch
        if not (iteration % self.exp_config.evaluation_steps):
            wav_predicted, wav_gt = reconstruct_with_masks(
                est_masks=est_masks,
                stft_all=aux_data.stft,
                log_resample=self.exp_config.log_resample,
                stft_frame=self.exp_config.stft_frame)
            # self.metrics is a loss (lower is better), so negate to log si-sdr
            sdr = -self.metrics(wav_predicted, wav_gt)
            masks = (gt_data, est_masks)
            logger.info(f'Iteration #{iteration} si-sdr: {sdr:.4f}')
        else:
            sdr, masks, wav_predicted = None, None, None

        self._write_summary(iteration,
                            loss.item(),
                            self.optimizer.param_groups[-1]['lr'],
                            sdr,
                            masks,
                            wav_predicted,
                            aux_loss=aux_loss,
                            aux_metrics=f1)
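
The negation in `sdr = -self.metrics(wav_predicted, wav_gt)` suggests `self.metrics` is a negative SI-SDR loss, so flipping the sign recovers the metric for logging. A minimal sketch of SI-SDR itself, assuming tensors shaped `(..., time)`; the function name and epsilon are illustrative, not the project's actual metric class:

import torch

def si_sdr(est: torch.Tensor, ref: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    """Scale-invariant SDR in dB for tensors shaped (..., time)."""
    # Zero-mean both signals, as in the usual SI-SDR definition.
    est = est - est.mean(dim=-1, keepdim=True)
    ref = ref - ref.mean(dim=-1, keepdim=True)
    # Project the estimate onto the reference to get the scaled target.
    scale = (est * ref).sum(dim=-1, keepdim=True) / \
        (ref.pow(2).sum(dim=-1, keepdim=True) + eps)
    s_target = scale * ref
    e_noise = est - s_target
    ratio = s_target.pow(2).sum(dim=-1) / (e_noise.pow(2).sum(dim=-1) + eps)
    return 10 * torch.log10(ratio + eps)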
Example #2
def spider(urls, message_file_path):
    '''Sina WeiBo crawler.'''

    ## Start the driver and set cookies
    if __is_update_cookie():
        driver = __get_and_set_cookie()
    else:
        cookie = __load_cookie()
        driver = __set_cookie(cookie)

    for url in urls:
        time.sleep(5)  # wait for the page to load
        try:
            driver = __grasp(driver, url)
        except Exception:
            logger.warning(f'[chrome] failed to grasp html from {url} !!')
            continue  # skip parsing: page_source would still hold the previous page

        try:
            message = __item(driver.page_source)

            with open(message_file_path, 'a+', encoding='utf-8') as file:
                file.writelines(message)
        except Exception:
            logger.warning(f'[chrome] failed to parse html in {url} !!')

    driver.quit()
    logger.info('[chrome] closing chrome...')
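
`__is_update_cookie` is referenced above but not among these examples. A plausible sketch, assuming it asks for a fresh login whenever the saved cookie file is missing or stale; the path and the 12-hour window are assumptions:

import os
import time

COOKIE_PATH = './cookie.pkl'   # assumed location of the saved cookies
COOKIE_TTL = 12 * 3600         # assumed expiry window in seconds

def __is_update_cookie():
    """Return True when the cookie file is missing or older than COOKIE_TTL."""
    if not os.path.exists(COOKIE_PATH):
        return True
    age = time.time() - os.path.getmtime(COOKIE_PATH)
    return age > COOKIE_TTL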
Example #3
def __launch_driver():
    '''Start Chrome.'''
    driver = webdriver.Chrome('./chromedriver')
    driver.set_window_size(1440, 960)  # set the browser window size
    driver.get("https://weibo.com/")  # open the Weibo login page
    time.sleep(10)  # wait for the page to load
    logger.info('[chrome] start successfully...')
    return driver
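
For unattended runs, the same launcher can start Chrome headless. A sketch using `ChromeOptions`, keeping the Selenium 3 call style and window size of the original; the headless flag is an assumption about deployment, not part of the original code:

def __launch_driver_headless():
    '''Start Chrome without a visible window (hypothetical variant).'''
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')               # no visible window
    options.add_argument('--window-size=1440,960')   # same size as the original
    return webdriver.Chrome('./chromedriver', options=options)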
Example #4
def __set_cookie(cookies):
    '''Start the browser and set cookies.'''
    driver = __launch_driver()

    ## Clear old cookies and set the new ones
    driver.delete_all_cookies()
    for cookie in cookies:
        driver.add_cookie(cookie)
    logger.info('[cookie] load local cookies...')
    return driver
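
`__dump_cookie` and `__load_cookie` (used in Examples #2 and #6) are also not shown. A minimal pickle-based pair, reusing the assumed `COOKIE_PATH` from the sketch above; `driver.get_cookies()` returns a list of dicts, which pickles cleanly:

import pickle

def __dump_cookie(cookies):
    """Persist the list of cookie dicts returned by driver.get_cookies()."""
    with open(COOKIE_PATH, 'wb') as f:
        pickle.dump(cookies, f)

def __load_cookie():
    """Load the previously saved cookie dicts."""
    with open(COOKIE_PATH, 'rb') as f:
        return pickle.load(f)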
Example #5
    def run_phase(self, epoch: int):
        running_loss = 0.0
        aux_loss_weight = 1e+5

        for num_it, (inputs, gt_data, *aux_data) in enumerate(self.loader):
            # zero the parameter gradients
            self.optimizer.zero_grad()

            mix, gt_data, visual_or_labels, aux_data = self._input_preprocessing(
                inputs, gt_data, aux_data)

            # forward
            # track history if only in train
            with torch.set_grad_enabled(self._is_training()):
                outputs = self.model(
                    mix, visual_or_labels
                ) if self.model.conditioned else self.model(mix)
                loss = self.criterion(outputs, gt_data)
                aux_loss = None
                total_loss = loss

                # backward + optimize only if in training phase
                if self._is_training():
                    total_loss.backward()
                    if self.exp_config.with_lstm:
                        nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    self.optimizer.step()
                    if (epoch == 0) and (num_it == 0):
                        # Re-create the optimizer after the very first step;
                        # this discards Adam's state and restores the initial lr.
                        self.optimizer = optim.Adam(self.model.parameters(),
                                                    lr=self.exp_config.init_lr,
                                                    weight_decay=0.0005)

                if aux_loss is not None:
                    aux_loss = aux_loss / aux_loss_weight

            # statistics and outputs
            self._observe_iteration_results(num_it=num_it,
                                            epoch=epoch,
                                            loss=loss,
                                            mix=mix,
                                            est_data=outputs,
                                            gt_data=gt_data,
                                            aux_data=aux_data,
                                            aux_loss=aux_loss)
            running_loss += loss.item() * mix.size(0)

        epoch_loss = running_loss / len(self.loader.dataset)
        logger.info(f'{self.phase} loss: {epoch_loss:.4f}')

        return epoch_loss
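
`_set_train`, `_set_eval`, and `_is_training` are not shown in these examples. A plausible sketch, inferred from Examples #7 and #8 (which use `self.train_loader`/`self.val_loader` and `self.phase`); the exact bodies are assumptions:

    def _set_train(self):
        self.phase = 'train'
        self.loader = self.train_loader
        self.model.train()  # enable dropout / batch-norm updates

    def _set_eval(self):
        self.phase = 'val'
        self.loader = self.val_loader
        self.model.eval()

    def _is_training(self):
        return self.phase == 'train'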
Example #6
def __get_and_set_cookie():
    '''Start the browser, then fetch and save cookies.'''
    ## Start chrome
    driver = __launch_driver()

    ## Log in to obtain fresh cookies
    driver.find_element_by_name("username").send_keys(username)  # enter username
    driver.find_element_by_name("password").send_keys(password)  # enter password
    driver.find_element_by_xpath(
        "//a[@node-type='submitBtn']").click()  # click the login button
    time.sleep(10)  # wait for the login to finish
    logger.info('[chrome] relogin ...')

    ## Fetch and persist cookies
    time.sleep(5)  # wait for the page to load
    cookies = driver.get_cookies()  # fetch cookies
    __dump_cookie(cookies)  # save cookies
    logger.info('[cookie] save cookie to local...')
    return driver
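
Note that `find_element_by_name` / `find_element_by_xpath` were deprecated in Selenium 4 and removed in later 4.x releases; against a current Selenium, the three lookups above would need the `By` locator style:

from selenium.webdriver.common.by import By

# Selenium 4 equivalents of the lookups above.
driver.find_element(By.NAME, "username").send_keys(username)
driver.find_element(By.NAME, "password").send_keys(password)
driver.find_element(By.XPATH, "//a[@node-type='submitBtn']").click()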
Example #7
    def train_model(self):
        """
        Full training pipeline function:
            - training
            - validation/validation with external metrics every 10 iteration
            - full tests every 100 epochs
        :return:
        """
        best_val_loss = np.inf
        curriculum_patience = 0

        for epoch in range(self.num_epochs):
            logger.info(f'Starting epoch {epoch}/{self.num_epochs}.')
            try:
                self._set_train()
                self.run_phase(epoch)

                self._set_eval()
                epoch_loss = self.run_phase(epoch)

                if epoch_loss < best_val_loss - 1e-4:
                    best_val_loss = epoch_loss
                    curriculum_patience = 0
                else:
                    curriculum_patience += 1

            except KeyboardInterrupt:
                torch.save(self.model.state_dict(),
                           os.path.join(self.cp_path, 'INTERRUPTED.pth'))
                logger.info('Saved interrupt')
                sys.exit(0)

            if self.exp_config.curriculum_training:
                if curriculum_patience > self.exp_config.curriculum_patience:
                    train_inc = self.train_loader.dataset.increase_n_mix()
                    self.val_loader.dataset.increase_n_mix()
                    if train_inc:
                        logger.info('Increased number of sources in a mixture')
                        curriculum_patience = 0
                        best_val_loss = np.inf
                    else:
                        self.scheduler.step(epoch_loss)
            else:
                self.scheduler.step(epoch_loss)

            if self.save_cp and not (epoch %
                                     self.exp_config.save_cp_frequency):
                torch.save(self.model.state_dict(),
                           os.path.join(self.cp_path, f'CP{epoch:04d}.pth'))
                logger.info(f'Checkpoint {epoch} saved !')
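
`increase_n_mix` (the curriculum step above) is likewise not among these examples. A minimal sketch, assuming the dataset tracks `n_mix_max` (the attribute logged in Example #8) and reports whether it could still grow; the cap of 4 sources is an assumption:

    def increase_n_mix(self, limit: int = 4):
        """Mix one more source per sample; return False once the cap is reached."""
        if self.n_mix_max >= limit:
            return False
        self.n_mix_max += 1
        return True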
Example #8
    def _write_summary(self,
                       iteration,
                       loss,
                       lr,
                       sdr=None,
                       masks=None,
                       audio=None,
                       aux_loss=None,
                       aux_metrics=None):
        self.writer.add_scalar(f'loss/{self.phase}', loss, iteration)
        logger.info(f'Iteration #{iteration} loss: {loss:.4f}')
        self.writer.add_scalar(f'lr/{self.phase}', lr, iteration)
        self.writer.add_scalar(f'n_max_sources/{self.phase}',
                               self.loader.dataset.n_mix_max, iteration)
        if sdr is not None:
            self.writer.add_scalar(f'sdr/{self.phase}', sdr, iteration)
        if masks is not None:
            gt_masks, predicted_masks = masks
            for ch_idx in range(gt_masks.shape[1]):
                # ground truth and prediction side by side along the time axis
                self.writer.add_images(
                    f'gt_vs_predicted_mask_ch_{ch_idx+1}/{self.phase}',
                    torch.cat([
                        gt_masks[:, ch_idx, :, :],
                        predicted_masks[:, ch_idx, :, :]
                    ],
                              dim=2).unsqueeze_(1),
                    iteration)  # pass the step so panels don't pile up at step 0
        if audio is not None:
            for piece_idx, source_idx in itertools.product(
                    range(audio.shape[0]), range(audio.shape[1])):
                self.writer.add_audio(
                    f'reconstructed_audio_{piece_idx}_{source_idx+1}/{self.phase}',
                    audio[piece_idx, source_idx],
                    iteration,
                    sample_rate=self.exp_config.expected_sr)
        if aux_loss is not None:
            self.writer.add_scalar(f'aux_loss/{self.phase}', aux_loss,
                                   iteration)
            if aux_metrics is not None:  # f1 may be absent (legacy multi-task)
                self.writer.add_scalar(f'aux_f1/{self.phase}', aux_metrics,
                                       iteration)
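
The `self.writer` calls above match `torch.utils.tensorboard.SummaryWriter` (`add_scalar`, `add_images`, `add_audio`). A minimal standalone usage, with the log directory purely illustrative:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/demo')  # hypothetical directory
writer.add_scalar('loss/train', 0.5, 0)      # tag, scalar value, global step
writer.close()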
Example #9
def runMain(now):
    '''Main program.'''
    t_bg = time.perf_counter()
    logger.info('[WeiBo System] start to run...')
    message_file_path = os.path.join(path_message, f'message_{now}.txt')

    spider(urls=[
        f'https://weibo.com/{ii}?is_all=1&stat_date={months}#feedtop'
        for ii in targets
    ],
           message_file_path=message_file_path)

    try:
        send_email(f'Weibo{now}', message_file_path)
        logger.info('[Email] send e-mail successfully...')
    except Exception:
        logger.warning(f'[Email] failed to send {message_file_path}!!!')
    t_ed = time.perf_counter()
    logger.info(f'[WeiBo System] finished, elapsed {t_ed - t_bg:.2f} secs...')
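
A typical entry point for `runMain`, assuming `now` is just a timestamp string embedded in the output file name (the exact format is an assumption):

if __name__ == '__main__':
    from datetime import datetime
    runMain(datetime.now().strftime('%Y%m%d_%H%M'))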