Exemple #1
0
    def upload_data_to_kaggle(self):
        """Upload the content of every configured output folder to Kaggle.

        For each folder in ``self.config.output_folder``:

        * every regular file directly inside it is uploaded as-is;
        * every sub-directory is packed into a temporary archive and that
          archive is uploaded, but only when ``self.kaggle_dirmode`` is
          ``'zip'`` or ``'tar'`` (otherwise sub-directories are skipped).

        The collected upload tokens are handed to ``prepare_dataset`` and the
        result is posted to ``/datasets/create/new``.

        A ``None`` token (what ``upload_file_to_kaggle`` returns when an
        upload does not succeed) is left out, so the dataset request never
        references a file that was not actually uploaded.
        """
        files = []
        for output_folder in self.config.output_folder:
            t.log_message('Uploading an output folder to the Kaggle: ' +
                          output_folder)
            for item in os.listdir(output_folder):
                path = os.path.join(output_folder, item)
                token = None
                if os.path.isfile(path):
                    token = self.upload_file_to_kaggle(path)
                elif os.path.isdir(path) and self.kaggle_dirmode in ('zip',
                                                                     'tar'):
                    # The archive lives in a throw-away directory that is
                    # removed even when archiving or uploading fails.
                    temp_dir = tempfile.mkdtemp()
                    try:
                        dir_name = os.path.basename(path)
                        archive_path = shutil.make_archive(
                            os.path.join(temp_dir, dir_name),
                            self.kaggle_dirmode, path)
                        token = self.upload_file_to_kaggle(archive_path)
                    finally:
                        shutil.rmtree(temp_dir)
                # Failed uploads yield no token; do not register them.
                if token is not None:
                    files.append({'token': token})
            t.log_message(output_folder + ' - uploaded.')

        dataset = self.prepare_dataset(files)
        self.kaggle_api_call(resource='/datasets/create/new',
                             method='POST',
                             body=dataset)
Exemple #2
0
 def upload_file_to_kaggle(self, file_path: str):
     """Upload one file to Kaggle and return its upload token.

     A POST to ``/datasets/upload/file/<size>/<mtime>`` yields a JSON
     payload containing ``createUrl`` and ``token``; the file content is
     then PUT to ``createUrl``. The token is returned only when that PUT
     answers with status 200 or 201.

     Args:
         file_path: Path of the file to upload.

     Returns:
         The token from the Kaggle response, or ``None`` when the upload
         was not accepted or any exception occurred. Problems are logged,
         never raised.
     """
     file_token = None
     try:
         file_name = os.path.basename(file_path)
         content_length = os.path.getsize(file_path)
         last_modified_date_utc = int(os.path.getmtime(file_path))
         post_params = [('fileName', file_name)]
         kaggle_response = self.kaggle_api_call(
             resource='/datasets/upload/file/' + str(content_length) + '/' +
             str(last_modified_date_utc),
             method='POST',
             post_params=post_params)
         kaggle_data = json.loads(kaggle_response.data.decode('utf8'))
         create_url = kaggle_data['createUrl']
         with io.open(file_path, 'rb', buffering=0) as fp:
             reader = io.BufferedReader(fp)
             # Use the session as a context manager so its connections are
             # closed once the upload is over instead of leaking.
             with requests.Session() as session:
                 retries = Retry(total=10, backoff_factor=0.5)
                 adapter = HTTPAdapter(max_retries=retries)
                 session.mount('http://', adapter)
                 session.mount('https://', adapter)
                 response = session.put(create_url, data=reader)
             if response.status_code in (200, 201):
                 file_token = kaggle_data['token']
         if file_token is None:
             t.log_message('Upload unsuccessful: ' + file_path)
     except Exception as error:
         # Best effort: one failing file must not abort the whole upload.
         t.log_message('Upload failed: ' + file_path + '\n' + str(error))
     return file_token
Exemple #3
0
 def start_auth_telegram(self, client_config):
     """Authorise Google access, asking for the code through the telegram bot.

     The auth URL is sent to the telegram bot first. A verification code is
     then requested from the bot when no credentials are available, or when
     the access token has expired and there is no refresh token; an expired
     token with a refresh token is refreshed instead. Credentials are saved
     afterwards if they changed and the ``save_credentials`` setting is on.

     Args:
         client_config: Path of the client config file given to
             ``GoogleAuth.LoadClientConfigFile``.

     Returns:
         The ``GoogleAuth`` object, or ``None`` when ``self.telegram_bot``
         is ``None``.
     """
     if self.telegram_bot is None:
         t.log_message('telegram bot is None. Telegram auth canceled.')
         return

     auth = GoogleAuth()
     auth.LoadClientConfigFile(client_config_file=client_config)
     if auth.flow is None:
         auth.GetFlow()
     auth.flow.redirect_uri = OOB_CALLBACK_URN

     prompt = (
         'Please go to the following link in your browser and send me a Google verification code. \nAuth url: '
         + auth.GetAuthUrl())
     self.telegram_bot.send_message(prompt)

     should_save = auth.settings.get('save_credentials')
     if auth.credentials is None and should_save:
         auth.LoadCredentials()

     verification_code = None
     credentials_changed = False
     if auth.credentials is None:
         # Nothing stored: a fresh code from the user is required.
         verification_code = self.telegram_bot.get_code()
         credentials_changed = True
     elif auth.access_token_expired:
         if auth.credentials.refresh_token is None:
             verification_code = self.telegram_bot.get_code()
         else:
             auth.Refresh()
         credentials_changed = True

     if verification_code is not None:
         auth.Auth(verification_code)
     if credentials_changed and should_save:
         auth.SaveCredentials()
     return auth
Exemple #4
0
 def run(self, run_file_list: list):
     """Execute each listed Python script in a child interpreter, in order.

     Missing files are logged and skipped. A script that cannot be started
     or that exits with a non-zero status is logged; the remaining scripts
     still run.

     Args:
         run_file_list: Paths of the Python files to execute.
     """
     import subprocess
     import sys

     # Prefer the interpreter running this code; fall back to PATH lookup.
     interpreter = sys.executable or 'python'
     for item in run_file_list:
         if not os.path.exists(item):
             t.log_message('ERROR. File not found: ' + item)
             continue
         t.log_message("Executing file: " + item)
         # An argument list (no shell) keeps paths containing spaces or
         # shell metacharacters from being split or interpreted.
         try:
             completed = subprocess.run([interpreter, item])
         except OSError as error:
             t.log_message('ERROR. Could not execute: ' + item + '\n' +
                           str(error))
             continue
         if completed.returncode != 0:
             t.log_message('ERROR. File: ' + item + ' exited with code ' +
                           str(completed.returncode))
Exemple #5
0
 def send_output_to_gdrive(self, output_folders: list, drive_folders: list):
     """Upload every output folder into every given Google Drive folder.

     The upload is cancelled, with a log message, when ``self.gauth`` is
     ``None`` while ``self.config.gdrive_folders`` is set.

     Args:
         output_folders: Local folders passed to ``upload_to_drive``.
         drive_folders: Drive folders, each receiving all output folders.
     """
     missing_auth = self.gauth is None
     if missing_auth and self.config.gdrive_folders is not None:
         t.log_message('GoogleDrive is unauthorised. Upload canceled.')
         return

     drive = GoogleDrive(self.gauth)
     t.log_message('Uploading an output folders to the Google Drive')
     # Drive folder is the outer loop: one Drive folder is filled completely
     # before the next one is started.
     upload_plan = [(local_folder, target)
                    for target in drive_folders
                    for local_folder in output_folders]
     for local_folder, target in upload_plan:
         self.upload_to_drive(local_folder, target, drive)
Exemple #6
0
 def download_list(self, url_list: list):
     """Download every URL and optionally unpack downloaded archives.

     When ``self.config.extract_archives`` is truthy and the downloaded
     file name ends with an extension ``shutil`` can unpack, the archive is
     unpacked with ``shutil.unpack_archive`` (default extraction directory,
     i.e. the current working directory). A failure for one URL is logged
     and the remaining URLs are still processed.

     Args:
         url_list: URLs to download.
     """
     # Match on real file extensions ('.zip', '.tar.gz', '.tgz', ...).
     # shutil.get_archive_formats() only yields format *names* such as
     # 'gztar', so comparing the file extension with it misses every
     # compressed tarball.
     archive_suffixes = tuple(
         suffix
         for _name, suffixes, _description in shutil.get_unpack_formats()
         for suffix in suffixes)
     for item in url_list:
         t.log_message('Downloading: ' + item)
         try:
             download = Download(item, retries=5)
             download.download()
             path = os.path.abspath(download.download_path)
             if path.endswith(
                     archive_suffixes) and self.config.extract_archives:
                 shutil.unpack_archive(path)
         except Exception as e:
             # Best effort: keep going with the next URL.
             t.log_message("ERROR. Download: " + item + ' FAILED.\n' +
                           str(e))
Exemple #7
0
 def build_workspace(self):
     """Run the whole workspace pipeline described by ``self.config``.

     Steps, each executed only when its config entry is not ``None``:
     download datasets, clone repositories, run scripts, send the output
     folders to Google Drive, upload the output folders to Kaggle. Google
     auth is obtained up front via ``get_gauth``. Does nothing when
     ``self.config`` is ``None``. A completion message is logged and, when
     a telegram bot is available, also sent through it.
     """
     if self.config is None:
         return
     self.gauth = self.get_gauth()
     if self.config.dataset_list is not None:
         self.download_list(self.config.dataset_list)
     if self.config.repos is not None:
         self.clone_repos(self.config.repos)
     if self.config.script_files is not None:
         self.run(self.config.script_files)
     if self.config.gdrive_folders is not None:
         self.send_output_to_gdrive(self.config.output_folder,
                                    self.config.gdrive_folders)
     if self.config.kaggle is not None:
         # The directory mode has to be in place before the upload starts:
         # upload_data_to_kaggle reads self.kaggle_dirmode to decide how
         # sub-directories are archived.
         if 'dirmode' in self.config.kaggle:
             self.kaggle_dirmode = self.config.kaggle['dirmode']
         self.upload_data_to_kaggle()
     t.log_message('Done.')
     if self.telegram_bot is not None:
         self.telegram_bot.send_message('Workspace build done.')
Exemple #8
0
 def get_gauth(self):
     """Return an authorised ``GoogleAuth`` object, or ``None``.

     Resolution order:

     1. If a ``drive_credentials`` file exists next to this module, load
        it and return immediately.
     2. Otherwise, only when ``self.config.gdrive_folders`` is set, run an
        interactive authorisation: through the telegram bot when both
        ``self.config.telegram_channels`` and ``self.telegram_bot`` are
        available, else through the command line. The new credentials are
        saved to the ``drive_credentials`` file.

     Any exception is logged and results in ``None``.
     """
     package_path = os.path.dirname(__file__)
     client_config = os.path.join(package_path, 'client_secrets.json')
     credentials_file = os.path.join(package_path, 'drive_credentials')

     if os.path.exists(credentials_file):
         try:
             gauth = GoogleAuth()
             gauth.LoadClientConfigFile(client_config_file=client_config)
             gauth.LoadCredentialsFile(credentials_file=credentials_file)
             return gauth
         except Exception as e:
             # Stored credentials are unusable; fall through to a new auth.
             t.log_message(str(e))

     # No Drive folders configured: nothing needs authorisation. The former
     # ``a and b or c`` condition parsed as ``(a and b) or c`` and started
     # the command-line flow here whenever telegram_channels was None.
     if self.config.gdrive_folders is None:
         return None

     use_telegram = (self.config.telegram_channels is not None
                     and self.telegram_bot is not None)
     gauth = None
     try:
         if use_telegram:
             gauth = self.start_auth_telegram(client_config=client_config)
         else:
             gauth = GoogleAuth()
             gauth.LoadClientConfigFile(client_config_file=client_config)
             gauth.CommandLineAuth()
         gauth.SaveCredentialsFile(credentials_file=credentials_file)
     except Exception as e:
         t.log_message(str(e))
         gauth = None
     return gauth
Exemple #9
0
 def clone_repos(self, repos: dict):
     """Clone every configured repository into a folder named after its key.

     Each value of ``repos`` is expected to provide a ``'url'`` entry and
     may provide a ``'branch'`` entry. An already existing target folder is
     removed before cloning. Entries without a URL are logged and skipped.

     Args:
         repos: Mapping of target folder name to the repository settings.
     """
     for repo_name, repo_data in repos.items():
         if 'url' not in repo_data:
             t.log_message('ERROR. URL not found for a repo: ' + repo_name)
             continue

         url: str = repo_data['url']
         branch: str = repo_data['branch'] if 'branch' in repo_data else None

         # Start from a clean folder so the clone never hits leftovers.
         if os.path.exists(repo_name):
             shutil.rmtree(repo_name)

         message = 'Cloning repo: ' + url
         clone_options = {}
         if branch is not None:
             message += ', branch: ' + branch
             clone_options['branch'] = branch
         message += ', to the folder: ' + repo_name

         t.log_message(message)
         Repo.clone_from(url=url, to_path=repo_name, **clone_options)