Example #1
    def download_repo(self,
                      dest_dir: str,
                      commit_sha: str = None,
                      timeout: float = None) -> str:
        """Downloads the repository.

        Example:
            Assume the repository is named 'data-demo' owned by 'intrepiditee'.
            The method call
                download_repo('download_dir', '12ef23231a')
            returns 'download_dir/intrepiditee-data-demo-12ef23231a'.

        Args:
            dest_dir: Directory to download the repository into as a string.
            commit_sha: Commit ID that defines the version of the repository
                to download as a string. If not supplied, the master branch
                is downloaded.
            timeout: Maximum time downloading the repository can take in
                seconds, as a float. The actual timeout will be a rough
                approximation to this, likely several seconds larger.

        Returns:
            Path to a directory containing the downloaded repository,
            as a string. The repository's contents are downloaded and copied
            into the same directory structure within the returned directory.

        Raises:
            requests.Timeout: Downloading timed out.
        """
        logging.info(
            'GitHubRepoAPI.download_repo: '
            'Downloading repository %s at commit %s to %s',
            self._format_repo_name(), commit_sha, dest_dir)
        if not commit_sha:
            commit_sha = ''
        download_query = _GITHUB_DOWNLOAD_API.format_map({
            'owner_username': self.owner,
            'repo_name': self.repo,
            'commit_sha': commit_sha
        })

        with tempfile.TemporaryDirectory() as tmpdir:
            repo_tar = utils.download_file(download_query, tmpdir, timeout)
            logging.info('GitHubRepoAPI.download_repo: Downloaded tar %s',
                         repo_tar)
            with tarfile.open(repo_tar) as tar:
                files = tar.getnames()
                if not files:
                    raise FileNotFoundError(
                        'Downloaded tar file does not contain the repository')
                tar.extractall(dest_dir)
                repo_dir = os.path.join(dest_dir,
                                        _get_path_first_component(files[0]))
                logging.info(
                    'GitHubRepoAPI.download_repo: '
                    'Extracted repository %s', repo_dir)
                return repo_dir
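
The extraction step above relies on a _get_path_first_component helper that is
not shown in this example. A minimal sketch of what it presumably does, assuming
GitHub tarballs place every file under a single top-level 'owner-repo-sha'
directory:

def _get_path_first_component(path: str) -> str:
    # Tar member names use forward slashes, so the first component is
    # everything before the first '/'. For
    # 'intrepiditee-data-demo-12ef23231a/README.md' this would return
    # 'intrepiditee-data-demo-12ef23231a'.
    return path.split('/', 1)[0]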
Example #2
def update_static_data():
    """
    Update static data (stops, routes, etc.)
    """
    static_zip = utils.download_file(STATIC_DATA_URL, dest_dir=DATA_DIR)

    utils.unzip_archive(static_zip, DATA_DIR)
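
Examples 1, 2, 4, and 5 all call utils.download_file(url, dest_dir, timeout) and
expect the local path of the downloaded file back. The actual helper is not shown
on this page; a minimal requests-based sketch consistent with those call sites
(the output file naming is an assumption) might look like:

import os
import requests

def download_file(url: str, dest_dir: str, timeout: float = None) -> str:
    # Streams the resource at url into dest_dir and returns the local path.
    # The file is named after the last URL component (an assumption).
    local_path = os.path.join(dest_dir, os.path.basename(url) or 'download')
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(local_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    return local_path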
Example #3
    def _process_urls(self, image_url):
        """
        Download the image from image_url, check that it is not already
        stored in the directory, and store it if it is new.
        :param image_url: URL of the image to download and store
        :return:
        """
        image_name = pathlib.Path(image_url).name
        image = download_file(image_url, self.timeout)
        if image is not None:
            img_hash = hashlib.sha256(image).hexdigest()
            if img_hash not in self.hashes:
                store_file(image, self.path / image_name)
                self.hashes.add(img_hash)
            else:
                logging.info(
                    f'File has already been downloaded. Skipping... Image name: {image_name}'
                )
        else:
            logging.warning(
                f'Got None instead of image content from: {image_url}')
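
Note that the download_file used here is a different helper from
utils.download_file in the other examples: it takes (url, timeout) and returns
the raw response bytes (or None on failure) instead of writing to disk. A
hypothetical sketch consistent with this usage:

from typing import Optional

import requests

def download_file(image_url: str, timeout: float = None) -> Optional[bytes]:
    # Returns the response body as bytes, or None if the request fails,
    # matching the 'if image is not None' check in _process_urls above.
    try:
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()
        return response.content
    except requests.RequestException:
        return None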
Example #4
    def _import_one_helper(self,
                           repo_dir: str,
                           relative_import_dir: str,
                           absolute_import_dir: str,
                           import_spec: dict,
                           run_id: str = None,
                           attempt_id: str = None) -> None:
        """Helper for _import_one.

        Args:
            See _import_one.
            attempt_id: ID of the import attempt executed by the system run
                with the run_id, as a string. This is only used to communicate
                with the import progress dashboard.
        """
        urls = import_spec.get('data_download_url')
        if urls:
            for url in urls:
                utils.download_file(url, absolute_import_dir,
                                    self.config.file_download_timeout)
                if self.dashboard:
                    self.dashboard.info(f'Downloaded: {url}',
                                        attempt_id=attempt_id,
                                        run_id=run_id)

        with tempfile.TemporaryDirectory() as tmpdir:
            requirements_path = os.path.join(absolute_import_dir,
                                             self.config.requirements_filename)
            central_requirements_path = os.path.join(
                repo_dir, self.config.requirements_filename)
            interpreter_path, process = _create_venv(
                (central_requirements_path, requirements_path),
                tmpdir,
                timeout=self.config.venv_create_timeout)

            _log_process(process=process,
                         dashboard=self.dashboard,
                         attempt_id=attempt_id,
                         run_id=run_id)
            process.check_returncode()

            script_paths = import_spec.get('scripts', [])
            for path in script_paths:
                process = _run_user_script(
                    interpreter_path=interpreter_path,
                    script_path=os.path.join(absolute_import_dir, path),
                    timeout=self.config.user_script_timeout,
                    cwd=absolute_import_dir)
                _log_process(process=process,
                             dashboard=self.dashboard,
                             attempt_id=attempt_id,
                             run_id=run_id)
                process.check_returncode()

        inputs = self._upload_import_inputs(
            import_dir=absolute_import_dir,
            output_dir=f'{relative_import_dir}/{import_spec["import_name"]}',
            import_inputs=import_spec.get('import_inputs', []),
            attempt_id=attempt_id)

        if self.importer:
            self.importer.delete_previous_output(relative_import_dir,
                                                 import_spec)

            if self.dashboard:
                self.dashboard.info(
                    'Submitting job to delete the previous import',
                    attempt_id=attempt_id,
                    run_id=run_id)
            self.importer.delete_import(
                relative_import_dir,
                import_spec,
                block=True,
                timeout=self.config.importer_delete_timeout)
            if self.dashboard:
                self.dashboard.info('Deleted previous import',
                                    attempt_id=attempt_id,
                                    run_id=run_id)
                self.dashboard.info('Submitting job to perform the import',
                                    attempt_id=attempt_id,
                                    run_id=run_id)
            self.importer.smart_import(
                relative_import_dir,
                inputs,
                import_spec,
                block=True,
                timeout=self.config.importer_import_timeout)
            if self.dashboard:
                self.dashboard.info('Import succeeded',
                                    attempt_id=attempt_id,
                                    run_id=run_id)

        if self.dashboard:
            self.dashboard.update_attempt(
                {
                    'status': 'succeeded',
                    'time_completed': utils.utctime()
                }, attempt_id)
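
Example #4 also relies on a _log_process helper that is not shown here. A
minimal sketch, assuming it simply records the command, return code, and
captured output of a completed subprocess, both to the standard logger and,
when a dashboard client is configured, to the import progress dashboard:

import logging

def _log_process(process, dashboard=None, attempt_id=None, run_id=None):
    # process is assumed to be a subprocess.CompletedProcess with captured
    # output; the helper's real signature may differ.
    message = (f'Command: {process.args}\n'
               f'Return code: {process.returncode}\n'
               f'stdout: {process.stdout}\n'
               f'stderr: {process.stderr}')
    logging.info(message)
    if dashboard:
        dashboard.info(message, attempt_id=attempt_id, run_id=run_id)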
Example #5
def update_vehicle_position():
    """
    Update vehicle real-time data
    """
    utils.download_file(VEHICLE_POSITION_URL, dest_dir=DATA_DIR)