コード例 #1
0
    def test_md5(self):
        """Checks that a file reports the same MD5 hex digest on every backend.

        The local fixture ``fixtures/links.md`` is copied to Google Cloud
        Storage and, when both AWS credential variables are set in the
        environment, to S3. Each remote copy must report the same ``md5_hex``
        as the local file. Remote copies are deleted even when the comparison
        fails, so a failing run does not leave objects behind in the buckets.
        """
        test_file = os.path.join(os.path.dirname(__file__), "fixtures",
                                 "links.md")
        # Local
        local_file = StorageFile(test_file)
        local_md5 = local_file.md5_hex
        self.assertIsNotNone(local_md5)

        # GCP
        gs_path = "gs://aries_test/links.md"
        local_file.copy(gs_path)
        gs_file = StorageFile(gs_path)
        try:
            self.assertEqual(local_md5, gs_file.md5_hex)
        finally:
            # Clean up the uploaded object whether or not the digests match.
            gs_file.delete()

        # AWS (only when credentials are available in the environment)
        if os.environ.get("AWS_SECRET_ACCESS_KEY") and os.environ.get(
                "AWS_ACCESS_KEY_ID"):
            s3_path = "s3://davelab-test/links.md"
            local_file.copy(s3_path)
            s3_file = StorageFile(s3_path)
            try:
                self.assertEqual(local_md5, s3_file.md5_hex)
            finally:
                # Clean up the uploaded object whether or not the digests match.
                s3_file.delete()
コード例 #2
0
ファイル: utils.py プロジェクト: qiuosier/Cancer
def transfer_file(to_folder_uri, file_id=None, file_info_href=None):
    """Copies a BaseSpace file into a destination folder.

    The file is identified either by its BaseSpace ID or by the href of its
    file-info resource. If both are given, ``file_id`` takes precedence.

    Args:
        to_folder_uri (str): URI of the destination folder. The file name is
            appended with ``os.path.join``.
        file_id: BaseSpace file ID. Optional if ``file_info_href`` is given.
        file_info_href (str): Href of the file-info resource,
            e.g. "v1pre3/files/<id>". Optional if ``file_id`` is given.

    Returns:
        str: The URI of the file at the destination.

    Raises:
        ValueError: If neither ``file_id`` nor ``file_info_href`` is provided.
    """
    fastq_suffix = ".fastq.gz"

    # Determine the file_id, file_info and file_content_href
    if file_id is not None:
        file_info_href = "v1pre3/files/%s" % file_id
        file_content_href = "v1pre3/files/%s/content" % file_id
    elif file_info_href is not None:
        file_id = file_info_href.strip("/").split("/")[-1]
        # Drop a trailing slash so the content href does not contain "//".
        file_content_href = "%s/content" % file_info_href.rstrip("/")
    else:
        raise ValueError("Either BaseSpace file_id or file_info_href is needed for file transfer.")

    # NOTE(review): file_info is used like a dict with "Name" and "Size" keys;
    # confirm against api_response().
    file_info = api_response(file_info_href)
    logger.debug("Transferring file from BaseSpace: %s" % file_content_href)

    # For FASTQ files, add basespace file ID to filename
    # Each MiSeq run may have multiple FASTQ files with the same name.
    filename = file_info.get("Name")
    if filename.endswith(fastq_suffix):
        # Only the trailing extension is rewritten; an earlier ".fastq.gz"
        # inside the name is left untouched.
        stem = filename[:-len(fastq_suffix)]
        filename = "%s_%s%s" % (stem, file_id, fastq_suffix)

    # Skip if a file exists and have the same size.
    to_uri = os.path.join(to_folder_uri, filename)
    dest_file = StorageFile(to_uri)
    file_size = file_info.get("Size")
    if file_size and dest_file.exists() and dest_file.size == file_size:
        logger.debug("File %s exists at destination: %s" % (filename, to_uri))
        return to_uri

    from_uri = build_api_url(file_content_href)
    StorageFile(from_uri).copy(to_uri)
    return to_uri
コード例 #3
0
ファイル: atest_storage_gcp.py プロジェクト: parkerc71/Aries
    def test_gs_file(self):
        """Exercises blob access, reading and writing of a Google Cloud Storage file.

        Uses one object that is expected to exist in the test bucket and one
        that is not; the latter is created by the test and deleted at the end.
        """
        # An object that is already in the bucket root.
        existing = StorageFile("gs://aries_test/file_in_root.txt")
        self.assertFalse(existing.is_gz())
        self.assertTrue(existing.blob.exists())
        self.assertEqual(existing.size, 34)

        # An object that is not in the bucket.
        missing = StorageFile("gs://aries_test/abc.txt")
        self.assertFalse(missing.blob.exists())

        # read() returns the content as bytes and fails for a missing object.
        self.assertEqual(existing.read(), b'This is a file in the bucket root.')
        with self.assertRaises(Exception):
            missing.read()

        # Write to the missing object through the file-like interface.
        payload = b"abc"
        with missing('w+b') as handle:
            handle.write(payload)
            handle.seek(0)
            self.assertEqual(handle.read(), b"abc")

        # The content is expected in the bucket once the handle is closed,
        # so a fresh read should return what was written.
        self.assertEqual(missing.read(), b"abc")
        missing.delete()
コード例 #4
0
ファイル: atest_storage_gcp.py プロジェクト: parkerc71/Aries
 def test_create_and_move_blob(self):
     """Creates an empty object in the bucket, moves it, and checks both locations."""
     source_uri = "gs://aries_test/new_file.txt"
     source = StorageFile(source_uri)

     # The object must not exist before create() and must exist afterwards.
     self.assertFalse(source.blob.exists())
     source.create()
     self.assertTrue(source.blob.exists())

     # After the move, the source is gone and the destination is present.
     dest = "gs://aries_test/moved_file.txt"
     source.move(dest)
     self.assertFalse(source.exists())
     moved = StorageFile(dest)
     self.assertTrue(moved.exists())

     # Clean up.
     moved.delete()
コード例 #5
0
    def get_file_size(self, path, job_name=None, **kwargs):
        """Returns the size of a storage path in GB (bytes / 2**30).

        A path ending in "*" is treated as a prefix. Any other path is looked
        up as a file first and then as a folder; a path that is neither counts
        as size 0. Local paths are not inspected: a warning is logged and
        ``True`` is returned instead of a size.

        Any exception raised during the lookup is logged and re-raised.
        ``job_name`` and ``kwargs`` are accepted but not used here.
        """
        # Local paths are assumed to be present and are not measured.
        if self.__get_file_protocol(path) == "Local":
            logging.warning(f"Ignoring path '{path}' as it is local on the disk image. Assuming the path is present!")
            return True

        bytes_per_gb = 2**30
        try:
            if path.endswith("*"):
                # Prefix: measure everything that starts with the given path.
                size_in_bytes = StoragePrefix(path.rstrip("*")).size
            else:
                # Try the path as a file, then as a folder.
                file_obj = StorageFile(path)
                folder_obj = StorageFolder(path)
                size_in_bytes = 0
                if file_obj.exists():
                    size_in_bytes = file_obj.size
                elif folder_obj.exists():
                    size_in_bytes = folder_obj.size

            return float(size_in_bytes) / bytes_per_gb

        except BaseException as e:
            logging.error(f"Unable to get file size: {path}")
            if str(e) != "":
                logging.error(f"Received the following msg:\n{e}")
            raise
コード例 #6
0
ファイル: fastq_file.py プロジェクト: qiuosier/Cancer
    def __init__(self, file_path):
        """Stores the path of a FASTQ file after checking that it exists.

        Args:
            file_path: Location of the file; converted to ``str`` before use.

        Raises:
            FileNotFoundError: If no file exists at ``file_path``.
        """
        path_str = str(file_path)
        file_found = StorageFile(path_str).exists()
        if not file_found:
            raise FileNotFoundError("File not found at %s." % path_str)

        self.file_path = path_str
        logger.debug("Initialized Illumina FASTQ object.")
コード例 #7
0
    def path_exists(self, path, job_name=None, **kwargs):
        """Tells whether a storage path exists as a prefix, file or folder.

        A path ending in "*" is checked as a prefix. Any other path is checked
        as a file and, if that is not found, as a folder. Local paths are not
        checked: a warning is logged and ``True`` is returned.

        A ``RuntimeError`` during the check is logged (with ``job_name``) and
        reported as ``False``; any other exception is logged and re-raised.
        """
        # Local paths are assumed to be present.
        if self.__get_file_protocol(path) == "Local":
            logging.warning(f"Ignoring path '{path}' as it is local on the disk image. Assuming the path is present!")
            return True

        try:
            logging.debug(f"Checking existence of {path}...")

            # Prefix lookup
            if path.endswith("*"):
                prefix = StoragePrefix(path.rstrip("*"))
                return prefix.exists()

            # File lookup first; the folder is only consulted if the file is not found.
            found = StorageFile(path).exists()
            if not found:
                found = StorageFolder(path).exists()
            return found

        except RuntimeError as e:
            traceback.print_exc()
            if str(e) != "":
                logging.error(f"StorageHelper error for {job_name}:\n{e}")
            return False
        except BaseException:
            traceback.print_exc()
            logging.error(f"Unable to check path existence: {path}")
            raise
コード例 #8
0
ファイル: source.py プロジェクト: qiuosier/Virgo
    def get_intraday_series(self, symbol, date=None):
        """Gets a pandas data frame of intraday series data for one day.

        Args:
            symbol (str): The name of the equity/stock.
            date (str, optional): Date formatted with ``self.date_fmt``,
                e.g. 2017-02-12. Defaults to None.

        Returns: A pandas data frame indexed by 'timestamp', with ``symbol``
            attached as the ``symbol`` attribute.
            If date is given, the frame holds the rows of that day and may be empty.
            If date is None, the lookup starts today and steps back one day at a
            time until a non-empty frame is found or 100 days have been tried;
            the frame of the last day tried is returned, which may be empty.

        """
        series_type = self.intraday_series_type
        # Keep the caller's request; ``date`` is overwritten while stepping back.
        requested_date = date
        one_day = datetime.timedelta(days=1)
        day_delta = 0
        df = None
        # The first pass always runs (df is None). Further passes only happen
        # when no date was requested and the previous day had no data.
        while df is None or (requested_date is None and df.empty
                             and day_delta < 100):
            if requested_date is None:
                look_back = datetime.timedelta(days=day_delta)
                date = (datetime.datetime.now() - look_back).strftime(
                    self.date_fmt)
            logger.debug("Getting data for %s" % date)

            # next_date is the following day as a string; it is the exclusive
            # upper bound when filtering rows by timestamp.
            day_start = datetime.datetime.strptime(date, self.date_fmt)
            next_date = (day_start.date() + one_day).strftime(self.date_fmt)

            # Look for a cache file for this day when caching is enabled.
            use_cache = self.cache
            cached_file = None
            if use_cache:
                file_path = self.__cache_file_path(symbol, series_type, date)
                candidate = StorageFile(file_path)
                if candidate.exists():
                    cached_file = candidate

            if cached_file is not None:
                # Cached data is used as stored, without filtering.
                logger.debug("Reading existing data... %s" % file_path)
                with cached_file('r') as f:
                    df = pd.read_csv(f,
                                     index_col=0,
                                     parse_dates=['timestamp'])
            else:
                if use_cache:
                    df = self.__intraday_get_full_data(symbol)
                else:
                    # Request new data
                    df = self.__request_data(symbol, series_type, 'full')
                in_day = (df['timestamp'] >= date) & (df['timestamp'] < next_date)
                df = df[in_day]

            day_delta += 1

        if df is not None:
            df.set_index('timestamp', inplace=True)
        df.symbol = symbol
        return df
コード例 #9
0
ファイル: atest_storage_gcp.py プロジェクト: parkerc71/Aries
    def setUpClass(cls):
        """Sets up GCP credentials and removes leftovers of earlier test runs.

        ``cls.GCP_ACCESS`` is set to True once the bucket-listing call returns
        without raising. Any exception raised during the check or the cleanup
        is printed with its traceback and not propagated.
        """
        credential_file = os.path.join(os.path.dirname(__file__), "gcp.json")
        gs.setup_credentials("GOOGLE_CREDENTIALS", credential_file)
        super().setUpClass()

        # Objects that individual tests create and may have left behind.
        leftover_files = (
            "gs://aries_test/copy_test",
            "gs://aries_test/abc.txt",
            "gs://aries_test/new_file.txt",
            "gs://aries_test/moved_file.txt",
            "gs://aries_test/local_upload.txt",
        )
        try:
            # Check if GCP is accessible by listing all the buckets
            storage.Client().list_buckets(max_results=1)
            cls.GCP_ACCESS = True

            # Removes test folder if it is already there, then the stray files.
            StorageFolder("gs://aries_test/copy_test/").delete()
            for leftover_uri in leftover_files:
                StorageFile(leftover_uri).delete()
        except Exception as ex:
            print("%s: %s" % (type(ex), str(ex)))
            traceback.print_exc()
コード例 #10
0
    def get_file_size(self, path, job_name=None, **kwargs):
        """Returns the size of a storage path in GB (bytes / 2**30), with retries.

        A path ending in "*" is treated as a prefix. Any other path is polled
        as a file or folder up to 11 times, sleeping 0, 1, ..., 10 seconds
        before each attempt; if it never shows up the size counts as 0.
        Local paths are not inspected: a warning is logged and ``True`` is
        returned instead of a size.

        If the lookup raises an exception whose message contains
        "dictionary changed size", the whole call is repeated with
        ``retry_count`` (read from ``kwargs``, default 0) increased by one.
        Once ``retry_count`` is no longer below 5, a warning is logged and 0
        is returned. Any other exception is logged and re-raised.
        """
        retry_count = kwargs.get("retry_count", 0)

        # Local paths are assumed to be present and are not measured.
        if self.__get_file_protocol(path) == "Local":
            logging.warning(f"Ignoring path '{path}' as it is local on the disk image. Assuming the path is present!")
            return True

        # Give up once the retry budget is used.
        if not retry_count < 5:
            logging.warning(f"Failed to get size of '{path}'! Attempted to retrieve size {retry_count + 1} times.")
            return 0

        try:
            if path.endswith("*"):
                # Prefix: measure everything that starts with the given path.
                size_in_bytes = StoragePrefix(path.rstrip("*")).size
            else:
                file_obj = StorageFile(path)
                folder_obj = StorageFolder(path)
                size_in_bytes = 0

                # Poll with a growing delay until the path shows up.
                for attempt in range(11):
                    time.sleep(attempt)

                    if file_obj.exists():
                        size_in_bytes = file_obj.size
                        break
                    if folder_obj.exists():
                        size_in_bytes = folder_obj.size
                        break

                    trial_count = attempt + 1
                    logging.warning(f"Cannot get size of '{path}' as it does not exist! Trial {trial_count}/10")
                else:
                    # Every attempt failed; the size stays 0.
                    logging.error(f"Cannot get size of '{path}' as it doesn't exist after multiple trials!")

            # Convert to GB
            return float(size_in_bytes) / 2**30

        except BaseException as e:
            logging.error(f"Unable to get file size: {path}")
            message = str(e)
            if message != "":
                logging.error(f"Received the following msg:\n{e}")
            if "dictionary changed size" in message:
                kwargs['retry_count'] = retry_count + 1
                return self.get_file_size(path, job_name, **kwargs)
            raise
コード例 #11
0
ファイル: atest_storage_web.py プロジェクト: parkerc71/Aries
    def test_http(self):
        """Checks existence tests for HTTP(S) URLs and downloading to a local file."""
        # A URL that is expected not to exist.
        missing_url = StorageFile("http://example.com/abc/")
        self.assertFalse(missing_url.exists())

        # A URL that is expected to exist.
        live_url = StorageFile("https://www.google.com")
        self.assertTrue(live_url.exists())

        # Download: copy a remote PDF into the test folder.
        remote_pdf = StorageFile("https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf")
        local_file_path = os.path.join(self.test_folder_path, "test.pdf")
        # Start from a clean state so the existence check below is meaningful.
        if os.path.exists(local_file_path):
            os.remove(local_file_path)
        remote_pdf.copy(local_file_path)
        self.assertTrue(os.path.exists(local_file_path))

        downloaded = StorageFile(local_file_path)
        self.assertGreater(downloaded.size, 0)
        downloaded.delete()
コード例 #12
0
ファイル: atest_storage_gcp.py プロジェクト: parkerc71/Aries
 def test_upload_from_file(self):
     """Uploads a local file to the bucket and checks the failure and success paths."""
     here = os.path.dirname(__file__)
     target = StorageFile("gs://aries_test/local_upload.txt")

     # Uploading from a path that does not exist must raise FileNotFoundError.
     missing_local_path = os.path.join(here, "abc.txt")
     with self.assertRaises(FileNotFoundError):
         target.upload_from_file(missing_local_path)

     # Uploading a real fixture makes its content readable from the bucket.
     fixture_path = os.path.join(here, "fixtures", "test_file.txt")
     target.upload_from_file(fixture_path)
     self.assertEqual(target.read(), b'This is a local test file.\n')

     # Clean up.
     target.delete()
コード例 #13
0
ファイル: atest_storage.py プロジェクト: parkerc71/Aries
    def test_create_copy_and_delete_file(self):
        """Creates a sub folder, copies files into a temp folder, then empties it."""
        source_folder = os.path.join(self.TEST_ROOT, "test_folder_0")
        new_folder_uri = os.path.join(self.TEST_ROOT, "new_folder")
        with TempFolder(new_folder_uri) as folder:
            self.assertTrue(folder.is_empty())

            # Create a sub folder inside the new folder
            sub_folder_uri = os.path.join(new_folder_uri, "sub_folder")
            logger.debug(sub_folder_uri)
            sub_folder = StorageFolder(sub_folder_uri).create()
            self.assertTrue(sub_folder.exists())

            # Both copies below go to the same destination.
            dst_file_path = os.path.join(new_folder_uri, "copied_file")

            # Copy an empty file
            empty_source = StorageFile(os.path.join(source_folder, "empty_file"))
            logger.debug(empty_source.exists())
            time.sleep(2)
            empty_source.copy(dst_file_path)
            self.assertTrue(StorageFile(dst_file_path).exists())

            # Copy a file with content and replace the empty file
            text_source = StorageFile(os.path.join(source_folder, "abc.txt"))
            text_source.copy(dst_file_path)
            dst_file = StorageFile(dst_file_path)
            self.assertTrue(dst_file.exists())
            # read() is a shortcut that returns the content as bytes.
            self.assertEqual(dst_file.read(), b"abc\ncba\n")

            # Empty the folder. The folder itself stays; the copied file and
            # the sub folder are removed.
            folder.empty()
            self.assertTrue(folder.exists())
            self.assertTrue(folder.is_empty())
            self.assertFalse(sub_folder.exists())
            self.assertFalse(dst_file.exists())
コード例 #14
0
ファイル: source.py プロジェクト: qiuosier/Virgo
    def __get_valid_daily_cache(self, symbol):
        """Gets the latest un-expired cache file for daily data.

        Cache files are looked up day by day, starting today and going back
        ``self.daily_cache_expiration - 1`` days; the first one found wins.

        Args:
            symbol (str): The symbol of the equity/stock.

        Returns:
            StorageFile: The cache file if an un-expired one exists. Otherwise None.
        """
        for days_back in range(self.daily_cache_expiration):
            day = datetime.datetime.now() - datetime.timedelta(days=days_back)
            date_str = day.strftime(self.date_fmt)
            file_path = self.__cache_file_path(symbol, self.daily_series_type,
                                               date_str)
            candidate = StorageFile(file_path)
            if not candidate.exists():
                continue
            return candidate
        return None
コード例 #15
0
ファイル: atest_storage.py プロジェクト: parkerc71/Aries
 def test_binary_read_write(self):
     """Writes bytes to a new file, reads them back, then deletes the file."""
     # The file does not exist yet; opening it for writing creates it.
     file_uri = os.path.join(self.TEST_ROOT, "test.txt")
     storage_file = StorageFile(file_uri).open("wb")
     self.assertEqual(storage_file.scheme, self.SCHEME)

     # In write-only binary mode the file is seekable but not readable.
     self.assertTrue(storage_file.seekable())
     self.assertFalse(storage_file.readable())

     # Each write returns the number of bytes written and advances the position.
     first_written = storage_file.write(b"abc")
     self.assertEqual(first_written, 3)
     self.assertEqual(storage_file.tell(), 3)
     second_written = storage_file.write(b"def")
     self.assertEqual(second_written, 3)
     self.assertEqual(storage_file.tell(), 6)
     storage_file.close()
     self.assertTrue(storage_file.exists())

     # Re-open for reading and check the combined content.
     storage_file.open('rb')
     content = storage_file.read()
     self.assertEqual(content, b"abcdef")
     storage_file.close()

     # Clean up and confirm the file is gone.
     storage_file.delete()
     self.assertFalse(storage_file.exists())
コード例 #16
0
    def test_parse_uri(self):
        """Tests parsing S3 URIs for a file, a folder and the bucket root.
        """
        bucket_uri = "s3://%s" % self.TEST_BUCKET_NAME

        # File: the path keeps the leading slash.
        file_obj = StorageFile("s3://%s/test_file.txt" % self.TEST_BUCKET_NAME)
        self.assertEqual(file_obj.scheme, "s3")
        self.assertEqual(file_obj.path, "/test_file.txt")

        # Folder: a trailing slash is added to both the URI and the path.
        folder_obj = StorageFolder("s3://%s/test_folder" %
                                   self.TEST_BUCKET_NAME)
        expected_folder_uri = "s3://%s/test_folder/" % self.TEST_BUCKET_NAME
        self.assertEqual(folder_obj.uri, expected_folder_uri)
        self.assertEqual(folder_obj.scheme, "s3")
        self.assertEqual(folder_obj.path, "/test_folder/")

        # Bucket root: the path is a single slash.
        root_obj = StorageFolder(bucket_uri)
        self.assertEqual(root_obj.uri, "s3://%s/" % self.TEST_BUCKET_NAME)
        self.assertEqual(root_obj.scheme, "s3")
        self.assertEqual(root_obj.path, "/")
コード例 #17
0
ファイル: atest_storage_gcp.py プロジェクト: parkerc71/Aries
 def copy_from_http(self):
     """Copies a PDF from an HTTPS URL into a GCS bucket and removes it again."""
     source_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
     gs_path = "gs://davelab_temp/qq6/test.pdf"

     remote_pdf = StorageFile(source_url)
     remote_pdf.copy(gs_path)

     # The copy must be visible in the bucket before it is cleaned up.
     self.assertTrue(StorageFile(gs_path).exists())
     StorageFile(gs_path).delete()