def cloud_download(self, local_dir='/tmp/'):
    """Retrieve a file stored on the cloud to a local file.

    This specifically downloads files stored on Google Drive (looks for
    'gdrive' in `self.path`); other cloud storage types are still WIP.

    Parameters
    ----------
    local_dir : str
        Path where the file will be stored locally (temporarily, maybe?)
    """
    if self.path is None:
        # Nothing to see here (possibly raise an exception?)
        return
    if not os.path.exists(self.path):
        import cottoncandy as cc
        if 'gdrive/' in self.path:
            cci = cc.get_interface(backend='gdrive')
            cloud_dir = self.path.split('gdrive/')[-1]
        elif 's3/' in self.path:
            cloud_dir = self.path.split('s3/')[-1]
            bucket_name = cloud_dir.split('/')[0]
            cci = cc.get_interface(bucket_name=bucket_name)
        tmp_dir = os.path.join(local_dir, cloud_dir)
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        cci.download_to_file(os.path.join(cloud_dir, self.fname),
                             os.path.join(tmp_dir, self.fname))
        # Shouldn't overwrite the permanent `path` property permanently...
        # self.path = tmp_dir
        self._tmppath = tmp_dir
    else:
        print("Doing nothing - file exists locally.")
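# Hedged usage sketch for `cloud_download` above. It assumes the method is
# reachable as a plain function in this scope and lives on a class exposing
# `path` (a location string containing 'gdrive/' or 's3/') and `fname` (the
# object's file name), with cottoncandy credentials already configured. The
# `RemoteStimulus` class, bucket, and file names are hypothetical.
import os

class RemoteStimulus(object):
    def __init__(self, path, fname):
        self.path = path      # e.g. 's3/<bucket>/<prefix>' or '.../gdrive/<prefix>'
        self.fname = fname    # object name under that prefix
        self._tmppath = None

RemoteStimulus.cloud_download = cloud_download  # attach the method defined above

stim = RemoteStimulus('s3/my-bucket/stimuli', 'story1.wav')  # hypothetical values
stim.cloud_download(local_dir='/tmp/')
local_copy = os.path.join(stim._tmppath, stim.fname)  # local copy of the file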
def _get_cc_interface(self, bucket):
    """Get a cottoncandy interface for the given bucket.

    Will first check the local cache.
    """
    if bucket not in self.ccinterfaces:
        self.ccinterfaces[bucket] = cc.get_interface(bucket)
    return self.ccinterfaces[bucket]
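# Hedged sketch of the cache above in use: it assumes only what the method
# implies, namely that the owning class keeps a `ccinterfaces` dict and that a
# module-level `cc` (cottoncandy) import exists. `BucketStore` and 'my-bucket'
# are hypothetical names; real credentials for the bucket would be required.
import cottoncandy as cc

class BucketStore(object):
    def __init__(self):
        self.ccinterfaces = {}  # bucket name -> cottoncandy interface

BucketStore._get_cc_interface = _get_cc_interface  # attach the helper defined above

store = BucketStore()
cci_a = store._get_cc_interface('my-bucket')  # first call builds the interface
cci_b = store._get_cc_interface('my-bucket')  # second call returns the cached one
assert cci_a is cci_b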
def get_transcript_uris(bucket='stimulidb', extension='txt'):
    cci = cc.get_interface(bucket)
    # get all the objects in `bucket` that end with `extension`
    all_names = cci.glob('*.%s' % extension)
    # fix each name into a URI and return the list
    fixed_names = [cc.utils.pathjoin('s3://', bucket, n) for n in all_names]
    return fixed_names
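# Hedged usage sketch for `get_transcript_uris` above, assuming cottoncandy is
# already configured with credentials for the 'stimulidb' bucket; the example
# output in the comment is illustrative, not a real listing.
uris = get_transcript_uris(bucket='stimulidb', extension='txt')
for uri in uris:
    print(uri)  # e.g. 's3://stimulidb/<object_name>.txt'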
def GetVideoInfo(self):
    """Gets video info."""
    vc = cv2.VideoCapture(self.fileName)
    if vc.isOpened():
        self.video = vc
        self.width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.fps = self.video.get(cv2.CAP_PROP_FPS)
        self.nFrames = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
        self.duration = self.nFrames / self.fps  # duration in seconds
        self.isVidCap = True
    else:
        # not a video file on disk
        import cottoncandy
        self.isVidCap = False
        if self.fileName[:3] == 's3:':
            # file is on S3; assumed to be gzipped
            fileName = self.fileName[5:]  # since S3 paths begin with 's3://'
            bucket = fileName.split('/')[0]
            cloud = cottoncandy.get_interface(bucket)
            zippedData = cloud.download_stream(fileName[(len(bucket) + 1):]).content
        else:
            # zipped file on disk
            with open(self.fileName, 'rb') as f:
                zippedData = f.read()
        # decompress the stream in chunks
        self.video = b''
        zipFile = cottoncandy.utils.GzipInputStream(zippedData, 20 * (2 ** 20))
        while True:
            chunk = zipFile.read(10 * (2 ** 20))
            if not chunk:
                break
            self.video += chunk
        del zippedData, zipFile
        import struct
        # unpack what appears to be the AVI main header fields starting at byte 32
        metadata = struct.unpack('i' * 14, self.video[32:(32 + 56)])
        self.width = metadata[8]
        self.height = metadata[9]
        self.nFrames = metadata[4]
        # metadata[0] is the per-frame duration in microseconds
        self.fps = int(1000000.0 / metadata[0])
        self.duration = metadata[0] * metadata[4] / 1000000.0
import cottoncandy as cc
import numpy as np

access_key = 'SSE14CR7P0AEZLPC7X0R'
secret_key = 'K0MmeXiXotrGIiTeRwEKizkkhR4qFV8tr8cIXprI'
endpoint_url = 'http://c3-dtn02.corral.tacc.utexas.edu:9002/'

cci = cc.get_interface('story-mri-data',
                       ACCESS_KEY=access_key,
                       SECRET_KEY=secret_key,
                       endpoint_url=endpoint_url)

a = False
b = False
c = True
sub = 'AHfs'

# 1. For downloading fMRI data for a subject
if a:
    R_obs = cci.download_raw_array(sub + '/wheretheressmoke-10sessions')
    np.save('fmri_' + sub + '.dat', R_obs)

# 2. For fetching the JSON file of subject xfms
if b:
    xfm_file = cci.download_json('subject_xfms')
    print(xfm_file[sub])

# 3. For finding voxels with the highest correlation over time.
if c:
    for sub in ['SJ']:  # ,'SS','EB03','EB05','S03']:
        R_obs = cci.download_raw_array(sub + '/wheretheressmoke-10sessions')
        # R_obs = np.load('fullmatrix_R_' + sub + '.dat.npy')
        Voxels = R_obs.shape[2]
prefix = 'testcc/%s/py%s' % (DATE, sys.version[:6])
object_name = os.path.join(prefix, 'test')

# login
##############################
if True:  # for travis testing on AWS.
    bucket_name = os.environ['DL_BUCKET_NAME']
    AK = os.environ['DL_ACCESS_KEY']
    SK = os.environ['DL_SECRET_KEY']
    URL = os.environ['DL_URL']
    cci = cc.get_interface(bucket_name,
                           ACCESS_KEY=AK,
                           SECRET_KEY=SK,
                           endpoint_url=URL,
                           verbose=False)
else:
    ##############################
    # Warning
    ##############################
    # This will use your defaults to run the tests on.
    # If you use AWS, you might incur costs.
    cci = cc.get_interface()

##############################
# tests
##############################
import cottoncandy as cc
import os
import numpy as np

# Data paths
bucket_name = "carson_public_datasets"
bucket_path = "/human_connectome_project/3T/{subject}/mrf_training/{slice}"

# Subject/slices
subslices = np.load("/auto/k1/carson/glab/mrf/train_data_list.npy")

# Output paths
outpath = "/datasets/christine/mrf/{subject}/{slice}"

cci = cc.get_interface(bucket_name)

icount = 0
print("Total # of slices: %d" % len(subslices))
for isubject, islice in subslices:
    fpath = bucket_path.format(subject=isubject, slice=islice)
    opath = outpath.format(subject=isubject, slice=islice)
    if not os.path.exists(opath):
        os.makedirs(opath)
    mrf = cci.download_npy_array(os.path.join(fpath, "mrf.npy"))
    t1_t2_pd = cci.download_npy_array(os.path.join(fpath, "t1_t2_pd.npy"))
    np.save(os.path.join(opath, "mrf.npy"), mrf)
    np.save(os.path.join(opath, "t1_t2_pd.npy"), t1_t2_pd)
    icount += 1
    if icount % 100 == 0:
        print("Finished %d slices" % icount)