Ejemplo n.º 1
0
    def cloud_download(self, local_dir='/tmp/'):
        """Retrieve a file stored on cloud storage to a local directory.

        Supports Google Drive paths (containing 'gdrive/') and S3 paths
        (containing 's3/'); other cloud storage types are still WIP.

        Parameters
        ----------
        local_dir : str
            Path under which the file will be stored locally
            (temporarily, maybe?).

        Raises
        ------
        ValueError
            If ``self.path`` is neither a gdrive nor an s3 path.
        """
        if self.path is None:
            # Nothing to see here (possibly raise exception?)
            return
        if os.path.exists(self.path):
            print("Doing nothing - file exists locally.")
            return
        import cottoncandy as cc
        if 'gdrive/' in self.path:
            # BUG FIX: was assigned to `gcci`, leaving `cci` undefined when
            # download_to_file() was called below (NameError on gdrive paths).
            cci = cc.get_interface(backend='gdrive')
            cloud_dir = self.path.split('gdrive/')[-1]
        elif 's3/' in self.path:
            cloud_dir = self.path.split('s3/')[-1]
            bucket_name = cloud_dir.split('/')[0]
            cci = cc.get_interface(bucket_name=bucket_name)
        else:
            # Previously fell through to a NameError; fail explicitly instead.
            raise ValueError("Unsupported cloud path: %s" % self.path)
        tmp_dir = os.path.join(local_dir, cloud_dir)
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        cci.download_to_file(os.path.join(cloud_dir, self.fname),
                             os.path.join(tmp_dir, self.fname))
        # Shouldn't overwrite permanent `path` property permanently; stash
        # the local copy's location separately instead.
        self._tmppath = tmp_dir
Ejemplo n.º 2
0
 def _get_cc_interface(self, bucket):
     """Get a cottoncandy interface for the given bucket. Will first check the
     local cache.
     """
     if bucket not in self.ccinterfaces:
         self.ccinterfaces[bucket] = cc.get_interface(bucket)
     return self.ccinterfaces[bucket]
Ejemplo n.º 3
0
def get_transcript_uris(bucket='stimulidb', extension='txt'):
    """Return s3:// URIs for every object in *bucket* ending in *extension*."""
    interface = cc.get_interface(bucket)

    # All object names in `bucket` matching the extension.
    matching = interface.glob('*.%s' % extension)

    # Turn each bare object name into a full s3:// URI.
    return [cc.utils.pathjoin('s3://', bucket, name) for name in matching]
Ejemplo n.º 4
0
	def GetVideoInfo(self):
		"""Populate video metadata (width, height, fps, nFrames, duration).

		If ``self.fileName`` opens as a local video file, metadata is read
		via OpenCV and ``self.isVidCap`` is set True.  Otherwise the file is
		assumed to be gzipped raw video -- downloaded from S3 when the name
		starts with 's3:', read from disk otherwise -- the stream is
		decompressed into ``self.video`` (bytes), and metadata is unpacked
		from a 56-byte header starting at byte 32.
		"""
		vc = cv2.VideoCapture(self.fileName)
		if (vc.isOpened()):
			self.video = vc
			self.width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
			self.height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
			self.fps = self.video.get(cv2.CAP_PROP_FPS)
			self.nFrames = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
			self.duration = self.nFrames / self.fps  # duration in seconds
			self.isVidCap = True
		else:	# not a video file on disk
			import cottoncandy
			self.isVidCap = False
			if self.fileName[:3] == 's3:':		# file is on s3; assumed to be gzipped

				fileName = self.fileName[5:]	# since s3 files begin with 's3://'
				bucket = fileName.split('/')[0]
				cloud = cottoncandy.get_interface(bucket)
				zippedData = cloud.download_stream(fileName[(len(bucket) + 1):]).content
			else:								# zipped file on disk
				# BUG FIX: gzip data must be read in binary mode; text mode
				# decodes/mangles the stream on Python 3.  Use a context
				# manager so the handle is closed even on error.
				with open(self.fileName, 'rb') as zippedFile:
					zippedData = zippedFile.read()
			# BUG FIX: decompressed chunks are bytes; the original seeded
			# self.video with '' and did str += bytes (TypeError on Py3) in
			# a quadratic loop.  Collect chunks and join once instead.
			zipFile = cottoncandy.utils.GzipInputStream(zippedData, 20 * (2 ** 20))
			chunks = []
			while True:
				chunk = zipFile.read(10 * (2 ** 20))
				if not chunk:
					break
				chunks.append(chunk)
			self.video = b''.join(chunks)
			del zippedData, zipFile, chunks

			import struct
			# 14 native ints unpacked from bytes 32..87 of the stream.
			# Layout inferred from usage (TODO(review): confirm against the
			# file writer): [0] = microseconds per frame, [4] = frame count,
			# [8]/[9] = width/height.
			metadata = struct.unpack('i' * 14, self.video[32:(32 + 56)])
			self.width = metadata[8]
			self.height = metadata[9]
			self.nFrames = metadata[4]
			# BUG FIX: original computed int(1 / (metadata[0] * 1000000)),
			# which truncates to 0 for any plausible frame period.  Given the
			# duration formula below treats metadata[0] as microseconds per
			# frame, fps is 1e6 / metadata[0].
			self.fps = int(1000000 / metadata[0])
			self.duration = metadata[0] * metadata[4] / 1000000.0
Ejemplo n.º 5
0
# Script: fetch fMRI data / subject transforms from the 'story-mri-data'
# bucket via cottoncandy.  Toggle the a/b/c flags below to select a step.
import cottoncandy as cc
import numpy as np

# SECURITY(review): credentials are hard-coded in source and now burned into
# version-control history -- rotate them and load from environment variables
# or a config file kept outside the repository.
access_key = 'SSE14CR7P0AEZLPC7X0R'
secret_key = 'K0MmeXiXotrGIiTeRwEKizkkhR4qFV8tr8cIXprI'
endpoint_url = 'http://c3-dtn02.corral.tacc.utexas.edu:9002/'
cci = cc.get_interface('story-mri-data',
                       ACCESS_KEY=access_key,
                       SECRET_KEY=secret_key,
                       endpoint_url=endpoint_url)
a = False  # step 1: download raw fMRI array for `sub`
b = False  # step 2: fetch the subject-transforms JSON
c = True   # step 3: scan subjects' response arrays

sub = 'AHfs'
#1. For downloading fmri data for a subject
if (a):
    R_obs = cci.download_raw_array(sub + '/wheretheressmoke-10sessions')
    # NOTE(review): np.save appends '.npy', so this actually writes
    # 'fmri_<sub>.dat.npy'.
    np.save('fmri_' + sub + '.dat', R_obs)

#2. For fetching JSON file of subject xfms
if (b):
    xfm_file = cci.download_json('subject_xfms')
    print(xfm_file[sub])

#3. For finding voxels with highest correlation over time.
if (c):
    for sub in ['SJ']:  #,'SS','EB03','EB05','S03']:
        R_obs = cci.download_raw_array(sub + '/wheretheressmoke-10sessions')
        #R_obs = np.load('fullmatrix_R_'+ sub +'.dat.npy')
        # NOTE(review): `Voxels` is computed but never used in this snippet;
        # presumably the correlation step was truncated or lives elsewhere.
        Voxels = R_obs.shape[2]
Ejemplo n.º 6
0
# Namespace test objects by date and Python version so parallel CI runs
# don't collide.  DATE and sys are expected to be defined/imported earlier
# in the file (not visible in this chunk).
prefix = 'testcc/%s/py%s' % (DATE, sys.version[:6])
object_name = os.path.join(prefix, 'test')

# login
##############################

# NOTE(review): `if True:` hard-selects the CI branch, leaving the
# local-defaults branch below as intentionally dead code -- flip to False
# to run against your own defaults.
if True:
    # for travis testing on AWS.
    bucket_name = os.environ['DL_BUCKET_NAME']
    AK = os.environ['DL_ACCESS_KEY']
    SK = os.environ['DL_SECRET_KEY']
    URL = os.environ['DL_URL']

    cci = cc.get_interface(bucket_name,
                           ACCESS_KEY=AK,
                           SECRET_KEY=SK,
                           endpoint_url=URL,
                           verbose=False)
else:
    ##############################
    # Warning
    ##############################
    # This will use your defaults to run the tests on.
    # If you use AWS, you might incur costs.
    cci = cc.get_interface()

##############################
# tests
##############################

Ejemplo n.º 7
0
# NOTE(review): this login preamble duplicates the one in the previous
# snippet; `prefix` must be defined earlier (not visible in this chunk).
object_name = os.path.join(prefix, 'test')


# login
##############################

# `if True:` hard-selects the CI branch; the else branch is intentionally
# dead unless flipped for local runs.
if True:
    # for travis testing on AWS.
    bucket_name = os.environ['DL_BUCKET_NAME']
    AK = os.environ['DL_ACCESS_KEY']
    SK = os.environ['DL_SECRET_KEY']
    URL = os.environ['DL_URL']

    cci = cc.get_interface(bucket_name,
                           ACCESS_KEY=AK,
                           SECRET_KEY=SK,
                           endpoint_url=URL,
                           verbose=False)
else:
    ##############################
    # Warning
    ##############################
    # This will use your defaults to run the tests on.
    # If you use AWS, you might incur costs.
    cci = cc.get_interface()


##############################
# tests
import cottoncandy as cc
import os
import numpy as np

# Script: mirror MRF training slices from a cloud bucket to local disk.
# For each (subject, slice) pair, download 'mrf.npy' and 't1_t2_pd.npy'
# and save them under the per-slice output directory.

# Data paths
bucket_name = "carson_public_datasets"
bucket_path = "/human_connectome_project/3T/{subject}/mrf_training/{slice}"
# Subject/slices: array of (subject, slice) pairs to fetch
subslices = np.load("/auto/k1/carson/glab/mrf/train_data_list.npy")
# Output paths
outpath = "/datasets/christine/mrf/{subject}/{slice}"

cci = cc.get_interface(bucket_name)

print("Total # of slices: %d" % len(subslices))

# enumerate(start=1) replaces the hand-rolled `icount` counter, keeping the
# same 1-based progress numbers.
for icount, (isubject, islice) in enumerate(subslices, start=1):
    fpath = bucket_path.format(subject=isubject, slice=islice)
    opath = outpath.format(subject=isubject, slice=islice)
    # exist_ok=True avoids the racy exists-then-create check.
    os.makedirs(opath, exist_ok=True)

    mrf = cci.download_npy_array(os.path.join(fpath, "mrf.npy"))
    t1_t2_pd = cci.download_npy_array(os.path.join(fpath, "t1_t2_pd.npy"))
    np.save(os.path.join(opath, "mrf.npy"), mrf)
    np.save(os.path.join(opath, "t1_t2_pd.npy"), t1_t2_pd)

    if icount % 100 == 0:
        print("Finished %d slices" % icount)