def _store_raw_file(h5_obj, where, name, src_path):
    """Copy the file at ``src_path`` verbatim into a new filenode.

    Args:
        h5_obj: The HDF5 file object the filenode is created in.
        where: The group that will hold the filenode.
        name: Node name for the filenode.
        src_path: Path of the file on disk to archive.
    """
    node = filenode.newNode(h5_obj, where=where, name=name)
    try:
        # Binary mode so the bytes are archived untouched; the context
        # manager guarantees the source handle is released.
        with open(src_path, "rb") as src:
            node.write(src.read())
    finally:
        # Closing the filenode forces its contents to be flushed, and must
        # happen even when the copy above fails.
        node.close()


def dir_to_h5(directory, h5_obj, h5_path="/"):
    """Pack one run's HTB, LOG and block files into an HDF5 file.

    The following nodes are created under ``h5_path``:

    * ``htb`` -- group holding the raw HTB file (filenode ``htbfile``) and
      one CArray per array exposed by ``openHTB`` (named after the array
      with spaces removed, the original name kept as the title).
    * ``log`` -- group holding the raw LOG file (filenode ``logfile``).
    * ``block`` -- Int32 EArray with every block file's data appended
      along a new trailing axis.  Only created when at least one block
      file is found.
    * ``condition`` / ``trial_num`` -- arrays of the condition and trial
      numbers yielded by ``BLK_iter``, in the order the blocks were
      appended.

    Args:
        directory: Directory handed to ``openDIR``; it must resolve to
            exactly one experiment path.
        h5_obj: Writable HDF5 file object (PyTables 2.x style API:
            ``createGroup``, ``createCArray``, ``createEArray``,
            ``createArray``, ``flush``).
        h5_path: Group path under which all nodes are created.  The
            ``block`` array now honours this as well; it used to be
            created at "/" regardless, which made a second call on the
            same file collide.  With the default "/" nothing changes.

    Raises:
        AssertionError: If ``directory`` does not contain exactly one run.
    """
    # Use the openDIR object to deal with the directory layout.
    dir_listing = openDIR(directory)

    # Raised explicitly (rather than via ``assert``) so the check still
    # runs under ``python -O``; the exception type and message are kept.
    exp_paths = dir_listing.exp_paths
    if len(exp_paths) != 1:
        raise AssertionError(
            "This function only does 1 run at a time:" + str(exp_paths))
    exp_path = exp_paths[0]

    print("%s blockfiles detected, compressing . . ."
          % (dir_listing.total_trial_count,))

    # Groups for HTB and LOG file related data.
    htb_leaf = h5_obj.createGroup(h5_path, "htb")
    log_leaf = h5_obj.createGroup(h5_path, "log")

    # Locate the source files.
    log_path = dir_listing.get_LOG(exp_path)
    htb_path = dir_listing.get_HTB(exp_path)

    # Store the raw files (they are pretty small, so why not).
    _store_raw_file(h5_obj, htb_leaf, 'htbfile', htb_path)
    _store_raw_file(h5_obj, log_leaf, 'logfile', log_path)

    # Open the HTB file, and break out and write each array.
    htb_data = openHTB(htb_path)
    for a_name, htb_arr in zip(htb_data.db_names, htb_data.db_array):
        # Figure out the type in terms of PyTables semantics.
        atm = tables.Atom.from_dtype(htb_arr.dtype)
        # Take spaces out of the name, so natural naming can be used.
        nat_name = a_name.replace(" ", "")
        # Create the array and throw the data in it.
        leaf_arr = h5_obj.createCArray(htb_leaf, nat_name, atm,
                                       htb_arr.shape, a_name)
        leaf_arr[:] = htb_arr

    # Block file bits.
    # Per-block bookkeeping, written out as arrays once the loop is done.
    condition = []
    trial_num = []

    # The extendable array is created lazily: its shape is only known once
    # the first block file has been loaded.
    ear = None
    for n, (cn, bn, tn, f) in enumerate(dir_listing.BLK_iter(exp_path)):
        print("Starting %s of %s" % (n, dir_listing.total_trial_count))
        print("\tLoading .BLK (%s) cn:%i tn:%i" % (f, cn, tn))

        # Open the block file.
        bf = openBLK(f)
        bf.load_data()

        if ear is None:
            # A trailing axis of length 0 is the extendable dimension.
            new_shape = bf.data_array.shape + (0,)
            ear = h5_obj.createEArray(h5_path, "block", tables.Int32Atom(),
                                      new_shape,
                                      "Data from '%s'" % directory)

        print("\tWriting to h5.")
        # The indexing requires 3-D block data; the new trailing axis
        # lines the block up with the extendable dimension.
        ear.append(bf.data_array[:, :, :, newaxis])

        # Mem clear necessary? TODO
        ear.flush()
        h5_obj.flush()
        del bf

        # For later conversion to arrays.
        condition.append(cn)
        trial_num.append(tn)

        print("\tDone.")

    # Create the condition and trial number arrays
    # (i.e. avoid redoing the file name munging each time).
    h5_obj.createArray(h5_path, "condition", array(condition))
    h5_obj.createArray(h5_path, "trial_num", array(trial_num))

    # Yay done!
    print("Done!")
from DIR_reader import openDIR from DataBlocker import dir_to_h5 import os, tables, shutil dr = openDIR("/media/jupiter/data/archive/working/frodo/") temp_file = "/tmp/tmp-data2.h5" b0rked = [] for path in dr.exp_paths: if "run" not in path: print "skipping", path continue print "Doing:", path # Include the folder name it came from and the run number in the filename output_stuff = path.split("/")[-2:] out_fn = "-".join(output_stuff) + ".h5" # Assemble the full path to the aws virtual directory out_full = os.path.join("/home/mrg/aws-esdata/frodo", out_fn) # if it is already present remotley, skip it. if os.path.isfile(out_full): try: print "Testing: (%s)" % out_full test = tables.openFile(out_full, "r+") test.close() print "Looks good, so skipping", out_fn continue