Beispiel #1
0
def extract_netcdf_variable_recursive(output,data,level_desc,tree,options,check_empty=False,hdf5=None):
    """Recursively replicate the requested group/variable tree from *data* into *output*.

    Parameters:
        output: destination netCDF dataset/group being written to.
        data: source netCDF dataset/group being read from.
        level_desc: (level_name, group_name) pair describing the current tree level.
            group_name of None or a list means "walk all matching groups";
            '' means "stay at the current group and descend".
        tree: list of remaining (level_name, group_name) descriptors to descend.
        options: options object passed to the inclusion/exclusion filters.
        check_empty: forwarded to read_netCDF_pointers.replicate.
        hdf5: optional handle mirroring the group structure of *data* — the
            matching sub-handle is forwarded at each level (presumably an h5py
            group; TODO confirm against callers).
    """
    level_name = level_desc[0]
    group_name = level_desc[1]
    if group_name is None or isinstance(group_name, list):
        # Wildcard level: visit every group that passes the name filter and is non-empty.
        for group in data.groups.keys():
            if (nc_Database.is_level_name_included_and_not_excluded(level_name, options, group) and
                    nc_Database.retrieve_tree_recursive_check_not_empty(options, data.groups[group])):
                output_grp = replicate_group(output, data, group)
                if len(tree) > 0:
                    # Bug fix: the original subscripted hdf5 unconditionally here,
                    # raising TypeError whenever hdf5 was left at its default None.
                    if hdf5 is not None:
                        extract_netcdf_variable_recursive(output_grp, data.groups[group],
                                                          tree[0], tree[1:], options,
                                                          check_empty=check_empty, hdf5=hdf5[group])
                    else:
                        extract_netcdf_variable_recursive(output_grp, data.groups[group],
                                                          tree[0], tree[1:], options,
                                                          check_empty=check_empty)
                else:
                    # Leaf level: replicate the soft-linked pointers of this group.
                    netcdf_pointers = read_soft_links.read_netCDF_pointers(data.groups[group])
                    if hdf5 is not None:
                        netcdf_pointers.replicate(output_grp, check_empty=check_empty, hdf5=hdf5[group])
                    else:
                        netcdf_pointers.replicate(output_grp, check_empty=check_empty)
    else:
        if len(tree) > 0:
            if group_name == '':
                # Empty group name: do not descend into a subgroup, just advance the tree.
                extract_netcdf_variable_recursive(output, data, tree[0], tree[1:], options,
                                                  check_empty=check_empty, hdf5=hdf5)
            elif group_name in data.groups.keys():
                if hdf5 is not None:
                    extract_netcdf_variable_recursive(output, data.groups[group_name],
                                                      tree[0], tree[1:], options,
                                                      check_empty=check_empty, hdf5=hdf5[group_name])
                else:
                    extract_netcdf_variable_recursive(output, data.groups[group_name],
                                                      tree[0], tree[1:], options,
                                                      check_empty=check_empty)
        else:
            # Leaf level with an explicit group name.
            netcdf_pointers = read_soft_links.read_netCDF_pointers(data.groups[group_name])
            if hdf5 is not None:
                netcdf_pointers.replicate(output, check_empty=check_empty, hdf5=hdf5[group_name])
            else:
                netcdf_pointers.replicate(output, check_empty=check_empty)
    return
Beispiel #2
0
def descend_tree_recursive(database,file_expt,tree_desc,top_path,options,list_level=None,alt=False):
    """Recursively descend a local directory tree and register .nc files in the database.

    Parameters:
        database: object exposing header_simple and an nc_Database.session (SQLAlchemy-like).
        file_expt: record template; a deep copy is specialized per subdirectory/file.
        tree_desc: list of level names; the last level corresponds to the files themselves.
        top_path: filesystem path of the current level.
        options: passed to the level inclusion/exclusion filter.
        list_level: if given, stop and return the subdirectory names at this level.
        alt: if True, split the model string '<model>-<version>' into model_version
             and an institute-prefixed model name.

    Returns:
        A list of discovered file names (or of subdir names when list_level matches).
    """
    if not isinstance(tree_desc, list):
        # Robustness fix: return an empty list (the original returned None, which
        # broke the flattening comprehension in the recursive caller below).
        return []

    if len(tree_desc) == 1:
        # Leaf level: register every netCDF file found at this path.
        file_list = glob.glob(top_path + '/*.nc')
        for file_name in file_list:
            file_expt_copy = copy.deepcopy(file_expt)
            # Checksum intentionally left empty after the '|' separator
            # (md5 computation was disabled in the original).
            file_expt_copy.path = file_name + '|'
            if alt:
                file_expt_copy.model_version = file_expt_copy.model.split('-')[1]
                file_expt_copy.model = '-'.join([file_expt_copy.institute,
                                                 file_expt_copy.model.split('-')[0]])
            database.nc_Database.session.add(file_expt_copy)
            database.nc_Database.session.commit()
        return file_list

    local_tree_desc = tree_desc[0]
    next_tree_desc = tree_desc[1:]

    subdir_list = []
    # Loop through subdirectories:
    for subdir in get_immediate_subdirectories(top_path):
        if local_tree_desc + '_list' in database.header_simple:
            # We keep only the subdirectories that were requested.
            if subdir in database.header_simple[local_tree_desc + '_list']:
                subdir_list.append(subdir)
        elif not (local_tree_desc == 'version' and
                  (subdir == 'latest' or not RepresentsInt(subdir[1:]))):
            # Keep all other subdirs as long as they are
            # 1) not the 'latest' version alias and
            # 2) of the form v{int}.
            subdir_list.append(subdir)

    if list_level is not None and local_tree_desc == list_level:
        return subdir_list

    only_list = []
    for subdir in subdir_list:
        file_expt_copy = copy.deepcopy(file_expt)
        setattr(file_expt_copy, local_tree_desc, subdir)
        # Include only subdirectories that were specified if this level was specified:
        if nc_Database.is_level_name_included_and_not_excluded(local_tree_desc, options, subdir):
            only_list.append(descend_tree_recursive(database, file_expt_copy,
                                                    next_tree_desc, top_path + '/' + subdir,
                                                    options, list_level=list_level, alt=alt))
    return [item for sublist in only_list for item in sublist]
Beispiel #3
0
    def retrieve_without_time(self,
                              retrieval_function,
                              output,
                              semaphores=None,
                              username=None,
                              user_pass=None):
        #This function simply retrieves all the files:
        file_path = output
        for path_to_retrieve in self.paths_list:
            file_type = self.file_type_list[list(
                self.paths_list).index(path_to_retrieve)]
            version = 'v' + str(self.version_list[list(
                self.paths_list).index(path_to_retrieve)])
            checksum = self.checksums_list[list(
                self.paths_list).index(path_to_retrieve)]
            #Get the file tree:
            args = ({
                'path': path_to_retrieve + '|' + checksum,
                'var': self.tree[-1],
                'file_path': file_path,
                'version': version,
                'file_type': file_type,
                'username': username,
                'user_pass': user_pass
            }, copy.deepcopy(self.tree))
            #'sort_table':np.argsort(sorting_paths)[sorted_paths_link==path_id][time_slice],

            #Retrieve only if it is from the requested data node:
            data_node = retrieval_utils.get_data_node(path_to_retrieve,
                                                      file_type)
            if nc_Database.is_level_name_included_and_not_excluded(
                    'data_node', self, data_node):
                if data_node in self.queues.keys():
                    #print 'Recovering '+var_to_retrieve+' in '+path_to_retrieve
                    print 'Recovering ' + '/'.join(self.tree)
                    self.queues[data_node].put((retrieval_function, ) +
                                               copy.deepcopy(args))
        return
Beispiel #4
0
    def retrieve_variables(self,
                           retrieval_function,
                           var_to_retrieve,
                           time_restriction,
                           output,
                           semaphores=None,
                           username=None,
                           user_pass=None):
        """Retrieve var_to_retrieve for the time steps selected by time_restriction.

        Resolves each requested time step through the 'soft_links' group to a
        (path, time index) pair, groups time steps by source path, splits them
        into size-limited chunks, and puts one retrieval task per chunk on the
        per-data-node queue (or applies retrieval_function directly when the
        data node has no queue and output is a netCDF4 Dataset/Group).

        Parameters:
            retrieval_function: callable (or name) put at the head of each queued
                task; also looked up on retrieval_utils for the direct-apply path.
            var_to_retrieve: name of the variable in self.data_root.
            time_restriction: index/boolean selection applied along the time axis.
            output: netCDF4.Dataset/Group to replicate into, or a file path string.
            semaphores: forwarded to remote_netcdf.remote_netCDF.
            username, user_pass: credentials forwarded in the task args.
        """
        #Replicate variable to output:
        if (isinstance(output, netCDF4.Dataset)
                or isinstance(output, netCDF4.Group)):
            output = netcdf_utils.replicate_netcdf_var(output,
                                                       self.data_root,
                                                       var_to_retrieve,
                                                       chunksize=-1,
                                                       zlib=True)
            #file_path=output.filepath()
            file_path = None
            if not 'soft_links' in self.data_root.groups.keys():
                #Variable is stored here and simply retrieve it:
                output.variables[
                    var_to_retrieve][:] = self.data_root.variables[
                        var_to_retrieve][time_restriction]
                return
        else:
            # output is a destination path, not an open dataset.
            file_path = output

        # Full index arrays for every non-time dimension; unsort entry None
        # means "no reordering needed" for that dimension.
        dimensions = dict()
        unsort_dimensions = dict()
        dims_length = []
        for dim in self.data_root.variables[var_to_retrieve].dimensions:
            if dim != 'time':
                if dim in self.data_root.variables.keys():
                    dimensions[dim] = self.data_root.variables[dim][:]
                else:
                    # Dimension has no coordinate variable; use positional indices.
                    dimensions[dim] = np.arange(
                        len(self.data_root.dimensions[dim]))
                unsort_dimensions[dim] = None
                dims_length.append(len(dimensions[dim]))

        # Determine the paths_ids for soft links:
        # column 0 = path id, column 1 = time index within that file.
        paths_link = self.data_root.groups['soft_links'].variables[
            var_to_retrieve][time_restriction, 0]
        indices_link = self.data_root.groups['soft_links'].variables[
            var_to_retrieve][time_restriction, 1]

        #Convert paths_link to id in path dimension:
        paths_link = np.array([
            list(self.paths_id_list).index(path_id) for path_id in paths_link
        ])

        #Sort the paths so that we query each only once:
        # sorting_paths maps each requested time step back to its unique path.
        unique_paths_list_id, sorting_paths = np.unique(paths_link,
                                                        return_inverse=True)

        #Maximum number of time step per request:
        max_request = 450  #maximum request in Mb
        # 32 presumably accounts for bytes per value incl. overhead — TODO confirm.
        max_time_steps = max(
            int(
                np.floor(max_request * 1024 * 1024 /
                         (32 * np.prod(dims_length)))), 1)
        for unique_path_id, path_id in enumerate(unique_paths_list_id):
            path_to_retrieve = self.paths_list[path_id]

            #Next, we check if the file is available. If it is not we replace it
            #with another file with the same checksum, if there is one!
            file_type = self.file_type_list[list(
                self.paths_list).index(path_to_retrieve)]
            remote_data = remote_netcdf.remote_netCDF(
                path_to_retrieve.replace('fileServer', 'dodsC'), file_type,
                semaphores)
            if not file_type in ['FTPServer']:
                path_to_retrieve = remote_data.check_if_available_and_find_alternative(
                    [
                        path.replace('fileServer', 'dodsC')
                        for path in self.paths_list
                    ], self.checksums_list).replace('dodsC', 'fileServer')
            #Get the file_type, checksum and version of the file to retrieve:
            # NOTE: looked up again because path_to_retrieve may have been
            # replaced by an alternative path just above.
            file_type = self.file_type_list[list(
                self.paths_list).index(path_to_retrieve)]
            version = 'v' + str(self.version_list[list(
                self.paths_list).index(path_to_retrieve)])
            checksum = self.checksums_list[list(
                self.paths_list).index(path_to_retrieve)]

            #Append the checksum:
            path_to_retrieve += '|' + checksum

            #time_indices=sorted_indices_link[sorted_paths_link==path_id]
            # Time indices (within the remote file) belonging to this path:
            time_indices = indices_link[sorting_paths == unique_path_id]
            num_time_chunk = int(
                np.ceil(len(time_indices) / float(max_time_steps)))
            for time_chunk in range(num_time_chunk):
                time_slice = slice(time_chunk * max_time_steps,
                                   (time_chunk + 1) * max_time_steps, 1)
                dimensions['time'], unsort_dimensions[
                    'time'] = indices_utils.prepare_indices(
                        time_indices[time_slice])

                #Get the file tree:
                # 'sort_table' maps retrieved values back to output positions.
                args = ({
                    'path':
                    path_to_retrieve,
                    'var':
                    var_to_retrieve,
                    'indices':
                    dimensions,
                    'unsort_indices':
                    unsort_dimensions,
                    'sort_table':
                    np.arange(len(sorting_paths))[sorting_paths ==
                                                  unique_path_id][time_slice],
                    'file_path':
                    file_path,
                    'version':
                    version,
                    'file_type':
                    file_type,
                    'username':
                    username,
                    'user_pass':
                    user_pass
                }, copy.deepcopy(self.tree))
                #'sort_table':np.argsort(sorting_paths)[sorted_paths_link==path_id][time_slice],

                #Retrieve only if it is from the requested data node:
                data_node = retrieval_utils.get_data_node(
                    path_to_retrieve, file_type)
                if nc_Database.is_level_name_included_and_not_excluded(
                        'data_node', self, data_node):
                    if data_node in self.queues.keys():
                        if ((isinstance(output, netCDF4.Dataset)
                             or isinstance(output, netCDF4.Group))
                                or time_chunk == 0):
                            #If it is download: retrieve
                            #If it is download_raw: retrieve only first time_chunk
                            if var_to_retrieve == self.tree[-1]:
                                #print 'Recovering '+var_to_retrieve+' in '+path_to_retrieve
                                print 'Recovering ' + '/'.join(self.tree)
                            self.queues[data_node].put((retrieval_function, ) +
                                                       copy.deepcopy(args))
                    else:
                        # No queue for this data node: apply the retrieval
                        # function synchronously when writing into an open dataset.
                        if (isinstance(output, netCDF4.Dataset)
                                or isinstance(output, netCDF4.Group)):
                            #netcdf_utils.assign_tree(output,*getattr(netcdf_utils,retrieval_function)(args[0],args[1]))
                            netcdf_utils.assign_tree(
                                output,
                                *getattr(retrieval_utils,
                                         retrieval_function)(args[0], args[1]))
        return
Beispiel #5
0
def descend_tree_recursive(database, file_expt, tree_desc, top_path, options, ftp, list_level=None, alt=False):
    """Recursively descend a directory tree over FTP and register .nc files.

    Parameters:
        database: object exposing header_simple and an nc_Database.session.
        file_expt: record template; a deep copy is specialized per subdirectory/file.
        tree_desc: list of level names; the last level corresponds to the files.
        top_path: URL-like path whose first three '/'-separated components are
            stripped to obtain the FTP working directory.
        options: passed to the level inclusion/exclusion filter.
        ftp: connected ftplib.FTP instance.
        list_level: if given, stop and return the subdirectory names at this level.
        alt: if True, split the model string '<model>-<version>' into model_version
             and an institute-prefixed model name.

    Returns:
        A list of discovered file names (or of subdir names when list_level matches).
    """
    if not isinstance(tree_desc, list):
        # Robustness fix: return an empty list (the original returned None, which
        # broke the flattening comprehension in the recursive caller below).
        return []

    # Make sure we're at the top_path:
    try:
        ftp.cwd("/" + "/".join(top_path.split("/")[3:]))
    except ftplib.error_perm:
        # Directory does not exist / is not accessible: nothing to list here.
        return []

    if len(tree_desc) == 1:
        # If we're at the end of the tree, we should expect files:
        file_list = [
            file_name for file_name in ftp.nlst()
            if len(file_name) > 3 and file_name.endswith(".nc")
        ]
        for file_name in file_list:
            file_expt_copy = copy.deepcopy(file_expt)
            # Checksum intentionally left empty after the '|' separator.
            file_expt_copy.path = top_path + "/" + file_name + "|"
            if alt:
                file_expt_copy.model_version = file_expt_copy.model.split("-")[1]
                file_expt_copy.model = "-".join([file_expt_copy.institute, file_expt_copy.model.split("-")[0]])
            database.nc_Database.session.add(file_expt_copy)
            database.nc_Database.session.commit()
        return file_list

    # We're not at the end of the tree, we should expect directories:
    local_tree_desc = tree_desc[0]
    next_tree_desc = tree_desc[1:]

    subdir_list = []
    # Loop through subdirectories:
    for subdir in ftp.nlst():
        if local_tree_desc + "_list" in database.header_simple:
            # We keep only the subdirectories that were requested.
            if subdir in database.header_simple[local_tree_desc + "_list"]:
                subdir_list.append(subdir)
        elif not (local_tree_desc == "version" and (subdir == "latest" or not RepresentsInt(subdir[1:]))):
            # Keep all other subdirs as long as they are
            # 1) not the 'latest' version alias and
            # 2) of the form v{int}.
            subdir_list.append(subdir)

    if list_level is not None and local_tree_desc == list_level:
        return subdir_list

    only_list = []
    for subdir in subdir_list:
        file_expt_copy = copy.deepcopy(file_expt)
        setattr(file_expt_copy, local_tree_desc, subdir)
        # Include only subdirectories that were specified if this level was specified:
        if nc_Database.is_level_name_included_and_not_excluded(local_tree_desc, options, subdir):
            only_list.append(
                descend_tree_recursive(
                    database,
                    file_expt_copy,
                    next_tree_desc,
                    top_path + "/" + subdir,
                    options,
                    ftp,
                    list_level=list_level,
                    alt=alt,
                )
            )
    return [item for sublist in only_list for item in sublist]
Beispiel #6
0
def descend_tree_recursive(database,file_expt,tree_desc,top_path,options,ftp,list_level=None,alt=False):
    """Recursively descend an FTP directory tree and register .nc files in the database.

    Parameters:
        database: object exposing header_simple and an nc_Database.session.
        file_expt: record template; a deep copy is specialized per subdirectory/file.
        tree_desc: list of level names; the last level corresponds to the files.
        top_path: URL-like path; its first three '/'-separated components are
            stripped to obtain the FTP working directory.
        options: passed to the level inclusion/exclusion filter.
        ftp: connected ftplib.FTP instance.
        list_level: if given, stop and return the subdirectory names at this level.
        alt: if True, split the model string '<model>-<version>' into model_version
             and an institute-prefixed model name.

    Returns:
        A list of discovered file names (or of subdir names when list_level matches).
    """
    if not isinstance(tree_desc, list):
        # Robustness fix: return an empty list (the original returned None, which
        # broke the flattening comprehension in the recursive caller below).
        return []

    # Make sure we're at the top_path:
    try:
        ftp.cwd('/' + '/'.join(top_path.split('/')[3:]))
    except ftplib.error_perm:
        # Directory does not exist / is not accessible: nothing to list here.
        return []

    if len(tree_desc) == 1:
        # If we're at the end of the tree, we should expect files:
        file_list = [file_name for file_name in ftp.nlst()
                     if len(file_name) > 3 and file_name.endswith('.nc')]

        for file_name in file_list:
            file_expt_copy = copy.deepcopy(file_expt)
            # Checksum intentionally left empty after the '|' separator.
            file_expt_copy.path = top_path + '/' + file_name + '|'
            if alt:
                file_expt_copy.model_version = file_expt_copy.model.split('-')[1]
                file_expt_copy.model = '-'.join([file_expt_copy.institute,
                                                 file_expt_copy.model.split('-')[0]])
            database.nc_Database.session.add(file_expt_copy)
            database.nc_Database.session.commit()
        return file_list

    # We're not at the end of the tree, we should expect directories:
    local_tree_desc = tree_desc[0]
    next_tree_desc = tree_desc[1:]

    subdir_list = []
    # Loop through subdirectories:
    for subdir in ftp.nlst():
        if local_tree_desc + '_list' in database.header_simple:
            # We keep only the subdirectories that were requested.
            if subdir in database.header_simple[local_tree_desc + '_list']:
                subdir_list.append(subdir)
        elif not (local_tree_desc == 'version' and
                  (subdir == 'latest' or not RepresentsInt(subdir[1:]))):
            # Keep all other subdirs as long as they are
            # 1) not the 'latest' version alias and
            # 2) of the form v{int}.
            subdir_list.append(subdir)

    if list_level is not None and local_tree_desc == list_level:
        return subdir_list

    only_list = []
    for subdir in subdir_list:
        file_expt_copy = copy.deepcopy(file_expt)
        setattr(file_expt_copy, local_tree_desc, subdir)
        # Include only subdirectories that were specified if this level was specified:
        if nc_Database.is_level_name_included_and_not_excluded(local_tree_desc, options, subdir):
            only_list.append(descend_tree_recursive(database, file_expt_copy,
                                                    next_tree_desc, top_path + '/' + subdir,
                                                    options, ftp, list_level=list_level, alt=alt))
    return [item for sublist in only_list for item in sublist]