예제 #1
0
파일: lsdb.py 프로젝트: donbro/lsdb
def files_generator(basepath, options):
    """Yield a file dict for every item related to basepath, top down.

    The sequence is: the enclosing volume, each superfolder between the
    volume and basepath, basepath itself, and then (only when basepath is a
    directory) everything the directory enumerator returns for it.

    Each yielded dict is the result of GetURLValues(url, enumeratorURLKeys)
    with an extra 'depth' key: basepath is 0, its ancestors are negative
    (the volume is 1 - number_of_ancestors_including_basepath), and
    enumerated items carry the enumerator's level().

    Attributes read from options:
        scan_packages     -- when false, a basepath that is a package is
                             yielded but not enumerated, and package
                             descendants are skipped during enumeration.
        scan_hidden_files -- when false, hidden files are skipped.
        depth_limit       -- when truthy, descendants of any item whose depth
                             is >= depth_limit - 1 are not enumerated (the
                             item itself is still yielded).
    """

    GPR.print_it25("files_generator", basepath, 2)

    basepath_url = NSURL.fileURLWithPath_(basepath)

    # Walk upwards from basepath, collecting one dict per level, until the
    # level that reports itself as a volume has been collected.
    superfolders_list = []
    url = basepath_url
    while True:
        url_values = GetURLValues(url, enumeratorURLKeys)
        superfolders_list.append(url_values)
        if url_values[NSURLIsVolumeKey]:
            break
        # go "upwards" one level (towards volume)
        url = url.URLByDeletingLastPathComponent()

    # Collected bottom-up; callers want volume first, basepath last.
    superfolders_list.reverse()

    GPR.print_superfolders_list("volume, superfolder(s)", superfolders_list, 4)

    # Now go back down, yielding dict objects at each step.
    n = len(superfolders_list)
    for i, superfolder_dict in enumerate(superfolders_list):
        superfolder_dict['depth'] = i + 1 - n
        yield superfolder_dict

    # The list always holds at least basepath, which is its last entry.
    basepath_dict = superfolders_list[-1]

    item_is_package = is_item_a_package(basepath_url)
    basepath_is_directory = basepath_dict[NSURLIsDirectoryKey]

    if basepath_is_directory and item_is_package and not options.scan_packages:
        GPR.print_it("\nbasepath is a directory and a package but we're not scanning packages.\n", 1)
        return

    # basepath was already yielded above; only directories have contents to
    # enumerate.
    if basepath_is_directory:

        # A plain int (not the Python-2-only ``0L`` literal) keeps this
        # parseable on Python 3 and behaves the same under ``|=``.
        enumeratorOptionKeys = 0
        if not options.scan_packages:
            enumeratorOptionKeys |= NSDirectoryEnumerationSkipsPackageDescendants
        if not options.scan_hidden_files:
            enumeratorOptionKeys |= NSDirectoryEnumerationSkipsHiddenFiles

        enumerator = sharedFM.enumeratorAtURL_includingPropertiesForKeys_options_errorHandler_(
            basepath_url,
            enumeratorURLKeys,
            enumeratorOptionKeys,
            error_handler_for_enumerator)

        for url in enumerator:
            item_dict = GetURLValues(url, enumeratorURLKeys)
            depth = enumerator.level()
            item_dict['depth'] = depth

            # At the depth limit: still yield this item, but do not descend
            # into it.
            if options.depth_limit and (depth >= options.depth_limit - 1):
                enumerator.skipDescendants()

            yield item_dict

    GPR.print_it2("end files_generator", basepath, verbose_level_threshold=3)
예제 #2
0
파일: lsdb_prev.py 프로젝트: donbro/lsdb
def _check_and_track_directory(cnx, vol_id, item_dict, depth, force_folder_scan):
    """Record whether a directory item is up to date and push it on the folder stack.

    Sets item_dict['directory_is_up_to_date'], calls db_query_folder() when
    the directory is not in the database or a scan is forced, and appends
    (depth, file number) to the module-level ``stak``.
    """
    file_exists = db_file_exists(cnx, item_dict, vol_id)
    needs_scan = (not file_exists) or force_folder_scan
    item_dict['directory_is_up_to_date'] = not needs_scan
    if needs_scan:
        db_query_folder(cnx, vol_id, item_dict, depth)

    stak.append((depth, item_dict['NSFileSystemFileNumber']))


def do_fs_basepath(cnx, basepath, slist, vol_id, item_tally=None, force_folder_scan=False,
                   scan_hidden_files=False,
                   depth_limit=4,
                   scan_packages=False):
    """Generator yielding an ordered sequence of item dictionaries.

    First yields the directories above basepath, from the top down (this
    part may be empty), then basepath itself, then the enumerated contents
    of basepath if it is a directory that is not a package, or is a package
    and scan_packages is set.

    Every yielded dict gets 'vol_id' and 'depth' keys: basepath is depth 0,
    its ancestors are negative, enumerated items carry the enumerator's
    level().  Directory items that get scanned also receive
    'directory_is_up_to_date'; enumerated items also receive
    'current_item_directory_is_being_checked'.

    Parameters:
        cnx               -- database connection, passed to db_file_exists()
                             and db_query_folder().
        basepath          -- filesystem path to scan.
        slist             -- dicts for the superfolders, ordered top down,
                             with the dict for basepath as the LAST element
                             (must therefore be non-empty).
        vol_id            -- volume id stored on every yielded dict.
        item_tally        -- not used by this function; kept for call
                             compatibility.  Defaults to None rather than a
                             shared mutable defaultdict.
        force_folder_scan -- query every directory even if already in the
                             database.
        scan_hidden_files -- when false, hidden files are skipped.
        depth_limit       -- not used by this function; kept for call
                             compatibility.
        scan_packages     -- when false, packages are not descended into.

    Reads and mutates the module-level ``stak``, ``RS1_db_rels`` and
    ``RS2_ins``.
    """

    n = len(slist)
    for i, superfolder_dict in enumerate(slist[:-1]):   # last one is basepath
        superfolder_dict['vol_id'] = vol_id
        superfolder_dict['depth'] = i + 1 - n
        yield superfolder_dict

    basepath_url = NSURL.fileURLWithPath_(basepath)

    item_dict = slist[-1]
    depth = 0
    item_dict['vol_id'] = vol_id
    item_dict['depth'] = depth

    # See if basepath is (effectively) a directory, i.e. a directory that is
    # either not a package or a package we have been asked to scan.
    item_is_package = is_item_a_package(basepath_url)
    is_directory = item_dict[NSURLIsDirectoryKey]

    if not (is_directory and ((not item_is_package) or scan_packages)):
        if is_directory and item_is_package and not scan_packages:
            GPR.print_it("\nbasepath is a directory and a package but we're not scanning packages.\n", 2)
        # Nothing to enumerate: basepath is the final item.
        yield item_dict
        return

    _check_and_track_directory(cnx, vol_id, item_dict, depth, force_folder_scan)
    yield item_dict

    # Enumerate the contents of basepath.

    # A plain int (not the Python-2-only ``0L`` literal) keeps this parseable
    # on Python 3 and behaves the same under ``|=``.
    enumeratorOptionKeys = 0
    if not scan_packages:
        enumeratorOptionKeys |= NSDirectoryEnumerationSkipsPackageDescendants
    if not scan_hidden_files:
        enumeratorOptionKeys |= NSDirectoryEnumerationSkipsHiddenFiles

    enumerator2 = sharedFM.enumeratorAtURL_includingPropertiesForKeys_options_errorHandler_(
        basepath_url, enumeratorURLKeys, enumeratorOptionKeys,
        error_handler_for_enumerator)

    for url in enumerator2:

        item_dict = GetURLValues(url, enumeratorURLKeys)
        depth = enumerator2.level()
        item_dict['vol_id'] = vol_id
        item_dict['depth'] = depth

        # Pop stack entries until the stack is no longer than the current
        # depth.
        while len(stak) > depth:
            stak.pop()

        # See if the current item is (effectively) a directory; check/query
        # the database if it is.
        item_is_package = is_item_a_package(url)
        if item_dict[NSURLIsDirectoryKey] and ((not item_is_package) or scan_packages):
            # NOTE (carried over from the original author):
            # (1) in addition to checking the database, new files also need
            #     to be added to RS2_ins[(depth-1, folder_id)].
            # (2) completely new directories (i.e. not just an update of an
            #     existing directory) won't have any database contents to
            #     check.  (this is a lesser optimization?)
            _check_and_track_directory(cnx, vol_id, item_dict, depth, force_folder_scan)

        # See if the current item's folder is in our list of (new or forced)
        # folders being tracked.
        folder_id = item_dict['NSFileSystemFolderNumber']
        folder_key = (depth - 1, folder_id)
        folder_is_tracked = folder_key in RS1_db_rels
        item_dict['current_item_directory_is_being_checked'] = folder_is_tracked

        if folder_is_tracked:
            file_id = item_dict['NSFileSystemFileNumber']
            filename = item_dict[NSURLNameKey]
            # Drops the last len(" +0000") characters of the date's string
            # form -- presumably a trailing UTC offset; TODO confirm the
            # string always ends that way.
            file_mod_date = str(item_dict[NSURLContentModificationDateKey])[:-len(" +0000")]
            rs = (vol_id, folder_id, filename, file_id, file_mod_date)

            # If the current item is present in RS1 then it is no longer a
            # "file to be deleted"; if it is in the filesystem but not in the
            # database then it is a "file to be inserted".
            try:
                RS1_db_rels[folder_key] -= rs
            except KeyError:
                RS2_ins[folder_key] += rs

        yield item_dict