def do_lsdb(args, options): """do_lsdb is the high-level, self-contained routine most like the command-line invocation""" config = { 'user': '******', 'password': '', 'host': '127.0.0.1', 'database': 'files', 'buffered': True } try: cnx = mysql.connector.connect(**config) except mysql.connector.Error as err: if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: print("Username or password %r and %r?" % (config['user'], config['password'])) elif err.errno == errorcode.ER_BAD_DB_ERROR: print "Database %r does not exist." % config['database'] else: print 'err:', err GPR.print_attrs("mysql.connector", cnx, verbose_level_threshold=4) item_tally = defaultdict(list) # initialize the item tallys here (kind of a per-connection tally?) try: for basepath in args: print "\nbasepath:" print print " ", basepath print slist = get_superfolders_list(basepath) vol_id = db_select_vol_id(cnx, slist[0]) # slist[0] is volume # do_fs_basepath is a generator for fs_dict in do_fs_basepath(cnx, basepath , slist, vol_id, force_folder_scan=options.force_folder_scan, scan_packages=options.scan_packages): GPR.pr7z( fs_dict , RS1_db_rels, RS2_ins, stak, ) # do final stuff at end of generator depth = 0 # ISS.pop_item_stack(depth, 2) while len(stak) > depth: # stak_before = stak[:] stak.pop() # print "pop", stak_before, "==>", stak # if ISS.folderIDAtDepth != {}: # print "\n folderIDAtDepth is not empty!", ISS.folderIDAtDepth except MyError, err: print err.description
def execute_select_query(cnx, select_query, select_data, n=3):
    """Run *select_query* (%-formatted with *select_data*) and return all rows as a list.

    cnx          -- open MySQL connection
    select_query -- SQL text containing %-style placeholders
    select_data  -- values interpolated into select_query with the % operator
    n            -- verbosity threshold passed to GPR.print_it when echoing the SQL

    NOTE(review): the SQL is built by string interpolation, not by
    parameterized execute(); only safe for trusted select_data.
    """
    cursor = cnx.cursor()
    try:
        GPR.print_it(select_query % select_data, n)
        cursor.execute(select_query % select_data)
        return [row for row in cursor]
    finally:
        # BUG FIX: close the cursor even when execute() raises.
        cursor.close()
def get_superfolders_list(basepath):
    """Return the chain of URL-attribute dicts from the volume root down to basepath.

    Element 0 is always the volume; the final element is basepath itself.
    When basepath *is* a volume the list holds just that one dict.
    """
    chain = []
    url = NSURL.fileURLWithPath_(basepath)
    # d1 = GetURLValues(url, enumeratorURLKeys)
    # Climb toward the volume root, prepending each level's attributes so the
    # result reads top-down.
    while True:
        values = GetURLValues(url, enumeratorURLKeys)
        chain.insert(0, values)
        if values[NSURLIsVolumeKey]:
            break   # reached the volume; stop climbing
        url = url.URLByDeletingLastPathComponent()   # go "upwards" to volume
    GPR.print_superfolders_list("volume, superfolder(s)", chain, 4)
    return chain
def main():
    """Command-line entry point: resolve argv (or hardwired TextMate test
    arguments), normalize the path arguments, and hand off to do_lsdb()."""
    # Previously this function carried ~20 dead bare-string expressions and
    # overwritten `s =` assignments of favorite testing paths; only the last
    # assignment ever took effect. The survivors are kept as comments:
    #   '/Users/donb'                                   '/Users/donb/projects'
    #   '/Volumes/Taos'                                 '/Volumes/Ulysses/TV Shows'
    #   '/Volumes/Ulysses/TV Shows/Nikita/'             '.'
    #   u'/Volumes/Brandywine/TV Series/White Collar/S04'
    s = u'/Users/donb/Ashley+Roberts/'   # the hardwired TextMate test path

    # hack to have Textmate run with hardwired arguments while command line can be free…
    if os.getenv('TM_LINE_NUMBER'):
        argv = []
        # argv = ["--help"]+[s]
        # argv = ["-rd 4"]
        argv += ["-v"]
        argv += ["-v"]
        # argv += ["-v"]
        # argv += ["-a"]
        argv += ["-p"]
        # argv += ["-f"]
        argv += [s]
    else:
        argv = sys.argv[1:]

    (options, args) = do_parse_args(argv)

    # no args means do the current directory
    if not args:
        args = ["."]
    args = [os.path.abspath(os.path.expanduser(a)) for a in args]
    # args = [os.path.abspath(os.path.expanduser(a.decode('utf8'))) for a in args]

    GPR.print_list("sys.argv", sys.argv)

    # display list of timezones
    if options.verbose_level >= 4:
        print_timezones("time_zones")

    GPR.print_dict("options (after optparsing)", options.__dict__, left_col_width=24, verbose_level_threshold=2)
    GPR.print_list("args (after optparsing)", args)

    do_lsdb(args, options)
def do_fs_basepath(cnx, basepath, slist, vol_id, item_tally=defaultdict(list), force_folder_scan=False, scan_hidden_files=False, depth_limit=4, scan_packages=False):
    """do_fs_basepath is a generator yielding an ordered sequence of (status, dictionary) pairs

    first yield the sequence of directories above the basepath, from top down. could be empty.
    then yield basepath, then enumerate contents of basepath if it is a directory or package and we want to scan packages

    cnx        -- open MySQL connection used by db_file_exists / db_query_folder
    basepath   -- path string for the item whose tree is scanned
    slist      -- URL-value dicts from get_superfolders_list(); slist[-1] is basepath itself
    vol_id     -- database id of the containing volume; stamped into every yielded dict
    item_tally -- NOTE(review): mutable default argument shared across calls; unused in this body
    depth_limit-- NOTE(review): accepted but never referenced in this body

    Side effects: pushes/pops the module-level ``stak`` folder stack and
    updates the module-level RS1_db_rels / RS2_ins collections.
    """
    n = len(slist)
    # Yield the superfolders above basepath, top-down, with negative depths
    # counting up toward 0 (basepath itself is depth 0).
    for i, superfolder_dict in enumerate(slist[:-1]):   # last one is basepath
        superfolder_dict['vol_id'] = vol_id
        superfolder_dict['depth'] = i+1-n
        yield superfolder_dict
    # for url in enumerator2:
    basepath_url = NSURL.fileURLWithPath_(basepath)
    item_dict = slist[-1]   # basepath's own attribute dict
    depth = 0
    item_dict['vol_id'] = vol_id
    item_dict['depth'] = depth
    # see if our current item is (effectively) a directory. check/query database if it is.
    item_is_package = is_item_a_package(basepath_url)
    if item_dict[NSURLIsDirectoryKey] and ((not item_is_package) or scan_packages):
        file_exists = db_file_exists(cnx, item_dict, vol_id)
        # "up to date" only when the folder exists in the db and no forced rescan
        item_dict['directory_is_up_to_date'] = not ((not file_exists) or force_folder_scan)
        if (not file_exists) or force_folder_scan:
            folder_id = item_dict['NSFileSystemFileNumber']
            db_query_folder(cnx, vol_id, item_dict, depth)
        folder_file_id = item_dict['NSFileSystemFileNumber']
        stak.append((depth, folder_file_id))   # track the folder chain on the module-level stack
        yield item_dict
        # fall-through to do enumeration
    else:
        if item_dict[NSURLIsDirectoryKey] and item_is_package and not scan_packages:
            GPR.print_it("\nbasepath is a directory and a package but we're not scanning packages.\n", 2)
        yield item_dict
        return
    # fall-through to do enumeration.
    # do enumeration if we are a directory or we-are-a-package-and-we-want-package
    enumeratorOptionKeys = 0L
    if not scan_packages:
        enumeratorOptionKeys |= NSDirectoryEnumerationSkipsPackageDescendants
    if not scan_hidden_files:
        enumeratorOptionKeys |= NSDirectoryEnumerationSkipsHiddenFiles
    enumerator2 = sharedFM.enumeratorAtURL_includingPropertiesForKeys_options_errorHandler_(
        basepath_url,
        enumeratorURLKeys,
        enumeratorOptionKeys,
        error_handler_for_enumerator
        )
    for url in enumerator2:
        item_dict = GetURLValues(url, enumeratorURLKeys)
        depth = enumerator2.level()
        item_dict['vol_id'] = vol_id
        item_dict['depth'] = depth
        # unwind the folder stack back to the current enumeration depth
        while len(stak) > depth:
            stak.pop()
        # see if our current item is (effectively) a directory. check/query database if it is.
        item_is_package = is_item_a_package(url)
        if item_dict[NSURLIsDirectoryKey] and ((not item_is_package) or scan_packages):
            file_exists = db_file_exists(cnx, item_dict, vol_id)
            item_dict['directory_is_up_to_date'] = not ((not file_exists) or force_folder_scan)
            if (not file_exists) or force_folder_scan:
                folder_id = item_dict['NSFileSystemFileNumber']
                db_query_folder(cnx, vol_id, item_dict, depth)
                # (1) in addition to checking database, also need to add new files to RS2_ins[ (depth-1, folder_id) ] += rs
                # (2) any completely new directories (ie, not just update of existing directory) won't have
                # any database contents to check. (this is a lesser optimization?)
            folder_file_id = item_dict['NSFileSystemFileNumber']
            stak.append((depth, folder_file_id))
        # see if our current item's folder ID is in our list of (new of forced) folders to be tracked.
        folder_id = item_dict['NSFileSystemFolderNumber']
        item_dict['current_item_directory_is_being_checked'] = (depth-1, folder_id) in RS1_db_rels
        if (depth-1, folder_id) in RS1_db_rels:
            file_id = item_dict['NSFileSystemFileNumber']
            filename = item_dict[NSURLNameKey]
            file_mod_date = item_dict[NSURLContentModificationDateKey]
            s = str(file_mod_date)
            file_mod_date = s[:-len(" +0000")]   # strip the trailing offset text; assumes UTC suffix — TODO confirm
            rs = ( vol_id, folder_id, filename, file_id, file_mod_date)
            # if the current item is present in RS1 then it is no longer a "file to be deleted"
            # if in filesystem but not in database then it is a "file to be inserted"
            try:
                RS1_db_rels[ (depth-1, folder_id) ] -= rs
            except KeyError:
                RS2_ins[ (depth-1, folder_id) ] += rs
        yield item_dict
    # end enumerator
    return
def execute_insert_into_files(cnx, query, data, verbose_level=3): """ returns kDuplicateKey if duplicate key violation, kRecordInserted if not.""" # the fields in the query argument are marked %s because a magic routine that we con't see is converting our data # into mysql-compatible strings and then inserting them into our %s-es. I think that # using %s implies that we could've used %r or '%s', etc; so I recommend not using the magic # conversion routine implied by using (query, data) but rather explicity formating the sql using # (query % data) and passing the resultant string to cursor.execute() try: cursor = cnx.cursor() GPR.print_it(query % data, verbose_level) # print "cursor._connection.charset is: " , cursor._connection.charset # Returns an iterator when multi is True, otherwise None. cursor.execute(query, data) # (…, operation, params=None, multi=False) cnx.commit() q = "select @count_by_file_name, @count_by_file_id, @msg" # via insert trigger on table "files" cursor.execute(q) trigger_vars = dict(zip(("count_by_file_name", "count_by_file_id", "msg"), [z for z in cursor][0])) # kRecordInserted means we didn't get a duplicate key error insert_result = FilesInsertResult(l=kRecordInserted, verbose_level=verbose_level, **trigger_vars) q = "select @vol_id" cursor.execute(q) vol_id = [z for z in cursor][0][0] cnx.commit() return (vol_id , insert_result) except mysql.connector.Error as err: if err.errno == 1062 and err.sqlstate == '23000': if GPR.verbose_level >= verbose_level: n1 = err.msg.index('Duplicate entry') n2 = err.msg.index('for key ') msg2 = err.msg[n1:n2-1] print " "+repr(msg2) cnx.commit() # only insert trigger table "files" sets these variables q = "select @count_by_file_name, @count_by_file_id , @msg" cursor.execute(q) trigger_vars = dict(zip(("count_by_file_name", "count_by_file_id", "msg"), [z for z in cursor][0])) # kDuplicateKey means we got a duplicate key error insert_result = FilesInsertResult( l = kDuplicateKey, verbose_level=verbose_level, 
**trigger_vars) q = "select @vol_id" cursor.execute(q) vol_id = [z for z in cursor][0][0] cnx.commit() return (vol_id , insert_result) elif err.errno == 1242 and err.sqlstate == '21000': # print "Subquery returns more than 1 row" print query % data else: print 'erxr:', err, err.errno , err.message , err.msg, err.sqlstate # , dir(err) return None finally: cursor.close()