def scan_gridfs(db, host):
    """Scan the GridFS of a database and group its files by MIME type.

    Args:
        db: Name of the database whose ``fs.files`` collection is scanned.
        host: Currently unused -- the connection is always made to
            ``127.0.0.1``. Kept so existing callers keep working.

    Returns:
        A ``defaultdict(list)`` mapping each guessed MIME type (``None``
        when no type can be guessed from the filename) to a list of the
        md5 digests of the files of that type.
    """
    # TODO: maybe it's better to identify files by ID in both these scan functions.
    docdict = defaultdict(list)
    files = Connection('127.0.0.1')[db].fs.files
    fs = FS(db, True)
    for f in files.find():
        # guess_type() expects the file *name* and returns a
        # (type, encoding) tuple; only the type is used as the key.
        filename = f.get('filename')
        mt = mimetypes.guess_type(filename)[0] if filename else None
        # NOTE(review): assumes FS exposes get(); confirm against the FS class.
        doc = fs.get(f['_id'])
        docdict[mt].append(doc.md5)
    return docdict
def scan_gridfs(db, host):
    """Scan the GridFS of a database and group its files by MIME type.

    Args:
        db: Name of the database whose ``fs.files`` collection is scanned.
        host: Currently unused -- the connection is always made to
            ``127.0.0.1``. Kept so existing callers keep working.

    Returns:
        A ``defaultdict(list)`` mapping each guessed MIME type (``None``
        when no type can be guessed from the filename) to a list of the
        md5 digests of the files of that type.
    """
    # TODO: maybe it's better to identify files by ID in both these scan functions.
    docdict = defaultdict(list)
    files = Connection('127.0.0.1')[db].fs.files
    fs = FS(db, True)
    for f in files.find():
        # guess_type() expects the file *name* and returns a
        # (type, encoding) tuple; only the type is used as the key.
        filename = f.get('filename')
        mt = mimetypes.guess_type(filename)[0] if filename else None
        # NOTE(review): assumes FS exposes get(); confirm against the FS class.
        doc = fs.get(f['_id'])
        docdict[mt].append(doc.md5)
    return docdict
def scan_dir(path, db, recurse=False): """ Scans a directory, adds files to the GridFS and returns dictionary of files by mimetype """ fs = FS(db,True) docdict = defaultdict(lambda:[]) for p, dirs, files in os.walk(path): if not recurse: dirs = [] for f in files: mt = mimetypes.guess_type(f)[0] #classify documents by mimetype try: fullpath = os.path.join(os.getcwd(),os.path.join(p, f).decode('utf8')) except UnicodeDecodeError: print "skipping: ",f continue fid = fs.add_file(fullpath) if fid != None: doc = fs.fs.get(fid) docdict[mt].append(doc.md5) return docdict
def scan_dir(path, db, recurse=False): """ Scans a directory, adds files to the GridFS and returns dictionary of files by mimetype """ fs = FS(db, True) docdict = defaultdict(lambda: []) for p, dirs, files in os.walk(path): if not recurse: dirs = [] for f in files: mt = mimetypes.guess_type(f)[0] #classify documents by mimetype try: fullpath = os.path.join(os.getcwd(), os.path.join(p, f).decode('utf8')) except UnicodeDecodeError: print "skipping: ", f continue fid = fs.add_file(fullpath) if fid != None: doc = fs.fs.get(fid) docdict[mt].append(doc.md5) return docdict