def dump_user_installs_cron():
    """Sets up tasks to do user install dumps."""
    chunk_size = 100
    # Get valid users to dump.
    user_ids = set(Installed.objects.filter(
        user__enable_recommendations=True).values_list('user', flat=True))

    # Clean up the path where we'll store the individual json files from
    # each user installs dump (which are in users/ in DUMPED_USERS_PATH).
    path_to_cleanup = os.path.join(settings.DUMPED_USERS_PATH, 'users')
    task_log.info('Cleaning up path {0}'.format(path_to_cleanup))
    try:
        for dirpath, dirnames, filenames in walk_storage(
                path_to_cleanup, storage=private_storage):
            for filename in filenames:
                private_storage.delete(os.path.join(dirpath, filename))
    except OSError:
        # Ignore if the directory does not exist.
        pass

    grouping = []
    for chunk in chunked(user_ids, chunk_size):
        grouping.append(dump_user_installs.subtask(args=[chunk]))

    post = zip_users.subtask(immutable=True)
    ts = chord(grouping, post)
    ts.apply_async()
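Every snippet in this file goes through walk_storage, an os.walk analogue that runs against a Django storage backend instead of the local filesystem, which is what lets the same loop work over S3 or local disk. For orientation, here is a minimal sketch of such a helper; it assumes nothing beyond the standard Storage.listdir() API, and only the name walk_storage and the storage keyword are taken from the call sites above, the body is illustrative:

import posixpath

from django.core.files.storage import default_storage


def walk_storage(path, storage=default_storage):
    # Sketch: yield (dirpath, dirnames, filenames) tuples like os.walk(),
    # top-down, backed by a Django storage instance. Storage.listdir()
    # returns a (directories, files) pair for the given path.
    roots = [path]
    while roots:
        new_roots = []
        for root in roots:
            dirnames, filenames = storage.listdir(root)
            yield root, dirnames, filenames
            for dirname in dirnames:
                new_roots.append(posixpath.join(root, dirname))
        roots = new_roots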
def test_storage_walk():
    tmp = tempfile.mkdtemp()
    jn = partial(os.path.join, tmp)
    try:
        private_storage.save(jn('file1.txt'), ContentFile(''))
        private_storage.save(jn('one/file1.txt'), ContentFile(''))
        private_storage.save(jn('one/file2.txt'), ContentFile(''))
        private_storage.save(jn('one/two/file1.txt'), ContentFile(''))
        private_storage.save(jn('one/three/file1.txt'), ContentFile(''))
        private_storage.save(jn('four/five/file1.txt'), ContentFile(''))
        private_storage.save(jn(u'four/kristi\u2603/kristi\u2603.txt'),
                             ContentFile(''))

        results = [
            (dir, set(subdirs), set(files))
            for dir, subdirs, files in sorted(
                walk_storage(tmp, storage=private_storage))]

        yield (eq_, results.pop(0),
               (tmp, set(['four', 'one']), set(['file1.txt'])))
        yield (eq_, results.pop(0),
               (jn('four'), set(['five', 'kristi\xe2\x98\x83']), set([])))
        yield (eq_, results.pop(0),
               (jn('four/five'), set([]), set(['file1.txt'])))
        yield (eq_, results.pop(0),
               (jn('four/kristi\xe2\x98\x83'), set([]),
                set(['kristi\xe2\x98\x83.txt'])))
        yield (eq_, results.pop(0),
               (jn('one'), set(['three', 'two']),
                set(['file1.txt', 'file2.txt'])))
        yield (eq_, results.pop(0),
               (jn('one/three'), set([]), set(['file1.txt'])))
        yield (eq_, results.pop(0),
               (jn('one/two'), set([]), set(['file1.txt'])))
        yield (eq_, len(results), 0)
    finally:
        rm_local_tmp_dir(tmp)
def test_storage_walk():
    tmp = tempfile.mkdtemp()
    jn = partial(os.path.join, tmp)
    try:
        storage.save(jn('file1.txt'), ContentFile(''))
        storage.save(jn('one/file1.txt'), ContentFile(''))
        storage.save(jn('one/file2.txt'), ContentFile(''))
        storage.save(jn('one/two/file1.txt'), ContentFile(''))
        storage.save(jn('one/three/file1.txt'), ContentFile(''))
        storage.save(jn('four/five/file1.txt'), ContentFile(''))
        storage.save(jn(u'four/kristi\u2603/kristi\u2603.txt'),
                     ContentFile(''))

        results = [
            (dir, set(subdirs), set(files))
            for dir, subdirs, files in sorted(walk_storage(tmp))]

        yield (eq_, results.pop(0),
               (tmp, set(['four', 'one']), set(['file1.txt'])))
        yield (eq_, results.pop(0),
               (jn('four'), set(['five', 'kristi\xe2\x98\x83']), set([])))
        yield (eq_, results.pop(0),
               (jn('four/five'), set([]), set(['file1.txt'])))
        yield (eq_, results.pop(0),
               (jn('four/kristi\xe2\x98\x83'), set([]),
                set(['kristi\xe2\x98\x83.txt'])))
        yield (eq_, results.pop(0),
               (jn('one'), set(['three', 'two']),
                set(['file1.txt', 'file2.txt'])))
        yield (eq_, results.pop(0),
               (jn('one/three'), set([]), set(['file1.txt'])))
        yield (eq_, results.pop(0),
               (jn('one/two'), set([]), set(['file1.txt'])))
        yield (eq_, len(results), 0)
    finally:
        rm_local_tmp_dir(tmp)
def convert(directory, delete=False):
    print 'Converting icons in %s' % directory
    pks = []
    k = 0
    for path, names, filenames in walk_storage(directory):
        for filename in filenames:
            old = os.path.join(path, filename)
            pre, ext = os.path.splitext(old)
            if pre[-3:] in size_suffixes or ext not in extensions:
                continue

            if not storage.size(old):
                print 'Icon %s is empty, ignoring.' % old
                continue

            for size, size_suffix in zip(sizes, size_suffixes):
                new = '%s%s%s' % (pre, size_suffix, '.png')
                if os.path.exists(new):
                    continue
                resize_image(old, new, (size, size), remove_src=False)

            if ext != '.png':
                pks.append(os.path.basename(pre))

            if delete:
                storage.delete(old)

            k += 1
            if not k % 1000:
                print "... converted %s" % k

    for chunk in chunked(pks, 100):
        Webapp.objects.filter(pk__in=chunk).update(icon_type='image/png')
def cleanup_extracted_file():
    log.info('Removing extracted files for file viewer.')
    root = os.path.join(settings.TMP_PATH, 'file_viewer')
    # Local storage uses local time for file modification. S3 uses UTC time.
    now = datetime.utcnow if storage_is_remote() else datetime.now
    for path in storage.listdir(root)[0]:
        full = os.path.join(root, path)
        age = now() - storage.modified_time(
            os.path.join(full, 'manifest.webapp'))
        if age.total_seconds() > (60 * 60):
            log.debug('Removing extracted files: %s, %dsecs old.' %
                      (full, age.total_seconds()))
            for subroot, dirs, files in walk_storage(full):
                for f in files:
                    storage.delete(os.path.join(subroot, f))
            # Nuke out the file and diff caches when the file gets removed.
            id = os.path.basename(path)
            try:
                int(id)
            except ValueError:
                continue
            key = hashlib.md5()
            key.update(str(id))
            cache.delete('%s:memoize:%s:%s' % (settings.CACHE_PREFIX,
                                               'file-viewer',
                                               key.hexdigest()))
def cleanup_extracted_file():
    log.info('Removing extracted files for file viewer.')
    root = os.path.join(settings.TMP_PATH, 'file_viewer')
    # Local storage uses local time for file modification. S3 uses UTC time.
    now = datetime.utcnow if storage_is_remote() else datetime.now
    for path in private_storage.listdir(root)[0]:
        full = os.path.join(root, path)
        age = now() - private_storage.modified_time(
            os.path.join(full, 'manifest.webapp'))
        if age.total_seconds() > (60 * 60):
            log.debug('Removing extracted files: %s, %dsecs old.' %
                      (full, age.total_seconds()))
            for subroot, dirs, files in walk_storage(full):
                for f in files:
                    private_storage.delete(os.path.join(subroot, f))
            # Nuke out the file and diff caches when the file gets removed.
            id = os.path.basename(path)
            try:
                int(id)
            except ValueError:
                continue
            key = hashlib.md5()
            key.update(str(id))
            cache.delete('%s:memoize:%s:%s' % (settings.CACHE_PREFIX,
                                               'file-viewer',
                                               key.hexdigest()))
def cleanup(self):
    try:
        for root, dirs, files in walk_storage(self.dest):
            for fname in files:
                storage.delete(os.path.join(root, fname))
    except OSError as e:
        if e.errno == 2:
            # Directory doesn't exist, nothing to clean up.
            return
        raise
def compress_export(tarball_name, date):
    # We need a temporary directory on the local filesystem that will
    # contain all files in order to call `tar`.
    local_source_dir = tempfile.mkdtemp()
    apps_dirpath = os.path.join(settings.DUMPED_APPS_PATH, 'apps')

    # In case apps_dirpath is empty, add a dummy file to make the apps
    # directory in the tar archive non-empty. It should not happen in prod,
    # but it's nice to have it to prevent the task from failing entirely.
    with private_storage.open(
            os.path.join(apps_dirpath, '0', '.keep'), 'w') as fd:
        fd.write('.')

    # Now, copy content from private_storage to that temp directory. We
    # don't need to worry about creating the directories locally, the
    # storage class does that for us.
    for dirpath, dirnames, filenames in walk_storage(
            apps_dirpath, storage=private_storage):
        for filename in filenames:
            src_path = os.path.join(dirpath, filename)
            dst_path = os.path.join(
                local_source_dir, 'apps', os.path.basename(dirpath),
                filename)
            copy_stored_file(src_path, dst_path,
                             src_storage=private_storage,
                             dst_storage=local_storage)

    # Also add extra files to the temp directory.
    extra_filenames = compile_extra_files(local_source_dir, date)

    # All our files are now present locally, let's generate a local
    # filename that will contain the final '.tar.gz' before it's copied
    # over to public storage.
    local_target_file = tempfile.NamedTemporaryFile(
        suffix='.tgz', prefix='dumped-apps-')

    # tar ALL the things!
    cmd = ['tar', 'czf', local_target_file.name, '-C',
           local_source_dir] + ['apps'] + extra_filenames
    task_log.info(u'Creating dump {0}'.format(local_target_file.name))
    subprocess.call(cmd)

    # Now copy the local tgz to the public storage.
    remote_target_filename = os.path.join(
        settings.DUMPED_APPS_PATH, 'tarballs', '%s.tgz' % tarball_name)
    copy_stored_file(local_target_file.name, remote_target_filename,
                     src_storage=local_storage,
                     dst_storage=public_storage)

    # Clean-up.
    local_target_file.close()
    rm_directory(local_source_dir)

    return remote_target_filename
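compress_export (and zip_users below) move individual files between backends with copy_stored_file. Only its name and keyword arguments are taken from the call sites above; the body is a hedged sketch that streams in fixed-size chunks, and the chunk_size parameter is an assumption, not part of the original helper:

def copy_stored_file(src_path, dst_path, src_storage=None, dst_storage=None,
                     chunk_size=64 * 1024):
    # Sketch: stream a file from one Django storage backend to another in
    # fixed-size chunks, so large dumps never have to fit in memory.
    with src_storage.open(src_path, 'rb') as src:
        with dst_storage.open(dst_path, 'wb') as dst:
            while True:
                chunk = src.read(chunk_size)
                if not chunk:
                    break
                dst.write(chunk)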
def clean_old_signed(seconds=60 * 60):
    """Clean out apps signed for reviewers."""
    log.info('Removing old apps signed for reviewers')
    root = settings.SIGNED_APPS_REVIEWER_PATH
    # Local storage uses local time for file modification. S3 uses UTC time.
    now = datetime.utcnow if storage_is_remote() else datetime.now
    for nextroot, dirs, files in walk_storage(root):
        for fn in files:
            full = os.path.join(nextroot, fn)
            age = now() - storage.modified_time(full)
            if age.total_seconds() > seconds:
                log.debug('Removing signed app: %s, %dsecs old.' %
                          (full, age.total_seconds()))
                storage.delete(full)
def clean_old_signed(seconds=60 * 60):
    """Clean out apps signed for reviewers."""
    log.info('Removing old apps signed for reviewers')
    root = settings.SIGNED_APPS_REVIEWER_PATH
    # Local storage uses local time for file modification. S3 uses UTC time.
    now = datetime.utcnow if storage_is_remote() else datetime.now
    for nextroot, dirs, files in walk_storage(root, storage=private_storage):
        for fn in files:
            full = os.path.join(nextroot, fn)
            age = now() - private_storage.modified_time(full)
            if age.total_seconds() > seconds:
                log.debug('Removing signed app: %s, %dsecs old.' %
                          (full, age.total_seconds()))
                private_storage.delete(full)
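storage_is_remote decides which clock to compare against modified_time, since a local backend reports local time while S3 reports UTC. A minimal sketch, assuming the project switches backends via Django's DEFAULT_FILE_STORAGE setting:

from django.conf import settings


def storage_is_remote():
    # Sketch: treat anything other than the stock filesystem backend
    # (e.g. an S3 backend) as remote.
    return (settings.DEFAULT_FILE_STORAGE !=
            'django.core.files.storage.FileSystemStorage')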
def export_data(name=None):
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    if name is None:
        name = today

    # Clean up the path where we'll store the individual json files from
    # each app dump.
    for dirpath, dirnames, filenames in walk_storage(
            settings.DUMPED_APPS_PATH, storage=private_storage):
        for filename in filenames:
            private_storage.delete(os.path.join(dirpath, filename))
    task_log.info('Cleaning up path {0}'.format(settings.DUMPED_APPS_PATH))

    # Run all dump_apps task in parallel, and once it's done, add extra
    # files and run compression.
    chord(dump_all_apps_tasks(),
          compress_export.si(tarball_name=name, date=today)).apply_async()
def export_data(name=None):
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    if name is None:
        name = today

    # Clean up the path where we'll store the individual json files from
    # each app dump (which are in apps/ inside DUMPED_APPS_PATH).
    path_to_cleanup = os.path.join(settings.DUMPED_APPS_PATH, 'apps')
    task_log.info('Cleaning up path {0}'.format(path_to_cleanup))
    try:
        for dirpath, dirnames, filenames in walk_storage(
                path_to_cleanup, storage=private_storage):
            for filename in filenames:
                private_storage.delete(os.path.join(dirpath, filename))
    except OSError:
        # Ignore if the directory does not exist.
        pass

    # Run all dump_apps task in parallel, and once it's done, add extra
    # files and run compression.
    chord(dump_all_apps_tasks(),
          compress_export.si(tarball_name=name, date=today)).apply_async()
def cleanup_extracted_file():
    log.info('Removing extracted files for file viewer.')
    root = os.path.join(settings.TMP_PATH, 'file_viewer')
    # listdir() returns a (directories, files) pair; we want the directories.
    for path in storage.listdir(root)[0]:
        full = os.path.join(root, path)
        age = time.time() - os.stat(full)[stat.ST_ATIME]
        if age > (60 * 60):
            log.debug('Removing extracted files: %s, %dsecs old.' %
                      (full, age))
            for subroot, dirs, files in walk_storage(full):
                for f in files:
                    storage.delete(os.path.join(subroot, f))
            # Nuke out the file and diff caches when the file gets removed.
            id = os.path.basename(path)
            try:
                int(id)
            except ValueError:
                continue
            key = hashlib.md5()
            key.update(str(id))
            cache.delete('%s:memoize:%s:%s' % (settings.CACHE_PREFIX,
                                               'file-viewer',
                                               key.hexdigest()))
def handle(self, *args, **options):
    count = 0
    for root, folders, files in walk_storage(_root):
        if not root.startswith(_subs):
            continue
        for fname in files:
            fname = os.path.join(root, fname)
            if fname.endswith(_exts):
                data = storage.open(fname).read()
                found = False
                for match in _loc_re.finditer(data):
                    if not found:
                        found = True
                        print fname
                        print '-' * len(fname)
                    print match.string[match.start():match.end()]
                    count += 1
                if found:
                    print
    print 'Strings found:', count
def zip_users(*args, **kw):
    date = datetime.datetime.utcnow().strftime('%Y-%m-%d')
    tarball_name = date

    # We need a temporary directory on the local filesystem that will
    # contain all files in order to call `tar`.
    local_source_dir = tempfile.mkdtemp()
    users_dirpath = os.path.join(settings.DUMPED_USERS_PATH, 'users')

    # In case users_dirpath is empty, add a dummy file to make the users
    # directory in the tar archive non-empty. It should not happen in prod,
    # but it's nice to have it to prevent the task from failing entirely.
    with private_storage.open(
            os.path.join(users_dirpath, '0', '.keep'), 'w') as fd:
        fd.write('.')

    # Now, copy content from private_storage to that temp directory. We
    # don't need to worry about creating the directories locally, the
    # storage class does that for us.
    for dirpath, dirnames, filenames in walk_storage(
            users_dirpath, storage=private_storage):
        for filename in filenames:
            src_path = os.path.join(dirpath, filename)
            dst_path = os.path.join(
                local_source_dir, 'users', os.path.basename(dirpath),
                filename)
            copy_stored_file(src_path, dst_path,
                             src_storage=private_storage,
                             dst_storage=local_storage)

    # Put some .txt files in place locally.
    context = Context({'date': date, 'url': settings.SITE_URL})
    extra_filenames = ['license.txt', 'readme.txt']
    for extra_filename in extra_filenames:
        template = loader.get_template('webapps/dump/users/' +
                                       extra_filename)
        dst = os.path.join(local_source_dir, extra_filename)
        with local_storage.open(dst, 'w') as fd:
            fd.write(template.render(context))

    # All our files are now present locally, let's generate a local
    # filename that will contain the final '.tar.gz' before it's copied
    # over to storage.
    local_target_file = tempfile.NamedTemporaryFile(
        suffix='.tgz', prefix='dumped-users-')

    # tar ALL the things!
    cmd = ['tar', 'czf', local_target_file.name, '-C',
           local_source_dir] + ['users'] + extra_filenames
    task_log.info(u'Creating user dump {0}'.format(local_target_file.name))
    subprocess.call(cmd)

    # Now copy the local tgz to the private storage (unlike the apps dump,
    # this one contains user data and is not copied to public storage).
    remote_target_filename = os.path.join(
        settings.DUMPED_USERS_PATH, 'tarballs', '%s.tgz' % tarball_name)
    copy_stored_file(local_target_file.name, remote_target_filename,
                     src_storage=local_storage,
                     dst_storage=private_storage)

    # Clean-up.
    local_target_file.close()
    rm_directory(local_source_dir)

    return remote_target_filename
def cleanup(self):
    if storage.exists(self.dest):
        for root, dirs, files in walk_storage(self.dest):
            for fname in files:
                storage.delete(os.path.join(root, fname))