def get_temp_file(rowkey):
    """Reassemble the chunked temporary file stored under ``rowkey``.

    The row is read twice from the "TempFiles" column family: first for
    its ``size`` and ``chunk_list`` columns, then for the columns named by
    the comma-separated ``chunk_list`` value. The values of those columns
    are concatenated in the order they are listed.

    Returns the concatenated chunk values.
    Raises DatabaseError when either read raises cass.DatabaseError.
    """
    temp_files = cass.getColumnFamily("TempFiles")

    try:
        header = cass.getRecord(temp_files, rowkey,
                                columns=["size", "chunk_list"])
    except cass.DatabaseError:
        raise DatabaseError()

    # The size value is looked up but not used afterwards in this function;
    # the lookup is kept so a record without that column behaves as before.
    declared_size = header['size']
    chunk_names = header['chunk_list'].split(',')

    try:
        chunk_row = cass.getRecord(temp_files, rowkey, columns=chunk_names)
    except cass.DatabaseError:
        raise DatabaseError()

    # Collect the pieces in chunk_list order before joining them.
    pieces = []
    for name in chunk_names:
        pieces.append(chunk_row[name])
    return ''.join(pieces)
unused_keys = data_keys.difference(referenced_keys) print 'Found', len(unused_keys), 'unused keys that can be deleted' print 'Fetching unused keys from database and writing to temp file' size_ct = save_unused_keys(unused_keys) print 'Wrote', size_ct, 'bytes to temporary db file' if len(unused_keys) == 0: print 'Since no unused keys, nothing left to do. Exiting.' sys.exit(0) else: delete_unused(unused_keys) def add_dirs(): os.environ['DJANGO_SETTINGS_MODULE'] = 'settings' thisdir = os.path.dirname( os.path.realpath( __file__ ) ) upone, tail = os.path.split(thisdir) cdndir = os.path.join(upone, 'sirikata-cdn') celerydir = os.path.join(cdndir, 'celery_tasks') sys.path.append(cdndir) sys.path.append(celerydir) if __name__ == '__main__': add_dirs() import cassandra_storage.cassandra_util as cass from celery_tasks.import_upload import get_temp_file TEMPFILES = cass.getColumnFamily("TempFiles") USERS = cass.getColumnFamily('Users') main()