def get_coll_rec(cls, user, coll_name, rec):
    """Resolve a collection name and recording name to their ids.

    Builds a ``User`` for the id passed as ``user`` (using ``cls.redis`` and
    a fresh ``BaseAccess``), looks up the collection by name and then the
    recording inside it.

    :param user: id of the user owning the collection
    :param coll_name: name of the collection to look up
    :param rec: recording identifier passed to ``get_recording``
    :returns: ``(collection_id, recording_id)``; either element is ``None``
        when the corresponding object was not found
    """
    owner = User(my_id=user, redis=cls.redis, access=BaseAccess())

    collection = owner.get_collection_by_name(coll_name)
    if not collection:
        # no collection means there is no recording to look up either
        return None, None

    recording = collection.get_recording(rec)
    if not recording:
        return collection.my_id, None

    return collection.my_id, recording.my_id
def init_props(config):
    """Pass ``config`` to ``init_props`` of each model, then of storagepaths.

    The model classes are initialised in the order User, Collection,
    Recording, Stats; the storage-paths module is imported and initialised
    last.

    :param config: configuration object forwarded unchanged to every
        ``init_props`` call
    """
    # imports are performed at call time, inside the function
    from webrecorder.models import User, Collection, Recording, Stats

    for model in (User, Collection, Recording, Stats):
        model.init_props(config)

    import webrecorder.rec.storage.storagepaths as storagepaths

    storagepaths.init_props(config)
def delete_if_expired(self, temp_user, temp_dir):
    """Clean up an expired temporary user and/or its temp directory.

    The session value stored under ``'t:' + temp_user`` selects the action:

    * ``'commit-wait'``: ``temp_dir`` is removed only if it is empty (or is
      already gone) and the session key is dropped; while the directory
      still has content nothing is deleted.
    * the user's info key still exists in the data store: nothing happens
      while the referenced session is alive; otherwise open recordings are
      closed first and, once none are open, the user and the session key
      are deleted.
    * otherwise: ``temp_dir`` is removed recursively.

    :param temp_user: id of the temporary user
    :param temp_dir: directory holding that user's temporary data
    :returns: ``True`` when a cleanup step completed, ``False`` when the
        cleanup was deferred or failed
    """
    import errno

    temp_key = 't:' + temp_user

    sesh = self.sesh_redis.get(temp_key)

    if sesh == 'commit-wait':
        if os.path.isdir(temp_dir):
            # Only the rmdir() call is guarded, so that unrelated failures
            # are no longer misreported as "Waiting for commit".
            try:
                logger.debug('TempChecker: Removing if empty: ' + temp_dir)
                os.rmdir(temp_dir)
                logger.debug('TempChecker: Deleted empty dir: ' + temp_dir)
            except OSError as e:
                # POSIX allows rmdir() to report a non-empty directory as
                # either ENOTEMPTY or EEXIST.
                if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
                    logger.debug('TempChecker: Waiting for commit')
                elif e.errno != errno.ENOENT:
                    logger.error(str(e))
                return False
        else:
            logger.debug(
                'TempChecker: Remove Session For Already Deleted Dir: ' + temp_dir)

        self.sesh_redis.delete(temp_key)
        return True

    # temp user key exists
    elif self.data_redis.exists(User.INFO_KEY.format(user=temp_user)):

        # if user still active, don't remove
        if self.sesh_redis.get(self.sesh_key_template.format(sesh)):
            return False

        # delete user
        logger.debug('TempChecker: Deleting expired user: ' + temp_user)

        # NOTE(review): from here to the end of this branch the reviewed
        # source was redacted; the code is reconstructed from the sibling
        # revisions of this function -- confirm against the original file.
        user = User(my_id=temp_user,
                    redis=self.data_redis,
                    access=BaseAccess())

        # close any recording that is still open and retry on a later pass
        wait_to_delete = False

        for collection in user.get_collections(load=False):
            for recording in collection.get_recordings(load=False):
                if recording.is_open(extend=False):
                    recording.set_closed()
                    logger.debug('TempChecker: Closing temp recording: ' + recording.my_id)
                    wait_to_delete = True

        if wait_to_delete:
            return False

        user.delete_me()

        self.sesh_redis.delete(temp_key)

        # delete temp dir on next pass
        return True

    # no user session, remove temp dir and everything in it
    else:
        try:
            shutil.rmtree(temp_dir)
        except OSError as e:
            logger.warning(str(e))
            return False

        # logged only once the tree has really been removed
        logger.debug('TempChecker: Deleted expired temp dir: ' + temp_dir)

    return True
def init_props(config):
    """Pass ``config`` to ``init_props`` of each model, then of storagepaths.

    The model classes are initialised in the order User, Collection,
    Recording, Stats, Auto; the storage-paths module is imported and
    initialised last.

    :param config: configuration object forwarded unchanged to every
        ``init_props`` call
    """
    # imports are performed at call time, inside the function
    from webrecorder.models import User, Collection, Recording, Stats, Auto

    for model in (User, Collection, Recording, Stats, Auto):
        model.init_props(config)

    import webrecorder.rec.storage.storagepaths as storagepaths

    storagepaths.init_props(config)
def test_sync_avoid_double_load(self):
    """Sync the 'temp' collection index twice and expect a single load.

    Both syncs are started with ``do_async=True``; after each one the
    ``REC_CDXJ_T`` key is expected to exist.  The key is then expected to
    disappear (polled through ``sleep_try``) and the module-level
    ``load_counter`` must equal 1.
    """
    # neither index key may exist before the first sync
    for cdxj_key in (COLL_CDXJ, REC_CDXJ):
        self.assert_exists(cdxj_key, False)()

    anon = User(redis=self.redis,
                my_id=self.anon_user,
                access=BaseAccess())
    temp_coll = anon.get_collection_by_name('temp')

    # first sync with exists=False, second with exists=True
    for already_exists in (False, True):
        temp_coll.sync_coll_index(exists=already_exists, do_async=True)

        time.sleep(0.1)

        self.assert_exists(REC_CDXJ_T, True)()

    self.sleep_try(0.1, 0.5, self.assert_exists(REC_CDXJ_T, False))

    assert load_counter == 1
def delete_if_expired(self, temp_user, temp_dir):
    """Clean up an expired temporary user and/or its temp directory.

    The session value stored under ``'t:' + temp_user`` selects the action:

    * ``'commit-wait'``: ``temp_dir`` is removed only if it is empty (or is
      already gone) and the session key is dropped; while the directory
      still has content nothing is deleted.
    * the user's info key still exists in the data store: nothing happens
      while the referenced session is alive; otherwise open recordings are
      closed first and, once none are open, the user and the session key
      are deleted.
    * otherwise: ``temp_dir`` is removed recursively; a directory that is
      already missing counts as removed.

    :param temp_user: id of the temporary user
    :param temp_dir: directory holding that user's temporary data
    :returns: ``True`` when a cleanup step completed, ``False`` when the
        cleanup was deferred or failed
    """
    temp_key = 't:' + temp_user

    sesh = self.sesh_redis.get(temp_key)

    if sesh == 'commit-wait':
        # This temporary user has signed up for a permanent account and
        # their collections will be migrated to storage.
        # Clean up if that migration is complete (i.e. the dir is empty).
        # Otherwise, wait.
        if os.path.isdir(temp_dir):
            try:
                logger.debug('TempChecker: Removing if empty: ' + temp_dir)
                os.rmdir(temp_dir)
                logger.debug('TempChecker: Deleted empty dir: ' + temp_dir)
            except OSError as e:
                # POSIX allows rmdir() to report a non-empty directory as
                # either ENOTEMPTY or EEXIST; both mean "not committed yet".
                if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
                    logger.debug('TempChecker: Waiting for commit')
                elif e.errno != errno.ENOENT:
                    logger.error(str(e))
                return False
        else:
            logger.debug(
                'TempChecker: Removing Session For Already Deleted Dir: ' + temp_dir)

        self.sesh_redis.delete(temp_key)
        return True

    # temp user key exists
    elif self.data_redis.exists(User.INFO_KEY.format(user=temp_user)):

        # if user still active, don't remove
        if self.sesh_redis.get(self.sesh_key_template.format(sesh)):
            return False

        logger.debug('TempChecker: Deleting expired user: ' + temp_user)

        # NOTE(review): the construction of ``user`` was redacted in the
        # reviewed source and is reconstructed here -- confirm against the
        # original file.
        user = User(my_id=temp_user,
                    redis=self.data_redis,
                    access=BaseAccess())

        # mark the user's open recordings "closed";
        # return (if necessary) to give time for closing logic to complete
        wait_to_delete = False

        for collection in user.get_collections(load=False):
            for recording in collection.get_recordings(load=False):
                if recording.is_open(extend=False):
                    recording.set_closed()
                    logger.debug('TempChecker: Closing temp recording: ' + recording.my_id)
                    wait_to_delete = True

        if wait_to_delete:
            return False

        # delete the user; signal that the user's collections should be deleted.
        # the temp dir containing those collections will be deleted on next pass.
        user.delete_me()

        # delete the session
        self.sesh_redis.delete(temp_key)
        return True

    # no user session, remove temp dir and everything in it
    else:
        try:
            shutil.rmtree(temp_dir)
        except OSError as e:
            if e.errno != errno.ENOENT:
                logger.error(str(e))
                return False
        else:
            # logged only once the tree has really been removed
            logger.debug('TempChecker: Deleted expired temp dir: ' + temp_dir)

    return True
def get_coll_rec_obj(cls, coll_name, rec):
    """Look up a collection and one of its recordings for ``cls.anon_user``.

    :param coll_name: name of the collection to look up
    :param rec: recording identifier passed to ``get_recording``
    :returns: ``(collection, recording)``; ``recording`` is ``None`` when the
        collection lookup returned nothing
    """
    owner = User(my_id=cls.anon_user, redis=cls.redis, access=BaseAccess())

    collection = owner.get_collection_by_name(coll_name)
    if not collection:
        # without a collection there is nothing to fetch the recording from
        return collection, None

    return collection, collection.get_recording(rec)
def delete_if_expired(self, temp_user, temp_dir):
    """Clean up an expired temporary user and/or its temp directory.

    The session value stored under ``'t:' + temp_user`` selects the action:

    * ``'commit-wait'``: ``temp_dir`` is removed only if it is empty (or is
      already gone) and the session key is dropped; while the directory
      still has content nothing is deleted.
    * the user's info key still exists in the data store: nothing happens
      while the referenced session is alive; otherwise open recordings are
      closed first and, once none are open, the user and the session key
      are deleted.
    * otherwise: ``temp_dir`` is removed recursively.

    :param temp_user: id of the temporary user
    :param temp_dir: directory holding that user's temporary data
    :returns: ``True`` when a cleanup step completed, ``False`` when the
        cleanup was deferred or failed
    """
    import errno

    temp_key = 't:' + temp_user

    sesh = self.sesh_redis.get(temp_key)

    if sesh == 'commit-wait':
        if os.path.isdir(temp_dir):
            # Only the rmdir() call is guarded, so that unrelated failures
            # are no longer misreported as "Waiting for commit".
            try:
                logger.debug('TempChecker: Removing if empty: ' + temp_dir)
                os.rmdir(temp_dir)
                logger.debug('TempChecker: Deleted empty dir: ' + temp_dir)
            except OSError as e:
                # POSIX allows rmdir() to report a non-empty directory as
                # either ENOTEMPTY or EEXIST.
                if e.errno in (errno.ENOTEMPTY, errno.EEXIST):
                    logger.debug('TempChecker: Waiting for commit')
                elif e.errno != errno.ENOENT:
                    logger.error(str(e))
                return False
        else:
            logger.debug('TempChecker: Remove Session For Already Deleted Dir: ' + temp_dir)

        self.sesh_redis.delete(temp_key)
        return True

    # temp user key exists
    elif self.data_redis.exists(User.INFO_KEY.format(user=temp_user)):

        # if user still active, don't remove
        if self.sesh_redis.get(self.sesh_key_template.format(sesh)):
            return False

        # delete user
        logger.debug('TempChecker: Deleting expired user: ' + temp_user)

        # NOTE(review): the construction of ``user`` and the head of the
        # loop below were redacted in the reviewed source and are
        # reconstructed here -- confirm against the original file.
        user = User(my_id=temp_user,
                    redis=self.data_redis,
                    access=BaseAccess())

        # close any recording that is still open and retry on a later pass
        wait_to_delete = False

        for collection in user.get_collections(load=False):
            for recording in collection.get_recordings(load=False):
                if recording.is_open(extend=False):
                    recording.set_closed()
                    logger.debug('TempChecker: Closing temp recording: ' + recording.my_id)
                    wait_to_delete = True

        if wait_to_delete:
            return False

        user.delete_me()

        self.sesh_redis.delete(temp_key)

        # delete temp dir on next pass
        return True

    # no user session, remove temp dir and everything in it
    else:
        try:
            shutil.rmtree(temp_dir)
        except OSError as e:
            logger.warning(str(e))
            return False

        # logged only once the tree has really been removed
        logger.debug('TempChecker: Deleted expired temp dir: ' + temp_dir)

    return True
def process_new_pages(self):
    """Drain the new-pages queue and post one crawl request per recording.

    Every entry popped from ``Collection.NEW_PAGES_Q`` is a JSON object
    read for the keys ``rec``, ``user``, ``coll``, ``coll_name``, ``pid``,
    ``url``, ``timestamp`` and, optionally, ``title`` and ``derivs_rec``.
    Entries are grouped by ``rec``.  For each group a crawl definition based
    on ``SEARCH_CRAWL_DEF`` is posted to ``self.browsertrix_url``; the
    results are directed to the recording named by ``derivs_rec`` or, when
    none was supplied, to the source recording's derivatives recording
    (created if it does not exist yet).

    Groups whose user, collection or (when needed) source recording cannot
    be resolved are reported with ``print`` and skipped.  Exceptions raised
    by ``json.loads`` or ``requests.post`` are not caught here.
    """
    crawl_groups = {}

    # drain the queue, grouping the queued pages by source recording
    while True:
        data = self.redis.rpop(Collection.NEW_PAGES_Q)
        if not data:
            break

        page_data = json.loads(data)

        rec = page_data['rec']

        # user/collection info is taken from the first page of each group
        if rec not in crawl_groups:
            crawl_groups[rec] = {
                'user': page_data['user'],
                'coll': page_data['coll'],
                'coll_name': page_data['coll_name'],
                'pages': []
            }

        group = crawl_groups[rec]

        group['pages'].append({
            'pid': page_data['pid'],
            'url': page_data['url'],
            'timestamp': page_data['timestamp'],
            'title': page_data.get('title'),
        })

        page_derivs_rec = page_data.get('derivs_rec')
        if page_derivs_rec:
            group['derivs_rec'] = page_derivs_rec

    for rec, data in crawl_groups.items():
        user = User(my_id=data['user'],
                    redis=self.redis,
                    access=BaseAccess())

        # NOTE(review): the two statements following this check were
        # redacted in the reviewed source; they are reconstructed to mirror
        # the 'Invalid Collection' handling below -- confirm.
        if not user:
            print('Invalid User: ' + data['user'])
            continue

        collection = user.get_collection_by_name(data['coll_name'])
        if not collection:
            print('Invalid Collection: ' + data['coll_name'])
            continue

        recording = collection.get_recording(rec)

        # if a specific derivates recording is provided, use that
        derivs_rec = data.get('derivs_rec')

        # otherwise create derivates recording if none exists
        if not derivs_rec:
            # The source recording is only needed on this path.  The lookup
            # can come back empty, so skip this group instead of failing on
            # the attribute access and abandoning the remaining groups.
            if not recording:
                print('Invalid Recording: ' + str(rec))
                continue

            derivs_recording = recording.get_derivs_recording()
            if not derivs_recording:
                title = 'Derivatives for: Session from ' + recording.to_iso_date(
                    recording['created_at'], no_T=True)
                derivs_recording = collection.create_recording(
                    title=title, rec_type='derivs')
                recording.set_derivs_recording(derivs_recording)

            derivs_rec = derivs_recording.my_id

        # top-level copy only: just top-level keys are assigned below
        crawl_def = SEARCH_CRAWL_DEF.copy()
        crawl_def['coll'] = crawl_def['screenshot_coll'] = crawl_def[
            'text_coll'] = data['coll']
        crawl_def['user_params'] = {
            'user': data['user'],
            'coll': data['coll'],
            'coll_name': data['coll_name'],
            'rec': derivs_rec,
            'type': 'replay-coll',
            # updated later
            'request_ts': '',
            'browser': BROWSER
        }
        crawl_def['name'] = 'text-' + data['user'] + '-' + data['coll']
        crawl_def['seed_urls'] = data['pages']

        print(crawl_def)

        r = requests.post(self.browsertrix_url, json=crawl_def)
        print(r.text)