Exemple #1
0
    def get_coll_rec(cls, user, coll_name, rec):
        """Resolve a collection name and recording key to their ids.

        Builds a ``User`` for ``user`` on ``cls.redis``, looks up the
        collection called ``coll_name`` and, when it is found, the
        recording ``rec`` inside it.

        Returns a ``(collection_id, recording_id)`` tuple; either element
        is ``None`` when the corresponding object was not found.
        """
        owner = User(my_id=user, redis=cls.redis, access=BaseAccess())
        collection = owner.get_collection_by_name(coll_name)

        # Without a collection there is nothing to look the recording up in.
        if not collection:
            return None, None

        recording = collection.get_recording(rec)
        return collection.my_id, (recording.my_id if recording else None)
Exemple #2
0
def init_props(config):
    """Forward ``config`` to every model class and to the storage paths module.

    The imports are kept local to the function, exactly as in the original,
    and the storage paths module is still imported only after the model
    classes have been initialised.
    """
    from webrecorder.models import User, Collection, Recording, Stats

    # Same call order as before: User, Collection, Recording, Stats.
    for model_cls in (User, Collection, Recording, Stats):
        model_cls.init_props(config)

    import webrecorder.rec.storage.storagepaths as storagepaths
    storagepaths.init_props(config)
Exemple #3
0
    def delete_if_expired(self, temp_user, temp_dir):
        # Decide whether the temp user `temp_user` and its directory
        # `temp_dir` can be cleaned up, and do so when possible.
        #
        # Returns False when cleanup has to be retried later (directory not
        # removable yet, session still active, or rmtree failed); returns
        # True otherwise.
        #
        # NOTE(review): this snippet is garbled around the "Deleting expired
        # user" log call below -- the text that followed it (presumably the
        # user deletion plus the `else:` / `try:` that the final `except`
        # belongs to) is missing, so the block is not valid Python as shown.
        temp_key = 't:' + temp_user
        sesh = self.sesh_redis.get(temp_key)

        # The session key holds the literal marker 'commit-wait'
        # (presumably: the temp user's data is being committed -- confirm).
        if sesh == 'commit-wait':
            try:
                if not os.path.isdir(temp_dir):
                    # Directory is already gone: only the session key is left.
                    logger.debug(
                        'TempChecker: Remove Session For Already Deleted Dir: '
                        + temp_dir)
                    self.sesh_redis.delete(temp_key)
                    return True

                logger.debug('TempChecker: Removing if empty: ' + temp_dir)
                # os.rmdir only removes an empty directory; if it is not
                # empty it raises and control goes to the except below.
                os.rmdir(temp_dir)
                #shutil.rmtree(temp_dir)
                logger.debug('TempChecker: Deleted empty dir: ' + temp_dir)

                self.sesh_redis.delete(temp_key)

            # Any exception (not only "directory not empty") is reported as
            # "waiting for commit" and the caller is told to retry.
            except Exception as e:
                logger.debug('TempChecker: Waiting for commit')
                return False

        # temp user key exists
        elif self.data_redis.exists(User.INFO_KEY.format(user=temp_user)):
            # if user still active, don't remove
            if self.sesh_redis.get(self.sesh_key_template.format(sesh)):
                #print('Skipping active temp ' + temp)
                return False

            # delete user
            # NOTE(review): the next statement is corrupted ('******' artifact);
            # source lines between the two log messages appear to be lost.
            logger.debug('TempChecker: Deleting expired user: '******'TempChecker: Deleted expired temp dir: ' +
                             temp_dir)
                shutil.rmtree(temp_dir)
            # Failure to remove the tree is logged and reported as "retry".
            except Exception as e:
                logger.warn(str(e))
                return False

        return True
Exemple #4
0
def init_props(config):
    """Forward ``config`` to every model class and to the storage paths module.

    Covers ``User``, ``Collection``, ``Recording``, ``Stats`` and ``Auto``,
    in that order, then ``storagepaths``. The imports stay local to the
    function, and the storage paths module is imported only after the
    model classes have been initialised.
    """
    from webrecorder.models import User, Collection, Recording, Stats, Auto

    # Same call order as before, one init_props(config) per model class.
    for model_cls in (User, Collection, Recording, Stats, Auto):
        model_cls.init_props(config)

    import webrecorder.rec.storage.storagepaths as storagepaths
    storagepaths.init_props(config)
    def test_sync_avoid_double_load(self):
        """Sync the 'temp' collection index twice and expect a single load.

        Steps: check that COLL_CDXJ and REC_CDXJ are absent, run two async
        syncs (first with ``exists=False``, then ``exists=True``), checking
        REC_CDXJ_T after each one, wait until REC_CDXJ_T is gone, and finally
        assert that ``load_counter`` is exactly 1.
        """
        for missing_key in (COLL_CDXJ, REC_CDXJ):
            self.assert_exists(missing_key, False)()

        anon = User(redis=self.redis,
                    my_id=self.anon_user,
                    access=BaseAccess())
        temp_coll = anon.get_collection_by_name('temp')

        # Two syncs back to back; REC_CDXJ_T must be present after each.
        for already_exists in (False, True):
            temp_coll.sync_coll_index(exists=already_exists, do_async=True)
            time.sleep(0.1)
            self.assert_exists(REC_CDXJ_T, True)()

        # Poll until the REC_CDXJ_T check for absence passes.
        gone_check = self.assert_exists(REC_CDXJ_T, False)
        self.sleep_try(0.1, 0.5, gone_check)

        assert load_counter == 1
    def test_sync_avoid_double_load(self):
        """Check that two consecutive async index syncs result in one load.

        Both COLL_CDXJ and REC_CDXJ are first checked with ``False``; after
        each sync REC_CDXJ_T is checked with ``True``; once REC_CDXJ_T passes
        the ``False`` check (polled via ``sleep_try``), ``load_counter`` must
        equal 1.
        """
        self.assert_exists(COLL_CDXJ, False)()
        self.assert_exists(REC_CDXJ, False)()

        owner = User(redis=self.redis,
                     my_id=self.anon_user,
                     access=BaseAccess())
        coll = owner.get_collection_by_name('temp')

        def sync_and_check(exists):
            # One round: kick off an async sync, pause briefly, then check
            # REC_CDXJ_T with True.
            coll.sync_coll_index(exists=exists, do_async=True)
            time.sleep(0.1)
            self.assert_exists(REC_CDXJ_T, True)()

        sync_and_check(False)
        sync_and_check(True)

        self.sleep_try(0.1, 0.5, self.assert_exists(REC_CDXJ_T, False))

        assert load_counter == 1
Exemple #7
0
    def delete_if_expired(self, temp_user, temp_dir):
        # Decide whether the temp user `temp_user` and its directory
        # `temp_dir` can be cleaned up, and do so when possible.
        #
        # Returns True when this pass finished its cleanup step, False when
        # the caller should try again later (directory not empty yet, session
        # still active, open recordings just closed, or a filesystem error).
        #
        # NOTE(review): this snippet is garbled at the "Deleting expired user"
        # log call below -- the string literal is cut off and the code that
        # binds `user` is missing, so the block is not valid Python as shown.
        temp_key = 't:' + temp_user
        sesh = self.sesh_redis.get(temp_key)

        if sesh == 'commit-wait':
            # This temporary user has signed up for a permanent account and
            # their collections will be migrated to storage.
            # Clean up if that migration is complete (i.e. the dir is empty).
            # Otherwise, wait.
            if os.path.isdir(temp_dir):
                try:
                    logger.debug('TempChecker: Removing if empty: ' + temp_dir)
                    # os.rmdir only succeeds on an empty directory.
                    os.rmdir(temp_dir)
                    logger.debug('TempChecker: Deleted empty dir: ' + temp_dir)
                except OSError as e:
                    # ENOTEMPTY: migration still in progress (debug only).
                    # ENOENT: directory vanished meanwhile (silent).
                    # Anything else is logged as an error.
                    # Every OSError leaves the session key in place and
                    # asks for a retry.
                    if e.errno == errno.ENOTEMPTY:
                        logger.debug('TempChecker: Waiting for commit')
                    elif e.errno != errno.ENOENT:
                        logger.error(str(e))
                    return False
            else:
                logger.debug(
                    'TempChecker: Removing Session For Already Deleted Dir: ' +
                    temp_dir)

            # Reached when the dir was removed above or did not exist.
            self.sesh_redis.delete(temp_key)
            return True

        # temp user key exists
        elif self.data_redis.exists(User.INFO_KEY.format(user=temp_user)):

            # if user still active, don't remove
            if self.sesh_redis.get(self.sesh_key_template.format(sesh)):
                return False

            # NOTE(review): the next line is corrupted ('******' artifact); the
            # rest of the log call, the creation of `user`, and the start of
            # the following comment appear to be lost.
            logger.debug('TempChecker: Deleting expired user: '******'s open recordings "closed";
            # return (if necessary) to give time for closing logic to complete
            wait_to_delete = False
            for collection in user.get_collections(load=False):
                for recording in collection.get_recordings(load=False):
                    if recording.is_open(extend=False):
                        recording.set_closed()
                        logger.debug('TempChecker: Closing temp recording: ' +
                                     recording.my_id)
                        wait_to_delete = True
            # At least one recording was closed in this pass: defer deletion.
            if wait_to_delete:
                return False

            # delete the user; signal that the user's collections should be deleted.
            # the temp dir containing those collections will be deleted on next pass.
            user.delete_me()

            # delete the session
            self.sesh_redis.delete(temp_key)

            return True

        # no user session, remove temp dir and everything in it
        else:
            try:
                logger.debug('TempChecker: Deleted expired temp dir: ' +
                             temp_dir)
                shutil.rmtree(temp_dir)
            except OSError as e:
                # A missing directory (ENOENT) is not logged, but any OSError
                # still makes this pass return False.
                if e.errno != errno.ENOENT:
                    logger.error(str(e))
                return False

        return True
Exemple #8
0
 def get_coll_rec_obj(cls, coll_name, rec):
     """Look up a collection of ``cls.anon_user`` and one of its recordings.

     Returns a ``(collection, recording)`` tuple. ``recording`` is ``None``
     when the collection lookup yields nothing; in that case the first
     element is whatever ``get_collection_by_name`` returned.
     """
     anon = User(my_id=cls.anon_user, redis=cls.redis, access=BaseAccess())
     collection = anon.get_collection_by_name(coll_name)

     if collection:
         return collection, collection.get_recording(rec)

     return collection, None
    def delete_if_expired(self, temp_user, temp_dir):
        # Decide whether the temp user `temp_user` and its directory
        # `temp_dir` can be cleaned up, and do so when possible.
        #
        # Returns False when cleanup has to be retried later, True otherwise.
        #
        # NOTE(review): this snippet is garbled at the "Deleting expired user"
        # log call below -- the code that binds `user`, `recording` and
        # `wait_to_delete` is missing, so the block is not valid Python as
        # shown.
        temp_key = 't:' + temp_user
        sesh = self.sesh_redis.get(temp_key)

        # The session key holds the literal marker 'commit-wait'
        # (presumably: the temp user's data is being committed -- confirm).
        if sesh == 'commit-wait':
            try:
                if not os.path.isdir(temp_dir):
                    # Directory is already gone: only the session key is left.
                    logger.debug('TempChecker: Remove Session For Already Deleted Dir: ' + temp_dir)
                    self.sesh_redis.delete(temp_key)
                    return True

                logger.debug('TempChecker: Removing if empty: ' + temp_dir)
                # os.rmdir only removes an empty directory; if it is not
                # empty it raises and control goes to the except below.
                os.rmdir(temp_dir)
                #shutil.rmtree(temp_dir)
                logger.debug('TempChecker: Deleted empty dir: ' + temp_dir)

                self.sesh_redis.delete(temp_key)

            # Any exception (not only "directory not empty") is reported as
            # "waiting for commit" and the caller is told to retry.
            except Exception as e:
                logger.debug('TempChecker: Waiting for commit')
                return False

        # temp user key exists
        elif self.data_redis.exists(User.INFO_KEY.format(user=temp_user)):
            # if user still active, don't remove
            if self.sesh_redis.get(self.sesh_key_template.format(sesh)):
                #print('Skipping active temp ' + temp)
                return False

            # delete user
            # NOTE(review): the next statement is corrupted ('******' artifact);
            # the lines that create `user` and loop over its recordings
            # appear to be lost.
            logger.debug('TempChecker: Deleting expired user: '******'TempChecker: Closing temp recording: ' + recording.my_id)
                        wait_to_delete = True

            # Something was flagged above: defer deletion to a later pass.
            if wait_to_delete:
                return False

            user.delete_me()

            self.sesh_redis.delete(temp_key)

            # delete temp dir on next pass
            return True

        # no user session, remove temp dir and everything in it
        else:
            try:
                logger.debug('TempChecker: Deleted expired temp dir: ' + temp_dir)
                shutil.rmtree(temp_dir)
            # Failure to remove the tree is logged and reported as "retry".
            except Exception as e:
                logger.warn(str(e))
                return False

        return True
Exemple #10
0
    def process_new_pages(self):
        # Drain the Collection.NEW_PAGES_Q redis list, group the queued pages
        # by their 'rec' value, and POST one crawl definition per group to
        # self.browsertrix_url.
        #
        # NOTE(review): this snippet is garbled at the "Invalid User" print
        # below -- the code that assigns `collection` is missing, so the block
        # is not valid Python as shown.
        crawl_groups = {}

        # Phase 1: pop entries until the queue returns nothing.
        while True:
            data = self.redis.rpop(Collection.NEW_PAGES_Q)
            if not data:
                break

            # Each queue entry is a JSON document describing one page.
            page_data = json.loads(data)

            rec = page_data['rec']

            # The first page seen for a recording creates its group; the
            # user/coll fields are taken from that first page only.
            if rec not in crawl_groups:
                crawl_groups[rec] = {
                    'user': page_data['user'],
                    'coll': page_data['coll'],
                    'coll_name': page_data['coll_name'],
                    'pages': []
                }

            # 'title' is optional (None when absent); the other keys are
            # required and raise KeyError if missing.
            crawl_groups[rec]['pages'].append({
                'pid':
                page_data['pid'],
                'url':
                page_data['url'],
                'timestamp':
                page_data['timestamp'],
                'title':
                page_data.get('title'),
            })

            # A later page with a truthy 'derivs_rec' overwrites an earlier one.
            if page_data.get('derivs_rec'):
                crawl_groups[rec]['derivs_rec'] = page_data.get('derivs_rec')

        # Phase 2: build and submit one crawl per recording group.
        for rec, data in crawl_groups.items():
            user = User(my_id=data['user'],
                        redis=self.redis,
                        access=BaseAccess())

            # NOTE(review): the print below is corrupted ('******' artifact);
            # the rest of this branch and the lookup that assigns
            # `collection` appear to be lost.
            if not user:
                print('Invalid User: '******'coll_name'])

            if not collection:
                print('Invalid Collection: ' + data['coll_name'])
                continue

            # NOTE(review): `recording` is used below without a check; if
            # get_recording can return None this would raise -- confirm.
            recording = collection.get_recording(rec)

            # if a specific derivatives recording is provided, use that
            derivs_rec = data.get('derivs_rec')

            # otherwise create derivatives recording if none exists
            if not derivs_rec:
                derivs_recording = recording.get_derivs_recording()
                if not derivs_recording:
                    title = 'Derivatives for: Session from ' + recording.to_iso_date(
                        recording['created_at'], no_T=True)
                    derivs_recording = collection.create_recording(
                        title=title, rec_type='derivs')

                    # Link the new derivatives recording to its source recording.
                    recording.set_derivs_recording(derivs_recording)

                derivs_rec = derivs_recording.my_id

            # Start from a copy of the SEARCH_CRAWL_DEF template (presumably a
            # dict, in which case .copy() is shallow -- confirm).
            crawl_def = SEARCH_CRAWL_DEF.copy()
            # 'coll', 'screenshot_coll' and 'text_coll' all get the same value.
            crawl_def['coll'] = crawl_def['screenshot_coll'] = crawl_def[
                'text_coll'] = data['coll']
            crawl_def['user_params'] = {
                'user': data['user'],
                'coll': data['coll'],
                'coll_name': data['coll_name'],
                'rec': derivs_rec,
                'type': 'replay-coll',
                # updated later
                'request_ts': '',
                'browser': BROWSER
            }
            crawl_def['name'] = 'text-' + data['user'] + '-' + data['coll']
            crawl_def['seed_urls'] = data['pages']

            print(crawl_def)

            # Submit the crawl; the response body is only printed, its status
            # is not checked.
            r = requests.post(self.browsertrix_url, json=crawl_def)
            print(r.text)