예제 #1
0
파일: tasks.py 프로젝트: kumar303/rockit
def store_and_transcode(track_id, session_key):
    """Queue the store/transcode pipeline for an uploaded track.

    The track is looked up by primary key and the type of its temporary
    file is detected with ``filetype``.  Tasks are pushed onto a
    ``TaskTree`` in this order: store the source file (``source=True``),
    run every transcoder, then unlink the source.

    Raises ValueError when the detected type is neither mp3 nor m4a.
    """
    track = Track.objects.get(pk=track_id)
    kind = filetype(track.temp_path)

    # Guard clause: only mp3 and m4a sources are supported.
    if kind not in ('mp3', 'm4a'):
        raise ValueError('file type not supported: %r' % kind)
    source_task = store_mp3 if kind == 'mp3' else store_m4a

    task_args = [track.pk, session_key]
    tree = TaskTree()
    tree.push(source_task, args=task_args, kwargs={'source': True})
    for transcoder in [store_ogg]:
        tree.push(transcoder, args=task_args)
    tree.push(unlink_source, args=task_args)
    tree.apply_async()
예제 #2
0
파일: tasks.py 프로젝트: bopopescu/rockit
def store_and_transcode(track_id, session_key):
    """Schedule storage and transcoding of a track's temporary file.

    Picks the source-storing task from the detected file type (mp3 or
    m4a), then pushes it, the ogg transcoder and the source clean-up task
    onto a ``TaskTree`` before applying the tree asynchronously.

    Raises ValueError for any other file type.
    """
    track = Track.objects.get(pk=track_id)
    detected = filetype(track.temp_path)

    if detected == 'mp3':
        source_task = store_mp3
    elif detected == 'm4a':
        source_task = store_m4a
    else:
        raise ValueError('file type not supported: %r' % detected)

    shared_args = [track.pk, session_key]
    tree = TaskTree()
    # The source task is the only one that receives the ``source`` kwarg.
    tree.push(source_task, args=shared_args, kwargs={'source': True})
    # Transcoders first, then removal of the source.
    for follow_up in (store_ogg, unlink_source):
        tree.push(follow_up, args=shared_args)
    tree.apply_async()
예제 #3
0
    def handle(self, *args, **kwargs):
        """Rebuild the Elasticsearch indexes behind their aliases.

        For every alias a new time-stamped index is scheduled in a TaskTree
        while the currently aliased indexes stay in place, so the search
        feature keeps working while the indexation occurs.  After the tree
        is applied the command polls ``database_flagged()`` until it
        clears.

        Keyword options read: ``force``, ``prefix`` and ``wipe``.

        Returns the ``_SUMMARY`` text filled in with the number of aliases
        and the pretty-printed ``_aliases`` response.

        Raises CommandError when not run from the Marketplace, when the
        database is already flagged and ``force`` is not set, or when the
        wipe confirmation is not answered with "yes".
        """
        if not django_settings.MARKETPLACE:
            raise CommandError('This command affects both the marketplace and '
                               'AMO ES storage. But the command can only be '
                               'run from the Marketplace.')

        forced = kwargs.get('force', False)

        if database_flagged() and not forced:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        index_prefix = kwargs.get('prefix', '')
        log('Starting the reindexation')

        if kwargs.get('wipe', False):
            answer = raw_input("Are you sure you want to wipe all data from "
                               "ES ? (yes/no): ")
            while answer not in ('yes', 'no'):
                answer = raw_input('Please enter either "yes" or "no": ')

            if answer != 'yes':
                raise CommandError("Aborted.")
            unflag_database()
            requests.delete(url('/'))
        elif forced:
            unflag_database()

        # Current aliases from /_aliases, as (index name, alias info) pairs.
        alias_listing = requests.get(url('/_aliases')).json().items()

        # Names of the aliases that have to be rebuilt.
        alias_names = set(index_prefix + name for name in _ALIASES.values())

        alias_actions = []

        def queue_action(*step):
            # Record each alias action only once.
            if step not in alias_actions:
                alias_actions.append(step)

        log('Building the task tree')
        tree = TaskTree()
        tail = None
        stale_indexes = []

        # Every alias gets a new time-stamped index.
        for alias in alias_names:
            previous = None

            for index_name, info in alias_listing:
                if alias not in info['aliases'].keys():
                    continue
                # This index is replaced: remember it for deletion and
                # schedule the removal of its alias.
                previous = index_name
                stale_indexes.append(index_name)
                queue_action('remove', index_name, alias)

            fresh = timestamp_index(alias)

            # The alias name may itself be a full index; in that case
            # indexing continues into it.
            if requests.head(url('/%s' % alias)).status_code == 200:
                previous = alias

            is_stats = 'stats' in alias

            # Flag the database, then create the mapping and the index.
            flagged = tree.add_task(flag_database,
                                    args=[fresh, previous, alias])
            mapped = flagged.add_task(create_mapping, args=[fresh, alias])
            tail = mapped.add_task(create_index, args=[fresh, is_stats])

            queue_action('add', fresh, alias)

        # Alias the new indexes and drop the old aliases, if any.
        renamed = tail.add_task(run_aliases_actions, args=[alias_actions])

        # Unflag the database: duplicate indexing is no longer needed.
        unflagged = renamed.add_task(unflag_database)

        # Delete the old indexes, if any.
        unflagged.add_task(delete_indexes, args=[stale_indexes])

        log('Running all indexation tasks')

        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
            time.sleep(10)   # give celeryd some time to flag the DB
            while database_flagged():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Report the resulting /_aliases values.
        report = json.dumps(call_es('_aliases').json(),
                            sort_keys=True, indent=4)
        return _SUMMARY % (len(alias_names), report)
예제 #4
0
    def handle(self, *args, **kwargs):
        """Rebuild the AMO Elasticsearch indexes behind their aliases.

        A TaskTree is built that creates a new time-stamped index for every
        alias returned by ``get_modules`` while the old indexes stay in
        place, so the search feature works while the indexation occurs.
        After applying the tree the command polls ``is_reindexing_amo()``
        until it clears, then logs a summary.

        Keyword options read: ``force``, ``with_stats``, ``wipe`` and
        ``noinput``.

        Raises CommandError when a reindexation is already in progress and
        ``force`` is not set, or when the wipe confirmation is declined.
        """
        forced = kwargs.get('force', False)

        if is_reindexing_amo() and not forced:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        modules = get_modules(with_stats=kwargs.get('with_stats', False))

        if kwargs.get('wipe', False):
            noinput = kwargs.get('noinput', False)
            answer = ''
            if not noinput:
                answer = raw_input('Are you sure you want to wipe all AMO '
                                   'Elasticsearch indexes? (yes/no): ')
                while answer not in ('yes', 'no'):
                    answer = raw_input('Please enter either "yes" or "no": ')

            # Bail out unless the wipe was confirmed or the prompt skipped.
            if answer != 'yes' and not noinput:
                raise CommandError("Aborted.")

            unflag_database(stdout=self.stdout)
            for index in set(modules.keys()):
                ES.indices.delete(index, ignore=404)
        elif forced:
            unflag_database(stdout=self.stdout)

        alias_actions = []

        def queue_alias_change(kind, index, alias):
            # Record each alias action only once.
            entry = {kind: {'index': index, 'alias': alias}}
            if entry not in alias_actions:
                alias_actions.append(entry)

        def celery_is_eager():
            return getattr(settings, 'CELERY_ALWAYS_EAGER', False)

        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        tail = None
        stale_indexes = []

        # Every alias gets a new time-stamped index.
        for alias, _module in modules.items():
            previous = None

            for previous in ES.indices.get_aliases(alias, ignore=404):
                # The old index is deleted later and loses its alias.
                stale_indexes.append(previous)
                queue_alias_change('remove', previous, alias)

            fresh = timestamp_index(alias)

            # The alias name may itself be a full index; in that case
            # indexing continues into it.
            if ES.indices.exists(alias):
                previous = alias

            # Flag the database, create the new index, then fill it.
            flagged = tree.add_task(flag_database,
                                    args=[fresh, previous, alias])
            created = flagged.add_task(create_new_index, args=[alias, fresh])
            tail = created.add_task(index_data, args=[alias, fresh])

            queue_alias_change('add', fresh, alias)

        # Alias the new indexes and drop the old aliases, if any.
        renamed = tail.add_task(update_aliases, args=[alias_actions])

        # Unflag the database: duplicate indexing is no longer needed.
        unflagged = renamed.add_task(unflag_database)

        # Delete the old indexes, if any.
        if stale_indexes:
            unflagged.add_task(delete_indexes, args=[stale_indexes])

        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'

        # The time limits cannot be given on the @task decorator because the
        # task uses a decorator from celery_tasktree instead, so they are set
        # through the control API.
        if not celery_is_eager():
            control.time_limit(
                'olympia.lib.es.management.commands.reindex.index_data',
                soft=time_limits['soft'],
                hard=time_limits['hard'])

        try:
            tree.apply_async()
            if not celery_is_eager():
                time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Log the resulting /_aliases values.
        report = json.dumps(ES.indices.get_aliases(),
                            sort_keys=True, indent=4)
        log(_SUMMARY % (len(modules), report), stdout=self.stdout)
예제 #5
0
    def handle(self, *args, **kwargs):
        """Queue the tasks that rebuild the index behind ``ALIAS``.

        Builds a TaskTree that flags the database, creates a new
        time-stamped index, indexes all objects into it, points the alias
        at it, unflags the database, deletes the previously aliased index
        (if any) and finally outputs a summary.  The tree is applied with
        ``FORCE_INDEXING`` set in the environment.

        Keyword options read: ``force`` and ``prefix``.

        Raises CommandError when not run under Marketplace settings, or
        when the database is already flagged and ``force`` is not set.
        """
        if not settings.MARKETPLACE:
            raise CommandError('This command affects only marketplace and '
                               'should be run under Marketplace settings.')

        forced = kwargs.get('force', False)
        index_prefix = kwargs.get('prefix', '')

        already_flagged = database_flagged()
        if already_flagged and not forced:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')
        if forced:
            unflag_database()

        # Indexes currently aliased by ``ALIAS``; the first one, if any, is
        # treated as the old index.
        aliased = ES.aliases(ALIAS).keys()
        previous_index = aliased[0] if aliased else None
        # The new index is named after the alias plus a timestamp.
        fresh_index = timestamp_index(index_prefix + ALIAS)

        # Read the configuration of the old index, when there is one.
        current = {}
        if previous_index:
            try:
                current = (ES.get_settings(previous_index)
                             .get(previous_index, {})
                             .get('settings', {}))
            except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
                current = {}

        replica_count = current.get('number_of_replicas',
                                    settings.ES_DEFAULT_NUM_REPLICAS)
        shard_count = current.get('number_of_shards',
                                  settings.ES_DEFAULT_NUM_SHARDS)

        # Replicas are set to 0 while re-indexing to decrease load; they are
        # raised again afterwards, which results in a more efficient bulk
        # copy in Elasticsearch.  Compression is enabled manually for
        # ES < 0.90.
        build_settings = {
            'number_of_replicas': 0,
            'number_of_shards': shard_count,
            'store.compress.tv': True,
            'store.compress.stored': True,
            'refresh_interval': '-1',
        }
        # Settings applied once indexing is done.
        live_settings = {
            'number_of_replicas': replica_count,
            'refresh_interval': '5s',
        }

        tree = TaskTree()

        # Flag the database.
        flagged = tree.add_task(
            flag_database, args=[fresh_index, previous_index, ALIAS])

        # Create the index and mapping.
        created = flagged.add_task(
            create_index, args=[fresh_index, ALIAS, build_settings])

        # Index everything.
        indexed = created.add_task(run_indexing, args=[fresh_index])

        # Optimize the index, adjust settings and repoint the alias.
        repointed = indexed.add_task(
            update_alias,
            args=[fresh_index, previous_index, ALIAS, live_settings])

        # Unflag the database.
        tail = repointed.add_task(unflag_database)

        # Delete the old index, if any.
        if previous_index:
            tail = tail.add_task(delete_index, args=[previous_index])

        tail.add_task(output_summary)

        self.stdout.write('\nNew index and indexing tasks all queued up.\n')
        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
        finally:
            del os.environ['FORCE_INDEXING']
예제 #6
0
 def _icons_and_images(self, bundle_obj):
     """Queue icon fetching, then image-asset generation, for a bundle.

     Both tasks are pushed onto one TaskTree with ``bundle_obj`` as their
     only argument, and the tree is applied asynchronously.
     """
     tree = TaskTree()
     for step in (tasks.fetch_icon, tasks.generate_image_assets):
         tree.push(step, args=[bundle_obj])
     tree.apply_async()
예제 #7
0
    def handle(self, *args, **kwargs):
        """Rebuild the AMO Elasticsearch indexes for the selected modules.

        The "addons" module is always reindexed; "stats" is added when the
        ``with_stats`` option is set.  A TaskTree creates a new
        time-stamped index per module alias while the old indexes stay in
        place, so the search feature works while the indexation occurs.
        After applying the tree the command polls ``is_reindexing_amo()``
        until it clears, then logs a summary.

        Keyword options read: ``force``, ``with_stats``, ``wipe`` and
        ``noinput``.

        Raises CommandError when a reindexation is already in progress and
        ``force`` is not set, or when the wipe confirmation is declined.
        """
        force = kwargs.get("force", False)

        if is_reindexing_amo() and not force:
            raise CommandError("Indexation already occuring - use --force to bypass")

        log("Starting the reindexation", stdout=self.stdout)

        modules = ["addons"] + (["stats"] if kwargs.get("with_stats", False) else [])

        if kwargs.get("wipe", False):
            skip_prompt = kwargs.get("noinput", False)
            reply = ""
            if not skip_prompt:
                reply = raw_input("Are you sure you want to wipe all AMO Elasticsearch indexes? (yes/no): ")
                while reply not in ("yes", "no"):
                    reply = raw_input('Please enter either "yes" or "no": ')

            # Bail out unless the wipe was confirmed or the prompt skipped.
            if not (reply == "yes" or skip_prompt):
                raise CommandError("Aborted.")

            unflag_database(stdout=self.stdout)
            for index in set(MODULES[m].get_alias() for m in modules):
                ES.indices.delete(index, ignore=404)
        elif force:
            unflag_database(stdout=self.stdout)

        pending_alias_actions = []

        def record_alias_action(verb, index, alias):
            # Record each alias action only once.
            entry = {verb: {"index": index, "alias": alias}}
            if entry not in pending_alias_actions:
                pending_alias_actions.append(entry)

        def running_eagerly():
            return getattr(settings, "CELERY_ALWAYS_EAGER", False)

        log("Building the task tree", stdout=self.stdout)
        tree = TaskTree()
        final_step = None
        obsolete = []

        # Every module gets a new time-stamped index.
        for module in modules:
            replaced = None
            alias = MODULES[module].get_alias()

            for replaced in ES.indices.get_aliases(alias, ignore=404):
                # The old index is deleted later and loses its alias.
                obsolete.append(replaced)
                record_alias_action("remove", replaced, alias)

            target = timestamp_index(alias)

            # The alias name may itself be a full index; in that case
            # indexing continues into it.
            if ES.indices.exists(alias):
                replaced = alias

            # Flag the database, create the new index, then fill it.
            flag_step = tree.add_task(flag_database, args=[target, replaced, alias])
            create_step = flag_step.add_task(create_new_index, args=[module, target])
            final_step = create_step.add_task(index_data, args=[module, target])

            record_alias_action("add", target, alias)

        # Alias the new indexes and drop the old aliases, if any.
        alias_step = final_step.add_task(update_aliases, args=[pending_alias_actions])

        # Unflag the database: duplicate indexing is no longer needed.
        unflag_step = alias_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if obsolete:
            unflag_step.add_task(delete_indexes, args=[obsolete])

        log("Running all indexation tasks", stdout=self.stdout)

        os.environ["FORCE_INDEXING"] = "1"

        # The time limits cannot be given on the @task decorator because the
        # task uses a decorator from celery_tasktree instead, so they are set
        # through the control API.
        if not running_eagerly():
            control.time_limit(
                "olympia.lib.es.management.commands.reindex.index_data",
                soft=time_limits["soft"],
                hard=time_limits["hard"],
            )

        try:
            tree.apply_async()
            if not running_eagerly():
                time.sleep(10)  # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ["FORCE_INDEXING"]

        sys.stdout.write("\n")

        # Log the resulting /_aliases values.
        dumped = json.dumps(ES.indices.get_aliases(), sort_keys=True, indent=4)
        log(_SUMMARY % (len(modules), dumped), stdout=self.stdout)
예제 #8
0
    def handle(self, *args, **kwargs):
        """Run a full reindexation of the configured Elasticsearch aliases.

        Builds a TaskTree that creates a new time-stamped index for every
        (prefixed) name in ``django_settings.ES_INDEXES`` while the indexes
        currently behind those aliases stay in place, so the search feature
        works while the indexation occurs.  After applying the tree the
        command polls ``database_flagged()`` until it clears.

        Keyword options read: ``force``, ``prefix`` and ``wipe``.

        Returns the ``_SUMMARY`` text filled in with the number of aliases
        and the pretty-printed ``_aliases`` data.

        Raises CommandError when not run from the Marketplace, when the
        database is already flagged and ``force`` is not set, or when the
        wipe confirmation is not answered with "yes".
        """
        if not django_settings.MARKETPLACE:
            raise CommandError('This command affects both the marketplace and '
                               'AMO ES storage. But the command can only be '
                               'run from the Marketplace.')

        force = kwargs.get('force', False)
        if database_flagged() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        name_prefix = kwargs.get('prefix', '')
        log('Starting the reindexation')

        if kwargs.get('wipe', False):
            reply = raw_input("Are you sure you want to wipe all data from "
                              "ES ? (yes/no): ")
            while reply not in ('yes', 'no'):
                reply = raw_input('Please enter either "yes" or "no": ')

            if reply != 'yes':
                raise CommandError("Aborted.")
            unflag_database()
            requests.delete(url('/'))
        elif force:
            unflag_database()

        # Current aliases from /_aliases.
        # NOTE(review): ``.json`` is read as an attribute, not called --
        # presumably this targets a requests release where it is a property;
        # confirm against the pinned version.
        live_aliases = requests.get(url('/_aliases')).json

        # Names of the aliases that have to be rebuilt.
        targets = set(
            [name_prefix + name
             for name in django_settings.ES_INDEXES.values()])

        pending = []

        def remember(*action):
            # Record each alias action only once.
            if action not in pending:
                pending.append(action)

        live_aliases = live_aliases.items()

        log('Building the task tree')
        tree = TaskTree()
        final_step = None
        obsolete = []

        # Every alias gets a new time-stamped index.
        for alias in targets:
            for_stats = 'stats' in alias
            replaced = None

            for index_name, details in live_aliases:
                if alias in details['aliases'].keys():
                    # The old index is deleted later and loses its alias.
                    replaced = index_name
                    obsolete.append(index_name)
                    remember('remove', index_name, alias)

            target = timestamp_index(alias)

            # The alias name may itself be a full index; in that case
            # indexing continues into it.
            probe = requests.head(url('/%s' % alias))
            if probe.status_code == 200:
                replaced = alias

            # Flag the database, then create the mapping and the index.
            flag_step = tree.add_task(flag_database,
                                      args=[target, replaced, alias])
            mapping_step = flag_step.add_task(create_mapping,
                                              args=[target, alias])
            final_step = mapping_step.add_task(create_index,
                                               args=[target, for_stats])

            remember('add', target, alias)

        # Alias the new indexes and drop the old aliases, if any.
        alias_step = final_step.add_task(run_aliases_actions, args=[pending])

        # Unflag the database: duplicate indexing is no longer needed.
        unflag_step = alias_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        unflag_step.add_task(delete_indexes, args=[obsolete])

        log('Running all indexation tasks')

        os.environ['FORCE_INDEXING'] = '1'
        try:
            tree.apply_async()
            time.sleep(10)  # give celeryd some time to flag the DB
            while database_flagged():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Report the resulting /_aliases values.
        dumped = json.dumps(call_es('_aliases').json,
                            sort_keys=True, indent=4)
        return _SUMMARY % (len(targets), dumped)
예제 #9
0
 def _icons_and_images(self, bundle_obj):
     """Schedule the icon and image-asset tasks for ``bundle_obj``.

     Pushes ``tasks.fetch_icon`` and then ``tasks.generate_image_assets``
     onto a new TaskTree and applies it asynchronously.
     """
     queue = TaskTree()
     fetch, generate = tasks.fetch_icon, tasks.generate_image_assets
     queue.push(fetch, args=[bundle_obj])
     queue.push(generate, args=[bundle_obj])
     queue.apply_async()
예제 #10
0
    def handle(self, *args, **kwargs):
        """Reindex every AMO Elasticsearch alias into a new index.

        For each alias from ``get_modules`` the TaskTree flags the database,
        creates a time-stamped index and indexes data into it; the last
        branch then updates the aliases, unflags the database and deletes
        the replaced indexes.  The old indexes stay in place meanwhile, so
        the search feature works while the indexation occurs.  Progress
        dots are written until ``is_reindexing_amo()`` clears, and a
        summary is logged at the end.

        Keyword options read: ``force``, ``with_stats``, ``wipe`` and
        ``noinput``.

        Raises CommandError when a reindexation is already in progress and
        ``force`` is not set, or when the wipe confirmation is declined.
        """
        index_task_name = (
            'olympia.lib.es.management.commands.reindex.index_data')

        force = kwargs.get('force', False)
        if is_reindexing_amo() and not force:
            raise CommandError('Indexation already occuring - use --force to '
                               'bypass')

        log('Starting the reindexation', stdout=self.stdout)

        modules = get_modules(with_stats=kwargs.get('with_stats', False))

        if kwargs.get('wipe', False):
            skip_prompt = kwargs.get('noinput', False)
            reply = ''
            if not skip_prompt:
                reply = raw_input('Are you sure you want to wipe all AMO '
                                  'Elasticsearch indexes? (yes/no): ')
                while reply not in ('yes', 'no'):
                    reply = raw_input('Please enter either "yes" or "no": ')

            # Bail out unless the wipe was confirmed or the prompt skipped.
            if not (reply == 'yes' or skip_prompt):
                raise CommandError("Aborted.")

            unflag_database(stdout=self.stdout)
            for index in set(modules.keys()):
                ES.indices.delete(index, ignore=404)
        elif force:
            unflag_database(stdout=self.stdout)

        pending_alias_actions = []

        def record_alias_action(verb, index, alias):
            # Record each alias action only once.
            entry = {verb: {'index': index, 'alias': alias}}
            if entry not in pending_alias_actions:
                pending_alias_actions.append(entry)

        def running_eagerly():
            return getattr(settings, 'CELERY_ALWAYS_EAGER', False)

        log('Building the task tree', stdout=self.stdout)
        tree = TaskTree()
        final_step = None
        obsolete = []

        # Every alias gets a new time-stamped index.
        for alias, _unused_module in modules.items():
            replaced = None

            for replaced in ES.indices.get_aliases(alias, ignore=404):
                # The old index is deleted later and loses its alias.
                obsolete.append(replaced)
                record_alias_action('remove', replaced, alias)

            target = timestamp_index(alias)

            # The alias name may itself be a full index; in that case
            # indexing continues into it.
            if ES.indices.exists(alias):
                replaced = alias

            # Flag the database, create the new index, then fill it.
            flag_step = tree.add_task(flag_database,
                                      args=[target, replaced, alias])
            create_step = flag_step.add_task(create_new_index,
                                             args=[alias, target])
            final_step = create_step.add_task(index_data,
                                              args=[alias, target])

            record_alias_action('add', target, alias)

        # Alias the new indexes and drop the old aliases, if any.
        alias_step = final_step.add_task(update_aliases,
                                         args=[pending_alias_actions])

        # Unflag the database: duplicate indexing is no longer needed.
        unflag_step = alias_step.add_task(unflag_database)

        # Delete the old indexes, if any.
        if obsolete:
            unflag_step.add_task(delete_indexes, args=[obsolete])

        log('Running all indexation tasks', stdout=self.stdout)

        os.environ['FORCE_INDEXING'] = '1'

        # The time limits cannot be given on the @task decorator because the
        # task uses a decorator from celery_tasktree instead, so they are set
        # through the control API.
        if not running_eagerly():
            control.time_limit(index_task_name,
                               soft=time_limits['soft'],
                               hard=time_limits['hard'])

        try:
            tree.apply_async()
            if not running_eagerly():
                time.sleep(10)   # give celeryd some time to flag the DB
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
        finally:
            del os.environ['FORCE_INDEXING']

        sys.stdout.write('\n')

        # Log the resulting /_aliases values.
        dumped = json.dumps(ES.indices.get_aliases(),
                            sort_keys=True, indent=4)
        log(_SUMMARY % (len(modules), dumped), stdout=self.stdout)