def handle(self, *args, **options):
    start = datetime.now()  # Measure the time it takes to run the script.
    day = options['date']
    if not day:
        day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    folder = args[0] if args else 'hive_results'
    folder = path.join(settings.TMP_PATH, folder, day)
    sep = options['separator']
    filepath = path.join(folder, 'download_counts.hive')
    # Make sure we're not trying to update with mismatched data.
    if get_date_from_file(filepath, sep) != day:
        raise CommandError('%s file contains data for another day' %
                           filepath)

    # First, make sure we don't have any existing counts for the same day,
    # or it would just increment again the same data.
    DownloadCount.objects.filter(date=day).delete()

    # Memoize the files to addon relations and the DownloadCounts.
    download_counts = {}
    # Perf: preload all the files once and for all.
    # This builds a dict where each key (the file_id we get from the hive
    # query) has the addon_id as value.
    files_to_addon = dict(
        File.objects.values_list('id', 'version__addon_id'))
    # Only accept valid sources, which are listed in the DownloadSource
    # model. The source must either be exactly one of the "full" valid
    # sources, or prefixed by one of the "prefix" valid sources.
    fulls = set(
        DownloadSource.objects.filter(type='full').values_list('name',
                                                               flat=True))
    prefixes = DownloadSource.objects.filter(type='prefix').values_list(
        'name', flat=True)

    with codecs.open(filepath, encoding='utf8') as count_file:
        for index, line in enumerate(count_file):
            if index and (index % 1000000) == 0:
                log.info('Processed %s lines' % index)

            splitted = line[:-1].split(sep)

            if len(splitted) != 4:
                log.debug('Badly formatted row: %s' % line)
                continue

            day, counter, file_id, src = splitted
            try:
                file_id, counter = int(file_id), int(counter)
            except ValueError:  # Badly formatted? Drop.
                continue

            if not is_valid_source(src, fulls=fulls, prefixes=prefixes):
                continue

            # Does this file exist?
            if file_id in files_to_addon:
                addon_id = files_to_addon[file_id]
            else:
                continue

            # Memoize the DownloadCount.
            if addon_id in download_counts:
                dc = download_counts[addon_id]
            else:
                dc = DownloadCount(date=day, addon_id=addon_id, count=0)
                download_counts[addon_id] = dc

            # We can now fill the DownloadCount object.
            dc.count += counter
            dc.sources = update_inc(dc.sources, src, counter)

    # Create in bulk: this is much faster.
    DownloadCount.objects.bulk_create(download_counts.values(), 100)
    for download_count in download_counts.values():
        save_stats_to_file(download_count)
    log.info('Processed a total of %s lines' % (index + 1))
    log.debug('Total processing time: %s' % (datetime.now() - start))

    # Clean up file.
    log.debug('Deleting {path}'.format(path=filepath))
    unlink(filepath)
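
# `is_valid_source` is a helper defined elsewhere in the module. Based on
# the comments above (a source is accepted if it matches a "full" source
# exactly, or starts with one of the "prefix" sources), a minimal sketch of
# its behavior could look like this -- an illustration, not necessarily the
# project's exact implementation:
def is_valid_source(src, fulls, prefixes):
    """Return True if `src` is exactly one of the "full" sources, or
    starts with one of the "prefix" sources."""
    return src in fulls or any(src.startswith(prefix) for prefix in prefixes)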
def handle(self, *args, **options):
    start = datetime.now()  # Measure the time it takes to run the script.
    day = options['date']
    if not day:
        day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
    folder = options['folder_name']
    folder = path.join(settings.TMP_PATH, folder, day)
    sep = options['separator']
    filepath = path.join(folder, 'download_counts.hive')
    # Make sure we're not trying to update with mismatched data.
    if get_date_from_file(filepath, sep) != day:
        raise CommandError('%s file contains data for another day' %
                           filepath)

    # First, make sure we don't have any existing counts for the same day,
    # or it would just increment again the same data.
    DownloadCount.objects.filter(date=day).delete()

    # Memoize the files to addon relations and the DownloadCounts.
    download_counts = {}
    # Perf: preload all the files and slugs once and for all.
    # This builds two dicts:
    # - One where each key (the file_id we get from the hive query) has
    #   the addon_id as value.
    # - One where each key (the add-on slug) has the add-on id as value.
    files_to_addon = dict(
        File.objects.values_list('id', 'version__addon_id'))
    slugs_to_addon = dict(Addon.objects.public().values_list('slug', 'id'))

    # Only accept valid sources, which are listed in the DownloadSource
    # model. The source must either be exactly one of the "full" valid
    # sources, or prefixed by one of the "prefix" valid sources.
    fulls = set(
        DownloadSource.objects.filter(type='full').values_list('name',
                                                               flat=True))
    prefixes = DownloadSource.objects.filter(type='prefix').values_list(
        'name', flat=True)

    with codecs.open(filepath, encoding='utf8') as count_file:
        for index, line in enumerate(count_file):
            if index and (index % 1000000) == 0:
                log.info('Processed %s lines' % index)

            splitted = line[:-1].split(sep)

            if len(splitted) != 4:
                log.debug('Badly formatted row: %s' % line)
                continue

            day, counter, id_or_slug, src = splitted
            try:
                # Clean up data.
                id_or_slug = id_or_slug.strip()
                counter = int(counter)
            except ValueError:
                # Ignore completely invalid data.
                continue

            if id_or_slug.isdigit():
                # If it's a digit, then it should be a file id.
                try:
                    id_or_slug = int(id_or_slug)
                except ValueError:
                    continue

                # Does this file exist?
                if id_or_slug in files_to_addon:
                    addon_id = files_to_addon[id_or_slug]
                # Maybe it's an add-on id?
                elif id_or_slug in files_to_addon.values():
                    addon_id = id_or_slug
                else:
                    # It's an integer we don't recognize, ignore the row.
                    continue
            else:
                # It's probably a slug.
                if id_or_slug in slugs_to_addon:
                    addon_id = slugs_to_addon[id_or_slug]
                else:
                    # We've exhausted all possibilities, ignore this row.
                    continue

            if not is_valid_source(src, fulls=fulls, prefixes=prefixes):
                continue

            # Memoize the DownloadCount.
            if addon_id in download_counts:
                dc = download_counts[addon_id]
            else:
                dc = DownloadCount(date=day, addon_id=addon_id, count=0)
                download_counts[addon_id] = dc

            # We can now fill the DownloadCount object.
            dc.count += counter
            dc.sources = update_inc(dc.sources, src, counter)

    # Close all old connections in this thread before we start creating
    # the `DownloadCount` values.
    # https://github.com/mozilla/addons-server/issues/6886
    # If the calculation above takes too long it might happen that we run
    # into `wait_timeout` problems and django doesn't reconnect properly
    # (potentially because of misconfiguration).
    # Django will re-connect properly after it notices that all
    # connections are closed.
    close_old_connections()

    # Create in bulk: this is much faster.
    DownloadCount.objects.bulk_create(download_counts.values(), 100)
    for download_count in download_counts.values():
        save_stats_to_file(download_count)
    log.info('Processed a total of %s lines' % (index + 1))
    log.debug('Total processing time: %s' % (datetime.now() - start))

    # Clean up file.
    log.debug('Deleting {path}'.format(path=filepath))
    unlink(filepath)
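
# `update_inc` is another helper used above, to maintain the per-source
# breakdown on `dc.sources`. A minimal sketch, assuming `sources` is a plain
# dict of int counters (not necessarily the exact implementation):
def update_inc(initial, key, count):
    """Increment `initial[key]` by `count`, creating the dict and/or the
    key if missing, and return the updated dict."""
    initial = initial or {}
    initial[key] = count + initial.get(key, 0)
    return initial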
def handle(self, *args, **options):
    start = datetime.now()  # Measure the time it takes to run the script.
    day = options['date']
    if not day:
        day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

    sep = options['separator']
    if options['stats_source'] == 's3':
        filepath = 's3://' + '/'.join([settings.AWS_STATS_S3_BUCKET,
                                       settings.AWS_STATS_S3_PREFIX,
                                       'download_counts',
                                       day, '000000_0'])
    elif options['stats_source'] == 'file':
        folder = options['folder_name']
        folder = path.join(settings.TMP_PATH, folder, day)
        filepath = path.join(folder, 'download_counts.hive')

    # Make sure we're not trying to update with mismatched data.
    if get_date(filepath, sep) != day:
        raise CommandError('%s file contains data for another day' %
                           filepath)

    # First, make sure we don't have any existing counts for the same day,
    # or it would just increment again the same data.
    DownloadCount.objects.filter(date=day).delete()

    # Memoize the files to addon relations and the DownloadCounts.
    download_counts = {}
    # Perf: preload all the files and slugs once and for all.
    # This builds two dicts:
    # - One where each key (the file_id we get from the hive query) has
    #   the addon_id as value.
    # - One where each key (the add-on slug) has the add-on id as value.
    files_to_addon = dict(
        File.objects.values_list('id', 'version__addon_id'))
    slugs_to_addon = dict(
        Addon.unfiltered.exclude(status=amo.STATUS_NULL)
                        .values_list('slug', 'id'))

    # Only accept valid sources, which are constants. The source must
    # either be exactly one of the "full" valid sources, or prefixed by
    # one of the "prefix" valid sources.
    fulls = amo.DOWNLOAD_SOURCES_FULL
    prefixes = amo.DOWNLOAD_SOURCES_PREFIX

    count_file = get_stats_data(filepath)
    for index, line in enumerate(count_file):
        if index and (index % 1000000) == 0:
            log.info('Processed %s lines' % index)

        splitted = line[:-1].split(sep)

        if len(splitted) != 4:
            log.debug('Badly formatted row: %s' % line)
            continue

        day, counter, id_or_slug, src = splitted
        try:
            # Clean up data.
            id_or_slug = id_or_slug.strip()
            counter = int(counter)
        except ValueError:
            # Ignore completely invalid data.
            continue

        if id_or_slug.isdigit():
            # If it's a digit, then it should be a file id.
            try:
                id_or_slug = int(id_or_slug)
            except ValueError:
                continue

            # Does this file exist?
            if id_or_slug in files_to_addon:
                addon_id = files_to_addon[id_or_slug]
            # Maybe it's an add-on id?
            elif id_or_slug in files_to_addon.values():
                addon_id = id_or_slug
            else:
                # It's an integer we don't recognize, ignore the row.
                continue
        else:
            # It's probably a slug.
            if id_or_slug in slugs_to_addon:
                addon_id = slugs_to_addon[id_or_slug]
            else:
                # We've exhausted all possibilities, ignore this row.
                continue

        if not is_valid_source(src, fulls=fulls, prefixes=prefixes):
            continue

        # Memoize the DownloadCount.
        if addon_id in download_counts:
            dc = download_counts[addon_id]
        else:
            dc = DownloadCount(date=day, addon_id=addon_id, count=0)
            download_counts[addon_id] = dc

        # We can now fill the DownloadCount object.
        dc.count += counter
        dc.sources = update_inc(dc.sources, src, counter)

    # Close all old connections in this thread before we start creating
    # the `DownloadCount` values.
    # https://github.com/mozilla/addons-server/issues/6886
    # If the calculation above takes too long it might happen that we run
    # into `wait_timeout` problems and django doesn't reconnect properly
    # (potentially because of misconfiguration).
    # Django will re-connect properly after it notices that all
    # connections are closed.
    close_old_connections()

    # Create in bulk: this is much faster.
    DownloadCount.objects.bulk_create(download_counts.values(), 100)

    log.info('Processed a total of %s lines' % (index + 1))
    log.debug('Total processing time: %s' % (datetime.now() - start))

    if options['stats_source'] == 'file':
        # Clean up file.
        log.debug('Deleting {path}'.format(path=filepath))
        unlink(filepath)
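
# This version validates with `get_date` instead of `get_date_from_file`,
# since the data may now come from S3 as well as from a local file. For the
# local-file case, a minimal sketch of such a helper (reading the date from
# the first column of the first row) could look like the following; the S3
# branch is omitted and this is an assumption, not the project's exact
# implementation:
import codecs


def get_date(filepath, sep):
    """Return the date stored in the first column of the first row."""
    with codecs.open(filepath, encoding='utf8') as count_file:
        first_line = next(count_file, '')
    return first_line.split(sep)[0] if first_line else None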