예제 #1
0
    def command(cls, options):
        from ckan import model
        if options.write:
            rev = model.repo.new_revision()
            rev.author = 'script_fix_secondary_themes_3.py'

        datasets = common.get_datasets(state='active',
                                       dataset_name=options.dataset)
        for package in datasets:
            if not 'theme-secondary' in package.extras:
                stats_outcome.add('Ignore - no secondary theme', package.name)
                continue

            secondary_theme = package.extras.get('theme-secondary')

            if secondary_theme.startswith('["["'):
                secondary_theme = LOOKUP[secondary_theme]

            secondary_theme = json.loads(secondary_theme)

            if isinstance(secondary_theme, list) and secondary_theme and len(secondary_theme[0]) == 1:
                secondary_theme = "".join(secondary_theme).replace('&', ' & ')
                if secondary_theme == 'GovernmentBusiness & Economy':
                    secondary_theme = ['Government', 'Business & Economy']
                elif secondary_theme == 'GovernmentSpending':
                    secondary_theme = ['Government Spending']
                elif secondary_theme == 'EnvironmentEducationGovernmentSpending':
                    secondary_theme = ['Environment', 'Education', 'Government Spending']
                elif secondary_theme == 'EnvironmentGovernment':
                    secondary_theme = ['Environment', 'Government']
                else:
                    secondary_theme = [secondary_theme]

            if json.dumps(secondary_theme) != package.extras.get('theme-secondary'):
                stats_outcome.add('Fixing', package.name)
 
                package.extras['theme-secondary'] = json.dumps(secondary_theme)
            else:
                stats_outcome.add('Unchanged', package.name)

        print 'Formats:\n', stats_format.report()
        print 'Outcomes:\n', stats_outcome.report()

        if options.write:
            print 'Writing...'
            model.Session.commit()
            print '...done'
            stats_format.show_time_taken()
예제 #2
0
        use_flickr=False,
        use_set5=False,
        use_urban100=False,
        patch_size=144,
        use_noise=False,
        valid_rate=0.1,
        inter='nearest',
        augment="default",
        kernel_dim=10,
    ))
# Apply the configured seed before any data is prepared
# (presumably seeds the random number generators -- confirm in PinkBlack).
PinkBlack.io.set_seeds(args.seed)

# ---------------------------------------------------------
# Prepare training/validation/test data, and its dataloaders

# get_datasets() is indexed below with the keys 'train_dataset',
# 'valid_dataset' and 'test_dataset'.
datasets = get_datasets(args)
print("datasets are prepared.")

# Training batches are reshuffled; validation keeps a fixed order.
train_dl = DataLoader(datasets['train_dataset'],
                      batch_size=args.batch_size,
                      shuffle=True,
                      num_workers=args.num_workers,
                      pin_memory=True)
valid_dl = DataLoader(datasets['valid_dataset'],
                      batch_size=args.batch_size,
                      shuffle=False,
                      num_workers=args.num_workers,
                      pin_memory=True)
test_dl = DataLoader(datasets['test_dataset'],
                     batch_size=1,
                     shuffle=False,
예제 #3
0
    def command(cls, config_ini, options):
        common.load_config(config_ini)
        common.register_translator()

        from ckan import model
        from ckanext.dgu.lib.theme import (categorize_package, PRIMARY_THEME,
                                           SECONDARY_THEMES)
        rev = model.repo.new_revision()
        rev.author = 'script-fix_themes.py'

        datasets = common.get_datasets(state='active',
                                       dataset_name=options.dataset,
                                       organization_ref=options.organization)

        def fix_theme(theme_str):
            '''Returns (fixed_theme_str, outcome)'''
            if not theme_str:
                return '', 'Blank'
            elif theme_str == 'null':
                return '', '"null"->""'
            elif theme_str in THEMES:
                return theme_str, 'Ok'
            else:
                fixed_theme = THEME_MAP.get(theme_str)
                if fixed_theme is None:
                    return theme_str, 'Unknown theme %s - recategorizing' % theme_str
                else:
                    assert (fixed_theme != theme_str)
                    return fixed_theme, 'Changed to long form'
                    package.extras[PRIMARY_THEME] = new_primary

        def recategorize(pkg):
            themes = categorize_package(pkg, stats_recategorize)
            print 'Recategorize: %s' % themes
            if themes:
                pkg.extras[PRIMARY_THEME] = themes[0]
            elif PRIMARY_THEME in pkg.extras:
                pkg.extras[PRIMARY_THEME] = ''
            if len(themes) > 1:
                pkg.extras[SECONDARY_THEMES] = '["%s"]' % themes[1]
            elif SECONDARY_THEMES in pkg.extras:
                pkg.extras[SECONDARY_THEMES] = '[]'

        for package in datasets:
            if PRIMARY_THEME in package.extras:
                primary = package.extras.get(PRIMARY_THEME)
                new_primary, outcome = fix_theme(primary)
                if new_primary != primary:
                    package.extras[PRIMARY_THEME] = new_primary
                output = stats_primary.add(outcome, package.name)
                if outcome != 'Ok':
                    print output
                if outcome.startswith('Unknown theme'):
                    recategorize(package)
                    continue
            else:
                stats_primary.add('No theme', package.name)

            if SECONDARY_THEMES in package.extras:
                secondary = package.extras.get(SECONDARY_THEMES)
                try:
                    secondary = json.loads(secondary)
                except ValueError:
                    if secondary.startswith('{') and secondary.endswith('}'):
                        # '{Crime}' -> 'Crime'
                        secondary = secondary[1:-1].strip('\"')
                        print stats_secondary.add('Tidied {}', package.name)
                    else:
                        print stats_secondary.add('Error decoding JSON',
                                                  package.name)

                if secondary == {}:
                    secondary = []

                new_secondary = []
                do_recategorize = False

                if not isinstance(secondary, list):
                    secondary = [secondary]
                for theme_str in secondary:
                    if not isinstance(theme_str, basestring):
                        print stats_secondary.add(
                            'Not a list of strings %s' % type(theme_str),
                            package.name)
                        continue
                    new_theme, outcome = fix_theme(theme_str)
                    if new_theme:
                        new_secondary.append(new_theme)
                    if outcome != 'Ok':
                        print stats_secondary.add(outcome, package.name)
                    if outcome.startswith('Unknown theme'):
                        do_recategorize = True
                if do_recategorize:
                    recategorize(package)
                    continue
                if json.dumps(new_secondary) != package.extras.get(
                        SECONDARY_THEMES):
                    stats_secondary.add('Fixed', package.name)
                    package.extras[SECONDARY_THEMES] = json.dumps(
                        new_secondary)
                else:
                    stats_secondary.add('Ok', package.name)
            else:
                stats_secondary.add('No theme', package.name)

            if 'themes-secondary' in package.extras:
                print stats_secondary.add(
                    'Old key removed: themes-secondary', '%s %s' %
                    (package.name, package.extras['themes-secondary']))
                del package.extras['themes-secondary']

        print "\nPrimary theme:"
        print stats_primary.report()
        print "\nSecondary theme:"
        print stats_secondary.report()
        print "\nRecategorizations:"
        print stats_recategorize.report()

        if options.write:
            print 'Writing'
            model.Session.commit()
예제 #4
0
    def command(cls, config_ini, options):
        common.load_config(config_ini)
        common.register_translator()

        from ckan import model
        from ckanext.dgu.lib.theme import (categorize_package, PRIMARY_THEME,
                                           SECONDARY_THEMES)
        rev = model.repo.new_revision()
        rev.author = 'script-fix_themes.py'

        datasets = common.get_datasets(state='active',
                                       dataset_name=options.dataset,
                                       organization_ref=options.organization)

        def fix_theme(theme_str):
            '''Returns (fixed_theme_str, outcome)'''
            if not theme_str:
                return '', 'Blank'
            elif theme_str == 'null':
                return '', '"null"->""'
            elif theme_str in THEMES:
                return theme_str, 'Ok'
            else:
                fixed_theme = THEME_MAP.get(theme_str)
                if fixed_theme is None:
                    return theme_str, 'Unknown theme %s - recategorizing' % theme_str
                else:
                    assert(fixed_theme != theme_str)
                    return fixed_theme, 'Changed to long form'
                    package.extras[PRIMARY_THEME] = new_primary

        def recategorize(pkg):
            themes = categorize_package(pkg, stats_recategorize)
            print 'Recategorize: %s' % themes
            if themes:
                pkg.extras[PRIMARY_THEME] = themes[0]
            elif PRIMARY_THEME in pkg.extras:
                pkg.extras[PRIMARY_THEME] = ''
            if len(themes) > 1:
                pkg.extras[SECONDARY_THEMES] = '["%s"]' % themes[1]
            elif SECONDARY_THEMES in pkg.extras:
                pkg.extras[SECONDARY_THEMES] = '[]'

        for package in datasets:
            if PRIMARY_THEME in package.extras:
                primary = package.extras.get(PRIMARY_THEME)
                new_primary, outcome = fix_theme(primary)
                if new_primary != primary:
                    package.extras[PRIMARY_THEME] = new_primary
                output = stats_primary.add(outcome, package.name)
                if outcome != 'Ok':
                    print output
                if outcome.startswith('Unknown theme'):
                    recategorize(package)
                    continue
            else:
                stats_primary.add('No theme', package.name)

            if SECONDARY_THEMES in package.extras:
                secondary = package.extras.get(SECONDARY_THEMES)
                try:
                    secondary = json.loads(secondary)
                except ValueError:
                    if secondary.startswith('{') and secondary.endswith('}'):
                        # '{Crime}' -> 'Crime'
                        secondary = secondary[1:-1].strip('\"')
                        print stats_secondary.add('Tidied {}', package.name)
                    else:
                        print stats_secondary.add('Error decoding JSON', package.name)

                if secondary == {}:
                    secondary = []

                new_secondary = []
                do_recategorize = False

                if not isinstance(secondary, list):
                    secondary = [secondary]
                for theme_str in secondary:
                    if not isinstance(theme_str, basestring):
                        print stats_secondary.add('Not a list of strings %s' % type(theme_str), package.name)
                        continue
                    new_theme, outcome = fix_theme(theme_str)
                    if new_theme:
                        new_secondary.append(new_theme)
                    if outcome != 'Ok':
                        print stats_secondary.add(outcome, package.name)
                    if outcome.startswith('Unknown theme'):
                        do_recategorize = True
                if do_recategorize:
                    recategorize(package)
                    continue
                if json.dumps(new_secondary) != package.extras.get(SECONDARY_THEMES):
                    stats_secondary.add('Fixed', package.name)
                    package.extras[SECONDARY_THEMES] = json.dumps(new_secondary)
                else:
                    stats_secondary.add('Ok', package.name)
            else:
                stats_secondary.add('No theme', package.name)

            if 'themes-secondary' in package.extras:
                print stats_secondary.add('Old key removed: themes-secondary',
                                          '%s %s' % (package.name, package.extras['themes-secondary']))
                del package.extras['themes-secondary']

        print "\nPrimary theme:"
        print stats_primary.report()
        print "\nSecondary theme:"
        print stats_secondary.report()
        print "\nRecategorizations:"
        print stats_recategorize.report()

        if options.write:
            print 'Writing'
            model.Session.commit()
예제 #5
0
    def command(cls, options):
        from ckan import model
        if options.write:
            rev = model.repo.new_revision()
            rev.author = 'script_fix_secondary_themes_2.py'

        datasets = common.get_datasets(state='active',
                                       dataset_name=options.dataset)
        for package in datasets:
            if not 'theme-secondary' in package.extras:
                stats_outcome.add('Ignore - no secondary theme', package.name)
                continue

            secondary_theme = package.extras.get('theme-secondary')

            # Convert from JSON to a list
            loop = 1
            while isinstance(secondary_theme, basestring):
                try:
                    secondary_theme = json.loads(secondary_theme)
                except ValueError:
                    if secondary_theme == 'None':
                        stats_format.add('"None" string', package.name)
                        secondary_theme = []
                    elif ',' in secondary_theme:
                        # e.g. '"Government, Society"'
                        print stats_format.add('Non-JSON string, comma separated', package.name)
                        secondary_theme = [t.strip() for t in secondary_theme.split(',')]
                    else:
                        # e.g. 'Towns & Cities'
                        print stats_format.add('Non-JSON string', '%s %r' % (package.name, secondary_theme.strip()))
                        secondary_theme = [secondary_theme.strip()]
                    break
                loop = 1
                if loop == 2:
                    stats_format.add('JSON', package.name)
                elif loop == 3:
                    # e.g. '"\\"Health\\""'
                    print stats_format.add('Multiple JSON encoded', package.name)
            if secondary_theme in ('None', '', {}):
                print stats_format.add('Empty list', package.name)
                secondary_theme = []
            assert isinstance(secondary_theme, list)

            # Filter out nulls in the list
            for filter_string in (None, 'None', ''):
                if filter_string in secondary_theme:
                    print stats_format.add('%r in the list' % filter_string, package.name)
                    secondary_theme = [theme for theme in secondary_theme
                                       if theme != filter_string]

            # Remove {} from strings e.g. ["{Government}"]
            if '{' in str(secondary_theme):
                print stats_format.add('{theme}', package.name)
                secondary_theme = [theme.strip('{}') for theme in secondary_theme]

            if json.dumps(secondary_theme) != package.extras.get('theme-secondary'):
                stats_outcome.add('Fixing', package.name)
                package.extras['theme-secondary'] = json.dumps(secondary_theme)
            else:
                stats_outcome.add('Unchanged', package.name)

        print 'Formats:\n', stats_format.report()
        print 'Outcomes:\n', stats_outcome.report()

        if options.write:
            print 'Writing...'
            model.Session.commit()
            print '...done'
            stats_format.show_time_taken()
    def command(cls, options):
        from ckan import model
        if options.write:
            rev = model.repo.new_revision()
            rev.author = 'script_fix_secondary_themes_2.py'

        datasets = common.get_datasets(state='active',
                                       dataset_name=options.dataset)
        for package in datasets:
            if not 'theme-secondary' in package.extras:
                stats_outcome.add('Ignore - no secondary theme', package.name)
                continue

            secondary_theme = package.extras.get('theme-secondary')

            # Convert from JSON to a list
            loop = 1
            while isinstance(secondary_theme, basestring):
                try:
                    secondary_theme = json.loads(secondary_theme)
                except ValueError:
                    if secondary_theme == 'None':
                        stats_format.add('"None" string', package.name)
                        secondary_theme = []
                    elif ',' in secondary_theme:
                        # e.g. '"Government, Society"'
                        print stats_format.add(
                            'Non-JSON string, comma separated', package.name)
                        secondary_theme = [
                            t.strip() for t in secondary_theme.split(',')
                        ]
                    else:
                        # e.g. 'Towns & Cities'
                        print stats_format.add(
                            'Non-JSON string',
                            '%s %r' % (package.name, secondary_theme.strip()))
                        secondary_theme = [secondary_theme.strip()]
                    break
                loop = 1
                if loop == 2:
                    stats_format.add('JSON', package.name)
                elif loop == 3:
                    # e.g. '"\\"Health\\""'
                    print stats_format.add('Multiple JSON encoded',
                                           package.name)
            if secondary_theme in ('None', '', {}):
                print stats_format.add('Empty list', package.name)
                secondary_theme = []
            assert isinstance(secondary_theme, list)

            # Filter out nulls in the list
            for filter_string in (None, 'None', ''):
                if filter_string in secondary_theme:
                    print stats_format.add('%r in the list' % filter_string,
                                           package.name)
                    secondary_theme = [
                        theme for theme in secondary_theme
                        if theme != filter_string
                    ]

            # Remove {} from strings e.g. ["{Government}"]
            if '{' in str(secondary_theme):
                print stats_format.add('{theme}', package.name)
                secondary_theme = [
                    theme.strip('{}') for theme in secondary_theme
                ]

            if json.dumps(secondary_theme) != package.extras.get(
                    'theme-secondary'):
                stats_outcome.add('Fixing', package.name)
                package.extras['theme-secondary'] = json.dumps(secondary_theme)
            else:
                stats_outcome.add('Unchanged', package.name)

        print 'Formats:\n', stats_format.report()
        print 'Outcomes:\n', stats_outcome.report()

        if options.write:
            print 'Writing...'
            model.Session.commit()
            print '...done'
            stats_format.show_time_taken()