Example #1
0
def upload_images(args):
    """
    tsammalex upload_images path/to/cdstar/catalog

    Upload the images listed in staged_images.csv to a CDSTAR instance,
    append the metadata of each uploaded image to images.csv, and drop
    successfully processed rows from staged_images.csv.
    """
    images_path = data_file('images.csv', repos=args.tsammalex_data)
    staged_images_path = data_file('staged_images.csv',
                                   repos=args.tsammalex_data)
    # Ids of images already recorded in images.csv; passed to the providers,
    # presumably so already-uploaded images are skipped — verify in
    # provider.retrieve.
    checksums = set(
        d.id for d in models.CsvData('images', repos=args.tsammalex_data))
    providers = [prov(args.tsammalex_data) for prov in PROVIDERS]
    with MediaCatalog('cdstar.json',
                      repos=args.tsammalex_data,
                      json_opts=dict(indent=4)) as mcat:
        # CDSTAR credentials are taken from the environment; a missing
        # variable raises KeyError up front rather than mid-upload.
        with Catalog(args.args[0],
                     cdstar_url=os.environ['CDSTAR_URL'],
                     cdstar_user=os.environ['CDSTAR_USER'],
                     cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
            for item in models.CsvData('staged_images',
                                       repos=args.tsammalex_data):
                for provider in providers:
                    if item in provider:
                        img = provider.retrieve(item, cat, checksums, mcat)
                        if img:
                            try:
                                add_rows(images_path, img.csv_row())
                            except Exception:
                                # Was a bare `except:`; narrowed so that
                                # SystemExit/KeyboardInterrupt are not
                                # intercepted. Print the offending image
                                # for diagnosis, then re-raise.
                                print(img)
                                raise
                            # Remove the processed row from the staging file.
                            filter_rows(staged_images_path,
                                        lambda d: d['id'] != item.id)
                        # Only the first matching provider handles an item.
                        break
Example #2
0
def test():
    """Run referential-integrity checks over the tsammalex CSV data files.

    Builds id -> item lookup tables for every CSV file plus the auxiliary
    datasets (ecoregions, bibliography keys, countries), then verifies that
    every cross-reference column points at an existing id. Problems are
    reported via the module-level ``error`` helper; if any check failed
    (``SUCCESS`` is falsy at the end), a ValueError is raised.
    """
    if not REPOS.exists():
        return
    # id -> item maps for every CSV data file.
    data = {
        n: OrderedDict([(item.id, item)
                        for item in models.CsvData(n, on_error=error)])
        for n in CSV
    }
    data['ecoregions'] = {}
    for ecoregion in jsonlib.load(data_file('ecoregions.json'))['features']:
        data['ecoregions'][ecoregion['properties']['eco_code']] = ecoregion

    # Collect the set of valid bibliography keys from sources.bib.
    data['refs'] = {}
    with data_file('sources.bib').open(encoding='utf8') as fp:
        for line in fp:
            match = BIB_ID_PATTERN.match(line.strip())
            if match:
                data['refs'][match.group('id')] = 1

    data['countries'] = {country.alpha2: country for country in countries}

    # Validate literature references; a reference may carry an optional
    # "[pages]" suffix which must be well-formed and is ignored for lookup.
    # Enumerate from 2: data rows start at line 2 of the CSV (after the
    # header), so `lineno` is the actual file line for error reporting.
    for name in ['names', 'taxa']:
        for lineno, item in enumerate(data[name].values(), 2):
            for ref in item.refs__ids:
                if '[' in ref:
                    source_id, pages = ref.split('[', 1)
                    if not pages.endswith(']'):  # pragma: no cover
                        error('invalid reference %s' % (ref, ), name, lineno)
                else:
                    source_id = ref
                if source_id not in data['refs']:  # pragma: no cover
                    error('invalid id referenced: %s' % (source_id, ), name,
                          lineno)

    # Validate foreign-key-like columns, named "<table>__id" (singular) or
    # "<table>__ids" (list), against the lookup tables built above.
    for name, model in [(n, getattr(models, n.capitalize())) for n in CSV]:
        for lineno, item in enumerate(data[name].values(), 2):
            for col in [f.name for f in attr.fields(model)]:
                if '__' not in col:
                    continue  # not a reference column
                ref, cardinality = col.split('__', 1)
                ids = getattr(item, col)
                if cardinality == 'id':
                    # Singular reference: normalize to a one-element list.
                    assert not isinstance(ids, list)
                    ids = [ids]
                for v in ids:
                    if ref not in data:
                        raise ValueError(ref)  # pragma: no cover
                    if ref == 'refs' and '[' in v:
                        # Strip an optional "[pages]" suffix before lookup.
                        v = v.split('[')[0]
                    if v not in data[ref]:  # pragma: no cover
                        error('invalid %s id referenced: %s' % (ref, v),
                              name, lineno)

    if not SUCCESS:  # pragma: no cover
        raise ValueError('integrity checks failed!')
Example #3
0
def update_taxa(args):
    """
    Update the supplemental data for taxa from external sources.

    We go through the taxa listed in taxa.csv and look for additional information at
    GBIF, EOL and Catalogue Of Life.
    """
    with TaxaData(repos=args.tsammalex_data) as taxa:
        # Ensure every row of taxa.csv has a corresponding stub entry.
        rows = models.CsvData('taxa', repos=args.tsammalex_data)
        for index, row in enumerate(rows):
            taxa.add(index, row)

        # Query each external catalog in turn to enrich the taxa records.
        for provider_cls in (CatalogueOfLife, GBIF, EOL):
            print(provider_cls.__name__)
            with provider_cls(args.tsammalex_data) as provider:
                for taxon in tqdm(taxa, leave=False):
                    provider.update_taxon(taxon)