def load_objects(self, *args, **kwargs):
    qs = self.queryset(SLNSWCollectionWebsiteLoader.collection, {})
    for doc in qs:
        url = py_.get(doc, 'props.pageProps.file.image.iiifImageUrl', None)
        asset_id = py_.get(doc, 'asset_id')
        # not really friendly: skip assets already present in the collection
        if self.collection.find_one({"asset_id": asset_id}) is not None:
            logger.debug(f'skip -> {asset_id}')
            continue
        # verify that the IIIF image URL is reachable
        ok = False
        if url is not None:
            res = requests.get(url)
            ok = res.status_code == 200
        if not ok:
            print(asset_id, ok)
        # Select only the basic data
        # data = py_.pick(doc, 'asset_id', 'url_id')
        # # Build url
        # url = self.SLNSW_IFFF.format(**data)
        # # Get data and merge
        # data.update(self.load_data(url))
        yield {'asset_id': asset_id, 'url': url, 'ok': ok}
def current_user_invenio_profile():
    """Get the current user profile."""
    if current_user.is_authenticated:
        return {
            "name": py_.get(current_userprofile, "full_name", None),
            "email": py_.get(current_userprofile, "user.email", None),
        }
    return None
def parse_klokan_hidden(self, asset_id: str) -> dict:
    """Parse data extracted from the georeference website."""
    mongo = self.get_collection(collection=KlokanHiddenDataLoader.collection)
    doc = mongo.find_one({'id': asset_id})
    data = {}
    if doc:
        # data = py_.pick(doc, *['bbox', 'control_points', 'cutline'])
        data = py_.pick(doc, *['cutline'])
        # # Convert data to GeoJson
        # bbox = data.get('bbox', [])
        # if len(bbox) > 0:
        #     bbox = Polygon(partition(2, bbox))
        #     data.update({'bbox': bbox})
        # else:
        #     data['bbox'] = None
        # # Convert control points into geolocations
        # lpoints = data.get('control_points', [])
        # npoints = []
        # if len(lpoints) > 0:
        #     for point in lpoints:
        #         metadata = py_.pick(point, *['map', 'scan_zoom', 'map_zoom', 'address', 'pixel_x', 'pixel_y'])
        #         lon = point['longitude']
        #         lat = point['latitude']
        #         point = Point([lon, lat])
        #         npoints.append({'point': point, 'metadata': metadata})
        #     data['control_points'] = npoints
        #     data['control_points_count'] = len(npoints)

        # load world file
        wld = self.get_world_file(asset_id)

        # convert the cutline (pixel coordinates) into a geographic polygon
        cutline = data.get('cutline', [])
        if wld and cutline:
            polygon = self.pixels_to_geo_polygon(cutline, wld)
            data.update({'cutline': polygon})
            data.update({'cutline_centroid': self.centroid(polygon)})

        # bbox using the following format
        # [[left, bottom], [left, top], [right, top], [right, bottom], [left, bottom]]
        # [[west, south], [west, north], [east, north], [east, south], [west, south]]
        w = py_.get(doc, 'pyramid.width')
        h = py_.get(doc, 'pyramid.height')
        bbox_coord = [[0, h], [0, 0], [w, 0], [w, h]]
        if wld:
            polygon = self.pixels_to_geo_polygon(bbox_coord, wld, validated=False)
            data.update({'bbox_coord': polygon})
    return data
def get_unsplash_photos(food, page=1):
    term = food.name
    key = f'{term}||{page}'
    response = UNSPLASH_CACHE.get_item(key)
    if not response:
        query_params = f'?page={page}&query={term}&client_id={UPSPLASH_APIKEY}'
        response = requests.get(f'{BASE}{query_params}').json()
        UNSPLASH_CACHE.cache_item(key, response)
    return py_.map(
        response.get('results'),
        lambda photo, i: UnsplashPhoto.objects.get_or_create(
            food=food,
            search_term=term,
            order=i * page,
            total=response.get('total'),
            width=photo.get('width'),
            height=photo.get('height'),
            color=photo.get('color'),
            blur_hash=photo.get('blur_hash'),
            description=photo.get('description'),
            alt_description=photo.get('alt_description'),
            raw=py_.get(photo, 'urls.raw'),
            full=py_.get(photo, 'urls.full'),
            small=py_.get(photo, 'urls.small'),
            thumb=py_.get(photo, 'urls.thumb'),
            regular=py_.get(photo, 'urls.regular'),
            unsplash_page=py_.get(photo, 'links.html'),
            username=py_.get(photo, 'user.username'),
            ancestryCategory=py_.get(photo, 'tags[0].source.ancestry.category.slug'),
            ancestrySubcategory=py_.get(photo, 'tags[0].source.ancestry.subcategory.slug'),
        )[0])
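# A minimal, illustrative sketch (not from the source) of the pydash path syntax
# used above: py_.get accepts bracket-indexed paths like 'tags[0]...' and returns
# None instead of raising when any segment is missing.
from pydash import py_

_photo = {'urls': {'raw': 'https://example.com/raw.jpg'},
          'tags': [{'source': {'ancestry': {'category': {'slug': 'food-drink'}}}}]}
assert py_.get(_photo, 'urls.raw') == 'https://example.com/raw.jpg'
assert py_.get(_photo, 'tags[0].source.ancestry.category.slug') == 'food-drink'
assert py_.get(_photo, 'tags[1].source.ancestry.category.slug') is None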
def is_valid(self, asset_id: str, data: dict) -> bool:
    valid = True
    cutline_coords = py_.get(data, 'cutline.coordinates.0', [])
    year = py_.get(data, 'year', None)
    year = 0 if year is None else year
    if len(cutline_coords) == 0 or year <= 0:
        valid = False
    return valid
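# A hedged usage sketch for is_valid (sample data is illustrative): the pydash
# path 'cutline.coordinates.0' indexes into the list, and a missing or None
# year is coerced to 0, which fails the `year <= 0` check.
from pydash import py_

_ok = {'cutline': {'coordinates': [[[0, 0], [1, 0], [1, 1], [0, 0]]]}, 'year': 1901}
_bad = {'cutline': {'coordinates': []}}
assert len(py_.get(_ok, 'cutline.coordinates.0', [])) > 0  # -> valid
assert py_.get(_bad, 'cutline.coordinates.0', []) == []    # -> invalid
assert py_.get(_bad, 'year', None) is None                 # -> coerced to 0, invalid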
def store(self, ix=None, receiver=None):
    logger.debug('story {pk:%s} whoosh init %s' % (self.pk, receiver))
    if settings.TESTING:
        logger.debug('story {pk:%s} whoosh skipped, just testing! %s' % (self.pk, receiver))
        return
    if ix is None:
        ix = helpers.get_whoosh_index()
    authors = u", ".join([u'%s' % t.fullname for t in self.authors.all()])
    tags = u",".join([u'%s' % t.slug for t in self.tags.all()])
    writer = ix.writer()
    try:
        metadata = json.loads(self.metadata)
    except Exception as e:
        logger.exception(e)
        return
    # multilingual abstract, reduced
    abstracts = u"\n".join(
        filter(None, list(set(
            py_.get(metadata, 'abstract.%s' % lang[2], None)
            for lang in settings.LANGUAGES))))
    titles = u"\n".join(
        filter(None, list(set(
            py_.get(metadata, 'title.%s' % lang[2], None)
            for lang in settings.LANGUAGES))))
    writer.update_document(
        title=titles,
        abstract=abstracts,
        path=u"%s" % self.short_url,
        content=u"\n".join(
            BeautifulSoup(
                markdown(
                    u"\n\n".join(filter(None, [self.contents])),
                    extensions=['footnotes'])).findAll(text=True)),
        tags=tags,
        authors=authors,
        status=u"%s" % self.status,
        classname=u"story")
    writer.commit()
    logger.debug('story {pk:%s} whoosh completed %s' % (self.pk, receiver))
def frontpage():
    """Render the front page with the latest Knowledge Packages added."""
    # retrieving the required values
    latest_records = get_latest_knowledge_packages(3)
    engagement_priority_topics_available = get_engagement_priority_topics_available(
        params={
            "q": "props.icon:labels*",
            "size": 25
        }).to_dict()

    # selecting only items with icons available
    py_.set(
        engagement_priority_topics_available,
        "hits.hits",
        (py_.chain(engagement_priority_topics_available)
         .get("hits.hits", [])
         .filter(lambda x: py_.get(x, "props.icon") is not None)
         .map(lambda x: py_.set_(
             x, "props.icon",
             url_for("static", filename=x["props"]["icon"])))).value(),
    )

    # rendering!
    return render_template(
        "geo_knowledge_hub/frontpage/frontpage.html",
        latest_records=latest_records,
        engagement_priority_topics_available=engagement_priority_topics_available,
    )
def get_representative_field(cls):
    if cls.representative_field:
        return cls.representative_field
    elif py_.get(cls, 'name'):
        return 'name'
    else:
        return 'id'
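# A hedged sketch of the fallback chain above (the class below is hypothetical):
# py_.get also works on objects via attribute access, returning None when the
# attribute does not exist.
from pydash import py_

class _Book(object):
    representative_field = None  # falsy -> fall through to the 'name' check
    name = 'Dune'
    id = 1

assert py_.get(_Book, 'name') == 'Dune'  # -> 'name' is the representative field
assert py_.get(_Book, 'missing') is None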
def get_programme_activity_from_record(
        identity: Identity,
        record: Record,
        programme_vocabulary: str = "geowptypes") -> Union[None, Dict]:
    """Retrieve the GEO Work Programme activity metadata associated with the record.

    Args:
        identity (flask_principal.Identity): User identity.

        record (invenio_records.Record): Record API object from which the GEO
        Work Programme activity must be extracted.

        programme_vocabulary (str): Vocabulary used to search for the programme metadata.

    Returns:
        Union[None, Dict]: None or the GEO Work Programme metadata (as dict).
    """
    result = None

    # extracting the geo work programme activity metadata
    activity_id = py_.get(record, "metadata.geo_work_programme_activity.id", None)
    if activity_id:
        result = vocabulary_service.read(
            identity, (programme_vocabulary, activity_id)).to_dict()
    return result
def test_that_upstream_files_are_checked_and_paths_are_properly_re_written(self):
    # setup
    file_path = format_path("../deployment-configs/aws-example.json")
    raw_conf = Path(file_path).read_text()
    deployment_config = json.loads(raw_conf)
    deployment = deployment_config["default"]
    artifact_base_uri = "dbfs:/fake/test"
    requirements_payload = []
    package_requirement = []
    api_client = MagicMock()
    _file_uploader = FileUploader(api_client)

    # function call
    _adjust_job_definitions(
        deployment["jobs"],
        artifact_base_uri,
        requirements_payload,
        package_requirement,
        _file_uploader,
        api_client,
    )

    # tests
    api_client.perform_query.assert_called_once()
    api_client.perform_query.assert_called_once_with(
        "GET",
        "/dbfs/get-status",
        data={"path": "dbfs:/fake/test/tests/deployment-configs/placeholder_1.py"},
        headers=None,
    )
    assert (
        py_.get(deployment, "jobs.[0].spark_python_task.python_file")
        == "dbfs:/fake/test/tests/deployment-configs/placeholder_1.py"
    )
def save(self, row):
    # lookup = py_.pick(row, self.reference_field)
    lookup = {self.reference_field: py_.get(row, self.reference_field)}
    row.pop('_id', None)
    self.collection.update_one(lookup, {'$set': row}, upsert=True)
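# A hedged sketch of the upsert this performs (field and values illustrative):
# with reference_field = 'asset_id' and row = {'asset_id': 'a1', 'year': 1901},
# the call is equivalent to
#   collection.update_one({'asset_id': 'a1'},
#                         {'$set': {'asset_id': 'a1', 'year': 1901}},
#                         upsert=True)
# i.e. insert the row when no document matches the lookup, otherwise merge the
# fields into the existing document.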
def extract_user_stories(
        related_identifiers: List[Dict],
        user_story_typeid="user-story"
) -> Tuple[Union[None, List], Union[None, List]]:
    """Extract the user stories from a record.

    Args:
        related_identifiers (List[Dict]): List of Relation Objects.

        user_story_typeid (str): Resource Type ID of the user-story records.

    Returns:
        Tuple[Union[None, List], Union[None, List]]: Tuple with the following content:
            - First tuple position: related identifiers that are not user stories;
            - Second tuple position: user-story identifiers.

    See:
        For more information about the Relation Object, please check the
        `related-identifiers` page available in the InvenioRDM documentation:
        https://inveniordm.docs.cern.ch/reference/metadata/#related-identifiersworks-0-n
    """
    return py_.partition(
        related_identifiers,
        lambda x: py_.get(x, "ui.resource_type.id") != user_story_typeid,
    )
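# A hedged sketch of the partition above (sample data illustrative): pydash's
# partition returns [matching, non_matching], so the first list holds everything
# that is *not* a user story.
from pydash import py_

_rels = [
    {"ui": {"resource_type": {"id": "dataset"}}},
    {"ui": {"resource_type": {"id": "user-story"}}},
]
_others, _stories = py_.partition(
    _rels, lambda x: py_.get(x, "ui.resource_type.id") != "user-story")
assert py_.get(_others, "0.ui.resource_type.id") == "dataset"
assert py_.get(_stories, "0.ui.resource_type.id") == "user-story"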
def store(self, ix=None):
    if ix is None:
        ix = helpers.get_whoosh_index()
    writer = ix.writer()
    _fields = {}

    # get title and description in the different languages
    for k in ['title', 'description', 'details.caption']:
        _fields[k] = [
            self.data[k]
            if k in self.data and isinstance(self.data[k], basestring) else ''
        ]
        for lang in settings.LANGUAGES:
            _fields[k].append(py_.get(self.data, '%s.%s' % (k, lang[2]), ''))
        _fields[k] = ' '.join(_fields[k]).strip()

    # create multilanguage content by squashing the fields together
    writer.update_document(
        title=_fields['title'],
        path=u"%s" % self.short_url,
        content=u"\n".join(
            filter(None, [
                self.url,
                self.data.get('url', None),
                self.data.get('provider_name', None),
                self.data.get('provider_url', None),
                _fields['description'],
                _fields['details.caption'],
            ])),
        classname=u"document")
    writer.commit()
def prepare_record_topics(
        record: Dict,
        record_engagement_priorities_metadata: List[Dict]) -> List:
    """Prepare the record topics (engagement priorities and target users) for the UI.

    Note:
        In the created topics list, we only include the engagement priorities
        without an icon, since the items with icons are presented in an image
        carousel instead.

    Args:
        record (Dict): Record object serialized as a UI Dict.

        record_engagement_priorities_metadata (List[Dict]): List of engagement
        priorities metadata objects (as dicts).

    Returns:
        List: List with the topics associated with the record.
    """
    # preparing the engagement priorities topics
    default_scheme = "Engagement Priorities"  # for engagements

    # getting the engagement objects with titles l10n
    engagement_titles_l10n = py_.get(record, "ui.engagement_priorities", [])

    # indexing the l10n objects
    engagement_titles_l10n = {x["id"]: x for x in engagement_titles_l10n}

    record_engagement_priorities = (
        py_.chain(record_engagement_priorities_metadata)
        .filter(lambda x: py_.get(x, "props.icon") == "")
        .map(lambda x: {
            "scheme": default_scheme,
            "title": engagement_titles_l10n[x["id"]]["title_l10n"],
            "model_field": "metadata.engagement_priorities",
        })).value()

    # preparing the target users topics
    default_scheme = "Target Audience"  # for users

    # getting the target audience with titles l10n
    target_audiences = (
        py_.chain(record)
        .get("ui.target_audiences", [])
        .map(lambda x: {
            "scheme": default_scheme,
            "title": x["title_l10n"],
            "model_field": "metadata.target_audiences",
        })).value()

    # merging both topic lists (see the sketch below)
    return py_.mapcat([target_audiences, record_engagement_priorities])
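# A hedged sketch of the final merge step: py_.mapcat with a list of lists and
# no iteratee simply concatenates them into one flat list.
from pydash import py_

assert py_.mapcat([[{'scheme': 'Target Audience'}],
                   [{'scheme': 'Engagement Priorities'}]]) == [
    {'scheme': 'Target Audience'}, {'scheme': 'Engagement Priorities'}]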
def inner(context, resp_info=None):
    attr_source = resp_info or context
    call = py_.get(context, context_call)
    args = tuple(getattr(attr_source, x, None) for x in attributes)
    if fixed_args:
        args += fixed_args
    for key in kwargs_from_attrs or []:
        kwargs[key] = getattr(attr_source, key)
    return call(*args, **kwargs)
def min_lend(self, currency):
    """Compute the minimum lendable amount for the specified currency on this platform."""
    if currency.lower() == self._min_lend_base:
        return self._min_lend_amount
    else:
        pair = currency.lower() + self._min_lend_base
        tick = self._get('pubticker', pair)
        exrate = py_.get(tick, 'last_price')
        if exrate is None:
            raise Exception(
                'Failed to get last price for pair {0}'.format(pair))
        return self._min_lend_amount / float(exrate)
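# A hedged worked example of the cross-rate above (numbers illustrative): with
# _min_lend_base = 'usd', _min_lend_amount = 50.0, and a 'btcusd' ticker whose
# last_price is 25000.0, min_lend('btc') returns 50.0 / 25000.0 = 0.002 BTC.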
def wallets(self):
    wallets = self.api_query('returnAvailableAccountBalances')
    new_wallets = []
    # TODO: "deposit" type is bitfinex terminology; do a better job of
    # standardizing the wallet output
    lending = py_.get(wallets, 'lending', dict(btc=0))
    for currency, amount in lending.iteritems():
        new_wallets.append(
            dict(available=amount,
                 currency=currency,
                 amount=amount,
                 type='deposit'))
    return new_wallets
def min_lend(self, currency):
    """Compute the minimum lendable amount for the specified currency on this platform."""
    if currency.lower() == self._min_lend_base:
        return self._min_lend_amount
    else:
        pair = self._min_lend_base.upper() + '_' + currency.upper()
        path = pair + '.last'
        ticker = self.returnTicker()
        exrate = py_.get(ticker, path)
        if exrate is None:
            raise Exception(
                'Failed to get last price for pair {0}'.format(pair))
        return self._min_lend_amount / float(exrate)
def get_pexels_photos(food, page=1):
    term = food.name
    key = f'{term}||{page}'
    response = PEXELS_CACHE.get_item(key)
    if not response:
        query_params = f'?page={page}&per_page=30&query={term}'
        response = requests.get(
            f'{BASE}{query_params}',
            headers={"Authorization": PEXELS_APIKEY}).json()
        PEXELS_CACHE.cache_item(key, response)
    return py_.map(
        response.get('photos'),
        lambda photo, i: PexelsPhoto.objects.get_or_create(
            food=food,
            search_term=term,
            # engineered features for ML
            order=response.get('page') * i,
            total=response.get('total_results'),
            pexels_id=photo.get('id'),
            width=photo.get('width'),
            height=photo.get('height'),
            url=photo.get('url'),
            photographer=photo.get('photographer'),
            photographer_url=photo.get('photographer_url'),
            photographer_id=photo.get('photographer_id'),
            avg_color=photo.get('avg_color'),
            original=py_.get(photo, 'src.original'),
            large2x=py_.get(photo, 'src.large2x'),
            small=py_.get(photo, 'src.small'),
            tiny=py_.get(photo, 'src.tiny'),
            # large=py_.get(photo, 'src.large'),
            # medium=py_.get(photo, 'src.medium'),
            # portrait=py_.get(photo, 'src.portrait'),
            # landscape=py_.get(photo, 'src.landscape'),
        )[0])
def get_engagement_priority_from_record(
        identity: Identity, record: Record) -> Union[None, List[Dict]]:
    """Retrieve the Engagement Priority metadata associated with a record.

    Args:
        identity (flask_principal.Identity): User identity.

        record (invenio_records.Record): Record API object from which the
        engagement priorities must be extracted.

    Returns:
        Union[None, List[Dict]]: None or the engagement priorities metadata (as dict).
    """
    # getting the engagement priority topics
    result = None
    record_engagement_priorities = py_.get(
        record, "metadata.engagement_priorities", [])
    record_engagement_priorities_ids = py_.map(
        record_engagement_priorities, lambda x: x["id"])
    if record_engagement_priorities_ids:
        record_engagement_priorities = vocabulary_service.read_many(
            identity=identity,
            type="engagementprioritiestypes",
            ids=record_engagement_priorities_ids,
        ).to_dict()
        result = (
            py_.chain(record_engagement_priorities)
            .get("hits.hits", [])
            .map(lambda x: py_.set_(
                x,
                "props.icon",
                url_for("static", filename=py_.get(x, "props.icon")),
            ) if py_.get(x, "props.icon") != "" else x)).value()
    return result
def update_search_vector(self):
    """
    Fill the search_vector using self.data: e.g. get data['title'] if it is a
    basestring, or data['title']['en_US'] according to the values contained in
    settings.LANGUAGES.

    Note that a language configuration can be used as well; in that case,
    consider the last value in settings.LANGUAGES (e.g. 'english').
    """
    from django.db import connection
    fields = settings.MILLER_VECTORS_MULTILANGUAGE_FIELDS

    # initialize with slug or title
    # contents = [(self.slug, 'A', 'simple')]
    contents = [(getattr(self, _field), _weight, _config)
                for _field, _weight, _config in settings.MILLER_VECTORS_INITIAL_FIELDS]

    for _field, _weight in fields:
        default_value = self.data.get(_field, None)
        value = u"\n".join(
            filter(None, [
                default_value if isinstance(default_value, basestring) else None
            ] + list(
                set(
                    py_.get(self.data, '%s.%s' % (_field, lang[2]), None)
                    for lang in settings.LANGUAGES))))
        contents.append((value, _weight, 'simple'))

    q = ' || '.join([
        "setweight(to_tsvector('simple', COALESCE(%%s,'')), '%s')" % weight
        for value, weight, _config in contents
    ])

    with connection.cursor() as cursor:
        cursor.execute(
            ''.join([
                """
                UPDATE miller_document SET search_vector = x.weighted_tsv FROM (
                    SELECT id,
                """, q, """
                    AS weighted_tsv FROM miller_document WHERE miller_document.id=%s
                ) AS x WHERE x.id = miller_document.id
                """
            ]), [value for value, _w, _c in contents] + [self.id])

    logger.debug('document {pk:%s, slug:%s} search_vector updated.' %
                 (self.pk, self.slug))
    return contents
def parse_klokan(self, obj):
    # Get the data we are interested in
    data = py_.pick(
        obj, *['title', 'thumbnail', 'center', 'north_east', 'south_west'])

    # process data
    # 1. Convert strings to GeoCoordinates
    data.update({
        'center': self.str_longlat_to_geo_point(data['center']),
        'north_east': self.str_longlat_to_geo_point(data['north_east']),
        'south_west': self.str_longlat_to_geo_point(data['south_west']),
    })

    # hack for metabase: expose longitude/latitude as flat fields
    fields = ['center', 'north_east', 'south_west']
    for f in fields:
        point = data[f]
        if point:
            coordinates = point.coordinates
            data['{}_longitude'.format(f)] = coordinates[0]
            data['{}_latitude'.format(f)] = coordinates[1]

    # 2. Extract the city/suburb name from the title
    title = py_.get(data, 'title', '')
    data['location_name'] = py_.get(title.split(','), 0)

    # 3. Extract the year from the title as a fallback option
    match = self.RE_TITLE_YEAR.match(title)
    if match:
        year = match.group('year')
        data['year_title'] = int(year)

    return data
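# A hedged sketch of the title parsing above. RE_TITLE_YEAR's exact pattern is
# not shown in the source; this assumes a regex with a named 'year' group:
import re

_RE_TITLE_YEAR = re.compile(r'.*?(?P<year>\d{4})')  # assumption, for illustration
_title = 'Sydney, Pyrmont, 1903'
assert _title.split(',')[0] == 'Sydney'              # -> location_name
_match = _RE_TITLE_YEAR.match(_title)
assert _match and int(_match.group('year')) == 1903  # -> year_title fallback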
def get_related_identifiers_url(record: Record, doi_prefix: str) -> List[Dict]:
    """Create related identifier URLs.

    Args:
        record (Record): Record API object from which the related identifiers
        will be extracted.

        doi_prefix (str): GEO Knowledge Hub DOI prefix.

    Returns:
        List[Dict]: List of record related identifiers (with URL resolved).

    Note:
        The `doi_prefix` is used to check whether the items are managed by the
        GEO Knowledge Hub.
    """
    # extracting related identifiers
    related_identifiers = py_.get(record, "metadata.related_identifiers", [])
    new_related_identifiers = []
    for related_identifier in related_identifiers:
        # skip entries without an identifier
        if not related_identifier.get("identifier", None):
            continue

        scheme = related_identifier["scheme"]
        identifier = related_identifier["identifier"]
        related_identifier_obj = py_.set_(
            py_.clone_deep(related_identifier), "url", "")
        try:
            if idutils.is_url(identifier):
                related_identifier_obj["url"] = identifier
            else:
                # checking if the doi is internal
                if idutils.is_doi(identifier):
                    identifier_split = identifier.split("/")
                    if doi_prefix and identifier_split[0] == doi_prefix:
                        related_identifier_obj["url"] = posixpath.join(
                            "/records", identifier_split[1])
                if not related_identifier_obj["url"]:
                    related_identifier_obj["url"] = idutils.to_url(
                        identifier, scheme, "https")
        except BaseException:
            related_identifier_obj["url"] = identifier
        new_related_identifiers.append(related_identifier_obj)
    return new_related_identifiers
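# A hedged walk-through of the internal-DOI branch (prefix and identifier are
# illustrative; idutils.is_doi/is_url/to_url are the idutils helpers used above):
#   identifier = '10.12345/abc123', doi_prefix = '10.12345'
#   -> identifier.split('/') == ['10.12345', 'abc123']
#   -> the prefix matches, so url = posixpath.join('/records', 'abc123')
#      == '/records/abc123'
# Any other DOI falls through to idutils.to_url(identifier, scheme, 'https').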
def _metadata_builder(metadata: Dict, scheme) -> Dict:
    """Generate standardized metadata for all retrievers used.

    Args:
        metadata (Dict): Metadata from related resources.

    Returns:
        Dict: Dictionary with standardized metadata.
    """
    # ToDo: Change the nomenclature and use the idea of serializers here.
    metadata_field_for_scheme = metadata_field_by_scheme.get(scheme)
    return {
        **{
            metadata_field: py_.get(
                metadata, metadata_field_for_scheme[metadata_field], "")
            for metadata_field in metadata_field_for_scheme.keys()
        },
        "ui": UIJSONSerializer().serialize_object_to_dict(metadata).get("ui"),
    }
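# A hedged sketch of the lookup-table pattern above (metadata_field_by_scheme is
# not shown in the source; the table and fields below are illustrative):
from pydash import py_

_metadata_field_by_scheme = {
    'doi': {'title': 'titles.0.title', 'publisher': 'publisher'},
}
_metadata = {'titles': [{'title': 'GEO Knowledge Hub'}], 'publisher': 'GEO'}
_fields = _metadata_field_by_scheme['doi']
assert {k: py_.get(_metadata, _fields[k], '') for k in _fields} == {
    'title': 'GEO Knowledge Hub', 'publisher': 'GEO'}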
def parse_slnsw_collection_website(self, asset_id: str, data: dict) -> dict:
    query = {'asset_id': asset_id}
    mongo = self.get_collection(
        collection=SLNSWCollectionWebsiteLoader.collection)
    doc = mongo.find_one(query)
    out = {}
    if doc:
        errors = py_.get(doc, 'props.pageProps.errors', None)
        if errors is not None:
            return {'collection_url_error': errors}
        props = py_.get(doc, 'props.pageProps', {})

        # Zoomify URL
        full_iiif_url = py_.get(props, 'file.image.iiifImageUrl', None)
        if full_iiif_url:
            out['iiif_identifier'] = full_iiif_url.split('/')[-1]
            out['full_iiif_url'] = full_iiif_url

        # Check if we have the full title
        title = py_.get(props, 'title', '')
        if data.get('collection_title', '') != title:
            out['collection_title'] = title
            out['collection_title_expanded'] = True

        notes = py_.get(props, 'recordData.notes', [])
        date_creation = py_.find(notes, {'type': 'dateCreation'})
        if date_creation is not None:
            date_creation = py_.get(date_creation, 'value', None)
            out['date_creation'] = date_creation
            # Extract the latest year in date_creation as an option
            # if the year is null
            if isinstance(date_creation, str):
                cleaned = date_creation.replace('.', '')
                bits = cleaned.split('-')
                try:
                    year = bits[-1]
                    year = year if year != '' else bits[0]
                    out['year_creation'] = int(year)
                except (ValueError, IndexError):
                    out['year_creation'] = None
    return out
def load_objects(self):
    """Generator that yields klokan map feature dictionaries."""
    # Clean collection
    self.collection.remove({})
    # Re-create data
    qs = self.queryset(DXMapsData.collection, query={
        'valid': True,
        'active': True
    })
    for doc in qs:
        logger.debug('DXMap creating GEOJson for {asset_id}'.format(**doc))
        geometry = py_.get(doc, 'cutline.coordinates', None)
        # If a cutline exists, it is a valid map
        if geometry:
            poly = Polygon(geometry)
            # Build feature properties
            properties = py_.pick(doc, 'year', 'collection_title', 'asset_id',
                                  'url_id', 'colorfulness', 'iiif_identifier',
                                  'colored', 'cutline_centroid', 'similar',
                                  'bbox_coord', 'location_name', 'width',
                                  'height')
            properties = py_.rename_keys(
                properties, {
                    'cutline_centroid': 'centroid',
                    'bbox_coord': 'image_bounds',
                    'collection_title': 'title',
                    'url_id': 'collection_id'
                })
            # build feature
            feature = Feature(geometry=poly, properties=properties)
            yield feature
    self.export_to_json()
    return []
def _get_value_from_json(json, keys):
    """Collect the values for the given keys from a JSON-like dict ('' when missing)."""
    val_list = []
    for key in keys:
        val_list.append(_.get(json, key, ''))
    return val_list
def author_names(author_ids: list):
    return [
        py_.get(library_data, ['catalog', 'authorsById', author_id, 'name'])
        for author_id in author_ids
    ]
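# A hedged sketch (library_data shape is illustrative): py_.get also accepts the
# path as a list of keys, returning None for missing entries instead of raising.
from pydash import py_

_library_data = {'catalog': {'authorsById': {'a1': {'name': 'Ada Lovelace'}}}}
assert py_.get(_library_data, ['catalog', 'authorsById', 'a1', 'name']) == 'Ada Lovelace'
assert py_.get(_library_data, ['catalog', 'authorsById', 'zz', 'name']) is None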
def get_ESRI_metadata(self, georeferencer_id: str) -> dict:
    d = self.load_page(georeferencer_id)
    # take the value of the first element matching each selector, defaulting to '0'
    data = {
        'width': py_.get(d('#wld-width'), '0.value', '0'),
        'height': py_.get(d('#wld-height'), '0.value', '0'),
    }
    return data
def __str__(self):
    return str(py_.get(self, self.get_representative_field()))