def test_attribute_children(self):
    """A raw dict (including a very large int) survives a MapAttribute round-trip."""
    original = {
        'name': 'Justin',
        'age': 12345678909876543211234234324234,
        'height': 187.96,
    }
    map_attr = MapAttribute()
    round_tripped = map_attr.deserialize(map_attr.serialize(original))
    assert round_tripped == original
def test_map_of_map(self):
    """A nested dict survives a MapAttribute serialize/deserialize round-trip."""
    expected = {
        'name': 'Justin',
        'metrics': {
            'age': 31,
            'height': 187.96,
        },
    }
    map_attr = MapAttribute()
    assert map_attr.deserialize(map_attr.serialize(expected)) == expected
class TypedMap(MapAttribute):
    """Map attribute fixture whose 'map_attr' key is itself declared as a MapAttribute."""
    map_attr = MapAttribute()
def test_raw_map_iter(self):
    """Iterating a raw MapAttribute yields the same keys as the source dict."""
    source = {"foo": "bar", "num": 3, "nested": {"nestedfoo": "nestedbar"}}
    wrapped = MapAttribute(**source)
    # list(x) consumes iter(x), so this compares the full iteration order.
    assert list(source) == list(wrapped)
def test_raw_map_access(self):
    """Each key of a raw MapAttribute is subscriptable and returns the source value.

    Uses ``dict.items()`` directly instead of ``six.iteritems()``: iteration
    behavior is identical on both Python 2 and 3, and it removes this test's
    needless reliance on the six compatibility shim.
    """
    raw = {"foo": "bar", "num": 3, "nested": {"nestedfoo": "nestedbar"}}
    attr = MapAttribute(**raw)
    for k, v in raw.items():
        assert attr[k] == v
class DefaultsMap(MapAttribute):
    """Map attribute fixture with a defaulted sub-map.

    The default is the ``dict`` callable rather than a literal ``{}``:
    PynamoDB invokes callable defaults per instance, so each instance gets
    its own fresh dict instead of all instances sharing one mutable
    module-level object (the classic shared-mutable-default pitfall).
    """
    map_field = MapAttribute(default=dict)
def __init__(self, attr_list):
    # Keep the caller-supplied collection of attribute types for later use.
    self.attr_types = attr_list
    # Backing store for attribute values; ``of=Attribute`` mirrors
    # ListAttribute's element-typing keyword — presumably constrains the
    # map's value type to Attribute instances; TODO confirm MapAttribute
    # actually honors an ``of`` keyword in this codebase's pynamodb version.
    self.attr_store = MapAttribute(of=Attribute)
class InnerMapAttribute(MapAttribute):
    """Map attribute fixture whose sub-map is stored under the DynamoDB name 'dyn_map_attr'."""
    map_attr = MapAttribute(attr_name='dyn_map_attr')
class NestedThing(MapAttribute):
    """Map attribute fixture with two nested maps, one renamed at the storage layer."""
    double_nested = MapAttribute()
    # Stored in DynamoDB under 'something_else' rather than the Python name.
    double_nested_renamed = MapAttribute(attr_name='something_else')
class DefaultsMap(MapAttribute):
    """Map attribute fixture with a defaulted sub-map and a nullable string.

    ``default=dict`` (a callable) replaces ``default={}``: PynamoDB calls
    callable defaults per instance, so every instance receives its own
    fresh dict instead of all instances sharing one mutable object.
    """
    map_field = MapAttribute(default=dict)
    string_field = UnicodeAttribute(null=True)
def test_null_attribute_raw_map(self):
    """A None value inside a raw map serializes to the DynamoDB NULL type."""
    map_attr = MapAttribute()
    result = map_attr.serialize({'skip': None})
    expected = {'skip': {'NULL': True}}
    assert result == expected
class S3Model(object):
    """Declarative record for an S3 bucket: its name, region and a free-form tag map."""
    BucketName = UnicodeAttribute()
    Region = UnicodeAttribute()
    Tags = MapAttribute()
def test_should_map_converts_to_json():
    """A MapAttribute should be converted to the JSONString type."""
    map_attribute = MapAttribute()
    assert_attribute_conversion(map_attribute, JSONString)
class Product(Model):
    """A crawled shopping product: tracks its lowest seen price across queries."""

    class Meta:
        table_name = "product"
        region = 'ap-northeast-2'

    id = UnicodeAttribute(hash_key=True)
    do_crawl = BooleanAttribute(default=False)
    created_at = UnicodeAttribute()
    last_crawled_at = UTCDateTimeAttribute(null=True)
    min_price = NumberAttribute(default=0)  # only results priced at or above min_price are used
    queries = ListAttribute(of=ShoppingQuery)
    lprice = NumberAttribute(null=True)  # lowest price seen so far
    lprice_item = MapAttribute(null=True)  # item details for the lowest price

    def update_lprice(self, lprice_item):
        """Record a new lowest-price item (if any), save a PriceRecord, and save self."""
        self.last_crawled_at = datetime.datetime.utcnow()
        if lprice_item:
            self.lprice_item = lprice_item
            self.lprice = int(lprice_item['lprice'])
            # Keep a separate history row for the price observation.
            record = PriceRecord(self.id, self.last_crawled_at)
            record.item = lprice_item
            record.save()
        # Always persist, even when no new lowest price was found,
        # so last_crawled_at is updated.
        return self.save()

    def update_last_crawled_at(self):
        """Touch the crawl timestamp and persist."""
        self.last_crawled_at = datetime.datetime.utcnow()
        return self.save()

    def search_lowest_price(self):
        # Find the lowest price across all queries; notify when one is found.
        min_price_criterion = self.min_price
        # Start from the current lowest, or a sentinel "infinity" price.
        lprice = self.lprice or 100000000
        lprice_item = None
        try:
            for q in self.queries:
                api = naver_shopping_api.format(quote(q.query), q.display or DEFAULT_DISPLAY, q.sort or DEFAULT_SORT)
                api_response = requests.get(api, headers=naver_api_header)
                if api_response.status_code == 200:
                    response_json = api_response.json()
                    if not response_json.get('total', 0):
                        # TODO: no item warning noti
                        pass
                    # NOTE(review): default {} iterates as empty, which is
                    # harmless, though [] would express intent more clearly.
                    for item in response_json.get('items', {}):
                        item_lprice = int(item['lprice'])
                        # New candidate: at/above the floor and below the best so far.
                        if min_price_criterion <= item_lprice < lprice:
                            lprice = item_lprice
                            lprice_item = item
                            lprice_item['query'] = q.attribute_values
                else:
                    error_message = {
                        'message': 'Naver API Error for {}'.format(self.id),
                        'status_code': api_response.status_code,
                        'text': api_response.text,
                        'api': api,
                        'query': q.attribute_values
                    }
                    send_slack_notification(
                        build_naver_warning_slack_message(error_message))
        except Exception as e:
            # Best-effort crawl: report the failure to Slack instead of crashing.
            error_message = {
                'message': 'Exception occurs by {}'.format(self.id),
                'exception': e,
                'traceback': traceback.format_exc()
            }
            send_slack_notification(build_error_slack_message(error_message))
        self.update_lprice(lprice_item)
        if lprice_item:
            # print(build_normal_slack_message(self))
            status_code = send_slack_notification(
                build_normal_slack_message(self))
        return lprice_item
class ModStatus(Model):
    """Per-mod indexing status record, with bulk export/restore helpers."""

    class Meta:
        table_name = table_name()
        region = region()

    ModIdentifier = UnicodeAttribute(hash_key=True)
    last_error = UnicodeAttribute(null=True)
    last_warnings = UnicodeAttribute(null=True)
    last_checked = UTCDateTimeAttribute(null=True)
    last_indexed = UTCDateTimeAttribute(null=True)
    last_inflated = UTCDateTimeAttribute(null=True)
    last_downloaded = UTCDateTimeAttribute(null=True)
    release_date = UTCDateTimeAttribute(null=True)
    success = BooleanAttribute()
    frozen = BooleanAttribute(default=False)
    # NOTE(review): default={} is one shared mutable object across instances;
    # consider default=dict — confirm no caller relies on the shared literal.
    resources: 'MapAttribute[str, Any]' = MapAttribute(default={})

    def mod_attrs(self) -> Dict[str, Any]:
        """Return this mod's attributes as a JSON-friendly dict (minus the hash key)."""
        attributes = {}
        for key in self.get_attributes().keys():
            if key == 'ModIdentifier':
                continue
            attr = getattr(self, key, None)
            # Datetimes become ISO strings, maps become plain dicts,
            # everything else passes through unchanged.
            attributes[key] = (
                attr.isoformat() if attr and isinstance(attr, datetime) else
                attr.as_dict() if attr and isinstance(attr, MapAttribute) else
                attr
            )
        return attributes

    # If we ever have more than 1MB of Status in the DB we'll need to paginate,
    # however our current status sits at < 300Kb with all the fields populated.
    # So we'd probably need to be tracking 10,000+ mods before it becomes
    # a problem.
    @classmethod
    def export_all_mods(cls, compat: bool = True) -> Dict[str, Any]:
        """Scan the whole table and return {identifier: attrs} for every mod."""
        data = {}
        for mod in cls.scan(rate_limit=5):
            data[mod.ModIdentifier] = mod.mod_attrs()
            # Preserve compatibility with the existing status UI,
            # which expects a 'failed' flag instead of 'success'.
            if compat:
                failed = not mod.success
                data[mod.ModIdentifier]['failed'] = failed
                data[mod.ModIdentifier].pop('success')
        return data

    @classmethod
    def export_to_s3(cls, bucket: str, key: str, compat: bool = True) -> None:
        """Serialize all mod statuses to JSON and upload to the given S3 location."""
        client = boto3.client('s3')
        client.put_object(
            Bucket=bucket,
            Key=key,
            Body=json.dumps(cls.export_all_mods(compat)).encode(),
        )
        logging.info('Exported to s3://%s/%s', bucket, key)

    # This likely isn't super efficient, but we really should only have to use
    # this operation once to seed the existing history.
    @classmethod
    def restore_status(cls, filename: str) -> None:
        """Load a JSON export from disk and batch-write every record back into the table."""
        existing = json.loads(Path(filename).read_text(encoding='UTF-8'))
        with cls.batch_write() as batch:
            for key, item in existing.items():
                # Re-parse the exported ISO timestamp strings into datetimes.
                for item_key in ['checked', 'indexed', 'inflated']:
                    update_key = f'last_{item_key}'
                    if not item[update_key]:
                        continue
                    item[update_key] = parse(
                        item.pop(update_key)
                    )
                item['ModIdentifier'] = key
                # Invert the UI-compat 'failed' flag back into 'success'.
                item['success'] = not item['failed']
                item.pop('failed')
                # Every batch write consumes a credit; we want to leave spare
                # credits available for other operations and also not error out
                # during the operation (pynamodb doesn't seem to have a limit
                # option on batch queries).
                if len(batch.pending_operations) == 5:
                    batch.commit()
                    time.sleep(1)
                batch.save(ModStatus(**item))

    @classmethod
    def last_indexed_from_git(cls, ckanmeta_repo: Repo, identifier: str) -> Optional[datetime]:
        """Recover a mod's last-indexed time from its most recent git commit, or None."""
        try:
            # '%aI' is the author date in strict ISO-8601; take the first line only.
            return parse(ckanmeta_repo.git.log('--', identifier, format='%aI', max_count=1).split("\n")[0]).astimezone(timezone.utc)
        except Exception as exc:  # pylint: disable=broad-except
            logging.error('Unable to recover last_indexed for %s', identifier, exc_info=exc)
            return None

    @classmethod
    def recover_timestamps(cls, ckm_repo: CkanMetaRepo) -> None:
        """Backfill missing last_indexed timestamps from git history for every mod."""
        with cls.batch_write() as batch:
            logging.info('Recovering timestamps...')
            for mod in cls.scan(rate_limit=5):
                if not mod.last_indexed:
                    logging.info('Looking up timestamp for %s', mod.ModIdentifier)
                    mod.last_indexed = cls.last_indexed_from_git(
                        ckm_repo.git_repo, mod.ModIdentifier)
                    if mod.last_indexed:
                        logging.info('Saving %s: %s', mod.ModIdentifier, mod.last_indexed)
                        batch.save(mod)
            logging.info('Done!')
class HistoricalDecimalAttribute(Attribute):
    """
    A number attribute
    """
    attr_type = NUMBER

    def serialize(self, value):
        """
        Encode numbers as JSON
        """
        # decimal_default handles values json.dumps cannot serialize natively
        # (presumably Decimal instances) — confirm against its definition.
        return json.dumps(value, default=decimal_default)

    def deserialize(self, value):
        """
        Decode numbers from JSON
        """
        # NOTE(review): JSON decoding yields int/float, not Decimal, so a
        # Decimal does not round-trip to the same type — confirm intended.
        return json.loads(value)


# Monkey-patch pynamodb's python-type -> attribute mapping; presumably used
# when serializing plain python values, routing str and Decimal through the
# project's historical attribute variants — confirm against the pinned
# pynamodb version's internals.
pynamodb.attributes.SERIALIZE_CLASS_MAP = {
    dict: MapAttribute(),
    list: ListAttribute(),
    set: ListAttribute(),
    bool: BooleanAttribute(),
    float: NumberAttribute(),
    int: NumberAttribute(),
    str: HistoricalUnicodeAttribute(),
    decimal.Decimal: HistoricalDecimalAttribute()
}