def __init__(self, record: Union[lbsn.Post, lbsn.Place,
                                 lbsn.City, lbsn.Country] = None):
    """Initialize key, attrs and metrics, optionally from an lbsn record.

    Args:
        record: An lbsn.Post, lbsn.Place, lbsn.City or lbsn.Country.
            If None, the base is initialized empty (key and attrs are
            None, metrics are empty sets).

    Raises:
        ValueError: If record is not None and not one of the
            supported lbsn types.
    """
    super().__init__()
    self.key["guid"] = None
    self.attrs['name'] = None
    self.attrs['geom_center'] = None
    self.attrs['geom_area'] = None
    self.metrics['pud_hll'] = set()
    self.metrics['utl_hll'] = set()
    self.metrics['latlng_hll'] = set()
    if record is None:
        # init empty
        return
    name = None
    geom_area = None
    if isinstance(record, lbsn.Post):
        coordinates_geom = record.post_latlng
        coordinates = HF.get_coordinates_from_ewkt(coordinates_geom)
        # use concat lat:lng as key if no place_key is available;
        # this should later implement assignment based on area
        # intersection
        self.key["guid"] = HLF.hll_concat(
            [coordinates.lat, coordinates.lng])
    elif isinstance(record, (lbsn.Place, lbsn.City, lbsn.Country)):
        name = HF.null_check(record.name)
        coordinates_geom = record.geom_center
        geom_area = record.geom_area
        # use key from place, city or country record
        self.key["guid"] = HLF.hll_concat_origin_guid(record)
    else:
        # without this branch, coordinates_geom would be unbound below
        # and the constructor would fail with an UnboundLocalError
        raise ValueError(
            f"Parsing of {type(self).__name__} only supported "
            "from lbsn.Post, lbsn.Place, lbsn.City or lbsn.Country")
    self.attrs['name'] = name
    self.attrs['geom_center'] = HF.return_ewkb_from_geotext(
        coordinates_geom)
    # geom_area stays None for lbsn.Post records
    self.attrs['geom_area'] = HF.return_ewkb_from_geotext(geom_area)
def __init__(self, friends_record: Tuple[lbsn.User, lbsn.User] = None):
    """Initialize the key from a pair of lbsn.User records.

    Args:
        friends_record: Tuple of (user, friend). If None, both key
            entries stay None.
    """
    super().__init__()
    key_names = ('user_guid', 'user_guid_friend')
    for key_name in key_names:
        self.key[key_name] = None
    if friends_record is not None:
        # TODO: implement one-way-hashing
        for position, key_name in enumerate(key_names):
            self.key[key_name] = HLF.hll_concat_origin_guid(
                friends_record[position])
def __init__(self, record: lbsn.Post = None, hashtag: str = None):
    """Initialize year/month/latitude/longitude keys from an lbsn.Post.

    Args:
        record: The lbsn.Post to read date and coordinates from. If
            None, all keys and attrs stay None.
        hashtag: Not used by this constructor.

    Raises:
        ValueError: If record is given but is not an lbsn.Post.
    """
    super().__init__()
    for key_name in ('year', 'month', 'latitude', 'longitude'):
        self.key[key_name] = None
    self.attrs['latlng_geom'] = None
    if record is None:
        # init empty
        return
    if not isinstance(record, lbsn.Post):
        raise ValueError("Parsing of MonthLatLngBase only supported "
                         "from lbsn.Post")
    merged_datetime = HLF.merge_dates_post(record)
    if merged_datetime is None:
        # no date available: leave everything empty
        return
    post_date = merged_datetime.date()
    self.key['year'] = post_date.year
    self.key['month'] = post_date.month
    latlng_geom = record.post_latlng
    latlng = HF.get_coordinates_from_ewkt(latlng_geom)
    self.key['latitude'] = latlng.lat
    self.key['longitude'] = latlng.lng
    # additional (optional) attributes,
    # formatted ready for sql upsert
    self.attrs['latlng_geom'] = HF.return_ewkb_from_geotext(latlng_geom)
def __init__(self, record: lbsn.User = None):
    """Initialize the user_guid key, optionally from an lbsn.User.

    Args:
        record: The lbsn.User to derive the key from. If None, the
            key stays None.
    """
    super().__init__()
    self.key['user_guid'] = None
    if record is not None:
        # TODO: implement one-way-hashing
        self.key['user_guid'] = HLF.hll_concat_origin_guid(record)
def __init__(self, record: lbsn.Post = None):
    """Initialize the timestamp key, optionally from an lbsn.Post.

    Args:
        record: The lbsn.Post whose merged date is stored as the
            key. If None, the key stays None.
    """
    super().__init__()
    self.key["timestamp"] = None
    if record is not None:
        # the result of merge_dates_post is stored as is
        self.key["timestamp"] = HLF.merge_dates_post(record)
def __init__(self, record: lbsn.Post = None):
    """Initialize the timeofday key, optionally from an lbsn.Post.

    Args:
        record: The lbsn.Post to read the merged date from. If None,
            or if HLF.merge_dates_post returns None for it, the key
            stays None.
    """
    super().__init__()
    self.key['timeofday'] = None
    if record is None:
        # init empty
        return
    post_date_time = HLF.merge_dates_post(record)
    if post_date_time is None:
        # no date available: leave key empty
        return
    # time() must be called: accessing the bound method without
    # calling it has no replace() and raises an AttributeError.
    # remove microseconds from the time of day
    self.key['timeofday'] = post_date_time.time().replace(microsecond=0)
def __init__(self, record: lbsn.Post = None):
    """Initialize the month and day keys, optionally from an lbsn.Post.

    Args:
        record: The lbsn.Post to read the merged date from. If None,
            or if HLF.merge_dates_post returns None for it, both
            keys stay None.
    """
    super().__init__()
    self.key['month'] = None
    self.key['day'] = None
    if record is None:
        # init empty
        return
    post_date_time = HLF.merge_dates_post(record)
    if post_date_time is None:
        # no date available: leave keys empty instead of failing
        # on attribute access
        return
    self.key['month'] = post_date_time.month
    self.key['day'] = post_date_time.day
def __init__(self, record: lbsn.Post = None):
    """Initialize the year key, optionally from an lbsn.Post.

    Args:
        record: The lbsn.Post to read the merged date from. If None,
            or if HLF.merge_dates_post returns None for it, the key
            stays None.
    """
    super().__init__()
    self.key['year'] = None
    if record is None:
        # init empty
        return
    merged_datetime = HLF.merge_dates_post(record)
    if merged_datetime is not None:
        self.key['year'] = merged_datetime.date().year
def __init__(self, record: lbsn.Post = None):
    """Initialize the date key, optionally from an lbsn.Post.

    The name attribute is always left at None here.

    Args:
        record: The lbsn.Post to read the merged date from. If None,
            or if HLF.merge_dates_post returns None for it, the key
            stays None.
    """
    super().__init__()
    self.key['date'] = None
    self.attrs['name'] = None
    if record is None:
        # init empty
        return
    merged_datetime = HLF.merge_dates_post(record)
    if merged_datetime is not None:
        # optional: add name of date (e.g. "New Year's Day")
        self.key['date'] = merged_datetime.date()
def get_prepared_hll_records(self, batch_item: Dict[str, Any]):
    """Turn proprietary hll classes into prepared sql value tuples.

    This includes calculation of shards from individual items
    using the hll_worker.

    Args:
        batch_item: Mapping whose values provide get_prepared_record().

    Returns:
        The result of HLF.concat_base_shards for the collected base
        records and the calculated shards.
    """
    base_records = []  # (base_key, attr1, attr2)
    escaped_items = []  # (base_key, metric_key, item)
    for position, hll_base in enumerate(batch_item.values()):
        prepared = hll_base.get_prepared_record()
        if not prepared.record:
            # nothing to submit for this entry
            continue
        base_records.append(prepared.record)
        # NOTE(review): position is the enumerate index over all values,
        # including skipped ones - confirm this matches what
        # HLF.concat_base_shards expects
        metric_item_tuples = HLF.concat_base_metric_item(
            position, prepared.metrics)
        # format tuple-values as sql-escaped strings and
        # add them to the global list of items to be upserted
        escaped_items += [
            self.prepare_sqlescaped_values(item_tuple)
            for item_tuple in metric_item_tuples
        ]
    # format sql for shard generation
    values_str = HF.concat_values_str(escaped_items)
    # clear line
    sys.stdout.write("\033[K")
    print(f'Calculating hll shards for {len(values_str)} values..',
          end='\r')
    # calculate hll shards from raw values and attach to base records
    return HLF.concat_base_shards(
        base_records,
        HLF.calculate_item_shards(self.hllworker_cursor, values_str))
def get_post_metrics(record) -> hll.HllMetrics:
    """Get hll metrics from lbsn.Post record.

    Each metric value comes from the corresponding HLF.hll_concat_*
    helper and is passed to hll.HllMetrics by keyword.
    """
    return hll.HllMetrics(
        post_hll=HLF.hll_concat_origin_guid(record),
        user_hll=HLF.hll_concat_user(record),
        pud_hll=HLF.hll_concat_userday(record),
        latlng_hll=HLF.hll_concat_latlng(record),
        place_hll=HLF.hll_concat_place(record),
        upt_hll=HLF.hll_concat_upt_hll(record),
    )
def __init__(self, record: lbsn.Post = None, hashtag: str = None):
    """Initialize year/month/hashtag keys from an lbsn.Post and hashtag.

    Args:
        record: The lbsn.Post to read the merged date from. Only
            looked at when hashtag is given.
        hashtag: The hashtag, stored lower-cased. If None, all keys
            stay None regardless of record.

    Raises:
        ValueError: If hashtag is given and record is neither None
            nor an lbsn.Post.
    """
    super().__init__()
    for key_name in ('year', 'month', 'hashtag'):
        self.key[key_name] = None
    if hashtag is None:
        # init empty
        return
    self.key['hashtag'] = hashtag.lower()
    if record is None:
        # only the hashtag is known
        return
    if not isinstance(record, lbsn.Post):
        raise ValueError("Parsing of MonthHashtagBase only supported "
                         "from lbsn.Post")
    merged_datetime = HLF.merge_dates_post(record)
    if merged_datetime is None:
        return
    post_date = merged_datetime.date()
    self.key['year'] = post_date.year
    self.key['month'] = post_date.month
def __init__(self, record: Union[lbsn.Post, lbsn.Place] = None):
    """Initialize place key, attrs and metrics from a Post or Place.

    Args:
        record: An lbsn.Post or lbsn.Place. If None, or of any other
            type, key and attrs stay None and metrics stay empty.
    """
    super().__init__()
    self.key['place_guid'] = None
    for attr_name in ('geom_center', 'geom_area', 'name'):
        self.attrs[attr_name] = None
    for metric_name in ('pud_hll', 'utl_hll'):
        self.metrics[metric_name] = set()
    if record is None:
        return
    if isinstance(record, lbsn.Post):
        # Post can be of Geoaccuracy "Place" without any
        # actual place id assigned (e.g. Flickr Geoaccuracy level < 10);
        # in this case, concat lat:lng as primary key
        center_geom = record.post_latlng
        place_id = record.place_pkey.id
        if place_id:
            self.key['place_guid'] = place_id
        else:
            latlng = HF.get_coordinates_from_ewkt(center_geom)
            self.key['place_guid'] = HLF.hll_concat(
                [latlng.lat, latlng.lng])
        # additional (optional) attributes,
        # formatted ready for sql upsert;
        # geom_area is not available from lbsn.Post
        self.attrs['geom_center'] = HF.return_ewkb_from_geotext(
            center_geom)
    elif isinstance(record, lbsn.Place):
        center_geom = record.geom_center
        # NOTE(review): the parsed coordinates are not used below; the
        # call is kept so any error it raises still surfaces - confirm
        # whether it can be dropped
        HF.get_coordinates_from_ewkt(center_geom)
        self.key['place_guid'] = record.pkey.id
        self.attrs['geom_center'] = HF.return_ewkb_from_geotext(
            center_geom)
        area_geom = HF.null_check(record.geom_area)
        self.attrs['geom_area'] = HF.return_ewkb_from_geotext(area_geom)
        self.attrs['name'] = record.name
def get_place_metrics(record) -> hll.HllMetrics:
    """Get hll metrics from lbsn.Place record.

    Only place_hll is set, from HLF.hll_concat_origin_guid(record).
    """
    return hll.HllMetrics(place_hll=HLF.hll_concat_origin_guid(record))