예제 #1
0
    def __init__(self,
                 record: Union[lbsn.Post, lbsn.Place, lbsn.City,
                               lbsn.Country] = None):
        """Initialize the base from a post or a place-like record.

        Args:
            record: Optional lbsn.Post, lbsn.Place, lbsn.City or
                lbsn.Country. If None, the base is created with an
                empty key, empty attributes and empty metric sets.

        Raises:
            ValueError: If record is of any other type.
        """
        super().__init__()
        self.key["guid"] = None
        self.attrs['name'] = None
        self.attrs['geom_center'] = None
        self.attrs['geom_area'] = None
        self.metrics['pud_hll'] = set()
        self.metrics['utl_hll'] = set()
        self.metrics['latlng_hll'] = set()
        if record is None:
            # init empty
            return
        name = None
        geom_area = None
        if isinstance(record, lbsn.Post):
            coordinates_geom = record.post_latlng
            coordinates = HF.get_coordinates_from_ewkt(coordinates_geom)
            # use concat lat:lng as key if no place_key is available
            # this should later implement assignment based on area
            # intersection
            self.key["guid"] = HLF.hll_concat(
                [coordinates.lat, coordinates.lng])
        elif isinstance(record, (lbsn.Place, lbsn.City, lbsn.Country)):
            name = HF.null_check(record.name)
            coordinates_geom = record.geom_center
            geom_area = record.geom_area
            # use key from place, city or country record
            self.key["guid"] = HLF.hll_concat_origin_guid(record)
        else:
            # without this branch, coordinates_geom stays unbound and the
            # attribute assignment below fails with UnboundLocalError
            raise ValueError(
                f"Parsing of {type(self).__name__} only supported from "
                "lbsn.Post, lbsn.Place, lbsn.City or lbsn.Country")

        self.attrs['name'] = name
        self.attrs['geom_center'] = HF.return_ewkb_from_geotext(
            coordinates_geom)
        self.attrs['geom_area'] = HF.return_ewkb_from_geotext(geom_area)
예제 #2
0
 def __init__(self, friends_record: Tuple[lbsn.User, lbsn.User] = None):
     """Initialize the key pair from two user records.

     Args:
         friends_record: Optional pair of lbsn.User records. The first
             element fills key 'user_guid', the second fills key
             'user_guid_friend'. If None, both keys stay None.
     """
     super().__init__()
     key_names = ('user_guid', 'user_guid_friend')
     for key_name in key_names:
         self.key[key_name] = None
     if friends_record is not None:
         # TODO: implement one-way-hashing
         for position, key_name in enumerate(key_names):
             self.key[key_name] = HLF.hll_concat_origin_guid(
                 friends_record[position])
예제 #3
0
    def __init__(self, record: lbsn.Post = None, hashtag: str = None):
        """Initialize year, month, latitude and longitude keys from a post.

        Args:
            record: Optional lbsn.Post. If None, all keys and the
                'latlng_geom' attribute stay None.
            hashtag: Not used by this initializer.

        Raises:
            ValueError: If record is given but is not an lbsn.Post.
        """
        super().__init__()
        for key_name in ('year', 'month', 'latitude', 'longitude'):
            self.key[key_name] = None
        self.attrs['latlng_geom'] = None
        if record is None:
            # nothing to parse: leave the base empty
            return
        if not isinstance(record, lbsn.Post):
            raise ValueError("Parsing of MonthLatLngBase only supported "
                             "from lbsn.Post")
        timestamp = HLF.merge_dates_post(record)
        if timestamp is None:
            # no date on the post: keep every key empty
            return
        post_date = timestamp.date()
        self.key['year'] = post_date.year
        self.key['month'] = post_date.month

        latlng_ewkt = record.post_latlng
        latlng = HF.get_coordinates_from_ewkt(latlng_ewkt)
        self.key['latitude'] = latlng.lat
        self.key['longitude'] = latlng.lng
        # optional attribute, converted with HF.return_ewkb_from_geotext
        # so it is ready for the sql upsert
        self.attrs['latlng_geom'] = HF.return_ewkb_from_geotext(
            latlng_ewkt)
예제 #4
0
 def __init__(self, record: lbsn.User = None):
     """Initialize the 'user_guid' key from a user record.

     Args:
         record: Optional lbsn.User. If None, the key stays None.
     """
     super().__init__()
     self.key['user_guid'] = None
     if record is not None:
         # TODO: implement one-way-hashing
         user_guid = HLF.hll_concat_origin_guid(record)
         self.key['user_guid'] = user_guid
예제 #5
0
 def __init__(self, record: lbsn.Post = None):
     """Initialize the 'timestamp' key from a post.

     Args:
         record: Optional lbsn.Post. If None, the key stays None;
             otherwise it is set to the result of
             HLF.merge_dates_post(record).
     """
     super().__init__()
     self.key["timestamp"] = None
     if record is not None:
         self.key["timestamp"] = HLF.merge_dates_post(record)
예제 #6
0
 def __init__(self, record: lbsn.Post = None):
     """Initialize the 'timeofday' key from a post.

     Args:
         record: Optional lbsn.Post. If None, or if
             HLF.merge_dates_post(record) yields None, the key stays
             None. Otherwise the key is the time of day of the merged
             post date with microseconds set to 0.
     """
     super().__init__()
     self.key['timeofday'] = None
     if record is None:
         # init empty
         return
     post_date_time = HLF.merge_dates_post(record)
     if post_date_time is None:
         # no usable date on the post: keep the key empty
         return
     # time() must be called: the bare attribute is a bound method
     # without a replace(); then remove microseconds
     self.key['timeofday'] = post_date_time.time().replace(microsecond=0)
예제 #7
0
 def __init__(self, record: lbsn.Post = None):
     """Initialize the 'month' and 'day' keys from a post.

     Args:
         record: Optional lbsn.Post. If None, or if
             HLF.merge_dates_post(record) yields None, both keys stay
             None.
     """
     super().__init__()
     self.key['month'] = None
     self.key['day'] = None
     if record is None:
         # init empty
         return
     post_date_time = HLF.merge_dates_post(record)
     if post_date_time is None:
         # no usable date on the post: keep both keys empty instead of
         # failing on attribute access
         return
     self.key['month'] = post_date_time.month
     self.key['day'] = post_date_time.day
예제 #8
0
 def __init__(self, record: lbsn.Post = None):
     """Initialize the 'year' key from a post.

     Args:
         record: Optional lbsn.Post. If None, or if
             HLF.merge_dates_post(record) yields None, the key stays
             None.
     """
     super().__init__()
     self.key['year'] = None
     if record is not None:
         timestamp = HLF.merge_dates_post(record)
         if timestamp is not None:
             self.key['year'] = timestamp.date().year
예제 #9
0
 def __init__(self, record: lbsn.Post = None):
     """Initialize the 'date' key from a post.

     The 'name' attribute is always left as None by this initializer.

     Args:
         record: Optional lbsn.Post. If None, or if
             HLF.merge_dates_post(record) yields None, the key stays
             None.
     """
     super().__init__()
     self.key['date'] = None
     # optional: a name for the date (e.g. "New Year's Day") could be
     # stored here; it is not filled in yet
     self.attrs['name'] = None
     if record is not None:
         timestamp = HLF.merge_dates_post(record)
         if timestamp is not None:
             self.key['date'] = timestamp.date()
예제 #10
0
    def get_prepared_hll_records(self, batch_item: Dict[str, Any]):
        """Turn proprietary hll classes into prepared sql value tuples.

        Shards are calculated from the individual items through
        HLF.calculate_item_shards, using self.hllworker_cursor.

        Args:
            batch_item: Mapping whose values provide
                get_prepared_record().

        Returns:
            The result of HLF.concat_base_shards for the collected base
            records and the calculated shards.
        """
        escaped_items = []  # sql-escaped (base_key, metric_key, item)
        base_records = []  # (base_key, attr1, attr2)
        # iterate the mapping's values; the position of a value is
        # passed on as its base index
        # NOTE(review): positions keep counting values that are skipped
        # below, while base_records does not grow for them - confirm
        # that HLF.concat_base_shards expects this
        for position, hll_base in enumerate(batch_item.values()):
            prepared = hll_base.get_prepared_record()
            if not prepared.record:
                continue
            base_records.append(prepared.record)
            metric_tuples = HLF.concat_base_metric_item(
                position, prepared.metrics)
            # format tuple-values as sql-escaped strings and add them
            # to the list of items to be upserted
            escaped_items.extend(
                map(self.prepare_sqlescaped_values, metric_tuples))
        # format sql for shard generation
        values_str = HF.concat_values_str(escaped_items)
        # clear the current terminal line before the progress message
        sys.stdout.write("\033[K")
        print(f'Calculating hll shards for {len(values_str)} values..',
              end='\r')
        # calculate hll shards from raw values
        hll_shards = HLF.calculate_item_shards(
            self.hllworker_cursor, values_str)
        return HLF.concat_base_shards(base_records, hll_shards)
예제 #11
0
 def get_post_metrics(record) -> hll.HllMetrics:
     """Build the hll.HllMetrics for a record using the HLF helpers.

     The post, user, userday, latlng, place and upt values are each
     produced by the matching HLF.hll_concat_* helper.
     """
     # keyword arguments are evaluated left to right, so the helpers
     # run in the order post, user, pud, latlng, place, upt
     return hll.HllMetrics(
         post_hll=HLF.hll_concat_origin_guid(record),
         user_hll=HLF.hll_concat_user(record),
         pud_hll=HLF.hll_concat_userday(record),
         latlng_hll=HLF.hll_concat_latlng(record),
         place_hll=HLF.hll_concat_place(record),
         upt_hll=HLF.hll_concat_upt_hll(record),
     )
예제 #12
0
 def __init__(self, record: lbsn.Post = None, hashtag: str = None):
     """Initialize year, month and hashtag keys.

     Args:
         record: Optional lbsn.Post providing the date. If None, only
             the 'hashtag' key is filled.
         hashtag: Optional hashtag, stored lower-cased. If None, all
             keys stay None and record is not inspected.

     Raises:
         ValueError: If hashtag is given and record is neither None
             nor an lbsn.Post.
     """
     super().__init__()
     for key_name in ('year', 'month', 'hashtag'):
         self.key[key_name] = None
     if hashtag is None:
         # nothing to key on: leave the base empty
         return
     self.key['hashtag'] = hashtag.lower()
     if record is None:
         # hashtag only, no date information
         return
     if not isinstance(record, lbsn.Post):
         raise ValueError("Parsing of MonthHashtagBase only supported "
                          "from lbsn.Post")
     timestamp = HLF.merge_dates_post(record)
     if timestamp is None:
         return
     post_date = timestamp.date()
     self.key['year'] = post_date.year
     self.key['month'] = post_date.month
예제 #13
0
 def __init__(self, record: Union[lbsn.Post, lbsn.Place] = None):
     """Initialize the place base from a post or a place record.

     Args:
         record: Optional lbsn.Post or lbsn.Place. If None, or of any
             other type, the key and attributes stay None and the
             metric sets stay empty.
     """
     super().__init__()
     self.key['place_guid'] = None
     self.attrs['geom_center'] = None
     self.attrs['geom_area'] = None
     self.attrs['name'] = None
     self.metrics['pud_hll'] = set()
     self.metrics['utl_hll'] = set()
     if record is None:
         return
     if isinstance(record, lbsn.Post):
         # Post can be of Geoaccuracy "Place" without any
         # actual place id assigned (e.g. Flickr Geoaccuracy level < 10)
         # in this case, concat lat:lng as primary key
         coordinates_geom = record.post_latlng
         if not record.place_pkey.id:
             coordinates = HF.get_coordinates_from_ewkt(coordinates_geom)
             self.key['place_guid'] = HLF.hll_concat(
                 [coordinates.lat, coordinates.lng])
         else:
             self.key['place_guid'] = record.place_pkey.id
         # additional (optional) attributes
         # formatted ready for sql upsert
         self.attrs['geom_center'] = HF.return_ewkb_from_geotext(
             coordinates_geom)
         # geom_area not available from lbsn.Post
     elif isinstance(record, lbsn.Place):
         coordinates_geom = record.geom_center
         # the place id is the key, so the coordinates themselves do
         # not need to be parsed here
         self.key['place_guid'] = record.pkey.id
         self.attrs['geom_center'] = HF.return_ewkb_from_geotext(
             coordinates_geom)
         self.attrs['geom_area'] = HF.return_ewkb_from_geotext(
             HF.null_check(record.geom_area))
         self.attrs['name'] = record.name
예제 #14
0
 def get_place_metrics(record) -> hll.HllMetrics:
     """Build the hll.HllMetrics for a record, filling only place_hll.

     place_hll is the result of HLF.hll_concat_origin_guid(record).
     """
     return hll.HllMetrics(place_hll=HLF.hll_concat_origin_guid(record))