def create_bigtable_rows(jdata):
    """Build a Bigtable ``DirectRow`` holding one compressed profile payload.

    The row key is the SHA-256 hex digest of ``jdata["client_id"]``.  The
    cell value is the record serialized as JSON, UTF-8 encoded and
    zlib-compressed, with ``client_id`` replaced by that digest and the
    count columns coerced to ``int``.

    The input mapping is not modified; all rewriting happens on a shallow
    copy.

    Args:
        jdata: Mapping describing one record.  Must contain ``client_id``
            (a string) plus ``bookmark_count``, ``tab_open_count``,
            ``total_uri``, ``unique_tlds`` and ``subsession_length``.

    Returns:
        A ``DirectRow`` with a single ``profile:payload`` cell set.  The
        row is only built here, not committed.

    Raises:
        KeyError: If ``client_id`` or one of the count columns is missing.
    """
    import datetime
    import hashlib
    import json
    import zlib

    from google.cloud.bigtable import row

    column_family_id = "profile"
    column = b"payload"
    int_columns = (
        "bookmark_count",
        "tab_open_count",
        "total_uri",
        "unique_tlds",
        "subsession_length",
    )

    # Shallow copy: the caller's record must not be altered as a side
    # effect of building the row (key order is preserved by dict()).
    record = dict(jdata)
    record["client_id"] = hashlib.sha256(
        record["client_id"].encode("utf8")).hexdigest()

    # Coerce float columns to int; falsy values (None, 0.0, "") become 0.
    for name in int_columns:
        record[name] = int(record[name] or 0)

    direct_row = row.DirectRow(row_key=record["client_id"])
    direct_row.set_cell(
        column_family_id,
        column,
        zlib.compress(json.dumps(record).encode("utf8")),
        # Timezone-aware replacement for the deprecated utcnow().
        timestamp=datetime.datetime.now(datetime.timezone.utc),
    )

    return direct_row
def delete_bigtable_rows(element):
    """Build a ``DirectRow`` carrying a delete mutation for one client.

    The row key is the SHA-256 hex digest of the UTF-8 encoded
    ``element['client_id']``.

    Args:
        element: Mapping with a string ``client_id`` entry.

    Returns:
        A ``DirectRow`` on which ``delete()`` has been called.  The row is
        only built here, not committed.
    """
    import hashlib

    from google.cloud.bigtable import row

    client_id_bytes = element['client_id'].encode("utf8")
    hashed_key = hashlib.sha256(client_id_bytes).hexdigest()

    deletion = row.DirectRow(row_key=hashed_key)
    deletion.delete()
    return deletion
Esempio n. 3
0
    def process(self, element, timestamp, *args, **kwargs):
        """Yield one ``DirectRow`` with the 'umaylike' cells of an element.

        Args:
            element: Mapping with ``user_id`` (used as the row key) and
                ``item_id`` (an iterable of strings, comma-joined into a
                single cell value).
            timestamp: Timestamp passed to both cells.
            *args: Unused.
            **kwargs: Unused.

        Yields:
            The populated ``DirectRow``; it is not committed here.
        """
        from google.cloud.bigtable import row

        family = 'umaylike'
        joined_items = ','.join(element['item_id'])

        user_row = row.DirectRow(element['user_id'])
        # NOTE(review): the column named 'user_id' stores the joined item
        # ids, not the user id -- confirm this is the intended schema.
        cells = (
            ('user_id', joined_items),
            ('rule', 'RULE_NAME'),
        )
        for column_name, cell_value in cells:
            user_row.set_cell(family, column_name, cell_value, timestamp)
        yield user_row
Esempio n. 4
0
 def process(self, element):
     """Yield a ``DirectRow`` with the prediction columns of one element.

     Args:
         element: Mapping with ``event_id`` (used as the row key) and the
             entries ``prediction``, ``time``, ``prob_0`` and ``prob_1``,
             each written to a same-named column in family ``cf``.

     Yields:
         The populated ``DirectRow``; it is not committed here.

     Raises:
         KeyError: If ``event_id`` or one of the columns is missing.
     """
     cf = 'cf'
     column_names = ('prediction', 'time', 'prob_0', 'prob_1')
     # One timezone-aware timestamp for the whole row: a naive local time
     # would be interpreted as UTC by the Bigtable client, and taking it per
     # cell would give the cells of one write different versions.
     written_at = datetime.datetime.now(datetime.timezone.utc)
     direct_row = row.DirectRow(row_key=element['event_id'])
     for name in column_names:
         direct_row.set_cell(column_family_id=cf,
                             column=name,
                             value=element[name],
                             timestamp=written_at)
     yield direct_row
Esempio n. 5
0
 def generate_row(self, index=0):
     """Build a ``DirectRow`` with ten identical cells in family ``cf1``.

     The row key is ``beam_key`` followed by ``index`` zero-padded to seven
     digits.  Columns are ``field0`` .. ``field9``; each holds the same
     100-character value made of one randomly chosen letter or digit.

     Args:
         index: Integer used to build the row key.

     Returns:
         The populated ``DirectRow``; it is not committed here.
     """
     rand = choice(string.ascii_letters + string.digits)
     # Same result as joining ``rand`` 100 times: one character repeated.
     value = rand * 100
     column_family_id = 'cf1'
     key = "beam_key{0:07}".format(index)
     # One timezone-aware timestamp for the whole row: a naive local time
     # would be interpreted as UTC by the Bigtable client.
     written_at = datetime.datetime.now(datetime.timezone.utc)
     direct_row = row.DirectRow(row_key=key)
     for column_id in range(10):
         direct_row.set_cell(column_family_id,
                             ('field%s' % column_id).encode('utf-8'), value,
                             written_at)
     return direct_row
Esempio n. 6
0
    def _generate(self):
        """Yield ``self.number`` rows of ten identical cells each.

        Row keys are ``beam_key`` followed by the row index zero-padded to
        seven digits.  Each row gets columns ``field0`` .. ``field9`` in
        family ``self.column_family_id``, all holding ``self.rand``
        repeated 100 times.

        Yields:
            One populated ``DirectRow`` per index; rows are not committed
            here.
        """
        value = ''.join(self.rand for _ in range(100))

        for index in range(self.number):
            key = "beam_key{0:07}".format(index)
            # One timezone-aware timestamp per row: a naive local time
            # would be interpreted as UTC by the Bigtable client, and
            # taking it per cell gives one row's cells different versions.
            written_at = datetime.datetime.now(datetime.timezone.utc)
            direct_row = row.DirectRow(row_key=key)
            for column_id in range(10):
                direct_row.set_cell(self.column_family_id,
                                    ('field%s' % column_id).encode('utf-8'),
                                    value, written_at)
            yield direct_row
Esempio n. 7
0
  def process(self, ranges):
    """Yield one generated ``DirectRow`` per row id in the given range.

    Row ids run from ``int(ranges[0])`` (inclusive) to
    ``int(ranges[1][0])`` (exclusive).  Each row has the key ``beam_key``
    plus the zero-padded id, and columns ``field0`` .. ``field9`` in family
    ``cf1``, all holding one random letter or digit repeated 100 times.
    ``self.generate_row.inc()`` is called once per row.

    Args:
      ranges: Indexable whose first item is the start id and whose second
        item is itself indexable with the end id at position 0.
        NOTE(review): presumably a (start, [end]) pair from an upstream
        grouping step -- confirm against the caller.

    Yields:
      The populated ``DirectRow`` objects; they are not committed here.
    """
    for row_id in range(int(ranges[0]), int(ranges[1][0])):
      key = "beam_key{0:07}".format(row_id)
      rand = random.choice(string.ascii_letters + string.digits)
      # Built once per row; every cell of the row holds the same value.
      value = rand * 100
      # One timezone-aware timestamp per row: a naive local time would be
      # interpreted as UTC by the Bigtable client.
      written_at = datetime.datetime.now(datetime.timezone.utc)

      direct_row = row.DirectRow(row_key=key)
      # Plain loop: set_cell is called for its side effect only.
      for i in range(10):
        direct_row.set_cell(
            'cf1',
            ('field%s' % i).encode('utf-8'),
            value,
            written_at)
      self.generate_row.inc()
      yield direct_row
Esempio n. 8
0
    def process(self, row_values):
        """Build a ``DirectRow`` from one element and yield it.

        :type row_values: dict
        :param row_values: dict with ``row_key`` (the key of the row) and
            ``row_content``, an iterable of dicts that each provide
            ``column_family_id``, ``column_id`` and ``value`` for one cell.

        :rtype: generator
        :returns: yields the populated ``DirectRow``; it is not committed
            here.
        """
        direct_row = row.DirectRow(row_key=row_values["row_key"])
        # One timezone-aware timestamp for the whole row: a naive local
        # time would be interpreted as UTC by the Bigtable client, and
        # taking it per cell gives one row's cells different versions.
        written_at = datetime.datetime.now(datetime.timezone.utc)

        for row_value in row_values["row_content"]:
            direct_row.set_cell(
                row_value["column_family_id"],
                row_value["column_id"],
                row_value["value"],
                written_at)

        yield direct_row
 def process(self, key):
     """Build a row that sets ``stats_summary:os_build`` to ``android``.

     Args:
         key: Row key for the new ``DirectRow``.

     Returns:
         A one-element list containing the ``DirectRow``; it is not
         committed here.
     """
     direct_row = row.DirectRow(row_key=key)
     # Timezone-aware timestamp: a naive local time would be interpreted
     # as UTC by the Bigtable client.
     direct_row.set_cell("stats_summary", b"os_build", b"android",
                         datetime.datetime.now(datetime.timezone.utc))
     return [direct_row]
Esempio n. 10
0
                 instance_type=None,
                 labels=None):
        return CustomInstance(
            instance_id,
            self,
            display_name=display_name,
            instance_type=instance_type,
            labels=labels,
        )


# Write ten sample rows to the configured table, creating the table with a
# single 'cf1' column family (at most 2 versions kept per cell) if needed.
# admin=True is needed for the table admin calls below (exists/create).
client = Client(project=beam_options['project_id'], admin=True)
instance = client.instance(beam_options['instance_id'])
# NOTE(review): confirm the unit of mutation_timeout against the client docs.
table = instance.table(beam_options['table_id'], mutation_timeout=600000)
column_family_id = 'cf1'
if not table.exists():
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_families = {column_family_id: max_versions_rule}
    table.create(column_families=column_families)
mutation_batcher = table.mutations_batcher()

for i in range(10):
    key = "beam_key%s" % i
    row_element = row.DirectRow(row_key=key)

    # Row i gets a single column, 'field<i>'.  The timestamp is
    # timezone-aware because a naive local time would be interpreted as UTC
    # by the Bigtable client.
    row_element.set_cell(column_family_id, ('field%s' % i).encode('utf-8'),
                         'abc',
                         datetime.datetime.now(datetime.timezone.utc))

    mutation_batcher.mutate(row_element)
# Send whatever is still buffered in the batcher.
mutation_batcher.flush()