Example #1
def rows_to_table(auth, project_id, dataset_id, table_id, rows, schema=[], skip_rows=1, disposition='WRITE_TRUNCATE', wait=True):
  if project.verbose: print('BIGQUERY ROWS TO TABLE: ', project_id, dataset_id, table_id)

  buffer_data = BytesIO()
  buffer_writer = codecs.getwriter('utf-8')
  writer = csv.writer(buffer_writer(buffer_data), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
  has_rows = False

  if rows == []:
    if project.verbose: print('BigQuery Zero Rows')
    return io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'CSV', schema, skip_rows, disposition, wait)

  for is_last, row in flag_last(rows):

    # write row to csv buffer
    writer.writerow(row)

    # write the buffer in chunks
    if is_last or buffer_data.tell() + 1 > BIGQUERY_BUFFERSIZE:
      if project.verbose: print('BigQuery Buffer Size', buffer_data.tell())
      buffer_data.seek(0) # reset for read
      io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'CSV', schema, skip_rows, disposition)

      # reset buffer for the next loop and switch to append so earlier chunks are kept
      buffer_data.seek(0) # reset for write
      buffer_data.truncate() # clear contents ( yes, it's needed to reset the EOF marker )
      disposition = 'WRITE_APPEND' # append all remaining records
      skip_rows = 0
      has_rows = True

  # if no rows, clear table to simulate empty write
  if not has_rows:
    if project.verbose: print('BigQuery Zero Rows')
    return io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'CSV', schema, skip_rows, disposition, wait)
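
rows_to_table (and the examples that follow) iterate with a flag_last helper that pairs each item with a marker for the final element, which is what lets the loop flush the last partial buffer. Its implementation is not shown on this page; the sketch below is a minimal version inferred only from how it is called here, and the real helper in the source project may differ.

def flag_last(iterable):
  """Yield (is_last, item) tuples; an empty iterable yields nothing."""
  # Sketch inferred from the call sites above, not the project's actual implementation.
  iterator = iter(iterable)
  try:
    previous = next(iterator)
  except StopIteration:
    return # empty input: the caller's has_rows flag stays False
  for item in iterator:
    yield False, previous
    previous = item
  yield True, previous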
Example #2
def json_to_table(auth, project_id, dataset_id, table_id, json_data, schema=None, disposition='WRITE_TRUNCATE', wait=True):
  if project.verbose: print('BIGQUERY JSON TO TABLE: ', project_id, dataset_id, table_id)

  buffer_data = BytesIO()
  has_rows = False

  for is_last, record in flag_last(json_data):

    # check if json is already string encoded, and write to buffer
    buffer_data.write((record if isinstance(record, str) else json.dumps(record)).encode('utf-8'))

    # write the buffer in chunks
    if is_last or buffer_data.tell() + 1 > BIGQUERY_BUFFERSIZE:
      if project.verbose: print('BigQuery Buffer Size', buffer_data.tell())
      buffer_data.seek(0) # reset for read
      io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'NEWLINE_DELIMITED_JSON', schema, 0, disposition)

      # reset buffer for the next loop and switch to append so earlier chunks are kept
      buffer_data.seek(0) # reset for write
      buffer_data.truncate() # clear contents ( yes, it's needed to reset the EOF marker )
      disposition = 'WRITE_APPEND' # append all remaining records
      has_rows = True

    # if not the last record, append a newline for newline-delimited json
    else:
      buffer_data.write('\n'.encode('utf-8'))

  # if no rows, clear table to simulate empty write
  if not has_rows:
    if project.verbose: print('BigQuery Zero Rows')
    return io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'NEWLINE_DELIMITED_JSON', schema, 0, disposition, wait)
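
For context, a hypothetical call to json_to_table might look like the sketch below; the project, dataset, and table names, the records, and the schema are placeholders rather than values from the source, and the auth value follows the user/service convention used in the other examples.

# Hypothetical usage sketch: all identifiers below are placeholders.
records = [
  {'name': 'alpha', 'value': 1},
  {'name': 'beta', 'value': 2},
]

placeholder_schema = [
  {'name': 'name', 'type': 'STRING'},
  {'name': 'value', 'type': 'INTEGER'},
]

json_to_table(
  'service',          # auth: 'user' or 'service', per the other examples
  'my-project-id',    # placeholder BigQuery project
  'my_dataset',       # placeholder dataset
  'my_table',         # placeholder table
  records,
  schema=placeholder_schema,
  disposition='WRITE_TRUNCATE'  # first chunk truncates, later chunks append
)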
Example #3
def conversions_upload(auth, account, floodlight_activity_id, conversion_type, conversion_rows, encryption_entity=None, update=False):
  """ Uploads an offline conversion list to DCM.

  Bulletproofing: https://developers.google.com/doubleclick-advertisers/guides/conversions_upload

  Handles errors and segmentation of conversions so the list can be any size.

  Args:
    * auth: (string) Either user or service.
    * account: (string) [account:advertiser@profile] token.
    * floodlight_activity_id: (int) ID of DCM floodlight to upload conversions to.
    * conversion_type: (string) One of the following: encryptedUserId, encryptedUserIdCandidates, gclid, mobileDeviceId.
    * conversion_rows: (iterator) List of the following rows: Ordinal, timestampMicros, encryptedUserId | encryptedUserIdCandidates | gclid | mobileDeviceId.
    * encryption_entity: (object) See EncryptionInfo docs: https://developers.google.com/doubleclick-advertisers/v3.2/conversions/batchinsert#encryptionInfo
    * update: (boolean) If True, calls conversions().batchupdate instead of batchinsert.

  Returns:
    * Generator yielding the status object returned by the API for each uploaded batch entry.

  """

  account_id, advertiser_id = parse_account(auth, account)
  is_superuser, profile_id = get_profile_for_api(auth, account_id)

  kwargs = { 'profileId':profile_id, 'accountId':account_id } if is_superuser else { 'profileId':profile_id }
  kwargs['id'] = floodlight_activity_id
  response = API_DCM(auth, internal=is_superuser).floodlightActivities().get(**kwargs).execute()

  # upload in batch sizes of DCM_CONVERSION_SIZE
  row_count = 0
  row_buffer = []
  for is_last, row in flag_last(conversion_rows):
    row_buffer.append(row)

    if is_last or len(row_buffer) == DCM_CONVERSION_SIZE:

      if project.verbose: print('CONVERSION UPLOADING ROWS: %d - %d' % (row_count,  row_count + len(row_buffer)))

      body = {
        'conversions': [{
          'floodlightActivityId': floodlight_activity_id,
          'floodlightConfigurationId': response['floodlightConfigurationId'],
          'ordinal': row[0],
          'timestampMicros': row[1],
          'quantity':1,
          'value':0.0,
          conversion_type: row[2],
        } for row in row_buffer]
      }

      if encryption_entity: body['encryptionInfo'] = encryption_entity

      kwargs = { 'profileId':profile_id, 'accountId':account_id } if is_superuser else { 'profileId':profile_id }
      kwargs['body'] = body

      if update: results = API_DCM(auth, internal=is_superuser).conversions().batchupdate(**kwargs).execute()
      else: results = API_DCM(auth, internal=is_superuser).conversions().batchinsert(**kwargs).execute()

      # stream back status
      for status in results['status']: yield status

      # clear the buffer
      row_count += len(row_buffer)
      row_buffer = []
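
Since conversions_upload is a generator, statuses only stream back as the caller iterates over it. The sketch below shows one hypothetical way to consume it; the account token, floodlight activity ID, and conversion rows are placeholders, and the assumption that per-conversion errors appear under an 'errors' key follows the DCM batchinsert response format.

# Hypothetical usage sketch: the account token, floodlight ID, and rows are placeholders.
rows = [
  ('order-1001', 1546300800000000, 'example-gclid-1'),  # Ordinal, timestampMicros, gclid
  ('order-1002', 1546300860000000, 'example-gclid-2'),
]

for status in conversions_upload(
  'user',               # auth
  '12345:67890@111',    # [account:advertiser@profile] token
  9876543,              # floodlight_activity_id
  'gclid',              # conversion_type
  rows
):
  # each status entry may include per-conversion errors reported by the API
  if status.get('errors'):
    print('Conversion error:', status['errors'])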