# Beispiel #1 (score 0) — scrape-artifact separator between pasted examples
def write_to_new_line_json(items, output_path, entity):
    """Write *items* to a newline-delimited JSON file named <entity>.json.

    For the flat entities ('account', 'property', 'view'), *items* is a list
    of dicts and each dict becomes one JSON line.  For every other entity,
    *items* is a list of lists of dicts, which is flattened one level before
    writing.

    Args:
        items: list of dicts, or list of lists of dicts (see above).
        output_path: directory in which to create the output file.
        entity: entity label; used both for the filename and to decide the
            flat-vs-nested layout of *items*.

    Returns:
        (True, filename) on success; (False, None) on any failure — the
        error text is appended to the module-level log via mu.update_log.
    """
    try:
        filename = os.path.join(output_path, entity + '.json')

        with open(filename, 'w') as out:
            # BUG FIX: the original tested the undefined name `k` here,
            # raising NameError on every call; the entity label is what
            # distinguishes the flat lists from the nested ones.
            if entity in ('account', 'property', 'view'):
                records = items
            else:
                # nested case: flatten one level lazily
                records = (item for sub in items for item in sub)

            for record in records:
                json.dump(record, out)
                out.write('\n')

        return True, filename
    except Exception as e:
        mu.update_log(log_file, 'write_to_new_line_json' + str(e))
        return False, None
source_account = ''  # TODO: accountID string (value redacted in source)
dest_view = ''  # TODO: ID string for view/profile (value redacted in source)

## functions
def get_service(api_name, api_version, scopes, key_file_location):
    """Build an authorised Google API client.

    Loads a service-account key file, obtains credentials for the given
    scopes, and returns the client for the requested API name/version.
    """
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        key_file_location, scopes=scopes)
    return build(api_name, api_version, credentials=creds)

## main
if __name__ == '__main__':

    ## init: run log written alongside the other outputs
    log_file = os.path.join(output_path,'log','ga_filters' + '.txt')
    mu.update_log(log_file, 'exe filepath = ' + os.path.realpath(__file__))
    return_value = 1

    ## connect to the GA Management API twice: read-only for the source
    ## account, edit scope for the destination
    mu.update_log(log_file, 'connecting to api')
    a_readonly = get_service(api_name='analytics', api_version='v3', scopes=a_readonly_scope, key_file_location=service_account)
    a_edit = get_service(api_name='analytics', api_version='v3', scopes=a_edit_scope, key_file_location=service_account)

    ## get all filter ids for the specified source account
    ## (fix: link_filters was initialised twice and populated via a
    ## side-effecting list comprehension; build it directly instead)
    mu.update_log(log_file, 'getting source filters for ' + source_account)
    source_filter_ids = a_readonly.management().filters().list(accountId=source_account).execute()
    link_filters = [source_filter_id['id'] for source_filter_id in source_filter_ids.get('items', [])]
    
# Beispiel #3 (score 0) — scrape-artifact separator between pasted examples
def wait_check(query_count):
    """Throttle API usage: pause when *query_count* reaches 1900.

    Sleeps 100 seconds near the request ceiling, logs the pause, and resets
    the counter.

    BUG FIX: the original reassigned `query_count` locally, so the reset
    never reached the caller (ints are passed by value).  The count is now
    returned; call as `query_count = wait_check(query_count)`.  Existing
    callers that ignore the return value keep their old behaviour.

    Args:
        query_count: number of API requests made so far.

    Returns:
        int: 0 if the threshold was hit (after sleeping), otherwise the
        unchanged query_count.
    """
    if query_count == 1900:
        mu.update_log(log_file, 'quick snooze')
        time.sleep(100)
        query_count = 0
    return query_count
# Beispiel #4 (score 0) — scrape-artifact separator between pasted examples
    if query_count == 1900:
        mu.update_log(log_file, 'quick snooze')
        time.sleep(100)
        query_count = 0
    
## main — GA ETL driver: extracts accounts, properties and views, then
## per-account filters.  NOTE(review): `output_path`, `entities`, `a_scope`,
## `au_scope` and `service_account` are defined elsewhere in this file (not
## visible in this chunk); verify `entities` is pre-populated with list
## values for the keys used below.
if __name__ == '__main__':
    
    ## initialise
    log_file = os.path.join(output_path,'log', 'ga_etl' + '.txt')    
    return_value= 1
    files_to_upload = []
    # running count of API requests, used for quota throttling (see wait_check)
    query_count = 0

    ## connects to api
    mu.update_log(log_file, 'connecting to api service')
    a_serv = get_service(api_name='analytics', api_version='v3', scopes=a_scope, key_file_location=service_account)
    au_serv = get_service(api_name='analytics', api_version='v3', scopes=au_scope, key_file_location=service_account)

    ## extracts accounts, properties & views ('~all' spans every account/property)
    mu.update_log(log_file, 'extracting accounts, properties & views')
    entities['account'] = (a_serv.management().accounts().list().execute()).get('items',[])
    entities['property'] = (a_serv.management().webproperties().list(accountId='~all').execute()).get('items',[])
    entities['view'] = (a_serv.management().profiles().list(accountId='~all',webPropertyId='~all').execute()).get('items',[])
    query_count += 3

    ## iterates over accounts
    mu.update_log(log_file, 'iterate over accounts')  
    for a in entities['account']:
        ## populates account_filter & account_user
        # one filters().list call per account; the item list is appended
        # nested (list of lists), matching write_to_new_line_json's layout
        entities['account_filter'].append((a_serv.management().filters().list(accountId=a['id']).execute()).get('items',[]))                                        
# Beispiel #5 (score 0) — scrape-artifact separator between pasted examples
            fields = ()
        return bq.SchemaField(name=field_dict['name'],
                              field_type=field_dict['type'],
                              mode=field_dict['mode'],
                              fields=fields)


## main — GCS→BigQuery loader: reads a JSON task config from GCS and runs
## one load job per task.  NOTE(review): `output_path`, `gcs_bucket`,
## `gcs_path` and `etl_config` are defined elsewhere in this file (not
## visible in this chunk).
if __name__ == '__main__':
    ## initialise clients (use ambient application-default credentials)
    gcs_client = storage.Client()
    bq_client = bq.Client()
    log_file = os.path.join(output_path, 'log', 'etl_gcs_to_bq' + '.txt')

    ## import etl config: a JSON document of task entries stored as a blob in GCS
    mu.update_log(log_file, 'importing etl config')
    bucket = gcs_client.get_bucket(gcs_bucket)
    blob = bucket.get_blob(gcs_path + etl_config)
    task_config = json.loads(blob.download_as_string())

    ## iterate over task_config — one BigQuery load per task entry
    for task in task_config:
        mu.update_log(log_file,
                      'importing schema ' + task['destination_table'])
        # Schema wraps the schema JSON stored alongside the data in GCS
        job_schema = Schema(gcs_bucket, gcs_path + task['schema_file'])
        job_schema.process_schema()
        mu.update_log(log_file,
                      'running import for ' + task['destination_table'])
        dataset_ref = bq_client.dataset(task['dataset'])
        job_config = bq.LoadJobConfig()
        # write_disposition comes from the task config (e.g. WRITE_TRUNCATE/APPEND)
        job_config.write_disposition = task['write_disposition']
# Beispiel #6 (score 0) — scrape-artifact separator between pasted examples
        Filter.__init__(self,data)
        self.advancedDetails = data['advancedDetails']


## functions
def get_service(api_name, api_version, scopes, key_file_location):
    """Build an authorised Google API client.

    Loads the service-account key file, obtains credentials for *scopes*,
    and returns the client for the requested API name/version.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(key_file_location, scopes=scopes)
    service = build(api_name, api_version, credentials=credentials)
    return service

## main
if __name__ == '__main__':

    ## init
    log_file = os.path.join(output_path,'log','ga_filters' + '.txt')
    mu.update_log(log_file, 'exe filepath = ' + os.path.realpath(__file__))
    return_value = 1

    ## connect to api
    mu.update_log(log_file, 'connecting to api')
    a_readonly = get_service(api_name='analytics', api_version='v3', scopes=a_readonly_scope, key_file_location=service_account)
    a_edit = get_service(api_name='analytics', api_version='v3', scopes=a_edit_scope, key_file_location=service_account)

    ## get source account filters
    mu.update_log(log_file, 'getting source filters for ' + source_account)
    source_filters = a_readonly.management().filters().list(accountId=source_account).execute()
    dest_filters = []

    ## populate the destination filter array
    for source_filter in source_filters.get('items',[]):
        if source_filter['type'] == 'EXCLUDE':