def get_filenames(resp, service, random_number):
    try:
        for key, items in resp.iteritems():
            for item in items:
                log.info('############################################################################################')
                filename = item['name']
                lock_file = check_for_lock_file(filename, random_number, service)
                if not lock_file:
                    log.info('File was not locked and hence locking it and processing the files')
                    # ARCHIVE THE FILE FIRST
                    copy_resp = copy_file_to_archive(filename, service, BUCKET_NAME, ARCHIVE_BUCKET_NAME)
                    if len(copy_resp) == 0:
                        log.error(' ERROR IN COPYING FILE --- SKIPPING FILE -- {0} '.format(filename))
                    else:
                        log.info(' COPYING FILE DONE ')
                        log.info('Getting file -- {0}'.format(filename))
                        get_file_resp = get_file(filename, service)
                        if len(get_file_resp) == 0:
                            log.error('Error in getting file -- {0}'.format(get_file_resp))
                        else:
                            process_file_resp = process_file(filename, get_file_resp, service)
                            if len(process_file_resp) == 0:
                                log.error('Error in Processing file -- {0}'.format(filename))
                            else:
                                delete_resp = delete_file(filename, service)
                                # type is string if success
                                if isinstance(delete_resp, dict) and len(delete_resp) == 0:
                                    log.error(' Error in Deleting file --- {0} '.format(filename))
                else:
                    log.info(' {0} Locked --- Do Nothing -----'.format(filename))
                    continue
                log.info('############################################################################################')
    except Exception as e:
        log.error('Error in accessing File -- {0}'.format(e[0]))

def insert_project_data(project_id, project_name):
    done = False
    try:
        project = Project('other', project_id, project_name, 'other', '', '', '', 0)
        db_session.add(project)
        db_session.commit()
        done = True
    except IntegrityError as e:
        # log.info('---- Project DATA ALREADY IN DB --- UPDATE ------')
        db_session.rollback()
        project = Project.query.filter_by(project_id=project_id).first()
        project.project_name = project_name
        db_session.commit()
        done = True
    except Exception as e:
        log.error(' ------------- ERROR IN ADDING PROJECT DATA TO THE DB ------------- {0}'.format(e[0]))
        log.error(' ------------- ERROR IN ADDING PROJECT DATA TO THE DB ------------- {0}<---->{1}'.format(project_id, project_name))
    return done

def insert_data(instanceId, key, value):
    done = False
    log.info('---- starting to add info to DB {0}, {1}, {2} ----'.format(instanceId, key, value))
    try:
        instance = Instance(instanceId=instanceId, key=key, value=value)
        db_session.add(instance)
        db_session.commit()
        log.info('--------------------- ADDED INFO TO DB ---------------------')
        done = True
    except IntegrityError as e:
        log.info('---- DATA ALREADY IN DB --- UPDATE ------')
        # log.info('instanceId = {0}<----> key = {1}<-----> value = {2}'.format(instanceId, key, value))
        db_session.rollback()
        instance = Instance.query.filter_by(instanceId=instanceId, key=key).first()
        instance.value = value
        db_session.commit()
        done = True
    except Exception as e:
        log.error(' ------------- ERROR IN ADDING DATA TO THE DB ------------- {0}'.format(e[0]))
    return done

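# A minimal usage sketch (assumption: this module exposes insert_data and an
# initialized db_session as above; the instance ID is a placeholder). It stores
# one attribute of a Compute Engine instance as an (instanceId, key, value) row
# and relies on the IntegrityError branch to turn a duplicate insert into an update.
def example_upsert_instance_attribute():
    # First call inserts the row; a second call with a new value updates it.
    insert_data('1234567890', 'status', 'RUNNING')
    insert_data('1234567890', 'status', 'TERMINATED')
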
def insert_data(usage_date, cost, project_id, resource_type, account_id, usage_value, measurement_unit):
    done = False
    try:
        usage = Usage(usage_date, cost, project_id, resource_type, account_id, usage_value, measurement_unit)
        db_session.add(usage)
        db_session.commit()
        done = True
    except IntegrityError as e:
        # log.info('---- DATA ALREADY IN DB --- UPDATE ------')
        # log.info('{0}<---->{1}<----->{2}<------>{3}<------>{4}'.format(usage_date, cost, project_id, resource_type, usage_value))
        db_session.rollback()
        usage = Usage.query.filter_by(project_id=project_id,
                                      usage_date=usage_date,
                                      resource_type=resource_type).first()
        usage.cost = cost
        usage.usage_value = usage_value
        usage.measurement_unit = measurement_unit
        db_session.commit()
        done = True
    except Exception as e:
        log.error(' ------------- ERROR IN ADDING DATA TO THE DB ------------- {0}'.format(e[0]))
    return done

def copy_file_to_archive(filename, service, main_bucket, dest_bucket):
    resp = dict()
    try:
        log.info('Starting to move the file to {0} ---- {1}'.format(dest_bucket, filename))
        copy_object = service.objects().copy(sourceBucket=main_bucket,
                                             sourceObject=filename,
                                             destinationBucket=dest_bucket,
                                             destinationObject=filename,
                                             body={})
        resp = copy_object.execute()
        log.info('DONE Moving of file - {0} to Archive -{1} '.format(filename, dest_bucket))
        # delete_moved_file(filename, service)
    except Exception as e:
        log.error('Error in Copying the object to archive folder - {0}'.format(e[0]))
    return resp

def insert_project__table_data(data):
    project = dict()
    try:
        '''
        update the Project table with project_name with data['projectName'] if there
        else use data['projectId'] if there
        else add as support
        '''
        if 'projectNumber' in data:
            project_id = 'ID-' + str(data['projectNumber'])
            if 'projectName' in data:
                insert_done = insert_project_data(project_id, data['projectName'])
            else:
                insert_done = insert_project_data(project_id, project_id)
        else:
            project_id = 'Not Available'
            insert_done = insert_project_data(project_id, 'support')
        if not insert_done:
            log.info(data)
            raise Exception("DB Error: Information not stored")
        project = dict(message=' data has been added to db')
    except Exception as e:
        log.error('Error in inserting data into the DB -- {0}'.format(e[0]))
        db_session.rollback()
        traceback.print_exc()
    return project

def insert_usage_data(data_list, filename, service):
    usage = dict()
    try:
        data_count = 0
        total_count = 0
        log.info('---- Starting to Add/Update billing data -----')
        for data in data_list:
            total_count += 1
            date = data['startTime']
            resource_type = str(data['lineItemId']).replace("com.google.cloud/services", "").replace("com.googleapis/services", "")
            account_id = str(data['accountId'])
            usage_date = datetime.datetime.strptime(
                date.split("-")[0] + '-' + date.split("-")[1] + '-' + date.split("-")[2],
                "%Y-%m-%dT%H:%M:%S")
            # check if there is projectNumber, else add it as Not Available
            if 'projectNumber' in data:
                project_id = 'ID-' + str(data['projectNumber'])
            else:
                project_id = 'Not Available'
            if len(data['measurements']) != 0:
                usage_value = float(data['measurements'][0]['sum'])
                measurement_unit = str(data['measurements'][0]['unit'])
            else:
                usage_value = float(0)
                measurement_unit = str('none')
            # check if credits are present; if so, add them to cost
            cost = float(data['cost']['amount'])
            if 'credits' in data:
                # log.info('CREDITS PRESENT FOR THIS DATA')
                # log.info('cost before-- {0}'.format(cost))
                for credit in data['credits']:
                    cost += float(credit['amount'])
                    # log.info('{0}<---->{1}<----->{2}<------>{3}'.format(usage_date, project_id, credit['amount'], cost))
                # log.info('cost after -- {0}'.format(cost))
            if cost == 0:
                # log.info('--- COST is 0 --- NOT adding to DB')
                continue
            else:
                # log.info('INSERTING DATA INTO DB -- {0}'.format(data))
                insert_done = insert_data(usage_date, cost, project_id, resource_type, account_id, usage_value, measurement_unit)
                if not insert_done:
                    log.info(data)
                    continue
                else:
                    data_count += 1
        usage = dict(message=' data has been added to db')
        log.info('DONE adding {0} items out of {1} for file -- {2} into the db '.format(data_count, total_count, filename))
    except Exception as e:
        log.error('Error in inserting data into the DB -- {0}'.format(e[0]))
        db_session.rollback()
    return usage

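# Illustrative only: a hand-written billing line item in the shape this parser
# expects (keys taken from the code above -- startTime, lineItemId, accountId,
# measurements, cost, credits, projectNumber). The concrete values are made up;
# real billing-export payloads contain many such records.
_example_billing_item = {
    'startTime': '2016-03-01T00:00:00-08:00',
    'lineItemId': 'com.google.cloud/services/compute-engine/VmimageN1Standard_1',
    'accountId': '0000AA-BBBBBB-CCCCCC',
    'projectNumber': 123456789,
    'measurements': [{'sum': '3600', 'unit': 'seconds'}],
    'cost': {'amount': '0.05', 'currency': 'USD'},
    'credits': [{'amount': '-0.01'}],
}
# insert_usage_data([_example_billing_item], 'billing-2016-03-01.json', service)
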
def insert_usage_data(data_list, filename, service):
    usage = dict()
    # data_list is a string in csv format
    # read csv to db
    reader = csv.DictReader(data_list.splitlines(), delimiter=',')
    try:
        data_count = 0
        total_count = 0
        for data in reader:
            total_count += 1
            usage_date = datetime.datetime.strptime(data['Report Date'], '%Y-%m-%d')
            if len(data['Quantity']) != 0:
                usage_value = int(data['Quantity'])
                measurement_unit = str(data['Unit'])
            else:
                usage_value = 0
                measurement_unit = "none"
            resource_uri = str(data['Resource URI'].replace("https://www.googleapis.com/compute/v1/projects", ""))
            location = str(data['Location'])
            resource_id = str(data['ResourceId'])
            resource_type = str(data['MeasurementId']).replace("com.google.cloud/services", "")
            insert_done = insert_data(usage_date=usage_date,
                                      resource_type=resource_type,
                                      resource_id=resource_id,
                                      resource_uri=resource_uri,
                                      location=location,
                                      usage_value=usage_value,
                                      measurement_unit=measurement_unit)
            if not insert_done:
                log.info(data)
                log.debug('row not added!!!')
                continue
            else:
                log.debug('row added')
                data_count += 1
        usage = dict(message=' data has been added to db')
        log.info('DONE adding {0} items out of {1} for file -- {2} into the db '.format(data_count, total_count, filename))
    except Exception as e:
        log.error('Error in inserting data into the DB -- {0}'.format(e[0]))
        db_session.rollback()
    return usage

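# Illustrative only: a two-line usage-export CSV in the column layout this
# reader expects (Report Date, MeasurementId, Quantity, Unit, Resource URI,
# ResourceId, Location). The row values are invented placeholders.
_example_usage_csv = (
    'Report Date,MeasurementId,Quantity,Unit,Resource URI,ResourceId,Location\n'
    '2016-03-01,com.google.cloud/services/compute-engine/VmimageN1Standard_1,86400,seconds,'
    'https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/instances/vm-1,'
    '1234567890,us-central1-a\n'
)
# insert_usage_data(_example_usage_csv, 'my-project-usage-2016-03-01.csv', service)
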
def get_file(filename, service):
    file_content = dict()
    try:
        # Get Payload Data
        req = service.objects().get_media(bucket=BUCKET_NAME, object=filename)
        file_content = req.execute()
    except Exception as e:
        log.error('Error in getting the file -- {0}, {1}'.format(filename, e[0]))
    return file_content

def update_lockfile(filename, lock_value, service, bucket):
    done = False
    try:
        resource = dict(metadata=dict(lock=lock_value, startTime=str(utcnow())))
        copy_object = service.objects().copy(sourceBucket=bucket,
                                             sourceObject=filename,
                                             destinationBucket=bucket,
                                             destinationObject=filename,
                                             body=resource)
        resp = copy_object.execute()
        done = True
    except Exception as e:
        log.error(' Error while updating the lock file --- {0}'.format(e[0]))
    return done

def delete_file(filename, service):
    resp = dict()
    try:
        log.info('Starting to Delete the file {0} from {1}'.format(filename, BUCKET_NAME))
        delete_object = service.objects().delete(bucket=BUCKET_NAME, object=filename)
        resp = delete_object.execute()
        log.info('DONE Deleting file - {0} from - {1} '.format(filename, BUCKET_NAME))
    except Exception as e:
        log.error('Error in deleting the old file - {0}'.format(e[0]))
        # add code to add metadata or rename the file
    return resp

def get_file(filename, service, bucket):
    file_content = dict()
    try:
        # Get Payload Data
        req = service.objects().get_media(bucket=bucket, object=filename)
        file_content = req.execute()
    except Exception as e:
        log.error('Error in getting the file -- {0}, {1}'.format(filename, e[0]))
    return file_content

def get_project_list_data():
    log.info('In Project List Data ----')
    data = dict()
    project_list = dict()
    try:
        projects = get_distinct_projects()
        for project in projects:
            project_list[project[0]] = project[0]
        log_output('PROJECT LIST')
        log_output(project_list)
        for project in project_list:
            log.info('INSIDE LOOP')
            # Request an access token from the metadata server.
            token_data = get_access_token()
            resp_access = token_data['resp_access']
            content_access = token_data['content_access']
            if resp_access.status == 200:
                # Extract the access token from the response.
                d = json.loads(content_access)
                access_token = d['access_token']
                # Save the access token
                # log.debug('access_token -- {0}'.format(access_token))
                # Construct the request for the project details
                if project != 'Not Available':
                    project_id = project.split('-')[1]
                else:
                    project_id = 'Not Available'
                project_data = get_project_data(project_id, access_token)
                resp = project_data['resp']
                content = project_data['content']
                if resp.status == 200:
                    # log.debug('Project_data {0} -- {1}'.format(project_id, content))
                    data = json.loads(content)
                    project_list[project] = data['name']
                else:
                    log.error('Project_data Error {0} -- {1}'.format(project_id, resp.status))
            else:
                log.error('Access Token Error {0}'.format(resp_access.status))
    except Exception as e:
        log_error(e)
    return project_list

def update_lockfile(filename, lock_value, service):
    done = False
    try:
        resource = dict(metadata=dict(lock=lock_value, startTime=str(utcnow())))
        copy_object = service.objects().copy(sourceBucket=BUCKET_NAME,
                                             sourceObject=filename,
                                             destinationBucket=BUCKET_NAME,
                                             destinationObject=filename,
                                             body=resource)
        resp = copy_object.execute()
        done = True
    except Exception as e:
        log.error(' Error while updating the lock file --- {0}'.format(e[0]))
    return done

def regions_list(id):
    data = []
    items_list = []
    status = 200
    print id
    try:
        # Get the application default credentials. When running locally, these are
        # available after running `gcloud init`. When running on compute
        # engine, these are available from the environment.
        credentials = GoogleCredentials.get_application_default()

        # Construct the service object for interacting with the Compute Engine API -
        # the 'compute' service, at version 'v1'.
        # You can browse other available api services and versions here:
        # https://developers.google.com/api-client-library/python/apis/
        service = discovery.build('compute', 'v1', credentials=credentials)

        # Create a request to regions.list to retrieve the list of regions.
        fields_to_return = \
            'nextPageToken,items(name,size,contentType,metadata(my-key))'
        req = service.regions().list(project=id)

        # If you have too many items to list in one request, list_next() will
        # automatically handle paging with the pageToken.
        while req:
            resp = req.execute()
            data.append(resp)
            # print(json.dumps(resp, indent=2))
            req = service.regions().list_next(req, resp)
        for datas in data:
            items_list += datas['items']
    except Exception as e:
        log.error(' Error in getting Region Details - {0}'.format(e[0]))
        message = e[0]
        status = 500
    response = dict(data=items_list, status=status)
    return response

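# A minimal usage sketch (assumption: GoogleCredentials, discovery and log are
# imported at module level as the code above implies, Application Default
# Credentials are configured, and 'my-project' is a placeholder project ID).
def example_list_regions():
    result = regions_list('my-project')
    if result['status'] == 200:
        for region in result['data']:
            log.info('Region: {0}'.format(region.get('name')))
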
def insert_project_data(project_id, project_name):
    done = False
    log.info('{0}<---->{1}'.format(project_id, project_name))
    try:
        project = Project('other', project_id, project_name, 'other', '', '', '', 0)
        db_session.add(project)
        db_session.commit()
        done = True
    except IntegrityError as e:
        # log.info('---- Project DATA ALREADY IN DB --- UPDATE ------')
        db_session.rollback()
        project = Project.query.filter_by(project_id=project_id).first()
        project.project_name = project_name
        db_session.commit()
        done = True
    except Exception as e:
        log.error(' ------------- ERROR IN ADDING DATA TO THE DB ------------- {0}'.format(e[0]))
    return done

def process_file(filename, file_content, service):
    insert_resp = dict()
    try:
        log.info('Processing file -- {0} -- STARTING'.format(filename))
        data_list = json.loads(file_content)
        '''
        parse the json and load the data to db
        once loading is done move the file to archive folder
        todo : attach timestamp and tmp to file name while processing
        '''
        insert_resp = insert_usage_data(data_list, filename, service)
        log.info('Processing file -- {0} -- ENDING'.format(filename))
    except Exception as e:
        log.error('Error in processing the file -- {0}'.format(e[0]))
    return insert_resp

def get_filenames(resp, service, random_number, bucket):
    try:
        for key, items in resp.iteritems():
            for item in items:
                log.info('############################################################################################')
                filename = item['name']
                lock_file = check_for_lock_file(filename, random_number, service, bucket)
                if not lock_file:
                    log.info('File was not locked and hence locking it and processing the files')
                    # ARCHIVE THE FILE FIRST
                    archive_bucket = bucket + '-archive'
                    copy_resp = copy_file_to_archive(filename, service, bucket, archive_bucket)
                    log.info(copy_resp)
                    if len(copy_resp) == 0:
                        log.error(' ERROR IN COPYING FILE --- SKIPPING FILE -- {0} '.format(filename))
                    else:
                        log.info(' COPYING FILE DONE ')
                        log.info('Getting file -- {0}'.format(filename))
                        get_file_resp = get_file(filename, service, bucket)
                        if len(get_file_resp) == 0:
                            log.error('Error in getting file -- {0}'.format(get_file_resp))
                        else:
                            process_file_resp = process_file(filename, get_file_resp, service)
                            if len(process_file_resp) == 0:
                                log.error('Error in Processing file -- {0}'.format(filename))
                            else:
                                delete_resp = delete_file(filename, service, bucket)
                                # type is string if success
                                if isinstance(delete_resp, dict) and len(delete_resp) == 0:
                                    log.error(' Error in Deleting file --- {0} '.format(filename))
                else:
                    log.info(' {0} Locked --- Do Nothing -----'.format(filename))
                    continue
                log.info('############################################################################################')
    except Exception as e:
        log.error('Error in accessing File -- {0}'.format(e[0]))

def insert_project__table_data(data_list, filename, service):
    project = dict()
    try:
        data_count = 0
        total_count = 0
        log.info('---- Starting to Add/Update Project Name -----')
        for data in data_list:
            total_count += 1
            '''
            update the Project table with project_name with data['projectName'] if there
            else use data['projectId'] if there
            else add as support
            '''
            if 'projectNumber' in data:
                project_id = 'ID-' + str(data['projectNumber'])
                if 'projectName' in data:
                    insert_done = insert_project_data(project_id, data['projectName'])
                else:
                    insert_done = insert_project_data(project_id, project_id)
            else:
                project_id = 'Not Available'
                insert_done = insert_project_data(project_id, 'support')
            if not insert_done:
                log.info(data)
                continue
            else:
                data_count += 1
        project = dict(message=' data has been added to db')
        log.info('DONE adding {0} items out of {1} for file -- {2} into the db '.format(data_count, total_count, filename))
    except Exception as e:
        log.error('Error in inserting data into the DB -- {0}'.format(e[0]))
        db_session.rollback()
    return project

def insert_usage_data(data_list, filename, service):
    usage = dict()
    try:
        data_count = 0
        total_count = 0
        for data in data_list:
            total_count += 1
            date = data['startTime']
            resource_type = str(data['lineItemId']).replace("com.google.cloud/services", "").replace("com.googleapis/services", "")
            account_id = str(data['accountId'])
            usage_date = datetime.datetime.strptime(
                date.split("-")[0] + '-' + date.split("-")[1] + '-' + date.split("-")[2],
                "%Y-%m-%dT%H:%M:%S")
            # check if there is projectNumber, else add it as Not Available
            if 'projectNumber' in data:
                project_id = 'ID-' + str(data['projectNumber'])
            else:
                project_id = 'Not Available'
            if len(data['measurements']) != 0:
                usage_value = float(data['measurements'][0]['sum'])
                measurement_unit = str(data['measurements'][0]['unit'])
            else:
                usage_value = float(0)
                measurement_unit = str('none')
            # check if credits are present; if so, add them to cost
            cost = float(data['cost']['amount'])
            if 'credits' in data:
                # log.info('CREDITS PRESENT FOR THIS DATA')
                # log.info('cost before-- {0}'.format(cost))
                for credit in data['credits']:
                    cost += float(credit['amount'])
                    # log.info('{0}<---->{1}<----->{2}<------>{3}'.format(usage_date, project_id, credit['amount'], cost))
                # log.info('cost after -- {0}'.format(cost))
            if cost == 0:
                # log.info('--- COST is 0 --- NOT adding to DB')
                continue
            else:
                # log.info('INSERTING DATA INTO DB -- {0}'.format(data))
                insert_done = insert_data(usage_date, cost, project_id, resource_type, account_id, usage_value, measurement_unit)
                if not insert_done:
                    log.info(data)
                    continue
                else:
                    data_count += 1
        usage = dict(message=' data has been added to db')
        log.info('DONE adding {0} items out of {1} for file -- {2} into the db '.format(data_count, total_count, filename))
    except Exception as e:
        log.error('Error in inserting data into the DB -- {0}'.format(e[0]))
        db_session.rollback()
    return usage

def check_for_lock_file(filename, random_no, service, bucket):
    locked = True
    try:
        log.info('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        log.info('------- GET THE {0} -- {1}------'.format(filename, random_no))
        # Get Payload Data
        req = service.objects().get(bucket=bucket, object=filename)
        get_file_meta_data = req.execute()
        # check for metadata -- lock
        lock_metadata = None
        startTime_metadata = None
        startTime_metadata_day = None
        startTime_metadata_month = None
        startTime_metadata_hour = None
        hourdiff = 0
        mindiff = 0
        today = utcnow().day
        thisMonth = utcnow().month
        now_hour = utcnow().hour
        now_min = utcnow().minute
        if today < 10:
            today = '0' + str(today)
        if thisMonth < 10:
            thisMonth = '0' + str(thisMonth)
        if 'metadata' in get_file_meta_data and 'lock' in get_file_meta_data['metadata']:
            lock_metadata = str(get_file_meta_data['metadata']['lock'])
        if 'metadata' in get_file_meta_data and 'startTime' in get_file_meta_data['metadata']:
            startTime_metadata = str(get_file_meta_data['metadata']['startTime'])
            startTime_metadata_month = startTime_metadata.split('-')[1]
            startTime_metadata_day = startTime_metadata.split('-')[2].split(" ")[0]
            startTime_metadata_hour = startTime_metadata.split('-')[2].split(" ")[1].split(":")[0]
            startTime_metadata_min = startTime_metadata.split('-')[2].split(" ")[1].split(":")[1]
            # check the time difference; if the lock is stale, restart the process
            hourdiff = int(now_hour) - int(startTime_metadata_hour)
            mindiff = int(now_min) - int(startTime_metadata_min)
        log.info('METADATA -- {0} -- {1}'.format(lock_metadata, startTime_metadata))
        if lock_metadata is not None and startTime_metadata is not None \
                and startTime_metadata_day == str(today) and startTime_metadata_month == str(thisMonth) \
                and mindiff < 30:
            log.info(' Lock metadata found and is same day')
            locked = True
        else:
            log.info(' No lock metadata found or StartTime was old')
            update_done = update_lockfile(filename, random_no, service, bucket)
            if update_done:
                log.info(' Updating the lock file was done -- Recheck for the random no --{0}'.format(random_no))
                req = service.objects().get(bucket=bucket, object=filename)
                get_file_meta_data = req.execute()
                lock_metadata = str(get_file_meta_data['metadata']['lock'])
                if lock_metadata == random_no:
                    log.info(' Checking for random No done and MATCHED -- Start the process --{0}'.format(random_no))
                    log.info(' File --{0}'.format(filename))
                    locked = False
                else:
                    log.info(' Checking for random No done and DID NOT MATCH -- DO NOTHING')
                    locked = True
            else:
                log.info(' Updating the lock file was not done -- So do not do anything')
                locked = True
        log.info('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    except Exception as e:
        log.error(' Error in getting Lock file -- {0}'.format(e[0]))
    return locked

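# A minimal sketch of how the metadata-based lock above is meant to be used
# (assumptions: `service` is a Cloud Storage client built with
# discovery.build('storage', 'v1', ...) as elsewhere in this code, binascii/os
# are imported at module level, and 'my-bucket'/'report.json' are placeholder
# names). Each worker generates a random token, tries to write it into the
# object's lock metadata, and only processes the file if its own token stuck.
def example_locked_processing(service):
    random_number = binascii.hexlify(os.urandom(32)).decode()
    if not check_for_lock_file('report.json', random_number, service, 'my-bucket'):
        # this worker owns the lock; safe to process the object
        file_content = get_file('report.json', service, 'my-bucket')
        process_file('report.json', file_content, service)
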
def data_processor(job_type):
    status = 200
    message = 'Process Complete '
    startTime = datetime.datetime.now()
    lock_file = True
    try:
        bucket = BUCKET_NAME
        archive_bucket = ARCHIVE_BUCKET_NAME
        random_number = binascii.hexlify(os.urandom(32)).decode()
        log.info(' RANDOM NUMBER --- {0}'.format(random_number))

        # Get the application default credentials. When running locally, these are
        # available after running `gcloud init`. When running on compute
        # engine, these are available from the environment.
        credentials = GoogleCredentials.get_application_default()

        # Construct the service object for interacting with the Cloud Storage API -
        # the 'storage' service, at version 'v1'.
        # You can browse other available api services and versions here:
        # https://developers.google.com/api-client-library/python/apis/
        service = discovery.build('storage', 'v1', credentials=credentials)

        # Make a request to buckets.get to retrieve metadata for the specified bucket.
        req = service.buckets().get(bucket=bucket)
        resp = req.execute()
        # print(json.dumps(resp, indent=2))

        # Create a request to objects.list to retrieve a list of objects.
        fields_to_return = \
            'nextPageToken,items(name,size,contentType,metadata(my-key))'
        req = service.objects().list(bucket=bucket, fields=fields_to_return)
        file_count = 0
        log.info('Process {0} Start time --- {1}'.format(bucket, startTime))

        # If you have too many items to list in one request, list_next() will
        # automatically handle paging with the pageToken.
        while req:
            resp = req.execute()
            # print(json.dumps(resp, indent=2))
            if len(resp) == 0:
                log.info('############################################################################################')
                log.info('--------- THE BUCKET LIST IS EMPTY --------------')
                log.info('--------- NO FILES TO PROCESS --------------')
                log.info(resp)
                log.info('############################################################################################')
            else:
                get_filenames(resp, service, random_number)
            req = service.objects().list_next(req, resp)
    except Exception as e:
        log.error(' Error in getting Bucket Details - {0}'.format(e[0]))
        message = e[0]
        status = 500
    endTime = datetime.datetime.now()
    log.info('Process End time --- {0}'.format(endTime))
    elapsedTime = endTime - startTime
    time = 'Total Time to Process all the files -- {0}'.format(divmod(elapsedTime.total_seconds(), 60))
    log.info(time)
    log.info(' ARGS PASSED --- {0}'.format(job_type))
    if job_type == 'now':
        set_scheduler(os.environ.get('SCHEDULER_HOUR'), os.environ.get('SCHEDULER_MIN'))
    response = dict(data=json.dumps(message), status=status, time=time)
    return response

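# A minimal invocation sketch (assumption: this function is wired to a route or
# scheduler elsewhere in the app). 'now' is the only job_type the code above
# treats specially, by re-arming set_scheduler after the run completes.
def example_run_billing_job():
    result = data_processor('now')
    log.info('status={0} time={1}'.format(result['status'], result['time']))
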
def insert_instance_data(instance_list, zone):
    instance = dict()
    # instance_list is the 'items' list from instances().list(); store each
    # instance attribute as an (instanceId, key, value) row
    try:
        for obj in instance_list:
            log.info("--------- Processing metadata for instanceID: {0} -------------------------".format(obj['id']))
            instance_id = obj['id']
            project_name = obj['zone'].split('/')[6]
            insert_data(instance_id, 'project', project_name)
            insert_data(instance_id, 'zone', zone)
            insert_data(instance_id, 'creationTimestamp', obj['creationTimestamp'])
            insert_data(instance_id, 'selfLink', obj['selfLink'])
            insert_data(instance_id, 'status', obj['status'])
            insert_data(instance_id, 'name', obj['name'])
            insert_data(instance_id, 'machineType', obj['machineType'])
            if 'tags' in obj and 'items' in obj['tags']:
                for tag in obj['tags']['items']:
                    insert_data(instance_id, 'tags.items', tag)
            for networkInterfaces in obj['networkInterfaces']:
                insert_data(instance_id, 'networkInterfaces.network', networkInterfaces['network'])
                insert_data(instance_id, 'networkInterfaces.networkIP', networkInterfaces['networkIP'])
                for accessconfig in networkInterfaces['accessConfigs']:
                    if 'natIP' in accessconfig:
                        insert_data(instance_id, 'networkInterfaces.accessconfig.natIP', accessconfig['natIP'])
            for disk in obj['disks']:
                insert_data(instance_id, 'disks.type', disk['type'])
                insert_data(instance_id, 'disks.mode', disk['mode'])
                insert_data(instance_id, 'disks.interface', disk['interface'])
                insert_data(instance_id, 'disks.source', disk['source'])
                for license in disk['licenses']:
                    insert_data(instance_id, 'disks.license', license)
            if 'items' in obj['metadata']:
                for metadata in obj['metadata']['items']:
                    if metadata['key'] != 'ssh-keys':
                        insert_data(instance_id, 'metadata.' + metadata['key'], metadata['value'])
            for serviceAccount in obj['serviceAccounts']:
                insert_data(instance_id, 'serviceAccounts.email', serviceAccount['email'])
                for scope in serviceAccount['scopes']:
                    insert_data(instance_id, 'serviceAccounts.scope', scope)
            insert_data(instance_id, 'scheduling.onHostMaintenance', obj['scheduling']['onHostMaintenance'])
            insert_data(instance_id, 'scheduling.automaticRestart', obj['scheduling']['automaticRestart'])
            insert_data(instance_id, 'scheduling.preemptible', obj['scheduling']['preemptible'])
    except Exception as e:
        log.error('Error in inserting data into the DB -- {0}'.format(e[0]))
        db_session.rollback()
    return instance

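# Illustrative only: a trimmed instances().list() item containing just the
# fields the walker above reads. Real Compute Engine responses carry many more
# fields; all values here are invented placeholders.
_example_instance = {
    'id': '1234567890',
    'zone': 'https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a',
    'creationTimestamp': '2016-03-01T00:00:00.000-08:00',
    'selfLink': 'https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/instances/vm-1',
    'status': 'RUNNING',
    'name': 'vm-1',
    'machineType': 'https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/machineTypes/n1-standard-1',
    'tags': {'items': ['http-server']},
    'networkInterfaces': [{
        'network': 'https://www.googleapis.com/compute/v1/projects/my-project/global/networks/default',
        'networkIP': '10.240.0.2',
        'accessConfigs': [{'natIP': '203.0.113.10'}],
    }],
    'disks': [{
        'type': 'PERSISTENT',
        'mode': 'READ_WRITE',
        'interface': 'SCSI',
        'source': 'https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/disks/vm-1',
        'licenses': ['https://www.googleapis.com/compute/v1/projects/debian-cloud/global/licenses/debian-8'],
    }],
    'metadata': {'items': [{'key': 'startup-script', 'value': 'echo hello'}]},
    'serviceAccounts': [{'email': 'default', 'scopes': ['https://www.googleapis.com/auth/cloud-platform']}],
    'scheduling': {'onHostMaintenance': 'MIGRATE', 'automaticRestart': True, 'preemptible': False},
}
# insert_instance_data([_example_instance], 'us-central1-a')
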
def data_processor(job_type):
    status = 200
    message = dict(success=[], fail=[])
    startTime = datetime.datetime.now()
    lock_file = True
    project_ids = get_project_ids()
    for project in project_ids:
        try:
            project = project.split('-')[1]
            log.info('--------- PROJECT ID: {0} --------------'.format(project))
            random_number = binascii.hexlify(os.urandom(32)).decode()
            # log.info(' RANDOM NUMBER --- {0}'.format(random_number))

            # Get the application default credentials. When running locally, these are
            # available after running `gcloud init`. When running on compute
            # engine, these are available from the environment.
            credentials = GoogleCredentials.get_application_default()

            # Construct the compute service object (version v1) for interacting
            # with the API. You can browse other available API services and versions at
            # https://developers.google.com/api-client-library/python/apis/
            service = discovery.build('compute', 'v1', credentials=credentials)
            zones = service.zones()
            request = zones.list(project=project)
            # get all zones for each project
            while request is not None:
                response = request.execute()
                # get all instance metadata for each zone
                for zone in response['items']:
                    z = zone['description'].encode('ascii', 'ignore')
                    instance_req = service.instances().list(project=project, zone=z)
                    while instance_req is not None:
                        instance_res = instance_req.execute()
                        if 'items' in instance_res:
                            insert_instance_data(instance_res['items'], z)
                        instance_req = service.instances().list_next(
                            previous_request=instance_req,
                            previous_response=instance_res)
                request = zones.list_next(previous_request=request, previous_response=response)
            log.info('Process {0} Start time --- {1}'.format(project, startTime))
            # If you have too many items to list in one request, list_next() will
            # automatically handle paging with the pageToken.
            message['success'].append(project)
        except Exception as e:
            log.error(' Error in getting Project Details - {0}'.format(e))
            message['fail'].append({'project': project, 'error': str(e)})
            status = 500
    endTime = datetime.datetime.now()
    log.info('Process End time --- {0}'.format(endTime))
    elapsedTime = endTime - startTime
    time = 'Total Time to Process all the files -- {0}'.format(divmod(elapsedTime.total_seconds(), 60))
    log.info(time)
    log.info(' ARGS PASSED --- {0}'.format(job_type))
    response = dict(data=json.dumps(message), status=status, time=time)
    return response

def data_processor(job_type):
    # get list of buckets; for each project, run its own try/except
    status = 200
    message = dict(success=[], fail=[])
    startTime = datetime.datetime.now()
    lock_file = True
    # get list of project names
    project_list = db_session.query(Project.project_name).distinct()
    project_names = (item[0] for item in project_list)
    for b in project_names:
        try:
            bucket = b + "-usage-export"
            archive_bucket = bucket + '-archive'
            log.info('--------- THE BUCKET : {0} --------------'.format(bucket))
            random_number = binascii.hexlify(os.urandom(32)).decode()
            log.info(' RANDOM NUMBER --- {0}'.format(random_number))

            # Get the application default credentials. When running locally, these are
            # available after running `gcloud init`. When running on compute
            # engine, these are available from the environment.
            credentials = GoogleCredentials.get_application_default()

            # Construct the service object for interacting with the Cloud Storage API -
            # the 'storage' service, at version 'v1'.
            # You can browse other available api services and versions here:
            # https://developers.google.com/api-client-library/python/apis/
            service = discovery.build('storage', 'v1', credentials=credentials)

            # Make a request to buckets.get to retrieve metadata for the specified bucket.
            req = service.buckets().get(bucket=bucket)
            resp = req.execute()
            # print(json.dumps(resp, indent=2))

            # Create a request to objects.list to retrieve a list of objects.
            fields_to_return = \
                'nextPageToken,items(name,size,contentType,metadata(my-key))'
            req = service.objects().list(bucket=bucket, fields=fields_to_return)
            file_count = 0
            log.info('Process {0} Start time --- {1}'.format(bucket, startTime))

            # If you have too many items to list in one request, list_next() will
            # automatically handle paging with the pageToken.
            while req:
                resp = req.execute()
                # print(json.dumps(resp, indent=2))
                if len(resp) == 0:
                    log.info('############################################################################################')
                    log.info('--------- THE BUCKET LIST IS EMPTY --------------')
                    log.info('--------- NO FILES TO PROCESS --------------')
                    log.info(resp)
                    log.info('############################################################################################')
                else:
                    get_filenames(resp, service, random_number, bucket)
                req = service.objects().list_next(req, resp)
            message['success'].append(bucket)
        except Exception as e:
            log.error(' Error in getting Bucket Details - {0}'.format(e))
            message['fail'].append({'bucket': bucket, 'error': str(e)})
            status = 500
    endTime = datetime.datetime.now()
    log.info('Process End time --- {0}'.format(endTime))
    elapsedTime = endTime - startTime
    time = 'Total Time to Process all the files -- {0}'.format(divmod(elapsedTime.total_seconds(), 60))
    log.info(time)
    log.info(' ARGS PASSED --- {0}'.format(job_type))
    # if job_type == 'now':
    #     set_scheduler(os.environ.get('SCHEDULER_HOUR'), os.environ.get('SCHEDULER_MIN'))
    response = dict(data=json.dumps(message), status=status, time=time)
    return response

def check_for_lock_file(filename, random_no, service):
    locked = True
    try:
        log.info('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        log.info('------- GET THE {0} -- {1}------'.format(filename, random_no))
        # Get Payload Data
        req = service.objects().get(bucket=BUCKET_NAME, object=filename)
        get_file_meta_data = req.execute()
        # check for metadata -- lock
        lock_metadata = None
        startTime_metadata = None
        startTime_metadata_day = None
        startTime_metadata_month = None
        startTime_metadata_hour = None
        hourdiff = 0
        mindiff = 0
        today = utcnow().day
        thisMonth = utcnow().month
        now_hour = utcnow().hour
        now_min = utcnow().minute
        if today < 10:
            today = '0' + str(today)
        if thisMonth < 10:
            thisMonth = '0' + str(thisMonth)
        if 'metadata' in get_file_meta_data and 'lock' in get_file_meta_data['metadata']:
            lock_metadata = str(get_file_meta_data['metadata']['lock'])
        if 'metadata' in get_file_meta_data and 'startTime' in get_file_meta_data['metadata']:
            startTime_metadata = str(get_file_meta_data['metadata']['startTime'])
            startTime_metadata_month = startTime_metadata.split('-')[1]
            startTime_metadata_day = startTime_metadata.split('-')[2].split(" ")[0]
            startTime_metadata_hour = startTime_metadata.split('-')[2].split(" ")[1].split(":")[0]
            startTime_metadata_min = startTime_metadata.split('-')[2].split(" ")[1].split(":")[1]
            # check the time difference; if the lock is stale, restart the process
            hourdiff = int(now_hour) - int(startTime_metadata_hour)
            mindiff = int(now_min) - int(startTime_metadata_min)
        log.info('METADATA -- {0} -- {1}'.format(lock_metadata, startTime_metadata))
        if lock_metadata is not None and startTime_metadata is not None \
                and startTime_metadata_day == str(today) and startTime_metadata_month == str(thisMonth) \
                and mindiff < 30:
            log.info(' Lock metadata found and is same day')
            locked = True
        else:
            log.info(' No lock metadata found or StartTime was old')
            update_done = update_lockfile(filename, random_no, service)
            if update_done:
                log.info(' Updating the lock file was done -- Recheck for the random no --{0}'.format(random_no))
                req = service.objects().get(bucket=BUCKET_NAME, object=filename)
                get_file_meta_data = req.execute()
                lock_metadata = str(get_file_meta_data['metadata']['lock'])
                if lock_metadata == random_no:
                    log.info(' Checking for random No done and MATCHED -- Start the process --{0}'.format(random_no))
                    log.info(' File --{0}'.format(filename))
                    locked = False
                else:
                    log.info(' Checking for random No done and DID NOT MATCH -- DO NOTHING')
                    locked = True
            else:
                log.info(' Updating the lock file was not done -- So do not do anything')
                locked = True
        log.info('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    except Exception as e:
        log.error(' Error in getting Lock file -- {0}'.format(e[0]))
    return locked