def bulk_insert_csv(self, file_path, table_name, cols):
    """
    Bulk-insert rows from a CSV file into the given database table.

    :param file_path: path of the CSV file to read
    :param table_name: name of the target table
    :param cols: column names to read from the CSV and insert, in order
    :raises pd.errors.EmptyDataError: if the CSV file is empty
    :raises pyodbc.Error: if the insert fails
    """
    try:
        # Reading from CSV file (only the requested columns)
        df = pd.read_csv(file_path, encoding='utf8', usecols=cols)
        rows = df.values.tolist()
        if not rows:
            logger.info('No entries to insert into database.')
            return
        logger.info('Successfully read {} rows from CSV file {}'.format(
            len(rows), file_path))
    # pd.io.common.EmptyDataError is a deprecated alias; pd.errors is the stable location
    except pd.errors.EmptyDataError as ex:
        logger.error(ex)
        raise ex
    try:
        # Build '("a", "b")' and '(?, ?)' with join: the old str(tuple(...))
        # trick emitted an invalid trailing comma for a single column ('("a",)')
        column_str = '({})'.format(', '.join('"{}"'.format(c) for c in cols))
        wildcard_str = '({})'.format(', '.join('?' for _ in cols))
        query_template = 'INSERT INTO {} {} VALUES {}'.format(
            table_name, column_str, wildcard_str)
        logger.debug(f'executemany query template: \'{query_template}\'')
        # Performing Bulk Insert into RDS
        logger.debug(rows)
        self.cursor.executemany(query_template, rows)
        self.cursor.commit()
        logger.info('Insert success.')
    except pyodbc.Error as ex:
        logger.error(ex)
        raise ex
def _get_file_id(self, remote_filepath, folder_id=None):
    """
    Gets the fileId of the first file matching the file name specified

    :param remote_filepath: path of the file in the drive whose fileId is to be retrieved
    :param folder_id: id of the parent folder; resolved from the path when omitted
    :return: file_id of the file if it exists
    """
    folder_name, file_name = os.path.split(remote_filepath)
    # Resolve the parent folder id from the path component when not supplied
    folder_id = folder_id or self._get_folder_id(folder_name)
    logger.debug(f'searching for {file_name} in {folder_name}')
    candidates = []
    token = None
    # Walk every result page until Drive stops returning a continuation token
    while True:
        page = self.drive.files().list(
            q=f"name='{file_name}' and '{folder_id}' in parents and trashed = false",
            spaces='drive',
            fields='nextPageToken, files(id, name, parents)',
            pageToken=token).execute()
        candidates.extend(page.get('files', []))
        token = page.get('nextPageToken', None)
        if token is None:
            break
        logger.debug(f'{file_name} not found, searching in next page...')
    if len(candidates) > 1:
        logger.critical(f'Duplicates of {remote_filepath} found. File paths should be unique')
        raise DuplicateError('File paths should be unique')
    if not candidates:
        raise FileNotFoundError(f'{folder_name}/{file_name} does not exist')
    return candidates[0]['id']
def _get_folder_id(self, folder_name):
    """
    Retrieves the folder id of the specified folder

    :param folder_name: Name of the folder whose id is to be retrieved
    :return: Id of the specified folder
    :raises FileNotFoundError: if no folder with that name exists
    :raises DuplicateError: if more than one folder shares the name
    """
    # An empty path component (or 'root') maps to Drive's root folder
    if not folder_name or folder_name == 'root':
        return 'root'
    # Search for folder id in Drive, following pagination tokens
    page_token = None
    folders = []
    while True:
        response = self.drive.files().list(
            q=f"trashed = false and mimeType='application/vnd.google-apps.folder' and name='{folder_name}'",
            spaces='drive',
            fields='nextPageToken, files(id, name)',
            pageToken=page_token).execute()
        for folder in response.get('files', []):
            folders.append(folder)
        page_token = response.get('nextPageToken', None)
        if page_token is None:
            break
    if not folders:
        # Fixed: missing space between 'named' and the folder name in the log message
        logger.debug(f'Unable to find folder named {folder_name}')
        raise FileNotFoundError(f'{folder_name} does not exist')
    elif len(folders) != 1:
        raise DuplicateError(f'Multiple folders with the name \'{folder_name}\' found. '
                             f'Folder names should be unique.')
    folder = folders[0]
    logger.debug(f'{folder["name"]}[{folder["id"]}]')
    return folder["id"]
def authenticate_by_auth_flow(self):
    """
    Attempt to authenticate GDrive v3 API using authorization flow as specified in client secrets file

    :return: True if valid credentials were obtained and persisted, False otherwise
    """
    # Fixed typo 'Attemping' -> 'Attempting' (matches the token-auth log wording)
    logger.debug(f'Attempting to authenticate by InstalledAppFlow @ {self.client_secrets_path} ...')
    flow = InstalledAppFlow.from_client_secrets_file(self.client_secrets_path, SCOPES)
    # Runs a local web server to receive the OAuth redirect; port=0 picks a free port
    credentials = flow.run_local_server(port=0)
    # Save the credentials for the next run
    if credentials.valid:
        self.credentials = credentials
        with open(self.token_path, 'wb') as token:
            p_dump(self.credentials, token)
        return True
    return False
def __init__(self, db_endpoint, db_name, db_uid, db_pw):
    """
    Open a pyodbc connection and cursor to the given SQL Server database.

    :param db_endpoint: database server endpoint/hostname
    :param db_name: name of the database to connect to
    :param db_uid: user id for authentication
    :param db_pw: password for authentication
    :raises pyodbc.Error: if the connection cannot be established
    """
    # Double braces produce the literal { } required around the driver name
    connection_string = (f'DRIVER={{ODBC Driver 17 for SQL Server}}; '
                         f'SERVER={db_endpoint}; '
                         f'DATABASE={db_name}; '
                         f'UID={db_uid}; '
                         f'PWD={db_pw}')
    try:
        # Never log the raw connection string: it contains the password
        logger.debug(connection_string.replace(f'PWD={db_pw}', 'PWD=***'))
        self.conn = pyodbc.connect(connection_string)
        self.cursor = self.conn.cursor()
        logger.info('Able to connect.')
    except pyodbc.Error as ex:
        logger.error('Failed to connect.')
        raise ex
def authenticate_by_token(self):
    """
    Attempt to authenticate GDrive v3 API using saved token (if one exists)

    :return: True if valid credentials were loaded (refreshing them if expired), False otherwise
    :raises AttributeError: if the token file cannot be unserialized as credentials
    :raises IsADirectoryError: if the token path points at a directory
    :raises FileNotFoundError: if the token path does not exist
    """
    logger.debug(f'Attempting to authenticate by token @ {self.token_path} ...')
    # The file token.pickle stores the user's access and refresh tokens, and is created automatically when the
    # authorization flow completes for the first time.
    if os.path.isfile(self.token_path):
        with open(self.token_path, 'rb') as token:
            try:
                credentials = p_load(token)
                if credentials.expired and credentials.refresh_token:
                    logger.debug('Refreshing expired credentials ...')
                    credentials.refresh(Request())
                if credentials.valid:
                    self.credentials = credentials
                    return True
            except AttributeError as ex:
                # Fixed: Credentials.__class__.__qualname__ is 'type' (the metaclass);
                # the class's own qualified name is Credentials.__qualname__
                logger.debug(f'Unable to unserialize {self.token_path} as {Credentials.__qualname__}')
                raise ex
    elif os.path.isdir(self.token_path):
        raise IsADirectoryError(f'Serialized token file was expected. \'{self.token_path}\' is a directory')
    elif not os.path.exists(self.token_path):
        raise FileNotFoundError(f'Serialized token file was expected. No such file: \'{self.token_path}\'')
    # Reached when the token loaded but is not (and cannot be refreshed to be) valid
    return False