Example 1
    def bulk_insert_csv(self, file_path, table_name, cols):
        try:
            # Read only the requested columns from the CSV file
            df = pd.read_csv(file_path, encoding='utf8', usecols=cols)
            df = df.values.tolist()
            if not df:
                logger.info('No entries to insert into database.')
                return
            logger.info('Successfully read {} rows from CSV file {}'.format(
                len(df), file_path))
        except pd.errors.EmptyDataError as ex:
            logger.error(ex)
            raise ex
        try:
            column_str = str(tuple(cols)).replace("'", "\"")
            wildcard_str = str(tuple(map(lambda x: "?",
                                         cols))).replace("'", "")
            query_template = 'INSERT INTO {} {} VALUES {}'.format(
                table_name, column_str, wildcard_str)
            logger.debug(f'executemany query template: \'{query_template}\'')
            # Perform the bulk insert into RDS
            logger.debug(df)
            self.cursor.executemany(query_template, df)
            self.cursor.commit()
            logger.info('Insert success.')
        except pyodbc.Error as ex:
            logger.error(ex)
            raise ex
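To make the string manipulation above easier to follow, here is a minimal standalone sketch of the template it produces; the table and column names are placeholders for illustration, not names from the original project:

cols = ['id', 'name', 'value']
column_str = str(tuple(cols)).replace("'", "\"")                      # -> ("id", "name", "value")
wildcard_str = str(tuple(map(lambda x: "?", cols))).replace("'", "")  # -> (?, ?, ?)
print('INSERT INTO {} {} VALUES {}'.format('metrics', column_str, wildcard_str))
# INSERT INTO metrics ("id", "name", "value") VALUES (?, ?, ?)

Note that with a single column the tuple repr keeps a trailing comma, e.g. ("id",) and (?,), which is not valid SQL, so this construction implicitly assumes at least two columns.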
    def _get_file_id(self, remote_filepath, folder_id=None):
        """
        Gets the fileId of the first file matching the file name specified

        :param remote_filepath: path of the file in the drive whose fileId is to be retrieved
        :return: file_id of the file if it exists
        """
        folder_name, file_name = os.path.split(remote_filepath)
        if not folder_id:
            folder_id = self._get_folder_id(folder_name)
        logger.debug(f'searching for {file_name} in {folder_name}')
        page_token = None
        matches = []
        while True:
            response = self.drive.files().list(q=f"name='{file_name}' and '{folder_id}' in parents and trashed = false",
                                               spaces='drive',
                                               fields='nextPageToken, files(id, name, parents)',
                                               pageToken=page_token).execute()
            results = response.get('files', [])
            matches += results
            page_token = response.get('nextPageToken', None)
            if page_token is None:
                break
            if not matches:
                logger.debug(f'{file_name} not found yet, searching next page ...')
        if len(matches) > 1:
            logger.critical(f'Duplicates of {remote_filepath} found. File paths should be unique')
            raise DuplicateError('File paths should be unique')
        elif len(matches) == 1:
            return matches[0]['id']
        else:
            raise FileNotFoundError(f'{folder_name}/{file_name} does not exist')
    def _get_folder_id(self, folder_name):
        """ Retrieves the folder id of the specified folder

        :param folder_name: Name of the folder whose id is to be retrieved
        :return: Id of the specified folder
        """
        if not folder_name or folder_name == 'root':
            return 'root'
        # Search for folder id in Drive
        page_token = None
        folders = []
        while True:
            response = self.drive.files().list(
                q=f"trashed = false and mimeType='application/vnd.google-apps.folder' and name='{folder_name}'",
                spaces='drive',
                fields='nextPageToken, files(id, name)',
                pageToken=page_token).execute()
            for folder in response.get('files', []):
                folders.append(folder)
            page_token = response.get('nextPageToken', None)
            if page_token is None:
                break

        if not folders:
            logger.debug(f'Unable to find folder named {folder_name}')
            raise FileNotFoundError(f'{folder_name} does not exist')

        elif len(folders) != 1:
            raise DuplicateError(f'Multiple folders with the name \'{folder_name}\' found. '
                                 f'Folder names should be unique.')

        folder = folders[0]
        logger.debug(f'{folder["name"]}[{folder["id"]}]')
        return folder["id"]
    def authenticate_by_auth_flow(self):
        """
        Attempt to authenticate the GDrive v3 API using the authorization flow specified in the client secrets file
        """
        logger.debug(f'Attempting to authenticate by InstalledAppFlow @ {self.client_secrets_path} ...')
        flow = InstalledAppFlow.from_client_secrets_file(self.client_secrets_path, SCOPES)
        credentials = flow.run_local_server(port=0)
        # Save the credentials for the next run
        if credentials.valid:
            self.credentials = credentials
            with open(self.token_path, 'wb') as token:
                p_dump(self.credentials, token)
            return True
        return False
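The `p_dump` and `p_load` helpers are not defined in any of these snippets; given the binary file modes and the token.pickle comment in `authenticate_by_token` below, they are presumably aliases for the standard library pickle functions, e.g. (assumption):

# Assumed import aliases, presumably declared near the top of the original module
from pickle import dump as p_dump, load as p_load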
Example 5
    def __init__(self, db_endpoint, db_name, db_uid, db_pw):
        connection_string = ('DRIVER={ODBC Driver 17 for SQL Server}; '
                             f'SERVER={db_endpoint}; '
                             f'DATABASE={db_name}; '
                             f'UID={db_uid}; '
                             f'PWD={db_pw}')

        try:
            # Note: the connection string contains the password, so logging it
            # at debug level can leak credentials into log output.
            logger.debug(connection_string)
            self.conn = pyodbc.connect(connection_string)
            self.cursor = self.conn.cursor()
            logger.info('Able to connect.')
        except pyodbc.Error as ex:
            logger.error('Failed to connect.')
            raise ex
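Putting the pieces together, usage of the database class presumably looks like the sketch below; the class name `RdsClient`, the environment variable names, and the CSV/table/column names are illustrative placeholders rather than names from the original project:

import os

# Hypothetical usage sketch built from the __init__ and bulk_insert_csv signatures above.
client = RdsClient(db_endpoint=os.environ['DB_ENDPOINT'],
                   db_name=os.environ['DB_NAME'],
                   db_uid=os.environ['DB_UID'],
                   db_pw=os.environ['DB_PW'])
client.bulk_insert_csv(file_path='data/metrics.csv',
                       table_name='metrics',
                       cols=['id', 'name', 'value'])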
    def authenticate_by_token(self):
        """
        Attempt to authenticate the GDrive v3 API using a saved token (if one exists)
        """
        logger.debug(f'Attempting to authenticate by token @ {self.token_path} ...')
        # The file token.pickle stores the user's access and refresh tokens, and is created automatically when the
        # authorization flow completes for the first time.
        if os.path.isfile(self.token_path):
            with open(self.token_path, 'rb') as token:
                try:
                    credentials = p_load(token)
                    if credentials.expired and credentials.refresh_token:
                        logger.debug('Refreshing expired credentials ...')
                        credentials.refresh(Request())
                    if credentials.valid:
                        self.credentials = credentials
                        return True
                except AttributeError as ex:
                    logger.debug(f'Unable to deserialize {self.token_path} as {Credentials.__qualname__}')
                    raise ex
        elif os.path.isdir(self.token_path):
            raise IsADirectoryError(f'Serialized token file was expected. \'{self.token_path}\' is a directory')
        elif not os.path.exists(self.token_path):
            raise FileNotFoundError(f'Serialized token file was expected. No such file: \'{self.token_path}\'')
        return False
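A caller would presumably chain the two authentication paths, trying the cached token first and falling back to the interactive consent flow. A hypothetical wrapper (the method name `authenticate` is not taken from the original code) might look like this:

    # Hypothetical convenience wrapper around the two methods shown above:
    # try the cached token first, then fall back to the interactive flow.
    def authenticate(self):
        try:
            if self.authenticate_by_token():
                return True
        except (FileNotFoundError, IsADirectoryError, AttributeError) as ex:
            logger.debug(f'Token authentication failed ({ex}); falling back to auth flow ...')
        return self.authenticate_by_auth_flow()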