Example #1
def setUp(self):
    self.old_login = self.update_connection(SFTP_CONNECTION_USER)
    self.hook = SFTPHook()
    os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
    with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as file:
        file.write('Test file')
Example #2
def pollForFiles(**kwargs):
    # Create some local-scope variables for use later in the function
    sftpName = kwargs['SFTP_Name']
    sftpConnName = kwargs['SFTP_Connection_Name']
    feedGroups = kwargs['Feed_Groups']

    # Connect to the SFTP site using credentials saved in Airflow Connections
    sourceHook = SFTPHook(ftp_conn_id=sftpConnName)

    # Create empty dictionary for storing files that match file masks
    fileMatches = {}

    # Loop through feed locations and their regex for this SFTP site.
    for i in feedGroups:
        fullPath = i['Feed_Group_Location']
        filePattern = i['Feed_Group_Regex']
        feedGroupName = i['Feed_Group_Name']

        try:
            directory = sourceHook.describe_directory(path = fullPath)
            for file in directory.keys():
                if re.match(filePattern, file):
                    fileMatches[os.path.join(fullPath, file)] = directory[file]
        except Exception as e:
            logging.error('Error attempting to poll feed group {} in directory {}'.format(feedGroupName, fullPath))
            raise e

    # If we do not find a file that matches a file mask in any of the directories, exit.
    if not fileMatches:
        return 0

    # If the client does not use trigger files or rename files after uploading to the SFTP site,
    #   we have to poll: wait for a period and then compare size/modified time to decide
    #   whether the files are ready to pull down.
    time.sleep(SLEEP_TIME)

    for j in feedGroups:
        fullPath = j['Feed_Group_Location']
        filePattern = j['Feed_Group_Regex']
        feedGroupName = j['Feed_Group_Name']
        newFileMatches = {}

        try:
            newDirResults = sourceHook.describe_directory(fullPath)
            # Add only the files that match regular expression for this feed group
            for file in newDirResults.keys():
                if re.match(filePattern, file):
                    newFileMatches[os.path.join(fullPath, file)] = newDirResults[file]

            for file in newFileMatches.keys():
                # fullFilePath = os.path.join(fullPath, file)

                if file in fileMatches.keys():
                    if newFileMatches[file]['size'] == fileMatches[file]['size'] and \
                            newFileMatches[file]['modify'] == fileMatches[file]['modify']:
                        
                        readyFile = file + '.ready'
                        
                        # If file hasn't changed size or modified time since first look, set to ready for another process to pick up and transfer.
                        sourceHook.conn.rename(file, readyFile)
                        logging.info('SFTP: {} FeedGroup: {} File: {} is ready.'.format(sftpName, feedGroupName, os.path.basename(file)))
                        
                        triggerConfig = {
                            'SFTP_Name': sftpName, 
                            'SFTP_Connection_Name': sftpConnName,
                            'File_Name': readyFile,
                        }

                        triggerConfig.update(j)
                        
                        trigger_dag(
                            dag_id = 'SingleFileTransferJob',
                            run_id = 'trig_{}'.format(timezone.utcnow().isoformat()),
                            conf = json.dumps(triggerConfig),
                            execution_date = None,
                            replace_microseconds = False
                        )
        except Exception as e:
            logging.error('Error attempting to rename files in feed group {} in directory {}'.format(feedGroupName, fullPath))
            raise e
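
A minimal sketch of how pollForFiles could be registered as a task with PythonOperator, assuming a dag object is in scope; the connection id, SFTP name, and feed-group entries below are hypothetical placeholders, not values from the example above.

from airflow.operators.python_operator import PythonOperator

poll_task = PythonOperator(
    task_id='poll_for_files',
    python_callable=pollForFiles,
    op_kwargs={
        'SFTP_Name': 'client_sftp',              # hypothetical display name used in log messages
        'SFTP_Connection_Name': 'sftp_client',   # hypothetical Airflow connection id
        'Feed_Groups': [{                        # hypothetical feed group definition
            'Feed_Group_Name': 'daily_feed',
            'Feed_Group_Location': '/incoming/daily',
            'Feed_Group_Regex': r'^feed_.*\.csv$',
        }],
    },
    dag=dag,
)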
Example #3
def createLocalTestFile(**kwargs):
    """
    Create a test file locally.
    """

    loadDirectory = '/usr/local/airflow/test_dir'
    destDirectory = '/airflow_test/postgresdb_etl_poc_flatfiles/auto'
    fileName = 'BP_STORE_{}.txt'.format(
        datetime.now().strftime('%Y%m%d%H%M%S'))
    fullPath = os.path.join(loadDirectory, fileName)
    streetNames = [
        'Main St', 'Washington Ave', 'Dover Ln', 'Marshall Pkwy',
        'Pinecone Dr', 'Corona Wy'
    ]
    cityNames = [
        'Dallas', 'Lubbock', 'Denver', 'Raleigh', 'Tallahassee', 'Bozeman'
    ]
    states = ['TX', 'CA', 'WA', 'AL', 'NC', 'NE']
    statuses = ['OPEN', 'CLOSE', 'OPEN', 'OPEN', 'OPEN', 'OPEN']

    rowList = []

    for i in range(random.randint(4, 20)):
        tempDict = {}
        tempDict['storeNumber'] = random.randint(1000, 1100)
        tempDict['storeName'] = 'Store {}'.format(getRandomString(10))
        tempDict['storeAddress1'] = '{} {}'.format(random.randint(1, 9999),
                                                   random.choice(streetNames))
        tempDict['storeAddress2'] = None
        tempDict['storeCity'] = random.choice(cityNames)
        tempDict['storeState'] = random.choice(states)
        tempDict['storeZip'] = '{}-{}'.format(
            str(random.randint(0, 99999)).zfill(5),
            str(random.randint(0, 9999)).zfill(4))
        tempDict['storeStatus'] = random.choice(statuses)
        rowList.append(tempDict)

    csv_columns = [
        'storeNumber', 'storeName', 'storeAddress1', 'storeAddress2',
        'storeCity', 'storeState', 'storeZip', 'storeStatus'
    ]

    try:
        with open(fullPath, 'w') as f:
            writer = csv.DictWriter(f, fieldnames=csv_columns)
            writer.writeheader()
            for data in rowList:
                writer.writerow(data)

        logging.info('Wrote {} test rows to {}'.format(len(rowList), fullPath))
    except IOError:
        logging.error('Issue writing to file {}'.format(fullPath))
        raise

    sourceHook = SFTPHook(ftp_conn_id='kub2VM')

    try:
        sourceHook.store_file(os.path.join(destDirectory, fileName), fullPath)
    except Exception:
        logging.error(
            'Trouble with the store_file step. Local file: {} -- Remote path: {}'.format(
                fullPath, os.path.join(destDirectory, fileName)))
        raise
    finally:
        if sourceHook.conn:
            sourceHook.close_conn()
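
If a check is wanted after the upload in Example #3, a small sketch using the hook's list_directory method could look like the following; it reuses the 'kub2VM' connection id from the example, and the helper name is hypothetical.

def uploadSucceeded(remoteDir, fileName):
    hook = SFTPHook(ftp_conn_id='kub2VM')
    try:
        # list_directory returns the names of the entries in the remote directory
        return fileName in hook.list_directory(remoteDir)
    finally:
        if hook.conn:
            hook.close_conn()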
Example #4
def test_no_host_key_check_default(self, get_connection):
    connection = Connection(login='******', host='host')
    get_connection.return_value = connection
    hook = SFTPHook()
    self.assertEqual(hook.no_host_key_check, False)
Example #5
def __init__(self, path, sftp_conn_id='sftp_default', *args, **kwargs):
    super(SFTPSensor, self).__init__(*args, **kwargs)
    self.path = path
    self.hook = SFTPHook(sftp_conn_id)
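
Example #5 shows only the sensor's constructor; a minimal sketch of a matching poke method, assuming get_mod_time raises IOError while the remote file is absent and that self.log is available from the base sensor, might look like:

def poke(self, context):
    self.log.info('Poking for %s', self.path)
    try:
        # get_mod_time raises IOError until the remote file exists
        self.hook.get_mod_time(self.path)
        return True
    except IOError:
        return False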
Example #6
def setUp(self):
    configuration.load_test_config()
    self.hook = SFTPHook()
    os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
    with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
        f.write('Test file')
Example #7
def __init__(self, filepath, filepattern, sftp_conn_id='sftp_default', *args, **kwargs):
    super(SFTPSensor, self).__init__(*args, **kwargs)
    self.filepath = filepath
    self.filepattern = filepattern
    self.hook = SFTPHook(sftp_conn_id)
Example #8
def get_file(**kwargs):
    conn = SFTPHook('sftp_novatus')
    # describe_directory returns a dict keyed by file name; pull each listed file down to /tmp
    d = conn.describe_directory('/outgoing/')
    for file_name in d:
        conn.retrieve_file(f"/outgoing/{file_name}", f"/tmp/{file_name}")
def execute(self, context):
    conn = SFTPHook(ftp_conn_id=self.conn_id)
    my_conn = conn.get_conn()
    # "Touch" the remote file: open it in append mode (creating it if needed) and close the handle
    my_conn.sftp_client.file(self.file_path, 'a+').close()
def _create_hook(self):
    """Return connection hook."""
    return SFTPHook(ftp_conn_id=self.ftp_conn_id)
def setUp(self):
    self.hook = SFTPHook()
    os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
    with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as file:
        file.write('Test file')
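
The setUp snippets above create a temporary directory and file; a matching tearDown sketch (assuming shutil and os are imported) would remove them again:

def tearDown(self):
    shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
    os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))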