예제 #1
0
 def close(self):
     """Close the Vertica connection if it is currently open."""
     conn = self.connection
     if not conn.opened():
         return
     logger.info(' connection closed.')
     conn.close()
예제 #2
0
    def test_schema_versions(self):
        """Each registered schema version must match its avro test file."""
        logger.info("Schemas")
        name = 'property'
        self.register = Registry(path_configs)

        raw_schemas = [_avro_test_1, _avro_test_2, _avro_test_3,
                       _avro_test_4, _avro_test_5]
        for version, raw in enumerate(raw_schemas, start=1):
            self.assertEqual(self.register.get(name, version=version),
                             schema.Parse(raw))

        # Version 6 is not registered and must raise.
        self.assertRaises(SchemaVersionNotFound,
                          lambda: self.register.get(name, version=6))
예제 #3
0
    def get(self, name=None, version=VERSION):
        """
        Return the parsed avro schema registered under ``name``/``version``.

        Results are cached in ``self.cache_schemas`` under the key
        ``"<name>_<version>"`` so each schema file is read and parsed once.

        Args:
            name (str): schema (directory) name inside the registry path.
            version: schema version; ``<version>.avsc`` must exist in the
                schema directory.

        Raises:
            SchemaNotFound: the schema directory does not exist.
            SchemaVersionNotFound: the directory exists but contains no
                ``<version>.avsc`` file.
        """
        key = '%s_%s' % (name, str(version))
        _schema = os.path.join(self.path, name)

        if key in self.cache_schemas:
            logger.debug('key : %s ', key)
            return self.cache_schemas[key]

        if not os.path.exists(_schema):
            raise SchemaNotFound

        element = os.listdir(_schema)
        avro_file = '%s.avsc' % version
        logger.info("Files - > %s" % sorted(element))
        if avro_file not in element:
            raise SchemaVersionNotFound

        _file = os.path.join(self.path, name, avro_file)
        try:
            with open(_file, 'rb') as f:
                data = f.read()
        except IOError as e:
            # Preserve the original best-effort behavior: log and fall
            # through (implicitly returning None) instead of propagating.
            logger.warning("See exception below; skipping file %s", _file)
            logger.exception(e)
            return None
        # Parse once; the original parsed the same bytes twice (once for
        # the cache, once for the return value).
        parsed = avro.schema.Parse(data)
        self._cache(key, parsed)
        return parsed
예제 #4
0
 def execute(self, context):
     """
     Copy the CSV at ``self.source`` into ``self.table`` via Postgres.

     Falls back to ``self.dag_params['source']`` when no source was
     given at construction time.

     Returns:
         Whatever ``copy_expert`` returns for the copy operation.
     """
     if self.source is None:
         self.source = self.dag_params['source']
     # start the Postgres client
     client = Postgres()
     # Save the data into Postgres from the csv path; call the method on
     # the instance instead of the unbound-style Postgres.copy_expert().
     result = client.copy_expert(self.table, self.source)
     logger.info("The CSV has been saved successfully")
     return result
예제 #5
0
    def execute(self, context):
        """
        Upload the files under ``self.source`` to S3 at
        ``self.destination`` and return the local source path.

        Falls back to ``self.dag_params['source']`` when no source was
        given at construction time.
        """
        if self.source is None:
            self.source = self.dag_params['source']

        logger.info(self.destination)
        s3_client = ClientS3(s3['bucket'])
        # The upload return value was bound to an unused local in the
        # original; the operator deliberately returns the local path.
        s3_client.upload_multiple(self.source, self.destination,
                                  self.extension)
        logger.info("The files have been saved successfully")
        return self.source
예제 #6
0
 def test_upload(self):
     """Upload a single fixture file and expect no exception."""
     file_to_send = os.path.join(PATH, 'files', 'file1.txt')
     logger.info('Files path %s' % file_to_send)
     passed = False
     try:
         self.client.upload(file_to_send, 'file1.txt')
         passed = True
     except Exception as e:
         logger.info(f"No such config file in {str(e)}")
     assert passed
예제 #7
0
 def list(self):
     """
     Return the keys of every object in the bucket, logging each one.
     """
     bucket = self.clientS3.Bucket(self.bucket)
     keys = []
     for obj in bucket.objects.all():
         logger.info('File > %s' % obj.key)
         keys.append(obj.key)
     return keys
예제 #8
0
    def test_download(self):
        """
        Download ``file1.txt`` and flag the test as passed only when no
        exception escapes the call.
        """
        path_down = os.path.join(PATH, 'files', 'file1-down.txt')
        passed = False
        try:
            # Bug fix: the download call was OUTSIDE the try block, so the
            # except branch could never fire and the test was a no-op.
            self.client.download('file1.txt', path_down)
            passed = True
        except Exception as e:
            passed = False
            logger.info(f"No such config file' in {str(e)}")

        assert passed
예제 #9
0
    def test_serialize_avro(self):
        """Serialized avro output must contain the message bytes."""
        logger.info("testing")

        message = "TEXT INTO MESSAGE"
        payload = {
            "name": message,
            "favorite_color": "111",
            "favorite_number": random.randint(0, 10)
        }
        encoded = AvroSerializer(NAME, version=VERSION).serialize(payload)
        self.assertIn(message.encode("utf-8"), encoded)
예제 #10
0
 def test_read(self):
     """
     Run ``read`` on ``file_path`` and verify a non-None result; a
     missing-file error also counts as a pass.
     """
     logger.info("Read")
     passed = False
     try:
         content = read(file_path)
         self.assertIsNotNone(content)
         passed = True
     except Exception as e:
         passed = 'No such file' in str(e)
     self.assertEqual(passed, True)
예제 #11
0
 def get_folder_id(self, name):
     """
     Return the Drive id of the first folder matching ``name``, or
     None when no folder matches.
     """
     folder_query = ("name = '{0}'".format(name) +
                     " and mimeType = 'application/vnd.google-apps.folder'")
     wanted_fields = ("nextPageToken, files(id, name, webContentLink, " +
                      "createdTime, modifiedTime)")
     results = self.service.files().list(
         pageSize=10,
         q=folder_query,
         corpora="user",
         fields=wanted_fields).execute()
     matches = results.get('files', [])
     logger.info(matches)
     return matches[0]['id'] if matches else None
예제 #12
0
    def test_list(self):
        """
        Listing the bucket must succeed without raising.
        """
        try:
            contents = self.client.list()
            logger.info('list s3 > %s' % contents)
            passed = True
        except Exception as e:
            passed = False
            logger.info(f"No such config file in {str(e)}")

        assert passed
예제 #13
0
    def test_create(self):
        """
        Run ``create`` against a live URL and compare one byte of the
        payload with a direct ``requests.get`` of the same URL.
        """
        logger.setLevel(logging.DEBUG)
        logger.info("testing")
        url = 'https://www.facebook.com/elvikito'
        params = {}

        created = create(url, params)
        self.assertIsNotNone(created)

        expected = requests.get(url, params=params).content
        self.assertEqual(created[1], expected[1])
예제 #14
0
    def connect(self):
        """
        Create a connection or return the current one.

        Returns:
            The live database connection.

        Raises:
            Exception: re-raised when the connection attempt fails.
        """
        if self.connection is not None:
            logger.info(" connection: %s " % (self.connection is not None))
            return self.connection
        try:
            self.connection = DataPostgres.connect(**self.options)
        except Exception as e:
            # Bug fix: Exception.message does not exist in Python 3 and
            # raised AttributeError here; format the exception itself.
            logger.critical("Unable to connect to DB: {0}".format(e))
            raise

        return self.connection
예제 #15
0
    def __init__(self, lookups=None, *args, **kwargs):
        """
        Operator to make accesible xcom params in `self.dag_params`.
        The `lookup` param, maps how the xcom params are going to be stored.
        It uses the keys as param keys in self.dag_params and the values as
        the taskids.
        I.E::
            t = XComParams(lookups={'var1': 'taskid_1'})

        In function `execute` the `self.dag_params` var will be populated as
        a dictionary:

        >>> t.dag_params
        {'var1': 'what taskid_id task returned in `execute` function'}
        """
        logger.info(kwargs)
        # Bug fix: a literal ``{}`` default is shared across every call
        # (mutable default argument); use None as the sentinel instead.
        self.lookups = {} if lookups is None else lookups
        BaseOperator.__init__(self, *args, **kwargs)
예제 #16
0
    def download(self, gpath, path):
        """
        Download every Drive file whose name contains ``gpath`` into the
        local directory ``path``.
        """
        # Call listfiles() once; the original issued the remote listing
        # twice per call.
        items = self.listfiles()
        if items is None:
            logger.info('No files found.')
            return
        for item in items:
            if gpath not in item['name']:
                continue
            request = self.service.files().get_media(fileId=item['id'])
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                logger.info(int(status.progress() * 100))

            # Context manager guarantees the handle is closed even when
            # the write fails (the original used open/close manually).
            with open(path + '/' + item['name'], 'wb') as f:
                f.write(fh.getvalue())
예제 #17
0
    def upload_recursive(self, path, s3path, extension=None):
        '''
        Upload recursively all files inside the directory, including the
        files in subdirectories of the path.

        Args:
            path (str): The directory path.
            s3path (str): The s3 path to put the directory.
            extension (str): Filter through extension of the files to be
                             uploaded.

        Returns:
            int: number of files scheduled for upload.
        '''
        # Bug fix: ``num_files`` was referenced but never assigned, which
        # raised NameError on every call; count the walked files here.
        num_files = 0
        for root, dirs, files in os.walk(path):
            d = root.replace(path, '')
            d = d[1:] if d.startswith('/') else d
            s3path_to_load = os.path.join(s3path, d)
            self.upload_multiple(root, s3path_to_load, extension=extension)
            # NOTE(review): assumes upload_multiple uploads every file in
            # ``root`` matching ``extension`` — confirm against its
            # implementation.
            if extension is None:
                num_files += len(files)
            else:
                num_files += sum(1 for f in files if f.endswith(extension))

        logger.info(f"Files uploaded {num_files}")
        return num_files
예제 #18
0
    def __init__(self, table, source=None, *args, **kwargs):
        """
        Save the csv path found in ``source`` into the table ``table``.

        Insert data into table `{table}` all csv files found in
        source, source should be accesible thourhg `self.dag_params`
        if not passed.

        NOTE(review): the docstring and log message mention Vertica but
        the class is PopulatePostgres — confirm the intended backend.

        Args:
            table (str): destination table name
            source (str): path to csv file, in case of None the class
                should look at `dag_params`
        """
        # The second triple-quoted string that followed the docstring was
        # a dead no-op statement; its content is merged above.
        logger.info("Starting to save the CSV to Vertica")

        super(PopulatePostgres, self).__init__(*args, **kwargs)
        self.source = source
        self.table = table
예제 #19
0
 def test_upload_multiple(self):
     """Upload a directory of txt files and verify both keys exist."""
     src_dir = os.path.join(PATH, 'files', 'multiple')
     dst_dir = os.path.join('test', 'multiple')
     logger.info('Files path %s' % src_dir)
     uploaded = []
     try:
         self.client.upload_multiple(src_dir, dst_dir, extension='txt')
         bucket = self.client.clientS3.Bucket(self.client.bucket)
         uploaded = [obj._key for obj in bucket.objects.all()]
         passed = True
     except Exception as e:
         logger.error(e)
         passed = False
     assert passed
     assert 'test/multiple/file1.txt' in uploaded
     assert 'test/multiple/file2.txt' in uploaded
예제 #20
0
    def connect(self):
        """
        Create a connection or return the current one, reconnecting when
        the existing connection has been closed.

        Returns:
            The live database connection.

        Raises:
            Exception: re-raised when the connection attempt fails.
        """
        if self.connection is not None:
            logger.info(" connection: %s " % (self.connection is not None))
            if not self.connection.opened():
                logger.info("connection is closed")
                return self.reconect()
            # The connection is open at this point; no second opened()
            # check is needed (the original tested it twice).
            return self.connection
        try:
            self.connection = connect(**self.options)
        except Exception as e:
            # Bug fix: Exception.message does not exist in Python 3 and
            # raised AttributeError here; format the exception itself.
            logger.critical("Unable to connect to DB: {0}".format(e))
            raise

        return self.connection
예제 #21
0
    def upload(self, path, gpath):
        """
        Upload the local file at ``path`` to Drive, attaching it to the
        folder ``gpath`` when that folder exists; return the new file id
        (or an empty list when the upload failed).
        """
        mime = MimeTypes()
        basename = os.path.basename(path)
        file_metadata = {
            'name': basename,
        }
        if self.get_item_id(gpath) is not None:
            file_metadata['parents'] = [self.get_folder_id(gpath)]

        media = MediaFileUpload(path,
                                mimetype=mime.guess_type(basename)[0],
                                resumable=True)
        id_file = []
        try:
            created = self.service.files().create(
                body=file_metadata,
                media_body=media,
                fields='id').execute()
            id_file = created.get('id')
        except HttpError:
            logger.info('corrupted file')
        return id_file
예제 #22
0
    def __init__(self, path=None):
        """
        Resolve the registry path: explicit argument first, then the
        PIPE_SCHEMA_REGISTRY environment variable, then the default
        location under the user's home directory.
        """
        logger.info(f"++ Registry.init")
        self.cache_schemas = {}

        if path is not None:
            self.path = path
        else:
            env_path = os.environ.get('PIPE_SCHEMA_REGISTRY')
            self.path = env_path or os.path.join(HOME, '.pipeutils',
                                                 'registry')

        logger.info(f"   path: {os.environ.get('PIPE_SCHEMA_REGISTRY')}")
        logger.info(f"   path: {self.path}")
예제 #23
0
 def close(self):
     """Close any connection to vertica, logging before the close."""
     logger.info(' connection closed.')
     connection = self.connection
     return connection.close()
예제 #24
0
import unittest
import os
import logging
import json

from pipeutils.avro import Registry, SchemaVersionNotFound, SchemaNotFound
from pipeutils import logger
from avro import schema

# Module-level test fixtures: registry path next to this file and a
# sample avro schema string.
logger.setLevel(logging.DEBUG)
path = os.path.dirname(os.path.realpath(__file__))
path_configs = os.path.join(path, 'registry')
logger.info(f"path_configs: {path_configs}")

# Avro record whose nested record Y refers back to the outer type X,
# i.e. a recursive schema definition.
_schema = json.dumps({
    "type":
    "record",
    "name":
    "X",
    "fields": [{
        "name": "y",
        "type": {
            "type": "record",
            "name": "Y",
            "fields": [{
                "name": "Z",
                "type": "X"
            }]
        }
    }]
})
예제 #25
0
 def listfiles(self):
     """
     Return the list of files from ``results``, or None (after logging)
     when the list is empty.

     NOTE(review): ``results`` is not defined in this method or any
     visible scope — presumably a module-level API response; confirm it
     is populated before this runs, otherwise this raises NameError.
     """
     items = results.get('files', [])
     if not items:
         logger.info('No files found.')
     else:
         return items
예제 #26
0
 def test_get_schema(self):
     """The registered schema must expose the expected record name."""
     register = Registry(path_configs)
     fetched = register.get(name, version)
     logger.info(fetched)
     as_json = fetched.to_json()
     self.assertIn('name', as_json)
     self.assertEqual('test', as_json['name'])