#!/usr/bin/env python # -*- coding: utf-8 -*- import base import string import os import shutil try: import yaml except: from brewery.utils import MissingPackage yaml = MissingPackage("PyYAML", "YAML directory data source/target", "http://pyyaml.org/") class YamlDirectoryDataSource(base.DataSource): """docstring for ClassName """ def __init__(self, path, extension="yml", expand=False, filename_field=None): """Creates a YAML directory data source stream. The data source reads files from a directory and treats each file as single record. For example, following directory will contain 3 records:: data/ contract_0.yml
#!/usr/bin/env python # -*- coding: utf-8 -*- import base import brewery.metadata as metadata try: import gdata.spreadsheet.text_db except: from brewery.utils import MissingPackage gdata = MissingPackage("gdata", "Google data (spreadsheet) source/target") # Documentation: # http://gdata-python-client.googlecode.com/svn/trunk/pydocs/ class GoogleSpreadsheetDataSource(base.DataSource): """Reading data from a google spreadsheet. Some code taken from OKFN Swiss library. """ def __init__(self, spreadsheet_key=None, spreadsheet_name=None, worksheet_id=None, worksheet_name=None, query_string="", username=None, password=None): """Creates a Google Spreadsheet data source stream. :Attributes:
#!/usr/bin/env python # -*- coding: utf-8 -*- import base import datetime from brewery.metadata import FieldList try: import xlrd except: from brewery.utils import MissingPackage xlrd = MissingPackage("xlrd", "Reading MS Excel XLS Files", "http://pypi.python.org/pypi/xlrd") class XLSDataSource(base.DataSource): """Reading Microsoft Excel XLS Files Requires the xlrd package (see pypi). Based on the OKFN Swiss library. """ def __init__(self, resource, sheet=None, encoding=None, skip_rows=None, read_header=True): """Creates a XLS spreadsheet data source stream. :Attributes:
def split_table_schema(table_name):
    """Split a table reference into its schema and table name.

    The reference is divided at the *last* dot, so the table name is always
    the final component and any leading qualifiers are kept together as the
    schema::

        "table"                -> (None, "table")
        "schema.table"         -> ("schema", "table")
        "catalog.schema.table" -> ("catalog.schema", "table")

    :Parameters:
        * `table_name`: table reference string, optionally qualified with a
          schema separated by a dot

    Returns: Tuple in form (schema, table). The schema is ``None`` when the
    reference contains no dot.
    """
    # rpartition yields ('', '', table_name) when there is no dot, so an
    # empty separator means the reference is an unqualified table name.
    schema, separator, table = table_name.rpartition('.')
    if not separator:
        return (None, table)
    return (schema, table)
#!/usr/bin/env python # -*- coding: utf-8 -*- import base import brewery.dq try: import pymongo except ImportError: from brewery.utils import MissingPackage pymongo = MissingPackage("pymongo", "MongoDB streams", "http://www.mongodb.org/downloads/") class MongoDBDataSource(base.DataSource): """docstring for ClassName """ def __init__(self, collection, database=None, host=None, port=None, expand=False, **mongo_args): """Creates a MongoDB data source stream. :Attributes: * collection: mongo collection name * database: database name * host: mongo database server host, default is ``localhost`` * port: mongo port, default is ``27017``
#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import absolute_import from ..dq.base import FieldTypeProbe from .base import DataSource, DataTarget from ..metadata import expand_record, Field try: from pyes.es import ES from pyes.exceptions import TypeMissingException except ImportError: from brewery.utils import MissingPackage pyes = MissingPackage("pyes", "ElasticSearch streams", "http://www.elasticsearch.org/") class ESDataSource(DataSource): """ docstring for ClassName """ def __init__(self, document_type, index=None, host=None, port=None, expand=False, **elasticsearch_args): """Creates a ElasticSearch data source stream.