コード例 #1
0
ファイル: etl.py プロジェクト: mlipper/bray
def get_services(job):
    """Build the name-to-service mapping handed to Bonobo for runtime injection.

    Both filesystem services are opened on ``job.data_dir``; the ``"search"``
    service is taken as-is from ``job.search``.
    """
    input_fs = bonobo.open_fs(job.data_dir)
    output_fs = bonobo.open_fs(job.data_dir)
    services = {
        FS_IN_SERVICE_ID: input_fs,
        FS_OUT_SERVICE_ID: output_fs,
        "search": job.search,
    }
    return services
コード例 #2
0
    def setup_services(self):
        """Assemble the services dict for this pipeline.

        Starts with a trace counter and a filesystem opened on
        ``self.input_path``, then adds one service per file found under the
        common service directory and the project service directory. Each
        service is keyed by the file's stem; a project file whose stem is
        already registered triggers a warning and replaces the earlier entry.

        :return: dict mapping service names to service objects
        """
        services = {
            'trace_counter': itertools.count(),
            f'fs.data.{self.project_name}': bonobo.open_fs(self.input_path)
        }

        # Shared services first, so project-specific files can override them.
        common_path = pathlib.Path(settings.pipeline_common_service_files_path)
        print(f'Common path: {common_path}', file=sys.stderr)
        for path in common_path.rglob('*'):
            service = self._service_from_path(path)
            if not service:
                continue
            services[path.stem] = service

        proj_path = pathlib.Path(
            settings.pipeline_project_service_files_path(self.project_name))
        print(f'Project path: {proj_path}', file=sys.stderr)
        for path in proj_path.rglob('*'):
            service = self._service_from_path(path)
            if not service:
                continue
            if path.stem in services:
                warnings.warn(
                    f'*** Project is overloading a shared service file: {path}'
                )
            services[path.stem] = service

        return services
コード例 #3
0
 def test_bonodoo_reader_fields(self):
     """Run OdooReader -> CsvWriter against a mocked XML-RPC server.

     The mocked ``execute_kw`` returns two records; the test expects the
     written CSV to contain three lines, with the second and third lines
     evaluating to the two record ids.
     """
     filename = 'test_file.csv'
     value_1 = {'id': 2}
     value_2 = {'id': 3}
     read = OdooReader(
         model='res.users',
         domain=[],
         fields=['id'],
     )
     # The context manager removes the directory even when an assertion
     # below fails; a trailing cleanup() call would be skipped in that case.
     with tempfile.TemporaryDirectory() as folder:
         with patch('xmlrpc.client.ServerProxy') as mk:
             mock_server = mk.return_value
             mock_server.login.return_value = 1
             mock_server.execute_kw.return_value = [value_1, value_2]
             graph = Graph()
             graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
             bonobo.run(graph,
                        services={
                            'fs.data': bonobo.open_fs(folder),
                            'odoo.server': self.server,
                        })
             mk.assert_called()
         with open(os.path.join(folder, filename), 'r') as f:
             lines = f.readlines()
         self.assertEqual(len(lines), 3)
         self.assertEqual(ast.literal_eval(lines[1]), value_1.get('id'))
         self.assertEqual(ast.literal_eval(lines[2]), value_2.get('id'))
コード例 #4
0
def get_services(**options):
    """
    Build the services dictionary, a plain dict of names-to-implementation that bonobo uses for runtime injection.

    It is applied on top of the defaults provided by bonobo (fs, http, ...). Those defaults can be overridden here, or
    left for the framework to define. Custom services can be added too, and their naming is free.

    Required keys in ``options``: ``vertica_dsn``, ``vertica_host``, ``vertica_username``, ``vertica_password`` and
    ``database`` (the name of one of the services built here, which is also exposed under ``'database'``).

    :return: dict
    """
    services = {}

    services['mysql'] = create_engine('mysql+mysqldb://localhost/aws', echo=False)
    services['s3'] = bonobo.open_fs('s3://mozilla-programmatic-billing')
    services['redshift'] = create_engine(
        'redshift+psycopg2://etl_edw@mozit-dw-dev.czbv3z9khmhv.us-west-2.redshift.amazonaws.com/edw-dev-v1',
        echo=False)

    # The Vertica URL is a template filled in from the caller's options.
    vertica_url = options['vertica_dsn'].format(
        host=options['vertica_host'],
        username=options['vertica_username'],
        password=options['vertica_password'])
    services['vertica'] = create_engine(vertica_url, echo=False)

    # Alias the caller-selected service under the generic 'database' name.
    selected = options['database']
    services['database'] = services[selected]

    return services
コード例 #5
0
ファイル: test_csv.py プロジェクト: schevalier/bonobo
def test_read_csv_from_file(tmpdir):
    """Check that CsvReader sends one dict per data row of a CSV file.

    A three-line CSV (header plus two rows) is written into ``tmpdir``; the
    reader is then stepped once and is expected to have sent exactly two
    messages, each carrying a single dict keyed by the header names.
    """
    fs, filename = open_fs(tmpdir), 'input.csv'
    # Close the handle explicitly so the fixture is flushed to disk before
    # the reader opens it, instead of relying on garbage collection.
    with fs.open(filename, 'w') as fp:
        fp.write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar')

    reader = CsvReader(path=filename, delimiter=',')

    context = CapturingNodeExecutionContext(reader, services={'fs': fs})

    context.start()
    context.write(BEGIN, Bag(), END)
    context.step()
    context.stop()

    assert len(context.send.mock_calls) == 2

    args0, kwargs0 = context.send.call_args_list[0]
    assert len(args0) == 1 and not len(kwargs0)
    args1, kwargs1 = context.send.call_args_list[1]
    assert len(args1) == 1 and not len(kwargs1)

    assert args0[0].args[0] == {
        'a': 'a foo',
        'b': 'b foo',
        'c': 'c foo',
    }
    assert args1[0].args[0] == {
        'a': 'a bar',
        'b': 'b bar',
        'c': 'c bar',
    }
コード例 #6
0
 def __init__(self, root):
     """Open a filesystem on ``root`` and record the data file names.

     ``root`` is converted to a ``Path`` and passed to ``bonobo.open_fs``
     with ``create=True``.
     """
     root_path = Path(root)
     self._root = root_path
     self._fs = bonobo.open_fs(root_path, create=True)

     # Names of the JSON files this object works with.
     self._users_file_name = "users.json"
     self._channels_file_name = "channels.json"
     self._org_messages_file_name = "org-messages.json"
     self._enriched_messages_file_name = "enriched-messages.json"
     self._message_count_file = "message-count.json"
     self._status_file_name = "status.json"
コード例 #7
0
def test_file_writer_out_of_context(tmpdir):
    """Check that FileWriter.open() yields a writable handle outside an execution context."""
    fs, filename = open_fs(tmpdir), 'output.txt'

    writer = FileWriter(path=filename)

    with writer.open(fs) as fp:
        fp.write('Yosh!')

    # Read back through a context manager so the handle is closed promptly.
    with fs.open(filename) as fp:
        assert fp.read() == 'Yosh!'
コード例 #8
0
def test_file_writer_in_context(tmpdir, lines, output):
    """Feed ``lines`` through a FileWriter node and compare the file content with ``output``.

    ``lines`` and ``output`` are supplied by the caller (presumably pytest
    parametrization -- confirm against the decorator, which is not visible here).
    """
    fs, filename = open_fs(tmpdir), 'output.txt'

    writer = FileWriter(path=filename)
    context = NodeExecutionContext(writer, services={'fs': fs})

    context.start()
    context.recv(BEGIN, *map(Bag, lines), END)
    # One step per input line.
    for _ in lines:
        context.step()
    context.stop()

    # Read back through a context manager so the handle is closed promptly.
    with fs.open(filename) as fp:
        assert fp.read() == output
コード例 #9
0
def execute(input,
            output,
            reader=None,
            reader_options=None,
            writer=None,
            writer_options=None,
            options=None):
    """Run a two-node graph that reads from ``input`` and writes to ``output``.

    The reader and writer factories are chosen by ``resolve_factory`` from
    the ``reader``/``writer`` hints and the input/output names.
    ``reader_options``, ``writer_options`` and ``options`` are accepted but
    not used by this function.

    :return: whatever ``bonobo.run`` returns
    """
    reader_factory = resolve_factory(reader, input, READER)
    source = reader_factory(input)
    writer_factory = resolve_factory(writer, output, WRITER)
    sink = writer_factory(output)

    graph = bonobo.Graph()
    graph.add_chain(source, sink)

    services = {'fs': bonobo.open_fs()}
    return bonobo.run(graph, services=services)
コード例 #10
0
ファイル: test_csv.py プロジェクト: schevalier/bonobo
def test_write_csv_to_file(tmpdir):
    """Check the CSV produced by CsvWriter for two input bags.

    The expected output keeps only the ``foo`` column (taken from the first
    bag), and the context must not expose a ``file`` attribute after stop().
    """
    fs, filename = open_fs(tmpdir), 'output.csv'

    writer = CsvWriter(path=filename)
    context = NodeExecutionContext(writer, services={'fs': fs})

    context.write(BEGIN, Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'}), END)

    context.start()
    context.step()
    context.step()
    context.stop()

    # Read back through a context manager so the handle is closed promptly.
    with fs.open(filename) as fp:
        assert fp.read() == 'foo\nbar\nbaz\n'

    with pytest.raises(AttributeError):
        getattr(context, 'file')
コード例 #11
0
def test_write_json_to_file(tmpdir):
    """Check the JSON produced by JsonWriter for a single input bag.

    After stop(), the context must expose neither a ``file`` nor a ``first``
    attribute.
    """
    fs, filename = open_fs(tmpdir), 'output.json'

    writer = JsonWriter(path=filename)
    context = NodeExecutionContext(writer, services={'fs': fs})

    context.start()
    context.recv(BEGIN, Bag({'foo': 'bar'}), END)
    context.step()
    context.stop()

    # Read back through a context manager so the handle is closed promptly.
    with fs.open(filename) as fp:
        assert fp.read() == '[{"foo": "bar"}]'

    with pytest.raises(AttributeError):
        getattr(context, 'file')

    with pytest.raises(AttributeError):
        getattr(context, 'first')
コード例 #12
0
    def handle(
        self,
        input_filename,
        output_filename,
        reader=None,
        reader_option=None,
        writer=None,
        writer_option=None,
        option=None,
        limit=None,
        transformation=None,
    ):
        """Build and run a reader -> transformations -> writer graph.

        Options in ``option`` apply to both ends; ``reader_option`` and
        ``writer_option`` are appended for the respective end. An
        ``output_filename`` of ``'-'`` selects ``bonobo.PrettyPrinter``
        instead of a registry-provided writer. A truthy ``limit`` puts a
        ``bonobo.Limit`` node ahead of the resolved transformations.

        :return: whatever ``bonobo.run`` returns
        """
        shared_options = option or []

        reader_factory = default_registry.get_reader_factory_for(
            input_filename, format=reader)
        reader_kwargs = _resolve_options(shared_options + (reader_option or []))

        if output_filename != '-':
            writer_factory = default_registry.get_writer_factory_for(
                output_filename, format=writer)
            writer_args = (output_filename, )
        else:
            writer_factory = bonobo.PrettyPrinter
            writer_args = ()
        writer_kwargs = _resolve_options(shared_options + (writer_option or []))

        # The optional limiter always precedes user-supplied transformations.
        limiters = (bonobo.Limit(limit), ) if limit else ()
        transformations = limiters + _resolve_transformations(transformation)

        graph = bonobo.Graph()
        source = reader_factory(input_filename, **reader_kwargs)
        sink = writer_factory(*writer_args, **writer_kwargs)
        graph.add_chain(source, *transformations, sink)

        services = {'fs': bonobo.open_fs()}
        return bonobo.run(graph, services=services)
コード例 #13
0
ファイル: convert.py プロジェクト: a-musing-moose/bonobo
    def handle(
            self,
            input_filename,
            output_filename,
            reader=None,
            reader_option=None,
            writer=None,
            writer_option=None,
            option=None,
            limit=None,
            transformation=None,
    ):
        """Convert ``input_filename`` into ``output_filename`` through a bonobo graph.

        The reader factory comes from the default registry. The writer is
        ``bonobo.PrettyPrinter`` when ``output_filename`` is ``'-'``, otherwise
        a registry writer constructed with the output file name. ``option``
        entries are combined with ``reader_option`` / ``writer_option`` before
        being resolved into keyword arguments. When ``limit`` is truthy a
        ``bonobo.Limit`` node is placed before the resolved transformations.

        :return: whatever ``bonobo.run`` returns
        """
        def resolve_with_common(specific):
            # Common options come first, end-specific ones are appended.
            return _resolve_options((option or []) + (specific or []))

        reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader)
        reader_kwargs = resolve_with_common(reader_option)

        pretty_print = output_filename == '-'
        if pretty_print:
            writer_factory, writer_args = bonobo.PrettyPrinter, ()
        else:
            writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer)
            writer_args = (output_filename, )
        writer_kwargs = resolve_with_common(writer_option)

        transformations = (bonobo.Limit(limit), ) if limit else ()
        transformations = transformations + _resolve_transformations(transformation)

        graph = bonobo.Graph()
        graph.add_chain(
            reader_factory(input_filename, **reader_kwargs),
            *transformations,
            writer_factory(*writer_args, **writer_kwargs),
        )

        return bonobo.run(graph, services={'fs': bonobo.open_fs()})
コード例 #14
0
	def __init__(self, input_path, contents, **kwargs):
		"""Configure the 'people' pipeline and read the column headers.

		``contents`` must provide ``'header_file'`` and ``'files_pattern'``.
		Recognised keyword arguments are ``models`` (defaults to
		``settings.arches_models``), ``limit`` and ``debug`` (defaults to
		``False``). The first row of the header file, lower-cased, is stored
		in ``self.contents_headers``.
		"""
		project_name = 'people'
		self.input_path = input_path
		self.services = None

		super().__init__(project_name, helper=PeopleUtilityHelper(project_name))

		self.graph = None
		self.models = kwargs.get('models', settings.arches_models)
		self.contents_header_file = contents['header_file']
		self.contents_files_pattern = contents['files_pattern']
		self.limit = kwargs.get('limit')
		self.debug = kwargs.get('debug', False)

		# Only the first CSV row of the header file is needed.
		input_fs = bonobo.open_fs(input_path)
		with input_fs.open(self.contents_header_file, newline='') as header_file:
			header_row = next(csv.reader(header_file))
			self.contents_headers = [column.lower() for column in header_row]
コード例 #15
0
    def setup_services(self):
        """Assemble the services dict for this pipeline.

        Starts with a trace counter and a filesystem opened on
        ``self.input_path``, then adds one service per file found under the
        common and project service directories, keyed by file stem. A project
        file whose stem is already registered triggers a warning and replaces
        the earlier entry. Finally the ``materials_map`` service is replaced
        by a dict keyed on ``(object_type, frozenset_of_materials)``.

        :return: dict mapping service names to service objects
        """
        services = {
            'trace_counter': itertools.count(),
            f'fs.data.{self.project_name}': bonobo.open_fs(self.input_path)
        }

        # Shared services first, so project-specific files can override them.
        common_path = pathlib.Path(settings.pipeline_common_service_files_path)
        if self.verbose:
            print(f'Common path: {common_path}', file=sys.stderr)
        for path in common_path.rglob('*'):
            service = self._service_from_path(path)
            if not service:
                continue
            services[path.stem] = service

        proj_path = pathlib.Path(
            settings.pipeline_project_service_files_path(self.project_name))
        if self.verbose:
            print(f'Project path: {proj_path}', file=sys.stderr)
        for path in proj_path.rglob('*'):
            service = self._service_from_path(path)
            if not service:
                continue
            if path.stem in services:
                warnings.warn(
                    f'*** Project is overloading a shared service file: {path}'
                )
            services[path.stem] = service

        # Re-key the materials map entries by (object type, set of materials).
        materials_by_key = {}
        for entry in services.get('materials_map', []):
            object_type = entry['object_type']
            raw_materials = entry['materials']
            if ';' in raw_materials:
                # Multiple materials are ';'-separated; each part is stripped.
                materials = frozenset(
                    [part.strip() for part in raw_materials.split(';')])
            else:
                # A single material is used verbatim (no stripping).
                materials = frozenset([raw_materials])
            materials_by_key[(object_type, materials)] = entry
        services['materials_map'] = materials_by_key

        return services
コード例 #16
0
def test_read_json_from_file(tmpdir):
    """Check that JsonReader sends one message per element of a JSON array."""
    fs, filename = open_fs(tmpdir), 'input.json'
    # Close the handle explicitly so the fixture is flushed to disk before
    # the reader opens it, instead of relying on garbage collection.
    with fs.open(filename, 'w') as fp:
        fp.write('[{"x": "foo"},{"x": "bar"}]')
    reader = JsonReader(path=filename)

    context = CapturingNodeExecutionContext(reader, services={'fs': fs})

    context.start()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.stop()

    assert len(context.send.mock_calls) == 2

    args0, kwargs0 = context.send.call_args_list[0]
    assert len(args0) == 1 and not len(kwargs0)
    args1, kwargs1 = context.send.call_args_list[1]
    assert len(args1) == 1 and not len(kwargs1)

    assert args0[0].args[0] == {'x': 'foo'}
    assert args1[0].args[0] == {'x': 'bar'}
コード例 #17
0
def test_file_reader_in_context(tmpdir):
    """Check that FileReader sends one message per line, without the line ending."""
    fs, filename = open_fs(tmpdir), 'input.txt'

    # Close the handle explicitly so the fixture is flushed to disk before
    # the reader opens it, instead of relying on garbage collection.
    with fs.open(filename, 'w') as fp:
        fp.write('Hello\nWorld\n')

    reader = FileReader(path=filename)
    context = CapturingNodeExecutionContext(reader, services={'fs': fs})

    context.start()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.stop()

    assert len(context.send.mock_calls) == 2

    args0, kwargs0 = context.send.call_args_list[0]
    assert len(args0) == 1 and not len(kwargs0)
    args1, kwargs1 = context.send.call_args_list[1]
    assert len(args1) == 1 and not len(kwargs1)

    assert args0[0].args[0] == 'Hello'
    assert args1[0].args[0] == 'World'
コード例 #18
0
 def test_bonodoo_function_single(self):
     """Run OdooModelFunction -> CsvWriter against a mocked XML-RPC server.

     The mocked ``execute_kw`` returns a single dict; the test expects the
     written file to contain exactly one line that evaluates to that dict.
     """
     filename = 'test_file.csv'
     read = OdooModelFunction(model='res.users', function='test_function')
     value_1 = {'id': 2}
     # The context manager removes the directory even when an assertion
     # below fails; a trailing cleanup() call would be skipped in that case.
     with tempfile.TemporaryDirectory() as folder:
         with patch('xmlrpc.client.ServerProxy') as mk:
             mock_server = mk.return_value
             mock_server.login.return_value = 1
             mock_server.execute_kw.return_value = value_1
             graph = Graph()
             graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
             bonobo.run(graph,
                        services={
                            'fs.data': bonobo.open_fs(folder),
                            'odoo.server': self.server,
                        })
             mk.assert_called()
         with open(os.path.join(folder, filename), 'r') as f:
             lines = f.readlines()
         self.assertEqual(len(lines), 1)
         self.assertEqual(ast.literal_eval(lines[0]), value_1)
コード例 #19
0
ファイル: services.py プロジェクト: a-musing-moose/bonobo
def create_container(services=None, factory=Container):
    """
    Create a container with reasonable default implementations for commonly used, standard-named services.

    Services:
    - `fs` defaults to a fs2 instance based on current working directory (``bonobo.open_fs()`` with no arguments)
    - `http` defaults to requests

    :param services: optional initial services passed to ``factory``; when falsy, ``factory`` is called without
        arguments
    :param factory: callable used to build the container
    :return: the container, with `fs` and `http` entries present
    """
    container = factory(services) if services else factory()

    # Imports are local so each dependency is only loaded when its default is actually needed.
    if 'fs' not in container:
        import bonobo
        container.setdefault('fs', bonobo.open_fs())

    if 'http' not in container:
        import requests
        container.setdefault('http', requests)

    return container
コード例 #20
0
ファイル: services.py プロジェクト: mouadhkaabachi/bonobo
def create_container(services=None, factory=Container):
    """
    Create a container with reasonable default implementations for commonly used, standard-named services.

    Services:
    - `fs` defaults to a fs2 instance based on current working directory (``bonobo.open_fs()`` with no arguments)
    - `http` defaults to requests

    :param services: optional initial services passed to ``factory``; when falsy, ``factory`` is called without
        arguments
    :param factory: callable used to build the container
    :return: the container, with `fs` and `http` entries present
    """
    container = factory(services) if services else factory()

    # Imports are local so each dependency is only loaded when its default is actually needed.
    if 'fs' not in container:
        import bonobo
        container.setdefault('fs', bonobo.open_fs())

    if 'http' not in container:
        import requests
        container.setdefault('http', requests)

    return container
コード例 #21
0
ファイル: simple.py プロジェクト: Sjoerd82/bonobo_trans
def get_services():
    """Return the services dict: a SQLAlchemy engine on ``paintstore.db`` and an output filesystem."""
    engine = sqlalchemy.create_engine('sqlite:///paintstore.db')
    output_fs = bonobo.open_fs()
    return {'sqlalchemy.engine': engine, 'fs.output': output_fs}
コード例 #22
0
ファイル: testing.py プロジェクト: zhenyu-captain/bonobo
 def get_services_for_writer(self, tmpdir):
     """Return ``(fs, filename, services)`` for a writer test rooted at ``tmpdir``.

     The file name is ``'output.'`` followed by ``self.extension``; the
     services dict exposes the filesystem under ``'fs'``.
     """
     fs = open_fs(tmpdir)
     filename = 'output.' + self.extension
     services = {'fs': fs}
     return fs, filename, services
コード例 #23
0
ファイル: services.py プロジェクト: zkan/bonobo
def get_services():
    """Return the examples' services extended with an ``"fs.output"`` filesystem."""
    services = dict(examples.get_services())
    services["fs.output"] = open_fs()
    return services
コード例 #24
0
def get_services():
    """Return the services dict with the datasets filesystem and the static examples filesystem."""
    datasets_fs = bonobo.open_fs(get_datasets_dir("datasets"))
    static_fs = bonobo.open_examples_fs("datasets", "static")
    return {"fs": datasets_fs, "fs.static": static_fs}
コード例 #25
0
ファイル: crawler.py プロジェクト: tssujt/bonobo-tutorial
def get_services(**options):
    """Return an HTTP session with a custom User-Agent and a default filesystem.

    ``options`` is accepted but not used by this function.
    """
    session = requests.Session()
    # Replaces the session's headers mapping as a whole.
    session.headers = {'User-Agent': 'Monkeys!'}
    services = {'http': session, 'fs': bonobo.open_fs()}
    return services
コード例 #26
0
ファイル: services.py プロジェクト: zhenyu-captain/bonobo
def get_services():
    """Return a services dict whose ``'fs'`` entry is opened on the datasets directory."""
    datasets_dir = get_datasets_dir('datasets')
    datasets_fs = bonobo.open_fs(datasets_dir)
    return {'fs': datasets_fs}
コード例 #27
0
    def __init__(self, input_path, contents, **kwargs):
        """Register vocabulary entries, then configure the 'people' pipeline.

        Vocabulary instances and classes are registered in a fixed order
        before the parent initializer runs. ``contents`` must provide
        ``'header_file'`` and ``'files_pattern'``. Recognised keyword
        arguments are ``models`` (defaults to ``settings.arches_models``),
        ``limit`` and ``debug`` (defaults to ``False``). The first row of the
        header file, lower-cased, is stored in ``self.contents_headers``.
        """
        project_name = 'people'
        self.input_path = input_path
        self.services = None

        # (instance name, id, label) -- all registered with parent model.Type
        for name, type_id, label in (
                ('address', '300386983', 'Street Address'),
                ('location', '300393211', 'Location'),
                ('occupation', '300263369', 'Occupation'),
        ):
            vocab.register_instance(name, {
                'parent': model.Type,
                'id': type_id,
                'label': label
            })

        # (class name, parent class, id, label, metatype or None)
        for name, parent, class_id, label, metatype in (
                ('Residing', model.Activity, "300393179", "Residing", "location"),
                ('Establishment', model.Activity, "300393212", "Establishment", "location"),
                ('StreetAddress', model.Identifier, "300386983", "Street Address", None),
                ("CreatingOccupation", model.Activity, "300404387", "Creating Artwork", "occupation"),
                ("CollectingOccupation", model.Activity, "300077121", "Collecting", "occupation"),
                ("DealingOccupation", model.Activity, "300055675", "Commercial Dealing in Artwork", "occupation"),
                ("OwningOccupation", model.Activity, "300055603", "Owning", "occupation"),
        ):
            spec = {"parent": parent, "id": class_id, "label": label}
            if metatype is not None:
                spec["metatype"] = metatype
            vocab.register_vocab_class(name, spec)

        # The 'form type' instance is registered before the class that names it as metatype.
        vocab.register_instance('form type', {
            'parent': model.Type,
            'id': '300444970',
            'label': 'Form'
        })
        vocab.register_vocab_class(
            'EntryTextForm', {
                "parent": model.LinguisticObject,
                "id": "300438434",
                "label": "Entry",
                "metatype": "form type"
            })

        super().__init__(project_name, helper=PeopleUtilityHelper(project_name))

        self.graph = None
        self.models = kwargs.get('models', settings.arches_models)
        self.contents_header_file = contents['header_file']
        self.contents_files_pattern = contents['files_pattern']
        self.limit = kwargs.get('limit')
        self.debug = kwargs.get('debug', False)

        # Only the first CSV row of the header file is needed.
        input_fs = bonobo.open_fs(input_path)
        with input_fs.open(self.contents_header_file, newline='') as header_file:
            header_row = next(csv.reader(header_file))
            self.contents_headers = [column.lower() for column in header_row]
コード例 #28
0
ファイル: testing.py プロジェクト: a-musing-moose/bonobo
 def get_services_for_reader(self, tmpdir):
     """Write ``self.input_data`` to an input file under ``tmpdir`` and return ``(fs, filename, services)``.

     The file is named ``'input.'`` plus ``self.extension`` and is opened
     with ``self.mode``; the services dict exposes the filesystem under ``'fs'``.
     """
     fs = open_fs(tmpdir)
     filename = 'input.' + self.extension
     with fs.open(filename, self.mode) as handle:
         handle.write(self.input_data)
     services = {'fs': fs}
     return fs, filename, services
コード例 #29
0
 def get_services_for_reader(self, tmpdir):
     """Write ``self.input_data`` to an input file under ``tmpdir`` and return ``(fs, filename, services)``.

     The file is named ``"input."`` plus ``self.extension`` and is opened
     with ``self.mode``; the services dict exposes the filesystem under ``"fs"``.
     """
     fs = open_fs(tmpdir)
     filename = "input." + self.extension
     with fs.open(filename, self.mode) as handle:
         handle.write(self.input_data)
     services = {"fs": fs}
     return fs, filename, services
コード例 #30
0
 def get_services_for_writer(self, tmpdir):
     """Return ``(fs, filename, services)`` for a writer test rooted at ``tmpdir``.

     The file name is ``"output."`` followed by ``self.extension``; the
     services dict exposes the filesystem under ``"fs"``.
     """
     fs = open_fs(tmpdir)
     filename = "output." + self.extension
     services = {"fs": fs}
     return fs, filename, services
コード例 #31
0
ファイル: services.py プロジェクト: a-musing-moose/bonobo
def get_services():
    """Return a services dict whose ``'fs'`` entry is opened on the datasets directory."""
    datasets_dir = get_datasets_dir('datasets')
    datasets_fs = bonobo.open_fs(datasets_dir)
    return {'fs': datasets_fs}
コード例 #32
0
ファイル: _services.py プロジェクト: a-musing-moose/bonobo
def get_services():
    """Return the examples filesystem as ``'fs'`` and a default filesystem as ``'fs.output'``."""
    examples_fs = open_fs(get_examples_path())
    output_fs = open_fs()
    return {'fs': examples_fs, 'fs.output': output_fs}
コード例 #33
0
def get_services():
    """Return the example datasets filesystem as ``'fs'`` and a default filesystem as ``'fs.output'``."""
    datasets_fs = bonobo.open_examples_fs('datasets')
    output_fs = bonobo.open_fs()
    return {'fs': datasets_fs, 'fs.output': output_fs}
コード例 #34
0
ファイル: testing.py プロジェクト: a-musing-moose/bonobo
 def get_services_for_writer(self, tmpdir):
     """Return ``(fs, filename, services)`` for a writer test rooted at ``tmpdir``.

     The file name is ``'output.'`` followed by ``self.extension``; the
     services dict exposes the filesystem under ``'fs'``.
     """
     fs = open_fs(tmpdir)
     filename = 'output.' + self.extension
     services = {'fs': fs}
     return fs, filename, services
コード例 #35
0
ファイル: tut02e03_writeasmap.py プロジェクト: tate11/bonobo
def get_services():
    """Return the example datasets filesystem as ``'fs'`` and a default filesystem as ``'fs.output'``."""
    datasets_fs = bonobo.open_examples_fs('datasets')
    output_fs = bonobo.open_fs()
    return {'fs': datasets_fs, 'fs.output': output_fs}
コード例 #36
0
ファイル: _services.py プロジェクト: zhenyu-captain/bonobo
def get_services():
    """Return the examples filesystem as ``'fs'`` and a default filesystem as ``'fs.output'``."""
    examples_fs = open_fs(get_examples_path())
    output_fs = open_fs()
    return {'fs': examples_fs, 'fs.output': output_fs}
コード例 #37
0
def get_services():
    """Return a services dict whose ``'fs'`` entry is opened on this module's directory."""
    module_dir = dirname(__file__)
    module_fs = bonobo.open_fs(module_dir)
    return {'fs': module_fs}