Exemple #1
0
def get_services(job):
    """Build the name-to-service mapping Bonobo injects at runtime for *job*.

    The input and output filesystem services are two separately opened
    filesystems, both rooted at ``job.data_dir``. The ``"search"`` entry is
    ``job.search`` passed through unchanged.
    """
    input_fs = bonobo.open_fs(job.data_dir)
    output_fs = bonobo.open_fs(job.data_dir)
    services = {FS_IN_SERVICE_ID: input_fs, FS_OUT_SERVICE_ID: output_fs}
    services["search"] = job.search
    return services
Exemple #2
0
    def setup_services(self):
        """Assemble the services dictionary for this pipeline.

        The result always holds a ``trace_counter`` and a project-specific
        ``fs.data.<project_name>`` filesystem opened on ``self.input_path``.
        Every path found under the common service directory, then under the
        project service directory, is handed to ``self._service_from_path``;
        truthy results are stored under the path's stem. A project entry that
        reuses an existing key replaces it after emitting a warning.

        :return: dict mapping service names to service objects
        """
        services = {}
        services['trace_counter'] = itertools.count()
        services[f'fs.data.{self.project_name}'] = bonobo.open_fs(self.input_path)

        # Services shared by every project.
        shared_dir = pathlib.Path(settings.pipeline_common_service_files_path)
        print(f'Common path: {shared_dir}', file=sys.stderr)
        for path in shared_dir.rglob('*'):
            loaded = self._service_from_path(path)
            if not loaded:
                continue
            services[path.stem] = loaded

        # Project-specific services; these win over anything loaded so far.
        project_dir = pathlib.Path(
            settings.pipeline_project_service_files_path(self.project_name))
        print(f'Project path: {project_dir}', file=sys.stderr)
        for path in project_dir.rglob('*'):
            loaded = self._service_from_path(path)
            if not loaded:
                continue
            name = path.stem
            if name in services:
                warnings.warn(
                    f'*** Project is overloading a shared service file: {path}'
                )
            services[name] = loaded

        return services
Exemple #3
0
 def test_bonodoo_reader_fields(self):
     """Run an OdooReader -> CsvWriter graph and check the CSV it produces.

     The XML-RPC server proxy is patched so that ``execute_kw`` returns two
     records; the written file is expected to contain three lines, with the
     second and third evaluating to the two record ids.
     """
     filename = 'test_file.csv'
     value_1 = {'id': 2}
     value_2 = {'id': 3}
     read = OdooReader(
         model='res.users',
         domain=[],
         fields=['id'],
     )
     # The context manager removes the directory even when an assertion
     # below fails, which a trailing ``cleanup()`` call would not do.
     with tempfile.TemporaryDirectory() as folder:
         with patch('xmlrpc.client.ServerProxy') as mk:
             mock_server = mk.return_value
             mock_server.login.return_value = 1
             mock_server.execute_kw.return_value = [value_1, value_2]
             graph = Graph()
             graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
             bonobo.run(graph,
                        services={
                            'fs.data': bonobo.open_fs(folder),
                            'odoo.server': self.server,
                        })
             mk.assert_called()
         with open(os.path.join(folder, filename), 'r') as f:
             lines = f.readlines()
         self.assertEqual(len(lines), 3)
         self.assertEqual(ast.literal_eval(lines[1]), value_1.get('id'))
         self.assertEqual(ast.literal_eval(lines[2]), value_2.get('id'))
Exemple #4
0
def get_services(**options):
    """
    Build the services dictionary: a plain mapping of names to implementations that bonobo uses for runtime
    injection.

    The entries are layered on top of bonobo's own defaults (fs, http, ...), so they may override a default or add
    new, freely named services.

    Reads ``vertica_dsn``, ``vertica_host``, ``vertica_username``, ``vertica_password`` and ``database`` from
    *options*; ``database`` names which of the services built here is also exposed under the ``database`` key.

    :return: dict
    """
    services = {}

    services['mysql'] = create_engine('mysql+mysqldb://localhost/aws', echo=False)
    services['s3'] = bonobo.open_fs('s3://mozilla-programmatic-billing')
    services['redshift'] = create_engine(
        'redshift+psycopg2://etl_edw@mozit-dw-dev.czbv3z9khmhv.us-west-2.redshift.amazonaws.com/edw-dev-v1',
        echo=False,
    )

    vertica_url = options['vertica_dsn'].format(
        host=options['vertica_host'],
        username=options['vertica_username'],
        password=options['vertica_password'],
    )
    services['vertica'] = create_engine(vertica_url, echo=False)

    # Alias the engine selected by the caller.
    selected = options['database']
    services['database'] = services[selected]

    return services
Exemple #5
0
def test_read_csv_from_file(tmpdir):
    """Check that CsvReader sends one dict per data row of a CSV file."""
    fs, filename = open_fs(tmpdir), 'input.csv'
    # Close the handle explicitly so the fixture is flushed to disk before
    # the reader opens it, instead of relying on garbage collection.
    with fs.open(filename, 'w') as fp:
        fp.write('a,b,c\na foo,b foo,c foo\na bar,b bar,c bar')

    reader = CsvReader(path=filename, delimiter=',')

    context = CapturingNodeExecutionContext(reader, services={'fs': fs})

    context.start()
    context.write(BEGIN, Bag(), END)
    context.step()
    context.stop()

    assert len(context.send.mock_calls) == 2

    # Each send() call must carry exactly one positional argument.
    args0, kwargs0 = context.send.call_args_list[0]
    assert len(args0) == 1 and not kwargs0
    args1, kwargs1 = context.send.call_args_list[1]
    assert len(args1) == 1 and not kwargs1

    assert args0[0].args[0] == {
        'a': 'a foo',
        'b': 'b foo',
        'c': 'c foo',
    }
    assert args1[0].args[0] == {
        'a': 'a bar',
        'b': 'b bar',
        'c': 'c bar',
    }
Exemple #6
0
 def __init__(self, root):
     """Open a filesystem on *root* and record the file names used in it.

     The filesystem is opened with ``create=True``.
     """
     root_path = Path(root)
     self._root = root_path
     self._fs = bonobo.open_fs(root_path, create=True)

     # Names of the JSON files kept under the root.
     self._status_file_name = "status.json"
     self._users_file_name = "users.json"
     self._channels_file_name = "channels.json"
     self._message_count_file = "message-count.json"
     self._org_messages_file_name = "org-messages.json"
     self._enriched_messages_file_name = "enriched-messages.json"
Exemple #7
0
def test_file_writer_out_of_context(tmpdir):
    """Check that FileWriter.open() yields a writable handle without an execution context."""
    fs, filename = open_fs(tmpdir), 'output.txt'

    writer = FileWriter(path=filename)

    with writer.open(fs) as fp:
        fp.write('Yosh!')

    # Read back through a context manager so the handle is closed.
    with fs.open(filename) as fp:
        assert fp.read() == 'Yosh!'
Exemple #8
0
def test_file_writer_in_context(tmpdir, lines, output):
    """Feed *lines* through a FileWriter node and compare the file content with *output*."""
    fs, filename = open_fs(tmpdir), 'output.txt'

    writer = FileWriter(path=filename)
    context = NodeExecutionContext(writer, services={'fs': fs})

    context.start()
    context.recv(BEGIN, *map(Bag, lines), END)
    # One step per queued line.
    for _ in lines:
        context.step()
    context.stop()

    # Read back through a context manager so the handle is closed.
    with fs.open(filename) as fp:
        assert fp.read() == output
Exemple #9
0
def execute(input,
            output,
            reader=None,
            reader_options=None,
            writer=None,
            writer_options=None,
            options=None):
    """Run a two-node graph that reads from *input* and writes to *output*.

    The reader and writer factories are resolved with ``resolve_factory``
    and each is called with its path only. ``reader_options``,
    ``writer_options`` and ``options`` are accepted but not used here.

    :return: whatever ``bonobo.run`` returns
    """
    reader_factory = resolve_factory(reader, input, READER)
    source = reader_factory(input)

    writer_factory = resolve_factory(writer, output, WRITER)
    sink = writer_factory(output)

    graph = bonobo.Graph()
    graph.add_chain(source, sink)

    services = {'fs': bonobo.open_fs()}
    return bonobo.run(graph, services=services)
Exemple #10
0
def test_write_csv_to_file(tmpdir):
    """Check the CSV written by CsvWriter for two bags, the second carrying an extra key."""
    fs, filename = open_fs(tmpdir), 'output.csv'

    writer = CsvWriter(path=filename)
    context = NodeExecutionContext(writer, services={'fs': fs})

    context.write(BEGIN, Bag({'foo': 'bar'}), Bag({'foo': 'baz', 'ignore': 'this'}), END)

    context.start()
    context.step()
    context.step()
    context.stop()

    # Read back through a context manager so the handle is closed.
    with fs.open(filename) as fp:
        assert fp.read() == 'foo\nbar\nbaz\n'

    # After stop(), the context must not expose a ``file`` attribute.
    with pytest.raises(AttributeError):
        getattr(context, 'file')
Exemple #11
0
def test_write_json_to_file(tmpdir):
    """Check the JSON document written by JsonWriter for a single bag."""
    fs, filename = open_fs(tmpdir), 'output.json'

    writer = JsonWriter(path=filename)
    context = NodeExecutionContext(writer, services={'fs': fs})

    context.start()
    context.recv(BEGIN, Bag({'foo': 'bar'}), END)
    context.step()
    context.stop()

    # Read back through a context manager so the handle is closed.
    with fs.open(filename) as fp:
        assert fp.read() == '[{"foo": "bar"}]'

    # After stop(), the context must expose neither ``file`` nor ``first``.
    with pytest.raises(AttributeError):
        getattr(context, 'file')

    with pytest.raises(AttributeError):
        getattr(context, 'first')
Exemple #12
0
    def handle(
        self,
        input_filename,
        output_filename,
        reader=None,
        reader_option=None,
        writer=None,
        writer_option=None,
        option=None,
        limit=None,
        transformation=None,
    ):
        """Build and run a reader -> transformations -> writer graph.

        The reader factory comes from the default registry. An output
        filename of ``'-'`` selects ``bonobo.PrettyPrinter`` with no
        positional arguments instead of a registry writer. Values in
        *option* are resolved for both ends, with the reader- or
        writer-specific options appended. A truthy *limit* puts a
        ``bonobo.Limit`` node ahead of the resolved transformations.

        :return: whatever ``bonobo.run`` returns
        """
        shared_options = option or []

        reader_factory = default_registry.get_reader_factory_for(
            input_filename, format=reader)
        reader_kwargs = _resolve_options(shared_options +
                                         (reader_option or []))

        if output_filename != '-':
            writer_factory = default_registry.get_writer_factory_for(
                output_filename, format=writer)
            writer_args = (output_filename, )
        else:
            writer_factory = bonobo.PrettyPrinter
            writer_args = ()
        writer_kwargs = _resolve_options(shared_options +
                                         (writer_option or []))

        limit_nodes = (bonobo.Limit(limit), ) if limit else ()
        transformations = limit_nodes + _resolve_transformations(
            transformation)

        graph = bonobo.Graph()
        source = reader_factory(input_filename, **reader_kwargs)
        sink = writer_factory(*writer_args, **writer_kwargs)
        graph.add_chain(source, *transformations, sink)

        services = {'fs': bonobo.open_fs()}
        return bonobo.run(graph, services=services)
Exemple #13
0
    def handle(
            self,
            input_filename,
            output_filename,
            reader=None,
            reader_option=None,
            writer=None,
            writer_option=None,
            option=None,
            limit=None,
            transformation=None,
    ):
        """Convert *input_filename* into *output_filename* through a bonobo graph.

        Reader and writer factories are looked up in the default registry,
        except that an output filename of ``'-'`` uses
        ``bonobo.PrettyPrinter`` with no positional arguments. The general
        *option* values are resolved for both the reader and the writer,
        followed by their specific options. When *limit* is truthy a
        ``bonobo.Limit`` node precedes the resolved transformations.

        :return: whatever ``bonobo.run`` returns
        """
        common = option or []

        reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader)
        reader_kwargs = _resolve_options(common + (reader_option or []))

        pretty_print = output_filename == '-'
        if pretty_print:
            writer_factory, writer_args = bonobo.PrettyPrinter, ()
        else:
            writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer)
            writer_args = (output_filename, )
        writer_kwargs = _resolve_options(common + (writer_option or []))

        head = (bonobo.Limit(limit), ) if limit else ()
        transformations = head + _resolve_transformations(transformation)

        graph = bonobo.Graph()
        nodes = (
            reader_factory(input_filename, **reader_kwargs),
            *transformations,
            writer_factory(*writer_args, **writer_kwargs),
        )
        graph.add_chain(*nodes)

        return bonobo.run(graph, services={'fs': bonobo.open_fs()})
Exemple #14
0
	def __init__(self, input_path, contents, **kwargs):
		"""Set up the 'people' pipeline and load its CSV column headers.

		*contents* must provide ``header_file`` and ``files_pattern``.
		Optional keyword arguments read here: ``models`` (defaults to
		``settings.arches_models``), ``limit`` and ``debug`` (defaults to
		``False``). The first row of the header file, opened from a
		filesystem on *input_path*, is stored lower-cased in
		``self.contents_headers``.
		"""
		project_name = 'people'
		self.input_path = input_path
		self.services = None

		super().__init__(project_name, helper=PeopleUtilityHelper(project_name))

		self.graph = None
		self.limit = kwargs.get('limit')
		self.debug = kwargs.get('debug', False)
		self.models = kwargs.get('models', settings.arches_models)
		self.contents_header_file = contents['header_file']
		self.contents_files_pattern = contents['files_pattern']

		# Only the first row of the header file is consumed.
		data_fs = bonobo.open_fs(input_path)
		with data_fs.open(self.contents_header_file, newline='') as header_file:
			header_row = next(csv.reader(header_file))
			self.contents_headers = [column.lower() for column in header_row]
Exemple #15
0
    def setup_services(self):
        """Assemble the services dictionary for this pipeline.

        The result always holds a ``trace_counter`` and a project-specific
        ``fs.data.<project_name>`` filesystem opened on ``self.input_path``.
        Every path found under the common service directory, then under the
        project service directory, is handed to ``self._service_from_path``;
        truthy results are stored under the path's stem. A project entry that
        reuses an existing key replaces it after emitting a warning.

        Finally the ``materials_map`` service is rebuilt as a dict keyed by
        ``(object_type, frozenset_of_materials)``; an empty dict is stored
        when no such service was loaded.

        :return: dict mapping service names to service objects
        """
        services = {}
        services['trace_counter'] = itertools.count()
        services[f'fs.data.{self.project_name}'] = bonobo.open_fs(self.input_path)

        # Services shared by every project.
        shared_dir = pathlib.Path(settings.pipeline_common_service_files_path)
        if self.verbose:
            print(f'Common path: {shared_dir}', file=sys.stderr)
        for path in shared_dir.rglob('*'):
            loaded = self._service_from_path(path)
            if not loaded:
                continue
            services[path.stem] = loaded

        # Project-specific services; these win over anything loaded so far.
        project_dir = pathlib.Path(
            settings.pipeline_project_service_files_path(self.project_name))
        if self.verbose:
            print(f'Project path: {project_dir}', file=sys.stderr)
        for path in project_dir.rglob('*'):
            loaded = self._service_from_path(path)
            if not loaded:
                continue
            name = path.stem
            if name in services:
                warnings.warn(
                    f'*** Project is overloading a shared service file: {path}'
                )
            services[name] = loaded

        # Re-key the materials map by (object type, set of materials).
        remapped = {}
        for entry in services.get('materials_map', []):
            object_type = entry['object_type']
            raw_materials = entry['materials']
            if ';' in raw_materials:
                # Several materials: split on ';' and trim each name.
                materials = frozenset(
                    part.strip() for part in raw_materials.split(';'))
            else:
                # A single material is used verbatim (not trimmed).
                materials = frozenset([raw_materials])
            remapped[(object_type, materials)] = entry
        services['materials_map'] = remapped

        return services
Exemple #16
0
def test_read_json_from_file(tmpdir):
    """Check that JsonReader sends one item per element of a JSON array."""
    fs, filename = open_fs(tmpdir), 'input.json'
    # Close the handle explicitly so the fixture is flushed to disk before
    # the reader opens it, instead of relying on garbage collection.
    with fs.open(filename, 'w') as fp:
        fp.write('[{"x": "foo"},{"x": "bar"}]')
    reader = JsonReader(path=filename)

    context = CapturingNodeExecutionContext(reader, services={'fs': fs})

    context.start()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.stop()

    assert len(context.send.mock_calls) == 2

    # Each send() call must carry exactly one positional argument.
    args0, kwargs0 = context.send.call_args_list[0]
    assert len(args0) == 1 and not kwargs0
    args1, kwargs1 = context.send.call_args_list[1]
    assert len(args1) == 1 and not kwargs1

    assert args0[0].args[0] == {'x': 'foo'}
    assert args1[0].args[0] == {'x': 'bar'}
Exemple #17
0
def test_file_reader_in_context(tmpdir):
    """Check that FileReader sends one item per line of a text file."""
    fs, filename = open_fs(tmpdir), 'input.txt'

    # Close the handle explicitly so the fixture is flushed to disk before
    # the reader opens it, instead of relying on garbage collection.
    with fs.open(filename, 'w') as fp:
        fp.write('Hello\nWorld\n')

    reader = FileReader(path=filename)
    context = CapturingNodeExecutionContext(reader, services={'fs': fs})

    context.start()
    context.recv(BEGIN, Bag(), END)
    context.step()
    context.stop()

    assert len(context.send.mock_calls) == 2

    # Each send() call must carry exactly one positional argument.
    args0, kwargs0 = context.send.call_args_list[0]
    assert len(args0) == 1 and not kwargs0
    args1, kwargs1 = context.send.call_args_list[1]
    assert len(args1) == 1 and not kwargs1

    assert args0[0].args[0] == 'Hello'
    assert args1[0].args[0] == 'World'
Exemple #18
0
 def test_bonodoo_function_single(self):
     """Run an OdooModelFunction -> CsvWriter graph for a single-record result.

     The XML-RPC server proxy is patched so that ``execute_kw`` returns one
     dict; the written file is expected to contain a single line that
     evaluates to that dict.
     """
     filename = 'test_file.csv'
     read = OdooModelFunction(model='res.users', function='test_function')
     value_1 = {'id': 2}
     # The context manager removes the directory even when an assertion
     # below fails, which a trailing ``cleanup()`` call would not do.
     with tempfile.TemporaryDirectory() as folder:
         with patch('xmlrpc.client.ServerProxy') as mk:
             mock_server = mk.return_value
             mock_server.login.return_value = 1
             mock_server.execute_kw.return_value = value_1
             graph = Graph()
             graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
             bonobo.run(graph,
                        services={
                            'fs.data': bonobo.open_fs(folder),
                            'odoo.server': self.server,
                        })
             mk.assert_called()
         with open(os.path.join(folder, filename), 'r') as f:
             lines = f.readlines()
         self.assertEqual(len(lines), 1)
         self.assertEqual(ast.literal_eval(lines[0]), value_1)
Exemple #19
0
def create_container(services=None, factory=Container):
    """
    Create a container with reasonable default service implementations for commonly used, standard-named services.

    Services:
    - `fs` defaults to ``bonobo.open_fs()`` called without arguments
    - `http` defaults to the ``requests`` module

    :param services: optional initial services passed to *factory*; when falsy, *factory* is called without arguments
    :param factory: callable that builds the container
    :return: the container, guaranteed to hold `fs` and `http` entries
    """
    container = factory(services) if services else factory()

    # The imports are deferred so they only happen when a default is needed.
    if 'fs' not in container:
        import bonobo
        container.setdefault('fs', bonobo.open_fs())

    if 'http' not in container:
        import requests
        container.setdefault('http', requests)

    return container
Exemple #20
0
def create_container(services=None, factory=Container):
    """
    Create a container with reasonable default service implementations for commonly used, standard-named services.

    Services:
    - `fs` defaults to ``bonobo.open_fs()`` called without arguments
    - `http` defaults to the ``requests`` module

    :param services: optional initial services passed to *factory*; when falsy, *factory* is called without arguments
    :param factory: callable that builds the container
    :return: the container, guaranteed to hold `fs` and `http` entries
    """
    container = factory(services) if services else factory()

    # The imports are deferred so they only happen when a default is needed.
    if 'fs' not in container:
        import bonobo
        container.setdefault('fs', bonobo.open_fs())

    if 'http' not in container:
        import requests
        container.setdefault('http', requests)

    return container
Exemple #21
0
def get_services():
    """Return the services dict: a SQLAlchemy engine on ``paintstore.db`` and an output filesystem."""
    engine = sqlalchemy.create_engine('sqlite:///paintstore.db')
    output_fs = bonobo.open_fs()
    return {'sqlalchemy.engine': engine, 'fs.output': output_fs}
Exemple #22
0
 def get_services_for_writer(self, tmpdir):
     """Return ``(fs, filename, services)`` for a writer test rooted at *tmpdir*.

     The filename is ``'output.'`` followed by ``self.extension``.
     """
     fs = open_fs(tmpdir)
     filename = 'output.' + self.extension
     services = {'fs': fs}
     return fs, filename, services
Exemple #23
0
def get_services():
    """Return the examples' services extended with an ``fs.output`` filesystem."""
    services = {**examples.get_services()}
    services["fs.output"] = open_fs()
    return services
Exemple #24
0
def get_services():
    """Return the services dict with the datasets filesystem and the static examples filesystem."""
    datasets_fs = bonobo.open_fs(get_datasets_dir("datasets"))
    static_fs = bonobo.open_examples_fs("datasets", "static")
    return {"fs": datasets_fs, "fs.static": static_fs}
Exemple #25
0
def get_services(**options):
    """Return an ``http`` session with a custom User-Agent and a default ``fs`` filesystem.

    The session's ``headers`` attribute is replaced outright with a plain
    dict holding only the User-Agent entry. *options* is not used.
    """
    session = requests.Session()
    session.headers = {'User-Agent': 'Monkeys!'}
    services = {'http': session}
    services['fs'] = bonobo.open_fs()
    return services
Exemple #26
0
def get_services():
    """Return the services dict whose ``fs`` is opened on the datasets directory."""
    datasets_dir = get_datasets_dir('datasets')
    datasets_fs = bonobo.open_fs(datasets_dir)
    return {'fs': datasets_fs}
Exemple #27
0
    def __init__(self, input_path, contents, **kwargs):
        """Register the vocabulary used by the 'people' pipeline and set it up.

        *contents* must provide ``header_file`` and ``files_pattern``.
        Optional keyword arguments read here: ``models`` (defaults to
        ``settings.arches_models``), ``limit`` and ``debug`` (defaults to
        ``False``). The first row of the header file, opened from a
        filesystem on *input_path*, is stored lower-cased in
        ``self.contents_headers``.
        """
        project_name = 'people'
        self.input_path = input_path
        self.services = None

        # Each metatype instance is registered before the classes whose
        # "metatype" names it.
        # NOTE(review): presumably that order is required -- keep it.
        metatype_instances = (
            ('address', {
                'parent': model.Type,
                'id': '300386983',
                'label': 'Street Address'
            }),
            ('location', {
                'parent': model.Type,
                'id': '300393211',
                'label': 'Location'
            }),
            ('occupation', {
                'parent': model.Type,
                'id': '300263369',
                'label': 'Occupation'
            }),
        )
        for instance_name, spec in metatype_instances:
            vocab.register_instance(instance_name, spec)

        location_and_occupation_classes = (
            ('Residing', {
                "parent": model.Activity,
                "id": "300393179",
                "label": "Residing",
                "metatype": "location"
            }),
            ('Establishment', {
                "parent": model.Activity,
                "id": "300393212",
                "label": "Establishment",
                "metatype": "location"
            }),
            ('StreetAddress', {
                "parent": model.Identifier,
                "id": "300386983",
                "label": "Street Address"
            }),
            ("CreatingOccupation", {
                "parent": model.Activity,
                "id": "300404387",
                "label": "Creating Artwork",
                "metatype": "occupation"
            }),
            ("CollectingOccupation", {
                "parent": model.Activity,
                "id": "300077121",
                "label": "Collecting",
                "metatype": "occupation"
            }),
            ("DealingOccupation", {
                "parent": model.Activity,
                "id": "300055675",
                "label": "Commercial Dealing in Artwork",
                "metatype": "occupation"
            }),
            ("OwningOccupation", {
                "parent": model.Activity,
                "id": "300055603",
                "label": "Owning",
                "metatype": "occupation"
            }),
        )
        for class_name, spec in location_and_occupation_classes:
            vocab.register_vocab_class(class_name, spec)

        # The 'form type' metatype and the one class that uses it.
        form_type_spec = {
            'parent': model.Type,
            'id': '300444970',
            'label': 'Form'
        }
        vocab.register_instance('form type', form_type_spec)
        entry_text_form_spec = {
            "parent": model.LinguisticObject,
            "id": "300438434",
            "label": "Entry",
            "metatype": "form type"
        }
        vocab.register_vocab_class('EntryTextForm', entry_text_form_spec)

        super().__init__(project_name, helper=PeopleUtilityHelper(project_name))

        self.graph = None
        self.limit = kwargs.get('limit')
        self.debug = kwargs.get('debug', False)
        self.models = kwargs.get('models', settings.arches_models)
        self.contents_header_file = contents['header_file']
        self.contents_files_pattern = contents['files_pattern']

        # Only the first row of the header file is consumed.
        data_fs = bonobo.open_fs(input_path)
        with data_fs.open(self.contents_header_file, newline='') as header_file:
            header_row = next(csv.reader(header_file))
            self.contents_headers = [column.lower() for column in header_row]
Exemple #28
0
 def get_services_for_reader(self, tmpdir):
     """Write ``self.input_data`` to an input file under *tmpdir* and return ``(fs, filename, services)``.

     The file is named ``'input.'`` plus ``self.extension`` and is opened
     with ``self.mode``.
     """
     fs = open_fs(tmpdir)
     filename = 'input.' + self.extension
     with fs.open(filename, self.mode) as handle:
         handle.write(self.input_data)
     services = {'fs': fs}
     return fs, filename, services
Exemple #29
0
 def get_services_for_reader(self, tmpdir):
     """Create the reader's input file in *tmpdir* and return ``(fs, filename, services)``.

     ``self.input_data`` is written to ``"input." + self.extension`` using
     ``self.mode``.
     """
     fs = open_fs(tmpdir)
     filename = "input." + self.extension
     with fs.open(filename, self.mode) as target:
         target.write(self.input_data)
     return fs, filename, {"fs": fs}
Exemple #30
0
 def get_services_for_writer(self, tmpdir):
     """Return ``(fs, filename, services)`` for a writer test rooted at *tmpdir*.

     The filename is ``"output."`` followed by ``self.extension``.
     """
     fs = open_fs(tmpdir)
     filename = "output." + self.extension
     services = {"fs": fs}
     return fs, filename, services
Exemple #31
0
def get_services():
    """Return the services dict whose ``fs`` is opened on the datasets directory."""
    datasets_dir = get_datasets_dir('datasets')
    datasets_fs = bonobo.open_fs(datasets_dir)
    return {'fs': datasets_fs}
Exemple #32
0
def get_services():
    """Return the services dict: ``fs`` on the examples path and a default ``fs.output``."""
    examples_fs = open_fs(get_examples_path())
    output_fs = open_fs()
    return {'fs': examples_fs, 'fs.output': output_fs}
def get_services():
    """Return the services dict: ``fs`` on the examples' datasets and a default ``fs.output``."""
    datasets_fs = bonobo.open_examples_fs('datasets')
    output_fs = bonobo.open_fs()
    return {'fs': datasets_fs, 'fs.output': output_fs}
Exemple #34
0
 def get_services_for_writer(self, tmpdir):
     """Open a filesystem on *tmpdir* and return ``(fs, filename, services)``.

     The filename is ``'output.'`` followed by ``self.extension``.
     """
     fs = open_fs(tmpdir)
     filename = 'output.' + self.extension
     return fs, filename, dict(fs=fs)
Exemple #35
0
def get_services():
    """Return the services dict: ``fs`` on the examples' datasets and a default ``fs.output``."""
    services = {'fs': bonobo.open_examples_fs('datasets')}
    services['fs.output'] = bonobo.open_fs()
    return services
Exemple #36
0
def get_services():
    """Return the services dict: ``fs`` on the examples path and a default ``fs.output``."""
    services = {'fs': open_fs(get_examples_path())}
    services['fs.output'] = open_fs()
    return services
Exemple #37
0
def get_services():
    """Return the services dict whose ``fs`` is opened on this file's directory."""
    here = dirname(__file__)
    return {'fs': bonobo.open_fs(here)}