Ejemplo n.º 1
0
	def _construct_graph(self, single_graph=False, services=None):
		"""Create the bonobo graph(s) and attach every component sub-graph.

		The geography component is added to the first graph; the abstracts,
		journals, series, people and corp components are added to the second.
		With ``single_graph`` truthy both roles are played by one shared graph.

		:param single_graph: when truthy, build one graph, store it on
			``self.graph_0`` and return it wrapped in a list.
		:param services: accepted but not used by this method.
		:return: ``[graph]`` in single-graph mode, otherwise ``self.graphs``
			(a two-element list).
		"""
		if single_graph:
			geog_graph = main_graph = bonobo.Graph()
		else:
			geog_graph = bonobo.Graph()
			main_graph = bonobo.Graph()

		self._add_geog_graph(geog_graph)

		self._add_abstracts_graph(main_graph)
		self._add_journals_graph(main_graph)
		self._add_series_graph(main_graph)
		self._add_people_graph(main_graph)
		self._add_corp_graph(main_graph)
		# Currently disabled components:
		# 		self._add_tal_graph(main_graph)
		# 		self._add_subject_graph(main_graph)

		if not single_graph:
			self.graphs = [geog_graph, main_graph]
			return self.graphs

		# In single-graph mode both names refer to the same object.
		self.graph_0 = geog_graph
		return [geog_graph]
Ejemplo n.º 2
0
def test_postgres(postgres):
    """Round-trip ten rows through two tables using bonobo_sqlalchemy nodes.

    Steps:
      1. create the role and database, then the schema from ``metadata``;
      2. run ``extract`` into ``InsertOrUpdate(TABLE_1)`` and read TABLE_1 back;
      3. copy TABLE_1 into TABLE_2 via ``Select`` and ``InsertOrUpdate``,
         then read TABLE_2 back.

    :param postgres: container description; the host port is read from
        ``['NetworkSettings']['Ports']['5432/tcp'][0]['HostPort']``.
    """
    # bonobo.settings.QUIET.set(True)

    database = 'my_db'
    host_port = postgres['NetworkSettings']['Ports']['5432/tcp'][0]['HostPort']
    wait_for_postgres(host_port)

    admin_engine = create_root_engine(host_port)
    _execute_sql(admin_engine, "CREATE ROLE my_user WITH LOGIN PASSWORD '';")
    create_db_sql = (
        'CREATE DATABASE {name} WITH OWNER=my_user TEMPLATE=template0 ENCODING="utf-8"'
    ).format(name=database)
    _execute_sql(admin_engine, create_db_sql)

    user_engine = create_engine('my_user', database, host_port)
    metadata.create_all(user_engine)

    services = {'sqlalchemy.engine': user_engine}

    # The ten rows (index, 'value for <index>') expected in both tables,
    # in the (args, kwargs) shape compared against the Bufferize buffer.
    expected_rows = [((i, 'value for %d' % i), {}) for i in range(10)]

    def run_ok(pipeline):
        # Every run must return a truthy result.
        assert bonobo.run(pipeline, services=services)

    def dump_table(table_name):
        # Select the whole table into a fresh buffer and return its content.
        sink = Bufferize()
        reader = bonobo.Graph()
        reader.add_chain(
            bonobo_sqlalchemy.Select('SELECT * FROM ' + table_name),
            sink,
        )
        run_ok(reader)
        return sink.buffer

    loader = bonobo.Graph()
    loader.add_chain(extract, bonobo_sqlalchemy.InsertOrUpdate(TABLE_1))
    run_ok(loader)

    assert dump_table(TABLE_1) == expected_rows

    copier = bonobo.Graph(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_1),
        bonobo_sqlalchemy.InsertOrUpdate(TABLE_2),
    )
    run_ok(copier)

    assert dump_table(TABLE_2) == expected_rows
Ejemplo n.º 3
0
def get_graph_old_file():
    """Build the graph that processes the old file list.

    The chain is ``extract_old_file`` -> ``LineFormatter.reformat`` (formatter
    constructed from ``old_file_list_file``) -> ``load_old``.

    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()
    formatter = LineFormatter(old_file_list_file)
    steps = (extract_old_file, formatter.reformat, load_old)
    pipeline.add_chain(*steps)
    return pipeline
Ejemplo n.º 4
0
def get_inventory_graph(**options):
    """
    Build the inventory graph.

    The chain, named ``main``, reads 'Deckbox-inventory.csv', applies a
    ``bonobo.Filter`` whose predicate checks that the last positional field
    is not 'English', passes rows to ``inventory`` and renames two columns.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """

    def last_field_not_english(*row):
        # Predicate handed to bonobo.Filter.
        return row[-1] != 'English'

    pipeline = bonobo.Graph()

    reader = bonobo.CsvReader('Deckbox-inventory.csv')
    row_filter = bonobo.Filter(last_field_not_english)
    renamer = bonobo.Rename(Card_Number='Card Number',
                            Tradelist_Count='Tradelist Count')

    # Disabled database sink, kept for reference:
    #   bonobo_sqlalchemy.InsertOrUpdate(
    #       'cards',
    #       discriminant=('Name', 'Edition'),  # 'Card_Number', 'Foil'
    #       engine='cards')
    pipeline.add_chain(reader, row_filter, inventory, renamer, _name='main')

    return pipeline
Ejemplo n.º 5
0
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    The main chain reads '/etl/metrics-insights/workday-users.csv' (fs
    'brickftp'), passes rows through ``employee_active`` and
    ``find_badge_id``, unpacks item 0 and ends on a ``bonobo.noop`` node.
    One ``InsertOrUpdate`` chain per distinct engine is then attached with
    ``_input`` set to that noop node.

    :param options: must provide ``engine`` (iterable of hashable engine
        identifiers), ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph
    :raises KeyError: if a required option is missing.

    """

    graph = bonobo.Graph()

    # Shared node that every per-engine writer chain uses as its input.
    split_dbs = bonobo.noop

    graph.add_chain(
        bonobo.CsvReader('/etl/metrics-insights/workday-users.csv',
                         fs='brickftp'),
        employee_active,
        find_badge_id,
        bonobo.UnpackItems(0),
        split_dbs,
    )

    # dict.fromkeys() drops duplicate engines like set() did, but keeps the
    # order in which they were given, so the graph is built deterministically
    # and no throwaway list is created.
    for engine in dict.fromkeys(options['engine']):
        writer = bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table_name'] + options['table_suffix'],
            discriminant=('badgeid', ),
            buffer_size=10,
            engine=engine)
        graph.add_chain(writer, _input=split_dbs)

    return graph
Ejemplo n.º 6
0
def get_cache_graph(**options):
    """
    Build the graph that caches badges from the ccure badge export.

    The pipe-delimited file is read with explicit field names (real columns
    interleaved with ``emptyN`` placeholders), then rows go through
    ``badge_active`` and ``cache``.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph

    """
    pipeline = bonobo.Graph()

    badge_columns = (
        'badge_id', 'empty1',
        'last_name', 'empty2',
        'first_name', 'empty3',
        'issued_on', 'empty4',
        'disabled', 'empty5',
        'valid_until', 'empty6',
        'flag2', 'empty7',
        'flag3', 'empty8',
        'flag4',
    )
    badge_reader = bonobo.CsvReader(
        '/etl/ccure/uploads/BadgeID/ccure_BadgeID_AllButVendor.txt',
        fields=badge_columns,
        delimiter='|',
        fs='brickftp',
    )

    pipeline.add_chain(badge_reader, badge_active, cache)

    return pipeline
Ejemplo n.º 7
0
def get_bu_graph(**options):
    """
    Build the business-unit graph with two tab-separated exports.

    The first chain ends on ``centerstone_BussUnit_remap``; the two later
    chains are both attached with ``_input=centerstone_BussUnit_remap`` and
    write ProductLineLevel1 and TeamLevel3 files to the 'brickftp' filesystem.

    :param options: must provide ``suffix``, appended to both output paths.
    :return: bonobo.Graph
    """

    def tsv_writer(base_path):
        # Writer settings shared by both exports.
        return bonobo.CsvWriter(base_path + options['suffix'],
                                lineterminator="\n",
                                delimiter="\t",
                                fs="brickftp")

    pipeline = bonobo.Graph()

    base_chain = (
        get_business_units,
        join_cost_centers,
        centerstone_BU_SupOrg_Merge_remap,
        centerstone_BussUnit_remap,
    )
    pipeline.add_chain(*base_chain)

    # Debug aids previously placed at the head of this chain:
    # bonobo.Limit(3), bonobo.PrettyPrinter()
    pipeline.add_chain(
        productLineLevel1_remap,
        unique_product_line,
        bonobo.UnpackItems(0),
        bonobo.PrettyPrinter(),
        tsv_writer('/etl/centerstone/downloads/ProductLineLevel1.txt'),
        _input=centerstone_BussUnit_remap,
    )

    pipeline.add_chain(
        teamLevel3_remap,
        bonobo.UnpackItems(0),
        tsv_writer('/etl/centerstone/downloads/TeamLevel3.txt'),
        _input=centerstone_BussUnit_remap,
    )

    return pipeline
Ejemplo n.º 8
0
def get_graph(**options):
    """
    Assemble the raw-file processing graph.

    Chains, in the order they are added:
      * load_config -> load_existing_files -> extract_raw_folders
        -> extract_raw_files_from_folder
      * data_copy, added with ``_input=None`` and named "copy"
      * process_patient_data (input: extract_raw_files_from_folder,
        output: "copy")
      * process_image (input: process_patient_data, output: "copy")
      * process_dicom_data -> upload_text_data (input: process_image)

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()

    discovery_chain = (
        load_config,
        load_existing_files,
        extract_raw_folders,
        extract_raw_files_from_folder,
    )
    pipeline.add_chain(*discovery_chain)

    pipeline.add_chain(data_copy, _input=None, _name="copy")

    # Each step takes its input from the previous stage and outputs to "copy".
    # (A bonobo.Limit(30) used to precede each step; it is disabled.)
    copy_feeding_steps = (
        (process_patient_data, extract_raw_files_from_folder),
        (process_image, process_patient_data),
    )
    for step, upstream in copy_feeding_steps:
        pipeline.add_chain(step, _input=upstream, _output="copy")

    pipeline.add_chain(process_dicom_data, upload_text_data,
                       _input=process_image)

    return pipeline
Ejemplo n.º 9
0
    def _generate_graph(self) -> None:
        """Build the bonobo graph for this report and store it on ``self._graph``.

        For every task in ``self._tasks`` (a mapping of task id to task):
          - if ``children`` is not None, the task is chained to each child
            whose id is not None (``graph >> task >> child``);
          - otherwise, if ``parents`` is None, the task is added on its own
            (``graph >> task``).

        Nothing is returned. ``self._graph`` is set to the graph, or to
        ``None`` when no node was added.
        """
        pipeline = bonobo.Graph()
        task_by_id = self._tasks

        for task in task_by_id.values():
            if task.children is None:
                # Without a children list, only parentless tasks are added here.
                if task.parents is None:
                    pipeline >> task
                continue

            for child_id in task.children:
                if child_id is None:
                    continue
                pipeline >> task >> task_by_id[child_id]

        if len(pipeline.nodes) > 0:
            self._graph = pipeline
        else:
            self._graph = None
def get_graph(**options):
    """
    Assemble the extraction graph around a shared "extractor" node.

    A ``DataExtractor()`` node is added with ``_input=None`` and named
    "extractor"; three producer chains are then added with
    ``_output="extractor"``: clinical files, image metadata files and
    storage stats.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()

    pipeline.add_chain(DataExtractor(), _input=None, _name="extractor")

    producer_chains = (
        (list_clinical_files, load_clinical_files),
        (list_image_metadata_files, load_image_metadata_files),
        (get_storage_stats, ),
    )
    for producer_chain in producer_chains:
        pipeline.add_chain(*producer_chain, _output="extractor")

    return pipeline
Ejemplo n.º 11
0
    def build_graph(self, **options):
        """Create the extract -> transform -> load graph.

        The graph is stored on ``self.graph`` and also returned.

        :param options: accepted for interface compatibility; not read here.
        :return: bonobo.Graph
        """
        pipeline = bonobo.Graph()
        self.graph = pipeline
        pipeline.add_chain(self.extract, self.transform, self.load)
        return pipeline
Ejemplo n.º 12
0
def get_graph_locations_with_mapping_other():
    """Build the graph for the "locations with mapping other" file.

    The chain is ``extract_locations_with_mapping_other`` ->
    ``LineFormatter.reformatMapping`` (formatter constructed from
    ``file_locations_with_mapping_other``) -> ``load_mapping_other``.

    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()
    formatter = LineFormatter(file_locations_with_mapping_other)
    steps = (
        extract_locations_with_mapping_other,
        formatter.reformatMapping,
        load_mapping_other,
    )
    pipeline.add_chain(*steps)
    return pipeline
Ejemplo n.º 13
0
    def run(self, services=None, **options):
        '''Run the People bonobo pipeline.

        Steps:
          1. obtain services via ``self.get_services(**options)`` unless a
             truthy ``services`` was passed;
          2. build the main graph with ``self.get_graph`` and run it;
          3. for every model in ``self.static_instances.used_instances()``,
             build and run a small graph serializing its instances;
          4. dump ``services['people_groups']`` as JSON to
             ``people_groups.json`` under ``settings.pipeline_tmp_path``.

        All progress messages are written to stderr.

        :param services: optional services mapping; must contain
            ``'people_groups'`` by the time step 4 runs.
        :param options: forwarded to ``get_services`` and ``get_graph``.
        '''
        print(f'- Limiting to {self.limit} records per file', file=sys.stderr)
        if not services:
            services = self.get_services(**options)

        print('Running graph component...', file=sys.stderr)
        graph = self.get_graph(**options, services=services)
        self.run_graph(graph, services=services)

        print('Serializing static instances...', file=sys.stderr)
        for model, instances in self.static_instances.used_instances().items():
            g = bonobo.Graph()
            # NOTE(review): the return value was never used; the call is kept
            # in case it has side effects. Confirm and drop if it is pure.
            self.serializer_nodes_for_model(model=self.models[model],
                                            use_memory_writer=False)
            values = instances.values()
            source = g.add_chain(GraphListSource(values))
            self.add_serialization_chain(g,
                                         source.output,
                                         model=self.models[model],
                                         use_memory_writer=False)
            self.run_graph(g, services={})

        # Sent to stderr like every other progress message in this method
        # (it previously went to stdout).
        print('Writing people-groups mapping data to disk', file=sys.stderr)
        pg_file = pathlib.Path(
            settings.pipeline_tmp_path).joinpath('people_groups.json')
        with pg_file.open('w') as fh:
            json.dump(services['people_groups'], fh)
Ejemplo n.º 14
0
def get_costcenter_graph(**options):
    """
    Build the cost-center export graph.

    A single chain named "main" runs ``get_cost_centers`` ->
    ``cache_cost_centers`` -> ``centerstone_CostCenter_remap``, unpacks
    item 0, writes the rows as tab-separated text to two filesystems
    ('brickftp' and 'centerstone') and ends on ``bonobo.count``.

    :param options: must provide ``suffix``, appended to both output paths.
    :return: bonobo.Graph

    """
    pipeline = bonobo.Graph()

    unpack = bonobo.UnpackItems(0)

    # Can't skip the header, but must
    brickftp_writer = bonobo.CsvWriter(
        '/etl/centerstone/downloads/CostCenterLevel2.txt' + options['suffix'],
        lineterminator="\n",
        delimiter="\t",
        fs="brickftp",
    )
    centerstone_writer = bonobo.CsvWriter(
        'CostCenterLevel2.txt' + options['suffix'],
        lineterminator="\n",
        delimiter="\t",
        fs="centerstone",
    )

    # A bonobo.PrettyPrinter() debugging node used to sit after the remap step.
    pipeline.add_chain(
        get_cost_centers,
        cache_cost_centers,
        centerstone_CostCenter_remap,
        unpack,
        brickftp_writer,
        centerstone_writer,
        bonobo.count,
        _name="main",
    )

    return pipeline
Ejemplo n.º 15
0
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    The chain named "main" runs ``extract_accounts`` -> ``transform`` ->
    ``valid_aws_account``, unpacks item 0 and ends on a ``bonobo.noop``
    node. One ``InsertOrUpdate`` chain per distinct engine is then attached
    with ``_input`` set to that noop node.

    :param options: must provide ``engine`` (iterable of hashable engine
        identifiers), ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph
    :raises KeyError: if a required option is missing.

    """
    graph = bonobo.Graph()

    # Shared node that every per-engine writer chain uses as its input.
    split_dbs = bonobo.noop

    graph.add_chain(extract_accounts,
                    transform,
                    valid_aws_account,
                    bonobo.UnpackItems(0),
                    split_dbs,
                    _name="main")

    # dict.fromkeys() drops duplicate engines like set() did, but keeps the
    # order in which they were given, so the graph is built deterministically
    # and no throwaway list is created.
    for engine in dict.fromkeys(options['engine']):
        writer = bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table_name'] + options['table_suffix'],
            discriminant=('linked_account_number', ),
            engine=engine)
        graph.add_chain(writer, _input=split_dbs)

    return graph
Ejemplo n.º 16
0
def main():
    """Chain the CSV extract, transform and warehouse load steps, then run them."""
    etl_steps = (extract_data_from_csv, transform_data, load_data_to_dw)
    pipeline = bonobo.Graph(*etl_steps)
    bonobo.run(pipeline)
Ejemplo n.º 17
0
def get_graph(**options):
    """Build a graph where ``plot`` and ``analytics`` both take ``transform`` as input.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()
    pipeline.add_chain(extract, transform)

    # Two separate chains, each attached with _input=transform.
    for consumer in (plot, analytics):
        pipeline.add_chain(consumer, _input=transform)

    return pipeline
Ejemplo n.º 18
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Extracts the "liste-des-cafes-a-un-euro" dataset (cafes at one euro in
    Paris), renames the name, address and zipcode fields, adds city and
    country, reorders the fields and writes csv, json and ldjson files.

    :param graph: existing graph to extend; a new ``bonobo.Graph`` is
        created only when this is ``None``.
    :param _limit: positional arguments for the ``PartialGraph`` placed
        right after the reader.
    :param _print: positional arguments for the ``PartialGraph`` placed at
        the end of the producer chain.
    :return: the graph that was extended (the one passed in, if any).

    """
    # Test identity rather than truthiness: an empty Graph has length 0 and
    # is therefore falsy, so ``graph or bonobo.Graph()`` would silently
    # replace a caller-supplied empty graph with a new one.
    if graph is None:
        graph = bonobo.Graph()

    producer = (
        graph.get_cursor() >> ODSReader(dataset="liste-des-cafes-a-un-euro",
                                        netloc="opendata.paris.fr") >>
        PartialGraph(*_limit) >> bonobo.UnpackItems(0) >> bonobo.Rename(
            name="nom_du_cafe", address="adresse", zipcode="arrondissement") >>
        bonobo.Format(city="Paris", country="France") >> bonobo.OrderFields([
            "name", "address", "zipcode", "city", "country", "geometry",
            "geoloc"
        ]) >> PartialGraph(*_print))

    # Each writer below starts a new cursor at the producer's output.

    # Comma separated values.
    graph.get_cursor(producer.output) >> bonobo.CsvWriter(
        "coffeeshops.csv",
        fields=["name", "address", "zipcode", "city"],
        delimiter=",")

    # Standard JSON
    graph.get_cursor(
        producer.output) >> bonobo.JsonWriter(path="coffeeshops.json")

    # Line-delimited JSON
    graph.get_cursor(
        producer.output) >> bonobo.LdjsonWriter(path="coffeeshops.ldjson")

    return graph
Ejemplo n.º 19
0
def get_raw_threads_graph(day):
    """Build the graph that fetches one day of channel messages and stores raw threads.

    :param day: start of the fetch window; the window end passed to
        ``MessagesFetcher`` is ``day + datetime.timedelta(days=1)``.
    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()

    next_day = day + datetime.timedelta(days=1)
    steps = (
        ChannelsSource(day),
        MessagesFetcher(day, next_day),
        remove_invalid_messages,
        process_channel_message,
        add_thread_to_message,
        db.JsonRawThreadsWriter(day),
    )
    pipeline.add_chain(*steps)

    return pipeline
 def build_graph(self):
     """Return a graph chaining generate_data -> uppercase -> output."""
     steps = (self.generate_data, self.uppercase, self.output)
     return bonobo.Graph(*steps)
Ejemplo n.º 21
0
    def __init__(self):
        """Initialize the command with a read_recs -> write_recs graph on ``self.graph``."""
        steps = (self.read_recs, self.write_recs)
        self.graph = bonobo.Graph(*steps)
Ejemplo n.º 22
0
def get_graph(**options):
    """Build a two-node graph: select all rows of ``table``, insert-or-update ``table_1``.

    Both nodes use the 'sqlalchemy.pgengine' engine service name.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    source = bonobo_sqlalchemy.Select('SELECT * FROM table',
                                      engine='sqlalchemy.pgengine')
    sink = bonobo_sqlalchemy.InsertOrUpdate(table_name='table_1',
                                            engine='sqlalchemy.pgengine')
    return bonobo.Graph(source, sink)
Ejemplo n.º 23
0
def get_graph(**options):
    """
    Build the AWS accounts graph with JSON, CSV and database outputs.

    Chains, in the order they are added:
      * "main": extract_accounts -> transform
        -> JsonWriter('aws_accounts_ex.json') -> valid_aws_account
      * JsonWriter('aws_accounts.json'), with ``_input="main"``
      * UnpackItems(0) -> CsvWriter('aws_accounts.csv'), with
        ``_input=valid_aws_account``
      * UnpackItems(0) -> InsertOrUpdate on 'aws_accounts' + table suffix,
        with ``_input=valid_aws_account``

    :param options: must provide ``table_suffix``.
    :return: bonobo.Graph

    """
    pipeline = bonobo.Graph()

    pipeline.add_chain(
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
        _name="main",
    )

    pipeline.add_chain(bonobo.JsonWriter('aws_accounts.json'), _input="main")

    csv_branch = (bonobo.UnpackItems(0), bonobo.CsvWriter('aws_accounts.csv'))
    pipeline.add_chain(*csv_branch, _input=valid_aws_account)

    db_unpack = bonobo.UnpackItems(0)
    db_writer = bonobo_sqlalchemy.InsertOrUpdate(
        table_name='aws_accounts' + options['table_suffix'],
        discriminant=('account_id', ),
        engine='db',
    )
    pipeline.add_chain(db_unpack, db_writer, _input=valid_aws_account)

    return pipeline
Ejemplo n.º 24
0
def test_execution():
    """Run a three-chain graph and check what each StoreInList sink collected.

    ``extract_nt`` and ``extract_bt`` each go through ``transform_using_args``;
    ``transform_nt`` is a second chain attached with ``_input=extract_nt``.
    """
    collected_args, collected_nt, collected_bt = [], [], []

    # Both chains going through transform_using_args must collect these rows.
    expected_via_args = [
        (2, "Guido", "guido.py"),
        (4, "Larry", "larry.pl"),
        (6, "Dennis", "dennis.c"),
        (8, "Yukihiro", "yukihiro.rb"),
    ]
    expected_nt = [
        (1, "GUIDO", ".py"),
        (2, "LARRY", ".pl"),
        (3, "DENNIS", ".c"),
        (4, "YUKIHIRO", ".rb"),
    ]

    pipeline = bonobo.Graph()
    pipeline.add_chain(extract_nt, transform_using_args,
                       StoreInList(collected_args))
    pipeline.add_chain(transform_nt, StoreInList(collected_nt),
                       _input=extract_nt)
    pipeline.add_chain(extract_bt, transform_using_args,
                       StoreInList(collected_bt))

    with GraphExecutionContext(pipeline) as execution:
        execution.run_until_complete()

    assert collected_args == expected_via_args

    assert collected_nt == expected_nt

    assert collected_bt == expected_via_args
Ejemplo n.º 25
0
def get_graph(**options):
    """Build a graph chaining extract_fablabs -> Limit(10) -> PrettyPrinter.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()
    steps = (extract_fablabs, bonobo.Limit(10), bonobo.PrettyPrinter())
    pipeline.add_chain(*steps)
    return pipeline
Ejemplo n.º 26
0
def get_graph_stg():
	"""Build the staging graph: source qualifier -> t_transform -> target load.

	The source node is configured from ``sources['staging_1']`` and the
	target node from ``targets['target']``.

	:return: bonobo.Graph
	"""
	pipeline = bonobo.Graph()
	staging_source = source.SourceQualifier(**sources['staging_1'])
	load_step = target.LoadTarget(**targets['target'])
	pipeline.add_chain(staging_source, t_transform, load_step)
	return pipeline
Ejemplo n.º 27
0
def get_graph(**options):
    """Build a graph chaining extract_fablabs -> Limit(10) -> write_repr_to_file.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()
    steps = (extract_fablabs, bonobo.Limit(10), write_repr_to_file)
    pipeline.add_chain(*steps)
    return pipeline
Ejemplo n.º 28
0
def main():
    """Chain the spreadsheet extract, transform and load steps, then run them."""
    etl_steps = (
        extract_data_from_xlxs,
        transform_data,
        load_into_new_xlsx_file,
    )
    pipeline = bonobo.Graph(*etl_steps)
    bonobo.run(pipeline)
Ejemplo n.º 29
0
def test_run_graph_noop():
    """A single-noop graph has length 1 and ``bonobo.run`` returns a GraphExecutionContext."""

    def not_interactive():
        # Replacement for the interactive-console check during the run.
        return False

    pipeline = bonobo.Graph(bonobo.noop)
    assert len(pipeline) == 1

    with patch('bonobo._api._is_interactive_console',
               side_effect=not_interactive):
        outcome = bonobo.run(pipeline)

    assert isinstance(outcome, GraphExecutionContext)
Ejemplo n.º 30
0
def get_graph():
    """Build a graph that chains ``extract`` to the builtin ``print``.

    :return: bonobo.Graph
    """
    pipeline = bonobo.Graph()
    steps = (extract, print)
    pipeline.add_chain(*steps)
    return pipeline