コード例 #1
0
def process(client, params):
    """
    Run the COVID-19 extract-transform-load pipeline.

    The preparation node runs first. Every entry of
    ``params.files_to_download`` is then assigned to ``params.file`` and
    taken through the gathering, transform and storage nodes, in that
    order. The visualization node is handled once, after all files.

    A node's ``update`` is only called when its ``done`` check returns a
    falsy value.
    """
    def refresh(node):
        # Leave the node alone when it already reports itself as done.
        if node.done(client, params):
            return
        node.update(client, params)

    data_preparation.run(client, params)

    for pending_file in params.files_to_download:
        # The nodes read the current file from ``params``.
        params.file = pending_file

        refresh(data_gathering)
        refresh(data_transform)
        refresh(data_storage)

    refresh(data_viz)
コード例 #2
0
def process(client, params):
    """
    Run the ETL pipeline: requirements check, extraction, transform, storage.

    Each of the three data nodes is updated only when its ``done`` check
    returns a falsy value.
    """
    # The requirements check fails when something is missing.
    requirements.check(client, params)

    extraction_ready = data_extraction.done(client, params)
    if not extraction_ready:
        data_extraction.update(client, params)

    transform_ready = data_transform.done(client, params)
    if not transform_ready:
        data_transform.update(client, params)

    storage_ready = data_storage.done(client, params)
    if not storage_ready:
        data_storage.update(client, params)
コード例 #3
0
ファイル: pipeline.py プロジェクト: serachung/ironhack_labs
def process(client, params):
    """
    Run the ETL pipeline.

    Checks the data requirements, then visits the gathering, transform and
    storage nodes in order, updating each one whose ``done`` check returns
    a falsy value.
    """
    def bring_up_to_date(node):
        # Skip nodes that already report themselves as done.
        if node.done(client, params):
            return
        node.update(client, params)

    data_requirements.check(client, params)

    bring_up_to_date(data_gathering)
    bring_up_to_date(data_transform)
    bring_up_to_date(data_storage)
コード例 #4
0
def process(client, params):
    """
    Run the ETL pipeline.

    Runs the preparation node, then the gathering, transform, storage and
    visualization nodes. The transform node returns three data frames,
    which are stored one by one under fixed names.

    Parameters
    ----------
    client, params
        Passed through unchanged to every node.

    Raises
    ------
    RuntimeError
        If the storage node is still pending but the transform node
        reported itself as done, so this run produced no frames to store.
        (Previously this case failed with an ``UnboundLocalError``.)
    """
    data_preparation.run(client, params)

    # Unlike the other nodes, ``data_gathering.done`` returns a sized
    # collection here; a non-empty result triggers the update.
    # Presumably it lists what still has to be gathered - TODO confirm.
    gather_done = data_gathering.done(client, params)
    if len(gather_done) > 0:
        data_gathering.update(client, params, gather_done)

    # NOTE(review): this hard-coded value discards the result computed
    # above. It is kept to preserve behavior, but it looks like a
    # debugging leftover - confirm.
    gather_done = ['onibus']

    # Stays None when the transform step is skipped, so the storage step
    # can detect that there is nothing to store.
    frames = None
    if not data_transform.done(client, params, gather_done):
        df_pass, df_linha, df_metro = data_transform.update(
            client, params, gather_done)
        frames = (
            (df_pass, '******'),
            (df_linha, 'linha'),
            (df_metro, 'metro'),
        )

    if not data_storage.done(client, params):
        if frames is None:
            raise RuntimeError(
                "data_storage is pending but data_transform reported done; "
                "no transformed data frames are available to store")
        for frame, name in frames:
            data_storage.update(client, params, frame, name)

    if not data_viz.done(client, params):
        data_viz.update(client, params)
コード例 #5
0
ファイル: pipeline.py プロジェクト: ahcamachod/Labs
def process(client, params):
    """
    Run the extract-transform-load pipeline.

    After the preparation node has run, the gathering, transform and
    storage nodes are visited in order. A node is updated only when its
    ``done`` check returns a falsy value.
    """
    data_preparation.run(client, params)

    gathered = data_gathering.done(client, params)
    if not gathered:
        data_gathering.update(client, params)

    transformed = data_transform.done(client, params)
    if not transformed:
        data_transform.update(client, params)

    stored = data_storage.done(client, params)
    if not stored:
        data_storage.update(client, params)
コード例 #6
0
def process(client, params):
    """
    The ETL pipeline.

    It contains the main nodes of the extract-transform-load
    pipeline from the process.

    Parameters
    ----------
    client : Client
    params : Params

    Notes
    -----
    The main idea is to consider each task as a conceptual **node**.
    This function, `process`, is the **pipeline** that integrates all
    tasks together. Each node is a .py file imported from the `nodes`
    directory.

    Each node can be in one of the following states:

    - up-to-date: the task to be done given the input parameters is
      already completed. Hence, no rework is needed.
    - out-of-date: the task to be done is not completed and should be
      run.

    A node's ``update`` is called only when its ``done`` check returns a
    falsy value.
    """
    # The body uses spaces only: the docstring was indented with spaces
    # and the statements with tabs, which Python 3 rejects with TabError.
    data_preparation.run(client, params)

    if not data_gathering.done(client, params):
        data_gathering.update(client, params)

    if not data_transform.done(client, params):
        data_transform.update(client, params)

    if not data_storage.done(client, params):
        data_storage.update(client, params)

    if not data_viz.done(client, params):
        data_viz.update(client, params)
コード例 #7
0
def process(client, params):
    """
    Core of the ETL pipeline.

    Checks the requirements, then assigns each entry of
    ``params.csv_files`` to ``params.file_url`` and takes it through the
    extraction, transform and storage nodes. A node is updated only when
    its ``done`` check returns a falsy value.
    """
    def ensure(node):
        # Nothing to do for a node that already reports itself as done.
        if node.done(client, params):
            return
        node.update(client, params)

    # The requirements check fails when something is missing.
    # Presumably it also fills in ``params.csv_files`` - TODO confirm.
    requirements.check(client, params)

    for csv_url in params.csv_files:
        # The nodes read the current URL from ``params``.
        params.file_url = csv_url

        ensure(data_extraction)
        ensure(data_transform)
        ensure(data_storage)