# Imports assumed from dfa-lib-python (the DfAnalyzer Python client).
from dfa_lib_python.attribute import Attribute
from dfa_lib_python.attribute_type import AttributeType
from dfa_lib_python.set import Set
from dfa_lib_python.set_type import SetType
from dfa_lib_python.transformation import Transformation


def test_set_output_pass():
    attributes = [Attribute("att1", AttributeType.TEXT)]
    sets = [Set("set1", SetType.OUTPUT, attributes)]
    output = [Set("set4", SetType.OUTPUT, attributes)]
    expected_result = [x.get_specification() for x in output]
    transformation = Transformation("tf1", sets=sets)
    transformation.output = output
    assert transformation.output == expected_result


def test_get_specification_pass():
    tag = "tf1"
    sets = [
        Set("set1", SetType.INPUT, [Attribute("att1", AttributeType.TEXT)])
    ]
    expected_result = {
        "sets": [x.get_specification() for x in sets],
        "tag": tag
    }
    transformation = Transformation(tag, sets)
    assert transformation.get_specification() == expected_result


def test_set_sets_pass():
    attributes = [Attribute("att1", AttributeType.TEXT)]
    sets = [Set("set1", SetType.INPUT, attributes)]
    new_sets = [
        Set("set1", SetType.INPUT, attributes),
        Set("set2", SetType.OUTPUT, attributes)
    ]
    expected_result = [x.get_specification() for x in new_sets]
    transformation = Transformation("tf1", sets=sets)
    transformation.sets = new_sets
    assert transformation.sets == expected_result


def test_get_input_pass():
    attributes = [Attribute("att1", AttributeType.TEXT)]
    sets = [Set("set1", SetType.INPUT, attributes)]
    expected_result = [sets[0].get_specification()]
    transformation = Transformation("tf1", sets=sets)
    assert transformation.input == expected_result
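
A complementary output-side check, mirroring test_get_input_pass (a minimal sketch; the test name is ours, but it uses only the API exercised above):

def test_get_output_pass():
    attributes = [Attribute("att1", AttributeType.TEXT)]
    sets = [Set("set1", SetType.OUTPUT, attributes)]
    expected_result = [sets[0].get_specification()]
    transformation = Transformation("tf1", sets=sets)
    assert transformation.output == expected_result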
Example #5
    ############################
    # PROVENANCE
    ############################

    dataflow_tag = "prov-df-{}".format(aggreg_unit)
    df = Dataflow(dataflow_tag)

    logger.info('Initializing the Spark processor')
    processador = ProcessadorSparkClass(logger, spark, df, dataflow_tag)

    ## PROSPECTIVE PROVENANCE
    # Transformation to extract the first stats: ExtrairStats1
    tf1 = Transformation('load_data')  ## using the Spark task name
    tf1_input = Set("i{}1".format('load_data'), SetType.INPUT,
        [
            Attribute("datafiles", AttributeType.TEXT),
            Attribute("tables", AttributeType.TEXT),
            Attribute("currenttime", AttributeType.TEXT),
            Attribute("aggregationunit", AttributeType.TEXT),
            Attribute("csvseparator", AttributeType.TEXT)
        ])

    tf1_output = Set("o{}1".format('load_data'), SetType.OUTPUT,
        [
            Attribute("currenttime", AttributeType.TEXT),
            Attribute("elapsedtime", AttributeType.NUMERIC)
        ])
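
Example #5 stops before the sets are attached to the transformation; following the pattern of Example #6 below, the continuation would presumably be (a sketch, not part of the original snippet):

    tf1.set_sets([tf1_input, tf1_output])
    df.add_transformation(tf1)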
Example #6
#-------------------------------------
#dirin_do_ficheiro = sys.argv[0]
#dirin_arg_pas = sys.argv[0:]
###print "O nome do diretorio de entrada do ficheiro e: " + dirin_do_ficheiro 
###print "E os argumentos passados sao: " + str(dirin_arg_pas)


############################
# PROVENANCE
############################
dataflow_tag = "mafft-df"
df = Dataflow(dataflow_tag)

## PROSPECTIVE PROVENANCE
# Transformation to extract the file names: ExtrairNome
tf1 = Transformation("ExtrairNome")
tf1_input = Set("iExtrairNome", SetType.INPUT,
  [Attribute("DIRIN_FILE", AttributeType.FILE)])
tf1_output = Set("oExtrairNome", SetType.OUTPUT,
  [Attribute("FASTA_FILE", AttributeType.FILE),
  Attribute("MAFFT_FILE", AttributeType.FILE)])
tf1.set_sets([tf1_input, tf1_output])
df.add_transformation(tf1)

# Transformation to read the file and count the number of sequences: ContarSequencias
tf2 = Transformation("ContarSequencias")
tf2_input = Set("iContarSequencias", SetType.INPUT,
  [Attribute("FASTA_FILE", AttributeType.FILE)])#leitor file-fasta/att text-file
tf2_output = Set("oContarSequencias", SetType.OUTPUT,
  [Attribute("NUMERO_SEQUENCIAS", AttributeType.NUMERIC)])
tf2.set_sets([tf2_input, tf2_output])
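
The snippet ends here; by analogy with tf1 above, the remaining steps would presumably register tf2 and persist the prospective dataflow (a sketch; df.save() is the usual dfa-lib-python call, but its use here is our assumption):

df.add_transformation(tf2)
df.save()  # assumption: sends the prospective provenance to the DfAnalyzer service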