# NOTE: import paths below assume the dfa-lib-python package layout; adjust if needed.
from dfa_lib_python.attribute import Attribute
from dfa_lib_python.attribute_type import AttributeType
from dfa_lib_python.set import Set
from dfa_lib_python.set_type import SetType
from dfa_lib_python.transformation import Transformation


def test_set_attribute_type_pass():
    name = "att1"
    type = AttributeType.TEXT
    new_type = AttributeType.NUMERIC
    attribute = Attribute(name, type)
    attribute.type = new_type
    assert attribute.type == new_type.value
def test_set_name_pass():
    name = "att1"
    new_name = "att-modified"
    type = AttributeType.TEXT
    attribute = Attribute(name, type)
    attribute.name = new_name
    assert attribute.name == new_name
def test_set_attributes_pass():
    tag = "set1"
    type = SetType.INPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    new_attributes = [Attribute("att1", AttributeType.NUMERIC)]
    expected_result = [x.get_specification() for x in new_attributes]
    set = Set(tag, type, attributes)
    set.attributes = new_attributes
    assert set.attributes == expected_result
def test_get_dependency_pass():
    tag = "set1"
    type = SetType.INPUT
    dependency = "dependency"
    attributes = [Attribute("att1", AttributeType.TEXT)]
    set = Set(tag, type, attributes, dependency=dependency)
    assert set.dependency == dependency
def test_set_output_pass():
    attributes = [Attribute("att1", AttributeType.TEXT)]
    sets = [Set("set1", SetType.OUTPUT, attributes)]
    output = [Set("set4", SetType.OUTPUT, attributes)]
    expected_result = [x.get_specification() for x in output]
    transformation = Transformation("tf1", sets=sets)
    transformation.output = output
    assert transformation.output == expected_result
def test_set_type_pass():
    tag = "set1"
    type = SetType.INPUT
    new_type = SetType.OUTPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    set = Set(tag, type, attributes)
    set.type = new_type
    assert set.type == new_type.value
# Transformation.get_specification()
def test_get_specification_pass():
    tag = "tf1"
    sets = [
        Set("set1", SetType.INPUT, [Attribute("att1", AttributeType.TEXT)])
    ]
    expected_result = {
        "sets": [x.get_specification() for x in sets],
        "tag": tag
    }
    transformation = Transformation(tag, sets)
    assert transformation.get_specification() == expected_result
def test_set_sets_pass():
    attributes = [Attribute("att1", AttributeType.TEXT)]
    sets = [Set("set1", SetType.INPUT, attributes)]
    new_sets = [
        Set("set1", SetType.INPUT, attributes),
        Set("set2", SetType.OUTPUT, attributes)
    ]
    expected_result = [x.get_specification() for x in new_sets]
    transformation = Transformation("tf1", sets=sets)
    transformation.sets = new_sets
    assert transformation.sets == expected_result
# Set.get_specification()
def test_get_specification_pass():
    tag = "set1"
    type = SetType.INPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    expected_result = {
        "attributes": [attributes[0].get_specification()],
        "tag": tag,
        "type": type.value
    }
    set = Set(tag, type, attributes)
    assert set.get_specification() == expected_result
def test_get_name_pass():
    name = "att1"
    type = AttributeType.TEXT
    attribute = Attribute(name, type)
    assert attribute.name == name
# Attribute.get_specification()
def test_get_specification_pass():
    name = "att1"
    type = AttributeType.TEXT
    expected_specification = {"name": name, "type": type.value}
    attribute = Attribute(name, type)
    assert attribute.get_specification() == expected_specification
def test_get_attribute_type_pass():
    name = "att1"
    type = AttributeType.TEXT
    attribute = Attribute(name, type)
    assert attribute.type == type.value
def test_get_input_pass():
    attributes = [Attribute("att1", AttributeType.TEXT)]
    sets = [Set("set1", SetType.INPUT, attributes)]
    expected_result = [sets[0].get_specification()]
    transformation = Transformation("tf1", sets=sets)
    assert transformation.input == expected_result
# PROVENANCE ############################
dataflow_tag = "prov-df-{}".format(aggreg_unit)
df = Dataflow(dataflow_tag)
logger.info('Initializing the Spark processor')
processador = ProcessadorSparkClass(logger, spark, df, dataflow_tag)

## PROSPECTIVE PROVENANCE
# Transformation for the data-loading step: load_data
tf1 = Transformation('load_data')  # named after the Spark task
tf1_input = Set("i{}1".format('load_data'), SetType.INPUT, [
    Attribute("datafiles", AttributeType.TEXT),
    Attribute("tables", AttributeType.TEXT),
    Attribute("currenttime", AttributeType.TEXT),
    Attribute("aggregationunit", AttributeType.TEXT),
    Attribute("csvseparator", AttributeType.TEXT)
])
tf1_output = Set("o{}1".format('load_data'), SetType.OUTPUT, [
    Attribute("currenttime", AttributeType.TEXT),
    Attribute("elapsedtime", AttributeType.NUMERIC)
])
tf1.set_sets([tf1_input, tf1_output])
df.add_transformation(tf1)

# Transformation for the initial statistics step: initial_data_stats
tf2 = Transformation('initial_data_stats')  # named after the Spark task
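# --- Sketch (not in the original script): retrospective capture for the
# 'load_data' transformation registered above, assuming dfa-lib-python's
# Task/DataSet/Element API. The literal values are hypothetical placeholders
# for the script's real run-time arguments.
from dfa_lib_python.task import Task
from dfa_lib_python.dataset import DataSet
from dfa_lib_python.element import Element

df.save()  # submit the prospective graph before any task executes

t1 = Task(1, dataflow_tag, 'load_data')
t1.add_dataset(DataSet("i{}1".format('load_data'), [
    Element(["data.csv", "tableA", "2021-01-01T00:00:00", aggreg_unit, ";"])
]))
t1.begin()
# ... run the Spark load here ...
t1.add_dataset(DataSet("o{}1".format('load_data'), [
    Element(["2021-01-01T00:05:00", 300.0])
]))
t1.end()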
# dirin_arg_pas = sys.argv[0:]
### print "The input file's directory name is: " + dirin_do_ficheiro
### print "And the arguments passed are: " + str(dirin_arg_pas)
############################
# PROVENANCE ############################
dataflow_tag = "mafft-df"
df = Dataflow(dataflow_tag)

## PROSPECTIVE PROVENANCE
# Transformation to extract the file names: ExtrairNome
tf1 = Transformation("ExtrairNome")
tf1_input = Set("iExtrairNome", SetType.INPUT,
                [Attribute("DIRIN_FILE", AttributeType.FILE)])
tf1_output = Set("oExtrairNome", SetType.OUTPUT,
                 [Attribute("FASTA_FILE", AttributeType.FILE),
                  Attribute("MAFFT_FILE", AttributeType.FILE)])
tf1.set_sets([tf1_input, tf1_output])
df.add_transformation(tf1)

# Transformation to read the file and count the sequences: ContarSequencias
tf2 = Transformation("ContarSequencias")
tf2_input = Set("iContarSequencias", SetType.INPUT,
                [Attribute("FASTA_FILE", AttributeType.FILE)])  # reader: file-fasta / att text-file
tf2_output = Set("oContarSequencias", SetType.OUTPUT,
                 [Attribute("NUMERO_SEQUENCIAS", AttributeType.NUMERIC)])
tf2.set_sets([tf2_input, tf2_output])
df.add_transformation(tf2)
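# --- Sketch (not in the original script): the same retrospective pattern for
# the MAFFT flow, assuming dfa-lib-python's Task/DataSet/Element API; the file
# names below are hypothetical placeholders.
from dfa_lib_python.task import Task
from dfa_lib_python.dataset import DataSet
from dfa_lib_python.element import Element

df.save()  # submit the prospective "mafft-df" graph

t1 = Task(1, dataflow_tag, "ExtrairNome")
t1.add_dataset(DataSet("iExtrairNome", [Element(["input/dir"])]))
t1.begin()
# ... derive the FASTA and MAFFT file names here ...
t1.add_dataset(DataSet("oExtrairNome",
                       [Element(["sample.fasta", "sample.mafft"])]))
t1.end()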
def test_get_type_pass():
    tag = "set1"
    type = SetType.INPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    set = Set(tag, type, attributes)
    assert set.type == type.value