def run(put, map, reduce, entrada, saida, nome):
    """Run one MapReduce job end to end through the ``mrl`` helpers.

    The input directory is recreated from scratch, the data is uploaded
    into it, any previous output directory is removed, the job is executed
    and finally the job output is fetched.

    Args:
        put: Source handed to ``mrl.hdfs_put`` (the data to upload).
        map: Mapper handed to ``mrl.run_map_reduce``. The name shadows the
            builtin but is kept so keyword callers keep working.
        reduce: Reducer handed to ``mrl.run_map_reduce``.
        entrada: Input directory; removed, recreated and filled with ``put``.
        saida: Output directory; removed before the job and read afterwards.
        nome: Second argument of ``mrl.hdfs_get`` -- presumably the local
            destination name; confirm against ``map_reduce_lib``.
    """
    mrl.hdfs_rm_dir(entrada)  # remove it if it already exists
    mrl.hdfs_mkdir(entrada)  # create a new, empty one
    mrl.hdfs_put(put, entrada)  # insert the data
    mrl.hdfs_rm_dir(saida)  # remove it if it already exists
    mrl.run_map_reduce(map, reduce, entrada, saida)
    # Fetch from the same output directory the job was just told to write to.
    mrl.hdfs_get(saida, nome)
import map_reduce_lib as mrl

# Recreate the input directory and upload the local files into it.
mrl.hdfs_rm_dir("data")
mrl.hdfs_mkdir("data")
mrl.hdfs_put("algumapoesia/*", "data")

# Remove the output directory before the job runs.
mrl.hdfs_rm_dir("saida")

# Run the streaming job.
# NOTE(review): the directories are named "data"/"saida" above but
# "/data"/"/saida" here -- confirm map_reduce_lib resolves both spellings
# to the same locations.
mrl.run_map_reduce("map.py","reduce.py","/data","/saida")
import map_reduce_lib as mrl

# Input side: drop any existing "data" directory, create it again and
# upload the local files into it.
mrl.hdfs_rm_dir("data")
mrl.hdfs_mkdir("data")
mrl.hdfs_put("algumapoesia/*", "data")

# Output side: drop any existing "saida" directory.
mrl.hdfs_rm_dir("saida")

# Run the streaming job.
# NOTE(review): setup uses "data"/"saida" while the job call uses
# "/data"/"/saida" -- verify in map_reduce_lib that these name the same
# directories.
mrl.run_map_reduce("map.py", "reduce.py", "/data", "/saida")