Example #1
from hdfs import InsecureClient


def handleHdfsDownload(hdfs_path, local_path):
    # Connect to the HDFS NameNode over WebHDFS and download a directory tree.
    client = InsecureClient("http://hdfs.neurolearn.com:50070", user="******")
    client.download(hdfs_path, local_path, overwrite=True)
    print('Downloaded Images from HDFS.')
    return local_path
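
A hedged companion sketch (the /data/images path below is illustrative, not from the example above): the same client can confirm the source exists before downloading, since status() with strict=False returns None for a missing path.

from hdfs import InsecureClient

client = InsecureClient("http://hdfs.neurolearn.com:50070", user="******")
# Only download when the remote directory actually exists.
if client.status("/data/images", strict=False) is not None:
    client.download("/data/images", "/tmp/images", overwrite=True)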
Example #2
import os

import pandas as pd
from hdfs import InsecureClient

os.environ['WEBHDFS_URI'] = 'http://172.17.0.2:50070'  # DO NOT do this in production; for local testing only

source_csv = '/user/root/test.csv'

content = '"eva","date","duration","country","purpose","crew"\n'
content += '"1","1965-06-03T00:00:00","0:36","USA","First U.S. EVA. Used HHMU and took  photos.","Ed White"\n'
content += '"9","1966-11-13T00:00:00","2:06","USA","Attached tether between Agena and Gemini.","Buzz Aldrin"\n'

# Step 0: write the file for the test
# - Client instance pointing at a specific HDFS
source_hdfs_url = os.environ['WEBHDFS_URI']
source_hdfs = InsecureClient(url=source_hdfs_url, user='******')

with source_hdfs.write(source_csv,
                       append=False,
                       overwrite=True,
                       encoding='utf-8') as writer:
    writer.write(content)

################################################################################

# Step 1: read the file back
# - Reuse the same HDFS client
with source_hdfs.read(source_csv, encoding='utf-8') as reader:
    data = pd.read_csv(reader, index_col=0)

data = data.loc[:, ['date', 'country']]  # DataFrame.ix was removed from pandas; .loc is the replacement
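
Writing the filtered frame back to HDFS mirrors the read above; a minimal sketch, assuming an illustrative target path:

# Step 2 (sketch): persist the filtered columns back to HDFS.
with source_hdfs.write('/user/root/test_filtered.csv', overwrite=True, encoding='utf-8') as writer:
    data.to_csv(writer)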
Example #3
from hdfs import InsecureClient
from pyspark.sql import SparkSession
from json import dump
import json

client_hdfs = InsecureClient('http://localhost:9870', user='******')

# spark = SparkSession \
#     .builder \
#     .master("local") \
#     .appName("Protob Conversion to Parquet") \
#     .config("spark.some.config.option", "some-value") \
#     .getOrCreate()

spark = SparkSession \
    .builder \
    .appName("myApp") \
    .config("spark.mongodb.input.uri", "mongodb://localhost:27017/numtest.kafka") \
    .config("spark.mongodb.output.uri", "mongodb://localhost:27017/numtest.kafka") \
    .config('spark.jars.packages', 'org.mongodb.spark:mongo-spark-connector_2.11:2.3.2') \
    .getOrCreate()

mongo = spark.read.format("com.mongodb.spark.sql.DefaultSource").option(
    "spark.mongodb.input.uri",
    'mongodb://localhost:27017/numtest.kafka').load()
results = mongo.toJSON().map(lambda j: json.loads(j)).collect()
mongo.show()
with client_hdfs.write('/home/hadoop/hdfs/helloworld2.json',
                       encoding='utf-8') as writer:
    dump(results, writer)
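
A hedged alternative to the single dump() above: newline-delimited JSON (one record per line) is often easier for downstream Spark or Hive jobs to split; the .jsonl target path is illustrative.

with client_hdfs.write('/home/hadoop/hdfs/helloworld2.jsonl', encoding='utf-8') as writer:
    for record in results:
        writer.write(json.dumps(record) + '\n')  # one JSON object per line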
Example #4
from __future__ import print_function  # In python 2.7

from flask import Flask, request
import subprocess
import logging
import os
from hdfs import InsecureClient
import shutil

app = Flask(__name__)
app.config['DEBUG'] = True
app.url_map.strict_slashes = False

hdfsLocation = "hdfs:50070"
client_hdfs = InsecureClient('http://' + hdfsLocation)


def debug(string):
    app.logger.debug(string)


@app.route('/uploader', methods=['POST'])
def upload():

    # target HDFS path, e.g. /sparkgis/sample_data/
    loc = request.form.get('name')
    f = request.files['file']
    f.save("tmp/file")

    client_hdfs.upload(str(loc), "../web/tmp/file")
    return 'File uploaded to HDFS', 200  # Flask views must return a response; the original returned None
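
A variation sketch (an assumption, not part of the original app): saving each upload to its own temporary file avoids concurrent requests clobbering the shared "tmp/file" path; the /uploader2 route name is hypothetical.

import tempfile

@app.route('/uploader2', methods=['POST'])
def upload_unique():
    loc = request.form.get('name')
    f = request.files['file']
    # write the upload to a unique temp file instead of a fixed shared path
    fd, tmp_path = tempfile.mkstemp()
    os.close(fd)
    f.save(tmp_path)
    client_hdfs.upload(str(loc), tmp_path, overwrite=True)
    os.remove(tmp_path)
    return 'File uploaded to HDFS', 200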
Example #5
import pandas as pd
from hdfs import InsecureClient

client_hdfs = InsecureClient('http://localhost:50070')

with client_hdfs.read('/user/hdfs/wiki/helloworld.csv', encoding='utf-8') as reader:
    df = pd.read_csv(reader, index_col=0)
#with client_hdfs.write('/user/temp/repourldata.csv', encoding='utf-8') as writer:
#    df.to_csv(writer)

print('SUCCESSFUL RUN')
Example #6
import io
# For Data Lake
from hdfs import InsecureClient
# For Data Warehouse
from pyhive import hive

import pandas as pd

df_source = pd.read_csv(r'output/price.csv')

# Define HDFS interface
hdfs_interface = InsecureClient('http://localhost:50070')
hdfs_interface.list('/')

# Delete old data
hdfs_interface.delete('/wqd7005/raw_price', recursive=True, skip_trash=True)

# Create hdfs directories to store data
hdfs_interface.makedirs('/wqd7005')
hdfs_interface.makedirs('/wqd7005/raw_price')
hdfs_interface.list('/wqd7005')

# Write data to raw_price directory

# text buffer
s_buf = io.StringIO()
# saving a data frame to a buffer (same as with a regular file):
df_source.to_csv(s_buf, index=False, header=False)

hdfs_interface.write('/wqd7005/raw_price/000000_0',
                     data=s_buf.getvalue(),
                     overwrite=True)  # call closed here; the original snippet was truncated mid-call
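
Since pyhive is imported for the warehouse side, a minimal sketch of pointing an external Hive table at the directory just written (assumptions: a HiveServer2 on localhost:10000 and illustrative column names):

from pyhive import hive

conn = hive.Connection(host='localhost', port=10000)
cursor = conn.cursor()
cursor.execute("""
    CREATE EXTERNAL TABLE IF NOT EXISTS raw_price (ticker STRING, price DOUBLE)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    LOCATION '/wqd7005/raw_price'
""")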
Example #7
 def __init__(self, *opts):
     self.client = InsecureClient(current_app.config['WEBHDFS_ADDR'], user=current_app.config['WEBHDFS_USER'])
Example #8
	def __init__(self, from_directory, images_path, to_directory):
		self.from_directory = from_directory
		self.to_directory = to_directory
		self.images_path = images_path
		self.hdfs_client = InsecureClient('http://192.168.1.4:9870', user='******')
Example #9
    l = 0
    r = 7
    length = len(df)
    while (l < length):
        pd.io.sql.to_sql(df[l:r],
                         'opt_bi_range',
                         yconnect,
                         schema='ezp-opt',
                         if_exists='append',
                         index=False)
        l += 7  # step by the chunk size; the original stepped by 1 and re-inserted overlapping rows
        r += 7

    #ę•°ę®å†™å…„hdfsäø­
    from hdfs import InsecureClient
    hdfs_client = InsecureClient('http://cluster2-master:50070', user='******')
    if len(result.head(1)):
        pandas_df = result
        pandas_df.to_csv('/home/jifenduiquan.csv')
        dir_path = '/tmp'
        file_list = hdfs_client.list(dir_path)
        target_file = 'jifenduiquan.csv'
        append = True if target_file in file_list else False
        with hdfs_client.write("{}/{}".format(dir_path, target_file),
                               append=append,
                               encoding='utf-8') as writer:
            pandas_df.to_csv(writer, header=True)

except:
    pass  # the matching try block sits above this excerpt
Example #10

import configparser
from hdfs import InsecureClient


def uploadHdfs(client, localPath):
    # mirror the local Windows directory layout onto the HDFS root
    hdfsPath = localPath.replace("D://hdfs/", "/")
    # client.delete(hdfsPath)
    client.upload(hdfsPath, localPath, overwrite=True)


if __name__ == '__main__':
    cf = configparser.ConfigParser()
    cf.read("config.ini")

    # hdfs = cf.get("hdfs", "hdfs_url")
    hdfs = cf.get("hdfs_test", "hdfs_url")
    client = InsecureClient(hdfs, user='******')
    # client = InsecureClient(hdfs, user='******')
    # client = InsecureClient(hdfs, user='******')

    # fl = client.list("/user/dev/euler/inneralgoframefile")
    # fl = client.list("/user/dev/boss/customerfile/2018/useralgorithm/")
    # fl = client.list("/user/dev/boss/customerfile/11/")
    # fl = client.list("/user/dev/boss/customerfile/56/324555663560933382/euler/file/transform/")
    # fl = client.list("/user/dev/boss/customerfile/2018/dataset")
    # fl = client.list("/user/dev/boss/customerfile/11/10/euler/trainView/319356072888631296/")
    fl = client.list("/user/dev/boss/customerfile")
    # fl = client.list("/user/root/euler/inneralgofile")
    # client.makedirs("/launcher/code/")
    print(fl)

    # hdfspath = "/user/dev/boss/customerfile/11/10/euler/trainView/315259954451775488/model/sk.pkl"
Example #11
def get_hdfs_files(db_name, table_name):
    client = InsecureClient(cons.BASE_URL, user=cons.USER_NAME)
    content = client.walk(cons.HDFS_PATH + db_name + '.db/' + table_name, 1)
    return content
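
client.walk() yields (root, dirnames, filenames) tuples much like os.walk; a usage sketch with illustrative database and table names:

# Sketch: print every file path found under the table directory.
for root, dirnames, filenames in get_hdfs_files('mydb', 'mytable'):
    for fname in filenames:
        print(root + '/' + fname)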
Example #12
        path = '/home/bda/data'
        logging.info("Write JSON and CSV to : " + path)
        with open(path + '/solarlog_' + str(site_id) + '_' + epoch_time_now +
                  '.json',
                  'w',
                  encoding='utf-8') as outfile:
            json.dump(solar_data, outfile, indent=4, ensure_ascii=False)

        # write the same data as .csv since it is easier to handle with hdfs
        with open(
                path + '/solarlog_' + str(site_id) + '_' + epoch_time_now +
                '.csv', 'w') as f:
            w = csv.DictWriter(f, solar_data.keys(), dialect=csv.excel_tab)
            w.writeheader()
            w.writerow(solar_data)

        # write data to hdfs
        logging.info("Write csv to hdfs : /data/solarlog/")
        client = InsecureClient('http://nh-01.ip-plus.net:50070', user='******')
        with client.write('/data/solarlog/solarlog_' + str(site_id) + '_' +
                          epoch_time_now + '.csv',
                          encoding='utf-8') as writer:
            w = csv.DictWriter(writer,
                               solar_data.keys(),
                               dialect=csv.excel_tab)
            w.writeheader()
            w.writerow(solar_data)

    # Write to KAFKA
    kafka_produce(solar_data)
Example #13
import os
import sys
import warnings
import posixpath as psp

from tensorflow.contrib import learn
from tensorflow.contrib.learn.python import SKCompat as skflow
from sklearn.metrics import mean_squared_error
from hdfs3 import HDFileSystem
from lstm_predictor2 import lstm_model, load_csvdata, rnn_model, gru_model

filename = "ptp_resultsRNNall.txt"
results = open(filename, 'a')

warnings.filterwarnings("ignore")
working_dir = "/user/canast02/ystr=2016"
from hdfs import InsecureClient
hdfs = InsecureClient(url='http://pythia1.in.cs.ucy.ac.cy:50070',
                      user='******')
fnames = hdfs.list(working_dir)

print(fnames)

LOG_DIR = 'resources/logs/'
TIMESTEPS = 1
RNN_LAYERS = [{'num_units': 16}, {'num_units': 16}]
DENSE_LAYERS = []
TRAINING_STEPS = 1000
PRINT_STEPS = TRAINING_STEPS  # / 10
BATCH_SIZE = 64

regressor = skflow(
    learn.Estimator(model_fn=rnn_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)))
#   model_dir=LOG_DIR)
Example #14
from hdfs import InsecureClient
import datetime

client = InsecureClient('http://localhost:14000')


def exists(path):
    return client.status(path, strict=False) is not None


def mkdir(path):
    print('MKDIR: ' + path)
    client.makedirs(path)


def touch(path):
    print('TOUCH: ' + path)
    with client.write(path) as writer:
        writer.write('')


def append(path, data):
    print('APPEND: ' + path)
    with client.write(path, append=True) as writer:
        writer.write(data + '\n')


def write(path, data):
    print('WRITE: ' + path)
    with client.write(path) as writer:
        writer.write(data + '\n')
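
A usage sketch for the helpers above (hedged; the /tmp/demo paths are illustrative, not from the original):

if not exists('/tmp/demo'):
    mkdir('/tmp/demo')
touch('/tmp/demo/empty.txt')
write('/tmp/demo/log.txt', 'first line')
append('/tmp/demo/log.txt', 'second line')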
Example #15
File: webhdfs.py Project: ush98/dvc
    def _webhdfs(self):
        from hdfs import InsecureClient

        client = InsecureClient(f"http://{self.host}:{self.port}", self.user)
        yield client
Example #16
def find_result(image_path, image_name):

    result = []  # result of the object detection
    with open(image_path, 'rb') as f:
        data = f.read()  # read the input image

    channel = implementations.insecure_channel('0.0.0.0', 4001)  # create an insecure channel to the host, port number 4001

    # create the prediction service stub that gives access to the models loaded in the model server
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()  # request to send to the server
    # PredictRequest specifies which TensorFlow model to run, what the input tensors are, and how the
    # outputs are filtered before being returned to the user
    request.model_spec.name = 'coco_model'
    request.model_spec.signature_name = 'detection_signature'  # model signature
    request.inputs['inputs'].CopyFrom(tf.contrib.util.make_tensor_proto(data, shape=[1]))  # input tensors
    res = stub.Predict(request, 10.0)  # results of the prediction request, 10 second timeout

    scores = res.outputs['detection_scores'].float_val  # scores of the detected objects, in decreasing order
    classes = res.outputs['detection_classes'].float_val  # ids of the detected classes, sorted by decreasing score
    # print zip(classes, scores)
    # vector with the normalized positions of the bounding boxes in the image: ymin, xmin, ymax, xmax
    # the bounding boxes are ordered starting from the box of the highest-scoring object
    boxes = res.outputs['detection_boxes'].float_val
    # reshape the vector so that each element is a 4-tuple identifying a bounding box
    boxes = np.reshape(boxes, [100, 4])

    # to save the image with the bounding boxes, open the image and use TensorFlow's vis_util library
    im = imageio.imread(image_path)  # read the image as a multidimensional array
    label_map_path = "Label_maps/mscoco_label_map.pbtxt"
    label_map = label_map_util.load_labelmap(label_map_path)  # id-to-label map
    categories = label_map_util.convert_label_map_to_categories(label_map=label_map, max_num_classes=90)  # list of dictionaries
    category_index = label_map_util.create_category_index(categories)  # dictionary of key ("id"), value ("class name") pairs

    # build an (img_height, img_width, 3) array with the bounding boxes overlaid
    image_vis = vis_util.visualize_boxes_and_labels_on_image_array(
        im,
        boxes,
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        max_boxes_to_draw=10,  # max number of bounding boxes to display
        min_score_thresh=.2,  # minimum score threshold for displayed bounding boxes
        use_normalized_coordinates=True,
        line_thickness=5)  # line width of the box outlines

    imageio.imwrite("Images_bbx/{}_coco.jpg".format(image_name), image_vis)  # save the array as a JPEG image

    client_hdfs = InsecureClient('http://localhost:50070')  # client to access HDFS

    # use the socket module to check whether the HDFS port is reachable
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # create a new socket
    port_result = sock.connect_ex(('localhost', 50070))
    # connect_ex returns 0 if the port is open, a non-zero value otherwise
    if port_result == 0:  # if HDFS is reachable, move the image with the predicted bounding boxes onto it
        client_hdfs.upload('/zora-object-detection/images/{}_coco.jpg'.format(image_name), "Images_bbx/{}_coco.jpg".format(image_name))
        os.remove("Images_bbx/{}_coco.jpg".format(image_name))
    vowels = ("a", "e", "i", "o", "u")
    bbx_coco = []  # vector with the coordinates of the bounding boxes found with the COCO model

    # if the score of the highest-scoring object in the image is at least 0.6,
    # perform the detection and build the string to pass to the robot
    if (scores[0] >= 0.6):
        labels_coco = []  # labels found with the COCO model
        labels_pets = []  # labels found with the Pets model
        labels_people = []  # labels found with the People model
        bbx_pets = []  # bounding box coordinates from the Pets model
        bbx_people = []  # bounding box coordinates from the People model
        boxes = boxes.tolist()  # convert the multidimensional array into a list
        j = 0
        for i in range(0, len(classes)):
            # keep every label with a score >= 0.6 whose bounding box does not match one
            # already added with a higher score
            if (scores[i] >= 0.6 and boxes[i] not in bbx_coco):
                labels_coco.append(str(category_index[int(classes[i])]['name']))
                bbx_coco.append(boxes[i])
                j = j + 1

        # if the labels include people, look for their gender with an object detection pass using the People model
        if ("person" in labels_coco):
            n_people = labels_coco.count("person")
            labels_people, bbx_people = pets_people_detection.find_labels(image_path, image_name, stub, request,
                                                                          "people_model", n_people)
        # if the labels include cats or dogs, look for the breeds with an object detection pass using the Pets model
        if ("cat" in labels_coco or "dog" in labels_coco):
            n_cat = labels_coco.count("cat")  # number of cats
            n_dog = labels_coco.count("dog")  # number of dogs
            n_pets = n_cat + n_dog
            # labels and bounding boxes found with the Pets model
            labels_pets, bbx_pets = pets_people_detection.find_labels(image_path, image_name, stub, request, "pets_model", n_pets)

        # dictionary whose keys are the label names and whose values are their occurrence counts in labels_coco
        counter = Counter(labels_coco)
        counter = list(counter.items())  # convert the dictionary into a list of (label name, count) pairs

        # build the string "string" to pass to the robot
        string = "I see "
        for i in range(0, len(counter)):  # the length of counter equals the number of distinct labels
            if counter[i][1] == 1:  # if the class has a single occurrence
                if (counter[i][0].startswith(vowels)):  # if the class name starts with a vowel
                    string += "an " + counter[i][0] + ", "
                else:
                    string += "a " + counter[i][0] + ", "
            else:  # if the class has more than one occurrence
                s = ["sheep", "scissors", "skis"]
                es = ["sandwich", "toothbrush", "wine glass", "bus", "bench", "couch"]
                if (counter[i][0] in s):
                    string += str(counter[i][1]) + " " + counter[i][0] + ", "
                elif (counter[i][0] in es):
                    string += str(counter[i][1]) + " " + counter[i][0] + "es, "
                elif (counter[i][0] == "knife"):
                    string += str(counter[i][1]) + " knives, "
                elif (counter[i][0] == "person"):
                    string += str(counter[i][1]) + " people, "
                else:
                    string += str(counter[i][1]) + " " + counter[i][0] + "s, "
        string = string.rstrip(", ")  # if the sentence ends with a comma, drop it
        if ("," in string):
            k = string.rfind(",")
            string = string[:k] + " and" + string[k + 1:]  # replace the last comma in the sentence with "and"
        string += "!"
        if (labels_people != []):  # if the gender of the person/people was found
            n_man = labels_people.count("man")
            n_woman = labels_people.count("woman")
            if len(labels_people) == 1 and n_people == 1:  # there is one person in the image
                if(n_man == 1):
                    string += " He is a man"
                else:
                    string += " She is a woman"
            elif len(labels_people) == 1 and n_people > 1:
                # there are several people in the image but the gender of only one was recognized
                string += " A person is a " + labels_people[0]
            else:  # the gender of several people was recognized
                if n_man > 1 and n_woman == 0:
                    string += " There are " + str(n_man) + " men"
                elif n_man == 0 and n_woman > 1:
                    string += " There are " + str(n_woman) + " women"
                elif n_man == 1 and n_woman == 1:
                    string += " There is a man and a woman"
                elif n_man > 1 and n_woman == 1:
                    string += " There is a woman and " + str(n_man) + " men"
                elif n_man == 1 and n_woman > 1:
                    string += " There is a man and " + str(n_woman) + " women"
                else:
                    string += " There are " + str(n_man) + " men and " + str(n_woman) + " women"
            string += "."
        if (labels_pets != []):  # if the breeds of the dogs and/or cats were found
            cat_breeds = []
            dog_breeds = []
            for i in range(0, len(labels_pets)):
                # split the breeds into two lists based on the first letter of the name: uppercase for a cat
                # breed and lowercase for a dog breed
                if labels_pets[i][0].isupper():
                    cat_breeds.append(labels_pets[i])
                else:
                    dog_breeds.append(labels_pets[i])
            if (n_cat == 1 and n_dog == 0):  # there is one cat in the image and no dogs
                pet_string = " The cat breed is " + labels_pets[0]
            elif (n_cat == 0 and n_dog == 1):  # there is one dog in the image and no cats
                pet_string = " The dog breed is " + labels_pets[0]
            elif (n_cat > 1 and n_dog == 0):  # there are several cats in the image, but no dogs
                if (len(labels_pets) == 1):  # only one cat's breed was found
                    pet_string = " The breed of a cat is " + labels_pets[0]
                else:  # the cats' breeds were found
                    pet_string = " The cat breeds are "
                    for i in range(0, len(labels_pets)):
                        pet_string += labels_pets[i] + ", "
            elif (n_cat == 0 and n_dog > 1):  # there are several dogs in the image, but no cats
                if (len(labels_pets) == 1):  # only one dog's breed was found
                    pet_string = " The breed of a dog is " + labels_pets[0]
                else:  # the dogs' breeds were found
                    pet_string = " The dog breeds are "
                    for i in range(0, len(labels_pets)):
                        pet_string += labels_pets[i] + ", "
            else:  # the image contains both dogs and cats
                if (len(labels_pets) == 1):  # the Pets model found only one breed name
                    if not dog_breeds:  # if there are no dog breeds
                        if n_cat == 1:
                            pet_string = " The cat breed is " + labels_pets[0]
                        else:
                            pet_string = " The breed of a cat is " + labels_pets[0]
                    else:  # if there are no cat breeds
                        if n_dog == 1:
                            pet_string = " The dog breed is " + labels_pets[0]
                        else:
                            pet_string = " The breed of a dog is " + labels_pets[0]
                else:  # several breeds were found
                    if not cat_breeds and n_dog > 1:  # no cat breeds and several dogs in the image
                        pet_string = " The dog breeds are "
                    elif not dog_breeds and n_cat > 1:  # no dog breeds and several cats
                        pet_string = " The cat breeds are "
                    else:  # there are both dog and cat breeds
                        pet_string = " The dog and cat breeds are "
                    for i in range(0, len(labels_pets)):
                        pet_string += labels_pets[i] + ", "
            pet_string = pet_string.rstrip(", ")
            if ("," in pet_string):  # replace the last comma in the string with "and"
                k = pet_string.rfind(",")
                pet_string = pet_string[:k] + " and" + pet_string[k + 1:]
            pet_string += "."
            string += pet_string
        result = [string]

        log_string = string + '\n'  # string to pass to the log file saved in HDFS

        # also consider the highest-scoring object with a score between 0.5 and 0.6, if it does not
        # coincide with an object already considered
        if (scores[j] < 0.6 and scores[j] > 0.5 and boxes[j] not in bbx_coco):
            class_name = str(category_index[int(classes[j])]['name'])
            bbx_coco.append(boxes[j])
            if (class_name in labels_coco):
                result.append("Is there also another " + class_name + "?")
                log_string += "Maybe there is also another " + class_name + ".\n"
            else:
                if class_name.startswith(vowels):
                    result.append("Is there also an " + class_name + "?")
                    log_string += "Maybe there is also an " + class_name + ".\n"
                else:
                    result.append("Is there also a " + class_name + "?")
                    log_string += "Maybe there is also a " + class_name + ".\n"

        log_string = add_bbx_log(log_string, bbx_coco, '{}_coco.jpg'.format(image_name))
        if(labels_people != []):
            log_string = add_bbx_log(log_string, bbx_people, '{}_people.jpg'.format(image_name))
        if (labels_pets != []):
            log_string = add_bbx_log(log_string, bbx_pets, '{}_pets.jpg'.format(image_name))

    # if the highest-scoring label has a score between 0.2 and 0.6, collect into a vector all the labels
    # whose score falls between these thresholds
    elif (scores[0] < 0.6 and scores[0] > 0.2):
        # string to write to the log file with guesses about objects possibly present in the image
        log_string = "I'm not sure what's in the picture.\nMaybe there is"
        for i in range(0, len(classes)):
            if (scores[i] < 0.6 and scores[i] > 0.2):
                class_name = str(category_index[int(classes[i])]['name'])
                if (class_name not in result):
                    result.append(class_name)
                    bbx_coco.append(boxes[i])
                    if class_name.startswith(vowels):
                        log_string += " an " + class_name + ","
                    else:
                        log_string += " a " + class_name + ","
        log_string = log_string.rstrip(", ")
        if ("," in log_string):  # replace the last comma in the string with "or"
            k = log_string.rfind(",")
            log_string = log_string[:k] + " or" + log_string[k + 1:]
        log_string += ".\n"
        log_string = add_bbx_log(log_string, bbx_coco, '{}_coco.jpg'.format(image_name))

    # if the highest-scoring label has a score <= 0.2, pass the empty "result" vector to the robot
    else:
        log_string = "I don't know what's in the picture!"

    if port_result == 0:  # if HDFS is reachable, create the log file with the string
        with client_hdfs.write('/zora-object-detection/logs/{}.log'.format(image_name)) as writer:
            writer.write(log_string)

    return result
Example #17
from rpy2 import robjects
from git_clone import git_clone
from hdfs import InsecureClient
import shutil

# Next Round
print("Hello again")

# Check if there is data for a prediction
client_hdfs = InsecureClient('http://awscdh6-ma.sap.local:9870', user='******')
hdfs_content = client_hdfs.list('/tmp/tbr/BARMER/XSA')
print(hdfs_content)
print()

if len(hdfs_content) > 0 and hdfs_content[0] == 'iris.csv':

    print('Starting prediction')

    # Source of the R script
    source_path = 'https://github.com/JimKnopfSun/BARMER_XSA.git'

    # Destination of the R script on XSA
    target_path = '/usr/sap/HN2/home/testdir/'

    # Clear out old script downloads on XSA
    shutil.rmtree(path=target_path + "/BARMER_XSA",
                  ignore_errors=True,
                  onerror=None)

    # Clone the R script to XSA
    git_clone(source_path, target_path)
Example #18
import pandas as pd
from io import StringIO
from hdfs import InsecureClient, HdfsError

client = InsecureClient('http://datalake:50070')

def read_file(msg):

    attr = dict()
    api.send("outData", api.Message(attributes=attr, body='Param Loading Triggered'))
    sdl_file = '/shared/SLT/SFLIGHT/data.csv'
    with client.read(sdl_file, encoding='utf-8') as reader:
        df = pd.read_csv(reader)
        # the original referenced an undefined df_key; send the DataFrame that was just read
        api.send("outData", api.Message(attributes=attr, body=str(df)))
        api.send("out2", str(df))

api.set_port_callback("input", read_file)
Example #19
#export feature class to hdfs

import arcpy
import pandas as pd
from hdfs import InsecureClient
import os

arcpy.env.workspace = r'C:\folder\data.gdb'

fc = r'Sample'

field_names = [f.name for f in arcpy.ListFields(fc)]

Vars = ['OBJECTID', 'Param1', 'Param2', 'Param3']
spatRef = arcpy.Describe(fc).spatialReference
data = arcpy.da.FeatureClassToNumPyArray(fc, ["SHAPE@X", "SHAPE@Y"] + Vars)

df = pd.DataFrame(data)

client_hdfs = InsecureClient('http://127.0.0.1:50070')

with client_hdfs.write('/user/root/folder/data.csv',
                       encoding='utf-8') as writer:
    df.to_csv(writer)
Example #20
import os
import time
from hdfs.client import Client
from hdfs import InsecureClient
# RootPath = 'D:\\aaa'
from hdfs3 import HDFileSystem
# hdfs = HDFileSystem("http://192.168.1.95", port=50070)
# print(hdfs.ls('/user/data'))

RootPath = '/home/qinyu/myproject/python_project/examination_pro/hdfscontroll/inventory'
HdfsRootPath = '/qjb'
LogPath = '/home/qinyu/myproject/python_project/examination_pro/hdfscontroll/log/'

# client = Client("http://192.168.1.95:50070", timeout=1000)
client = InsecureClient(url="http://192.168.1.95:50070", user="******")


def savelog(logPath, log):
    if not os.path.exists(logPath):
        os.makedirs(logPath)

    stamp = int(time.time())
    logName = time.strftime("%Y%m%d_%H%M", time.localtime(stamp)) + '.log'
    logFilePath = os.path.join(logPath, logName)
    with open(logFilePath, 'w') as file:
        file.write(log + '\n')


# for i in client.list('/'):
#     print(i)
Example #21
                        if args.json:
                            json_writer.writeJSONPSMSfromArchive(archivePath, jsonPath)
                    
                    else:
                        log.warning("No files downloaded!")
            
            except Exception as err:
                log.error("Exception {}".format(err)) 
                error = err

            if csvs:
                log.info("CSVs generated during execution!")
                hdfs_parent = os.path.join(os.sep, os.environ['HDFS_USER'], str(row.csv_generator_job_id))
                for csv in csvs:
                    hdfs_path = os.path.join(hdfs_parent, csv.split(os.sep)[-1])

                    # put csv into hdfs
                    #put = Popen(["hadoop", "fs", "-put", csv, hdfs_path], stdin=PIPE, bufsize=-1)
                    #put.communicate()

                    client = InsecureClient('{}:{}'.format(os.environ['HDFS_HOST'], os.environ['HDFS_PORT']), user=os.environ['HDFS_USER'])
                    client.upload(hdfs_path, csv)

                session.execute("UPDATE CSV_GENERATOR_JOBS SET STATUS = 1, CSV_HDFS_PATH={}, PRIDE_ID={}, JOB_RESULT_MESSAGE=\'{}\' WHERE CSV_GENERATOR_JOB_ID={} IF EXISTS;".format(hdfs_parent, projects, "success", str(row.csv_generator_job_id)))
            else:
                log.warning("No CSVs generated during execution!")
                session.execute("UPDATE CSV_GENERATOR_JOBS SET STATUS = -1, JOB_RESULT_MESSAGE=\'{}\' WHERE CSV_GENERATOR_JOB_ID={} IF EXISTS;".format(str(error), str(row.csv_generator_job_id)))

        log.info('Sleeping for {} seconds!'.format(os.environ['TIMEOUT']))
        time.sleep(int(os.environ['TIMEOUT']))
Example #22
import os

import pyspark.sql.types as t
from hdfs import InsecureClient
from pyspark import SparkContext
from pyspark.sql import SparkSession

from schemas.omdb import schema_actors
from udfs import (
    general_awards_by_keyword,
    nominated_by_keyword,
    omdb_data,
    won_by_keyword,
)

os.environ["HADOOP_USER_NAME"] = HADOOP_USER_NAME

client_hdfs = InsecureClient(f"http://{HADOOP_NAMENODE}:50070",
                             user=HADOOP_USER_NAME)

# get preprocessed opusdata filename
hdfs_path = "/processed/opusdata.csv"

filename = [f for f in client_hdfs.list(hdfs_path) if f.endswith(".csv")][0]

sc = SparkContext(SPARK_URI)

parent_dir = os.path.dirname(os.path.abspath(__file__))
sc.addPyFile(os.path.join(parent_dir, "utils.py"))

sparkSession = (
    SparkSession.builder.appName("preprocessing-opusdata-and-omdb").config(
        "spark.hadoop.dfs.client.use.datanode.hostname", "true").getOrCreate())
Example #23
 def __init__(self, bucket_name: str, folder_name: str):
     super().__init__(bucket_name, folder_name)
     self.client = InsecureClient(url=settings.HDFS_CONN,
                                  user=settings.HDFS_USERNAME)
Example #24
from hdfs import InsecureClient


def remove_in_hdfs(hdfs_path):
    client = InsecureClient('http://quickstart.cloudera:50070', user='******')
    client.delete(hdfs_path, recursive=True)
Example #25
from hdfs import InsecureClient

hdfs_cli = InsecureClient('http://192.168.1.4:9870', user='******')

hdfs_cli.delete('/images', recursive=True)
hdfs_cli.delete('/images_augmented', recursive=True)
hdfs_cli.delete('/images_crop', recursive=True)
hdfs_cli.delete('/images_norm', recursive=True)
hdfs_cli.delete('/image_test', recursive=True)
hdfs_cli.delete('/image_test_crop', recursive=True)
hdfs_cli.delete('/image_test_ready', recursive=True)
hdfs_cli.delete('/algo_trained', recursive=True)
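
The same cleanup can be written as a loop; a sketch relying on delete() returning False when the path is already absent:

# Sketch: report which paths were actually removed.
for path in ['/images', '/images_augmented', '/images_crop', '/images_norm']:
    removed = hdfs_cli.delete(path, recursive=True)
    print(path, 'removed' if removed else 'not found')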

Example #26
def get_hdfs_client():
    return InsecureClient("http://192.168.2.109:50070",
                          user="******",
                          root="/")
Example #27
 def __init__(self, namenode="localhost", port="50070"):
     self.WEB_HDFS_URL = 'http://' + namenode + ':' + str(port)
     print(namenode, ">>", port, ">>", self.WEB_HDFS_URL)
     self.client = InsecureClient(self.WEB_HDFS_URL)
Example #28
def hdfs_client():
    # TODO: Configure from env
    return InsecureClient('http://%s:50070' % master_ip(),
                          user=config.HDFS_USER)
Example #29
def get_hdfs_client():
    return InsecureClient(get_hdfs_address(), root='/')
Example #30
def stream(string, lines, t):
    """
    Stream tweets from Twitter and save them to file every hour.

    Args:
        string - search query string
        lines - array of streaming keywords
        t - Twarc instance

    Returns:
        boolean - True (OK) / False (Error)
    """
    words = lines

    hour_keywords = {}

    # make timestamps
    timestr = time.strftime("%Y-%m-%d_%H-%M-%S")
    datestr = time.strftime("%Y-%m-%d")

    # get total time for check time
    start_time = time.time()

    # create directories and files for keywords
    tweets_to_write = {}
    indexes = {}
    client = InsecureClient('http://192.168.1.12:50070', user='******')
    for word in words:
        dir_word = word.replace(" ", "_")

        # for statistics
        if not os.path.isdir("data/statistics"):
            os.makedirs("data/statistics")

        # for statistics date
        if not os.path.isdir("data/statistics/" + datestr):
            os.makedirs("data/statistics/" + datestr)

        # for keyword
        if not os.path.isdir("data/" + dir_word):
            os.makedirs("data/" + dir_word)

        # for date
        if not os.path.isdir("data/" + dir_word + "/" + datestr):
            os.makedirs("data/" + dir_word + "/" + datestr)

        # create json file for writing data
        with open("data/" + dir_word + "/" + datestr + "/" + timestr + ".json",
                  "w") as fw:
            fw.write("[")

        tweets_to_write[dir_word] = []
        indexes[dir_word] = 0

    minutes = 1
    while True:
        try:
            # find lines in twitter
            print "String query: %s" % string
            for tweet in t.stream(string):
                # regex to find keyword
                for word in words:
                    dir_word = word.replace(" ", "_")
                    filename = "data/" + dir_word + "/" + datestr + "/" + timestr
                    # create list of words in keyword
                    wlist = word.split()
                    # length of this list
                    w_length = len(wlist)
                    check = 0
                    # for every word in keyword
                    for w in wlist:
                        # check if word is in tweet
                        keyword = re.search("%s" % w, tweet["text"],
                                            re.IGNORECASE)
                        if keyword:
                            check += 1
                    # if every word from keyword is in tweet, save to file
                    if check == w_length:
                        print "Tweet language: %s" % tweet['lang']
                        if tweet['lang'] in languages:
                            dumped_json = json.dumps(tweet)
                            tweets_to_write[dir_word].append(dumped_json)
                            with open(filename + ".json", "a") as fw:

                                fw.write(dumped_json)
                                fw.write(",")

                    # counting total
                            if word in total_keywords:
                                total_keywords[word] += 1
                            else:
                                total_keywords[word] = 1
                    # counting hourly
                            if word in hour_keywords:
                                hour_keywords[word] += 1
                            else:
                                hour_keywords[word] = 1
                            if len(tweets_to_write[dir_word]) % 10 == 0:
                                print("Going to write into %s_%s" % (
                                    filename, indexes[dir_word]))
                                with client.write(filename + "_" +
                                                  str(indexes[dir_word]),
                                                  encoding='utf-8') as writer:
                                    # join this keyword's buffered tweets (the original joined the dict itself)
                                    writer.write("\n".join(tweets_to_write[dir_word]))
                                indexes[dir_word] = indexes[dir_word] + 1
                                tweets_to_write[dir_word] = []

                # exit every hour and start function again
                if start_time + 3600 < time.time():
                    for word in words:
                        dir_word = word.replace(" ", "_")
                        with open(
                                "data/" + dir_word + "/" + datestr + "/" +
                                timestr + ".json", "a+") as fw:
                            fw.seek(-1, os.SEEK_END)
                            if fw.read() == ",":
                                fw.seek(-1, os.SEEK_END)
                                fw.truncate()
                            fw.write("]")
                    # hour statistics
                    with open(
                            "data/statistics" + "/" + datestr + "/" + timestr +
                            ".txt", "w") as fw:
                        for word in hour_keywords:
                            fw.write(
                                str(word) + " : " + str(hour_keywords[word]) +
                                "\n")
                    # total statistics
                    with open("data/statistics/statistics.txt", "w") as fw:
                        for word in total_keywords:
                            fw.write(
                                str(word) + " : " + str(total_keywords[word]) +
                                "\n")
                    return True

        # except for quit application
        except KeyboardInterrupt:
            for word in words:
                dir_word = word.replace(" ", "_")
                with open(
                        "data/" + dir_word + "/" + datestr + "/" + timestr +
                        ".json", "a+") as fw:
                    fw.seek(-1, os.SEEK_END)
                    if fw.read() == ",":
                        fw.seek(-1, os.SEEK_END)
                        fw.truncate()
                    fw.write("]")
            # hour statistics
            with open(
                    "data/statistics" + "/" + datestr + "/" + timestr + ".txt",
                    "w") as fw:
                for word in hour_keywords:
                    fw.write(
                        str(word) + " : " + str(hour_keywords[word]) + "\n")
            # total statistics
            with open("data/statistics/statistics.txt", "w") as fw:
                for word in total_keywords:
                    fw.write(
                        str(word) + " : " + str(total_keywords[word]) + "\n")
            sys.stdout.write("QUIT\n")
            sys.exit(0)
        # except for problems with key
        except KeyError:
            # exit every hour and start function again
            if start_time + 3600 < time.time():
                for word in words:
                    dir_word = word.replace(" ", "_")
                    with open(
                            "data/" + dir_word + "/" + datestr + "/" +
                            timestr + ".json", "a+") as fw:
                        fw.seek(-1, os.SEEK_END)
                        if fw.read() == ",":
                            fw.seek(-1, os.SEEK_END)
                            fw.truncate()
                        fw.write("]")
                # hour statistics
                with open(
                        "data/statistics" + "/" + datestr + "/" + timestr +
                        ".txt", "w") as fw:
                    for word in hour_keywords:
                        fw.write(
                            str(word) + " : " + str(hour_keywords[word]) +
                            "\n")
                # total statistics
                with open("data/statistics/statistics.txt", "w") as fw:
                    for word in total_keywords:
                        fw.write(
                            str(word) + " : " + str(total_keywords[word]) +
                            "\n")
                return True
            continue
    # error
    return False