def test(): """ """ client = Client("192.168.99.100", 9000) for f in client.ls(['/files']): print f for line in client.cat([f.get('path')]): for l in line: print l
def metrics(): print "Recieved metrics request..." metric_prefix = "hdfs_directory_stats" metrics = {"the_number_one": "1"} c = Client("namenode", 8020) filepaths = map(lambda entry: entry['path'], c.ls([sys.argv[1]])) lines = reduce(lambda a, b: a + b, [1 for f in c.cat(filepaths) for _ in f]) metrics['lines_of_text_in_directory'] = lines template_kwargs = { 'metrics': metrics, 'dir': sys.argv[1], 'metric_prefix': metric_prefix } return Response(render_template("metrics", **template_kwargs), mimetype='text/plain')
class HdfsReader:
    """Checks and reads argo profile files stored on an HDFS endpoint (namenode).

    Profile file locations are derived from a configurable base path in which
    the literal token "{{tenant}}" is substituted with the tenant name.
    """

    # Filename template per profile type; formatted with (tenant, report).
    # Was built with dict() + update() — a literal is clearer and built once.
    _TEMPLATES = {
        'operations': '{0}_ops.json',
        'aggregations': '{0}_{1}_ap.json',
        'reports': '{0}_{1}_cfg.json',
        'thresholds': '{0}_{1}_thresholds.json',
        'recomputations': 'recomp.json',
    }

    def __init__(self, namenode, port, base_path):
        """Initialize HdfsReader, used to check/read profile files from hdfs.

        Args:
            namenode: str. hdfs namenode host
            port: int. hdfs namenode port
            base_path: str. base path to destination used for argo
        """
        self.client = Client(namenode, port)
        self.base_path = base_path

    def gen_profile_path(self, tenant, report, profile_type):
        """Generate a valid hdfs path to a specific profile.

        Args:
            tenant: str. tenant to be used
            report: str. report to be used
            profile_type: str. one of
                operations|reports|aggregations|thresholds|recomputations

        Returns:
            str: hdfs path

        Raises:
            KeyError: if profile_type is not a known profile type.
        """
        sync_path = self.base_path.replace("{{tenant}}", tenant)
        filename = self._TEMPLATES[profile_type].format(tenant, report)
        return os.path.join(sync_path, filename)

    def cat(self, tenant, report, profile_type):
        """Read and parse a profile stored in hdfs.

        Args:
            tenant: str. tenant name
            report: str. report name
            profile_type: str. profile type (operations|reports|aggregations|thresholds)

        Returns:
            tuple: (parsed json dict, True) on success, (None, False) if the
            file does not exist.
        """
        path = self.gen_profile_path(tenant, report, profile_type)
        try:
            # client.cat yields one stream per path; each stream yields text
            # chunks. next(next(...)) takes the first chunk of the first file
            # (next() builtin works on both Python 2.6+ and 3, unlike .next()).
            txt = self.client.cat([path])
            return json.loads(next(next(txt))), True
        except FileNotFoundException:
            return None, False

    def rem(self, tenant, report, profile_type):
        """Remove a profile file that already exists in hdfs (so it can be replaced).

        Args:
            tenant: str. tenant name
            report: str. report name
            profile_type: str. profile type (operations|reports|aggregations|thresholds)

        Returns:
            bool: True if deleted, False if the file did not exist.
        """
        path = self.gen_profile_path(tenant, report, profile_type)
        try:
            # delete() is lazy; advance it once to actually perform the delete.
            next(self.client.delete([path]))
            return True
        except FileNotFoundException:
            return False
def main(opts, args): hadoop_host = HADOOP_HOST hadoop_user_dir = None if opts.hdfs: print("hdfs enter") if opts.host: hadoop_host = opts.host hadoop_user_dir = opts.hdfs uni_gram_cnt = 0 bi_gram_cnt = 0 tri_gram_cnt = 0 four_gram_cnt = 0 five_gram_cnt = 0 result_buffer = [] source_input = None if not hadoop_user_dir: if len(args) > 2: source_input = sys.argv[1] else: source_input = sys.stdin for line in source_input: result_buffer.append(line) items = line.split() items_cnt = len(items) if items_cnt == 3: # 1-grams uni_gram_cnt +=1 elif items_cnt == 4: #2-grams bi_gram_cnt += 1 elif items_cnt == 5: #3-grams tri_gram_cnt += 1 elif items_cnt == 6: four_gram_cnt += 1 elif items_cnt == 7: five_gram_cnt +=1 else: print "connect to haddoop" hadoop_client = Client(hadoop_host, 8020, use_trash=False) for g in hadoop_client.cat([os.path.join(hadoop_user_dir, "*.txt")]): for line in g: result_buffer.append(line) items = line.split() items_cnt = len(items) if items_cnt == 3: # 1-grams uni_gram_cnt +=1 elif items_cnt == 4: #2-grams bi_gram_cnt += 1 elif items_cnt == 5: #3-grams tri_gram_cnt += 1 elif items_cnt == 6: four_gram_cnt += 1 elif items_cnt == 7: five_gram_cnt +=1 print('\\data\\') if uni_gram_cnt != 0: print("ngram 1=%s" % uni_gram_cnt) if bi_gram_cnt != 0: print("ngram 2=%s" % bi_gram_cnt) if tri_gram_cnt != 0: print("ngram 3=%s" % tri_gram_cnt) if four_gram_cnt != 0: print("ngram 4=%s" % four_gram_cnt) if five_gram_cnt != 0: print("ngram 5=%s" % five_gram_cnt) result_iter = iter(result_buffer) print print_ngram(result_iter, 1, uni_gram_cnt) print print_ngram(result_iter, 2, bi_gram_cnt) print print_ngram(result_iter, 3, tri_gram_cnt) print print("\\end\\")
#!/usr/bin/env python from snakebite.client import Client import time import os host = '100.127.13.16' port = 8020 client = Client(host, port, use_trash=False) path = '/data/landing/mobileye/20180604T202528Z/BTYS7524024D1P9DGN/0197/20180406_Run2_ES8VB21_301127_Day_Rainy_CA_AEB_Collection_' def getInf(path): result = [] for x in client.ls([path]): result.append(x) ordered = sorted(result, key=lambda x: x['path']) for f in ordered: if f['file_type'] == 'd': yield os.path.join(f['path'], 'EMP.inf') for inf in getInf(path): print inf for content in client.cat([inf]): for line in content: print line
# hdfs_host='100.127.6.35' hdfs_host='100.127.13.16' # hdfs_port=9820 hdfs_port=8020 client = Client(host=hdfs_host, port= hdfs_port, use_trash=False, effective_user='******') if len(sys.argv) < 2: print 'inf_verification.py path' sys.exit(0) input_dir=sys.argv[1] input_files=[] for clip in client.ls([input_dir]): if clip['file_type'] == 'd': input_files.append(clip['path']) for folder in sorted(input_files): for inf in client.cat([getInf(folder)]): for content in inf: start=None end=None for aline in content.split('\n'): if aline.startswith('startTime'): start=aline.strip() elif aline.startswith('endTime'): end=aline.strip() print '{}\t{}\t{}'.format(os.path.basename(folder),start, end)
#!/usr/bin/env python from snakebite.client import Client import time import os host='100.127.13.16' port=8020 client = Client(host, port, use_trash=False) path='/data/landing/mobileye/20180604T202528Z/BTYS7524024D1P9DGN/0197/20180406_Run2_ES8VB21_301127_Day_Rainy_CA_AEB_Collection_' def getInf(path): result=[] for x in client.ls([path]): result.append(x) ordered=sorted(result, key=lambda x: x['path']) for f in ordered: if f['file_type'] == 'd': yield os.path.join(f['path'],'EMP.inf') for inf in getInf(path): print inf for content in client.cat([inf]): for line in content: print line