def update_stats():
    """Refresh the module-level ``hashtags`` counts and ``stats['popularity']``.

    The data source is selected by the module-level ``storage`` setting:

    * ``'hdfs'``: every directory under ``results_dir`` that contains a
      ``_SUCCESS`` marker and is not yet in ``processed_results`` is recorded
      there, and its ``part*`` files are read.  Each non-empty line must be
      ``"<hashtag> <count>"`` separated by exactly one space; the count is
      *added* to the running total in ``hashtags``.  If no new part files are
      found the function returns early and ``stats`` is left untouched.
    * anything else: all rows of ``cassandra_table`` are read and
      ``hashtags[row.hashtag]`` is *overwritten* with ``row.count``.

    Afterwards the hashtags are sorted by count (descending), passed through
    ``to_jqcloud_format`` and stored in ``stats['popularity']``.  When
    ``top_list_len`` is 0 the whole list is stored; otherwise it is cut to at
    most ``top_list_len`` entries.

    Raises:
        ValueError: if a part-file line does not split into exactly two
            space-separated fields, or its count is not an integer.
    """
    if storage == 'hdfs':
        from snakebite import client

        hdfs = client.Client(hdfs_address, hdfs_port, use_trash=False)
        parts = []
        result_dirs = sorted(entry['path'] for entry in hdfs.ls([results_dir]))
        for result in result_dirs:
            # Cheap local membership test first: skips the HDFS round trip
            # for directories that were already consumed by an earlier call.
            if result in processed_results:
                continue
            # Only consume output directories whose job finished successfully.
            if not hdfs.test(result + "/_SUCCESS", exists=True):
                continue
            processed_results.add(result)
            parts += sorted(
                entry['path'] for entry in hdfs.ls([result + "/part*"]))

        if not parts:
            # Nothing new to merge; leave ``stats`` as it is.
            return

        for part in hdfs.text(parts):
            for stat in part.split('\n'):
                if not stat:
                    continue
                hashtag, count = stat.split(' ')
                hashtags[hashtag] = hashtags.get(hashtag, 0) + int(count)
        # Single-argument parenthesised form prints identically on
        # Python 2 and Python 3.
        print("Processed data in: %s" % parts)
    else:
        from cassandra.cluster import Cluster

        cluster = Cluster([cassandra_address])
        try:
            session = cluster.connect(cassandra_keyspace)
            query = "select * from {}".format(cassandra_table)
            for row in session.execute(query):
                hashtags[row.hashtag] = row.count
        finally:
            # A new Cluster is created on every call; release its
            # connections so repeated refreshes do not leak resources.
            cluster.shutdown()

    ranked = sorted(hashtags.items(), key=lambda item: item[1], reverse=True)
    sorted_stats = to_jqcloud_format(ranked)
    max_top = min(len(hashtags), top_list_len)
    if top_list_len == 0:
        stats['popularity'] = sorted_stats
    else:
        stats['popularity'] = sorted_stats[:max_top]
def create_client(namenode="10.80.255.239", port=9000):
    """Return a ``client.Client`` for the given namenode address and port.

    Both parameters default to the values that were previously hard-coded,
    so calling ``create_client()`` with no arguments behaves as before.

    Args:
        namenode: Address passed as the first argument to ``client.Client``.
        port: Value passed as the ``port`` keyword to ``client.Client``.
    """
    return client.Client(namenode, port=port)
def create_client():
    """Return a ``client.Client`` built from the first external HDFS config.

    ``HDFSConfig.get_external_config()`` is queried and the ``'namenode'``
    and ``'port'`` entries of its first element are used for the client.

    Raises:
        IndexError: if the external configuration has no first element.
    """
    configs = HDFSConfig.get_external_config()
    try:
        config = configs[0]
    except IndexError:
        # Same exception type as before, but with a message that explains
        # what is actually missing.
        raise IndexError(
            "HDFSConfig.get_external_config() returned no namenode "
            "configuration")
    return client.Client(config['namenode'], port=config['port'])
def createClient(self, namenode, port):
    """Print the target namenode/port and return an ``SBClient.Client`` for it.

    Args:
        namenode: Address passed as the first argument to ``SBClient.Client``.
        port: Value passed as the ``port`` keyword to ``SBClient.Client``.

    Returns:
        The object returned by ``SBClient.Client(namenode, port=port)``.
    """
    p = "namenode=%s port=%s" % (str(namenode), str(port))
    # Single-argument parenthesised form prints identically on Python 2
    # and Python 3, whereas the bare print statement is Python-2-only.
    print(p)
    return SBClient.Client(namenode, port=port)