def cdf():
    """Plot the CCDF of check-ins per user and save it as ``cdf_test.png``.

    Connects to Hive using the ``[Hive]`` ``server``/``port`` entries of
    ``../config/config.cfg``.  For thresholds 5, 55, 105, ... it counts the
    users having more than that many rows in ``checkin`` and divides the
    count by a hard-coded user total.  Sampling stops as soon as a threshold
    is exceeded by at most one user, or when Hive returns no row at all.
    The (threshold, fraction) points are drawn on log-log axes.

    Returns:
        None.  Progress is printed to stdout and the figure is written to
        ``cdf_test.png`` in the current working directory.
    """
    config = ConfigParser.RawConfigParser(allow_no_value=True)
    config.read('../config/config.cfg')
    hive_client = hive_utils.HiveClient(server=config.get('Hive', 'server'),
                                        port=config.get('Hive', 'port'))

    # The threshold is appended to this prefix to form each query.
    base_query = ('select count(*) as cnt from '
                  '(select count(*) as num from checkin group by user_id) a '
                  'where a.num > ')

    # Denominator of every plotted fraction.
    # NOTE(review): presumably the total number of users in `checkin`;
    # it is hard-coded, so confirm it still matches the table.
    tot_user = 2286501.0

    thresholds = []
    fractions = []
    st = 5
    while True:
        count_query = base_query + str(st)

        # Track the last row explicitly so an empty result is detected
        # instead of reading an unbound (or stale) loop variable below.
        last_row = None
        for row in hive_client.execute(count_query):
            last_row = row
            thresholds.append(st)
            cnt_per = float(row['cnt']) / tot_user
            fractions.append(cnt_per)
            # Single-argument form prints identically on Python 2 and 3.
            print('%s %s' % (row['cnt'], cnt_per))

        st = st + 50
        if last_row is None or int(last_row['cnt']) <= 1:
            break

    # Debug aid: show the last query that was executed.
    print(count_query)

    _fig, axi = plt.subplots()
    axi.set_yscale("log")
    axi.set_xscale("log")
    # Only the y-range is pinned; x-limits are left to autoscale.  A zero
    # bound is not usable here because both axes are logarithmic.
    axi.set_ylim(1e-10, 1e0)
    axi.set_xlabel('Number of checkins per user')
    axi.set_ylabel('CCDF')
    plt.plot(thresholds, fractions, 'o')
    plt.savefig('cdf_test.png')
def df():
    """Draw bar charts of the ten busiest check-in countries, US states and US cities.

    Reads the Hive ``server``/``port`` from ``../config/config.cfg``, runs
    three top-10 aggregations against ``checkintest`` and hands each result
    to ``plot_bar_chart`` together with its title and output file name.
    All queries are executed before the first chart is drawn.
    """
    settings = ConfigParser.RawConfigParser(allow_no_value=True)
    settings.read('../config/config.cfg')
    client = hive_utils.HiveClient(server=settings.get('Hive', 'server'),
                                   port=settings.get('Hive', 'port'))

    # One entry per chart: (label column, HiveQL, title, output file).
    chart_specs = (
        ('country',
         "select country,count(*) as count from checkintest group by country sort by count desc limit 10",
         'Top 10 Check-in countries',
         'top10_country.png'),
        ('state',
         "select state,count(*) as count from checkintest where country='US' group by state sort by count desc limit 10",
         'Top 10 Check-in states in US',
         'top10_state_us.png'),
        ('city',
         "select city,count(*) as count from checkintest where country='US' group by city sort by count desc limit 10",
         'Top 10 Check-in cities in US',
         'top10_city_us.png'),
    )

    # Phase 1: fetch every result set.
    fetched = []
    for label_column, hql, title, out_file in chart_specs:
        labels = []
        counts = []
        for record in client.execute(hql):
            labels.append(record[label_column])
            counts.append(int(record['count']))
        fetched.append((counts, labels, title, out_file))

    # Phase 2: render the charts in the same order the queries ran.
    for counts, labels, title, out_file in fetched:
        plot_bar_chart(counts, labels, 'Check-in Count', title, out_file)
LINES TERMINATED BY '\n' STORED AS TEXTFILE; """ query1 = """ LOAD DATA LOCAL INPATH '/home/ameni/PycharmProjects/Product_Service/productdb.csv' INTO TABLE products.catalog; """ query2 = """ SELECT * FROM products.catalog LIMIT 5; """ hive_client = hive_utils.HiveClient( server='0.0.0.0', port=10000, db='products' ) a = hive_client.execute(query) b = hive_client.execute(query1) c = hive_client.execute(query2) c = list(c) import pyhs2 with pyhs2.connect(host='localhost', port=10000, authMechanism="PLAIN", user='******', password='******', database='your_default_db') as conn: with conn.cursor() as cur:
import hive_utils from hive_service.ttypes import HiveServerException # get the input parameters if len(sys.argv) != 6: print 'Usage: python hiveserver1-client.py <hive_host> <hive_port> <db_name> <hadoop_user> <hadoop_password>' sys.exit() hiveHost = sys.argv[1] hivePort = sys.argv[2] dbName = sys.argv[3] hadoopUser = sys.argv[4] hadoopPassword = sys.argv[5] # do the connection client = hive_utils.HiveClient(server=hiveHost, port=hivePort, db=dbName) # create a loop attending HiveQL queries while (1): query = raw_input('remotehive> ') try: if not query: continue if query == 'exit': sys.exit() # execute the query for row in client.execute(query): print row