def retrieve_pie_date(start_time,finish_time,generate="unknow",groupby="unknow",**kwargs):
    '''Return (group, total) pairs suitable for drawing a pie chart.

    start_time / finish_time: date strings formatted like '2010-01-01'
        (parsed with "%Y-%m-%d"; a malformed value raises ValueError).
    generate: name of the measure to total for each group.
    groupby: name of the attribute whose values form the pie slices.
    kwargs: optional filters. The available kwargs can be:
        "JobGroup","FinalMajorStatus","User","JobType","JobClass",
        "ProcessingType","UserGroup","FinalMinorStatus","Site"...
        They are passed through eval_prefs() and each resulting
        (column, value) pair becomes an index expression.

    Without filters the totals are read from the column family
    "new_cum_groupby_<groupby lowercased>", slicing the composite columns
    from (generate, start_time) to (generate, finish_time) and summing the
    values found in each row.  With filters the rows of 'bucket_data_cli'
    are queried through index expressions and summed per value of the
    `groupby` column (used verbatim, not lowercased).

    Returns a list of (group, total) tuples, or -1 (after printing
    "error") when `generate` or `groupby` is left at its default.
    '''
    if generate == "unknow" or groupby == "unknow":
        print("error")
        return -1

    # Both dates are parsed before branching, so a malformed date raises
    # ValueError on either path before any query is issued.
    start_timestamp = int(time.mktime(time.strptime(start_time, "%Y-%m-%d")))
    finish_timestamp = int(time.mktime(time.strptime(finish_time, "%Y-%m-%d")))

    if not kwargs:
        print("no kwargs")
        groupby = groupby.lower()
        cf = pycassa.ColumnFamily(pool, "new_cum_groupby_%s" % groupby)
        rows = cf.get_range(column_count=365000,
                            column_start=(generate, start_time),
                            column_finish=(generate, finish_time))
        # One pie slice per row key: the sum of every column in the slice.
        return [(row_key, sum(columns.values())) for row_key, columns in rows]

    print("has kwargs")
    cf = pycassa.ColumnFamily(pool, 'bucket_data_cli')
    # Caller-supplied filters come first, followed by the fixed constraints.
    expr_list = [pycassa.create_index_expression(column, wanted)
                 for column, wanted in eval_prefs(**kwargs).items()]
    # 604800 s == 7 days; presumably this selects the weekly buckets --
    # TODO confirm against the 'bucket_data_cli' schema.
    expr_list.append(pycassa.create_index_expression("bucketLength", 604800))
    expr_list.append(
        pycassa.create_index_expression("startTime", start_timestamp, pycassa.GTE))
    expr_list.append(
        pycassa.create_index_expression("startTime", finish_timestamp, pycassa.LTE))
    clause = pycassa.create_index_clause(expr_list, count=600000)

    totals = Counter()
    for _row_key, columns in cf.get_indexed_slices(clause):
        totals[columns[groupby]] += columns[generate]
    return list(totals.items())
def generate_linegraph(start_time="",end_time="",generate="unknow",groupby="unknow",cumulative=False,**kwargs):
    '''Draw a filled line graph of `generate` per `groupby` value as a PNG.

    start_time / end_time: date strings formatted like '2010-01-01'
        (parsed with "%Y-%m-%d"; a malformed value raises ValueError).
    generate: CPUTime, DiskSpace, ExecTime, InputSandBoxSize,
        OutPutSandBoxSize, JobCount
    groupby: site, user, processingtype, country, grid
    cumulative: defaults to False; set it to True to plot the running
        total (via make_cumulative_data) instead of the raw values.
    kwargs: optional filters, translated by eval_prefs() into index
        expressions.

    When filters are given, the data comes from an indexed query on
    'bucket_data_cli' so that the filters are honoured.  Without filters
    it is read from "new_cum_groupby_<groupby lowercased>".

    Side effect: the figure is also saved under the name 'linegraph' in
    the current working directory.

    Returns a cStringIO.StringIO holding the PNG, rewound to position 0,
    or -1 (after printing "error") when `generate` or `groupby` is left
    at its default.
    '''
    if generate == "unknow" or groupby == "unknow":
        print("error")
        return -1

    start_timestamp = int(time.mktime(time.strptime(start_time, "%Y-%m-%d")))
    end_timestamp = int(time.mktime(time.strptime(end_time, "%Y-%m-%d")))

    started = time.time()
    if kwargs:
        # Filters can only be applied through the secondary-index query.
        series, y_max = _linegraph_series_from_index(
            start_timestamp, end_timestamp, generate, groupby, cumulative, kwargs)
    else:
        print('no kwargs')
        groupby = groupby.lower()
        series, y_max = _linegraph_series_from_totals(
            start_time, end_time, generate, groupby, cumulative)

    title = '%s groupby %s from %s to %s' % (generate, groupby, start_time, end_time)
    return _linegraph_render(
        series, (start_timestamp, end_timestamp + 1), y_max + 1, title, started)


def _linegraph_series_from_index(start_timestamp, end_timestamp, generate, groupby, cumulative, filters):
    '''Query 'bucket_data_cli' by index and return ([(label, points, zorder)], y_max).'''
    cf = pycassa.ColumnFamily(pool, 'bucket_data_cli')
    expr_list = [
        # 604800 s == 7 days; presumably the weekly buckets -- TODO confirm.
        pycassa.create_index_expression("bucketLength", 604800),
        pycassa.create_index_expression("startTime", start_timestamp, pycassa.GTE),
        pycassa.create_index_expression("startTime", end_timestamp, pycassa.LTE),
    ]
    for column, wanted in eval_prefs(**filters).items():
        expr_list.append(pycassa.create_index_expression(column, wanted))
    clause = pycassa.create_index_clause(expr_list, count=600000)

    # group value -> Counter mapping bucket start time -> summed measure
    per_group = {}
    for _row_key, columns in cf.get_indexed_slices(clause):
        bucket_totals = per_group.setdefault(columns[groupby], Counter())
        bucket_totals[columns["startTime"]] += columns[generate]

    if cumulative:
        # Debug trace emitted on the cumulative path.
        print("True")

    series = []
    y_max = 0.0
    for label, bucket_totals in per_group.items():
        points = sorted(bucket_totals.items(), key=lambda item: item[0])
        if cumulative:
            times, values = zip(*points)
            points = list(zip(times, make_cumulative_data(values)))
        y_max = max(y_max, max(value for _stamp, value in points))
        # NOTE(review): the z-order follows the running maximum, not this
        # series' own peak, so series after the tallest one share a z-order.
        # Looks unintended -- confirm before changing the stacking.
        series.append((label, points, -y_max))
    return series, y_max


def _linegraph_series_from_totals(start_time, end_time, generate, groupby, cumulative):
    '''Read "new_cum_groupby_<groupby>" and return ([(label, points, zorder)], y_max).'''
    cf = pycassa.ColumnFamily(pool, "new_cum_groupby_%s" % groupby)
    rows = cf.get_range(column_count=365000,
                        column_start=(generate, start_time),
                        column_finish=(generate, end_time))
    series = []
    y_max = 0.0
    zorder = 0.0
    for label, columns in rows:
        if not columns:
            # Nothing to plot for this key; avoids max()/indexing on empty data.
            continue
        times = []
        values = []
        for name, value in columns.items():
            # name[1] is the date component of the composite column name.
            times.append(int(time.mktime(time.strptime(name[1], '%Y-%m-%d'))))
            values.append(value)
        if cumulative:
            values = make_cumulative_data(values)
        y_max = max(y_max, max(values))
        series.append((label, list(zip(times, values)), zorder))
        # Each later series is drawn just beneath the previous one.
        zorder -= 0.1
    return series, y_max


def _linegraph_render(series, x_limits, y_top, title, started):
    '''Plot each series as a filled polygon and return the PNG in a rewound buffer.'''
    fig = pylab.figure()
    try:
        ax = pylab.axes()
        labels = []
        for label, points, zorder in series:
            labels.append(label)
            # Drop to the x axis at both ends so the polygon is a closed area.
            outline = [(points[0][0], 0)] + points + [(points[-1][0], 0)]
            ax.add_patch(pylab.Polygon(outline, fill=True,
                                       facecolor=generate_color(str(label)),
                                       linewidth=.2, zorder=zorder))
        ax.set_xlim(*x_limits)
        ax.set_ylim(0, y_top)
        ax.set_xticklabels([time.strftime('%y-%m-%d', time.localtime(int(tick)))
                            for tick in ax.get_xticks()])
        legend_font = FontProperties()
        legend_font.set_size('xx-small')
        ax.legend(labels, loc=0, bbox_to_anchor=(1, 1.05), prop=legend_font)
        pylab.title(title)
        pylab.xlabel('Processing time is: %.5ss' % (time.time() - started))
        pylab.savefig('linegraph')
        img_data = cStringIO.StringIO()
        pylab.savefig(img_data, format='png')
        img_data.seek(0)
        return img_data
    finally:
        # Always release the figure, even when plotting or saving fails.
        pylab.close(fig)