def process_file(self):
    """Plot, for one month, the daily count of every selected field value.

    Reads the per-day sorted sets from ``self.red`` for ``self.source``,
    ``self.date`` (sliced as ``YYYYMM``) and ``self.field``, draws one line
    per plotted series on a Bokeh figure and saves it as
    ``<output name>.html``.  An entry of ``self.fieldvalues`` is either
    ``value`` (total over all protocols), ``value-protocol`` (one protocol)
    or ``value-*`` / ``value-all`` (one line per known protocol, plus the
    total).

    Side effects: normalises ``self.outputdir`` to end with ``/`` and creates
    it if missing, appends the bare value of every ``value-all`` entry to
    ``self.fieldvalues``, and, when ``self.links`` is set, runs the per-day
    CSV export for the month.  Calls ``sys.exit(1)`` when a protocol is
    requested but the dataset does not use combined keys.  When no selected
    value has data, only a message is printed and no plot is saved.
    """
    potiron_path = potiron.potiron_path
    ck = self.red.sismember('CK', 'YES')

    # An entry written 'value-protocol' (or 'value-all') asks for one line per
    # protocol, which is only possible when combined keys are deployed.
    lentwo = any(len(fv.split('-')) >= 2 for fv in self.fieldvalues)
    if lentwo and not ck:
        sys.stderr.write('Combined keys are not used in this redis dataset')
        sys.exit(1)

    if not self.outputdir.endswith('/'):
        self.outputdir = "{}/".format(self.outputdir)
    if not os.path.exists(self.outputdir):
        os.makedirs(self.outputdir)

    # Protocols currently present in the dataset
    protocols = self.red.smembers('PROTOCOLS')
    # Strings used for legends, titles, etc. concerning the field
    field_string, field_in_file_name = field2string(self.field, potiron_path)
    field_data = create_dict(self.field, potiron_path)

    # When all the protocols of a value are requested, their sum is displayed
    # as well: the bare value is queued as an additional entry.  Iterate over
    # a snapshot because the list is extended inside the loop.
    all_proto = False
    for fv in list(self.fieldvalues):
        parts = fv.split('-')
        if len(parts) >= 2 and parts[1] in ('*', 'all'):
            all_proto = True
            self.fieldvalues.append(parts[0])

    namefile = self.output_name(field_in_file_name, lentwo, all_proto)

    # Showing every protocol can produce many lines, so the hover tool also
    # names the protocol of the line under the cursor in that case.
    if all_proto:
        hover = HoverTool(tooltips=[('count', '@y'), ('protocol', '@prot')])
    else:
        hover = HoverTool(tooltips=[('count', '@y')])
    taptool = TapTool()
    TOOLS = [hover, PanTool(), BoxZoomTool(), WheelZoomTool(), taptool,
             SaveTool(), ResetTool()]
    p = figure(width=self.plot_width, height=self.plot_height, tools=TOOLS)

    # State updated while the series are drawn
    at_least_one = False
    days = calendar.monthrange(int(self.date[0:4]), int(self.date[4:6]))[1]
    max_val = 0
    min_val = sys.maxsize
    max_day = 0
    line_count = 0
    actual_values = []

    def read_daily_scores(pattern, member):
        """Return (day labels, scores, found) for the keys matching pattern."""
        day_labels, scores, found = [], [], False
        for raw_key in sorted(self.red.keys(pattern)):
            redis_key = raw_key.decode()
            # The date is the segment just before the field name, both in
            # 'source:protocol:date:field' and in 'source:date:field' keys.
            day_labels.append(redis_key.split(':')[-2][-2:])
            count = self.red.zscore(redis_key, member)
            if count is not None:
                found = True
                scores.append(count)
            else:
                scores.append(0)
        return day_labels, scores, found

    def add_series(day_labels, scores, legend, label, hover_columns=None):
        """Draw one line with its clickable points and update the bounds."""
        nonlocal at_least_one, line_count, max_val, min_val, max_day
        at_least_one = True
        color = palette[line_count % 10]
        if hover_columns is None:
            p.line(x=day_labels, y=scores, legend=legend, line_color=color,
                   line_width=2)
            points = p.scatter(x=day_labels, y=scores, legend=legend, size=10,
                               color=color, alpha=0.1)
        else:
            sourceplot = ColumnDataSource(
                data=dict(x=day_labels, y=scores, **hover_columns))
            p.line(x='x', y='y', legend=legend, line_color=color,
                   line_width=2, source=sourceplot)
            points = p.scatter(x='x', y='y', legend=legend, size=10,
                               color=color, alpha=0.1, source=sourceplot)
        taptool.renderers.append(points)  # enables the interaction on click
        line_count += 1
        # Lower and upper limits of the graph
        max_val = max(max_val, max(scores))
        min_val = min(min_val, min(scores))
        # Last day for which there is data to display
        max_day = max(max_day, int(day_labels[-1]))
        actual_values.append(label)

    for fieldvalue in self.fieldvalues:  # For each selected value
        parts = fieldvalue.split('-')
        actual_field = parts[0]
        if len(parts) >= 2:  # One or all protocol(s) were specified
            protocol = parts[1]
            if protocol in ('*', 'all'):
                for prot in protocols:
                    proto = prot.decode()
                    pattern = "{}:{}:{}*:{}".format(
                        self.source, proto, self.date, self.field)
                    day_labels, scores, found = read_daily_scores(
                        pattern, actual_field)
                    if found:
                        add_series(
                            day_labels, scores,
                            def_legend(actual_field, proto, self.field,
                                       field_string, field_data),
                            "{}-{}".format(actual_field, protocol),
                            hover_columns=dict(
                                protocol=["{}_{}".format(day, proto)
                                          for day in day_labels],
                                prot=[proto] * len(scores)))
            else:
                pattern = "{}:{}:{}*:{}".format(
                    self.source, protocol, self.date, self.field)
                day_labels, scores, found = read_daily_scores(
                    pattern, actual_field)
                if found:
                    add_series(
                        day_labels, scores,
                        def_legend(actual_field, protocol, self.field,
                                   field_string, field_data),
                        "{}-{}".format(actual_field, protocol))
        else:  # The score is not split per protocol
            if ck:
                # With combined keys the total of a day is the sum of the
                # scores stored under every protocol key of that day.
                day_labels, scores, found = [], [], False
                for d in range(1, days + 1):
                    day = format(d, '02d')
                    keys = self.red.keys("{}:*:{}{}:{}".format(
                        self.source, self.date, day, self.field))
                    if not keys:  # nothing stored in redis for this day
                        continue
                    day_total = 0
                    for raw_key in keys:
                        tmpscore = self.red.zscore(raw_key.decode(),
                                                   actual_field)
                        if tmpscore is not None:
                            day_total += tmpscore
                    if day_total > 0:
                        found = True
                    scores.append(day_total)
                    day_labels.append(day)
            else:
                # Without combined keys there is a single key per day
                pattern = "{}:{}*:{}".format(
                    self.source, self.date, self.field)
                day_labels, scores, found = read_daily_scores(
                    pattern, actual_field)
            if found:
                hover_columns = None
                if all_proto:
                    hover_columns = dict(
                        protocol=list(day_labels),
                        prot=['all protocols'] * len(scores))
                add_series(
                    day_labels, scores,
                    def_legend(actual_field, None, self.field, field_string,
                               field_data),
                    actual_field,
                    hover_columns=hover_columns)

    if not at_least_one:
        print("There is no such value for a {} you specified: {}".format(
            field_string, self.fieldvalues))
        return

    # Page opened by a click on a point; '@x' is replaced by the day of the
    # clicked point.
    # NOTE(review): the literal used to end in the e-mail obfuscation
    # placeholder '[email protected]'; '-@x.html' is assumed to match the
    # per-day pages written by the CSV export - confirm against its naming.
    if lentwo:
        url_template = "{}_{}_with-protocols_{}-{}-@x.html"
    else:
        url_template = "{}_{}_{}-{}-@x.html"
    taptool.callback = OpenURL(url=url_template.format(
        self.source, field_in_file_name, self.date[0:4], self.date[4:6]))
    output_file("{}.html".format(namefile), title=namefile.split("/")[-1])

    # Parameters of the graph
    fieldvalues_string = plot_annotation(self.field, potiron_path,
                                         actual_values, field_string,
                                         field_data)
    p.title.text = "Number of {} {}seen each day in {} {}".format(
        field_string, fieldvalues_string, potiron.year[self.date[4:6]],
        self.date[0:4])
    p.yaxis[0].formatter = BasicTickFormatter(use_scientific=False)
    p.xaxis.axis_label = "Days"
    p.yaxis.axis_label = "Count"
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    # Parameters for the logo
    with Image.open(self.logofile) as im:
        im_width, im_height = im.size
    xdr = max_day + 1
    upper_space = 10
    if line_count > 2:
        # Leave more room above the curves when there are many lines
        upper_space *= (line_count / 2)
    ydrmax = max_val + max_val * upper_space / 100
    ydrmin = min_val - max_val * 5 / 100
    p.x_range = Range1d(0, xdr)
    p.y_range = Range1d(ydrmin, ydrmax)
    height = (ydrmax - ydrmin) / self.logo_y_scale
    width = xdr / ((self.logo_y_scale * im_height * self.plot_width)
                   / (im_width * self.plot_height))
    p.image_url(url=[self.logofile], x=[xdr], y=[ydrmax - ydrmax * 2 / 100],
                w=[width], h=[height], anchor="top_right")

    # Process the graph
    save(p)
    if self.links:
        # 'ck' was read from self.red at the top of the method; it is not
        # re-read through a module-level connection that may not exist.
        export_csv = export_csv_all_days_per_month.Export_Csv(
            self.red, self.source, self.date, self.field, 10, ['-1'],
            self.outputdir, True, True, self.logofile, ck, lentwo)
        export_csv.process_all_files()
def process_file(self):
    """Plot, for one month, the daily count of every selected field value.

    Reads the per-day sorted sets from ``self.red`` for ``self.source``,
    ``self.date`` (sliced as ``YYYYMM``) and ``self.field``, draws one line
    per plotted series on a Bokeh figure and saves it as
    ``<output name>.html``.  An entry of ``self.fieldvalues`` is either
    ``value`` (total over all protocols), ``value-protocol`` (one protocol)
    or ``value-*`` / ``value-all`` (one line per known protocol, plus the
    total).

    Side effects: normalises ``self.outputdir`` to end with ``/`` and creates
    it if missing, appends the bare value of every ``value-all`` entry to
    ``self.fieldvalues``, and, when ``self.links`` is set, runs the per-day
    CSV export for the month.  Calls ``sys.exit(1)`` when a protocol is
    requested but the dataset does not use combined keys.  When no selected
    value has data, only a message is printed and no plot is saved.
    """
    potiron_path = potiron.potiron_path
    ck = self.red.sismember('CK', 'YES')

    # An entry written 'value-protocol' (or 'value-all') asks for one line per
    # protocol, which is only possible when combined keys are deployed.
    lentwo = any(len(fv.split('-')) >= 2 for fv in self.fieldvalues)
    if lentwo and not ck:
        sys.stderr.write('Combined keys are not used in this redis dataset')
        sys.exit(1)

    if not self.outputdir.endswith('/'):
        self.outputdir = "{}/".format(self.outputdir)
    if not os.path.exists(self.outputdir):
        os.makedirs(self.outputdir)

    # Protocols currently present in the dataset
    protocols = self.red.smembers('PROTOCOLS')
    # Strings used for legends, titles, etc. concerning the field
    field_string, field_in_file_name = field2string(self.field, potiron_path)
    field_data = create_dict(self.field, potiron_path)

    # When all the protocols of a value are requested, their sum is displayed
    # as well: the bare value is queued as an additional entry.  Iterate over
    # a snapshot because the list is extended inside the loop.
    all_proto = False
    for fv in list(self.fieldvalues):
        parts = fv.split('-')
        if len(parts) >= 2 and parts[1] in ('*', 'all'):
            all_proto = True
            self.fieldvalues.append(parts[0])

    namefile = self.output_name(field_in_file_name, lentwo, all_proto)

    # Showing every protocol can produce many lines, so the hover tool also
    # names the protocol of the line under the cursor in that case.
    if all_proto:
        hover = HoverTool(tooltips=[('count', '@y'), ('protocol', '@prot')])
    else:
        hover = HoverTool(tooltips=[('count', '@y')])
    taptool = TapTool()
    TOOLS = [hover, PanTool(), BoxZoomTool(), WheelZoomTool(), taptool,
             SaveTool(), ResetTool()]
    p = figure(width=self.plot_width, height=self.plot_height, tools=TOOLS)

    # State updated while the series are drawn
    at_least_one = False
    days = calendar.monthrange(int(self.date[0:4]), int(self.date[4:6]))[1]
    max_val = 0
    min_val = sys.maxsize
    max_day = 0
    line_count = 0
    actual_values = []

    def read_daily_scores(pattern, member):
        """Return (day labels, scores, found) for the keys matching pattern."""
        day_labels, scores, found = [], [], False
        for raw_key in sorted(self.red.keys(pattern)):
            redis_key = raw_key.decode()
            # The date is the segment just before the field name, both in
            # 'source:protocol:date:field' and in 'source:date:field' keys.
            day_labels.append(redis_key.split(':')[-2][-2:])
            count = self.red.zscore(redis_key, member)
            if count is not None:
                found = True
                scores.append(count)
            else:
                scores.append(0)
        return day_labels, scores, found

    def add_series(day_labels, scores, legend, label, hover_columns=None):
        """Draw one line with its clickable points and update the bounds."""
        nonlocal at_least_one, line_count, max_val, min_val, max_day
        at_least_one = True
        color = palette[line_count % 10]
        if hover_columns is None:
            p.line(x=day_labels, y=scores, legend=legend, line_color=color,
                   line_width=2)
            points = p.scatter(x=day_labels, y=scores, legend=legend, size=10,
                               color=color, alpha=0.1)
        else:
            sourceplot = ColumnDataSource(
                data=dict(x=day_labels, y=scores, **hover_columns))
            p.line(x='x', y='y', legend=legend, line_color=color,
                   line_width=2, source=sourceplot)
            points = p.scatter(x='x', y='y', legend=legend, size=10,
                               color=color, alpha=0.1, source=sourceplot)
        taptool.renderers.append(points)  # enables the interaction on click
        line_count += 1
        # Lower and upper limits of the graph
        max_val = max(max_val, max(scores))
        min_val = min(min_val, min(scores))
        # Last day for which there is data to display
        max_day = max(max_day, int(day_labels[-1]))
        actual_values.append(label)

    for fieldvalue in self.fieldvalues:  # For each selected value
        parts = fieldvalue.split('-')
        actual_field = parts[0]
        if len(parts) >= 2:  # One or all protocol(s) were specified
            protocol = parts[1]
            if protocol in ('*', 'all'):
                for prot in protocols:
                    proto = prot.decode()
                    pattern = "{}:{}:{}*:{}".format(
                        self.source, proto, self.date, self.field)
                    day_labels, scores, found = read_daily_scores(
                        pattern, actual_field)
                    if found:
                        add_series(
                            day_labels, scores,
                            def_legend(actual_field, proto, self.field,
                                       field_string, field_data),
                            "{}-{}".format(actual_field, protocol),
                            hover_columns=dict(
                                protocol=["{}_{}".format(day, proto)
                                          for day in day_labels],
                                prot=[proto] * len(scores)))
            else:
                pattern = "{}:{}:{}*:{}".format(
                    self.source, protocol, self.date, self.field)
                day_labels, scores, found = read_daily_scores(
                    pattern, actual_field)
                if found:
                    add_series(
                        day_labels, scores,
                        def_legend(actual_field, protocol, self.field,
                                   field_string, field_data),
                        "{}-{}".format(actual_field, protocol))
        else:  # The score is not split per protocol
            if ck:
                # With combined keys the total of a day is the sum of the
                # scores stored under every protocol key of that day.
                day_labels, scores, found = [], [], False
                for d in range(1, days + 1):
                    day = format(d, '02d')
                    keys = self.red.keys("{}:*:{}{}:{}".format(
                        self.source, self.date, day, self.field))
                    if not keys:  # nothing stored in redis for this day
                        continue
                    day_total = 0
                    for raw_key in keys:
                        tmpscore = self.red.zscore(raw_key.decode(),
                                                   actual_field)
                        if tmpscore is not None:
                            day_total += tmpscore
                    if day_total > 0:
                        found = True
                    scores.append(day_total)
                    day_labels.append(day)
            else:
                # Without combined keys there is a single key per day
                pattern = "{}:{}*:{}".format(
                    self.source, self.date, self.field)
                day_labels, scores, found = read_daily_scores(
                    pattern, actual_field)
            if found:
                hover_columns = None
                if all_proto:
                    hover_columns = dict(
                        protocol=list(day_labels),
                        prot=['all protocols'] * len(scores))
                add_series(
                    day_labels, scores,
                    def_legend(actual_field, None, self.field, field_string,
                               field_data),
                    actual_field,
                    hover_columns=hover_columns)

    if not at_least_one:
        print("There is no such value for a {} you specified: {}".format(
            field_string, self.fieldvalues))
        return

    # Page opened by a click on a point; '@x' is replaced by the day of the
    # clicked point.
    # NOTE(review): the literal used to end in the e-mail obfuscation
    # placeholder '[email protected]'; '-@x.html' is assumed to match the
    # per-day pages written by the CSV export - confirm against its naming.
    if lentwo:
        url_template = "{}_{}_with-protocols_{}-{}-@x.html"
    else:
        url_template = "{}_{}_{}-{}-@x.html"
    taptool.callback = OpenURL(url=url_template.format(
        self.source, field_in_file_name, self.date[0:4], self.date[4:6]))
    output_file("{}.html".format(namefile), title=namefile.split("/")[-1])

    # Parameters of the graph
    fieldvalues_string = plot_annotation(self.field, potiron_path,
                                         actual_values, field_string,
                                         field_data)
    p.title.text = "Number of {} {}seen each day in {} {}".format(
        field_string, fieldvalues_string, potiron.year[self.date[4:6]],
        self.date[0:4])
    p.yaxis[0].formatter = BasicTickFormatter(use_scientific=False)
    p.xaxis.axis_label = "Days"
    p.yaxis.axis_label = "Count"
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    # Parameters for the logo
    with Image.open(self.logofile) as im:
        im_width, im_height = im.size
    xdr = max_day + 1
    upper_space = 10
    if line_count > 2:
        # Leave more room above the curves when there are many lines
        upper_space *= (line_count / 2)
    ydrmax = max_val + max_val * upper_space / 100
    ydrmin = min_val - max_val * 5 / 100
    p.x_range = Range1d(0, xdr)
    p.y_range = Range1d(ydrmin, ydrmax)
    height = (ydrmax - ydrmin) / self.logo_y_scale
    width = xdr / ((self.logo_y_scale * im_height * self.plot_width)
                   / (im_width * self.plot_height))
    p.image_url(url=[self.logofile], x=[xdr], y=[ydrmax - ydrmax * 2 / 100],
                w=[width], h=[height], anchor="top_right")

    # Process the graph
    save(p)
    if self.links:
        # 'ck' was read from self.red at the top of the method; it is not
        # re-read through a module-level connection that may not exist.
        export_csv = export_csv_all_days_per_month.Export_Csv(
            self.red, self.source, self.date, self.field, 10, ['-1'],
            self.outputdir, True, True, self.logofile, ck, lentwo)
        export_csv.process_all_files()
else: outputdir = args.outputdir[0] if not outputdir.endswith('/'): outputdir = "{}/".format(outputdir) if not os.path.exists(outputdir): os.makedirs(outputdir) if args.unix is None: sys.stderr.write('A Unix socket must be specified.\n') sys.exit(1) usocket = args.unix[0] r = redis.Redis(unix_socket_path=usocket) potiron_path = os.path.dirname(os.path.realpath(__file__))[:-3] field_string, field_in_file_name = field2string(field, potiron_path) ck = r.sismember("CK", "YES") protocols = r.smembers("PROTOCOLS") days = calendar.monthrange(int(date[0:4]), int(date[4:6]))[1] outputname = output_name(source,field_in_file_name,date,outputdir) if not os.path.exists(outputdir): os.makedirs(outputdir) f = open("{}.csv".format(outputname), 'w') days_string = "{},".format(field_in_file_name) for day in range(1,days+1): d = format(day, '02d') days_string += "{}-{},".format(month,d) f.write("{}\n".format(days_string[:-1])) val = {}
def process_all_files(self):
    """Write the data files of every day of the month, then the html pages.

    For each day of ``self.date`` (sliced as ``YYYYMM``), the redis keys of
    ``self.source`` / ``self.field`` are looked up and turned into data files
    through ``process_file`` / ``process_general_file``:

    * ``self.lentwo`` true: one file per protocol key plus one aggregate
      ``with-protocols`` file built from the merged scores;
    * otherwise, with ``self.ck`` true: the scores of all the protocol keys
      are merged into one file per day;
    * otherwise: one file per key found for the day.

    When ``self.links`` is set, ``generate_links`` is called for every value
    returned by the file-processing calls.  When ``self.gen`` is set, an html
    page is written next to every ``.csv`` file of ``self.outputdir`` by
    filling ``##NAME##`` and ``##LOGO##`` in ``template.html``.
    """
    current_path = potiron.current_path  # Module directory
    potiron_path = potiron.potiron_path  # Project directory
    # Strings describing the field, used in the legend and the file name
    field_string, field_in_file_name = field2string(self.field, potiron_path)
    if self.links:
        bokeh = bokeh_month.Bokeh_Month(self.red, self.source, self.field,
                                        self.date, [], self.outputdir,
                                        self.logofile, False)
    days = calendar.monthrange(int(self.date[0:4]), int(self.date[4:6]))[1]
    for d in range(1, days + 1):  # For each day of the month
        day = format(d, '02d')
        namefile_data, namefile_date = self.output_name(field_in_file_name, day)
        keys = self.red.keys("{}:*{}{}:{}".format(
            self.source, self.date, day, self.field))
        # When called from the bokeh module, lentwo means that a value has the
        # format 'value-protocol'; here it comes from the '-p'
        # (= '--without-protocol') parameter, which sets it to False.
        # In both cases True means "separate the protocols" and False means
        # "take the complete scores of all the protocols together".
        if self.lentwo:
            score = {}
            for k in keys:
                redisKey = k.decode()
                protocol = redisKey.split(':')[1]
                namefile = "{}_with-protocols_{}_{}".format(
                    namefile_data, namefile_date, protocol)
                # Create and process the output datafile of this protocol
                val = self.process_file(redisKey, namefile, protocol,
                                        field_string)
                self.process_score(redisKey, score)  # update the complete scores
                if self.links:
                    # One bokeh plot per bubble of the chart
                    for v in val:
                        self.generate_links('{}-all-protocols'.format(v),
                                            namefile, bokeh)
            # The complete scores, protocols together, go to another datafile
            general_namefile = "{}_with-protocols_{}".format(
                namefile_data, namefile_date)
            res = self.process_general_file(score, general_namefile,
                                            field_string)
            if self.links:
                # NOTE(review): this passes the name of the last per-protocol
                # file, not general_namefile - kept as is, confirm intent.
                for v in res:
                    self.generate_links('{}-all-protocols'.format(v),
                                        namefile, bokeh)
        elif self.ck:
            # Combined keys are used but the protocols are not separated:
            # the scores of each protocol are added together and written in
            # one unique datafile.
            score = {}
            for k in keys:
                self.process_score(k.decode(), score)
            general_namefile = "{}_{}".format(namefile_data, namefile_date)
            res = self.process_general_file(score, general_namefile,
                                            field_string)
            if self.links:
                for v in res:
                    self.generate_links(v, general_namefile, bokeh)
        else:
            # No combined keys: each score comes from one key and there is one
            # key per day.  Iterating also skips the days without any key,
            # instead of reading a loop variable that was never bound.
            namefile = "{}_{}".format(namefile_data, namefile_date)
            for k in keys:
                val = self.process_file(k.decode(), namefile, None,
                                        field_string)
                if self.links:
                    for v in val:
                        self.generate_links(v, namefile, bokeh)

    if self.gen:
        # Generate the html files displaying the charts, one per datafile,
        # following the template
        name_string = '##NAME##'
        logo_string = '##LOGO##'
        with open('{}/template.html'.format(current_path), 'r') as template:
            template_lines = template.readlines()
        for filename in os.listdir(self.outputdir):
            if not filename.endswith('.csv'):
                continue
            basename = filename[:-4]
            with open('{}{}.html'.format(self.outputdir, basename), 'w') as page:
                for line in template_lines:
                    line = line.replace(name_string, basename)
                    line = line.replace(logo_string, self.logofile)
                    page.write(line)