def evaluation(input_folder, output_file):
    analyzed_files = (Path(input_folder)).listdir('*analyzed.csv')
    REL = len(analyzed_files)
    REC = 0
    RR = 0
    ARI = 0
    BRI = 0
    for f_path in analyzed_files:
        topic_id = f_path.basename()[3:-13]
        topic_row = get_topic_info(topic_id)
        detection_time = dt_parser(str(topic_row[1]))
        RR_temp = 0
        for i, wind_time, count, eta, trend_analysis in pandas.read_csv(
                f_path, sep=',', header=None).itertuples():
            if trend_analysis == 1:
                REC += 1
                current_time = dt_parser(wind_time)
                if current_time.strftime(CORPUS_DATE_FORMAT) == detection_time.strftime(CORPUS_DATE_FORMAT):
                    RR_temp = 1
                elif current_time < detection_time:
                    BRI += 1
                else:
                    ARI += 1
        RR += RR_temp
    NR = REL - RR
    if not Path(output_file).exists():
        header = 'k,folder,rel,rec,rr,nr,bri,ari,recobrado,precision\n'
        with open(output_file, 'x') as out:
            out.write(header)
    with open(output_file, 'a') as out:
        csv_output = f'{Path(input_folder).basename()},{input_folder},{REL},{REC},{RR},{NR},{BRI},{ARI},{RR/float(REL):1.3f},{RR/float(RR + BRI):1.3f}\n'
        out.write(csv_output)
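# Illustrative only: the last two CSV columns written above are a recall-style
# and a precision-style ratio over the per-topic counters. A tiny worked example
# with invented counts, using the same formulas as the f-string:
REL, RR, BRI = 20, 12, 6          # relevant topics, on-time detections, early detections
recall = RR / float(REL)          # 0.600 -> 'recobrado' column
precision = RR / float(RR + BRI)  # 0.667 -> 'precision' column
print(f"{recall:1.3f},{precision:1.3f}")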
def analyze(generator, model):
    """
    This function acts on CSV data for a single counter.
    It loops over the items generated by the first argument.
    Each item is expected to be a tuple of:
    [interval_start_time] [interval_duration_in_sec] [interval_count]
    Each count is used to update the model, and the model result is added
    to the return list.
    """
    logger = logging.getLogger("analyze")
    if logger.handlers == []:
        fmtr = logging.Formatter(
            '%(asctime)s %(name)s:%(lineno)s - %(levelname)s - %(message)s')
        hndlr = logging.StreamHandler()
        hndlr.setFormatter(fmtr)
        logger.addHandler(hndlr)

    output_data = []
    for line in generator:
        try:
            time_interval_start = dt_parser(line[0])
        except ValueError:
            print(line[0])
            sys.exit()
        time_interval_duration = line[1]
        count = float(line[2])

        model.update(count=count, interval_start_time=time_interval_start)
        # result = float(model.get_result())
        trend, result = model.get_result()
        result = float(result)

        # trim digits in outputs
        if count > 0:
            trimmed_count = round(count, -int(floor(log10(count))) + 1)
        else:
            trimmed_count = 0
        if result > 0:
            trimmed_result = round(result, -int(floor(log10(result))) + 1)
        else:
            trimmed_result = 0

        # output_data.append( (str(time_interval_start), count, trimmed_result) )
        output_data.append(
            (str(time_interval_start), count, trimmed_result, trend))
        logger.debug("{0} {1:>8} {2}".format(time_interval_start,
                                             trimmed_count, trimmed_result))
    return output_data
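# A minimal sketch of driving analyze(), assuming only the model interface used
# above (update() and get_result()); ToyModel and the inline CSV rows are invented.
import csv
import io

class ToyModel(object):
    def __init__(self):
        self.prev = 0.0
        self.last = 0.0
    def update(self, count, interval_start_time):
        self.prev, self.last = self.last, count
    def get_result(self):
        eta = self.last / (self.prev + 1.0)
        return (1 if eta > 2 else 0), eta   # (trend flag, eta-like score)

rows = csv.reader(io.StringIO("2017-01-01 00:00:00,3600,10\n"
                              "2017-01-01 01:00:00,3600,50\n"))
print(analyze(rows, ToyModel()))  # [(interval start, count, trimmed eta, trend), ...]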
def check_date_range(args, img_time):
    """Check image time versus included date range

    :param args: (object) argparse object.
    :param img_time: date-time string
    :return: boolean
    """
    # Convert image datetime to unix time
    timestamp = dt_parser(img_time)
    time_delta = timestamp - datetime.datetime(1970, 1, 1)
    unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
    # Does the image date-time fall outside or inside the included range
    if unix_time < args.start_date or unix_time > args.end_date:
        return False
    else:
        return True
def _check_date_range(start_date, end_date, img_time):
    """Check image time versus included date range.

    Args:
        start_date: Start date in Unix time
        end_date: End date in Unix time
        img_time: Image datetime

    :param start_date: int
    :param end_date: int
    :param img_time: str
    :return: bool
    """
    # Convert image datetime to unix time
    timestamp = dt_parser(img_time)
    time_delta = timestamp - datetime.datetime(1970, 1, 1)
    unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
    # Does the image date-time fall outside or inside the included range
    if unix_time < start_date or unix_time > end_date:
        return False
    else:
        return True
def _check_date_range(start_date, end_date, img_time):
    """Check image time versus included date range.

    Args:
        start_date: Start date in Unix time
        end_date: End date in Unix time
        img_time: Image datetime

    :param start_date: int
    :param end_date: int
    :param img_time: str
    :return: bool
    """
    # Convert image datetime to unix time
    timestamp = dt_parser(img_time.replace("-", ""))
    time_delta = timestamp - datetime.datetime(1970, 1, 1)
    unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds
    # Does the image date-time fall outside or inside the included range
    if unix_time < start_date or unix_time > end_date:
        return False
    else:
        return True
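# Usage sketch: the bounds are expected in Unix time, computed the same way the
# helper computes it internally; the dates below are invented.
import datetime

def to_unix(dt_str):
    delta = dt_parser(dt_str) - datetime.datetime(1970, 1, 1)
    return delta.days * 24 * 3600 + delta.seconds

start = to_unix("2017-04-01 00:00:00")
end = to_unix("2017-04-30 23:59:59")
print(_check_date_range(start, end, "2017-04-15 12:00:00"))  # True
print(_check_date_range(start, end, "2017-05-02 08:00:00"))  # False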
def process_results(args):
    """
    Get results from individual files. Parse the results and recompile for SQLite.

    Args:
        args: (object) argparse object.
    Returns:
    Raises:
    """
    # Add a header to each output file
    # Metadata table
    metadata_fields = ["image_id", "run_id"]
    metadata_fields.extend(args.valid_meta.keys())
    # args.metadata_file.write('#' + '\t'.join(map(str, metadata_fields)) + '\n')

    # Feature data table
    feature_fields = [
        "area", "hull-area", "solidity", "perimeter", "width", "height",
        "longest_axis", "center-of-mass-x", "center-of-mass-y", "hull_vertices",
        "in_bounds", "ellipse_center_x", "ellipse_center_y", "ellipse_major_axis",
        "ellipse_minor_axis", "ellipse_angle", "ellipse_eccentricity",
    ]
    opt_feature_fields = [
        "y-position", "height_above_bound", "height_below_bound",
        "above_bound_area", "percent_above_bound_area", "below_bound_area",
        "percent_below_bound_area",
    ]
    # args.features_file.write('#' + '\t'.join(map(str, feature_fields + opt_feature_fields)) + '\n')

    # Signal channel data table
    signal_fields = ["bin-number", "channel_name", "values"]
    # bin-number blue green red lightness green-magenta blue-yellow hue saturation value

    # Initialize the database with the schema template if create is true
    if args.create:
        # Create SQL structure based on accepted metadata and features
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `runinfo` (`run_id` INTEGER PRIMARY KEY, `datetime` INTEGER NOT NULL, `command` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `metadata` (`image_id` INTEGER PRIMARY KEY, `run_id` INTEGER NOT NULL, `"
            + "` TEXT NOT NULL, `".join(map(str, metadata_fields[2:]))
            + "` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `features` (`image_id` INTEGER PRIMARY KEY, `"
            + "` TEXT NOT NULL, `".join(map(str, feature_fields + opt_feature_fields))
            + "` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `analysis_images` (`image_id` INTEGER NOT NULL, `type` TEXT NOT NULL, `image_path` TEXT NOT NULL);"
        )
        args.sq.execute(
            "CREATE TABLE IF NOT EXISTS `signal` (`image_id` INTEGER NOT NULL, `"
            + "` TEXT NOT NULL, `".join(map(str, signal_fields))
            + "` TEXT NOT NULL);"
        )

    # Walk through the image processing job directory and process data from each file
    for (dirpath, dirnames, filenames) in os.walk(args.jobdir):
        for filename in filenames:
            meta = {}
            images = {}
            features = []
            feature_data = {}
            signal = []
            signal_data = {}
            boundary = []
            boundary_data = {}
            # Open results file
            with open(dirpath + "/" + filename) as results:
                # For each line in the file
                for row in results:
                    # Remove the newline character
                    row = row.rstrip("\n")
                    # Split the line by tab characters
                    cols = row.split("\t")
                    # If the data is of class meta, store in the metadata dictionary
                    if cols[0] == "META":
                        meta[cols[1]] = cols[2]
                    # If the data is of class image, store in the image dictionary
                    elif cols[0] == "IMAGE":
                        images[cols[1]] = cols[2]
                    # If the data is of class shapes, store in the shapes dictionary
                    elif cols[0] == "HEADER_SHAPES":
                        features = cols
                    elif cols[0] == "SHAPES_DATA":
                        for i, datum in enumerate(cols):
                            if i > 0:
                                feature_data[features[i]] = datum
                    # If the data is of class histogram/signal, store in the signal dictionary
                    elif cols[0] == "HEADER_HISTOGRAM":
                        signal = cols
                    elif cols[0] == "HISTOGRAM_DATA":
                        for i, datum in enumerate(cols):
                            if i > 0:
                                signal_data[signal[i]] = datum
                    # If the data is of class boundary (horizontal rule), store in the boundary dictionary
                    elif "HEADER_BOUNDARY" in cols[0]:
                        boundary = cols
                        # Temporary hack
                        boundary_data["y-position"] = cols[0].replace("HEADER_BOUNDARY", "")
                    elif cols[0] == "BOUNDARY_DATA":
                        for i, datum in enumerate(cols):
                            if i > 0:
                                boundary_data[boundary[i]] = datum

            # Check to see if the image failed, if not continue
            # Convert image datetime to unix time
            timestamp = dt_parser(meta["timestamp"])
            time_delta = timestamp - datetime.datetime(1970, 1, 1)
            unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds

            # Print the image metadata to the aggregate output file
            args.image_id += 1
            meta["image_id"] = args.image_id
            meta["run_id"] = args.run_id
            meta["unixtime"] = unix_time
            meta_table = []
            for field in metadata_fields:
                meta_table.append(meta[field])

            if len(feature_data) != 0:
                args.metadata_file.write("|".join(map(str, meta_table)) + "\n")

                # Print the image feature data to the aggregate output file
                feature_data["image_id"] = args.image_id
                # Boundary data is optional, if it's not there we need to add in placeholder data
                if len(boundary_data) == 0:
                    for field in opt_feature_fields:
                        boundary_data[field] = 0
                feature_data.update(boundary_data)
                feature_table = [args.image_id]
                for field in feature_fields + opt_feature_fields:
                    feature_table.append(feature_data[field])
                args.features_file.write("|".join(map(str, feature_table)) + "\n")

                # Print the analysis image data to the aggregate output file
                for img_type in images:
                    args.analysis_images_file.write(
                        "|".join(map(str, (args.image_id, img_type, images[img_type]))) + "\n"
                    )

                # Print the image signal data to the aggregate output file
                for key in signal_data.keys():
                    if key != "bin-number":
                        signal_data[key] = signal_data[key].replace("[", "")
                        signal_data[key] = signal_data[key].replace("]", "")
                        signal_table = [args.image_id, signal_data["bin-number"], key, signal_data[key]]
                        args.signal_file.write("|".join(map(str, signal_table)) + "\n")
            else:
                args.fail_log.write("|".join(map(str, meta_table)) + "\n")
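# The parser above keys each tab-delimited row on a class tag in column one; an
# invented fragment showing the row shapes the branches handle (values are
# placeholders; only the class tags are taken from the code):
example_rows = ("META\ttimestamp\t2017-04-15 12:00:00\n"
                "IMAGE\tvis_image\t/path/to/vis_image.png\n"
                "HEADER_SHAPES\tarea\thull-area\tsolidity\n"
                "SHAPES_DATA\t1204\t1530\t0.787\n"
                "HEADER_BOUNDARY330\ty-position\theight_above_bound\n"
                "BOUNDARY_DATA\t330\t95\n")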
def plot(input_generator, config):
    """
    input_generator is a generator of tuples with the following structure:
    (time_interval_start, count, eta)
    """
    logger = logging.getLogger("plot")
    if logger.handlers == []:
        fmtr = logging.Formatter('%(asctime)s %(name)s:%(lineno)s - %(levelname)s - %(message)s')
        hndlr = logging.StreamHandler()
        hndlr.setFormatter(fmtr)
        logger.addHandler(hndlr)

    # if this throws a configparser.NoSectionError,
    # then let it rise uncaught, since nothing will work
    plot_config = config['plot']

    # get parameters and set defaults
    logscale_eta = plot_config.getboolean('logscale_eta', fallback=False)
    use_x_var = plot_config.getboolean('use_x_var', fallback=True)
    do_plot_parameters = plot_config.getboolean('do_plot_parameters', fallback=False)
    start_tm = dt_parser(plot_config.get("start_time", "1900-01-01"))
    stop_tm = dt_parser(plot_config.get("stop_time", "2050-01-01"))
    rebin_factor = plot_config.getint("rebin_factor", fallback=1)
    rebin_config = dict(config.items("rebin"))
    plot_config["x_unit"] = "{0:d} {1:s}".format(
        int(rebin_config["n_binning_unit"]) * rebin_factor,
        rebin_config["binning_unit"])

    """
    # only useful if we revive 'counter_name' parameter
    if 'counter_name' in rebin_config:
        if plot_config["plot_title"] == "":
            plot_config["plot_title"] = rebin_config["counter_name"]
        if plot_config["plot_file_name"] == "":
            plot_config["plot_file_name"] = rebin_config["counter_name"]
    """

    # TODO: should just put this in a dataframe
    data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]))
            for tup in input_generator
            if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm]

    if rebin_factor <= 1:
        tbs = [tup[0] for tup in data]
        cts = [tup[1] for tup in data]
        eta = [tup[2] for tup in data]
    # do a hacky rebin, just for plotting
    else:
        tbs = []
        cts = []
        eta = []
        tbs_tmp = None
        cts_tmp = 0
        eta_tmp = 0
        counter = 0
        for tbs_i, cts_i, eta_i in data:
            tbs_tmp = tbs_i
            cts_tmp += cts_i
            eta_tmp += eta_i
            counter += 1
            if counter == rebin_factor:
                counter = 0
                tbs.append(tbs_tmp)
                cts.append(cts_tmp)
                eta.append(eta_tmp / float(rebin_factor))
                tbs_tmp = None
                cts_tmp = 0
                eta_tmp = 0

    if cts == []:
        sys.stderr.write("'cts' list is empty\n")
        return -1

    max_cts = max(cts)
    min_cts = min(cts)

    # build the plotting surface
    fig, (ax1, ax2) = plt.subplots(2, sharex=True)

    # plot the data
    if use_x_var:
        ax1.plot(tbs, cts, 'k-')
    else:
        ax1.plot(cts, 'k-')
        ax1.set_xlim(0, len(cts))

    plotter = "plot"
    if logscale_eta:
        plotter = "semilogy"
    if use_x_var:
        getattr(ax2, plotter)(tbs, eta, 'r')
    else:
        getattr(ax2, plotter)(eta, 'r')
        ax2.set_xlim(0, len(eta))

    # adjust spacing
    ax1.set_ylim(min_cts * 0.9, max_cts * 1.7)
    min_eta = 0
    if min(eta) > 0:
        min_eta = min(eta) * 0.9
    ax2.set_ylim(min_eta, max(eta) * 1.1)

    # remove the horizontal space between plots
    plt.subplots_adjust(hspace=0)

    # modify ticklabels
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        tl.set_fontsize(10)
    for tl in ax2.get_yticklabels():
        tl.set_color('r')
        tl.set_fontsize(10)

    # y labels
    y_label = plot_config.get('y_label', 'counts')
    ax1.set_ylabel(y_label, color='k', fontsize=12)
    ax2.set_ylabel("eta", color='r', fontsize=12)
    ax1.yaxis.set_major_locator(plticker.MaxNLocator(4))
    ax2.yaxis.set_major_locator(plticker.MaxNLocator(5))

    # x date formatting
    if use_x_var:
        day_locator = mdates.DayLocator()
        hour_locator = mdates.HourLocator()
        day_formatter = mdates.DateFormatter('%Y-%m-%d')
        ax2.xaxis.set_major_formatter(day_formatter)
        ax2.xaxis.set_major_locator(day_locator)
        ax2.xaxis.set_minor_locator(hour_locator)
        fig.autofmt_xdate()
    ax2.set_xlabel("time ({} bins)".format(plot_config["x_unit"].rstrip('s')))

    ax1.grid(True)
    ax2.grid(True)

    # build text box for parameter display
    if do_plot_parameters:
        props = dict(boxstyle='round', facecolor='white', alpha=0.5)
        model_name = config['analyze']['model_name']
        model_pars = ""
        for k, v in list(config[model_name + '_model'].items()):
            model_pars += "{}: {}\n".format(k, v)
        text_str = "model: {}\n{}".format(model_name, str(model_pars))
        ax1.text(0.05, 0.95, text_str,
                 bbox=props,
                 verticalalignment='top',
                 fontsize=8,
                 transform=ax1.transAxes)

    plt.suptitle("{}".format(plot_config.get("plot_title", "SET A PLOT TITLE")))

    # write the image
    try:
        os.makedirs(plot_config.get("plot_dir", "."))
    except OSError:
        pass
    plot_file_name = "{}/{}.{}".format(
        plot_config.get("plot_dir", ".").rstrip('/'),
        plot_config.get("plot_file_name", "plot"),
        plot_config.get("plot_file_extension", "png"))
    plt.savefig(plot_file_name)
    plt.close()
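# plot() above reads its settings from configparser sections named 'plot' and
# 'rebin'; a minimal illustrative config (key names are the ones looked up in the
# function, values are invented):
import configparser

config = configparser.ConfigParser()
config.read_string("""
[rebin]
binning_unit = hours
n_binning_unit = 1

[plot]
logscale_eta = true
rebin_factor = 2
start_time = 2017-01-01
stop_time = 2017-02-01
plot_title = example counter
plot_dir = ./plots
plot_file_name = example
""")
# plot(counter_rows, config)   # 'counter_rows' would yield (time, count, eta) tuples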
def rebin(input_generator,
          start_time=str(datetime.datetime(1970, 1, 1)),
          stop_time=str(datetime.datetime(2020, 1, 1)),
          binning_unit='hours',
          n_binning_unit=1,
          **kwargs):
    """
    This function must be passed the following positional argument:
        input_generator
    Optional keyword arguments are:
        binning_unit
        n_binning_unit
        stop_time
        start_time
    The 'input_generator' object must yield tuples like:
        [interval start time], [interval duration in sec], [interval count]
    The function returns a list of tuples like:
        [new interval start time], [new interval duration in sec], [new interval count]
    """
    logger = logging.getLogger("rebin")

    start_time = dt_parser(start_time)
    stop_time = dt_parser(stop_time)

    # these are just for keeping track of what range of date/times we observe in the data
    max_stop_time = datetime.datetime(1970, 1, 1)
    min_start_time = datetime.datetime(2020, 1, 1)

    input_data = []
    # put the data into a list of (TimeBucket, count) tuples
    for line in input_generator:
        try:
            this_start_time = dt_parser(line[0])
        except ValueError:
            continue
        dt = datetime.timedelta(seconds=int(float(line[1])))
        this_stop_time = this_start_time + dt
        if this_stop_time > stop_time:
            continue
        if this_start_time < start_time:
            continue
        time_bucket = TimeBucket(this_start_time, this_stop_time)
        count = line[2]
        input_data.append((time_bucket, count))
        if this_stop_time > max_stop_time:
            max_stop_time = this_stop_time
        if this_start_time < min_start_time:
            min_start_time = this_start_time

    input_data_sorted = sorted(input_data)

    # make a grid with appropriate bin size
    grid_start_time = datetime_truncate.truncate(min_start_time, binning_unit.rstrip('s'))
    grid_stop_time = datetime_truncate.truncate(max_stop_time, binning_unit.rstrip('s'))
    grid_dt = datetime.timedelta(**{binning_unit: int(n_binning_unit)})
    tb_stop_time = grid_start_time + grid_dt
    tb = TimeBucket(grid_start_time, tb_stop_time)

    # make list of TimeBuckets for bins
    grid = []
    while tb.stop_time <= grid_stop_time:
        # logger.debug("{}".format(tb))
        grid.append(tb)
        tb_start_time = tb.stop_time
        tb_stop_time = tb_start_time + grid_dt
        tb = TimeBucket(tb_start_time, tb_stop_time)
    grid.append(tb)

    # add data to a dictionary with keys mapped to the grid indices
    output_data = collections.defaultdict(float)
    for input_tb, input_count in input_data_sorted:
        logger.debug("input. TB: {}, count: {}".format(input_tb, input_count))
        for grid_tb in grid:
            if input_tb in grid_tb:
                idx = grid.index(grid_tb)
                output_data[idx] += float(input_count)
                break
            elif input_tb.intersects(grid_tb):
                # assign partial count of input_tb to grid_tb
                idx_lower = grid.index(grid_tb)
                frac_lower = input_tb.get_fraction_overlapped_by(grid_tb)
                output_data[idx_lower] += (float(input_count) * frac_lower)
                try:
                    idx = idx_lower + 1
                    frac = input_tb.get_fraction_overlapped_by(grid[idx])
                    while frac > 0:
                        output_data[idx] += (frac * float(input_count))
                        idx += 1
                        frac = input_tb.get_fraction_overlapped_by(grid[idx])
                except IndexError:
                    pass
                break
            else:
                pass

    # put data back into a sorted list of tuples
    sorted_output_data = []
    # use these to strip off leading and trailing zero-count entries
    prev_count = 0
    last_non_zero_ct_idx = -1
    # the grid is already time ordered, and the output_data are indexed
    for idx, dt in enumerate(grid):
        if idx in output_data:
            count = output_data[idx]
            last_non_zero_ct_idx = idx
        else:
            count = 0
        if count != 0 or prev_count != 0:
            if count > 0:
                trimmed_count = int(count)
                # trimmed_count = round(count, -int(floor(log10(count)))+1)
            else:
                trimmed_count = 0
            sorted_output_data.append((str(dt.start_time), dt.size().total_seconds(), trimmed_count))
        prev_count = count
    sorted_output_data = sorted_output_data[:last_non_zero_ct_idx + 1]

    # return the data structure
    return sorted_output_data
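# Expected input/output shape for rebin(), with invented ten-minute counts
# aggregated into hourly buckets:
raw = [("2017-01-01 00:00:00", 600, 4),
       ("2017-01-01 00:10:00", 600, 2),
       ("2017-01-01 01:00:00", 600, 7)]
hourly = rebin(iter(raw), binning_unit="hours", n_binning_unit=1)
# -> roughly [("2017-01-01 00:00:00", 3600.0, 6), ("2017-01-01 01:00:00", 3600.0, 7)]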
def query_tweets(tweet_ids, groupings, endpoint, engagement_types,
                 max_tweet_ids=25, date_range=(None, None)):
    """
    Return engagements for specified Tweets, engagement groupings, and endpoint.
    Providing start/end times enables historical mode.

    There are two iterations to manage:
      - splitting the tweet IDs into acceptably small chunks
      - splitting the date range into acceptably small chunks
    """
    if (date_range[0] is None and date_range[1] is not None) or \
            (date_range[0] is not None and date_range[1] is None):
        raise DateRangeException("Must specify both or neither of the 'date_range' tuple elements")

    MAX_DATE_RANGE_IN_DAYS = 27

    def yield_date_range(start_date, end_date):
        """ yield datetime objects in MAX_DATE_RANGE_IN_DAYS intervals """
        for n in range(0, int((end_date - start_date).days), MAX_DATE_RANGE_IN_DAYS):
            yield start_date + datetime.timedelta(n)

    results = {}

    # must create a physical list to run 'len' on it
    tweet_ids = list(tweet_ids)

    # split tweet ID list into chunks of size 'max_tweet_ids'
    for tweet_ids_chunk in [tweet_ids[i:i + max_tweet_ids]
                            for i in range(0, len(tweet_ids), max_tweet_ids)]:
        results_for_these_ids = {}
        post_data = {
            'tweet_ids': tweet_ids_chunk,
            'engagement_types': engagement_types,
            'groupings': groupings
        }

        if date_range == (None, None):
            results_for_these_ids = make_request(post_data, endpoint)
        else:
            # this is historical mode
            start_time = dt_parser(date_range[0])
            end_time = dt_parser(date_range[1])

            # standard timed query
            if (end_time - start_time).days <= MAX_DATE_RANGE_IN_DAYS:
                start_time = dt_parser(date_range[0])
                end_time = dt_parser(date_range[1])
                post_data['start'] = start_time.strftime('%Y-%m-%d')
                post_data['end'] = end_time.strftime('%Y-%m-%d')
                results_for_these_ids = make_request(post_data, endpoint)
            # extended timed query
            else:
                # iterate over all chunks of dates
                for this_start_time in yield_date_range(start_time, end_time):
                    this_end_time = this_start_time + datetime.timedelta(MAX_DATE_RANGE_IN_DAYS)
                    if this_end_time > end_time:
                        this_end_time = end_time
                    post_data['start'] = this_start_time.strftime('%Y-%m-%d')
                    post_data['end'] = this_end_time.strftime('%Y-%m-%d')
                    results_for_these_ids_and_dates = make_request(post_data, endpoint)
                    combine_results(results_for_these_ids, results_for_these_ids_and_dates, groupings)

        combine_results(results, results_for_these_ids, groupings)
    return results
def query_tweets(tweet_ids, groupings, endpoint, engagement_types,
                 max_tweet_ids=25, date_range=(None, None)):
    """
    Return engagements for specified Tweets, engagement groupings, and endpoint.
    Providing start/end times enables historical mode.

    There are two iterations to manage:
      - splitting the tweet IDs into acceptably small chunks
      - splitting the date range into acceptably small chunks
    """
    if (date_range[0] is None and date_range[1] is not None) or \
            (date_range[0] is not None and date_range[1] is None):
        raise DateRangeException(
            "Must specify both or neither of the 'date_range' tuple elements")

    #if datetime.datetime.strptime(date_range[1],'%Y-%m-%d') > datetime.datetime.now():
    #    raise DateRangeException("Tweet was posted less than 27 days ago. Use 'totals' endpoint.")

    MAX_DATE_RANGE_IN_DAYS = 27

    def yield_date_range(start_date, end_date):
        """ yield datetime objects in MAX_DATE_RANGE_IN_DAYS intervals """
        for n in range(0, int((end_date - start_date).days), MAX_DATE_RANGE_IN_DAYS):
            yield start_date + datetime.timedelta(n)

    def chunks(iterable, size=1):
        """ yield list representations of 'size'-length, consecutive slices of 'iterable' """
        iterator = iter(iterable)
        for first in iterator:
            yield list(itertools.chain([first], itertools.islice(iterator, size - 1)))

    results = {}

    # split tweet ID list into chunks of size 'max_tweet_ids'
    for tweet_ids_chunk in chunks(tweet_ids, max_tweet_ids):
        results_for_these_ids = {}
        post_data = {
            'tweet_ids': tweet_ids_chunk,
            'engagement_types': engagement_types,
            'groupings': groupings
        }

        if date_range == (None, None):
            # this is '28hr' or 'totals' mode
            results_for_these_ids = make_request(post_data, endpoint)
        else:
            # this is historical mode
            start_time = dt_parser(date_range[0])
            end_time = dt_parser(date_range[1])

            # standard timed query (only one call required)
            if (end_time - start_time).days <= MAX_DATE_RANGE_IN_DAYS:
                start_time = dt_parser(date_range[0])
                end_time = dt_parser(date_range[1])
                post_data['start'] = start_time.strftime('%Y-%m-%d')
                post_data['end'] = end_time.strftime('%Y-%m-%d')
                results_for_these_ids = make_request(post_data, endpoint)
            # extended timed query (multiple calls required)
            else:
                # iterate over all chunks of dates
                for this_start_time in yield_date_range(start_time, end_time):
                    this_end_time = this_start_time + datetime.timedelta(MAX_DATE_RANGE_IN_DAYS)
                    if this_end_time > end_time:
                        this_end_time = end_time
                    post_data['start'] = this_start_time.strftime('%Y-%m-%d')
                    post_data['end'] = this_end_time.strftime('%Y-%m-%d')
                    results_for_these_ids_and_dates = make_request(post_data, endpoint)
                    combine_results(results_for_these_ids,
                                    results_for_these_ids_and_dates, groupings)

        combine_results(results, results_for_these_ids, groupings)
    return results
def query_tweets(tweet_ids, groupings, endpoint, engagement_types,
                 max_tweet_ids=25, date_range=(None, None)):
    """
    Return engagements for specified Tweets, engagement groupings, and endpoint.
    Providing start/end times enables historical mode.

    There are two iterations to manage:
      - splitting the tweet IDs into acceptably small chunks
      - splitting the date range into acceptably small chunks
    """
    MAX_DATE_RANGE_IN_DAYS = 27

    def yield_date_range(start_date, end_date):
        for n in range(0, int((end_date - start_date).days), MAX_DATE_RANGE_IN_DAYS):
            yield start_date + datetime.timedelta(n)

    results = {}

    # must create a physical list to run 'len' on it
    tweet_ids = list(tweet_ids)

    # split tweet ID list into chunks of size 'max_tweet_ids'
    for tweet_ids_chunk in [tweet_ids[i:i + max_tweet_ids]
                            for i in range(0, len(tweet_ids), max_tweet_ids)]:
        results_for_these_ids = {}
        post_data = {
            'tweet_ids': tweet_ids_chunk,
            'engagement_types': engagement_types,
            'groupings': groupings
        }

        if date_range == (None, None):
            results_for_these_ids = make_request(post_data, endpoint)
        else:
            start_time = dt_parser(date_range[0])
            end_time = dt_parser(date_range[1])

            # standard timed query
            if (end_time - start_time).days <= MAX_DATE_RANGE_IN_DAYS:
                start_time = dt_parser(date_range[0])
                end_time = dt_parser(date_range[1])
                post_data['start'] = start_time.strftime('%Y-%m-%d')
                post_data['end'] = end_time.strftime('%Y-%m-%d')
                results_for_these_ids = make_request(post_data, endpoint)
            # extended timed query
            else:
                # iterate over all chunks of dates
                for this_start_time in yield_date_range(start_time, end_time):
                    this_end_time = this_start_time + datetime.timedelta(MAX_DATE_RANGE_IN_DAYS)
                    if this_end_time > end_time:
                        this_end_time = end_time
                    post_data['start'] = this_start_time.strftime('%Y-%m-%d')
                    post_data['end'] = this_end_time.strftime('%Y-%m-%d')
                    results_for_these_ids_and_dates = make_request(post_data, endpoint)
                    combine_results(results_for_these_ids, results_for_these_ids_and_dates, groupings)

        combine_results(results, results_for_these_ids, groupings)
    return results
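# The 27-day splitter used by the query_tweets() variants above can be exercised
# on its own; the dates below are arbitrary:
import datetime

MAX_DATE_RANGE_IN_DAYS = 27

def yield_date_range(start_date, end_date):
    for n in range(0, int((end_date - start_date).days), MAX_DATE_RANGE_IN_DAYS):
        yield start_date + datetime.timedelta(n)

start, end = datetime.datetime(2017, 1, 1), datetime.datetime(2017, 3, 1)
for chunk_start in yield_date_range(start, end):
    chunk_end = min(chunk_start + datetime.timedelta(MAX_DATE_RANGE_IN_DAYS), end)
    print(chunk_start.date(), "->", chunk_end.date())
# 2017-01-01 -> 2017-01-28, 2017-01-28 -> 2017-02-24, 2017-02-24 -> 2017-03-01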
def multi_plot(input_generators, config, styles=[]):
    """
    input_generators is a list and each element is a generator of tuples with
    the following structure: (time_interval_start, count, eta, trend)
    """
    # if this throws a configparser.NoSectionError,
    # then let it rise uncaught, since nothing will work
    plot_config = config['plot']

    # get parameters and set defaults
    logscale_eta = plot_config.getboolean('logscale_eta', fallback=False)
    use_x_var = plot_config.getboolean('use_x_var', fallback=True)
    do_plot_parameters = plot_config.getboolean('do_plot_parameters', fallback=False)
    legend = plot_config.getboolean('legend', fallback=False)
    start_tm = dt_parser(plot_config.get("start_time", "1900-01-01"))
    stop_tm = dt_parser(plot_config.get("stop_time", "2050-01-01"))
    rebin_factor = plot_config.getint("rebin_factor", fallback=1)
    rebin_config = dict(config.items("rebin"))
    plot_config["x_unit"] = "{0:d} {1:s}".format(
        int(rebin_config["n_binning_unit"]) * rebin_factor,
        rebin_config["binning_unit"])

    """
    # only useful if we revive 'counter_name' parameter
    if 'counter_name' in rebin_config:
        if plot_config["plot_title"] == "":
            plot_config["plot_title"] = rebin_config["counter_name"]
        if plot_config["plot_file_name"] == "":
            plot_config["plot_file_name"] = rebin_config["counter_name"]
    """

    # build the plotting surface
    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)

    max_cts = 0
    min_cts = sys.maxsize
    max_eta = 0
    min_eta = sys.maxsize
    used_colors = []

    for index, input_generator in enumerate(input_generators):
        data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]), float(tup[3]))
                for tup in input_generator
                if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm]

        if rebin_factor <= 1:
            tbs = [tup[0] for tup in data]
            cts = [tup[1] for tup in data]
            eta = [tup[2] for tup in data]
            # trends:
            # -1 = 'decreasing'
            #  0 = 'no trend'
            #  1 = 'increasing'
            trends = [random.uniform(0.5, 1.0) if tup[3] == 1 else 0 for tup in data]
        else:
            tbs = []
            cts = []
            eta = []
            trends = []
            tbs_tmp = None
            cts_tmp = 0
            eta_tmp = 0
            trend_tmp = 0
            counter = 0
            for tbs_i, cts_i, eta_i, trend_i in data:
                tbs_tmp = tbs_i
                cts_tmp += cts_i
                eta_tmp += eta_i
                trend_tmp += 1 if trend_i == 1 else 0
                counter += 1
                if counter == rebin_factor:
                    counter = 0
                    tbs.append(tbs_tmp)
                    cts.append(cts_tmp)
                    eta.append(eta_tmp / float(rebin_factor))
                    trends.append(trend_tmp)
                    tbs_tmp = None
                    cts_tmp = 0
                    eta_tmp = 0
                    trend_tmp = 0

        if cts == []:
            sys.stderr.write("'cts' list is empty\n")
            continue  # return -1

        max_cts = max(max_cts, max(cts))
        min_cts = min(min_cts, min(cts))

        ## PLOTTING PART
        color = styles[index][1]

        # plot the counts
        if use_x_var:
            ax1.plot(tbs, cts, color, alpha=0.7)
        else:
            ax1.plot(cts, color, alpha=0.7)
            ax1.set_xlim(0, len(cts))

        # plot the etas
        plotter = "plot"
        if logscale_eta:
            plotter = "semilogy"
        if use_x_var:
            getattr(ax2, plotter)(tbs, eta, color, alpha=0.7)
        else:
            getattr(ax2, plotter)(eta, color, alpha=0.7)
            ax2.set_xlim(0, len(eta))
        max_eta = max(max_eta, max(eta))
        min_eta = min(min_eta, min(cts))

        # plot the trends
        if use_x_var:
            ax3.plot(tbs, trends, color + '.', alpha=0.7, label=styles[index][0])
        elif trends != []:
            ax3.plot(trends, color + '.', alpha=0.7, label=styles[index][0])
            ax3.set_xlim(0, len(trends))

        for i, t in enumerate(trends):
            if t > 0:
                ax1.axvline(x=tbs[i], color=color, linewidth=0.5, linestyle='--', alpha=0.7)
                ax2.axvline(x=tbs[i], color=color, linewidth=0.5, linestyle='--', alpha=0.7)
                ax3.axvline(x=tbs[i], color=color, linewidth=0.5, linestyle='--', alpha=0.7)

    # adjust spacing
    ax1.set_ylim(min_cts * 0.9, max_cts * 1.7)
    min_eta = 0
    if min_eta > 0:
        min_eta = min_eta * 0.9
    ax2.set_ylim(min_eta, max_eta * 1.1)

    # remove the horizontal space between plots
    plt.subplots_adjust(hspace=0)

    # modify ticklabels
    plt.xticks(rotation=30)
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        tl.set_fontsize(10)
    for tl in ax2.get_yticklabels():
        tl.set_color('r')
        tl.set_fontsize(10)
    for tl in ax3.get_yticklabels():
        tl.set_color('b')
        tl.set_fontsize(10)
    for tl in ax3.get_xticklabels():
        tl.set_color('k')
        tl.set_fontsize(7)

    # y labels
    y_label = plot_config.get('y_label', 'counts')
    ax1.set_ylabel(y_label, color='k', fontsize=12)
    ax2.set_ylabel("eta", color='r', fontsize=12)
    ax3.set_ylabel("trend", color='b', fontsize=12)
    ax1.yaxis.set_major_locator(plticker.MaxNLocator(4))
    ax2.yaxis.set_major_locator(plticker.MaxNLocator(5))

    # x date formatting
    # if use_x_var:
    #     day_locator = mdates.DayLocator()
    #     hour_locator = mdates.HourLocator()
    #     day_formatter = mdates.DateFormatter('%Y-%m-%d')
    #     ax2.xaxis.set_major_formatter( day_formatter )
    #     ax2.xaxis.set_major_locator( day_locator )
    #     ax2.xaxis.set_minor_locator( hour_locator )
    #     fig.autofmt_xdate()
    # ax2.set_xlabel("time ({} bins)".format(plot_config["x_unit"].rstrip('s')))

    ax1.grid(True)
    ax2.grid(True)
    ax3.grid(True)

    # build text box for parameter display
    if do_plot_parameters:
        props = dict(boxstyle='round', facecolor='white', alpha=0.5)
        model_name = config['analyze']['model_name']
        model_pars = ""
        for k, v in config[model_name + '_model'].items():
            model_pars += "{}: {}\n".format(k, v)
        text_str = "model: {}\n{}".format(model_name, str(model_pars))
        text_str += 'topics_count: ' + str(len(input_generators))
        text_str += '\nrebin_factor: ' + str(rebin_factor)
        ax1.text(0.05, 0.95, text_str,
                 bbox=props,
                 verticalalignment='top',
                 fontsize=8,
                 transform=ax1.transAxes)

    if legend:
        ax3.legend(fontsize=5.8, fancybox=True, loc='lower left', title='Topics')

    plt.suptitle(u"{}".format(plot_config.get("plot_title", "SET A PLOT TITLE")))

    ## write the image
    plot_file_name = u"{}/{}.{}".format(
        plot_config.get("plot_dir", ".").rstrip('/'),
        plot_config.get("plot_file_name", "output"),
        plot_config.get("plot_file_extension", "png"))
    # print("Saved at: " + plot_file_name)
    plt.savefig(plot_file_name)
    plt.close()
def plot(input_generator, config):
    """
    input_generator is a generator of tuples with the following structure:
    (time_interval_start, count, eta)
    """
    use_x_var = True
    if "use_x_var" in config:
        use_x_var = bool(config["use_x_var"])
    if "y_label" not in config:
        config["y_label"] = "counts"

    if "start_time" in config and "stop_time" in config:
        start_tm = dt_parser(config["start_time"])
        stop_tm = dt_parser(config["stop_time"])
        data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]))
                for tup in input_generator
                if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm]
    else:
        data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]))
                for tup in input_generator]

    if "rebin_factor" not in config or int(config["rebin_factor"]) == 1:
        tbs = [tup[0] for tup in data]
        cts = [tup[1] for tup in data]
        eta = [tup[2] for tup in data]
    # do a hacky rebin, just for plotting
    else:
        tbs = []
        cts = []
        eta = []
        tbs_tmp = None
        cts_tmp = 0
        eta_tmp = 0
        counter = 0
        for tbs_i, cts_i, eta_i in data:
            tbs_tmp = tbs_i
            cts_tmp += cts_i
            eta_tmp += eta_i
            counter += 1
            if counter == int(config["rebin_factor"]):
                counter = 0
                tbs.append(tbs_tmp)
                cts.append(cts_tmp)
                eta.append(eta_tmp / float(config["rebin_factor"]))
                tbs_tmp = None
                cts_tmp = 0
                eta_tmp = 0

    if cts == []:
        print("'cts' list is empty")
        return -1

    max_cts = max(cts)
    min_cts = min(cts)

    # build the plotting surface
    fig, (ax1, ax2) = plt.subplots(2, sharex=True)

    # plot the data
    if use_x_var:
        ax1.plot(tbs, cts, 'k-')
    else:
        ax1.plot(cts, 'k-')
        ax1.set_xlim(0, len(cts))

    plotter = "plot"
    if config["logscale_eta"]:
        plotter = "semilogy"
    if use_x_var:
        getattr(ax2, plotter)(tbs, eta, 'r')
    else:
        getattr(ax2, plotter)(eta, 'r')
        ax2.set_xlim(0, len(eta))

    # adjust spacing
    ax1.set_ylim(min_cts * 0.9, max_cts * 1.7)
    min_eta = 0
    if min(eta) > 0:
        min_eta = min(eta) * 0.9
    ax2.set_ylim(min_eta, max(eta) * 1.1)

    # remove the horizontal space between plots
    plt.subplots_adjust(hspace=0)

    # modify ticklabels
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        tl.set_fontsize(10)
    for tl in ax2.get_yticklabels():
        tl.set_color('r')
        tl.set_fontsize(10)

    # y labels
    ax1.set_ylabel(config["y_label"], color='k', fontsize=12)
    ax2.set_ylabel("eta", color='r', fontsize=12)
    ax1.yaxis.set_major_locator(plticker.MaxNLocator(4))
    ax2.yaxis.set_major_locator(plticker.MaxNLocator(5))

    # x date formatting
    if use_x_var:
        formatter = mdates.DateFormatter('%Y-%m-%d')
        ax2.xaxis.set_major_formatter(formatter)
        fig.autofmt_xdate()
    ax2.set_xlabel("time ({} bins)".format(config["x_unit"].rstrip('s')))

    ax1.grid(True)
    ax2.grid(True)

    plt.suptitle(u"{}".format(config["plot_title"]))

    try:
        os.makedirs(config["plot_dir"])
    except OSError:
        pass
    plot_file_name = u"{}/{}.{}".format(config["plot_dir"].rstrip('/'),
                                        config["plot_file_name"],
                                        config["plot_file_extension"])
    plt.savefig(plot_file_name)
    plt.close()
def process_results(args):
    """
    Get results from individual files. Parse the results and recompile for SQLite.

    Args:
        args: (object) argparse object.
    Returns:
    Raises:
    """
    # Add a header to each output file
    # Metadata table
    metadata_fields = ['image_id', 'run_id']
    metadata_fields.extend(args.valid_meta.keys())
    #args.metadata_file.write('#' + '\t'.join(map(str, metadata_fields)) + '\n')

    # Feature data table
    feature_fields = ['area', 'hull-area', 'solidity', 'perimeter', 'width', 'height',
                      'longest_axis', 'center-of-mass-x', 'center-of-mass-y',
                      'hull_vertices', 'in_bounds']
    opt_feature_fields = ['y-position', 'height_above_bound', 'height_below_bound',
                          'above_bound_area', 'percent_above_bound_area',
                          'below_bound_area', 'percent_below_bound_area']
    #args.features_file.write('#' + '\t'.join(map(str, feature_fields + opt_feature_fields)) + '\n')

    # Signal channel data table
    signal_fields = ['bin-number', 'channel_name', 'values']
    #bin-number blue green red lightness green-magenta blue-yellow hue saturation value

    # Initialize the database with the schema template if create is true
    if args.create:
        # Create SQL structure based on accepted metadata and features
        args.sq.execute('CREATE TABLE IF NOT EXISTS `runinfo` (`run_id` INTEGER PRIMARY KEY, `datetime` INTEGER NOT NULL, `command` TEXT NOT NULL);')
        args.sq.execute('CREATE TABLE IF NOT EXISTS `metadata` (`image_id` INTEGER PRIMARY KEY, `run_id` INTEGER NOT NULL, `' +
                        '` TEXT NOT NULL, `'.join(map(str, metadata_fields[2:])) + '` TEXT NOT NULL);')
        args.sq.execute('CREATE TABLE IF NOT EXISTS `features` (`image_id` INTEGER PRIMARY KEY, `' +
                        '` TEXT NOT NULL, `'.join(map(str, feature_fields + opt_feature_fields)) + '` TEXT NOT NULL);')
        args.sq.execute('CREATE TABLE IF NOT EXISTS `analysis_images` (`image_id` INTEGER NOT NULL, `type` TEXT NOT NULL, `image_path` TEXT NOT NULL);')
        args.sq.execute('CREATE TABLE IF NOT EXISTS `signal` (`image_id` INTEGER NOT NULL, `' +
                        '` TEXT NOT NULL, `'.join(map(str, signal_fields)) + '` TEXT NOT NULL);')

    # Walk through the image processing job directory and process data from each file
    for (dirpath, dirnames, filenames) in os.walk(args.jobdir):
        for filename in filenames:
            meta = {}
            images = {}
            features = []
            feature_data = {}
            signal = []
            signal_data = {}
            boundary = []
            boundary_data = {}
            # Open results file
            with open(dirpath + '/' + filename) as results:
                # For each line in the file
                for row in results:
                    # Remove the newline character
                    row = row.rstrip('\n')
                    # Split the line by tab characters
                    cols = row.split('\t')
                    # If the data is of class meta, store in the metadata dictionary
                    if cols[0] == 'META':
                        meta[cols[1]] = cols[2]
                    # If the data is of class image, store in the image dictionary
                    elif cols[0] == 'IMAGE':
                        images[cols[1]] = cols[2]
                    # If the data is of class shapes, store in the shapes dictionary
                    elif cols[0] == 'HEADER_SHAPES':
                        features = cols
                    elif cols[0] == 'SHAPES_DATA':
                        for i, datum in enumerate(cols):
                            if i > 0:
                                feature_data[features[i]] = datum
                    # If the data is of class histogram/signal, store in the signal dictionary
                    elif cols[0] == 'HEADER_HISTOGRAM':
                        signal = cols
                    elif cols[0] == 'HISTOGRAM_DATA':
                        for i, datum in enumerate(cols):
                            if i > 0:
                                signal_data[signal[i]] = datum
                    # If the data is of class boundary (horizontal rule), store in the boundary dictionary
                    elif 'HEADER_BOUNDARY' in cols[0]:
                        boundary = cols
                        # Temporary hack
                        boundary_data['y-position'] = cols[0].replace('HEADER_BOUNDARY', '')
                    elif cols[0] == 'BOUNDARY_DATA':
                        for i, datum in enumerate(cols):
                            if i > 0:
                                boundary_data[boundary[i]] = datum

            # Check to see if the image failed, if not continue
            if (len(feature_data) != 0):
                # Convert image datetime to unix time
                timestamp = dt_parser(meta['timestamp'])
                time_delta = timestamp - datetime.datetime(1970, 1, 1)
                unix_time = (time_delta.days * 24 * 3600) + time_delta.seconds

                # Print the image metadata to the aggregate output file
                args.image_id += 1
                meta['image_id'] = args.image_id
                meta['run_id'] = args.run_id
                meta['unixtime'] = unix_time
                meta_table = []
                for field in metadata_fields:
                    meta_table.append(meta[field])
                args.metadata_file.write('|'.join(map(str, meta_table)) + '\n')

                # Print the image feature data to the aggregate output file
                feature_data['image_id'] = args.image_id
                # Boundary data is optional, if it's not there we need to add in placeholder data
                if len(boundary_data) == 0:
                    for field in opt_feature_fields:
                        boundary_data[field] = 0
                feature_data.update(boundary_data)
                feature_table = [args.image_id]
                for field in feature_fields + opt_feature_fields:
                    feature_table.append(feature_data[field])
                args.features_file.write('|'.join(map(str, feature_table)) + '\n')

                # Print the analysis image data to the aggregate output file
                for img_type in images:
                    args.analysis_images_file.write('|'.join(map(str, (args.image_id, img_type, images[img_type]))) + '\n')

                # Print the image signal data to the aggregate output file
                for key in signal_data.keys():
                    if key != 'bin-number':
                        signal_data[key] = signal_data[key].replace('[', '')
                        signal_data[key] = signal_data[key].replace(']', '')
                        signal_table = [args.image_id, signal_data['bin-number'], key, signal_data[key]]
                        args.signal_file.write('|'.join(map(str, signal_table)) + '\n')
def plot(input_generator, config):
    """
    input_generator is a generator of tuples with the following structure:
    (time_interval_start, count, eta)
    """
    use_x_var = True
    if "use_x_var" in config:
        use_x_var = bool(config["use_x_var"])
    if "y_label" not in config:
        config["y_label"] = "counts"

    if "start_time" in config and "stop_time" in config:
        start_tm = dt_parser(config["start_time"])
        stop_tm = dt_parser(config["stop_time"])
        data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]))
                for tup in input_generator
                if dt_parser(tup[0]) > start_tm and dt_parser(tup[0]) < stop_tm]
    else:
        data = [(dt_parser(tup[0]), float(tup[1]), float(tup[2]))
                for tup in input_generator]

    if "rebin_factor" not in config or int(config["rebin_factor"]) == 1:
        tbs = [tup[0] for tup in data]
        cts = [tup[1] for tup in data]
        eta = [tup[2] for tup in data]
    # do a hacky rebin, just for plotting
    else:
        tbs = []
        cts = []
        eta = []
        tbs_tmp = None
        cts_tmp = 0
        eta_tmp = 0
        counter = 0
        for tbs_i, cts_i, eta_i in data:
            tbs_tmp = tbs_i
            cts_tmp += cts_i
            eta_tmp += eta_i
            counter += 1
            if counter == int(config["rebin_factor"]):
                counter = 0
                tbs.append(tbs_tmp)
                cts.append(cts_tmp)
                eta.append(eta_tmp / float(config["rebin_factor"]))
                tbs_tmp = None
                cts_tmp = 0
                eta_tmp = 0

    if cts == []:
        print("'cts' list is empty")
        return -1

    max_cts = max(cts)
    min_cts = min(cts)

    # build the plot
    fig = plt.figure()
    plt.title(u"{}".format(config["plot_title"]))
    ax1 = fig.add_subplot(111)
    if use_x_var:
        ax1.plot(tbs, cts, 'k-')
    else:
        ax1.plot(cts, 'k-')
        ax1.set_xlim(0, len(cts))

    ## fancify
    ax1.set_ylim(min_cts * 0.9, max_cts * 1.7)
    for tl in ax1.get_yticklabels():
        tl.set_color('k')
        ax1.set_ylabel(config["y_label"], color='k', fontsize=12)
        tl.set_fontsize(10)
    plt.locator_params(axis='y', nbins=4)
    if use_x_var:
        formatter = mdates.DateFormatter('%Y-%m-%d')
        ax1.xaxis.set_major_formatter(formatter)
        fig.autofmt_xdate()
    ax1.set_xlabel("time ({} bins)".format(config["x_unit"].rstrip('s')))

    if config['plot_eta']:
        ax2 = ax1.twinx()
        plotter = "plot"
        if config["logscale_eta"]:
            plotter = "semilogy"
        if use_x_var:
            getattr(ax2, plotter)(tbs, eta, 'r')
        else:
            getattr(ax2, plotter)(eta, 'r')
            ax2.set_xlim(0, len(eta))
        min_eta = 0
        if min(eta) > 0:
            min_eta = min(eta) * 0.9
        ax2.set_ylim(min_eta, max(eta) * 1.1)
        ax2.set_ylabel("eta", color='r', fontsize=12)
        for tl in ax2.get_yticklabels():
            tl.set_color('r')
            tl.set_fontsize(10)

    if not config["plot_eta"]:
        config["plot_file_name"] += "_no_eta"

    try:
        os.makedirs(config["plot_dir"])
    except OSError:
        pass
    plot_file_name = u"{}/{}.{}".format(config["plot_dir"].rstrip('/'),
                                        config["plot_file_name"],
                                        config["plot_file_extension"])
    plt.savefig(plot_file_name)
    plt.close()
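# All of the examples above rely on dt_parser for flexible date-string parsing;
# in these codebases it is typically dateutil.parser.parse imported under that
# alias (an assumption here, not shown in the snippets themselves):
from dateutil.parser import parse as dt_parser

print(dt_parser("2017-04-15 12:00:00"))                    # 2017-04-15 12:00:00
print(dt_parser("2017-04-15") < dt_parser("2017-04-16"))   # True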