def get_indexes(config):
    """Return the sorted row indexes that pass every observable filter.

    Starts from all row positions of the first column in
    ``config.attributes_dict`` and, for each ``(observable, wanted value)``
    pair in ``config.attributes.observables.types``, keeps only the indexes
    reported by ``pass_indexes``. A list of wanted values is treated as a
    union of the per-value matches.

    Raises:
        ValueError: if an observable name is not a key of
            ``config.attributes_dict``.
    """

    def _as_number(raw):
        # Values accepted by is_float become floats; whole floats become ints.
        if not is_float(raw):
            return raw
        number = float(raw)
        return int(number) if number.is_integer() else number

    first_column = list(config.attributes_dict.values())[0]
    selected = list(range(0, len(first_column)))

    for obs, value in config.attributes.observables.types.items():
        wildcard = CommonTypes.any.value

        if obs not in config.attributes_dict:
            raise ValueError('Wrong observables.types key.')

        if isinstance(value, list):
            matched = []
            for candidate in value:
                matched += pass_indexes(config, obs, _as_number(candidate), wildcard)
        else:
            matched = pass_indexes(config, obs, _as_number(value), wildcard)

        # Each observable narrows the selection further.
        selected = list(set(selected).intersection(matched))

    selected.sort()
    return selected
def get_indexes(config):
    """Return the sorted row indexes that pass every observable filter.

    Starts from all row positions of the first column in
    ``config.observables_dict`` and intersects them with the matches for each
    ``(observable, wanted value)`` pair in
    ``config.attributes.observables.types``:

    * ``'age'`` expects a two-element ``(left, right)`` value and is matched
      with ``pass_indexes_interval``;
    * any other observable is matched with ``pass_indexes``; a list of wanted
      values is treated as a union of the per-value matches.

    The number of surviving indexes is printed before returning.

    Raises:
        ValueError: if an observable name is not a key of
            ``config.observables_dict``, or if the ``'age'`` value does not
            have exactly two elements.
    """

    def _as_number(raw):
        # Values accepted by is_float become floats; whole floats become ints.
        if not is_float(raw):
            return raw
        number = float(raw)
        return int(number) if number.is_integer() else number

    first_column = list(config.observables_dict.values())[0]
    indexes = list(range(0, len(first_column)))

    for obs, value in config.attributes.observables.types.items():
        wildcard = CommonTypes.any.value

        if obs not in config.observables_dict:
            raise ValueError('Wrong observables.types key.')

        if obs == 'age':
            if len(value) != 2:
                raise ValueError(
                    'Wrong observables_dict key for age. It should be (left, right).'
                )
            left = float(value[0])
            right = float(value[1])
            passed_indexes = pass_indexes_interval(config, obs, left, right)
        elif isinstance(value, list):
            passed_indexes = []
            for candidate in value:
                passed_indexes += pass_indexes(config, obs, _as_number(candidate), wildcard)
        else:
            passed_indexes = pass_indexes(config, obs, _as_number(value), wildcard)

        # Each observable narrows the selection further.
        indexes = list(set(indexes).intersection(passed_indexes))

    indexes.sort()
    print(f'number of indexes: {len(indexes)}')
    return indexes
def load_observables_dict(config):
    """Load the observables table as a ``{column name: list of values}`` dict.

    The file stem is ``get_data_base_path(config)`` joined with
    ``config.attributes.observables.name``. Sources are tried in this order:

    1. ``<stem>.pkl`` - a pickle cache, returned as is;
    2. ``<stem>.xlsx`` - read with ``pandas.read_excel``;
    3. ``<stem>.txt`` - tab-separated text whose first line holds the column
       names. Values accepted by ``is_float`` are stored as ``int`` when they
       are whole numbers and as ``float`` otherwise; all other values are kept
       as right-stripped strings.

    When the table is parsed from ``.xlsx`` or ``.txt`` it is written to
    ``<stem>.pkl`` so later calls hit the cache.

    All files are opened through context managers so the handles are released
    even if unpickling, parsing or pickling raises.

    Raises:
        ValueError: if none of the three files exists.
    """
    fn = get_data_base_path(config) + '/' + config.attributes.observables.name
    fn_txt = fn + '.txt'
    fn_xlsx = fn + '.xlsx'
    fn_pkl = fn + '.pkl'

    if os.path.isfile(fn_pkl):
        # NOTE: pickle executes arbitrary code on load; the cache file must
        # come from a trusted location.
        with open(fn_pkl, 'rb') as f:
            return pickle.load(f)

    if os.path.isfile(fn_xlsx):
        df = pd.read_excel(fn_xlsx)
        # DataFrame.to_dict() yields {column: {row index: value}}; keep only
        # the values, in row order.
        observables_dict = {
            key: list(column.values()) for key, column in df.to_dict().items()
        }
    elif os.path.isfile(fn_txt):
        with open(fn_txt) as f:
            keys = [x.rstrip() for x in f.readline().split('\t')]
            observables_dict = {key: [] for key in keys}

            for line in f:
                values = line.split('\t')
                for key_id, key in enumerate(keys):
                    value = values[key_id].rstrip()
                    if is_float(value):
                        number = float(value)
                        observables_dict[key].append(
                            int(number) if number.is_integer() else number
                        )
                    else:
                        observables_dict[key].append(value)
    else:
        raise ValueError('No observables file')

    with open(fn_pkl, 'wb') as f:
        pickle.dump(observables_dict, f, pickle.HIGHEST_PROTOCOL)

    return observables_dict
def load_observables_categorical_dict(config):
    """Load (or build and cache) the categorical view of the observables.

    The cache file is ``<data base path>/<observables name>_categorical.pkl``.
    When it exists it is unpickled and returned. Otherwise the observables
    table is taken from ``config.observables_dict`` or, if that is ``None``,
    from ``load_observables_dict(config)``, and every column is processed:

    * values listed in ``na_values`` are replaced by ``np.nan``;
    * values accepted by ``is_float`` are stored back as ``float``;
    * any other value marks the column as non-numeric.

    Numeric columns are stored as ``np.asarray(column)``; non-numeric columns
    go through ``categorize_data``. The source table is modified in place by
    the normalisation. The result is pickled to the cache file.

    Fix over the previous version: the result was always built from
    ``config.observables_dict[key]``, which raised ``TypeError`` whenever
    ``config.observables_dict`` was ``None`` and the table had just been
    loaded from disk. The resolved table is used instead. File handles are
    now closed via context managers.
    """
    fn = get_data_base_path(config) + '/' + config.attributes.observables.name + '_categorical'
    fn_pkl = fn + '.pkl'

    if os.path.isfile(fn_pkl):
        # NOTE: pickle executes arbitrary code on load; the cache file must
        # come from a trusted location.
        with open(fn_pkl, 'rb') as f:
            return pickle.load(f)

    if config.observables_dict is not None:
        observables_dict = config.observables_dict
    else:
        observables_dict = load_observables_dict(config)

    na_values = ['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
                 '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan',
                 'null', '-', '--']

    observables_categorical_dict = {}
    for key in observables_dict:
        column = observables_dict[key]
        all_numeric = True

        for i, value in enumerate(column):
            if value in na_values:
                value = np.nan
            if is_float(value):
                column[i] = float(value)
            else:
                column[i] = value
                all_numeric = False

        if all_numeric:
            observables_categorical_dict[key] = np.asarray(column)
        else:
            observables_categorical_dict[key] = categorize_data(np.asarray(column))

    with open(fn_pkl, 'wb') as f:
        pickle.dump(observables_categorical_dict, f, pickle.HIGHEST_PROTOCOL)

    return observables_categorical_dict
def load_observables_dict(config):
    """Load the observables table as a ``{column name: list of values}`` dict.

    The file stem is ``get_data_base_path(config)`` joined with
    ``config.attributes.observables.name``. If ``<stem>.pkl`` exists it is
    unpickled and returned. Otherwise ``<stem>.txt`` is parsed as
    tab-separated text whose first line holds the column names, and the
    result is pickled to ``<stem>.pkl``.

    Values accepted by ``is_float`` are stored as ``int`` when they are whole
    numbers and as ``float`` otherwise; all other values are kept as
    right-stripped strings.

    All files are opened through context managers so the handles are released
    even if unpickling, parsing or pickling raises.
    """
    fn = get_data_base_path(config) + '/' + config.attributes.observables.name
    fn_txt = fn + '.txt'
    fn_pkl = fn + '.pkl'

    if os.path.isfile(fn_pkl):
        # NOTE: pickle executes arbitrary code on load; the cache file must
        # come from a trusted location.
        with open(fn_pkl, 'rb') as f:
            return pickle.load(f)

    with open(fn_txt) as f:
        keys = [x.rstrip() for x in f.readline().split('\t')]
        attributes_dict = {key: [] for key in keys}

        for line in f:
            values = line.split('\t')
            for key_id, key in enumerate(keys):
                value = values[key_id].rstrip()
                if is_float(value):
                    number = float(value)
                    attributes_dict[key].append(
                        int(number) if number.is_integer() else number
                    )
                else:
                    attributes_dict[key].append(value)

    with open(fn_pkl, 'wb') as f:
        pickle.dump(attributes_dict, f, pickle.HIGHEST_PROTOCOL)

    return attributes_dict
def load_cells_dict(config):
    """Load the cells table as a ``{column name: list of values}`` dict.

    The file stem is ``get_data_base_path(config)`` joined with
    ``config.attributes.cells.name``. If ``<stem>.pkl`` exists it is unpickled
    and returned. Otherwise ``<stem>.txt`` is parsed as tab-separated text
    whose first line holds the column names, and the result is pickled to
    ``<stem>.pkl``.

    The first column of the text file (the sample name) is skipped in both
    the header and every data row. Values accepted by ``is_float`` are stored
    as ``float``; all other values are kept as right-stripped strings.

    All files are opened through context managers so the handles are released
    even if unpickling, parsing or pickling raises.
    """
    fn = get_data_base_path(config) + '/' + config.attributes.cells.name
    fn_txt = fn + '.txt'
    fn_pkl = fn + '.pkl'

    if os.path.isfile(fn_pkl):
        # NOTE: pickle executes arbitrary code on load; the cache file must
        # come from a trusted location.
        with open(fn_pkl, 'rb') as f:
            return pickle.load(f)

    with open(fn_txt) as f:
        # First column is always sample name
        keys = [x.rstrip() for x in f.readline().split('\t')][1:]
        cells_dict = {key: [] for key in keys}

        for line in f:
            values = line.split('\t')[1:]
            for key_id, key in enumerate(keys):
                value = values[key_id].rstrip()
                if is_float(value):
                    cells_dict[key].append(float(value))
                else:
                    cells_dict[key].append(value)

    with open(fn_pkl, 'wb') as f:
        pickle.dump(cells_dict, f, pickle.HIGHEST_PROTOCOL)

    return cells_dict
def run(self, config, configs_child):
    """Build histogram traces for the child configs and store them on ``config``.

    Does nothing unless ``config.experiment.method`` is ``Method.histogram``.
    For every child config the target values are fetched through
    ``self.get_strategy.get_target``. When all of them are accepted by
    ``is_float`` explicit bins are derived from
    ``config.experiment.params['bin_size']``; otherwise the bins are left
    empty. A ``go.Histogram`` trace is created only for children whose own
    method is ``Method.histogram``. The resulting list is written to
    ``config.experiment_data['data']``.
    """
    if config.experiment.method != Method.histogram:
        return

    traces = []
    for child in configs_child:
        values = self.get_strategy.get_target(child)

        numeric_flags = [is_float(v) for v in values]
        if False not in numeric_flags:
            step = config.experiment.params['bin_size']
            # Pad the range by half a bin on both sides.
            xbins = {
                'start': min(values) - 0.5 * step,
                'end': max(values) + 0.5 * step,
                'size': step,
            }
        else:
            xbins = {}

        # The colour is chosen by the child's position among its siblings.
        palette = cl.scales['8']['qual']['Set1']
        color = palette[configs_child.index(child)]

        if child.experiment.method == Method.histogram:
            traces.append(
                go.Histogram(
                    x=values,
                    name=str(child.attributes.observables),
                    xbins=xbins,
                    marker={
                        'opacity': config.experiment.params['opacity'],
                        'color': color,
                    },
                )
            )

    config.experiment_data['data'] = traces
def run(self, config, configs_child):
    """Build the plot traces for ``config`` from its child configs.

    Behaviour depends on ``config.experiment.data`` and
    ``config.experiment.method``:

    * betas / betas_adj / residuals / resid_old / epimutations / entropy /
      cells / genes:

      - ``Method.scatter`` and ``Method.variance_histogram`` are delegated to
        ``self.iterate``;
      - ``Method.curve`` creates one ``lines+markers`` scatter per child from
        the ``x`` / ``y`` series named in ``method_params`` (``x == 'count'``
        means ``1..number_of_points``), truncated to ``number_of_points``.

    * observables with ``Method.histogram``: one histogram per child whose own
      method is ``Method.histogram``, ordered by descending number of points.

    The traces are written to ``config.experiment_data['data']``.

    Fix over the previous version: the point count used for ordering was
    recorded for every child, but a trace only for histogram children, so the
    sort order could index past the end of the trace list (or pick the wrong
    trace). Counts are now recorded only together with an emitted trace.

    Raises:
        ValueError: in the curve branch, if the requested ``x`` or ``y``
            series is missing from a child's ``advanced_data``.
    """
    experiment = config.experiment

    if experiment.data in [
        DataType.betas,
        DataType.betas_adj,
        DataType.residuals,
        DataType.resid_old,
        DataType.epimutations,
        DataType.entropy,
        DataType.cells,
        DataType.genes,
    ]:

        if experiment.method in [Method.scatter, Method.variance_histogram]:
            self.iterate(config, configs_child)

        elif experiment.method == Method.curve:
            x_target = experiment.method_params['x']
            y_target = experiment.method_params['y']
            number_of_points = int(experiment.method_params['number_of_points'])

            plot_data = []
            for config_child in configs_child:

                if x_target == 'count':
                    xs = list(range(1, number_of_points + 1))
                elif x_target in config_child.advanced_data:
                    xs = config_child.advanced_data[x_target][0:number_of_points]
                else:
                    raise ValueError(f'{x_target} not in {config_child}.')

                if y_target in config_child.advanced_data:
                    ys = config_child.advanced_data[y_target][0:number_of_points]
                else:
                    raise ValueError(f'{y_target} not in {config_child}.')

                # Palette entries are sliced as color[4:-1] to get the channel
                # list (presumably 'rgb(r,g,b)' strings) and rebuilt as rgba.
                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]
                coordinates = color[4:-1].split(',')
                color_transparent = 'rgba(' + ','.join(coordinates) + ',' + str(0.5) + ')'
                color_border = 'rgba(' + ','.join(coordinates) + ',' + str(0.9) + ')'

                scatter = go.Scatter(
                    x=xs,
                    y=ys,
                    name=get_names(config_child, experiment.method_params),
                    mode='lines+markers',
                    marker=dict(
                        size=10,
                        color=color_transparent,
                        line=dict(
                            width=2,
                            color=color_border,
                        ),
                    ),
                )
                plot_data.append(scatter)

            config.experiment_data['data'] = plot_data

    elif experiment.data == DataType.observables:

        if experiment.method == Method.histogram:
            plot_data = []
            num_points = []  # kept parallel to plot_data

            for config_child in configs_child:
                targets = self.get_strategy.get_target(config_child)

                is_number_list = [is_float(t) for t in targets]
                if False in is_number_list:
                    xbins = {}
                else:
                    bin_size = experiment.method_params['bin_size']
                    # Pad the range by half a bin on both sides.
                    xbins = dict(
                        start=min(targets) - 0.5 * bin_size,
                        end=max(targets) + 0.5 * bin_size,
                        size=bin_size,
                    )

                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]

                if config_child.experiment.method == Method.histogram:
                    histogram = go.Histogram(
                        x=targets,
                        name=get_names(config_child, experiment.method_params) + f': {len(targets)}',
                        xbins=xbins,
                        marker=dict(
                            opacity=experiment.method_params['opacity'],
                            color=color,
                            line=dict(color='#444444', width=1),
                        ),
                    )
                    plot_data.append(histogram)
                    num_points.append(len(targets))

            # Sorting by total number of points
            order = np.argsort(num_points)[::-1]
            config.experiment_data['data'] = [plot_data[index] for index in order]
def release(self, config, configs_child):
    """Assemble the final plotly figure(s) from the traces built earlier.

    Nothing happens unless ``config.experiment.task_params`` is ``None`` or
    its ``'type'`` is ``'run'``. Otherwise, depending on
    ``config.experiment.data`` and ``config.experiment.method``:

    * ``scatter`` / ``range``: one ``go.Figure`` per entry of
      ``config.experiment_data['item']`` is appended to
      ``config.experiment_data['fig']``. CpG-level data types get a title of
      the form ``item(genes)``, genes get the item name, everything else an
      empty title. Optional ``x_ranges`` / ``y_ranges`` override the axis
      range, and ``range`` plots get tick labels built from ``borders``.
    * ``scatter_comparison``: the traces of every child are placed on a grid
      (one column per child, one row per item) and a single figure is stored
      in ``config.experiment_data['fig']``.
    * ``variance_histogram``: a distplot figure is created per data entry and
      stored in ``config.experiment_data['fig']``.
    * ``curve`` and observables ``histogram``: a single figure with the
      default layout is stored in ``config.experiment_data['fig']``.

    Fix over the previous version: the range check was written as
    ``rng != 'auto' or 'auto' not in rng``. Its second operand was only ever
    evaluated for ``rng == 'auto'``, so a range *containing* ``'auto'`` was
    still applied to the axis. A range is now applied only when it is neither
    ``'auto'`` nor contains ``'auto'``; non-iterable ranges are applied as
    before.
    """

    def _is_explicit_range(axis_range):
        # True when the configured range should override plotly's autorange.
        if isinstance(axis_range, str):
            return axis_range != 'auto'
        try:
            return 'auto' not in axis_range
        except TypeError:
            # Not a container: keep the historical behaviour and apply it.
            return True

    def _place_on_grid(trace, x_ref, y_ref):
        # Bind a trace to its subplot axes, shrink it for the dense grid and
        # collect it in the parent's trace list.
        trace.xaxis = x_ref
        trace.yaxis = y_ref
        if trace.mode == 'markers':
            trace.marker.size = 1
            trace.marker.line.width = 0.2
        if trace.mode == 'lines':
            trace.line.width = 1
        config.experiment_data['data'].append(trace)

    def _grid_axis(domain, anchor):
        # Common styling shared by every x and y axis of the comparison grid.
        return {
            'domain': domain,
            'anchor': anchor,
            'zeroline': False,
            'showgrid': True,
            'showline': True,
            'mirror': 'allticks',
            'titlefont': dict(family='Arial', size=13, color='black'),
            'tickfont': dict(family='Arial', size=10, color='black'),
        }

    experiment = config.experiment
    task_params = experiment.task_params
    if task_params is not None and task_params['type'] != 'run':
        # 'prepare' (and any other task type) produces no figures.
        return

    cpg_level_types = [
        DataType.betas,
        DataType.betas_adj,
        DataType.residuals,
        DataType.resid_old,
    ]

    if experiment.data in [
        DataType.betas,
        DataType.betas_adj,
        DataType.residuals,
        DataType.resid_old,
        DataType.epimutations,
        DataType.entropy,
        DataType.cells,
        DataType.genes,
    ]:

        if experiment.method in [Method.scatter, Method.range]:

            for item_id, items in enumerate(config.experiment_data['item']):

                if experiment.data in cpg_level_types:
                    if items in config.cpg_gene_dict:
                        aux = config.cpg_gene_dict[items]
                        if isinstance(aux, list):
                            aux_str = ';'.join(aux)
                        else:
                            aux_str = str(aux)
                    else:
                        aux_str = 'non-genic'
                    title = items + '(' + aux_str + ')'
                elif experiment.data == DataType.genes:
                    title = items
                else:
                    title = ''

                if experiment.method == Method.range:
                    layout = get_layout(config)
                else:
                    layout = get_layout(config, title)

                if experiment.data == DataType.cells:
                    layout.yaxis = get_axis(items)

                # Per-item ranges are indexed by the item's position in the
                # configured 'items' list, not by the loop position.
                raw_item_id = experiment.method_params['items'].index(items)
                if 'x_ranges' in experiment.method_params:
                    x_range = experiment.method_params['x_ranges'][raw_item_id]
                    if _is_explicit_range(x_range):
                        layout.xaxis.range = x_range
                if 'y_ranges' in experiment.method_params:
                    y_range = experiment.method_params['y_ranges'][raw_item_id]
                    if _is_explicit_range(y_range):
                        layout.yaxis.range = y_range

                if experiment.method == Method.range:
                    borders = experiment.method_params['borders']
                    labels = []
                    tickvals = []
                    for seg_id in range(0, len(borders) - 1):
                        left = borders[seg_id]
                        right = borders[seg_id + 1]
                        # One tick at the centre of each [left, right) segment.
                        tickvals.append((right + left) * 0.5)
                        labels.append(f'{left} to {right - 1}')
                    layout.xaxis.tickvals = tickvals
                    layout.xaxis.ticktext = labels

                fig = go.Figure(data=config.experiment_data['data'][item_id], layout=layout)
                config.experiment_data['fig'].append(fig)

        elif experiment.method == Method.scatter_comparison:

            # Horizontal layout: one column per child config.
            x_num = len(configs_child)
            if x_num == 3:
                x_begin = 0.11
            elif x_num == 2:
                x_begin = 0.2
            else:
                x_begin = 0.075
            x_end = 1
            x_shift = (x_end - x_begin) / x_num
            x_size = x_shift - 0.01
            x_domains = []
            for x_id in range(0, x_num):
                x = x_begin + x_shift * x_id
                x_domains.append([x, x + x_size])

            # Vertical layout: one row per item of the first child.
            y_num = len(configs_child[0].experiment_data['item'])
            if y_num == 1:
                y_begin = 0.25
            elif y_num == 2:
                y_begin = 0.2
            else:
                y_begin = 0.06
            y_end = 1
            y_shift = (y_end - y_begin) / y_num
            y_size = y_shift - 0.02
            y_domains = []
            for y_id in range(0, y_num):
                y = y_begin + y_shift * y_id
                y_domains.append([y, y + y_size])

            for configs_child_id, config_child in enumerate(configs_child):
                for item_id, items in enumerate(config_child.experiment_data['data']):

                    # Plotly names the first axis 'x'/'y' and the rest 'x2', 'y2', ...
                    x_string = 'x' if configs_child_id == 0 else f'x{configs_child_id + 1}'
                    y_string = 'y' if item_id == 0 else f'y{item_id + 1}'

                    if isinstance(items, list):
                        for item in items:
                            _place_on_grid(item, x_string, y_string)
                    else:
                        _place_on_grid(items, x_string, y_string)

            layout = {}
            layout['template'] = 'plotly_white'
            layout['showlegend'] = False
            layout['margin'] = {
                'l': 0,
                'r': 0,
                'b': 0,
                't': 0,
            }
            height_per_row = 125
            width_per_col = 200
            layout['height'] = height_per_row * y_num
            layout['width'] = width_per_col * x_num

            for x_id in range(0, x_num):
                x_string_add = '' if x_id == 0 else str(x_id + 1)
                axis_key = 'xaxis' + x_string_add

                layout[axis_key] = _grid_axis(x_domains[x_id], 'x' + x_string_add)
                layout[axis_key]['title'] = experiment.method_params['data_bases'][x_id]

                x_range = experiment.method_params['x_ranges'][x_id]
                if _is_explicit_range(x_range):
                    layout[axis_key]['range'] = x_range

            for y_id in range(0, y_num):
                y_string_add = '' if y_id == 0 else str(y_id + 1)
                axis_key = 'yaxis' + y_string_add

                layout[axis_key] = _grid_axis(y_domains[y_id], 'y' + y_string_add)

                y_title = experiment.method_params['items'][y_id]
                if experiment.data in cpg_level_types:
                    if 'aux' in experiment.method_params:
                        aux = experiment.method_params['aux'][y_id]
                        # A NaN annotation is shown as an empty second line.
                        if is_float(aux) and math.isnan(aux):
                            aux = ''
                        y_title = str(y_title) + '<br>' + str(aux)
                layout[axis_key]['title'] = y_title

                y_range = experiment.method_params['y_ranges'][y_id]
                if _is_explicit_range(y_range):
                    layout[axis_key]['range'] = y_range

            fig = go.Figure(data=config.experiment_data['data'], layout=layout)
            config.experiment_data['fig'] = fig

        elif experiment.method == Method.variance_histogram:

            for data in config.experiment_data['data']:
                layout = get_layout(config)
                layout.xaxis.title = '$\\Delta$'
                layout.yaxis.title = '$PDF$'

                fig = ff.create_distplot(
                    data['hist_data'],
                    data['group_labels'],
                    show_hist=False,
                    show_rug=False,
                    colors=data['colors'],
                )
                fig['layout'] = layout

                # Each iteration overwrites the stored figure; the last one wins.
                config.experiment_data['fig'] = fig

        elif experiment.method == Method.curve:

            layout = get_layout(config)
            config.experiment_data['fig'] = go.Figure(
                data=config.experiment_data['data'], layout=layout)

    elif experiment.data == DataType.observables:

        if experiment.method == Method.histogram:

            layout = get_layout(config)
            if 'x_range' in experiment.method_params:
                if experiment.method_params['x_range'] != 'auto':
                    layout.xaxis.range = experiment.method_params['x_range']

            config.experiment_data['fig'] = go.Figure(
                data=config.experiment_data['data'], layout=layout)
def run(self, config, configs_child):
    """Build plotly traces for ``config`` from its child configs.

    The branch taken depends on ``config.experiment.data`` and
    ``config.experiment.method``; in every handled branch the result is
    stored in ``config.experiment_data['data']``:

    * betas / betas_adj / residuals_common / residuals_special

      - ``scatter``: per child a marker scatter of target vs. item data,
        an OLS fit, and optionally a regression line (``line == 'yes'``),
        a polygon area (``add == 'polygon'``), raw box curves
        (``fit == 'no'``) or fitted box curves (``fit == 'yes'``); the box
        curves require ``semi_window != 'none'``.
      - ``variance_histogram``: a dict with ``hist_data``, ``group_labels``
        and ``colors``; OLS residuals are collected for children whose
        method is ``Method.linreg``.
      - ``curve``: per child a ``lines+markers`` scatter of two
        ``advanced_data`` series truncated to ``number_of_points``.

    * epimutations

      - ``scatter``: per child the per-subject sums vs. target plus an OLS
        line (fitted on ``log(y)`` when ``y_type == 'log'``).
      - ``range``: per child one box trace per ``borders`` segment.

    * entropy ``scatter``: per child a marker scatter plus an OLS line.
    * observables ``histogram``: one histogram per child whose own method is
      ``Method.histogram``.

    Raises:
        ValueError: in the curve branch, if the requested ``x`` or ``y``
            series is missing from a child's ``advanced_data``.
    """
    if config.experiment.data in [DataType.betas, DataType.betas_adj, DataType.residuals_common, DataType.residuals_special]:

        if config.experiment.method == Method.scatter:

            item = config.experiment.method_params['item']
            line = config.experiment.method_params['line']
            add = config.experiment.method_params['add']
            fit = config.experiment.method_params['fit']
            semi_window = config.experiment.method_params['semi_window']
            box_b = config.experiment.method_params['box_b']
            box_t = config.experiment.method_params['box_t']

            plot_data = []

            for config_child in configs_child:

                # Plot data
                targets = self.get_strategy.get_target(config_child)
                data = self.get_strategy.get_single_base(config_child, [item])[0]

                # Colors setup
                # The palette entry is sliced with [4:-1] to get its channel list
                # (presumably an 'rgb(r,g,b)' string) and rebuilt as rgba strings.
                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]
                coordinates = color[4:-1].split(',')
                color_transparent = 'rgba(' + ','.join(coordinates) + ',' + str(0.1) + ')'
                color_border = 'rgba(' + ','.join(coordinates) + ',' + str(0.8) + ')'

                # Adding scatter
                scatter = go.Scatter(
                    x=targets,
                    y=data,
                    name=get_names(config_child),
                    mode='markers',
                    marker=dict(
                        size=4,
                        color=color_border,
                        line=dict(
                            width=1,
                            color=color_border,
                        )
                    ),
                )
                plot_data.append(scatter)

                # Linear regression
                # add_constant prepends the intercept column, so params[0] / bse[0]
                # belong to the intercept and params[1] / bse[1] to the slope.
                x = sm.add_constant(targets)
                y = data

                results = sm.OLS(y, x).fit()

                intercept = results.params[0]
                slope = results.params[1]

                intercept_std = results.bse[0]
                slope_std = results.bse[1]

                # Adding regression line
                if line == 'yes':
                    # A straight line only needs its two end points.
                    x_min = np.min(targets)
                    x_max = np.max(targets)
                    y_min = slope * x_min + intercept
                    y_max = slope * x_max + intercept
                    scatter = go.Scatter(
                        x=[x_min, x_max],
                        y=[y_min, y_max],
                        mode='lines',
                        marker=dict(
                            color=color
                        ),
                        line=dict(
                            width=6,
                            color=color
                        ),
                        showlegend=False
                    )
                    plot_data.append(scatter)

                # Adding polygon area
                if add == 'polygon':
                    pr = PolygonRoutines(
                        x=targets,
                        y=[],
                        params={
                            'intercept': intercept,
                            'slope': slope,
                            'intercept_std': intercept_std,
                            'slope_std': slope_std
                        },
                        method=config_child.experiment.method
                    )
                    scatter = pr.get_scatter(color_transparent)
                    plot_data.append(scatter)

                # Adding box curve
                if fit == 'no' and semi_window != 'none':
                    # process_box returns x positions plus three curves, plotted
                    # as bottom (bs), middle (ms, thicker line) and top (ts).
                    xs, bs, ms, ts = process_box(targets, data, semi_window, box_b, box_t)

                    scatter = go.Scatter(
                        x=xs,
                        y=bs,
                        name=get_names(config_child),
                        mode='lines',
                        line=dict(
                            width=4,
                            color=color_border
                        ),
                        showlegend=False
                    )
                    plot_data.append(scatter)

                    scatter = go.Scatter(
                        x=xs,
                        y=ms,
                        name=get_names(config_child),
                        mode='lines',
                        line=dict(
                            width=6,
                            color=color_border
                        ),
                        showlegend=False
                    )
                    plot_data.append(scatter)

                    scatter = go.Scatter(
                        x=xs,
                        y=ts,
                        name=get_names(config_child),
                        mode='lines',
                        line=dict(
                            width=4,
                            color=color_border
                        ),
                        showlegend=False
                    )
                    plot_data.append(scatter)

                # Adding best curve
                if fit == 'yes' and semi_window != 'none':

                    residuals = data

                    characteristics_dict = {}

                    init_variance_characteristics_dict(characteristics_dict, 'box_b')
                    init_variance_characteristics_dict(characteristics_dict, 'box_m')
                    init_variance_characteristics_dict(characteristics_dict, 'box_t')

                    xs_box, bs_box, ms_box, ts_box = process_box(targets, residuals, semi_window, box_b, box_t)
                    variance_processing(xs_box, bs_box, characteristics_dict, 'box_b')
                    variance_processing(xs_box, ms_box, characteristics_dict, 'box_m')
                    variance_processing(xs_box, ts_box, characteristics_dict, 'box_t')

                    # The overall R2 is the worse of the bottom and top fits.
                    # NOTE(review): 'best_R2' is not created in this block - presumably
                    # one of the helpers above adds it; confirm.
                    R2 = np.min([characteristics_dict['box_b_best_R2'][-1],
                                 characteristics_dict['box_t_best_R2'][-1]])
                    characteristics_dict['best_R2'].append(R2)

                    # The fit type chosen for the TOP curve selects the branch for
                    # both curves.
                    # NOTE(review): if 'box_t_best_type' is none of [0], [1], [2],
                    # xs / ys_t / ys_b stay undefined for the traces below.
                    if characteristics_dict['box_t_best_type'] == [0]:  # lin-lin axes

                        ys_t = np.zeros(2, dtype=float)
                        ys_b = np.zeros(2, dtype=float)

                        intercept_box_t = characteristics_dict['box_t_lin_lin_intercept'][0]
                        slope_box_t = characteristics_dict['box_t_lin_lin_slope'][0]
                        intercept_box_b = characteristics_dict['box_b_lin_lin_intercept'][0]
                        slope_box_b = characteristics_dict['box_b_lin_lin_slope'][0]

                        # Straight lines: evaluate at the first and last x only.
                        ys_t[0] = slope_box_t * xs_box[0] + intercept_box_t
                        ys_b[0] = slope_box_b * xs_box[0] + intercept_box_b
                        ys_t[1] = slope_box_t * xs_box[-1] + intercept_box_t
                        ys_b[1] = slope_box_b * xs_box[-1] + intercept_box_b

                        xs = [xs_box[0], xs_box[-1]]

                    elif characteristics_dict['box_t_best_type'] == [1]:  # lin-log axes

                        ys_t = np.zeros(len(ts_box), dtype=float)
                        ys_b = np.zeros(len(bs_box), dtype=float)

                        intercept_box_t = characteristics_dict['box_t_lin_log_intercept'][0]
                        slope_box_t = characteristics_dict['box_t_lin_log_slope'][0]

                        # Fall back to the lin-lin fit for the bottom curve when its
                        # lin-log fit is marked 'NA'.
                        if characteristics_dict['box_b_lin_log_intercept'][0] != 'NA' and \
                                characteristics_dict['box_b_lin_log_slope'][0] != 'NA':
                            intercept_box_b = characteristics_dict['box_b_lin_log_intercept'][0]
                            slope_box_b = characteristics_dict['box_b_lin_log_slope'][0]
                            is_lin_log = True
                        else:
                            intercept_box_b = characteristics_dict['box_b_lin_lin_intercept'][0]
                            slope_box_b = characteristics_dict['box_b_lin_lin_slope'][0]
                            is_lin_log = False

                        for box_id in range(0, len(xs_box)):
                            ys_t[box_id] = np.exp(slope_box_t * xs_box[box_id] + intercept_box_t)
                            if is_lin_log:
                                ys_b[box_id] = np.exp(slope_box_b * xs_box[box_id] + intercept_box_b)
                            else:
                                ys_b[box_id] = slope_box_b * xs_box[box_id] + intercept_box_b

                        xs = xs_box

                    elif characteristics_dict['box_t_best_type'] == [2]:  # log-log axes

                        ys_t = np.zeros(len(ts_box), dtype=float)
                        ys_b = np.zeros(len(bs_box), dtype=float)

                        intercept_box_t = characteristics_dict['box_t_log_log_intercept'][0]
                        slope_box_t = characteristics_dict['box_t_log_log_slope'][0]

                        # Fall back to the lin-lin fit for the bottom curve when its
                        # log-log fit is marked 'NA'.
                        if characteristics_dict['box_b_log_log_intercept'][0] != 'NA' and \
                                characteristics_dict['box_b_log_log_slope'][0] != 'NA':
                            intercept_box_b = characteristics_dict['box_b_log_log_intercept'][0]
                            slope_box_b = characteristics_dict['box_b_log_log_slope'][0]
                            is_log_log = True
                        else:
                            intercept_box_b = characteristics_dict['box_b_lin_lin_intercept'][0]
                            slope_box_b = characteristics_dict['box_b_lin_lin_slope'][0]
                            is_log_log = False

                        for box_id in range(0, len(xs_box)):
                            ys_t[box_id] = np.exp(slope_box_t * np.log(xs_box[box_id]) + intercept_box_t)
                            if is_log_log:
                                ys_b[box_id] = np.exp(slope_box_b * np.log(xs_box[box_id]) + intercept_box_b)
                            else:
                                ys_b[box_id] = slope_box_b * xs_box[box_id] + intercept_box_b

                        xs = xs_box

                    # Fitted top curve
                    scatter = go.Scatter(
                        x=xs,
                        y=ys_t,
                        name=get_names(config_child),
                        mode='lines',
                        line=dict(
                            width=4,
                            color=color_border
                        ),
                        showlegend=False
                    )
                    plot_data.append(scatter)

                    # Fitted bottom curve
                    scatter = go.Scatter(
                        x=xs,
                        y=ys_b,
                        name=get_names(config_child),
                        mode='lines',
                        line=dict(
                            width=4,
                            color=color_border
                        ),
                        showlegend=False
                    )
                    plot_data.append(scatter)

            config.experiment_data['data'] = plot_data

        elif config.experiment.method == Method.variance_histogram:

            item = config.experiment.method_params['item']

            # Three lists filled per child; labels and colours are added for every
            # child, hist_data only for linreg children.
            plot_data = {
                'hist_data': [],
                'group_labels': [],
                'colors': []
            }

            for config_child in configs_child:

                plot_data['group_labels'].append(str(config_child.attributes.observables))
                plot_data['colors'].append(cl.scales['8']['qual']['Set1'][configs_child.index(config_child)])

                targets = self.get_strategy.get_target(config_child)
                data = self.get_strategy.get_single_base(config_child, [item])[0]

                if config_child.experiment.method == Method.linreg:
                    x = sm.add_constant(targets)
                    y = data

                    results = sm.OLS(y, x).fit()

                    # The histogram is built from the OLS residuals.
                    plot_data['hist_data'].append(results.resid)

            config.experiment_data['data'] = plot_data

        elif config.experiment.method == Method.curve:

            x_target = config.experiment.method_params['x']
            y_target = config.experiment.method_params['y']
            number_of_points = int(config.experiment.method_params['number_of_points'])

            plot_data = []

            for config_child in configs_child:

                # 'count' means a plain 1..number_of_points x axis.
                if x_target == 'count':
                    xs = list(range(1, number_of_points + 1))
                else:
                    if x_target in config_child.advanced_data:
                        xs = config_child.advanced_data[x_target][0:number_of_points]
                    else:
                        raise ValueError(f'{x_target} not in {config_child}.')

                if y_target in config_child.advanced_data:
                    ys = config_child.advanced_data[y_target][0:number_of_points]
                else:
                    raise ValueError(f'{y_target} not in {config_child}.')

                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]
                coordinates = color[4:-1].split(',')
                color_transparent = 'rgba(' + ','.join(coordinates) + ',' + str(0.5) + ')'
                color_border = 'rgba(' + ','.join(coordinates) + ',' + str(0.9) + ')'

                scatter = go.Scatter(
                    x=xs,
                    y=ys,
                    name=get_names(config_child),
                    mode='lines+markers',
                    marker=dict(
                        size=10,
                        color=color_transparent,
                        line=dict(
                            width=2,
                            color=color_border,
                        )
                    ),
                )
                plot_data.append(scatter)

            config.experiment_data['data'] = plot_data

    elif config.experiment.data == DataType.epimutations:

        if config.experiment.method == Method.scatter:

            plot_data = []

            y_type = config.experiment.method_params['y_type']

            for config_child in configs_child:

                indexes = config_child.attributes_indexes

                x = self.get_strategy.get_target(config_child)
                y = np.zeros(len(indexes), dtype=int)

                # y[subject] is the sum over that subject's column.
                for subj_id in range(0, len(indexes)):
                    col_id = indexes[subj_id]
                    subj_col = self.get_strategy.get_single_base(config_child, [col_id])
                    y[subj_id] = np.sum(subj_col)

                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]
                coordinates = color[4:-1].split(',')
                color_transparent = 'rgba(' + ','.join(coordinates) + ',' + str(0.7) + ')'
                color_border = 'rgba(' + ','.join(coordinates) + ',' + str(0.8) + ')'

                scatter = go.Scatter(
                    x=x,
                    y=y,
                    name=get_names(config_child),
                    mode='markers',
                    marker=dict(
                        size=4,
                        color=color_transparent,
                        line=dict(
                            width=1,
                            color=color_border,
                        )
                    ),
                )
                plot_data.append(scatter)

                # Adding regression line
                x_linreg = sm.add_constant(x)
                # For y_type == 'log' the fit is done on log(y) and mapped back
                # with exp() when the line end points are computed.
                if y_type == 'log':
                    y_linreg = np.log(y)
                else:
                    y_linreg = y

                results = sm.OLS(y_linreg, x_linreg).fit()

                intercept = results.params[0]
                slope = results.params[1]

                x_min = np.min(x)
                x_max = np.max(x)
                if y_type == 'log':
                    y_min = np.exp(slope * x_min + intercept)
                    y_max = np.exp(slope * x_max + intercept)
                else:
                    y_min = slope * x_min + intercept
                    y_max = slope * x_max + intercept

                scatter = go.Scatter(
                    x=[x_min, x_max],
                    y=[y_min, y_max],
                    mode='lines',
                    marker=dict(
                        color=color
                    ),
                    line=dict(
                        width=6,
                        color=color
                    ),
                    showlegend=False
                )
                plot_data.append(scatter)

            config.experiment_data['data'] = plot_data

        elif config.experiment.method == Method.range:

            plot_data = []

            borders = config.experiment.method_params['borders']

            for config_child in configs_child:

                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]
                coordinates = color[4:-1].split(',')
                color_transparent = 'rgba(' + ','.join(coordinates) + ',' + str(0.5) + ')'

                indexes = config_child.attributes_indexes

                x = self.get_strategy.get_target(config_child)
                y = np.zeros(len(indexes), dtype=int)

                # y[subject] is the sum over that subject's column.
                for subj_id in range(0, len(indexes)):
                    col_id = indexes[subj_id]
                    subj_col = self.get_strategy.get_single_base(config_child, [col_id])
                    y[subj_id] = np.sum(subj_col)

                # One box per half-open segment [borders[i], borders[i + 1]),
                # drawn at the segment centre.
                for seg_id in range(0, len(borders) - 1):
                    x_center = (borders[seg_id + 1] + borders[seg_id]) * 0.5
                    curr_box = []
                    for subj_id in range(0, len(indexes)):
                        if borders[seg_id] <= x[subj_id] < borders[seg_id + 1]:
                            curr_box.append(y[subj_id])

                    trace = go.Box(
                        y=curr_box,
                        x=[x_center] * len(curr_box),
                        name=f'{borders[seg_id]} to {borders[seg_id + 1] - 1}',
                        marker=dict(
                            color=color_transparent
                        )
                    )
                    plot_data.append(trace)

            config.experiment_data['data'] = plot_data

    elif config.experiment.data == DataType.entropy:

        if config.experiment.method == Method.scatter:

            plot_data = []

            for config_child in configs_child:

                indexes = config_child.attributes_indexes

                x = self.get_strategy.get_target(config_child)
                y = self.get_strategy.get_single_base(config_child, indexes)

                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]
                coordinates = color[4:-1].split(',')
                color_transparent = 'rgba(' + ','.join(coordinates) + ',' + str(0.7) + ')'
                color_border = 'rgba(' + ','.join(coordinates) + ',' + str(0.8) + ')'

                scatter = go.Scatter(
                    x=x,
                    y=y,
                    name=get_names(config_child),
                    mode='markers',
                    marker=dict(
                        size=4,
                        color=color_transparent,
                        line=dict(
                            width=1,
                            color=color_border,
                        )
                    ),
                )
                plot_data.append(scatter)

                # Adding regression line
                x_linreg = sm.add_constant(x)
                y_linreg = y

                results = sm.OLS(y_linreg, x_linreg).fit()

                intercept = results.params[0]
                slope = results.params[1]

                x_min = np.min(x)
                x_max = np.max(x)
                y_min = slope * x_min + intercept
                y_max = slope * x_max + intercept

                scatter = go.Scatter(
                    x=[x_min, x_max],
                    y=[y_min, y_max],
                    mode='lines',
                    marker=dict(
                        color=color
                    ),
                    line=dict(
                        width=6,
                        color=color
                    ),
                    showlegend=False
                )
                plot_data.append(scatter)

            config.experiment_data['data'] = plot_data

    elif config.experiment.data == DataType.observables:

        if config.experiment.method == Method.histogram:

            plot_data = []

            for config_child in configs_child:

                curr_plot_data = []

                targets = self.get_strategy.get_target(config_child)

                # Explicit bins are only used when every target value is accepted
                # by is_float; otherwise xbins is left empty.
                is_number_list = [is_float(t) for t in targets]
                if False in is_number_list:
                    xbins = {}
                else:
                    bin_size = config.experiment.method_params['bin_size']
                    xbins = dict(
                        start=min(targets) - 0.5 * bin_size,
                        end=max(targets) + 0.5 * bin_size,
                        size=bin_size
                    )

                color = cl.scales['8']['qual']['Set1'][configs_child.index(config_child)]

                # Only children that are themselves histogram experiments get a trace.
                if config_child.experiment.method == Method.histogram:
                    histogram = go.Histogram(
                        x=targets,
                        name=get_names(config_child),
                        xbins=xbins,
                        marker=dict(
                            opacity=config.experiment.method_params['opacity'],
                            color=color,
                            line=dict(
                                color='#444444',
                                width=1
                            )
                        )
                    )

                    curr_plot_data.append(histogram)

                plot_data += curr_plot_data

            config.experiment_data['data'] = plot_data