def _nat_order_labels(self, mat, labels):
    """Put ``mat`` and ``labels`` into the natural sort order of ``labels``.

    The permutation is computed from ``labels`` and the same permutation is
    applied to both inputs.

    Returns:
        tuple: ``(ordered_mat, ordered_labels)``, both as numpy arrays.
    """
    # permutation that sorts the labels naturally
    order = index_natsorted(labels)
    sorted_mat, sorted_labels = (
        np.array(order_by_index(seq, order)) for seq in (mat, labels)
    )
    return sorted_mat, sorted_labels
def test_order_by_index():
    """order_by_index picks items by an index list; with a true third
    argument the result is not a list but can be turned into one."""
    values = ['num3', 'num5', 'num2']
    order = [2, 0, 1]
    picked = [values[i] for i in order]

    eager = order_by_index(values, order)
    assert eager == ['num2', 'num3', 'num5']
    assert eager == picked

    lazy = order_by_index(values, order, True)
    assert lazy != picked
    assert list(order_by_index(values, order, True)) == picked
def natsort_contacts(self):
    """
    Reorder the data matrix and channel labels into natural label order.

    For example: A1,A2, ..., B1, B2, ..., Z1, Z2, ..., A'1, A'2, ...

    The pre-sort labels are kept in ``self.buffchanlabels``; the sorted
    labels are written to ``self.metadata["chanlabels"]``.
    """
    # remember the labels as they were before sorting
    self.buffchanlabels = self.chanlabels.copy()

    # the contacts object supplies the permutation to apply
    order = self.contacts.natsort_contacts()

    sorted_mat = order_by_index(self.mat, order)
    self.mat = np.array(sorted_mat)

    sorted_labels = order_by_index(self.chanlabels, order)
    self.metadata["chanlabels"] = np.array(sorted_labels)
def prepare(df):
    """Return ``df`` with its columns sorted and its rows naturally sorted.

    Rows are ordered by natural sort of their values (taken after the
    columns have been sorted); the original index labels are kept.
    """
    by_column = df.sort_index(axis=1)
    row_order = index_natsorted(zip(by_column.to_numpy()))
    new_index = order_by_index(by_column.index, row_order)
    return by_column.reindex(index=new_index)
def get_data(label, max_items):
    """Create random demo data plus the Bokeh objects that display it.

    Builds ``max_items`` rows with an id column named ``'<label>_pk'`` and
    three random integer columns, preceded by one NA row, then sorts the
    rows naturally by id.

    Returns:
        tuple: ``(df, ds, dt)`` - the pandas DataFrame, the
        ``ColumnDataSource`` built from it and the ``DataTable`` showing it.
    """
    value_cols = ('x1', 'x2', 'x3')

    # random payload; drawn in the order x1, x2, x3
    item_ids = ['%s_%d' % (label, n) for n in range(1, max_items + 1)]
    x1, x2, x3 = (
        np.random.randint(low=1, high=100, size=max_items).tolist()
        for _ in value_cols
    )

    # the NA entry becomes the first row of every column
    item_ids = [NA_ID] + item_ids
    x1 = [NA_TEXT] + x1
    x2 = [NA_TEXT] + x2
    x3 = [NA_TEXT] + x3

    colname = '%s_pk' % label
    df = pd.DataFrame({colname: item_ids, 'x1': x1, 'x2': x2, 'x3': x3})

    # https://stackoverflow.com/questions/29580978/naturally-sorting-pandas-dataframe
    natural_order = index_natsorted(df[colname])
    df = df.reindex(index=order_by_index(df.index, natural_order))

    # Bokeh data source and table widget
    columns = [TableColumn(field=colname, title='ID')]
    columns += [TableColumn(field=name, title=name) for name in value_cols]
    ds = ColumnDataSource(df)
    dt = DataTable(source=ds, columns=columns, width=300, height=300)
    return df, ds, dt
def natsort_contacts(self):
    """
    Reorder the data matrix and channel labels so they follow a natural
    ordering of the labels.

    A1,A2, ..., B1, B2, ..., Z1, Z2, ..., A'1, A'2, ...

    The labels as they were before sorting are kept in
    ``self.buffchanlabels``.

    :return: None
    """
    print("Trying to sort naturally contacts in result object")

    # remember the labels as they were before sorting
    self.buffchanlabels = self.chanlabels.copy()

    # the contacts object supplies the permutation to apply
    order = self.contacts.natsort_contacts()

    sorted_mat = order_by_index(self.mat, order)
    self.mat = np.array(sorted_mat)

    sorted_labels = order_by_index(self.chanlabels, order)
    self.metadata['chanlabels'] = np.array(sorted_labels)
def prepare(self, df):
    """Normalise ``df`` for comparison.

    Columns are sorted, rows are naturally sorted by their values, and the
    first index level is then dropped so the result is re-indexed.
    """
    by_column = df.sort_index(axis=1)
    row_order = index_natsorted(zip(by_column.to_numpy()))
    by_row = by_column.reindex(
        index=order_by_index(by_column.index, row_order))
    # recreate the index so later comparisons do not depend on old labels
    return by_row.reset_index(level=0, drop=True)
def natural_sort(df, by='id', index=False):
    '''
    Return ``df`` reindexed into natural sort order.

    When ``index`` is true the index labels themselves are sorted;
    otherwise rows are ordered by the values of column ``by``.
    '''
    if index:
        new_index = natsorted(df.index)
    else:
        new_index = order_by_index(df.index, index_natsorted(df[by]))
    return df.reindex(index=new_index)
def bar_plot_mmgbsa_results(excel_file, sort=True, titles=None):
    """
    Load data from an Excel file with the summary of the MMGBSA results, in
    a sheet which has to be called "MMGBSA". Create a plot for each ligand
    that is found under the 'Ligand' column in the table.

    The mean of each ligand's 'MMGBSA (mean)' values and one standard
    deviation above and below it are drawn as horizontal lines, and the
    ligand name with both statistics is printed.

    :param excel_file: str, Name of the Excel file with the data
    :param sort: bool, Whether to sort the plot by increasing MMGBSA values
    :param titles: list, Name for each of the plots (as many as there are
        ligands in the table)
    :return f_list: list, A list of matplotlib figures
    :raises ValueError: if ``titles`` is given with a length different from
        the number of ligands
    """
    # ``sheetname`` was renamed to ``sheet_name`` in pandas 0.21 and the old
    # spelling has since been removed.
    df = pd.read_excel(excel_file, sheet_name="MMGBSA")
    df = df.reindex(index=order_by_index(df.index, index_natsorted(df.Run)))

    ligands = df.Ligand.unique()
    if titles is None:
        titles = [None] * len(ligands)
    elif len(titles) != len(ligands):
        raise ValueError('len of ligands and titles is not equal.')

    f_list = []
    for lig, title in zip(ligands, titles):
        lig_df = df[df.Ligand == lig]
        if sort:
            # Sort into a new frame: sorting the filtered slice in place
            # triggers pandas' SettingWithCopyWarning.
            lig_df = lig_df.sort_values(by='MMGBSA (mean)')

        ax = lig_df.plot(x="Run", y="MMGBSA (mean)", yerr='MMGBSA (Std)',
                         kind='bar', legend=False,
                         figsize=figure_dims(1400), title=title)

        overall_mean = lig_df['MMGBSA (mean)'].mean()
        overall_std = lig_df['MMGBSA (mean)'].std()
        print("{} {:02f} {:02f}".format(lig, overall_mean, overall_std))

        xmin, xmax = ax.get_xlim()
        # Mean line
        ax.plot([xmin, xmax], [overall_mean, overall_mean],
                linewidth=1.5, color='blue')
        # Upper, then lower, one-standard-deviation line
        for level in (overall_mean + overall_std,
                      overall_mean - overall_std):
            ax.plot([xmin, xmax], [level, level],
                    linestyle='dashed', linewidth=1, color='blue')

        ax.set_ylim(top=0)
        ax.set_ylabel(ylabel=r'$\Delta$G binding (kcal/mol)', size=14)
        ax.set_xlabel(xlabel='Run', size=14)

        f = pp.gcf()
        f.tight_layout()
        f_list.append(f)
    return f_list
def sort(self, column, order):
    """Naturally sort the model's dataframe by one column.

    Emits ``layoutAboutToBeChanged`` before and ``layoutChanged`` after the
    sort, resets the dataframe index and passes the sorted frame to
    ``setDataFrame``.

    Args:
        column: positional index of the column to sort by.
        order: ``0`` keeps the natural order; any other value reverses it.
    """
    self.layoutAboutToBeChanged.emit()

    frame = self._dataframe
    # Select the column by position instead of eval()-ing an attribute
    # access: that broke for column names that are not valid identifiers or
    # that collide with DataFrame attributes, and executed arbitrary text.
    positions = index_natsorted(frame.iloc[:, column])
    if order != 0:
        positions = reversed(positions)

    frame = frame.reindex(index=order_by_index(frame.index, positions))
    frame.reset_index(inplace=True, drop=True)
    self._dataframe = frame

    self.setDataFrame(self._dataframe)
    self.layoutChanged.emit()
def avgPrefs(prefsfiles):
    """Gets average of site-specific preferences.

    Args:
        `prefsfiles` (list)
            List of CSV files containing preferences, must all be
            for same sites and characters.

    Returns:
        A `pandas.DataFrame` containing the average of the
        preferences in `prefsfiles`. In this returned
        data frame, `site` is a regular column and the rows are
        naturally sorted by `site`.

    >>> tf1 = tempfile.NamedTemporaryFile
    >>> tf2 = tempfile.NamedTemporaryFile
    >>> with tf1(mode='w') as file1, tf2(mode='w') as file2:
    ...     x = file1.write('site,A,C,G,T\\n'
    ...                     '10,0.2,0.2,0.5,0.1\\n'
    ...                     '2a,0.3,0.3,0.3,0.1')
    ...     file1.flush()
    ...     x = file2.write('site,A,C,G,T\\n'
    ...                     '10,0.4,0.1,0.1,0.4\\n'
    ...                     '2a,0.3,0.4,0.1,0.2')
    ...     file2.flush()
    ...     avg = avgPrefs([file1.name, file2.name])
    >>> (avg['site'] == ['2a', '10']).all()
    True
    >>> numpy.allclose(avg['A'], [0.3, 0.3])
    True
    >>> numpy.allclose(avg['C'], [0.35, 0.15])
    True
    >>> numpy.allclose(avg['G'], [0.2, 0.3])
    True
    >>> numpy.allclose(avg['T'], [0.15, 0.25])
    True
    """
    assert len(prefsfiles) >= 1
    prefs = [pandas.read_csv(f, index_col='site').sort_index()
             for f in prefsfiles]

    # make sure all have the same columns in the same order
    cols = prefs[0].columns
    for df in prefs:
        assert set(cols) == set(df.columns)
    prefs = [df[cols] for df in prefs]

    avgprefs = pandas.concat(prefs).groupby('site').mean().reset_index()

    # natural sort by site: https://stackoverflow.com/a/29582718
    # ``alg=ns.SIGNED`` replaces the ``signed=True`` keyword, which natsort
    # deprecated and later removed.
    order = natsort.index_natsorted(avgprefs.site, alg=natsort.ns.SIGNED)
    avgprefs = avgprefs.reindex(
        index=natsort.order_by_index(avgprefs.index, order))

    return avgprefs
def main():
    """Entry point for pyim-annotate.

    Reads insertions from a tab-separated file, annotates them with the
    annotator selected on the command line and writes the result, naturally
    sorted by insertion id, as a tab-separated file.
    """
    cli_args = parse_args()

    raw_insertions = Insertion.from_csv(cli_args.insertions, sep='\t')
    annotator = cli_args.caller.from_args(cli_args)
    annotated = list(annotator.annotate(raw_insertions))

    frame = Insertion.to_frame(annotated)
    id_order = index_natsorted(frame.id)
    frame = frame.reindex(index=order_by_index(frame.index, id_order))

    frame.to_csv(str(cli_args.output), sep='\t', index=False)
def checkdf(self, p, x):
    """
    Build one frame with a row per label in ``x``.

    For every label the row holds the 'standard' value looked up in ``p``,
    the number of entries in the label's sheet and the sheet's intensity,
    response and reversal columns as arrays. Rows are naturally sorted by
    label.

    :return: the assembled dataframe
    """
    hue_col, ntrial_col, intens_col, resp_col, reversal_col = colnames = [
        'standard_hue', 'ntrial', 'all_intensities', 'all_responses',
        'reversal value']

    labels = x.keys()
    df = pd.DataFrame(columns=colnames, index=labels)

    for label in labels:
        sheet = x[label]
        matching = p[p['label'] == label]
        df.loc[label, hue_col] = float(matching['standard'])
        df.loc[label, ntrial_col] = len(sheet)
        df.loc[label, intens_col] = np.array(sheet['All Intensities'])
        df.loc[label, resp_col] = np.array(sheet['All Responses'])
        df.loc[label, reversal_col] = np.array(sheet['Reversal Intensities'])

    label_order = index_natsorted(df.index)
    return df.reindex(index=order_by_index(df.index, label_order))
def sumxrl(self):
    """Combine the per-file frames and summarise them per label.

    Returns:
        tuple: ``(dfs, summary)`` - the concatenated ``checkdf`` frames and
        the per-label aggregation of them, naturally sorted by label.
    """
    par, xls, count = self.readxrl()

    per_file = [self.checkdf(p, x) for p, x in zip(par, xls)]
    dfs = pd.concat(per_file, axis=0)

    spread = [self.meanvalue, self.stdvalue]
    aggregations = {
        'standard_hue': 'unique',
        'ntrial': lambda x: sum(x) / count,
        'reversal value': spread,
        'all_responses': spread,
    }
    summary = dfs.groupby(level=0).agg(aggregations)

    label_order = index_natsorted(summary.index)
    summary = summary.reindex(
        index=order_by_index(summary.index, label_order))
    return dfs, summary
def _get_stranded_f(self, half_entries, f, sort=False):
    """Collect ``half_entries`` rows from the stranded per-chromosome frames.

    ``f`` is the name of the DataFrame method used to take rows (the code
    special-cases ``"tail"`` by walking the chromosomes in reverse). With
    ``sort`` true, frames are sorted by ``sort_cols`` (a name defined
    outside this method) before rows are taken.

    Returns a DataFrame with a fresh index, reindexed by natural sort of
    its ``Chromosome`` column.
    """
    chromosomes = self.chromosomes
    if f == "tail":
        chromosomes = reversed(chromosomes)

    empty = pd.DataFrame(columns=self.columns)

    pieces = []
    n_rows = 0
    for chromosome in chromosomes:
        strands = [self.dfs.get((chromosome, strand), empty)
                   for strand in ("+", "-")]
        if sort:
            strands = [s.sort_values(sort_cols) for s in strands]

        both = pd.concat([getattr(s, f)(half_entries) for s in strands])
        if sort:
            both = both.sort_values(sort_cols)

        n_rows += len(both)
        pieces.append(both)
        if n_rows >= half_entries:
            break

    # up to twice as many entries as needed were gathered; halve here
    result = getattr(pd.concat(pieces), f)(half_entries)
    result = result.reset_index(drop=True)

    chrom_order = natsort.index_natsorted(zip(result.Chromosome))
    return result.reindex(
        index=natsort.order_by_index(result.index, chrom_order))
def main(options):
    """Merge several bigWig files into one, ordered by chromosome name.

    Only the first chromosome listed in each input file's header is used.
    The inputs are ordered by natural sort of that chromosome name, the
    combined header is written, then each file's intervals are copied.

    Args:
        options: object with ``inbw`` (input bigWig paths) and
            ``outfname`` (output bigWig path).

    Returns:
        bool: always ``True``.
    """
    # open all files and read all headers
    fnames = options.inbw
    bw = {}
    header = {}
    chroms = {}
    for fname in fnames:
        print("opening file for input: " + os.path.split(fname)[1])
        bw[fname] = pyBigWig.open(fname)
        header[fname] = bw[fname].chroms()
        chroms[fname] = list(header[fname].keys())[0]

    # Define the order from the chromosome names extracted from the headers.
    # Passing the ``chroms`` dict itself would iterate its keys, i.e. sort
    # by file name instead of by chromosome.
    idx = natsort.index_natsorted([chroms[f] for f in fnames])
    fnames = natsort.order_by_index(fnames, idx)

    # open bigwig for output
    print("opening bigwig file for output (%s)" % options.outfname)
    out_bw = pyBigWig.open(options.outfname, "w")
    assert (out_bw is not None)

    # construct sorted header and write to output
    header = [list(header[f].items())[0] for f in fnames]
    print(str(header))
    out_bw.addHeader(header)

    # copy the intervals file by file, in sorted chromosome order
    for fname in fnames:
        chrom = chroms[fname]
        print("exporting data from chrom; " + chrom
              + " (file: " + fname + ")")
        ints = bw[fname].intervals(chrom)
        out_bw.addEntries([chrom] * len(ints),
                          [i[0] for i in ints],
                          ends=[i[1] for i in ints],
                          values=[i[2] for i in ints])

    print("closing bigwig file for output (%s)" % options.outfname)
    out_bw.close()

    # release the input handles as well
    for handle in bw.values():
        handle.close()
    return True
def natsort_contacts(self) -> Tuple:
    """
    Naturally sort the contacts.

    On the first call the natural-sort permutation of ``self.chanlabels``
    is stored in ``self.naturalinds`` and ``self.chanlabels`` is replaced
    by the sorted labels (as a numpy array). On later calls nothing is
    changed and a warning is issued instead.

    Returns
    -------
    naturalinds
        The stored permutation.
    """
    # ``is None`` rather than ``== None``: the equality test is evaluated
    # element-wise for array-valued indices and then cannot be used as a
    # condition.
    if self.naturalinds is None:
        self.naturalinds = index_natsorted(self.chanlabels)
        self.chanlabels = np.array(
            order_by_index(self.chanlabels, self.naturalinds))
    else:
        warnings.warn(
            "Already naturally sorted contacts! Extract channel labels naturally sorted by calling "
            "chanlabels, and apply ordering to other channel level data with naturalinds."
        )
    return self.naturalinds
def convert_clusters(clusters, cluster_size_min, cluster_size_max, out_path, normalise):
    '''
    Write every cluster whose DPM size is within the given bounds to
    ``out_path``, one space-separated row per read pair, without header or
    index, naturally sorted by (chr1, chr2, pos1, pos2).

    Args:
        clusters(Object): Clusters object
        cluster_size_min(int): minimum size of cluster (more than or equal to int)
        cluster_size_max(int): maximum size of cluster (less than or equal to int)
        out_path(str): Out file path
        normalise: passed through to ``cluster2sfws``
    '''
    assert cluster_size_min > 1, 'Minimum cluster size needs to be > 1'

    rows = []
    for _barcode, cluster in clusters.get_items():
        if cluster_size_min <= cluster.size('DPM') <= cluster_size_max:
            rows += cluster2sfws(cluster, 'DPM', normalise)

    column_names = ['str1', 'chr1', 'pos1', 'frag1',
                    'str2', 'chr2', 'pos2', 'frag2', 'score']
    table = pd.DataFrame(rows, columns=column_names)

    sort_keys = zip(table.chr1, table.chr2, table.pos1, table.pos2)
    row_order = index_natsorted(sort_keys)
    table_sorted = table.reindex(index=order_by_index(table.index, row_order))

    table_sorted.to_csv(out_path, sep=' ', index=False, header=False)
def write_click_csv(self, n_frames):
    """Write the recorded mouse clicks to ``self.out_csv``.

    The clicks are naturally sorted by frame, given constant ``time`` (0)
    and ``visible`` (1) columns and written as ';'-separated CSV preceded
    by a four-line header carrying the size of the first frame.

    Args:
        n_frames: not used by this method.
    """
    out = pd.DataFrame(self.mouse_clicks)
    out = out.reindex(index=order_by_index(
        out.index, index_natsorted(out['frame'], reverse=False)))

    n_rows = out.shape[0]
    out = out.assign(time=[0] * n_rows)
    out = out.assign(visible=[1] * n_rows)

    # fix the column order of the output
    cols = ['frame', 'time', 'visible', 'x', 'y']
    body = out[cols].to_csv(sep=';', index=False)

    h, w = self.frames[0].shape[:2]
    header = 'VideoWidth:{}\nVideoHeight:{}\nDisplayWidth:0\nDisplayHeight:0\n'.format(w, h)

    # the context manager closes the file even if the write fails
    with open(self.out_csv, 'w') as text_file:
        text_file.write(header + body)
    print('written {}'.format(self.out_csv))
def list_of_names(fname='SavedData/searchResult.php'):
    """Extract the unique names found in ``<i>`` tags inside table cells.

    The file is parsed with BeautifulSoup; for every ``<i>`` element, its
    contents are collected once per ``<td>`` ancestor. Duplicates are
    dropped (first occurrence wins) and the result is naturally sorted.

    Args:
        fname: path of the saved HTML page to parse.

    Returns:
        pandas.DataFrame: a single ``name`` column with a fresh 0..n-1
        index.
    """
    # close the file as soon as it has been parsed
    with open(fname) as handle:
        bs = BeautifulSoup(handle, features='lxml')

    names = []
    pbar = ProgressBar()
    for species in pbar(bs.findAll('i')):
        for parents in species.parents:
            if parents.name == 'td':
                names.extend(species.contents)

    # drop duplicates, keeping first-seen order
    seen = set()
    ulist = []
    for name in names:
        if name not in seen:
            seen.add(name)
            ulist.append(name)

    df = pd.DataFrame()
    df['name'] = ulist
    df = df.reindex(index=order_by_index(
        df.index, index_natsorted(df['name'], reverse=False)))
    return df.reset_index(drop=True)
def cluster2sfws(cluster, read_type, normalise=True):
    '''Convert a single cluster to rows in the sfws format
    (Juicer tools Pre format with score).

    Note:
        Juicer short format with score (sfws)
        A whitespace separated file that contains, on each line
        <str1> <chr1> <pos1> <frag1> <str2> <chr2> <pos2> <frag2> <score>
        https://github.com/aidenlab/juicer/wiki/Pre

        IMPORTANT NOTE pre throws away reads that map to the same
        restriction fragment. If you use dummy numbers for the frag field,
        be sure they are different for the different read ends; that is,
        <frag1> should be 0 and <frag2> should be 1.

        str = strand (0 for forward, anything else for reverse)

    Args:
        cluster(Cluster): A single Cluster object that holds all the
            position (reads)
        read_type(str): RPM or DPM
        normalise(bool): score each pair ``2 / n`` (``n`` = number of
            reads of ``read_type``) instead of ``1``

    Returns:
        list: one ``[str1, chr1, pos1, 0, str2, chr2, pos2, 1, score]`` row
        per pair of reads; empty when fewer than two reads match
        ``read_type``.
    '''
    cluster_pos = [
        ('0' if position._strand == '+' else '1',
         position._chromosome,
         position._start_coordinate)
        for position in cluster
        if position._type == read_type
    ]

    # Fewer than two reads give no pairs. Returning here also avoids
    # dividing by zero when no read matches and normalise is on.
    if len(cluster_pos) < 2:
        return []

    score = 2.0 / len(cluster_pos) if normalise else 1

    out = []
    for a, b in combinations(cluster_pos, 2):
        # put the two ends in natural order of their chromosome names
        a, b = order_by_index([a, b], index_natsorted([a[1], b[1]]))
        out.append([*a, 0, *b, 1, score])
    return out
def tidyToWide(tidy_df, valuecol):
    """Converts tidy `diffsel` data frame to wide form.

    The `diffsel` data frames returned by ``dms2_diffsel`` (and
    also other dataframes, such as the `fracsurvive` ones
    from ``dms_fracsurvive`` are in tidy form. This function
    converts them to wide form.

    Args:
        `tidy_df` (pandas DataFrame)
            Data frame in tidy form. Should have columns named
            `site`, `wildtype`, `mutation`, and something
            with the name matching `valuecol`. It is not modified.
        `valuecol` (string)
            Name of value column in `df`, such `diffsel` or
            `fracsurvive`.

    Returns:
        Wide form dataframe. Will have columns `site` (as string),
        `wildtype`, and all characters (e.g., amino acids)
        for which values are given. Natural sorted by `site`.

    >>> tidy_df = pandas.read_csv(io.StringIO(
    ...     '''site wildtype mutation diffsel
    ...           3        A        D    -1.5
    ...           3        A        C    10.1
    ...           2        A        C    10.1
    ...           1        C        D     9.5
    ...           1        C        A     0.2
    ...           2        A        D    -1.5'''),
    ...     delim_whitespace=True, index_col=False)
    >>> wide_df = tidyToWide(tidy_df, valuecol='diffsel')
    >>> print(wide_df.to_string(float_format=lambda x: '{0:.1f}'.format(x)))
      site   A    C    D wildtype
    0    1 0.2  0.0  9.5        C
    1    2 0.0 10.1 -1.5        A
    2    3 0.0 10.1 -1.5        A
    """
    assert isinstance(tidy_df, pandas.DataFrame)
    cols = ['site', 'wildtype', 'mutation', valuecol]
    assert set(cols) == set(tidy_df.columns), ('expected columns '
            '{0}\nactual columns {1}'.format(cols, tidy_df.columns))

    # work on a copy so the caller's frame keeps its original `site` dtype
    tidy_df = tidy_df.copy()

    # make site a string
    tidy_df['site'] = tidy_df['site'].astype(str)

    # sort on site as here: https://stackoverflow.com/a/29582718
    # ``alg=ns.SIGNED`` replaces the ``signed=True`` keyword, which natsort
    # deprecated and later removed.
    site_order = natsort.index_natsorted(tidy_df.site,
                                         alg=natsort.ns.SIGNED)
    tidy_df = tidy_df.reindex(
        index=natsort.order_by_index(tidy_df.index, site_order))

    # convert to wide form, keeping wildtype identities
    tidy_df = tidy_df.set_index('site', drop=True)
    wt = tidy_df['wildtype']
    wide_df = (tidy_df.pivot(columns='mutation', values=valuecol)
               .fillna(0.0)
               .join(wt)
               .reset_index()
               )
    # the join repeats each site once per tidy row; collapse the repeats
    wide_df = wide_df.drop_duplicates().reset_index(drop=True)

    return wide_df
plt.ylabel('') plt.legend(fontsize=50) plt.savefig('../outputs/{}'.format(outputname), bbox_inches="tight", dpi=300, format='png') counts = df.groupby(['label','source']).count()[['key']].reset_index() counts.source = counts.source.apply(lambda x: 'Kaggle' if x=='kg' else 'StackOverflow') total_kg, total_so = df.groupby('source').count().topic.tolist() counts['counts'] = counts.key counts['key'] = counts.apply(lambda x: x.key/total_kg if x.source=='Kaggle' else x.key/total_so, axis=1) counts = counts.reindex(index=order_by_index(counts.index, index_natsorted(counts.label))) hist(counts) #%%####################################################################### # Topic Relationships # ########################################################################## str_contains = 'kg' gephi = df[['key','topics']] gephi = gephi.explode('topics') gephi['topic'] = gephi.topics.apply(lambda x: x[0]) gephi['value'] = gephi.topics.apply(lambda x: x[1]) gephi = gephi.drop(columns=['topics']) gephi = gephi[gephi.value>0.15] nodes = pd.DataFrame(set(gephi.topic.tolist() + gephi.key.tolist()), columns=['Id']) nodes['Color'] = nodes.Id
def test_order_by_index_returns_generator_with_iter_True():
    """With a true third argument the result is not a list, but listing it
    gives the items picked in index order."""
    values = ['num3', 'num5', 'num2']
    order = [2, 0, 1]
    picked = [values[i] for i in order]

    lazy = order_by_index(values, order, True)
    assert lazy != picked
    assert list(order_by_index(values, order, True)) == picked
def test_order_by_index_sorts_list_according_to_order_of_integer_list():
    """order_by_index returns the items picked in the order of the index
    list."""
    values = ["num3", "num5", "num2"]
    order = [2, 0, 1]

    result = order_by_index(values, order)
    assert result == ["num2", "num3", "num5"]
    assert order_by_index(values, order) == [values[i] for i in order]
def __init__(self, input_path, organism, args):
    """Load the input region sets and prepare figure and table containers.

    ``input_path`` may be a directory (every ``*.bed`` file directly inside
    is loaded and the sets are naturally sorted by name), a single
    ``.bed`` file, or any other file, which is read as an experimental
    matrix. If it is neither a directory nor a file the process exits with
    status 1.

    Args:
        input_path: directory, BED file or experimental-matrix file.
        organism: organism name used to load the genome data.
        args: parsed options; this method reads ``test``, ``biotype``,
            ``repeats``, ``genposi``, ``labels`` and ``coverage``.
    """
    self.testing = args.test

    def load_bed(path):
        # Read one BED file into a sorted region set named after the file.
        name = os.path.basename(path).replace(".bed", "")
        bed = GenomicRegionSet(name)
        bed.read(path)
        if args.test:
            # test mode keeps only the first ten regions
            bed.sequences = bed.sequences[0:10]
        bed.sort()
        return name, bed

    if os.path.isdir(input_path):
        self.beds = []
        self.bednames = []
        for dirpath, dnames, fnames in walklevel(input_path, level=0):
            for f in fnames:
                if f.endswith(".bed"):
                    name, bed = load_bed(os.path.join(dirpath, f))
                    self.beds.append(bed)
                    self.bednames.append(name)

        index = natsort.index_natsorted(self.bednames)
        self.beds = natsort.order_by_index(self.beds, index)
        self.bednames = natsort.order_by_index(self.bednames, index)

    elif os.path.isfile(input_path):
        if input_path.endswith(".bed"):
            name, bed = load_bed(input_path)
            self.beds = [bed]
            self.bednames = [name]
        else:
            self.EM = ExperimentalMatrix()
            # read the given file; the previous code passed the builtin
            # ``input`` function here instead of the path
            self.EM.read(input_path)
            self.beds = self.EM.get_regionsets()
            self.bednames = self.EM.get_regionsnames()
    else:
        print("***Please make sure that there are BED files in " + input_path)
        sys.exit(1)

    self.organism = organism
    self.chromosomes = GenomicRegionSet(organism)
    self.chromosomes.get_genome_data(organism=organism, chrom_X=True)
    genome = GenomeData(organism=organism)
    self.fasta_dir = genome.get_genome()
    self.stats = OrderedDict()

    # Assign a figure column to every requested panel; numbering starts at 2.
    self.ind_col = {}
    size_panel = 6
    rows = len(self.beds)
    cols = 2
    if args.biotype:
        self.ind_col["Biotype"] = cols
        cols += 1
    if args.repeats:
        self.ind_col["Repeats"] = cols
        cols += 1
    if args.genposi:
        self.ind_col["Genetic position"] = cols
        cols += 1
    if args.labels:
        for label in args.labels:
            self.ind_col[label] = cols
            cols += 1
    self.fig_f, self.fig_axs = plt.subplots(rows + 1, cols, dpi=300,
                                            figsize=(cols * size_panel, rows * size_panel))

    # Per-set tables start with the region strings and their strand.
    self.table_h = {}
    self.tables = {}
    self.count_table = {}
    self.count_tableh = []
    for bedname, bed in zip(self.bednames, self.beds):
        self.table_h[bedname] = [bedname, "strand"]
        self.tables[bedname] = [
            [r.toString() for r in bed],
            [r.orientation if r.orientation else "." for r in bed],
        ]
        self.count_table[bed.name] = {}

    self.coverage = bool(args.coverage)
    self.background = []
import os
import numpy as np
import natsort
import vtk
import sys

# Usage: <script> <dicom_directory> <output_basename>
# Reads the files of a DICOM directory in natural file-name order and
# writes them as a MetaImage file named <output_basename>.mhd.
dataPath = sys.argv[1]
outputfile = sys.argv[2]

# Take the listing of dataPath itself. Exhausting os.walk() left the listing
# of the last sub-directory visited, and left the name undefined when nothing
# was walked.
dirName, subDir, fileList = next(os.walk(dataPath), (dataPath, [], []))

# natural order, so that e.g. "img2" comes before "img10"
index = natsort.index_natsorted(fileList)
fileList = natsort.order_by_index(fileList, index)

stringArray = vtk.vtkStringArray()
for fileName in fileList:
    stringArray.InsertNextValue(fileName)

reader = vtk.vtkDICOMImageReader()
reader.SetDirectoryName(dataPath)
reader.SetFileNames(stringArray)

writer = vtk.vtkMetaImageWriter()
writer.SetInputConnection(reader.GetOutputPort())
writer.SetFileName(outputfile + '.mhd')
writer.Write()
def test_order_by_index_sorts_list_according_to_order_of_integer_list():
    """order_by_index returns the items picked in the order of the index
    list."""
    source = ["num3", "num5", "num2"]
    positions = [2, 0, 1]

    wanted = []
    for pos in positions:
        wanted.append(source[pos])

    assert wanted == ["num2", "num3", "num5"]
    assert order_by_index(source, positions) == wanted
def test_order_by_index_returns_generator_with_iter_true():
    """With a true third argument the result is not a list, but listing it
    gives the items picked in index order."""
    source = ["num3", "num5", "num2"]
    positions = [2, 0, 1]
    wanted = [source[pos] for pos in positions]

    lazy = order_by_index(source, positions, True)
    assert lazy != wanted
    assert list(order_by_index(source, positions, True)) == wanted
def bar_tab(df_means, df_stdev, Time, Treatments, number_cmpds_run):
    """Build the 'Bar Charts' Bokeh panel.

    Draws one bar series per treatment group for the selected compound,
    with whiskers at mean +/- (n * standard deviation), and two select
    widgets that switch the compound and the number of standard deviations.

    Args:
        df_means: frame of means; the first compound column is the one at
            position ``len(Treatments) + len(Time)``.
        df_stdev: frame of standard deviations, grouped the same way.
        Time: list whose first entry names the time column.
        Treatments: list of column names used to group the rows.
        number_cmpds_run: passed through to ``cmpd_options_func``.

    Returns:
        The Bokeh ``Panel`` holding the widgets and the figure.
    """
    colors = [
        "firebrick", "navy", 'green', 'orange', 'violet', 'lawngreen',
        'powderblue', 'lightgreen', 'yellow', 'olive', 'red', 'grey',
        'skyblue', 'indigo', 'slategray', 'hotpink', 'peachpuff',
        'powderblue'
    ]

    Cmpd0 = df_means.columns[len(Treatments) + len(Time)]
    cmpd_options = cmpd_options_func(df_means,
                                     len(Treatments) + len(Time),
                                     number_cmpds_run)

    time_vals = natsorted(df_means[Time[0]].drop_duplicates().tolist())
    df_means = df_means.reindex(index=order_by_index(
        df_means.index, index_natsorted(df_means[Time[0]])))

    MEANs = df_means.groupby(Treatments)[Cmpd0].apply(list).to_dict()
    STDs = df_stdev.groupby(Treatments)[Cmpd0].apply(list).to_dict()

    # materialise keys/values once instead of on every loop iteration
    group_keys = list(MEANs.keys())
    mean_lists = list(MEANs.values())
    std_lists = list(STDs.values())

    keys = []
    u_keys = []
    l_keys = []
    results = {'time_vals': time_vals}
    for h, group in enumerate(group_keys):
        # series name: the treatment values of the group joined by '_'
        kk = group[0]
        for tot in range(1, len(Treatments)):
            kk += '_' + str(group[tot])
        keys.append(kk)
        u_keys.append('upper ' + kk)
        l_keys.append('lower ' + kk)

        mu = mean_lists[h]
        sd = std_lists[h]
        results[keys[h]] = mu
        results[u_keys[h]] = [x + e for x, e in zip(mu, sd)]
        results[l_keys[h]] = [x - e for x, e in zip(mu, sd)]

    source = ColumnDataSource(data=results)

    p = figure(x_range=time_vals, plot_height=1000, plot_width=1000,
               title=Cmpd0, toolbar_location="right")

    n_groups = len(MEANs)
    legend_it = []
    for hh in range(n_groups):
        # bars of one time value share the slot [-0.4, 0.4) around it
        offset = -0.4 + (.8 * hh / n_groups)
        c = p.vbar(x=dodge('time_vals', offset, range=p.x_range),
                   top=keys[hh],
                   width=(0.8 / n_groups),
                   source=source,
                   # cycle through the palette so more groups than colours
                   # no longer raise IndexError
                   color=colors[hh % len(colors)])
        p.add_layout(
            Whisker(source=source,
                    base=dodge('time_vals', offset, range=p.x_range),
                    upper=u_keys[hh],
                    lower=l_keys[hh],
                    level="overlay"))
        legend_it.append((keys[hh], [c]))

    legend = Legend(items=legend_it, location=(0, 0))
    legend.click_policy = "mute"
    p.add_layout(legend, 'right')
    p.x_range.range_padding = 0.1
    p.xgrid.grid_line_color = None
    p.legend.orientation = "vertical"

    # This is where the widget is setup
    select = Select(title='Select your compound:', value=Cmpd0,
                    options=cmpd_options)
    select_sd = Select(title="Standard Deviation:", value='1',
                       options=['0', '1', '2', '3'])

    def update_title(attrname, old, new):
        # keep the plot title in step with the chosen compound
        p.title.text = select.value

    select.on_change('value', update_title)

    def update_data(attrname, old, new):
        # recompute every series for the chosen compound and whisker width
        cmpd = select.value
        std = int(select_sd.value)
        MEANs = df_means.groupby(Treatments)[cmpd].apply(list).to_dict()
        STDs = df_stdev.groupby(Treatments)[cmpd].apply(list).to_dict()
        new_means = list(MEANs.values())
        new_stds = list(STDs.values())

        results1 = {'time_vals': time_vals}
        for y in range(len(MEANs)):
            mu = new_means[y]
            sd = new_stds[y]
            results1[keys[y]] = mu
            results1[u_keys[y]] = [x + std * e for x, e in zip(mu, sd)]
            results1[l_keys[y]] = [x - std * e for x, e in zip(mu, sd)]
        source.data = results1

    for w in [select, select_sd]:
        w.on_change('value', update_data)

    # Set up layouts and add to document
    inputs = widgetbox(select, select_sd)
    layout = row(column(inputs), p, width=1000)
    tab = Panel(child=layout, title='Bar Charts')
    return tab
def control_timetable(timetable, header):
    """Print downtime, waiting and local-reserve figures for a timetable.

    Prints ``header`` centred in a 30-character rule, then the result of
    ``downtime`` for the timetable, the result of ``waiting`` per part and,
    for each (i, j), the minimum of a downtime entry and a waiting entry.

    Args:
        timetable: sequence of entries; the code reads element 0 of an
            entry as a number in 1..3 and element 1 as the part id
            (presumably machine number and part; verify against caller).
        header: text shown in the title line.
    """
    print(Fore.YELLOW + '{:-^30}'.format(header))
    # for each of the numbers 1..3 in field 0, the part ids in timetable order
    start_order_details = []
    for i in range(1, 4):
        start_order_details.append([j[1] for j in timetable if j[0] == i])
    # split the intervals by part; this yields 4 lists, used
    # for computing the waiting time
    order_by_details = []
    for i in start_order_details[0]:
        order_by_details.append([j for j in timetable if j[1] == i])
    # to compute downtime, sort our timetable by GVM
    # ("GVM" is the original comments' abbreviation 'ГВМ', apparently the machine)
    timetable = ordered_timetable(timetable)
    # list of the launch order of the parts on the GVMs; strip the operation
    # number and the GVM from the interval list, for computing the waiting
    start_order_gvm = []
    order_by_details_norm = []
    for i in order_by_details:
        start_order_gvm.append([j[:1][0] for j in i])
        order_by_details_norm.append([j[2:] for j in i])
    # compute the post-operation downtime of the GVMs
    timetable = downtime(timetable)
    for i in range(3):
        index = natsort.index_natsorted(start_order_details[i])
        timetable[i] = natsort.order_by_index(timetable[i], index)
    print(Fore.WHITE + "\nDowntime: " + Fore.CYAN + '{}'.format(min([sum(i) for i in timetable])))
    for i in timetable:
        print(Fore.WHITE + '{!s:>18s}'.format(i))
    # compute how long the parts wait before processing
    print("\nWaiting:")
    order_by_details = waiting(order_by_details_norm)
    # order the waiting of each part by GVM: 1 2 3
    index = natsort.index_natsorted(start_order_details[0])
    order_by_details = natsort.order_by_index(order_by_details, index)
    start_order_gvm = natsort.order_by_index(start_order_gvm, index)
    for i in range(4):
        index = natsort.index_natsorted(start_order_gvm[i])
        order_by_details[i] = natsort.order_by_index(order_by_details[i], index)
    for i in order_by_details:
        print(Fore.WHITE + '{!s:>16s}'.format(i))
    # compute the local reserve
    order_by_details = numpy.transpose(order_by_details)
    print('\nLocal resource:\n')
    for i in range(3):
        for j in range(4):
            first = timetable[i][j]
            try:
                second = order_by_details[i+1][j]
            except IndexError:
                # no following row: nothing limits the reserve from that side
                second = float('inf')
            print(" L({0}, {1}) = min({2}, {3}) = {4}".format( \
                i+1, j+1, first, second, min(first, second)))
    print('\n')
    print(Style.RESET_ALL)
def plot_ref(self, ref_dir, tag, other=False, strand=False, background=False, bin=False):
    """Count overlaps between the loaded region sets and reference BED
    sets, record them in the tables and draw them into the figure column
    registered for ``tag``.

    Args:
        ref_dir: directory containing ``*.bed`` reference files, or a
            single ``.bed`` file. Anything else exits with status 1.
        tag: name of the annotation; used in messages, titles, the
            ``<tag>_else`` count column and to look up the figure column.
        other: if true, an extra "Else" category holds what no reference
            accounts for.
        strand: if true, "+" and "-" regions are compared separately.
        background: if true, a "Background" bar with the proportions of
            the references themselves is added (switched off when only one
            reference is loaded).
        bin: not used in this method.
    """
    print("Processing " + tag + " ....")
    refs = []
    refs_names = []
    if os.path.isdir(ref_dir):
        for f in os.listdir(ref_dir):
            if f.endswith(".bed"):
                name = os.path.basename(f).replace(".bed", "")
                bed = GenomicRegionSet(name)
                bed.read(os.path.join(ref_dir, f))
                if self.testing:
                    # test mode keeps only the first ten regions
                    bed.sequences = bed.sequences[0:10]
                # bed.merge()
                refs.append(bed)
                refs_names.append(name)
    elif os.path.isfile(ref_dir) and ref_dir.endswith(".bed"):
        name = os.path.basename(ref_dir).replace(".bed", "")
        bed = GenomicRegionSet(name)
        bed.read(ref_dir)
        if self.testing:
            bed.sequences = bed.sequences[0:10]
        # bed.merge()
        refs.append(bed)
        refs_names.append(name)
    else:
        print("*** Error: Not a valid directory: " + ref_dir)
        sys.exit(1)
    if background and len(refs) == 1:
        # a single reference gives no meaningful background distribution
        background = False
        self.background = self.background + [len(ref) for ref in refs]
    # put references (and their names) into natural name order
    index = natsort.index_natsorted(refs_names)
    refs = natsort.order_by_index(refs, index)
    refs_names = natsort.order_by_index(refs_names, index)
    self.count_tableh = self.count_tableh + refs_names
    if other:
        refs_names.append("Else")
        self.count_tableh = self.count_tableh + [tag+"_else"]
    if strand:
        # split every reference by strand once, up front
        ref_plus = []
        ref_minus = []
        for ref in refs:
            ref_plus.append(ref.filter_strand(strand="+"))
            ref_minus.append(ref.filter_strand(strand="-"))
    if background:
        # refs_names.append("Background")
        if self.coverage:
            # background_counts = [len(ref) for ref in refs]
            background_cov = [ref.total_coverage() for ref in refs]
            background_prop = [float(100) * b / sum(background_cov) for b in background_cov]
            if other:
                b = background_cov + [0]
            else:
                b = background_cov
            self.background = self.background + b
        else:
            background_counts = [ len(ref) for ref in refs ]
            background_prop = [ float(100) * b/sum(background_counts) for b in background_counts]
            if other:
                b = background_counts + [0]
            else:
                b = background_counts
            self.background = self.background + b
    else:
        self.background = self.background + [0] * len(refs)
    # Counting through all references
    overlapping_counts = []
    for i, bed in enumerate(self.beds):
        c = []
        if strand:
            bed_plus = bed.filter_strand(strand="+")
            bed_minus = bed.filter_strand(strand="-")
            if other:
                sum_ref_plus = GenomicRegionSet("ref_plus")
                sum_ref_minus = GenomicRegionSet("ref_minus")
        else:
            if other:
                sum_ref = GenomicRegionSet("ref")
        for j, ref in enumerate(refs):
            # print([bed.name, ref.name])
            if strand:
                if self.coverage:
                    cc = bed_plus.intersect(ref_plus[j]).total_coverage() + \
                         bed_minus.intersect(ref_minus[j]).total_coverage()
                else:
                    cc = bed_plus.count_by_regionset(ref_plus[j]) + bed_minus.count_by_regionset(ref_minus[j])
                if other:
                    # accumulate all references to derive the "Else" part later
                    sum_ref_plus.combine(ref_plus[j])
                    sum_ref_minus.combine(ref_minus[j])
            else:
                if self.coverage:
                    cc = bed.intersect(ref).total_coverage()
                else:
                    cc = bed.count_by_regionset(ref)
                if other:
                    sum_ref.combine(ref)
            c.append(cc)
            self.count_table[bed.name][ref.name] = cc
        if other:
            if self.coverage:
                # coverage left over after all references
                c.append(bed.total_coverage() - sum(c))
            else:
                if strand:
                    sum_ref_plus.merge()
                    sum_ref_minus.merge()
                    remain_regions_p = bed_plus.subtract(sum_ref_plus, whole_region=True)
                    remain_regions_m = bed_minus.subtract(sum_ref_minus, whole_region=True)
                    remain_regions = remain_regions_p.combine(remain_regions_m, output=True)
                else:
                    sum_ref.merge()
                    remain_regions = bed.subtract(sum_ref, whole_region=True)
                c.append(len(remain_regions))
            # NOTE(review): the loop variables are unused, so this stores the
            # same value once per reference; a single assignment would do.
            for j, ref in enumerate(refs):
                self.count_table[bed.name][tag+"_else"] = c[-1]
        overlapping_counts.append(c)
    # Tables
    for i, bed in enumerate(self.beds):
        for j, ref in enumerate(refs):
            names = bed.map_names(ref, strand=strand, convert_nt=True)
            self.table_h[self.bednames[i]].append(refs_names[j])
            self.tables[self.bednames[i]].append(names)
    # Generate Figure
    if other:
        color_list = plt.cm.Set1(numpy.linspace(0, 1, len(refs_names))).tolist()
    else:
        color_list = plt.cm.Set1(numpy.linspace(0, 0.95, len(refs_names))).tolist()
    # row 0 is the stacked-percentage overview; rows 1.. are one per region set
    for i in range(len(self.beds) + 1):
        # Plot
        # fig_axs may be a 2-D grid, a 1-D array or a single axes object;
        # fall back from one indexing form to the next
        try:
            ax = self.fig_axs[i, self.ind_col[tag]]
        except:
            try:
                ax = self.fig_axs[i]
            except:
                ax = self.fig_axs
        if i == 0:
            # per region set: each category as a percentage of its total
            proportion = []
            for counts in overlapping_counts:
                ss = sum(counts)
                if ss > 0:
                    proportion.append([x / ss * 100 for x in counts])
                else:
                    proportion.append([0 for x in counts])
            if background:
                if other:
                    proportion.append(background_prop + [0])
                    len_ref = len(refs) + 1
                else:
                    proportion.append(background_prop)
                    len_ref = len(refs)
                bottom = [0] * (len(self.bednames) + 1)
                xlabels = self.bednames + ["Background"]
            else:
                len_ref = len(refs)
                bottom = [0] * len(self.bednames)
                xlabels = self.bednames
            # transpose: one list per category, holding its value for each bar
            ptable = []
            # print(proportion)
            # print(len_ref)
            for j in range(len_ref):
                ptable.append([x[j] for x in proportion])
            width = 0.6
            for j, y in enumerate(ptable):
                ax.bar(range(len(bottom)), y, width=width, bottom=bottom, color=color_list[j],
                       edgecolor="none", align='center')
                # stack the next category on top of this one
                bottom = [x + y for x, y in zip(bottom, y)]
            ax.set_title(tag)
            ax.yaxis.tick_left()
            ax.set_xticks(range(len(xlabels)))
            ax.set_xticklabels(xlabels, fontsize=7, rotation=20, ha="right")
            ax.set_ylabel("Percentage %")
            # ax.tick_params(axis='x', which='both', top='off', bottom='off', labelbottom=True)
            ax.set_ylim([0, 100])
            ax.set_xlim([-0.5, len(xlabels) - 0.5])
            plt.tight_layout()
        elif i > 0:
            # absolute counts of region set i - 1, one bar per category
            x = [x for x in range(len(overlapping_counts[i - 1]))]
            ax.bar(x, overlapping_counts[i - 1],
                   color=color_list, linewidth=0, edgecolor="none", align='center')
            ax.set_title(self.bednames[i - 1])
            # ax.set_ylabel("Number")
            ax.set_xticks([x for x in range(len(overlapping_counts[i - 1]))])
            ax.set_xticklabels(refs_names, fontsize=7, rotation=20, ha="right")
            ax.set_xlim([-0.5, len(overlapping_counts[i - 1]) - 0.5])
            plt.tight_layout()
            ax.set_xlabel(tag)
def plotSiteDiffSel(names, diffselfiles, plotfile,
        diffseltype, maxcol=2, white_bg=False):
    """Plot site diffsel or fracsurvive along sequence.

    Despite the function name, this function can be used to
    plot either differential selection or fraction surviving.

    Args:
        `names` (list or series)
            Names of samples for which we plot statistics.
            Must be unique and non-empty.
        `diffselfiles` (list or series)
            ``*sitediffsel.csv`` files from ``dms2_diffsel`` or
            ``*sitefracsurvive.csv`` files from ``dms2_fracsurvive``.
            One file per entry in `names`, all covering the same sites.
        `plotfile` (str)
            Name of created PDF plot file (must end in ``.pdf``).
        `diffseltype` (str)
            Type of diffsel or fracsurvive to plot:
                - `positive`: positive sitediffsel
                - `total`: positive and negative sitediffsel
                - `max`: maximum mutdiffsel
                - `minmax`: minimum and maximum mutdiffsel
                - `avgfracsurvive`: total site fracsurvive
                - `maxfracsurvive`: max mutfracsurvive at site
        `maxcol` (int)
            Number of columns in faceted plot.
        `white_bg` (bool)
            Plots will have a white background with limited other
            formatting.

    Raises:
        ValueError: if `diffseltype` is not one of the values above.
    """
    assert len(names) == len(diffselfiles) == len(set(names)) > 0
    assert os.path.splitext(plotfile)[1].lower() == '.pdf'

    diffsels = [pandas.read_csv(f).assign(name=name) for (name, f)
            in zip(names, diffselfiles)]
    assert all([set(diffsels[0]['site']) == set(df['site']) for df in
            diffsels]), "diffselfiles not all for same sites"
    diffsel = pandas.concat(diffsels, ignore_index=True)

    # map the input column(s) for this plot type onto 'above' / 'below'
    ylabel = 'differential selection'
    if diffseltype == 'positive':
        rename = {'positive_diffsel':'above'}
    elif diffseltype == 'total':
        rename = {'positive_diffsel':'above',
                  'negative_diffsel':'below'}
    elif diffseltype == 'max':
        rename = {'max_diffsel':'above'}
    elif diffseltype == 'minmax':
        rename = {'max_diffsel':'above',
                  'min_diffsel':'below'}
    elif diffseltype in ['avgfracsurvive', 'maxfracsurvive']:
        ylabel = 'fraction surviving'
        rename = {diffseltype:'above'}
    else:
        raise ValueError("invalid diffseltype {0}".format(diffseltype))
    diffsel = (diffsel.rename(columns=rename)
                      .melt(id_vars=['site', 'name'],
                            value_vars=list(rename.values()),
                            value_name='diffsel',
                            var_name='direction')
                      )

    # natural sort by site: https://stackoverflow.com/a/29582718
    # `alg=ns.SIGNED` is the supported spelling of the old `signed=True`
    # keyword, which newer natsort releases no longer accept.
    diffsel = diffsel.reindex(index=natsort.order_by_index(
            diffsel.index,
            natsort.index_natsorted(diffsel.site, alg=natsort.ns.SIGNED)))

    # now some manipulations to make site str while siteindex is int
    diffsel['site'] = diffsel['site'].apply(str)
    diffsel['siteindex'] = pandas.Categorical(diffsel['site'],
            diffsel['site'].unique()).codes

    ncol = min(maxcol, len(names))
    nrow = math.ceil(len(names) / float(ncol))

    # make name a category to preserve order; built with
    # `pandas.Categorical` because `astype('category', categories=...)`
    # is rejected by current pandas
    diffsel['name'] = pandas.Categorical(diffsel['name'],
            categories=list(names))

    (xbreaks, xlabels) = breaksAndLabels(diffsel['siteindex'].unique(),
            diffsel['site'].unique(), n=6)

    if white_bg:
        p = (ggplot(diffsel, aes(x='siteindex', y='diffsel',
                    color='direction', fill='direction'))
             + geom_step(size=0.3)
             + xlab('site')
             + ylab(ylabel)
             + scale_x_continuous(breaks=xbreaks, labels=xlabels)
             + scale_color_manual(COLOR_BLIND_PALETTE)
             + scale_fill_manual(COLOR_BLIND_PALETTE)
             + guides(color=False)
             + theme(panel_background=element_rect(fill='white'),
                     axis_line_x=element_line(color='black'),
                     axis_line_y=element_line(color='black'),
                     panel_grid=element_blank(),
                     panel_border=element_blank(),
                     strip_background=element_blank()
                     )
             )
    else:
        p = (ggplot(diffsel, aes(x='siteindex', y='diffsel',
                    color='direction'))
             + geom_step(size=0.4)
             + xlab('site')
             + ylab(ylabel)
             + scale_x_continuous(breaks=xbreaks, labels=xlabels)
             + scale_color_manual(COLOR_BLIND_PALETTE)
             + guides(color=False)
             )

    # skip faceting when the only sample has an empty / blank name
    if not ((len(names) == 1) and ((not names[0]) or names[0].isspace())):
        p += facet_wrap('~name', ncol=ncol)
    p += theme(figure_size=(4.6 * (0.3 + ncol), 1.9 * (0.2 + nrow)))
    p.save(plotfile, verbose=False)
    plt.close()
def main():
    """Command-line entry point: build a pileup and write it to disk.

    Parses the command-line arguments, opens the cooler file, builds the
    coordinate creator (``CoordCreator``) and pileup engine (``PileUpper``)
    from them, and then either

    * with ``--by_window``: writes a tab-separated enrichment table (and,
      with ``--save_all``, a JSON file with the individual pileups), or
    * otherwise: writes the pileup array with the arguments as a header.

    The output directory is created if it does not exist yet.

    Raises:
        FileExistsError: if the base-list or expected file is missing
            (historical exception type, kept for compatibility).
        NotImplementedError: if rescaling is combined with by-window mode.
        ValueError: for an even ``rescale_size`` or for by-window pileups
            requested together with incompatible options.
    """
    parser = parse_args_coolpuppy()
    args = parser.parse_args()

    if args.post_mortem:

        def _excepthook(exc_type, value, tb):
            traceback.print_exception(exc_type, value, tb)
            print()
            pdb.pm()

        sys.excepthook = _excepthook

    logging.basicConfig(format="%(message)s", level=getattr(logging, args.logLevel))
    logging.info(args)

    if args.seed is not None:
        np.random.seed(args.seed)

    # n_proc == 0 is passed on to the pileup code as -1
    nproc = -1 if args.n_proc == 0 else args.n_proc

    c = cooler.Cooler(args.coolfile)

    # NOTE(review): FileNotFoundError would be the natural type here; the
    # original type is kept so existing callers are unaffected.
    if not os.path.isfile(args.baselist) and args.baselist != "-":
        raise FileExistsError("Loop(base) coordinate file doesn't exist")

    balance = False if args.unbalanced else args.weight_name

    coolname = os.path.splitext(os.path.basename(c.filename))[0]
    if args.baselist != "-":
        bedname = os.path.splitext(os.path.basename(args.baselist))[0]
    else:
        bedname = "stdin"
        args.baselist = sys.stdin
    if args.bed2 is not None:
        bedname += "_vs_" + os.path.splitext(os.path.basename(args.bed2))[0]

    control = args.nshifts > 0
    if args.expected is not None:
        if control:
            logging.warning("With specified expected will not use controls")
        control = False
        if not os.path.isfile(args.expected):
            raise FileExistsError("Expected file doesn't exist")
        expected = pd.read_csv(args.expected, sep="\t", header=0)
    else:
        expected = False

    mindist = "auto" if args.mindist is None else args.mindist
    maxdist = np.inf if args.maxdist is None else args.maxdist
    minsize = 0 if args.minsize is None else args.minsize
    maxsize = np.inf if args.maxsize is None else args.maxsize

    if args.incl_chrs == "all":
        incl_chrs = np.array(c.chromnames).astype(str)
    else:
        incl_chrs = args.incl_chrs.split(",")

    if args.by_window and args.rescale:
        raise NotImplementedError(
            """Rescaling with by-window pileups is not supported"""
        )

    if args.rescale and args.rescale_size % 2 == 0:
        raise ValueError("Please provide an odd rescale_size")

    # The anchor may be given as "<region>" or "<region>_<name>"; it is
    # parsed exactly once here so the name suffix never reaches the region
    # parser.
    if args.anchor is not None:
        if "_" in args.anchor:
            anchor, anchor_name = args.anchor.split("_")
            anchor = cooler.util.parse_region_string(anchor)
        else:
            anchor = cooler.util.parse_region_string(args.anchor)
            anchor_name = args.anchor
    else:
        anchor = None

    if anchor:
        fchroms = [anchor[0]]
    else:
        chroms = np.array(c.chromnames).astype(str)
        excl_chrs = args.excl_chrs.split(",")
        fchroms = [
            chrom
            for chrom in chroms
            if chrom not in excl_chrs and chrom in incl_chrs
        ]

    CC = CoordCreator(
        baselist=args.baselist,
        resolution=c.binsize,
        bed2=args.bed2,
        bed2_ordered=args.bed2_ordered,
        anchor=anchor,
        pad=args.pad * 1000,
        chroms=fchroms,
        minshift=args.minshift,
        maxshift=args.maxshift,
        nshifts=args.nshifts,
        minsize=minsize,
        maxsize=maxsize,
        mindist=mindist,
        maxdist=maxdist,
        local=args.local,
        subset=args.subset,
        seed=args.seed,
    )
    CC.process()

    PU = PileUpper(
        clr=c,
        CC=CC,
        balance=balance,
        expected=expected,
        control=control,
        coverage_norm=args.coverage_norm,
        rescale=args.rescale,
        rescale_pad=args.rescale_pad,
        rescale_size=args.rescale_size,
    )

    if args.outdir == ".":
        args.outdir = os.getcwd()

    if args.outname == "auto":
        outname = f"{coolname}-{c.binsize / 1000}K_over_{bedname}"
        if args.nshifts > 0 and args.expected is None:
            outname += f"_{args.nshifts}-shifts"
        if args.expected is not None:
            outname += "_expected"
        if args.nshifts <= 0 and args.expected is None:
            outname += "_noNorm"
        if anchor:
            outname += f"_from_{anchor_name}"
        if args.local:
            outname += "_local"
        if minsize > 0 or maxsize < np.inf:
            outname += f"_len_{minsize}-{maxsize}"
        elif args.mindist is not None or args.maxdist is not None:
            outname += f"_dist_{mindist}-{maxdist}"
        if args.rescale:
            outname += "_rescaled"
        if args.unbalanced:
            outname += "_unbalanced"
        if args.coverage_norm:
            outname += "_covnorm"
        if args.subset > 0:
            outname += f"_subset-{args.subset}"
        if args.by_window:
            outname = f"Enrichment_{outname}.txt"
        else:
            outname += ".np.txt"
    else:
        outname = args.outname

    outpath = os.path.join(args.outdir, outname)
    outparent = os.path.dirname(outpath)

    if args.by_window:
        if CC.kind != "bed":
            raise ValueError("Can't make by-window pileups without making combinations")
        if args.local:
            raise ValueError("Can't make local by-window pileups")
        if anchor:
            raise ValueError("Can't make by-window combinations with an anchor")

        finloops = PU.pileupsByWindowWithControl(nproc=nproc)

        # A non-positive process count is not a valid pool size; None lets
        # the pool pick its own default instead.
        p = Pool(nproc if nproc > 0 else None)
        try:
            data = p.map(prepare_single, finloops.items())
        finally:
            p.close()

        data = pd.DataFrame(
            data,
            columns=[
                "chr",
                "start",
                "end",
                "N",
                "Enrichment1",
                "Enrichment3",
                "CV3",
                "CV5",
            ],
        )
        data = data.reindex(
            index=order_by_index(
                data.index, index_natsorted(zip(data["chr"], data["start"]))
            )
        )

        if outparent:
            os.makedirs(outparent, exist_ok=True)
        data.to_csv(outpath, sep="\t", index=False)
        # only reached when the write succeeded
        logging.info(f"Saved enrichment table to {outpath}")

        if args.save_all:
            outdict = {
                "%s:%s-%s" % key: (val[0], val[1].tolist())
                for key, val in finloops.items()
            }
            import json

            json_path = (
                os.path.join(args.outdir, os.path.splitext(outname)[0]) + ".json"
            )
            with open(json_path, "w") as fp:
                json.dump(outdict, fp)  # , sort_keys=True, indent=4)
            logging.info(f"Saved individual pileups to {json_path}")
    else:
        pup = PU.pileupsWithControl(nproc)
        if outparent:
            os.makedirs(outparent, exist_ok=True)
        save_array_with_header(pup, vars(args), outpath)
        # only reached when the write succeeded
        logging.info(f"Saved output to {outpath}")
def _heatmap_colors(df_means, df_stdev, treatments, cmpd):
    """Return the flat list of heatmap cell colours for one compound.

    For every ordered pair of treatments the mean +/- k*stdev intervals of
    `cmpd` are compared with ``getOverlap`` for k = 1, 2, 3; the first k at
    which they overlap selects 'green', 'yellow' or 'orange', and pairs that
    never overlap are 'red'.

    Both frames must carry a 'Treatment' column; rows are looked up by its
    string value. The result is ordered with the first treatment of the pair
    varying slowest, i.e. it lines up with ``treat1``/``treat2`` as built in
    ``heatmap_tab``.
    """
    means = df_means[cmpd].copy()
    means.index = df_means['Treatment'].astype(str)
    stdevs = df_stdev[cmpd].copy()
    stdevs.index = df_stdev['Treatment'].astype(str)

    # one (mean, stdev) lookup per treatment instead of one per pair
    stats = [(means.loc[t], stdevs.loc[t]) for t in treatments]
    bands = ((1, 'green'), (2, 'yellow'), (3, 'orange'))

    colors = []
    for m1, sd1 in stats:
        for m2, sd2 in stats:
            marker = 'red'
            for k, color in bands:
                first = [m1 - k * sd1, m1 + k * sd1]
                second = [m2 - k * sd2, m2 + k * sd2]
                if getOverlap(first, second) > 0:
                    marker = color
                    break
            colors.append(marker)
    return colors


def heatmap_tab(df_means, df_stdev, Time, Treatments, number_cmpds_run):
    """Build the 'Heatmap' tab comparing treatments for a selected compound.

    Args:
        df_means: DataFrame of per-treatment means; the compound columns
            start after the ``len(Treatments) + len(Time)`` leading columns.
        df_stdev: DataFrame of standard deviations with the same compound
            columns. A 'Treatment' column is added to it in place.
        Time: list of column names; only ``Time[0]`` is used, for sorting
            and as the first part of the treatment label.
        Treatments: list of treatment column names (any number); they are
            converted to strings and joined into the treatment label.
        number_cmpds_run: forwarded to ``cmpd_options_func``.

    Returns:
        A bokeh ``Panel`` holding a compound selector and a categorical
        heatmap whose cells are coloured by interval overlap.
    """
    n_leading = len(Treatments) + len(Time)
    Cmpd0 = df_means.columns[n_leading]
    cmpd_options = cmpd_options_func(df_means, n_leading, number_cmpds_run)

    # natural-sort the rows by the first time column
    df_means = df_means.reindex(index=order_by_index(
        df_means.index, index_natsorted(df_means[Time[0]])))

    for column in Treatments:
        df_means[column] = df_means[column].astype('str')

    df_means['Treatment'] = df_means[Time[0]].str.cat(df_means[Treatments], sep=' - ')
    # assignment aligns on the index, so df_stdev gets matching labels
    df_stdev['Treatment'] = df_means['Treatment']

    treatments = list(df_means['Treatment'])
    n_treat = len(treatments)
    # cell coordinates: treat1 varies slowest, treat2 fastest
    t1 = [tt for tt in treatments for _ in range(n_treat)]
    t2 = treatments * n_treat

    source = ColumnDataSource({
        'treat1': t1,
        'treat2': t2,
        'colors': _heatmap_colors(df_means, df_stdev, treatments, Cmpd0),
    })

    p = figure(title="Categorical Heatmap", x_range=treatments, y_range=treatments,
               plot_height=1000, plot_width=1000)
    p.rect(x='treat1', y='treat2', color='colors', width=1, height=1,
           line_color='black', line_width=2, source=source)
    p.xaxis.major_label_orientation = np.pi / 2

    select = Select(title='Select your compound:', value=Cmpd0, options=cmpd_options)

    def update_data(attrname, old, new):
        # recolour the same grid for the newly selected compound
        source.data = {
            'treat1': list(t1),
            'treat2': list(t2),
            'colors': _heatmap_colors(df_means, df_stdev, treatments, select.value),
        }

    select.on_change('value', update_data)

    # Create a row layout
    inputs = widgetbox(select)
    layout = row(inputs, p, width=1500)
    tab = Panel(child=layout, title='Heatmap')
    return tab
def test_order_by_index_sorts_list_according_to_order_of_integer_list():
    """``order_by_index`` returns the items picked in the given index order."""
    items = ['num3', 'num5', 'num2']
    positions = [2, 0, 1]
    picked_by_hand = [items[pos] for pos in positions]
    assert order_by_index(items, positions) == ['num2', 'num3', 'num5']
    assert order_by_index(items, positions) == picked_by_hand