def _get_chosen_spacial_prob(self, spacial_probs, spacial_choice):
    """Return, per step, the probability of the chosen coordinate.

    The per-screen probability tensors are stacked on a new last axis,
    indexed by ``spacial_choice`` and then by the screen the action
    belongs to (``self.action_input`` modulo ``self.num_screen_dims``).
    """
    # [T, screen_dim, num_screen_dimensions]
    stacked = tf.stack(spacial_probs, axis=-1)
    # [T, num_screen_dimensions]
    per_screen = util.index(stacked, spacial_choice)
    screen_id = self.action_input % tf.convert_to_tensor(self.num_screen_dims)
    # [T]
    return util.index(per_screen, screen_id)
def _get_chosen_selection_probs(self, selection_probs, selection_choice):
    """Return, per step, the probability of the chosen unit selection.

    :param selection_probs: Tensor of shape
        [T, num_units, num_selection_actions]
    :param selection_choice: Tensor of shape [T] of type int
    :return: Tensor of shape [T]
    """
    # [T, num_selection_actions]
    per_action = util.index(selection_probs, selection_choice)
    action_count = self.interface.num_unit_selection_actions
    # Selection actions are numbered after the screen actions, so shift
    # the action id down before wrapping it into the selection range.
    shifted_action = self.action_input - self.num_screen_dims
    action_id = shifted_action % tf.convert_to_tensor(action_count)
    # [T]
    return util.index(per_action, action_id)
def make_input(self, s_file='untitled', options=None):
    """Write ``self.table`` as a tab-delimited ``<s_file>.input`` file.

    ``options`` is merged into ``self.input_opt``. The keys read here are
    DATA_COLS, ID, DESCRIPTION, WEIGHT_COL, EXP_WEIGHT, GENE_WEIGHT,
    GENE_NORMALIZE and NORMALIZE_METHOD. The output has a header line, an
    ``Exp`` weight line, then one line per table row holding id,
    description, weight and the data columns. On return ``self.input``
    holds ``s_file`` without any ``.input`` suffix.
    """
    if self.table is None:
        util.error_msg('Clustering.make_input: missing Clustering.table!')
    S = self.table.header()
    S_up = [s.upper() for s in S]
    opt = self.input_opt
    opt.update(options or {})
    self.input_opt = opt

    # list.index() raises ValueError for a missing item instead of
    # returning -1, so missing columns are detected by membership.
    S_miss = [s for s in opt['DATA_COLS'] if s not in S]
    if len(S_miss) > 0:
        util.error_msg('Clustering.make_input: missing data column: ' + ", ".join(S_miss))

    i_id = util.index(opt['ID'], S)
    if i_id < 0:
        # Fall back to a column named GENE (case-insensitive); util.index
        # yields -1 when absent so the check below can report it.
        i_id = util.index('GENE', S_up)
    if i_id < 0:
        util.error_msg('Clustering.make_input: no column is specified as the ID!')
    opt['ID'] = S[i_id]

    if type(opt['DESCRIPTION']) is str:
        opt['DESCRIPTION'] = [opt['DESCRIPTION']]
    I_des = [util.index(s, S) for s in opt['DESCRIPTION'] if util.index(s, S) >= 0]
    if len(I_des) == 0:
        # No usable description column: reuse the ID column.
        I_des = [i_id]
        opt['DESCRIPTION'] = [opt['ID']]
    else:
        for i in I_des:
            self.table.iloc[:, i] = util.sarray2sarray(self.table.iloc[:, i])

    i_w = util.index(opt['WEIGHT_COL'], S)
    opt['DATA_COLS'] = self.get_default_exp_cols(opt['DATA_COLS'])
    n_exp = len(opt['DATA_COLS'])
    if n_exp == 0:
        util.error_msg('Clustering.make_input: no data column is specified!')

    S_out = []
    S_out.append('Gene\tDescription\tWeight\t' + '\t'.join(opt['DATA_COLS']))
    if opt['EXP_WEIGHT'] is None or len(opt['EXP_WEIGHT']) != n_exp:
        S_out.append('Exp\t\t' + '\t1' * n_exp)
    else:
        S_out.append('Exp\t\t\t' + '\t'.join(util.rarray2sarray(opt['EXP_WEIGHT'], s_format='%g', s_null=1.0)))
    #df.fillna('', inplace=True)
    i_cols = [S.index(s) for s in opt['DATA_COLS']]

    if opt['GENE_WEIGHT'] is not None and len(opt['GENE_WEIGHT']) == len(self.table):
        if opt['WEIGHT_COL'] == '':
            opt['WEIGHT_COL'] = 'WEIGHT'
        self.table[opt['WEIGHT_COL']] = opt['GENE_WEIGHT']
        # The weight column may have just been created, so its position
        # has to be looked up again or the supplied weights are ignored.
        i_w = util.index(opt['WEIGHT_COL'], self.table.header())

    l_zscore = opt['GENE_NORMALIZE'] and opt['NORMALIZE_METHOD'] == 'Z'
    for i in range(len(self.table)):
        s = (str(self.table.iloc[i, i_id]) + '\t'
             + ":".join(self.table.iloc[i, I_des]) + '\t'
             + str(self.table.iloc[i, i_w] if i_w >= 0 else 1))
        R = np.array([x for x in self.table.iloc[i, i_cols]])
        if l_zscore:
            valid = util.no_nan(R)
            if len(valid) > 1:
                # Mean and standard deviation both come from the NaN-free
                # values; a std over R would be NaN whenever R has a gap.
                R = (R - np.mean(valid)) / np.std(valid, ddof=1)
        s += '\t' + '\t'.join(['' if pd.isnull(x) else str(x) for x in R])
        S_out.append(s)

    if re.search(r'\.input$', s_file) is not None:
        s_file = re.sub(r'\.input$', '', s_file)
    util.save_list(s_file + ".input", S_out, s_end='\n')
    self.input = s_file
def _train_log_probs_with_units(self, nonspacial_probs, spacial_probs_x,
                                spacial_probs_y, selection_probs):
    """Log-probability of the taken action, including unit selection.

    The non-spacial term is always counted. The spacial (x, y) term is
    added only where the action id is below ``self.num_screen_dims``. The
    selection term is added only where the action id falls in the
    ``self.num_select_actions`` ids that follow.
    """
    eps = 1e-10  # keeps tf.log away from log(0)
    action = self.action_input
    screen_count = self.num_screen_dims

    log_nonspacial = tf.log(util.index(nonspacial_probs, action) + eps)

    # TODO: This only works if all screen dimensions are the same. Should pad to greatest length
    chosen_y = self._get_chosen_spacial_prob(spacial_probs_y, self.spacial_input[:, 1])
    chosen_x = self._get_chosen_spacial_prob(spacial_probs_x, self.spacial_input[:, 0])
    chosen_selection = self._get_chosen_selection_probs(
        selection_probs, self.unit_selection_input)

    log_selection = tf.log(chosen_selection + eps)
    log_spacial = tf.log(chosen_x + eps) + tf.log(chosen_y + eps)

    spacial_term = tf.where(action < screen_count,
                            x=log_spacial,
                            y=tf.zeros_like(log_spacial))
    total = log_nonspacial + spacial_term

    selects_unit = tf.logical_and(
        action >= screen_count,
        action < screen_count + self.num_select_actions)
    selection_term = tf.where(selects_unit,
                              x=log_selection,
                              y=tf.zeros_like(log_selection))
    return total + selection_term
def f(s):
    """Yield one extended substitution per stored fact that matches ``args``.

    ``args`` is reified against ``s``. Positions that are still variables
    get bound to the fact's value at that position; positions holding
    values must equal the fact's values there.
    """
    args2 = reify(args, s)
    subsets = [self.index[key] for key in enumerate(args)
               if key in self.index]
    if subsets:
        # we are able to reduce the pool early
        facts = intersection(*sorted(subsets, key=len))
    else:
        facts = self.facts
    varinds = [i for i, arg in enumerate(args2) if isvar(arg)]
    valinds = [i for i, arg in enumerate(args2) if not isvar(arg)]
    variables = index(args2, varinds)
    vals = index(args2, valinds)
    assert not any(var in s for var in variables)

    # Scan the reduced pool rather than every fact. The value comparison
    # is kept, so indexed positions only narrow the candidates.
    return (
        merge(dict(zip(variables, index(fact, varinds))), s)
        for fact in facts
        if vals == index(fact, valinds)
    )
def main():
    """Plot the KMeans score over a grid of cluster counts and bin counts.

    Command line: ``input_filename`` and an optional ``--log`` level.
    Frames 0..2699 are read from the input, the H channel of each frame
    is histogrammed with several bin counts, and the exemplifier pipeline
    is scored for several cluster counts. One curve is drawn per bin
    count.

    Raises:
        ValueError: if ``--log`` is not a valid logging level name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_filename')
    parser.add_argument("--log", type=str, default='INFO',
                        help="Logging setting (e.g., INFO, DEBUG)")
    args = parser.parse_args()

    # Setting logging parameters
    numeric_level = getattr(logging, args.log.upper(), None)
    if not isinstance(numeric_level, int):
        # Report the value the user passed; there is no `loglevel` name.
        raise ValueError('Invalid log level: %s' % args.log)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(message)s')

    exemplifier = FrameExemplifier()

    # Load data
    start, stop, step = 0, 2700, 1
    #start, stop, step = None, None, None
    logging.info('Loading from %s (start frame=%s, end frame=%s, increment=%s)',
                 args.input_filename, *map(str, [start, stop, step]))
    sample_inds = list(range(start, stop, step))
    all_frames = util.grab_frame(args.input_filename)
    frame_sample = (util.index(all_frames, sample_inds))

    # Flatten H channel of every item in sample.
    # np.vstack needs a real sequence; newer NumPy rejects generators.
    H_rows = np.vstack([
        cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:, :, 0].ravel()
        for im in frame_sample
    ])

    n_exemplars_list = np.linspace(3, 50, 10, dtype=int)
    n_bins_list = np.linspace(10, 45, 5, dtype=int)
    scores = np.zeros((len(n_bins_list), len(n_exemplars_list), ))
    for j, n_bins in enumerate(n_bins_list):
        X = np.vstack([
            np.histogram(H, bins=n_bins, range=(0, 180.))[0]
            for H in H_rows
        ])
        for i, n_exemplars in enumerate(n_exemplars_list):
            kmeans_obj = exemplifier.pipeline.steps[-1][-1]
            kmeans_obj.n_clusters = n_exemplars
            # Return value unused; the call is kept because it runs before
            # scoring (presumably it fits the pipeline -- TODO confirm).
            exemplifier.from_features(X)
            scores[j, i] = exemplifier.pipeline.score(X)

    plt.figure()
    for n_bins, score_list in zip(n_bins_list, scores):
        plt.plot(n_exemplars_list, score_list, label='nbins=%i' % (n_bins))
    #plt.imshow(scores, interpolation='nearest')
    plt.legend(loc='best')
    plt.xlabel('Number of clusters')
    plt.ylabel('KMeans score')
    plt.title('KMeans score versus #clusters and #bins')
    plt.show()
def _train_log_probs(self, nonspacial_probs, spacial_probs_x, spacial_probs_y):
    """Log-probability of the taken action (no unit selection).

    The spacial (x, y) term is added only where the action id is below
    ``self.num_screen_dims``; elsewhere only the non-spacial term counts.
    """
    chosen_action_prob = util.index(nonspacial_probs, self.action_input)
    log_nonspacial = tf.log(chosen_action_prob + 1e-8)

    # TODO: This only works if all screen dimensions are the same. Should pad to greatest length
    chosen_y = self._get_chosen_spacial_prob(spacial_probs_y, self.spacial_input[:, 1])
    chosen_x = self._get_chosen_spacial_prob(spacial_probs_x, self.spacial_input[:, 0])
    log_spacial = tf.log(chosen_x + 1e-7) + tf.log(chosen_y + 1e-7)

    uses_screen = self.action_input < self.num_screen_dims
    spacial_term = tf.where(uses_screen,
                            x=log_spacial,
                            y=tf.zeros_like(log_spacial))
    return log_nonspacial + spacial_term
def read_cdt(s_file):
    """Read a ``.cdt`` file into a table plus its two weight vectors.

    ``.cdt`` is appended to ``s_file`` when missing. Rows before the
    ``EWEIGHT`` row are skipped. The ``EWEIGHT`` row fixes where the data
    columns start: at its first non-empty cell after the label.

    Returns:
        (t, R_exp, R_gene): ``t`` is a DataFrame with the GENE and NAME
        columns (when present) followed by the data columns, ``R_exp`` is
        the parsed EWEIGHT row, and ``R_gene`` is the per-row weight.
    """
    if not s_file.endswith('.cdt'):
        s_file += '.cdt'
    if not os.path.exists(s_file):
        util.error_msg("File not exist: " + s_file + "!")

    l_start = False
    R_exp = []
    R_gene = []
    data = []
    offset = 0
    # Context manager so the handle is closed even if a row fails to parse.
    with open(s_file) as f:
        S_header = f.readline().strip().split("\t")
        i_w = util.index("GWEIGHT", S_header)
        i_gene = util.index('GENE', S_header)
        i_name = util.index('NAME', S_header)
        for line in f:
            S = line.strip().split("\t")
            if S[0] == 'EWEIGHT':
                for i in range(1, len(S)):
                    if S[i] != "":
                        offset = i
                        break
                tmp = []
                if i_gene >= 0:
                    tmp.append(S_header[i_gene])
                if i_name >= 0:
                    tmp.append(S_header[i_name])
                S_header = tmp + S_header[offset:]
                R_exp = util.sarray2rarray(S[offset:])
                if i_w < 0:
                    # No GWEIGHT header: take the column just before the data.
                    i_w = offset - 1
                l_start = True
            elif l_start:
                one = []
                if i_gene >= 0:
                    one.append(S[i_gene])
                if i_name >= 0:
                    one.append(S[i_name])
                one.extend(util.sarray2rarray(S[offset:]))
                data.append(one)
                R_gene.append(float(S[i_w]))
    t = pd.DataFrame(data, columns=S_header)
    return (t, R_exp, R_gene)
def from_table(self, t_edge):
    """Rebuild ``self.data`` as a symmetric adjacency dict from an edge table.

    ``t_edge`` must have ``Gene_A`` and ``Gene_B`` columns. A ``TYPE``
    column of 'Direct' is added to it when absent. Rows typed "Indirect"
    or "ppp" are skipped unless ``self.allow_indirect`` is set. The edge
    score comes from the column whose upper-cased name is SCORE, or is 1
    when there is no such column.
    """
    self.data = {}
    if "TYPE" not in t_edge.header():
        t_edge['TYPE'] = ['Direct'] * len(t_edge)
    upper_header = [name.upper() for name in t_edge.header()]
    score_col = util.index('SCORE', upper_header)
    has_score = score_col >= 0

    for row in range(len(t_edge)):
        if not self.allow_indirect:
            if t_edge['TYPE'].iloc[row] in ["Indirect", "ppp"]:
                continue
        gene_a = t_edge['Gene_A'].iloc[row]
        gene_b = t_edge['Gene_B'].iloc[row]
        self.data.setdefault(gene_a, {})
        self.data.setdefault(gene_b, {})
        weight = t_edge.iat[row, score_col] if has_score else 1
        # Store the edge in both directions.
        self.data[gene_a][gene_b] = weight
        self.data[gene_b][gene_a] = weight
def make_dashboard_part(part_meta, template_params, sub_part_function=None):
    """Expand one dashboard part template into a list of distinct parts.

    The template named by ``part_meta['_base']`` is loaded and run through
    ``metaify_template_string``. Only parameters that the template
    references are expanded. Each combination of their values yields one
    part; with no combinations the template is rendered once with no
    replacements. ``sub_part_function``, when given, is called on every
    part before it is de-duplicated into the result.
    """
    template = metaify_template_string(get_template(part_meta['_base']),
                                       part_meta)
    expandable = [
        candidate for candidate in template_params
        if templating.find_parameter(template, candidate['type']) >= 0
    ]
    combinations = get_all_param_value_combinations(expandable)
    result = []
    logging.debug('part_template:{}'.format(template))
    logging.debug('template_params:{}'.format(template_params))
    logging.debug('combinations:{}'.format(combinations))

    def _add_part(replacements, params_for_children):
        # Render, parse, let the callback fill in children, then keep the
        # part only if an equal one has not been produced already.
        part = json.loads(templating.replace(template, replacements))
        if sub_part_function:
            sub_part_function(part, part_meta, params_for_children)
        if part not in result:
            result.append(part)

    if not combinations:
        _add_part({}, template_params)
        return result

    for combination in combinations:
        replacements = {}
        narrowed_params = template_params[:]
        for chosen in combination:
            param_type = chosen['type']
            param_value = chosen['value']
            replacements[param_type] = param_value
            # Pin this parameter to its chosen value for nested parts.
            idx = util.index(narrowed_params,
                             lambda x: x['type'] == param_type)
            narrowed_params[idx] = {
                'type': param_type,
                'values': [param_value]
            }
        logging.debug('replacements:{}'.format(replacements))
        logging.debug('sub_template_params:{}'.format(narrowed_params))
        _add_part(replacements, narrowed_params)
    return result
def insert_table(slide, t, top=None, col_width=None):
    """Add table ``t`` to ``slide`` as a table shape and return the table.

    Args:
        slide: slide whose ``shapes.add_table`` is used.
        t: table providing ``header()``, ``col_index()``, ``iat`` and
            ``len()``.
        top: vertical position; a fixed default is used when None.
        col_width: optional mapping of column name to width. The columns
            not listed share the remaining width equally, but never get
            less than ``914400 // 2`` each.

    Null cells are written as empty strings. When a ``_Color_`` column is
    present its cells are given a solid fill, coloured from the cell text
    if that is six upper-case hex digits (a leading ``#`` is allowed).
    """
    (left, width, height) = (296260, 8551480, 5078313)
    if top is None:
        top = 1347965 + 25 * 914400 // 72
    rows = len(t)
    cols = len(t.header())
    table = slide.shapes.add_table(rows + 1, cols, left, top, width, height).table
    #table.columns[0].width = Inches(2.0)
    #table.columns[1].width = Inches(4.0)
    # write column headings
    S_header = t.header()
    if col_width is not None:
        width_remain = width
        for k, v in col_width.items():
            i = t.col_index(k)
            table.columns[i].width = int(v)
            width_remain -= v
        min_width = 914400 // 2
        n_auto = cols - len(col_width)
        if n_auto == 0:
            # Every column has an explicit width; there is nothing to
            # divide the remainder by.
            w = min_width
        else:
            w = int(max(width_remain / n_auto, min_width))
        for i, x in enumerate(S_header):
            if x not in col_width:
                table.columns[i].width = w
    table.rows[0].height = 14 * 914400 // 72
    for i in range(cols):
        table.cell(0, i).text = S_header[i]
        for j in range(rows):
            table.cell(j + 1, i).text = '' if pd.isnull(t.iat[j, i]) else str(t.iat[j, i])
    if '_Color_' in S_header:
        # used for color legend
        j = util.index('_Color_', S_header)
        for i in range(1, rows + 1):
            table.cell(i, j).fill.solid()
            s_hex = t.iat[i - 1, j].replace('#', '')
            if re.search(r'^[A-F0-9]{6}$', s_hex):
                table.cell(i, j).fill.fore_color.rgb = RGBColor.from_string(s_hex)
    return table
def cluster_frames():
    """Cluster image patches from a video and show or save the segmentation.

    Command line: ``input_filename``, an optional ``data_proportion``
    (default 1.0), ``--log`` and ``-o/--output_filename``.

    Six fixed frames are cut into d x d patches. A random sample of the
    patches trains a scaler -> ZCA -> KMeans pipeline. Every frame of the
    video is then coloured by the cluster predicted for each patch. The
    coloured frames are written to ``--output_filename`` when given and
    displayed otherwise.

    Raises:
        ValueError: if ``--log`` is not a valid logging level name.
    """
    seed = 0
    np.random.seed(seed)

    parser = argparse.ArgumentParser()
    parser.add_argument('input_filename')
    parser.add_argument("data_proportion", nargs='?', type=float, default=1.,
                        help="Proportion of full dataset to be used")
    parser.add_argument("--log", type=str, default='INFO',
                        help="Logging setting (e.g., INFO, DEBUG)")
    parser.add_argument('-o', '--output_filename',
                        help='Filename of video to be saved (default: does not save)')
    args = parser.parse_args()

    # Setting logging parameters
    numeric_level = getattr(logging, args.log.upper(), None)
    if not isinstance(numeric_level, int):
        # Report the value the user passed; there is no `loglevel` name.
        raise ValueError('Invalid log level: %s' % args.log)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(message)s')

    sample_inds = [212, 699, 988, 1105, 2190, 2318]
    logging.info('Loading %i images... ', len(sample_inds))

    # Load data
    d = 6  # size of patch
    all_frames = util.grab_frame(args.input_filename)
    im_originals = list(util.index(all_frames, sample_inds))
    im_height, im_width = im_originals[0].shape[:2]
    all_patch_rows = np.array(list(
        patch.ravel()
        for im in im_originals
        for patch in util.yield_windows(im, (d, d), (1, 1))
    ))
    num_rows_per_im = len(all_patch_rows) // len(im_originals)
    num_im = len(im_originals)
    logging.info('Loaded %i examples from %i images',
                 len(all_patch_rows), len(im_originals))

    # Randomly sample a subset of the data
    sample_size = int(args.data_proportion * len(all_patch_rows))
    inds = np.random.choice(len(all_patch_rows), sample_size)
    X = all_patch_rows[inds]
    logging.info('Sampled %.1f%% of dataset = %i',
                 100 * args.data_proportion, sample_size)

    ############################# Define pipeline #############################
    # Each entry is (class, constructor kwargs); `steps` below picks which
    # of them are actually used.
    std_scaler = (sklearn.preprocessing.StandardScaler, {})
    coates_scaler = (CoatesScaler.CoatesScaler, {})
    pca = (sklearn.decomposition.PCA, {'whiten': True, 'copy': True})
    zca = (ZCA.ZCA, {'regularization': .1})
    n_clusters = 100
    mbkmeans = (sklearn.cluster.MiniBatchKMeans, {
        'n_clusters': n_clusters,
        'batch_size': 3000,
    })
    skmeans = (SphericalKMeans.SphericalKMeans, {
        'n_clusters': n_clusters,
        'max_iter': 10,
    })
    kmeans = (sklearn.cluster.KMeans, {
        'n_clusters': n_clusters,
        #'random_state': np.random.RandomState,
        #'n_jobs': -1,
        #'n_init': 1,
        #'max_iter': 10,
    })

    # Define pipeline
    steps = [coates_scaler, zca, kmeans]
    pipeline = sklearn.pipeline.make_pipeline(
        *[fun(**kwargs) for fun, kwargs in steps])

    # Define pointers to certain steps for future processing
    whitener = pipeline.steps[1][1]  # second step
    dic = pipeline.steps[-1][1]  # last step

    steps = [(obj.__class__, obj.get_params()) for name, obj in pipeline.steps]
    util.print_steps(steps)

    ######################### Train pipeline ##################################
    logging.info('Training model...')
    pipeline.fit(X)
    logging.info('done.')

    ######################### Display atoms of dictionary #####################
    frames = util.grab_frame(args.input_filename)
    patch_row_chunks = (
        np.array(list(
            patch.ravel()
            for patch in util.yield_windows(im, (d, d), (1, 1))))
        for im in frames)

    def im_displays():
        # Lazily turn each frame's patches into a coloured cluster map.
        for patch_rows in patch_row_chunks:
            y = pipeline.predict(patch_rows)
            # Map to [0, 1) so that imshow scales across entire colormap spectrum
            y = y / n_clusters
            newshape = (im_height - d + 1, im_width - d + 1, )
            segmentation = np.reshape(y, newshape)

            # Apply color map and remove alpha channel
            cmap = plt.cm.Set1
            colored_segmentation = cmap(segmentation)[:, :, :3]
            colored_segmentation = (colored_segmentation * 255).astype(np.uint8)
            yield colored_segmentation

    #frames = itertools.islice(im_displays(), 5)
    frames = im_displays()

    save_video = args.output_filename is not None
    if save_video:
        write_frames_to_disk(frames, args.output_filename)
    else:
        display_frames(frames)

    # NOTE(review): this return makes everything below unreachable; the
    # dictionary-atom plots look deliberately switched off -- confirm.
    return

    logging.info('Displaying atoms of dictionary')
    # Inverse whiten atoms of dictionary
    atom_rows = dic.cluster_centers_
    if hasattr(whitener, 'inverse_transform'):
        atom_rows = whitener.inverse_transform(atom_rows)

    plt.figure()
    for i, atom_row in enumerate(atom_rows):
        patch = atom_row.reshape(d, d, -1)[::-1]
        plt.subplot(10, 10, i + 1)
        plt.imshow(patch, interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
    plt.suptitle('Atoms of dictionary learnt from %i patches by %s' % \
                 (len(atom_rows), dic.__class__.__name__))

    plt.figure()
    displayed_patches = X[np.random.choice(len(X), 100)]
    for i, patch in enumerate(displayed_patches):
        plt.subplot(10, 10, i + 1)
        plt.imshow(patch.reshape([d, d, -1])[:, :, ::-1], interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
    plt.show()
def map(self, X, n_CPU=0, l_quit=None):
    """Send every element of X to a worker and return the results in input order.

    X: list[input tuple], list of input parameters.
        If workers were started with f=None, each element in X is passed to the wrapper task.
        In that case, we expect X to be a tuple (or list) and the first element of X must be
        either the method pointer or its registered name.
        However, many methods, such as instance methods or a func within a func, cannot be
        pickled, therefore the method cannot be sent over the pipe. We should pre-register
        such methods and call them by name. Need example later.
    n_CPU: int, default 0, maximum number of workers this call keeps busy at once.
        0 means self.n_use; the value is capped at self.n_CPU.
    l_quit: boolean, default None, if specified, controls whether workers quit or not
        after tasks are processed. If not, workers wait for future tasks.
        None means self.QUIT.
    return list, one result per element of X, ordered like X.

    If no worker has been started (or n_CPU ends up 0) the tasks are run
    in the calling process through self.wrapper instead.
    """
    # very similar to the idea in https://stackoverflow.com/questions/3288595/multiprocessing-how-to-use-pool-map-on-a-function-defined-in-a-class, author klaus se
    l_quit = l_quit if l_quit is not None else self.QUIT
    #if self.is_busy():
    #    util.error_msg('Works are still busy!')
    if n_CPU == 0: n_CPU = self.n_use  # defaults to n_use
    n_CPU = min(
        n_CPU, self.n_CPU
    )  # one could start 8 CPUs, but only use 4 for mapping, if the work takes lots of memory
    res = []  # collected (input index, result) pairs
    n_input = len(X)
    # Nothing to run and no quit requested: return the empty list.
    if n_input == 0 and not l_quit: return res
    #print '=============', self.c_proc
    if not self.has_started() and n_input > 0:
        util.warn_msg(
            'Please start processes first, no worker is running!')
        util.warn_msg('However, we will process the task with ONE cpu!!!')
        return [self.wrapper(x) for x in X]
    if n_input > 0 and n_CPU == 0:
        return [self.wrapper(x) for x in X]
    s_pid = str(multiprocessing.current_process().pid)
    # has_my_job[k] is True while worker k holds a task handed out by THIS call.
    has_my_job = [False for i in range(self.n_CPU)]

    def engine():
        # Debug dump of the bookkeeping state.
        print(
            "================================================================="
        )
        print("PID: ", str(multiprocessing.current_process().pid))
        print("WORK STATUS: ", self.work_status)
        print("HAS MY JOB: ", has_my_job)
        print("JOB IS DONE: ", self.job_is_done)
        print("N_RUNNING: (%d, %d) " %
              (self.n_running[0], self.n_running[1]))
        print(
            "================================================================="
        )

    def is_busy():
        # True while any worker still holds a task from this call.
        return sum(has_my_job) > 0

    def process_out(out):
        # Handle one (index, payload) message taken from a worker's output queue.
        i, x = out
        if i is None:
            # An index of None means the message is not a task result.
            self.n_running[0] -= 1
            # I modify the original code, so that we can join the process and release it as soon as possible
            if type(x) is str:
                # A string payload is reported as an exception and ends the program.
                print("Exception> " + x)
                exit()
            else:
                # Otherwise the payload is a key of self.c_proc: join that process and forget it.
                if self.DEBUG:
                    print("Progress: %d processes remaining. Stopping %d" %
                          (self.n_running[0], x))
                #print self.c_proc.keys()
                self.c_proc[x].join()
                del self.c_proc[x]
                if self.DEBUG: print("Progress: process %d stopped." % x)
        else:
            res.append(out)
            if self.DEBUG:
                print("Progress: %d of %d item calculated." %
                      (len(res), n_input))

    def fetch(l_lock=False):
        # Collect finished results from workers holding this call's tasks.
        # With l_lock=False it returns as soon as one pass finds nothing;
        # with l_lock=True it waits on self.mail between passes until
        # nothing is outstanding.
        while is_busy():
            l_fetch_something = False
            for i_worker in range(self.n_CPU):
                if has_my_job[i_worker] and self.job_is_done[i_worker]:
                    try:
                        (i, x) = self.q_out[i_worker].get()
                        process_out((i, x))
                        self.n_running[1] -= 1
                        # Clear all three flags so the worker can be reused.
                        self.work_status[i_worker] = False
                        has_my_job[i_worker] = False
                        self.job_is_done[i_worker] = False
                        l_fetch_something = True
                        if self.DEBUG:
                            print(">>>A1")
                            engine()
                        with self.mail:
                            self.mail.notify_all()
                    except Exception as e:
                        print(
                            "ERROR> Fail to fetch results from worker: %d"
                            % i_worker)
                        print(traceback.format_exc())
                        return
            if not l_fetch_something:
                if l_lock:
                    with self.mail:
                        self.mail.wait(timeout=8.5 + random.random() * 3)
                else:
                    return

    if self.DEBUG: print("DEBUG> self.n_running: ", self.n_running)
    if self.DEBUG:
        print("DEBUG> self.c_proc.keys(): ", list(self.c_proc.keys()))
    ###ZHOU FEB16,2016
    #self.n_running[1]+=1
    ###
    i = 0
    # Hand out tasks one at a time; i only advances once task i is queued.
    while (i < n_input):
        x = X[i]
        if self.DEBUG: print("fetch job entry %d " % i)
        #print self.work_status
        self.lock.acquire()
        j = util.index(False, self.work_status)  # find an idle worker
        l_put_something = False
        if j >= 0 and sum(
                has_my_job
        ) < n_CPU:  #j>=0 and j<n_CPU: # we only use up to n_CPU, even if there are more workers
            #print "assing job to %d" % j
            self.work_status[j] = True  # flag it as busy
            has_my_job[j] = True
            if self.DEBUG:
                print("DEBUG> self.c_proc.keys(): ",
                      list(self.c_proc.keys()))
            ###ZHOU FEB16,2016
            self.n_running[1] += 1
            ###
            self.lock.release()
            self.q_in[j].put((i, j, x))  # assign task
            i += 1
            if self.DEBUG:
                print("Progress: send input %d of %d items." % (i, len(X)))
            l_put_something = True
            if self.DEBUG:
                print(">>>A2")
                engine()
        else:
            self.lock.release()
        # we constantly removing items from the output queue, so that the process can release some memory
        fetch()
        if not l_put_something:
            # No worker could take the task: wait (with a timeout) for a
            # notification, then try to collect again.
            with self.mail:
                self.mail.wait(timeout=8.5 + random.random() * 3)
            fetch()
    # All tasks are queued; block until every one has been collected.
    while (is_busy()):
        fetch(True)
    if self.DEBUG:
        print(">>>A3")
        engine()
    self.lock.acquire()
    ###ZHOU FEB16,2016
    #self.n_running[1]-=1
    ###
    if self.DEBUG:
        print(">>>QUIT=" + ("True" if l_quit else "False"))
        print(">>>n_running[1]=%d" % self.n_running[1])
    if l_quit and self.n_running[1] == 0:
        # I am the last one running map()
        if self.DEBUG:
            print(">>>A4")
            engine()
        # Queue a (None, i, None) message to every worker (presumably the
        # stop signal -- TODO confirm against the worker loop) and mark
        # the worker as owned so the replies are collected below.
        for i in range(self.n_CPU):
            self.q_in[i].put((None, i, None))
            self.work_status[i] = True
            self.n_running[1] += 1
            has_my_job[i] = True
        self.lock.release()
        while (is_busy()):
            fetch(True)
        if self.DEBUG: engine()
    else:
        self.lock.release()
    if self.DEBUG:
        for i, x in enumerate(res):
            print('>>>A4 ', i, type(x), type(x[0]), type(x[1]))
    # Results arrive in completion order; restore input order before
    # dropping the indices.
    res.sort(key=lambda x: x[0])
    return [x for i, x in res]
def hierarchical(self, options=None):
    """Run the external hierarchical-clustering binary on the prepared input.

    ``options`` is merged into ``self.cluster_opt``. The keys read here
    are BIN, FINGERPRINT, SKIP_DM, GENE, EXP, GENE_METRICS, EXP_METRICS,
    DMG, DME, HAS_NULL, OPTIMIZE and CLEANUP.

    A command line is assembled for either ``Clustering.BIN_CWC`` or
    ``Clustering.BIN_HYB``, reading ``<self.input>.input`` and writing
    under ``self.input``. Distance-matrix files are built and saved first
    when the chosen metric needs one and none was supplied. The command
    is run through ``util.unix``, then the output is post-processed:
    array-line stripping, optional optimization, JTV creation and
    optional cleanup.
    """
    if self.input=='':
        util.error_msg('Clustering.hierachical: Input file has not been prepared; use make_input() first!')
    if self.table is None:
        self.make_table()
    opt=self.cluster_opt
    opt.update(options or {})
    self.cluster_opt=opt
    l_CWC=self.cluster_opt['BIN'] == 'CWC'
    if self.cluster_opt['FINGERPRINT'] and l_CWC:
        util.error_msg('Clustering.hierachical: fingerprint mode has to be used with hybrid binary, not CWC!')
    #l_CWC=False #XXXXXXXXXXXXXXX
    # Base command line; each list element is a space-separated fragment.
    if l_CWC:
        S_cmd=[Clustering.BIN_CWC, "-h -a -E -P", "-i "+self.input+".input", "-o "+self.input]
    else:
        S_cmd=[Clustering.BIN_HYB, "-eis", "-i "+self.input+".input", "-o "+self.input]
    if self.cluster_opt['SKIP_DM']:
        S_cmd.append('-ctr')
    s_dme=opt['DME']
    s_dmg=opt['DMG']
    r_maxe=1
    r_maxg=1
    iopt=self.input_opt
    d_start=time.time()
    if opt['GENE']:
        # Gene clustering: the built-in metric is requested with "-p" only
        # when no matrix file was supplied and HAS_NULL is not set.
        if opt['GENE_METRICS']=='BUILD_IN' and opt['DMG']=='' and not opt['HAS_NULL']:
            S_cmd.append("-p")
        else:
            if opt['GENE_METRICS']=='BUILD_IN': opt['GENE_METRICS']='PEARSON'
            if opt['DMG']=='':
                R_w=self.input_opt['EXP_WEIGHT']
                #R_w=R_w+np.random.randn(len(R_w))*0.001
                # Weights that are all ~1 are dropped (R_w=None).
                if R_w is not None and np.allclose(R_w, 1.0, atol=1e-5): R_w=None
                #print R_w
                dmg=self.make_DM(S_col=iopt['DATA_COLS'], metrics=opt['GENE_METRICS'], R_weight=R_w, by='GENE')
                dmg.save(s_file=self.input+'.dmg', s_format='%.2f')
                opt['DMG']=self.input+'.dmg'
            else:
                dmg=DM(s_file=opt['DMG'])
            r_maxg=dmg.dmax
            del dmg
            # The flag naming the gene matrix file differs per binary/mode.
            if l_CWC:
                S_cmd.append("-dmg "+opt['DMG'])
            else:
                if self.cluster_opt['FINGERPRINT']:
                    S_cmd.append("-f "+opt['DMG'])
                else:
                    S_cmd.append("-d "+opt['DMG'])
    if opt['EXP']:
        if not l_CWC:
            # Hybrid binary: always goes through a matrix file, passed with "-de".
            #util.warn_msg('Clustering.hierachical: experiment clustering currently is only supported by CWC!')
            if opt['EXP_METRICS']=='BUILD_IN': opt['EXP_METRICS']='PEARSON'
            if opt['DME']=='':
                R_w=None
                dme=self.make_DM(S_col=iopt['DATA_COLS'], metrics=opt['EXP_METRICS'], R_weight=R_w, by='EXP')
                dme.save(s_file=self.input+'.dme', s_format='%.2f')
                opt['DME']=self.input+'.dme'
            else:
                dme=DM(s_file=opt['DME'])
            r_maxe=dme.dmax
            del dme
            S_cmd.append("-de "+opt['DME'])
        else:
            S_cmd.append("-eg" if opt['GENE'] else '-e')
            if opt['EXP_METRICS']=='BUILD_IN' and opt['DME']=='' and not opt['HAS_NULL']:
                # "-p" may already be present from the gene branch above.
                if "-p" not in S_cmd: S_cmd.append("-p")
            else:
                if opt['EXP_METRICS']=='BUILD_IN': opt['EXP_METRICS']='PEARSON'
                if opt['DME']=='':
                    R_w=None
                    # Use the table's weight column as weights when it exists.
                    if (iopt['WEIGHT_COL']!='' and util.index(iopt['WEIGHT_COL'], self.table.header())>=0):
                        R_w=self.table[iopt['WEIGHT_COL']].values
                    if R_w is not None and np.allclose(R_w, 1, atol=1e-5): R_w=None
                    dme=self.make_DM(S_col=iopt['DATA_COLS'], metrics=opt['EXP_METRICS'], R_weight=R_w, by='EXP')
                    dme.save(s_file=self.input+'.dme', s_format='%.2f')
                    opt['DME']=self.input+'.dme'
                else:
                    dme=DM(s_file=opt['DME'])
                r_maxe=dme.dmax
                del dme
                S_cmd.append("-dme "+opt['DME'])
    # cwc sends standard message to error channel
    util.unix(" ".join(S_cmd), l_error=False, l_print=False)
    #### ZZZ
    print(" ".join(S_cmd))
    #Clustering._fix_missing(self.input+".cdt")
    #if opt['RESTORE_DISTANCE']:
    #    if opt['GENE'] and opt['GENE_METRICS']!='BUILD_IN': Clustering.restore_distance(self.input+".gtr", max_dist=r_maxg)
    #    if opt['EXP'] and opt['EXP_METRICS']!='BUILD_IN': Clustering.restore_distance(self.input+".atr", max_dist=r_maxe)
    if not opt['EXP']:
        # old CWC version will generate an AID row
        s_array=Clustering._strip_array_line(self.input+".cdt")
    if (opt['OPTIMIZE'] and opt['GENE']):
        Clustering.optimize(self.input)
        # optimization can handle Array line
        #if opt['EXP']: Clustering._insert_array_line(self.input+"Opt.cdt", s_array)
        Clustering.make_JTV(self.input+"Opt")
    else:
        Clustering.make_JTV(self.input)
    if opt['CLEANUP']:
        # NOTE(review): this tests OPTIMIZE alone, while optimization above
        # also requires GENE -- confirm the "Opt" files exist in that case.
        if opt['OPTIMIZE']:
            Clustering._remove_extra_files(self.input+"Opt")
        else:
            Clustering._remove_extra_files(self.input)
def tile_at(coord):
    """Return the entry of ``Tile.level`` stored for ``coord``."""
    slot = util.index(*coord)
    return Tile.level[slot]
def clear(coord):
    """Undraw the tile at ``coord`` and put an ``Empty`` tile in its place."""
    slot = util.index(*coord)
    Tile.level[slot].undraw()
    Tile.level[slot] = Empty(coord)
def __init__(self, s_file='', Z=None, l_gene_tree=True):
    """Build a tree from a linkage matrix or from clustering output files.

    Args:
        s_file: base name of the ``.cdt`` / ``.gtr`` / ``.atr`` files. A
            trailing ``.gtr`` or ``.atr`` is stripped, and ``.atr``
            switches to array-tree mode. Used only when ``Z`` is None.
        Z: linkage matrix; if None, assume s_file is not empty. Columns 0
            and 1 are read as child ids and column 2 as the distance.
        l_gene_tree: True for a gene tree, False for an array tree.
            Forced to True when ``Z`` is given.
    """
    self.root = Node('ROOT')
    self.l_gene_tree = l_gene_tree  # gene tree or array tree
    self.c_name = {}
    self.c_node = {}
    self.size = 0
    self.parent = {}  # track the parent node for each node
    self.tree_file = None

    if Z is not None:
        self.l_gene_tree = True
        n_merge, c = Z.shape
        n = n_merge + 1
        # Distances are scaled by the largest one (at least 1.0) and
        # turned into similarities clipped at 0.
        r_dist = max(Z[:, 2].max(), 1.0)
        id_n = None
        for i in range(n_merge):
            id_l = str(int(Z[i, 0]))
            id_r = str(int(Z[i, 1]))
            id_n = str(n + i)
            similarity = max(1.0 - Z[i, 2] / r_dist, 0.0)
            self.new_node(id_n,
                          label=self.c_name.get(id_n, ''),
                          left=self.new_node(id_l),
                          right=self.new_node(id_r),
                          similarity=similarity)
            self.parent[id_l] = id_n
            self.parent[id_r] = id_n
        if id_n is not None:
            # The node created by the last merge is the root. An empty Z
            # leaves the placeholder root in place.
            self.root = self.get_node(id_n)
        self.size = n - 1
    else:
        if re.search(r'\.[ag]tr$', s_file):
            if re.search(r'\.atr$', s_file):
                l_gene_tree = False
            s_file = re.sub(r'\.[ag]tr$', '', s_file)
        self.root = Node('ROOT')
        self.l_gene_tree = l_gene_tree  # gene tree or array tree
        self.c_name = {}
        self.c_node = {}
        self.size = 0
        self.parent = {}  # track the parent node for each node
        if not os.path.exists(s_file + ".cdt"):
            util.error_msg("File not exist: " + s_file + ".cdt!")

        # Context manager so the handle is closed even if a row is malformed.
        with open(s_file + '.cdt') as f:
            S_header = f.readline().strip().split("\t")
            if not l_gene_tree:
                # Array tree: names come from pairing the AID row with the
                # header, keeping only ids that start with 'ARRY'.
                for line in f:
                    if line.startswith("AID\t"):
                        S_AID = line.strip().split("\t")
                        self.c_name = {
                            s: x
                            for s, x in zip(S_AID, S_header)
                            if str(s).startswith('ARRY')
                        }
                        break
            else:
                # Gene tree: map each row's GID to its GENE value, or to
                # NAME when there is no GENE column.
                s_col = 'GENE'
                if s_col not in S_header and 'NAME' in S_header:
                    s_col = 'NAME'
                i_GID = util.index('GID', S_header)
                i_NAME = util.index(s_col, S_header)
                for line in f:
                    if line.startswith('AID') or line.startswith('EWEIGHT'):
                        continue
                    S = line.strip().split("\t")
                    self.c_name[S[i_GID]] = S[i_NAME]

        self.size = len(self.c_name)
        if self.size == 0:
            # Same helper as the other error path in this method.
            util.error_msg("Tree:__init_: No node is found to build the tree!")
        s_filename = s_file + ('.gtr' if l_gene_tree else '.atr')
        # check if file has column header
        self.tree_file = s_filename
        df = Tree.read_tree_file(s_filename)
        self.parse(df)
def main():
    """Pick exemplar frames from a video and plot them.

    Command line: ``input_filename`` and an optional ``--log`` level.
    Frames 0..2699 are read, ``FrameExemplifier`` selects
    ``n_exemplars`` of them, and two figures are shown: the feature
    histograms of the exemplars and the exemplar images themselves. The
    chosen frame indices are logged at the end.

    Raises:
        ValueError: if ``--log`` is not a valid logging level name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_filename')
    parser.add_argument("--log", type=str, default='INFO',
                        help="Logging setting (e.g., INFO, DEBUG)")
    args = parser.parse_args()

    # Setting logging parameters
    numeric_level = getattr(logging, args.log.upper(), None)
    if not isinstance(numeric_level, int):
        # Report the value the user passed; there is no `loglevel` name.
        raise ValueError('Invalid log level: %s' % args.log)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(message)s')

    n_exemplars = 10
    exemplifier = FrameExemplifier(n_exemplars)

    # Load data
    start, stop, step = 0, 2700, 1
    #start, stop, step = None, None, None
    logging.info('Loading from %s (start frame=%s, end frame=%s, increment=%s)',
                 args.input_filename, *map(str, [start, stop, step]))
    sample_inds = list(range(start, stop, step))
    all_frames = util.grab_frame(args.input_filename)
    frame_sample = (util.index(all_frames, sample_inds))

    n_bins = 25
    best_X_inds = exemplifier.from_BGRs(frame_sample, n_bins=n_bins)
    # Map index in frame_sample to index in the input video
    frame_inds = np.array(sample_inds)[best_X_inds]
    #frame_inds = np.linspace(0, 700, 25, dtype=int)
    frame_inds.sort()

    # Re-open the video to fetch the selected frames.
    all_frames = util.grab_frame(args.input_filename)
    im_exemplars = list(util.index(all_frames, frame_inds))

    # Display exemplar histograms in one plot
    plt.figure()
    for im_exemplar in im_exemplars:
        counts, bins = exemplifier.im_BGR_to_features(im_exemplar)
        plt.plot(bins[:-1], counts)

    num_subplot_rows = math.ceil(n_exemplars**.5)
    plt.figure()
    fig_title = "Examplar images at %s" % time.asctime(time.localtime())
    # The window title is set through the figure manager, since the
    # canvas-level method no longer exists in current Matplotlib. Some
    # backends have no manager, hence the guard.
    manager = plt.gcf().canvas.manager
    if manager is not None:
        manager.set_window_title(fig_title)
    for i, (frame_ind, im_exemplar) in enumerate(zip(frame_inds, im_exemplars)):
        plt.subplot(num_subplot_rows, num_subplot_rows, i + 1)
        # Reverse the channel axis for display.
        plt.imshow(im_exemplar[:, :, ::-1], interpolation='nearest')
        plt.xticks(())  # remove ticks
        plt.yticks(())
        plt.title("Frame #%i" % (frame_ind))
    plt.tight_layout()
    plt.show()

    logging.info('Exemplar frame indices are %s', str(frame_inds))
def color_cdt(s_file, exps=None, exp_bgcolor=None, genes=None, gene_bgcolor=None):
    """Add background-colour annotations to a ``.cdt`` file, in place.

    ``.cdt`` is appended to ``s_file`` when missing. An ``EWEIGHT`` row
    and a ``GWEIGHT`` column are inserted if absent. When ``genes`` is
    given, a ``BGCOLOR`` column is ensured and filled from
    ``Tree.color_map(genes, gene_bgcolor)``. When ``exps`` is given, a
    ``BGCOLOR`` row is ensured and filled from
    ``Tree.color_map(exps, exp_bgcolor)``. Entries without a mapped colour
    get white (``#ffffff``). The file is then rewritten.
    """
    if not s_file.endswith('.cdt'):
        s_file += '.cdt'
    if not os.path.exists(s_file):
        util.error_msg("File not exist: " + s_file + "!")
    BG = '#ffffff'

    S = []  # every row as a list of cells
    c_first = {}  # first-cell text -> row index (last occurrence wins)
    # Context manager so the handle is closed even if reading fails.
    with open(s_file) as f:
        for i, line in enumerate(f):
            SS = line.strip().split("\t")
            c_first[SS[0]] = i
            S.append(SS)

    S_header = S[0]
    i_gene = util.index('GENE', S_header)
    i_name = util.index('NAME', S_header)
    i_gid = util.index('GID', S_header)
    i_w = util.index("GWEIGHT", S_header)
    # Data columns start after the right-most annotation column.
    offset = max([i_gene, i_name, i_gid, i_w]) + 1
    n_exp = len(S_header) - offset

    if 'EWEIGHT' not in c_first:
        # add EWEIGHT ROW
        i_w = max([c_first.get('GID', -1), c_first.get('AID', -1)]) + 1
        S.insert(i_w, ['EWEIGHT'] + [''] * (offset - 1) + ['1.000'] * n_exp)
        c_first['EWEIGHT'] = i_w

    i_w = util.index("GWEIGHT", S_header)
    if i_w < 0:
        # add GWEIGHT column
        i_w = offset
        S_header.insert(i_w, 'GWEIGHT')
        for i in range(1, len(S)):
            if i <= c_first['EWEIGHT']:
                S[i].insert(i_w, '')
            else:
                S[i].insert(i_w, '1.000')
        offset += 1

    i_gene_color = util.index('BGCOLOR', S_header)
    if i_gene_color < 0 and genes is not None:
        # New BGCOLOR column goes just before the last annotation column.
        i_gene_color = offset - 1
        S_header.insert(i_gene_color, 'BGCOLOR')
        offset += 1
        for i in range(1, len(S)):
            if i <= c_first['EWEIGHT']:
                S[i].insert(i_gene_color, '')
            else:
                S[i].insert(i_gene_color, BG)

    i_exp_color = c_first.get('BGCOLOR', -1)
    if i_exp_color < 0 and exps is not None:
        # New BGCOLOR row goes directly above EWEIGHT, which shifts down.
        i_exp_color = c_first['EWEIGHT']
        S.insert(i_exp_color, ['BGCOLOR'] + [''] * (offset - 1) + [BG] * n_exp)
        c_first['EWEIGHT'] += 1

    if genes is not None:
        c_m = Tree.color_map(genes, gene_bgcolor)
        idx = i_gene if i_gene >= 0 else i_name
        for i in range(c_first['EWEIGHT'] + 1, len(S)):
            S[i][i_gene_color] = c_m.get(S[i][idx], BG)

    if exps is not None:
        c_m = Tree.color_map(exps, exp_bgcolor)
        # The row just above EWEIGHT is taken to be the BGCOLOR row.
        SS = S[c_first['EWEIGHT'] - 1]
        for i in range(offset, len(SS)):
            SS[i] = c_m.get(S_header[i], BG)

    S = ["\t".join(X) for X in S]
    util.save_list(s_file, S, s_end="\n")
def query(coord, property):
    """Return the value of ``property`` for the tile stored at ``coord``."""
    slot = util.index(*coord)
    return Tile.level[slot].properties[property]