Example no. 1
    def _get_chosen_spacial_prob(self, spacial_probs, spacial_choice):
        spacial_probs = tf.stack(
            spacial_probs, axis=-1)  # [T, screen_dim, num_screen_dimensions]
        spacial_probs = util.index(
            spacial_probs, spacial_choice)  # [T, num_screen_dimensions]
        return util.index(spacial_probs, self.action_input %
                          tf.convert_to_tensor(self.num_screen_dims))  # [T]
Example no. 2
    def _get_chosen_selection_probs(self, selection_probs, selection_choice):
        """
        :param selection_probs: Tensor of integers of shape [T, num_units, num_selection_actions]
        :param selection_choice: Tensor of shape [T] of type int
        :return:
        """
        selection_probs = util.index(selection_probs, selection_choice)  # [T, num_selection_actions]
        num_selection_actions = self.interface.num_unit_selection_actions

        index = (self.action_input - self.num_screen_dims) % tf.convert_to_tensor(num_selection_actions)
        return util.index(selection_probs, index)  # [T]
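A minimal sketch of the gather semantics these two helpers rely on (an assumption about util.index, not its actual source): one element is picked per row along the last axis.

import numpy as np

def index(probs, choice):
    # probs: [T, N], choice: [T]  ->  out[t] = probs[t, choice[t]]
    return probs[np.arange(len(probs)), choice]

probs = np.array([[0.1, 0.9],
                  [0.7, 0.3]])
print(index(probs, np.array([1, 0])))  # [0.9 0.7]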
Example no. 3
    def make_input(self, s_file='untitled', options=None):
        if self.table is None: util.error_msg('Clustering.make_input: missing Clustering.table!')
        S=self.table.header()
        S_up=[ s.upper() for s in S]
        opt=self.input_opt
        opt.update(options or {})
        self.input_opt=opt
        S_miss=[s for s in opt['DATA_COLS'] if s not in S]
        if len(S_miss)>0: util.error_msg('Clustering.make_input: missing data column: '+", ".join(S_miss))
        i_id=util.index(opt['ID'], S)
        if (i_id<0):
            i_id=util.index('GENE', S_up)
            if i_id<0: util.error_msg('Clustering.make_input: no column is specified as the ID!')
            opt['ID']=S[i_id]
        if type(opt['DESCRIPTION']) is str: opt['DESCRIPTION']=[opt['DESCRIPTION']]
        I_des=[util.index(s, S) for s in opt['DESCRIPTION'] if util.index(s, S)>=0]

        if (len(I_des)==0):
            I_des=[i_id]
            opt['DESCRIPTION']=[opt['ID']]
        else:
            for i in I_des:
                self.table.iloc[:, i]=util.sarray2sarray(self.table.iloc[:,i])
        i_w=util.index(opt['WEIGHT_COL'], S)
        opt['DATA_COLS']=self.get_default_exp_cols(opt['DATA_COLS'])
        n_exp=len(opt['DATA_COLS'])
        if n_exp==0: util.error_msg('Clustering.make_input: no data column is specified!')

        S_out=[]
        S_out.append('Gene\tDescription\tWeight\t'+'\t'.join(opt['DATA_COLS']))
        if opt['EXP_WEIGHT'] is None or len(opt['EXP_WEIGHT'])!=n_exp:
            S_out.append('Exp\t\t'+'\t1'*n_exp)
        else:
            S_out.append('Exp\t\t\t'+'\t'.join(util.rarray2sarray(opt['EXP_WEIGHT'], s_format='%g', s_null=1.0)))
        #df.fillna('', inplace=True)
        i_cols=[S.index(s) for s in opt['DATA_COLS']]
        if opt['GENE_WEIGHT'] is not None and len(opt['GENE_WEIGHT'])==len(self.table):
            if opt['WEIGHT_COL']=='':
                opt['WEIGHT_COL']='WEIGHT'
            self.table[opt['WEIGHT_COL']]=opt['GENE_WEIGHT']
            i_w=util.index(opt['WEIGHT_COL'], self.table.header())  # recompute: the weight column may be newly added
        for i in range(len(self.table)):
            s=str(self.table.iloc[i, i_id])+'\t'+":".join(self.table.iloc[i, I_des])+'\t'+str(self.table.iloc[i, i_w] if i_w>=0 else 1)
            R=np.array([x for x in self.table.iloc[i,i_cols]])
            if opt['GENE_NORMALIZE'] and opt['NORMALIZE_METHOD']=='Z':
                valid=util.no_nan(R)
                if len(valid)>1:
                    R=(R-np.mean(valid))/np.std(valid, ddof=1)
            s+='\t'+'\t'.join(['' if pd.isnull(x) else str(x) for x in R])
            S_out.append(s)
        if re.search(r'\.input$', s_file) is not None:
            s_file=re.sub(r'\.input$', '', s_file)
        util.save_list(s_file+".input", S_out, s_end='\n')
        self.input=s_file
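A minimal numpy sketch of the per-gene Z-normalization above (assuming util.no_nan simply drops NaN entries): both the mean and the sample standard deviation come from the valid values only.

import numpy as np

R = np.array([1.0, 2.0, np.nan, 4.0])
valid = R[~np.isnan(R)]
if len(valid) > 1:
    R = (R - valid.mean()) / valid.std(ddof=1)  # NaNs stay NaN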
Example no. 4
    def _train_log_probs_with_units(self, nonspacial_probs, spacial_probs_x,
                                    spacial_probs_y, selection_probs):
        nonspacial_log_probs = tf.log(
            util.index(nonspacial_probs, self.action_input) + 1e-10)

        # TODO: This only works if all screen dimensions are the same. Should pad to greatest length
        probs_y = self._get_chosen_spacial_prob(spacial_probs_y,
                                                self.spacial_input[:, 1])
        probs_x = self._get_chosen_spacial_prob(spacial_probs_x,
                                                self.spacial_input[:, 0])
        probs_selection = self._get_chosen_selection_probs(
            selection_probs, self.unit_selection_input)

        selection_log_prob = tf.log(probs_selection + 1e-10)
        spacial_log_probs = tf.log(probs_x + 1e-10) + tf.log(probs_y + 1e-10)

        result = nonspacial_log_probs
        result = result + tf.where(self.action_input < self.num_screen_dims,
                                   x=spacial_log_probs,
                                   y=tf.zeros_like(spacial_log_probs))

        is_select_action = tf.logical_and(
            self.action_input >= self.num_screen_dims,
            self.action_input < self.num_screen_dims + self.num_select_actions)
        result = result + tf.where(is_select_action,
                                   x=selection_log_prob,
                                   y=tf.zeros_like(selection_log_prob))
        return result
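A minimal numpy sketch of the masking pattern above (values illustrative, not the project's tensors): spatial log-probs only contribute on steps whose chosen action is a screen action.

import numpy as np

action = np.array([0, 3, 1])             # chosen action ids, [T]
num_screen_dims = 2
nonspatial_lp = np.log([0.8, 0.6, 0.7])  # always counted
spatial_lp = np.log([0.5, 0.2, 0.9])     # masked below

total_lp = nonspatial_lp + np.where(action < num_screen_dims, spatial_lp, 0.0)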
Example no. 5
        def f(s):
            args2 = reify(args, s)
            subsets = [self.index[key] for key in enumerate(args2) if key in self.index]
            if subsets:  # we are able to reduce the pool early
                facts = intersection(*sorted(subsets, key=len))
            else:
                facts = self.facts
            varinds = [i for i, arg in enumerate(args2) if isvar(arg)]
            valinds = [i for i, arg in enumerate(args2) if not isvar(arg)]
            vars = index(args2, varinds)
            vals = index(args2, valinds)
            assert not any(var in s for var in vars)

            return (
                merge(dict(zip(vars, index(fact, varinds))), s) for fact in facts if vals == index(fact, valinds)
            )
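A hedged sketch of the two helpers this closure leans on (assumptions about their behavior, not the library's actual code): index() picks the values at the given positions, and merge() layers new bindings onto an existing substitution.

def index(tup, inds):
    return tuple(tup[i] for i in inds)

def merge(d1, d2):
    out = dict(d2)
    out.update(d1)
    return out

# index(('a', 'b', 'c'), (0, 2)) == ('a', 'c')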
Example no. 6

def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('input_filename')
    parser.add_argument("--log", type=str, default='INFO',
            help="Logging setting (e.g., INFO, DEBUG)")
    args = parser.parse_args()

    # Setting logging parameters
    numeric_level = getattr(logging, args.log.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % args.log)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(message)s')

    exemplifier = FrameExemplifier()

    # Load data
    start, stop, step = 0, 2700, 1
    #start, stop, step = None, None, None
    logging.info('Loading from %s (start frame=%s, end frame=%s, increment=%s)',
        args.input_filename, *map(str, [start, stop, step]))
    sample_inds = list(range(start, stop, step))
    all_frames = util.grab_frame(args.input_filename)
    frame_sample = util.index(all_frames, sample_inds)

    # Flatten H channel of every item in sample
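    # (OpenCV stores hue in [0, 180) for 8-bit images, which is why the
    # histograms below use range=(0, 180.))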
    H_rows = np.vstack([
            cv2.cvtColor(im, cv2.COLOR_BGR2HSV)[:,:,0].ravel()
            for im in frame_sample])

    n_exemplars_list = np.linspace(3, 50, 10, dtype=int)
    n_bins_list = np.linspace(10, 45, 5, dtype=int)
    scores = np.zeros((len(n_bins_list), len(n_exemplars_list), ))

    for j, n_bins in enumerate(n_bins_list):
        X = np.vstack([
                np.histogram(H, bins=n_bins, range=(0, 180.))[0]
                for H in H_rows])
        for i, n_exemplars in enumerate(n_exemplars_list):
            kmeans_obj = exemplifier.pipeline.steps[-1][-1]
            kmeans_obj.n_clusters = n_exemplars

            best_X_inds = exemplifier.from_features(X)
            score = exemplifier.pipeline.score(X)
            scores[j, i] = score

    plt.figure()
    for n_bins, score_list in zip(n_bins_list, scores):
        plt.plot(n_exemplars_list, score_list, label='nbins=%i'%(n_bins))
    #plt.imshow(scores, interpolation='nearest')
    plt.legend(loc='best')
    plt.xlabel('Number of clusters')
    plt.ylabel('KMeans score')
    plt.title('KMeans score versus #clusters and #bins')
    plt.show()
Example no. 7
    def _train_log_probs(self, nonspacial_probs, spacial_probs_x, spacial_probs_y):
        nonspacial_log_probs = tf.log(util.index(nonspacial_probs, self.action_input) + 1e-8)

        # TODO: This only works if all screen dimensions are the same. Should pad to greatest length
        probs_y = self._get_chosen_spacial_prob(spacial_probs_y, self.spacial_input[:, 1])
        probs_x = self._get_chosen_spacial_prob(spacial_probs_x, self.spacial_input[:, 0])
        spacial_log_probs = tf.log(probs_x + 1e-7) + tf.log(probs_y + 1e-7)
        result = nonspacial_log_probs + tf.where(self.action_input < self.num_screen_dims,
                                                 x=spacial_log_probs,
                                                 y=tf.zeros_like(spacial_log_probs))
        return result
Example no. 8
def read_cdt(s_file):
    if not s_file.endswith('.cdt'):
        s_file += '.cdt'
    if not os.path.exists(s_file):
        util.error_msg("File not exist: " + s_file + "!")
    f = open(s_file)
    S_header = f.readline().strip().split("\t")
    i_w = util.index("GWEIGHT", S_header)
    i_gene = util.index('GENE', S_header)
    i_name = util.index('NAME', S_header)
    l_start = False
    R_exp = []
    R_gene = []
    data = []
    offset = 0
    while True:
        line = f.readline()
        if not line: break
        S = line.strip().split("\t")
        if S[0] == 'EWEIGHT':
            for i in range(1, len(S)):
                if S[i] != "":
                    offset = i
                    break
            tmp = []
            if i_gene >= 0: tmp.append(S_header[i_gene])
            if i_name >= 0: tmp.append(S_header[i_name])
            S_header = tmp + S_header[offset:]
            R_exp = util.sarray2rarray(S[offset:])
            if i_w < 0: i_w = offset - 1
            l_start = True
        elif l_start:
            one = []
            if i_gene >= 0: one.append(S[i_gene])
            if i_name >= 0: one.append(S[i_name])
            one.extend(util.sarray2rarray(S[offset:]))
            data.append(one)
            R_gene.append(float(S[i_w]))
    f.close()
    t = pd.DataFrame(data, columns=S_header)
    return (t, R_exp, R_gene)
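Hypothetical usage (the path is illustrative): the triple returned is the expression table, the EWEIGHT row, and the per-gene GWEIGHT values.

t, R_exp, R_gene = read_cdt('results/cluster')  # '.cdt' is appended if missing
print(t.shape, len(R_exp), len(R_gene))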
Example no. 9
    def from_table(self, t_edge):
        self.data = {}
        if "TYPE" not in t_edge.header():
            t_edge['TYPE'] = ['Direct'] * len(t_edge)
        idx = util.index('SCORE', [x.upper() for x in t_edge.header()])
        for i in range(len(t_edge)):
            if not self.allow_indirect and t_edge['TYPE'].iloc[i] in [
                    "Indirect", "ppp"
            ]:
                continue
            s1 = t_edge['Gene_A'].iloc[i]
            s2 = t_edge['Gene_B'].iloc[i]
            if s1 not in self.data: self.data[s1] = {}
            if s2 not in self.data: self.data[s2] = {}
            score = 1 if idx < 0 else t_edge.iat[i, idx]
            self.data[s1][s2] = score
            self.data[s2][s1] = score
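A hypothetical probe of the resulting structure (the class name is assumed, not shown in the source):

# net = Network(allow_indirect=False)   # assumed constructor
# net.from_table(t_edge)                # t_edge has Gene_A/Gene_B columns
# self.data is a symmetric adjacency map:
# assert net.data['TP53']['MDM2'] == net.data['MDM2']['TP53']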
Example no. 10
def make_dashboard_part(part_meta, template_params, sub_part_function=None):
    base_part = part_meta['_base']
    part_template = get_template(base_part)
    part_template = metaify_template_string(part_template, part_meta)

    template_params_to_expand = [
        p for p in template_params
        if templating.find_parameter(part_template, p['type']) >= 0
    ]

    combinations = get_all_param_value_combinations(template_params_to_expand)
    result = []
    logging.debug('part_template:{}'.format(part_template))
    logging.debug('template_params:{}'.format(template_params))
    logging.debug('combinations:{}'.format(combinations))
    if combinations:
        for combination in combinations:
            replacements = {}
            sub_template_params = template_params[:]
            for param in combination:
                param_type = param['type']
                param_value = param['value']
                replacements[param_type] = param_value
                idx = util.index(sub_template_params,
                                 lambda x: x['type'] == param_type)
                sub_template_params[idx] = {
                    'type': param_type,
                    'values': [param_value]
                }
            logging.debug('replacements:{}'.format(replacements))
            logging.debug('sub_template_params:{}'.format(sub_template_params))
            part_string = templating.replace(part_template, replacements)
            part = json.loads(part_string)
            if sub_part_function:
                sub_part_function(part, part_meta, sub_template_params)
            if part not in result:
                result.append(part)
    else:
        part_string = templating.replace(part_template, {})
        part = json.loads(part_string)
        if sub_part_function:
            sub_part_function(part, part_meta, template_params)
        if part not in result:
            result.append(part)
    return result
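A hedged sketch of what get_all_param_value_combinations presumably returns, inferred from how combinations is consumed above: the cartesian product of each parameter's candidate values, one {'type', 'value'} dict per parameter.

import itertools

def get_all_param_value_combinations(params):
    expanded = [[{'type': p['type'], 'value': v} for v in p['values']]
                for p in params]
    return [list(combo) for combo in itertools.product(*expanded)]

# [{'type': 'host', 'values': ['a', 'b']}, {'type': 'dc', 'values': ['x']}]
# -> [[{'type': 'host', 'value': 'a'}, {'type': 'dc', 'value': 'x'}],
#     [{'type': 'host', 'value': 'b'}, {'type': 'dc', 'value': 'x'}]]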
Example no. 11
def insert_table(slide, t, top=None, col_width=None):
    (left, width, height) = (296260, 8551480, 5078313)
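    # python-pptx positions and sizes are in EMUs (914400 EMU = 1 inch;
    # x * 914400 // 72 converts x points to EMUs)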
    if top is None:
        top = 1347965 + 25 * 914400 // 72
    rows = len(t)
    cols = len(t.header())
    table = slide.shapes.add_table(rows + 1, cols, left, top, width,
                                   height).table

    #table.columns[0].width = Inches(2.0)
    #table.columns[1].width = Inches(4.0)

    # write column headings
    S_header = t.header()
    if col_width is not None:
        width_remain = width
        for k, v in col_width.items():
            i = t.col_index(k)
            table.columns[i].width = int(v)
            width_remain -= v
        w = int(max(width_remain / (cols - len(col_width)), 914400 // 2))
        for i, x in enumerate(S_header):
            if x not in col_width:
                table.columns[i].width = w
    table.rows[0].height = 14 * 914400 // 72

    for i in range(cols):
        table.cell(0, i).text = S_header[i]
        for j in range(rows):
            table.cell(j + 1,
                       i).text = '' if pd.isnull(t.iat[j,
                                                       i]) else str(t.iat[j,
                                                                          i])
    if '_Color_' in S_header:  # used for color legend
        j = util.index('_Color_', S_header)
        for i in range(1, rows + 1):
            table.cell(i, j).fill.solid()
            s_hex = t.iat[i - 1, j].replace('#', '')
            if re.search(r'^[0-9A-Fa-f]{6}$', s_hex):
                table.cell(i,
                           j).fill.fore_color.rgb = RGBColor.from_string(s_hex)
    return table
Example no. 12

def cluster_frames():

    seed = 0
    np.random.seed(seed)

    parser = argparse.ArgumentParser()
    parser.add_argument('input_filename')
    parser.add_argument("data_proportion", nargs='?', type=float, default=1.,
            help="Proportion of full dataset to be used")
    parser.add_argument("--log", type=str, default='INFO',
            help="Logging setting (e.g., INFO, DEBUG)")
    parser.add_argument('-o', '--output_filename',
        help='Filename of video to be saved (default: does not save)')
    args = parser.parse_args()

    # Setting logging parameters
    numeric_level = getattr(logging, args.log.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % args.log)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(message)s')

    sample_inds = [212, 699, 988, 1105, 2190, 2318]
    logging.info('Loading %i images... ', len(sample_inds))

    # Load data
    d = 6  # size of patch
    all_frames = util.grab_frame(args.input_filename)
    im_originals = list(util.index(all_frames, sample_inds))
    im_height, im_width = im_originals[0].shape[:2]
    all_patch_rows =  np.array(list(
            patch.ravel()
            for im in im_originals
            for patch in util.yield_windows(im, (d, d), (1, 1))
            ))
    num_rows_per_im = len(all_patch_rows) // len(im_originals)
    num_im = len(im_originals)
    logging.info('Loaded %i examples from %i images',
        len(all_patch_rows),
        len(im_originals))

    # Randomly sample a subset of the data
    sample_size = int(args.data_proportion * len(all_patch_rows))
    inds = np.random.choice(len(all_patch_rows), sample_size)
    X = all_patch_rows[inds]
    logging.info('Sampled %.1f%% of dataset = %i', 100 * args.data_proportion,
        sample_size)

    ############################# Define pipeline #############################    

    std_scaler = (sklearn.preprocessing.StandardScaler, {})
    coates_scaler = (CoatesScaler.CoatesScaler, {})
    pca = (sklearn.decomposition.PCA,
            {'whiten':True, 'copy':True}
            )
    zca = (ZCA.ZCA, {'regularization': .1})
    n_clusters = 100
    mbkmeans = (sklearn.cluster.MiniBatchKMeans,
            {
                'n_clusters': n_clusters,
                'batch_size': 3000,
            })
    skmeans = (SphericalKMeans.SphericalKMeans,
            {
                'n_clusters': n_clusters,
                'max_iter': 10,
            })
    kmeans = (sklearn.cluster.KMeans,
            {
                'n_clusters': n_clusters,
                #'random_state': np.random.RandomState,
                #'n_jobs': -1,
                #'n_init': 1,
                #'max_iter': 10,
            })

    # Define pipeline
    steps = [coates_scaler, zca, kmeans]
    pipeline = sklearn.pipeline.make_pipeline(
            *[fun(**kwargs) for fun, kwargs in steps])

    # Define pointers to certain steps for future processing
    whitener = pipeline.steps[1][1]  # second step
    dic = pipeline.steps[-1][1]  # last step

    steps = [(obj.__class__, obj.get_params()) for name, obj in pipeline.steps]
    util.print_steps(steps)


    ######################### Train pipeline ##################################

    logging.info('Training model...')
    pipeline.fit(X)
    logging.info('done.')

    ######################### Display atoms of dictionary #####################

    frames = util.grab_frame(args.input_filename)
    patch_row_chunks = (
            np.array(list(
            patch.ravel()
            for patch in util.yield_windows(im, (d, d), (1, 1))))
            for im in frames)

    def im_displays():
        for patch_rows in patch_row_chunks:
            y = pipeline.predict(patch_rows)

            # Map to [0, 1) so that imshow scales across entire colormap spectrum
            y = y / n_clusters

            newshape = (im_height - d + 1, im_width - d + 1, )
            segmentation = np.reshape(y, newshape)

            # Apply color map and remove alpha channel
            cmap = plt.cm.Set1
            colored_segmentation = cmap(segmentation)[:, :, :3]
            colored_segmentation = (colored_segmentation * 255).astype(np.uint8)

            yield colored_segmentation

    #frames = itertools.islice(im_displays(), 5)
    frames = im_displays()
    save_video = args.output_filename is not None
    if save_video:
        write_frames_to_disk(frames, args.output_filename)
    else:
        display_frames(frames)

    return

    logging.info('Displaying atoms of dictionary')

    # Inverse whiten atoms of dictionary
    atom_rows = dic.cluster_centers_ 
    if hasattr(whitener, 'inverse_transform'):
        atom_rows = whitener.inverse_transform(atom_rows)  

    plt.figure()
    for i, atom_row in enumerate(atom_rows):
        patch = atom_row.reshape(d, d, -1)[::-1]
        plt.subplot(10, 10, i + 1)
        plt.imshow(patch, interpolation='nearest')
        plt.xticks(())
        plt.yticks(())

    plt.suptitle('Atoms of dictionary learnt from %i patches by %s' %  \
            (len(atom_rows), dic.__class__.__name__))

    plt.figure()
    displayed_patches = X[np.random.choice(len(X), 100)]
    for i, patch in enumerate(displayed_patches):
        plt.subplot(10, 10, i + 1)
        plt.imshow(patch.reshape([d, d, -1])[:,:,::-1], interpolation='nearest')
        plt.xticks(())
        plt.yticks(())

    plt.show()
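A hedged sketch of util.yield_windows as this script consumes it (an assumption, not the utility's source): dense (d, d) patches at stride (1, 1), so each image yields (H - d + 1) * (W - d + 1) rows, matching num_rows_per_im above.

def yield_windows(im, win_shape, step):
    h, w = win_shape
    dy, dx = step
    for y in range(0, im.shape[0] - h + 1, dy):
        for x in range(0, im.shape[1] - w + 1, dx):
            yield im[y:y + h, x:x + w]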
Example no. 13
    def map(self, X, n_CPU=0, l_quit=None):
        """X: list[input tuple], list of input parameters. If workers were started with f=None, each element in X in passed to the wrapper task. In that case, we expect X to be a tuple (or list) and the first element of X must be either the method pointer or its registered name. However, many methods, such as instance method or func within a func, cannot be pickled, therefore the method cannot be send over the pipe. We should pre-register such methods and call them by name. Need example later.
        l_quit: boolean, default None, if specified, controls whether workers quit or not after tasks are processed. If not, workers wait for future tasks.
        return list"""
        # very similar to the idea in https://stackoverflow.com/questions/3288595/multiprocessing-how-to-use-pool-map-on-a-function-defined-in-a-class, author klaus se
        l_quit = l_quit if l_quit is not None else self.QUIT
        #if self.is_busy():
        #    util.error_msg('Works are still busy!')
        if n_CPU == 0: n_CPU = self.n_use  # defaults to n_use
        n_CPU = min(
            n_CPU, self.n_CPU
        )  # one could start 8 CPUs, but only use 4 for mapping, if the work takes lots of memory
        res = []
        n_input = len(X)
        if n_input == 0 and not l_quit: return res
        #print '=============', self.c_proc
        if not self.has_started() and n_input > 0:
            util.warn_msg(
                'Please start processes first, no worker is running!')
            util.warn_msg('However, we will process the task with ONE cpu!!!')
            return [self.wrapper(x) for x in X]

        if n_input > 0 and n_CPU == 0:
            return [self.wrapper(x) for x in X]

        s_pid = str(multiprocessing.current_process().pid)
        has_my_job = [False for i in range(self.n_CPU)]

        def engine():
            print(
                "================================================================="
            )
            print("PID: ", str(multiprocessing.current_process().pid))
            print("WORK STATUS: ", self.work_status)
            print("HAS MY JOB: ", has_my_job)
            print("JOB IS DONE: ", self.job_is_done)
            print("N_RUNNING: (%d, %d) " %
                  (self.n_running[0], self.n_running[1]))
            print(
                "================================================================="
            )

        def is_busy():
            return sum(has_my_job) > 0

        def process_out(out):
            i, x = out
            if i is None:
                self.n_running[0] -= 1
                # I modify the original code, so that we can join the process and release it as soon as possible
                if type(x) is str:
                    print("Exception> " + x)
                    exit()
                else:
                    if self.DEBUG:
                        print("Progress: %d processes remaining. Stopping %d" %
                              (self.n_running[0], x))
                    #print self.c_proc.keys()
                    self.c_proc[x].join()
                    del self.c_proc[x]
                    if self.DEBUG: print("Progress: process %d stopped." % x)
            else:
                res.append(out)
                if self.DEBUG:
                    print("Progress: %d of %d item calculated." %
                          (len(res), n_input))

        def fetch(l_lock=False):
            while is_busy():
                l_fetch_something = False
                for i_worker in range(self.n_CPU):
                    if has_my_job[i_worker] and self.job_is_done[i_worker]:
                        try:
                            (i, x) = self.q_out[i_worker].get()
                            process_out((i, x))
                            self.n_running[1] -= 1
                            self.work_status[i_worker] = False
                            has_my_job[i_worker] = False
                            self.job_is_done[i_worker] = False
                            l_fetch_something = True
                            if self.DEBUG:
                                print(">>>A1")
                                engine()
                            with self.mail:
                                self.mail.notify_all()
                        except Exception as e:
                            print(
                                "ERROR> Fail to fetch results from worker: %d"
                                % i_worker)
                            print(traceback.format_exc())
                            return
                if not l_fetch_something:
                    if l_lock:
                        with self.mail:
                            self.mail.wait(timeout=8.5 + random.random() * 3)
                    else:
                        return

        if self.DEBUG: print("DEBUG> self.n_running: ", self.n_running)
        if self.DEBUG:
            print("DEBUG> self.c_proc.keys(): ", list(self.c_proc.keys()))
        ###ZHOU FEB16,2016
        #self.n_running[1]+=1
        ###
        i = 0
        while (i < n_input):
            x = X[i]
            if self.DEBUG: print("fetch job entry %d " % i)
            #print self.work_status
            self.lock.acquire()
            j = util.index(False, self.work_status)  # find an idle worker
            l_put_something = False
            if j >= 0 and sum(
                    has_my_job
            ) < n_CPU:  #j>=0 and j<n_CPU: # we only use up to n_CPU, even if there are more workers
                #print "assing job to %d" % j
                self.work_status[j] = True  # flag it as busy
                has_my_job[j] = True
                if self.DEBUG:
                    print("DEBUG> self.c_proc.keys(): ",
                          list(self.c_proc.keys()))
                ###ZHOU FEB16,2016
                self.n_running[1] += 1
                ###
                self.lock.release()
                self.q_in[j].put((i, j, x))  # assign task
                i += 1
                if self.DEBUG:
                    print("Progress: send input %d of %d items." % (i, len(X)))
                l_put_something = True
                if self.DEBUG:
                    print(">>>A2")
                    engine()
            else:
                self.lock.release()
            # we constantly remove items from the output queue so that the process can release some memory
            fetch()
            if not l_put_something:
                with self.mail:
                    self.mail.wait(timeout=8.5 + random.random() * 3)
                    fetch()

        while (is_busy()):
            fetch(True)
        if self.DEBUG:
            print(">>>A3")
            engine()
        self.lock.acquire()
        ###ZHOU FEB16,2016
        #self.n_running[1]-=1
        ###
        if self.DEBUG:
            print(">>>QUIT=" + ("True" if l_quit else "False"))
            print(">>>n_running[1]=%d" % self.n_running[1])
        if l_quit and self.n_running[1] == 0:  # I am the last one running map()
            if self.DEBUG:
                print(">>>A4")
                engine()
            for i in range(self.n_CPU):
                self.q_in[i].put((None, i, None))
                self.work_status[i] = True
                self.n_running[1] += 1
                has_my_job[i] = True
            self.lock.release()
            while (is_busy()):
                fetch(True)
                if self.DEBUG:
                    engine()
        else:
            self.lock.release()
        if self.DEBUG:
            for i, x in enumerate(res):
                print('>>>A4 ', i, type(x), type(x[0]), type(x[1]))
        res.sort(key=lambda x: x[0])
        return [x for i, x in res]
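A hypothetical usage sketch; the pool class name, start() call, and worker function are illustrative, since the constructor is not shown here:

# pool = Parallel(f=square, n_CPU=4)              # assumed class and signature
# pool.start()                                    # workers must already be running
# out = pool.map(list(range(100)), l_quit=False)  # workers stay alive for reuse
# out == [x * x for x in range(100)]              # res.sort() above restores input order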
Example no. 14
    def hierarchical(self, options=None):
        if self.input=='': util.error_msg('Clustering.hierarchical: Input file has not been prepared; use make_input() first!')
        if self.table is None: self.make_table()
        opt=self.cluster_opt
        opt.update(options or {})
        self.cluster_opt=opt
        l_CWC=self.cluster_opt['BIN'] == 'CWC'
        if self.cluster_opt['FINGERPRINT'] and l_CWC:
            util.error_msg('Clustering.hierarchical: fingerprint mode has to be used with hybrid binary, not CWC!')
        #l_CWC=False
        if l_CWC:
            S_cmd=[Clustering.BIN_CWC, "-h -a -E -P", "-i "+self.input+".input", "-o "+self.input]
        else:
            S_cmd=[Clustering.BIN_HYB, "-eis", "-i "+self.input+".input", "-o "+self.input]
            if self.cluster_opt['SKIP_DM']:
                S_cmd.append('-ctr')
        s_dme=opt['DME']
        s_dmg=opt['DMG']
        r_maxe=1
        r_maxg=1
        iopt=self.input_opt
        d_start=time.time()
        if opt['GENE']:
            if opt['GENE_METRICS']=='BUILD_IN' and opt['DMG']=='' and not opt['HAS_NULL']:
                S_cmd.append("-p")
            else:
                if opt['GENE_METRICS']=='BUILD_IN':
                    opt['GENE_METRICS']='PEARSON'
                if opt['DMG']=='':
                    R_w=self.input_opt['EXP_WEIGHT']
                    #R_w=R_w+np.random.randn(len(R_w))*0.001
                    if R_w is not None and np.allclose(R_w, 1.0, atol=1e-5): R_w=None
                    #print R_w
                    dmg=self.make_DM(S_col=iopt['DATA_COLS'], metrics=opt['GENE_METRICS'], R_weight=R_w, by='GENE')
                    dmg.save(s_file=self.input+'.dmg', s_format='%.2f')
                    opt['DMG']=self.input+'.dmg'
                else:
                    dmg=DM(s_file=opt['DMG'])
                r_maxg=dmg.dmax
                del dmg
                if l_CWC:
                    S_cmd.append("-dmg "+opt['DMG'])
                else:
                    if self.cluster_opt['FINGERPRINT']:
                        S_cmd.append("-f "+opt['DMG'])
                    else:
                        S_cmd.append("-d "+opt['DMG'])
        if opt['EXP']:
            if not l_CWC:
                #util.warn_msg('Clustering.hierachical: experiment clustering currently is only supported by CWC!')
                if opt['EXP_METRICS']=='BUILD_IN':
                    opt['EXP_METRICS']='PEARSON'
                if opt['DME']=='':
                    R_w=None
                    dme=self.make_DM(S_col=iopt['DATA_COLS'], metrics=opt['EXP_METRICS'], R_weight=R_w, by='EXP')
                    dme.save(s_file=self.input+'.dme', s_format='%.2f')
                    opt['DME']=self.input+'.dme'
                else:
                    dme=DM(s_file=opt['DME'])
                r_maxe=dme.dmax
                del dme
                S_cmd.append("-de "+opt['DME'])
            else:
                S_cmd.append("-eg" if opt['GENE'] else '-e')
                if opt['EXP_METRICS']=='BUILD_IN' and opt['DME']=='' and not opt['HAS_NULL']:
                    if "-p" not in S_cmd: S_cmd.append("-p")
                else:
                    if opt['EXP_METRICS']=='BUILD_IN':
                        opt['EXP_METRICS']='PEARSON'
                    if opt['DME']=='':
                        R_w=None
                        if (iopt['WEIGHT_COL']!='' and util.index(iopt['WEIGHT_COL'], self.table.header())>=0):
                            R_w=self.table[iopt['WEIGHT_COL']].values
                        if R_w is not None and np.allclose(R_w, 1, atol=1e-5): R_w=None
                        dme=self.make_DM(S_col=iopt['DATA_COLS'], metrics=opt['EXP_METRICS'], R_weight=R_w, by='EXP')
                        dme.save(s_file=self.input+'.dme', s_format='%.2f')
                        opt['DME']=self.input+'.dme'
                    else:
                        dme=DM(s_file=opt['DME'])
                    r_maxe=dme.dmax
                    del dme
                    S_cmd.append("-dme "+opt['DME'])
        # cwc sends standard message to error channel
        util.unix(" ".join(S_cmd), l_error=False, l_print=False)
        print(" ".join(S_cmd))
        #Clustering._fix_missing(self.input+".cdt")
        #if opt['RESTORE_DISTANCE']:
        #    if opt['GENE'] and opt['GENE_METRICS']!='BUILD_IN': Clustering.restore_distance(self.input+".gtr", max_dist=r_maxg)
        #    if opt['EXP'] and opt['EXP_METRICS']!='BUILD_IN': Clustering.restore_distance(self.input+".atr", max_dist=r_maxe)

        if not opt['EXP']:
            # old CWC version will generate an AID row
            s_array=Clustering._strip_array_line(self.input+".cdt")
        if (opt['OPTIMIZE'] and opt['GENE']):
            Clustering.optimize(self.input)
            # optimization can handle Array line
            #if opt['EXP']: Clustering._insert_array_line(self.input+"Opt.cdt", s_array)
            Clustering.make_JTV(self.input+"Opt")
        else:
            Clustering.make_JTV(self.input)
        if opt['CLEANUP']:
            if opt['OPTIMIZE']:
                Clustering._remove_extra_files(self.input+"Opt")
            else:
                Clustering._remove_extra_files(self.input)
Example no. 15
    def tile_at(coord):
        return Tile.level[util.index(*coord)]
Example no. 16
    def clear(coord):
        Tile.level[util.index(*coord)].undraw()
        Tile.level[util.index(*coord)] = Empty(coord)
Example no. 17
    def __init__(self, s_file='', Z=None, l_gene_tree=True):
        """Z: linkage matrix, if None, assume s_file is not empty"""
        self.l_gene_tree = l_gene_tree
        self.root = Node('ROOT')
        self.l_gene_tree = l_gene_tree  # gene tree or array tree
        self.c_name = {}
        self.c_node = {}
        self.size = 0
        self.parent = {}  # track the parent node for each node
        self.tree_file = None

        if Z is not None:
            self.l_gene_tree = True
            r, c = Z.shape
            n = r + 1
            r_dist = max(Z[:, 2].max(), 1.0)
            for i in range(r):
                id_l = str(int(Z[i, 0]))
                id_r = str(int(Z[i, 1]))
                id_n = str(n + i)
                sim = max(1.0 - Z[i, 2] / r_dist, 0.0)  # distance -> similarity; avoid shadowing the row count r
                self.new_node(id_n,
                              label=self.c_name.get(id_n, ''),
                              left=self.new_node(id_l),
                              right=self.new_node(id_r),
                              similarity=sim)
                self.parent[id_l] = id_n
                self.parent[id_r] = id_n
            self.root = self.get_node(id_n)
            self.size = n - 1
        else:
            self.l_gene_tree = l_gene_tree
            if re.search(r'\.[ag]tr$', s_file):
                if re.search(r'\.atr$', s_file):
                    l_gene_tree = False
                s_file = re.sub(r'\.[ag]tr$', '', s_file)
            self.root = Node('ROOT')
            self.l_gene_tree = l_gene_tree  # gene tree or array tree
            self.c_name = {}
            self.c_node = {}
            self.size = 0
            self.parent = {}  # track the parent node for each node
            if not os.path.exists(s_file + ".cdt"):
                util.error_msg("File not exist: " + s_file + ".cdt!")
            f = open(s_file + '.cdt')
            S_header = f.readline().strip().split("\t")
            if not l_gene_tree:
                while True:
                    line = f.readline()
                    if not line: break
                    if line.startswith("AID\t"):
                        S_AID = line.strip().split("\t")
                        self.c_name = {
                            s: x
                            for s, x in zip(S_AID, S_header)
                            if str(s).startswith('ARRY')
                        }
                        break
            else:
                s_col = 'GENE'
                if s_col not in S_header and 'NAME' in S_header:
                    s_col = 'NAME'
                i_GID = util.index('GID', S_header)
                i_NAME = util.index(s_col, S_header)
                while True:
                    line = f.readline()
                    if not line: break
                    if line.startswith('AID') or line.startswith('EWEIGHT'):
                        continue
                    S = line.strip().split("\t")
                    self.c_name[S[i_GID]] = S[i_NAME]
            f.close()
            self.size = len(self.c_name)
            if self.size == 0:
                error_msg("Tree:__init_: No node is found to build the tree!")
            s_filename = s_file + ('.gtr' if l_gene_tree else '.atr')
            # check if file has column header
            self.tree_file = s_filename
            df = Tree.read_tree_file(s_filename)
            self.parse(df)
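A hypothetical construction from a scipy linkage matrix, assuming Z follows scipy's [left_id, right_id, distance, count] row layout (which matches how Z is unpacked above):

import numpy as np
from scipy.cluster.hierarchy import linkage

X = np.random.rand(5, 3)          # 5 observations
Z = linkage(X, method='average')  # 4 merge rows
tree = Tree(Z=Z)                  # leaves 0..4, internal nodes 5..8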
Example no. 18

def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('input_filename')
    parser.add_argument("--log", type=str, default='INFO',
            help="Logging setting (e.g., INFO, DEBUG)")
    args = parser.parse_args()

    # Setting logging parameters
    numeric_level = getattr(logging, args.log.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % args.log)
    logging.basicConfig(level=numeric_level, format='%(asctime)s %(message)s')

    n_exemplars = 10
    exemplifier = FrameExemplifier(n_exemplars)

    # Load data
    start, stop, step = 0, 2700, 1
    #start, stop, step = None, None, None
    logging.info('Loading from %s (start frame=%s, end frame=%s, increment=%s)',
        args.input_filename, *map(str, [start, stop, step]))
    sample_inds = list(range(start, stop, step))
    all_frames = util.grab_frame(args.input_filename)
    frame_sample = util.index(all_frames, sample_inds)

    n_bins = 25
    best_X_inds = exemplifier.from_BGRs(frame_sample, n_bins=n_bins)

    # Map index in frame_sample to index in the input video
    frame_inds = np.array(sample_inds)[best_X_inds]
    #frame_inds = np.linspace(0, 700, 25, dtype=int)
    frame_inds.sort()
    all_frames = util.grab_frame(args.input_filename)
    im_exemplars = list(util.index(all_frames, frame_inds))

    # Display exemplar histograms in one plot
    plt.figure()
    fig_title = "Examplar histograms at %s" % time.asctime(time.localtime())
    for im_exemplar in im_exemplars:
        counts, bins = exemplifier.im_BGR_to_features(im_exemplar)
        plt.plot(bins[:-1], counts)

    num_subplot_rows = math.ceil(n_exemplars**.5)
    plt.figure()
    fig_title = "Examplar images at %s" % time.asctime(time.localtime())
    plt.gcf().canvas.set_window_title(fig_title)
    for i, (frame_ind, im_exemplar) in enumerate(zip(frame_inds, im_exemplars)):
        plt.subplot(num_subplot_rows, num_subplot_rows, i + 1)
        plt.imshow(im_exemplar[:,:,::-1], interpolation='nearest')
        plt.xticks(())  # remove ticks
        plt.yticks(())
        plt.title("Frame #%i" % (frame_ind))
    plt.tight_layout()
    plt.show()

    logging.info('Exemplar frame indices are %s', str(frame_inds))
    return

    ### DISPLAY OUTPUT ###

    WIN = 'Output'
    ESC = 27
    SPACEBAR = 32
    for fi, frame in enumerate(im_HSVs):

        cv2.imshow(WIN, frame)
        key = cv2.waitKey(30)
        if key == ESC:
            break

        # Spacebar pauses video, after while ESC exits video or spacebar
        # resumes. Other keystrokes are ignored during pause.
        elif key == SPACEBAR:
            key = cv2.waitKey()
            while key != SPACEBAR and key != ESC:
                key = cv2.waitKey()
            if key == SPACEBAR:
                continue
            else:
                break

    cv2.destroyAllWindows()
Example no. 19
def color_cdt(s_file,
              exps=None,
              exp_bgcolor=None,
              genes=None,
              gene_bgcolor=None):
    if not s_file.endswith('.cdt'):
        s_file += '.cdt'
    if not os.path.exists(s_file):
        util.error_msg("File not exist: " + s_file + "!")
    BG = '#ffffff'
    f = open(s_file)
    S = []
    c_first = {}
    i = 0
    while True:
        line = f.readline()
        if not line: break
        SS = line.strip().split("\t")
        c_first[SS[0]] = i
        i += 1
        S.append(SS)
    f.close()
    S_header = S[0]
    i_gene = util.index('GENE', S_header)
    i_name = util.index('NAME', S_header)
    i_gid = util.index('GID', S_header)
    i_w = util.index("GWEIGHT", S_header)
    offset = max([i_gene, i_name, i_gid, i_w]) + 1
    n_exp = len(S_header) - offset
    if 'EWEIGHT' not in c_first:
        # add EWEIGHT ROW
        i_w = max([c_first.get('GID', -1), c_first.get('AID', -1)]) + 1
        S.insert(i_w, ['EWEIGHT'] + [''] * (offset - 1) + ['1.000'] * n_exp)
        c_first['EWEIGHT'] = i_w
    i_w = util.index("GWEIGHT", S_header)
    if i_w < 0:  # add GWEIGHT column
        i_w = offset
        S_header.insert(i_w, 'GWEIGHT')
        for i in range(1, len(S)):
            if i <= c_first['EWEIGHT']:
                S[i].insert(i_w, '')
            else:
                S[i].insert(i_w, '1.000')
        offset += 1
    i_gene_color = util.index('BGCOLOR', S_header)
    if i_gene_color < 0 and genes is not None:
        i_gene_color = offset - 1
        S_header.insert(i_gene_color, 'BGCOLOR')
        offset += 1
        for i in range(1, len(S)):
            if i <= c_first['EWEIGHT']:
                S[i].insert(i_gene_color, '')
            else:
                S[i].insert(i_gene_color, BG)
    i_exp_color = c_first.get('BGCOLOR', -1)
    if i_exp_color < 0 and exps is not None:
        i_exp_color = c_first['EWEIGHT']
        S.insert(i_exp_color, ['BGCOLOR'] + [''] * (offset - 1) + [BG] * n_exp)
        c_first['EWEIGHT'] += 1
    if genes is not None:
        c_m = Tree.color_map(genes, gene_bgcolor)
        idx = i_gene if i_gene >= 0 else i_name
        for i in range(c_first['EWEIGHT'] + 1, len(S)):
            S[i][i_gene_color] = c_m.get(S[i][idx], BG)
    if exps is not None:
        c_m = Tree.color_map(exps, exp_bgcolor)
        SS = S[c_first['EWEIGHT'] - 1]
        for i in range(offset, len(SS)):
            SS[i] = c_m.get(S_header[i], BG)
    S = ["\t".join(X) for X in S]
    util.save_list(s_file, S, s_end="\n")
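A hypothetical call (file name, gene labels, and colors are illustrative), assuming Tree.color_map pairs each label with a background color:

color_cdt('results/cluster',
          genes=['TP53', 'BRCA1'], gene_bgcolor=['#FF0000', '#00FF00'],
          exps=['Exp1'], exp_bgcolor=['#0000FF'])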
Example no. 20
    def query(coord, property):
        tile = Tile.level[util.index(*coord)]
        return tile.properties[property]
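These tile helpers assume Tile.level is a flat list and that util.index maps a 2-D coordinate to a flat offset; a hedged sketch of that convention (LEVEL_WIDTH is an assumed constant, not shown in the source):

# def index(x, y):
#     return y * LEVEL_WIDTH + x   # row-major flattening
#
# so Tile.level[index(3, 2)] is the tile at column 3, row 2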