Example #1
    def __init__(self, rooms):
        self.rooms = rooms
        self.mapSize = (MiniMap.mapImage.width - 2 * MiniMap.mapBorder[x],
                        MiniMap.mapImage.height - 2 * MiniMap.mapBorder[y])

        self.xGridCoords = []
        self.yGridCoords = []
        for room in self.rooms:
            if self.xGridCoords.count(room.gridCoord[x]) == 0:
                self.xGridCoords.append(room.gridCoord[x])
            if self.yGridCoords.count(room.gridCoord[y]) == 0:
                self.yGridCoords.append(room.gridCoord[y])
        self.xGridCoords.sort()
        self.yGridCoords.sort()

        self.minimums = [min(self.xGridCoords), min(self.yGridCoords)]
        self.maximums = [max(self.xGridCoords), max(self.yGridCoords)]

        self.roomCount = (abs(self.maximums[x]) + abs(self.minimums[x]) + 1,
                          abs(self.maximums[y]) + abs(self.minimums[y]) + 1)

        self.roomSize = min(
            MiniMap.spaceRatio * self.mapSize[x] /
            ((MiniMap.spaceRatio + 1) * self.roomCount[x] - 1),
            MiniMap.spaceRatio * self.mapSize[y] /
            ((MiniMap.spaceRatio + 1) * self.roomCount[y] - 1))

        self.spaceSize = min(self.roomSize / MiniMap.spaceRatio,
                             self.roomSize / MiniMap.spaceRatio)

        self.shift = [
            median(self.xGridCoords) * (self.roomSize + self.spaceSize),
            median(self.yGridCoords) * (self.roomSize + self.spaceSize)
        ]
Example #2
 def __init__(self, rooms):
     self.rooms = rooms
     self.mapSize = (MiniMap.mapImage.width - 2 * MiniMap.mapBorder[x], MiniMap.mapImage.height - 2 * MiniMap.mapBorder[y])
     
     self.xGridCoords = []
     self.yGridCoords = []        
     for room in self.rooms:
         if self.xGridCoords.count(room.gridCoord[x]) == 0:
             self.xGridCoords.append(room.gridCoord[x])
         if self.yGridCoords.count(room.gridCoord[y]) == 0:
             self.yGridCoords.append(room.gridCoord[y])
     self.xGridCoords.sort()
     self.yGridCoords.sort()
     
     self.minimums = [min(self.xGridCoords), min(self.yGridCoords)]
     self.maximums = [max(self.xGridCoords), max(self.yGridCoords)]
     
     self.roomCount = (abs(self.maximums[x]) + abs(self.minimums[x]) + 1,
                       abs(self.maximums[y]) + abs(self.minimums[y]) + 1)
     
     self.roomSize = min(MiniMap.spaceRatio * self.mapSize[x] / ((MiniMap.spaceRatio + 1) * self.roomCount[x] - 1),
                      MiniMap.spaceRatio * self.mapSize[y] / ((MiniMap.spaceRatio + 1) * self.roomCount[y] - 1))
     
     self.spaceSize = min(self.roomSize / MiniMap.spaceRatio,
                          self.roomSize / MiniMap.spaceRatio)
     
     self.shift = [median(self.xGridCoords) * (self.roomSize + self.spaceSize), 
              median(self.yGridCoords) * (self.roomSize + self.spaceSize)]
Example #3
def test_median(): 
    ''' Test the weighted median 
    '''
    w = np.repeat(1., 100)
    data = np.arange(100)
    print np.median(data)
    print UT.median(data, weights=w) 

    if np.median(data) != UT.median(data, weights=w):
        raise ValueError

    return None 
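
For context, the test above expects UT.median(data, weights=w) with uniform weights to agree exactly with np.median(data). Below is a minimal weighted-median sketch with that property; the name weighted_median and the tie-handling rule are illustrative assumptions, not the actual UT.median implementation.

import numpy as np

def weighted_median(data, weights=None):
    # Illustrative sketch only (assumes positive weights); UT.median may differ.
    data = np.asarray(data, dtype=float)
    if weights is None:
        return np.median(data)
    weights = np.asarray(weights, dtype=float)
    order = np.argsort(data)
    data, weights = data[order], weights[order]
    cum = np.cumsum(weights)
    midpoint = 0.5 * cum[-1]
    ties = np.nonzero(cum == midpoint)[0]
    if ties.size > 0:
        # the cumulative weight splits exactly in half: average the two
        # straddling values, which matches np.median for an even sample size
        i = ties[0]
        return 0.5 * (data[i] + data[i + 1])
    return data[np.searchsorted(cum, midpoint, side='right')]

# With w = np.repeat(1., 100) and data = np.arange(100) this returns 49.5,
# matching np.median(data) as asserted in test_median() above.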
Example #4
def test_SFMS_highz(run, T, nsnap=15, lit='lee', nsnap0=15, downsampled='14'): 
    ''' Compare the best-fit SFMS parameters from ABC to literature at high z  
    '''
    if lit == 'lee': 
        lit_label = 'Lee et al. (2015)'
    # median ABC theta 
    abcout = abcee.readABC(run, T)
    theta_med = [UT.median(abcout['theta'][:, i], weights=abcout['w'][:]) for i in range(len(abcout['theta'][0]))]

    subcat_sim = abcee.model(run, theta_med, nsnap0=nsnap0, downsampled=downsampled) 

    m_arr = np.arange(8., 12.1, 0.1)
    sfr_abc = Obvs.SSFR_SFMS(m_arr, UT.z_nsnap(nsnap), theta_SFMS=subcat_sim['theta_sfms']) + m_arr

    sfr_obv = Obvs.SSFR_SFMS_obvs(m_arr, UT.z_nsnap(nsnap), lit=lit) + m_arr
    
    fig = plt.figure()
    sub = fig.add_subplot(111)

    sub.fill_between(m_arr,  sfr_abc-0.3, sfr_abc+0.3, color='b', alpha=0.25, linewidth=0, edgecolor=None, 
            label=r'ABC $\theta_{median}$')
    sub.plot(m_arr, sfr_obv+0.3, ls='--', c='k', label=lit_label) 
    sub.plot(m_arr, sfr_obv-0.3, ls='--', c='k') 

    sub.set_xlim([8., 12.])
    sub.set_xlabel('$\mathtt{log\;M_*}$', fontsize=25)
    sub.set_ylim([-4., 3.])
    sub.set_ylabel('$\mathtt{log\;SFR}$', fontsize=25)
    sub.legend(loc='best') 
    fig.savefig(UT.fig_dir()+'SFMS.z'+str(round(UT.z_nsnap(nsnap),2))+'.'+run+'.'+lit+'.png', bbox_inches='tight')
    plt.close()
    return None 
Example #5
def test_ABC_SMHMR(run, T):#, sumstat=['smf']): 
    ''' Compare the SMHMR of the median T-th ABC particle pool with 'data'
    Hardcoded for smf only 
    '''
    # data summary statistic
    subcat_dat = abcee.Data(nsnap0=15)

    # median theta 
    abcout = abcee.readABC('test0', T)
    theta_med = [UT.median(abcout['theta'][:, i], weights=abcout['w'][:]) for i in range(len(abcout['theta'][0]))]
    
    # F( median theta) 
    subcat_sim = abcee.model(run, theta_med, nsnap0=15, downsampled='14')

    fig = plt.figure()
    sub = fig.add_subplot(111)

    smhmr = Obvs.Smhmr()
    # simulation 
    m_mid, mu_mhalo, sig_mhalo, cnts = smhmr.Calculate(subcat_sim['m.max'], subcat_sim['m.star'], 
            dmhalo=0.2, weights=subcat_sim['weights'])
    sub.fill_between(m_mid, mu_mhalo - sig_mhalo, mu_mhalo + sig_mhalo, color='b', alpha=0.25, linewidth=0, edgecolor=None, 
            label='Sim.')
    # data 
    m_mid, mu_mhalo, sig_mhalo, cnts = smhmr.Calculate(subcat_dat['m.max'], subcat_dat['m.star'], weights=subcat_dat['weights'])
    sub.errorbar(m_mid, mu_mhalo, yerr=sig_mhalo, color='k', label='Data')

    sub.set_xlim([10., 15.])
    sub.set_xlabel('Halo Mass $(\mathcal{M}_{halo})$', fontsize=25)
    sub.set_ylim([8., 12.])
    sub.set_ylabel('Stellar Mass $(\mathcal{M}_*)$', fontsize=25)
    sub.legend(loc='upper right') 
    plt.show()

    return None 
Example #6
def test_ABCsumstat(run, T):#, sumstat=['smf']): 
    ''' Compare the summary statistics of the median T-th ABC particle pool with data.
    Hardcoded for smf only 
    '''
    # data summary statistic
    sumdata = abcee.SumData(['smf'], info=True, nsnap0=15)

    # median theta 
    abcout = abcee.readABC('test0', T)
    theta_med = [UT.median(abcout['theta'][:, i], weights=abcout['w'][:]) for i in range(len(abcout['theta'][0]))]
    
    subcat = abcee.model(run, theta_med, nsnap0=15, downsampled='14')
    sumsim = abcee.SumSim(['smf'], subcat, info=True)

    fig = plt.figure()
    sub = fig.add_subplot(111)

    sub.plot(sumdata[0][0], sumdata[0][1], c='k', ls='--', label='Data')
    sub.plot(sumsim[0][0], sumsim[0][1], c='b', label='Sim.')

    sub.set_xlim([9., 12.])
    sub.set_xlabel('Stellar Masses $(\mathcal{M}_*)$', fontsize=25)
    sub.set_ylim([1e-6, 10**-1.75])
    sub.set_yscale('log')
    sub.set_ylabel('$\Phi$', fontsize=25)
    sub.legend(loc='upper right') 
    plt.show()

    return None 
Example #7
def select_rewrite_expression(name, exprs):
    """
    Given an expression name and a list of expressions,
    tries to select an expression with the highest selectivity
    for use in AST re-writing.
    """
    # For equality check (=, !=, is), select the mode
    if name[1] == "equality":
        values = [e.right.value for e in exprs]
        filter_using = util.mode(values)
        for e in exprs:
            if e.right.value == filter_using:
                return e

    # For ordering checks, select the median value for static
    elif name[1] == "order":
        is_static = name[3][1] == "static"
        values = [e.right.value for e in exprs]

        # For static (numeric) compares, we use median
        # value to eliminate as many as possible.
        # For non-numeric, we use mode
        if is_static:
            filter_using = util.median(values)
        else:
            filter_using = util.mode(values)

        for e in exprs:
            if e.right.value == filter_using:
                return e

    # For ordering checks without static values, use any
    else:
        return exprs[0]
Example #8
    def uclus_distance(self, x, y):
        """
        The method to determine the distance between one cluster and another
        item/cluster. The distance equals the *average* (median) distance
        from any member of one cluster to any member of the other cluster.

        :param x: first cluster/item.
        :param y: second cluster/item.
        """
        # create a flat list of all the items in <x>
        if not isinstance(x, Cluster):
            x = [x]
        else:
            x = x.fullyflatten()

        # create a flat list of all the items in <y>
        if not isinstance(y, Cluster):
            y = [y]
        else:
            y = y.fullyflatten()

        distances = []
        for k in x:
            for l in y:
                distances.append(self.distance(k, l))
        return median(distances)
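
As a quick worked check of that definition (the distance values are made up for illustration), the median of the pairwise distances is robust to a single outlying pair, unlike the arithmetic mean:

from statistics import median, mean

pairwise = [1.0, 2.0, 10.0]   # hypothetical distances between members of x and y
print(median(pairwise))       # 2.0  -> what uclus_distance would return
print(mean(pairwise))         # 4.33 -> would be pulled up by the outlying pair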
Example #9
def func_avg(kwargs, f=None, name=None):
    kwargs = dict(kwargs)
    pool = kwargs.pop("pool")
    n_runs = kwargs.pop("n_runs")
    result = pool.map(FunctionCaller(kwargs), range(n_runs))
    xs, sizes = list(zip(*result))
    if f is None:
        f = open("tuning.txt", "w")
    xs_str = "Evals: " + str(xs)
    sizes_str = "Sizes: " + str(sizes)
    median_str = "Median: %.1f  Average: %.1f ~ %.1f" % (util.median(
        xs), util.avg(xs), util.avg([x for x in xs if util.is_finite(x)]))
    avg_size_str = "AverageSize: %.1f ~ %.1f" % (
        util.avg(sizes), util.avg([s for s in sizes if util.is_finite(s)]))
    success_rate = len([x for x in xs if util.is_finite(x)]) / len(xs)
    success_rate_str = "Success_rate: %.2f" % success_rate
    report_str = [
        xs_str, sizes_str, median_str, avg_size_str, success_rate_str
    ]
    if name is not None:
        name_str = "* %s" % name
        report_str = [name_str] + report_str
    report_str = "\n".join(report_str)
    print(report_str, file=f)
    f.flush()
    print(report_str)
Example #10
def select_rewrite_expression(name, exprs):
    """
    Given an expression name and a list of expressions,
    tries to select an expression with the highest selectivity
    for use in AST re-writing.
    """
    # For equality check (=, !=, is), select the mode
    if name[1] == "equality":
        values = [e.right.value for e in exprs]
        filter_using = util.mode(values)
        for e in exprs:
            if e.right.value == filter_using:
                return e

    # For ordering checks, select the median value for static
    elif name[1] == "order":
        is_static = name[3][1] == "static"
        values = [e.right.value for e in exprs]

        # For static (numeric) compares, we use median
        # value to eliminate as many as possible.
        # For non-numeric, we use mode
        if is_static:
            filter_using = util.median(values)
        else:
            filter_using = util.mode(values)

        for e in exprs:
            if e.right.value == filter_using:
                return e

    # For ordering checks without static values, use any
    else:
        return exprs[0]
Example #11
 def to_dict(self):
     return {
         "name": self.name,
         "num_articles": len(self.articles),
         "articles": self.articles,
         #"scopus_bins": [b.to_dict() for b in self.histogram],
         "scopus_median": median([a["scopus"] for a in self.articles])
     }
Example #12
 def to_dict(self):
     return {
         "name": self.name,
         "num_articles": len(self.articles),
         "articles": self.articles,
         #"scopus_bins": [b.to_dict() for b in self.histogram],
         "scopus_median": median([a["scopus"] for a in self.articles])
     }
Example #13
def test_readABC(T): 
    ''' Try reading in different ABC outputs and do basic plots 
    '''
    abcout = abcee.readABC('test0', T)
    
    # median theta 
    theta_med = [UT.median(abcout['theta'][:, i], weights=abcout['w'][:]) for i in range(len(abcout['theta'][0]))]

    print theta_med
Example #14
    def addVehicle(self, veHistory):
        time = max(veHistory.keys()) * self.timePerFrame

        getRatio = lambda mvBbox: (mvBbox[3] - self.ratioErode) / (1 + abs(
            mvBbox[2] - self.ratioErode))  # height / width
        ratios = map(getRatio, veHistory.values())
        medianRatio = util.median(ratios)
        vehicle = "Moto" if medianRatio > self.ratioRef else "Automobile"
        self._data.append((time, vehicle))
        self.segmenter.addVehicle(vehicle)
Example #15
def xor_investigate_average():
    n_runs = 100
    x = []
    for i in range(n_runs):
        result = xor_investigate(i)
        print("xor(%d) = %d" % (i, result))
        x.append(result)
    #x = [xor_investigate() for i in range(n_runs)]
    print("Average=%.2f" % util.avg(x))
    print("Median=%.2f" % util.median(x))
Example #16
 def time_task(self):
     while True:
         t_send = time.time()
         clock_deltas = {None: (t_send, t_send)}
         for peer, request in [(peer, peer.get_time().addCallback(lambda res: (time.time(), res))) for peer in self.peers]:
             try:
                 t_recv, response = yield request
                 t = .5 * (t_send + t_recv)
                 clock_deltas[(peer.id, peer.address, peer.port)] = (t, float(response))
             except:
                 traceback.print_exc()
                 continue
         
         self.clock_offset = util.median(mine - theirs for mine, theirs in clock_deltas.itervalues())
         
         yield sleep(random.expovariate(1/100))
Example #17
    def bondStats(self, ni, nj, ij=0):
        # get the residue index extents.
        rlist = [atom['resSeq'] for atom in self.models[0]]
        rmin = min(rlist)
        rmax = max(rlist)

        # build the list of distances.
        d = []
        for i in range(rmin, rmax + 1):
            j = i + ij
            ai = self.select(i, ni)
            aj = self.select(j, nj)
            if ai and aj:
                d = d + dist(ai, aj)

        # compute and return statistics on the list.
        return (median(d), min(d), max(d))
Example #18
def sigMstar_tduty_fid(Mhalo=12, dMhalo=0.5):
    ''' Figure plotting sigmaMstar at M_halo = Mhalo for different 
    duty cycle times (t_duty) with fiducial SFMS parameter values rather 
    than ABC values. 
    '''
    # read in parameter values for randomSFH_1gyr
    abcout = ABC.readABC('randomSFH_1gyr', 13)
    # the median theta will be designated the fiducial parameter values 
    theta_fid = [UT.median(abcout['theta'][:, i], weights=abcout['w'][:]) 
            for i in range(len(abcout['theta'][0]))]
    
    runs = ['randomSFH_0.5gyr', 'randomSFH_1gyr', 'randomSFH_2gyr', 'randomSFH_5gyr', 'randomSFH_10gyr']
    tduties = [0.5, 1., 2., 5., 10.]  #hardcoded
    
    smhmr = Obvs.Smhmr()

    sigMstar_fid = []
    for i_t, tduty in enumerate(tduties): 
        subcat_sim = ABC.model(runs[i_t], theta_fid, 
                nsnap0=15, sigma_smhm=0.2, downsampled='14') 
        isSF = np.where(subcat_sim['gclass'] == 'sf') # only SF galaxies 

        sig_mstar_fid = smhmr.sigma_logMstar(
                subcat_sim['halo.m'][isSF], subcat_sim['m.star'][isSF], 
                weights=subcat_sim['weights'][isSF], Mhalo=Mhalo, dmhalo=dMhalo)

        sigMstar_fid.append(sig_mstar_fid)
    sigMstar_fid = np.array(sigMstar_fid)
    
    # make figure 
    fig = plt.figure(figsize=(5,5)) 
    sub = fig.add_subplot(111)
    sub.scatter(tduties, sigMstar_fid) 
    # x-axis
    sub.set_xlabel('$t_\mathrm{duty}$ [Gyr]', fontsize=20)
    sub.set_xlim([0., 10.]) 
    
    # y-axis
    sub.set_ylabel('$\sigma_{M_*}(M_\mathrm{halo} = 10^{'+str(Mhalo)+'} M_\odot)$', fontsize=20)
    sub.set_ylim([0., 0.5]) 
    
    fig.savefig(''.join([UT.tex_dir(), 'figs/sigMstar_tduty_fid.pdf']), 
            bbox_inches='tight', dpi=150) 
    plt.close()
    return None 
Example #19
    def angleStats(self, ni, nj, nk, ij=0, ik=0):
        # get the residue index extents.
        rlist = [atom['resSeq'] for atom in self.models[0]]
        rmin = min(rlist)
        rmax = max(rlist)

        # build the list of angles.
        theta = []
        for i in range(rmin, rmax + 1):
            j = i + ij
            k = i + ik
            ai = self.select(i, ni)
            aj = self.select(j, nj)
            ak = self.select(k, nk)
            if ai and aj and ak:
                theta = theta + angle(ai, aj, ak)

        # compute and return statistics on the list.
        return (median(theta), min(theta), max(theta))
Example #20
    def dihedStats(self, ni, nj, nk, nl, ij=0, ik=0, il=0):
        # get the residue index extents.
        rlist = [atom['resSeq'] for atom in self.models[0]]
        rmin = min(rlist)
        rmax = max(rlist)

        # build the list of dihedrals.
        omega = []
        for i in range(rmin, rmax + 1):
            j = i + ij
            k = i + ik
            l = i + il
            ai = self.select(i, ni)
            aj = self.select(j, nj)
            ak = self.select(k, nk)
            al = self.select(l, nl)
            if ai and aj and ak and al:
                omega = omega + dihed(ai, aj, ak, al)

        # compute and return statistics on the list.
        return (median(omega), min(omega), max(omega))
Example #21
    def time_task(self):
        while True:
            t_send = time.time()
            clock_deltas = {None: (t_send, t_send)}
            for peer, request in [
                (peer,
                 peer.get_time().addCallback(lambda res: (time.time(), res)))
                    for peer in self.peers
            ]:
                try:
                    t_recv, response = yield request
                    t = .5 * (t_send + t_recv)
                    clock_deltas[(peer.id, peer.address,
                                  peer.port)] = (t, float(response))
                except:
                    traceback.print_exc()
                    continue

            self.clock_offset = util.median(
                mine - theirs for mine, theirs in clock_deltas.itervalues())

            yield sleep(random.expovariate(1 / 100))
Example #22
 def find_info_gain(self, X, y, attr):
     '''
     Find the information gain for a given attribute
     '''
     # Find out if its a numerical or categorical
     isCategorical = type(X[1][attr]) == str
     if isCategorical:
         value_set = set([x[attr] for x in X])
         if len(value_set) == 1:
             # only one value for entire list
             return None
         elif len(value_set) == 2:
             attribute_values = [list(value_set)[0]]
         else:
             attribute_values = [x for x in value_set]
     else:
         # calculate mean, median of the values
         value_set = set([x[attr] for x in X])
         if len(value_set) == 1:
             return None
         values = [x for x in value_set]
         # calculate info gain on median and mean
         attribute_values = [mean(values), median(values)]
     max_info_gain = 0
     val = None
     X_left = None
     X_right = None
     Y_left = None
     Y_right = None
     for each in attribute_values:
         x_left, x_right, y_left, y_right = partition_classes(
             X, y, attr, each)
         info_gain = information_gain(y, [y_left, y_right])
         if info_gain > max_info_gain:
             max_info_gain = info_gain
             val = each
             X_left, X_right, Y_left, Y_right = x_left, x_right, y_left, y_right
     return isCategorical, val, max_info_gain, X_left, X_right, Y_left, Y_right
Example #23
 def unbalanced_countries(self):
     self.sort_countries()
     q1 = self.num_countries/4.0
     q2 = int(q1*2)
     q3 = int(q1*3)
     q1 = int(q1)
     quant1 = self.countries[:q1]
     quant2 = self.countries[q1:q2]
     quant3 = self.countries[q2:q3]
     median = util.median(self.countries)
     mid50 = len(quant2[-1].territories) - len(quant2[0].territories)
     min_terrs = median - mid50*1.5
     max_terrs = median + mid50*1.5
     small = [c for c in self.countries if len(c.territories) < min_terrs]
     large = [c for c in self.countries if len(c.territories) > max_terrs]
     
     if len(self.countries[0].territories)*1.5 \
             < len(self.countries[-1].territories):
         if self.countries[0] not in small:
             small.append(self.countries[0])
         if self.countries[-1] not in large:
             large.append(self.countries[-1])
     return sorted(small), sorted(large)
Example #24
def combine(result_matrices, score_scalings, membership, iteration, config_params):
    """This is  the combining function, taking n result matrices and scalings"""
    quantile_normalize = config_params['quantile_normalize']

    for i, m in enumerate(result_matrices):
        m.fix_extreme_values()
        m.subtract_with_quantile(0.99)

        # debug mode: print scoring matrices before combining
        if ('dump_scores' in config_params['debug'] and
            (iteration == 1 or (iteration % config_params['debug_freq'] == 0))):
            funs = config_params['pipeline']['row-scoring']['args']['functions']
            m.write_tsv_file(os.path.join(config_params['output_dir'], 'score-%s-%04d.tsv' % (funs[i]['id'], iteration)), compressed=False)

    if quantile_normalize:
        if len(result_matrices) > 1:
            start_time = util.current_millis()
            result_matrices = dm.quantile_normalize_scores(result_matrices,
                                                           score_scalings)
            elapsed = util.current_millis() - start_time
            logging.debug("quantile normalize in %f s.", elapsed / 1000.0)

        in_matrices = [m.values for m in result_matrices]

    else:
        in_matrices = []
        num_clusters = membership.num_clusters()
        mat = result_matrices[0]
        index_map = {name: index for index, name in enumerate(mat.row_names)}
        # we assume matrix 0 is always the gene expression score
        # we also assume that the matrices are already extreme value
        # fixed
        rsm = []
        for cluster in range(1, num_clusters + 1):
            row_members = sorted(membership.rows_for_cluster(cluster))
            rsm.extend([mat.values[index_map[row], cluster - 1] for row in row_members])
        scale = util.mad(rsm)
        if scale == 0:  # avoid that we are dividing by 0
            scale = util.r_stddev(rsm)
        if scale != 0:
            median_rsm = util.median(rsm)
            rsvalues = (mat.values - median_rsm) / scale
            num_rows, num_cols = rsvalues.shape
            rscores = dm.DataMatrix(num_rows, num_cols,
                                    mat.row_names,
                                    mat.column_names,
                                    values=rsvalues)
            rscores.fix_extreme_values()
        else:
            logging.warn("combiner scaling -> scale == 0 !!!")
            rscores = mat
        in_matrices.append(rscores.values)

        if len(result_matrices) > 1:
            rs_quant = util.quantile(rscores.values, 0.01)
            logging.debug("RS_QUANT = %f", rs_quant)
            for i in range(1, len(result_matrices)):
                values = result_matrices[i].values
                qqq = abs(util.quantile(values, 0.01))
                if qqq == 0:
                    logging.warn('SPARSE SCORES - %d attempt 1: pick from sorted values', i)
                    qqq = sorted(values.ravel())[9]
                if qqq == 0:
                    logging.warn('SPARSE SCORES - %d attempt 2: pick minimum value', i)
                    qqq = abs(values.min())
                if qqq != 0:
                    values = values / qqq * abs(rs_quant)
                else:
                    logging.warn('SPARSE SCORES - %d not normalizing!', i)
                in_matrices.append(values)

    if len(result_matrices) > 0:
        start_time = util.current_millis()
        # assuming same format of all matrices
        combined_score = np.zeros(in_matrices[0].shape)
        for i in xrange(len(in_matrices)):
            combined_score += in_matrices[i] * score_scalings[i]

        elapsed = util.current_millis() - start_time
        logging.debug("combined score in %f s.", elapsed / 1000.0)
        matrix0 = result_matrices[0]  # as reference for names
        return dm.DataMatrix(matrix0.num_rows, matrix0.num_columns,
                             matrix0.row_names, matrix0.column_names,
                             values=combined_score)
    else:
        return None
Example #25
	def centralize(self):
		
		self.position.x = utl.median(self.axes['x'][:])
		self.position.y = utl.median(self.axes['y'][:])
		self.position.z = utl.median(self.axes['z'][:])
Example #26
 def median(self):
     """returns the mean value"""
     return util.median(self.values)
Example #27
def test_bench_disk_paxos(metasync, opts):
    "test disk paxos"
    "bencmark latency of paxos with backends"

    from disk_paxos import DiskPaxosWorker

    repeat = 5
    client_num = [1, 2, 3, 4, 5]
    backend_list = [["google"], ["dropbox"], ["onedrive"], ["box"], ["google", "dropbox", "onedrive"]]
    results = [['Clients'] + [','.join(x) for x in backend_list]]

    # start to test
    for num in client_num:
        for num_prop in range(1, num + 1):
            for _ in range(repeat):
                row = ['%d/%d clients' % (num_prop, num)]
                for backend in backend_list:
                    srvs = map(services.factory, backend)
                    dbg.info('Test paxos for %d/%d clients and %s' % (num_prop, num, ','.join(backend)))
                    # initialize all disk blocks
                    blockList = []
                    for i in range(num):
                        path = '/diskpaxos/client%d' % i
                        for srv in srvs:
                            if not srv.exists(path):
                                srv.put(path, '')
                            else:
                                srv.update(path, '')
                        blockList.append(path)

                    clients = []
                    for i in range(num_prop):
                        storages = map(services.factory, backend)
                        worker = DiskPaxosWorker(storages, blockList[i], blockList)
                        clients.append(worker)
                        #dbg.dbg('client %d %s' % (i, worker.clientid))
                    for worker in clients:
                        worker.start()

                    latency = [] 
                    master_latency = None
                    for worker in clients:
                        worker.join()
                        latency.append(worker.latency)
                        if (worker.master):
                            assert master_latency is None
                            master_latency = worker.latency
                    for worker in clients:
                        worker.join()
                    
                    summary = ",".join(map(str,[min(latency), max(latency), util.median(latency), master_latency]))
                    dbg.info("Result: %s" % summary)
                    row.append(summary)
                results.append(row)

    # tabularize
    print "Item Format: min,max,median,master"
    for row in results:
        for e in row:
            print "%s \t" % e,
        print
Example #28
def test_bench_paxos2(metasync, opts):
    "bencmark latency of paxos with backends"

    def new_index(srv, folder, prefix):
        if not srv.exists(folder):
            return 0
        files = srv.listdir(folder)
        cnt = 0
        for fn in files:
            if fn.startswith(prefix):
                cnt += 1
        return cnt

    from paxos import PPaxosWorker2

    repeat = 5
    client_num = [1, 2, 3, 4, 5]
    backend_list = [["dropbox"], ["onedrive"]]
    results = [['Clients'] + [','.join(x) for x in backend_list]]

    # start to test
    for num in client_num:
        for _ in range(repeat):
            row = ['%d clients' % (num)]
            for backend in backend_list:
                dbg.info('Test paxos for %d clients and %s' % (num, ','.join(backend)))
                srvs = map(services.factory, backend)
                # init log file
                prefix = 'test2-%d-%d' % (num , len(backend))
                index = new_index(srvs[0], '/ppaxos', prefix)
                path = '/ppaxos/%s.%d' % (prefix, index)
                dbg.info(path)
                for srv in srvs:
                    srv.init_log2(path)

                clients = []
                for i in range(num):
                    storages = map(services.factory, backend)
                    worker = PPaxosWorker2(storages, path)
                    clients.append(worker)
                for worker in clients:
                    worker.start()
                for worker in clients:
                    worker.join()

                latency = []
                master_latency = None
                for worker in clients:
                    latency.append(worker.latency)
                    if (worker.master):
                        assert master_latency is None
                        master_latency = worker.latency
                assert master_latency is not None
                
                summary = ",".join(map(str,[min(latency), max(latency), util.median(latency), master_latency]))
                dbg.info("Result: %s" % summary)
                row.append(summary)
            results.append(row)

    # tabularize
    print "Item Format: min,max,median,master"
    for row in results:
        for e in row:
            print "%s \t" % e,
        print
Example #29
def combine(result_matrices, score_scalings, membership, quantile_normalize):
    """This is  the combining function, taking n result matrices and scalings"""
    for m in result_matrices:
        m.fix_extreme_values()

    if quantile_normalize:
        if len(result_matrices) > 1:
            start_time = util.current_millis()
            result_matrices = dm.quantile_normalize_scores(result_matrices,
                                                           score_scalings)
            elapsed = util.current_millis() - start_time
            logging.info("quantile normalize in %f s.", elapsed / 1000.0)

        in_matrices = [m.values for m in result_matrices]

    else:
        in_matrices = []
        num_clusters = membership.num_clusters()
        mat = result_matrices[0]
        index_map = {name: index for index, name in enumerate(mat.row_names)}
        # we assume matrix 0 is always the gene expression score
        # we also assume that the matrices are already extreme value
        # fixed
        rsm = []
        for cluster in range(1, num_clusters + 1):
            row_members = sorted(membership.rows_for_cluster(cluster))
            rsm.extend([mat.values[index_map[row]][cluster - 1]
                        for row in row_members])
        scale = util.mad(rsm)
        if scale == 0:  # avoid that we are dividing by 0
            scale = util.r_stddev(rsm)
        if scale != 0:
            median_rsm = util.median(rsm)
            rsvalues = (mat.values - median_rsm) / scale
            num_rows, num_cols = rsvalues.shape
            rscores = dm.DataMatrix(num_rows, num_cols,
                                    mat.row_names,
                                    mat.column_names,
                                    values=rsvalues)
            rscores.fix_extreme_values()
        else:
            logging.warn("combiner scaling -> scale == 0 !!!")
            rscores = mat
        in_matrices.append(rscores.values)

        if len(result_matrices) > 1:
            rs_quant = util.quantile(rscores.values, 0.01)
            logging.info("RS_QUANT = %f", rs_quant)
            for i in range(1, len(result_matrices)):
                values = result_matrices[i].values
                qqq = abs(util.quantile(values, 0.01))
                #print "qqq(%d) = %f" % (i, qqq)
                if qqq == 0:
                    logging.error("very sparse score !!!")
                values = values / qqq * abs(rs_quant)
                in_matrices.append(values)

    if len(result_matrices) > 0:
        start_time = util.current_millis()
        # assuming same format of all matrices
        combined_score = np.zeros(in_matrices[0].shape)
        for i in xrange(len(in_matrices)):
            combined_score += in_matrices[i] * score_scalings[i]

        elapsed = util.current_millis() - start_time
        logging.info("combined score in %f s.", elapsed / 1000.0)
        matrix0 = result_matrices[0]  # as reference for names
        return dm.DataMatrix(matrix0.num_rows, matrix0.num_columns,
                             matrix0.row_names, matrix0.column_names,
                             values=combined_score)
    else:
        return None
Example #30
def analysis(data, settings, flat_age_matrix=[], flat_sim_matrix=[], alpha=0.03):
    # value_keys = ["complete_term_jaccard", "top_term_jaccard", "top_gene_jaccard", "top_parents_jaccard"]
    value_keys = ["top_gene_jaccard", "top_parents_jaccard"]
    data_values = [[row._asdict()[k] for k in value_keys] for row in data]
    means = [mean([x[i] for x in data_values]) for i in range(len(value_keys))]
    stds = [sstdev([x[i] for x in data_values]) for i in range(len(value_keys))]
    medians = [median([x[i] for x in data_values]) for i in range(len(value_keys))]

    log.info("N: %s", len(data_values))
    genes_found, genes_missed = len([genes[1] for row in data for genes in row.genes_found]),len([genes for row in data for genes in row.genes_missed])
    log.info("genes_found: %s, genes_missed: %s", genes_found, genes_missed)
    total = genes_found + genes_missed
    log.info("genes_found: %s, genes_missed: %s", round(genes_found / total, 2), round(genes_missed / total, 2))
    #log.info("distinct unknown genes: %s", len(unknown))
    fig_num = 0
    for i, value_key in enumerate(value_keys):
        log.info("%s: mean=%s, std=%s, median=%s", value_key, means[i], stds[i], medians[i])

        f = plt.figure(fig_num)
        fig_num +=1
        d = [x[i] for x in data_values]
        weights = np.ones_like(d)/float(len(d))
        plt.hist(d, 100, weights=weights, alpha=0.5, label='Actual')

        weights = np.ones_like(flat_sim_matrix)/float(len(flat_sim_matrix))
        plt.hist(flat_sim_matrix, 100, weights=weights, alpha=0.5, label='Randomized')

        plt.xlabel('Similarity')
        plt.ylabel(value_keys[i])
        plt.title('Histogram of ' + value_key + " | " + r'$\mu=' + str(round(means[i], 2)) + r',\ \sigma=' + str(round(stds[i], 2)) + r'$')
        plt.legend(loc='upper right')
        f.show()

        f = plt.figure(fig_num)
        fig_num +=1
        x = [r.age for r in data]
        y = [r._asdict()[value_key] for r in data]
        fit = np.polyfit(x,y,1)
        fit_fn = np.poly1d(fit)
        plt.plot(x,y, 'k.', [min(x), max(x)], fit_fn([min(x), max(x)]), '--g')
        plt.xlabel('Age')
        plt.ylabel("Similarity Index")
        plt.title("Ancestors of " + value_key + " vs Age" + " | " + r'$\mu=' + str(round(means[i], 2)) + r',\ \sigma=' + str(round(stds[i], 2)) + r'$' )
        f.show()

        f = plt.figure(fig_num)
        fig_num +=1
        x = flat_age_matrix
        y = flat_sim_matrix
        fit = np.polyfit(x,y,1)
        fit_fn = np.poly1d(fit)
        plt.plot(x,y, 'k.', [min(x), max(x)], fit_fn([min(x), max(x)]), '--g', alpha=alpha)
        plt.xlabel('Age')
        plt.ylabel("Similarity Index")
        plt.title("Randomized Ancestors of " + value_key + " vs Age" + " | " + r'$\mu=' + str(round(means[i], 2)) + r',\ \sigma=' + str(round(stds[i], 2)) + r'$' )

        f.show()

    f = plt.figure(fig_num)
    fig_num +=1

    d = [x[4] for x in data]
    weights = np.ones_like(d)/float(len(d))
    plt.hist(d, 100, weights=weights, alpha=0.5, label='Actual')

    weights = np.ones_like(flat_age_matrix)/float(len(flat_age_matrix))
    plt.hist(flat_age_matrix, 100, weights=weights, alpha=0.5, label='Randomized')
    plt.xlabel('Age')
    f.show()
    raw_input()
    plt.close("all")
Example #31
 def test_median_with_nans(self):
     """tests the mean() function"""
     array = np.array([2.0, 3.0, np.nan, 1.0])
     result = util.median(array)
     self.assertAlmostEqual(2.0, result)
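
The test above implies that util.median tolerates NaN entries. A small NaN-aware sketch that reproduces the expected result follows; the helper name nan_median is hypothetical, and numpy's np.nanmedian gives the same behaviour for this input.

import numpy as np

def nan_median(values):
    # Hypothetical sketch: drop NaNs, then take the ordinary median.
    values = np.asarray(values, dtype=float)
    finite = values[~np.isnan(values)]
    return np.median(finite)

# nan_median(np.array([2.0, 3.0, np.nan, 1.0])) -> 2.0, as the test expects;
# np.nanmedian returns the same value here.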
Example #32
def test_bench_disk_paxos(metasync, opts):
    "test disk paxos"
    "bencmark latency of paxos with backends"

    from disk_paxos import DiskPaxosWorker

    repeat = 5
    client_num = [1, 2, 3, 4, 5]
    backend_list = [["google"], ["dropbox"], ["onedrive"], ["box"],
                    ["google", "dropbox", "onedrive"]]
    results = [['Clients'] + [','.join(x) for x in backend_list]]

    # start to test
    for num in client_num:
        for num_prop in range(1, num + 1):
            for _ in range(repeat):
                row = ['%d/%d clients' % (num_prop, num)]
                for backend in backend_list:
                    srvs = map(services.factory, backend)
                    dbg.info('Test paxos for %d/%d clients and %s' %
                             (num_prop, num, ','.join(backend)))
                    # initialize all disk blocks
                    blockList = []
                    for i in range(num):
                        path = '/diskpaxos/client%d' % i
                        for srv in srvs:
                            if not srv.exists(path):
                                srv.put(path, '')
                            else:
                                srv.update(path, '')
                        blockList.append(path)

                    clients = []
                    for i in range(num_prop):
                        storages = map(services.factory, backend)
                        worker = DiskPaxosWorker(storages, blockList[i],
                                                 blockList)
                        clients.append(worker)
                        #dbg.dbg('client %d %s' % (i, worker.clientid))
                    for worker in clients:
                        worker.start()

                    latency = []
                    master_latency = None
                    for worker in clients:
                        worker.join()
                        latency.append(worker.latency)
                        if (worker.master):
                            assert master_latency is None
                            master_latency = worker.latency
                    for worker in clients:
                        worker.join()

                    summary = ",".join(
                        map(str, [
                            min(latency),
                            max(latency),
                            util.median(latency), master_latency
                        ]))
                    dbg.info("Result: %s" % summary)
                    row.append(summary)
                results.append(row)

    # tabularize
    print "Item Format: min,max,median,master"
    for row in results:
        for e in row:
            print "%s \t" % e,
        print
Example #33
 def calcAverage(self):
     levels = []
     for each in self.players:
         levels.append(int(each.getLevel()))
     self.averageLevel = util.median(levels)
Example #34
def combine(result_matrices, score_scalings, membership, iteration,
            config_params):
    """This is  the combining function, taking n result matrices and scalings"""
    quantile_normalize = config_params['quantile_normalize']

    for i, m in enumerate(result_matrices):
        m.fix_extreme_values()
        m.subtract_with_quantile(0.99)

        # debug mode: print scoring matrices before combining
        if ('dump_scores' in config_params['debug']
                and (iteration == 1 or
                     (iteration % config_params['debug_freq'] == 0))):
            funs = config_params['pipeline']['row-scoring']['args'][
                'functions']
            m.write_tsv_file(os.path.join(
                config_params['output_dir'],
                'score-%s-%04d.tsv' % (funs[i]['id'], iteration)),
                             compressed=False)

    if quantile_normalize:
        if len(result_matrices) > 1:
            start_time = util.current_millis()
            result_matrices = dm.quantile_normalize_scores(
                result_matrices, score_scalings)
            elapsed = util.current_millis() - start_time
            logging.debug("quantile normalize in %f s.", elapsed / 1000.0)

        in_matrices = [m.values for m in result_matrices]

    else:
        in_matrices = []
        num_clusters = membership.num_clusters()
        mat = result_matrices[0]
        index_map = {name: index for index, name in enumerate(mat.row_names)}
        # we assume matrix 0 is always the gene expression score
        # we also assume that the matrices are already extreme value
        # fixed
        rsm = []
        for cluster in range(1, num_clusters + 1):
            row_members = sorted(membership.rows_for_cluster(cluster))
            rsm.extend([
                mat.values[index_map[row], cluster - 1] for row in row_members
            ])
        scale = util.mad(rsm)
        if scale == 0:  # avoid that we are dividing by 0
            scale = util.r_stddev(rsm)
        if scale != 0:
            median_rsm = util.median(rsm)
            rsvalues = (mat.values - median_rsm) / scale
            num_rows, num_cols = rsvalues.shape
            rscores = dm.DataMatrix(num_rows,
                                    num_cols,
                                    mat.row_names,
                                    mat.column_names,
                                    values=rsvalues)
            rscores.fix_extreme_values()
        else:
            logging.warn("combiner scaling -> scale == 0 !!!")
            rscores = mat
        in_matrices.append(rscores.values)

        if len(result_matrices) > 1:
            rs_quant = util.quantile(rscores.values, 0.01)
            logging.debug("RS_QUANT = %f", rs_quant)
            for i in range(1, len(result_matrices)):
                values = result_matrices[i].values
                qqq = abs(util.quantile(values, 0.01))
                if qqq == 0:
                    logging.debug(
                        'SPARSE SCORES - %d attempt 1: pick from sorted values',
                        i)
                    qqq = sorted(values.ravel())[9]
                if qqq == 0:
                    logging.debug(
                        'SPARSE SCORES - %d attempt 2: pick minimum value', i)
                    qqq = abs(values.min())
                if qqq != 0:
                    values = values / qqq * abs(rs_quant)
                else:
                    logging.debug('SPARSE SCORES - %d not normalizing!', i)
                in_matrices.append(values)

    if len(result_matrices) > 0:
        start_time = util.current_millis()
        # assuming same format of all matrices
        combined_score = np.zeros(in_matrices[0].shape)
        for i in xrange(len(in_matrices)):
            combined_score += in_matrices[i] * score_scalings[i]

        elapsed = util.current_millis() - start_time
        logging.debug("combined score in %f s.", elapsed / 1000.0)
        matrix0 = result_matrices[0]  # as reference for names
        return dm.DataMatrix(matrix0.num_rows,
                             matrix0.num_columns,
                             matrix0.row_names,
                             matrix0.column_names,
                             values=combined_score)
    else:
        return None
Example #35
def main ():
	parser = argparse.ArgumentParser()
	parser.add_argument('--host', default = 'mongodb://localhost:27017/')
	parser.add_argument('action', choices = ['insert'])
	parser.add_argument('-it', type = int, default = 2, help = "inserting thread num")
	parser.add_argument('-rt', type = int, default = 1, help = "reading thread num")
	parser.add_argument('-rmt', type = int, default = 1, help = "removing thread num")
	parser.add_argument('-db_name', default = "test_database")
	parser.add_argument('-collection_name', default = "test_collection")
	parser.add_argument('-noi', '--no-index', action = 'store_true', help = "dont create index")
	args = parser.parse_args()

	collection = connect(args)
	print collection.count(), "records in collection %s.%s" % (args.db_name, args.collection_name)

	print "dropping old indexes..."
	try:
		collection.drop_index([("timestamp", pymongo.ASCENDING)])
	except pymongo.errors.OperationFailure as e:
		if "index not found" not in str(e):
			raise 

	if not args.no_index:
		print "trying to create indexes..."
		#pymongo.ASCENDING == 1
		collection.create_index([("ev_id", pymongo.ASCENDING)])
		collection.create_index([("ev_id", pymongo.ASCENDING), ("timestamp", pymongo.ASCENDING)])
		collection.create_index([("ev_id", pymongo.ASCENDING), ("s_id", pymongo.ASCENDING), ("timestamp", pymongo.ASCENDING)])
		collection.create_index([("s_id", pymongo.ASCENDING)])

	# insert()
	if args.action == 'insert':
		accum = collections.deque()
		r_accum = collections.deque()
		rm_accum = collections.deque()

		print "starting threads:", args.it, "inserting", args.rt, "reading", args.rmt, "removing"

		for _ in range(args.it):
			collection = connect(args)

			t = threading.Thread(target = insert_loop, args = [accum, collection])
			t.daemon = True
			t.start()

		for _ in range(args.rt):
			collection = connect(args)

			t = threading.Thread(target = read_loop, args = [r_accum, collection])
			t.daemon = True
			t.start()

		for _ in range(args.rmt):
			collection = connect(args)

			t = threading.Thread(target = remove_loop, args = [rm_accum, collection])
			t.daemon = True
			t.start()

		print "starting stat loop..."

		t_start = time.time()
		total_inserts = 0
		total_reads = 0
		try:
			sleepd = 0
			while True:
				time.sleep(1 - sleepd)

				t1 = time.time()


				v = list(accum)
				accum.clear()
				ra = list(r_accum)
				r_accum.clear()

				v.sort()
				ra.sort()
				num = len(v)
				rnum = len(ra)
				total_inserts += num
				total_reads += rnum

				m = util.median(v)
				m = ("%0.5f" % m) if m is not None else None
				p95 = util.p95(v)
				p95 = ("%0.5f" % p95) if p95 is not None else None
				mx = max(v) if v else None
				mx = ("%0.5f" % mx) if mx is not None else None
				print args.it, "ithreads", num, "insert/s, med", m, "p95", p95, "max", mx, "total num", total_inserts

				m = util.median(ra)
				m = (("%0.5f" % m) if m is not None else None) or '-'
				p95 = util.p95(ra)
				p95 = (("%0.5f" % p95) if p95 is not None else None) or '-'
				mx = max(ra) if ra else None
				mx = (("%0.5f" % mx) if mx is not None else None) or '-'
				print " ", args.rt, "rthreads", rnum, "reads/s, med", m, "p95", p95, "max", mx, "total num", total_reads


				sleepd = time.time() - t1
		finally:
			print "total inserts", total_inserts, "total reads", total_reads, "took", (time.time() - t_start), "s"
Example #36
def test_bench_paxos(metasync, opts):
    "bencmark latency of paxos with backends"

    def new_index(srv, folder, prefix):
        if services.slug(srv) == 'onedrive':
            folder = '/Public' + folder
        if not srv.exists(folder):
            return 0
        files = srv.listdir(folder)
        cnt = 0
        for fn in files:
            if fn.startswith(prefix):
                cnt += 1
        return cnt

    from paxos import PPaxosWorker

    repeat = 5
    client_num = [1, 2, 3, 4, 5]
    backend_list = [["google"], ["dropbox"], ["onedrive"], ["box"],
                    ["google", "dropbox", "onedrive"]]
    results = [['Clients'] + [','.join(x) for x in backend_list]]

    # start to test
    for num in client_num:
        for _ in range(repeat):
            row = ['%d clients' % (num)]
            for backend in backend_list:
                dbg.info('Test paxos for %d clients and %s' %
                         (num, ','.join(backend)))
                srvs = map(services.factory, backend)
                # init log file
                prefix = 'test-%d-%d' % (num, len(backend))
                index = new_index(srvs[0], '/ppaxos', prefix)
                path = '/ppaxos/%s.%d' % (prefix, index)
                dbg.info(path)
                for srv in srvs:
                    srv.init_log(path)

                clients = []
                for i in range(num):
                    storages = map(services.factory, backend)
                    worker = PPaxosWorker(storages, path)
                    clients.append(worker)
                for worker in clients:
                    worker.start()

                latency = []
                master_latency = None
                for worker in clients:
                    worker.join()
                    latency.append(worker.latency)
                    if (worker.master):
                        assert master_latency is None
                        master_latency = worker.latency
                for worker in clients:
                    worker.join()
                summary = ",".join(
                    map(str, [
                        min(latency),
                        max(latency),
                        util.median(latency), master_latency
                    ]))
                dbg.info("Result: %s" % summary)
                row.append(summary)
            results.append(row)

    # tabularize
    print "Item Format: min,max,median,master"
    for row in results:
        for e in row:
            print "%s \t" % e,
        print
Example #37
 def calcAverage(self):
     levels = []
     for each in self.players:
         levels.append(int(each.getLevel()))
     self.averageLevel = util.median(levels)