コード例 #1
0
ファイル: clinc_util.py プロジェクト: ryland-mortlock/CLiNC
def detect_cross_tree_transitions(parent_map, violations, N):
    """Select cross-tree transitions that account for the observed violations.

    For every ordered pair ``(i, j)`` of distinct keys of ``parent_map`` the
    violations predicted by ``get_violations`` are collected as a "template".
    A weighted set-cover problem is then solved: rows are the observed
    violations that appear in at least one template, columns are the
    non-empty templates, and a column's cost is ``1 + fraction of its
    predicted violations that were NOT observed``.  Chosen columns with a
    cost above 1.5 are discarded.

    Parameters
    ----------
    parent_map : mapping
        Its keys are the candidate transition endpoints; it is also passed
        through to ``get_tree_heights`` and ``get_violations``.
    violations : iterable of hashable
        Observed violations.
    N
        Passed through to ``get_tree_heights`` and ``get_violations``.

    Returns
    -------
    tuple
        ``(final_transitions, num_violations_predicted,
        num_violations_explained, total_explained, transitions, cost,
        all_predicted)`` where the second and third entries are per-transition
        counts aligned with ``final_transitions``, ``transitions``/``cost``
        describe every non-empty template, and ``all_predicted`` is the sorted
        union of the violations predicted by the final transitions.
    """
    tree_heights = get_tree_heights(parent_map, N)

    violation_templates = {}
    for i in parent_map:
        for j in parent_map:
            if i != j:
                violation_templates[(i, j)] = get_violations(i, j, N, parent_map, tree_heights)

    transitions = [t for t in violation_templates if len(violation_templates[t]) > 0]
    templates = [violation_templates[t] for t in transitions]

    # Hash-based lookups: the original tested membership against lists inside
    # nested loops, which is quadratic in the number of violations.
    violation_set = set(violations)
    template_sets = [set(template) for template in templates]

    cost = np.array([
        len([t for t in template if t not in violation_set]) / len(template)
        for template in templates
    ]) + 1

    if not transitions:
        # Nothing predicts any violation: there is no cover problem to solve
        # (the solver would otherwise be handed a 0x0 matrix).
        return [], [], [], 0, transitions, cost, []

    templates_union = set().union(*template_sets)
    violation_order = [v for v in violations if v in templates_union]

    a_matrix = np.zeros((len(violation_order), len(templates)))
    for j, members in enumerate(template_sets):
        for i, v in enumerate(violation_order):
            a_matrix[i, j] = 1 if v in members else 0

    g = setcover.SetCover(a_matrix > 0, cost)
    g.SolveSCP()
    nz = np.nonzero(g.s)[0]
    # Keep only templates for which at most half of the predictions are unobserved.
    final_transitions = [transitions[i] for i in nz if cost[i] <= 1.5]

    num_violations_predicted = [len(violation_templates[t]) for t in final_transitions]
    num_violations_explained = [
        len(set(violation_templates[t]).intersection(violations))
        for t in final_transitions
    ]

    explained = set()
    predicted = set()
    for t in final_transitions:
        members = set(violation_templates[t])
        explained |= violation_set & members
        predicted |= members
    total_explained = len(explained)
    all_predicted = sorted(predicted)
    return final_transitions, num_violations_predicted, num_violations_explained, total_explained, transitions, cost, all_predicted
コード例 #2
0
ファイル: main.py プロジェクト: danielvarga/grid-slicing
def main_interactive(collected_slices, maxiters):
    """Solve one set-cover instance over the slices, then print and save the result.

    Parameters
    ----------
    collected_slices : numpy.ndarray
        Array whose first axis enumerates candidate slices and whose remaining
        two axes have shape ``(n, m)``.
    maxiters : int
        Forwarded to ``setcover.SetCover`` as ``maxiters``.

    Side effects
    ------------
    Writes ``solution.<n>-<m>.npy``, ``lagrangian.<n>-<m>.npy`` and
    ``vis.png`` to the current directory and prints every selected slice plus
    their element-wise sum.
    """
    shape = collected_slices.shape[1:]
    n, m = shape
    ss, cost = to_setcoverpy_input(collected_slices)

    g = setcover.SetCover(ss, cost, maxiters=maxiters)
    print("starting set cover solver")
    g.SolveSCP()

    # g.s is the solver's selection vector over the candidate slices.
    bitvec = g.s
    solution = collected_slices[bitvec, :].reshape((-1, n, m))
    # Context managers make sure the output files are flushed and closed.
    with open("solution.%d-%d.npy" % shape, "wb") as f:
        np.save(f, solution)

    agg = np.zeros((n, m), dtype=int)
    for i, s in enumerate(solution):
        print(i)
        print(pretty(s))
        agg += s.astype(int)

    print("aggregate")
    print(agg)

    # g.u holds one value per grid cell; reshape it back onto the grid.
    lagrangian = np.array(g.u).reshape((n, m))
    with open("lagrangian.%d-%d.npy" % shape, "wb") as f:
        np.save(f, lagrangian)
    plt.imshow(lagrangian)
    plt.savefig("vis.png")
    plt.clf()
コード例 #3
0
def select_all_specific_p_start_group_min_overlap_probes(probes_summary_info, probe_evaluation_dir, blast_lineage, target_rank, target_taxon, group_distance, max_continuous_homology, bitscore_thresh, mt_cutoff, ot_gc_cutoff):
    """Pick, per probe-start group, a minimal set of specific probes covering the target taxon.

    Probes are first filtered to those whose on-target rate exceeds
    ``1 - 1/len(blast_lineage)`` and whose off-target bitscore, Tm and GC
    maxima are below the given cutoffs.  For every start group that still has
    probes, a set-cover problem (rows: target-taxon molecules hit by at least
    one probe, columns: probes, unit cost) is solved and the chosen probes are
    collected.

    If no probe passes the filter, ``probes_summary_info`` is sorted IN PLACE
    and its single best row is returned instead.

    Parameters
    ----------
    probes_summary_info : pandas.DataFrame
        One row per probe with the summary columns referenced below.
    probe_evaluation_dir : str
        Directory holding ``<probe_id>_probe_evaluation.csv.gz`` files.
    blast_lineage : pandas.DataFrame
        Must contain ``molecule_id`` and the ``target_rank`` column.
    target_rank, target_taxon
        Column name and value identifying the target molecules.
    group_distance : number
        Width of a start group; groups ``0 .. floor(1500/group_distance)-1``
        are examined.
    max_continuous_homology : number
        Minimum ``mch`` for a BLAST hit to count as covering a molecule.
    bitscore_thresh, mt_cutoff, ot_gc_cutoff : number
        Upper bounds on the off-target maxima.

    Returns
    -------
    pandas.DataFrame
        Selected probes with a ``selection_method`` column, or an empty frame
        when probes passed the filter but no group produced a selection.
    """
    bot = 1 - 1/blast_lineage.shape[0]
    is_specific = (
        (probes_summary_info.loc[:, 'blast_on_target_rate'] > bot)
        & (probes_summary_info.loc[:, 'off_target_max_bitscore'] < bitscore_thresh)
        & (probes_summary_info.loc[:, 'off_target_max_tm'] < mt_cutoff)
        & (probes_summary_info.loc[:, 'off_target_max_gc'] < ot_gc_cutoff)
    )
    best_probes = probes_summary_info.loc[is_specific, :]

    if best_probes.empty:
        # Fallback: no probe is specific enough, return the single best one.
        # The in-place sort is kept because callers may rely on it.
        probes_summary_info.sort_values(['blast_on_target_rate', 'taxon_coverage', 'off_target_full_qcovhsp_fraction', 'off_target_max_mch', 'off_target_max_bitscore', 'off_target_max_tm', 'off_target_max_gc', 'on_target_full_match', 'quality'], ascending = [False, False, True, True, True, True, True, False, True], inplace = True)
        # Copy so the new column is written to an independent frame instead
        # of a slice of the caller's data.
        best_probes_group = probes_summary_info.iloc[[0], :].copy()
        best_probes_group.loc[:, 'selection_method'] = 'AllSpecificPStartGroupMinOverlapSingleBest'
        return best_probes_group

    # Loop-invariant: the molecules that have to be covered.
    target_taxon_molecule_ids = blast_lineage.loc[blast_lineage[target_rank].values == target_taxon, 'molecule_id']

    selected = []
    for group in range(int(np.floor(1500/group_distance))):
        best_probes_temp = best_probes.loc[best_probes.mean_probe_start_group.values == group, :]
        if best_probes_temp.empty:
            continue

        probe_ids = best_probes_temp.probe_id.unique()
        cover_matrix = pd.DataFrame(np.zeros((len(target_taxon_molecule_ids), len(probe_ids)), dtype = int), columns = probe_ids, index = target_taxon_molecule_ids)
        cost = np.ones(len(probe_ids), dtype = int)
        # The order in which probes are visited does not affect the matrix,
        # so the unused sort of the original is not needed.
        for probe_idx in best_probes_temp.probe_id.values:
            probe_blast = pd.read_csv('{}/{}_probe_evaluation.csv.gz'.format(probe_evaluation_dir, probe_idx))
            probe_blast = probe_blast.loc[probe_blast.mch.values >= max_continuous_homology, :]
            blasted_molecules = list(probe_blast.molecule_id.values)
            # NOTE(review): assumes every hit molecule is a target-taxon
            # molecule; labels missing from the index make .loc raise.
            cover_matrix.loc[blasted_molecules, probe_idx] = 1

        # Molecules no probe hits cannot be covered; drop their rows.
        coverable = cover_matrix.values.sum(axis = 1) != 0
        cover_matrix_filtered = cover_matrix.values[coverable].astype(bool)
        g = setcover.SetCover(cover_matrix_filtered, cost)
        g.SolveSCP()
        set_cover_indices = np.flatnonzero(g.s*1 == 1)
        best_probes_minoverlap = best_probes_temp.iloc[set_cover_indices, :]
        if not best_probes_minoverlap.empty:
            selected.append(best_probes_minoverlap)

    if not selected:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    best_probes_group = pd.concat(selected, sort = False)
    best_probes_group.loc[:, 'selection_method'] = 'AllSpecificPStartGroupMinOverlap'
    return best_probes_group
コード例 #4
0
def select_min_overlap_probes(probe_summary_info, taxon_fasta_filename,
                              probe_evaluation_filename,
                              max_continuous_homology, bot):
    """Select either the single best probe or a minimal covering set of probes.

    ``probe_summary_info`` is sorted IN PLACE by on-target rate, taxon
    coverage (both descending) and quality (ascending).  Looking at the
    top-ranked probe:

    * on-target rate and taxon coverage both above ``bot``, or on-target rate
      not above ``bot``: that probe alone is returned;
    * on-target rate above ``bot`` but coverage not: a unit-cost set-cover
      problem over the taxon's molecules is solved among all probes whose
      on-target rate exceeds ``bot``.

    Parameters
    ----------
    probe_summary_info : pandas.DataFrame
        Needs ``probe_id``, ``blast_on_target_rate``, ``taxon_coverage`` and
        ``quality`` columns.
    taxon_fasta_filename : str
        FASTA file whose record ids are the molecules to cover.
    probe_evaluation_filename : str
        HDF store with one ``probe_<id>`` table per probe, each providing
        ``mch`` and ``molecule_id`` columns.
    max_continuous_homology : number
        Minimum ``mch`` for a hit to count as covering a molecule.
    bot : number
        Threshold applied to on-target rate and taxon coverage.

    Returns
    -------
    pandas.DataFrame
        The selected probe rows with a ``selection_method`` column
        (``'MinOverlap'`` or ``'MinOverlapSingleBest'``).
    """
    probe_summary_info.sort_values(
        ['blast_on_target_rate', 'taxon_coverage', 'quality'],
        ascending=[False, False, True],
        inplace=True)

    # Positional access: after sorting, label 0 is no longer guaranteed to be
    # the top-ranked row, but .iloc[0] always is.
    top_on_target_rate = probe_summary_info['blast_on_target_rate'].iloc[0]
    top_taxon_coverage = probe_summary_info['taxon_coverage'].iloc[0]
    needs_set_cover = (top_on_target_rate > bot
                       and not top_taxon_coverage > bot)

    if not needs_set_cover:
        # Copy so the new column does not write into a slice of the input.
        best_probes = probe_summary_info.iloc[[0], :].copy()
        best_probes.loc[:, 'selection_method'] = 'MinOverlapSingleBest'
        return best_probes

    taxon_molecules = [
        record.id for record in SeqIO.parse(taxon_fasta_filename, 'fasta')
    ]
    # Row of the cover matrix for each (normalised) molecule id.  Duplicate
    # ids map to one row; the others stay empty and are dropped below.
    molecule_row = {
        sub_slash(mol): row for row, mol in enumerate(taxon_molecules)
    }
    probe_summary_filtered = probe_summary_info[
        probe_summary_info['blast_on_target_rate'] > bot]
    probe_ids = probe_summary_filtered['probe_id'].unique()
    cover_matrix = np.zeros((len(taxon_molecules), len(probe_ids)),
                            dtype=int)
    cost = np.ones(len(probe_ids), dtype=int)
    # Column k belongs to the k-th row of probe_summary_filtered, which is
    # also how the solution is mapped back with .iloc below.
    for col, probe_idx in enumerate(probe_summary_filtered.probe_id.values):
        probe_blast = pd.read_hdf(probe_evaluation_filename,
                                  'probe_' + str(probe_idx))
        probe_blast = probe_blast[
            probe_blast['mch'] >= max_continuous_homology]
        # Mark the rows of the molecules that were hit.  The previous code
        # used the hit's position in the BLAST table as the row index, which
        # is unrelated to the molecule's row in the matrix.
        rows = sorted({
            molecule_row[mol] for mol in probe_blast['molecule_id']
            if mol in molecule_row
        })
        cover_matrix[rows, col] = 1

    # Molecules that no probe hits cannot be covered; drop their rows.
    coverable = cover_matrix.sum(axis=1) != 0
    cover_matrix_filt = cover_matrix[coverable].astype(bool)
    g = setcover.SetCover(cover_matrix_filt, cost)
    g.SolveSCP()
    set_cover_indices = np.flatnonzero(g.s * 1 == 1)
    best_probes = probe_summary_filtered.iloc[set_cover_indices, :].copy()
    if set_cover_indices.shape[0] > 1:
        best_probes.loc[:, 'selection_method'] = 'MinOverlap'
    else:
        best_probes.loc[:, 'selection_method'] = 'MinOverlapSingleBest'
    return best_probes
コード例 #5
0
def handle_SetCoverSolver(req):
    """Service handler: solve a unit-cost set cover over a visibility matrix.

    ``req.visibility_matrix`` carries a flat ``data`` sequence plus a layout
    whose first two dimensions give the number of instances (rows) and
    candidates (columns).  Element ``(inst, cand)`` is read from
    ``data[inst + instance_count * cand]``.  Rows without any ``True`` entry
    are removed before solving.

    Returns a ``SetCoverSolverResponse`` built from the list of selected
    candidate (column) indices.
    """
    layout_dims = req.visibility_matrix.layout.dim
    instance_count = layout_dims[0].size
    candidate_count = layout_dims[1].size

    print("No tris/VPs before: " + str(instance_count) + "/" + str(candidate_count))

    # Unpack the flat message payload into a boolean (instance, candidate) matrix.
    vismat = np.zeros((instance_count, candidate_count), dtype=bool)
    flat_data = req.visibility_matrix.data
    for flat_idx in range(instance_count * candidate_count):
        cand, inst = divmod(flat_idx, instance_count)
        vismat[inst, cand] = flat_data[flat_idx]

    print("Shape of vector: " + str(vismat[0,:].shape))

    # Keep only the rows (instances) that at least one candidate covers.
    instances_retained = [
        row for row in range(instance_count) if vismat[row, :].sum() != 0
    ]
    vismat = vismat[instances_retained, :]

    print("No tris/VPs after: " + str(vismat.shape))

    unit_cost = np.ones(vismat.shape[1])
    g = setcover.SetCover(vismat, unit_cost)
    solution, time_used = g.SolveSCP()

    # Column indices the solver switched on.
    solution_entries = [
        col for col in range(len(g.s)) if g.s[col] == True
    ]
    return SetCoverSolverResponse(solution_entries)
コード例 #6
0
    def get_archetypes(self, chi2_thresh=0.1, responsibility=False):
        """Run the set-cover solver on ``self.chi2`` to pick the archetypes.

        Every template is given the same (unit) cost.  Two templates are
        linked in the cover matrix when their entry in ``self.chi2`` is less
        than or equal to ``chi2_thresh``.

        Parameters
        ----------
        chi2_thresh : float
            Largest chi2 value at which two templates still count as
            equivalent.
        responsibility : bool
            When True, additionally return what ``self.responsibility``
            computes for the selected archetypes.

        Returns
        -------
        iarch : numpy.ndarray of int
            Indices of the selected archetypes.  Returned alone when
            ``responsibility`` is False.
        (iarch, resp, respindx) : tuple
            Returned when ``responsibility`` is True; ``resp`` and
            ``respindx`` are the two values produced by
            ``self.responsibility(iarch, a_matrix)``.

        """
        from SetCoverPy import setcover

        # Integer 0/1 relationship matrix derived from the chi2 threshold.
        a_matrix = 1 * (self.chi2 <= chi2_thresh)
        # One unit of cost per entry of the first row of chi2.
        uniform_cost = np.ones(self.chi2[0].shape)

        solver = setcover.SetCover(a_matrix, uniform_cost)
        _solution, _elapsed = solver.SolveSCP()

        iarch = np.nonzero(solver.s)[0]
        if not responsibility:
            return iarch

        resp, respindx = self.responsibility(iarch, a_matrix)
        return iarch, resp, respindx
コード例 #7
0
def get_set_cover(occurrences):
    """Solve the minimum set cover problem with n-grams as sets and
    documents as elements.

    Parameters
    ----------
    occurrences : list
        One entry per n-gram; element ``[3]`` of each entry is the collection
        of (hashable) document ids in which that n-gram occurs.

    Returns
    -------
    list
        The entries of ``occurrences`` chosen by the solver, in their original
        order.  Empty when there is no document to cover.
    """
    # Assign each distinct document id a matrix row, in first-seen order.
    row_of = {}
    for gram in occurrences:
        for pmid in gram[3]:
            if pmid not in row_of:
                row_of[pmid] = len(row_of)

    if not row_of:
        # No documents at all: the empty selection already covers everything.
        return []

    ncols = len(occurrences)
    relationship_matrix = np.zeros(shape=(len(row_of), ncols), dtype=bool)
    cost = np.ones(ncols)

    # Fill only the cells that are set instead of probing every (row, col) pair.
    for col, gram in enumerate(occurrences):
        for pmid in gram[3]:
            relationship_matrix[row_of[pmid], col] = True

    g = setcover.SetCover(relationship_matrix, cost)
    display.disable_print()
    try:
        g.SolveSCP()
    finally:
        # Always restore printing, even if the solver raises.
        display.enable_print()

    return [gram for gram, chosen in zip(occurrences, g.s) if chosen]
def main():
    """Solve a small hard-coded set-cover instance and print the selection vector.

    The universe is the integers 1..10 and there are five candidate subsets,
    each with cost 1/5.
    """
    universe = set(range(1, 11))
    subsets = [{1, 2, 3, 8, 9, 10},
               {1, 2, 3, 4, 5},
               {4, 5, 7},
               {5, 6, 7},
               {6, 7, 8, 9, 10}]

    M = len(universe)
    N = len(subsets)

    # Rows are universe elements (shifted to 0-based), columns are subsets.
    # The builtin bool is used because the np.bool alias no longer exists in
    # current NumPy releases.
    a_mat = np.zeros((M, N), dtype=bool)
    for s_i, seta in enumerate(subsets):
        for set_v in seta:
            a_mat[set_v-1, s_i] = True
    cost = np.ones(N)/5

    g = setcover.SetCover(a_mat, cost, maxiters=50)
    g.SolveSCP()
    print(g.s)
コード例 #9
0
ファイル: main.py プロジェクト: danielvarga/grid-slicing
def main_batch(collected_slices, maxiters):
    """Re-run the set-cover solver 10000 times and save every nontrivial solution.

    A solution counts as nontrivial when it uses fewer than ``n`` slices,
    where ``(n, n)`` is the shape of a single slice.  Each such solution is
    written to ``solution.<n>.<hash>.npy`` in the current directory, the hash
    being derived from ``totuple(solution)``.

    Parameters
    ----------
    collected_slices : numpy.ndarray
        Array whose first axis enumerates candidate slices; the remaining two
        axes must be square.
    maxiters : int
        Forwarded to ``setcover.SetCover`` as ``maxiters``.
    """
    shape = collected_slices.shape[1:]
    n, m = shape
    assert n == m
    ss, cost = to_setcoverpy_input(collected_slices)
    found = 0
    for i in range(10000):
        # A fresh solver per restart; presumably the solver is randomised so
        # repeated runs can yield different covers -- confirm.
        g = setcover.SetCover(ss, cost, maxiters=maxiters)
        g.SolveSCP()
        bitvec = g.s
        solution = collected_slices[bitvec, :]
        if len(solution) < n:  # nontrivial solution
            found += 1
            filename = "solution.%d.%05d.npy" % (
                n, abs(hash(totuple(solution))) % 100000)
            # `found` is already incremented, so report it as is (the old
            # message printed found + 1 and was off by one).
            print("found %s. nontrivial solution, saving it to %s" %
                  (found, filename))
            with open(filename, "wb") as f:
                np.save(f, solution)
        print("%d. set cover restart, %d solutions so far." % (i, found))
コード例 #10
0
    A[i, :] = A_tmp
    tmpchi2[i, :] = chi2_tmp

# Normalise the accumulated chi2 matrix by (iuse.size - 1).
chi2 = tmpchi2 / (iuse.size - 1)  # reduced chi2

# Earlier experiments with other percentile grids, kept for reference:
#pcs = n.array([1,10,25,50,75,90,99])
#pcs = n.array([16,17,18,19,20,21,22, 23, 24])
#scrs = scoreatpercentile(n.ravel(chi2), [16,17,18,19,20,21,22, 23, 24])

#scr = scoreatpercentile(n.ravel(chi2), 20 )
# Use the 10th percentile of all chi2 values as the distance threshold.
scr = scoreatpercentile(n.ravel(chi2), 10)

chi2_min = scr  # 0.05 # the minimum distance, the only free parameter
# Boolean matrix: True where the chi2 between two entries is below the threshold.
a_matrix = chi2 < chi2_min  # relationship matrix
# Unit cost for every column.
cost = n.ones(iuse.size)
g = setcover.SetCover(a_matrix, cost)
# I'm using greedy just for demonstration
# g.greedy()
# SolveSCP() should be used to generate near-optimal solution
g.SolveSCP()
# These are the archetypes: indices of the columns selected in g.s
iarchetype = n.nonzero(g.s)[0]
# Report threshold, number of archetypes and elapsed time since t0.
print(scr, len(iarchetype), time.time() - t0, 's')

# Count, per archetype, the rows of a_matrix it is linked to.
n_rep = n.sum(a_matrix[:, iarchetype],
              axis=0)  # how many covered by the archetype?
# Positions of the archetypes ordered from most to least represented.
isort = n.argsort(n_rep)[::-1]

# One row per archetype, one column per wavelength sample of masterwave.
tmpmedian = n.zeros((iarchetype.size, masterwave.size))

# These are the archetypal composites we want to use as the initial guess
コード例 #11
0
def find_min_set_cover(boundary_map, kernel=5):
    """Pick a low-cost set of free cells whose neighbourhoods cover the boundary cells.

    Cells equal to ``-1`` in ``boundary_map`` are boundary cells; cells equal
    to ``0`` are candidate locations.  For each candidate, the offsets
    returned by ``distance_search(kernel, direction)`` are scanned in the four
    directions ``left``, ``right``, ``top`` and ``btm``, level by level, and
    every boundary cell found is recorded as covered by that candidate.  A
    weighted set-cover problem (rows: boundary cells, columns: candidates that
    cover at least one boundary cell) is then solved.

    Parameters
    ----------
    boundary_map : numpy.ndarray
        2-D array of shape ``(H, W)``.
    kernel : int
        Passed to ``distance_search`` to build the search offsets.

    Returns
    -------
    res : list of tuple
        ``(h, w)`` locations of the selected candidates.
    res_dirs : list of list of str
        For each selected candidate, the directions in which it found
        boundary cells.
    res_covers : list of list of tuple
        For each selected candidate, the ``(h, w)`` boundary cells it covers.
        All three lists are empty when no candidate covers any boundary cell.
    """
    H, W = boundary_map.shape

    # Number the boundary cells in row-major order; the number is the cell's
    # row in the cover matrix.
    idx2boundary = [(h, w) for h in range(H) for w in range(W)
                    if boundary_map[h, w] == -1]
    boundary2idx = {cell: idx for idx, cell in enumerate(idx2boundary)}
    n_boundaries = len(idx2boundary)

    directions = ('left', 'right', 'top', 'btm')
    # Per direction: a sequence of levels, each a sequence of (dh, dw) offsets.
    search_cell_offset = {
        direction: distance_search(kernel, direction)
        for direction in directions
    }

    candidates_covers = []
    max_bd_cells = 0
    for h in range(H):
        for w in range(W):
            if boundary_map[h, w] != 0:
                continue

            cover = None
            near_dist = False   # a boundary cell was found on the first level
            long_dist = False   # a boundary cell was found on the last level

            for search_direction in directions:
                search_levels = search_cell_offset[search_direction]
                last_level = len(search_levels) - 1
                found_sub_sum = 0
                for level, level_offsets in enumerate(search_levels):
                    for offset in level_offsets:
                        cell_h = int(h + offset[0])
                        cell_w = int(w + offset[1])
                        if not (0 <= cell_h < H and 0 <= cell_w < W):
                            continue
                        if boundary_map[cell_h, cell_w] != -1:
                            continue

                        if cover is None:
                            cover = {
                                'loc': (h, w),
                                'b_covers': [],
                                'dir': [search_direction],
                            }
                        if search_direction not in cover['dir']:
                            cover['dir'].append(search_direction)

                        found_sub_sum += 1
                        if level == 0:
                            near_dist = True
                        elif level == last_level:
                            long_dist = True

                        b_id = boundary2idx[(cell_h, cell_w)]
                        if b_id not in cover['b_covers']:
                            cover['b_covers'].append(b_id)

                    # Stop scanning this direction once the number of hits so
                    # far equals the size of the current level.
                    if found_sub_sum == len(level_offsets):
                        break

            if cover is None:
                continue

            # Candidates that only see far-away boundary cells are cheapest.
            if long_dist:
                cover['prior_cost'] = 0.6 if near_dist else 0.01
            else:
                cover['prior_cost'] = 1.0

            candidates_covers.append(cover)
            max_bd_cells = max(max_bd_cells, len(cover['b_covers']))

    if not candidates_covers:
        # Nothing can be covered; np.max below would fail on an empty array.
        return [], [], []

    # Build the cover matrix and the cost vector.  The builtin bool is used
    # because the np.bool alias no longer exists in current NumPy releases.
    a_mat = np.zeros((n_boundaries, len(candidates_covers)), dtype=bool)
    a_cost = np.zeros(len(candidates_covers))

    for c_i, cover in enumerate(candidates_covers):
        cover_set = cover['b_covers']
        for b_id in cover_set:
            a_mat[b_id, c_i] = True

        # Candidates covering fewer boundary cells get a higher cost.
        ratio = 1.0 - len(cover_set) / max_bd_cells
        a_cost[c_i] = ratio + cover['prior_cost']

    a_cost = np.clip(a_cost, a_min=0.01, a_max=np.max(a_cost))
    a_cost = a_cost / np.linalg.norm(a_cost)

    # Run the minimum set cover solver.
    g = setcover.SetCover(a_mat, a_cost, maxiters=50)
    g.SolveSCP()

    res = []
    res_dirs = []
    res_covers = []
    for cover, chosen in zip(candidates_covers, g.s):
        if chosen:
            res.append(cover['loc'])
            res_dirs.append(cover['dir'])
            res_covers.append([idx2boundary[b_id] for b_id in cover['b_covers']])

    return res, res_dirs, res_covers
コード例 #12
0
ファイル: solver.py プロジェクト: Urup93/Speciale

# Benchmark script: load one set-cover instance and time two solvers on it.
path = 'rail582.txt'
print('Loading data from ', path)
# read_as_adj returns the cover matrix and the per-column costs that are
# handed to both solvers below.
adj, cost = read_as_adj(path)

#print('read adj mat: ', '\n', adj, '\n')
#print('read cost: ', '\n', cost)

# Disabled CPLEX baseline:
# cplex = cplexSolver()
# time = datetime.now()
# cplex_sol = cplex.solve(adj)
# cplex_time = datetime.now()-time

# --- SetCoverPy solver -----------------------------------------------------
print('Running set cover py')
scppy = setcover.SetCover(adj, cost)
# NOTE: `time` is rebound to a datetime here and again further down.
time = datetime.now()
scppy_solution, time_used = scppy.SolveSCP()
# Wall-clock duration measured around SolveSCP() only.
scppy_time = datetime.now()-time
print('SCP py time: ', scppy_time)
print('SCP solution: ', scppy_solution)


# --- Greedy solver -----------------------------------------------------------
print('Running greedy solver')
gs = greedySolver()
time = datetime.now()
greedy_solution, greedy_cost = gs.solve(adj, cost, False)
# Wall-clock duration measured around gs.solve() only.
greedy_time = datetime.now()-time
print('Greedy time: ', greedy_time)
print('Greedy cost: ', greedy_cost)
コード例 #13
0
        time_construct_set_cover_start = time.time()
        set_cover_instance_animal = compute_set_cover_instance(
            animal_sample, starts, ends, dist_tol)
        print "set cover instance built"  #, set_cover_instance_animal[1]
        universe = set([e for s in set_cover_instance_animal[1] for e in s])
        #universe_complete = set(range(0, len(animal_sample)))
        # build covering matrix
        matrix = []
        for computed_set in set_cover_instance_animal[1]:
            row = [entry in computed_set for entry in universe]
            matrix.append(row)
        covering_matrix = np.matrix(matrix).transpose()
        costs = np.array(set_cover_instance_animal[2])

        solution_animal = setcover.SetCover(covering_matrix,
                                            costs,
                                            maxiters=100)
        solution_animal.SolveSCP()
        group_diagram_animal = [
            set_cover_instance_animal[0][i]
            for i in range(0, len(solution_animal.s)) if solution_animal.s[i]
        ]
        group_diagram_animal = SortedList(group_diagram_animal)
        print "group diagram computed", group_diagram_animal

        # gmap = gmplot.GoogleMapPlotter(55, 9, 4)
        kml = simplekml.Kml()
        #
        for start, end in zip(starts, ends):
            # if end == ends[-1][-1]:
            #     end_plot = end
コード例 #14
0
def make_archetype(stack, file_input, percentile=20, sn_min = 1.5):
	"""Build archetype spectra for one stack entry and write them to disk.

	Steps: load the flux and error matrices stored next to
	``stack[file_input].out_file``, keep the spectra whose median
	signal-to-noise exceeds ``sn_min``, compute a pairwise reduced chi2 with
	``mathutils.quick_amplitude``, link two spectra when their chi2 is below
	the requested percentile of all chi2 values, and solve a unit-cost
	set-cover problem to select the archetypes.  For each archetype the
	per-wavelength median over the spectra it is linked to is computed.

	Parameters
	----------
	stack : mapping
		``stack[file_input]`` must provide ``out_file`` and ``wave``.
	file_input
		Key into ``stack``.
	percentile : number
		Percentile of the chi2 distribution used as the linking threshold.
	sn_min : number
		Minimum median signal-to-noise for a spectrum to be used.

	Side effects
	------------
	Writes a PNG figure, a text table and a pickle into ``archetype_dir`` and
	prints progress information.  A ``ValueError`` raised anywhere inside is
	caught and reported on stdout; nothing is returned.
	"""
	print('starts archetype for',file_input, time.time()-t0)
	try:
		out_name = 'archetype_'+os.path.basename(stack[file_input].out_file)+'_snMin_'+str(sn_min)+'_percentile_'+str(percentile)
		allflux  = n.loadtxt(stack[file_input].out_file+'.specMatrix.dat'      , unpack=True )
		allsig   = n.loadtxt(stack[file_input].out_file+'.specMatrixErr.dat'   , unpack=True )
		allisig  = allsig**(-1)
		masterwave = stack[file_input].wave
		# filter data: median S/N per spectrum, ignoring entries whose error is 9999
		median_sn = n.array([ n.median( (flux_el/sig_el)[((sig_el==9999)==False)] ) for flux_el, sig_el in zip(allflux.T, allsig.T) ])
		print(median_sn)
		iuse = n.where( median_sn>sn_min)[0]
		print('iuse',iuse)
		# pairwise amplitude / chi2 between every used spectrum and all the others
		tmpchi2 = n.zeros((iuse.size, iuse.size))
		A = n.zeros((iuse.size, iuse.size))
		print("creates the matrix", time.time()-t0, 's', allflux.shape)
		tmp_yerr = 1./allisig[:, iuse].T.reshape(iuse.size, masterwave.size)
		tmp_y = allflux[:,iuse].T
		for i in n.arange(iuse.size):
			tmp_x = allflux[:, iuse[i]].T.reshape(1,masterwave.size)
			tmp_xerr = 1./allisig[:, iuse[i]].T.reshape(1,masterwave.size)
			A_tmp, chi2_tmp = mathutils.quick_amplitude(tmp_x, tmp_y, tmp_xerr, tmp_yerr)
			A[i,:] = A_tmp
			tmpchi2[i,:] = chi2_tmp

		chi2 = tmpchi2/(iuse.size-1) # reduced chi2
		print('chi2', chi2)
		scrs = scoreatpercentile(n.ravel(chi2), [5, 15, 25, 35, 45, 55, 65, 75, 85, 95])
		print('chi2 distribution 5,15,25,..95%', scrs)
		# the minimum distance, the only free parameter
		chi2_min = scoreatpercentile(n.ravel(chi2), percentile )
		a_matrix = chi2<chi2_min # relationship matrix
		cost = n.ones(iuse.size)
		g = setcover.SetCover(a_matrix, cost)
		# SolveSCP() generates a near-optimal solution (g.greedy() is the quick alternative)
		g.SolveSCP()
		# These are the archetypes
		iarchetype = n.nonzero(g.s)[0]
		print( chi2_min, len(iarchetype), time.time()-t0, 's')

		# sort archetypes against the number of spectra they represent
		n_rep = n.sum(a_matrix[:, iarchetype], axis=0) # how many covered by the archetype?
		isort = n.argsort(n_rep)[::-1]
		print(n_rep)
		archetype_median = n.zeros((iarchetype.size, masterwave.size))

		# These are the archetypal composites we want to use as the initial guess
		for i in n.arange(iarchetype.size):
			itmp = a_matrix[:, iarchetype[i]] # These are the instances represented by the archetype
			for j in n.arange(masterwave.size):
				thisflux = allflux[j,iuse[itmp]]
				archetype_median[i, j] = n.median(thisflux[thisflux!=0]) # Only use the objects that have this wavelength covered

		# NOTE(review): isort is a permutation of 0..size-1, so imax equals
		# iarchetype.size - 1 and the loop below plots one archetype fewer
		# than were selected (and none when only one exists). Left unchanged
		# because imax is also stored in the pickled ObjIds -- confirm intent.
		imax = n.max(isort)
		print('imax', imax)
		p.clf()
		fig = p.figure(figsize=(10,imax*5))
		fig.subplots_adjust(hspace=0)
		p.title(out_name)
		for i in n.arange(0,imax,1):
			ax = fig.add_subplot(imax,1,i+1)
			ax.plot(masterwave[::3], archetype_median[isort[i],:][::3] , label = 'nRep=' + str(n_rep[isort[i]]) )
			ax.set_xlim(masterwave[10], masterwave[-10])
			ax.axvline(1215, color='b', ls='dashed', label='1215 Lya')
			ax.axvline(1546, color='c', ls='dashed', label='1546 CIV')
			ax.axvline(2800, color='m', ls='dashed', label='2800 MgII')
			ax.axvline(3727, color='g', ls='dashed', label='3727 [OII]')
			ax.axvline(5007, color='r', ls='dashed', label='5007 [OIII]')
			ax.axvline(6565, color='k', ls='dashed', label='6565 Ha')
			print(i, n.count_nonzero(a_matrix[:,iarchetype[isort[i]]]))
			ax.grid()
			ax.legend(frameon=False, loc=0)

		p.xlabel('Angstrom')
		p.tight_layout()
		p.savefig( os.path.join(archetype_dir, "figure_archetypes_"+out_name +".png") )
		p.clf()

		n.savetxt(os.path.join(archetype_dir, "archetypes_"+out_name+".txt")   , n.vstack((masterwave, archetype_median)))

		# The context manager closes the file even if pickling fails.
		with open(os.path.join(archetype_dir, "index_archetypes_"+out_name+".pkl"), 'wb') as f:
			obj = ObjIds(imax, iuse, n_rep)
			pickle.dump(obj, f)
	except ValueError as err:
		# Best effort: report the failure (with its message) and carry on.
		print('ValueError', err)
コード例 #15
0
                else:
                    for i in range( len(sigma1_2)-len(sigma2) ):
                        sigma2 = np.append(sigma2, 1e9)
                        data2 = np.append(data2, 0)
                variance = (1/sigma1_2) + (1/sigma2)
                '''
                distmat[i, j] = distmat[j][i] = 1e9
            # calculate reduced chi2 as distance
            #if distmat[i][j] < mindist:
            #binmat[i][j] = 1
    print " "
    binmat = distmat < mindist
    cost = np.ones(binmat.shape[0])
    #cost = 1 / hdu

    g = setcover.SetCover(binmat, cost)
    #g.greedy()
    g.SolveSCP()

    # Get the archetype indices
    iarchetype = np.nonzero(g.s)[0]
    #print "Number of archetypes: %s\n" % iarchetype.shape

    # How many are covered by each archetype?
    n_rep = np.sum(binmat[:, iarchetype], axis=0)
    for i, arch in enumerate(iarchetype):
        if n_rep[i] == 1:
            pass
            #print "Archtype #%s represents %s spectra." % (arch, n_rep[i])
        else:
            pass