def detect_cross_tree_transitions(parent_map, violations, N):
    """Pick the cross-tree transitions that best explain observed violations.

    A violation template is computed with ``get_violations`` for every ordered
    pair of distinct keys of ``parent_map``.  Pairs whose template is
    non-empty are the candidate transitions.  A weighted set cover is then
    solved over the observed violations that appear in at least one template,
    and only selected candidates whose cost is at most 1.5 are kept.

    Parameters
    ----------
    parent_map : mapping
        Iterated for its keys and forwarded to ``get_tree_heights`` and
        ``get_violations``.
    violations : collection
        Observed violations; their order fixes the cover-matrix row order.
    N : object
        Forwarded unchanged to ``get_tree_heights`` and ``get_violations``.

    Returns
    -------
    tuple
        ``(final_transitions, num_violations_predicted,
        num_violations_explained, total_explained, transitions, cost,
        all_predicted)``.
    """
    heights = get_tree_heights(parent_map, N)

    # One template per ordered pair (src, dst) with src != dst.
    violation_templates = {
        (src, dst): get_violations(src, dst, N, parent_map, heights)
        for src in parent_map
        for dst in parent_map
        if src != dst
    }

    # Candidates are the pairs that predict at least one violation.
    transitions = [
        pair for pair, tmpl in violation_templates.items() if len(tmpl) > 0
    ]
    templates = [violation_templates[pair] for pair in transitions]

    # Rows of the cover matrix: observed violations some template predicts.
    templates_union = set().union(*templates)
    violation_order = [v for v in violations if v in templates_union]

    a_matrix = np.zeros((len(violation_order), len(templates)))
    for col, template in enumerate(templates):
        for row, violation in enumerate(violation_order):
            if violation in template:
                a_matrix[row, col] = 1

    # Cost is 1 plus the fraction of the template that was NOT observed, so
    # templates predicting many unseen violations are more expensive.
    unobserved_fraction = [
        sum(1 for item in template if item not in violations) / len(template)
        for template in templates
    ]
    cost = np.array(unobserved_fraction) + 1

    g = setcover.SetCover(a_matrix > 0, cost)
    g.SolveSCP()
    chosen = np.nonzero(g.s)[0]
    final_transitions = [transitions[k] for k in chosen if cost[k] <= 1.5]

    num_violations_predicted = []
    num_violations_explained = []
    explained = []
    predicted = []
    for pair in final_transitions:
        template = violation_templates[pair]
        num_violations_predicted.append(len(template))
        num_violations_explained.append(
            len(set(template).intersection(violations)))
        explained.extend(v for v in violations if v in template)
        predicted.extend(template)

    total_explained = len(set(explained))
    all_predicted = sorted(set(predicted))
    return (final_transitions, num_violations_predicted,
            num_violations_explained, total_explained, transitions, cost,
            all_predicted)
def main_interactive(collected_slices, maxiters):
    """Solve one set cover over the collected slices and dump the results.

    The selected slices are saved to ``solution.<n>-<m>.npy``, each one is
    printed together with their element-wise sum, and the solver's ``u``
    vector (reshaped to ``(n, m)``) is saved to ``lagrangian.<n>-<m>.npy``
    and rendered to ``vis.png``.

    Parameters
    ----------
    collected_slices : numpy.ndarray
        Array whose trailing two dimensions ``(n, m)`` are the slice shape.
    maxiters : int
        Forwarded to ``setcover.SetCover`` as ``maxiters``.
    """
    shape = collected_slices.shape[1:]
    n, m = shape
    ss, cost = to_setcoverpy_input(collected_slices)
    g = setcover.SetCover(ss, cost, maxiters=maxiters)
    print("starting set cover solver")
    g.SolveSCP()

    # g.s is the boolean selection vector over the candidate slices.
    solution = collected_slices[g.s, :].reshape((-1, n, m))
    # Use context managers so the output files are flushed and closed even
    # if np.save raises (the original passed a bare open() and leaked it).
    with open("solution.%d-%d.npy" % shape, "wb") as f:
        np.save(f, solution)

    agg = np.zeros((n, m), dtype=int)
    for i, s in enumerate(solution):
        print(i)
        print(pretty(s))
        agg += s.astype(int)
    print("aggregate")
    print(agg)

    lagrangian = np.array(g.u).reshape((n, m))
    with open("lagrangian.%d-%d.npy" % shape, "wb") as f:
        np.save(f, lagrangian)
    plt.imshow(lagrangian)
    plt.savefig("vis.png")
    plt.clf()
def select_all_specific_p_start_group_min_overlap_probes(probes_summary_info, probe_evaluation_dir, blast_lineage, target_rank, target_taxon, group_distance, max_continuous_homology, bitscore_thresh, mt_cutoff, ot_gc_cutoff):
    """Select, per probe start group, a minimum set of specific probes.

    Probes are first filtered on on-target rate and on the off-target
    bitscore, Tm and GC cut-offs.  Within each ``mean_probe_start_group`` a
    set cover over the target taxon's molecules chooses the probes to keep.
    When no probe passes the filter, the single best-ranked probe of
    ``probes_summary_info`` is returned instead.

    Parameters
    ----------
    probes_summary_info : pandas.DataFrame
        One row per probe.  Sorted in place when the fallback path is taken.
    probe_evaluation_dir : str
        Directory holding ``<probe_id>_probe_evaluation.csv.gz`` files.
    blast_lineage : pandas.DataFrame
        Must provide ``molecule_id`` and the ``target_rank`` column.
    target_rank, target_taxon
        Column of ``blast_lineage`` and the value selecting target molecules.
    group_distance : number
        Start groups ``0 .. floor(1500 / group_distance) - 1`` are scanned.
    max_continuous_homology : number
        Minimum ``mch`` for a BLAST hit to count as covering a molecule.
    bitscore_thresh, mt_cutoff, ot_gc_cutoff : number
        Exclusive upper bounds on the off-target maxima.

    Returns
    -------
    pandas.DataFrame
        Selected probes with a ``selection_method`` column; an empty frame
        when probes pass the filter but no group yields a selection.
    """
    bot = 1 - 1/blast_lineage.shape[0]
    is_specific = (
        (probes_summary_info.loc[:, 'blast_on_target_rate'] > bot)
        & (probes_summary_info.loc[:, 'off_target_max_bitscore'] < bitscore_thresh)
        & (probes_summary_info.loc[:, 'off_target_max_tm'] < mt_cutoff)
        & (probes_summary_info.loc[:, 'off_target_max_gc'] < ot_gc_cutoff)
    )
    best_probes = probes_summary_info.loc[is_specific, :]

    if best_probes.empty:
        # Fallback: no specific probe, return the single best-ranked one.
        probes_summary_info.sort_values(
            ['blast_on_target_rate', 'taxon_coverage',
             'off_target_full_qcovhsp_fraction', 'off_target_max_mch',
             'off_target_max_bitscore', 'off_target_max_tm',
             'off_target_max_gc', 'on_target_full_match', 'quality'],
            ascending=[False, False, True, True, True, True, True, False, True],
            inplace=True)
        # Copy so the column assignment never targets a view of the input.
        best_probes_group = probes_summary_info.iloc[[0], :].copy()
        best_probes_group.loc[:, 'selection_method'] = 'AllSpecificPStartGroupMinOverlapSingleBest'
        return best_probes_group

    target_taxon_molecule_ids = blast_lineage.loc[
        blast_lineage[target_rank].values == target_taxon, 'molecule_id']

    selected = []
    for group in range(int(np.floor(1500/group_distance))):
        best_probes_temp = best_probes.loc[
            best_probes.mean_probe_start_group.values == group, :]
        if best_probes_temp.empty:
            continue
        best_probes_temp_sorted = best_probes_temp.sort_values(
            ['on_target_full_match', 'taxon_coverage',
             'off_target_full_qcovhsp_fraction', 'off_target_max_mch',
             'off_target_max_bitscore', 'off_target_max_tm',
             'off_target_max_gc', 'quality'],
            ascending=[False, False, True, True, True, True, True, True])

        # Rows: target molecules; columns: probes of this start group.
        probe_ids = best_probes_temp.probe_id.unique()
        cover_matrix = pd.DataFrame(
            np.zeros((len(target_taxon_molecule_ids), len(probe_ids)), dtype=int),
            columns=probe_ids, index=target_taxon_molecule_ids)
        cost = np.ones(len(probe_ids), dtype=int)
        for probe_idx in best_probes_temp_sorted.probe_id.values:
            probe_blast = pd.read_csv(
                '{}/{}_probe_evaluation.csv.gz'.format(probe_evaluation_dir, probe_idx))
            probe_blast = probe_blast.loc[
                probe_blast.mch.values >= max_continuous_homology, :]
            blasted_molecules = list(probe_blast.molecule_id.values)
            cover_matrix.loc[blasted_molecules, probe_idx] = 1

        # Drop molecules that no probe covers before solving the cover.
        coverage = cover_matrix.values
        cover_matrix_filtered = coverage[coverage.sum(axis=1) != 0].astype(bool)
        g = setcover.SetCover(cover_matrix_filtered, cost)
        g.SolveSCP()
        set_cover_indices = np.flatnonzero(g.s*1 == 1)
        best_probes_minoverlap = best_probes_temp.iloc[set_cover_indices, :]
        if not best_probes_minoverlap.empty:
            selected.append(best_probes_minoverlap)

    if not selected:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    best_probes_group = pd.concat(selected, sort=False)
    best_probes_group.loc[:, 'selection_method'] = 'AllSpecificPStartGroupMinOverlap'
    return best_probes_group
def select_min_overlap_probes(probe_summary_info, taxon_fasta_filename,
                              probe_evaluation_filename,
                              max_continuous_homology, bot):
    """Select the best probe, or a minimum covering set of probes.

    ``probe_summary_info`` is sorted in place (best first).  If the top probe
    exceeds ``bot`` on both on-target rate and taxon coverage, or fails the
    on-target rate, that single probe is returned.  Otherwise a set cover
    over the taxon's molecules is solved using every probe whose on-target
    rate exceeds ``bot``.

    Parameters
    ----------
    probe_summary_info : pandas.DataFrame
        Needs ``probe_id``, ``blast_on_target_rate``, ``taxon_coverage`` and
        ``quality`` columns.  Sorted in place.
    taxon_fasta_filename : str
        FASTA file listing the taxon's molecules (set-cover path only).
    probe_evaluation_filename : str
        HDF store with one ``probe_<id>`` table per probe (set-cover path).
    max_continuous_homology : number
        Minimum ``mch`` for a BLAST hit to count as covering a molecule.
    bot : number
        Exclusive lower bound on rate / coverage.

    Returns
    -------
    pandas.DataFrame
        Selected probe rows with a ``selection_method`` column.
    """
    probe_summary_info.sort_values(
        ['blast_on_target_rate', 'taxon_coverage', 'quality'],
        ascending=[False, False, True], inplace=True)

    # Read the top-ranked row by POSITION.  The original used ``series[0]``,
    # which is a label lookup and, after sorting, does not address the first
    # row (and raises KeyError when label 0 is absent).
    top = probe_summary_info.iloc[0]
    top_is_on_target = top['blast_on_target_rate'] > bot
    if not top_is_on_target or top['taxon_coverage'] > bot:
        best_probes = probe_summary_info.iloc[[0], :].copy()
        best_probes.loc[:, 'selection_method'] = 'MinOverlapSingleBest'
        return best_probes

    taxon_molecules = [
        record.id for record in SeqIO.parse(taxon_fasta_filename, 'fasta')
    ]
    # Matrix row of each molecule, keyed the way BLAST reports its id.
    row_of = {sub_slash(mol): row for row, mol in enumerate(taxon_molecules)}

    probe_summary_filtered = probe_summary_info[
        probe_summary_info['blast_on_target_rate'] > bot]
    probe_ids = probe_summary_filtered['probe_id'].unique()
    cover_matrix = np.zeros((len(taxon_molecules), len(probe_ids)), dtype=int)
    cost = np.ones(len(probe_ids), dtype=int)
    for col, probe_idx in enumerate(probe_summary_filtered.probe_id.values):
        probe_blast = pd.read_hdf(probe_evaluation_filename,
                                  'probe_' + str(probe_idx))
        probe_blast = probe_blast[
            probe_blast['mch'] >= max_continuous_homology]
        # Mark the rows of the molecules this probe hits.  The original used
        # the hit's position in the BLAST table as the row index, which does
        # not identify the molecule.
        rows = [row_of[mol] for mol in probe_blast['molecule_id']
                if mol in row_of]
        cover_matrix[rows, col] = 1

    # Drop molecules that no probe covers before solving the cover.
    cover_matrix_filt = cover_matrix[cover_matrix.sum(axis=1) != 0].astype(bool)
    g = setcover.SetCover(cover_matrix_filt, cost)
    g.SolveSCP()
    set_cover_indices = np.flatnonzero(g.s * 1 == 1)
    best_probes = probe_summary_filtered.iloc[set_cover_indices, :].copy()
    if set_cover_indices.shape[0] > 1:
        best_probes.loc[:, 'selection_method'] = 'MinOverlap'
    else:
        best_probes.loc[:, 'selection_method'] = 'MinOverlapSingleBest'
    return best_probes
def handle_SetCoverSolver(req):
    """Solve a uniform-cost set cover for the request's visibility matrix.

    ``req.visibility_matrix`` carries a flat ``data`` sequence plus a
    ``layout.dim`` list whose first two sizes are the instance (row) and
    candidate (column) counts; element ``(inst, cand)`` is read from
    ``data[inst + instance_count * cand]``.  Rows without any visible
    candidate are removed before solving.

    Returns a ``SetCoverSolverResponse`` built from the list of selected
    candidate (column) indices.
    """
    dims = req.visibility_matrix.layout.dim
    instance_count = dims[0].size
    candidate_count = dims[1].size
    print("No tris/VPs before: " + str(instance_count) + "/" + str(candidate_count))

    # Unpack the flat message payload into a boolean (instance, candidate) grid.
    flat = req.visibility_matrix.data
    vismat = np.zeros((instance_count, candidate_count), dtype=bool)
    for inst in range(instance_count):
        for cand in range(candidate_count):
            vismat[inst, cand] = flat[inst + instance_count*cand]

    print("Shape of vector: " + str(vismat[0,:].shape))

    # Keep only the rows that at least one candidate can cover.
    instances_retained = [
        row for row in range(instance_count) if vismat[row, :].sum() != 0
    ]
    vismat = vismat[instances_retained, :]
    print("No tris/VPs after: " + str(vismat.shape))

    uniform_cost = np.ones(vismat.shape[1])
    g = setcover.SetCover(vismat, uniform_cost)
    g.SolveSCP()

    # TODO (carried over from the original): confirm whether an index
    # offset of -1 is required by the consumer of this response.
    solution_entries = [
        col for col in range(len(g.s)) if g.s[col] == True
    ]
    return SetCoverSolverResponse(solution_entries)
def get_archetypes(self, chi2_thresh=0.1, responsibility=False):
    """Solve the set-cover problem on ``self.chi2`` to pick the archetypes.

    Every template is given the same (unit) cost.  Two templates are treated
    as covering each other when their chi^2 is at most ``chi2_thresh``.

    Parameters
    ----------
    chi2_thresh : float
        Chi^2 values less than or equal to this threshold count as a match.
    responsibility : bool
        If True, also compute each archetype's responsibility through
        ``self.responsibility``.

    Returns
    -------
    iarch : numpy.ndarray
        Indices of the selected archetypes.  Returned alone when
        ``responsibility`` is False.
    (iarch, resp, respindx) : tuple
        Returned when ``responsibility`` is True; ``resp`` and ``respindx``
        are whatever ``self.responsibility(iarch, a_matrix)`` yields.
    """
    from SetCoverPy import setcover

    n_templates = self.chi2[0].shape
    uniform_cost = np.ones(n_templates)
    a_matrix = (self.chi2 <= chi2_thresh) * 1

    solver = setcover.SetCover(a_matrix, uniform_cost)
    solver.SolveSCP()
    iarch = np.nonzero(solver.s)[0]

    if not responsibility:
        return iarch
    resp, respindx = self.responsibility(iarch, a_matrix)
    return iarch, resp, respindx
def get_set_cover(occurrences):
    """Solve a minimum set cover with n-grams as sets and documents as elements.

    Parameters
    ----------
    occurrences : list
        One entry per n-gram; ``entry[3]`` is the collection of document ids
        (assumed hashable) in which the n-gram occurs.

    Returns
    -------
    list
        The entries of ``occurrences`` selected by the solver, in their
        original order.  Empty when there is no document to cover.
    """
    # Distinct documents in first-seen order; the value is the matrix row.
    row_of = {}
    for gram in occurrences:
        for pmid in gram[3]:
            row_of.setdefault(pmid, len(row_of))
    if not row_of:
        # Nothing to cover: the empty selection is the minimum cover.
        return []

    ncols = len(occurrences)
    relationship_matrix = np.zeros(shape=(len(row_of), ncols), dtype=bool)
    for col, gram in enumerate(occurrences):
        for pmid in gram[3]:
            relationship_matrix[row_of[pmid], col] = True
    cost = np.ones(ncols)

    g = setcover.SetCover(relationship_matrix, cost)
    display.disable_print()
    try:
        g.SolveSCP()
    finally:
        # Always restore printing, even when the solver raises.
        display.enable_print()

    return [gram for col, gram in enumerate(occurrences) if g.s[col]]
def main():
    """Run the set-cover solver on a small hard-coded example and print ``g.s``.

    The universe is the integers 1..10 and there are five candidate subsets,
    each with cost 1/5.
    """
    universe = set(range(1, 11))
    subsets = [
        {1, 2, 3, 8, 9, 10},
        {1, 2, 3, 4, 5},
        {4, 5, 7},
        {5, 6, 7},
        {6, 7, 8, 9, 10},
    ]
    M = len(universe)
    N = len(subsets)

    # Use the builtin ``bool``: the ``np.bool`` alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    a_mat = np.zeros((M, N), dtype=bool)
    for s_i, seta in enumerate(subsets):
        for set_v in seta:
            # Elements are 1-based, matrix rows are 0-based.
            a_mat[set_v - 1, s_i] = True

    cost = np.ones(N) / 5
    g = setcover.SetCover(a_mat, cost, maxiters=50)
    g.SolveSCP()
    print(g.s)
def main_batch(collected_slices, maxiters):
    """Restart the set-cover solver 10000 times and save nontrivial solutions.

    A solution is considered nontrivial when it selects fewer than ``n``
    slices.  Each such solution is written to
    ``solution.<n>.<hash>.npy``, where ``<hash>`` is a five-digit value
    derived from the solution's contents.

    Parameters
    ----------
    collected_slices : numpy.ndarray
        Array whose trailing two dimensions must be equal (``n == m``).
    maxiters : int
        Forwarded to ``setcover.SetCover`` as ``maxiters``.
    """
    shape = collected_slices.shape[1:]
    n, m = shape
    assert n == m
    ss, cost = to_setcoverpy_input(collected_slices)
    found = 0
    for i in range(10000):
        g = setcover.SetCover(ss, cost, maxiters=maxiters)
        g.SolveSCP()
        solution = collected_slices[g.s, :]
        if len(solution) < n:  # nontrivial solution
            found += 1
            filename = "solution.%d.%05d.npy" % (
                n, abs(hash(totuple(solution))) % 100000)
            # ``found`` is already incremented; the original printed
            # ``found + 1`` and so reported the first hit as number 2.
            print("found %s. nontrivial solution, saving it to %s" %
                  (found, filename))
            with open(filename, "wb") as f:
                np.save(f, solution)
        print("%d. set cover restart, %d solutions so far." % (i, found))
A[i, :] = A_tmp tmpchi2[i, :] = chi2_tmp chi2 = tmpchi2 / (iuse.size - 1) # reduced chi2 #pcs = n.array([1,10,25,50,75,90,99]) #pcs = n.array([16,17,18,19,20,21,22, 23, 24]) #scrs = scoreatpercentile(n.ravel(chi2), [16,17,18,19,20,21,22, 23, 24]) #scr = scoreatpercentile(n.ravel(chi2), 20 ) scr = scoreatpercentile(n.ravel(chi2), 10) chi2_min = scr # 0.05 # the minimum distance, the only free paramter a_matrix = chi2 < chi2_min # relationship matrix cost = n.ones(iuse.size) g = setcover.SetCover(a_matrix, cost) # I'm using greedy just for demonstration # g.greedy() # SolveSCP() should be used to generate near-optimal solution g.SolveSCP() # These are the archetypes iarchetype = n.nonzero(g.s)[0] print(scr, len(iarchetype), time.time() - t0, 's') n_rep = n.sum(a_matrix[:, iarchetype], axis=0) # how many covered by the archetype? isort = n.argsort(n_rep)[::-1] tmpmedian = n.zeros((iarchetype.size, masterwave.size)) # These are the archetypal composites we want to use as the initial guess
# find_min_set_cover(boundary_map, kernel=5)
#
# Purpose: choose a low-cost set of free cells whose directional searches
# together cover the boundary cells of a 2-D map, by solving a weighted
# set-cover problem.
#
# Inputs
#   boundary_map : 2-D array (unpacked as H, W). Cells equal to -1 are
#                  indexed as boundary cells in row-major scan order; only
#                  cells equal to 0 are considered as candidates.
#   kernel       : passed to distance_search(kernel, direction) for each of
#                  'left', 'right', 'top', 'btm'. Each result is iterated as a
#                  sequence of levels, each level a sequence of offsets where
#                  offset[0] is added to h and offset[1] to w.
#
# Per candidate, a dict is built with keys 'loc' (h, w), 'b_covers' (boundary
# ids reached), 'dir' (directions in which a boundary was reached) and
# 'prior_cost' (1.0, 0.6 or 0.01, chosen from the near_dist / long_dist
# flags).
#
# Set-cover input: a boolean matrix (boundary cells x candidates) and a cost
# per candidate equal to (1 - covered / max_bd_cells) + prior_cost, clipped
# below at 0.01 and divided by np.linalg.norm of the cost vector. The solver
# is run with maxiters=50.
#
# Returns (res, res_dirs, res_covers): for every candidate flagged in g.s, its
# location, its direction list, and the (h, w) coordinates of the boundary
# cells it covers.
#
# NOTE(review): the indentation of this function was lost, so the exact
#   nesting of the inner search loop (which statements sit under
#   `if b_id not in ignored_b_cells` and where the `break` belongs) cannot be
#   confirmed from this text - verify against the upstream source.
# NOTE(review): `dtype=np.bool` relies on an alias that was removed in
#   NumPy 1.24; the builtin `bool` is the replacement.
# NOTE(review): `radius` is assigned but not used in the visible code, and the
#   `h == 2 and w == 3` branch prints 'A' and enables debug prints - this
#   looks like leftover debugging.
# NOTE(review): if no candidate covers anything, max_bd_cells stays 0 - the
#   cost loop then has nothing to divide, but confirm the solver accepts an
#   empty candidate set.
def find_min_set_cover(boundary_map, kernel=5): H, W = boundary_map.shape radius = int(kernel / 2) # count the boundary cells n_boundaries = np.sum(boundary_map == -1) boundary2idx = dict() idx2bounary = dict() boundary_cell_idx = 0 for h in range(0, H): for w in range(0, W): if boundary_map[h, w] == -1: key = '%d-%d' % (h, w) boundary2idx[key] = boundary_cell_idx idx2bounary[boundary_cell_idx] = (h, w) boundary_cell_idx += 1 search_cell_offset = dict() search_cell_offset['left'] = distance_search(kernel, 'left') search_cell_offset['right'] = distance_search(kernel, 'right') search_cell_offset['top'] = distance_search(kernel, 'top') search_cell_offset['btm'] = distance_search(kernel, 'btm') candidates_covers = [] ignored_b_cells = dict() max_bd_cells = 0 for h in range(0, H): for w in range(0, W): if boundary_map[h, w] != 0: continue if h == 2 and w == 3: debug = True print('A') else: debug = False cover = None near_dist = False long_dist = False for search_direction in ['left', 'right', 'top', 'btm']: search_levels = search_cell_offset[search_direction] found_sub_sum = 0 for l, search_cell_offset_l in enumerate(search_levels): for offset in search_cell_offset_l: cell_h = int(h + offset[0]) cell_w = int(w + offset[1]) if 0 <= cell_h < H and 0 <= cell_w < W and boundary_map[ cell_h, cell_w] == -1: if cover is None: cover = dict() cover['loc'] = (h, w) cover['b_covers'] = [] cover['dir'] = [search_direction] if search_direction not in cover['dir']: cover['dir'].append(search_direction) found_sub_sum += 1 b_key = '%d-%d' % (cell_h, cell_w) b_id = boundary2idx[b_key] if b_id not in ignored_b_cells: ignored_b_cells[b_id] = b_key if l == 0: near_dist = True elif l == len(search_levels) - 1: long_dist = True if b_id not in cover['b_covers']: cover['b_covers'].append(b_id) if debug: print(b_key, b_id) if found_sub_sum == len(search_cell_offset_l): break if cover is not None: cover['prior_cost'] = 1.0 if long_dist is True and near_dist is False: cover['prior_cost'] = 0.01 
elif long_dist is True and near_dist is True: cover['prior_cost'] = 0.6 elif long_dist is False and near_dist is True: cover['prior_cost'] = 1.0 candidates_covers.append(cover) if len(cover['b_covers']) > max_bd_cells: max_bd_cells = len(cover['b_covers']) # build the mat and costs a_mat = np.zeros((n_boundaries, len(candidates_covers)), dtype=np.bool) a_cost = np.zeros(len(candidates_covers)) for c_i, cover in enumerate(candidates_covers): cover_set = cover['b_covers'] for b_id in cover_set: a_mat[b_id, c_i] = True ratio = 1.0 - len(cover_set) / max_bd_cells a_cost[c_i] = ratio + cover['prior_cost'] a_cost = np.clip(a_cost, a_min=0.01, a_max=np.max(a_cost)) a_cost = a_cost / np.linalg.norm(a_cost) # run min set g = setcover.SetCover(a_mat, a_cost, maxiters=50) solution, time_used = g.SolveSCP() res = [] res_dirs = [] res_covers = [] for i in range(g.s.shape[0]): res_g = g.s[i] if res_g == True: res.append(candidates_covers[i]['loc']) res_dirs.append(candidates_covers[i]['dir']) covers = candidates_covers[i]['b_covers'] covers_loc = [idx2bounary[i] for i in covers] res_covers.append(covers_loc) return res, res_dirs, res_covers
# Compare the SetCoverPy solver with the greedy solver on one instance file.
# All names stay at this scope because later code may read them.
path = 'rail582.txt'
print('Loading data from ', path)
adj, cost = read_as_adj(path)
#print('read adj mat: ', '\n', adj, '\n')
#print('read cost: ', '\n', cost)

# Disabled CPLEX reference run, kept for comparison:
# cplex = cplexSolver()
# time = datetime.now()
# cplex_sol = cplex.solve(adj)
# cplex_time = datetime.now()-time

# --- SetCoverPy: wall-clock time measured around SolveSCP() only ---
print('Running set cover py')
scppy = setcover.SetCover(adj, cost)
time = datetime.now()
scppy_solution, time_used = scppy.SolveSCP()
scppy_time = datetime.now() - time
print('SCP py time: ', scppy_time)
print('SCP solution: ', scppy_solution)

# --- Greedy solver: wall-clock time measured around solve() only ---
print('Running greedy solver')
gs = greedySolver()
time = datetime.now()
greedy_solution, greedy_cost = gs.solve(adj, cost, False)
greedy_time = datetime.now() - time
print('Greedy time: ', greedy_time)
print('Greedy cost: ', greedy_cost)
time_construct_set_cover_start = time.time() set_cover_instance_animal = compute_set_cover_instance( animal_sample, starts, ends, dist_tol) print "set cover instance built" #, set_cover_instance_animal[1] universe = set([e for s in set_cover_instance_animal[1] for e in s]) #universe_complete = set(range(0, len(animal_sample))) # build covering matrix matrix = [] for computed_set in set_cover_instance_animal[1]: row = [entry in computed_set for entry in universe] matrix.append(row) covering_matrix = np.matrix(matrix).transpose() costs = np.array(set_cover_instance_animal[2]) solution_animal = setcover.SetCover(covering_matrix, costs, maxiters=100) solution_animal.SolveSCP() group_diagram_animal = [ set_cover_instance_animal[0][i] for i in range(0, len(solution_animal.s)) if solution_animal.s[i] ] group_diagram_animal = SortedList(group_diagram_animal) print "group diagram computed", group_diagram_animal # gmap = gmplot.GoogleMapPlotter(55, 9, 4) kml = simplekml.Kml() # for start, end in zip(starts, ends): # if end == ends[-1][-1]: # end_plot = end
# make_archetype(stack, file_input, percentile=20, sn_min=1.5)
#
# Purpose: derive archetype spectra for stack[file_input] by solving a
# uniform-cost set-cover problem on a pairwise chi2 matrix, then write a
# figure, a text table and a pickle describing the result.
#
# Steps visible in the code:
#   1. Load '<out_file>.specMatrix.dat' and '<out_file>.specMatrixErr.dat'
#      with loadtxt(unpack=True); the wavelength grid is stack[file_input].wave.
#   2. Compute a median flux/sigma per spectrum, ignoring entries whose sigma
#      equals 9999, and keep the spectra with median above sn_min (iuse).
#   3. Fill the chi2 matrix row by row with mathutils.quick_amplitude and
#      divide it by (iuse.size - 1).
#   4. The relationship matrix is chi2 < chi2_min, where chi2_min is the
#      `percentile`-th percentile of all chi2 values; every spectrum has
#      cost 1; SolveSCP() selects the archetypes (nonzero entries of g.s).
#   5. For each archetype and wavelength, take the median of the non-zero
#      fluxes of the spectra it represents.
#   6. Plot the archetypes, save 'figure_archetypes_<out_name>.png' and
#      'archetypes_<out_name>.txt', and pickle ObjIds(imax, iuse, n_rep) to
#      'index_archetypes_<out_name>.pkl', all under archetype_dir.
#
# Returns nothing. A ValueError raised anywhere inside the try block is
# caught and reported only by printing 'ValueError'.
#
# Uses names that are not defined in this function: t0, archetype_dir, n, p,
# os, time, pickle, mathutils, scoreatpercentile, setcover, ObjIds.
#
# NOTE(review): the indentation of this function was lost; the extent of the
#   plotting loop body in particular should be checked against the upstream
#   source.
# NOTE(review): imax = n.max(isort) is the largest index of an argsort
#   result, i.e. one less than the number of archetypes, so the plotting loop
#   over n.arange(0, imax, 1) appears to skip one archetype - confirm whether
#   that is intended.
# NOTE(review): the pickle file is opened and closed manually; an exception
#   other than ValueError between open() and close() would leave it open.
# NOTE(review): N_wave, N_spectra, A and scrs are only assigned/printed and
#   do not feed the result in the visible code.
def make_archetype(stack, file_input, percentile=20, sn_min = 1.5): print('starts archetype for',file_input, time.time()-t0) try: out_name = 'archetype_'+os.path.basename(stack[file_input].out_file)+'_snMin_'+str(sn_min)+'_percentile_'+str(percentile) allflux = n.loadtxt(stack[file_input].out_file+'.specMatrix.dat' , unpack=True ) #, specMatrix) allsig = n.loadtxt(stack[file_input].out_file+'.specMatrixErr.dat' , unpack=True )#, specMatrixErr) allisig = allsig**(-1) masterwave = stack[file_input].wave tmploglam = n.log10(masterwave) N_wave = len(tmploglam) N_spectra = allflux.shape[1] # filter data median_sn = n.array([ n.median( (flux_el/sig_el)[((sig_el==9999)==False)] ) for flux_el, sig_el in zip(allflux.T, allsig.T) ]) #median_sn = n.median( SNR[i][nodata[i]==False], axis=0) print(median_sn) iuse = n.where( median_sn>sn_min)[0] print('iuse',iuse) # tmpchi2 = n.zeros((iuse.size, iuse.size)) A = n.zeros((iuse.size, iuse.size)) print("creates the matrix", time.time()-t0, 's', allflux.shape) tmp_yerr = 1./allisig[:, iuse].T.reshape(iuse.size, masterwave.size) tmp_y = allflux[:,iuse].T for i in n.arange(iuse.size): #print(i) tmp_x = allflux[:, iuse[i]].T.reshape(1,masterwave.size) tmp_xerr = 1./allisig[:, iuse[i]].T.reshape(1,masterwave.size) #print(tmp_x, tmp_y, tmp_xerr, tmp_yerr) A_tmp, chi2_tmp = mathutils.quick_amplitude(tmp_x, tmp_y, tmp_xerr, tmp_yerr) A[i,:] = A_tmp tmpchi2[i,:] = chi2_tmp chi2 = tmpchi2/(iuse.size-1) # reduced chi2 print('chi2', chi2) #pcs = n.array([1,10,25,50,75,90,99]) #pcs = n.array([16,17,18,19,20,21,22, 23, 24]) scrs = scoreatpercentile(n.ravel(chi2), [5, 15, 25, 35, 45, 55, 65, 75, 85, 95])#0,16,17,18,19,20,21,22, 23, 24]) print('chi2 distribution 5,15,25,..95%', scrs) chi2_min = scoreatpercentile(n.ravel(chi2), percentile ) #print('minimum distance chi2_min', chi2_min) # 0.05 # the minimum distance, the only free paramter a_matrix = chi2<chi2_min # relationship matrix cost = n.ones(iuse.size) g = setcover.SetCover(a_matrix, cost) # 
I'm using greedy just for demonstration # g.greedy() # SolveSCP() should be used to generate near-optimal solution g.SolveSCP() # These are the archetypes iarchetype = n.nonzero(g.s)[0] print( chi2_min, len(iarchetype), time.time()-t0, 's') # sort archeypes against the number of spectra they represent n_rep = n.sum(a_matrix[:, iarchetype], axis=0) # how many covered by the archetype? isort = n.argsort(n_rep)[::-1] print(n_rep) archetype_median = n.zeros((iarchetype.size, masterwave.size)) # These are the archetypal composites we want to use as the initial guess for i in n.arange(iarchetype.size): itmp = a_matrix[:, iarchetype[i]] # These are the instances represented by the archetype for j in n.arange(masterwave.size): thisflux = allflux[j,iuse[itmp]] archetype_median[i, j] = n.median(thisflux[thisflux!=0]) # Only use the objects that have this wavelength covered #from _pickle import cPickle #cPickle.dum imax = n.max(isort) print('imax', imax) p.clf() fig = p.figure(figsize=(10,imax*5)) fig.subplots_adjust(hspace=0) p.title(out_name) for i in n.arange(0,imax,1): ax = fig.add_subplot(imax,1,i+1) ax.plot(masterwave[::3], archetype_median[isort[i],:][::3] , label = 'nRep=' + str(n_rep[isort[i]]) ) ax.set_xlim(masterwave[10], masterwave[-10]) #ax.set_ylim(-0.1, 3) #ax.set_xticks([]) ax.axvline(1215, color='b', ls='dashed', label='1215 Lya') ax.axvline(1546, color='c', ls='dashed', label='1546 CIV') ax.axvline(2800, color='m', ls='dashed', label='2800 MgII') ax.axvline(3727, color='g', ls='dashed', label='3727 [OII]') ax.axvline(5007, color='r', ls='dashed', label='5007 [OIII]') ax.axvline(6565, color='k', ls='dashed', label='6565 Ha') print(i, n.count_nonzero(a_matrix[:,iarchetype[isort[i]]])) ax.grid() ax.legend(frameon=False, loc=0) p.xlabel('Angstrom') p.tight_layout() p.savefig( os.path.join(archetype_dir, "figure_archetypes_"+out_name +".png") ) p.clf() n.savetxt(os.path.join(archetype_dir, "archetypes_"+out_name+".txt") , n.vstack((masterwave, archetype_median))) 
f = open(os.path.join(archetype_dir, "index_archetypes_"+out_name+".pkl"), 'wb') obj = ObjIds(imax, iuse, n_rep) pickle.dump(obj, f) f.close() except(ValueError): print('ValueError')
else: for i in range( len(sigma1_2)-len(sigma2) ): sigma2 = np.append(sigma2, 1e9) data2 = np.append(data2, 0) variance = (1/sigma1_2) + (1/sigma2) ''' distmat[i, j] = distmat[j][i] = 1e9 # calculate reduced chi2 as distance #if distmat[i][j] < mindist: #binmat[i][j] = 1 print " " binmat = distmat < mindist cost = np.ones(binmat.shape[0]) #cost = 1 / hdu g = setcover.SetCover(binmat, cost) #g.greedy() g.SolveSCP() # Get the archetype indices iarchetype = np.nonzero(g.s)[0] #print "Number of archetypes: %s\n" % iarchetype.shape # How many are covered by each archetype? n_rep = np.sum(binmat[:, iarchetype], axis=0) for i, arch in enumerate(iarchetype): if n_rep[i] == 1: pass #print "Archtype #%s represents %s spectra." % (arch, n_rep[i]) else: pass