def process_solution(self, fname, coef_data, pol_data, embeddings):
    '''Process a single solution file.

    It is assumed that the solution corresponds to the configurations set by
    pol_data and embeddings. The filename should be of format
    [name][pind]_[rt]us.json, where pind and rt are integers. If there is
    only one element of pol_data there is no pind. When the filename carries
    no recognisable pind, pind falls back to 0.

    Parameters:
        fname       : path of the solution file to load with DWAVE_Sol.
        coef_data   : indexable by pind; each entry provides 'h' and 'J'.
        pol_data    : indexable by pind, then by embedding key.
        embeddings  : mapping of key -> embedding; each embedding provides a
                      'models' mapping whose values are concatenated into the
                      list of qubits of that embedding.

    Returns None. For every embedding the reduced solution is handed to
    self.save_subsol. A message is printed and nothing is saved when the
    filename does not match the expected pattern or the file cannot be read.
    '''

    fprint(os.path.basename(fname))

    NP = len(pol_data)  # number of possible polarization configurations
    print(NP)
    pind = None

    if NP == 1:
        fn_regex = re.compile(r'.*[a-zA-Z_]+_[0-9]+us\.json')
        val_regex = re.compile(r'_[0-9]+us')
    else:
        # A character class can only bound a single digit: '[0-12]' would
        # match just the characters 0, 1 and 2. Fall back to any digit once
        # NP no longer fits in one character.
        pind_class = '[0-{0}]'.format(NP) if NP < 10 else '[0-9]'
        fn_regex = re.compile(r'.*{0}+_[0-9]+us\.json'.format(pind_class))
        val_regex = re.compile(r'{0}+_[0-9]+us'.format(pind_class))

    if not fn_regex.match(fname):
        # second chance: accept a filename without a polarization index.
        # Checked explicitly (not with assert) so it survives `python -O`.
        fn_regex = re.compile(r'.*_[0-9]+us\.json')
        if not fn_regex.match(fname):
            print('Given filename does not match the required pattern: {0}...'.format(fname))
            return
        val_regex = re.compile(r'_[0-9]+us')
        pind = 0

    # extract pind and rt
    val_str = val_regex.findall(fname)[-1]  # will work if this far
    vals = [int(x) for x in re.findall('[0-9]+', val_str)]

    if pind is None:
        pind = 0 if NP == 1 else vals[0]
    rt = vals[-1]

    # get solution object
    sol_name = os.path.basename(fname)
    try:
        solution = DWAVE_Sol(fname)
    except IOError:
        # report the skipped file instead of dropping it silently
        print('Failed to read solution file: {0}'.format(fname))
        return

    h = coef_data[pind]['h']
    J = coef_data[pind]['J']
    efunc = lambda s: compute_E(h, J, s)

    for key in embeddings:
        embed = embeddings[key]
        pols = pol_data[pind][key]

        # flatten the per-model qubit lists and convert to linear indices
        qbits = list(reduce(lambda x, y: x + y, embed['models'].values()))
        qbits = [tuple_to_linear(qb, M=12, N=12, L=4, index0=False)
                 for qb in qbits]

        sol = solution.get_reduced_solution(qbits, efunc)
        self.save_subsol(embed, pols, sol, h, J, rt, pind, sol_name)
def run_processing(self):
    '''Export the reduced solution of every embedding matching EMBED_FILTER.

    Reads three paths from the input fields: i1 (solution file, must look
    like *.json), i2 (coefficient file) and i3 (embedding summary file).
    For each embedding in the summary whose 'qca_file' basename equals
    EMBED_FILTER, the reduced solution and the reduced h/J coefficients are
    computed and passed to self.export_subsol.

    Returns None. Every failure (bad filename, missing or unreadable input)
    is reported with a printed message and ends the run early.
    '''

    sol_fname = os.path.normpath(str(self.i1.text()))
    coef_text = str(self.i2.text())
    summ_fname = os.path.normpath(str(self.i3.text()))

    # confirm fname format
    if not re.match('.+[.]json', sol_fname):
        print('Invalid filename format...')
        return

    # Test the raw text: os.path.normpath('') returns '.', so checking the
    # normalised path could never detect an empty field.
    if not coef_text.strip():
        print('Missing coef file')
        return
    coef_fname = os.path.normpath(coef_text)

    try:
        sol = DWAVE_Sol(sol_fname)
    except IOError:
        print('Failed to read solution file')
        return

    # load coef file
    try:
        h, J = self.load_coef_file(coef_fname)
    except Exception as err:
        print('Invalid coef file given...')
        print('\t{0}'.format(err))
        return
    efunc = lambda s: compute_E(h, J, s)

    # load summary file and reduce the solution for each accepted embedding
    try:
        embeds = self.process_summ_file(summ_fname)
        subsols = {}
        params = {}
        for k, embed in embeds.items():
            qca_file = os.path.basename(embed['qca_file'])
            if qca_file != EMBED_FILTER:
                continue
            # flatten the per-model qubit lists, then map to linear indices
            qbits = list(reduce(lambda x, y: x + y, embed['models'].values()))
            qbits = [tuple_to_linear(qb, M=12, N=12, L=4, index0=False)
                     for qb in qbits]
            subsols[k] = sol.get_reduced_solution(qbits, efunc)
            h_, J_ = reduce_coefs(h, J, qbits)
            params[k] = {'h': h_, 'J': J_, 'qca_file': qca_file}
    except Exception as err:
        print('Failed to read embed summary...')
        print('\t{0}'.format(err))
        return

    for k, subsol in subsols.items():
        self.export_subsol(subsol, params[k])

    return

    # NOTE(review): everything below is unreachable because of the
    # unconditional return above. It looks like a gauge-transformation
    # analysis that was switched off on purpose; remove the return to
    # re-enable it -- TODO confirm intent with the author.
    for k, subsol in subsols.items():
        print(k)
        keys, spins, cell_occ, occ = subsol.model_reduction(
            embeds[k]['models'],
            ind_map=lambda qb: tuple_to_linear(qb, 12, 12, index0=False))

        # outcome statistics
        # cell_occ = sol.cell_occ
        # cell_occ = subsols[k].cell_occ
        M = max(cell_occ)   # number of gauge transformations
        N = max(max(x) for x in cell_occ.values())  # number of states

        D = np.zeros([M, N], dtype=int)
        for g, co in cell_occ.items():
            for s, o in co.items():
                D[g-1, s-1] = o

        # reject rare outcomes
        if False:
            inds = np.nonzero(np.max(D, axis=0) > 2)[0]
            D = D[:, inds]

        if False:
            if True:
                stack_plot = stackPlot(label='GT')
                stack_plot.set_data(D)
                stack_plot.plot(block=True)
            else:
                plt.figure('GT')
                plt.clf()
                plt.plot(D.transpose(), 'x')
                plt.show(block=True)

        # statistical distance
        SD = np.zeros([M, M], dtype=float)
        print('Computing statistical distances...')
        n_done = 0  # progress counter (kept separate from the loop key k)
        for i in range(M-1):
            for j in range(i+1, M):
                n_done += 1
                sys.stdout.write('\r{0}%'.format(n_done*100./(.5*M*(M-1))))
                sys.stdout.flush()
                SD[i, j] = SD[j, i] = stat_dist(D[i, :], D[j, :])
        print('\n')

        # seriate SD matrix if at least one pair of GTs have SD overlap
        if True:
            mask = np.ones(SD.shape, dtype=bool)
            np.fill_diagonal(mask, 0)
            if np.min(SD[mask]) > 0:
                print('seriating SD matrix...')
                try:
                    print('\tattempting {0}'.format(self.i4.text()))
                    new_inds = seriate(SD, method=str(self.i4.text()))
                except Exception:
                    print('\tfailed, using default method')
                    new_inds = seriate(SD, method='MDS')
                print(new_inds)
                SD = SD[new_inds, :][:, new_inds]
            else:
                print('No overlap between GT distributions. Seriation not possible.')

        plt.imshow(SD, aspect='auto', interpolation='none')
        plt.colorbar()
        plt.show(block=True)

        if False:
            avg_pdf = np.sum(D, axis=0)*1./np.sum(D)

            # look at number of random GT needed to estimate avg_pdf
            sd_est = {n: match_dist(D, avg_pdf, n) for n in range(1, M)}
            pprint(sd_est)

            K = sorted(sd_est.keys())

            plt.figure('SD v K')
            plt.plot(K, [sd_est[x][0] for x in K], 'x')
            plt.xlabel('Number of samples', fontsize=FS)
            plt.ylabel('Statistical Distance', fontsize=FS)
            plt.show(block=False)

            plt.figure('stdev(SD) v K')
            plt.plot(K, [sd_est[x][1] for x in K])
            plt.xlabel('Number of samples', fontsize=FS)
            plt.ylabel(r'$\sigma_{SD}$', fontsize=FS)
            plt.show(block=True)
def run_processing(self):
    '''Export the reduced solution of every embedding matching EMBED_FILTER.

    Reads three paths from the input fields: i1 (solution file, must look
    like *.json), i2 (coefficient file) and i3 (embedding summary file).
    For each embedding in the summary whose 'qca_file' basename equals
    EMBED_FILTER, the reduced solution and the reduced h/J coefficients are
    computed and passed to self.export_subsol.

    Returns None. Every failure (bad filename, missing or unreadable input)
    is reported with a printed message and ends the run early.
    '''

    sol_fname = os.path.normpath(str(self.i1.text()))
    coef_text = str(self.i2.text())
    summ_fname = os.path.normpath(str(self.i3.text()))

    # confirm fname format
    if not re.match('.+[.]json', sol_fname):
        print('Invalid filename format...')
        return

    # Test the raw text: os.path.normpath('') returns '.', so checking the
    # normalised path could never detect an empty field.
    if not coef_text.strip():
        print('Missing coef file')
        return
    coef_fname = os.path.normpath(coef_text)

    try:
        sol = DWAVE_Sol(sol_fname)
    except IOError:
        print('Failed to read solution file')
        return

    # load coef file
    try:
        h, J = self.load_coef_file(coef_fname)
    except Exception as err:
        print('Invalid coef file given...')
        print('\t{0}'.format(err))
        return
    efunc = lambda s: compute_E(h, J, s)

    # load summary file and reduce the solution for each accepted embedding
    try:
        embeds = self.process_summ_file(summ_fname)
        subsols = {}
        params = {}
        for k, embed in embeds.items():
            qca_file = os.path.basename(embed['qca_file'])
            if qca_file != EMBED_FILTER:
                continue
            # flatten the per-model qubit lists, then map to linear indices
            qbits = list(
                reduce(lambda x, y: x + y, embed['models'].values()))
            qbits = [
                tuple_to_linear(qb, M=12, N=12, L=4, index0=False)
                for qb in qbits
            ]
            subsols[k] = sol.get_reduced_solution(qbits, efunc)
            h_, J_ = reduce_coefs(h, J, qbits)
            params[k] = {'h': h_, 'J': J_, 'qca_file': qca_file}
    except Exception as err:
        print('Failed to read embed summary...')
        print('\t{0}'.format(err))
        return

    for k, subsol in subsols.items():
        self.export_subsol(subsol, params[k])

    return

    # NOTE(review): everything below is unreachable because of the
    # unconditional return above. It looks like a gauge-transformation
    # analysis that was switched off on purpose; remove the return to
    # re-enable it -- TODO confirm intent with the author.
    for k, subsol in subsols.items():
        print(k)
        keys, spins, cell_occ, occ = subsol.model_reduction(
            embeds[k]['models'],
            ind_map=lambda qb: tuple_to_linear(qb, 12, 12, index0=False))

        # outcome statistics
        # cell_occ = sol.cell_occ
        # cell_occ = subsols[k].cell_occ
        M = max(cell_occ)  # number of gauge transformations
        N = max(max(x) for x in cell_occ.values())  # number of states

        D = np.zeros([M, N], dtype=int)
        for g, co in cell_occ.items():
            for s, o in co.items():
                D[g - 1, s - 1] = o

        # reject rare outcomes
        if False:
            inds = np.nonzero(np.max(D, axis=0) > 2)[0]
            D = D[:, inds]

        if False:
            if True:
                stack_plot = stackPlot(label='GT')
                stack_plot.set_data(D)
                stack_plot.plot(block=True)
            else:
                plt.figure('GT')
                plt.clf()
                plt.plot(D.transpose(), 'x')
                plt.show(block=True)

        # statistical distance
        SD = np.zeros([M, M], dtype=float)
        print('Computing statistical distances...')
        n_done = 0  # progress counter (kept separate from the loop key k)
        for i in range(M - 1):
            for j in range(i + 1, M):
                n_done += 1
                sys.stdout.write('\r{0}%'.format(n_done * 100. /
                                                 (.5 * M * (M - 1))))
                sys.stdout.flush()
                SD[i, j] = SD[j, i] = stat_dist(D[i, :], D[j, :])
        print('\n')

        # seriate SD matrix if at least one pair of GTs have SD overlap
        if True:
            mask = np.ones(SD.shape, dtype=bool)
            np.fill_diagonal(mask, 0)
            if np.min(SD[mask]) > 0:
                print('seriating SD matrix...')
                try:
                    print('\tattempting {0}'.format(self.i4.text()))
                    new_inds = seriate(SD, method=str(self.i4.text()))
                except Exception:
                    print('\tfailed, using default method')
                    new_inds = seriate(SD, method='MDS')
                print(new_inds)
                SD = SD[new_inds, :][:, new_inds]
            else:
                print(
                    'No overlap between GT distributions. Seriation not possible.'
                )

        plt.imshow(SD, aspect='auto', interpolation='none')
        plt.colorbar()
        plt.show(block=True)

        if False:
            avg_pdf = np.sum(D, axis=0) * 1. / np.sum(D)

            # look at number of random GT needed to estimate avg_pdf
            sd_est = {n: match_dist(D, avg_pdf, n) for n in range(1, M)}
            pprint(sd_est)

            K = sorted(sd_est.keys())

            plt.figure('SD v K')
            plt.plot(K, [sd_est[x][0] for x in K], 'x')
            plt.xlabel('Number of samples', fontsize=FS)
            plt.ylabel('Statistical Distance', fontsize=FS)
            plt.show(block=False)

            plt.figure('stdev(SD) v K')
            plt.plot(K, [sd_est[x][1] for x in K])
            plt.xlabel('Number of samples', fontsize=FS)
            plt.ylabel(r'$\sigma_{SD}$', fontsize=FS)
            plt.show(block=True)
def run_processing(self):
    '''Compare the isolated solution of problem A with its marginal in A+B.

    Reads the combined-solution path from input field i1 (must match
    '*_AB_<digits>us.json'), the coefficient file path from i2 and an
    integer threshold from i3 (stored as self.n_thresh). The A and B
    solution filenames are derived from the AB filename by replacing its
    last 'AB' with 'A' and 'B' respectively.

    Returns None. Every failure (bad input, missing or unreadable file) is
    reported with a printed message and ends the run early.
    '''

    ab_fname = os.path.normpath(str(self.i1.text()))
    coef_text = str(self.i2.text())

    # a non-numeric threshold is a user input error, not a crash
    try:
        self.n_thresh = int(self.i3.text())
    except ValueError:
        print('Invalid threshold value...')
        return

    # confirm fname format
    if not re.match('.*_AB_[0-9]+us.json', ab_fname):
        print('Invalid filename format...')
        return

    # Test the raw text: os.path.normpath('') returns '.', so checking the
    # normalised path could never detect an empty field.
    if not coef_text.strip():
        print('Missing coef file')
        return
    coef_fname = os.path.normpath(coef_text)

    root, _, ext = ab_fname.rpartition('AB')
    # rt = re.search('[0-9]+', ext).group(0)
    # not very robust but good enough for now
    a_fname = root + 'A' + ext
    b_fname = root + 'B' + ext

    # check that all solution files exist
    if not all(os.path.exists(fn) for fn in [a_fname, b_fname, ab_fname]):
        print('Missing filenames...')
        return

    # load coef file
    try:
        h, J = self.load_coef_file(coef_fname)
    except Exception as err:
        print('Invalid coef file given...')
        print('\t{0}'.format(err))
        return
    e_func = lambda qb_spins: compute_E(h, J, qb_spins)

    # preamble done, now process
    print('loading solution files...')
    try:
        a_sol = DWAVE_Sol(a_fname)
        b_sol = DWAVE_Sol(b_fname)
        ab_sol = DWAVE_Sol(ab_fname)
    except IOError:
        print('Failed to read at least one of the solution files')
        return

    # get marginal distribution of ab_sol
    ab_marg = {}

    if True:
        print('getting marginalised solution for problem A...')
        ab_marg['A'] = ab_sol.get_reduced_solution(a_sol.qbits, e_func)
        compare_sols(a_sol, ab_marg['A'], 'A')

    if False:
        print('getting marginalised solution for problem B...')
        ab_marg['B'] = ab_sol.get_reduced_solution(b_sol.qbits, e_func)
        compare_sols(b_sol, ab_marg['B'], 'B')

    return

    # NOTE(review): everything below is unreachable because of the
    # unconditional return above, and it needs ab_marg['B'], which is only
    # filled in when the disabled branch above is switched on -- TODO
    # confirm intent with the author before re-enabling.

    # check independence of A and B
    print('checking independence of A and B...')
    hash_ = lambda s: hash(tuple(s.tolist()))

    qb_map = {k: i for i, k in
              enumerate(sorted(ab_marg['A'].qbits + ab_marg['B'].qbits))}

    a_inds = [qb_map[qb] for qb in ab_marg['A'].qbits]
    b_inds = [qb_map[qb] for qb in ab_marg['B'].qbits]

    # index the A states whose occupancy exceeds the threshold
    a_keys, a_key_inds, i = {}, [], 0
    for j in range(len(ab_marg['A'].energies)):
        key, occ = hash_(ab_marg['A'].spins[j, :]), ab_marg['A'].occ[j]
        if occ > self.n_thresh:
            a_keys[key], i = i, i+1
            a_key_inds.append(j)

    # index the B states whose occupancy exceeds the threshold
    b_keys, b_key_inds, i = {}, [], 0
    for j in range(len(ab_marg['B'].energies)):
        key, occ = hash_(ab_marg['B'].spins[j, :]), ab_marg['B'].occ[j]
        if occ > self.n_thresh:
            b_keys[key], i = i, i+1
            b_key_inds.append(j)

    if len(a_keys)*len(b_keys) < 1e5:
        ab_occ = np.zeros([len(a_keys), len(b_keys)], dtype=int)
        for i in range(ab_sol.spins.shape[0]):
            spin = ab_sol.spins[i, :]
            ka, kb = hash_(spin[a_inds]), hash_(spin[b_inds])
            if ka in a_keys and kb in b_keys:
                ab_occ[a_keys[ka], b_keys[kb]] = ab_sol.occ[i]

        ab_marg_occ = np.outer(np.array(ab_marg['A'].occ)[a_key_inds],
                               np.array(ab_marg['B'].occ)[b_key_inds])

        ab_occ = ab_occ*1./np.sum(ab_sol.occ)
        ab_marg_occ = ab_marg_occ*1./(np.sum(a_sol.occ)*np.sum(b_sol.occ))

        # statistical distance
        ab_sd = stat_dist(ab_occ, ab_marg_occ)
        print('Statistical Distance: {0:.4f}'.format(ab_sd))

        if True:
            # shared colour scale so both images are directly comparable
            vmax = max(np.max(ab_occ), np.max(ab_marg_occ))

            plt.figure('JD')
            plt.imshow(ab_occ, interpolation='none', aspect='auto',
                       vmin=0, vmax=vmax)
            plt.colorbar()
            plt.title('Joint distribution', fontsize=FS)
            plt.xlabel('A state index', fontsize=FS)
            plt.ylabel('B state index', fontsize=FS)
            plt.show(block=False)

            plt.figure('JMD')
            plt.imshow(ab_marg_occ, interpolation='none', aspect='auto',
                       vmin=0, vmax=vmax)
            plt.colorbar()
            plt.title('Joint marginal distribution', fontsize=FS)
            plt.xlabel('A state index', fontsize=FS)
            plt.ylabel('B state index', fontsize=FS)
            plt.show(block=True)

            # plt.figure('JD-Diff')
            # plt.imshow(np.abs(ab_occ-ab_marg_occ), interpolation='none', aspect='auto')
            # plt.colorbar()
            # plt.title('Joint distribution diff', fontsize=FS)
            # plt.xlabel('A state index', fontsize=FS)
            # plt.ylabel('B state index', fontsize=FS)
            # plt.show(block=True)
    else:
        print('Too many data points to safely plot')

    # compare marginal distribution to isolated distribution
    self.dist_comp(a_sol, ab_marg['A'], 'A', plot=True)
    self.dist_comp(b_sol, ab_marg['B'], 'B', plot=True)