def files2comms(self, files):
    """Merge several community files into one combined community mapping.

    Each path in *files* is read into its own Partition over ``self.vor``;
    the partitions are merged with ``Partition.combine``, optionally
    smoothed (when ``self.smooth`` is set), and the merged mapping
    (``multi_par.comms``) is returned.
    """
    # Iterate the files directly -- the original indexed via
    # range(len(files)), which is un-idiomatic and equivalent.
    pars = []
    for f in files:
        par = part.Partition(self.vor, False)
        par.read(f)
        pars.append(par)
    multi_par = part.Partition(self.vor, False)
    multi_par.combine(pars)
    if self.smooth:
        multi_par.smooth_until_stable()
    return multi_par.comms
def dist_sequence(self, db, smooth):
    """Print the distance between each consecutive pair of partitions.

    Builds one Voronoi over the union of vertices appearing in any
    per-percentile community file, loads a Partition per percentile
    (optionally smoothed), then prints ``parts[per].distance(parts[prev])``
    for each percentile and its predecessor, in percentile order.
    """
    # create Voronoi over all vertices seen in any community file
    f_ins = [self.comm_path(per, False) for per in self.percent_range()]
    vertices = set()
    for f in f_ins:
        vertices |= set(part.read(f).keys())
    vor = Voronoi(db, vertices)

    # create partitions, one per percentile
    parts = {}
    for per in self.percent_range():
        par = part.Partition(vor)
        par.read(self.comm_path(per, False))
        if smooth:
            par.smooth_until_stable()
        parts[per] = par

    # Distances between consecutive percentiles.
    # BUG FIX: the original used ``prev = False`` as the "no previous"
    # sentinel, so a legitimate falsy percentile (e.g. 0) would also be
    # skipped; ``None`` with an identity test is unambiguous.
    prev = None
    for per in self.percent_range():
        if prev is not None:
            dist = parts[per].distance(parts[prev])
            print('%s' % dist)
        prev = per
def process_file(self, f_in):
    """Load one community file into a partition and return its borders."""
    print("processing file %s ..." % f_in)
    partition = part.Partition(self.vor, False)
    partition.read(f_in)
    if self.smooth:
        partition.smooth_until_stable()
    return self.borders(partition.comms)
def similarity_matrix(self, db, smooth, optimize):
    """Print a full npers x npers matrix of pairwise partition distances as CSV.

    One row per percentile is printed, comma-separated. Distances are
    symmetric: only the upper triangle (per1 < per2) is computed; the
    lower triangle is read back from the ``dists`` cache. When
    ``optimize == 'memory'`` the per-partition comm-x-comm tables are
    spilled to ``tmp/<percentile>`` files and reloaded on demand instead
    of being kept resident.
    """
    # create precentile range
    pr = self.percent_range()
    npers = len(pr)
    # save memory: spill comm-x-comm tables to disk instead of keeping them
    use_disk = optimize == 'memory'
    # create Voronoi over the union of vertices from every community file
    f_ins = []
    for per in pr:
        f_ins.append(self.comm_path(per, False))
    vertices = set()
    for f in f_ins:
        fverts = set(part.read(f).keys())
        vertices = vertices.union(fverts)
    vor = Voronoi(db, vertices)
    # create paritions, one per percentile (optionally smoothed)
    parts = {}
    for per in pr:
        f_in = self.comm_path(per, False)
        par = part.Partition(vor)
        par.read(f_in)
        if smooth:
            par.smooth_until_stable()
        parts[per] = par
    # create distances cache (upper triangle is filled as it is computed)
    dists = np.zeros((npers, npers))
    # compute distances
    for i in range(npers):
        per1 = pr[i]
        # For rows after the first, per1's table was saved during row 0
        # (see the j == 1 save below); reload it from disk.
        if use_disk and (i > 0):
            parts[per1].load_commxcomm('tmp/%s' % per1)
        for j in range(npers):
            per2 = pr[j]
            if per1 < per2:
                # NOTE(review): this assumes percent_range() is sorted
                # ascending, so per1 < per2 is exactly the upper triangle
                # (j > i) -- confirm against percent_range().
                if use_disk and (i > 0):
                    parts[per2].load_commxcomm('tmp/%s' % per2)
                dist = parts[per1].distance(parts[per2])
                if use_disk:
                    if i > 0:
                        # table was only loaded for this comparison; drop it
                        parts[per2].clean_commxcomm()
                    else:
                        # first row: persist each per2 table for later rows
                        parts[per2].save_commxcomm('tmp/%s' % per2)
                        if j == 1:
                            # also persist per1's (row 0) table once
                            parts[per1].save_commxcomm('tmp/%s' % per1)
                dists[i][j] = dist
            else:
                # diagonal (0.0 from np.zeros) or previously computed
                # upper-triangle value mirrored from the cache
                dist = dists[j][i]
            if j > 0:
                print(',', end="")
            print('%s' % dist, end="", flush=True)
        print('', flush=True)
        if use_disk:
            parts[per1].clean_commxcomm()
def metric(self, metric, db, best, smooth, scale):
    """Print a CSV of "percentile,distance,metric" rows.

    For each percentile, loads either the single best community file
    (``best=True``) or every file in the percentile's directory,
    averages ``Partition.metric(metric)`` over those files, and prints
    one row with the percentile, ``self.dist(per, scale)`` and the mean.
    """
    # collect every input file so the Voronoi covers all their vertices
    f_ins = []
    for per in self.percent_range():
        f_ins.extend(self._metric_inputs(per, best))
    vertices = set()
    for f in f_ins:
        vertices |= set(part.read(f).keys())
    vor = Voronoi(db, vertices)
    # compute metrics
    print("percentile,distance,metric")
    for per in self.percent_range():
        files = self._metric_inputs(per, best)
        m = 0.
        for f in files:
            par = part.Partition(vor)
            par.read(f)
            if smooth:
                par.smooth_until_stable()
            m += par.metric(metric)
        m /= float(len(files))
        print("%s,%s,%s" % (per, self.dist(per, scale), m))

def _metric_inputs(self, per, best):
    """Return the community-file path(s) for one percentile.

    ``best=True`` yields the single best-community file; otherwise every
    file under the percentile's directory is returned with its directory
    prefixed.
    """
    if best:
        return [self.comm_path(per, False)]
    dir_in = self.comm_path(per, True)
    names = []
    for (dirpath, dirnames, filenames) in os.walk(dir_in):
        names.extend(filenames)
    # BUG FIX: the original accumulated bare filenames into a shared list
    # and then re-prefixed the WHOLE list with the current percentile's
    # dir_in on every iteration, corrupting paths gathered for earlier
    # percentiles. Prefixing here, per percentile, keeps each file paired
    # with its own directory.
    return ["%s/%s" % (dir_in, f) for f in names]
def compute(self):
    """Report the mean community area and community count for in_file."""
    print("processing file %s ..." % self.in_file)
    par = part.Partition(self.vor, False)
    par.read(self.in_file)
    if self.smooth:
        par.smooth_until_stable()
    vareas = self.vor.areas()
    # total Voronoi-cell area per community (negative ids are skipped)
    careas = {}
    for loc_id, comm in par.comms.items():
        if comm >= 0:
            if comm in careas:
                careas[comm] += vareas[loc_id]
            else:
                careas[comm] = vareas[loc_id]
    total = sum(careas.values())
    mean_area = total / len(careas)
    print('mean_area: %s' % mean_area)
    print('communities: %s' % len(careas))