def indexL(i, out_path='./index.json'):
    """Build a per-category hash index and dump it to a JSON file.

    For every key in ``i['categories']`` two string hashes of the value
    container are recorded: a 'large' variant (``containerHash(..., large=True)``)
    and a 'normal' variant.

    Args:
        i: mapping that provides a ``'categories'`` dict of
            ``{category_key: container}`` entries.
        out_path: destination JSON file. Defaults to ``'./index.json'``
            (the previously hard-coded path) for backward compatibility.
    """
    out = {}
    # tqdm only adds a progress bar; iteration order/content is unchanged.
    for k, v in tqdm(i['categories'].items()):
        out[k] = {
            # str() because containerHash's return value may not be
            # JSON-serializable directly — TODO confirm its return type.
            'large': str(containerHash(v, large=True)),
            'normal': str(containerHash(v)),
        }
    with open(out_path, 'w') as o:
        json.dump(out, o, indent=4)
def run(self):
    """Compute pairwise graph similarities for each WL iteration and
    bulk-insert them into the output Mongo collection.

    For every depth ``i`` in ``0..self.h`` a sparse graph-by-feature count
    matrix is assembled from the stored WL documents, turned into a
    normalized Jaccard gram matrix, and each upper-triangular similarity
    entry is written out as one document.
    """
    with self.input()[0] as mongo_inp:
        source = mongo_inp.collection
        for depth in range(self.h + 1):
            graph_index = {}
            feature_index = {}
            rows, cols, vals = [], [], []

            # Gather sparse (graph, feature) -> count triplets for this depth.
            for graph_id in self.graphs:
                doc = source.find_one({'_id': '%s_%d' % (graph_id, depth)})
                if doc is None:
                    continue
                g_pos = indexMap(graph_id, graph_index)
                for feature, cnt in doc['count'].items():
                    rows.append(g_pos)
                    cols.append(indexMap(feature, feature_index))
                    vals.append(cnt)

            # indexMap apparently tracks the running size under 'counter'
            # — TODO confirm against its definition.
            phi = coo_matrix(
                (vals, (rows, cols)),
                shape=(graph_index['counter'], feature_index['counter'])
            ).tocsr()
            phi = normalize_gram(jaccard_kernel(phi))

            # Drop the bookkeeping key so only real graph ids remain,
            # then recover ids ordered by their matrix position.
            del graph_index['counter']
            ordered_ids = np.array(
                [pair[0] for pair in sorted(graph_index.items(),
                                            key=lambda pair: pair[1])]
            )

            kernel_name = self.used_kernel.value
            docs = []
            n = ordered_ids.shape[0]
            # Upper triangle only (src < dst), same pairs as the original
            # "if gI < gJ" filter.
            for src in range(n):
                first = ordered_ids[src]
                for dst in range(src + 1, n):
                    second = ordered_ids[dst]
                    docs.append({
                        '_id': containerHash([first, second, depth, kernel_name]),
                        'first_id': first,
                        'second_id': second,
                        'h': depth,
                        'sim_function': kernel_name,
                        'similarity': phi[src, dst]
                    })

            # NOTE(review): the collapsed source is ambiguous, but since
            # `docs` is rebuilt per depth, the insert is done once per
            # depth iteration.
            with self.output() as mongo_out:
                mongo_out.collection.insert_many(docs)
def __taskid__(self):
    """Unique task id derived from hashes of the task's parameter sets."""
    # %s applies str() itself, so the explicit str() wrappers are redundant;
    # the resulting string is byte-identical.
    parts = (
        containerHash(self.graphs),
        containerHash(self.h_Set),
        containerHash(self.D_Set),
    )
    return "EvaluationAndSetting_%s_%s_%s" % parts
def __taskid__(self):
    """Unique task id derived from hashes of the grid-search parameters."""
    # "_".join with str() produces exactly the same string as the
    # original "%s"-formatted template.
    return "_".join([
        "hDGrid",
        str(containerHash(self.graphs)),
        str(containerHash(self.train_index)),
        str(containerHash(self.h_Set)),
        str(containerHash(self.D_Set)),
    ])
def __taskid__(self):
    """Unique task id for this (h, D) grid cell and its hashed inputs."""
    # %d formatting kept as-is to preserve its exact integer-conversion
    # semantics for self.h / self.D.
    return "CGrid_%d_%d_%s_%s" % (
        self.h,
        self.D,
        containerHash(self.train_index),
        containerHash(self.graphs),
    )