예제 #1
0
def indexL(i):
    """Hash every category of ``i`` and dump the result to ``./index.json``.

    ``i`` must be subscriptable with ``'categories'`` and that entry must
    offer ``.items()``.  For each ``(key, value)`` pair two hashes are
    computed with ``containerHash`` -- one with ``large=True`` and one with
    the default arguments -- and stored as strings under the keys
    ``'large'`` and ``'normal'``.  The resulting mapping is written as
    JSON with an indentation of 4; nothing is returned.
    """
    hashes = {
        category: {
            'large': str(containerHash(value, large=True)),
            'normal': str(containerHash(value)),
        }
        for category, value in tqdm(i['categories'].items())
    }

    with open('./index.json', 'w') as handle:
        json.dump(hashes, handle, indent=4)
예제 #2
0
    def run(self):
        """Compute pairwise graph similarities for each iteration and store them.

        For every ``i`` in ``0 .. self.h`` (inclusive):

        1. Look up the document ``'<graph>_<i>'`` in the input collection
           for each graph in ``self.graphs``; graphs without a document are
           skipped.
        2. Build a sparse matrix from the documents' ``'count'`` mappings
           (one row per graph found, one column per distinct count key).
        3. Pass that matrix through ``jaccard_kernel`` and
           ``normalize_gram``.
        4. Emit one document per unordered pair of graphs (``gI < gJ``)
           holding both graph ids, ``i``, the kernel identifier taken from
           ``self.used_kernel.value`` and the matrix entry ``phi[gI, gJ]``,
           and insert them into the output collection.

        Nothing is inserted for an iteration that yields no pairs.
        """
        with self.input()[0] as mongo_inp:
            coll = mongo_inp.collection
            for i in range(self.h + 1):
                graphIndex = {}
                nodeIndex = {}

                # COO triplets: data[k] sits at (row[k], column[k]).
                row = []
                column = []
                data = []

                for g in self.graphs:
                    wl_graph = coll.find_one({'_id': '%s_%d' % (g, i)})
                    if wl_graph is None:
                        continue
                    gI = indexMap(g, graphIndex)
                    for n, c in wl_graph['count'].items():
                        nI = indexMap(n, nodeIndex)
                        row.append(gI)
                        column.append(nI)
                        data.append(c)

                # NOTE(review): the 'counter' entries are presumably
                # maintained by indexMap; if no graph document exists for
                # this iteration the lookups below would raise KeyError --
                # confirm against indexMap.
                phi = coo_matrix((data, (row, column)),
                                 shape=(graphIndex['counter'],
                                        nodeIndex['counter'])).tocsr()

                phi = normalize_gram(jaccard_kernel(phi))

                # Drop the bookkeeping entry so that only graph -> index
                # pairs remain before the mapping is inverted.
                del graphIndex['counter']

                # Graph ids ordered by their assigned index.
                inv_graphIndex = np.array(
                    sorted(graphIndex, key=graphIndex.get)
                )

                used_kernel = self.used_kernel.value
                graph_count = inv_graphIndex.shape[0]

                bulk = []

                # Strict upper triangle: every unordered pair exactly once.
                for gI in range(graph_count):
                    g = inv_graphIndex[gI]
                    for gJ in range(gI + 1, graph_count):
                        p = inv_graphIndex[gJ]
                        bulk.append({
                            '_id': containerHash([g, p, i, used_kernel]),
                            'first_id': g,
                            'second_id': p,
                            'h': i,
                            'sim_function': used_kernel,
                            'similarity': phi[gI, gJ]
                        })

                with self.output() as mongo_out:
                    # Fewer than two graphs leaves ``bulk`` empty.  Assuming
                    # ``mongo_out.collection`` is a PyMongo collection,
                    # insert_many raises on an empty document list, so the
                    # insert is skipped in that case.
                    if bulk:
                        mongo_out.collection.insert_many(bulk)
예제 #3
0
 def __taskid__(self):
     """Return the task id string for this task.

     The id is ``EvaluationAndSetting_`` followed by the string forms of
     ``containerHash`` applied to ``self.graphs``, ``self.h_Set`` and
     ``self.D_Set``, joined by underscores.
     """
     graphs_hash = str(containerHash(self.graphs))
     h_set_hash = str(containerHash(self.h_Set))
     d_set_hash = str(containerHash(self.D_Set))
     return "EvaluationAndSetting_%s_%s_%s" % (
         graphs_hash, h_set_hash, d_set_hash
     )
예제 #4
0
 def __taskid__(self):
     """Return the task id string for this task.

     The id is ``hDGrid_`` followed by the string forms of
     ``containerHash`` applied to ``self.graphs``, ``self.train_index``,
     ``self.h_Set`` and ``self.D_Set``, joined by underscores.
     """
     graphs_hash = str(containerHash(self.graphs))
     train_hash = str(containerHash(self.train_index))
     h_set_hash = str(containerHash(self.h_Set))
     d_set_hash = str(containerHash(self.D_Set))
     return "hDGrid_%s_%s_%s_%s" % (
         graphs_hash, train_hash, h_set_hash, d_set_hash
     )
예제 #5
0
 def __taskid__(self):
     """Return the task id string for this task.

     The id is ``CGrid_`` followed by ``self.h`` and ``self.D`` (both
     formatted with ``%d``) and the string forms of ``containerHash``
     applied to ``self.train_index`` and ``self.graphs``, joined by
     underscores.
     """
     train_hash = str(containerHash(self.train_index))
     graphs_hash = str(containerHash(self.graphs))
     return "CGrid_%d_%d_%s_%s" % (
         self.h, self.D, train_hash, graphs_hash
     )