def mine(self, tuple_size, num_tuples, weights=None, expand=False, table_size=2**19):
    """
    Mine this object's list database with the sampled min-hashing
    routines exposed by ``sa``.

    Parameters
    ----------
    tuple_size, num_tuples, table_size :
        Forwarded unchanged to ``sa.sampledmh_mine`` or
        ``sa.sampledmh_mine_weighted``.
    weights :
        Optional object exposing a ``weights`` attribute.  When truthy the
        weighted miner is used; otherwise the unweighted one.
    expand :
        Optional object exposing an ``ldb`` attribute.  When truthy,
        ``self.ldb`` is expanded with the cumulative frequencies computed
        against ``expand.ldb`` before mining (and, if ``weights`` is also
        given, the weights are expanded with the same frequencies).

    Returns
    -------
    SMH
        A new ``SMH`` built from the mined list database.
    """
    if expand:
        max_freq = sa.mh_get_cumulative_frequency(self.ldb, expand.ldb)
        # NOTE(review): the expanded database is never released in this
        # method -- confirm whether ``sa`` requires an explicit destroy call.
        source = sa.mh_expand_listdb(self.ldb, max_freq)
        if weights:
            weight_values = sa.mh_expand_weights(expand.ldb.size, max_freq, weights.weights)
    else:
        source = self.ldb
        if weights:
            weight_values = weights.weights

    if weights:
        # Both weighted paths share one argument layout:
        # (database, tuple_size, num_tuples, table_size, weights).
        # The weights-only path used to pass an extra, undefined name here,
        # which raised NameError as soon as that path was taken.
        ldb = sa.sampledmh_mine_weighted(source, tuple_size, num_tuples, table_size, weight_values)
    else:
        ldb = sa.sampledmh_mine(source, tuple_size, num_tuples, table_size)

    return SMH(ldb=ldb)
def mine(self, listdb, weights=None, expand=None):
    """
    Sample an inverted file to mine sets of highly co-occurring items.

    ``listdb`` provides the database through its ``ldb`` attribute.  When
    ``expand`` is truthy the database is first expanded using the cumulative
    frequencies computed against ``expand.ldb``; when ``weights`` is truthy
    the weighted miner is used with ``weights.weights`` (expanded as well if
    ``expand`` is given).  The mining parameters are read from
    ``self.tuple_size_``, ``self.number_of_tuples_``, ``self.table_size_``
    and ``self.min_set_size_``.

    Returns a new ``ListDB`` wrapping the mined database, after
    ``sa.listdb_delete_smallest`` has been applied to it with
    ``self.min_set_size_``.
    """
    weighted = bool(weights)
    expanded = bool(expand)

    # Step 1: choose the database to mine (expanded copy or the original).
    if expanded:
        cumulative = sa.mh_get_cumulative_frequency(listdb.ldb, expand.ldb)
        source = sa.mh_expand_listdb(listdb.ldb, cumulative)
    else:
        source = listdb.ldb

    # Step 2: run the matching miner.
    if weighted:
        if expanded:
            weight_values = sa.mh_expand_weights(expand.ldb.size, cumulative, weights.weights)
        else:
            weight_values = weights.weights
        mined = sa.sampledmh_mine_weighted(
            source,
            self.tuple_size_,
            self.number_of_tuples_,
            self.table_size_,
            weight_values,
            self.min_set_size_,
        )
    else:
        mined = sa.sampledmh_mine(
            source,
            self.tuple_size_,
            self.number_of_tuples_,
            self.table_size_,
            self.min_set_size_,
        )

    # Step 3: the expanded copy is temporary and is destroyed after mining.
    if expanded:
        sa.listdb_destroy(source)

    sa.listdb_delete_smallest(mined, self.min_set_size_)
    return ListDB(ldb=mined)
def mine(self, listdb, weights=None, expand=None):
    """
    Sample an inverted file to mine sets of highly co-occurring items.

    The database is taken from ``listdb.ldb``.  A truthy ``expand`` makes the
    method mine an expanded copy of it (built from the cumulative frequencies
    against ``expand.ldb``); a truthy ``weights`` selects the weighted miner
    and supplies ``weights.weights``.  Mining parameters come from
    ``self.tuple_size_``, ``self.number_of_tuples_``, ``self.table_size_``
    and ``self.min_set_size_``.

    Returns a new ``ListDB`` built from the mined database once
    ``sa.listdb_delete_smallest`` has been run on it.
    """
    if expand:
        max_freq = sa.mh_get_cumulative_frequency(listdb.ldb, expand.ldb)
        grown = sa.mh_expand_listdb(listdb.ldb, max_freq)
        if weights:
            grown_weights = sa.mh_expand_weights(expand.ldb.size, max_freq, weights.weights)
            mined = sa.sampledmh_mine_weighted(
                grown,
                self.tuple_size_,
                self.number_of_tuples_,
                self.table_size_,
                grown_weights,
                self.min_set_size_,
            )
        else:
            mined = sa.sampledmh_mine(
                grown,
                self.tuple_size_,
                self.number_of_tuples_,
                self.table_size_,
                self.min_set_size_,
            )
        # The expanded copy is only needed while mining.
        sa.listdb_destroy(grown)
    elif weights:
        mined = sa.sampledmh_mine_weighted(
            listdb.ldb,
            self.tuple_size_,
            self.number_of_tuples_,
            self.table_size_,
            weights.weights,
            self.min_set_size_,
        )
    else:
        mined = sa.sampledmh_mine(
            listdb.ldb,
            self.tuple_size_,
            self.number_of_tuples_,
            self.table_size_,
            self.min_set_size_,
        )

    sa.listdb_delete_smallest(mined, self.min_set_size_)
    return ListDB(ldb=mined)
def invert(self):
    """
    Build a new ``SMH`` from the result of ``sa.sampledmh_mine(self.ldb)``.

    The native result is tagged with ``_inverted = True`` and keeps a
    back-reference to this object in ``_original`` before being wrapped.
    """
    # NOTE(review): this calls the *mining* routine with a single argument,
    # whereas it is called elsewhere with tuple/table sizes as well, and
    # nothing here performs an inversion.  Presumably a dedicated inversion
    # function of ``sa`` was intended -- confirm against the native API.
    result = sa.sampledmh_mine(self.ldb)
    result._inverted = True
    result._original = self
    return SMH(ldb=result)