Exemple #1
0
class AggregationTestCase(unittest.TestCase):
    """Smoke tests for the ``aggregation`` estimators against the DBLP API.

    Each test only checks that the estimator call completes without raising;
    the estimators' return values are not inspected.
    """

    def setUp(self):
        """Create the DBLP client and the initial query pool for each test."""
        search_term = 'q'
        # NOTE(review): 'h' is presumably DBLP's "max hits" request
        # parameter -- confirm against the PublApi implementation.
        parameters = {'h': 1000}
        self.dblp = PublApi(top_k=1000,
                            delay=5,
                            search_term=search_term,
                            **parameters)
        # Registered right after the client exists so the session is closed
        # after every test, even if the rest of setUp or the test itself
        # raises. The lambda defers getSession() until cleanup time.
        self.addCleanup(lambda: self.dblp.getSession().close())

        # Join the components before normalising: concatenating strings onto
        # dirname(__file__) resolves to the filesystem root when __file__
        # has no directory part (dirname returns '').
        localdata_file = os.path.abspath(
            os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                         'csv_example', 'dblp_sample.csv'))
        localdata = LocalData(localdata_file, 'csv', "key", ["title"],
                              ["title"])
        # getlocalData() yields three values; only the query data is used.
        localdata_ids, localdata_query, localdata_er = localdata.getlocalData()
        self.initQueries = utils.queryGene(localdata_query, 2)

    def test_stra_stratified_estimator(self):
        """stratified_estimator runs to completion on the sample pool."""
        aggregation.stratified_estimator(query_pool=self.initQueries,
                                         api=self.dblp,
                                         match_term=["info.title"],
                                         candidate_rate=0.2,
                                         query_num=100)

    def test_sota_estimator(self):
        """sota_estimator runs to completion on the sample pool."""
        aggregation.sota_estimator(query_pool=self.initQueries,
                                   api=self.dblp,
                                   match_term=["info.title"],
                                   uniqueid="info.key",
                                   query_num=1)
Exemple #2
0
class DblpPublapiTestCase(unittest.TestCase):
    """Tests for ``PublApi`` that issue real requests through the client.

    The hit-count thresholds are lower bounds on the number of results the
    client returns for the fixed queries below.
    """

    def setUp(self):
        """Create a fresh DBLP client for each test."""
        search_term = 'q'
        # NOTE(review): 'h' is presumably DBLP's "max hits" request
        # parameter -- confirm against the PublApi implementation.
        parameters = {'h': 1000}
        self.dblp = PublApi(top_k=1000, delay=5, search_term=search_term,
                            **parameters)

    def tearDown(self):
        """Close the client's session after every test, pass or fail."""
        # Closing here (instead of inside one test) covers test_callApi as
        # well and still runs when an assertion fails.
        self.dblp.getSession().close()
        self.dblp = None

    def test_callApi(self):
        """A single '+'-joined query returns at least 900 hits."""
        query = ['set', 'cover']
        params = self.dblp.getKwargs()
        params[self.dblp.getSearchTerm()] = '+'.join(query)
        hitList = self.dblp.callAPI(params=params)
        # assertGreaterEqual reports the actual count on failure and is not
        # stripped when Python runs with -O, unlike a bare assert.
        self.assertGreaterEqual(len(hitList), 900)

    def test_callMulApi(self):
        """Two queries issued via callMulAPI return at least 1200 hits."""
        queries = [['set', 'cover'], ['approximate', 'query']]
        hitList = self.dblp.callMulAPI(queries)
        self.assertGreaterEqual(len(hitList), 1200)
Exemple #3
0
from deeperlib.api.dblp.publapi import PublApi
from deeperlib.core import utils
from deeperlib.data_processing.local_data import LocalData
from deeperlib.estimator import aggregation

# Example: run both estimators against DBLP using a local pickled sample.
# Each estimator gets its own PublApi client, and each client's session is
# closed in a ``finally`` so it is released even if the estimator raises.

# ---- Shared inputs ----
search_term = 'q'
# NOTE(review): 'h' is presumably DBLP's "max hits" request parameter --
# confirm against the PublApi implementation.
parameters = {'h': 1000}
localdata_file = 'dblp_10000'
localdata = LocalData(localdata_file, 'pkl', "row['key']", ["row['title']"],
                      ["row['title']"])
# getlocalData() yields three values; only the query data is used below.
localdata_ids, localdata_query, localdata_er = localdata.getlocalData()
initQueries = utils.queryGene(localdata_query, 2)

# ==== Sota-Estimator Dblp ====
dblp = PublApi(delay=5, search_term=search_term, **parameters)
try:
    aggregation.sota_estimator(query_pool=initQueries,
                               api=dblp,
                               match_term=["row['info']['title']"],
                               uniqueid="row['info']['key']",
                               query_num=1)
finally:
    # Close before ``dblp`` is rebound below; otherwise this session would
    # never be closed.
    dblp.getSession().close()

# ==== Stratified-Estimator Dblp ====
dblp = PublApi(delay=5, search_term=search_term, **parameters)
try:
    aggregation.stratified_estimator(query_pool=initQueries,
                                     api=dblp,
                                     match_term=["row['info']['title']"],
                                     candidate_rate=0.2,
                                     query_num=100)
finally:
    dblp.getSession().close()