def main():
    """Score the stored task-to-algorithm links against the labelled samples.

    For every task name in the Redis set ``samples`` this:

    * prints the task content via ``printTaskContent``;
    * looks up the implementation id in the Redis hash
      ``rosetta-id-taskname-mapping`` and, when present, reads the
      ``algorithm`` field of that document from the ``throwtable`` index;
    * reads the expected mapping from the Redis set ``<task>:map``;
    * passes both to ``checkPkg`` and records the returned outcome label in
      the Redis set ``samples-<label>`` and in a local tally.

    Finally the tally is printed, followed by precision and recall computed
    from the ``TRUE_POSITIVE``, ``FALSE_POSITIVE`` and ``FALSE_NEGATIVE``
    counts. A ratio whose denominator is zero is reported as ``n/a``
    instead of raising ``ZeroDivisionError``.
    """
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    site = mw.Site('rosettacode.org', path='/mw/')
    r = redis.StrictRedis()

    def ratio(numerator, denominator):
        # Counter yields 0 for labels that never occurred, so an empty or
        # all-negative sample set would otherwise divide by zero here.
        if denominator == 0:
            return 'n/a'
        return 1.0 * numerator / denominator

    samples = r.smembers('samples')
    conditions = Counter()

    for counter, task_name in enumerate(samples):
        print('task # %d ================' % counter)
        printTaskContent(task_name, site)

        impl_id = r.hget('rosetta-id-taskname-mapping', normalize(task_name))
        actual = []
        if impl_id is not None:
            # ignore=404 is passed so a missing document is reported through
            # the 'found' flag of the response, which is checked below.
            doc = es.get(index='throwtable', doc_type='implementation',
                         id=impl_id, ignore=404)
            if doc['found']:
                actual = doc['_source']['algorithm']

        expected = r.smembers("%s:map" % task_name)
        outcome = checkPkg(task_name, actual, expected, r)
        print(outcome)
        r.sadd('samples-%s' % outcome, task_name)
        conditions[outcome] += 1

    for (k, v) in conditions.items():
        print("%s: %s" % (k, v))

    true_pos = conditions[TRUE_POSITIVE]
    # Single-argument print keeps the output identical to the former
    # ``print "Precision:", value`` statement.
    print("Precision: %s" % ratio(
        true_pos, true_pos + conditions[FALSE_POSITIVE]))
    print("Recall: %s" % ratio(
        true_pos, true_pos + conditions[FALSE_NEGATIVE]))
def index_rosetta_page(page, algo_ids):
    """Index every solution of a Rosetta page as an ``implementation`` doc.

    ``page`` is wrapped in ``Task`` to obtain the task name, the summary and
    the list of solutions. One document per solution is written through the
    module-level ``es`` client into ``INDEX_NAME``. Each document carries the
    solution's language and content, ``algo_ids`` and the fixed source tag
    ``rosetta``.

    The ``description`` field (summary lines joined with newlines) is only
    added when ``algo_ids`` has more than one entry.

    The document id is ``rosetta:<normalized task name>:<language>``, where
    the language is the utf8-decoded solution language passed through
    ``get_standardized_lang`` together with the module-level ``rd``.
    """
    task = Task(page)

    for solution in task.solutions:
        document = dict(
            language=solution['language'],
            algorithm=algo_ids,
            source='rosetta',
            implementation=solution['content'],
        )

        if len(algo_ids) > 1:
            document['description'] = '\n'.join(task.task_summary)

        # The id is built in the same left-to-right order as before:
        # task-name part first, then the standardized language.
        id_prefix = 'rosetta:' + normalize(task.task_name) + ':'
        doc_id = id_prefix + get_standardized_lang(
            solution['language'].decode('utf8'), rd)

        es.index(index=INDEX_NAME, doc_type='implementation',
                 id=doc_id, body=document)
def main():
    """Score the stored task-to-algorithm links against the labelled samples.

    Each member of the Redis set ``samples`` is treated as a task name. For
    each one the task content is printed with ``printTaskContent``. The
    implementation id is then read from the Redis hash
    ``rosetta-id-taskname-mapping``; when one exists and the document is
    found in the ``throwtable`` index, its ``algorithm`` field becomes the
    actual result, otherwise the actual result is an empty list. The
    expected result is the Redis set ``<task>:map``.

    ``checkPkg`` turns the pair into an outcome label. The task is added to
    the Redis set ``samples-<label>`` and the label is tallied.

    After the loop the tally is printed, then precision and recall derived
    from the ``TRUE_POSITIVE``, ``FALSE_POSITIVE`` and ``FALSE_NEGATIVE``
    counts. A ratio with a zero denominator prints as ``n/a`` rather than
    raising ``ZeroDivisionError``.
    """
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    site = mw.Site('rosettacode.org', path='/mw/')
    r = redis.StrictRedis()

    def ratio(numerator, denominator):
        # A label that never occurred counts as 0 in the Counter, so with no
        # samples (or no positives) the denominator can legitimately be 0.
        if denominator == 0:
            return 'n/a'
        return 1.0 * numerator / denominator

    samples = r.smembers('samples')
    conditions = Counter()

    for counter, task_name in enumerate(samples):
        print('task # %d ================' % counter)
        printTaskContent(task_name, site)

        impl_id = r.hget('rosetta-id-taskname-mapping', normalize(task_name))
        actual = []
        if impl_id is not None:
            # With ignore=404 the lookup result is inspected through its
            # 'found' flag instead of relying on an exception.
            doc = es.get(index='throwtable', doc_type='implementation',
                         id=impl_id, ignore=404)
            if doc['found']:
                actual = doc['_source']['algorithm']

        expected = r.smembers("%s:map" % task_name)
        outcome = checkPkg(task_name, actual, expected, r)
        print(outcome)
        r.sadd('samples-%s' % outcome, task_name)
        conditions[outcome] += 1

    for (k, v) in conditions.items():
        print("%s: %s" % (k, v))

    true_pos = conditions[TRUE_POSITIVE]
    # Formatting into one string reproduces the output of the former
    # two-item print statement ("label: value").
    print("Precision: %s" % ratio(
        true_pos, true_pos + conditions[FALSE_POSITIVE]))
    print("Recall: %s" % ratio(
        true_pos, true_pos + conditions[FALSE_NEGATIVE]))
def index_rosetta_page(page, algo_ids):
    """Index every solution of a Rosetta page as an ``implementation`` doc.

    ``Task(page)`` supplies the task name, the summary and the solutions.
    For each solution a document holding its language, its content,
    ``algo_ids`` and the source tag ``rosetta`` is written through the
    module-level ``es`` client into ``INDEX_NAME``.

    A ``description`` (summary lines joined with newlines) is attached only
    when ``algo_ids`` contains more than one entry.

    The document id is ``rosetta:<normalized task name>:<language>``, using
    the utf8-decoded solution language as-is.
    """
    task = Task(page)

    for solution in task.solutions:
        document = dict(
            language=solution['language'],
            algorithm=algo_ids,
            source='rosetta',
            implementation=solution['content'],
        )

        if len(algo_ids) > 1:
            document['description'] = '\n'.join(task.task_summary)

        # Same left-to-right construction as before: prefix with the
        # normalized task name, then append the decoded language.
        id_prefix = 'rosetta:' + normalize(task.task_name) + ':'
        doc_id = id_prefix + solution['language'].decode('utf8')

        es.index(index=INDEX_NAME, doc_type='implementation',
                 id=doc_id, body=document)