Example #1
0
def main():
    """Evaluate the indexed task-to-algorithm mapping on the sampled tasks.

    For every member of the Redis set ``samples`` this prints the task,
    looks up its implementation id in the ``rosetta-id-taskname-mapping``
    hash, reads the indexed algorithms from the ``throwtable`` index and
    classifies them against the expected set stored under ``<task>:map``
    with ``checkPkg``.  Each task is added to the ``samples-<result>`` set
    and tallied.  The tallies, precision and recall are printed at the end.

    Precision/recall are reported as ``n/a`` when their denominator is zero
    (for example when ``samples`` is empty) instead of raising
    ``ZeroDivisionError``.
    """
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    site = mw.Site('rosettacode.org', path='/mw/')
    r = redis.StrictRedis()
    conditions = Counter()

    # Single-argument print(...) behaves the same as the print statement.
    for counter, taskName in enumerate(r.smembers('samples')):
        print('task # %d ================' % counter)
        printTaskContent(taskName, site)

        # Default to "nothing indexed"; overwritten only when a document
        # is actually found.
        actual = []
        impl_id = r.hget('rosetta-id-taskname-mapping', normalize(taskName))
        if impl_id is not None:
            # ignore=404: a missing document is reported through the
            # 'found' flag rather than an exception.
            doc = es.get(index='throwtable', doc_type='implementation',
                         id=impl_id, ignore=404)
            if doc['found']:
                actual = doc['_source']['algorithm']
        expected = r.smembers("%s:map" % taskName)

        result = checkPkg(taskName, actual, expected, r)

        print(result)
        r.sadd('samples-%s' % result, taskName)
        conditions[result] += 1

    for (k, v) in conditions.items():
        print("%s: %s" % (k, v))

    # Counter yields 0 for labels that never occurred, so either
    # denominator can legitimately be zero.
    true_positives = conditions[TRUE_POSITIVE]
    for label, miss in (("Precision", FALSE_POSITIVE),
                        ("Recall", FALSE_NEGATIVE)):
        denominator = true_positives + conditions[miss]
        if denominator:
            print("%s: %s" % (label, 1.0 * true_positives / denominator))
        else:
            print("%s: n/a" % label)
def index_rosetta_page(page, algo_ids):
    """Index every solution of a Rosetta Code task page.

    ``page`` is parsed with ``Task``.  One ``implementation`` document per
    solution is written to ``INDEX_NAME`` through the module-level ``es``
    client, tagged with ``algo_ids``.  The task summary is attached as the
    description only when more than one algorithm id is supplied.
    """
    task = Task(page)  # extract data from page using Task from pr

    for solution in task.solutions:
        document = dict(
            language=solution['language'],
            algorithm=algo_ids,
            source='rosetta',
            implementation=solution['content'],
        )

        if len(algo_ids) > 1:
            document['description'] = '\n'.join(task.task_summary)

        # Document id: rosetta:<normalized task name>:<standardized language>
        id_prefix = 'rosetta:' + normalize(task.task_name) + ':'
        doc_id = id_prefix + get_standardized_lang(
            solution['language'].decode('utf8'), rd)

        es.index(index=INDEX_NAME, doc_type='implementation',
                 id=doc_id, body=document)
Example #3
0
def index_rosetta_page(page, algo_ids):
    """Write one ``implementation`` document per solution on ``page``.

    The page is wrapped in ``Task`` to obtain its solutions, task name and
    summary.  Every document records the solution's language and content,
    the given ``algo_ids`` and the source ``'rosetta'``, and is indexed into
    ``INDEX_NAME`` via the module-level ``es`` client.  A description (the
    summary lines joined by newlines) is added only if ``algo_ids`` has more
    than one entry.
    """
    parsed = Task(page)  # extract data from page using Task from pr

    for entry in parsed.solutions:
        payload = {}
        payload['language'] = entry['language']
        payload['algorithm'] = algo_ids
        payload['source'] = 'rosetta'
        payload['implementation'] = entry['content']

        if len(algo_ids) > 1:
            payload['description'] = '\n'.join(parsed.task_summary)

        index_kwargs = {'index': INDEX_NAME, 'doc_type': 'implementation'}
        # Id layout: 'rosetta:' + normalized task name + ':' + language slug.
        index_kwargs['id'] = (
            'rosetta:' + normalize(parsed.task_name) + ':' +
            get_standardized_lang(entry['language'].decode('utf8'), rd))
        index_kwargs['body'] = payload
        es.index(**index_kwargs)
Example #4
0
def main():
    """Score the indexed algorithm mapping against the sampled tasks.

    Iterates over the Redis set ``samples``.  For each task it prints the
    task content, resolves the implementation id from the
    ``rosetta-id-taskname-mapping`` hash, fetches the stored ``algorithm``
    field from the ``throwtable`` index (empty list when there is no id or
    no document), and lets ``checkPkg`` compare it with the expected members
    of ``<task>:map``.  The outcome is printed, the task is added to the
    ``samples-<outcome>`` set, and the outcome is counted.

    Finally the per-outcome counts, precision and recall are printed.  A
    ratio whose denominator is zero is printed as ``n/a`` rather than
    raising ``ZeroDivisionError``.
    """
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    site = mw.Site('rosettacode.org', path='/mw/')
    r = redis.StrictRedis()
    samples = r.smembers('samples')
    conditions = Counter()

    def report_ratio(label, other_condition):
        # Counter returns 0 for outcomes that never occurred, so guard the
        # division instead of assuming at least one positive was seen.
        hits = conditions[TRUE_POSITIVE]
        total = hits + conditions[other_condition]
        if total == 0:
            print("%s: n/a" % label)
        else:
            print("%s: %s" % (label, 1.0 * hits / total))

    # Single-argument print(...) behaves the same as the print statement.
    for counter, taskName in enumerate(samples):
        print('task # %d ================' % counter)
        printTaskContent(taskName, site)

        impl_id = r.hget('rosetta-id-taskname-mapping', normalize(taskName))
        actual = []
        if impl_id is not None:
            # ignore=404: an absent document shows up as found == False.
            response = es.get(index='throwtable',
                              doc_type='implementation',
                              id=impl_id,
                              ignore=404)
            if response['found']:
                actual = response['_source']['algorithm']
        expected = r.smembers("%s:map" % taskName)

        result = checkPkg(taskName, actual, expected, r)

        print(result)
        r.sadd('samples-%s' % result, taskName)
        conditions[result] += 1

    for (k, v) in conditions.items():
        print("%s: %s" % (k, v))

    report_ratio("Precision", FALSE_POSITIVE)
    report_ratio("Recall", FALSE_NEGATIVE)
def index_rosetta_page(page, algo_ids):
    """Index the solutions of one Rosetta Code task page.

    Parses ``page`` with ``Task`` and, for each solution, indexes an
    ``implementation`` document into ``INDEX_NAME`` through the module-level
    ``es`` client.  The document id is ``'rosetta:'`` followed by the
    normalized task name, ``':'`` and the solution language decoded from
    UTF-8.  The task summary is stored as ``description`` only when
    ``algo_ids`` contains more than one id.
    """
    rosetta_task = Task(page)  # extract data from page using Task from pr

    for item in rosetta_task.solutions:
        doc = {
            'language': item['language'],
            'algorithm': algo_ids,
            'source': 'rosetta',
            'implementation': item['content'],
        }

        several_algorithms = len(algo_ids) > 1
        if several_algorithms:
            doc['description'] = '\n'.join(rosetta_task.task_summary)

        key_prefix = 'rosetta:' + normalize(rosetta_task.task_name) + ':'
        es.index(
            index=INDEX_NAME,
            doc_type='implementation',
            id=key_prefix + item['language'].decode('utf8'),
            body=doc,
        )