Esempio n. 1
0
    def test_docs_serialization_format(self):
        ''' compares the results Jeremia returns for a document list and for
            a single document with the stored reference output '''
        import json
        from eWRT.util.module_path import get_resource

        DOCS = [{'id': 7,
                 'body': 'Ehre sei Gott.',
                 'title': '',
                 'format': 'text/html',
                 'header': {'test': 'testvalue'}},
                {'id': 8,
                 'body': '',
                 'title': 'Guten Tag!',
                 'format': 'text/html',
                 'header': {}}]

        # load the reference output; the context managers close the files
        # deterministically instead of leaking the open handles
        with open(get_resource(
                __file__,
                'data/jeremia_reference_output_documents.json')) as f:
            REFERENCE_MULTI = json.load(f)
        with open(get_resource(
                __file__,
                'data/jeremia_reference_output_single_document.json')) as f:
            REFERENCE_SINGLE = json.load(f)

        # document list: both sides are sorted so that the comparison does
        # not depend on the order of the returned documents
        j = Jeremia()
        result = j.submit_documents(DOCS)
        result.sort()
        REFERENCE_MULTI.sort()
        assert REFERENCE_MULTI == result

        # single document
        result = j.submit_document(DOCS[0])
        assert REFERENCE_SINGLE == result
Esempio n. 2
0
    def test_submit_classify_v2(self):
        ''' test the version 2 classifier '''

        # read the test document; the context manager closes the file
        # instead of leaking the open handle
        with open(get_resource(
                __file__, 'data/classifier_v2_testfile.xml')) as f:
            weblyzard_xml = f.read()

        classifier = Classifier()
        search_agents = [
            {
                "name": "Santésuisse", "id": 412,
                "product_list": [
                    {"name": "SANTESUISSE FINANZ ENGAGEMENT RP", "id": 327432},
                    {"name": "SANTESUISSE FINANZ ENTWICKLUNG RP", "id": 327435},
                    {"name": "SANTESUISSE FINANZ PERSONEN RP", "id": 327442},
                    {"name": "SANTESUISSE FINANZ PRODUKTE RP", "id": 327444},
                    {"name": "SANTESUISSE FINANZ REGULATION RP", "id": 327446},
                    {"name": "SANTESUISSE FINANZ RESEARCH RP", "id": 327452},
                    {"name": "SANTESUISSE VERS. ALLGEMEIN RP", "id": 327562},
                    {"name": "SANTESUISSE VERS. ENGAGEMENT RP", "id": 327564},
                    {"name": "SANTESUISSE VERS. ENTWICKLUNG RP", "id": 327566},
                    {"name": "SANTESUISSE VERS. PERSONEN RP", "id": 327568},
                    {"name": "SANTESUISSE VERS. PRODUKTE RP", "id": 327570},
                    {"name": "SANTESUISSE VERS. REGULATION RP", "id": 327572},
                    {"name": "SANTESUISSE VERS. RESEARCH RP", "id": 327574},
                    {"name": "SANTESUISSE FINANZ ALLGEMEIN RP", "id": 327428}
                ]},
            {
                "name": "Krankenkassen", "id": 460,
                "product_list": [
                    {"name": "KRANKENKASSEN FINANZ ENGAGEMENT RP", "id": 342053},
                    {"name": "KRANKENKASSEN FINANZ ENTWICKLUNG RP", "id": 342055},
                    {"name": "KRANKENKASSEN FINANZ PERSONEN RP", "id": 342056},
                    {"name": "KRANKENKASSEN FINANZ PRODUKTE RP", "id": 342057},
                    {"name": "KRANKENKASSEN FINANZ REGULATION RP", "id": 342058},
                    {"name": "KRANKENKASSEN FINANZ RESEARCH RP", "id": 342059},
                    {"name": "KRANKENKASSEN VERS. ALLGEMEIN RP", "id": 342060},
                    {"name": "KRANKENKASSEN VERS. ENGAGEMENT RP", "id": 342061},
                    {"name": "KRANKENKASSEN VERS. ENTWICKLUNG RP", "id": 342062},
                    {"name": "KRANKENKASSEN VERS. PERSONEN RP", "id": 342063},
                    {"name": "KRANKENKASSEN VERS. PRODUKTE RP", "id": 342064},
                    {"name": "KRANKENKASSEN VERS. REGULATION RP", "id": 342065},
                    {"name": "KRANKENKASSEN VERS. RESEARCH RP", "id": 342066},
                    {"name": "KRANKENKASSEN FINANZ ALLGEMEIN RP", "id": 342052}
                ]}
        ]
        num_results = 3

        # call the web service
        result = classifier.classify_v2('COMET', weblyzard_xml=weblyzard_xml,
                                        search_agents=search_agents, num_results=num_results)

        # every search_agent should be covered in the result
        assert set(result.keys()) == set(
            self.get_search_agent_ids(search_agents))

        # 'num_results' classes are returned for every search_agent
        for _search_agent, classes in result.items():
            assert len(classes) == num_results

        print(result)
    def test_docs_serialization_format(self):
        ''' compares the results Jeremia returns for a document list and for
            a single document with the stored reference output '''
        import json
        from eWRT.util.module_path import get_resource

        DOCS = [{
            'id': 7,
            'body': 'Ehre sei Gott.',
            'title': '',
            'format': 'text/html',
            'header': {
                'test': 'testvalue'
            }
        }, {
            'id': 8,
            'body': '',
            'title': 'Guten Tag!',
            'format': 'text/html',
            'header': {}
        }]

        # load the reference output; the context managers close the files
        # deterministically instead of leaking the open handles
        multi_path = get_resource(
            __file__, 'data/jeremia_reference_output_documents.json')
        with open(multi_path) as f:
            REFERENCE_MULTI = json.load(f)
        single_path = get_resource(
            __file__, 'data/jeremia_reference_output_single_document.json')
        with open(single_path) as f:
            REFERENCE_SINGLE = json.load(f)

        # document list: both sides are sorted so that the comparison does
        # not depend on the order of the returned documents
        j = Jeremia()
        result = j.submit_documents(DOCS)
        result.sort()
        REFERENCE_MULTI.sort()
        assert REFERENCE_MULTI == result

        # single document
        result = j.submit_document(DOCS[0])
        assert REFERENCE_SINGLE == result
Esempio n. 4
0
#!/usr/bin/env python

from eWRT.input.csv import get_csv_data
from eWRT.util.module_path import get_resource

# CSV fixture opened by test_csv_data; located through get_resource
# (presumably relative to this module's directory -- confirm).
TEST_FILE = get_resource(__file__, ('test.csv', ))

def test_csv_data():
    """Compare the rows get_csv_data yields for TEST_FILE with CORRECT."""
    CORRECT = ([1, 2], [2, 4], [4, 6])
    with open(TEST_FILE) as f:
        # NOTE(review): zip stops at the shorter input, so rows missing from
        # get_csv_data's output would go unnoticed -- confirm this is intended.
        for correct, computed in zip(
                CORRECT,
                get_csv_data(f,
                             ('int(row["a"])', 'int(row["a"])+int(row["b"])'),
                             'row["show"]=="True"')):
            # A formatted string replaces the Python 2 print statement, which
            # is a SyntaxError on Python 3; the printed text is unchanged.
            print("%s %s" % (correct, computed))
            assert correct == computed
Esempio n. 5
0
from bz2 import BZ2File
from csv import reader
from glob import glob
from os.path import dirname, basename, join as os_join
from collections import namedtuple
from datetime import datetime
from twisted.python.text import strFile

from eWRT.util.module_path import get_resource


def extract_index_name(fname):
    """Return the part of fname's base name that precedes its first dot.

    @param fname: a file name or path
    @return: the base name up to (not including) the first ".", or the
             whole base name when it contains no dot
    """
    index_name, _sep, _extensions = basename(fname).partition(".")
    return index_name


# Directory whose "*.idx.bz2" / "*.ftidx.bz2" files StockIndex globs; located
# through get_resource (presumably relative to this module -- confirm).
DATA_DIR = get_resource(__file__, ('data', ))

# Record type for a single quote; the field order is fixed by the list below.
Quote = namedtuple(
    'quote',
    ['date', 'last', 'open', 'high', 'low', 'change_percentage'])


class StockIndex(object):
    """Lookup tables for the index data files found in DATA_DIR.

    Both mappings are built once, when the class body is executed.
    """
    # index name -> file path for every file matching "*.idx.bz2" in DATA_DIR
    SUPPORTED_INDICES = {extract_index_name(path): path
                         for path in glob(DATA_DIR + "/*.idx.bz2")}
    # index name -> file path for every file matching "*.ftidx.bz2" in DATA_DIR
    SUPPORTED_FUTURES = {extract_index_name(path): path
                         for path in glob(DATA_DIR + "/*.ftidx.bz2")}
    # strftime/strptime pattern: abbreviated month, day, four-digit year
    # (presumably the date layout used in the data files -- confirm)
    DATE_FORMAT = "%b %d, %Y"
Esempio n. 6
0
#
from builtins import zip
from builtins import str
from builtins import range
import pytest
import unittest

from multiprocessing import Pool
from shutil import rmtree
from os.path import exists, join

from eWRT.util.module_path import get_resource
from eWRT.util.cache import (MemoryCache, MemoryCached, DiskCached, DiskCache,
                             Cache, IterableCache, RedisCached)

def get_cache_dir(no):
    ''' returns the location get_resource reports for the temporary
        unittest cache directory ('.unittest-temp<no>')

        @param no: number of the cache directory
    '''
    return get_resource(__file__, ('.unittest-temp%d' % (no), ))


class TestCached(unittest.TestCase):
    ''' tests the MemoryCached Decorator '''
    @staticmethod
    def add(a=2, b=3):
        return a + b

    @staticmethod
    def sub(a=2, b=3):
        return a - b

    def testNonKeywordArguments(self):
        ''' tests the class with non Keyword Arguments '''
        for x in range(1, 20):
Esempio n. 7
0
    def test_submit_classify_v2(self):
        ''' test the version 2 classifier '''

        # read the test document; the context manager closes the file
        # instead of leaking the open handle
        with open(get_resource(
                __file__, 'data/classifier_v2_testfile.xml')) as f:
            weblyzard_xml = f.read()

        classifier = Classifier()
        search_agents = [{
            "name":
            "Santésuisse",
            "id":
            412,
            "product_list": [{
                "name": "SANTESUISSE FINANZ ENGAGEMENT RP",
                "id": 327432
            }, {
                "name": "SANTESUISSE FINANZ ENTWICKLUNG RP",
                "id": 327435
            }, {
                "name": "SANTESUISSE FINANZ PERSONEN RP",
                "id": 327442
            }, {
                "name": "SANTESUISSE FINANZ PRODUKTE RP",
                "id": 327444
            }, {
                "name": "SANTESUISSE FINANZ REGULATION RP",
                "id": 327446
            }, {
                "name": "SANTESUISSE FINANZ RESEARCH RP",
                "id": 327452
            }, {
                "name": "SANTESUISSE VERS. ALLGEMEIN RP",
                "id": 327562
            }, {
                "name": "SANTESUISSE VERS. ENGAGEMENT RP",
                "id": 327564
            }, {
                "name": "SANTESUISSE VERS. ENTWICKLUNG RP",
                "id": 327566
            }, {
                "name": "SANTESUISSE VERS. PERSONEN RP",
                "id": 327568
            }, {
                "name": "SANTESUISSE VERS. PRODUKTE RP",
                "id": 327570
            }, {
                "name": "SANTESUISSE VERS. REGULATION RP",
                "id": 327572
            }, {
                "name": "SANTESUISSE VERS. RESEARCH RP",
                "id": 327574
            }, {
                "name": "SANTESUISSE FINANZ ALLGEMEIN RP",
                "id": 327428
            }]
        }, {
            "name":
            "Krankenkassen",
            "id":
            460,
            "product_list": [{
                "name": "KRANKENKASSEN FINANZ ENGAGEMENT RP",
                "id": 342053
            }, {
                "name": "KRANKENKASSEN FINANZ ENTWICKLUNG RP",
                "id": 342055
            }, {
                "name": "KRANKENKASSEN FINANZ PERSONEN RP",
                "id": 342056
            }, {
                "name": "KRANKENKASSEN FINANZ PRODUKTE RP",
                "id": 342057
            }, {
                "name": "KRANKENKASSEN FINANZ REGULATION RP",
                "id": 342058
            }, {
                "name": "KRANKENKASSEN FINANZ RESEARCH RP",
                "id": 342059
            }, {
                "name": "KRANKENKASSEN VERS. ALLGEMEIN RP",
                "id": 342060
            }, {
                "name": "KRANKENKASSEN VERS. ENGAGEMENT RP",
                "id": 342061
            }, {
                "name": "KRANKENKASSEN VERS. ENTWICKLUNG RP",
                "id": 342062
            }, {
                "name": "KRANKENKASSEN VERS. PERSONEN RP",
                "id": 342063
            }, {
                "name": "KRANKENKASSEN VERS. PRODUKTE RP",
                "id": 342064
            }, {
                "name": "KRANKENKASSEN VERS. REGULATION RP",
                "id": 342065
            }, {
                "name": "KRANKENKASSEN VERS. RESEARCH RP",
                "id": 342066
            }, {
                "name": "KRANKENKASSEN FINANZ ALLGEMEIN RP",
                "id": 342052
            }]
        }]
        num_results = 3

        # call the web service
        result = classifier.classify_v2('COMET',
                                        weblyzard_xml=weblyzard_xml,
                                        search_agents=search_agents,
                                        num_results=num_results)

        # every search_agent should be covered in the result
        assert set(result.keys()) == set(
            self.get_search_agent_ids(search_agents))

        # 'num_results' classes are returned for every search_agent
        for _search_agent, classes in result.items():
            assert len(classes) == num_results

        print(result)
Esempio n. 8
0
from os.path import basename

from eWRT.util.module_path import get_resource


def read_wordlist(fname):
    ''' reads a language wordlist from a file

        @param fname: path of the wordlist file
        @return: the set of the file's lines, stripped of surrounding
                 whitespace and lower-cased
    '''
    with open(fname) as wordlist:
        return {entry.strip().lower() for entry in wordlist}


def get_lang_name(fname):
    ''' returns the language name based on the language file's name,
        i.e. the part of its base name that precedes the first dot '''
    language, _sep, _rest = basename(fname).partition(".")
    return language


# directory that is searched for the "*.csv" wordlist files
LANG_DATA_DIR = get_resource(__file__, 'data')

##
# \var STOPWORD_DICT: a dictionary of the 100 most common words in the given language
# (one entry per "*.csv" file in LANG_DATA_DIR, keyed by get_lang_name)
STOPWORD_DICT = {
    get_lang_name(wordlist_file): read_wordlist(wordlist_file)
    for wordlist_file in glob(LANG_DATA_DIR + "/*.csv")
}
# punctuation characters, each mapped to None in DELETE_TABLE
DELETE_CHARS = ",.!?\"'"
# NOTE(review): str.translate looks entries up by code point (int); if this
# table is handed to str.translate directly, its one-character string keys
# will never match -- confirm against the caller.
DELETE_TABLE = dict.fromkeys(DELETE_CHARS)


import string

# string.maketrans exists only on Python 2; Python 3 provides the equivalent
# as str.maketrans.  Use whichever the running interpreter offers so that
# importing this module does not fail with an AttributeError on Python 3.
_maketrans = getattr(string, 'maketrans', None) or str.maketrans
# translation table mapping 'a' -> 'c' and 'c' -> 'x'
table = _maketrans('ac', 'cx')


def detect_language(text):
    """
Esempio n. 9
0
from bz2 import BZ2File
from csv import reader
from glob import glob
from os.path import dirname, basename, join as os_join
from collections import namedtuple
from datetime import datetime
from twisted.python.text import strFile

from eWRT.util.module_path import get_resource


def extract_index_name(fname):
    """Return the part of fname's base name that precedes its first dot.

    @param fname: a file name or path
    @return: the base name up to (not including) the first ".", or the
             whole base name when it contains no dot
    """
    file_name = basename(fname)
    return file_name.split(".", 1)[0]


# Directory whose "*.idx.bz2" / "*.ftidx.bz2" files StockIndex globs; located
# through get_resource (presumably relative to this module -- confirm).
DATA_DIR = get_resource(__file__, ('data', ))

# Record type for a single quote; the field order is fixed by the list below.
Quote = namedtuple(
    'quote',
    ['date', 'last', 'open', 'high', 'low', 'change_percentage'])


class StockIndex(object):
    SUPPORTED_INDICES = {extract_index_name(fname): fname
                         for fname in glob(DATA_DIR + "/*.idx.bz2")}
    SUPPORTED_FUTURES = {extract_index_name(fname): fname
                         for fname in glob(DATA_DIR + "/*.ftidx.bz2")}
    DATE_FORMAT = "%b %d, %Y"

    @classmethod
    def get_index(cls, index_name):
        """retrieves data from the given index in csv format
           @param index_name: the name of the index
Esempio n. 10
0
from eWRT.util.module_path import get_resource


def read_wordlist(fname):
    ''' reads a language wordlist from a file

        @param fname: path of the UTF-8 encoded wordlist file
        @return: the set of the file's lines, stripped of surrounding
                 whitespace and lower-cased
    '''
    words = set()
    with open(fname, 'r', encoding='utf-8') as wordlist:
        for line in wordlist:
            words.add(line.strip().lower())
    return words


def get_lang_name(fname):
    ''' returns the language name based on the language file's name,
        i.e. the part of its base name that precedes the first dot '''
    file_name = basename(fname)
    return file_name.split(".", 1)[0]


# directory that is searched for the "*.csv" wordlist files
LANG_DATA_DIR = get_resource(__file__, 'data')

# #
# \var STOPWORD_DICT: a dictionary of the 100 most common words in the given language
# (one entry per "*.csv" file in LANG_DATA_DIR, keyed by get_lang_name)
STOPWORD_DICT = {get_lang_name(path): read_wordlist(path)
                 for path in glob(LANG_DATA_DIR + "/*.csv")}
# punctuation characters, each mapped to None in DELETE_TABLE
DELETE_CHARS = ",.!?\"'"
# NOTE(review): str.translate looks entries up by code point (int); if this
# table is handed to str.translate directly, its one-character string keys
# will never match -- confirm against the caller.
DELETE_TABLE = dict.fromkeys(DELETE_CHARS)

# import string [mig]
# table = str.maketrans('ac', 'cx')  # [mig] string --> str


def detect_language(text):
Esempio n. 11
0
# 
# Unittests
# run nosetests (from python-nose) to execute these tests
#
from shutil import rmtree
from multiprocessing import Pool

from nose.plugins.attrib import attr

from eWRT.util.cache import *
from eWRT.util.module_path import get_resource

def get_cache_dir(no):
    ''' returns the location get_resource reports for the temporary
        unittest cache directory ('.unittest-temp<no>')

        @param no: number of the cache directory
    '''
    return get_resource(__file__, ('.unittest-temp%d' % (no), ))

class TestCached(object):
    ''' tests the MemoryCached Decorator '''
    @staticmethod
    def add(a=2, b=3): 
        return a+b

    @staticmethod
    def sub(a=2, b=3): 
        return a-b

    def testNonKeywordArguments(self):
        ''' tests the class with non Keyword Arguments '''
        for x in xrange(1,20):
            assert self.add(x,5) == (x+5)
            assert self.add(x,5) == (x+5)

        # test objects with a cachesize specified
Esempio n. 12
0
#!/usr/bin/env python

from eWRT.input.csv import get_csv_data
from eWRT.util.module_path import get_resource

# CSV fixture opened by test_csv_data; located through get_resource
# (presumably relative to this module's directory -- confirm).
TEST_FILE = get_resource(__file__, ('test.csv', ))


def test_csv_data():
    """Compare the rows get_csv_data yields for TEST_FILE with CORRECT."""
    CORRECT = ([1, 2], [2, 4], [4, 6])
    with open(TEST_FILE) as f:
        computed_rows = get_csv_data(
            f,
            ('int(row["a"])', 'int(row["a"])+int(row["b"])'),
            'row["show"]=="True"')
        # NOTE(review): zip stops at the shorter input, so rows missing from
        # get_csv_data's output would go unnoticed -- confirm this is intended.
        for correct, computed in zip(CORRECT, computed_rows):
            # A formatted string replaces the Python 2 print statement, which
            # is a SyntaxError on Python 3; the printed text is unchanged.
            print("%s %s" % (correct, computed))
            assert correct == computed