Esempio n. 1
0
    def __init__(self, settings, session, path, uri_set=None):
        """Set up a source file of type 'gff' with empty working state.

        ``settings``, ``session``, ``path`` and ``uri_set`` are forwarded
        unchanged to ``SourceFile.__init__``; every other attribute is
        initialised here to an empty or default value.
        """
        # The base initialiser runs first so that the assignments below win
        # over anything it may have stored under the same attribute names.
        SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

        self.type = 'gff'

        # Containers and scalars that start out empty.
        self.abstraction_dict = {}
        self.domain_knowledge_dict = {}
        self.getLabelFromUri = {}
        self.entities = []
        self.taxon = ''

        self.pos_attr_list = [
            'position_taxon',
            'position_ref',
            'position_start',
            'position_end',
            'position_strand',
        ]
        self.categories_list = [
            'position_taxon',
            'position_ref',
            'position_strand',
        ]

        # ISO-8601 string of the moment this object was built.
        created_at = datetime.datetime.now()
        self.timestamp = created_at.isoformat()

        # self.uri is not assigned in this method; presumably the base
        # initialiser provides it -- TODO confirm.
        self.prefix = self.uri[0] if (uri_set and len(uri_set) > 0) else None
Esempio n. 2
0
    def __init__(self, settings, session, path, uri_set=None):
        """Create a 'gff' source file whose working attributes start empty.

        All four arguments are handed straight to ``SourceFile.__init__``.
        """
        # Base initialisation must precede the attribute setup below.
        SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

        self.type = 'gff'

        # Empty dictionaries, filled in elsewhere.
        self.abstraction_dict = {}
        self.domain_knowledge_dict = {}
        self.getLabelFromUri = {}

        # Names of the positional attributes, and the subset listed as
        # categories.
        self.pos_attr_list = [
            'position_taxon',
            'position_ref',
            'position_start',
            'position_end',
            'position_strand',
        ]
        self.categories_list = ['position_taxon', 'position_ref',
                                'position_strand']

        self.taxon = ''
        self.entities = []

        # Construction time as an ISO-8601 string.
        now = datetime.datetime.now()
        self.timestamp = now.isoformat()
Esempio n. 3
0
    def setUp(self):
        """Prepare a temp directory, settings, a dummy request and a SourceFile."""
        self.temp_directory = tempfile.mkdtemp()
        self.settings = get_appsettings('configs/development.ini', name='main')
        self.request = testing.DummyRequest()

        # The file under test is built from the dummy request's session with
        # 10 as its fourth argument.
        session = self.request.session
        self.srcfile = SourceFile(self.settings, session, SIMPLE_SOURCE_FILE, 10)
Esempio n. 4
0
    def __init__(self, settings, session, path, file_type='ttl'):
        """Initialise a 'ttl' source file, converting the input first if needed.

        When ``file_type`` is anything other than 'ttl', ``path`` is passed
        through ``self.convert_to_ttl`` and the result is what the base
        initialiser receives; otherwise ``path`` is used as-is.
        """
        # The conversion happens before the base initialiser is called.
        if file_type == 'ttl':
            ttl_path = path
        else:
            ttl_path = self.convert_to_ttl(path, file_type)

        SourceFile.__init__(self, settings, session, ttl_path)

        # Remember the type the file was supplied in.
        self.origine_type = file_type
        self.type = 'ttl'

        # Overload the name with the base name of the path originally given,
        # not of the converted file.
        self.name = os.path.basename(path)
Esempio n. 5
0
    def __init__(self, settings, session, path, preview_limit, uri_set=None):
        """Set up a source file of type 'tsv' and its column-type tables.

        ``settings``, ``session``, ``path`` and ``uri_set`` go to
        ``SourceFile.__init__``; ``preview_limit`` is stored on the instance.
        """
        SourceFile.__init__(self, settings, session, path, uri_set=uri_set)
        self.type = 'tsv'

        self.preview_limit = preview_limit

        self.forced_column_types = ['entity']
        self.disabled_columns = []
        self.key_columns = []
        # Attribute access without a call, exactly as before.
        # NOTE(review): presumably get_headers_by_file is a property -- confirm.
        self.headers = self.get_headers_by_file

        self.category_values = defaultdict(set)

        # One row per column type: (name, type_dict value, delims value).
        # Both dictionaries below share this key order.
        column_specs = (
            ('numeric', 'xsd:decimal', ('', '')),
            ('text', 'xsd:string', ('', '^^xsd:string')),
            ('category', ':', ('', '')),
            ('taxon', ':', ('', '')),
            ('ref', ':', ('', '')),
            ('strand', ':', ('', '')),
            ('start', 'xsd:decimal', ('', '')),
            ('end', 'xsd:decimal', ('', '')),
            ('entity', ':', ('', '')),
            ('entitySym', ':', ('', '')),
            ('entity_start', ':', ('', '')),
            ('goterm', '', ('<http://purl.obolibrary.org/obo/GO_', '>')),
            ('date', 'xsd:dateTime', ('', '^^xsd:dateTime')),
        )

        self.type_dict = {name: rdf_type for name, rdf_type, _ in column_specs}
        self.delims = {name: wrapping for name, _, wrapping in column_specs}
Esempio n. 6
0
    def __init__(self, settings, session, path, preview_limit, uri_set=None):
        """Create a 'tsv' source file together with its per-type lookup tables.

        Everything except ``preview_limit`` is forwarded to
        ``SourceFile.__init__``; ``preview_limit`` is kept on the instance.
        """
        SourceFile.__init__(self, settings, session, path, uri_set=uri_set)
        self.type = 'tsv'

        self.preview_limit = preview_limit

        self.forced_column_types = ['entity']
        self.disabled_columns = []
        self.key_columns = []
        # Plain attribute access, not a call.
        # NOTE(review): presumably get_headers_by_file is a property -- confirm.
        self.headers = self.get_headers_by_file

        self.category_values = defaultdict(set)

        # Every known column type, in the key order of both tables.
        column_kinds = (
            'numeric', 'text', 'category', 'taxon', 'ref', 'strand', 'start',
            'end', 'entity', 'entitySym', 'entity_start', 'goterm', 'date',
        )

        # Most kinds map to ':'; the exceptions are overridden afterwards.
        # Updating existing keys leaves the key order untouched.
        self.type_dict = dict.fromkeys(column_kinds, ':')
        self.type_dict.update({
            'numeric': 'xsd:decimal',
            'text': 'xsd:string',
            'start': 'xsd:decimal',
            'end': 'xsd:decimal',
            'goterm': '',
            'date': 'xsd:dateTime',
        })

        # Most kinds use an empty (prefix, suffix) pair.
        self.delims = dict.fromkeys(column_kinds, ('', ''))
        self.delims.update({
            'text': ('', '^^xsd:string'),
            'goterm': ('<http://purl.obolibrary.org/obo/GO_', '>'),
            'date': ('', '^^xsd:dateTime'),
        })
Esempio n. 7
0
    def __init__(self, settings, session, path, tax, ent):
        """Set up a 'gff' source file for a given taxon and entity collection.

        ``settings``, ``session`` and ``path`` are forwarded to
        ``SourceFile.__init__``; ``tax`` and ``ent`` are stored as
        ``self.taxon`` and ``self.entities`` respectively.
        """
        # Base initialisation comes first; the attributes below are set after it.
        SourceFile.__init__(self, settings, session, path)

        self.type = 'gff'

        # Caller-supplied values.
        self.taxon = tax
        self.entities = ent

        # Dictionaries that start out empty.
        self.abstraction_dict = {}
        self.domain_knowledge_dict = {}

        self.pos_attr_list = [
            'position_taxon',
            'position_ref',
            'position_start',
            'position_end',
            'position_strand',
        ]
        self.categories_list = [
            'position_taxon',
            'position_ref',
            'position_strand',
        ]
Esempio n. 8
0
    def __init__(self, settings, session, path, preview_limit):
        """Set up a 'tsv' source file and its per-column-type lookup tables.

        ``settings``, ``session`` and ``path`` are passed to
        ``SourceFile.__init__``; ``preview_limit`` is stored on the instance.
        """
        SourceFile.__init__(self, settings, session, path)

        self.type = 'tsv'
        self.preview_limit = preview_limit
        self.forced_column_types = ['entity']
        self.category_values = defaultdict(set)

        # Value stored for each column type name.
        self.type_dict = {
            'numeric': 'xsd:decimal',
            'text': 'xsd:string',
            'category': ':',
            'taxon': ':',
            'ref': ':',
            'strand': ':',
            'start': 'xsd:decimal',
            'end': 'xsd:decimal',
            'entity': ':',
            'entitySym': ':',
            'entity_start': ':',
            'entityGoterm': '',
        }

        # Pair of strings stored for each column type name.
        # NOTE(review): presumably (prefix, suffix) placed around a value --
        # confirm against the code that reads self.delims.
        self.delims = {
            'numeric': ('', ''),
            'text': ('"', '"'),
            'category': (':', ''),
            'taxon': (':', ''),
            'ref': (':', ''),
            'strand': (':', ''),
            'start': ('', ''),
            'end': ('', ''),
            'entity': (':', ''),
            'entitySym': (':', ''),
            'entity_start': (':', ''),
            'entityGoterm': ('"', '"'),
        }
Esempio n. 9
0
    def __init__(self, settings, session, path, uri_set=None):
        """Set up a source file of type 'bed' with empty working state.

        All four arguments are forwarded unchanged to
        ``SourceFile.__init__``.
        """
        # Run the base initialiser before assigning the attributes below.
        SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

        self.type = 'bed'

        # Dictionaries that start out empty.
        self.abstraction_dict = {}
        self.domain_knowledge_dict = {}
        self.get_label_from_uri = {}

        # Strings that start out empty.
        self.taxon = ''
        self.entity = ''

        self.pos_attr_list = [
            'position_taxon',
            'position_ref',
            'position_start',
            'position_end',
            'position_strand',
        ]
        self.categories_list = [
            'position_taxon',
            'position_ref',
            'position_strand',
        ]

        # Construction time as an ISO-8601 string.
        created_at = datetime.datetime.now()
        self.timestamp = created_at.isoformat()
Esempio n. 10
0
    def get_rdf_files(self):
        """
        Build a SourceFile for every ``.ttl`` or ``.rdf`` file located
        directly inside the source file directory.

        :return: One SourceFile per matching file, ``.ttl`` files first, each
                 extension group sorted by path
        :rtype: List
        """
        src_dir = self.get_source_file_directory()

        # A glob bracket expression matches exactly ONE character, so the
        # former pattern '/*[.ttl,.rdf]' selected every file whose name ended
        # in '.', ',', 't', 'l', 'r', 'd' or 'f' (e.g. 'notes.txt'). Each
        # extension therefore gets its own pattern.
        paths = []
        for extension in ('.ttl', '.rdf'):
            # glob() returns entries in arbitrary order; sorting makes the
            # result reproducible.
            paths.extend(sorted(glob(src_dir + '/*' + extension)))

        if not paths:
            return []

        # Read and convert the setting once rather than once per file.
        lines_limit = int(self.settings["askomics.overview_lines_limit"])

        return [
            SourceFile(self.settings, self.session, rdf_path, lines_limit)
            for rdf_path in paths
        ]
Esempio n. 11
0
class SourceFileTests(AskoTestCase):
    """Checks on a SourceFile built from SIMPLE_SOURCE_FILE."""

    def setUp(self):
        """Run the parent setUp, then build the SourceFile under test."""
        super().setUp()

        dummy_request = testing.DummyRequest()
        self.srcfile = SourceFile(
            self.settings, dummy_request.session, SIMPLE_SOURCE_FILE, 10)

    def test_load_headers_from_file(self):
        """The headers attribute holds the three expected names."""
        expected_headers = ['head1', 'head2', 'head3']

        assert self.srcfile.headers == expected_headers

    def test_load_preview_from_file(self):
        """The preview holds three lists of ten 'val<i>.<j>' strings."""
        # Inner lists are val1.1..val1.10, val2.1..val2.10, val3.1..val3.10.
        expected_preview = [
            ['val{}.{}'.format(outer, inner) for inner in range(1, 11)]
            for outer in range(1, 4)
        ]

        assert self.srcfile.get_preview_data() == expected_preview

    def test_is_decimal(self):
        """is_decimal accepts plain and dotted numbers and rejects the rest."""
        for accepted in ('23', '23.3095', '.0495'):
            assert self.srcfile.is_decimal(accepted)

        for rejected in ('test', '33a4254', '23,3095', ''):
            assert not self.srcfile.is_decimal(rejected)

    def test_guess_column_type(self):
        """guess_values_type returns the expected type for each value list."""
        cases = (
            (['453', '334254', '342', '335'], 'numeric'),
            (['45.3', '334.254', '342', '335'], 'numeric'),
            (['453', '33a4254', '342', '335'], 'text'),
            (['453', '453', '453', '453'], 'category'),
            (['453', 'ccc', 'bbb', 'aaa'], 'text'),
        )

        for values, expected_type in cases:
            assert self.srcfile.guess_values_type(values) == expected_type

    def test_guess_column_types(self):
        """guess_column_types returns one guessed type per column."""
        columns = [
            ['453', '334254', '342', '335'],
            ['453', '453', '453', '453'],
            ['453', 'ccc', 'bbb', 'aaa'],
            ['453', '334254', '342', '335'],
        ]
        expected_types = ['numeric', 'category', 'text', 'numeric']

        assert self.srcfile.guess_column_types(columns) == expected_types
Esempio n. 12
0
    def __init__(self, settings, session, url):
        """Initialise the instance by delegating to ``SourceFile.__init__``.

        ``settings``, ``session`` and ``url`` are forwarded positionally and
        nothing else is set here.
        NOTE(review): presumably the base initialiser accepts a URL as its
        third argument -- confirm against ``SourceFile``.
        """

        SourceFile.__init__(self, settings, session, url)
Esempio n. 13
0
class SourceFileTests(unittest.TestCase):
    """Checks on a SourceFile built from SIMPLE_SOURCE_FILE."""

    def setUp(self):
        """Create a temp directory, load settings and build the SourceFile."""
        self.temp_directory = tempfile.mkdtemp()
        self.settings = get_appsettings('configs/development.ini', name='main')

        self.request = testing.DummyRequest()

        self.srcfile = SourceFile(self.settings, self.request.session,
                                  SIMPLE_SOURCE_FILE, 10)

    def tearDown(self):
        """Remove the temp directory created in setUp."""
        shutil.rmtree(self.temp_directory)

    def test_load_headers_from_file(self):
        """The headers attribute holds the four expected names."""

        assert self.srcfile.headers == ['head1', 'head2', 'head3', 'head4']

    def test_load_preview_from_file(self):
        """The preview holds four lists of ten 'val<i>.<j>' strings."""

        c1 = [
            'val1.1', 'val1.2', 'val1.3', 'val1.4', 'val1.5', 'val1.6',
            'val1.7', 'val1.8', 'val1.9', 'val1.10'
        ]
        c2 = [
            'val2.1', 'val2.2', 'val2.3', 'val2.4', 'val2.5', 'val2.6',
            'val2.7', 'val2.8', 'val2.9', 'val2.10'
        ]
        c3 = [
            'val3.1', 'val3.2', 'val3.3', 'val3.4', 'val3.5', 'val3.6',
            'val3.7', 'val3.8', 'val3.9', 'val3.10'
        ]
        c4 = [
            'val4.1', 'val4.2', 'val4.3', 'val4.4', 'val4.5', 'val4.6',
            'val4.7', 'val4.8', 'val4.9', 'val4.10'
        ]
        assert self.srcfile.get_preview_data() == [c1, c2, c3, c4]

    def test_set_forced_column_types(self):
        """set_forced_column_types runs without raising (no result checked)."""
        self.srcfile.set_forced_column_types(
            ['entity', 'numeric', 'text', 'category'])

    def test_set_disabled_columns(self):
        """set_disabled_columns runs without raising (no result checked)."""
        self.srcfile.set_disabled_columns([0, 4])

    def test_is_decimal(self):
        """is_decimal accepts numbers and the empty string, rejects the rest."""

        assert not self.srcfile.is_decimal('test')
        assert not self.srcfile.is_decimal('33a4254')
        assert self.srcfile.is_decimal('23')
        assert self.srcfile.is_decimal('23.3095')
        assert not self.srcfile.is_decimal('23,3095')
        assert self.srcfile.is_decimal('.0495')
        assert self.srcfile.is_decimal('')

    def test_guess_column_type(self):
        """guess_values_type returns the expected type per values/header pair."""

        # category
        assert self.srcfile.guess_values_type(['453', '453', '453', '453'],
                                              'category') == 'category'

        # text
        assert self.srcfile.guess_values_type(['453', '33a4254', '342', '335'],
                                              'text') == 'text'

        # numeric
        assert self.srcfile.guess_values_type(['453', '334254', '342', '335'],
                                              'numeric') == 'numeric'
        assert self.srcfile.guess_values_type(
            ['45.3', '334.254', '342', '335'], 'numeric') == 'numeric'

        # taxon
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'taxon') == 'taxon'
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'species') == 'taxon'
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'aaataxonaaa') == 'taxon'
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'aaaspeciesaaa') == 'taxon'

        # ref
        assert self.srcfile.guess_values_type(
            ['reference', 'reference', 'reference', 'reference'],
            'ref') == 'ref'
        assert self.srcfile.guess_values_type(
            ['chromosome', 'chromosome', 'chromosome', 'chromosome'],
            'chrom') == 'ref'
        assert self.srcfile.guess_values_type(
            ['reference', 'reference', 'reference', 'reference'],
            'aaarefaaa') == 'ref'
        assert self.srcfile.guess_values_type(
            ['chromosome', 'chromosome', 'chromosome', 'chromosome'],
            'aaachromaaa') == 'ref'

        # start and end
        assert self.srcfile.guess_values_type(['453', '334254', '342', '335'],
                                              'start') == 'start'
        assert self.srcfile.guess_values_type(
            ['45.3', '334.254', '342', '335'], 'begin') == 'start'
        assert self.srcfile.guess_values_type(['453', '334254', '342', '335'],
                                              'end') == 'end'
        assert self.srcfile.guess_values_type(
            ['45.3', '334.254', '342', '335'], 'stop') == 'end'

        # non-numeric values must never be guessed as 'start'
        assert self.srcfile.guess_values_type(['a', 'b', 'c', 'd'],
                                              'start') != 'start'
        assert self.srcfile.guess_values_type(['a', 'b', 'c', 'd'],
                                              'ref') != 'start'

    def test_get_domain_knowledge(self):
        """Build a fresh SourceFile, check its headers, force its column types.

        NOTE(review): nothing is asserted after set_forced_column_types in
        the code visible here.
        """
        srcfile = SourceFile(self.settings, self.request.session,
                             SIMPLE_SOURCE_FILE, 10)
        # This was a bare comparison whose result was thrown away, so the
        # headers were never actually checked.
        assert srcfile.headers == ['head1', 'head2', 'head3', 'head4']
        srcfile.set_forced_column_types(
            ['numeric', 'category', 'text', 'numeric'])
Esempio n. 14
0
 def test_get_domain_knowledge(self):
     """Build a fresh SourceFile, check its headers, force its column types.

     NOTE(review): nothing is asserted after set_forced_column_types in the
     code visible here.
     """
     srcfile = SourceFile(self.settings, self.request.session,
                          SIMPLE_SOURCE_FILE, 10)
     # This was a bare comparison whose result was thrown away, so the
     # headers were never actually checked.
     assert srcfile.headers == ['head1', 'head2', 'head3', 'head4']
     srcfile.set_forced_column_types(
         ['numeric', 'category', 'text', 'numeric'])
Esempio n. 15
0
    def __init__(self, settings, session, path):
        """Initialise a source file of type 'ttl'.

        ``settings``, ``session`` and ``path`` are forwarded positionally to
        ``SourceFile.__init__``.
        """

        SourceFile.__init__(self, settings, session, path)

        # Assigned after the base initialiser has run, so this value is the
        # one left on the instance.
        self.type = 'ttl'
Esempio n. 16
0
    def __init__(self, settings, session, url):
        """Initialise the instance by delegating to ``SourceFile.__init__``.

        ``settings``, ``session`` and ``url`` are forwarded positionally and
        nothing else is set here.
        NOTE(review): presumably the base initialiser accepts a URL as its
        third argument -- confirm against ``SourceFile``.
        """

        SourceFile.__init__(self, settings, session, url)
Esempio n. 17
0
    def setUp(self):
        """Run the parent setUp, then build the SourceFile under test."""
        super().setUp()

        # The file under test is built from a dummy request's session with
        # 10 as its fourth argument.
        dummy_request = testing.DummyRequest()
        session = dummy_request.session
        self.srcfile = SourceFile(self.settings, session, SIMPLE_SOURCE_FILE, 10)