def __init__(self, settings, session, path, uri_set=None):
    """
    Initialise a GFF source file on top of the generic SourceFile state

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    :param uri_set: passed through to SourceFile.__init__; when it is
                    non-empty the first item of self.uri is kept as prefix
    """
    SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

    self.type = 'gff'

    self.abstraction_dict = {}
    self.domain_knowledge_dict = {}

    # The three categorical position attributes are shared by both lists
    taxon_attr = 'position_taxon'
    ref_attr = 'position_ref'
    strand_attr = 'position_strand'
    self.pos_attr_list = [
        taxon_attr, ref_attr, 'position_start', 'position_end', strand_attr
    ]
    self.categories_list = [taxon_attr, ref_attr, strand_attr]

    self.taxon = ''
    self.entities = []

    now = datetime.datetime.now()
    self.timestamp = now.isoformat()

    self.getLabelFromUri = {}

    # NOTE(review): self.uri is not assigned here; presumably it is filled by
    # SourceFile.__init__ from uri_set -- confirm against the base class
    if not uri_set or len(uri_set) == 0:
        self.prefix = None
    else:
        self.prefix = self.uri[0]
def __init__(self, settings, session, path, uri_set=None):
    """
    Initialise a GFF source file on top of the generic SourceFile state

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    :param uri_set: passed through to SourceFile.__init__
    """
    SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

    self.type = 'gff'

    self.abstraction_dict = {}
    self.domain_knowledge_dict = {}

    # The three categorical position attributes are shared by both lists
    taxon_attr = 'position_taxon'
    ref_attr = 'position_ref'
    strand_attr = 'position_strand'
    self.pos_attr_list = [
        taxon_attr, ref_attr, 'position_start', 'position_end', strand_attr
    ]
    self.categories_list = [taxon_attr, ref_attr, strand_attr]

    self.taxon = ''
    self.entities = []

    now = datetime.datetime.now()
    self.timestamp = now.isoformat()

    self.getLabelFromUri = {}
def setUp(self):
    """
    Prepare the fixtures: a temporary directory, the settings read from
    configs/development.ini, a dummy request and the SourceFile under test
    """
    self.temp_directory = tempfile.mkdtemp()
    self.settings = get_appsettings('configs/development.ini', name='main')
    self.request = testing.DummyRequest()

    session = self.request.session
    self.srcfile = SourceFile(
        self.settings,
        session,
        SIMPLE_SOURCE_FILE,
        10)
def __init__(self, settings, session, path, file_type='ttl'):
    """
    Initialise a turtle source file, converting the input first when needed

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: path of the input file
    :param file_type: type of the input file; anything other than 'ttl' is
                      run through self.convert_to_ttl before initialisation
    """
    if file_type == 'ttl':
        ttl_path = path
    else:
        ttl_path = self.convert_to_ttl(path, file_type)

    SourceFile.__init__(self, settings, session, ttl_path)

    self.type = 'ttl'
    self.origine_type = file_type

    # Override the name after the base initialiser ran so that it is derived
    # from the original path rather than from the converted file
    self.name = os.path.basename(path)
def __init__(self, settings, session, path, preview_limit, uri_set=None):
    """
    Initialise a TSV source file on top of the generic SourceFile state

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    :param preview_limit: stored as self.preview_limit
    :param uri_set: passed through to SourceFile.__init__
    """
    SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

    self.type = 'tsv'
    self.preview_limit = preview_limit

    self.forced_column_types = ['entity']
    self.disabled_columns = []
    self.key_columns = []

    self.headers = self.get_headers_by_file

    self.category_values = defaultdict(set)

    # Values shared by several column types
    decimal = 'xsd:decimal'
    prefixed = ':'
    bare = ('', '')

    # Column type -> datatype / prefix string
    self.type_dict = {
        'numeric': decimal,
        'text': 'xsd:string',
        'category': prefixed,
        'taxon': prefixed,
        'ref': prefixed,
        'strand': prefixed,
        'start': decimal,
        'end': decimal,
        'entity': prefixed,
        'entitySym': prefixed,
        'entity_start': prefixed,
        'goterm': '',
        'date': 'xsd:dateTime',
    }

    # Column type -> pair of strings; presumably (prefix, suffix) placed
    # around each value of that type -- confirm against the users of delims
    self.delims = {
        'numeric': bare,
        'text': ('', '^^xsd:string'),
        'category': bare,
        'taxon': bare,
        'ref': bare,
        'strand': bare,
        'start': bare,
        'end': bare,
        'entity': bare,
        'entitySym': bare,
        'entity_start': bare,
        'goterm': ('<http://purl.obolibrary.org/obo/GO_', '>'),
        'date': ('', '^^xsd:dateTime'),
    }
def __init__(self, settings, session, path, preview_limit, uri_set=None):
    """
    Initialise a TSV source file on top of the generic SourceFile state

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    :param preview_limit: stored as self.preview_limit
    :param uri_set: passed through to SourceFile.__init__
    """
    SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

    self.type = 'tsv'
    self.preview_limit = preview_limit

    self.forced_column_types = ['entity']
    self.disabled_columns = []
    self.key_columns = []

    self.headers = self.get_headers_by_file

    self.category_values = defaultdict(set)

    # Values shared by several column types
    decimal = 'xsd:decimal'
    prefixed = ':'
    bare = ('', '')

    # Column type -> datatype / prefix string
    self.type_dict = {
        'numeric': decimal,
        'text': 'xsd:string',
        'category': prefixed,
        'taxon': prefixed,
        'ref': prefixed,
        'strand': prefixed,
        'start': decimal,
        'end': decimal,
        'entity': prefixed,
        'entitySym': prefixed,
        'entity_start': prefixed,
        'goterm': '',
        'date': 'xsd:dateTime',
    }

    # Column type -> pair of strings; presumably (prefix, suffix) placed
    # around each value of that type -- confirm against the users of delims
    self.delims = {
        'numeric': bare,
        'text': ('', '^^xsd:string'),
        'category': bare,
        'taxon': bare,
        'ref': bare,
        'strand': bare,
        'start': bare,
        'end': bare,
        'entity': bare,
        'entitySym': bare,
        'entity_start': bare,
        'goterm': ('<http://purl.obolibrary.org/obo/GO_', '>'),
        'date': ('', '^^xsd:dateTime'),
    }
def __init__(self, settings, session, path, tax, ent):
    """
    Initialise a GFF source file for a given taxon and entity selection

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    :param tax: stored as self.taxon
    :param ent: stored as self.entities
    """
    SourceFile.__init__(self, settings, session, path)

    self.type = 'gff'

    self.abstraction_dict = {}
    self.domain_knowledge_dict = {}

    # The three categorical position attributes are shared by both lists
    taxon_attr = 'position_taxon'
    ref_attr = 'position_ref'
    strand_attr = 'position_strand'
    self.pos_attr_list = [
        taxon_attr, ref_attr, 'position_start', 'position_end', strand_attr
    ]
    self.categories_list = [taxon_attr, ref_attr, strand_attr]

    self.taxon = tax
    self.entities = ent
def __init__(self, settings, session, path, preview_limit):
    """
    Initialise a TSV source file on top of the generic SourceFile state

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    :param preview_limit: stored as self.preview_limit
    """
    SourceFile.__init__(self, settings, session, path)

    self.type = 'tsv'
    self.preview_limit = preview_limit

    self.forced_column_types = ['entity']

    self.category_values = defaultdict(set)

    # Values shared by several column types
    decimal = 'xsd:decimal'
    prefixed = ':'
    bare = ('', '')
    colon = (':', '')
    quoted = ('"', '"')

    # Column type -> datatype / prefix string
    self.type_dict = {
        'numeric': decimal,
        'text': 'xsd:string',
        'category': prefixed,
        'taxon': prefixed,
        'ref': prefixed,
        'strand': prefixed,
        'start': decimal,
        'end': decimal,
        'entity': prefixed,
        'entitySym': prefixed,
        'entity_start': prefixed,
        'entityGoterm': '',
    }

    # Column type -> pair of strings; presumably (prefix, suffix) placed
    # around each value of that type -- confirm against the users of delims
    self.delims = {
        'numeric': bare,
        'text': quoted,
        'category': colon,
        'taxon': colon,
        'ref': colon,
        'strand': colon,
        'start': bare,
        'end': bare,
        'entity': colon,
        'entitySym': colon,
        'entity_start': colon,
        'entityGoterm': quoted,
    }
def __init__(self, settings, session, path, uri_set=None):
    """
    Initialise a BED source file on top of the generic SourceFile state

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    :param uri_set: passed through to SourceFile.__init__
    """
    SourceFile.__init__(self, settings, session, path, uri_set=uri_set)

    self.type = 'bed'

    self.abstraction_dict = {}
    self.domain_knowledge_dict = {}

    # The three categorical position attributes are shared by both lists
    taxon_attr = 'position_taxon'
    ref_attr = 'position_ref'
    strand_attr = 'position_strand'
    self.pos_attr_list = [
        taxon_attr, ref_attr, 'position_start', 'position_end', strand_attr
    ]
    self.categories_list = [taxon_attr, ref_attr, strand_attr]

    self.taxon = ''

    now = datetime.datetime.now()
    self.timestamp = now.isoformat()

    self.get_label_from_uri = {}
    self.entity = ''
def get_rdf_files(self):
    """
    Build a SourceFile for each .ttl / .rdf file of the source file directory

    :return: One SourceFile per file whose name ends with .ttl or .rdf
    :rtype: List
    """
    src_dir = self.get_source_file_directory()

    # glob has no "{a,b}" alternation: the previous '/*[.ttl,.rdf]' pattern
    # was a character class, so it matched every name ending in one of the
    # characters ". t l , r d f" (e.g. "notes.txt"). List the visible entries
    # and filter on the real extension instead.
    rdf_paths = [
        path for path in glob(src_dir + '/*')
        if path.endswith(('.ttl', '.rdf'))
    ]
    if not rdf_paths:
        return []

    # The limit is the same for every file: read and convert it only once
    preview_limit = int(self.settings["askomics.overview_lines_limit"])

    return [
        SourceFile(self.settings, self.session, path, preview_limit)
        for path in rdf_paths
    ]
class SourceFileTests(AskoTestCase):
    """Tests of SourceFile: headers, preview data and column type guessing"""

    def setUp(self):
        """Run the parent set-up then build the SourceFile under test"""
        super().setUp()
        dummy_request = testing.DummyRequest()
        self.srcfile = SourceFile(
            self.settings,
            dummy_request.session,
            SIMPLE_SOURCE_FILE,
            10)

    def test_load_headers_from_file(self):
        """The headers attribute holds the three expected column names"""
        expected = ['head1', 'head2', 'head3']
        assert self.srcfile.headers == expected

    def test_load_preview_from_file(self):
        """get_preview_data returns one list of ten values per column"""
        col1 = ['val1.1', 'val1.2', 'val1.3', 'val1.4', 'val1.5',
                'val1.6', 'val1.7', 'val1.8', 'val1.9', 'val1.10']
        col2 = ['val2.1', 'val2.2', 'val2.3', 'val2.4', 'val2.5',
                'val2.6', 'val2.7', 'val2.8', 'val2.9', 'val2.10']
        col3 = ['val3.1', 'val3.2', 'val3.3', 'val3.4', 'val3.5',
                'val3.6', 'val3.7', 'val3.8', 'val3.9', 'val3.10']

        assert self.srcfile.get_preview_data() == [col1, col2, col3]

    def test_is_decimal(self):
        """is_decimal accepts dotted numbers and rejects everything else"""
        cases = [
            ('test', False),
            ('33a4254', False),
            ('23', True),
            ('23.3095', True),
            ('23,3095', False),
            ('.0495', True),
            ('', False),
        ]
        for value, expected in cases:
            assert bool(self.srcfile.is_decimal(value)) == expected

    def test_guess_column_type(self):
        """guess_values_type classifies a single list of values"""
        cases = [
            (['453', '334254', '342', '335'], 'numeric'),
            (['45.3', '334.254', '342', '335'], 'numeric'),
            (['453', '33a4254', '342', '335'], 'text'),
            (['453', '453', '453', '453'], 'category'),
            (['453', 'ccc', 'bbb', 'aaa'], 'text'),
        ]
        for values, expected in cases:
            assert self.srcfile.guess_values_type(values) == expected

    def test_guess_column_types(self):
        """guess_column_types classifies every column of a table"""
        columns = [
            ['453', '334254', '342', '335'],
            ['453', '453', '453', '453'],
            ['453', 'ccc', 'bbb', 'aaa'],
            ['453', '334254', '342', '335'],
        ]
        expected = ['numeric', 'category', 'text', 'numeric']

        assert self.srcfile.guess_column_types(columns) == expected
def __init__(self, settings, session, url):
    """
    Initialise the source by delegating everything to SourceFile.__init__

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param url: passed as the third positional argument of
                SourceFile.__init__
    """
    SourceFile.__init__(
        self,
        settings,
        session,
        url)
class SourceFileTests(unittest.TestCase):
    """Tests of SourceFile: headers, preview data and column type guessing"""

    def setUp(self):
        """
        Prepare the fixtures: a temporary directory, the settings read from
        configs/development.ini, a dummy request and the SourceFile under test
        """
        self.temp_directory = tempfile.mkdtemp()
        self.settings = get_appsettings('configs/development.ini', name='main')
        self.request = testing.DummyRequest()
        self.srcfile = SourceFile(self.settings, self.request.session,
                                  SIMPLE_SOURCE_FILE, 10)

    def tearDown(self):
        """Remove the temporary directory created by setUp"""
        shutil.rmtree(self.temp_directory)

    def test_load_headers_from_file(self):
        """The headers attribute holds the four expected column names"""
        assert self.srcfile.headers == ['head1', 'head2', 'head3', 'head4']

    def test_load_preview_from_file(self):
        """get_preview_data returns one list of ten values per column"""
        c1 = [
            'val1.1', 'val1.2', 'val1.3', 'val1.4', 'val1.5', 'val1.6',
            'val1.7', 'val1.8', 'val1.9', 'val1.10'
        ]
        c2 = [
            'val2.1', 'val2.2', 'val2.3', 'val2.4', 'val2.5', 'val2.6',
            'val2.7', 'val2.8', 'val2.9', 'val2.10'
        ]
        c3 = [
            'val3.1', 'val3.2', 'val3.3', 'val3.4', 'val3.5', 'val3.6',
            'val3.7', 'val3.8', 'val3.9', 'val3.10'
        ]
        c4 = [
            'val4.1', 'val4.2', 'val4.3', 'val4.4', 'val4.5', 'val4.6',
            'val4.7', 'val4.8', 'val4.9', 'val4.10'
        ]

        assert self.srcfile.get_preview_data() == [c1, c2, c3, c4]

    def test_set_forced_column_types(self):
        """set_forced_column_types accepts one type per column (smoke test)"""
        self.srcfile.set_forced_column_types(
            ['entity', 'numeric', 'text', 'category'])

    def test_set_disabled_columns(self):
        """set_disabled_columns accepts a list of indexes (smoke test)"""
        self.srcfile.set_disabled_columns([0, 4])

    def test_is_decimal(self):
        """is_decimal accepts dotted numbers and the empty string"""
        assert not self.srcfile.is_decimal('test')
        assert not self.srcfile.is_decimal('33a4254')
        assert self.srcfile.is_decimal('23')
        assert self.srcfile.is_decimal('23.3095')
        assert not self.srcfile.is_decimal('23,3095')
        assert self.srcfile.is_decimal('.0495')
        assert self.srcfile.is_decimal('')

    def test_guess_column_type(self):
        """guess_values_type classifies values given the column header"""
        # category
        assert self.srcfile.guess_values_type(
            ['453', '453', '453', '453'], 'category') == 'category'

        # text
        assert self.srcfile.guess_values_type(
            ['453', '33a4254', '342', '335'], 'text') == 'text'

        # numeric
        assert self.srcfile.guess_values_type(
            ['453', '334254', '342', '335'], 'numeric') == 'numeric'
        assert self.srcfile.guess_values_type(
            ['45.3', '334.254', '342', '335'], 'numeric') == 'numeric'

        # taxon
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'taxon') == 'taxon'
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'species') == 'taxon'
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'aaataxonaaa') == 'taxon'
        assert self.srcfile.guess_values_type(
            ['taxon', 'taxon', 'taxon', 'taxon'], 'aaaspeciesaaa') == 'taxon'

        # ref
        assert self.srcfile.guess_values_type(
            ['reference', 'reference', 'reference', 'reference'],
            'ref') == 'ref'
        assert self.srcfile.guess_values_type(
            ['chromosome', 'chromosome', 'chromosome', 'chromosome'],
            'chrom') == 'ref'
        assert self.srcfile.guess_values_type(
            ['reference', 'reference', 'reference', 'reference'],
            'aaarefaaa') == 'ref'
        assert self.srcfile.guess_values_type(
            ['chromosome', 'chromosome', 'chromosome', 'chromosome'],
            'aaachromaaa') == 'ref'

        # start and end
        assert self.srcfile.guess_values_type(
            ['453', '334254', '342', '335'], 'start') == 'start'
        assert self.srcfile.guess_values_type(
            ['45.3', '334.254', '342', '335'], 'begin') == 'start'
        assert self.srcfile.guess_values_type(
            ['453', '334254', '342', '335'], 'end') == 'end'
        assert self.srcfile.guess_values_type(
            ['45.3', '334.254', '342', '335'], 'stop') == 'end'
        assert self.srcfile.guess_values_type(
            ['a', 'b', 'c', 'd'], 'start') != 'start'
        assert self.srcfile.guess_values_type(
            ['a', 'b', 'c', 'd'], 'ref') != 'start'

    def test_get_domain_knowledge(self):
        """A fresh SourceFile exposes the headers and accepts forced types"""
        srcfile = SourceFile(self.settings, self.request.session,
                             SIMPLE_SOURCE_FILE, 10)

        # This comparison used to be a bare expression whose result was
        # thrown away; assert it so a header mismatch fails the test
        assert srcfile.headers == ['head1', 'head2', 'head3', 'head4']

        srcfile.set_forced_column_types(
            ['numeric', 'category', 'text', 'numeric'])
def test_get_domain_knowledge(self):
    """A fresh SourceFile exposes the headers and accepts forced types"""
    srcfile = SourceFile(self.settings, self.request.session,
                         SIMPLE_SOURCE_FILE, 10)

    # This comparison used to be a bare expression whose result was thrown
    # away; assert it so a header mismatch fails the test
    assert srcfile.headers == ['head1', 'head2', 'head3', 'head4']

    srcfile.set_forced_column_types(
        ['numeric', 'category', 'text', 'numeric'])
def __init__(self, settings, session, path):
    """
    Initialise a turtle source file on top of the generic SourceFile state

    :param settings: passed through to SourceFile.__init__
    :param session: passed through to SourceFile.__init__
    :param path: passed through to SourceFile.__init__
    """
    SourceFile.__init__(
        self,
        settings,
        session,
        path)

    # Set after the base initialiser has run
    self.type = 'ttl'
def setUp(self):
    """Run the parent set-up then build the SourceFile under test"""
    super().setUp()

    dummy_request = testing.DummyRequest()
    session = dummy_request.session
    self.srcfile = SourceFile(
        self.settings,
        session,
        SIMPLE_SOURCE_FILE,
        10)