def ImportDataSet(calculationObject, baseFolder, datasetId, importSettings):
    """Import (or re-import) one dataset into the index database.

    Looks for the dataset's source files under ``baseFolder/datasetId``,
    (re)creates the per-dataset database unless ``importSettings['ConfigOnly']``
    is set, imports all 1D and 2D data tables, the reference genome (if
    present) and the workspaces, then registers the dataset in the
    ``datasetindex`` table of the index database.

    :param calculationObject: progress/logging context; must provide
        ``LogHeader``, ``Log``, ``SetInfo`` and ``credentialInfo``.
    :param baseFolder: root folder containing one sub-folder per dataset.
    :param datasetId: identifier of the dataset; validated as a safe
        identifier before being interpolated into SQL.
    :param importSettings: dict; keys used here: ``'ConfigOnly'`` (bool-ish).
    :raises Exception: if the 'DataTables' settings token is not a list.
    :raises TypeError: if the '2D_DataTables' settings token is not a list.
    """
    Utils.CheckSafeIdentifier(datasetId)
    with calculationObject.LogHeader('Importing dataset {0}'.format(datasetId)):
        calculationObject.Log('Import settings: ' + str(importSettings))
        DQXUtils.CheckValidIdentifier(datasetId)
        datasetFolder = os.path.join(baseFolder, datasetId)
        indexDb = config.DB

        # Caller must be allowed to write both the index table and the dataset DB.
        calculationObject.credentialInfo.VerifyCanDo(
            DQXDbTools.DbOperationWrite(indexDb, 'datasetindex'))
        calculationObject.credentialInfo.VerifyCanDo(
            DQXDbTools.DbOperationWrite(datasetId))

        # Remove current reference in the index first: if import fails, nothing will show up.
        # NOTE(review): datasetId is interpolated into SQL; this is only safe because of
        # the CheckSafeIdentifier/CheckValidIdentifier guards above.
        ImpUtils.ExecuteSQL(
            calculationObject, indexDb,
            'DELETE FROM datasetindex WHERE id="{0}"'.format(datasetId))

        globalSettings = SettingsLoader.SettingsLoader(
            os.path.join(datasetFolder, 'settings'))
        globalSettings.RequireTokens(['Name'])
        globalSettings.AddTokenIfMissing('Description', '')
        print('Global settings: ' + str(globalSettings.Get()))

        if not importSettings['ConfigOnly']:
            # Recreate the per-dataset database from scratch.
            calculationObject.SetInfo('Dropping database')
            print('Dropping database')
            try:
                ImpUtils.ExecuteSQL(
                    calculationObject, indexDb,
                    'DROP DATABASE IF EXISTS {0}'.format(datasetId))
            except Exception as e:
                # Best-effort drop ("IF EXISTS" should make this a no-op);
                # log instead of silently swallowing every error.
                calculationObject.Log(
                    'WARNING: failed to drop database {0}: {1}'.format(datasetId, e))
            ImpUtils.ExecuteSQL(calculationObject, indexDb,
                                'CREATE DATABASE {0}'.format(datasetId))
            # Creating new database schema
            scriptPath = os.path.dirname(os.path.realpath(__file__))
            calculationObject.SetInfo('Creating database')
            print('Creating new database')
            ImpUtils.ExecuteSQLScript(
                calculationObject,
                os.path.join(scriptPath, 'createdataset.sql'),
                datasetId)
            # Clear the catalog/settings tables so the import starts clean.
            for catalogTable in ('propertycatalog', 'summaryvalues',
                                 'tablecatalog', 'settings', 'customdatacatalog'):
                ImpUtils.ExecuteSQL(calculationObject, datasetId,
                                    'DELETE FROM ' + catalogTable)

        # Collect data tables: those declared in settings plus any sub-folder
        # of 'datatables' not already listed.
        datatables = []
        if globalSettings.HasToken('DataTables'):
            if not isinstance(globalSettings['DataTables'], list):
                raise Exception('DataTables token should be a list')
            datatables = globalSettings['DataTables']
        for tableDir in os.listdir(os.path.join(datasetFolder, 'datatables')):
            if os.path.isdir(os.path.join(datasetFolder, 'datatables', tableDir)):
                if tableDir not in datatables:
                    datatables.append(tableDir)
        print('Data tables: ' + str(datatables))
        for datatable in datatables:
            ImportDataTable.ImportDataTable(
                calculationObject, datasetId, datatable,
                os.path.join(datasetFolder, 'datatables', datatable),
                importSettings)

        # 2D data tables are optional; only those declared in settings are imported.
        try:
            datatables_2D = globalSettings['2D_DataTables']
        except KeyError:
            datatables_2D = []
        if not isinstance(datatables_2D, list):
            raise TypeError('2D_DataTables token should be a list')
        for datatable in datatables_2D:
            Import2DDataTable.ImportDataTable(
                calculationObject, datasetId, datatable,
                os.path.join(datasetFolder, '2D_datatables', datatable),
                importSettings)

        # Reference genome is optional; importing it enables the genome browser.
        if os.path.exists(os.path.join(datasetFolder, 'refgenome')):
            ImportRefGenome.ImportRefGenome(
                calculationObject, datasetId,
                os.path.join(datasetFolder, 'refgenome'), importSettings)
            globalSettings.AddTokenIfMissing('hasGenomeBrowser', True)

        ImportWorkspaces.ImportWorkspaces(calculationObject, datasetFolder,
                                          datasetId, importSettings)

        # Global settings
        print('Defining global settings')
        ImpUtils.ImportGlobalSettings(calculationObject, datasetId, globalSettings)

        # Finalise: register dataset. importtime stays 0 for config-only runs,
        # so the stored timestamp distinguishes full imports from config refreshes.
        print('Registering dataset')
        importtime = 0
        if not importSettings['ConfigOnly']:
            importtime = time.time()
        ImpUtils.ExecuteSQL(
            calculationObject, indexDb,
            'INSERT INTO datasetindex VALUES ("{0}", "{1}", "{2}")'.format(
                datasetId, globalSettings['Name'], str(math.ceil(importtime))))
def ImportRefGenome(calculationObject, datasetId, folder, importSettings):
    """Import reference-genome data (sequence, chromosomes, annotation).

    Steps: summary data, genome-level settings, FASTA sequence conversion
    (only when ``importSettings['ScopeStr'] == 'all'``), the chromosomes
    table, and GFF annotation conversion (same scope condition).

    :param calculationObject: progress/logging context (``LogHeader``,
        ``Log``, ``LogDataDump``).
    :param datasetId: target dataset database identifier.
    :param folder: source folder holding ``settings``, ``refsequence.fa``,
        ``chromosomes`` and ``annotation.gff``.
    :param importSettings: dict; key used here: ``'ScopeStr'``.
    :raises Exception: if the chromosomes file cannot be read.
    """
    with calculationObject.LogHeader('Importing reference genome data'):
        ImportRefGenomeSummaryData(calculationObject, datasetId, folder,
                                   importSettings)

        settings = SettingsLoader.SettingsLoader(os.path.join(folder, 'settings'))
        settings.DefineKnownTokens(['AnnotMaxViewportSize', 'RefSequenceSumm'])
        print('Settings: ' + str(settings.Get()))
        ImpUtils.ImportGlobalSettings(calculationObject, datasetId, settings)

        # Import reference genome sequence (skipped unless full-scope import)
        if importSettings['ScopeStr'] == 'all':
            refsequencefile = os.path.join(folder, 'refsequence.fa')
            if os.path.exists(refsequencefile):
                with calculationObject.LogHeader('Converting reference genome'):
                    destfolder = os.path.join(config.BASEDIR, 'SummaryTracks',
                                              datasetId, 'Sequence')
                    if not os.path.exists(destfolder):
                        os.makedirs(destfolder)
                    # The convertor reads the FASTA from its working folder,
                    # so copy it alongside the output first.
                    tempfastafile = os.path.join(destfolder, 'refsequence.fa')
                    shutil.copyfile(refsequencefile, tempfastafile)
                    ImpUtils.RunConvertor(calculationObject,
                                          'Fasta2FilterBankData',
                                          destfolder, ['refsequence.fa'])
            else:
                calculationObject.Log('WARNING: missing reference sequence file')
        else:
            calculationObject.Log('WARNING: Skipping converting reference genome')

        # Import chromosomes
        with calculationObject.LogHeader('Loading chromosomes'):
            tb = VTTable.VTTable()
            tb.allColumnsText = True
            try:
                tb.LoadFile(os.path.join(folder, 'chromosomes'))
            except Exception as e:
                # Re-raise with context but keep the original cause chained.
                raise Exception('Error while reading chromosomes file: '
                                + str(e)) from e
            tb.RequireColumnSet(['chrom', 'length'])
            tb.RenameCol('chrom', 'id')
            tb.RenameCol('length', 'len')
            tb.ConvertColToValue('len')
            with calculationObject.LogDataDump():
                tb.PrintRows(0, 99)
            sqlfile = ImpUtils.GetTempFileName()
            try:
                tb.SaveSQLDump(sqlfile, 'chromosomes')
                ImpUtils.ExecuteSQL(calculationObject, datasetId,
                                    'DELETE FROM chromosomes')
                ImpUtils.ExecuteSQLScript(calculationObject, sqlfile, datasetId)
            finally:
                # Always clean the temp dump, even when the SQL import fails.
                if os.path.exists(sqlfile):
                    os.remove(sqlfile)

        if importSettings['ScopeStr'] == 'all':
            # Import annotation (skipped unless full-scope import)
            with calculationObject.LogHeader('Converting annotation'):
                tempgfffile = ImpUtils.GetTempFileName()
                temppath = os.path.dirname(tempgfffile)
                try:
                    shutil.copyfile(os.path.join(folder, 'annotation.gff'),
                                    tempgfffile)
                    # ParseGFF writes annotation.txt / annotation_dump.sql /
                    # annotation_create.sql next to the input file.
                    ImpUtils.RunConvertor(calculationObject, 'ParseGFF',
                                          temppath,
                                          [os.path.basename(tempgfffile)])
                    print('Importing annotation')
                    ImpUtils.ExecuteSQLScript(
                        calculationObject,
                        os.path.join(temppath, 'annotation_dump.sql'),
                        datasetId)
                finally:
                    # Remove the temp GFF and every convertor byproduct, even
                    # if conversion or import failed partway through.
                    for leftover in (tempgfffile,
                                     os.path.join(temppath, 'annotation.txt'),
                                     os.path.join(temppath, 'annotation_dump.sql'),
                                     os.path.join(temppath, 'annotation_create.sql')):
                        if os.path.exists(leftover):
                            os.remove(leftover)
        else:
            calculationObject.Log('WARNING: Skipping converting annotation')