예제 #1
0
def ImportDataSet(calculationObject, baseFolder, datasetId, importSettings):
    """Import the dataset found in ``baseFolder/datasetId`` and register it.

    The dataset's row in the ``datasetindex`` table of the index database
    (``config.DB``) is deleted up front and only re-inserted as the very last
    step, so a dataset whose import fails half-way is not listed.

    Steps, in order: verify write permission, load the dataset's ``settings``
    file, (unless ``ConfigOnly``) drop and recreate the dataset database,
    clear the catalog tables, import every data table, every 2D data table,
    the reference genome (if a ``refgenome`` folder exists), the workspaces
    and the global settings, then register the dataset.

    Args:
        calculationObject: Object providing ``LogHeader``, ``Log``,
            ``SetInfo`` and ``credentialInfo``; it is handed on to every
            import helper.
        baseFolder: Folder that contains one sub-folder per dataset.
        datasetId: Dataset identifier. Used as the sub-folder name and as the
            name of the dataset's database.
        importSettings: Mapping of import options. Only ``'ConfigOnly'`` is
            read here; when it is true the database is not dropped/recreated
            and the registered import time is 0. The mapping is passed
            unchanged to the table/genome/workspace importers.

    Raises:
        Exception: If the ``DataTables`` settings token is not a list.
        TypeError: If the ``2D_DataTables`` settings token is not a list.
        Anything raised by the identifier checks, the credential checks or
        the helper importers propagates unchanged.
    """
    Utils.CheckSafeIdentifier(datasetId)
    with calculationObject.LogHeader(
            'Importing dataset {0}'.format(datasetId)):
        calculationObject.Log('Import settings: ' + str(importSettings))
        DQXUtils.CheckValidIdentifier(datasetId)
        datasetFolder = os.path.join(baseFolder, datasetId)
        indexDb = config.DB

        calculationObject.credentialInfo.VerifyCanDo(
            DQXDbTools.DbOperationWrite(indexDb, 'datasetindex'))
        calculationObject.credentialInfo.VerifyCanDo(
            DQXDbTools.DbOperationWrite(datasetId))

        # Remove current reference in the index first: if import fails,
        # nothing will show up.
        ImpUtils.ExecuteSQL(
            calculationObject, indexDb,
            'DELETE FROM datasetindex WHERE id="{0}"'.format(datasetId))

        globalSettings = SettingsLoader.SettingsLoader(
            os.path.join(datasetFolder, 'settings'))
        globalSettings.RequireTokens(['Name'])
        globalSettings.AddTokenIfMissing('Description', '')

        print('Global settings: ' + str(globalSettings.Get()))

        if not importSettings['ConfigOnly']:
            # Dropping existing database
            calculationObject.SetInfo('Dropping database')
            print('Dropping database')
            try:
                ImpUtils.ExecuteSQL(
                    calculationObject, indexDb,
                    'DROP DATABASE IF EXISTS {0}'.format(datasetId))
            except Exception as e:
                # Still best-effort (the import carries on), but the failure
                # is now logged: if the database survived, the CREATE below
                # is what will fail and this message explains why.
                calculationObject.Log(
                    'WARNING: failed to drop database {0}: {1}'.format(
                        datasetId, str(e)))
            ImpUtils.ExecuteSQL(calculationObject, indexDb,
                                'CREATE DATABASE {0}'.format(datasetId))

            # Creating new database from the schema script that sits next to
            # this source file.
            scriptPath = os.path.dirname(os.path.realpath(__file__))
            calculationObject.SetInfo('Creating database')
            print('Creating new database')
            ImpUtils.ExecuteSQLScript(calculationObject,
                                      scriptPath + '/createdataset.sql',
                                      datasetId)

        # Start from empty catalogs; the importers below repopulate them.
        for tableName in ('propertycatalog', 'summaryvalues', 'tablecatalog',
                          'settings', 'customdatacatalog'):
            ImpUtils.ExecuteSQL(calculationObject, datasetId,
                                'DELETE FROM ' + tableName)

        datatables = []

        if globalSettings.HasToken('DataTables'):
            if not isinstance(globalSettings['DataTables'], list):
                raise Exception('DataTables token should be a list')
            # NOTE(review): this is not a copy. If the loader hands back its
            # own list, the folder names appended below also end up in the
            # settings that are imported later on -- confirm this is wanted.
            datatables = globalSettings['DataTables']

        # Tables listed in the settings come first (in their given order);
        # any other sub-folder of 'datatables' is appended afterwards.
        datatablesFolder = os.path.join(datasetFolder, 'datatables')
        for entry in os.listdir(datatablesFolder):
            if (os.path.isdir(os.path.join(datatablesFolder, entry))
                    and entry not in datatables):
                datatables.append(entry)
        print('Data tables: ' + str(datatables))
        for datatable in datatables:
            ImportDataTable.ImportDataTable(
                calculationObject, datasetId, datatable,
                os.path.join(datatablesFolder, datatable),
                importSettings)

        # 2D data tables are only imported when explicitly listed.
        try:
            datatables_2D = globalSettings['2D_DataTables']
        except KeyError:
            datatables_2D = []
        if not isinstance(datatables_2D, list):
            raise TypeError('2D_DataTables token should be a list')
        for datatable in datatables_2D:
            Import2DDataTable.ImportDataTable(
                calculationObject, datasetId, datatable,
                os.path.join(datasetFolder, '2D_datatables', datatable),
                importSettings)

        refGenomeFolder = os.path.join(datasetFolder, 'refgenome')
        if os.path.exists(refGenomeFolder):
            ImportRefGenome.ImportRefGenome(
                calculationObject, datasetId, refGenomeFolder, importSettings)
            globalSettings.AddTokenIfMissing('hasGenomeBrowser', True)

        ImportWorkspaces.ImportWorkspaces(calculationObject, datasetFolder,
                                          datasetId, importSettings)

        # Global settings
        print('Defining global settings')
        ImpUtils.ImportGlobalSettings(calculationObject, datasetId,
                                      globalSettings)

        # Finalise: register dataset. A config-only run registers time 0.
        print('Registering dataset')
        importtime = 0
        if not importSettings['ConfigOnly']:
            importtime = time.time()
        # NOTE(review): 'Name' comes straight from the settings file and is
        # interpolated unescaped; a name containing a double quote would
        # break this statement. Switch to a parameterised query if
        # ImpUtils.ExecuteSQL supports one.
        ImpUtils.ExecuteSQL(
            calculationObject, indexDb,
            'INSERT INTO datasetindex VALUES ("{0}", "{1}", "{2}")'.format(
                datasetId, globalSettings['Name'], str(math.ceil(importtime))))
예제 #2
0
def ImportRefGenome(calculationObject, datasetId, folder, importSettings):
    """Import the reference genome data stored in ``folder`` for a dataset.

    Imports the reference genome summary data and the folder's ``settings``
    file, then loads the ``chromosomes`` table. When
    ``importSettings['ScopeStr']`` is ``'all'`` it additionally converts
    ``refsequence.fa`` (if present) and imports ``annotation.gff``; for any
    other scope those two steps are skipped with a logged warning.

    Temporary files created here are removed even when a step fails.

    Args:
        calculationObject: Object providing ``LogHeader``, ``Log`` and
            ``LogDataDump``; it is handed on to the ``ImpUtils`` helpers.
        datasetId: Dataset identifier; names the target database and the
            sub-folder under ``config.BASEDIR/SummaryTracks``.
        folder: The dataset's reference genome folder.
        importSettings: Mapping of import options; ``'ScopeStr'`` is read
            here and the mapping is passed to ``ImportRefGenomeSummaryData``.

    Raises:
        Exception: If the ``chromosomes`` file cannot be read (the original
            error text is included in the message).
        Anything raised by the helper importers/convertors propagates
        unchanged.
    """
    with calculationObject.LogHeader('Importing reference genome data'):

        ImportRefGenomeSummaryData(calculationObject, datasetId, folder,
                                   importSettings)

        settings = SettingsLoader.SettingsLoader(
            os.path.join(folder, 'settings'))
        settings.DefineKnownTokens(['AnnotMaxViewportSize', 'RefSequenceSumm'])
        print('Settings: ' + str(settings.Get()))
        ImpUtils.ImportGlobalSettings(calculationObject, datasetId, settings)

        # Import reference genome
        if importSettings['ScopeStr'] == 'all':
            refsequencefile = os.path.join(folder, 'refsequence.fa')
            if os.path.exists(refsequencefile):
                with calculationObject.LogHeader(
                        'Converting reference genome'):
                    destfolder = config.BASEDIR + '/SummaryTracks/' + datasetId + '/Sequence'
                    if not os.path.exists(destfolder):
                        os.makedirs(destfolder)
                    # The convertor is given a file name relative to
                    # destfolder, so the sequence is copied there first.
                    tempfastafile = destfolder + '/refsequence.fa'
                    shutil.copyfile(refsequencefile, tempfastafile)
                    ImpUtils.RunConvertor(calculationObject,
                                          'Fasta2FilterBankData', destfolder,
                                          ['refsequence.fa'])
            else:
                calculationObject.Log(
                    'WARNING: missing reference sequence file')
        else:
            calculationObject.Log(
                'WARNING: Skipping converting reference genome')

        # Import chromosomes
        with calculationObject.LogHeader('Loading chromosomes'):
            tb = VTTable.VTTable()
            tb.allColumnsText = True
            try:
                tb.LoadFile(os.path.join(folder, 'chromosomes'))
            except Exception as e:
                raise Exception('Error while reading chromosomes file: ' +
                                str(e))
            tb.RequireColumnSet(['chrom', 'length'])
            # The file's column names are mapped onto those used in the
            # SQL dump ('id', 'len').
            tb.RenameCol('chrom', 'id')
            tb.RenameCol('length', 'len')
            tb.ConvertColToValue('len')
            with calculationObject.LogDataDump():
                tb.PrintRows(0, 99)
            sqlfile = ImpUtils.GetTempFileName()
            try:
                tb.SaveSQLDump(sqlfile, 'chromosomes')
                ImpUtils.ExecuteSQL(calculationObject, datasetId,
                                    'DELETE FROM chromosomes')
                ImpUtils.ExecuteSQLScript(calculationObject, sqlfile,
                                          datasetId)
            finally:
                # Do not leave the dump behind when a statement fails.
                _RemoveFileIfPresent(sqlfile)

        if importSettings['ScopeStr'] == 'all':
            # Import annotation
            with calculationObject.LogHeader('Converting annotation'):
                tempgfffile = ImpUtils.GetTempFileName()
                temppath = os.path.dirname(tempgfffile)
                try:
                    shutil.copyfile(os.path.join(folder, 'annotation.gff'),
                                    tempgfffile)
                    # NOTE(review): these output names are fixed and live in
                    # the shared temp folder, so two imports running at the
                    # same time would presumably overwrite each other's
                    # files -- confirm against the ParseGFF convertor.
                    convertorOutputs = [
                        os.path.join(temppath, 'annotation.txt'),
                        os.path.join(temppath, 'annotation_dump.sql'),
                        os.path.join(temppath, 'annotation_create.sql'),
                    ]
                    try:
                        ImpUtils.RunConvertor(
                            calculationObject, 'ParseGFF', temppath,
                            [os.path.basename(tempgfffile)])
                        print('Importing annotation')
                        ImpUtils.ExecuteSQLScript(
                            calculationObject,
                            os.path.join(temppath, 'annotation_dump.sql'),
                            datasetId)
                    finally:
                        for outputFile in convertorOutputs:
                            _RemoveFileIfPresent(outputFile)
                finally:
                    _RemoveFileIfPresent(tempgfffile)
        else:
            calculationObject.Log('WARNING: Skipping converting annotation')


def _RemoveFileIfPresent(path):
    """Delete ``path`` if it exists; a missing file is not an error.

    Used for cleanup in ``finally`` blocks, where the file may never have
    been created because an earlier step failed.
    """
    if os.path.exists(path):
        os.remove(path)