예제 #1
0
def ImportWorkspace(calculationObject, datasetId, workspaceid, folder,
                    importSettings):
    Utils.CheckSafeIdentifier(workspaceid)
    with calculationObject.LogHeader('Importing workspace {0}.{1}'.format(
            datasetId, workspaceid)):
        print('Source directory: ' + folder)
        settings = SettingsLoader.SettingsLoader(
            os.path.join(folder, 'settings'))
        settings.RequireTokens(['Name'])
        print(settings.ToJSON())
        workspaceName = settings['Name']

        if not ImpUtils.IsDatasetPresentInServer(
                calculationObject.credentialInfo, datasetId):
            raise Exception(
                'Dataset {0} is not found. Please import the dataset first'.
                format(datasetId))

        db = DQXDbTools.OpenDatabase(calculationObject.credentialInfo,
                                     datasetId)
        cur = db.cursor()

        def execSQL(cmd):
            calculationObject.LogSQLCommand(cmd)
            cur.execute(cmd)

        cur.execute('SELECT id, primkey, settings FROM tablecatalog')
        tables = [{
            'id': row[0],
            'primkey': row[1],
            'settingsStr': row[2]
        } for row in cur.fetchall()]
        tableMap = {table['id']: table for table in tables}

        for table in tables:
            tableSettings = SettingsLoader.SettingsLoader()
            tableSettings.LoadDict(
                simplejson.loads(table['settingsStr'], strict=False))
            table['settings'] = tableSettings

        if not importSettings['ConfigOnly']:
            for table in tables:
                tableid = table['id']
                print('Re-creating custom data table for ' + tableid)
                execSQL("DROP TABLE IF EXISTS {0}".format(
                    Utils.GetTableWorkspaceProperties(workspaceid, tableid)))
                execSQL(
                    "CREATE TABLE {0} (StoredSelection TINYINT DEFAULT 0) AS SELECT {1} FROM {2}"
                    .format(
                        Utils.GetTableWorkspaceProperties(
                            workspaceid, tableid), table['primkey'], tableid))
                execSQL("create unique index {1} on {0}({1})".format(
                    Utils.GetTableWorkspaceProperties(workspaceid, tableid),
                    table['primkey']))
                execSQL(
                    "create index idx_StoredSelection on {0}(StoredSelection)".
                    format(
                        Utils.GetTableWorkspaceProperties(
                            workspaceid, tableid)))

        print('Removing existing workspace properties')
        execSQL("DELETE FROM propertycatalog WHERE workspaceid='{0}'".format(
            workspaceid))

        calculationObject.Log('Creating StoredSelection columns')
        for table in tables:
            tableid = table['id']
            sett = '{"CanUpdate": true, "Index": false, "ReadData": false, "showInTable": false, "Search":"None" }'
            cmd = "INSERT INTO propertycatalog VALUES ('{0}', 'custom', 'Boolean', 'StoredSelection', '{1}', 'Stored selection', 0, '{2}')".format(
                workspaceid, tableid, sett)
            execSQL(cmd)

        print('Re-creating workspaces record')
        execSQL("DELETE FROM workspaces WHERE id='{0}'".format(workspaceid))
        execSQL('INSERT INTO workspaces VALUES ("{0}","{1}")'.format(
            workspaceid, workspaceName))
        if not importSettings['ConfigOnly']:
            print('Updating views')
            for table in tables:
                Utils.UpdateTableInfoView(
                    workspaceid, table['id'],
                    table['settings']['AllowSubSampling'], cur)

        db.commit()
        db.close()

        print('Scanning for custom data')
        if os.path.exists(os.path.join(folder, 'customdata')):
            for tableid in os.listdir(os.path.join(folder, 'customdata')):
                if os.path.isdir(os.path.join(folder, 'customdata', tableid)):
                    if not tableid in tableMap:
                        raise Exception('Invalid table id ' + tableid)

                    #Read optional settings file
                    customdatalist = None
                    settingsfilename = os.path.join(folder, 'customdata',
                                                    tableid, 'settings')
                    if os.path.isfile(settingsfilename):
                        with open(settingsfilename) as settingsfile:
                            cSettings = SettingsLoader.SettingsLoader(
                                settingsfilename)
                            if cSettings.HasToken('CustomData'):
                                if not type(cSettings['CustomData']) is list:
                                    raise Exception(
                                        'CustomData token should be a list')
                                customdatalist = cSettings['CustomData']
                                print('Custom data list taken from settings')
                    if customdatalist is None:  # Alternatively, just use list of folders
                        customdatalist = []
                        for customid in os.listdir(
                                os.path.join(folder, 'customdata', tableid)):
                            if os.path.isdir(
                                    os.path.join(folder, 'customdata', tableid,
                                                 customid)):
                                customdatalist.append(customid)
                        print('Custom data list taken from folders')
                    print('Custom data list: ' + str(customdatalist))
                    for customid in customdatalist:
                        ImportCustomData(
                            calculationObject, datasetId, workspaceid, tableid,
                            customid,
                            os.path.join(folder, 'customdata', tableid,
                                         customid), importSettings)

        else:
            print('Directory not present')
예제 #2
0
def ImportCustomData(calculationObject, datasetId, workspaceid, tableid,
                     sourceid, folder, importSettings):

    with calculationObject.LogHeader('Importing custom data'):
        print('Importing custom data into {0}, {1}, {2} FROM {3}'.format(
            datasetId, workspaceid, tableid, folder))

        credInfo = calculationObject.credentialInfo

        if not ImpUtils.IsDatasetPresentInServer(
                calculationObject.credentialInfo, datasetId):
            raise Exception(
                'Dataset {0} is not found. Please import the dataset first'.
                format(datasetId))

        db = DQXDbTools.OpenDatabase(credInfo, datasetId)
        calculationObject.credentialInfo.VerifyCanDo(
            DQXDbTools.DbOperationWrite(datasetId, 'propertycatalog'))
        calculationObject.credentialInfo.VerifyCanDo(
            DQXDbTools.DbOperationWrite(datasetId, 'workspaces'))

        cur = db.cursor()
        cur.execute(
            'SELECT primkey, settings FROM tablecatalog WHERE id="{0}"'.format(
                tableid))
        row = cur.fetchone()
        if row is None:
            raise Exception(
                'Unable to find table record for table {0} in dataset {1}'.
                format(tableid, datasetId))
        primkey = row[0]
        tableSettingsStr = row[1]
        db.close()

        ImpUtils.ExecuteSQL(
            calculationObject, datasetId,
            'DELETE FROM customdatacatalog WHERE tableid="{tableid}" and sourceid="{sourceid}"'
            .format(tableid=tableid, sourceid=sourceid))

        tableSettings = SettingsLoader.SettingsLoader()
        tableSettings.LoadDict(simplejson.loads(tableSettingsStr,
                                                strict=False))

        allowSubSampling = tableSettings['AllowSubSampling']

        isPositionOnGenome = False
        if tableSettings.HasToken(
                'IsPositionOnGenome') and tableSettings['IsPositionOnGenome']:
            isPositionOnGenome = True
            chromField = tableSettings['Chromosome']
            posField = tableSettings['Position']

        settings = SettingsLoader.SettingsLoader(
            os.path.join(os.path.join(folder, 'settings')))

        ImpUtils.ExecuteSQL(
            calculationObject, datasetId,
            "INSERT INTO customdatacatalog VALUES ('{tableid}', '{sourceid}', '{settings}')"
            .format(tableid=tableid,
                    sourceid=sourceid,
                    settings=settings.ToJSON()))

        properties = ImpUtils.LoadPropertyInfo(calculationObject, settings,
                                               os.path.join(folder, 'data'))

        # remove primary key, just in case
        properties = [prop for prop in properties if prop['propid'] != primkey]

        sourcetable = Utils.GetTableWorkspaceProperties(workspaceid, tableid)
        print('Source table: ' + sourcetable)

        #Get list of existing properties
        db = DQXDbTools.OpenDatabase(calculationObject.credentialInfo,
                                     datasetId)
        cur = db.cursor()
        cur.execute(
            'SELECT propid FROM propertycatalog WHERE (workspaceid="{0}") and (source="custom") and (tableid="{1}")'
            .format(workspaceid, tableid))
        existingProperties = [row[0] for row in cur.fetchall()]
        print('Existing properties: ' + str(existingProperties))
        db.close()

        propidList = []
        for property in properties:
            DQXUtils.CheckValidIdentifier(property['propid'])
            propidList.append(property['propid'])

        db = DQXDbTools.OpenDatabase(calculationObject.credentialInfo,
                                     datasetId)
        cur = db.cursor()

        if not importSettings['ConfigOnly']:
            # Dropping columns that will be replaced
            toRemoveExistingProperties = []
            for existProperty in existingProperties:
                if existProperty in propidList:
                    toRemoveExistingProperties.append(existProperty)
            print('Removing outdated information:')
            if len(toRemoveExistingProperties) > 0:
                for prop in toRemoveExistingProperties:
                    print('Removing outdated information: {0} {1} {2}'.format(
                        workspaceid, prop, tableid))
                    sql = 'DELETE FROM propertycatalog WHERE (workspaceid="{0}") and (propid="{1}") and (tableid="{2}")'.format(
                        workspaceid, prop, tableid)
                    print(sql)
                    cur.execute(sql)
                sql = "ALTER TABLE {0} ".format(sourcetable)
                for prop in toRemoveExistingProperties:
                    if prop != toRemoveExistingProperties[0]:
                        sql += ", "
                    sql += "DROP COLUMN {0}".format(prop)
                calculationObject.LogSQLCommand(sql)
                cur.execute(sql)

        ranknr = 0
        for property in properties:
            propid = property['propid']
            settings = property['Settings']
            extraSettings = settings.Clone()
            extraSettings.DropTokens(
                ['Name', 'DataType', 'Order', 'SummaryValues'])
            print('Create property catalog entry for {0} {1} {2}'.format(
                workspaceid, tableid, propid))
            sql = "DELETE FROM propertycatalog WHERE (workspaceid='{0}') and (propid='{1}') and (tableid='{2}')".format(
                workspaceid, propid, tableid)
            ImpUtils.ExecuteSQL(calculationObject, datasetId, sql)
            sql = "INSERT INTO propertycatalog VALUES ('{0}', 'custom', '{1}', '{2}', '{3}', '{4}', {5}, '{6}')".format(
                workspaceid, settings['DataType'], propid, tableid,
                settings['Name'], 0, extraSettings.ToJSON())
            ImpUtils.ExecuteSQL(calculationObject, datasetId, sql)
            ranknr += 1

        propDict = {}
        for property in properties:
            propDict[property['propid']] = property

        if not importSettings['ConfigOnly']:
            tmptable = Utils.GetTempID()
            columns = [{
                'name': prop['propid'],
                'DataType': prop['DataType'],
                'Index': prop['Settings']['Index'],
                'ReadData': prop['Settings']['ReadData']
            } for prop in properties]
            columns.append({
                'name': primkey,
                'DataType': 'Text',
                'Index': False,
                'ReadData': True
            })
            # print('----------------------------------------------------------------')
            # print(str(columns))
            # print('----------------------------------------------------------------')
            LoadTable.LoadTable(calculationObject,
                                os.path.join(folder, 'data'), datasetId,
                                tmptable, columns, {'PrimKey': primkey},
                                importSettings, False)

            print('Creating new columns')
            calculationObject.Log(
                'WARNING: better mechanism to determine column types needed here'
            )  #TODO: implement
            frst = True
            sql = "ALTER TABLE {0} ".format(sourcetable)
            for property in properties:
                propid = property['propid']
                if not frst:
                    sql += " ,"
                sqldatatype = ImpUtils.GetSQLDataType(property['DataType'])
                sql += "ADD COLUMN {0} {1}".format(propid, sqldatatype)
                frst = False
                calculationObject.LogSQLCommand(sql)
            cur.execute(sql)

            print('Joining information')
            frst = True
            credInfo.VerifyCanDo(
                DQXDbTools.DbOperationWrite(datasetId, sourcetable))
            sql = "update {0} left join {1} on {0}.{2}={1}.{2} set ".format(
                sourcetable, tmptable, primkey)
            for property in properties:
                propid = property['propid']
                if not frst:
                    sql += " ,"
                sql += "{0}.{2}={1}.{2}".format(sourcetable, tmptable, propid)
                frst = False
                calculationObject.LogSQLCommand(sql)
            cur.execute(sql)

            print('Cleaning up')
            cur.execute("DROP TABLE {0}".format(tmptable))

        if not importSettings['ConfigOnly']:
            print('Updating view')
            Utils.UpdateTableInfoView(workspaceid, tableid, allowSubSampling,
                                      cur)

        db.commit()
        db.close()

        print('Creating summary values')
        for property in properties:
            propid = property['propid']
            settings = property['Settings']
            if settings.HasToken('SummaryValues'):
                with calculationObject.LogHeader(
                        'Creating summary values for custom data {0}'.format(
                            tableid)):
                    summSettings = settings.GetSubSettings('SummaryValues')
                    if settings.HasToken('minval'):
                        summSettings.AddTokenIfMissing('MinVal',
                                                       settings['minval'])
                    summSettings.AddTokenIfMissing('MaxVal',
                                                   settings['maxval'])
                    destFolder = os.path.join(config.BASEDIR, 'SummaryTracks',
                                              datasetId, propid)
                    if not os.path.exists(destFolder):
                        os.makedirs(destFolder)
                    dataFileName = os.path.join(destFolder, propid)

                    if not isPositionOnGenome:
                        raise Exception(
                            'Summary values defined for non-position table')

                    if not importSettings['ConfigOnly']:
                        calculationObject.Log('Extracting data to ' +
                                              dataFileName)
                        script = ImpUtils.SQLScript(calculationObject)
                        script.AddCommand(
                            "SELECT {2} as chrom, {3} as pos, {0} FROM {1} ORDER BY {2},{3}"
                            .format(
                                propid,
                                Utils.GetTableWorkspaceView(
                                    workspaceid, tableid), chromField,
                                posField))
                        script.Execute(datasetId, dataFileName)
                        calculationObject.LogFileTop(dataFileName, 10)

                    ImpUtils.CreateSummaryValues_Value(
                        calculationObject, summSettings, datasetId, tableid,
                        'custom', workspaceid, propid, settings['Name'],
                        dataFileName, importSettings)