Python VTTable 예제들, DQXTableUtils.VTTable Python 예제들

예제 #1

0

파일 보기

def response(returndata):
    """Report the column names of an uploaded file in *returndata*.

    The file is located under ``config.BASEDIR/Uploads`` using
    ``returndata['fileid']`` and loaded with every column treated as text
    (100 is passed as the second ``LoadFile`` argument — presumably a row
    limit; confirm against VTTable).

    On success ``returndata['columns']`` receives the column names joined
    with ';'. On any exception raised while loading or listing columns,
    ``returndata['Error']`` receives the exception text instead.

    Returns:
        The same ``returndata`` dict that was passed in.
    """
    uploadpath = os.path.join(config.BASEDIR, 'Uploads', returndata['fileid'])
    table = VTTable.VTTable()
    table.allColumnsText = True
    try:
        table.LoadFile(uploadpath, 100)
        returndata['columns'] = ';'.join(table.GetColList())
    except Exception as err:
        # Failures are reported in-band rather than propagated to the caller.
        returndata['Error'] = str(err)

    return returndata

예제 #2

0

파일 보기

파일: ParseBED.py 프로젝트: sanger-pathogens/panoptes

# Directory that the input file name is resolved against.
basepath = '.'

#============= FAKE STUFF FOR DEBUGGING; REMOVE FOR PRODUCTION ==============
if True:
    basepath = '/Users/pvaut/Documents/Genome/SnpCrossData3'
    sys.argv = ['', 'genomeregions.bed']
#============= END OF FAKE STUFF ============================================

# The BED file name is expected as the first command-line argument.
if len(sys.argv) < 2:
    print('Usage: COMMAND BEDFileName')
    sys.exit()

sourcefile = sys.argv[1]

# Table layout as (column name, column type) pairs, added in this order.
tb = VTTable.VTTable()
for colname, coltype in [
        ('chromid', 'Text'),
        ('fstart', 'Value'),
        ('fend', 'Value'),
        ('fname', 'Text'),
        ('fid', 'Text'),
        ('ftype', 'Text'),
        ('fparentid', 'Text'),
        ('fnames', 'Text'),
        ('descr', 'Text'),
]:
    tb.AddColumn(VTTable.VTColumn(colname, coltype))

nr = 0
# Print each line of the source file as its list of tab-separated fields.
with open(basepath + '/' + sourcefile, 'r') as fp:
    for line in fp:
        tokens = line.rstrip('\r\n').split('\t')
        print(str(tokens))

예제 #3

0

파일 보기

파일: ParseGFF_old.py 프로젝트: sanger-pathogens/panoptes


# The GFF file name is expected as the first command-line argument.
if len(sys.argv) < 2:
    print('Usage: COMMAND GFFFileName')
    sys.exit()

sourcefile = sys.argv[1]

# All outputs are written next to the input, under basepath.
annotationfile = '{0}/annotation.txt'.format(basepath)
createsqlfile = '{0}/annotation_create.sql'.format(basepath)
dumpsqlfile = '{0}/annotation_dump.sql'.format(basepath)

# Parse the GFF file and save the parser output as annotation.txt.
filelist = ['{0}/{1}'.format(basepath, sourcefile)]
parser = GFFParser()
#parser.targetfeaturelist=['gene','mRNA']
#parser.targetfeaturelist=['repeat_region','pseudogene','snRNA','tRNA','centromere','pseudogenic_exon','pseudogenic_transcript','rRNA','snoRNA','polypeptide_motif','ncRNA']
parser.parseGFF(filelist)
parser.Process()
parser.save(annotationfile)

# Reload the saved annotation as a table, make the two position columns
# numeric, and emit the SQL creation and dump scripts for it.
tb = VTTable.VTTable()
tb.allColumnsText = True
tb.LoadFile(annotationfile)
for poscol in ('fstart', 'fstop'):
    tb.ConvertColToValue(poscol)
#tb.CalcCol('fnames', lambda x: x, 'fname')
#tb.CalcCol('descr', lambda: '')
#tb.CalcCol('strand', lambda: '')
print('DD>')
tb.PrintRows(0, 10)
print('<DD')
tb.SaveSQLCreation(createsqlfile, 'annotation')
tb.SaveSQLDump(dumpsqlfile, 'annotation')

예제 #4

0

파일 보기

파일: CreateRandomSnpProperties.py 프로젝트: TAlexPerkins/panoptes

# This file is part of Panoptes - (C) Copyright 2014, Paul Vauterin, Ben Jeffery, Alistair Miles <*****@*****.**>
# This program is free software licensed under the GNU Affero General Public License.
# You can find a copy of this license in LICENSE in the top directory of the source code or at <http://opensource.org/licenses/AGPL-3.0>

from DQXTableUtils import VTTable
import sys
import math
import random

# Load the SNP info table with every column read as text.
tb = VTTable.VTTable()
tb.allColumnsText = True
tb.LoadFile('/home/pvaut/Documents/Genome/PfPopgen30/snpInfo.tab')

# Columns that are discarded, dropped in this order.
for dropname in [
        'Num', 'Ref', 'Nonref', 'GeneId', 'Strand', 'CodonNum', 'Codon',
        'NtPos', 'RefAmino', 'Mutation', 'MutCodon', 'MutAmino', 'MutType',
        'MutName', 'GeneDescription',
]:
    tb.DropCol(dropname)

# The position is made numeric before the remaining columns are renamed.
tb.ConvertColToValue('Pos')
for oldname, newname in [
        ('Chr', 'chrom'),
        ('Pos', 'pos'),
        ('SnpName', 'snpid'),
]:
    tb.RenameCol(oldname, newname)

예제 #5

0

파일 보기

def replaceNA(vl):
    """Return None for the placeholder string '#N/A', otherwise *vl* unchanged."""
    return None if vl == '#N/A' else vl


def boolToVal(vl):
    """Return 1 when *vl* is exactly the string 'TRUE', and 0 for anything else."""
    return 1 if vl == 'TRUE' else 0


# Load the sample metadata with every column read as text.
tb = VTTable.VTTable()
tb.allColumnsText = True
#tb.sepchar = ','
tb.LoadFile(
    '/home/pvaut/Documents/Genome/PfPopgen21/metadata-2.2_withsites.txt')
#tb.SaveSQLCreation('/home/pvaut/Documents/Genome/PfPopgen30/snpinfo_create.sql','snpinfo')

tb.DropCol('dummy')

# Remove every row whose first-column value contains a space. The index
# only advances when a row is kept (presumably because RemoveRow shifts the
# following rows down — confirm against VTTable).
i = 0
while i < tb.GetRowCount():
    sampleid = tb.GetValue(i, 0)
    if ' ' not in sampleid:
        i += 1
    else:
        tb.RemoveRow(i)

예제 #6

0

파일 보기

파일: ImpUtils.py 프로젝트: TAlexPerkins/panoptes

def LoadPropertyInfo(calculationObject, impSettings, datafile):
    """Build the list of property descriptors for importing *datafile*.

    Properties are collected from up to three sources, in this order:

    1. an automatic primary key, when ``impSettings['PrimKey']`` is 'AutoKey';
    2. the explicit ``impSettings['Properties']`` list, where one entry may
       name several comma-separated ids in its 'Id' field;
    3. when ``impSettings['AutoScanProperties']`` is set, every column of
       *datafile* not already defined, with a data type guessed from the
       loaded values.

    Each property's settings are then completed with defaults and validated.

    Args:
        calculationObject: provides ``Log`` and ``LogDataDump`` for logging.
        impSettings: settings object supporting ``HasToken`` and item lookup.
        datafile: file handed to ``VTTable.LoadFile`` for the auto scan.

    Returns:
        A list of dicts, each holding 'propid', 'Settings' and 'DataType'.

    Raises:
        Exception: if 'Properties' is not a list, an entry lacks 'Id', the
            data file cannot be read, 'Search' has an unknown value, or no
            properties end up defined. The identifier and settings checks
            called here may raise as well.
    """
    calculationObject.Log('Determining properties')
    properties = []
    propidMap = {}  # propid -> property dict, used to merge repeated ids

    autoPrimKey = (impSettings.HasToken('PrimKey')
                   and (impSettings['PrimKey'] == 'AutoKey'))

    # Source 1: the automatic primary key becomes a numeric property of its own.
    if autoPrimKey:
        propid = impSettings['PrimKey']
        property = {'propid': propid}
        propidMap[propid] = property
        properties.append(property)
        settings = SettingsLoader.SettingsLoader()
        settings.LoadDict({
            'Name': impSettings['PrimKey'],
            'ShowInTable': True,
            'DataType': 'Value',
            'DecimDigits': 0
        })
        property['Settings'] = settings

    # Source 2: explicitly listed properties.
    if impSettings.HasToken('Properties'):
        if not type(impSettings['Properties']) is list:
            raise Exception('Properties token should be a list')
        for propSource in impSettings['Properties']:
            if 'Id' not in propSource:
                raise Exception('Property is missing Id field')
            propids = propSource['Id']
            # One entry may define several properties at once: "id1, id2, ...".
            for propid in propids.split(','):
                propid = propid.strip()
                if propid in propidMap:
                    # Already known: the new entry is layered onto its settings.
                    property = propidMap[propid]
                    settings = property['Settings']
                else:
                    property = {'propid': propid}
                    settings = SettingsLoader.SettingsLoader()
                    settings.LoadDict({})
                    property['Settings'] = settings
                    propidMap[propid] = property
                    properties.append(property)
                DQXUtils.CheckValidIdentifier(propid)
                settings.AddDict(propSource)

    # Source 3: columns discovered in the data file itself.
    if (impSettings.HasToken('AutoScanProperties')) and (
            impSettings['AutoScanProperties']):
        calculationObject.Log('Auto determining columns')
        tb = VTTable.VTTable()
        tb.allColumnsText = True
        try:
            # NOTE(review): 9999 is presumably a cap on the rows read — confirm.
            tb.LoadFile(datafile, 9999)
        except Exception as e:
            raise Exception('Error while reading data file: ' + str(e))
        with calculationObject.LogDataDump():
            tb.PrintRows(0, 9)
        for propid in tb.GetColList():
            # Explicitly defined properties take precedence over scanned ones.
            if propid not in propidMap:
                property = {'propid': propid}
                colnr = tb.GetColNr(propid)
                cnt_tot = 0  # non-None values seen
                cnt_isnumber = 0  # values accepted by float()
                cnt_isbool = 0  # values spelled like a boolean (incl. '1'/'0')
                for rownr in tb.GetRowNrRange():
                    val = tb.GetValue(rownr, colnr)
                    if val is not None:
                        cnt_tot += 1
                        try:
                            float(val)
                            cnt_isnumber += 1
                        except ValueError:
                            pass
                        if val in [
                                'True', 'true', 'TRUE', 'False', 'false',
                                'FALSE', '1', '0'
                        ]:
                            cnt_isbool += 1

                # Default to Text; Value needs more than 75% numeric values;
                # Boolean needs every value to look boolean, and is tested
                # last so it overrides Value.
                # NOTE(review): with no non-None values all counts are 0, so
                # the Boolean test below holds and the column is typed
                # Boolean — confirm this is intended.
                property['DataType'] = 'Text'
                if (cnt_isnumber > 0.75 * cnt_tot) and (cnt_isnumber >
                                                        cnt_isbool):
                    property['DataType'] = 'Value'
                if (cnt_isbool == cnt_tot) and (cnt_isbool >= cnt_isnumber):
                    property['DataType'] = 'Boolean'

                DQXUtils.CheckValidIdentifier(propid)
                settings = SettingsLoader.SettingsLoader()
                settings.LoadDict({})
                settings.AddTokenIfMissing('Name', propid)
                settings.AddTokenIfMissing('DataType', property['DataType'])
                property['Settings'] = settings
                properties.append(property)
                propidMap[propid] = property

    # Complete and validate the settings of every collected property.
    for property in properties:
        settings = property['Settings']
        settings.AddTokenIfMissing('Index', False)
        settings.AddTokenIfMissing('Search', 'None')
        settings.DefineKnownTokens([
            'isCategorical', 'minval', 'maxval', 'decimDigits',
            'showInBrowser', 'showInTable', 'categoryColors', 'channelName',
            'channelColor', 'connectLines', 'SummaryValues'
        ])
        settings.RequireTokens(['DataType'])
        settings.ConvertToken_Boolean('isCategorical')
        if settings.HasToken('isCategorical') and settings['isCategorical']:
            settings.SetToken(
                'Index', True)  # Categorical data types are always indexed
        if settings.HasToken('Relation'):
            settings.SetToken(
                'Index', True)  # Relation child properties are always indexed
        if settings['Search'] not in [
                'None', 'StartPattern', 'Pattern', 'Match'
        ]:
            raise Exception(
                'Property "Search" token should be None,StartPattern,Pattern,Match'
            )
        if settings['Search'] in ['StartPattern', 'Pattern', 'Match']:
            settings.SetToken('Index', True)  # Use index to speed up search
        settings.AddTokenIfMissing('Name', property['propid'])
        settings.AddTokenIfMissing('ReadData', True)
        settings.ConvertToken_Boolean('ReadData')
        settings.AddTokenIfMissing('CanUpdate', False)
        settings.ConvertToken_Boolean('CanUpdate')
        settings.ConvertStringsToSafeSQL()
        # Mirror the final data type onto the property dict itself.
        property['DataType'] = settings['DataType']

    if len(properties) == 0:
        raise Exception(
            'No properties defined. Use "AutoScanProperties: true" or "Properties" list to define'
        )

    calculationObject.Log('Properties found:')
    with calculationObject.LogDataDump():
        for property in properties:
            calculationObject.Log(
                str(property) + ' | ' + property['Settings'].ToJSON())
    # Final check on every id, including those that came from the auto key.
    for property in properties:
        Utils.CheckSafeIdentifier(property['propid'])
    return properties

예제 #7

0

파일 보기

def LoadTable0(calculationObject, sourceFileName, databaseid, tableid, columns, loadSettings):
    """Load a data file into a freshly created SQL table.

    The file is read with every column as text, reduced and reordered to the
    requested columns, converted per column data type, and then written to
    the database through a temporary SQL dump file.

    Args:
        calculationObject: provides ``Log`` and ``LogDataDump`` for logging.
        sourceFileName: file handed to ``VTTable.LoadFile``.
        databaseid: database the SQL commands are executed against.
        tableid: name of the table that is dropped, recreated and filled.
        columns: list of dicts, each with a 'name' and a 'DataType' entry.
        loadSettings: mapping with 'MaxTableSize' (second ``LoadFile``
            argument) and 'PrimKey' (column that gets the unique index).

    Raises:
        Exception: if the file cannot be read, a requested column is missing
            from it, or the primary key is not among the requested columns.
    """
    calculationObject.Log('Loading table {0} from {1}'.format(tableid, sourceFileName))

    colDict = {col['name']: col for col in columns}
    colNameList = [col['name'] for col in columns]
    print('Column info: '+str(columns))

    tb = VTTable.VTTable()
    tb.allColumnsText = True
    try:
        tb.LoadFile(sourceFileName, loadSettings['MaxTableSize'])
    except Exception as e:
        raise Exception('Error while reading file: '+str(e))
    calculationObject.Log('---- ORIG TABLE ----')
    with calculationObject.LogDataDump():
        tb.PrintRows(0, 9)

    for col in columns:
        if not tb.IsColumnPresent(col['name']):
            raise Exception('Missing column "{0}" in datatable "{1}"'.format(col['name'], tableid))

    if loadSettings['PrimKey'] not in colDict:
        raise Exception('Missing primary key column "{0}" in datatable "{1}"'.format(loadSettings['PrimKey'], tableid))

    # Iterate over a snapshot of the column list so that dropping columns
    # cannot disturb the iteration if GetColList exposes a live list.
    for presentname in list(tb.GetColList()):
        if presentname not in colDict:
            tb.DropCol(presentname)
    tb.ArrangeColumns(colNameList)
    for col in columns:
        colname = col['name']
        if ImpUtils.IsValueDataTypeIdenfifier(col['DataType']):
            tb.ConvertColToValue(colname)
        if col['DataType'] == 'Boolean':
            tb.MapCol(colname, ImpUtils.convertToBooleanInt)
            tb.ConvertColToValue(colname)
    calculationObject.Log('---- PROCESSED TABLE ----')
    with calculationObject.LogDataDump():
        tb.PrintRows(0, 9)

    # Build one "name type" definition per column. Text columns are sized to
    # the longest value present, with a minimum length of 1.
    coldefs = []
    for col in columns:
        colname = col['name']
        if col['DataType'] == 'Text':
            colnr = tb.GetColNr(colname)
            maxlength = 1
            for rownr in tb.GetRowNrRange():
                value = tb.GetValue(rownr, colnr)
                # Absent cells have no length and must not break the sizing.
                if value is not None:
                    maxlength = max(maxlength, len(value))
            datatypestr = 'varchar({0})'.format(maxlength)
        else:
            datatypestr = ImpUtils.GetSQLDataType(col['DataType'])
        coldefs.append(colname + ' ' + datatypestr)
    createcmd = 'CREATE TABLE {0} ({1})'.format(tableid, ', '.join(coldefs))

    calculationObject.Log('Creating datatable')
    scr = ImpUtils.SQLScript(calculationObject)
    scr.AddCommand('drop table if exists {0}'.format(tableid))
    scr.AddCommand(createcmd)
    scr.AddCommand('create unique index {0}_{1} ON {0}({1})'.format(tableid, loadSettings['PrimKey']))
    scr.Execute(databaseid)

    calculationObject.Log('Loading datatable values')
    sqlfile = ImpUtils.GetTempFileName()
    try:
        tb.SaveSQLDump(sqlfile, tableid)
        ImpUtils.ExecuteSQLScript(calculationObject, sqlfile, databaseid)
    finally:
        # Remove the temporary dump even when dumping or executing fails.
        if os.path.exists(sqlfile):
            os.remove(sqlfile)

예제 #8

0

파일 보기

def ImportRefGenome(calculationObject, datasetId, folder, importSettings):
    """Import the reference genome data found in *folder* into a dataset.

    Steps, in order:

    1. import the reference genome summary data and the global settings
       read from ``<folder>/settings``;
    2. when ``importSettings['ScopeStr']`` is 'all' and
       ``<folder>/refsequence.fa`` exists, copy it to
       ``config.BASEDIR/SummaryTracks/<datasetId>/Sequence`` and run the
       'Fasta2FilterBankData' convertor on it;
    3. replace the content of the ``chromosomes`` table with the rows of
       ``<folder>/chromosomes``;
    4. when the scope is 'all', run the 'ParseGFF' convertor on a temporary
       copy of ``<folder>/annotation.gff`` and execute the SQL dump it
       produces.

    Temporary files are removed even when one of the steps raises.

    Args:
        calculationObject: provides ``Log``, ``LogHeader`` and ``LogDataDump``.
        datasetId: dataset (database) that receives the data.
        folder: source folder holding the reference genome files.
        importSettings: mapping that provides 'ScopeStr'.

    Raises:
        Exception: if the chromosomes file cannot be read. Errors raised by
            the other steps propagate unchanged.
    """
    with calculationObject.LogHeader('Importing reference genome data'):

        ImportRefGenomeSummaryData(calculationObject, datasetId, folder,
                                   importSettings)

        settings = SettingsLoader.SettingsLoader(
            os.path.join(folder, 'settings'))
        settings.DefineKnownTokens(['AnnotMaxViewportSize', 'RefSequenceSumm'])
        print('Settings: ' + str(settings.Get()))
        ImpUtils.ImportGlobalSettings(calculationObject, datasetId, settings)

        # Import reference genome
        if importSettings['ScopeStr'] == 'all':
            refsequencefile = os.path.join(folder, 'refsequence.fa')
            if os.path.exists(refsequencefile):
                with calculationObject.LogHeader(
                        'Converting reference genome'):
                    destfolder = config.BASEDIR + '/SummaryTracks/' + datasetId + '/Sequence'
                    if not os.path.exists(destfolder):
                        os.makedirs(destfolder)
                    tempfastafile = destfolder + '/refsequence.fa'
                    shutil.copyfile(refsequencefile, tempfastafile)
                    ImpUtils.RunConvertor(calculationObject,
                                          'Fasta2FilterBankData', destfolder,
                                          ['refsequence.fa'])
            else:
                calculationObject.Log(
                    'WARNING: missing reference sequence file')
        else:
            calculationObject.Log(
                'WARNING: Skipping converting reference genome')

        # Import chromosomes
        with calculationObject.LogHeader('Loading chromosomes'):
            tb = VTTable.VTTable()
            tb.allColumnsText = True
            try:
                tb.LoadFile(os.path.join(folder, 'chromosomes'))
            except Exception as e:
                raise Exception('Error while reading chromosomes file: ' +
                                str(e))
            tb.RequireColumnSet(['chrom', 'length'])
            tb.RenameCol('chrom', 'id')
            tb.RenameCol('length', 'len')
            tb.ConvertColToValue('len')
            with calculationObject.LogDataDump():
                tb.PrintRows(0, 99)
            sqlfile = ImpUtils.GetTempFileName()
            try:
                tb.SaveSQLDump(sqlfile, 'chromosomes')
                # Existing rows are cleared before the dump is replayed.
                ImpUtils.ExecuteSQL(calculationObject, datasetId,
                                    'DELETE FROM chromosomes')
                ImpUtils.ExecuteSQLScript(calculationObject, sqlfile,
                                          datasetId)
            finally:
                # Remove the temporary dump even when a step above fails.
                if os.path.exists(sqlfile):
                    os.remove(sqlfile)

        if importSettings['ScopeStr'] == 'all':
            # Import annotation
            with calculationObject.LogHeader('Converting annotation'):
                tempgfffile = ImpUtils.GetTempFileName()
                temppath = os.path.dirname(tempgfffile)
                # The GFF copy plus the files this function expects the
                # convertor to leave next to it.
                leftovers = [tempgfffile] + [
                    os.path.join(temppath, name)
                    for name in ('annotation.txt', 'annotation_dump.sql',
                                 'annotation_create.sql')
                ]
                try:
                    shutil.copyfile(os.path.join(folder, 'annotation.gff'),
                                    tempgfffile)
                    ImpUtils.RunConvertor(calculationObject, 'ParseGFF',
                                          temppath,
                                          [os.path.basename(tempgfffile)])
                    print('Importing annotation')
                    ImpUtils.ExecuteSQLScript(
                        calculationObject,
                        os.path.join(temppath, 'annotation_dump.sql'),
                        datasetId)
                finally:
                    # Clean up whatever was produced, including after a
                    # failure partway through the conversion.
                    for leftover in leftovers:
                        if os.path.exists(leftover):
                            os.remove(leftover)
        else:
            calculationObject.Log('WARNING: Skipping converting annotation')