def response(returndata):
    """Augment *returndata* with the column list of an uploaded file.

    Loads the upload identified by returndata['fileid'] (first 100 rows,
    all columns as text) and stores its column names, ';'-joined, under
    returndata['columns'].  On any failure the error text is stored under
    returndata['Error'] instead.  The (mutated) dict is returned either way.
    """
    upload_path = os.path.join(config.BASEDIR, 'Uploads', returndata['fileid'])
    table = VTTable.VTTable()
    table.allColumnsText = True
    try:
        table.LoadFile(upload_path, 100)
        returndata['columns'] = ';'.join(table.GetColList())
    except Exception as exc:
        # Report the problem to the caller rather than raising.
        returndata['Error'] = str(exc)
    return returndata
# Command-line script: read a BED file and stage its rows into a VTTable
# laid out with the genome-region annotation columns.
basepath = '.'

# NOTE(review): a hard-coded debug override of basepath/sys.argv lived here,
# fenced by a banner reading "FAKE STUFF FOR DEBUGGING; REMOVE FOR
# PRODUCTION".  It has been removed, as the banner itself instructed; the
# script now honours its real command-line arguments.

if len(sys.argv) < 2:
    print('Usage: COMMAND BEDFileName')
    sys.exit()
sourcefile = sys.argv[1]

# Target table layout for genome-region features.
tb = VTTable.VTTable()
tb.AddColumn(VTTable.VTColumn('chromid', 'Text'))
tb.AddColumn(VTTable.VTColumn('fstart', 'Value'))
tb.AddColumn(VTTable.VTColumn('fend', 'Value'))
tb.AddColumn(VTTable.VTColumn('fname', 'Text'))
tb.AddColumn(VTTable.VTColumn('fid', 'Text'))
tb.AddColumn(VTTable.VTColumn('ftype', 'Text'))
tb.AddColumn(VTTable.VTColumn('fparentid', 'Text'))
tb.AddColumn(VTTable.VTColumn('fnames', 'Text'))
tb.AddColumn(VTTable.VTColumn('descr', 'Text'))

nr = 0
with open(basepath + '/' + sourcefile, 'r') as fp:
    for line in fp:
        # BED is tab-separated; strip the line terminator before splitting.
        tokens = line.rstrip('\r\n').split('\t')
        print(str(tokens))
# Command-line script: parse a GFF file into the 'annotation' table and write
# SQL creation/dump files alongside it.
# NOTE(review): depends on `basepath`, `GFFParser` and `VTTable` being defined
# earlier in this file or its imports — confirm when viewing the whole file.
if len(sys.argv)<2:
    print('Usage: COMMAND GFFFileName')
    sys.exit()
sourcefile = sys.argv[1]
filelist=['{0}/{1}'.format(basepath,sourcefile)]
parser=GFFParser()
# Optional feature-type filters, kept for reference:
#parser.targetfeaturelist=['gene','mRNA']
#parser.targetfeaturelist=['repeat_region','pseudogene','snRNA','tRNA','centromere','pseudogenic_exon','pseudogenic_transcript','rRNA','snoRNA','polypeptide_motif','ncRNA']
parser.parseGFF(filelist)
parser.Process()
# Intermediate flat-text dump of the parsed annotation.
parser.save('{0}/annotation.txt'.format(basepath))
tb = VTTable.VTTable()
tb.allColumnsText = True
tb.LoadFile(basepath+'/annotation.txt')
# Start/stop columns must be numeric for the SQL typing below.
tb.ConvertColToValue('fstart')
tb.ConvertColToValue('fstop')
#tb.CalcCol('fnames', lambda x: x, 'fname')
#tb.CalcCol('descr', lambda: '')
#tb.CalcCol('strand', lambda: '')
# Debug dump of the first rows, delimited by DD markers.
print('DD>')
tb.PrintRows(0,10)
print('<DD')
tb.SaveSQLCreation(basepath+'/annotation_create.sql','annotation')
tb.SaveSQLDump(basepath+'/annotation_dump.sql','annotation')
# This file is part of Panoptes - (C) Copyright 2014, Paul Vauterin, Ben Jeffery, Alistair Miles <*****@*****.**>
# This program is free software licensed under the GNU Affero General Public License.
# You can find a copy of this license in LICENSE in the top directory of the source code or at <http://opensource.org/licenses/AGPL-3.0>
from DQXTableUtils import VTTable
import sys
import math
import random

# Load the raw SNP info table (everything as text) and strip it down to the
# three columns needed downstream: chromosome, position and SNP identifier.
tb = VTTable.VTTable()
tb.allColumnsText = True
tb.LoadFile('/home/pvaut/Documents/Genome/PfPopgen30/snpInfo.tab')

# Annotation columns not required for the position table.
for unwanted in ('Num', 'Ref', 'Nonref', 'GeneId', 'Strand', 'CodonNum',
                 'Codon', 'NtPos', 'RefAmino', 'Mutation', 'MutCodon',
                 'MutAmino', 'MutType', 'MutName', 'GeneDescription'):
    tb.DropCol(unwanted)

# Positions must be numeric.
tb.ConvertColToValue('Pos')

# Normalise the remaining column names to the conventions used elsewhere.
tb.RenameCol('Chr', 'chrom')
tb.RenameCol('Pos', 'pos')
tb.RenameCol('SnpName', 'snpid')
def replaceNA(vl):
    """Map the spreadsheet '#N/A' marker to None; pass anything else through."""
    return None if vl == '#N/A' else vl


def boolToVal(vl):
    """Convert a 'TRUE'/'FALSE' text flag to 1/0 (anything but 'TRUE' gives 0)."""
    return 1 if vl == 'TRUE' else 0


# Load the sample metadata table (everything as text) and discard rows whose
# sample id (first column) contains a space.
tb = VTTable.VTTable()
tb.allColumnsText = True
#tb.sepchar = ','
tb.LoadFile(
    '/home/pvaut/Documents/Genome/PfPopgen21/metadata-2.2_withsites.txt')
#tb.SaveSQLCreation('/home/pvaut/Documents/Genome/PfPopgen30/snpinfo_create.sql','snpinfo')
tb.DropCol('dummy')

i = 0
while i < tb.GetRowCount():
    sampleid = tb.GetValue(i, 0)
    if sampleid.find(' ') >= 0:
        # Removing shifts the next row into this index, so don't advance.
        tb.RemoveRow(i)
    else:
        i += 1
def LoadPropertyInfo(calculationObject, impSettings, datafile):
    """Build the list of property descriptors for a data table import.

    Properties are collected from up to three sources, in order:
    1. an automatic primary key (when impSettings PrimKey == 'AutoKey'),
    2. the explicit 'Properties' list in the import settings,
    3. auto-scanned columns of *datafile* (when 'AutoScanProperties' is set).
    Each property is a dict with 'propid', 'DataType' and a 'Settings'
    SettingsLoader; defaults and validation are applied to all of them at the
    end.  Raises Exception on malformed settings, unreadable data files or an
    empty result.  Returns the list of property dicts.
    """
    calculationObject.Log('Determining properties')
    properties = []
    # propid -> property dict, so repeated ids merge into one entry.
    propidMap = {}
    autoPrimKey = (impSettings.HasToken('PrimKey')
                   and (impSettings['PrimKey'] == 'AutoKey'))
    if autoPrimKey:
        # Synthesised numeric primary key column.
        propid = impSettings['PrimKey']
        property = {'propid': propid}
        propidMap[propid] = property
        properties.append(property)
        settings = SettingsLoader.SettingsLoader()
        settings.LoadDict({
            'Name': impSettings['PrimKey'],
            'ShowInTable': True,
            'DataType': 'Value',
            'DecimDigits': 0
        })
        property['Settings'] = settings
    if impSettings.HasToken('Properties'):
        if not type(impSettings['Properties']) is list:
            raise Exception('Properties token should be a list')
        for propSource in impSettings['Properties']:
            if 'Id' not in propSource:
                raise Exception('Property is missing Id field')
            # One source entry may declare several comma-separated ids that
            # all share the same settings.
            propids = propSource['Id']
            for propid in propids.split(','):
                propid = propid.strip()
                if propid in propidMap:
                    # Already known (e.g. the auto primary key): merge into it.
                    property = propidMap[propid]
                    settings = property['Settings']
                else:
                    property = {'propid': propid}
                    settings = SettingsLoader.SettingsLoader()
                    settings.LoadDict({})
                    property['Settings'] = settings
                    propidMap[propid] = property
                    properties.append(property)
                DQXUtils.CheckValidIdentifier(propid)
                settings.AddDict(propSource)
    if (impSettings.HasToken('AutoScanProperties')) and (
            impSettings['AutoScanProperties']):
        # Scan a sample of the data file to infer types for columns that were
        # not declared explicitly.
        calculationObject.Log('Auto determining columns')
        tb = VTTable.VTTable()
        tb.allColumnsText = True
        try:
            tb.LoadFile(datafile, 9999)  # cap the scan at 9999 rows
        except Exception as e:
            raise Exception('Error while reading data file: ' + str(e))
        with calculationObject.LogDataDump():
            tb.PrintRows(0, 9)
        for propid in tb.GetColList():
            if propid not in propidMap:
                property = {'propid': propid}
                colnr = tb.GetColNr(propid)
                # Count non-empty values and how many parse as number/boolean.
                cnt_tot = 0
                cnt_isnumber = 0
                cnt_isbool = 0
                for rownr in tb.GetRowNrRange():
                    val = tb.GetValue(rownr, colnr)
                    if val is not None:
                        cnt_tot += 1
                        try:
                            float(val)
                            cnt_isnumber += 1
                        except ValueError:
                            pass
                        # Note '1'/'0' count as both number and boolean; the
                        # thresholds below arbitrate between the two.
                        if val in [
                                'True', 'true', 'TRUE', 'False', 'false',
                                'FALSE', '1', '0'
                        ]:
                            cnt_isbool += 1
                # Default to Text; promote to Value when >75% of values are
                # numeric, or to Boolean when every value is boolean-like.
                property['DataType'] = 'Text'
                if (cnt_isnumber > 0.75 * cnt_tot) and (cnt_isnumber > cnt_isbool):
                    property['DataType'] = 'Value'
                if (cnt_isbool == cnt_tot) and (cnt_isbool >= cnt_isnumber):
                    property['DataType'] = 'Boolean'
                DQXUtils.CheckValidIdentifier(propid)
                settings = SettingsLoader.SettingsLoader()
                settings.LoadDict({})
                settings.AddTokenIfMissing('Name', propid)
                settings.AddTokenIfMissing('DataType', property['DataType'])
                property['Settings'] = settings
                properties.append(property)
                propidMap[propid] = property
    # Apply defaults and validation uniformly to every collected property.
    for property in properties:
        settings = property['Settings']
        settings.AddTokenIfMissing('Index', False)
        settings.AddTokenIfMissing('Search', 'None')
        settings.DefineKnownTokens([
            'isCategorical', 'minval', 'maxval', 'decimDigits',
            'showInBrowser', 'showInTable', 'categoryColors', 'channelName',
            'channelColor', 'connectLines', 'SummaryValues'
        ])
        settings.RequireTokens(['DataType'])
        settings.ConvertToken_Boolean('isCategorical')
        if settings.HasToken('isCategorical') and settings['isCategorical']:
            settings.SetToken(
                'Index', True)  # Categorical data types are always indexed
        if settings.HasToken('Relation'):
            settings.SetToken(
                'Index', True)  # Relation child properties are always indexed
        if settings['Search'] not in [
                'None', 'StartPattern', 'Pattern', 'Match'
        ]:
            raise Exception(
                'Property "Search" token should be None,StartPattern,Pattern,Match'
            )
        if settings['Search'] in ['StartPattern', 'Pattern', 'Match']:
            settings.SetToken('Index', True)  # Use index to speed up search
        settings.AddTokenIfMissing('Name', property['propid'])
        settings.AddTokenIfMissing('ReadData', True)
        settings.ConvertToken_Boolean('ReadData')
        settings.AddTokenIfMissing('CanUpdate', False)
        settings.ConvertToken_Boolean('CanUpdate')
        settings.ConvertStringsToSafeSQL()
        property['DataType'] = settings['DataType']
    if len(properties) == 0:
        raise Exception(
            'No properties defined. Use "AutoScanProperties: true" or "Properties" list to define'
        )
    calculationObject.Log('Properties found:')
    with calculationObject.LogDataDump():
        for property in properties:
            calculationObject.Log(
                str(property) + ' | ' + property['Settings'].ToJSON())
    for property in properties:
        Utils.CheckSafeIdentifier(property['propid'])
    return properties
def LoadTable0(calculationObject, sourceFileName, databaseid, tableid,
               columns, loadSettings):
    """Load *sourceFileName* into database table *tableid*.

    Reads the file (all columns as text, capped at loadSettings
    'MaxTableSize' rows), validates that every declared column and the
    primary key are present, drops undeclared columns, converts value and
    boolean columns, then (re)creates the SQL table and bulk-loads it via a
    temporary SQL dump file.  Raises Exception on read errors or missing
    columns.  Returns None.
    """
    calculationObject.Log('Loading table {0} from {1}'.format(tableid, sourceFileName))
    colDict = {col['name']: col for col in columns}
    colNameList = [col['name'] for col in columns]
    print('Column info: '+str(columns))
    tb = VTTable.VTTable()
    tb.allColumnsText = True
    try:
        tb.LoadFile(sourceFileName, loadSettings['MaxTableSize'])
    except Exception as e:
        raise Exception('Error while reading file: '+str(e))
    calculationObject.Log('---- ORIG TABLE ----')
    with calculationObject.LogDataDump():
        tb.PrintRows(0, 9)
    # Every declared column, plus the primary key, must exist in the file.
    for col in columns:
        if not tb.IsColumnPresent(col['name']):
            raise Exception('Missing column "{0}" in datatable "{1}"'.format(col['name'], tableid))
    if loadSettings['PrimKey'] not in colDict:
        raise Exception('Missing primary key column "{0}" in datatable "{1}"'.format(loadSettings['PrimKey'], tableid))
    # Drop columns the import settings don't declare, then fix the order.
    for col in tb.GetColList():
        if col not in colDict:
            tb.DropCol(col)
    tb.ArrangeColumns(colNameList)
    for col in columns:
        colname = col['name']
        if ImpUtils.IsValueDataTypeIdenfifier(col['DataType']):
            tb.ConvertColToValue(colname)
        if col['DataType'] == 'Boolean':
            # Booleans are stored as 0/1 integers.
            tb.MapCol(colname, ImpUtils.convertToBooleanInt)
            tb.ConvertColToValue(colname)
    calculationObject.Log('---- PROCESSED TABLE ----')
    with calculationObject.LogDataDump():
        tb.PrintRows(0, 9)
    # Build the CREATE TABLE statement; text columns get a varchar sized to
    # the longest observed value (minimum 1).
    # NOTE(review): table/column names are interpolated into SQL here —
    # presumably they were validated upstream (CheckValidIdentifier); confirm.
    createcmd = 'CREATE TABLE {0} ('.format(tableid)
    frst = True
    for col in columns:
        if not frst:
            createcmd += ', '
        colname = col['name']
        colnr = tb.GetColNr(colname)
        datatypestr = ''
        if col['DataType'] == 'Text':
            maxlength = 1
            for rownr in tb.GetRowNrRange():
                maxlength = max(maxlength, len(tb.GetValue(rownr, colnr)))
            datatypestr = 'varchar({0})'.format(maxlength)
        if len(datatypestr) == 0:
            datatypestr = ImpUtils.GetSQLDataType(col['DataType'])
        createcmd += colname + ' ' + datatypestr
        frst = False
    createcmd += ')'
    calculationObject.Log('Creating datatable')
    scr = ImpUtils.SQLScript(calculationObject)
    scr.AddCommand('drop table if exists {0}'.format(tableid))
    scr.AddCommand(createcmd)
    scr.AddCommand('create unique index {0}_{1} ON {0}({1})'.format(tableid, loadSettings['PrimKey']))
    scr.Execute(databaseid)
    calculationObject.Log('Loading datatable values')
    # Bulk-load via a temporary SQL dump, then clean it up.
    sqlfile = ImpUtils.GetTempFileName()
    tb.SaveSQLDump(sqlfile, tableid)
    ImpUtils.ExecuteSQLScript(calculationObject, sqlfile, databaseid)
    os.remove(sqlfile)
def ImportRefGenome(calculationObject, datasetId, folder, importSettings):
    """Import reference-genome data for *datasetId* from *folder*.

    Imports summary data and global settings, then — when the import scope is
    'all' — converts the reference sequence and the GFF annotation; the
    chromosome table is (re)loaded in every scope.  Side effects only: writes
    summary-track files under config.BASEDIR and updates the dataset's
    database.  Raises Exception if the chromosomes file cannot be read.
    """
    with calculationObject.LogHeader('Importing reference genome data'):
        ImportRefGenomeSummaryData(calculationObject, datasetId, folder,
                                   importSettings)
        settings = SettingsLoader.SettingsLoader(
            os.path.join(folder, 'settings'))
        settings.DefineKnownTokens(['AnnotMaxViewportSize', 'RefSequenceSumm'])
        print('Settings: ' + str(settings.Get()))
        ImpUtils.ImportGlobalSettings(calculationObject, datasetId, settings)

        # Import reference genome
        if importSettings['ScopeStr'] == 'all':
            refsequencefile = os.path.join(folder, 'refsequence.fa')
            if os.path.exists(refsequencefile):
                with calculationObject.LogHeader(
                        'Converting reference genome'):
                    # Convert the FASTA into filter-bank summary data in the
                    # dataset's SummaryTracks folder.
                    destfolder = config.BASEDIR + '/SummaryTracks/' + datasetId + '/Sequence'
                    if not os.path.exists(destfolder):
                        os.makedirs(destfolder)
                    tempfastafile = destfolder + '/refsequence.fa'
                    shutil.copyfile(refsequencefile, tempfastafile)
                    ImpUtils.RunConvertor(calculationObject,
                                          'Fasta2FilterBankData', destfolder,
                                          ['refsequence.fa'])
            else:
                calculationObject.Log(
                    'WARNING: missing reference sequence file')
        else:
            calculationObject.Log(
                'WARNING: Skipping converting reference genome')
        # Import chromosomes
        with calculationObject.LogHeader('Loading chromosomes'):
            tb = VTTable.VTTable()
            tb.allColumnsText = True
            try:
                tb.LoadFile(os.path.join(folder, 'chromosomes'))
            except Exception as e:
                raise Exception('Error while reading chromosomes file: ' +
                                str(e))
            tb.RequireColumnSet(['chrom', 'length'])
            # Rename to the schema used by the 'chromosomes' DB table.
            tb.RenameCol('chrom', 'id')
            tb.RenameCol('length', 'len')
            tb.ConvertColToValue('len')
            with calculationObject.LogDataDump():
                tb.PrintRows(0, 99)
            # Replace the existing table contents via a temporary SQL dump.
            sqlfile = ImpUtils.GetTempFileName()
            tb.SaveSQLDump(sqlfile, 'chromosomes')
            ImpUtils.ExecuteSQL(calculationObject, datasetId,
                                'DELETE FROM chromosomes')
            ImpUtils.ExecuteSQLScript(calculationObject, sqlfile, datasetId)
            os.remove(sqlfile)
        if importSettings['ScopeStr'] == 'all':
            # Import annotation
            with calculationObject.LogHeader('Converting annotation'):
                # The GFF is copied to a temp dir; the convertor writes
                # annotation.txt plus the SQL files next to it.
                tempgfffile = ImpUtils.GetTempFileName()
                temppath = os.path.dirname(tempgfffile)
                shutil.copyfile(os.path.join(folder, 'annotation.gff'),
                                tempgfffile)
                ImpUtils.RunConvertor(calculationObject, 'ParseGFF', temppath,
                                      [os.path.basename(tempgfffile)])
                print('Importing annotation')
                ImpUtils.ExecuteSQLScript(
                    calculationObject,
                    os.path.join(temppath, 'annotation_dump.sql'), datasetId)
                # Clean up all temporary conversion artefacts.
                os.remove(tempgfffile)
                os.remove(os.path.join(temppath, 'annotation.txt'))
                os.remove(os.path.join(temppath, 'annotation_dump.sql'))
                os.remove(os.path.join(temppath, 'annotation_create.sql'))
        else:
            calculationObject.Log('WARNING: Skipping converting annotation')