def testParseHeaderLinesNoTracks(self):
    """Check that single header lines parse into the expected (key, value) tuples.

    Each case passes one '##key: value' line to
    GSuiteParser._parseHeaderLine and compares the result with the
    expected tuple. The inputs include a line without a space after the
    colon and lines with capitalised keys/values; the genome value is
    expected to come back with its case unchanged.
    """
    # (expected tuple, raw header line)
    cases = [
        (('track type', 'segments'), '##track type: segments\n'),
        (('location', 'local'), '##location:Local\n'),
        (('file format', 'primary'), '##File format: primary\n'),
        (('genome', 'HG18'), '##genome: HG18\n'),
    ]
    for expected, headerLine in cases:
        # assertEqual is the supported name; assertEquals is a deprecated alias.
        self.assertEqual(expected, GSuiteParser._parseHeaderLine(headerLine))
def _validateGSuiteFile(galaxyTN):
    """Validate the GSuite file referred to by a Galaxy track name.

    The file name is extracted from *galaxyTN* and handed to
    GSuiteParser.validate, with the validator output captured in an
    in-memory buffer.

    Returns the captured validator output when validation does not
    succeed, and None otherwise.
    """
    import gold.gsuite.GSuiteParser as GSuiteParser
    from quick.application.ExternalTrackManager import ExternalTrackManager
    from cStringIO import StringIO

    gSuitePath = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
    report = StringIO()
    isValid = GSuiteParser.validate(
        gSuitePath, outFile=report, printHelpText=False)

    if isValid:
        return None
    return report.getvalue()
def testParseAndCompose(self):
    """Round-trip test: parsing GSuite text and composing it again must
    reproduce the input text exactly.

    The input has four header lines, a column specification line and seven
    track lines using the ftp, http, https, rsync, hb, galaxy and file URI
    schemes.
    """
    inputContents = (
        '##location: multiple\n'
        '##file format: multiple\n'
        '##track type: unknown\n'
        '##genome: hg18\n'
        '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
        'ftp://server.somewhere.com/path/to/file1.bed\tTrack1\tprimary\tk562\tcMyb\t.\n'
        'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
        'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
        'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
        'hb:/track/name/hierarchy\tTrack5\tpreprocessed\t.\t.\t.\n'
        'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack6\tpreprocessed\tk562\tcMyb\t.\n'
        'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
    )

    parsedSuite = GSuiteParser.parseLines(inputContents.split('\n'))
    outputContents = GSuiteComposer.composeToString(parsedSuite)

    # assertEqual is the supported name; assertEquals is a deprecated alias.
    self.assertEqual(inputContents, outputContents)
def testParseColSpecLineDirectlyMoreStdCols(self):
    """Check a column spec line with several standard columns.

    The input mixes capitalisation ('Uri', 'Title', 'ANTIBODY'); the
    expected column names are all lower case.
    """
    colSpecLine = '###Uri\tTitle\tfile_format\ttrack_type\tgenome\tANTIBODY'
    expectedCols = [
        'uri', 'title', 'file_format', 'track_type', 'genome', 'antibody',
    ]
    # assertEqual is the supported name; assertEquals is a deprecated alias.
    self.assertEqual(expectedCols,
                     GSuiteParser._parseColumnSpecLine(colSpecLine))
def testParseColSpecLineDirectly(self):
    """Check that a minimal column spec line yields its two column names."""
    colSpecLine = '###uri\tantibody'
    parsedCols = GSuiteParser._parseColumnSpecLine(colSpecLine)
    # assertEqual is the supported name; assertEquals is a deprecated alias.
    self.assertEqual(['uri', 'antibody'], parsedCols)
def testParseTrackLineFull(self):
    """Parse seven track lines, one per URI scheme, and check every track.

    The contents have a column spec line (uri, title, cell, antibody) but no
    header lines. For each parsed track the URI components, title and
    attributes are checked through self._commonAssertTrack; afterwards the
    suite-level location, file format, track type, genome and attribute
    names are checked, and the same contents are passed to
    GSuiteParser.validateFromString.
    """
    contents = (
        '###uri\ttitle\tcell\tantibody\n'
        'ftp://server.somewhere.com/path/to/file1.bed.gz\tTrack\tk562\tcMyb\n'
        'http://server.other.com/path/to/file2.bed?query=something\tTrack2\tGM12878\tcMyc\n'
        'https://server.other.com/path/to/file3.bed?query=something\tTrack3\tGM12878\tcMyb\n'
        'rsync://server.other.com/path/to/file4;wig\tTrack4\tNHFL\t.\n'
        'hb:/track/name/hierarchy\tTrack (2)\t.\t.\n'
        'galaxy:/ad123dd12fg;btrack?track=track:name\tTrack (3)\tk562\tcMyb\n'
        'file:/path/to/file.btrack?track=track:name\tTrack name7\t.\tcMyb\n'
    )

    gSuite = self._parseContents(contents)
    tracks = list(gSuite.allTracks())
    # assertEqual is the supported name; assertEquals is a deprecated alias.
    self.assertEqual(7, len(tracks))

    # Expectations shared by the four remote tracks and by the three local
    # tracks respectively; kept in one place instead of repeated per call.
    remotePrimary = dict(location='remote', fileFormat='primary',
                         trackType='unknown', genome='unknown')
    localPreprocessed = dict(location='local', fileFormat='preprocessed',
                             trackType='unknown', genome='unknown')

    self._commonAssertTrack(
        tracks[0],
        uri='ftp://server.somewhere.com/path/to/file1.bed.gz',
        scheme='ftp',
        netloc='server.somewhere.com',
        path='/path/to/file1.bed.gz',
        query=None,
        suffix='gz',
        trackName=None,
        title='Track',
        attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]),
        **remotePrimary)

    self._commonAssertTrack(
        tracks[1],
        uri='http://server.other.com/path/to/file2.bed?query=something',
        scheme='http',
        netloc='server.other.com',
        path='/path/to/file2.bed',
        query='query=something',
        suffix='bed',
        trackName=None,
        title='Track2',
        attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyc')]),
        **remotePrimary)

    self._commonAssertTrack(
        tracks[2],
        uri='https://server.other.com/path/to/file3.bed?query=something',
        scheme='https',
        netloc='server.other.com',
        path='/path/to/file3.bed',
        query='query=something',
        suffix='bed',
        trackName=None,
        title='Track3',
        attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyb')]),
        **remotePrimary)

    # The '.' antibody value is expected to be absent from the attributes.
    self._commonAssertTrack(
        tracks[3],
        uri='rsync://server.other.com/path/to/file4;wig',
        scheme='rsync',
        netloc='server.other.com',
        path='/path/to/file4',
        query=None,
        suffix='wig',
        trackName=None,
        title='Track4',
        attributes=OrderedDict([('cell', 'NHFL')]),
        **remotePrimary)

    self._commonAssertTrack(
        tracks[4],
        uri='hb:/track/name/hierarchy',
        scheme='hb',
        netloc=None,
        path=None,
        query=None,
        suffix=None,
        trackName=['track', 'name', 'hierarchy'],
        title='Track (2)',
        attributes=OrderedDict(),
        **localPreprocessed)

    # The input has 'track:name' in the query; the expected URI carries
    # it as 'track%3Aname'.
    self._commonAssertTrack(
        tracks[5],
        uri='galaxy:/ad123dd12fg;btrack?track=track%3Aname',
        scheme='galaxy',
        netloc=None,
        path='/path/to/dataset_ad123dd12fg.dat',
        query='track=track:name',
        suffix='btrack',
        trackName=['track', 'name'],
        title='Track (3)',
        attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]),
        **localPreprocessed)

    self._commonAssertTrack(
        tracks[6],
        uri='file:/path/to/file.btrack?track=track%3Aname',
        scheme='file',
        netloc=None,
        path='/path/to/file.btrack',
        query='track=track:name',
        suffix='btrack',
        trackName=['track', 'name'],
        title='Track name7',
        attributes=OrderedDict([('antibody', 'cMyb')]),
        **localPreprocessed)

    # Suite-level values expected for this mix of tracks.
    self.assertEqual('multiple', gSuite.location)
    self.assertEqual('multiple', gSuite.fileFormat)
    self.assertEqual('unknown', gSuite.trackType)
    self.assertEqual('unknown', gSuite.genome)
    self.assertEqual(['cell', 'antibody'], gSuite.attributes)

    # Run the validator on the same contents, writing its output to a
    # throwaway buffer; its return value is not checked here.
    from cStringIO import StringIO
    GSuiteParser.validateFromString(contents, outFile=StringIO())
def _parseContents(contents):
    """Split *contents* on newline characters and parse the resulting lines
    with GSuiteParser.parseLines, returning its result."""
    lines = contents.split('\n')
    return GSuiteParser.parseLines(lines)
def getGSuiteFromGSuiteFile(gSuiteFn):
    """Parse the GSuite file named *gSuiteFn*.

    Returns whatever GSuiteParser.parse returns for that file name.
    """
    # Imported inside the function, as in the original.
    from gold.gsuite import GSuiteParser

    parsedSuite = GSuiteParser.parse(gSuiteFn)
    return parsedSuite
attributes=OrderedDict([('a', 'yes'), ('b', 'no')]))) uri2 = HbGSuiteTrack.generateURI( trackName=['Genes and gene subsets', 'Genes', 'Refseq']) gSuite.addTrack( GSuiteTrack(uri2, attributes=OrderedDict([('b', 'no'), ('c', 'yes')]))) gSuite.setGenomeOfAllTracks('hg19') contents = GSuiteComposer.composeToString(gSuite) print 'GSuite file contents' print '--------------------' print contents gSuite2 = GSuiteParser.parseFromString(contents) print 'Various ways of direct access' print '-----------------------------' print "genome=%s, location=%s, file format=%s, track type=%s, attributes=%s" % \ (gSuite.genome, gSuite.location, gSuite.fileFormat, gSuite.trackType, gSuite.attributes) for track in gSuite2.allTracks(): print "uri=%s, path=%s, trackName=%s" % (track.uri, track.path, track.trackName) print "netloc=" + gSuite2.getTrackFromTitle('Track1').netloc tracks = list(gSuite.allTracks()) print "b=" + tracks[1].attributes['b']