Example #1
0
 def testParseHeaderLinesNoTracks(self):
     """Check that single header lines are parsed into (key, value) pairs.

     Each case feeds one '##key: value' line to
     GSuiteParser._parseHeaderLine and compares the returned tuple with
     the expected pair. Taken together the expectations pin down that:

     * the key is lower-cased ('File format' -> 'file format'),
     * whitespace after the colon is optional and may be repeated,
     * the trailing newline is not part of the value,
     * the 'location' value is lower-cased while the 'genome' value
       keeps its case.
     """
     # (expected pair, raw header line); checked in order, so the first
     # failing case is the one that gets reported.
     cases = (
         (('track type', 'segments'), '##track type: segments\n'),
         (('location', 'local'), '##location:Local\n'),
         (('file format', 'primary'), '##File format:  primary\n'),
         (('genome', 'HG18'), '##genome:  HG18\n'),
     )

     for expected, headerLine in cases:
         # assertEqual rather than the deprecated assertEquals alias.
         self.assertEqual(expected,
                          GSuiteParser._parseHeaderLine(headerLine))
Example #2
0
    def _validateGSuiteFile(galaxyTN):
        """Validate the GSuite file referred to by a Galaxy track name.

        The file name is resolved with
        ExternalTrackManager.extractFnFromGalaxyTN and handed to
        GSuiteParser.validate together with an in-memory buffer as
        ``outFile`` (help text switched off).

        Returns the text collected in that buffer when validation reports
        failure, and None when it reports success.
        """
        import gold.gsuite.GSuiteParser as GSuiteParser
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from cStringIO import StringIO

        report = StringIO()
        gSuiteFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
        isValid = GSuiteParser.validate(
            gSuiteFn, outFile=report, printHelpText=False)

        if isValid:
            # Nothing to report.
            return None
        return report.getvalue()
Example #3
0
    def testParseAndCompose(self):
        """Round-trip a full GSuite document through parser and composer.

        The document holds the four header lines, a column specification
        line and seven track lines covering the ftp, http, https, rsync,
        hb, galaxy and file URI schemes. It is split on newlines, parsed
        with GSuiteParser.parseLines and composed back to text with
        GSuiteComposer.composeToString; the composed text must equal the
        input exactly.
        """
        inputContents = (
            '##location: multiple\n'
            '##file format: multiple\n'
            '##track type: unknown\n'
            '##genome: hg18\n'
            '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
            'ftp://server.somewhere.com/path/to/file1.bed\tTrack1\tprimary\tk562\tcMyb\t.\n'
            'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
            'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
            'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
            'hb:/track/name/hierarchy\tTrack5\tpreprocessed\t.\t.\t.\n'
            'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack6\tpreprocessed\tk562\tcMyb\t.\n'
            'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
        )

        # The text ends with '\n', so the split yields a trailing empty
        # string that the parser receives as well.
        gSuite = GSuiteParser.parseLines(inputContents.split('\n'))
        outputContents = GSuiteComposer.composeToString(gSuite)

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(inputContents, outputContents)
Example #4
0
    def testParseColSpecLineDirectlyMoreStdCols(self):
        """Parse a column specification line with mixed-case column names.

        The line mixes capitalised ('Uri', 'Title'), lower-case and
        upper-case ('ANTIBODY') names; GSuiteParser._parseColumnSpecLine
        is expected to return all of them lower-cased, in input order.
        """
        colSpecLine = '###Uri\tTitle\tfile_format\ttrack_type\tgenome\tANTIBODY'
        expectedCols = [
            'uri', 'title', 'file_format', 'track_type', 'genome', 'antibody'
        ]

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(expectedCols,
                         GSuiteParser._parseColumnSpecLine(colSpecLine))
Example #5
0
    def testParseColSpecLineDirectly(self):
        """Parse a minimal column specification line.

        A '###'-prefixed, tab-separated line with the columns 'uri' and
        'antibody' must come back from GSuiteParser._parseColumnSpecLine
        as the list of those two names.
        """
        colSpecLine = '###uri\tantibody'

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(['uri', 'antibody'],
                         GSuiteParser._parseColumnSpecLine(colSpecLine))
Example #6
0
    def testParseTrackLineFull(self):
        """Parse seven track lines, one per URI scheme, and check each track.

        The input has a column specification line (uri, title, cell,
        antibody) and no header lines. For every parsed track the URI
        components, title, location, file format, track type, genome and
        custom attributes are compared with the expected values via
        self._commonAssertTrack. The suite-level location, file format,
        track type, genome and attribute list are checked afterwards, and
        finally the same text is passed to GSuiteParser.validateFromString.
        """
        contents = \
            '###uri\ttitle\tcell\tantibody\n' \
            'ftp://server.somewhere.com/path/to/file1.bed.gz\tTrack\tk562\tcMyb\n' \
            'http://server.other.com/path/to/file2.bed?query=something\tTrack2\tGM12878\tcMyc\n' \
            'https://server.other.com/path/to/file3.bed?query=something\tTrack3\tGM12878\tcMyb\n' \
            'rsync://server.other.com/path/to/file4;wig\tTrack4\tNHFL\t.\n' \
            'hb:/track/name/hierarchy\tTrack (2)\t.\t.\n' \
            'galaxy:/ad123dd12fg;btrack?track=track:name\tTrack (3)\tk562\tcMyb\n' \
            'file:/path/to/file.btrack?track=track:name\tTrack name7\t.\tcMyb\n'

        gSuite = self._parseContents(contents)

        tracks = list(gSuite.allTracks())
        # assertEqual is used throughout instead of the deprecated
        # assertEquals alias.
        self.assertEqual(7, len(tracks))

        # ftp: the suffix is taken from the last extension ('gz').
        self._commonAssertTrack(
            tracks[0],
            uri='ftp://server.somewhere.com/path/to/file1.bed.gz',
            scheme='ftp',
            netloc='server.somewhere.com',
            path='/path/to/file1.bed.gz',
            query=None,
            suffix='gz',
            trackName=None,
            title='Track',
            location='remote',
            fileFormat='primary',
            trackType='unknown',
            genome='unknown',
            attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]))

        # http: the query string is split off from the path.
        self._commonAssertTrack(
            tracks[1],
            uri='http://server.other.com/path/to/file2.bed?query=something',
            scheme='http',
            netloc='server.other.com',
            path='/path/to/file2.bed',
            query='query=something',
            suffix='bed',
            trackName=None,
            title='Track2',
            location='remote',
            fileFormat='primary',
            trackType='unknown',
            genome='unknown',
            attributes=OrderedDict([('cell', 'GM12878'),
                                    ('antibody', 'cMyc')]))

        # https: same expectations as http apart from the scheme.
        self._commonAssertTrack(
            tracks[2],
            uri='https://server.other.com/path/to/file3.bed?query=something',
            scheme='https',
            netloc='server.other.com',
            path='/path/to/file3.bed',
            query='query=something',
            suffix='bed',
            trackName=None,
            title='Track3',
            location='remote',
            fileFormat='primary',
            trackType='unknown',
            genome='unknown',
            attributes=OrderedDict([('cell', 'NHFL')])
            if False else OrderedDict([('cell', 'GM12878'),
                                       ('antibody', 'cMyb')]))

        # rsync: the ';wig' part supplies the suffix and is not part of the
        # path; the '.' antibody value yields no 'antibody' attribute.
        self._commonAssertTrack(
            tracks[3],
            uri='rsync://server.other.com/path/to/file4;wig',
            scheme='rsync',
            netloc='server.other.com',
            path='/path/to/file4',
            query=None,
            suffix='wig',
            trackName=None,
            title='Track4',
            location='remote',
            fileFormat='primary',
            trackType='unknown',
            genome='unknown',
            attributes=OrderedDict([('cell', 'NHFL')]))

        # hb: no path; the URI path segments form the track name, and both
        # '.' attribute values leave the attributes empty.
        self._commonAssertTrack(tracks[4],
                                uri='hb:/track/name/hierarchy',
                                scheme='hb',
                                netloc=None,
                                path=None,
                                query=None,
                                suffix=None,
                                trackName=['track', 'name', 'hierarchy'],
                                title='Track (2)',
                                location='local',
                                fileFormat='preprocessed',
                                trackType='unknown',
                                genome='unknown',
                                attributes=OrderedDict())

        # galaxy: the expected uri carries '%3A' where the input had ':',
        # while the expected query keeps the plain ':'.
        # NOTE(review): the expected dataset path is presumably produced by
        # a test fixture/stub for Galaxy id resolution -- confirm.
        self._commonAssertTrack(
            tracks[5],
            uri='galaxy:/ad123dd12fg;btrack?track=track%3Aname',
            scheme='galaxy',
            netloc=None,
            path='/path/to/dataset_ad123dd12fg.dat',
            query='track=track:name',
            suffix='btrack',
            trackName=['track', 'name'],
            title='Track (3)',
            location='local',
            fileFormat='preprocessed',
            trackType='unknown',
            genome='unknown',
            attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]))

        # file: same '%3A' expectation for the uri as in the galaxy case.
        self._commonAssertTrack(
            tracks[6],
            uri='file:/path/to/file.btrack?track=track%3Aname',
            scheme='file',
            netloc=None,
            path='/path/to/file.btrack',
            query='track=track:name',
            suffix='btrack',
            trackName=['track', 'name'],
            title='Track name7',
            location='local',
            fileFormat='preprocessed',
            trackType='unknown',
            genome='unknown',
            attributes=OrderedDict([('antibody', 'cMyb')]))

        # Suite-level values: the tracks mix remote/local locations and
        # primary/preprocessed formats, and 'multiple' is expected for both.
        self.assertEqual('multiple', gSuite.location)
        self.assertEqual('multiple', gSuite.fileFormat)
        self.assertEqual('unknown', gSuite.trackType)
        self.assertEqual('unknown', gSuite.genome)
        self.assertEqual(['cell', 'antibody'], gSuite.attributes)

        # NOTE(review): the return value is discarded here, so this only
        # checks that validation runs without raising -- confirm whether
        # the result should be asserted.
        from cStringIO import StringIO
        GSuiteParser.validateFromString(contents, outFile=StringIO())
Example #7
0
 def _parseContents(contents):
     """Split ``contents`` on newlines and parse the resulting lines.

     Returns whatever GSuiteParser.parseLines returns for those lines.
     """
     lines = contents.split('\n')
     return GSuiteParser.parseLines(lines)
def getGSuiteFromGSuiteFile(gSuiteFn):
    """Parse the GSuite file at ``gSuiteFn`` and return the parse result.

    GSuiteParser is imported inside the function, so the import only
    happens when the function is called.
    """
    from gold.gsuite import GSuiteParser

    parsed = GSuiteParser.parse(gSuiteFn)
    return parsed
                attributes=OrderedDict([('a', 'yes'), ('b', 'no')])))

# Demo script (Python 2 print statements). `gSuite` is created above this
# excerpt; the statements below add one more track to it, compose it to text,
# parse that text back and print a few values.

# Build a URI from a three-level track name and add a track with the custom
# attributes 'b' and 'c'.
uri2 = HbGSuiteTrack.generateURI(
    trackName=['Genes and gene subsets', 'Genes', 'Refseq'])
gSuite.addTrack(
    GSuiteTrack(uri2, attributes=OrderedDict([('b', 'no'), ('c', 'yes')])))

gSuite.setGenomeOfAllTracks('hg19')

# Compose the suite to its textual form.
contents = GSuiteComposer.composeToString(gSuite)

print 'GSuite file contents'
print '--------------------'
print contents

# Parse the composed text back into a second suite object.
gSuite2 = GSuiteParser.parseFromString(contents)

print 'Various ways of direct access'
print '-----------------------------'
# NOTE(review): this line reads from the original `gSuite`, not the re-parsed
# `gSuite2` -- confirm that is intended.
print "genome=%s, location=%s, file format=%s, track type=%s, attributes=%s" % \
    (gSuite.genome, gSuite.location, gSuite.fileFormat, gSuite.trackType, gSuite.attributes)

# Per-track values, read from the re-parsed suite.
for track in gSuite2.allTracks():
    print "uri=%s, path=%s, trackName=%s" % (track.uri, track.path,
                                             track.trackName)

# Look a track up by title. The string concatenation requires netloc to be a
# string; 'Track1' is defined above this excerpt.
print "netloc=" + gSuite2.getTrackFromTitle('Track1').netloc

# Index into the original suite's tracks.
# NOTE(review): presumably tracks[1] is the track added with uri2 above --
# that depends on how many tracks were added before this excerpt.
tracks = list(gSuite.allTracks())
print "b=" + tracks[1].attributes['b']