def setup(self):
    """Assemble the example hub hierarchy shared by the tests.

    Creates a Hub, a GenomesFile, a 'dm3' Genome and a TrackDb, defines three
    tracks whose ``source`` files live under the directory ``d``, and then
    links everything: hub -> genomes file -> genome -> trackdb -> tracks.
    """
    hub = Hub(
        hub='example_hub',
        short_label='example hub',
        long_label='an example hub for testing',
        email='*****@*****.**',
    )
    genomes_file = GenomesFile()
    genome = Genome('dm3')
    trackdb = TrackDb()

    bigbed_track = Track(
        name='track1',
        tracktype='bigBed',
        source=os.path.join(d, 'random-hg38-0.bigBed'),
    )
    bigwig_track = Track(
        name='track2',
        tracktype='bigWig',
        source=os.path.join(d, 'sine-hg38-0.bedgraph.bw'),
    )
    # The third track additionally overrides the output filename.
    renamed_bigwig_track = Track(
        name='track3',
        tracktype='bigWig',
        source=os.path.join(d, 'sine-hg38-1.bedgraph.bw'),
        filename='3.bw',
    )
    tracks = [bigbed_track, bigwig_track, renamed_bigwig_track]

    # Expose the pieces on the test instance.
    self.hub = hub
    self.genomes_file = genomes_file
    self.genome = genome
    self.trackdb = trackdb
    self.tracks = tracks

    # Wire the hierarchy together from the top down.
    hub.add_genomes_file(genomes_file)
    genomes_file.add_genome(genome)
    genome.add_trackdb(trackdb)
    trackdb.add_tracks(tracks)
def setup(self):
    """Assemble the example hub using the ``local_fn``/``remote_fn`` arguments.

    The whole construction runs inside ``pytest.warns(DeprecationWarning)``,
    so the fixture itself fails unless a DeprecationWarning is emitted while
    the objects are being built.
    """
    with pytest.warns(DeprecationWarning):
        hub = Hub(
            hub='example_hub',
            short_label='example hub',
            long_label='an example hub for testing',
            email='*****@*****.**',
        )
        genomes_file = GenomesFile()
        genome = Genome('dm3')
        trackdb = TrackDb()

        bigbed_track = Track(
            name='track1',
            tracktype='bigBed',
            local_fn=os.path.join(d, 'random-hg38-0.bigBed'),
            remote_fn='1.bigbed',
        )
        bigwig_track = Track(
            name='track2',
            tracktype='bigWig',
            local_fn=os.path.join(d, 'sine-hg38-0.bedgraph.bw'),
            remote_fn='2.bw',
        )
        tracks = [bigbed_track, bigwig_track]

        # Expose the pieces on the test instance.
        self.hub = hub
        self.genomes_file = genomes_file
        self.genome = genome
        self.trackdb = trackdb
        self.tracks = tracks

        # Wire the hierarchy together from the top down.
        hub.add_genomes_file(genomes_file)
        genomes_file.add_genome(genome)
        genome.add_trackdb(trackdb)
        trackdb.add_tracks(tracks)
def create_hub(geo, email=None, upload=False, upload_dir=".", user=None, host=None):
    """
    Create an UCSC track hub from a Geo object.

    One bigWig track is built for every sample in ``geo.samples`` and the
    tracks are grouped by the sample's ``'genome'`` value. Every genome build
    gets its own Genome *and its own TrackDb*, so a build's trackDb only lists
    the tracks that belong to that build. The hub is rendered, and optionally
    the tracks and the hub are uploaded.

    Parameters
    ----------
    geo : object exposing ``gse`` (used as hub name and local directory) and
        ``samples`` (a mapping whose values have the keys ``'genome'``,
        ``'name'`` and ``'gsm'``).
    email : contact email passed to the Hub.
    upload : when true, call ``upload_track`` for every track and then
        ``upload_hub``.
    upload_dir : base directory used to build each track's ``remote_fn``.
    user, host : forwarded to ``upload_track`` / ``upload_hub``.
    """
    hub = Hub(
        hub=geo.gse,
        short_label=geo.gse,
        long_label="Hub for {0}".format(geo.gse),
        email=email,
    )
    genomes_file = GenomesFile()
    local_dir = geo.gse

    # Group the tracks by genome build.
    tracks_by_build = {}
    for sample in geo.samples.values():
        build = sample['genome']
        # Track names may only contain alphanumerics and underscores.
        name = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        track = Track(
            name=name,
            url=os.path.join(HUB_URLBASE, geo.gse, build,
                             "{0}.bw".format(sample['gsm'])),
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=name,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        basename = os.path.basename(track.url)
        track.local_fn = os.path.join(local_dir, basename)
        track.remote_fn = os.path.join(upload_dir, geo.gse, build, basename)
        tracks_by_build.setdefault(build, []).append(track)

    for build, tracks in tracks_by_build.items():
        # A fresh TrackDb per build: sharing one instance across genomes would
        # put every build's tracks into every build's trackDb.
        trackdb = TrackDb()
        trackdb.add_tracks(tracks)
        genome = Genome(build)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)

    hub.add_genomes_file(genomes_file)
    hub.render()

    if upload:
        for tracks in tracks_by_build.values():
            for track in tracks:
                upload_track(track=track, host=host, user=user)
        upload_hub(hub=hub, host=host, user=user)
def create_hub(geo, email=None, upload=False, upload_dir=".", user=None, host=None):
    """
    Create an UCSC track hub from a Geo object.

    A bigWig track is created for each sample in ``geo.samples``; tracks are
    grouped by the sample's ``'genome'`` value and each genome build receives
    its own Genome and its own TrackDb, so that a build's trackDb contains
    only that build's tracks. The hub is then rendered and, when requested,
    the tracks and the hub are uploaded.

    Parameters
    ----------
    geo : object exposing ``gse`` (used as hub name and local directory) and
        ``samples`` (a mapping whose values have the keys ``'genome'``,
        ``'name'`` and ``'gsm'``).
    email : contact email passed to the Hub.
    upload : when true, call ``upload_track`` for every track and then
        ``upload_hub``.
    upload_dir : base directory used to build each track's ``remote_fn``.
    user, host : forwarded to ``upload_track`` / ``upload_hub``.
    """
    hub = Hub(
        hub=geo.gse,
        short_label=geo.gse,
        long_label="Hub for {0}".format(geo.gse),
        email=email,
    )
    genomes_file = GenomesFile()
    local_dir = geo.gse

    # Collect the tracks of each genome build.
    all_tracks = {}
    for sample in geo.samples.values():
        build = sample['genome']
        # Replace every run of non-alphanumeric characters with an underscore.
        name = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        track = Track(
            name=name,
            url=os.path.join(HUB_URLBASE, geo.gse, build,
                             "{0}.bw".format(sample['gsm'])),
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=name,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        basename = os.path.basename(track.url)
        track.local_fn = os.path.join(local_dir, basename)
        track.remote_fn = os.path.join(upload_dir, geo.gse, build, basename)
        all_tracks.setdefault(build, []).append(track)

    for build, tracks in all_tracks.items():
        # Each build needs its own TrackDb; a single shared instance would
        # accumulate the tracks of all builds and be attached to every genome.
        trackdb = TrackDb()
        trackdb.add_tracks(tracks)
        genome = Genome(build)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)

    hub.add_genomes_file(genomes_file)
    hub.render()

    if upload:
        for tracks in all_tracks.values():
            for track in tracks:
                upload_track(track=track, host=host, user=user)
        upload_hub(hub=hub, host=host, user=user)
def setup(self):
    """Create the unconnected hub components used by the tests.

    Builds a Hub, GenomesFile, 'dm3' Genome, TrackDb and two tracks (one bam,
    one bigWig) and stores them on the instance; nothing is linked together.
    """
    hub_settings = dict(
        hub='example_hub',
        short_label='example hub',
        long_label='an example hub for testing',
        email='*****@*****.**',
    )
    self.hub = Hub(**hub_settings)
    self.genomes_file = GenomesFile()
    self.genome = Genome('dm3')
    self.trackdb = TrackDb()

    bam_track = Track(name='track1', tracktype='bam')
    bigwig_track = Track(name='track2', tracktype='bigWig')
    self.tracks = [bam_track, bigwig_track]
def setup(self):
    """Assemble a fully linked example hub with two locally sourced tracks.

    Creates a Hub, GenomesFile, 'dm3' Genome and TrackDb plus a bam and a
    bigWig track that point at files under ``data/``, then links them:
    hub -> genomes file -> genome -> trackdb -> tracks.
    """
    hub = Hub(
        hub='example_hub',
        short_label='example hub',
        long_label='an example hub for testing',
        email='*****@*****.**',
    )
    genomes_file = GenomesFile()
    genome = Genome('dm3')
    trackdb = TrackDb()

    bam_track = Track(name='track1', tracktype='bam',
                      local_fn='data/track1.bam')
    bigwig_track = Track(name='track2', tracktype='bigWig',
                         local_fn='data/track2.bigwig')
    tracks = [bam_track, bigwig_track]

    # Expose the pieces on the test instance.
    self.hub = hub
    self.genomes_file = genomes_file
    self.genome = genome
    self.trackdb = trackdb
    self.tracks = tracks

    # Wire the hierarchy together from the top down.
    hub.add_genomes_file(genomes_file)
    genomes_file.add_genome(genome)
    genome.add_trackdb(trackdb)
    trackdb.add_tracks(tracks)
def _add_tracks(data_list, view, default_tracktype):
    """Add one Track to ``view`` for every entry of ``data_list``.

    Each entry contributes its optional ``obj['trackinfo']`` mapping (values
    converted to strings) as extra Track keyword arguments; ``tracktype``
    falls back to ``default_tracktype`` when the entry does not set it.

    NOTE(review): ``sanitized_label`` and ``utils`` are not defined in this
    function - presumably they come from an enclosing scope; confirm.
    """
    for entry in data_list:
        trackinfo = entry.obj.get('trackinfo', {})
        track_kwargs = {key: str(value) for key, value in trackinfo.items()}
        if 'tracktype' not in track_kwargs:
            # The default is used as given, i.e. it is not passed through str().
            track_kwargs['tracktype'] = default_tracktype

        track = Track(
            name=sanitized_label + utils.sanitize(entry.label),
            short_label=entry.label,
            long_label=entry.obj['long_label'],
            local_fn=entry.processed,
            **track_kwargs
        )
        view.add_tracks(track)
def make_bigWig_tracks(signal_view, url_base):
    """Add a bigWig signal track to ``signal_view`` for each ``*.bw`` file.

    Files are discovered with ``glob`` in the current working directory and
    processed in sorted order so the resulting track order is reproducible.

    Parameters
    ----------
    signal_view : view object; ``add_tracks`` is called once per file.
    url_base : string prepended to the file name to form the track URL
        (concatenated as-is, so it should end with a separator).
    """
    for bw in sorted(glob.glob('*.bw')):
        # Strip only the trailing extension. str.replace('.bw', '') would also
        # delete any '.bw' that occurs earlier in the file name.
        label = os.path.splitext(bw)[0]
        basename = os.path.basename(bw)
        track = Track(
            name='signal_%s' % label,
            tracktype='bigWig',
            url=url_base + basename,
            local_fn=bw,
            shortLabel='signal %s' % label,
            longLabel='signal %s' % label,
        )
        # add this track to the signal view
        signal_view.add_tracks(track)
def make_bigBed_tracks(bed_view, url_base):
    """Add a ``bigBed 3`` peak track to ``bed_view`` for each ``*.bigBed`` file.

    Files are discovered with ``glob`` in the current working directory and
    processed in sorted order so the resulting track order is reproducible.

    Parameters
    ----------
    bed_view : view object; ``add_tracks`` is called once per file.
    url_base : string prepended to the file name to form the track URL
        (concatenated as-is, so it should end with a separator).
    """
    for bb in sorted(glob.glob('*.bigBed')):
        basename = os.path.basename(bb)
        # Strip only the trailing extension. str.replace('.bigBed', '') would
        # also delete any '.bigBed' that occurs earlier in the file name.
        label = os.path.splitext(bb)[0]
        track = Track(
            name='peak_%s' % label,
            tracktype='bigBed 3',
            url=url_base + basename,
            local_fn=bb,
            shortLabel='peaks %s' % label,
            longLabel='peaks %s' % label,
        )
        # add this track to the bed view
        bed_view.add_tracks(track)
if track.endswith(".bw") or track.endswith('.bigWig'): tracktype = "bigWig" if track.endswith(".bb") or track.endswith('.bigBed'): tracktype = "bigBed" if track.endswith(".bam"): tracktype = "bam" split_track = base_track.split(args.sep) long_name = args.sep.join(split_track[:args.num_sep] + split_track[-3:]) track = Track(name=long_name, url=os.path.join(URLBASE, GENOME, base_track), tracktype=tracktype, short_label=long_name, long_label=long_name, color=color, local_fn=track, remote_fn=os.path.join(upload_dir, GENOME, base_track)) aggregate.add_subtrack(track) supertrack.add_track(aggregate) #trackdb.add_tracks(aggregate) bigBed_files = [ track for track in remaining_files if track.endswith(".bb") or track.endswith(".bigBed") ] for bigBed_file in bigBed_files:
def main():
    """Build a UCSC track hub from bigWig/bigBed files and publish it.

    Command-line arguments: the input ``files``, ``--hub`` (hub name),
    ``--genome``, ``--hub_email``, and ``--sep`` / ``--num_sep`` which control
    how file names are split to group bigWig files.

    bigWig files are grouped by the first ``num_sep`` ``sep``-separated parts
    of their path; each group becomes a transparent-overlay AggregateTrack.
    bigBed files become individual tracks. Everything is placed in a single
    SuperTrack, the hub is rendered, uploaded "locally" (``run_local=True``)
    and finally copied with ``copy_dir_to_aws``.
    """
    parser = argparse.ArgumentParser(
        description='Takes in files to turn into trackhub. This version automatically ')

    # tracks files
    parser.add_argument('files', nargs='+', help='Files to turn into track hub')

    # namings
    parser.add_argument('--hub', help="hub name (no spaces)", required=True)
    parser.add_argument('--genome', help="genome name", required=True)

    # hub labels
    parser.add_argument('--hub_email', default='*****@*****.**', help="email for hub")

    # name parts grouping
    parser.add_argument('--sep', default=".", help="Seperator")
    parser.add_argument('--num_sep', default=2, type=int,
                        help="Number of seperators deep to group on")

    args = parser.parse_args()

    # TODO: unhack this, but let's keep all trackhubs here for now.
    # (server, user, upload directory and URL used to be command-line options)
    urldomain = "s3-us-west-2.amazonaws.com"
    urldir = "yeolab-trackhubs"
    uploaddir = "yeolab-trackhubs"

    hub_name = args.hub
    hub_email = args.hub_email

    # default label settings
    hub_short_label = hub_name
    hub_long_label = hub_name

    # hard-coded upload host and user
    HOST = "localhost"
    USER = "******"

    GENOME = args.genome
    # hack for tutorial dataset so it is easy to view in ucsd genome browser
    if GENOME == 'hg19chr19kbp255':
        # This was previously written as a comparison (`==`), which made the
        # remapping a no-op.
        GENOME = 'hg19'

    uploaddir = os.path.join(uploaddir, hub_name)
    URLBASE = os.path.join("http://" + urldomain + "/" + urldir + "/", hub_name)

    # create data structures
    hub = Hub(
        hub=hub_name,
        short_label=hub_short_label,
        long_label=hub_long_label,
        email=hub_email,
    )
    hub.upload_fn = uploaddir

    genomes_file = GenomesFile()
    hub.add_genomes_file(genomes_file)

    genome = Genome(GENOME)
    genomes_file.add_genome(genome)

    trackdb = TrackDb()
    genome.add_trackdb(trackdb)

    supertrack = SuperTrack(name=hub_name,
                            short_label=hub_short_label,
                            long_label=hub_long_label)

    # separate bigwigs and bigbeds for different processing methods;
    # files with any other extension are ignored
    bigwig_suffixes = (".posbw", ".negbw", ".bw", ".bigWig", ".bigwig")
    bigbed_suffixes = (".bb", ".bigBed", ".bigbed")
    bigwig_files = [fn for fn in args.files if fn.endswith(bigwig_suffixes)]
    bigbed_files = [fn for fn in args.files if fn.endswith(bigbed_suffixes)]

    def key_func(path):
        """Grouping key: the first ``num_sep`` separator-delimited name parts."""
        return path.split(args.sep)[:args.num_sep]

    # process bigwig files, grouped by their leading name parts, as multitracks
    # (groupby needs its input sorted by the same key)
    for group_key, group_bigwig_files in groupby(
            sorted(bigwig_files, key=key_func), key_func):
        group_bigwig_files_list = list(group_bigwig_files)

        print("args sep: {}".format(args.sep))
        print("args num sep: {}".format(args.num_sep))
        print("split filename: {}".format(
            bigwig_files[0].split(args.sep)[:args.num_sep]))
        print("-----------------------------------------")
        print("processing bigwig files group with key : {}".format(group_key))
        print("comprised of following files: {}".format(group_bigwig_files_list))
        print("-----------------------------------------")

        long_name = remove_plus_and_pct(
            os.path.basename(args.sep.join(group_key[:args.num_sep])))
        aggregate = AggregateTrack(name=long_name,
                                   tracktype='bigWig',
                                   short_label=long_name,
                                   long_label=long_name,
                                   aggregate='transparentOverlay',
                                   showSubtrackColorOnUi='on',
                                   autoScale='on',
                                   priority='1.4',
                                   alwaysZero='on',
                                   visibility="full")

        for bigwigfile in group_bigwig_files_list:
            print("--------------------------")
            print("bigwigfile {}".format(bigwigfile))
            print("--------------------------")

            base_track = remove_plus_and_pct(os.path.basename(bigwigfile))
            split_track = base_track.split(args.sep)
            # leading group parts plus the last three name parts
            long_name = args.sep.join(split_track[:args.num_sep] +
                                      split_track[-3:])
            # files with "pos" in their path are green, all others red
            color = "0,100,0" if "pos" in bigwigfile else "100,0,0"

            track = Track(name=long_name,
                          url=os.path.join(URLBASE, GENOME, base_track),
                          tracktype="bigWig",
                          short_label=long_name,
                          long_label=long_name,
                          color=color,
                          local_fn=bigwigfile,
                          remote_fn=os.path.join(uploaddir, GENOME, base_track))
            aggregate.add_subtrack(track)

        supertrack.add_track(aggregate)

    # process bigbed files as single tracks
    for bigbed_file in bigbed_files:
        color = "0,100,0" if "pos" in bigbed_file else "100,0,0"
        base_track = remove_plus_and_pct(os.path.basename(bigbed_file))
        long_name = args.sep.join(
            base_track.split(args.sep)[:args.num_sep]) + ".bb"

        track = Track(name=long_name,
                      url=os.path.join(URLBASE, GENOME, base_track),
                      tracktype="bigBed",
                      short_label=long_name,
                      long_label=long_name,
                      color=color,
                      local_fn=bigbed_file,
                      remote_fn=os.path.join(uploaddir, GENOME, base_track),
                      visibility="full")
        supertrack.add_track(track)

    trackdb.add_tracks(supertrack)
    hub.render()
    hub.remote_fn = os.path.join(uploaddir, "hub.txt")

    # 'uploading' (locally)
    for track in trackdb.tracks:
        upload_track(track=track, host=HOST, user=USER, run_local=True)
    upload_hub(hub=hub, host=HOST, user=USER, run_local=True)

    print("BUCKET: {}".format(uploaddir))
    copy_dir_to_aws(
        src=uploaddir,
        dest=uploaddir,
    )
    print("FINAL URL: {}/hub.txt".format(URLBASE))
def files2viz(files_to_visualize):
    """Build a track hub for the given ENCODE files and open it in the UCSC browser.

    For every accession in ``files_to_visualize`` the file object is fetched
    with ``get_ENCODE``; files whose ``file_format`` is ``'bigWig'`` become
    bigWig tracks, everything else is ignored. The hub is rendered, the
    ``trackhub`` directory is copied to the hub host with rsync, and the
    resulting hub URL is printed and opened in a web browser.

    Debug output is printed when the module-level ``DEBUG`` flag is true.
    All prints use the single-argument ``print(...)`` form, which behaves the
    same on Python 2 and Python 3.
    """
    import subprocess
    import webbrowser

    from trackhub import Hub, GenomesFile, Genome, TrackDb, Track

    if DEBUG:
        print("Visualizing")
        print(files_to_visualize)

    HUBHOST = 'http://cherry-vm45.stanford.edu'
    HUBDIR = 'trackhubs'
    EDWBASE = 'http://encodedcc.sdsc.edu/warehouse'
    GENOME = 'hg19'

    hub = Hub(hub='Selected_ENCODE_Tracks',
              short_label='Selected_ENCODE_Tracks_short',
              long_label='Selected_ENCODE_Tracks_long',
              email='*****@*****.**')
    genomes_file = GenomesFile()
    genome = Genome(GENOME)
    trackdb = TrackDb()

    for accession in files_to_visualize:
        file_obj = get_ENCODE(accession)
        if DEBUG:
            print(file_obj)
        # Only bigWig files are turned into tracks.
        if file_obj['file_format'] == 'bigWig':
            track = Track(name=accession,
                          url=os.path.join(EDWBASE,
                                           str(file_obj['download_path'])),
                          tracktype='bigWig',
                          short_label=accession,
                          long_label=accession,
                          color='128,0,0',
                          visibility='full')
            trackdb.add_tracks([track])

    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)
    hub.add_genomes_file(genomes_file)
    hub.render()

    if DEBUG:
        print(hub)
        print('...')
        print(genomes_file)
        print('...')
        print(genome)
        print('...')
        print(trackdb)

    # upload_hub(hub=hub, host=HUBHOST, user=USER) doesn't seem to work, so the
    # rendered directory is copied with rsync instead.
    subprocess.call(
        "cd .. && rsync -r trackhub [email protected]:/www/html/trackhubs",
        shell=True)

    hubfile = str(hub.hub) + '.hub.txt'
    UCSC_url = ('http://genome.ucsc.edu/cgi-bin/hgTracks?udcTimeout=1&db=hg19'
                + '&hubUrl=' + os.path.join(HUBHOST, HUBDIR, 'trackhub', hubfile))
    print(UCSC_url)
    webbrowser.open(UCSC_url)
if track.endswith(".bw") or track.endswith('.bigWig'): tracktype = "bigWig" if track.endswith(".bb") or track.endswith('.bigBed'): tracktype = "bigBed" if track.endswith(".bam"): tracktype = "bam" split_track = base_track.split(args.sep) long_name = args.sep.join(split_track[:args.num_sep] + split_track[-3:]) track = Track( name= long_name, url = os.path.join(URLBASE, GENOME, base_track), tracktype = tracktype, short_label=long_name, long_label=long_name, color = color, local_fn = track, remote_fn = os.path.join(upload_dir, GENOME, base_track) ) aggregate.add_subtrack(track) supertrack.add_track(aggregate) #trackdb.add_tracks(aggregate) bigBed_files = [track for track in remaining_files if track.endswith(".bb") or track.endswith(".bigBed")] for bigBed_file in bigBed_files: color = "0,100,0" if "pos" in bigBed_file else "100,0,0" base_track = remove_special_chars(os.path.basename(bigBed_file)) long_name = args.sep.join(base_track.split(args.sep)[:args.num_sep]) + ".bb"
bed_view = ViewTrack(name='Chipseq_bed', view='Bed', visibility='dense', tracktype='bigBed', short_label='peaks') comp.add_view(signal_view) comp.add_view(bed_view) for f in chipseq_bwfiles: name, rep = get_name_and_rep(f) bname = os.path.basename(f) col = maps[name] track = Track(name='%s_%s_signal' % (name, rep), tracktype='bigWig', local_fn=datadir + f, remote_fn=remote_dir + bname, url=uploadbase + 'bw/' + bname, shortLabel='%s_%s' % (name, rep), color=col) signal_view.add_tracks(track) for b in chipseq_bbfiles: bname = os.path.basename(b) name = bname.split('.')[0] col = maps[name] track = Track(name='%s_peak' % name, tracktype='bigBed', local_fn=datadir + f, remote_fn=remote_dir + bname, url=uploadbase + 'bb/' + bname, shortLabel='%s_peaks' % name,
additional_kwargs = {} subgroup['strand'] = direction view = sense_signal_view if direction == 'antisense': additional_kwargs['negateValues'] = 'on' additional_kwargs['viewLimits'] = '-25:0' view = antisense_signal_view else: additional_kwargs['viewLimits'] = '0:25' view.add_tracks( Track(name=sanitize(sample + os.path.basename(bigwig), strict=True), short_label=sample + '_' + direction, long_label=sample + '_' + direction, tracktype='bigWig', subgroups=subgroup, source=bigwig, color=decide_color(sample), altColor=decide_color(sample), maxHeightPixels='8:35:100', **additional_kwargs)) supplemental = hub_config.get('supplemental', []) if supplemental: composite.add_view(supplemental_view) for block in supplemental: supplemental_view.add_tracks(Track(**block)) # Tie everything together composite.add_subgroups(subgroups) trackdb.add_tracks(composite)
for key, val in bait_subgroups.items(): bait_subgroup_values[sanitize(key)].update( [sanitize(val, False)]) if treatment == 'all': local_fn = ( '4cker-output/{comparison}/{kind}_k{k}/' '{bait}_{kind}_k{k}_adaptive_windows.bigbed'.format( **locals())) bait_bed_view.add_tracks( Track( name=sanitize(os.path.basename(local_fn)), tracktype='bigBed 3', local_fn=local_fn, short_label=os.path.basename(local_fn), long_label=os.path.basename(local_fn), subgroups=bait_subgroups, color=color_for_track(local_fn), )) local_fn = ( '4cker-output/{comparison}/{kind}_k{k}/' '{bait}_{kind}_colorized_differential.bigbed'.format( **locals())) bait_bed_view.add_tracks( Track( name=sanitize(os.path.basename(local_fn)), tracktype='bigBed 9', itemRgb='on', local_fn=local_fn,
def test_track_creation(self):
    """A Track exposes the ``local_fn`` it was constructed with."""
    expected_fn = 't0.bam'
    created = Track(name='track0', tracktype='bam', local_fn=expected_fn)
    assert created.local_fn == expected_fn
base_track = os.path.basename(track) color = "0,100,0" if "pos" in track else "100,0,0" if track.endswith(".bw") or track.endswith('.bigWig'): tracktype = "bigWig" if track.endswith(".bb") or track.endswith('.bigBed'): tracktype = "bigBed" if track.endswith(".bam"): tracktype = "bam" print base_track track = Track( name= base_track, url = os.path.join(URLBASE, GENOME, base_track), tracktype = tracktype, short_label=base_track, long_label=base_track, color = color, local_fn = track, remote_fn = os.path.join(upload_dir, GENOME, base_track) ) aggregate.add_subtrack(track) trackdb.add_tracks(aggregate) bigBed_files = [track for track in remaining_files if track.endswith(".bb") or track.endswith(".bigBed")] for bigBed_file in bigBed_files: base_track = os.path.basename(bigBed_file) track = Track( name = base_track, url = os.path.join(URLBASE, GENOME, base_track), tracktype = "bigBed",
comp.add_view(chip_signal_view) comp.add_view(chip_bed_view) #comp_rna.add_view(rna_signal_view) >>>>>>> e33a5d89c0f5d75afae01dde9452063e2287f8d6 # Loop through all files and create tracks - adding to appropriate view # ------------------------------------------------------------------------------- for f in chipseq_bwfiles: bname = os.path.basename(f) name = bname.split('.')[0] col = chipmaps[name] track = Track( name = '%s_signal' % (name) , tracktype='bigWig', local_fn = datadir + 'chip/processed/bw/'+ f, remote_fn = remote_dir + bname, url = uploadbase + 'bw/' + bname, shortLabel = '%s' %(name) , color = col) <<<<<<< HEAD signal_view.add_tracks(track) ======= chip_signal_view.add_tracks(track) >>>>>>> e33a5d89c0f5d75afae01dde9452063e2287f8d6 for b in chipseq_bbfiles: bname = os.path.basename(b) name = bname.split('.')[0] col = chipmaps[name] track = Track( name = '%s_peak' % name,
def main():
    """Build a composite UCSC track hub from a directory of bigBed files.

    Command-line arguments: ``inputdir`` (searched for ``.bb`` files via
    ``get_files_from_dir``), ``outdir`` (staging directory), and the flags
    ``--quiet``, ``--render``, ``--upload``, ``--mm9``, ``--has_strand`` and
    ``--suffix``.

    Every sample returned by ``get_files_from_dir`` becomes a value of the
    ``sample`` subgroup, and each of its files becomes a Track inside a single
    ViewTrack of a CompositeTrack. With ``--render`` the hub is staged to
    ``outdir``; with ``--upload`` it is uploaded with ``upload_hub``.

    Dictionary iteration uses ``.items()`` so the script runs on Python 3 as
    well as Python 2 (``iteritems`` exists only on Python 2).
    """
    parser = argparse.ArgumentParser(
        # argparse's default formatter collapses whitespace in the description.
        description='Make trackhubs for UCSC browser using bigBed files. '
                    'Outputs to CURRENT DIRECTORY.')
    parser.add_argument('inputdir', metavar='INDIR',
                        help='Directory containing bigBed files .bb ending')
    parser.add_argument('outdir', metavar='OUTDIR',
                        help='Directory for staging files')
    parser.add_argument('--quiet', '-q', action='store_true',
                        help='Suppress some print statements')
    parser.add_argument('--render', '-r', action='store_true',
                        help='Render file to current dir')
    parser.add_argument('--upload', '-u', action='store_true',
                        help='Upload file to webserver')
    parser.add_argument('--mm9', '-m', action='store_true',
                        help='Switch from mm10 to mm9')
    parser.add_argument('--has_strand', '-s', action='store_true',
                        help='Bed has strand (changes from 5 columns to 6)')
    parser.add_argument('--suffix', '-S', metavar="trackhub label suffix",
                        default="",
                        help='Suffix to label, for example H3K4me1')
    args = parser.parse_args()

    # store command line arguments for reproducibility
    CMD_INPUTS = ' '.join(['python'] + sys.argv)  # easy printing later
    # store argparse inputs for reproducibility / debugging purposes
    args_dic = vars(args)
    ARG_INPUTS = ' '.join(
        '%s=%s' % (key, val) for key, val in args_dic.items())

    # Print arguments supplied by user
    if not args.quiet:
        print('Command line inputs:')
        print(CMD_INPUTS)
        print('Argparse variables:')
        print(ARG_INPUTS)

    # define constants (hard coded)
    genobuild = "mm9" if args.mm9 else "mm10"
    jsuffix = "%s_%s" % (genobuild, args.suffix)
    print("Assigning prefix: %s" % jsuffix)

    # dirname: motevo_from_peaks/H3K4me1_peaks
    dirname = "motevo_from_peaks/%s_peaks/motevo_motifs_%s" % (args.suffix,
                                                               jsuffix)
    hubname = "motevo_motifs_%s" % jsuffix
    shortlab = "motevo_%s" % jsuffix
    longlab = "Motevo motifs %s" % jsuffix
    email = "*****@*****.**"
    url = "http://upnaesrv1.epfl.ch"
    assay = "bigbed"
    jvis = "dense"
    # bigbed options loaded into ViewTrack
    jspectrum = "on"
    scoremax = 1000
    scoremin = 500

    # define URLs
    url_main = "%s/%s" % (url, dirname)
    url_base = "%s/%s/data" % (url, dirname)
    upload_main = "%s" % hubname
    upload_base = "%s/data" % hubname
    ftype = "bigBed 6" if args.has_strand else "bigBed 5"

    host = "upnaesrv1.epfl.ch"
    user = "******"

    # define constants
    genomebuild = genobuild

    files_dic = get_files_from_dir(args.inputdir, ext=".bb")

    # subgroup mapping: every sample name maps to itself
    samples_dic = {sample: sample for sample in files_dic}

    # init hub genomes file genome trackdb
    hub = Hub(hub=hubname,
              short_label=shortlab,
              long_label=longlab,
              email=email)
    hub.url = os.path.join(url_main, "%s.hub.txt" % hub.hub)

    genomes_file = GenomesFile()
    genome = Genome(genomebuild)
    trackdb = TrackDb()

    # add remote fn
    hub.remote_fn = upload_main
    genomes_file.remote_fn = upload_main
    trackdb.remote_fn = os.path.join(upload_main, genomebuild, "trackDb.txt")

    hub.add_genomes_file(genomes_file)
    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)

    # init composite
    composite = CompositeTrack(name=hubname,
                               short_label=shortlab,
                               long_label=longlab,
                               tracktype=ftype)
    # make subgroups
    subgroups = [
        SubGroupDefinition(name="sample", label="sample",
                           mapping=samples_dic),
    ]
    composite.add_subgroups(subgroups)

    # make viewTrack, a hierarchy containing my files
    view = ViewTrack(
        name="%sViewTrack" % assay,
        view="%s" % assay,
        visibility=jvis,
        tracktype=ftype,
        short_label="%s" % assay,
        long_label="%s assay" % assay,
        # big bed labels
        spectrum=jspectrum,
        scoreMin=scoremin,
        scoreMax=scoremax)
    composite.add_view(view)

    # make one track per file, tagged with the sample it belongs to
    for sample, wfs in files_dic.items():
        for wf in wfs:
            sampname = os.path.basename(wf)
            # track name: file name up to the first dot
            bname = sampname.split(".")[0]
            track = Track(name=bname,
                          tracktype=ftype,
                          url=os.path.join(url_base, "%s" % sampname),
                          local_fn=os.path.abspath(wf),
                          remote_fn=os.path.join(upload_base,
                                                 "%s" % sampname),
                          visibility=jvis,
                          shortLabel=bname,
                          longLabel=bname,
                          spectrum=jspectrum,
                          scoreMin=scoremin,
                          scoreMax=scoremax,
                          subgroups={"sample": sample})
            view.add_tracks(track)
    trackdb.add_tracks(composite)

    print('Track looks like this:')
    print(trackdb)

    if args.render:
        stage_hub(hub, staging=args.outdir)

    if args.upload:
        print('Uploading to [email protected]')
        upload_hub(hub=hub, host=host, user=user,
                   remote_dir="/data/web/sites/motevo_from_peaks")

    print('Subgroups:')
    for sg in subgroups:
        print(sg)

    print("Staging to path: %s" % args.outdir)
def test_track_creation(self):
    """A Track exposes the ``source`` it was constructed with."""
    expected_source = 't0.bam'
    created = Track(name='track0', tracktype='bam', source=expected_source)
    assert created.source == expected_source
subgroup = df[df.loc[:, 'label'] == label].to_dict('records')[0] subgroup = { sanitize(k, strict=True): sanitize(v, strict=True) for k, v in subgroup.items() } subgroup['algorithm'] = 'NA' subgroup['peaks'] = 'no' signal_view.add_tracks( Track( name=sanitize(label + os.path.basename(bigwig), strict=True), short_label=label, long_label=label, tracktype='bigWig', subgroups=subgroup, source=bigwig, color=decide_color(label), altColor=decide_color(label), maxHeightPixels='8:35:100', viewLimits='0:500', )) # The peak-calling runs are effectively keyed by (label, algorithm). There can # be multiple samples for each peak-calling run, and there is always at least # an IP and an input. However UCSC does not support multiple tags for # a subgroup, so we can't just add all relevant tags. # # One option would be to create a separate composite. However, this would # defeat the purpose of including the peaks in the same view such that they can # be sorted alongsize the signal. I think a better option is to identify if # there is a consistent value across the subgroup columns for the IP samples.
def ENCSR2viz(experiments_to_visualize):
    """Build a track hub for the given ENCODE experiments and open it in UCSC.

    For every experiment accession the experiment object is fetched with
    ``get_ENCODE``, then each of its ``'files'`` is fetched as well. Files in
    ``bigWig`` format become bigWig tracks; ``bigBed``, ``broadPeak`` and
    ``narrowPeak`` files become bigBed tracks; all other formats are skipped.
    The hub is rendered, the ``trackhub`` directory is copied to the hub host
    with rsync, and the hub URL is printed and opened in a web browser.

    Debug output is printed when the module-level ``DEBUG`` flag is true.
    All prints use the single-argument ``print(...)`` form, which behaves the
    same on Python 2 and Python 3.
    """
    import subprocess
    import webbrowser

    from trackhub import Hub, GenomesFile, Genome, TrackDb, Track

    if DEBUG:
        print("Visualizing")
        print(experiments_to_visualize)

    HUBHOST = 'http://cherry-vm45.stanford.edu'
    HUBDIR = 'jseth/trackhubs'
    EDWBASE = 'http://encodedcc.sdsc.edu/warehouse'
    GENOME = 'hg19'

    hub = Hub(
        hub='ENCODE',
        short_label='ENCODE',
        long_label='ENCODE',
        email='*****@*****.**')
    genomes_file = GenomesFile()
    genome = Genome(GENOME)
    trackdb = TrackDb()

    bigbed_formats = ['bigBed', 'broadPeak', 'narrowPeak']

    for accession in experiments_to_visualize:
        experiment_obj = get_ENCODE(accession)
        if DEBUG:
            print(experiment_obj)
        for file_id in experiment_obj['files']:
            file_obj = get_ENCODE(file_id)
            file_format = file_obj['file_format']
            if file_format == 'bigWig':
                track_type = 'bigWig'
            elif file_format in bigbed_formats:
                track_type = 'bigBed'
            else:
                # formats that cannot be displayed are skipped
                continue

            track = Track(
                name=str(file_obj['accession']),
                url=os.path.join(EDWBASE, str(file_obj['download_path'])),
                tracktype=track_type,
                long_label=str(file_obj['accession']),
                short_label=str(file_obj['output_type']),
                color='128,0,0',
                visibility='dense',
                metadata='cell_type=primary')
            print(file_obj['accession'])
            trackdb.add_tracks([track])

    genome.add_trackdb(trackdb)
    genomes_file.add_genome(genome)
    hub.add_genomes_file(genomes_file)
    hub.render()

    if DEBUG:
        print(hub)
        print('...')
        print(genomes_file)
        print('...')
        print(genome)
        print('...')
        print(trackdb)

    # upload_hub(hub=hub, host=HUBHOST, user=USER) doesn't seem to work, so the
    # rendered directory is copied with rsync instead.
    subprocess.call(
        "cd .. && rsync -r trackhub [email protected]:/www/html/jseth/trackhubs",
        shell=True)

    hubfile = str(hub.hub) + '.hub.txt'
    UCSC_url = ('http://genome.ucsc.edu/cgi-bin/hgTracks?udcTimeout=1&db=hg19'
                + '&hubUrl=' + os.path.join(HUBHOST, HUBDIR, 'trackhub', hubfile))
    print(UCSC_url)
    webbrowser.open(UCSC_url)