def upload(self, hub_only=False, host=None, user=None, rsync_options=None,
           hub_remote=None):
    """
    Process the group, render its hub and upload it.

    Any of `host`, `user`, `rsync_options` and `hub_remote` that is falsy
    is filled in from the group's 'server' section when that section
    exists; values passed explicitly take precedence.

    If `hub_only` is true, only the hub itself is uploaded and the
    per-track uploads are skipped.
    """
    self.process()

    if 'server' in self.group:
        server = self.group['server']
        if not host:
            host = server.get('host')
        if not user:
            user = server.get('user')
        if not rsync_options:
            rsync_options = server.get('rsync_options')
        if not hub_remote:
            hub_remote = server.get('hub_remote')

    # The hub must know its remote location before it is rendered.
    self.hub.remote_fn = hub_remote
    self.hub.remote_dir = os.path.dirname(hub_remote)
    self.hub.render()

    # The literal string '$USER' stands for the USER environment variable.
    if user == '$USER':
        user = os.environ.get('USER')

    kwargs = {'host': host, 'user': user, 'rsync_options': rsync_options}

    upload_hub(hub=self.hub, **kwargs)
    if not hub_only:
        for leaf, _level in self.hub.leaves(Track):
            upload_track(track=leaf, **kwargs)

    message = "Hub can now be accessed via {0}".format(self.hub.url)
    log(message, style=Fore.BLUE)
def upload(self, hub_only=False, host=None, user=None, rsync_options=None,
           hub_remote=None):
    """
    Run processing, then render and upload the hub (and its tracks).

    Connection settings (`host`, `user`, `rsync_options`, `hub_remote`)
    given as arguments are used as-is; any that are falsy fall back to the
    same-named key of ``self.group['server']`` when the group has a
    'server' entry.

    With `hub_only` set, the tracks are not uploaded.
    """
    self.process()

    settings = {
        'host': host,
        'user': user,
        'rsync_options': rsync_options,
        'hub_remote': hub_remote,
    }
    if 'server' in self.group:
        for key in ('host', 'user', 'rsync_options', 'hub_remote'):
            # Only consult the group config when nothing was passed in.
            settings[key] = settings[key] or self.group['server'].get(key)

    hub_remote = settings.pop('hub_remote')
    self.hub.remote_fn = hub_remote
    self.hub.remote_dir = os.path.dirname(hub_remote)
    self.hub.render()

    # '$USER' is a placeholder resolved from the environment.
    if settings['user'] == '$USER':
        settings['user'] = os.environ.get('USER')

    upload_hub(hub=self.hub, **settings)
    if hub_only:
        tracks = []
    else:
        tracks = self.hub.leaves(Track)
    for track, _ in tracks:
        upload_track(track=track, **settings)

    log("Hub can now be accessed via {0}".format(self.hub.url),
        style=Fore.BLUE)
def create_hub(geo, email=None, upload=False, upload_dir=".", user=None, host=None):
    """
    Create a UCSC track hub from a Geo object.

    One bigWig Track is built per entry of ``geo.samples``; tracks are
    grouped by the sample's 'genome' value and each genome receives its
    own TrackDb.

    Parameters
    ----------
    geo : object exposing ``gse`` and ``samples`` (a mapping whose values
        have 'genome', 'name' and 'gsm' keys).
    email : passed through to the Hub.
    upload : if true, upload every track and then the hub after rendering.
    upload_dir : prefix of each track's ``remote_fn``.
    user, host : passed through to upload_track / upload_hub.
    """
    hub = Hub(
        hub=geo.gse,
        short_label=geo.gse,
        long_label="Hub for {0}".format(geo.gse),
        email=email,
    )
    genomes_file = GenomesFile()
    local_dir = geo.gse

    all_tracks = {}
    for sample in geo.samples.values():
        genome = sample['genome']
        # Collapse every run of non-alphanumeric characters to '_'.
        name = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        track = Track(
            name=name,
            url=os.path.join(HUB_URLBASE, geo.gse, genome,
                             "{0}.bw".format(sample['gsm'])),
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=name,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        basename = os.path.basename(track.url)
        track.local_fn = os.path.join(local_dir, basename)
        track.remote_fn = os.path.join(upload_dir, geo.gse, genome, basename)
        all_tracks.setdefault(genome, []).append(track)

    for build, tracks in all_tracks.items():
        # A fresh TrackDb per genome: sharing one instance across genomes
        # would attach it to several Genome objects and mix every genome's
        # tracks into the same trackDb.
        trackdb = TrackDb()
        genome = Genome(build)
        trackdb.add_tracks(tracks)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)

    hub.add_genomes_file(genomes_file)
    hub.render()

    if upload:
        for tracks in all_tracks.values():
            for track in tracks:
                upload_track(track=track, host=host, user=user)
        upload_hub(hub=hub, host=host, user=user)
def create_hub(geo, email=None, upload=False, upload_dir=".", user=None, host=None):
    """
    Create a UCSC track hub from a Geo object.

    Every sample in ``geo.samples`` becomes a bigWig Track whose URL is
    built from HUB_URLBASE, the series id (``geo.gse``), the sample's
    genome and its GSM id. Tracks are bucketed per genome and each genome
    is given a TrackDb of its own.

    Arguments:
        geo: object with ``gse`` and ``samples`` attributes; sample values
            are indexed by 'genome', 'name' and 'gsm'.
        email: forwarded to the Hub constructor.
        upload: when true, tracks and hub are uploaded after rendering.
        upload_dir: leading directory of each track's ``remote_fn``.
        user, host: forwarded to upload_track and upload_hub.
    """
    series = geo.gse
    hub = Hub(hub=series, short_label=series,
              long_label="Hub for {0}".format(series), email=email)
    genomes_file = GenomesFile()

    tracks_by_genome = {}
    for sample in geo.samples.values():
        build = sample['genome']
        # Track names may only keep letters and digits; the rest become '_'.
        label = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        bigwig_name = "{0}.bw".format(sample['gsm'])
        track = Track(
            name=label,
            url=os.path.join(HUB_URLBASE, series, build, bigwig_name),
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=label,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        basename = os.path.basename(track.url)
        track.local_fn = os.path.join(series, basename)
        track.remote_fn = os.path.join(upload_dir, series, build, basename)
        tracks_by_genome.setdefault(build, []).append(track)

    uploadable = []
    for build, tracks in tracks_by_genome.items():
        # Previously a single TrackDb was reused for every genome, so each
        # genome pointed at the same accumulated track list. Build one per
        # genome instead.
        trackdb = TrackDb()
        genome = Genome(build)
        trackdb.add_tracks(tracks)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)
        uploadable.extend(tracks)

    hub.add_genomes_file(genomes_file)
    hub.render()

    if not upload:
        return
    for track in uploadable:
        upload_track(track=track, host=host, user=user)
    upload_hub(hub=hub, host=host, user=user)
def test_upload(self):
    """
    Render the hub under an 'uploaded_version' prefix, then push the hub
    and each of its Track leaves to localhost with run_local=True.
    """
    staged_fn = os.path.join('uploaded_version', self.hub.remote_fn)
    self.hub.remote_fn = staged_fn
    self.hub.render()

    hub_options = dict(symlink=True, symlink_dir='staging', run_local=True)
    upload.upload_hub('localhost', None, self.hub, **hub_options)

    for leaf, _level in self.hub.leaves(Track):
        upload.upload_track(
            track=leaf, host='localhost', user=None, run_local=True)
def test_upload(self):
    """
    Exercise upload_hub and upload_track against localhost.

    The hub's remote file name is first re-rooted under
    'uploaded_version' and the hub is rendered; the hub upload uses
    symlinking into 'staging'.
    """
    self.hub.remote_fn = os.path.join('uploaded_version', self.hub.remote_fn)
    self.hub.render()

    upload.upload_hub(
        'localhost', None, self.hub,
        symlink=True, symlink_dir='staging', run_local=True,
    )

    track_options = {'host': 'localhost', 'user': None, 'run_local': True}
    for track, _ in self.hub.leaves(Track):
        upload.upload_track(track=track, **track_options)
if track.endswith(".bb") or track.endswith(".bigBed") ] for bigBed_file in bigBed_files: color = "0,100,0" if "pos" in bigBed_file else "100,0,0" base_track = remove_special_chars(os.path.basename(bigBed_file)) long_name = args.sep.join(base_track.split( args.sep)[:args.num_sep]) + ".bb" track = Track(name=long_name, url=os.path.join(URLBASE, GENOME, base_track), tracktype="bigBed", short_label=long_name, long_label=long_name, color=color, local_fn=bigBed_file, remote_fn=os.path.join(upload_dir, GENOME, base_track), visibility="dense") #trackdb.add_tracks(track) supertrack.add_track(track) trackdb.add_tracks(supertrack) result = hub.render() hub.remote_fn = os.path.join(upload_dir, "hub.txt") for track in trackdb.tracks: upload_track(track=track, host=args.server, user=args.user, run_s3=args.no_s3) upload_hub(hub=hub, host=args.server, user=args.user, run_s3=args.no_s3)
def main():
    """
    Build a track hub from bigWig / bigBed files and publish it.

    Command-line arguments:
        files        one or more files to turn into a track hub
        --hub        hub name (required)
        --genome     genome name (required)
        --hub_email  e-mail recorded in the hub
        --sep        separator used to split file names into name parts
        --num_sep    number of leading name parts that define a group

    bigWig files (.posbw, .negbw, .bw, .bigWig, .bigwig) whose first
    ``num_sep`` name parts match are overlaid in one AggregateTrack; each
    bigBed file (.bb, .bigBed, .bigbed) becomes a single Track. All of
    them are collected in one SuperTrack. The hub is rendered, its tracks
    and the hub itself are passed to upload_track / upload_hub with
    run_local=True, and the upload directory is then handed to
    copy_dir_to_aws.

    Server, user, upload directory and URL base used to be options and
    are currently hard-coded below.
    """
    parser = argparse.ArgumentParser(
        description='Takes in files to turn into trackhub. This version automatically ')

    # track files
    parser.add_argument('files', nargs='+', help='Files to turn into track hub')

    # naming
    parser.add_argument('--hub', help="hub name (no spaces)", required=True)
    parser.add_argument('--genome', help="genome name", required=True)

    # hub labels
    parser.add_argument('--hub_email', default='*****@*****.**', help="email for hub")

    # name-part grouping
    parser.add_argument('--sep', default=".", help="Seperator")
    parser.add_argument('--num_sep', default=2, type=int,
                        help="Number of seperators deep to group on")

    args = parser.parse_args()

    # TODO: unhack this, but let's keep all trackhubs here for now.
    urldomain = "s3-us-west-2.amazonaws.com"
    urldir = "yeolab-trackhubs"
    uploaddir = "yeolab-trackhubs"

    hub_name = args.hub
    hub_email = args.hub_email

    # Default label settings: both labels are simply the hub name.
    hub_short_label = hub_name
    hub_long_label = hub_name

    # Hard-coded server and user for the (local) upload step.
    HOST = "localhost"
    USER = "******"

    GENOME = args.genome
    # Hack for the tutorial dataset so it is easy to view in the genome
    # browser. This was previously `GENOME == 'hg19'`, a comparison whose
    # result was discarded, so the remapping never happened.
    if GENOME == 'hg19chr19kbp255':
        GENOME = 'hg19'

    uploaddir = os.path.join(uploaddir, hub_name)
    URLBASE = os.path.join("http://" + urldomain + "/" + urldir + "/", hub_name)

    # Create the hub data structures.
    hub = Hub(
        hub=hub_name,
        short_label=hub_short_label,
        long_label=hub_long_label,
        email=hub_email,
    )
    hub.upload_fn = uploaddir

    genomes_file = GenomesFile()
    hub.add_genomes_file(genomes_file)

    genome = Genome(GENOME)
    genomes_file.add_genome(genome)

    trackdb = TrackDb()
    genome.add_trackdb(trackdb)

    supertrack = SuperTrack(name=hub_name,
                            short_label=hub_short_label,
                            long_label=hub_long_label)

    # Separate bigWigs and bigBeds; anything else is ignored.
    bigwig_suffixes = (".posbw", ".negbw", ".bw", ".bigWig", ".bigwig")
    bigbed_suffixes = (".bb", ".bigBed", ".bigbed")
    bigwig_files = [fn for fn in args.files if fn.endswith(bigwig_suffixes)]
    bigbed_files = [fn for fn in args.files if fn.endswith(bigbed_suffixes)]

    def key_func(filename):
        # Group key: the first `num_sep` separator-delimited name parts.
        return filename.split(args.sep)[:args.num_sep]

    # bigWig files, grouped by key, become aggregate (overlay) tracks.
    # groupby only merges adjacent items, hence the sort on the same key.
    for group_key, group_bigwig_files in groupby(
            sorted(bigwig_files, key=key_func), key_func):
        group_bigwig_files_list = list(group_bigwig_files)

        print("args sep: {}".format(args.sep))
        print("args num sep: {}".format(args.num_sep))
        print("split filename: {}".format(
            bigwig_files[0].split(args.sep)[:args.num_sep]))
        print("-----------------------------------------")
        print("processing bigwig files group with key : {}".format(group_key))
        print("comprised of following files: {}".format(group_bigwig_files_list))
        print("-----------------------------------------")

        group_name = remove_plus_and_pct(
            os.path.basename(args.sep.join(group_key[:args.num_sep])))
        aggregate = AggregateTrack(name=group_name,
                                   tracktype='bigWig',
                                   short_label=group_name,
                                   long_label=group_name,
                                   aggregate='transparentOverlay',
                                   showSubtrackColorOnUi='on',
                                   autoScale='on',
                                   priority='1.4',
                                   alwaysZero='on',
                                   visibility="full")

        for bigwigfile in group_bigwig_files_list:
            print("--------------------------")
            print("bigwigfile {}".format(bigwigfile))
            print("--------------------------")

            base_track = remove_plus_and_pct(os.path.basename(bigwigfile))
            split_track = base_track.split(args.sep)
            # Subtrack name: the group parts plus the last three name parts.
            track_name = args.sep.join(
                split_track[:args.num_sep] + split_track[-3:])
            # Paths containing "pos" are drawn green, all others red.
            color = "0,100,0" if "pos" in bigwigfile else "100,0,0"
            track = Track(name=track_name,
                          url=os.path.join(URLBASE, GENOME, base_track),
                          tracktype="bigWig",
                          short_label=track_name,
                          long_label=track_name,
                          color=color,
                          local_fn=bigwigfile,
                          remote_fn=os.path.join(uploaddir, GENOME, base_track))
            aggregate.add_subtrack(track)

        supertrack.add_track(aggregate)

    # bigBed files are each added as a single track.
    for bigbed_file in bigbed_files:
        color = "0,100,0" if "pos" in bigbed_file else "100,0,0"
        base_track = remove_plus_and_pct(os.path.basename(bigbed_file))
        track_name = args.sep.join(
            base_track.split(args.sep)[:args.num_sep]) + ".bb"
        track = Track(name=track_name,
                      url=os.path.join(URLBASE, GENOME, base_track),
                      tracktype="bigBed",
                      short_label=track_name,
                      long_label=track_name,
                      color=color,
                      local_fn=bigbed_file,
                      remote_fn=os.path.join(uploaddir, GENOME, base_track),
                      visibility="full")
        supertrack.add_track(track)

    trackdb.add_tracks(supertrack)
    hub.render()
    hub.remote_fn = os.path.join(uploaddir, "hub.txt")

    # "Upload" locally (run_local=True), then copy the result to AWS.
    for track in trackdb.tracks:
        upload_track(track=track, host=HOST, user=USER, run_local=True)
    upload_hub(hub=hub, host=HOST, user=USER, run_local=True)

    print("BUCKET: {}".format(uploaddir))
    copy_dir_to_aws(
        src=uploaddir,
        dest=uploaddir,
    )
    print("FINAL URL: {}/hub.txt".format(URLBASE))
if track.endswith(".bw") or track.endswith('.bigWig'): tracktype = "bigWig" if track.endswith(".bb") or track.endswith('.bigBed'): tracktype = "bigBed" if track.endswith(".bam"): tracktype = "bam" print base_track track = Track( name= base_track, url = os.path.join(URLBASE, GENOME, base_track), tracktype = tracktype, short_label=base_track, long_label=base_track, color = color, local_fn = track, remote_fn = os.path.join(upload_dir, GENOME, base_track) ) aggregate.add_subtrack(track) trackdb.add_tracks(aggregate) result = hub.render() hub.remote_fn = os.path.join(upload_dir, "hub.txt") for track in trackdb.tracks: upload_track(track=track, host=args.server, user=args.user) upload_hub(hub=hub, host=args.server, user=args.user)
supertrack.add_track(aggregate) #trackdb.add_tracks(aggregate) bigBed_files = [track for track in remaining_files if track.endswith(".bb") or track.endswith(".bigBed")] for bigBed_file in bigBed_files: color = "0,100,0" if "pos" in bigBed_file else "100,0,0" base_track = remove_special_chars(os.path.basename(bigBed_file)) long_name = args.sep.join(base_track.split(args.sep)[:args.num_sep]) + ".bb" track = Track( name=long_name, url=os.path.join(URLBASE, GENOME, base_track), tracktype="bigBed", short_label=long_name, long_label=long_name, color=color, local_fn=bigBed_file, remote_fn=os.path.join(upload_dir, GENOME, base_track), visibility="dense" ) #trackdb.add_tracks(track) supertrack.add_track(track) trackdb.add_tracks(supertrack) result = hub.render() hub.remote_fn = os.path.join(upload_dir, "hub.txt") for track in trackdb.tracks: upload_track(track=track, host=args.server, user=args.user, run_s3=args.no_s3) upload_hub(hub=hub, host=args.server, user=args.user, run_s3=args.no_s3)
local_fn = datadir + f, remote_fn = remote_dir + bname, url = uploadbase+'bw/'+ bname, shortLabel = '%s_%s' %( name,rep) , color = col) signal_view.add_tracks(track) for b in chipseq_bbfiles: bname = os.path.basename(b) name = bname.split('.')[0] col = maps[name] track = Track( name = '%s_peak' % name, tracktype='bigBed', local_fn = datadir+ f, remote_fn = remote_dir + bname, url = uploadbase + 'bb/'+bname, shortLabel = '%s_peaks' % name, color=col) bed_view.add_tracks(track) #need to add in the RNAseq to same composite groups trackdb.add_tracks(comp) print trackdb hub.render() kwargs = dict(host='kure.its.unc.edu', user='******') upload_hub(hub=hub, **kwargs) for track, level in hub.leaves(Track): upload_track(track=track, **kwargs)
long_label=base_track, color = color, local_fn = track, remote_fn = os.path.join(upload_dir, GENOME, base_track) ) aggregate.add_subtrack(track) trackdb.add_tracks(aggregate) bigBed_files = [track for track in remaining_files if track.endswith(".bb") or track.endswith(".bigBed")] for bigBed_file in bigBed_files: base_track = os.path.basename(bigBed_file) track = Track( name = base_track, url = os.path.join(URLBASE, GENOME, base_track), tracktype = "bigBed", short_label = base_track, long_label = base_track, color = color, local_fn = track, remote_fn = os.path.join(upload_dir, GENOME, base_track) ) result = hub.render() hub.remote_fn = os.path.join(upload_dir, "hub.txt") for track in trackdb.tracks: upload_track(track=track, host=args.server, user=args.user) upload_hub(hub=hub, host=args.server, user=args.user)
kwargs = dict(host='kure.its.unc.edu', user='******') ======= chip_bed_view.add_tracks(track) #for r in rnaseq_bwfiles: # bname = os.path.basename(r) # name, strand = bname.split('.')[0].split('_') # col = rnamaps[bname.split('.')[0]] # track = Track( # name = '%s_%s_signal' % (name, strand), # tracktype = 'bigWig', # local_fn = datadir + 'rna/processed/bw/' + bname, # remote_fn = remote_dir + bname, # url = uploadbase + 'bw/' + bname, # shortLabel = '%s_%s_rnasignal' % (name, strand) , # color = col) # rna_signal_view.add_tracks(track) # # ----------------------------------------------------------------------------- # Add everyhing in the supertrack to the db and upload data to Kure trackdb.add_tracks(comp) print trackdb hub.render() kwargs = dict(host='kure.its.unc.edu', user='******') >>>>>>> e33a5d89c0f5d75afae01dde9452063e2287f8d6 upload_hub(hub=hub, **kwargs) for track, level in hub.leaves(Track): upload_track(track=track, **kwargs)