Ejemplo n.º 1
0
    def upload(self,
               hub_only=False,
               host=None,
               user=None,
               rsync_options=None,
               hub_remote=None):
        """Process the group, render its hub, then upload hub and tracks.

        Explicitly passed arguments win.  Any argument left falsy is looked
        up under the same key in ``self.group['server']`` when the group has
        a 'server' entry.  A `user` equal to the literal '$USER' is replaced
        by the USER environment variable.

        Parameters
        ----------
        hub_only : bool
            When true, only the hub is uploaded and the per-track uploads
            are skipped.
        host, user, rsync_options
            Forwarded as keyword arguments to `upload_hub` and
            `upload_track`.
        hub_remote
            Stored as the hub's `remote_fn`; its directory part is stored
            as the hub's `remote_dir`.
        """
        self.process()

        if 'server' in self.group:
            server_cfg = self.group['server']
            if not host:
                host = server_cfg.get('host')
            if not user:
                user = server_cfg.get('user')
            if not rsync_options:
                rsync_options = server_cfg.get('rsync_options')
            if not hub_remote:
                hub_remote = server_cfg.get('hub_remote')

        hub = self.hub
        hub.remote_fn = hub_remote
        hub.remote_dir = os.path.dirname(hub_remote)

        self.hub.render()

        # '$USER' is a placeholder for whoever is running the upload.
        if user == '$USER':
            user = os.environ.get('USER')
        connection = {
            'host': host,
            'user': user,
            'rsync_options': rsync_options,
        }

        upload_hub(hub=self.hub, **connection)
        if not hub_only:
            for track, _level in self.hub.leaves(Track):
                upload_track(track=track, **connection)

        message = "Hub can now be accessed via {0}".format(self.hub.url)
        log(message, style=Fore.BLUE)
Ejemplo n.º 2
0
    def upload(self, hub_only=False, host=None, user=None, rsync_options=None,
               hub_remote=None):
        """Process the group, render the hub and push hub (and tracks) out.

        Each of `host`, `user`, `rsync_options` and `hub_remote` that is
        falsy falls back to the same-named key of ``self.group['server']``,
        provided the group has a 'server' entry.  The literal user '$USER'
        is swapped for the USER environment variable.  With `hub_only` set,
        the individual track uploads are skipped.
        """
        self.process()

        settings = {
            'host': host,
            'user': user,
            'rsync_options': rsync_options,
            'hub_remote': hub_remote,
        }
        if 'server' in self.group:
            # Same lookup order as the argument list; explicit values win.
            for key in ('host', 'user', 'rsync_options', 'hub_remote'):
                settings[key] = settings[key] or self.group['server'].get(key)

        # hub_remote configures the hub itself; the rest goes to the uploaders.
        hub_remote = settings.pop('hub_remote')
        self.hub.remote_fn = hub_remote
        self.hub.remote_dir = os.path.dirname(hub_remote)

        self.hub.render()

        if settings['user'] == '$USER':
            settings['user'] = os.environ.get('USER')

        upload_hub(hub=self.hub, **settings)
        leaves = [] if hub_only else self.hub.leaves(Track)
        for track, _level in leaves:
            upload_track(track=track, **settings)

        log("Hub can now be accessed via {0}".format(self.hub.url),
            style=Fore.BLUE)
Ejemplo n.º 3
0
def create_hub(geo,
               email=None,
               upload=False,
               upload_dir=".",
               user=None,
               host=None):
    """Create a UCSC track hub from a Geo object.

    One bigWig track is created per entry of ``geo.samples`` and grouped by
    the sample's 'genome' value.  The hub is rendered and, when `upload` is
    true, every track and then the hub itself are uploaded with the given
    `host` and `user`.  Nothing is returned.
    """
    series = geo.gse
    hub = Hub(hub=series,
              short_label=series,
              long_label="Hub for {0}".format(series),
              email=email)
    genomes_file = GenomesFile()
    trackdb = TrackDb()

    tracks_by_genome = {}
    for sample in geo.samples.values():
        build = sample['genome']
        bucket = tracks_by_genome.setdefault(build, [])

        # Track names are restricted to alphanumerics and underscores.
        label = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        bigwig_url = os.path.join(HUB_URLBASE, series, build,
                                  "{0}.bw".format(sample['gsm']))
        track = Track(
            name=label,
            url=bigwig_url,
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=label,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        filename = os.path.basename(track.url)
        track.local_fn = os.path.join(series, filename)
        track.remote_fn = os.path.join(upload_dir, series, build, filename)
        bucket.append(track)

    # NOTE(review): a single TrackDb receives the tracks of every genome and
    # is attached to each Genome in turn, and the genomes file is re-added to
    # the hub on every iteration. Confirm this is intended for series that
    # span more than one genome.
    for build, tracks in tracks_by_genome.items():
        genome = Genome(build)
        trackdb.add_tracks(tracks)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)
        hub.add_genomes_file(genomes_file)

    hub.render()

    if not upload:
        return

    for track in trackdb.tracks:
        upload_track(track=track, host=host, user=user)

    upload_hub(hub=hub, host=host, user=user)
Ejemplo n.º 4
0
def create_hub(geo, email=None, upload=False, upload_dir=".", user=None, host=None):
    """Build (and optionally upload) a UCSC track hub for a Geo object.

    Every sample in ``geo.samples`` becomes one bigWig track filed under
    the sample's 'genome'.  After rendering the hub, the tracks and the hub
    are uploaded to `host` as `user` if `upload` is true.  Returns None.
    """

    def build_track(sample, genome):
        # Assemble the bigWig track for one sample, including its local and
        # remote file locations.
        name = re.sub('[^0-9a-zA-Z]+', '_', sample['name'])
        url = os.path.join(HUB_URLBASE, geo.gse, genome,
                           "{0}.bw".format(sample['gsm']))
        new_track = Track(
            name=name,
            url=url,
            tracktype='bigWig',
            short_label=sample['gsm'],
            long_label=name,
            color='128,128,0',
            maxHeightPixels='30:30:11',
        )
        basename = os.path.basename(new_track.url)
        new_track.local_fn = os.path.join(geo.gse, basename)
        new_track.remote_fn = os.path.join(upload_dir, geo.gse, genome,
                                           basename)
        return new_track

    hub = Hub(
        hub=geo.gse,
        short_label=geo.gse,
        long_label="Hub for {0}".format(geo.gse),
        email=email,
    )
    genomes_file = GenomesFile()
    trackdb = TrackDb()

    # genome build -> list of tracks, in first-seen order of the builds
    all_tracks = {}
    for sample in geo.samples.values():
        genome = sample['genome']
        if genome not in all_tracks:
            all_tracks[genome] = []
        all_tracks[genome].append(build_track(sample, genome))

    # NOTE(review): the same TrackDb is filled with every genome's tracks and
    # attached to each Genome, and the genomes file is added to the hub once
    # per genome. Verify this against multi-genome series.
    for build, tracks in all_tracks.items():
        genome = Genome(build)
        trackdb.add_tracks(tracks)
        genome.add_trackdb(trackdb)
        genomes_file.add_genome(genome)
        hub.add_genomes_file(genomes_file)

    hub.render()

    if upload:
        target = dict(host=host, user=user)
        for track in trackdb.tracks:
            upload_track(track=track, **target)
        upload_hub(hub=hub, **target)
Ejemplo n.º 5
0
 def test_upload(self):
     """Render the hub under 'uploaded_version' and upload it locally.

     The hub is uploaded with symlinking into 'staging', then every Track
     leaf is uploaded; all uploads use run_local=True against 'localhost'.
     """
     hub = self.hub
     hub.remote_fn = os.path.join('uploaded_version', hub.remote_fn)
     hub.render()

     upload.upload_hub('localhost', None, hub,
                       symlink=True, symlink_dir='staging', run_local=True)

     for leaf, _level in hub.leaves(Track):
         upload.upload_track(track=leaf,
                             host='localhost',
                             user=None,
                             run_local=True)
Ejemplo n.º 6
0
 def test_upload(self):
     """Exercise a local upload of the hub and of each of its tracks.

     The hub's remote file is moved under 'uploaded_version' before
     rendering.  The hub upload symlinks into 'staging'; every upload runs
     locally (run_local=True) with host 'localhost' and no user.
     """
     relocated = os.path.join('uploaded_version', self.hub.remote_fn)
     self.hub.remote_fn = relocated
     self.hub.render()

     hub_options = dict(symlink=True, symlink_dir='staging', run_local=True)
     upload.upload_hub('localhost', None, self.hub, **hub_options)

     track_options = dict(host='localhost', user=None, run_local=True)
     for t, _ in self.hub.leaves(Track):
         upload.upload_track(track=t, **track_options)
Ejemplo n.º 7
0
        if track.endswith(".bb") or track.endswith(".bigBed")
    ]

    for bigBed_file in bigBed_files:
        color = "0,100,0" if "pos" in bigBed_file else "100,0,0"
        base_track = remove_special_chars(os.path.basename(bigBed_file))
        long_name = args.sep.join(base_track.split(
            args.sep)[:args.num_sep]) + ".bb"
        track = Track(name=long_name,
                      url=os.path.join(URLBASE, GENOME, base_track),
                      tracktype="bigBed",
                      short_label=long_name,
                      long_label=long_name,
                      color=color,
                      local_fn=bigBed_file,
                      remote_fn=os.path.join(upload_dir, GENOME, base_track),
                      visibility="dense")
        #trackdb.add_tracks(track)
        supertrack.add_track(track)
    trackdb.add_tracks(supertrack)
    result = hub.render()
    hub.remote_fn = os.path.join(upload_dir, "hub.txt")

    for track in trackdb.tracks:
        upload_track(track=track,
                     host=args.server,
                     user=args.user,
                     run_s3=args.no_s3)

    upload_hub(hub=hub, host=args.server, user=args.user, run_s3=args.no_s3)
Ejemplo n.º 8
0
def main():
    """Build a track hub from bigWig/bigBed files and publish it.

    Command-line arguments give the input files, the hub name, the genome
    name, the hub e-mail and the grouping rule (``--sep`` / ``--num_sep``).

    * bigWig files are grouped by the first ``--num_sep`` parts of their
      ``--sep``-separated path; each group becomes one transparent-overlay
      AggregateTrack holding one Track per file.
    * each bigBed file becomes a single Track.

    All tracks are collected under one SuperTrack, the hub is rendered,
    tracks and hub are passed to `upload_track` / `upload_hub` with
    ``run_local=True``, and the upload directory is finally handed to
    `copy_dir_to_aws`.
    """
    parser = argparse.ArgumentParser(
        description=
        'Takes in files to turn into trackhub. This version automatically ')

    # input track files
    parser.add_argument('files',
                        nargs='+',
                        help='Files to turn into track hub')

    # naming
    parser.add_argument('--hub', help="hub name (no spaces)", required=True)
    parser.add_argument('--genome', help="genome name", required=True)

    # Upload target, web-access location and hub labels are not configurable
    # here; they are hard-coded further down.
    parser.add_argument('--hub_email',
                        default='*****@*****.**',
                        help="email for hub")

    # grouping of file names into overlay tracks
    parser.add_argument('--sep', default=".", help="Seperator")
    parser.add_argument('--num_sep',
                        default=2,
                        type=int,
                        help="Number of seperators deep to group on")

    args = parser.parse_args()

    # TODO: unhack this, but let's keep all trackhubs here for now.
    urldomain = "s3-us-west-2.amazonaws.com"
    urldir = "yeolab-trackhubs"
    uploaddir = "yeolab-trackhubs"
    hub_name = args.hub
    hub_email = args.hub_email

    # Labels default to the hub name.
    hub_short_label = hub_name
    hub_long_label = hub_name

    # Hard-coded upload host and user (uploads are run locally).
    HOST = "localhost"
    USER = "******"

    GENOME = args.genome
    # Hack for the tutorial dataset so it is easy to view in the genome
    # browser: remap it to hg19. This must be an assignment; a bare `==`
    # comparison would leave GENOME unchanged.
    if GENOME == 'hg19chr19kbp255':
        GENOME = 'hg19'

    uploaddir = os.path.join(uploaddir, hub_name)

    URLBASE = os.path.join("http://" + urldomain + "/" + urldir + "/",
                           hub_name)

    # create data structures
    hub = Hub(
        hub=hub_name,
        short_label=hub_short_label,
        long_label=hub_long_label,
        email=hub_email,
    )
    hub.upload_fn = uploaddir

    genomes_file = GenomesFile()
    hub.add_genomes_file(genomes_file)

    genome = Genome(GENOME)
    genomes_file.add_genome(genome)

    trackdb = TrackDb()
    genome.add_trackdb(trackdb)

    supertrack = SuperTrack(name=hub_name,
                            short_label=hub_short_label,
                            long_label=hub_long_label)

    # Separate bigWigs and bigBeds; anything else is ignored.
    bigwig_suffixes = (".posbw", ".negbw", ".bw", ".bigWig", ".bigwig")
    bigbed_suffixes = (".bb", ".bigBed", ".bigbed")
    bigwig_files = [
        path for path in args.files if path.endswith(bigwig_suffixes)
    ]
    bigbed_files = [
        path for path in args.files if path.endswith(bigbed_suffixes)
    ]

    def key_func(path):
        # Grouping key: the leading `num_sep` separator-delimited parts.
        return path.split(args.sep)[:args.num_sep]

    # groupby only merges adjacent items, so sort by the same key first.
    for group_key, group_bigwig_files in groupby(
            sorted(bigwig_files, key=key_func), key_func):

        group_bigwig_files_list = list(group_bigwig_files)
        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print("args sep: {}".format(args.sep))
        print("args num sep: {}".format(args.num_sep))
        print("split filename: {}".format(bigwig_files[0].split(
            args.sep)[:args.num_sep]))
        print("-----------------------------------------")
        print("processing bigwig files group with key : {}".format(group_key))
        print("comprised of following files: {}".format(
            group_bigwig_files_list))
        print("-----------------------------------------")

        long_name = remove_plus_and_pct(
            os.path.basename(args.sep.join(group_key[:args.num_sep])))
        aggregate = AggregateTrack(name=long_name,
                                   tracktype='bigWig',
                                   short_label=long_name,
                                   long_label=long_name,
                                   aggregate='transparentOverlay',
                                   showSubtrackColorOnUi='on',
                                   autoScale='on',
                                   priority='1.4',
                                   alwaysZero='on',
                                   visibility="full")

        for bigwigfile in group_bigwig_files_list:
            print("--------------------------")
            print("bigwigfile {}".format(bigwigfile))
            print("--------------------------")
            base_track = remove_plus_and_pct(os.path.basename(bigwigfile))
            split_track = base_track.split(args.sep)
            # Track name: the group prefix plus the last three name parts.
            long_name = args.sep.join(split_track[:args.num_sep] +
                                      split_track[-3:])
            # Files with "pos" in their path are green, all others red.
            color = "0,100,0" if "pos" in bigwigfile else "100,0,0"
            track = Track(name=long_name,
                          url=os.path.join(URLBASE, GENOME, base_track),
                          tracktype="bigWig",
                          short_label=long_name,
                          long_label=long_name,
                          color=color,
                          local_fn=bigwigfile,
                          remote_fn=os.path.join(uploaddir, GENOME,
                                                 base_track))
            aggregate.add_subtrack(track)
        supertrack.add_track(aggregate)

    # process bigbed files as single tracks
    for bigbed_file in bigbed_files:
        color = "0,100,0" if "pos" in bigbed_file else "100,0,0"
        base_track = remove_plus_and_pct(os.path.basename(bigbed_file))
        long_name = args.sep.join(base_track.split(
            args.sep)[:args.num_sep]) + ".bb"
        track = Track(name=long_name,
                      url=os.path.join(URLBASE, GENOME, base_track),
                      tracktype="bigBed",
                      short_label=long_name,
                      long_label=long_name,
                      color=color,
                      local_fn=bigbed_file,
                      remote_fn=os.path.join(uploaddir, GENOME, base_track),
                      visibility="full")
        supertrack.add_track(track)

    trackdb.add_tracks(supertrack)
    hub.render()
    hub.remote_fn = os.path.join(uploaddir, "hub.txt")

    # "Upload" locally, then hand the resulting directory to copy_dir_to_aws.
    for track in trackdb.tracks:
        upload_track(track=track, host=HOST, user=USER, run_local=True)

    upload_hub(hub=hub, host=HOST, user=USER, run_local=True)

    print("UPLOADDIR: {}".format(uploaddir))
    print("BUCKET: {}".format(uploaddir))
    copy_dir_to_aws(
        src=uploaddir,
        dest=uploaddir,
    )
    print("FINAL URL: {}/hub.txt".format(URLBASE))
Ejemplo n.º 9
0
                    
                    if track.endswith(".bw") or track.endswith('.bigWig'):
                        tracktype = "bigWig"
                    if track.endswith(".bb") or track.endswith('.bigBed'):
                        tracktype = "bigBed"
                    if track.endswith(".bam"):
                        tracktype = "bam"

                    print base_track
                    track = Track(
                          name= base_track,
                          url = os.path.join(URLBASE, GENOME, base_track),
                          tracktype = tracktype,
                          short_label=base_track,
                          long_label=base_track,
                          color = color,
                          local_fn = track,
                          remote_fn = os.path.join(upload_dir, GENOME, base_track)
                          )
           
                    aggregate.add_subtrack(track)
            trackdb.add_tracks(aggregate)
    
    result = hub.render()
    hub.remote_fn = os.path.join(upload_dir, "hub.txt") 
    for track in trackdb.tracks:

        upload_track(track=track, host=args.server, user=args.user)
    
    upload_hub(hub=hub, host=args.server, user=args.user)
Ejemplo n.º 10
0
        supertrack.add_track(aggregate)
        #trackdb.add_tracks(aggregate)
    
    bigBed_files = [track for track in remaining_files if track.endswith(".bb") or track.endswith(".bigBed")]

    for bigBed_file in bigBed_files:
        color = "0,100,0" if "pos" in bigBed_file else "100,0,0"
        base_track = remove_special_chars(os.path.basename(bigBed_file))
        long_name = args.sep.join(base_track.split(args.sep)[:args.num_sep]) + ".bb"
        track = Track(
            name=long_name,
            url=os.path.join(URLBASE, GENOME, base_track),
            tracktype="bigBed",
            short_label=long_name,
            long_label=long_name,
            color=color,
            local_fn=bigBed_file,
            remote_fn=os.path.join(upload_dir, GENOME, base_track),
            visibility="dense"
        )
        #trackdb.add_tracks(track)
        supertrack.add_track(track)
    trackdb.add_tracks(supertrack)
    result = hub.render()
    hub.remote_fn = os.path.join(upload_dir, "hub.txt")

    for track in trackdb.tracks:
        upload_track(track=track, host=args.server, user=args.user, run_s3=args.no_s3)

    upload_hub(hub=hub, host=args.server, user=args.user, run_s3=args.no_s3)
Ejemplo n.º 11
0
      local_fn = datadir + f, 
      remote_fn = remote_dir + bname, 
      url = uploadbase+'bw/'+ bname,
      shortLabel = '%s_%s' %( name,rep) , 
      color = col)
   signal_view.add_tracks(track)

# Create one bigBed track per file in chipseq_bbfiles and add it to bed_view.
for b in chipseq_bbfiles:  
   bname = os.path.basename(b)
   # Sample name is the file name up to its first dot; it also selects the
   # track colour from `maps`.
   name = bname.split('.')[0]
   col = maps[name]
   track = Track(
      name = '%s_peak' % name,
      tracktype='bigBed',
      # NOTE(review): `f` is never assigned in this loop, so this uses
      # whatever value `f` was left with by earlier code. Presumably the
      # path should be derived from `b`/`bname` -- confirm.
      local_fn = datadir+ f, 
      remote_fn = remote_dir + bname, 
      url = uploadbase + 'bb/'+bname,  
      shortLabel = '%s_peaks' % name, 
      color=col) 
   bed_view.add_tracks(track)

# TODO: the RNA-seq tracks still need to be added to the same composite groups.
trackdb.add_tracks(comp)
# Single-argument call form: prints the same on Python 2 and is valid on
# Python 3, unlike the bare print statement.
print(trackdb)
hub.render()

# Upload the hub, then every Track leaf, with the same connection settings.
kwargs = dict(host='kure.its.unc.edu', user='******')
upload_hub(hub=hub, **kwargs)
for track, level in hub.leaves(Track):
   upload_track(track=track, **kwargs)
Ejemplo n.º 12
0
                          long_label=base_track,
                          color = color,
                          local_fn = track,
                          remote_fn = os.path.join(upload_dir, GENOME, base_track)
                          )
           
                    aggregate.add_subtrack(track)
            trackdb.add_tracks(aggregate)
    
    bigBed_files = [track for track in remaining_files if track.endswith(".bb") or track.endswith(".bigBed")]
    for bigBed_file in bigBed_files:
        base_track = os.path.basename(bigBed_file)
        track = Track(
            name = base_track,
            url = os.path.join(URLBASE, GENOME, base_track),
            tracktype = "bigBed",
            short_label = base_track,
            long_label = base_track,
            color = color,
            local_fn = track,
            remote_fn = os.path.join(upload_dir, GENOME, base_track)
            )

    result = hub.render()
    hub.remote_fn = os.path.join(upload_dir, "hub.txt") 
    for track in trackdb.tracks:

        upload_track(track=track, host=args.server, user=args.user)
    
    upload_hub(hub=hub, host=args.server, user=args.user)
Ejemplo n.º 13
0
kwargs = dict(host='kure.its.unc.edu', user='******')
=======
   chip_bed_view.add_tracks(track)

#for r in rnaseq_bwfiles: 
#   bname = os.path.basename(r)
#   name, strand = bname.split('.')[0].split('_')
#   col = rnamaps[bname.split('.')[0]] 
#   track = Track(
#      name = '%s_%s_signal' % (name, strand), 
#      tracktype = 'bigWig', 
#      local_fn = datadir + 'rna/processed/bw/' + bname, 
#      remote_fn = remote_dir + bname, 
#      url = uploadbase + 'bw/' + bname,
#      shortLabel = '%s_%s_rnasignal' % (name, strand) , 
#      color = col) 
#   rna_signal_view.add_tracks(track) 
#
# -----------------------------------------------------------------------------
# Add everyhing in the supertrack to the db and upload data to Kure

trackdb.add_tracks(comp)
print trackdb
hub.render()

kwargs = dict(host='kure.its.unc.edu', user='******') 
>>>>>>> e33a5d89c0f5d75afae01dde9452063e2287f8d6
upload_hub(hub=hub, **kwargs) 
for track, level in hub.leaves(Track):  
   upload_track(track=track, **kwargs)