Beispiel #1
0
def set2rank2(args):
    """Run GSEA Preranked for one rank file / gene set pair, reusing a cache.

    Args:
        args: mapping with the keys ``'rnkfile'``, ``'gsetfile'``,
            ``'cachedir'`` and ``'gseajar'``. Each value is a path that is
            user-expanded and made absolute before use.

    Returns:
        The result of ``_check_gsea(cachedir)`` with the entries ``gset``,
        ``gset_type``, ``rank`` and ``rank_type`` set and ``cachedir``
        removed, or ``False`` when java exits with status 1 or
        ``_check_gsea`` reports no usable result after the run.
    """
    rnkfile = Path(args['rnkfile']).expanduser().abspath()
    gsetfile = Path(args['gsetfile']).expanduser().abspath()
    cachedir = Path(args['cachedir']).expanduser().abspath()
    gseajar = Path(args['gseajar']).expanduser().abspath()

    # Derive the labels once, from the paths the caller supplied. A '.list'
    # gene set is symlinked into cachedir further down and ``gsetfile`` is
    # rebound to that link; reading it lazily would label a fresh run with
    # the cache directory name but a cache hit with the real parent directory.
    gset_name = str(gsetfile.basename()).replace(".grp", '')
    gset_name = gset_name.replace(".list", '')
    gset_type = str(gsetfile.dirname().basename())
    rank_name = str(rnkfile.basename()).replace(".rnk", '')
    rank_type = str(rnkfile.dirname().basename())

    def fix_rv(rv):
        """Attach the gene set / rank labels and drop the cachedir entry."""
        rv['gset'] = gset_name
        rv['gset_type'] = gset_type
        rv['rank'] = rank_name
        rv['rank_type'] = rank_type
        del rv['cachedir']
        return rv

    if os.path.exists(cachedir):
        rv = _check_gsea(cachedir)
        if rv is False:
            # Unusable cache: wipe it and fall through to a fresh run.
            cachedir.rmtree()
        else:
            assert isinstance(rv, pd.DataFrame)
            return fix_rv(rv)

    cachedir.makedirs_p()

    if '.list' in gsetfile:
        # Expose the '.list' file under a '.grp' name inside the cache dir
        # and hand that link to GSEA instead of the original file.
        grpfile = cachedir / (gsetfile.basename().replace('.list', '.grp'))
        os.symlink(gsetfile, grpfile)
        gsetfile = grpfile

    cl = ("""-cp %s
            -Xmx2048m xtools.gsea.GseaPreranked
            -gmx %s -collapse false
            -mode Max_probe -norm meandiv
            -nperm 1000 -rnk %s
            -scoring_scheme weighted -rpt_label my_analysis
            -include_only_symbols true
            -make_sets true -plot_top_x 1
            -rnd_seed timestamp -set_max 9999
            -set_min 4 -zip_report false
            -out %s -gui false """ %
          (gseajar, gsetfile, rnkfile, cachedir)).split()

    import sh

    try:
        java(*cl)
    except sh.ErrorReturnCode_1:
        return False

    rv = _check_gsea(cachedir)
    if rv is False:
        # No usable report was produced; signal failure the same way as a
        # failed java call instead of crashing inside fix_rv.
        return False
    return fix_rv(rv)
Beispiel #2
0
def set2rank2(args):
    """Run GSEA Preranked for one rank file / gene set pair, reusing a cache.

    Args:
        args: mapping with the keys ``'rnkfile'``, ``'gsetfile'``,
            ``'cachedir'`` and ``'gseajar'``. Each value is a path that is
            user-expanded and made absolute before use.

    Returns:
        The result of ``_check_gsea(cachedir)`` with the entries ``gset``,
        ``gset_type``, ``rank`` and ``rank_type`` set and ``cachedir``
        removed, or ``False`` when java exits with status 1 or
        ``_check_gsea`` reports no usable result after the run.
    """
    rnkfile = Path(args['rnkfile']).expanduser().abspath()
    gsetfile = Path(args['gsetfile']).expanduser().abspath()
    cachedir = Path(args['cachedir']).expanduser().abspath()
    gseajar = Path(args['gseajar']).expanduser().abspath()

    # Derive the labels once, from the paths the caller supplied. A '.list'
    # gene set is symlinked into cachedir further down and ``gsetfile`` is
    # rebound to that link; reading it lazily would label a fresh run with
    # the cache directory name but a cache hit with the real parent directory.
    gset_name = str(gsetfile.basename()).replace(".grp", '')
    gset_name = gset_name.replace(".list", '')
    gset_type = str(gsetfile.dirname().basename())
    rank_name = str(rnkfile.basename()).replace(".rnk", '')
    rank_type = str(rnkfile.dirname().basename())

    def fix_rv(rv):
        """Attach the gene set / rank labels and drop the cachedir entry."""
        rv['gset'] = gset_name
        rv['gset_type'] = gset_type
        rv['rank'] = rank_name
        rv['rank_type'] = rank_type
        del rv['cachedir']
        return rv

    if os.path.exists(cachedir):
        rv = _check_gsea(cachedir)
        if rv is False:
            # Unusable cache: wipe it and fall through to a fresh run.
            cachedir.rmtree()
        else:
            assert isinstance(rv, pd.DataFrame)
            return fix_rv(rv)

    cachedir.makedirs_p()

    if '.list' in gsetfile:
        # Expose the '.list' file under a '.grp' name inside the cache dir
        # and hand that link to GSEA instead of the original file.
        grpfile = cachedir / (gsetfile.basename().replace('.list', '.grp'))
        os.symlink(gsetfile, grpfile)
        gsetfile = grpfile

    cl = ("""-cp %s
            -Xmx2048m xtools.gsea.GseaPreranked
            -gmx %s -collapse false
            -mode Max_probe -norm meandiv
            -nperm 1000 -rnk %s
            -scoring_scheme weighted -rpt_label my_analysis
            -include_only_symbols true
            -make_sets true -plot_top_x 1
            -rnd_seed timestamp -set_max 9999
            -set_min 4 -zip_report false
            -out %s -gui false """ % (
                gseajar, gsetfile, rnkfile, cachedir)).split()

    import sh

    try:
        java(*cl)
    except sh.ErrorReturnCode_1:
        return False

    rv = _check_gsea(cachedir)
    if rv is False:
        # No usable report was produced; signal failure the same way as a
        # failed java call instead of crashing inside fix_rv.
        return False
    return fix_rv(rv)
Beispiel #3
0
def run(rnk, database,
        outpath='~/data/rat/gsea_output',
        gseajar="~/Desktop/gsea2-2.2.0.jar"):
    """Run GSEA Preranked unless results exist, then print the positive report.

    Args:
        rnk: object whose ``head().apply(...)`` output is folded into the
            run id -- presumably a pandas Series of rank values (confirm).
        database: key into ``GSEA_DB`` selecting the gene set database.
        outpath: base output directory; results go into a subdirectory
            named after the 9-character run id.
        gseajar: path to the GSEA jar handed to ``java -cp``.

    Raises:
        IndexError: if no ``gsea_report_for_na_pos_*.xls`` file is found
            below the run directory.
    """
    # NOTE(review): ``rnkfile`` is neither a parameter nor a local of this
    # function; it must exist at module level or this raises NameError.
    ctx = dict(
        rnkfile=Path(rnkfile).expanduser().abspath(),
        gseajar=Path(gseajar).expanduser().abspath(),
        outpath=outpath,
        setdb=GSEA_DB[database])

    uid = sha1()

    # hashlib objects only accept bytes on Python 3, so encode like the
    # other update() calls below do.
    # NOTE(review): hash() of strings is salted per process on Python 3
    # unless PYTHONHASHSEED is fixed, so this component can make the uid
    # differ between runs -- confirm whether that is intended.
    head_hash = hash(frozenset(rnk.head().apply(lambda x: str(x))))
    uid.update(str(head_hash).encode('UTF-8'))

    rnkshasum = shasum(ctx['rnkfile'])
    uid.update(rnkshasum.encode('UTF-8'))
    uid.update(ctx['setdb'].encode('UTF-8'))
    uid = uid.hexdigest()[:9]

    outpath = Path(outpath).expanduser().abspath() / uid

    if not outpath.exists():
        os.makedirs(outpath)

    ctx['outpath'] = outpath
    # Only launch GSEA when this run directory holds no report files yet.
    if not glob.glob(outpath / '*/*.xls'):

        print("# start gsea run (uid=%s)" % uid)
        sys.stdout.flush()

        cl = '''
            -cp {gseajar}           -Xmx2048M
            xtools.gsea.GseaPreranked
            -gmx {setdb}            -collapse false
            -mode Max_probe         -norm meandiv
            -nperm 1000             -scoring_scheme weighted
            -rpt_label my_analysis  -include_only_symbols true
            -make_sets true         -plot_top_x 20
            -rnd_seed timestamp     -set_max 1000
            -set_min 10             -zip_report false
            -out {outpath}          -gui false
            -rnk {rnkfile}   '''.strip().format(**ctx).split()
        java(*cl)

    # Load the positive-side report and show a preview.
    posfile = glob.glob(outpath / '*/gsea_report_for_na_pos_*.xls')[0]
    posdata = pd.read_csv(posfile, sep="\t")
    print(posdata.head())
    print(posfile)
Beispiel #4
0
def run(rnk,
        database,
        outpath='~/data/rat/gsea_output',
        gseajar="~/Desktop/gsea2-2.2.0.jar"):
    """Run GSEA Preranked unless results exist, then print the positive report.

    Args:
        rnk: object whose ``head().apply(...)`` output is folded into the
            run id -- presumably a pandas Series of rank values (confirm).
        database: key into ``GSEA_DB`` selecting the gene set database.
        outpath: base output directory; results go into a subdirectory
            named after the 9-character run id.
        gseajar: path to the GSEA jar handed to ``java -cp``.

    Raises:
        IndexError: if no ``gsea_report_for_na_pos_*.xls`` file is found
            below the run directory.
    """
    # NOTE(review): ``rnkfile`` is neither a parameter nor a local of this
    # function; it must exist at module level or this raises NameError.
    ctx = dict(rnkfile=Path(rnkfile).expanduser().abspath(),
               gseajar=Path(gseajar).expanduser().abspath(),
               outpath=outpath,
               setdb=GSEA_DB[database])

    uid = sha1()

    # hashlib objects only accept bytes on Python 3, so encode like the
    # other update() calls below do.
    # NOTE(review): hash() of strings is salted per process on Python 3
    # unless PYTHONHASHSEED is fixed, so this component can make the uid
    # differ between runs -- confirm whether that is intended.
    head_hash = hash(frozenset(rnk.head().apply(lambda x: str(x))))
    uid.update(str(head_hash).encode('UTF-8'))

    rnkshasum = shasum(ctx['rnkfile'])
    uid.update(rnkshasum.encode('UTF-8'))
    uid.update(ctx['setdb'].encode('UTF-8'))
    uid = uid.hexdigest()[:9]

    outpath = Path(outpath).expanduser().abspath() / uid

    if not outpath.exists():
        os.makedirs(outpath)

    ctx['outpath'] = outpath
    # Only launch GSEA when this run directory holds no report files yet.
    if not glob.glob(outpath / '*/*.xls'):

        print("# start gsea run (uid=%s)" % uid)
        sys.stdout.flush()

        cl = '''
            -cp {gseajar}           -Xmx2048M
            xtools.gsea.GseaPreranked
            -gmx {setdb}            -collapse false
            -mode Max_probe         -norm meandiv
            -nperm 1000             -scoring_scheme weighted
            -rpt_label my_analysis  -include_only_symbols true
            -make_sets true         -plot_top_x 20
            -rnd_seed timestamp     -set_max 1000
            -set_min 10             -zip_report false
            -out {outpath}          -gui false
            -rnk {rnkfile}   '''.strip().format(**ctx).split()
        java(*cl)

    # Load the positive-side report and show a preview.
    posfile = glob.glob(outpath / '*/gsea_report_for_na_pos_*.xls')[0]
    posdata = pd.read_csv(posfile, sep="\t")
    print(posdata.head())
    print(posfile)
Beispiel #5
0
 def remove_duplicates(self):
     """Run MarkDuplicates.jar through java with REMOVE_DUPLICATES=TRUE.

     Reads ``self.p.map_s_bam`` and writes ``self.p.map_smd_bam`` plus the
     metrics file ``self.p.map_smd_metrics``.
     """
     # Fixed JVM heap size, in gigabytes.
     heap_gb = "16"
     jvm_flags = [
         '-Xmx%sg' % heap_gb,
         '-XX:ParallelGCThreads=%s' % nr_threads,
         '-XX:+CMSClassUnloadingEnabled',
     ]
     jar_path = gefes.repos_dir + 'bin/MarkDuplicates.jar'
     tool_opts = [
         'INPUT=%s' % self.p.map_s_bam,
         'OUTPUT=%s' % self.p.map_smd_bam,
         'METRICS_FILE=%s' % self.p.map_smd_metrics,
         'AS=TRUE',
         'VALIDATION_STRINGENCY=LENIENT',
         'MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000',
         'REMOVE_DUPLICATES=TRUE',
     ]
     # JVM flags first, then the jar, then the tool's KEY=VALUE options.
     sh.java(*(jvm_flags + ['-jar', jar_path] + tool_opts))
Beispiel #6
0
def start_iota(node=1):
    """Launch ``node<N>/iri-1.5.5.jar`` in the background, then pause 8 seconds.

    The java process's standard output is written to ``node<N>/iri.log``.
    """
    iri_jar = "node%d/iri-1.5.5.jar" % node
    iri_log = "node%d/iri.log" % node

    iri_args = [
        "-jar", iri_jar,
        "-p", "14700",
        "-u", "14700",
        "-t", "14701",
        "-n", "udp://localhost:14699", "udp://localhost:14701",
        "--testnet",
        "--testnet-no-coo-validation",
        "--snapshot=./Snapshot.txt",
        "--mwm", "1",
        "--walk-validator", "NULL",
        "--ledger-validator", "NULL",
        "--max-peers", "40",
        "--remote",
        "--ipfs-txns", "false",
        "--batch-txns",
    ]
    # Keep a handle on the background command for the duration of the call.
    _proc = sh.java(*iri_args, _out=iri_log, _bg=True)

    # Fixed pause after launch; presumably gives the node time to come up.
    time.sleep(8)
Beispiel #7
0
    def _run_fixer(self, vcf_dir):
        sample = self.sample_lookup.get(os.path.basename(vcf_dir), None)
        if not sample:
            raise ValueError("Could not find lane %s in the lookup table "
                             "%s" % (self.sample_lookup))

        out_file = os.path.join(vcf_dir, "Variations", sample + ".vcf")
        if file_exists(out_file):
            return out_file

        sh.java(self.java_memory,
                "-jar", self.fixer, "variant-utils", "illumina", vcf_dir,
                sample, self.grc_file, self.ucsc_file,
                _out=os.path.join(vcf_dir, sample + ".out"),
                _err=os.path.join(vcf_dir, sample + ".err"))

        return out_file
Beispiel #8
0
 def remove_duplicates(self):
     """Run picard-tools' MarkDuplicates.jar with REMOVE_DUPLICATES=TRUE.

     Reads ``self.p.map_s_bam`` and writes ``self.p.map_smd_bam`` plus the
     metrics file ``self.p.map_smd_metrics``.
     """
     # Heap is sized at two gigabytes per thread; the initial heap and the
     # perm-gen limit both use 30% of that, with a floor of one gigabyte.
     heap_gb = nr_threads * 2
     perm_gb = str(max(int(heap_gb * 0.3), 1))
     jvm_flags = [
         '-Xms%sg' % perm_gb,
         '-Xmx%sg' % heap_gb,
         '-XX:ParallelGCThreads=%s' % nr_threads,
         '-XX:MaxPermSize=%sg' % perm_gb,
         '-XX:+CMSClassUnloadingEnabled',
     ]
     jar_path = gefes.repos_dir + 'bin/picard-tools-1.101/MarkDuplicates.jar'
     tool_opts = [
         'INPUT=%s' % self.p.map_s_bam,
         'OUTPUT=%s' % self.p.map_smd_bam,
         'METRICS_FILE=%s' % self.p.map_smd_metrics,
         'AS=TRUE',
         'VALIDATION_STRINGENCY=LENIENT',
         'MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000',
         'REMOVE_DUPLICATES=TRUE',
     ]
     # JVM flags first, then the jar, then the tool's KEY=VALUE options.
     sh.java(*(jvm_flags + ['-jar', jar_path] + tool_opts))
Beispiel #9
0
def start_cromwell():
    """Start ``cromwell-36.jar server`` in the background and wait for it.

    Output (stderr merged into stdout) goes to ``cromwell.log``. The log is
    re-read once per second until it contains "service started on".

    Raises:
        ValueError: if the marker has not appeared once more than 120
            polls have been made.
    """
    launch_args = ["-Dconfig.file=aws.conf", "-jar", "cromwell-36.jar",
                   "server"]
    sh.java(*launch_args, _bg=True, _out="cromwell.log", _err_to_out=True)

    polls = 0
    while True:
        polls += 1
        time.sleep(1)
        with open("cromwell.log") as handle:
            started = "service started on" in handle.read()
        if started:
            return
        # Give up only after the log was checked on this poll as well.
        if polls > 120:
            raise ValueError("cromwell not started after 120 seconds")
Beispiel #10
0
    def _run_fixer(self, vcf_dir):
        sample = self.sample_lookup.get(os.path.basename(vcf_dir), None)
        if not sample:
            raise ValueError("Could not find lane %s in the lookup table "
                             "%s" % (self.sample_lookup))

        out_file = os.path.join(vcf_dir, "Variations", sample + ".vcf")
        if file_exists(out_file):
            return out_file

        sh.java(self.java_memory,
                "-jar",
                self.fixer,
                "variant-utils",
                "illumina",
                vcf_dir,
                sample,
                self.grc_file,
                self.ucsc_file,
                _out=os.path.join(vcf_dir, sample + ".out"),
                _err=os.path.join(vcf_dir, sample + ".err"))

        return out_file
Beispiel #11
0
        " xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"", "")

    xml = objectify.fromstring(response)
    xmltree = xml.getchildren()

    try:
        open('%s/logstash.jar' % dir)
    except IOError, e:
        if e.errno == 13:
            sys.exit("We don't have permission to read/write the file")
        elif e.errno == 2:
            print(
                "The file %s/logstash.jar does not exist, let's figure out which one to get."
                % dir)
    else:
        current_version = sh.java("-jar", "%s/logstash.jar" % dir, "agent",
                                  "-V").split(" ")[1].strip('\n')
        return xmltree, current_version
    finally:
        return xmltree


def updater(url, xmltree, current_version, args):
    # Version as key, with a dict as value. Example:
    # {'1.1.1': {'releasename': 'release/logstash-1.1.1-monolithic.jar'}}
    available_versions = {'latest': {'releasename': None, 'version': '0.0.0'}}

    version_parser = re.compile("release\/logstash-(.*)-monolithic\.jar")

    try:
        if args.version == "latest":
            user_version = "latest"
Beispiel #12
0
def set2rank(rnk,
             gset,
             cachedir="~/data/rat/gsea_output",
             gseajar="~/bin/gsea2-2.2.1.jar",
             force=False,
             verbose=False):
    """Run GSEA Preranked for an in-memory rank and gene set, with caching.

    Args:
        rnk: object providing ``sort_values()`` and ``items()`` --
            presumably a pandas Series mapping gene to score (confirm).
        gset: iterable of gene names (strings; they are newline-joined
            into the gene set file).
        cachedir: base directory; each rank / gene set combination gets
            its own subdirectory named after a hash-derived uid.
        gseajar: path to the GSEA jar handed to ``java -cp``.
        force: when true, delete this input's cache directory first.
        verbose: when true, print the gene count and the uid.

    Returns:
        The ``pd.DataFrame`` produced by ``_check_gsea`` for the run.
    """
    outpath = Path(cachedir).expanduser()
    gseajar = Path(gseajar).expanduser()

    gset = frozenset(gset)
    if verbose:
        print("no genes: %d" % len(gset))

    rnk = rnk.sort_values()
    # Series.items() works across pandas versions; iteritems() was removed
    # in pandas 2.0.
    txtrnk = "\n".join('%s\t%s' % (a, b) for a, b in rnk.items())

    # NOTE(review): hash() of strings is salted per process on Python 3
    # unless PYTHONHASHSEED is fixed, so this uid may not be stable across
    # interpreter runs -- confirm the cache is expected to persist.
    uid = str(abs(hash(gset))) + "_" + str(abs(hash(txtrnk)))
    if verbose:
        print("UID: %s" % uid)
    cachedir = outpath / uid

    # A forced run may target a directory that was never created.
    if force and cachedir.exists():
        cachedir.rmtree()

    cachedir.makedirs_p()

    rv = _check_gsea(cachedir)
    if isinstance(rv, pd.DataFrame):
        return rv

    # No usable cached result: start again from an empty directory.
    if cachedir.exists():
        cachedir.rmtree()
    cachedir.makedirs_p()

    rnkfile = cachedir / 'rank.rnk'
    if not rnkfile.exists():
        with open(rnkfile, 'w') as F:
            F.write(txtrnk)

    gsetfile = cachedir / 'gset.gmx'
    if not gsetfile.exists():
        with open(gsetfile, 'w') as F:
            F.write("gset\nna\n")
            F.write("\n".join(gset))
            F.write("\n")

    cl = ("""-cp %s
            -Xmx2048m xtools.gsea.GseaPreranked
            -gmx %s -collapse false
            -mode Max_probe -norm meandiv
            -nperm 1000 -rnk %s
            -scoring_scheme weighted -rpt_label my_analysis
            -include_only_symbols true
            -make_sets true -plot_top_x 1
            -rnd_seed timestamp -set_max 9999
            -set_min 5 -zip_report false
            -out %s -gui false """ %
          (gseajar, gsetfile, rnkfile, cachedir)).split()
    java(*cl)
    rv = _check_gsea(cachedir)
    if isinstance(rv, pd.DataFrame):
        return rv

    # NOTE(review): this retries without bound (and without ``verbose``) if
    # _check_gsea never yields a DataFrame -- consider a retry limit.
    return set2rank(rnk, gset, outpath, gseajar, force=True)
Beispiel #13
0
def set2rank(
        rnk, gset,
        cachedir="~/data/rat/gsea_output",
        gseajar="~/bin/gsea2-2.2.1.jar",
        force=False, verbose=False):
    """Run GSEA Preranked for an in-memory rank and gene set, with caching.

    Args:
        rnk: object providing ``sort_values()`` and ``items()`` --
            presumably a pandas Series mapping gene to score (confirm).
        gset: iterable of gene names (strings; they are newline-joined
            into the gene set file).
        cachedir: base directory; each rank / gene set combination gets
            its own subdirectory named after a hash-derived uid.
        gseajar: path to the GSEA jar handed to ``java -cp``.
        force: when true, delete this input's cache directory first.
        verbose: when true, print the gene count and the uid.

    Returns:
        The ``pd.DataFrame`` produced by ``_check_gsea`` for the run.
    """
    outpath = Path(cachedir).expanduser()
    gseajar = Path(gseajar).expanduser()

    gset = frozenset(gset)
    if verbose:
        print("no genes: %d" % len(gset))

    rnk = rnk.sort_values()
    # Series.items() works across pandas versions; iteritems() was removed
    # in pandas 2.0.
    txtrnk = "\n".join('%s\t%s' % (a, b) for a, b in rnk.items())

    # NOTE(review): hash() of strings is salted per process on Python 3
    # unless PYTHONHASHSEED is fixed, so this uid may not be stable across
    # interpreter runs -- confirm the cache is expected to persist.
    uid = str(abs(hash(gset))) + "_" + str(abs(hash(txtrnk)))
    if verbose:
        print("UID: %s" % uid)
    cachedir = outpath / uid

    # A forced run may target a directory that was never created.
    if force and cachedir.exists():
        cachedir.rmtree()

    cachedir.makedirs_p()

    rv = _check_gsea(cachedir)
    if isinstance(rv, pd.DataFrame):
        return rv

    # No usable cached result: start again from an empty directory.
    if cachedir.exists():
        cachedir.rmtree()
    cachedir.makedirs_p()

    rnkfile = cachedir / 'rank.rnk'
    if not rnkfile.exists():
        with open(rnkfile, 'w') as F:
            F.write(txtrnk)

    gsetfile = cachedir / 'gset.gmx'
    if not gsetfile.exists():
        with open(gsetfile, 'w') as F:
            F.write("gset\nna\n")
            F.write("\n".join(gset))
            F.write("\n")

    cl = ("""-cp %s
            -Xmx2048m xtools.gsea.GseaPreranked
            -gmx %s -collapse false
            -mode Max_probe -norm meandiv
            -nperm 1000 -rnk %s
            -scoring_scheme weighted -rpt_label my_analysis
            -include_only_symbols true
            -make_sets true -plot_top_x 1
            -rnd_seed timestamp -set_max 9999
            -set_min 5 -zip_report false
            -out %s -gui false """ % (
                gseajar, gsetfile, rnkfile, cachedir)).split()
    java(*cl)
    rv = _check_gsea(cachedir)
    if isinstance(rv, pd.DataFrame):
        return rv

    # NOTE(review): this retries without bound (and without ``verbose``) if
    # _check_gsea never yields a DataFrame -- consider a retry limit.
    return set2rank(rnk, gset, outpath, gseajar, force=True)