def maskSequences(self, sequences):
    '''mask a collection of sequences.'''

    outfile, infile = tempfile.mkstemp()

    for x, s in enumerate(sequences):
        os.write(outfile, ">%i\n%s\n" % (x, s))
    os.close(outfile)

    statement = self.mCommand % locals()

    E.debug("statement: %s" % statement)

    s = subprocess.Popen(statement,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         close_fds=True)

    (out, err) = s.communicate()

    if s.returncode != 0:
        raise RuntimeError(
            "Error in running %s \n%s\nTemporary directory" %
            (statement, err))

    result = [x.sequence for x in
              FastaIterator.iterate(StringIO.StringIO(out))]

    os.remove(infile)

    return result

def executewait(dbhandle, statement, error, retry=False, wait=5):
    '''execute sql statement.

    Retry on error, if retry is True.
    Returns a cursor object.
    '''
    cc = dbhandle.cursor()
    i = 20
    while i > 0:
        try:
            cc.execute(statement)
            return cc
        except sqlite3.OperationalError as e:
            msg = e.message
            E.warn("import failed: msg=%s, statement=\n  %s" %
                   (msg, statement))
            if not retry:
                raise error, msg
            # only retry while the database is reported as locked
            if not re.search("locked", str(msg)):
                raise error, msg
            time.sleep(wait)
            i -= 1
    raise sqlite3.OperationalError("Database locked and too many retries")

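# Usage sketch for executewait, assuming an sqlite3 database file "csvdb"
# (hypothetical name) and the sqlite3 module imported as in run() below.
# With retry=True, "database is locked" errors are retried up to 20 times,
# sleeping `wait` seconds between attempts; any other OperationalError is
# re-raised immediately.
def _example_executewait(database="csvdb"):
    dbhandle = sqlite3.connect(database)
    cc = executewait(dbhandle,
                     "SELECT name FROM sqlite_master WHERE type='table'",
                     sqlite3.OperationalError,
                     retry=True,
                     wait=5)
    tables = set([x[0] for x in cc])
    cc.close()
    return tables
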
def substituteParameters(**kwargs):
    '''return a local PARAMS dictionary.

    Options in ``**kwargs`` substitute default values in PARAMS.

    Finally, task specific configuration values are inserted.
    '''
    # build parameter dictionary
    # note the order of addition to make sure that kwargs takes precedence
    local_params = dict(PARAMS.items() + kwargs.items())

    if "outfile" in local_params:
        # replace specific parameters with task (outfile) specific parameters
        outfile = local_params["outfile"]
        for k in local_params.keys():
            if k.startswith(outfile):
                p = k[len(outfile) + 1:]
                if p not in local_params:
                    raise KeyError(
                        "task specific parameter '%s' does not exist for '%s' " %
                        (p, k))
                E.debug("substituting task specific parameter for %s: %s = %s" %
                        (outfile, p, local_params[k]))
                local_params[p] = local_params[k]

    return local_params

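# Illustration of the task-specific substitution above (hypothetical PARAMS
# content; in the pipeline PARAMS is filled from the configuration files).
# A key prefixed with the outfile name overrides the generic default:
#
#   PARAMS = {"threads": 1, "sample1.bam_threads": 8}
#   local_params = substituteParameters(outfile="sample1.bam")
#   # k = "sample1.bam_threads" matches, p = "threads", so:
#   assert local_params["threads"] == 8
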
def main():
    try:
        Experiment.initLogging("GATEWAY_INTERFACE" in os.environ)
        if "GATEWAY_INTERFACE" in os.environ:
            # CGI
            (odict, args) = getCGIOptions()
        else:
            # Command line.  All the output is still CGI-ish, though.  Sorry.
            (odict, args) = getOptions()
        (odict, args) = processOptions(odict, args)
        data = newCreature(odict, odict["e"], odict.get("p"))
        data = Location.getJsonModule().dumps(data, indent=2)
        print "Content-type: application/json"
        print "Content-length: %s" % (len(data))
        print
        print data
    except:
        msg = "".join(traceback.format_exception(*sys.exc_info()))
        if msg[-1] == "\n":
            msg = msg[:-1]
        logging.getLogger().warning(msg)
        data = "Huh?\n%s" % (msg)
        print "Status: 500 Internal Server Error"
        print "Content-type: text/plain"
        print "Content-length: %s" % (len(data))
        print
        print data

def maskSequence(self, peptide_sequence):
    """mask peptide sequence
    """
    Masker.__init__(self)

    outfile, filename_peptide = tempfile.mkstemp()
    os.write(outfile, ">test\n%s\n" % (peptide_sequence))
    os.close(outfile)

    infile = filename_peptide
    statement = self.mCommand % locals()

    E.debug("statement: %s" % statement)

    s = subprocess.Popen(statement,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         close_fds=True)

    (out, err) = s.communicate()

    if s.returncode != 0:
        raise RuntimeError(
            "Error in running %s \n%s\nTemporary directory" %
            (statement, err))

    os.remove(filename_peptide)

    masked_sequence = re.sub(r"\s", "", "".join(out.split("\n")[1:]))
    return masked_sequence

def _copy(src, dest):
    dest = os.path.abspath(os.path.join(PARAMS["web_dir"], dest))
    if os.path.exists(dest):
        shutil.rmtree(dest)
    if not os.path.exists(src):
        E.warn("%s does not exist - skipped" % src)
        return
    shutil.copytree(os.path.abspath(src), dest)

def importdata(self):
    # Data[0] = Voltage, Data[1] = Current, Data[2] = Time
    self.trainData, self.testData = loading.Loader().dataload()
    self.myExp = Experiment('Experiment 1', .1)
    # index the trace lists by position; the traces themselves are not
    # valid list indices
    for i, n in enumerate(self.trainData):
        print "Trials"
        print n
        self.myExp.addTrainingSetTrace(n[0], self.V_units, n[1], self.I_units,
                                       np.size(n[2]) / 10, FILETYPE='Array')
        self.myExp.trainingset_traces[i].setROI([[1000, 120000.0]])
    for i, n in enumerate(self.testData):
        self.myExp.addTestSetTrace(n[0], self.V_units, n[1], self.I_units,
                                   np.size(n[2]) / 10, FILETYPE='Array')
        self.myExp.testset_traces[i].setROI([[1000, 20000]])
    self.fitaec(self.myExp)

def main():
    try:
        Experiment.initLogging("GATEWAY_INTERFACE" in os.environ)
        if "GATEWAY_INTERFACE" in os.environ:
            # CGI
            (odict, args) = getCGIOptions()
        else:
            # Command line.  All the output is still CGI-ish, though.  Sorry.
            (odict, args) = getOptions()
        (odict, args) = processOptions(odict, args)
        if "e" in odict or "c" not in odict:
            data = getGallery(odict, odict.get("e"), odict.get("c"))
        else:
            data = getPage(odict, odict["c"], odict.get("p"), odict.get("n"))
        print "Content-type: text/html"
        print "Content-length: %s" % (len(data))
        print
        print data
    except:
        msg = "".join(traceback.format_exception(*sys.exc_info()))
        if msg[-1] == "\n":
            msg = msg[:-1]
        logging.getLogger().warning(msg)
        data = "Huh?\n%s" % (msg)
        print "Status: 500 Internal Server Error"
        print "Content-type: text/plain"
        print "Content-length: %s" % (len(data))
        print
        print data

def run_experiment(experiment_var, random_seed):
    evals_at_targets_df = pd.DataFrame()

    # remember the configured budget so the per-dimension scaling below
    # does not compound across sweep entries
    base_max_iterations = parms.max_iterations

    for i, dim in enumerate(experiment_var):
        a = 1
        b = -1
        # random initial solution with elements between -1 and 1
        theta0 = (b - a) * np.random.rand(dim + 1, 1) + a

        # allow more iterations in higher dimensions
        parms.max_iterations = base_max_iterations * dim

        error_list, sample_evals = ex.run_problem(
            dim, sample_size, num_targets, num_subintervals, cost_function,
            theta0, balance, noise, parms, random_seed)

        # benchmark optimization run
        target_values = ex.create_targets(error_list, num_targets)
        benchmarker = bm.Benchmark(sample_evals, target_values, error_list)
        evals_at_targets = benchmarker.benchmark()

        evals_at_targets_df[i] = evals_at_targets

    return evals_at_targets_df

def validate(self, samples):
    # check segment lengths
    l = [x[1] - x[0] for x in self.segments]
    values_input = min(l), max(l), numpy.mean(l), numpy.std(l)

    fail = False
    for i, sample in enumerate(samples):
        l = [x[1] - x[0] for x in sample]
        values_sample = min(l), max(l), numpy.mean(l), numpy.std(l)

        for val, inp, samp in zip(("min", "max", "mean", "std"),
                                  values_input,
                                  values_sample):
            d = abs(inp - samp) / float(inp)
            # segment length distribution fails
            if d >= self.stringency_level:
                fail = True
                E.warn("segment length distribution in sample %i: "
                       "expected %s (%f) != observed %s (%f)" %
                       (i, val, inp, val, samp))
                break

        if fail:
            break

    # join expects an iterable of strings, hence the one-element tuple
    return "\t".join(("%i" % (not fail),))

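# Worked example of the stringency test in validate: with an input mean
# segment length of 100.0 and a sample mean of 88.0,
#   d = abs(100.0 - 88.0) / 100.0 = 0.12
# so a stringency_level of 0.1 flags the sample (d >= 0.1), while a level
# of 0.2 lets it pass.
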
def getStdoutStderr(stdout_path, stderr_path, tries=5):
    '''get stdout/stderr allowing for some lag.

    Try at most *tries* times. If unsuccessful, throw PipelineError.

    Removes the files once they are read.

    Returns tuple of stdout and stderr.
    '''
    x = tries
    while x >= 0:
        if os.path.exists(stdout_path):
            break
        time.sleep(1)
        x -= 1

    x = tries
    while x >= 0:
        if os.path.exists(stderr_path):
            break
        time.sleep(1)
        x -= 1

    try:
        stdout = open(stdout_path, "r").readlines()
    except IOError, msg:
        E.warn("could not open stdout: %s" % msg)
        stdout = []

    # mirror the stdout branch for stderr and clean up, per the docstring
    try:
        stderr = open(stderr_path, "r").readlines()
    except IOError, msg:
        E.warn("could not open stderr: %s" % msg)
        stderr = []

    try:
        os.unlink(stdout_path)
        os.unlink(stderr_path)
    except OSError, msg:
        pass

    return stdout, stderr

def startUp(self):
    self.mHeaders = ("nid", "node", "parent", "level", "start", "end")

    if not self.isComplete():
        self.mOutfile = self.openOutputStream(self.mFilenameSegments)

        self.min_domain_size = self.mConfig.get(
            'adda', 'min_domain_size', 30)
        self.min_segment_size = self.mConfig.get(
            'segments', 'min_segment_size', 30)
        self.min_distance_border = self.mConfig.get(
            'segments', 'min_distance_border', 0)
        self.resolution = self.mConfig.get('segments', 'resolution', 10.0)

        E.debug("splitting parameters: resolution=%f, min_domain_size=%i, "
                "min_distance_border=%i" %
                (self.resolution, self.min_domain_size,
                 self.min_distance_border))

        # rescale
        self.r_min_domain_size = int(
            float(self.min_domain_size) / self.resolution)
        self.r_min_distance_border = int(
            float(self.min_distance_border) / self.resolution)

        if self.mContinueAt is None:
            self.mOutfile.write("\t".join(self.mHeaders) + "\n")
            self.mOutfile.flush()

def _iterate(self):
    """iterate over multiple files."""

    def _iter(infile):
        identifier = None

        for line in infile:
            if line.startswith("#"):
                continue
            if line.startswith(">"):
                if self.regexIdentifier:
                    try:
                        identifier = re.search(
                            self.regexIdentifier, line[1:-1]).groups()[0]
                    except AttributeError:
                        raise ValueError(
                            "could not parse identifier from line %s "
                            "- check the input" % line[1:-1])
                else:
                    identifier = re.split(r"\s", line[1:-1])[0]
            else:
                if not identifier:
                    raise ValueError(
                        "refusing to emit sequence without identifier "
                        "- check the input")
                yield identifier, line.strip()

    for filename in self.filenames:
        if self.format == "tar.gz" or self.format == "tar" or \
                (self.format == "auto" and filename.endswith("tar.gz")):
            if filename == "-":
                tf = tarfile.open(fileobj=sys.stdin, mode="r|*")
            else:
                tf = tarfile.open(filename, mode="r")
            for f in tf:
                b, ext = os.path.splitext(f.name)
                if ext.lower() in (".fasta", ".fa"):
                    E.info("extracting %s" % f.name)
                    infile = tf.extractfile(f)
                    for x in _iter(infile):
                        yield x
                else:
                    E.info("skipping %s" % f.name)

            if tf != sys.stdin:
                tf.close()
            continue
        elif self.format == "fasta.gz" or \
                (self.format == "auto" and filename.endswith(".gz")):
            infile = gzip.open(filename, "r")
        elif filename == "-":
            infile = sys.stdin
        else:
            infile = open(filename, "r")

        for x in _iter(infile):
            yield x

        if filename != "-":
            infile.close()

    raise StopIteration

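# Usage sketch for _iterate, assuming it is a method of a FastaIterator-style
# class with `filenames`, `format` and `regexIdentifier` attributes as the
# code above implies (the class and constructor signature are hypothetical).
# Note that each sequence line is yielded with its identifier:
#
#   iterator = MultipleFastaIterator(filenames=["in.fasta.gz", "seqs.tar.gz"],
#                                    format="auto", regexIdentifier=None)
#   for identifier, seq_line in iterator._iterate():
#       print identifier, len(seq_line)
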
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(version="%prog version: $Id$",
                                   usage=globals()["__doc__"])

    parser.add_option("-p", "--proc", dest="processors", type="int",
                      help="use # processors [%default]")

    parser.set_defaults(processors=1)

    options, args = E.Start(parser, argv=argv)

    t1 = Test(RunnerGat,
              small_test_segmented_workspaces(),
              [ValidatorNumSamples, ValidatorSegmentDistribution])
    t1.run(options.stdout, processors=options.processors)

    E.Stop()

def buildPFAMDomains(infiles, outfile):
    '''map PFAM domains onto current sequence collection.

    The mapping is done by ID lookup.'''

    infile = infiles[0]
    with IOTools.openFile("nrdb50.fasta.tsv") as inf:
        reader = csv.DictReader(inf, dialect='excel-tab')
        map_id2nid = {}
        for row in reader:
            map_id2nid[row['repid']] = row['nid']

    rx = re.compile(r"(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);")

    c = E.Counter()
    outf = IOTools.openFile(outfile, "w")
    with IOTools.openFile(infile) as inf:
        for entry in FastaIterator.iterate(inf):
            c.input += 1
            pid, start, end, pfam_id, description = rx.match(
                entry.title).groups()
            try:
                outf.write("%s\t%i\t%i\t%s\n" %
                           (map_id2nid[pid], int(start) - 1,
                            int(end), pfam_id))
            except KeyError:
                c.missed += 1
                continue
            c.output += 1

    outf.close()
    E.info(c)

def main():
    min_unit, T_slot, a, b, c = 68, 1, 0.88652179221, 0.25726495726, 0.0073070866
    exp_times = 1  # number of experiment repetitions
    record_num = cfg.record_num
    # send_time_list = []

    # compute the number of pieces
    if min_unit == 68:
        piece_num = record_num
    else:
        piece_num = 68 * record_num // min_unit + \
            (1 if (68 * record_num % min_unit) else 0)

    # sweep over the number of pieces sent per round
    # pieces_each_round = 4  # initial value
    # with open("TimeOFpiecesEachRound.csv", "w", newline="", encoding="utf-8") as datacsv:
    #     csvwriter = csv.writer(datacsv, dialect="excel")
    #     csvwriter.writerow(["pieces_each_round", "pieces decoded"])
    #
    #     while pieces_each_round <= piece_num:  # termination condition
    #         print("\n--------------------------------------------\n")
    #         print("\n| pieces_each_round = %4d |\n" % (pieces_each_round, ))
    #         print("\n--------------------------------------------\n")
    #         send_time = Experiment.run(min_unit, T_slot, a, b, c, pieces_each_round)
    #         send_time_list.append(send_time)
    #         print(send_time_list)
    #         # save the result for this setting
    #         csvwriter.writerow([pieces_each_round, send_time])
    #         print("total send time over all rounds: ", send_time)
    #         pieces_each_round += 2  # increment per sweep step

    pieces_each_round = piece_num  # send everything in a single round !!!!!

    print("starting experiments...")
    if len(sys.argv) == 1:
        print("automated mode")
        for exp_index in range(1, exp_times + 1):
            print(f"\nstarting experiment {exp_index}...")
            send_time = Experiment.run(min_unit, T_slot, a, b, c,
                                       pieces_each_round, exp_index, -1)
            print(f"experiment {exp_index} took {send_time}\n---------------------------")
    elif len(sys.argv) == 2 and sys.argv[1] == 'source':
        print("running the source node standalone")
        send_time = Experiment.run(min_unit, T_slot, a, b, c,
                                   pieces_each_round, 1, 0)
        print(f"source node finished, the experiment took {send_time}\n---------------------------")
    elif len(sys.argv) == 3 and sys.argv[1] == 'forward':
        forward_index = int(sys.argv[2])
        if forward_index > len(cfg.Dest_ADDR):
            print("forwarding-layer node index out of range")
        print(f"running forwarding-layer node {forward_index} standalone")
        Experiment.run(min_unit, T_slot, a, b, c,
                       pieces_each_round, 1, forward_index)
        print("forwarding layer finished.")
    else:
        print("""invalid arguments!
automated:  python exp_main.py
source:     python exp_main.py source
forwarding: python exp_main.py forward 2
(the number selects the node and must match the addresses in config one-to-one)
""")

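# Worked example of the piece count above: the records occupy
# 68 * record_num bytes; with a hypothetical min_unit = 50 and
# record_num = 10 this gives 680 // 50 = 13 full pieces plus one remainder
# piece, so piece_num = 14. With min_unit = 68 each record is exactly one
# piece and piece_num = record_num.
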
def compress(infile):
    '''gzip infile'''

    statement = "gzip -f %(infile)s" % locals()

    E.debug("executing statement '%s'" % statement)

    return E.run(statement)

def main():
    Experiment.initLogging("GATEWAY_INTERFACE" in os.environ)
    (odict, args) = getOptions()
    (odict, args) = processOptions(odict, args)
    data = doit(odict, odict["e"])

def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(version="%prog version: $Id$",
                                   usage=globals()["__doc__"])

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("please supply two BAM files.")

    samfile = pysam.Samfile(args[0], "rb")

    readone = set()
    readtwo = set()
    removeone = set()
    removetwo = set()

    for read in samfile.fetch():
        if read.is_read1:
            if read.qname in readone:
                removeone.add(read.qname)
            readone.add(read.qname)
        else:
            if read.qname in readtwo:
                removetwo.add(read.qname)
            readtwo.add(read.qname)

    discarded = 0
    samout = pysam.Samfile(args[1], mode='wb', template=samfile)
    for read in samfile.fetch():
        if (read.qname in removeone) and read.is_read1:
            discarded += 1
        elif (read.qname in removetwo) and read.is_read2:
            discarded += 1
        else:
            samout.write(read)

    samfile.close()
    samout.close()

    E.info("%s of %s first reads removed; %s of %s second reads; "
           "%s of %s multi mapped reads which fell into %s positions "
           "(average of %s positions per read)" %
           (len(removeone), len(readone),
            len(removetwo), len(readtwo),
            len(removeone) + len(removetwo),
            len(readone) + len(readtwo),
            discarded,
            float(discarded) / (len(removeone) + len(removetwo))))

    ## write footer and output benchmark information.
    E.Stop()

def run_experiments():
    try:
        experiment_file = args['file']
    except KeyError as e:
        log.error("run_experiments requires argument %s" % e.args[0])
        return
    import Experiment
    Experiment.run_experiments(experiment_file)

def open_file(filename, trigger, *args):
    binfile = filename[0:-4] + "_data.bin"
    if len(args) == 0:
        exp = Experiment.Experiment(binfile, filename, trigger)
    elif len(args) == 1:
        exp = Experiment.Experiment(binfile, filename, trigger,
                                    stim_length=args[0])
    experiments[filename[0:-4]] = exp
    names.update(exp.stim_names)

def stats():
    congestion = (1.0 * Experiment.drop_count) / Experiment.packet_count
    overall_rtt = Experiment.rtt()
    single_rtt = Experiment.single_rtt()
    double_rtt = Experiment.double_rtt()
    if len(Experiment.ribdeltas) != 0:
        ribdeltas = sum(Experiment.ribdeltas, 0.0) / len(Experiment.ribdeltas)
    else:
        ribdeltas = 0
    return " ".join(map(str, [congestion,
                              overall_rtt[0], overall_rtt[1],
                              Experiment.packet_count,
                              Experiment.drop_count,
                              Experiment.probe_count,
                              Experiment.revmatch,
                              Experiment.cycles,
                              ribdeltas]))

def bamToBed(infile, outfile):
    '''convert bam to bed with bedtools.'''

    statement = "bamToBed -i %(infile)s > %(outfile)s" % locals()

    E.debug("executing statement '%s'" % statement)

    retcode = subprocess.call(statement,
                              cwd=os.getcwd(),
                              shell=True)
    if retcode < 0:
        raise OSError("Child was terminated by signal %i: \n%s\n" %
                      (-retcode, statement))

    return outfile

def calculateFalsePositiveRate(infiles, outfile):
    '''
    calculate the false positive rate in taxonomic abundances
    '''
    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()

    true_file = infiles[0]
    true_set = set()
    estimate_set = set()
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[len("metaphlan_"):] == \
                os.path.basename(true_file):
            tablenames = [P.toTable(os.path.basename(true_file)),
                          P.toTable(os.path.basename(estimate_file))]

            for species in cc.execute(
                    """SELECT species_name FROM %s""" %
                    tablenames[0]).fetchall():
                true_set.add(species[0])
            for species in cc.execute(
                    """SELECT taxon FROM %s WHERE taxon_level == 'species'""" %
                    tablenames[1]).fetchall():
                if species[0].find("_unclassified") != -1:
                    continue
                estimate_set.add(species[0])

    total_estimate = len(estimate_set)
    total_true = len(true_set)

    E.info("counting false positives and false negatives")
    print(estimate_set.difference(true_set))
    nfp = len(estimate_set.difference(true_set))
    nfn = len(true_set.difference(estimate_set))
    ntp = len(estimate_set.intersection(true_set))

    E.info("writing results")
    track = P.snip(os.path.basename(true_file), ".load")
    outf = open(outfile, "w")
    outf.write("track\ttp_rate\tfp_rate\tfn_rate\n")
    outf.write("\t".join(map(str, [track,
                                   float(ntp) / total_estimate,
                                   float(nfp) / total_estimate,
                                   float(nfn) / total_true])) + "\n")
    outf.close()

def run(infile, options):

    options.tablename = quoteTableName(options.tablename,
                                       backend=options.backend)

    if options.map:
        m = {}
        for x in options.map:
            f, t = x.split(":")
            m[f] = t
        options.map = m
    else:
        options.map = {}

    existing_tables = None

    if options.backend == "pg":
        import pgdb
        dbhandle = pgdb.connect(options.psql_connection)
        error = pgdb.DatabaseError
        options.null = "NULL"
        options.string_value = "'%s'"
        if options.insert_quick:
            raise ValueError("quick import not implemented.")

    elif options.backend == "sqlite":
        import sqlite3
        dbhandle = sqlite3.connect(options.database)
        try:
            os.chmod(options.database, 0664)
        except OSError, msg:
            E.warn("could not change permissions of database: %s" % msg)

        # Avoid the following error:
        # sqlite3.ProgrammingError: You must not use 8-bit bytestrings
        # unless you use a text_factory that can interpret 8-bit
        # bytestrings (like text_factory = str). It is highly recommended
        # that you instead just switch your application to Unicode strings.
        # Note: might be better to make csv2db unicode aware.
        dbhandle.text_factory = str

        error = sqlite3.OperationalError
        options.insert_many = True  # False
        options.null = None  # "NULL"
        options.string_value = "%s"  # "'%s'"

        statement = "SELECT name FROM sqlite_master WHERE type='table'"
        cc = executewait(dbhandle, statement, error, options.retry)
        existing_tables = set([x[0] for x in cc])
        cc.close()

        quick_import_statement = \
            "sqlite3 -header -csv -separator '\t' %s '.import %%s %s'" % \
            (options.database, options.tablename)

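# The doubled %% in the quick-import template survives the first
# %-substitution as a literal %s, leaving a slot for the filename to be
# filled in later. With hypothetical names it expands to:
#   sqlite3 -header -csv -separator '\t' csvdb '.import %s mytable'
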
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = open(args[0], 'r')

    genes_list = []
    header = None
    for line in infile:
        if line.startswith("#"):
            continue
        if line.startswith("gene_id"):
            header = line.rstrip('\n')
            num_fields = len(header.split('\t')) - 2
            total_reads = [0] * num_fields
            continue
        la = line.rstrip('\n').split('\t')
        if len(la) < 3:
            continue
        genes_list.append(la)
        total_reads = map(lambda x, y: float(x) + float(y),
                          total_reads, la[2::])

    total_reads = map(lambda x: float(x) / 1000000, total_reads)

    print header
    for gene in genes_list:
        my_str_list = gene[0:2]
        vals = map(lambda x, y: float(x) / float(y) /
                   (float(gene[1]) / 1000.0),
                   gene[2::], total_reads)
        my_str_list.extend(map(str, vals))
        print "\t".join(my_str_list)

    ## write footer and output benchmark information.
    E.Stop()

def bamToMEDIPS(infile, outfile):
    '''convert bam to medips format

    contig, start, end, strand

    Start is 1-based.
    '''
    statement = '''bamToBed -i %(infile)s | awk '{printf("%%s\\t%%i\\t%%i\\t%%s\\n", $1,$2+1,$3,$6)}' > %(outfile)s''' % locals()

    E.debug("executing statement '%s'" % statement)

    E.run(statement)

    return outfile

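# Example of the conversion performed above (hypothetical read): a 0-based,
# half-open BED line
#   chr1    999     1100    read1   60      +
# becomes the 1-based MEDIPS line
#   chr1    1000    1100    +
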
def iterator_test(infile, report_step=100000):
    '''only output parseable lines from infile.'''

    ninput, noutput, nerrors = 0, 0, 0
    while 1:
        try:
            x = infile.next()
        except ParsingError, msg:
            nerrors += 1
            ninput += 1
            E.warn(str(msg))
            continue
        except StopIteration:
            break
        # emit parseable entries and report progress every report_step
        # records (assumed from the docstring and the unused counters)
        ninput += 1
        noutput += 1
        if ninput % report_step == 0:
            E.info("iteration %i" % ninput)
        yield x

def adience_poisson_experiment(dataset, tau_mode, tau=1.):
    """Run a full experiment (training and test) with the poisson
    configuration on the Adience dataset.

    :param dataset: Dataset used for the experiment.
    :param tau_mode: Mode for the tau parameter in the experiment
        (constant, or learned value)
    :param tau: Value of the tau parameter. Initial value if it is to be
        learned, or the constant value otherwise.
    """
    # RUN ADIENCE POISSON
    assert tau_mode in ["non_learnable", "sigm_learnable"]

    # Create net architecture
    poisson_resnet = Resnet_2x4_poisson(tau_mode)
    model = Model(inputs=poisson_resnet.inputs,
                  outputs=poisson_resnet.get_net())

    # Create experiment
    experiment = Experiment.Experiment(dataset, model)

    # Train
    experiment.train('adience_poisson_t=' + tau_mode,
                     '/TFG/ordinal_unimodal_mio/src/logs/')

    # Test
    experiment.test()

def adience_baseline_experiment_sgd(dataset):
    """Run a full experiment (training and test) with the baseline
    configuration and the Nesterov SGD optimizer on the Adience dataset.

    :param dataset: Dataset used for the experiment.
    """
    # RUN ADIENCE BASELINE

    # Create net architecture
    baseline_resnet = Resnet_2x4()
    model = Model(inputs=baseline_resnet.inputs,
                  outputs=baseline_resnet.get_net())

    # Create experiment
    experiment = Experiment.Experiment(dataset, model)

    arguments = {
        'epochs': 100,
        'optimizer': SGD,
        'learning_rate': 1e-2,
        'momentum': 0.9,
        'loss_fn': 'categorical_crossentropy',
        'metrics': ['accuracy']
    }
    callbacks = ['ModelCheckpoint']

    # Train
    experiment.train('adience_baseline',
                     '/TFG/ordinal_unimodal_mio/src/logs/',
                     arguments=arguments, callbacks=callbacks)

    # Test
    experiment.test()

def suggest_analysis_layout(solid_runs):
    """Generate a bash script to build the analysis directory scheme

    Given a set of SolidRuns, print a set of script commands for running the
    build_analysis_dir.py program to create and populate the analysis
    directories.

    The script can be edited before being executed by the user.

    Arguments:
      solid_runs: a list of SolidRun objects.
    """
    print "#!/bin/sh\n#\n# Script commands to build analysis directory structure"
    for run in solid_runs:
        build_analysis_dir_cmd = 'build_analysis_dir.py'
        top_dir = os.path.abspath(
            os.path.join(os.getcwd(), os.path.basename(run.run_dir)))
        for sample in run.samples:
            for project in sample.projects:
                # Create one experiment per project
                cmd_line = []
                expt = Experiment.Experiment()
                expt.name = project.getProjectName()
                expt.type = "expt"
                expt.sample = project.getSample().name
                expt.library = project.getLibraryNamePattern()
                # Print the arguments for the layout
                cmd_line.extend((build_analysis_dir_cmd,
                                 "--top-dir=%s_analysis" % top_dir,
                                 "--link=absolute",
                                 "--naming-scheme=partial"))
                cmd_line.append(expt.describe())
                cmd_line.append(run.run_dir)
                print "#\n%s" % (' \\\n').join(cmd_line)

def test_t4(self):
    r1 = Experiment.largest([3, 2, 3, 4])
    self.assertEqual(r1, 4)

# def test_t5(self):
#     r1 = Experiment.largest([3, -2, 3, -1, 4])
#     self.assertEqual(r1, 4)

def main():
    min_unit, T_slot, a, b, c = 68, 1, 0.88652179221, 0.25726495726, 0.0073070866
    exp_times = 1  # number of experiment repetitions
    record_num = cfg.record_num
    send_time_list = []

    # compute the number of pieces
    if min_unit == 68:
        piece_num = record_num
    else:
        piece_num = 68 * record_num // min_unit + \
            (1 if (68 * record_num % min_unit) else 0)

    # sweep over the number of pieces sent per round
    pieces_each_round = 4  # initial value
    with open("TimeOFpiecesEachRound.csv", "w", newline="",
              encoding="utf-8") as datacsv:
        csvwriter = csv.writer(datacsv, dialect="excel")
        csvwriter.writerow(["pieces_each_round", "pieces sent by source"])

        while pieces_each_round <= piece_num:  # stop once a round covers everything
            print("\n--------------------------------------------\n")
            print("\n| pieces_each_round = %4d |\n" % (pieces_each_round, ))
            print("\n--------------------------------------------\n")
            send_num = Experiment.run(min_unit, T_slot, a, b, c,
                                      pieces_each_round, 1, -1)
            send_time_list.append(send_num)
            print(send_time_list)
            # save the result for this setting
            csvwriter.writerow([pieces_each_round, send_num])
            print("total sent over all rounds: ", send_num)
            pieces_each_round += 2  # increment per sweep step

def bedToMEDIPS(infile, outfile):
    '''convert bed to medips format

    contig, start, end, strand

    Start is 1-based.
    '''
    if infile.endswith(".gz"):
        cat = "zcat"
    else:
        cat = "cat"

    statement = '''%(cat)s %(infile)s | awk '{printf("%%s\\t%%i\\t%%i\\t%%s\\n", $1,$2+1,$3,$6)}' > %(outfile)s''' % locals()

    E.run(statement)

    return outfile

def experimentCounting(ranger):
    # Probabilistic Counting
    # hash functions to use; removing from a list while iterating over it
    # skips elements, so filter with a comprehension instead
    hashes = [h for h in hashlib.algorithms_guaranteed
              if not h.lower().startswith("shake_")]
    # print(hashlib.algorithms_guaranteed)
    # print(hashes)

    setups = Experiment.getSetup(["distinct", "hashes"])
    distincts = setups.get("various numbers of distinct elements")
    numHashes = setups.get("number of hashes")

    # for i in range(len(distincts)):
    #     distinct = distincts[i][0]
    #     calcCounting(distinct, 3, hashes)

    for i in range(len(numHashes)):
        numHash = numHashes[i]
        calcCounting(ranger, numHash, hashes)

def experiment(args, logger, dataProcessor):
    exp = Experiment.Experiment()
    model = dataProcessor.loadNetwork(args, 0)
    # PolicyValueFn.PolicyValueFn(args).to(args.device)
    data = exp.evaluationWithDifferentMinMaxSearchAgent(model)
    # data = exp.evaluationForNetworkWithFourRollout(
    #     model, start=10, end=50, step=10, random_cnt=1, numOfEvaluations=1)
    logger.info(data)

def buildTrueTaxonomicRelativeAbundances(infile, outfile):
    '''
    get species level relative abundances for the simulated data.
    This involves creating maps between different identifiers from the
    NCBI taxonomy. This is so that the results are comparable to species
    level analysis from metaphlan.

    The gi_taxid_nucl is a huge table and therefore this function takes
    an age to run - can think of optimising this somehow
    '''
    to_cluster = True

    total = 0
    rel_abundance = collections.defaultdict(int)
    for fastq in Fastq.iterate(iotools.openFile(infile)):
        total += 1
        gi = fastq.identifier.split("|")[1]
        rel_abundance[gi] += 1
    for gi, ab in rel_abundance.items():
        rel_abundance[gi] = float(ab) / total

    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()

    result = collections.defaultdict(float)
    for gi in list(rel_abundance.keys()):
        E.info("processing gi %s" % gi)
        taxid = cc.execute(
            """SELECT taxid FROM gi_taxid_nucl WHERE gi == '%s'""" %
            gi).fetchone()[0]
        species_id = cc.execute(
            """SELECT species_id FROM categories WHERE taxid == '%s'""" %
            taxid).fetchone()[0]
        species_name = cc.execute(
            """SELECT taxname FROM names WHERE taxid == '%s'
               AND description == 'scientific name'""" %
            species_id).fetchone()[0]
        abundance = rel_abundance[gi]
        E.info("mapped gi %s to taxid: %s, species_id: %s, species_name: %s" %
               (str(gi), str(taxid), str(species_id), species_name))
        result[species_name] += abundance

    outf = open(outfile, "w")
    outf.write("species_name\trelab\n")
    for species_name, abundance in result.items():
        # create names consistent with metaphlan
        species_name = species_name.replace(" ", "_")
        outf.write("%s\t%f\n" % (species_name, abundance))
    outf.close()

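# The lookup chain above, per gi accession (table layout assumed from the
# queries): gi_taxid_nucl maps gi -> taxid, categories maps taxid ->
# species_id, and names maps species_id -> the scientific species name,
# under which the relative abundance is then accumulated.
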
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-o", "--output-prefix", dest="output_prefix",
                      type="string",
                      help="output filename prefix [default=%default].")

    parser.add_option("-c", "--chromosome-table",
                      dest="filename_chromosome_table", type="string",
                      help="filename with tab separated list of chromosome "
                      "names [default=%default].")

    parser.add_option("--action", dest="action", type="choice",
                      choices=("plot", "run"),
                      help="action to perform [default=%default]")

    parser.add_option("-s", "--signal-value", dest="signal_value",
                      type="string",
                      help="use either p.value or sig.value as ranking "
                      "measure [default=%default]")

    parser.set_defaults(
        action="plot",
        output_prefix="output",
        half_width=None,
        overlap_ratio=0,
        is_broadpeak=False,
        signal_value="signal.value",
        filename_chromosome_table="genome_table.txt",
    )

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.action == "plot":
        plotIDR(options.output_prefix + ".pdf", args)
    elif options.action == "run":
        if len(args) != 2:
            raise ValueError("require exactly two replicates")
        runIDR(options, args[0], args[1])

    ## write footer and output benchmark information.
    E.Stop()

def _outputHistogram(counts, bins, section):
    outf = E.openOutputFile("%s.table" % section)
    outf.write("%s\tcounts\tfrequency\tcumulative\n" % section)
    t, cc = sum(counts), 0
    for bin, c in zip(bins[:-1], counts):
        cc += c
        outf.write("\t".join((str(bin), str(c),
                              "%6.4f" % (100.0 * c / t),
                              "%6.4f" % (100.0 * cc / t))) + "\n")

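# Worked example for _outputHistogram: with section = "length",
# counts = [2, 3, 5] and bins = [0, 10, 20, 30], t = 10 and the table is
#   length  counts  frequency  cumulative
#   0       2       20.0000    20.0000
#   10      3       30.0000    50.0000
#   20      5       50.0000    100.0000
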
def doAmazing(odict, args):
    if args:
        exp_names = [args[0]]
    else:
        exp_names = Experiment.getAllExperiments()
    for exp_name in exp_names:
        exp = Experiment.Experiment(exp_name)
        exp.regenHTML()

def checkBlastRuns(infiles, outfile):
    '''check if output files are complete.
    '''
    outf = IOTools.openFile(outfile, "w")

    outf.write("chunkid\tquery_first\tquery_last\t"
               "found_first\tfound_last\tfound_total\tfound_results\t"
               "has_finished\tattempts\t%s\n" %
               "\t".join(Logfile.RuntimeInformation._fields))

    for infile in infiles:
        E.debug("processing %s" % infile)
        chunkid = P.snip(os.path.basename(infile), ".blast.gz")
        logfile = infile + ".log"
        chunkfile = P.snip(infile, ".blast.gz") + ".fasta"

        with IOTools.openFile(infile) as inf:
            l = inf.readline()
            ids = set()
            total_results = 0
            for l in inf:
                if l.startswith("#//"):
                    continue
                ids.add(int(l.split("\t")[0]))
                total_results += 1
            found_first = min(ids)
            found_last = max(ids)
            found_total = len(ids)

        l = IOTools.getFirstLine(chunkfile)
        query_first = l[1:-1]
        l2 = IOTools.getLastLine(chunkfile, nlines=2).split("\n")
        query_last = l2[0][1:]

        logresults = Logfile.parse(logfile)

        outf.write("\t".join(map(str, (
            chunkid,
            query_first,
            query_last,
            found_first,
            found_last,
            found_total,
            total_results,
            logresults[-1].has_finished,
            len(logresults),
            "\t".join(map(str, logresults[-1]))))) + "\n")

    outf.close()

def __init__(self, measurementSizeX, measurementSizeY, measurementSizeZ,
             psfSizeX, psfSizeY, psfSizeZ, numChannels, homeDirectory, ops):
    Experiment.__init__(self, measurementSizeX, measurementSizeY,
                        measurementSizeZ, psfSizeX, psfSizeY, psfSizeZ,
                        numChannels, homeDirectory, ops)

    # parameters of the top sphere
    self.spherePositionX = self.objectSizeX / 2
    self.spherePositionY = self.objectSizeY / 2
    self.spherePositionZ = self.objectSizeZ / 2 - 27
    self.sphereRadius = 20
    self.sphereIntensity = 10000

    # parameters of the bottom sphere
    self.spherePosition2X = self.objectSizeX / 2
    self.spherePosition2Y = self.objectSizeY / 2
    self.spherePosition2Z = self.objectSizeZ / 2
    self.sphereRadius2 = 5
    self.sphereIntensity2 = 10000

    self.background = 0.000001

    self.directory = homeDirectory + "/SpheresHighIntensity/"

def main():
    print "DEBUG: Entering Driver.main()"

    expGUI = ExperimentGUI()  # create new ExperimentGUI object using default values
    res = Results()  # create new Results object using default value
    res.writeToFile(expGUI.window.get_title() + " Experiment\n")
    res.writeToFile("Experiment started at: " + str(datetime.now()) + "\n\n")

    gtk.main()  # http://www.pygtk.org/dist/pygtk2-tut.pdf

    catA = ImageCategory("A", ["A0.jpg", "A1.jpg", "A2.jpg", "A3.jpg",
                               "A4.jpg", "A5.jpg"])
    catB = ImageCategory("B", ["B0.jpg", "B1.jpg", "B2.jpg", "B3.jpg",
                               "B4.jpg"])
    categories = [catA, catB]

    lb1 = LearningBlock(["A0.jpg", "A1.jpg", "B2.jpg"], 10.0)
    lb2 = LearningBlock(["B3.jpg", "B4.jpg", "A1.jpg"], 15.0)
    tb1 = TestingBlock(["A0.jpg", "A1.jpg", "B2.jpg"])
    tb2 = TestingBlock(["A3.jpg", "B4.jpg"])

    lblockList = [lb1, lb2]
    tblockList = [tb1, tb2]

    lp = LearningPhase(lblockList)
    tp = TestingPhase(tblockList)
    phaseList = [lp, tp]

    exp = Experiment(phaseList)
    exp.runPhases(categories, res)

    print "DEBUG: Entering gtk.main()"
    # gtk.main()
    print "DEBUG: Exiting gtk.main()"

    res.writeToFile("Experiment ended at: " + str(datetime.now()))
    print "DEBUG: Exiting Driver.main()"

def bigwig(infile, contig_sizes):
    '''convert infile to bigwig file'''

    if infile.endswith(".wig"):
        outfile = infile[:-4] + ".bigwig"
    else:
        outfile = infile + ".bigwig"

    tmp, filename_sizes = tempfile.mkstemp()
    os.write(tmp, "\n".join(["\t".join(map(str, x))
                             for x in contig_sizes.iteritems()]))
    os.close(tmp)

    statement = "wigToBigWig -clip %(infile)s %(filename_sizes)s %(outfile)s " % locals()

    E.debug("executing statement '%s'" % statement)

    if E.run(statement):
        os.unlink(infile)
        os.unlink(filename_sizes)

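# Usage sketch (hypothetical contig sizes; requires the UCSC wigToBigWig
# binary on the PATH):
#   bigwig("sample.wig", {"chr1": 249250621, "chr2": 243199373})
# writes sample.bigwig and, if the conversion succeeds, removes the
# original wig file and the temporary chromosome-sizes file.
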
def run(self, outfile, processors=1):

    tasks = []
    manager = multiprocessing.Manager()
    lock = manager.Lock()

    for segmentor in self.test_generator:
        headers = segmentor.headers
        tasks.append((lock, outfile, segmentor, self.runner, self.validators))

    for v in self.validators:
        headers.extend(v.headers)

    outfile.write("%s\n" % "\t".join(headers))

    E.info("created %i tasks for %i workers" % (len(tasks), processors))

    if processors > 1:
        pool = multiprocessing.Pool(processors)
        pool.map(runSimulation, tasks)
    else:
        for task in tasks:
            runSimulation(task)
