Example 1
    def maskSequences(self, sequences):
        '''mask a collection of sequences.'''

        outfile, infile = tempfile.mkstemp()

        for x, s in enumerate(sequences):
            os.write(outfile, ">%i\n%s\n" % (x, s))

        os.close(outfile)

        statement = self.mCommand % locals()

        E.debug("statement: %s" % statement)

        s = subprocess.Popen(statement,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)

        (out, err) = s.communicate()

        if s.returncode != 0:
            raise RuntimeError(
                "Error in running %s \n%s\nTemporary directory" %
                (statement, err))

        result = [
            x.sequence for x in FastaIterator.iterate(StringIO.StringIO(out))
        ]

        os.remove(infile)

        return result
Example 2
def executewait(dbhandle, statement, error, retry=False, wait=5):
    '''execute sql statement.

    Retry on error, if retry is True.
    Returns a cursor object.
    '''

    cc = dbhandle.cursor()
    i = 20
    while i > 0:
        try:
            cc.execute(statement)
            return cc
        except sqlite3.OperationalError as e:
            msg = e.message
            E.warn("import failed: msg=%s, statement=\n  %s" %
                   (msg, statement))
            # TODO: check for database locked msg
            if not retry:
                raise error, msg
            if not re.search("locked", str(msg)):
                raise error, msg
            time.sleep(wait)
            i -= 1
            continue
        break
    raise sqlite3.OperationalError("Database locked and too many retries")
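A usage sketch for executewait, assuming an sqlite3 connection and the E logging module used above; the database file and SQL statement are made up:

import sqlite3

dbhandle = sqlite3.connect("csvdb")  # hypothetical database file
# retry while a concurrent writer holds the lock, sleeping 5s between attempts
cc = executewait(dbhandle,
                 "CREATE TABLE IF NOT EXISTS counts (track TEXT, n INT)",
                 sqlite3.OperationalError,
                 retry=True,
                 wait=5)
dbhandle.commit()
cc.close()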
Example 3
def substituteParameters( **kwargs ):
    '''return a local PARAMS dictionary.

    Options in ``**kwargs`` substitute default
    values in PARAMS.

    Finally, task specific configuration values 
    are inserted.
    '''

    # build parameter dictionary
    # note the order of addition to make sure that kwargs takes precedence
    local_params = dict(PARAMS.items() + kwargs.items())

    if "outfile" in local_params:
        # replace specific parameters with task (outfile) specific parameters
        outfile = local_params["outfile"]
        for k in local_params.keys():
            if k.startswith(outfile):
                p = k[len(outfile)+1:]
                if p not in local_params:
                    raise KeyError( "task specific parameter '%s' does not exist for '%s' " % (p,k))
                E.debug( "substituting task specific parameter for %s: %s = %s" % (outfile,p,local_params[k] ) )
                local_params[p] = local_params[k]

    return local_params
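A minimal sketch of the outfile-prefix override convention implemented above, assuming PARAMS is the module-level dictionary the function reads; all keys and values here are hypothetical:

PARAMS = {"threads": 1,
          "result.tsv_threads": 8}

local_params = substituteParameters(outfile="result.tsv")
# "result.tsv_threads" matches the outfile prefix, so the generic value
# is overridden: local_params["threads"] == 8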
Example 4
def main():
    
    try:

        Experiment.initLogging(os.environ.has_key("GATEWAY_INTERFACE"))
        
        if os.environ.has_key("GATEWAY_INTERFACE"):
            # CGI
            (odict, args) = getCGIOptions()
        else:
            # Command line.  All the output is still CGI-ish, though.  Sorry.
            (odict, args) = getOptions()
        (odict, args) = processOptions(odict, args)
            
        data = newCreature(odict, odict["e"], odict.get("p"))
        data = Location.getJsonModule().dumps(data, indent=2)
        
        print "Content-type: application/json"
        print "Content-length: %s" % (len(data))
        print
        print data

    except:
        msg = string.join(apply( traceback.format_exception, sys.exc_info() ), "")
        if (msg[-1] == "\n"):
            msg = msg[:-1]
        logging.getLogger().warning(msg)
        data = "Huh?\n%s" % (msg)
        print "Status: 500 Internal Server Error"
        print "Content-type: text/plain"
        print "Content-length: %s" % (len(data))
        print
        print data
Example 7
def _copy(src, dest):
    dest = os.path.abspath(os.path.join(PARAMS["web_dir"], dest))
    if os.path.exists(dest):
        shutil.rmtree(dest)
    if not os.path.exists(src):
        E.warn("%s does not exist - skipped" % src)
        return
    shutil.copytree(os.path.abspath(src), dest)
Example 8
    def importdata(self):

        # Data[0] = Voltage, Data[1] = Current, Data[2] = Time

        self.trainData, self.testData = loading.Loader().dataload()

        self.myExp = Experiment('Experiment 1', .1)

        # index traces by position; the trace arrays themselves are not valid keys
        for i, n in enumerate(self.trainData):
            print "Trials"
            print n
            self.myExp.addTrainingSetTrace(n[0],
                                           self.V_units,
                                           n[1],
                                           self.I_units,
                                           np.size(n[2]) / 10,
                                           FILETYPE='Array')
            self.myExp.trainingset_traces[i].setROI([[1000, 120000.0]])

        for i, n in enumerate(self.testData):
            self.myExp.addTestSetTrace(n[0],
                                       self.V_units, n[1],
                                       self.I_units,
                                       np.size(n[2]) / 10,
                                       FILETYPE='Array')
            self.myExp.testset_traces[i].setROI([[1000, 20000]])

        self.fitaec(self.myExp)
Example 10
def main():
    
    try:

        Experiment.initLogging(os.environ.has_key("GATEWAY_INTERFACE"))
        
        if os.environ.has_key("GATEWAY_INTERFACE"):
            # CGI
            (odict, args) = getCGIOptions()
        else:
            # Command line.  All the output is still CGI-ish, though.  Sorry.
            (odict, args) = getOptions()
        (odict, args) = processOptions(odict, args)
            
        if odict.has_key("e") or not odict.has_key("c"):
            data = getGallery(odict, odict.get("e"), odict.get("c"))
        else:
            data = getPage(odict, odict["c"], odict.get("p"), odict.get("n"))
        
        print "Content-type: text/html"
        print "Content-length: %s" % (len(data))
        print
        print data

    except:
        msg = string.join(apply( traceback.format_exception, sys.exc_info() ), "")
        if (msg[-1] == "\n"):
            msg = msg[:-1]
        logging.getLogger().warning(msg)
        data = "Huh?\n%s" % (msg)
        print "Status: 500 Internal Server Error"
        print "Content-type: text/plain"
        print "Content-length: %s" % (len(data))
        print
        print data
Example 11
def run_experiment(experiment_var, random_seed):

    evals_at_targets_df = pd.DataFrame()

    for i, dim in enumerate(experiment_var):

        a = 1
        b = -1
        # random initial solution with elements between -1 and 1
        theta0 = (b - a) * np.random.rand(dim + 1, 1) + a

        # allow more iterations in higher dimensions
        parms.max_iterations = parms.max_iterations * dim

        error_list, sample_evals = ex.run_problem(
            dim, sample_size, num_targets, num_subintervals, cost_function,
            theta0, balance, noise, parms, random_seed)

        ############# benchmark optimization run

        target_values = ex.create_targets(error_list, num_targets)

        benchmarker = bm.Benchmark(sample_evals, target_values, error_list)

        evals_at_targets = benchmarker.benchmark()
        evals_at_targets_df[i] = evals_at_targets

    return evals_at_targets_df
Example 12
    def maskSequence(self, peptide_sequence):
        """mask peptide sequence
        """

        Masker.__init__(self)

        outfile, filename_peptide = tempfile.mkstemp()
        os.write(outfile, ">test\n%s\n" % (peptide_sequence))
        os.close(outfile)

        infile = filename_peptide
        statement = self.mCommand % locals()

        E.debug("statement: %s" % statement)

        s = subprocess.Popen(statement,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)

        (out, err) = s.communicate()

        if s.returncode != 0:
            raise RuntimeError(
                "Error in running %s \n%s\nTemporary directory" %
                (statement, err))

        os.remove(filename_peptide)

        masked_sequence = re.sub("\s", "",
                                 string.join(out.split("\n")[1:], ""))

        return masked_sequence
Example 14
    def validate(self, samples):

        # check segment lengths
        l = [x[1] - x[0] for x in self.segments]
        values_input = min(l), max(l), numpy.mean(l), numpy.std(l)

        fail = False

        for i, sample in enumerate(samples):
            l = [x[1] - x[0] for x in sample]
            values_sample = min(l), max(l), numpy.mean(l), numpy.std(l)

            for val, inp, samp in zip(("min", "max", "mean", "std"),
                                      values_input,
                                      values_sample):
                d = abs(inp - samp) / float(inp)

                # segment length distribution fails
                if d >= self.stringency_level:
                    fail = True
                    E.warn("segment length distribution in sample %i: expected %s (%f) != observed %s (%f)" %
                           (i, val, inp, val, samp))

                    break

            if fail:
                break
        else:
            fail = False

        return "\t".join(("%i" % (not fail)), )
Example 15
def getStdoutStderr( stdout_path, stderr_path, tries=5 ):
    '''get stdout/stderr, allowing for some lag.

    Try at most *tries* times. If unsuccessful, throw PipelineError.

    Removes the files once they are read.

    Returns tuple of stdout and stderr.
    '''
    x = tries
    while x >= 0:
        if os.path.exists( stdout_path ): break
        time.sleep(1)
        x -= 1
            
    x = tries
    while x >= 0:
        if os.path.exists( stderr_path ): break
        time.sleep(1)
        x -= 1

    try:
        stdout = open( stdout_path, "r" ).readlines()
    except IOError, msg:
        E.warn( "could not open stdout: %s" % msg )
        stdout = []

    # stderr is read the same way
    try:
        stderr = open( stderr_path, "r" ).readlines()
    except IOError, msg:
        E.warn( "could not open stderr: %s" % msg )
        stderr = []

    # remove the files once they are read
    try:
        os.unlink( stdout_path )
        os.unlink( stderr_path )
    except OSError, msg:
        pass

    return stdout, stderr
Example 16
    def startUp(self):

        self.mHeaders = ("nid", "node", "parent", "level", "start", "end")

        if not self.isComplete():
            self.mOutfile = self.openOutputStream(self.mFilenameSegments)
            self.min_domain_size = self.mConfig.get('adda', 'min_domain_size',
                                                    30)
            self.min_segment_size = self.mConfig.get('segments',
                                                     'min_segment_size', 30)
            self.min_distance_border = self.mConfig.get(
                'segments', 'min_distance_border', 0)
            self.resolution = self.mConfig.get('segments', 'resolution', 10.0)

            E.debug( "splitting parameters: resolution=%f, min_domain_size=%i, min_distance_border=%i" % \
                         (self.resolution,
                          self.min_domain_size,
                          self.min_distance_border ) )

            # rescale
            self.r_min_domain_size = int(
                float(self.min_domain_size) / self.resolution)
            self.r_min_distance_border = int(
                float(self.min_distance_border) / self.resolution)

            if self.mContinueAt == None:
                self.mOutfile.write("\t".join(self.mHeaders) + "\n")
                self.mOutfile.flush()
Example 17
    def _iterate(self):
        """iterate over muliple files."""
        def _iter(infile):

            identifier = None

            for line in infile:
                if line.startswith("#"): continue
                if line.startswith(">"):

                    if self.regexIdentifier:
                        try:
                            identifier = re.search(self.regexIdentifier,
                                                   line[1:-1]).groups()[0]
                        except AttributeError:
                            raise ValueError(
                                "could not parse identifier from line %s - check the input"
                                % line[1:-1])
                    else:
                        identifier = re.split("\s", line[1:-1])[0]

                else:
                    if not identifier:
                        raise ValueError(
                            "refusing to emit sequence without identifier - check the input"
                        )
                    yield identifier, line.strip()

        for filename in self.filenames:
            if self.format == "tar.gz" or self.format == "tar" or (
                    self.format == "auto" and filename.endswith("tar.gz")):
                if filename == "-":
                    tf = tarfile.open(fileobj=sys.stdin, mode="r|*")
                else:
                    tf = tarfile.open(filename, mode="r")
                for f in tf:
                    b, ext = os.path.splitext(f.name)
                    if ext.lower() in (".fasta", ".fa"):
                        E.info("extracting %s" % f.name)
                        infile = tf.extractfile(f)
                        for x in _iter(infile):
                            yield x
                    else:
                        E.info("skipping %s" % f.name)

                if tf != sys.stdin: tf.close()
                continue
            elif self.format == "fasta.gz" or (self.format == "auto"
                                               and filename.endswith(".gz")):
                infile = gzip.open(filename, "r")
            elif filename == "-":
                infile = sys.stdin
            else:
                infile = open(filename, "r")

            for x in _iter(infile):
                yield x
            if filename != "-": infile.close()

        raise StopIteration
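The default identifier parsing in _iter keeps everything up to the first whitespace of the title line; a tiny self-contained illustration (the FASTA line is invented):

import re

line = ">chr1 assembled from contigs\n"
# strip '>' and the trailing newline, then take the first whitespace-delimited token
identifier = re.split("\s", line[1:-1])[0]
assert identifier == "chr1"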
Example 18
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(version="%prog version: $Id$",
                                   usage=globals()["__doc__"])

    parser.add_option("-p", "--proc", dest="processors", type="int",
                      help="use # processors [%default]")

    parser.set_defaults(
        processors=1)

    options, args = E.Start(parser, argv=argv)

    t1 = Test(RunnerGat,
              small_test_segmented_workspaces(),
              [ValidatorNumSamples,
               ValidatorSegmentDistribution])

    t1.run(options.stdout,
           processors=options.processors)

    E.Stop()
Example 20
def buildPFAMDomains( infiles, outfile ):
    '''map PFAM domains onto current sequence collection. 
    The mapping is done by ID lookup.'''
    
    infile = infiles[0]
    with IOTools.openFile( "nrdb50.fasta.tsv") as inf:

        reader = csv.DictReader( inf, dialect='excel-tab' )
        map_id2nid = {}
        for row in reader:
            map_id2nid[row['repid']] = row['nid']
    
    rx = re.compile( "(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);" )

    c = E.Counter()
    outf = IOTools.openFile( outfile, "w" )
    with IOTools.openFile( infile ) as inf:
        for entry in FastaIterator.iterate( inf ):
            c.input += 1
            pid, start, end, pfam_id, description = rx.match( entry.title ).groups()
            try:
                outf.write( "%s\t%i\t%i\t%s\n" % (map_id2nid[pid], int(start)-1, int(end), pfam_id ) )
            except KeyError:
                c.missed += 1
                continue
            c.output += 1

    outf.close()
    E.info( c )
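For reference, an illustration of what the regular expression in buildPFAMDomains pulls out of a Pfam-style FASTA title; the title string is invented:

import re

rx = re.compile("(\S+)\/(\d+)-(\d+)\s+(\S+);(.*);")
pid, start, end, pfam_id, description = rx.match(
    "Q9XYZ0/5-120 PF00001;7tm_1 GPCR family;").groups()
# pid == 'Q9XYZ0', start == '5', end == '120',
# pfam_id == 'PF00001', description == '7tm_1 GPCR family'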
Example 21
def main():
    min_unit, T_slot, a, b, c = 68, 1, 0.88652179221, 0.25726495726, 0.0073070866
    exp_times = 1  # number of experiment runs
    record_num = cfg.record_num
    # send_time_list = []
    # compute the number of pieces
    if min_unit == 68:
        piece_num = record_num
    else:
        piece_num = 68 * record_num // min_unit + (1 if (68 * record_num %
                                                         min_unit) else 0)

    # array of how many pieces to send in each round
    # pieces_each_round = 4  # initial value
    # with open("TimeOFpiecesEachRound.csv", "w", newline="", encoding="utf-8") as datacsv:
    #     csvwriter = csv.writer(datacsv, dialect="excel")
    #     csvwriter.writerow(["pieces_each_round", "number of decoded pieces"])
    #
    #     while pieces_each_round <= piece_num:  # termination condition
    #         print(f"\n--------------------------------------------\n")
    #         print(f"\n|        pieces_each_round = %4d           |\n" %(pieces_each_round, ))
    #         print(f"\n--------------------------------------------\n")
    #         send_time = Experiment.run(min_unit, T_slot, a, b, c, pieces_each_round)
    #         send_time_list.append(send_time)
    #         print(send_time_list)
    #         # save
    #         csvwriter.writerow([pieces_each_round, send_time])
    #         print("total send time across all rounds:  ", send_time)
    #         pieces_each_round += 2  # increment per round

    pieces_each_round = piece_num  # send everything in a single round !!!!!
    print("Starting the experiment...")
    if len(sys.argv) == 1:
        print("Automated mode")
        for exp_index in range(1, exp_times + 1):
            print(f"\nStarting experiment run {exp_index}...")
            send_time = Experiment.run(min_unit, T_slot, a, b, c,
                                       pieces_each_round, exp_index, -1)
            print(
                f"Run {exp_index} took {send_time}\n---------------------------")
    elif len(sys.argv) == 2 and sys.argv[1] == 'source':
        print("Source node running standalone")
        send_time = Experiment.run(min_unit, T_slot, a, b, c,
                                   pieces_each_round, 1, 0)
        print(f"Source node finished; the run took {send_time}\n---------------------------")
    elif len(sys.argv) == 3 and sys.argv[1] == 'forward':
        forward_index = int(sys.argv[2])
        if forward_index > len(cfg.Dest_ADDR):
            print("Forwarding-node index exceeds the upper bound")
        print(f"Forwarding node {forward_index} running standalone")
        Experiment.run(min_unit, T_slot, a, b, c, pieces_each_round, 1,
                       forward_index)
        print("Forwarding layer finished.")
    else:
        print("""Invalid arguments!
                 Automated: python exp_main.py
                 Source:    python exp_main.py source
                 Forwarder: python exp_main.py forward 2 (the number picks the node and must match the addresses in config)
        """)
Example 22
def compress(infile):
    '''gzip infile'''

    statement = "gzip -f %(infile)s" % locals()

    E.debug("executing statement '%s'" % statement)

    return E.run(statement)
Example 23
def main():
    
    Experiment.initLogging(os.environ.has_key("GATEWAY_INTERFACE"))
    
    (odict, args) = getOptions()
    (odict, args) = processOptions(odict, args)
        
    data = doit(odict, odict["e"])
Example 25
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv: argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(version="%prog version: $Id$",
                                   usage=globals()["__doc__"])

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("please supply two BAM files.")

    samfile = pysam.Samfile(args[0], "rb")

    readone = set()
    readtwo = set()
    removeone = set()
    removetwo = set()

    for read in samfile.fetch():
        if read.is_read1:
            if read.qname in readone:
                removeone.add(read.qname)
            readone.add(read.qname)
        else:
            if read.qname in readtwo:
                removetwo.add(read.qname)
            readtwo.add(read.qname)

    discarded = 0
    samout = pysam.Samfile(args[1], mode='wb', template=samfile)
    for read in samfile.fetch():
        if (read.qname in removeone) and read.is_read1:
            discarded += 1
        elif (read.qname in removetwo) and read.is_read2:
            discarded += 1
        else:
            samout.write(read)
    samfile.close()
    samout.close()

    E.info( "%s of %s first reads removed; %s of %s second reads; %s of %s multi mapped reads which fell into %s positions (average of %s positions per read)" % \
                ( len(removeone),
                  len(readone),
                  len(removetwo),
                  len(readtwo),
                  len(removeone)+len(removetwo),
                  len(readone)+len(readtwo),
                  discarded,
                  float(discarded)/(len(removeone)+len(removetwo))) )
    ## write footer and output benchmark information.
    E.Stop()
Example 26
def run_experiments():

    try:
        experiment_file = args['file']
    except KeyError as e:
        log.error("run_experiments requires argument %s" % e.args[0])
        return
    import Experiment
    Experiment.run_experiments(experiment_file)
Example 28
def open_file(filename, trigger, *args):
    binfile = filename[0:-4] + "_data.bin"
    if len(args) == 0:
        exp = Experiment.Experiment(binfile, filename, trigger)
    elif len(args) == 1:
        exp = Experiment.Experiment(binfile,
                                    filename,
                                    trigger,
                                    stim_length=args[0])
    experiments[filename[0:-4]] = exp
    names.update(exp.stim_names)
Example 29
def stats():
  congestion = (1.0*Experiment.drop_count)/Experiment.packet_count
  overall_rtt = Experiment.rtt()
  single_rtt = Experiment.single_rtt()
  double_rtt = Experiment.double_rtt()
  if(len(Experiment.ribdeltas) != 0):
    ribdeltas = reduce(lambda x, y: x+y, Experiment.ribdeltas, 0.0)/len(Experiment.ribdeltas)
  else:
    ribdeltas = 0

  return " ".join(map(str,[congestion, overall_rtt[0], overall_rtt[1], Experiment.packet_count, Experiment.drop_count, Experiment.probe_count, Experiment.revmatch, Experiment.cycles, ribdeltas]))
Example 31
def bamToBed(infile, outfile):
    '''convert bam to bed with bedtools.'''

    statement = "bamToBed -i %(infile)s > %(outfile)s" % locals()

    E.debug("executing statement '%s'" % statement)

    retcode = subprocess.call(statement, cwd=os.getcwd(), shell=True)
    if retcode < 0:
        raise OSError("Child was terminated by signal %i: \n%s\n" %
                      (-retcode, statement))

    return outfile
Example 32
def calculateFalsePositiveRate(infiles, outfile):
    '''
    calculate the false positive rate in taxonomic
    abundances
    '''

    # connect to database
    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()

    true_file = infiles[0]
    true_set = set()
    estimate_set = set()
    for estimate_file in infiles[1:]:
        if os.path.basename(estimate_file)[
                len("metaphlan_"):] == os.path.basename(true_file):
            tablenames = [
                P.toTable(os.path.basename(true_file)),
                P.toTable(os.path.basename(estimate_file))
            ]

            for species in cc.execute("""SELECT species_name FROM %s""" %
                                      tablenames[0]).fetchall():
                true_set.add(species[0])
            for species in cc.execute(
                    """SELECT taxon FROM %s WHERE taxon_level == 'species'""" %
                    tablenames[1]).fetchall():
                if species[0].find("_unclassified") != -1: continue
                estimate_set.add(species[0])

    total_estimate = len(estimate_set)
    total_true = len(true_set)

    E.info("counting false positives and false negatives")
    print(estimate_set.difference(true_set))
    nfp = len(estimate_set.difference(true_set))
    nfn = len(true_set.difference(estimate_set))
    ntp = len(estimate_set.intersection(true_set))

    E.info("writing results")
    track = P.snip(os.path.basename(true_file), ".load")
    outf = open(outfile, "w")
    outf.write("track\ttp_rate\tfp_rate\tfn_rate\n")
    outf.write("\t".join(
        map(str, [
            track,
            float(ntp) / total_estimate,
            float(nfp) / total_estimate,
            float(nfn) / total_true
        ])) + "\n")
    outf.close()
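The false positive/negative counting above is plain set arithmetic; a minimal sketch with toy species sets:

true_set = set(["E_coli", "B_subtilis", "S_aureus"])
estimate_set = set(["E_coli", "B_subtilis", "L_lactis"])

nfp = len(estimate_set.difference(true_set))    # 1: L_lactis called but absent
nfn = len(true_set.difference(estimate_set))    # 1: S_aureus missed
ntp = len(estimate_set.intersection(true_set))  # 2: correctly called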
Example 33
def run(infile, options):

    options.tablename = quoteTableName(options.tablename,
                                       backend=options.backend)

    if options.map:
        m = {}
        for x in options.map:
            f, t = x.split(":")
            m[f] = t
        options.map = m
    else:
        options.map = {}

    existing_tables = None

    if options.backend == "pg":
        import pgdb
        dbhandle = pgdb.connect(options.psql_connection)
        error = pgdb.DatabaseError
        options.null = "NULL"
        options.string_value = "'%s'"
        if options.insert_quick:
            raise ValueError("quick import not implemented.")

    elif options.backend == "sqlite":
        import sqlite3
        dbhandle = sqlite3.connect(options.database)
        try:
            os.chmod(options.database, 0664)
        except OSError, msg:
            E.warn("could not change permissions of database: %s" % msg)

        # Avoid the following error:
        # sqlite3.ProgrammingError: You must not use 8-bit bytestrings unless you use a text_factory that can interpret 8-bit bytestrings (like text_factory = str). It is highly recommended that you instead just switch your application to Unicode strings
        # Note: might be better to make csv2db unicode aware.
        dbhandle.text_factory = str

        error = sqlite3.OperationalError
        options.insert_many = True  # False
        options.null = None  # "NULL"
        options.string_value = "%s"  # "'%s'"

        statement = "SELECT name FROM sqlite_master WHERE type='table'"
        cc = executewait(dbhandle, statement, error, options.retry)
        existing_tables = set([x[0] for x in cc])
        cc.close()

        quick_import_statement = "sqlite3 -header -csv -separator '\t' %s '.import %%s %s'" % (
            options.database, options.tablename)
Example 35
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv: argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version=
        "%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    infile = open(args[0], 'r')
    genes_list = []
    header = None

    for line in infile:
        if line.startswith("#"): continue
        if line.startswith("gene_id"):
            header = line.rstrip('\n')
            num_fields = len(header.split('\t')) - 2
            total_reads = [0] * num_fields
            continue

        la = line.rstrip('\n').split('\t')
        if len(la) < 3:
            continue
        genes_list.append(la)
        total_reads = map(lambda x, y: float(x) + float(y), total_reads,
                          la[2::])

    total_reads = map(lambda x: float(x) / 1000000, total_reads)

    print header

    for gene in genes_list:
        my_str_list = gene[0:2]
        vals = map(
            lambda x, y: float(x) / float(y) / (float(gene[1]) / 1000.0),
            gene[2::], total_reads)
        my_str_list.extend(map(str, vals))
        print "\t".join(my_str_list)

    ## write footer and output benchmark information.
    E.Stop()
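The per-gene arithmetic above is an RPKM-style normalisation: counts scaled by millions of mapped reads and by gene length in kilobases. A standalone sketch with made-up numbers:

counts = 500.0        # reads mapped to the gene (hypothetical)
total_reads = 20.0e6  # library size
length_bp = 2000.0    # gene length

rpkm = counts / (total_reads / 1e6) / (length_bp / 1000.0)
assert rpkm == 12.5   # 500 reads / 20 million reads / 2 kb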
Example 36
def bamToMEDIPS( infile, outfile ):
    '''convert bam to medips format

    contig, start, end, strand

    Start is 1-based.
    '''

    statement = '''bamToBed -i %(infile)s | awk '{printf("%%s\\t%%i\\t%%i\\t%%s\\n", $1,$2+1,$3,$6)}' > %(outfile)s''' % locals()

    E.debug( "executing statement '%s'" % statement )

    E.run( statement )

    return outfile
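A note on the escaping above: the awk program passes through Python %-interpolation first, so %%s must stand in for %s, while \\t survives as a literal \t for awk to interpret. A minimal sketch of the same two-stage expansion (paths hypothetical):

infile, outfile = "in.bam", "out.medips"
statement = '''bamToBed -i %(infile)s | awk '{printf("%%s\\t%%i\\t%%i\\t%%s\\n", $1,$2+1,$3,$6)}' > %(outfile)s''' % locals()
# after interpolation the shell receives:
#   bamToBed -i in.bam | awk '{printf("%s\t%i\t%i\t%s\n", $1,$2+1,$3,$6)}' > out.medips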
Example 37
def iterator_test( infile, report_step = 100000 ):
    '''only output parseable lines from infile.'''
    
    ninput, noutput, nerrors = 0, 0, 0

    while 1:
        try:
            x = infile.next()
        except ParsingError, msg:
            nerrors += 1
            ninput += 1
            E.warn( str(msg) )
            continue
        except StopIteration:
            break
Example 39
def adience_poisson_experiment(dataset, tau_mode, tau=1.):
    """Runs a complete experiment (training and test) with the poisson configuration on the Adience dataset.

    :param dataset: Dataset to run the experiment on.
    :param tau_mode: How the tau parameter is handled in the experiment (kept constant or learned).
    :param tau: Value of the tau parameter; the initial value if it is to be learned, otherwise the constant value.
    """
    #    RUN ADIENCE POISSON
    assert tau_mode in ["non_learnable", "sigm_learnable"]

    #Create net architecture
    poisson_resnet = Resnet_2x4_poisson(tau_mode)

    model = Model(inputs=poisson_resnet.inputs,
                  outputs=poisson_resnet.get_net())

    #Create experiment
    experiment = Experiment.Experiment(dataset, model)

    #Train
    experiment.train('adience_poisson_t=' + tau_mode,
                     '/TFG/ordinal_unimodal_mio/src/logs/')

    #Test
    experiment.test()
Example 40
def adience_baseline_experiment_sgd(dataset):
    """Runs a complete experiment (training and test) with the baseline configuration and the Nesterov SGD optimizer on the Adience dataset.

    :param dataset: Dataset to run the experiment on.
    """
    #    RUN ADIENCE BASELINE

    #Create net architecture
    baseline_resnet = Resnet_2x4()

    model = Model(inputs=baseline_resnet.inputs,
                  outputs=baseline_resnet.get_net())

    #Create experiment
    experiment = Experiment.Experiment(dataset, model)

    arguments = {
        'epochs': 100,
        'optimizer': SGD,
        'learning_rate': 1e-2,
        'momentum': 0.9,
        'loss_fn': 'categorical_crossentropy',
        'metrics': ['accuracy']
    }

    callbacks = ['ModelCheckpoint']

    #Train
    experiment.train('adience_baseline',
                     '/TFG/ordinal_unimodal_mio/src/logs/',
                     arguments=arguments,
                     callbacks=callbacks)

    #Test
    experiment.test()
Example 41
def suggest_analysis_layout(solid_runs):
    """Generate a bash script to build the analysis directory scheme

    Given a set of SolidRuns, print a set of script commands for running the
    build_analysis_dir.py program to create and populate the analysis directories.

    The script can be edited before being executed by the user.

    Arguments:
      solid_runs: a list of SolidRun objects.
    """
    print "#!/bin/sh\n#\n# Script commands to build analysis directory structure"
    for run in solid_runs:
        build_analysis_dir_cmd = 'build_analysis_dir.py'
        top_dir = os.path.abspath(
            os.path.join(os.getcwd(), os.path.basename(run.run_dir)))
        for sample in run.samples:
            for project in sample.projects:
                # Create one experiment per project
                cmd_line = []
                expt = Experiment.Experiment()
                expt.name = project.getProjectName()
                expt.type = "expt"
                expt.sample = project.getSample().name
                expt.library = project.getLibraryNamePattern()
                # Print the arguments for the layout
                cmd_line.extend(
                    (build_analysis_dir_cmd, "--top-dir=%s_analysis" % top_dir,
                     "--link=absolute", "--naming-scheme=partial"))
                cmd_line.append(expt.describe())
                cmd_line.append(run.run_dir)
                print "#\n%s" % (' \\\n').join(cmd_line)
Example 42
  def test_t4(self):
    r1 = Experiment.largest([3,2,3,4]) 
    self.assertEqual(r1, 4)

  #def test_t5(self):
  #  r1 = Experiment.largest([3,-2,3,-1,4])
  #  self.assertEqual(r1, 4)
Example 43
def main():
    min_unit, T_slot, a, b, c = 68, 1, 0.88652179221, 0.25726495726, 0.0073070866
    exp_times = 1  # number of experiment runs
    record_num = cfg.record_num
    send_time_list = []
    # compute the number of pieces
    if min_unit == 68:
        piece_num = record_num
    else:
        piece_num = 68 * record_num // min_unit + (1 if (68 * record_num %
                                                         min_unit) else 0)

    # array of how many pieces to send in each round
    pieces_each_round = 4  # initial value
    with open("TimeOFpiecesEachRound.csv", "w", newline="",
              encoding="utf-8") as datacsv:
        csvwriter = csv.writer(datacsv, dialect="excel")
        csvwriter.writerow(["pieces_each_round", "pieces sent by the source"])

        while pieces_each_round <= piece_num:  # termination condition
            print(f"\n--------------------------------------------\n")
            print(f"\n|        pieces_each_round = %4d           |\n" %
                  (pieces_each_round, ))
            print(f"\n--------------------------------------------\n")
            send_num = Experiment.run(min_unit, T_slot, a, b, c,
                                      pieces_each_round, 1, -1)
            send_time_list.append(send_num)
            print(send_time_list)
            # save
            csvwriter.writerow([pieces_each_round, send_num])
            print("total send time across all rounds:  ", send_num)
            pieces_each_round += 2  # increment per round
Example 45
def bedToMEDIPS( infile, outfile ):
    '''convert bed to medips format

    contig, start, end, strand

    Start is 1-based.
    '''

    if infile.endswith( ".gz" ): cat = "zcat"
    else: cat = "cat"

    statement = '''%(cat)s %(infile)s | awk '{printf("%%s\\t%%i\\t%%i\\t%%s\\n", $1,$2+1,$3,$6)}' > %(outfile)s''' % locals()

    E.run( statement )

    return outfile
Example 46
def experimentCounting(ranger):
    #print("Probabilistic Counting")
    #print("\nTo use hash functions:")

    hashes = list(hashlib.algorithms_guaranteed)

    # filter rather than mutating the list while iterating over it
    hashes = [h for h in hashes if not h.lower().startswith("shake_")]

    #print(hashlib.algorithms_guaranteed)
    #print(hashes)

    setups = Experiment.getSetup(["distinct", "hashes"])

    distincts = setups.get("various numbers of distinct elements")
    numHashes = setups.get("number of hashes")

    #for i in range(len(distincts)):
    #    distinct = distincts[i][0]
    #    calcCounting(distinct,3,hashes)

    for i in range(len(numHashes)):
        numHash = numHashes[i]
        calcCounting(ranger, numHash, hashes)
Example 47
def experiment(args, logger, dataProcessor):
    exp = Experiment.Experiment()

    model = dataProcessor.loadNetwork(args, 0)
    #PolicyValueFn.PolicyValueFn(args).to(args.device)
    data = exp.evaluationWithDifferentMinMaxSearchAgent(model)
#    data = exp.evaluationForNetworkWithFourRollout(model,start=10,end=50,step=10,random_cnt=1,numOfEvaluations=1)
    logger.info(data)
Example 48
def buildTrueTaxonomicRelativeAbundances(infile, outfile):
    '''
    get species level relative abundances for the simulated
    data. This involves creating maps between different identifiers
    from the NCBI taxonomy, so that the results are comparable
    to the species level analysis from metaphlan.
    The gi_taxid_nucl table is huge, and therefore this function
    takes an age to run - can think of optimising this somehow
    '''
    to_cluster = True

    total = 0
    rel_abundance = collections.defaultdict(int)
    for fastq in Fastq.iterate(iotools.openFile(infile)):
        total += 1
        gi = fastq.identifier.split("|")[1]
        rel_abundance[gi] += 1
    for gi, ab in rel_abundance.items():
        rel_abundance[gi] = float(ab) / total

    dbh = sqlite3.connect(PARAMS["database"])
    cc = dbh.cursor()
    result = collections.defaultdict(float)
    for gi in list(rel_abundance.keys()):
        E.info("processing gi %s" % gi)
        taxid = cc.execute(
            """SELECT taxid FROM gi_taxid_nucl WHERE gi == '%s'""" %
            gi).fetchone()[0]
        species_id = cc.execute(
            """SELECT species_id FROM categories WHERE taxid == '%s'""" %
            taxid).fetchone()[0]
        species_name = cc.execute(
            """SELECT taxname FROM names WHERE taxid == '%s' AND description == 'scientific name'"""
            % species_id).fetchone()[0]
        abundance = rel_abundance[gi]
        E.info("mapped gi %s to taxid: %s, species_id: %s, species_name: %s" %
               (str(gi), str(taxid), str(species_id), species_name))
        result[species_name] += abundance

    outf = open(outfile, "w")
    outf.write("species_name\trelab\n")
    for species_name, abundance in result.items():
        # create names consistent with metaphlan
        species_name = species_name.replace(" ", "_")
        outf.write("%s\t%f\n" % (species_name, abundance))
    outf.close()
Example 50
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv: argv = sys.argv

    # setup command line parser
    parser = E.OptionParser( version = "%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $", 
                                    usage = globals()["__doc__"] )

    parser.add_option("-o", "--output-prefix", dest="output_prefix", type="string",
                      help="output filename prefix [default=%default]."  )

    parser.add_option("-c", "--chromosome-table", dest="filename_chromosome_table", type="string",
                      help="filename with tab separated list of chromosome names [default=%default]."  )

    parser.add_option( "--action", dest="action", type="choice",
                       choices=("plot", "run"),
                       help="action to perform [default=%default]")
    parser.add_option( "-s", "--signal-value", dest="signal_value", type="string", 
                       help="use either p.value or sig.value as ranking measure [default=%default]" )
    
    parser.set_defaults(
        action = "plot",
        output_prefix = "output",
        half_width = None,
        overlap_ratio = 0,
        is_broadpeak = False,
        signal_value = "signal.value",
        filename_chromosome_table = "genome_table.txt",
        )

    ## add common options (-h/--help, ...) and parse command line 
    (options, args) = E.Start( parser, argv = argv )

    if options.action == "plot":
        plotIDR( options.output_prefix + ".pdf", args)
    elif options.action == "run":
        if len(args) != 2:
            raise ValueError("require exactly two replicates")
        runIDR( options, args[0], args[1])
        
    ## write footer and output benchmark information.
    E.Stop()
Example 51
def _outputHistogram(counts, bins, section):
    outf = E.openOutputFile("%s.table" % section)
    outf.write("%s\tcounts\tfrequency\tcumulative\n" % section)
    t, cc = sum(counts), 0
    for bin, c in zip(bins[:-1], counts):
        cc += c
        outf.write("\t".join((str(bin), str(c),
                              "%6.4f" % (100.0 * c / t),
                              "%6.4f" % (100.0 * cc / t))) + "\n")
Example 52
def doAmazing(odict, args):
    
    if args:
        exp_names = [args[0]]
    else:
        exp_names = Experiment.getAllExperiments()
        
    for exp_name in exp_names:
        exp = Experiment.Experiment(exp_name)
        exp.regenHTML()
Example 53
def checkBlastRuns( infiles, outfile ):
    '''check if output files are complete.
    '''
    
    outf = IOTools.openFile( outfile, "w" )

    outf.write( "chunkid\tquery_first\tquery_last\tfound_first\tfound_last\tfound_total\tfound_results\thas_finished\tattempts\t%s\n" %\
                    "\t".join(Logfile.RuntimeInformation._fields))

    for infile in infiles:
        E.debug( "processing %s" % infile)
        chunkid = P.snip( os.path.basename( infile ), ".blast.gz" )
        logfile = infile + ".log"
        chunkfile = P.snip( infile, ".blast.gz" ) + ".fasta"

        with IOTools.openFile( infile ) as inf:
            l = inf.readline()
            ids = set()
            total_results = 0
            for l in inf:
                if l.startswith("#//"): continue
                ids.add( int(l.split("\t")[0] ) )
                total_results += 1
            found_first = min(ids)
            found_last = max(ids)
            found_total = len(ids)

        l = IOTools.getFirstLine( chunkfile )
        query_first = l[1:-1]
        l2 = IOTools.getLastLine( chunkfile, nlines = 2).split("\n")
        query_last = l2[0][1:]

        logresults = Logfile.parse( logfile )
        
        outf.write( "\t".join( map(str, (\
                        chunkid, query_first, query_last,
                        found_first, found_last,
                        found_total, total_results,
                        logresults[-1].has_finished,
                        len(logresults),
                        "\t".join( map(str, logresults[-1]) ) ) ) ) + "\n" )
        
    outf.close()
Example 54
	def __init__(self, measurementSizeX, measurementSizeY, measurementSizeZ, psfSizeX, psfSizeY, psfSizeZ, numChannels, homeDirectory, ops):	
		Experiment.__init__(self,measurementSizeX, measurementSizeY, measurementSizeZ, psfSizeX, psfSizeY, psfSizeZ, numChannels, homeDirectory, ops)
	
		# parameters of the top sphere
		self.spherePositionX=self.objectSizeX / 2
		self.spherePositionY=self.objectSizeY / 2
		self.spherePositionZ=self.objectSizeZ / 2 - 27
		self.sphereRadius=20
		self.sphereIntensity=10000

		# parameters of the bottom sphere
		self.spherePosition2X=self.objectSizeX / 2
		self.spherePosition2Y=self.objectSizeY / 2
		self.spherePosition2Z=self.objectSizeZ / 2 
		self.sphereRadius2=5
		self.sphereIntensity2=10000

		self.background=0.000001

		self.directory=homeDirectory+"/SpheresHighIntensity/"
Example 55
def main():
    print "DEBUG: Entering Driver.main()"

    expGUI = ExperimentGUI() # create new ExperimentGUI object using default values    

    res = Results() # create new Results object using default value
    res.writeToFile(expGUI.window.get_title() + " Experiment\n")
    res.writeToFile("Experiment started at: " + str(datetime.now()) + "\n\n")

    gtk.main()

   # http://www.pygtk.org/dist/pygtk2-tut.pdf
    
    catA = ImageCategory("A", ["A0.jpg", "A1.jpg", "A2.jpg", "A3.jpg", "A4.jpg", "A5.jpg"])
    catB = ImageCategory("B", ["B0.jpg", "B1.jpg", "B2.jpg", "B3.jpg", "B4.jpg"])

    categories = [catA, catB]
    
    lb1 = LearningBlock(["A0.jpg", "A1.jpg", "B2.jpg"], 10.0)
    lb2 = LearningBlock(["B3.jpg", "B4.jpg", "A1.jpg"], 15.0)
    tb1 = TestingBlock(["A0.jpg", "A1.jpg", "B2.jpg"])
    tb2 = TestingBlock(["A3.jpg", "B4.jpg"])

    lblockList = [lb1, lb2]
    tblockList = [tb1, tb2]
    
    lp = LearningPhase(lblockList)
    tp = TestingPhase(tblockList)
    phaseList = [lp, tp]
    exp = Experiment(phaseList)
    exp.runPhases(categories, res)
    
    print "DEBUG: Entering gtk.main()"
    
    #gtk.main()
    
    print "DEBUG: Exiting gtk.main()"

    res.writeToFile("Experiment ended at: " + str(datetime.now()))

    print "DEBUG: Exiting Driver.main()"
Example 56
def bigwig( infile, contig_sizes ):
    '''convert infile to bigwig file'''

    if infile.endswith( ".wig"):
        outfile = infile[:-4] + ".bigwig"
    else:
        outfile = infile + ".bigwig"
        
    tmp, filename_sizes = tempfile.mkstemp() 

    os.write( tmp, "\n".join( [ "\t".join(map(str,x)) for x in contig_sizes.iteritems() ] ) )
    os.close( tmp )

    statement = "wigToBigWig -clip %(infile)s %(filename_sizes)s %(outfile)s " % locals() 

    E.debug( "executing statement '%s'" % statement )

    if E.run( statement ):
        os.unlink( infile )

    os.unlink( filename_sizes )
Example 58
    def run( self, outfile, processors = 1 ):

        tasks = []

        manager = multiprocessing.Manager()
        lock = manager.Lock()

        for segmentor in self.test_generator:
            headers = segmentor.headers
            tasks.append( (lock, outfile, segmentor, self.runner, self.validators) )

        for v in self.validators: headers.extend( v.headers )
        outfile.write( "%s\n" % "\t".join( headers) )
            
        E.info( "created %i tasks for %i workers" % (len(tasks), processors ) )
        
        if processors > 1:
            pool = multiprocessing.Pool( processors )
            pool.map( runSimulation, tasks ) 
        else:
            for task in tasks:
                runSimulation( task )
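The dispatch pattern in run above, a worker mapped over task tuples with a serial fallback for a single processor, in minimal form; the worker here is a stand-in for any picklable top-level function such as runSimulation:

import multiprocessing

def work(task):
    # placeholder worker; the real tasks carry (lock, outfile, segmentor, ...)
    return sum(task)

if __name__ == "__main__":
    tasks = [(1, 2), (3, 4)]
    processors = 2
    if processors > 1:
        pool = multiprocessing.Pool(processors)
        results = pool.map(work, tasks)
    else:
        results = [work(t) for t in tasks]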