Example #1
def execute(limit=1):
    """
    Executes up to `limit` waiting jobs
    """

    # no locking here for now, TODO
    jobs = models.Job.objects.filter(status=status.WAITING).order_by('id')[0:limit]

    if not jobs:
        logger.info('no jobs are waiting')

    # mark the selected jobs as running
    for job in jobs:
        set_status(job, status=status.RUNNING)

    # run the selected jobs        
    for job in jobs:
        logger.info('executing %s' % job)
        jobtype = job.json.get('type')
        data = get_data(job)
        try:
            if jobtype == status.INDEXING_JOB:
                jobdefs.indexing_job(data=data)
            else:
                raise Exception("unknown jobtype")
            job.delete()
        except Exception as exc:
            logger.error(exc)
            set_error(exc, (data, job))
Example #2
def flush(table, collect, name):
    # commit the changes
    if collect:
        table.append(collect)
        table.flush()
        # nicer information
        size = util.commify(len(table))
        logger.info("table=%s, contains %s rows" % (name, size))
Example #3
def flush(table, collect, name):
    # commit the changes
    if collect:
        table.append(collect)
        table.flush()
        # nicer information
        size = util.commify(len(table))
        logger.info('table=%s, contains %s rows' % (name, size))
Example #4
    def display(self, msg, reps=1):
        total = (time.time() - self.start)

        if reps == 1:
            info("%s takes %.3f seconds" % (msg, total))
        else:
            # `reps` repetitions took `total` seconds, so the rate is reps / total
            value = reps / total
            info("%s performs at %.3f per second" % (msg, value))

        self.reset()
Example #5
    def display(self, msg, reps=1):
        total = (time.time() - self.start)

        if reps == 1:
            info("%s takes %.3f seconds" % (msg, total))
        else:
            # `reps` repetitions took `total` seconds, so the rate is reps / total
            value = reps / total
            info("%s performs at %.3f per second" % (msg, value))

        self.reset()
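
The two display() snippets above are methods of a small timing helper; a minimal self-contained sketch of such a class follows. The class name Timer and the module-level info helper are assumptions for illustration, not code from the original project.

import time
import logging

# assumed stand-in for the module-level `info` used above
info = logging.getLogger(__name__).info

class Timer(object):
    "Minimal sketch of a timer object that display() above could belong to"

    def __init__(self):
        self.reset()

    def reset(self):
        # restart the clock
        self.start = time.time()

    def display(self, msg, reps=1):
        total = (time.time() - self.start)
        if reps == 1:
            info("%s takes %.3f seconds" % (msg, total))
        else:
            # rate in operations per second
            info("%s performs at %.3f per second" % (msg, reps / total))
        self.reset()

# usage: timer = Timer(); ... do work ...; timer.display("indexing", reps=100)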
Example #6
def generate(n=5):

    projects = []
    for i in range(1, n):
        name = random.choice(project_names) % i
        info = 'some info=%s' % i
        project = authorize.create_project(user=user, name=name, info=info)
        projects.append(project)
        logger.info('creating %s' % name)

    # data names
    data_names = ('short-good-input.gtrack', 'short-data.bed')

    # visualization names
    track_names = ('differential expression', 'HELA subtract', 'Default track')

    # a subset of projects get data, visualization and results added to them
    subset = projects[-1:]
    for project in subset:

        # create some tracks for this project
        for tname in track_names:
            json = dict()
            track = authorize.create_track(user=user,
                                           pid=project.id,
                                           name=tname,
                                           json=json)
            logger.info('creating track %s' % track.name)

        # an assert on a non-empty tuple is always true; compare the values instead
        assert project.track_count() == len(track_names)

        # upload some data names
        for name in data_names:
            logger.info('uploading data %s' % name)
            stream = File(open(conf.testdata(name)))
            data = authorize.create_data(user=user,
                                         pid=project.id,
                                         stream=stream,
                                         name=name,
                                         info='test data')

            # create some results
            logger.info('adding results content and image')
            stream1 = File(open(conf.testdata('short-results.txt')))
            image1 = File(open(conf.testdata('readcounts.png'), 'rb'))
            result1 = authorize.create_result(user=user,
                                              data=data,
                                              content=stream1,
                                              image=image1)

            image2 = File(open(conf.testdata('shift.png'), 'rb'))
            result2 = authorize.create_result(user=user,
                                              data=data,
                                              content=None,
                                              image=image2)
Example #7
def generate_coverage(func, path, *args, **kwds):
    """
    Generates code coverage for the function 
    and places the results in the path
    """

    import figleaf
    from figleaf import annotate_html

    # Fix for figleaf misbehaving. It is adding a logger at root level
    # and that will add a handler to all subloggers (ours as well)
    # needs to be fixed in figleaf
    import logging
    root = logging.getLogger()
    # remove all root handlers
    for hand in root.handlers:
        root.removeHandler(hand)

    if os.path.isdir(path):
        shutil.rmtree(path)

    info("collecting coverage information")

    figleaf.start()
    # execute the function itself
    return_vals = func(*args, **kwds)
    figleaf.stop()

    info('generating coverage')
    coverage = figleaf.get_data().gather_files()

    annotate_html.prepare_reportdir(path)

    # skip python modules and the test modules
    regpatt = lambda patt: re.compile(patt, re.IGNORECASE)
    patterns = map(regpatt, ['python', 'tests', 'django', 'path*'])
    annotate_html.report_as_html(coverage,
                                 path,
                                 exclude_patterns=patterns,
                                 files_list='')

    return return_vals
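
A hypothetical usage sketch for generate_coverage; the target function sample_work and the output directory below are illustrative only, and extra positional arguments are forwarded to the profiled function:

# any callable can be profiled; figleaf must be installed for this to run
def sample_work(n=1000):
    return sum(range(n))

# profiles sample_work(500) and writes the HTML report into "coverage_report"
result = generate_coverage(sample_work, "coverage_report", 500)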
Example #8
def generate_coverage(func, path, *args, **kwds):
    """
    Generates code coverage for the function
    and places the results in the path
    """

    import figleaf
    from figleaf import annotate_html

    # Fix for figleaf misbehaving. It is adding a logger at root level
    # and that will add a handler to all subloggers (ours as well)
    # needs to be fixed in figleaf
    import logging
    root = logging.getLogger()
    # remove all root handlers
    for hand in root.handlers:
        root.removeHandler(hand)

    if os.path.isdir(path):
        shutil.rmtree(path)

    info("collecting coverage information")

    figleaf.start()
    # execute the function itself
    return_vals = func(*args, **kwds)
    figleaf.stop()

    info('generating coverage')
    coverage = figleaf.get_data().gather_files()

    annotate_html.prepare_reportdir(path)

    # skip python modules and the test modules
    regpatt = lambda patt: re.compile(patt, re.IGNORECASE)
    patterns = map(regpatt, ['python', 'tests', 'django', 'path*'])
    annotate_html.report_as_html(coverage, path, exclude_patterns=patterns, files_list='')

    return return_vals
Example #9
def predict(inpname, outname, options):
    """
    Generate the peak predictions on a genome wide scale
    """
    if options.strand == TWOSTRAND:
        logger.info('operating in twostrand mode')

    if options.index:
        index = hdflib.PositionalData(fname='', index=inpname, nobuild=True, workdir=options.workdir)
    else:
        index = hdflib.PositionalData(fname=inpname, nobuild=True, workdir=options.workdir)

    fp = file(outname, 'wt')

    for label in index.labels:
        table = index.table(label)
        size  = table.cols.idx[-1]
        info  = util.commify(size)
        logger.info('predicting on %s of total size %s' % (label, info))
        lo = 0
        hi = min((size, options.maxsize))

        while True:
            if lo >= size:
                break
            perc = '%.1f%%' % (100.0 * lo / size)
            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))

            # get the data
            res = index.query(start=lo, end=hi, label=label)

            # exclusion zone
            w = options.exclude / 2

            def predict(x, y):
                fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma, epsilon=options.level)
                peaks = fitlib.detect_peaks(x=fx, y=fy)
                if options.mode != 'all':
                    peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude, threshold=options.level)
                return peaks

            if options.strand == TWOSTRAND:
                # operates in two strand mode
                for yval, strand in [(res.fwd, '+'), (res.rev, '-')]:
                    logger.debug('processing strand %s' % strand)
                    peaks = predict(x=res.idx, y=yval)
                    output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
            else:
                # combine strands
                peaks = predict(x=res.idx, y=res.val)
                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')

            # switching to a higher interval
            lo = hi
            hi += options.maxsize

    fp.close()
Example #10
def generate(n=5):

    projects = []
    for i in range(1, n):
        name = random.choice(project_names) % i
        info = 'some info=%s' % i
        project = authorize.create_project(user=user, name=name, info=info)
        projects.append(project)
        logger.info('creating %s' % name)

    # data names
    data_names = ('short-good-input.gtrack', 'short-data.bed')

    # visualization names
    track_names = ('differential expression', 'HELA subtract', 'Default track')

    # a subset of projects get data, visualization and results added to them
    subset = projects[-1:]
    for project in subset:

        # create some tracks for this project
        for tname in track_names:
            json = dict()
            track = authorize.create_track(user=user, pid=project.id, name=tname, json=json)
            logger.info('creating track %s' % track.name)

        # an assert on a non-empty tuple is always true; compare the values instead
        assert project.track_count() == len(track_names)

        # upload some data names
        for name in data_names:
            logger.info('uploading data %s' % name)
            stream = File(open(conf.testdata(name)))
            data = authorize.create_data(user=user, pid=project.id, stream=stream, name=name, info='test data')

            # create some results
            logger.info('adding results content and image')
            stream1 = File(open(conf.testdata('short-results.txt')))
            image1 = File(open(conf.testdata('readcounts.png'), 'rb'))
            result1 = authorize.create_result(user=user, data=data, content=stream1, image=image1)

            image2 = File(open(conf.testdata('shift.png'), 'rb'))
            result2 = authorize.create_result(user=user, data=data, content=None, image=image2)
Example #11
    def build(self):
        "May be overriden to use different parsers and schemas"

        logger.info("file='%s'" % self.fname)
        logger.info("index='%s'" % self.index)

        # check file for existence
        if missing(self.fname):
            raise IOError("missing data %s" % self.fname)

        # provides timing information
        timer = util.Timer()

        # iterate over the file
        reader = csv.reader(file(self.fname, "rt"), delimiter="\t")

        # unwind the reader until it hits the header
        for row in reader:
            if row[0] == "chrom":
                break

        # helper function that flushes a table
        def flush(table, collect, name):
            # commit the changes
            if collect:
                table.append(collect)
                table.flush()
                # nicer information
                size = util.commify(len(table))
                logger.info("table=%s, contains %s rows" % (name, size))

        # print messages at every CHUNK line
        last_chrom = table = None
        db = openFile(self.index, mode="w", title="HDF index database")

        # continue on with reading, optimized for throughput
        # with minimal function calls
        collect = []
        for linec, row in izip(count(1), reader):

            # prints progress while processing, also flushes periodically
            if (linec % CHUNK) == 0:
                logger.info("... processed %s lines" % util.commify(linec))
                flush(table=table, collect=collect, name=last_chrom)
                collect = []

            # get the values from each row
            chrom, index, fwd, rev, value = row
            fwd, rev, value = float(fwd), float(rev), float(value)

            # flush when switching chromosomes
            if chrom != last_chrom:
                # table==None at the beginning
                if table is not None:
                    # logger.debug("... flushing at line %s" % row)
                    flush(table=table, collect=collect, name=last_chrom)
                    collect = []

                # creates the new HDF table here
                table = db.createTable("/", chrom, PositionalSchema, "label %s" % chrom)
                logger.info("creating table:%s" % chrom)
                last_chrom = chrom

            collect.append((index, fwd, rev, value))

        # flush for last chromosome, report some timing information
        flush(table, collect, chrom)
        lineno = util.commify(linec)
        elapsed = timer.report()
        logger.info("finished inserting %s lines in %s" % (lineno, elapsed))

        # close database
        db.close()
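
The build() method writes rows into tables created from PositionalSchema. A hypothetical sketch of that schema is shown below; the column names idx, fwd, rev and val match the queries in the predict() examples, while the concrete column types are assumptions:

from tables import IsDescription, Int64Col, Float64Col

class PositionalSchema(IsDescription):
    "Hypothetical row layout; names match the code, types are guesses"
    idx = Int64Col(pos=0)    # position within the chromosome
    fwd = Float64Col(pos=1)  # forward strand signal
    rev = Float64Col(pos=2)  # reverse strand signal
    val = Float64Col(pos=3)  # combined signal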
Example #12
def predict(inpname, outname, options):
    """
    Generate the peak predictions on a genome wide scale
    """
    if options.strand == TWOSTRAND:
        logger.info('operating in twostrand mode')

    if options.index:
        index = hdflib.PositionalData(fname='',
                                      index=inpname,
                                      nobuild=True,
                                      workdir=options.workdir)
    else:
        index = hdflib.PositionalData(fname=inpname,
                                      nobuild=True,
                                      workdir=options.workdir)

    fp = file(outname, 'wt')

    for label in index.labels:
        table = index.table(label)
        size = table.cols.idx[-1]
        info = util.commify(size)
        logger.info('predicting on %s of total size %s' % (label, info))
        lo = 0
        hi = min((size, options.maxsize))

        while True:
            if lo >= size:
                break
            perc = '%.1f%%' % (100.0 * lo / size)
            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))

            # get the data
            res = index.query(start=lo, end=hi, label=label)

            # exclusion zone
            w = options.exclude / 2

            def predict(x, y):
                fx, fy = fitlib.gaussian_smoothing(x=x,
                                                   y=y,
                                                   sigma=options.sigma,
                                                   epsilon=options.level)
                peaks = fitlib.detect_peaks(x=fx, y=fy)
                if options.mode != 'all':
                    peaks = fitlib.select_peaks(peaks=peaks,
                                                exclusion=options.exclude,
                                                threshold=options.level)
                return peaks

            if options.strand == TWOSTRAND:
                # operates in two strand mode
                for yval, strand in [(res.fwd, '+'), (res.rev, '-')]:
                    logger.debug('processing strand %s' % strand)
                    peaks = predict(x=res.idx, y=yval)
                    output(stream=fp,
                           peaks=peaks,
                           chrom=label,
                           w=w,
                           strand=strand)
            else:
                # combine strands
                peaks = predict(x=res.idx, y=res.val)
                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')

            # switching to a higher interval
            lo = hi
            hi += options.maxsize

    fp.close()
Example #13
        dest="limit", type='int', default=1,
        help="how many jobs to run in parallel"
    )

    # runs the jobrunner in a loop, as a long running server process
    parser.add_option(
        '--server', action="store_true",
        dest="server", default=False,
        help="runs as a server and invokes the jobrunner at every delay seconds",
    )

    # parse the argument list
    options, args = parser.parse_args()

    logger.disable(options.verbosity)

    # server mode requires a delay
    if options.server and not options.delay:
        parser.print_help()
    else:
        if options.server:
            logger.info('server mode, delay=%ss' % options.delay)
        while 1:
            # this is used to start multiple jobs with cron (at every minute)
            # but have them actually start up at smaller increments
            time.sleep(options.delay)
            execute(limit=options.limit)
            if not options.server:
                break
            else:
                logger.debug('jobserver waiting %ss' % options.delay)
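
The excerpt above starts in the middle of an add_option call. A minimal sketch of how the surrounding optparse parser might be assembled follows; the flag names and defaults for --limit, --delay and --verbosity are assumptions inferred from the excerpt, and the --server option would be added exactly as shown above:

import optparse

parser = optparse.OptionParser()

# hypothetical reconstruction of the truncated --limit option
parser.add_option(
    '--limit', type='int',
    dest="limit", default=1,
    help="how many jobs to run in parallel"
)

# assumed options referenced later as options.delay and options.verbosity
parser.add_option(
    '--delay', type='int',
    dest="delay", default=10,
    help="seconds to wait between jobrunner invocations"
)
parser.add_option(
    '--verbosity', type='int',
    dest="verbosity", default=1,
    help="verbosity level passed to logger.disable()"
)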
Example #14
        tc.follow("Project view")
        self.delete_project(name)

def get_suite():
    "Returns the testsuite"
    return testlib.make_suite([])

def local_suite():
    "Returns the testsuite"
    tests = [
        BaseTest,
        ServerTest,
    ]
    return testlib.make_suite(tests)

def test_runner(suite, verbosity=0):
    "Runs the functional tests on a test database"
    from django.db import connection

    old_name = settings.DATABASE_NAME
    utils.setup_test_environment()
    connection.creation.create_test_db(verbosity=verbosity, autoclobber=True)
    result = unittest.TextTestRunner(verbosity=2).run(suite)
    connection.creation.destroy_test_db(old_name, verbosity)
    utils.teardown_test_environment()

if __name__ == '__main__':
    logger.info("executing functional tests")
    suite = local_suite()
    test_runner(suite, verbosity=0)
Example #15
def flush_database():
    "Deletes all entries"
    logger.info("flushing the database")
    #Data.objects.all().delete()
    call_command('flush')
Example #16
def get_suite():
    "Returns the testsuite"
    return testlib.make_suite([])


def local_suite():
    "Returns the testsuite"
    tests = [
        BaseTest,
        ServerTest,
    ]
    return testlib.make_suite(tests)


def test_runner(suite, verbosity=0):
    "Runs the functional tests on a test database"
    from django.db import connection

    old_name = settings.DATABASE_NAME
    utils.setup_test_environment()
    connection.creation.create_test_db(verbosity=verbosity, autoclobber=True)
    result = unittest.TextTestRunner(verbosity=2).run(suite)
    connection.creation.destroy_test_db(old_name, verbosity)
    utils.teardown_test_environment()


if __name__ == '__main__':
    logger.info("executing functional tests")
    suite = local_suite()
    test_runner(suite, verbosity=0)
Example #17
    def build(self):
        "May be overriden to use different parsers and schemas"

        logger.info("file='%s'" % self.fname)
        logger.info("index='%s'" % self.index)

        # check file for existence
        if missing(self.fname):
            raise IOError('missing data %s' % self.fname)

        # provides timing information
        timer = util.Timer()

        # iterate over the file
        reader = csv.reader(file(self.fname, 'rt'), delimiter='\t')

        # unwind the reader until it hits the header
        for row in reader:
            if row[0] == 'chrom':
                break

        # helper function that flushes a table
        def flush(table, collect, name):
            # commit the changes
            if collect:
                table.append(collect)
                table.flush()
                # nicer information
                size = util.commify(len(table))
                logger.info('table=%s, contains %s rows' % (name, size))

        # print messages at every CHUNK line
        last_chrom = table = None
        db = openFile(self.index, mode='w', title='HDF index database')

        # continue on with reading, optimized for throughput
        # with minimal function calls
        collect = []
        for linec, row in izip(count(1), reader):

            # prints progress while processing, also flushes periodically
            if (linec % CHUNK) == 0:
                logger.info("... processed %s lines" % util.commify(linec))
                flush(table=table, collect=collect, name=last_chrom)
                collect = []

            # get the values from each row
            chrom, index, fwd, rev, value = row
            fwd, rev, value = float(fwd), float(rev), float(value)

            # flush when switching chromosomes
            if chrom != last_chrom:
                # table==None at the beginning
                if table is not None:
                    #logger.debug("... flushing at line %s" % row)
                    flush(table=table, collect=collect, name=last_chrom)
                    collect = []

                # creates the new HDF table here
                table = db.createTable("/", chrom, PositionalSchema,
                                       'label %s' % chrom)
                logger.info("creating table:%s" % chrom)
                last_chrom = chrom

            collect.append((index, fwd, rev, value))

        # flush for last chromosome, report some timing information
        flush(table, collect, chrom)
        lineno = util.commify(linec)
        elapsed = timer.report()
        logger.info("finished inserting %s lines in %s" % (lineno, elapsed))

        # close database
        db.close()