Example #1
# Assumed module-level imports for this example (the original snippet omits
# them; `data`, `processes`, `sharedprocesses`, `LoaderBoss`, `PackerBoss`,
# and `genFakeData` come from the surrounding project):
import os
import time
import random
import hashlib
import logging
from multiprocessing import Queue

import numpy as np
import pycuda.driver as cuda


def testAccuracy(pid=0):
    in_dir = '/scratch/sgeadmin/'
    odir = '/scratch/sgeadmin/'
    np_list = []
    sample_block_size = 32
    npairs_block_size = 16
    nets_block_size = 8

    inst_q = Queue()
    results_q = Queue()
    check_cp = False  # True: verify input == output; False: just measure speed (not used in this snippet)
    """
            events = dict of mp events
                events['add_data'] mp event, true means add data 
                events['data_ready'] mp event, true means data is ready to be consumed
                events['die'] event with instructions to die
            shared_mem = dict containing shared memory
                shared_mem['data'] = shared memory for numpy array buffer
                shared_mem['shape'] = shared memory for np array shape
                shared_mem['dtype'] = shared memory for np array dtype
    """
    dsize = {'em': 0, 'gm': 0, 'sm': 0, 'nm': 0, 'rms': 0}  # largest buffer size seen per matrix type
    dtype = {'em': np.float32, 'gm': np.int32, 'sm': np.int32, 'nm': np.int32, 'rms': np.float32}

    check_list = []
    cuda.init()
    unique_fid = set()
    dev = cuda.Device(0)
    ctx = dev.make_context()
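    # Generate ten fake datasets; each is buffered, hashed, and written to
    # in_dir twice under different file ids, and a single-process reference
    # result is saved for each write so the pipeline output can be checked.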
    for _ in range(10):
        fake = genFakeData(200, 20000)
        p_hash = None
        for _ in range(2):  # write each dataset twice under different file ids
            f_dict = {}
            f_id = str(random.randint(10000,100000))
            while f_id in unique_fid:
                f_id = str(random.randint(10000,100000))
            f_dict['file_id'] = f_id
            unique_fid.add(f_id)

            for k, v in fake.items():
                if k == 'em':
                    exp = data.Expression(v)
                    exp.createBuffer(sample_block_size, buff_dtype=np.float32)
                    v = exp.buffer_data
                    t_nsamp = exp.orig_nsamples
                elif k == 'sm':
                    sm = data.SampleMap(v)
                    sm.createBuffer(sample_block_size, buff_dtype=np.int32)
                    v = sm.buffer_data
                elif k == 'gm':
                    gm = data.GeneMap(v) 
                    gm.createBuffer( npairs_block_size, buff_dtype=np.int32)
                    v = gm.buffer_data
                elif k == 'nm':
                    nm = data.NetworkMap(v)
                    nm.createBuffer( nets_block_size, buff_dtype=np.int32 )
                    v = nm.buffer_data

                # The em buffer is written twice per dataset; hashing both copies
                # verifies that buffering is deterministic (asserted just below).
                f_hash = hashlib.sha1(v).hexdigest()
                if k == 'em':
                    if p_hash is None:
                        p_hash = f_hash
                        p_temp = v.copy()
                    else:
                        assert p_hash == f_hash, str(v) + " " + str(p_temp)
                        p_hash = None
                        p_temp = None
                f_name = '_'.join([ k, f_dict['file_id'], f_hash])
                with open(os.path.join( in_dir, f_name),'wb') as f:
                    np.save(f, v)
                if v.size > dsize[k]:
                    dsize[k] = v.size
                f_dict[k] = f_name
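            # Reference run: single-process Dirac over the original (unbuffered)
            # data; its rank-matching scores serve as ground truth below.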
            srt, rt, rms = processes.runDirac(exp.orig_data, gm.orig_data, sm.orig_data, nm.orig_data,
                                              sample_block_size, npairs_block_size, nets_block_size, True)
            """
            uncomment to compare srt and rt
            srt.fromGPU()
            np.save(os.path.join(in_dir, 'srt_'+ f_dict['file_id'] + '_single'), srt.res_data)
            rt.fromGPU()
            np.save(os.path.join(in_dir, 'rt_'+ f_dict['file_id'] + '_single' ), rt.res_data)
            """
            rms.fromGPU(res_dtype=np.float32)
            np.save(os.path.join(in_dir, 'rms_'+ f_dict['file_id'] + '_single'), rms.res_data)

            rms = data.RankMatchingScores(nm.buffer_nnets, sm.buffer_nsamples)
            rms.createBuffer(sample_block_size, nets_block_size, buff_dtype=np.float32)
            if rms.buffer_data.size > dsize['rms']:
                dsize['rms'] = rms.buffer_data.size
            inst_q.put( f_dict )
            check_list.append(f_dict)
    data_settings = []
    for k, b in dsize.items():
        if k in ['em', 'sm', 'gm', 'nm']:
            data_settings.append((k, b, dtype[k]))
    print("Data Created")
    db = LoaderBoss(str(pid),inst_q,in_dir,data_settings)
    pb = PackerBoss(str(pid), results_q, odir, (dsize['rms'], dtype['rms']) )
    db.start()
    pb.start()
    db.set_add_data()
    ctr = 0  # count of instructions processed so far
    prev_time = time.time()
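    # Main consume loop: wait for the loader to signal data, run the
    # shared-memory Dirac pass, and hand the scores to the packer until
    # the instruction queue drains.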
    while True:
        print(time.time() - prev_time)  # seconds spent on the previous iteration
        prev_time = time.time()
        print("*" * 10 + str(ctr) + "*" * 10)
        ready = db.wait_data_ready( time_out=5 )
        if ready:
            db.clear_data_ready()
            my_f = db.get_file_id()

            expression_matrix = db.get_expression_matrix()
            gene_map = db.get_gene_map()
            sample_map = db.get_sample_map()
            network_map = db.get_network_map()
            exp = data.SharedExpression( expression_matrix )
            exp.orig_nsamples = t_nsamp  # t_nsamp carries over from the data-generation loop above
            gm = data.SharedGeneMap( gene_map )
            sm = data.SharedSampleMap( sample_map )
            nm = data.SharedNetworkMap( network_map )
            srt, rt, rms = sharedprocesses.runSharedDirac(exp, gm, sm, nm,
                                                          sample_block_size, npairs_block_size, nets_block_size)
            """
            uncomment to test srt and rt
            srt.fromGPU()
            np.save(os.path.join(in_dir, 'srt_hacky_'+my_f['file_id']), srt.buffer_data)
            rt.fromGPU()
            np.save(os.path.join(in_dir, 'rt_hacky_'+my_f['file_id']), rt.buffer_data)
            """
            db.release_loader_data()
            db.set_add_data()
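            # Block until the packer exposes a free shared-memory slot,
            # then copy the GPU result straight into it.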
            while not pb.ready():
                print("z")  # packer still busy
                time.sleep(0.5)
            rms.fromGPU(pb.get_mem())
            pb.set_meta(my_f['file_id'], (rms.buffer_nnets, rms.buffer_nsamples), dtype['rms'])
            pb.release()
            ctr += 1  # the original never incremented this counter
        else:
            if db.empty():
                break
            else:
                raise Exception("Stuck")
    logging.info( "Tester: no data, all processed, killing sp")
    db.kill_all()
    pb.kill()
    db.clean_up()
    pb.clean_up()
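    # Compare every packed result against the single-process reference
    # saved during data generation.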
    all_match = True
    while not results_q.empty():
        my_dict = results_q.get()
        proc = np.load(os.path.join(in_dir, my_dict['f_name']))
        single = np.load(os.path.join(in_dir, 'rms_' + my_dict['file_id'] + '_single.npy'))
        (a, b) = single.shape
        print("Comparing", os.path.join(in_dir, my_dict['f_name']), "and",
              os.path.join(in_dir, 'rms_' + my_dict['file_id'] + '_single.npy'))
        # The packed buffer may be padded out to block size; compare only the
        # region covered by the single-process reference.
        match = np.allclose(proc[:a, :b], single)
        print("Matching", my_dict['file_id'], match)
        if not match:
            all_match = False
    if all_match:
        print("All tests SUCCESSFUL")
    else:
        print("You have been weighed, you have been measured, and you have been found wanting.")

    logging.info( "Tester: exitted gracefully")
    ctx.pop()
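
A minimal driver sketch for the test above (hypothetical module layout; assumes a CUDA-capable device and that the project's `data`, `processes`, and loader modules are importable alongside this file):

if __name__ == '__main__':
    # Configure logging so the tester's logging.info() calls are visible,
    # then run a single test instance.
    logging.basicConfig(level=logging.INFO)
    testAccuracy(pid=0)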
Example #2
# Uses the same module-level imports as Example #1 above.
def addFakeDataQueue(unique_fid, in_dir, inst_q, check_list, dsize,
                     sample_block_size, npairs_block_size, nets_block_size):
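    """Generate 100 fake datasets, write their buffered forms to in_dir,
    run a single-process reference Dirac pass for each, and enqueue the
    resulting file manifests on inst_q. Returns the updated
    max-buffer-size dict and the accumulated check list."""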
    
    for _ in range(100):
        fake = genFakeData(200, 20000)
        p_hash = None
        for _ in range(1):  # single write per dataset; the duplicate-hash check below never fires
            f_dict = {}
            f_id = str(random.randint(10000,100000))
            while f_id in unique_fid:
                f_id = str(random.randint(10000,100000))
            f_dict['file_id'] = f_id
            unique_fid.add(f_id)

            for k, v in fake.items():
                if k == 'em':
                    exp = data.Expression(v)
                    exp.createBuffer(sample_block_size, buff_dtype=np.float32)
                    v = exp.buffer_data
                    t_nsamp = exp.orig_nsamples
                elif k == 'sm':
                    sm = data.SampleMap(v)
                    sm.createBuffer(sample_block_size, buff_dtype=np.int32)
                    v = sm.buffer_data
                elif k == 'gm':
                    gm = data.GeneMap(v) 
                    gm.createBuffer( npairs_block_size, buff_dtype=np.int32)
                    v = gm.buffer_data
                elif k == 'nm':
                    nm = data.NetworkMap(v)
                    nm.createBuffer( nets_block_size, buff_dtype=np.int32 )
                    v = nm.buffer_data
                f_hash = hashlib.sha1(v).hexdigest()
                if k == 'em':
                    if p_hash is None:
                        p_hash = f_hash
                        p_temp = v.copy()
                    else:
                        assert p_hash == f_hash, str(v) + " " + str(p_temp)
                        p_hash = None
                        p_temp = None
                f_name = '_'.join([ k, f_dict['file_id'], f_hash])
                with open(os.path.join( in_dir, f_name),'wb') as f:
                    np.save(f, v)
                if v.size > dsize[k]:
                    dsize[k] = v.size
                f_dict[k] = f_name
            srt, rt, rms = processes.runDirac(exp.orig_data, gm.orig_data, sm.orig_data, nm.orig_data,
                                              sample_block_size, npairs_block_size, nets_block_size, True)
            """
            uncomment to compare srt and rt
            srt.fromGPU()
            np.save(os.path.join(in_dir, 'srt_'+ f_dict['file_id'] + '_single'), srt.res_data)
            rt.fromGPU()
            np.save(os.path.join(in_dir, 'rt_'+ f_dict['file_id'] + '_single' ), rt.res_data)
            """
            rms.fromGPU(res_dtype=np.float32)
            np.save(os.path.join(in_dir, 'rms_'+ f_dict['file_id'] + '_single'), rms.res_data)

            rms = data.RankMatchingScores(nm.buffer_nnets, sm.buffer_nsamples)
            rms.createBuffer(sample_block_size, nets_block_size, buff_dtype=np.float32)
            if rms.buffer_data.size > dsize['rms']:
                dsize['rms'] = rms.buffer_data.size
            inst_q.put( f_dict )
            check_list.append(f_dict)
    return dsize, check_list
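
A hedged usage sketch for addFakeDataQueue, mirroring the bookkeeping that testAccuracy in Example #1 sets up (all names below follow that example; nothing here is prescribed by the function itself beyond its signature):

inst_q = Queue()
unique_fid = set()
check_list = []
dsize = {'em': 0, 'gm': 0, 'sm': 0, 'nm': 0, 'rms': 0}
dsize, check_list = addFakeDataQueue(unique_fid, '/scratch/sgeadmin/', inst_q,
                                     check_list, dsize, sample_block_size=32,
                                     npairs_block_size=16, nets_block_size=8)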