Beispiel #1
0
def start_new_run(state_manager, backup_manager, safe_to_exit, run_succeeded, ssm_file, cnv_file, top_k_trees_file, clonal_freqs_file, num_samples, mh_itr, mh_std, write_backups_every, rand_seed):
	"""Assemble the state dictionary for a fresh run and start the sampler.

	The PRNG is seeded with ``rand_seed``, the input files are read through
	``load_data``, a TSSB is built over the loaded data, and every datum is
	moved onto a single freshly spawned child of the root.  The state is then
	written via ``state_manager.write_initial_state`` and control passes to
	``do_mcmc``.

	Returns None.  If ``load_data`` yields no data, a message is logged to
	stderr and the function returns without starting the sampler.
	"""
	state = {'rand_seed': rand_seed}
	seed(rand_seed)

	# Input/output locations and backup cadence.
	state['ssm_file'] = ssm_file
	state['cnv_file'] = cnv_file
	state['top_k_trees_file'] = top_k_trees_file
	state['clonal_freqs_file'] = clonal_freqs_file
	state['write_backups_every'] = write_backups_every

	codes, n_ssms, n_cnvs = load_data(ssm_file, cnv_file)
	if len(codes) == 0:
		logmsg('No SSMs or CNVs provided. Exiting.', sys.stderr)
		return
	# Number of samples / time points, taken from the first datum.
	NTPS = len(codes[0].a)
	state['glist'] = [item.name for item in codes if len(item.name) > 0]

	# MCMC settings
	state['burnin'] = 1000
	state['num_samples'] = num_samples
	state['dp_alpha'] = 25.0
	state['dp_gamma'] = 1.0
	state['alpha_decay'] = 0.25
	state['top_k'] = 5

	# Metropolis-Hastings settings
	state['mh_burnin'] = 0
	state['mh_itr'] = mh_itr
	state['mh_std'] = mh_std

	state['cd_llh_traces'] = zeros((num_samples, 1))
	state['working_directory'] = os.getcwd()

	root = alleles(conc=0.1, ntps=NTPS)
	tssb = TSSB(dp_alpha=state['dp_alpha'], dp_gamma=state['dp_gamma'], alpha_decay=state['alpha_decay'], root_node=root, data=codes)
	state['tssb'] = tssb

	# Hack: give the root one child and reassign every datum to it.
	depth = 0
	if depth != 0:
		extra_stick = boundbeta(1, tssb.dp_gamma)
	else:
		extra_stick = .999
	tssb.root['sticks'] = vstack([tssb.root['sticks'], extra_stick])

	child_node = tssb.root['node'].spawn()
	if tssb.min_depth <= (depth + 1):
		child_main = boundbeta(1.0, (tssb.alpha_decay ** (depth + 1)) * tssb.dp_alpha)
	else:
		child_main = 0.0
	tssb.root['children'].append({
		'node': child_node,
		'main': child_main,
		'sticks': empty((0, 1)),
		'children': [],
	})

	new_node = tssb.root['children'][0]['node']
	for idx in range(tssb.num_data):
		tssb.assignments[idx].remove_datum(idx)
		new_node.add_datum(idx)
		tssb.assignments[idx] = new_node

	for item in codes:
		item.tssb = tssb

	tree_writer = TreeWriter()
	state_manager.write_initial_state(state)
	logmsg("Starting MCMC run...")
	# The sampler loop resumes from last_iteration; start just before burn-in.
	state['last_iteration'] = -state['burnin'] - 1

	do_mcmc(state_manager, backup_manager, safe_to_exit, run_succeeded, state, tree_writer, codes, n_ssms, n_cnvs, NTPS)
Beispiel #2
0
def start_new_run(state_manager, backup_manager, safe_to_exit, run_succeeded,
                  config, ssm_file, cnv_file, params_file, top_k_trees_file,
                  clonal_freqs_file, burnin_samples, num_samples, mh_itr,
                  mh_std, write_state_every, write_backups_every, rand_seed,
                  tmp_dir):
    """Assemble the state for a fresh run and start the sampler.

    Seed selection, in order:
      1. ``rand_seed`` if ``int()`` accepts it.
      2. Otherwise (``rand_seed`` is None), the integer stored in
         ``random_seed.txt`` if that file can be read and parsed.
      3. Otherwise a newly drawn seed from ``randint(2**32)``.
    The seed actually used is always written back to ``random_seed.txt``.

    ``params_file`` may be None; when given it is read as JSON and stored
    alongside the trees as ``params.json``.

    Returns None.  If ``load_data`` yields no data, a message is logged to
    stderr and the function returns without starting the sampler.
    """
    state = {}

    try:
        state['rand_seed'] = int(rand_seed)
    except TypeError:
        # rand_seed is None when no seed was given on the command line.
        try:
            with open('random_seed.txt') as seedf:
                state['rand_seed'] = int(seedf.read().strip())
        except (TypeError, ValueError, IOError):
            # Missing/unreadable file (IOError) or empty/garbled contents
            # (ValueError from int()): fall back to a fresh seed.
            # Can seed with [0, 2**32).
            state['rand_seed'] = randint(2**32)

    seed(state['rand_seed'])
    with open('random_seed.txt', 'w') as seedf:
        seedf.write('%s\n' % state['rand_seed'])

    state['ssm_file'] = ssm_file
    state['cnv_file'] = cnv_file
    state['tmp_dir'] = tmp_dir
    state['top_k_trees_file'] = top_k_trees_file
    state['clonal_freqs_file'] = clonal_freqs_file
    state['write_state_every'] = write_state_every
    state['write_backups_every'] = write_backups_every

    codes, n_ssms, n_cnvs, cnv_logical_physical_mapping = load_data(
        state['ssm_file'], state['cnv_file'])
    if len(codes) == 0:
        logmsg('No SSMs or CNVs provided. Exiting.', sys.stderr)
        return
    NTPS = len(codes[0].a)  # number of samples / time point
    state['glist'] = [datum.name for datum in codes if len(datum.name) > 0]

    # MCMC settings
    state['burnin'] = burnin_samples
    state['num_samples'] = num_samples
    state['dp_alpha'] = 25.0
    state['dp_gamma'] = 1.0
    state['alpha_decay'] = 0.25
    state['top_k'] = 5

    # Metropolis-Hastings settings
    state['mh_burnin'] = 0
    state['mh_itr'] = mh_itr  # No. of iterations in metropolis-hastings
    state['mh_std'] = mh_std

    state['cd_llh_traces'] = zeros((state['num_samples'], 1))
    state['burnin_cd_llh_traces'] = zeros((state['burnin'], 1))
    state['working_directory'] = os.getcwd()

    root = alleles(conc=0.1, ntps=NTPS)
    tssb = TSSB(dp_alpha=state['dp_alpha'],
                dp_gamma=state['dp_gamma'],
                alpha_decay=state['alpha_decay'],
                root_node=root,
                data=codes)
    state['tssb'] = tssb

    # Hack: give the root one child and reassign every datum to it.
    depth = 0
    tssb.root['sticks'] = vstack([
        tssb.root['sticks'],
        boundbeta(1, tssb.dp_gamma) if depth != 0 else .999999
    ])
    tssb.root['children'].append({
        'node': tssb.root['node'].spawn(),
        'main': (boundbeta(1.0,
                           (tssb.alpha_decay**(depth + 1)) * tssb.dp_alpha)
                 if tssb.min_depth <= (depth + 1) else 0.0),
        'sticks': empty((0, 1)),
        'children': []
    })
    new_node = tssb.root['children'][0]['node']
    for n in range(tssb.num_data):
        tssb.assignments[n].remove_datum(n)
        new_node.add_datum(n)
        tssb.assignments[n] = new_node

    for datum in codes:
        datum.tssb = tssb

    tree_writer = TreeWriter()
    tree_writer.add_extra_file('cnv_logical_physical_mapping.json',
                               json.dumps(cnv_logical_physical_mapping))

    if params_file is not None:
        with open(params_file) as F:
            params = json.load(F)
    else:
        params = {}
    tree_writer.add_extra_file('params.json', json.dumps(params))

    state_manager.write_initial_state(state)
    logmsg("Starting MCMC run...")
    # The sampler loop resumes from last_iteration; start just before burn-in.
    state['last_iteration'] = -state['burnin'] - 1

    # This will overwrite file if it already exists, which is the desired
    # behaviour for a fresh run.
    with open('mcmc_samples.txt', 'w') as mcmcf:
        mcmcf.write('Iteration\tLLH\tTime\n')

    do_mcmc(state_manager, backup_manager, safe_to_exit, run_succeeded, config,
            state, tree_writer, codes, n_ssms, n_cnvs, NTPS, tmp_dir)
Beispiel #3
0
def run(fin='data.txt',fout='./best',out2='top_k_trees',out3='clonal_frequencies',out4='llh_trace',num_samples=2500,mh_itr=5000,rand_seed=1):
	if not os.path.exists(fout):
		os.makedirs(fout)
	else:
		call(['rm','-r',fout])
		os.makedirs(fout)

	seed(rand_seed)
	codes = load_data(fin)				
	NTPS = len(codes[0].a) # number of samples / time points
	glist = [datum.name for datum in codes]

	root  = alleles(conc=0.1,ntps=NTPS)
	tssb  = TSSB( dp_alpha=dp_alpha, dp_gamma=dp_gamma, alpha_decay=alpha_decay, root_node=root, data=codes )

	dp_alpha_traces	= zeros((num_samples, 1))
	dp_gamma_traces	= zeros((num_samples, 1))
	alpha_decay_traces = zeros((num_samples, 1))
	conc_traces	   = zeros((num_samples, 1))
	cd_llh_traces	  = zeros((num_samples, 1))

	intervals = zeros((7))
	best_tssb = 0

	# clonal frequencies
	freq = dict([(g,[] )for g in glist])		

	print "Starting MCMC run..."
	for iter in range(-burnin,num_samples):
	
		times = [ time.time() ]
			
		tssb.resample_assignments()
		times.append(time.time())

		tssb.cull_tree()
		times.append(time.time())
		
		# assign node ids
		wts,nodes=tssb.get_mixture()
		for i,node in enumerate(nodes): node.id=i
	
		mh_acc = metropolis(tssb,mh_itr,mh_std,mh_burnin,NTPS,fin)
		times.append(time.time())
	
		#root.resample_hypers()
		times.append(time.time())
	
		tssb.resample_sticks()
		times.append(time.time())
	
		tssb.resample_stick_orders()
		times.append(time.time())
		
		if iter >= 0:
			tssb.resample_hypers(dp_alpha=True, alpha_decay=True, dp_gamma=True)
		times.append(time.time())
	
		intervals = intervals + diff(array(times)) 

		if iter>=0:
			dp_alpha_traces[iter]	= tssb.dp_alpha
			dp_gamma_traces[iter]	= tssb.dp_gamma
			alpha_decay_traces[iter] = tssb.alpha_decay
			conc_traces[iter]	   = root.conc()
			cd_llh_traces[iter]	  = tssb.complete_data_log_likelihood()
	   
			
		if iter>=0:
			if True or mod(iter, 10) == 0:
				(weights, nodes) = tssb.get_mixture()
				print iter, len(nodes), cd_llh_traces[iter], mh_acc, tssb.dp_alpha, tssb.dp_gamma, tssb.alpha_decay#, " ".join(map(lambda x: "%0.2f" % x, intervals.tolist())) 
				intervals = zeros((7))	  
  
		if iter >= 0 and argmax(cd_llh_traces[:iter+1]) == iter:
			print "\t%f is best per-data complete data likelihood so far." % (cd_llh_traces[iter])

		# save all trees
		if iter >= 0:
			fh = open(fout+'/'+str((cd_llh_traces[iter])[0]), 'w')
			cPickle.dump(tssb, fh)
			fh.close()
		
		wts, nodes = tssb.get_mixture()
		#Save log likelihood:savetxt('loglike',cd_llh_traces)
		savetxt(out4,cd_llh_traces)		

		#log clonal frequencies		
		if iter >= 0:		
			wts, nodes = tssb.get_mixture()
			for node in nodes:
				data = node.get_data()
				for datum in data: 
					for tp in arange(NTPS):	freq[datum.name].append(float(round(node.params[tp],5)))
		
	#save the best tree
	print_top_trees(fout,out2,top_k)

	#save clonal frequencies
	glist = array(freq.keys(),str);glist.shape=(1,len(glist)) 
	savetxt(out3,vstack((glist, array([freq[g] for g in freq.keys()]).T)),fmt='%s',delimiter=', ')	
Beispiel #4
0
def start_new_run(state_manager, backup_manager, safe_to_exit, run_succeeded, config, ssm_file, cnv_file, top_k_trees_file, clonal_freqs_file, burnin_samples, num_samples, mh_itr, mh_std, write_state_every, write_backups_every, rand_seed, tmp_dir):
	"""Assemble the state dictionary for a fresh run and start the sampler.

	``rand_seed`` is written verbatim to ``random_seed.txt``.  If ``int()``
	accepts it, the PRNG is seeded with that integer; if ``int()`` raises
	TypeError (``rand_seed`` is None), ``seed()`` is called with no argument
	and the stored seed stays as given.

	Returns None.  If ``load_data`` yields no data, a message is logged to
	stderr and the function returns without starting the sampler.
	"""
	state = {}

	with open('random_seed.txt', 'w') as seedf:
		seedf.write('%s\n' % rand_seed)
	try:
		rand_seed = int(rand_seed)
		state['rand_seed'] = rand_seed
		seed(rand_seed)
	except TypeError:
		# No seed supplied.  seed(None) is deliberately avoided in favour of
		# seed(), in case NumPy ever changes what seed(None) means.
		state['rand_seed'] = rand_seed
		seed()

	# Input/output locations and write cadences.
	state['ssm_file'] = ssm_file
	state['cnv_file'] = cnv_file
	state['tmp_dir'] = tmp_dir
	state['top_k_trees_file'] = top_k_trees_file
	state['clonal_freqs_file'] = clonal_freqs_file
	state['write_state_every'] = write_state_every
	state['write_backups_every'] = write_backups_every

	codes, n_ssms, n_cnvs = load_data(ssm_file, cnv_file)
	if len(codes) == 0:
		logmsg('No SSMs or CNVs provided. Exiting.', sys.stderr)
		return
	# Number of samples / time points, taken from the first datum.
	NTPS = len(codes[0].a)
	state['glist'] = [item.name for item in codes if len(item.name) > 0]

	# MCMC settings
	state['burnin'] = burnin_samples
	state['num_samples'] = num_samples
	state['dp_alpha'] = 25.0
	state['dp_gamma'] = 1.0
	state['alpha_decay'] = 0.25
	state['top_k'] = 5

	# Metropolis-Hastings settings
	state['mh_burnin'] = 0
	state['mh_itr'] = mh_itr
	state['mh_std'] = mh_std

	state['cd_llh_traces'] = zeros((num_samples, 1))
	state['burnin_cd_llh_traces'] = zeros((burnin_samples, 1))
	state['working_directory'] = os.getcwd()

	root = alleles(conc=0.1, ntps=NTPS)
	tssb = TSSB(dp_alpha=state['dp_alpha'], dp_gamma=state['dp_gamma'], alpha_decay=state['alpha_decay'], root_node=root, data=codes)
	state['tssb'] = tssb

	# Hack: give the root one child and reassign every datum to it.
	depth = 0
	if depth != 0:
		extra_stick = boundbeta(1, tssb.dp_gamma)
	else:
		extra_stick = .999
	tssb.root['sticks'] = vstack([tssb.root['sticks'], extra_stick])

	child_node = tssb.root['node'].spawn()
	if tssb.min_depth <= (depth + 1):
		child_main = boundbeta(1.0, (tssb.alpha_decay ** (depth + 1)) * tssb.dp_alpha)
	else:
		child_main = 0.0
	tssb.root['children'].append({
		'node': child_node,
		'main': child_main,
		'sticks': empty((0, 1)),
		'children': [],
	})

	new_node = tssb.root['children'][0]['node']
	for idx in range(tssb.num_data):
		tssb.assignments[idx].remove_datum(idx)
		new_node.add_datum(idx)
		tssb.assignments[idx] = new_node

	for item in codes:
		item.tssb = tssb

	tree_writer = TreeWriter()
	state_manager.write_initial_state(state)
	logmsg("Starting MCMC run...")
	# The sampler loop resumes from last_iteration; start just before burn-in.
	state['last_iteration'] = -state['burnin'] - 1

	# Truncate any sample log left by an earlier run and write the header.
	with open('mcmc_samples.txt', 'w') as mcmcf:
		mcmcf.write('Iteration\tLLH\tTime\n')

	do_mcmc(state_manager, backup_manager, safe_to_exit, run_succeeded, config, state, tree_writer, codes, n_ssms, n_cnvs, NTPS, tmp_dir)
Beispiel #5
0
def run(fin1,fin2,fout='trees.zip',out2='top_k_trees',out3='clonal_frequencies',out4='llh_trace',num_samples=2500,mh_itr=5000,mh_std=100,rand_seed=1):
	seed(rand_seed)
	codes, n_ssms, n_cnvs = load_data(fin1,fin2)
	NTPS = len(codes[0].a) # number of samples / time point
	glist = [datum.name for datum in codes if len(datum.name)>0]

	root  = alleles(conc=0.1,ntps=NTPS)
	tssb  = TSSB( dp_alpha=dp_alpha, dp_gamma=dp_gamma, alpha_decay=alpha_decay, root_node=root, data=codes )
	# hack...
	if 1:
		depth=0
		tssb.root['sticks'] = vstack([ tssb.root['sticks'], boundbeta(1, tssb.dp_gamma) if depth!=0 else .999])
		tssb.root['children'].append({ 'node': tssb.root['node'].spawn(),
					'main':boundbeta(1.0, (tssb.alpha_decay**(depth+1))*tssb.dp_alpha) if tssb.min_depth <= (depth+1) else 0.0, 
					'sticks' : empty((0,1)),	
					'children' : [] })
		new_node=tssb.root['children'][0]['node']	
		for n in range(tssb.num_data):	
			tssb.assignments[n].remove_datum(n)
			new_node.add_datum(n)
			tssb.assignments[n] = new_node

		
	####

	
	for datum in codes: datum.tssb=tssb 

	dp_alpha_traces	= zeros((num_samples, 1))
	dp_gamma_traces	= zeros((num_samples, 1))
	alpha_decay_traces = zeros((num_samples, 1))
	conc_traces	   = zeros((num_samples, 1))
	cd_llh_traces	  = zeros((num_samples, 1))

	intervals = zeros((7))
	best_tssb = 0

	# clonal frequencies
	freq = dict([(g,[] )for g in glist])	
	
	print "Starting MCMC run..."
	tree_writer = TreeWriter(fout)
	
	for iter in range(-burnin,num_samples):
		if iter<0: print iter
	
		times = [ time.time() ]
			
		tssb.resample_assignments()
		times.append(time.time())

		tssb.cull_tree()
		times.append(time.time())
		
		# assign node ids
		wts,nodes=tssb.get_mixture()
		for i,node in enumerate(nodes): node.id=i
		
		##################################################
		## some useful info about the tree,
		## used by CNV related computations,
		## to be called only after resampling assignments
		set_node_height(tssb)
		set_path_from_root_to_node(tssb)
		map_datum_to_node(tssb)
		##################################################

		mh_acc = metropolis(tssb,mh_itr,mh_std,mh_burnin,n_ssms,n_cnvs,fin1,fin2,rand_seed,NTPS)
		if float(mh_acc) < 0.08 and mh_std < 10000:
			mh_std = mh_std*2.0
			print "Shrinking MH proposals. Now %f" % mh_std
		if float(mh_acc) > 0.5 and float(mh_acc) < 0.99:
			mh_std = mh_std/2.0
			print "Growing MH proposals. Now %f" % mh_std
		times.append(time.time())
	
		#root.resample_hypers()
		times.append(time.time())
	
		tssb.resample_sticks()
		times.append(time.time())
		
		tssb.resample_stick_orders()
		times.append(time.time())
	
		tssb.resample_hypers(dp_alpha=True, alpha_decay=True, dp_gamma=True)
		times.append(time.time())
 
		intervals = intervals + diff(array(times)) 

		if iter>=0:
			dp_alpha_traces[iter]	= tssb.dp_alpha
			dp_gamma_traces[iter]	= tssb.dp_gamma
			alpha_decay_traces[iter] = tssb.alpha_decay
			conc_traces[iter]	   = root.conc()
			cd_llh_traces[iter]	  = tssb.complete_data_log_likelihood()
	   
		if iter>=0:
			if True or mod(iter, 10) == 0:
				(weights, nodes) = tssb.get_mixture()
				print iter, len(nodes), cd_llh_traces[iter], mh_acc, tssb.dp_alpha, tssb.dp_gamma, tssb.alpha_decay#, " ".join(map(lambda x: "%0.2f" % x, intervals.tolist())) 
				intervals = zeros((7))	  
  
		if iter >= 0 and argmax(cd_llh_traces[:iter+1]) == iter:
			print "\t%f is best per-data complete data likelihood so far." % (cd_llh_traces[iter])

		# save all trees
		if iter >= 0:
			tree_writer.write_tree(tssb, cd_llh_traces[iter][0])
		
		wts, nodes = tssb.get_mixture()
		#Save log likelihood:savetxt('loglike',cd_llh_traces)
		savetxt(out4,cd_llh_traces)		

		#log clonal frequencies		
		'''
		if iter >= 0:		
			wts, nodes = tssb.get_mixture()
			for node in nodes:
				data = node.get_data()
				for datum in data:
					if datum.name in freq:
						freq[datum.name].append(float(round(node.params,5)))
		'''
	tree_writer.close()

	#save the best tree
	print_top_trees(fout,out2,top_k)

	#save clonal frequencies
	glist = array(freq.keys(),str);glist.shape=(1,len(glist)) 
	savetxt(out3,vstack((glist, array([freq[g] for g in freq.keys()]).T)),fmt='%s',delimiter=', ')
Beispiel #6
0
def run(fin='data.txt',
        fout='./best',
        out2='top_k_trees',
        out3='clonal_frequencies',
        out4='llh_trace',
        num_samples=2500,
        mh_itr=5000,
        rand_seed=1):
    if not os.path.exists(fout):
        os.makedirs(fout)
    else:
        call(['rm', '-r', fout])
        os.makedirs(fout)

    seed(rand_seed)
    codes = load_data(fin)
    NTPS = len(codes[0].a)  # number of samples / time points
    glist = [datum.name for datum in codes]

    root = alleles(conc=0.1, ntps=NTPS)
    tssb = TSSB(dp_alpha=dp_alpha,
                dp_gamma=dp_gamma,
                alpha_decay=alpha_decay,
                root_node=root,
                data=codes)

    dp_alpha_traces = zeros((num_samples, 1))
    dp_gamma_traces = zeros((num_samples, 1))
    alpha_decay_traces = zeros((num_samples, 1))
    conc_traces = zeros((num_samples, 1))
    cd_llh_traces = zeros((num_samples, 1))

    intervals = zeros((7))
    best_tssb = 0

    # clonal frequencies
    freq = dict([(g, []) for g in glist])

    print "Starting MCMC run..."
    for iter in range(-burnin, num_samples):

        times = [time.time()]

        tssb.resample_assignments()
        times.append(time.time())

        tssb.cull_tree()
        times.append(time.time())

        # assign node ids
        wts, nodes = tssb.get_mixture()
        for i, node in enumerate(nodes):
            node.id = i

        mh_acc = metropolis(tssb, mh_itr, mh_std, mh_burnin, NTPS, fin)
        times.append(time.time())

        #root.resample_hypers()
        times.append(time.time())

        tssb.resample_sticks()
        times.append(time.time())

        tssb.resample_stick_orders()
        times.append(time.time())

        if iter >= 0:
            tssb.resample_hypers(dp_alpha=True,
                                 alpha_decay=True,
                                 dp_gamma=True)
        times.append(time.time())

        intervals = intervals + diff(array(times))

        if iter >= 0:
            dp_alpha_traces[iter] = tssb.dp_alpha
            dp_gamma_traces[iter] = tssb.dp_gamma
            alpha_decay_traces[iter] = tssb.alpha_decay
            conc_traces[iter] = root.conc()
            cd_llh_traces[iter] = tssb.complete_data_log_likelihood()

        if iter >= 0:
            if True or mod(iter, 10) == 0:
                (weights, nodes) = tssb.get_mixture()
                print iter, len(nodes), cd_llh_traces[
                    iter], mh_acc, tssb.dp_alpha, tssb.dp_gamma, tssb.alpha_decay  #, " ".join(map(lambda x: "%0.2f" % x, intervals.tolist()))
                intervals = zeros((7))

        if iter >= 0 and argmax(cd_llh_traces[:iter + 1]) == iter:
            print "\t%f is best per-data complete data likelihood so far." % (
                cd_llh_traces[iter])

        # save all trees
        if iter >= 0:
            fh = open(fout + '/' + str((cd_llh_traces[iter])[0]), 'w')
            cPickle.dump(tssb, fh)
            fh.close()

        wts, nodes = tssb.get_mixture()
        #Save log likelihood:savetxt('loglike',cd_llh_traces)
        savetxt(out4, cd_llh_traces)

        #log clonal frequencies
        if iter >= 0:
            wts, nodes = tssb.get_mixture()
            for node in nodes:
                data = node.get_data()
                for datum in data:
                    for tp in arange(NTPS):
                        freq[datum.name].append(
                            float(round(node.params[tp], 5)))

    #save the best tree
    print_top_trees(fout, out2, top_k)

    #save clonal frequencies
    glist = array(freq.keys(), str)
    glist.shape = (1, len(glist))
    savetxt(out3,
            vstack((glist, array([freq[g] for g in freq.keys()]).T)),
            fmt='%s',
            delimiter=', ')
Beispiel #7
0
def start_new_run(state_manager, backup_manager, safe_to_exit, run_succeeded, ssm_file, cnv_file, top_k_trees_file, clonal_freqs_file, burnin_samples, num_samples, mh_itr, mh_std, write_backups_every, rand_seed):
	"""Assemble the state dictionary for a fresh run and start the sampler.

	``rand_seed`` is written verbatim to ``random_seed.txt``.  If ``int()``
	accepts it, the PRNG is seeded with that integer; if ``int()`` raises
	TypeError (``rand_seed`` is None), ``seed()`` is called with no argument
	and the stored seed stays as given.

	Returns None.  If ``load_data`` yields no data, a message is logged to
	stderr and the function returns without starting the sampler.
	"""
	state = {}

	with open('random_seed.txt', 'w') as seedf:
		seedf.write('%s\n' % rand_seed)
	try:
		rand_seed = int(rand_seed)
		state['rand_seed'] = rand_seed
		seed(rand_seed)
	except TypeError:
		# No seed supplied.  seed(None) is deliberately avoided in favour of
		# seed(), in case NumPy ever changes what seed(None) means.
		state['rand_seed'] = rand_seed
		seed()

	# Input/output locations and backup cadence.
	state['ssm_file'] = ssm_file
	state['cnv_file'] = cnv_file
	state['top_k_trees_file'] = top_k_trees_file
	state['clonal_freqs_file'] = clonal_freqs_file
	state['write_backups_every'] = write_backups_every

	codes, n_ssms, n_cnvs = load_data(ssm_file, cnv_file)
	if len(codes) == 0:
		logmsg('No SSMs or CNVs provided. Exiting.', sys.stderr)
		return
	# Number of samples / time points, taken from the first datum.
	NTPS = len(codes[0].a)
	state['glist'] = [item.name for item in codes if len(item.name) > 0]

	# MCMC settings
	state['burnin'] = burnin_samples
	state['num_samples'] = num_samples
	state['dp_alpha'] = 25.0
	state['dp_gamma'] = 1.0
	state['alpha_decay'] = 0.25
	state['top_k'] = 5

	# Metropolis-Hastings settings
	state['mh_burnin'] = 0
	state['mh_itr'] = mh_itr
	state['mh_std'] = mh_std

	state['cd_llh_traces'] = zeros((num_samples, 1))
	state['burnin_cd_llh_traces'] = zeros((burnin_samples, 1))
	state['working_directory'] = os.getcwd()

	root = alleles(conc=0.1, ntps=NTPS)
	tssb = TSSB(dp_alpha=state['dp_alpha'], dp_gamma=state['dp_gamma'], alpha_decay=state['alpha_decay'], root_node=root, data=codes)
	state['tssb'] = tssb

	# Hack: give the root one child and reassign every datum to it.
	depth = 0
	if depth != 0:
		extra_stick = boundbeta(1, tssb.dp_gamma)
	else:
		extra_stick = .999
	tssb.root['sticks'] = vstack([tssb.root['sticks'], extra_stick])

	child_node = tssb.root['node'].spawn()
	if tssb.min_depth <= (depth + 1):
		child_main = boundbeta(1.0, (tssb.alpha_decay ** (depth + 1)) * tssb.dp_alpha)
	else:
		child_main = 0.0
	tssb.root['children'].append({
		'node': child_node,
		'main': child_main,
		'sticks': empty((0, 1)),
		'children': [],
	})

	new_node = tssb.root['children'][0]['node']
	for idx in range(tssb.num_data):
		tssb.assignments[idx].remove_datum(idx)
		new_node.add_datum(idx)
		tssb.assignments[idx] = new_node

	for item in codes:
		item.tssb = tssb

	tree_writer = TreeWriter()
	state_manager.write_initial_state(state)
	logmsg("Starting MCMC run...")
	# The sampler loop resumes from last_iteration; start just before burn-in.
	state['last_iteration'] = -state['burnin'] - 1

	do_mcmc(state_manager, backup_manager, safe_to_exit, run_succeeded, state, tree_writer, codes, n_ssms, n_cnvs, NTPS)