Python MCMC_utilsの例、MCMC_utils Pythonの例

コード例 #1

0

ファイルを表示

ファイル: plottingutils.py プロジェクト: irelandb/sortseq

def loadcond(infoname,barcodefn,datafnbase):
    """Load the sequences of one experiment that carry a required base at one position.

    The experiment description is read with ``readinfo(infoname)`` and must
    provide the keys ``condbase``, ``condident``, ``mut_region_start``,
    ``mut_region_length`` and ``exp_name``.  Every file matching
    ``datafnbase + exp_name + '*.fasta'`` is read; after sorting the file
    names, the i-th file is treated as bin ``i`` (the first four are used).

    Parameters
    ----------
    infoname : passed unchanged to ``readinfo``.
    barcodefn : path of a CSV file with the columns ``experiment_name``,
        ``fwd_barcode`` and ``rev_barcode``.
    datafnbase : prefix prepended to the experiment name when globbing for
        the per-bin ``.fasta`` files.

    Returns
    -------
    (seq_mat_temp, batch_vec_temp)
        ``seq_mat_temp`` is a float array of shape
        ``(4, region_length, n_sequences)`` filled by ``MCMC_utils.seq2mat``;
        ``batch_vec_temp`` is a NumPy array holding the bin index of each
        sequence, in the same order.

    Raises
    ------
    IndexError
        If fewer than four bin files are found or no sequence passes the
        ``condbase``/``condident`` filter.
    """
    infodict = readinfo(infoname)
    numbins = 4

    condbase = int(infodict['condbase'])
    condident = infodict['condident']

    mut_region_start = int(infodict['mut_region_start'])
    mut_region_length = int(infodict['mut_region_length'])
    mut_region_end = mut_region_start + mut_region_length
    expname = infodict['exp_name']
    fnnames = sorted(glob.glob(datafnbase + expname + '*.fasta'))

    # Map every experiment name to its forward / reverse barcode.  The file is
    # opened in a ``with`` block so the handle is closed even if a row is bad.
    barcode_dict = {}
    reverse_dict = {}
    with open(barcodefn,'r') as csvfile:
        for row in csv.DictReader(csvfile):
            barcode_dict[row['experiment_name']] = row['fwd_barcode']
            reverse_dict[row['experiment_name']] = row['rev_barcode']

    sequences = [readuniqueseqssingleend.collatedmat(fn) for fn in fnnames]

    # The insert starts right after the forward barcode and ends where the
    # first five bases of the reverse barcode occur in the very first read;
    # that single position is then applied to every read of every bin.
    # NOTE(review): str.find returns -1 when the prefix is absent, in which
    # case the slice below silently drops only the last character -- confirm
    # that the first read always contains the reverse barcode.
    seq_start = len(barcode_dict[expname])
    seq_end = sequences[0][0].find(reverse_dict[expname][0:5])
    print('sequences loaded')
    print(len(sequences[0]))
    for i in range(numbins):
        sequences[i] = [seq[seq_start:seq_end] for seq in sequences[i]]

    # Keep, per bin, each distinct read whose base at ``condbase`` equals
    # ``condident``; store its mutated region and the bin index side by side.
    batch_vec_temp = []
    seqs = []
    for i in range(numbins):
        for seq in set(sequences[i]):
            if seq[condbase] == condident:
                seqs.append(seq[mut_region_start:mut_region_end])
                batch_vec_temp.append(i)

    batch_vec_temp = np.array(batch_vec_temp)

    print(len(batch_vec_temp))
    print(len(seqs))

    # Size the matrix from the first sequence (index 0, not 1) so that a
    # result containing a single sequence does not raise IndexError.
    seq_mat_temp = np.empty([4,len(seqs[0]),len(seqs)])

    # When the conditioned position lies inside the mutated region, every
    # retained sequence has the same base there.  That column is overwritten
    # with a uniformly random base -- presumably so the constant column does
    # not bias a later fit; confirm with the analysis code.
    cond_in_region = mut_region_start <= condbase < mut_region_end
    for i, line in enumerate(seqs):
        seq_mat_temp[:,:,i] = MCMC_utils.seq2mat(line)
        if cond_in_region:
            seq_mat_temp[:,condbase-mut_region_start,i] = MCMC_utils.seq2mat(np.random.choice(['A','C','G','T'])).transpose()
    return seq_mat_temp, batch_vec_temp

コード例 #2

0

ファイルを表示

ファイル: plottingutils.py プロジェクト: irelandb/sortseq

def loadseqsuniquemut(infoname,barcodefn,datafnbase):
    """Load, per bin, the distinct mutated regions of one experiment.

    The experiment description is read with ``readinfo(infoname)`` and must
    provide the keys ``mut_region_start``, ``mut_region_length`` and
    ``exp_name``.  Every file matching
    ``datafnbase + exp_name + '*.fasta'`` is read; after sorting the file
    names, the i-th file is treated as bin ``i`` (the first four are used).
    Uniqueness is decided on the mutated region only, so two reads that
    differ solely outside that region count once within a bin.

    Parameters
    ----------
    infoname : passed unchanged to ``readinfo``.
    barcodefn : path of a CSV file with the columns ``experiment_name``,
        ``fwd_barcode`` and ``rev_barcode``.
    datafnbase : prefix prepended to the experiment name when globbing for
        the per-bin ``.fasta`` files.

    Returns
    -------
    (seq_mat_temp, batch_vec_temp)
        ``seq_mat_temp`` is a float array of shape
        ``(4, region_length, n_sequences)`` filled by ``MCMC_utils.seq2mat``;
        ``batch_vec_temp`` is a NumPy array holding the bin index of each
        sequence, in the same order.

    Raises
    ------
    IndexError
        If fewer than four bin files are found or no sequence is loaded.
    """
    infodict = readinfo(infoname)
    numbins = 4

    mut_region_start = int(infodict['mut_region_start'])
    mut_region_length = int(infodict['mut_region_length'])
    mut_region_end = mut_region_start + mut_region_length
    expname = infodict['exp_name']
    fnnames = sorted(glob.glob(datafnbase + expname + '*.fasta'))

    # Map every experiment name to its forward / reverse barcode.  The file is
    # opened in a ``with`` block so the handle is closed even if a row is bad.
    barcode_dict = {}
    reverse_dict = {}
    with open(barcodefn,'r') as csvfile:
        for row in csv.DictReader(csvfile):
            barcode_dict[row['experiment_name']] = row['fwd_barcode']
            reverse_dict[row['experiment_name']] = row['rev_barcode']

    sequences = [readuniqueseqssingleend.collatedmat(fn) for fn in fnnames]

    # The insert starts right after the forward barcode and ends where the
    # first five bases of the reverse barcode occur in the very first read;
    # that single position is then applied to every read of every bin.
    # NOTE(review): str.find returns -1 when the prefix is absent, in which
    # case the slice below silently drops only the last character -- confirm
    # that the first read always contains the reverse barcode.
    seq_start = len(barcode_dict[expname])
    seq_end = sequences[0][0].find(reverse_dict[expname][0:5])
    print('sequences loaded')
    print(len(sequences[0]))
    for i in range(numbins):
        sequences[i] = [seq[seq_start:seq_end] for seq in sequences[i]]

    batch_vec_temp = []
    seqs = []
    for i in range(numbins):
        # De-duplicate on the mutated region itself rather than on the whole
        # read, so only sequences with unique mutated regions are counted.
        s2 = list(set([seq[mut_region_start:mut_region_end] for seq in sequences[i]]))
        seqs.extend(s2)
        batch_vec_temp.extend([i] * len(s2))

    batch_vec_temp = np.array(batch_vec_temp)

    print(len(batch_vec_temp))
    print(len(seqs))

    # Size the matrix from the first sequence (index 0, not 1) so that a
    # result containing a single sequence does not raise IndexError.
    seq_mat_temp = np.empty([4,len(seqs[0]),len(seqs)])

    for i, line in enumerate(seqs):
        seq_mat_temp[:,:,i] = MCMC_utils.seq2mat(line)
    return seq_mat_temp, batch_vec_temp

コード例 #3

0

ファイルを表示

ファイル: infofootprint.py プロジェクト: irelandb/sortseq

    tempseqs = list(set(sequences[i]))
    seqs = seqs + [tempseqs[z][mut_region_start:mut_region_start + mut_region_length] for z in range(0,len(tempseqs))]
    batch_vec_temp = batch_vec_temp + [i for z in range(0,len(tempseqs))]

batch_vec_temp = np.array(batch_vec_temp)

#batch_vec_temp = [batch_vec_temp[i] for i in range(0,len(seqs)) if seqs[i].count('A') > 2 and len(seqs[i]) == mut_region_length]
print len(batch_vec_temp)
#seqs = [seqs[i] for i in range(0,len(seqs)) if seqs[i].count('A') > 2 and len(seqs[i]) == mut_region_length]
print len(seqs)

seq_mat_temp = np.empty([4,len(seqs[1]),len(seqs)])


for i, line in enumerate(seqs):
    seq_mat_temp[:,:,i] = MCMC_utils.seq2mat(line)
ifoot = np.zeros([len(seq_mat_temp[0,:,0])])    
ifootrenorm = np.zeros([len(seq_mat_temp[0,:,0])])
pbatch = np.array([float(batch_vec_temp.tolist().count(i))/float(len(batch_vec_temp)) for i in range(0,numbins)])
wtseq = ''
seqdict = {0:'A', 1:'C', 2:'G', 3:'T'}
Anumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
Cnumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
Gnumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
Tnumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
for z in range(0,len(seq_mat_temp[0,:,0])):
#for z in range(0,21):
    Apos = np.nonzero(seq_mat_temp[0,z,:])[0]
    Cpos = np.nonzero(seq_mat_temp[1,z,:])[0]
    Gpos = np.nonzero(seq_mat_temp[2,z,:])[0]
    Tpos = np.nonzero(seq_mat_temp[3,z,:])[0]

コード例 #4

0

ファイルを表示

ファイル: generic_energy_matrix_nonunique.py プロジェクト: irelandb/sortseq

    tempseqs = list(set(sequences[i]))
    seqs = seqs + [tempseqs[z][mut_region_start:mut_region_start + mut_region_length] for z in range(0,len(tempseqs))]
    batch_vec_temp = batch_vec_temp + [i for z in range(0,len(tempseqs))]

batch_vec_temp = np.array(batch_vec_temp)


print len(batch_vec_temp)

print len(seqs)

seq_mat_temp = np.empty([4,len(seqs[1]),len(seqs)])


for i, line in enumerate(seqs):
    seq_mat_temp[:,:,i] = MCMC_utils.seq2mat(line)

#initial energy matrix
emat_0 = MCMC_utils.fix_matrix_gauge(sp.randn(4,mut_region_length))




# shuffle the elements of seq_mat and batch_vec. This will prevent
# spuriously high mutual information values
print len
index_shuf = range(len(batch_vec_temp))
sp.random.shuffle(index_shuf)
seq_mat = sp.zeros([4,len(seq_mat_temp[0,:,0]),len(seq_mat_temp[0,0,:])],dtype = 'int')
batch_vec = sp.zeros_like(batch_vec_temp)
for i, i_s in enumerate(index_shuf):

コード例 #5

0

ファイルを表示

ファイル: nonbatchinfo.py プロジェクト: irelandb/sortseq

import MCMC_utils
import scipy as sp
import pymc
import scipy.ndimage
import ConfigParser
import os
import numpy as np
import matplotlib.pyplot as plt
numbins = 4
config = ConfigParser.RawConfigParser()
config.read('/home/bill/Documents/energymatrix/danieltest/runs_analysis/oldtest35_full-cAMP/35_full-cAMP.cfg')

mut_region_start = config.getint('Input','mut_region_start')
mut_region_length = config.getint('Input','mut_region_length')
data_fn = config.get('Input','data_fn')
seq_mat_temp, batch_vec_temp = MCMC_utils.load_unique_seqs_batches(data_fn,mut_region_start,mut_region_length)

ifoot = np.zeros([len(seq_mat_temp[0,:,0])])
ifootrenorm = np.zeros([len(seq_mat_temp[0,:,0])])  
pbatch = np.array([float(batch_vec_temp.tolist().count(i))/float(len(batch_vec_temp)) for i in range(0,numbins)])
wtseq = ''
seqdict = {0:'A', 1:'C', 2:'G', 3:'T'}
Anumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
Cnumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
Gnumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
Tnumseq2 = [[] for i in range(0,len(seq_mat_temp[0,:,0]))]
mutrate = np.zeros(mut_region_length)
for z in range(0,len(seq_mat_temp[0,:,0])):
#for z in range(0,21):
    Apos = np.nonzero(seq_mat_temp[0,z,:])[0]
    Cpos = np.nonzero(seq_mat_temp[1,z,:])[0]

コード例 #6

0

ファイルを表示

ファイル: generic_energy_matrix_mscsvariedbin.py プロジェクト: irelandb/sortseq

    s2 = [s2[z] for z in range(len(s2)) if len(s2[z]) == mut_region_length]
    seqs = seqs + s2
    batch_vec_temp = batch_vec_temp + [i for z in range(len(s2))]

batch_vec_temp = np.array(batch_vec_temp)


print len(batch_vec_temp)

print len(seqs)

seq_mat_temp = np.empty([4,len(seqs[1]),len(seqs)])


for i, line in enumerate(seqs):
    seq_mat_temp[:,:,i] = MCMC_utils.seq2mat(line)

#initial energy matrix
emat_0 = MCMC_utils.fix_matrix_gauge(sp.randn(4,mut_region_length))




# shuffle the elements of seq_mat and batch_vec. This will prevent
# spuriously high mutual information values
print len
index_shuf = range(len(batch_vec_temp))
sp.random.shuffle(index_shuf)
seq_mat = sp.zeros([4,len(seq_mat_temp[0,:,0]),len(seq_mat_temp[0,0,:])],dtype = 'int')
batch_vec = sp.zeros_like(batch_vec_temp)
for i, i_s in enumerate(index_shuf):

コード例 #7

0

ファイルを表示

        os.makedirs(outputdir)
    except:
        print 'already done'
        continue
    burn_in = 1000
    db = pymc.database.sqlite.load(fn)
    # emat_mean = db.emat.stats()['mean']
    try:
        emat_mean = sp.mean(db.trace('emat')[burn_in:], axis=0)

        # change the sign of emat_mean if necessary. We want a negative
        # correlation between energy and batch number (because the higher
        # the energy, the lower the expression)
        if 'old' in fnname:
            seq_mat, batch_vec = MCMC_utils.load_unique_seqs_batches(
                olddatafn, 18 + int(info_dict['mut_region_start']),
                int(info_dict['mut_region_length']))
        else:
            try:
                unique = info_dict['unique']
                print 'is unique'
                seq_mat, batch_vec = plottingutils.loadseqsuniquemut(
                    infofn[namedict[fn]], barcodefn, datafnbase)
            except:
                try:
                    cond = info_dict['condbase']
                    print 'is conditional'
                    seq_mat, batch_vec = plottingutils.loadcond(
                        infofn[namedict[fn]], barcodefn, datafnbase)
                except:
                    print 'is not unique'

コード例 #8

0

ファイルを表示

ファイル: nonbatchinfo.py プロジェクト: irelandb/sortseq

import pymc
import scipy.ndimage
import ConfigParser
import os
import numpy as np
import matplotlib.pyplot as plt
numbins = 4
config = ConfigParser.RawConfigParser()
config.read(
    '/home/bill/Documents/energymatrix/danieltest/runs_analysis/oldtest35_full-cAMP/35_full-cAMP.cfg'
)

mut_region_start = config.getint('Input', 'mut_region_start')
mut_region_length = config.getint('Input', 'mut_region_length')
data_fn = config.get('Input', 'data_fn')
seq_mat_temp, batch_vec_temp = MCMC_utils.load_unique_seqs_batches(
    data_fn, mut_region_start, mut_region_length)

ifoot = np.zeros([len(seq_mat_temp[0, :, 0])])
ifootrenorm = np.zeros([len(seq_mat_temp[0, :, 0])])
pbatch = np.array([
    float(batch_vec_temp.tolist().count(i)) / float(len(batch_vec_temp))
    for i in range(0, numbins)
])
wtseq = ''
seqdict = {0: 'A', 1: 'C', 2: 'G', 3: 'T'}
Anumseq2 = [[] for i in range(0, len(seq_mat_temp[0, :, 0]))]
Cnumseq2 = [[] for i in range(0, len(seq_mat_temp[0, :, 0]))]
Gnumseq2 = [[] for i in range(0, len(seq_mat_temp[0, :, 0]))]
Tnumseq2 = [[] for i in range(0, len(seq_mat_temp[0, :, 0]))]
mutrate = np.zeros(mut_region_length)
for z in range(0, len(seq_mat_temp[0, :, 0])):

コード例 #9

0

ファイルを表示

batchvec2 = [2 for i in range(0, len(a2))]
batchvec3 = [3 for i in range(0, len(a3))]
batch_vec_temp = batchvec0 + batchvec1 + batchvec2 + batchvec3

seqs = a + a1 + a2 + a3

#batch_vec_temp = [batch_vec_temp[i] for i in range(0,len(seqs)) if seqs[i].count('A') > 2 and len(seqs[i]) == mut_region_length]
print len(batch_vec_temp)
#seqs = [seqs[i] for i in range(0,len(seqs)) if seqs[i].count('A') > 2 and len(seqs[i]) == mut_region_length]
print len(seqs)

seq_mat_temp = np.empty([4, len(seqs[1]), len(seqs)])
seq_mat_temp2 = np.zeros([16, len(seqs[1]) - 1, len(seqs)])
seq_dict = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
for i, line in enumerate(seqs):
    seq_mat_temp[:, :, i] = MCMC_utils.seq2mat(line)
    for q in range(0, len(line) - 1):
        pos = seq_dict[line[q]] * 4 + seq_dict[line[np.mod(q + pairnum, 20)]]
        seq_mat_temp2[pos, q, i] = 1

# Run matrix on only section of data

# shuffle the elements of seq_mat and batch_vec. This will prevent
# spuriously high mutual information values
print len
index_shuf = range(len(batch_vec_temp))
sp.random.shuffle(index_shuf)
seq_mat2 = sp.zeros(
    [16, len(seq_mat_temp2[0, :, 0]),
     len(seq_mat_temp2[0, 0, :])],
    dtype='int')

コード例 #10

0

ファイルを表示

ファイル: plot_MCMC_results.py プロジェクト: irelandb/sortseq

    # load the database and save and plot the mean value of the emat
    burn_in = 1000
    db = pymc.database.sqlite.load(fn)
    # emat_mean = db.emat.stats()['mean']
    emat_mean = sp.mean(db.trace('emat')[burn_in:],axis=0)
    
    # change the sign of emat_mean if necessary. We want a negative
    # correlation between energy and batch number (because the higher
    # the energy, the lower the expression)
    seq_mat, batch_vec = MCMC_utils_mscs.load_unique_seqs_batches(data_fnbase,4,seq_start,seq_end,mut_region_start,mut_region_length)
    energies = sp.zeros(len(batch_vec))
    for i in range(len(batch_vec)):
        energies[i] = sp.sum(seq_mat[:,:,i]*emat_mean)
    r = scipy.stats.pearsonr(energies,batch_vec)[0]
    if r>0:
        emat_mean = MCMC_utils.fix_matrix_gauge(-emat_mean)
    else:
        emat_mean = MCMC_utils.fix_matrix_gauge(emat_mean)
    sp.savetxt(os.path.join(output_dir,run_name+'_emat_mean.txt'),emat_mean)

    # compute mutual information and joint pdf
    MI,f_reg = MCMC_utils.compute_MI(seq_mat,batch_vec,emat_mean)
    MI_f = open(os.path.join(output_dir,run_name+'_MI.txt'),'w')
    MI_f.write(str(MI))

    print MI

    # make heat map of mean matrix
    plt.clf()
    plt.imshow(MCMC_utils.zero_matrix(emat_mean),interpolation='nearest')
    plt.title('Energy Matrix, ' + run_name + ', MI: %.5f' % MI)

コード例 #11

0

ファイルを表示

ファイル: makeplots.py プロジェクト: irelandb/sortseq

 try:
     os.makedirs(outputdir)
 except:
     print 'already done'
     continue
 burn_in = 1000
 db = pymc.database.sqlite.load(fn)
 # emat_mean = db.emat.stats()['mean']
 try:
     emat_mean = sp.mean(db.trace('emat')[burn_in:],axis=0)
 
     # change the sign of emat_mean if necessary. We want a negative
     # correlation between energy and batch number (because the higher
     # the energy, the lower the expression)
     if 'old' in fnname:
         seq_mat,batch_vec = MCMC_utils.load_unique_seqs_batches(olddatafn,18 + int(info_dict['mut_region_start']),int(info_dict['mut_region_length']))
     else:
         try:
             unique = info_dict['unique']
             print 'is unique'
             seq_mat, batch_vec = plottingutils.loadseqsuniquemut(infofn[namedict[fn]],barcodefn,datafnbase)
         except:
             try:
                 cond = info_dict['condbase']
                 print 'is conditional'
                 seq_mat,batch_vec = plottingutils.loadcond(infofn[namedict[fn]],barcodefn,datafnbase)
             except:
                     print 'is not unique'
                     seq_mat, batch_vec = plottingutils.loadseqsuniquemut(infofn[namedict[fn]],barcodefn,datafnbase)
       
     energies = sp.zeros(len(batch_vec))

コード例 #12

0

ファイルを表示

# file with optimized matrix
for i, fn in enumerate(resultsfn):
    # Per-run output directories are not created here; the matrices are only
    # collected into emeans / evis / estdMCMC, one slot per results file.
    burn_in = 1000
    db = pymc.database.sqlite.load(fn)

    # Only chains holding exactly 3000 samples are used; any other run
    # leaves its slot in the output arrays untouched.
    if len(db.trace('emat')[:]) != 3000:
        continue

    # Discard the burn-in samples, then average the rest and fix the gauge.
    ematchain = db.trace('emat')[burn_in:]
    chain_mean = np.mean(ematchain,axis=0)
    emeans[i,:,:] = MCMC_utils.fix_matrix_gauge(chain_mean)

    # Resolve the overall sign ambiguity by forcing element [0,0] to be
    # non-positive.
    if emeans[i,0,0] > 0:
        emeans[i,:,:] = emeans[i,:,:]*-1

    # Column-major flattening of the matrix, plus the per-element spread of
    # the retained samples.
    evis[i,:] = emeans[i,:,:].ravel(order='F')
    estdMCMC[i,:,:] = np.std(ematchain,axis=0)
'''        
std1 = np.std(emeans[:len(resultsfn)/2,:,:],axis=0)
emean1 = np.mean(emeans[:len(resultsfn)/2,:,:],axis=0)
std2 = np.std(emeans[len(resultsfn)/2:,:,:],axis=0)
emean2 = np.mean(emeans[len(resultsfn)/2:,:,:],axis=0)
convarr = abs(emean1-emean2) / np.sqrt(std1**2 + std2**2)
'''

コード例 #13

0

ファイルを表示

ファイル: conditionalerror.py プロジェクト: irelandb/sortseq

# -*- coding: utf-8 -*-
"""
Created on Sun Apr 26 22:31:36 2015

@author: bill
"""
import MCMC_utils

import numpy as np
# Mean energy matrices of the two conditional fits (base 40 fixed to A / T).
emat40A = np.genfromtxt('/home/bill/Documents/energymatrix/mscS4815/results/analyzeddata/conditional40A_40/conditional40A_40_emat_mean.txt')
emat40T = np.genfromtxt('/home/bill/Documents/energymatrix/mscS4815/results/analyzeddata/conditional40T_40/conditional40T_40_emat_mean.txt')

wtseq = 'TTATTGTTTACCCTTGTCAG'

# Matrix form of the wild-type sequence.
# NOTE(review): assumes seq2mat returns a 4 x len(seq) indicator matrix that
# is 1 at the row of each position's base and 0 elsewhere -- confirm.
wtmat = MCMC_utils.seq2mat(wtseq)

# Shift every column so that the wild-type base has energy zero.
# wtmat*emat is zero everywhere except at the wild-type entries, so the
# column SUM is exactly the wild-type energy whatever its sign.  The previous
# column MAX returned 0 instead of the wild-type energy whenever that energy
# was negative, leaving those columns un-normalised.
wt_energy_40A = (wtmat*emat40A).sum(axis=0)
wt_energy_40T = (wtmat*emat40T).sum(axis=0)

emat40Anorm = emat40A - wt_energy_40A
emat40Tnorm = emat40T - wt_energy_40T

コード例 #14

0

ファイルを表示

    db = pymc.database.sqlite.load(fn)
    # emat_mean = db.emat.stats()['mean']
    emat_mean = sp.mean(db.trace('emat')[burn_in:], axis=0)

    # change the sign of emat_mean if necessary. We want a negative
    # correlation between energy and batch number (because the higher
    # the energy, the lower the expression)
    seq_mat, batch_vec = MCMC_utils_mscs.load_unique_seqs_batches(
        data_fnbase, 4, seq_start, seq_end, mut_region_start,
        mut_region_length)
    energies = sp.zeros(len(batch_vec))
    for i in range(len(batch_vec)):
        energies[i] = sp.sum(seq_mat[:, :, i] * emat_mean)
    r = scipy.stats.pearsonr(energies, batch_vec)[0]
    if r > 0:
        emat_mean = MCMC_utils.fix_matrix_gauge(-emat_mean)
    else:
        emat_mean = MCMC_utils.fix_matrix_gauge(emat_mean)
    sp.savetxt(os.path.join(output_dir, run_name + '_emat_mean.txt'),
               emat_mean)

    # compute mutual information and joint pdf
    MI, f_reg = MCMC_utils.compute_MI(seq_mat, batch_vec, emat_mean)
    MI_f = open(os.path.join(output_dir, run_name + '_MI.txt'), 'w')
    MI_f.write(str(MI))

    print MI

    # make heat map of mean matrix
    plt.clf()
    plt.imshow(MCMC_utils.zero_matrix(emat_mean), interpolation='nearest')

コード例 #15

0

ファイルを表示

#fnnames.sort()

#this section determines from the barcode file where the mutated region starts for this oligo
'''
barcodefn = config.get('Input','barcodefn')

barcode_dict = {}
reverse_dict = {}
csvfile = open(barcodefn,'r')
reader = csv.DictReader(csvfile)
for row in reader:
    barcode_dict[row['experiment_name']] = row['fwd_barcode']
    reverse_dict[row['experiment_name']] = row['rev_barcode']
'''

# Load the sequence matrices and batch labels for the mutated region, whose
# start is given relative to seq_start.
seq_mat_temp, batch_vec_temp = MCMC_utils.load_unique_seqs_batches(
    data_fn,
    seq_start + mut_region_start,
    mut_region_length)

# Random 4 x mut_region_length starting matrix, passed through the gauge fix.
emat_0 = MCMC_utils.fix_matrix_gauge(sp.randn(4, mut_region_length))

# Shuffle sequences and batch labels with one shared permutation, so each
# label stays attached to its sequence. This will prevent spuriously high
# mutual information values.
index_shuf = range(len(batch_vec_temp))
sp.random.shuffle(index_shuf)

num_positions = len(seq_mat_temp[0, :, 0])
num_seqs = len(seq_mat_temp[0, 0, :])
batch_vec = sp.zeros_like(batch_vec_temp)
seq_mat = sp.zeros([4, num_positions, num_seqs], dtype='int')
for i, i_s in enumerate(index_shuf):
    batch_vec[i] = batch_vec_temp[i_s]
    seq_mat[:, :, i] = seq_mat_temp[:, :, i_s]

コード例 #16

0

ファイルを表示

ファイル: generic_energy_matrix_oldmscL.py プロジェクト: irelandb/sortseq

#fnnames.sort()

#this section determines from the barcode file where the mutated region starts for this oligo
'''
barcodefn = config.get('Input','barcodefn')

barcode_dict = {}
reverse_dict = {}
csvfile = open(barcodefn,'r')
reader = csv.DictReader(csvfile)
for row in reader:
    barcode_dict[row['experiment_name']] = row['fwd_barcode']
    reverse_dict[row['experiment_name']] = row['rev_barcode']
'''

seq_mat_temp,batch_vec_temp = MCMC_utils.load_unique_seqs_batches(data_fn,seq_start + mut_region_start,mut_region_length)


emat_0 = MCMC_utils.fix_matrix_gauge(sp.randn(4,mut_region_length))




# shuffle the elements of seq_mat and batch_vec. This will prevent
# spuriously high mutual information values

index_shuf = range(len(batch_vec_temp))
sp.random.shuffle(index_shuf)
seq_mat = sp.zeros([4,len(seq_mat_temp[0,:,0]),len(seq_mat_temp[0,0,:])],dtype = 'int')
batch_vec = sp.zeros_like(batch_vec_temp)
for i, i_s in enumerate(index_shuf):