def parallelindiv(path2020, savstatdir, datafile, outnum, reslall, resnall,
                  DeltaOmegaParametersBoundaries, namin, namoutfrag, fittype,
                  k121, k122, k131, k132, k23min, k23max, errruns):
    """Run one ``produceindivplots`` worker process per residue and wait for all.

    One process is started for every entry of ``reslall``; entry ``runnum`` of
    ``reslall``, ``resnall`` and ``DeltaOmegaParametersBoundaries`` is wrapped
    in a single-element list and handed to that worker together with the
    output name ``namoutfrag + str(runnum) + '_'``.  All remaining arguments
    are forwarded unchanged.

    The ``outnum`` argument is accepted for interface compatibility only: it
    is overwritten, and exactly one batch (index 0) is executed.

    Returns None.
    """
    numrep = len(reslall)
    for outnum in np.arange(1):
        workers = []
        for runnum in np.arange(numrep):
            # Single-string form prints the same text as the former
            # ``print 'run ', str(...)`` statement.
            print('run  ' + str(outnum * numrep + runnum))
            namout = namoutfrag + str(runnum) + '_'
            worker = multiprocessing.Process(
                target=produceindivplots,
                args=(path2020, savstatdir, datafile, namin,
                      [reslall[runnum]], [resnall[runnum]],
                      [DeltaOmegaParametersBoundaries[runnum]], namout,
                      fittype, k121, k122, k131, k132, k23min, k23max,
                      errruns))
            workers.append(worker)
            worker.start()
        # Block until every worker of this batch has finished.
        for worker in workers:
            worker.join()
        print('really done with  ' + str(outnum * numrep + numrep) + ' runs')
def parammake(PropAxesColl,parbdslistb,parbdslista,k12min,k12max,k13min,k13max,k23min,k23max,pmin,pmax):
    """Create 100 randomized starting parameter collections with bounds.

    (Original author's note: requires editing; requires optimization and
    simplification in an update.)

    Arguments:
        PropAxesColl: property axis collection; queried through
            ``getnforfit`` / ``getactforfit`` for the axes 'conc', 'sites',
            'residues', 'TR' and 'B1field'.
        parbdslistb: if not equal to 0, passed (flattened one level) as
            ``parbdslist`` for 'dw', in which case ``parbdslista`` is passed as
            ``boundlist`` instead.
        parbdslista: 'dw' bounds, flattened one level before use.
        k12min..k23max: randomization bounds for the three rate-constant
            pairs; each [min, max] pair is listed twice in ``parbdslist``.
        pmin, pmax: randomization bounds for 'p'.

    Reads the module-level switch ``setprotoncpmg`` to choose the 'R20500'
    bounds.

    Returns:
        list of 100 ``params`` objects.
    """
    # Bounds for R20500 (selected by TR index) and R2mult; only the R20500
    # limits differ between the two ``setprotoncpmg`` settings.
    if setprotoncpmg == 0:
        R20500TRmin=2
        R20500TRmax=11
        R20500nTRmin=2
        R20500nTRmax=12
        R2multmin=1
        R2multmax=3
    else:
        R20500TRmin=2
        R20500TRmax=40
        R20500nTRmin=2
        R20500nTRmax=40
        R2multmin=1
        R2multmax=3
    paramsxx=[]
    for u in np.arange(100):
        paramsx=params()
        #100,400 k23
        # paramsx.addpar('k',['sites','sites2','conc'],listpermut(sitemaker(3,'triang'),PropAxesColl.getnforfit('conc')),parbdslist=[[2000,12000],[2000,12000],[1,3500],[1,3500],[1,3500],[1,3500]])#,bounds=[20,400])
        paramsx.addpar('k',['sites','sites2','conc'],listpermut(sitemaker(3,'triang'),PropAxesColl.getnforfit('conc')),parbdslist=[[k12min,k12max],[k12min,k12max],[k13min,k13max],[k13min,k13max],[k23min,k23max],[k23min,k23max]])#,bounds=[20,400])
        paramsx.generatemissing('k')
        # fitactive is the product of per-axis activity flags; the literal
        # [0,1,1] presumably switches off the first site -- TODO confirm.
        paramsx.addpar('p',['sites','conc'],listpermut(PropAxesColl.getnforfit('sites'),PropAxesColl.getnforfit('conc')),parbds=[pmin,pmax],fitactive=flatten([np.product(i) for i in listpermut([0,1,1],PropAxesColl.getactforfit('conc'))]))#,bounds=[20,400])
        if parbdslistb != 0:
            paramsx.addpar('dw',['residues','sites'],listpermut(PropAxesColl.getnforfit('residues'),PropAxesColl.getnforfit('sites')),boundlist=flatten(parbdslista,levels=1),parbdslist=flatten(parbdslistb,levels=1),fitactive=flatten([np.product(i) for i in listpermut(PropAxesColl.getactforfit('residues'),PropAxesColl.getactforfit('conc'),[0,1,1])]))#,bounds=[20,400])
        else:
            paramsx.addpar('dw',['residues','sites'],listpermut(PropAxesColl.getnforfit('residues'),PropAxesColl.getnforfit('sites')),parbdslist=flatten(parbdslista,levels=1),fitactive=flatten([np.product(i) for i in listpermut(PropAxesColl.getactforfit('residues'),PropAxesColl.getactforfit('conc'),[0,1,1])]))#,bounds=[20,400])
        paramsx.addpar('dwsign',['residues','sites'],listpermut(PropAxesColl.getnforfit('residues'),PropAxesColl.getnforfit('sites')),parbds=[1],fitactive=flatten([np.product(i) for i in listpermut(PropAxesColl.getactforfit('residues'),[0,1,1])]))#,bounds=[20,400])
        # TR index 0 gets the "TR" limits, every other TR index the "nTR" limits.
        paramsx.addpar('R20500',['residues','TR'],listpermut(PropAxesColl.getnforfit('residues'),PropAxesColl.getnforfit('TR')),parbdslist=[[R20500TRmin,R20500TRmax] if i[1] == 0 else [R20500nTRmin,R20500nTRmax] for i in listpermut(PropAxesColl.getnforfit('residues'),PropAxesColl.getnforfit('TR'))])
        # R2mult is only fitted for B1field indices above 0.
        paramsx.addpar('R2mult',['residues','B1field'],listpermut(PropAxesColl.getnforfit('residues'),PropAxesColl.getnforfit('B1field')),parbds=[R2multmin,R2multmax],fitactive=[1 if i[1] > 0 else 0 for i in listpermut(PropAxesColl.getnforfit('residues'),PropAxesColl.getnforfit('B1field'))])
        for p in ['p','k','dw','dwsign','R20500','R2mult']:
            paramsx.generatemissing(p)
        # 'dwsign' is randomized before 'dw' because randomizepar('dw', ...)
        # multiplies by the freshly drawn 'dwsign' values; 20 values per entry.
        for p in ['p','k','dwsign','dw','R20500','R2mult']:
            paramsx.randomizepar(p,20)
        for p in ['p','k','dw','R20500','R2mult']:
            paramsx.generatemissing(p)
        paramsxx.append(paramsx)
    prxx=[]
    for paramsx in paramsxx:
        # prx is an alias of paramsx (same object), so the copy below edits
        # the collection in place.
        prx=paramsx
        for i in ['p','k']:
            # Copy 'par', 'bounds' and 'parbds' from every even-indexed entry
            # onto the following odd-indexed entry.
            # NOTE(review): ``len(...)/2`` relies on Python 2 integer division
            # (assuming no ``from __future__ import division``) -- confirm.
            for j in np.arange(len(paramsx[i])/2):
                for which in ['par','bounds','parbds']:
                    # print i,j*2,which,1+j*2
                    prx[i][int(1+j*2)][which]=paramsx[i][int(j*2)][which]
        prxx.append(prx)
    paramsxx=prxx
    return paramsxx
def setonepar(self,pname,datlist,**kwargs):
    """Write fitted values back into the fit-active entries of one parameter.

    The entries of ``self[pname]`` are walked in order.  Every entry whose
    ``'fitactive'`` equals 1 receives the next value of ``datlist`` (wrapped
    in a one-element list) as its ``'par'``; inactive entries are skipped and
    do not consume a value.  For ``pname == 'dw'`` the matching ``'dwsign'``
    entry's ``'par'`` is additionally set to 0.

    After ``'par'`` is set, ``'bounds'`` is normalised to one bound pair per
    parameter value.

    Keyword arguments:
        bounds: if present, replaces the entry's ``'bounds'``.
        bndtol: if present, stored as the entry's ``'bndtol'``.
        resetbounds: if true, ``'bounds'`` is cleared and
            ``self.generatemissing(pname)`` is called to regenerate it.
        resetparbounds: if true, ``'parbds'`` is set to the entry's current
            ``'bounds'``.

    ``resetbounds`` and ``resetparbounds`` are honoured by value.  They used
    to be honoured by mere presence, so ``resetparbounds=0`` (which callers
    pass to leave the parameter bounds untouched) still overwrote
    ``'parbds'``.
    """
    reset_bounds = bool(kwargs.get('resetbounds'))
    reset_parbounds = bool(kwargs.get('resetparbounds'))
    inactivecount = 0
    for j, entry in enumerate(self[pname]):
        if entry['fitactive'] != 1:
            # Inactive entries have no slot in datlist.
            inactivecount += 1
            continue
        entry['par'] = [datlist[j - inactivecount]]
        if pname == 'dw':
            self['dwsign'][j]['par'] = 0
        # Expand a flat [low, high] pair (or a mismatched list of pairs) to
        # one pair per parameter value.
        bounds = entry['bounds']
        flat_pair = len(np.shape(bounds)) == 1
        if flat_pair or len(bounds) != len(entry['par']):
            template = bounds if flat_pair else bounds[0]
            entry['bounds'] = [template for _ in entry['par']]
        if 'bounds' in kwargs:
            entry['bounds'] = kwargs['bounds']
        if 'bndtol' in kwargs:
            entry['bndtol'] = kwargs['bndtol']
        if reset_bounds:
            entry['bounds'] = []
            self.generatemissing(pname)
            # Re-read the entry in case generatemissing replaced it.
            entry = self[pname][j]
        if reset_parbounds:
            entry['parbds'] = entry['bounds']
def loadeverything(savstatdir, filenames, fieldnumber, **kwargs):
    """Load saved fit states for several file stems and collect their results.

    For every stem in ``filenames`` and every process number in the selection,
    ``loadstatus2(savstatdir, stem + str(number) + '_part')`` is attempted.
    States that cannot be loaded are skipped (best effort).

    Arguments:
        savstatdir: directory holding the saved states; the working directory
            is changed to it before returning.
        filenames: iterable of file-name stems.
        fieldnumber: unused.

    Keyword arguments:
        progcycle: index of the result picked from each loaded result list
            (default -1, i.e. the last one).
        selproc: iterable of process numbers to try (default 0..119).

    Returns:
        A 19-tuple ``(poscoll, resnam, allsetcoll, resultcoll, 0 x 14, cond)``.
        The zeros are placeholders kept so callers can keep unpacking the
        long-standing 19-value layout.  ``cond`` is the condition object of
        the last state that loaded, or None when nothing could be loaded
        (this case used to raise UnboundLocalError).
    """
    pp = kwargs.get("progcycle", -1)
    if "selproc" in kwargs:
        sellist = kwargs["selproc"]
    else:
        sellist = np.arange(120)
    resultcoll = []
    resnam = []
    poscoll = []
    allsetcoll = []
    cond = None
    for filename in filenames:
        for selproc in sellist:
            try:
                name, resnamx, poscollx, allsets, cond = loadstatus2(
                    savstatdir, filename + str(selproc) + '_part')
                resultcoll.append(allsets[pp])
                allsetcoll.append(allsets)
                resnam.append(resnamx)
                poscoll.append(poscollx)
            except Exception:
                # Missing or unreadable part files are expected when fewer
                # than len(sellist) processes were run; KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                continue
    # NOTE(review): placed after the loops; confirm this chdir was not meant
    # to run only in the failure path.
    os.chdir(savstatdir)
    resultcoll = flatten(resultcoll, levels=1)
    resnam = flatten(resnam, levels=1)
    poscoll = flatten(poscoll, levels=1)
    return (poscoll, resnam, allsetcoll, resultcoll) + (0,) * 14 + (cond,)
def readoutresults(namresultsx):
    """Collect fit costs of individual-residue fits for 15 residues x 3 repeats.

    For every residue index 0..14 and repeat ``nn`` in 0..2 the saved results
    named ``namresultsx + str(residue number) + '_' + str(nn) + '_'`` are
    loaded through ``hkio.loadeverything`` and one cost is recorded.

    Returns:
        list of 15 lists, each holding 3 costs.

    NOTE(review): this function calls ``mainfuncts.parammake`` with 9
    arguments and ``hkio.loadeverything`` without a directory argument, which
    does not match the ``parammake`` / ``loadeverything`` definitions visible
    in this source -- presumably it targets older versions; confirm before
    use.
    """
    reslall=['A29','A30','A31','A32','A33','A34','A52','A53','A54','A78','A81','A82','A83','A84','A86']
    resnall=[29,30,31,32,33,34,52,53,54,78,81,82,83,84,86]
    freevsfix='free' #'fix_0
    pickthis=1
    list1=[]
    for pickthis in np.arange(15):
        list2=[]
        for nn in np.arange(3):
            # The full residue lists are rebuilt each pass because they are
            # immediately narrowed to the single residue being processed.
            reslall=['A29','A30','A31','A32','A33','A34','A52','A53','A54','A78','A81','A82','A83','A84','A86']
            resnall=[29,30,31,32,33,34,52,53,54,78,81,82,83,84,86]
            reslall=[reslall[pickthis]]
            resnall=[resnall[pickthis]]
            pickthese=[0]
            praxs1=mainfuncts.praxismake([x for y,x in enumerate(reslall) if y in pickthese],[x for y,x in enumerate(resnall) if y in pickthese])
            # One row of 'dw' bounds per residue.
            # NOTE(review): the row tagged "#out" is read here as commented
            # out, which leaves 15 rows for 15 residues -- confirm.
            parbdslistaxallf=[[[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1,-1,],[-26000,26000],[-13000, 13000]],\
            #out [[-1, 1], [-389.0, -5341], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]],\
            [[-1, 1], [-26000,26000], [-13000, 13000]]]
            parbdslistaxallf=[parbdslistaxallf[pickthis]]
            paramsxx=mainfuncts.parammake(praxs1,0,[x for y,x in enumerate(parbdslistaxallf) if y in pickthese],1,10000,100,900,1,10000)
            conditions=[0,[1,3,3,3,3]]
            #setparameters2=['combo10l.dat','C:\\Users\\Hans\\Desktop\\TRANSFER\\2020Feb\\',[[x for y,x in enumerate(reslall) if y in pickthese]],conditions,namresults]
            namresults=namresultsx+str(resnall[0])+'_'+str(nn)+'_'
            poscoll,resnam,allsetcoll,resultcoll,relaxrat0,relaxrat,lookatratio,results,relaxrat1,relaxrat2,relaxrat_1,relaxrat_2,intdiffcorr, intcorr, intmin,ac,oc,rateconstpre,cond=hkio.loadeverything([namresults],0,decoupl=0)
            costcoll=[i.cost for i in resultcoll]
            # Keep only the lowest-cost result.
            resultcoll=[resultcoll[np.argsort(costcoll)[0]]]
            paramsxy=mainfuncts.resc2param(praxs1,paramsxx,resultcoll,5)
            # NOTE(review): this records the cost of the FIRST loaded result,
            # not of the lowest-cost one selected above -- confirm intent.
            list2.append(costcoll[0])
        list1.append(list2)
    return list1
def getactforfit(self,pr1):
    """Return one fit-activity flag per entry of property axis ``pr1``.

    The result has ``len(self[pr1]['num'])`` items.  When the axis has no
    ``'fitact'`` key every flag is 1; otherwise flag ``i`` is read from
    ``self[pr1][i].fitact[i]``.

    NOTE(review): indexing the axis with an integer and then reading a
    ``.fitact`` attribute looks inconsistent with the dict-style access used
    for ``'num'`` -- confirm that the 'fitact' branch is exercised.
    """
    axis = self[pr1]
    positions = np.arange(len(axis['num']))
    if 'fitact' in axis:
        return [axis[pos].fitact[pos] for pos in positions]
    return [1 for _ in positions]
def parallelmultifit4(path2020,savstatdir,setparameters3,outernum,numrep,paramsxx,PropAxesColl):
    """Parallel fitting engine: run ``outernum`` batches of ``numrep`` fits.

    Fit number ``outnum*numrep + runnum`` runs ``hkfit2.parallelfit3`` in its
    own process, using ``paramsxx[outnum*numrep + runnum]`` as its parameter
    collection.  All processes of one batch are started first and then joined
    before the next batch begins.

    Returns None.
    """
    for outnum in np.arange(outernum):
        batch = []
        for runnum in np.arange(numrep):
            job = outnum*numrep+runnum
            # Single-string form prints the same text as the former
            # ``print 'run ', str(...)`` statement.
            print('run  ' + str(job))
            worker = multiprocessing.Process(
                target=hkfit2.parallelfit3,
                args=(path2020,savstatdir,setparameters3,job,paramsxx[job],PropAxesColl))
            batch.append(worker)
            worker.start()
        # Wait for the whole batch before launching the next one.
        for worker in batch:
            worker.join()
        print('really done with  ' + str(outnum*numrep+numrep) + ' runs')
def printrd(praxs1,x,exp_data,m,err,g,dwbset,par,fl):
    """Recalculate theoretical values and chi-square terms from parameters.

    Arguments:
        praxs1: property axis collection, passed to ``par.getallparandbnds``.
        x: per-dataset x values.
        exp_data: per-dataset experimental y values.
        m, g, dwbset: per-dataset arguments forwarded to ``multifunctg2``;
            ``g[l][0] > 10`` routes a dataset into the "cest" residual pool,
            anything else into the "rd" pool.
        err: per-dataset y errors.
        par: parameter collection; only its first parameter set is used.
        fl: per-dataset lists of indices into the flat parameter vector.

    Reads the module-level switch ``fineres``; when it equals 1 the returned
    curves are evaluated on a grid with step 20 between min and max of each
    x instead of on the experimental x values.

    Returns a 5-tuple:
        value     -- per-dataset calculated curves (fine grid if fineres == 1)
        chicoll   -- per-dataset scaled sum of squared residuals
        chicoll2  -- per-dataset residuals divided by error
        combined  -- ``(x4*x1 + x3*x2)*2`` (see below)
        normalized residuals over all points, divided by
        ``err*sqrt(N - paramno)``.

    NOTE(review): several quotients below divide one ``len(...)`` by another.
    Under Python 2 without ``from __future__ import division`` these are
    floor divisions (typically 0), which would zero the weights x1/x3 --
    confirm which division is in effect for this module.
    """
    value=[];valuealt=[]
    chicoll=[]
    chicoll2=[]
    a,b1,b2,c,e=par.getallparandbnds(praxs1,['p','k','dw','R20500','R2mult'],inclfilt=[])
    par1=a[0]
    # Number of distinct parameters referenced by any dataset.
    parnocoll=[]
    for l,i in enumerate(fl):
        thisset=[j for j in i]
        parnocoll.append(thisset)
    paramno=len(set(flatten(parnocoll)))
    cestchi=[];rdchi=[]
    # Fine x grid per dataset, used only when fineres == 1.
    xalt=[list(np.arange(np.min(xi),np.max(xi),20)) for xi in x]
    for l,i in enumerate(fl):
        # Parameter values selected for this dataset.
        a=[par1[j] for j in i]
        thisset=[j for j in i]
        val=np.array((flatten([multifunctg2(a,x[l],m[l],dwbset[l],g[l])])))
        """ comment in the following line if a higher resolution CPMG curve is required"""
        if fineres == 1:
            #print len([m[l][0] for xa in xalt]), len([g[l][0] for xa in xalt]), len(xalt[l])
            valalt=np.array((flatten([multifunctg2(a,xalt[l],[m[l][0] for xa in xalt[l]],dwbset[l],[g[l][0] for xa in xalt[l]])])))
            valuealt.append(valalt)
        value.append(val)
        if g[l][0] > 10:
            cestchi.append((np.array(exp_data[l])-np.array(val))/(np.array(err[l])))
        else:
            rdchi.append((np.array(exp_data[l])-np.array(val))/(np.array(err[l])))
        chicoll2.append((np.array(exp_data[l])-np.array(val))/(np.array(err[l])))
        # Per-dataset chi-square; the denominator subtracts this dataset's
        # share of paramno from its number of points.
        chicoll.append(np.sum(((np.array(exp_data[l])-np.array(val))/\
        (np.array(err[l])*np.sqrt(len(exp_data[l])-paramno*\
        (len(exp_data[l])/len(flatten(exp_data))))))**2))
    paramno=len(set(flatten(parnocoll)))
    # Raw residuals on the experimental x values (computed before ``value``
    # may be swapped for the fine-grid curves).
    err0=np.array(flatten(exp_data))-np.array(flatten(value))
    # x1 / x3: fraction of cest / rd points; x2 / x4: sum of squared cest / rd
    # residuals divided by (N - paramno).
    x1=((len(flatten(cestchi)))/(len(flatten(cestchi))+len(flatten(rdchi))))
    x2=np.sum((np.sqrt(2)*np.array(flatten(cestchi))*(1/np.sqrt(len(flatten(exp_data))-paramno)))**2)/2
    x3=((len(flatten(rdchi)))/(len(flatten(cestchi))+len(flatten(rdchi))))
    x4=np.sum((np.sqrt(2)*np.array(flatten(rdchi))*(1/np.sqrt(len(flatten(exp_data))-paramno)))**2)/2
    if fineres == 1:
        value=valuealt
    #print value, 'value'
    # The combined term weights the rd sum by the cest fraction and the cest
    # sum by the rd fraction.
    return value, chicoll,chicoll2, (x4*x1+x3*x2)*2,(np.array(flatten(err0))/\
    (np.array(flatten(err))*np.sqrt(len(flatten(exp_data))-paramno)))
def randomizepar(self,pname,numb):
    """Draw ``numb`` random starting values for every entry of ``pname``.

    Values are drawn inside each entry's ``'parbds'``:

    * ``'dwsign'``: random choice among the listed values;
    * ``'dw'``: uniform in [low, high), multiplied element-wise by the
      current ``'dwsign'`` values of the same entry index;
    * anything else: uniform in [low, high).

    Afterwards ``'bounds'`` is expanded to one bound pair per drawn value and
    ``self.generatemissing(pname)`` is attempted; any failure of that call is
    deliberately ignored.
    """
    for j, entry in enumerate(self[pname]):
        limits = entry['parbds']
        target = self[pname][j]
        if pname == 'dwsign':
            target['par'] = np.random.choice(limits, numb)
        elif pname == 'dw':
            sign = self['dwsign'][j]['par']
            target['par'] = (sign*np.random.random(numb))*(limits[1]-limits[0])+limits[0]*sign
        else:
            target['par'] = (np.random.random(numb)*(limits[1]-limits[0])+limits[0])
        # A flat [low, high] pair, or a list whose length does not match the
        # number of values, is replaced by one pair per value.
        current = target['bounds']
        is_flat = len(np.shape(current)) == 1
        if is_flat or len(current) != len(target['par']):
            template = current if is_flat else current[0]
            target['bounds'] = [template for _ in np.arange(len(target['par']))]
    # NOTE(review): placed after the loop; confirm it was not meant to run
    # once per entry.
    try:
        self.generatemissing(pname)
    except:
        pass
def produceindivplots(reslallx,resnallx,parbdslistaxallfx,namout,mode,k231,k232):
    # Older, 7-argument variant: for each residue, load the stored result set
    # 'multix13_k13_ext_0', transfer its lowest-cost result onto a fresh
    # parameter collection via resc2param(..., mode) and run one fit whose
    # output name is namout + residue number + '_0_'.
    #
    # NOTE(review): a 16-argument function with the same name appears later
    # in this source; if both live in one module the later definition
    # replaces this one.  The calls below (parammake with 9 arguments,
    # hkio.loadeverything without a directory, parallelfit3 with 4 arguments)
    # presumably match older helper signatures -- confirm before reuse.
    for pickthis in np.arange(len(reslallx)):
        # Work on one residue at a time, wrapped as single-element lists.
        reslall=[reslallx[pickthis]]
        resnall=[resnallx[pickthis]]
        pickthese=[0]
        #global praxs1
        praxs1=mainfuncts.praxismake([x for y,x in enumerate(reslall) if y in pickthese],[x for y,x in enumerate(resnall) if y in pickthese])
        parbdslistaxallf=[parbdslistaxallfx[pickthis]]
        namresults='multix13_k13_ext_0'#######'multindiv_'+str(resnall[0])
        paramsxx=mainfuncts.parammake(praxs1,0,[x for y,x in enumerate(parbdslistaxallf) if y in pickthese],1,10000,100,900,k231,k232)
        conditions=[0,[1,3,3,3,3]]
        #setparameters2=['combo10l.dat','C:\\Users\\Hans\\Desktop\\TRANSFER\\2020Feb\\',[[x for y,x in enumerate(reslall) if y in pickthese]],conditions,namresults]
        poscoll,resnam,allsetcoll,resultcoll,relaxrat0,relaxrat,lookatratio,results,relaxrat1,relaxrat2,relaxrat_1,relaxrat_2,intdiffcorr, intcorr, intmin,ac,oc,rateconstpre,cond=hkio.loadeverything([namresults],0,decoupl=0)
        costcoll=[i.cost for i in resultcoll]
        # Keep only the lowest-cost stored result.
        resultcoll=[resultcoll[np.argsort(costcoll)[0]]]
        paramsxy=mainfuncts.resc2param(praxs1,paramsxx,resultcoll,mode)
        for nn in [0]:#np.arange(3):
            namresultsOUT=namout+str(resnall[0])+'_'+str(nn)+'_'
            setparameters3=['combo10l.dat','C:\\Users\\Hans\\Desktop\\TRANSFER\\2020Feb\\',[[x for y,x in enumerate(reslall) if y in pickthese]],conditions,namresultsOUT,0]
            hkfit2.parallelfit3(praxs1,setparameters3,0,paramsxy[nn])
def resc2param(PropAxesColl,paramsxx,resultcoll,resc2paramtype):
    """Transfer fit results onto parameter collections.

    The flat result vector ``.x`` of a result is cut into consecutive slices,
    one per parameter name, where the slice length is the number of values
    ``getallparandbnds`` reports for that name.  Each slice is written with
    ``setonepar``.

    Modes (``resc2paramtype``):
        1, 3 -- set 'p', 'k', 'dw', 'R20500', 'R2mult' from
                ``resultcoll[n]`` and regenerate tight bounds using a
                per-name tolerance (``bndtol``); the parameter bounds are
                reset as well.  Mode 1 additionally prints the 'k' values.
        2    -- set the same five names from ``resultcoll[n]``, passing only
                ``resetparbounds=0``.
        4    -- set only 'p' and 'k' of EVERY collection in ``paramsxx`` from
                ``resultcoll[0]``, with tight regenerated bounds.  The
                collections may stem from a different residue set than the
                result.
        5    -- like 4, but passing only ``resetparbounds=0``.
    Any other value leaves the collections unmodified.

    Returns ``paramsxx`` (modified in place).
    """
    # Per-name bound tolerances; names not listed fall back to 0.1.
    # NOTE(review): mode 1 lists 'dwf', which is never one of the processed
    # names, so 'dw' gets 0.1 there while mode 3 gives 'dw' 1 -- confirm.
    default_tol = 0.1
    tol_mode1 = {'p': 0.0001, 'dwf': 1}
    tol_mode3 = {'k': 1, 'p': 0.0001, 'dw': 1}
    tol_mode4 = {'k': 1, 'p': 0.0001}

    shared_result = resc2paramtype == 4 or resc2paramtype == 5
    names = ['p','k','dw','R20500','R2mult']
    count = len(resultcoll)
    if shared_result:
        names = ['p','k']
        count = len(paramsxx)

    for pn in range(count):
        source = resultcoll[0] if shared_result else resultcoll[pn]
        flat = source.x
        start = 0
        for name in names:
            a,b1,b2,c,e = paramsxx[pn].getallparandbnds(PropAxesColl,[name],inclfilt=[])
            stop = start + len(a[0])
            values = list(flat[start:stop])
            start = stop
            if resc2paramtype == 1:
                if name == 'k':
                    # Same text as the former ``print 'k',prlst`` statement.
                    print('k ' + str(values))
                paramsxx[pn].setonepar(name,values,bounds=[],bndtol=tol_mode1.get(name, default_tol),resetbounds=1,resetparbounds=1)
            elif resc2paramtype == 2:
                paramsxx[pn].setonepar(name,values,resetparbounds=0)
            elif resc2paramtype == 3:
                paramsxx[pn].setonepar(name,values,bounds=[],bndtol=tol_mode3.get(name, default_tol),resetbounds=1,resetparbounds=1)
            elif resc2paramtype == 4:
                if name in tol_mode4:
                    paramsxx[pn].setonepar(name,values,bounds=[],bndtol=tol_mode4[name],resetbounds=1,resetparbounds=1)
            elif resc2paramtype == 5:
                if name == 'k' or name == 'p':
                    paramsxx[pn].setonepar(name,values,resetparbounds=0)
    return paramsxx
def fitcpmg4(praxs1,timedat,rawdata,field,err,mode,equationtype,conditions,savstatdir,filenamsav,resnam,poscoll,moreconditions,paramsx,fl):
    """Fit multiple types of relaxation dispersion data.

    (The function title is somewhat misleading: it is not limited to CPMG.)

    Arguments:
        praxs1: property axis collection.
        timedat: x axis data.
        rawdata: y axis data.
        field: B0 field relative to 500 MHz.
        err: y error.
        mode: not used here (was used in the parent script as precalc).
        equationtype: type of relaxation dispersion; see ``multifunctg2``.
        conditions: list of conditions.
            pos 0 -- reshuffle switch (0 or 1); not used anymore according to
                     the original author, but the code path still exists.
            pos 1 -- five integers:
                [0] numbattempts: number of initial attempts of the
                    "precalculation" round.  A high number can be useful when
                    starting in a Monte Carlo manner (large boundaries, little
                    idea about the system).
                [1] precalcatt: number of documented major steps during the
                    precalculation.
                [2] precalclen: number of undocumented steps (max_nfev)
                    within each major precalculation step.
                [3] maincalcatt: number of documented major steps during the
                    main calculation.
                [4] maincalclen: number of undocumented steps (max_nfev)
                    within each major main-calculation step.
        savstatdir: directory passed to ``hkio.savstatus2b``.
        filenamsav: name under which progress of the fit is saved to disk.
        resnam: list of residue names included in the fit (only saved).
        poscoll: not used anymore for fitting (only saved).
        moreconditions: package of conditions used by the parent script; it is
            saved together with the status.
        paramsx: collection of parameter objects; may carry different bounds.
        fl: list of pointer lists; each selects positions of the flat
            parameter list for one entry of the data.

    Returns:
        (res, allrescoll): the last ``least_squares`` result and the list of
        all intermediate results.
    """
    allconditions=[timedat,rawdata,field,err,mode,equationtype,conditions,\
    filenamsav,resnam,poscoll,moreconditions]
    # NOTE: this local name shadows the module-level function ``reshuffle``
    # inside this function.
    reshuffle=conditions[0] #0 or 1
    numbattempts,precalcatt,precalclen,maincalcatt,maincalclen=conditions[1][0:5]#1, 5, 5, 5, 10
    numdat=np.shape(rawdata)[0]
    # One dataset index per data point.
    dwbsetp=[]
    setdwb=0
    for i in np.arange(len(timedat)):
        for j in np.arange(len(flatten(timedat[i]))):
            dwbsetp.append(setdwb)
        setdwb+=1
    par6=np.array(field)
    gpar=np.array(equationtype)
    par7=np.array(np.array(dwbsetp).astype('int'))
    par2=np.array(timedat)
    par3=np.array(rawdata)
    errvalpar=np.array(err)
    # print len(boundsl)
    if reshuffle == 1:
        # Legacy resampling: entries with equation type 6 are kept; every
        # other entry is replaced by a randomly drawn entry of type 3.
        par7x=par7;par6x=par6;par2x=par2;par3x=par3;errvalparx=errvalpar;gparx=gpar
        par7=[];par6=[];par2=[];par3=[];errvalpar=[];gpar=[]
        for i,j in enumerate(par2x):
            if gparx[i] == 6:
                par2.append(par2x[i])
                par3.append(par3x[i])
                par6.append(par6x[i])
                par7.append(par7x[i])
                errvalpar.append(errvalparx[i])
                gpar.append(gparx[i])
            else:
                foundrnd=0
                np.random.seed()
                while foundrnd == 0:
                    np.random.seed()
                    n=np.random.randint(len(par2x))
                    if gparx[n] == 3:
                        par2.append(par2x[n])
                        par3.append(par3x[n])
                        par6.append(par6x[n])
                        par7.append(par7x[n])
                        errvalpar.append(errvalparx[n])
                        gpar.append(gparx[n])
                        foundrnd=1
    allrescoll=[]
    par1coll=[]
    costcoll=[]
    for u in np.arange(numbattempts):
        np.random.seed()
        evalmode=1
        a,b1,b2,c,e=paramsx.getallparandbnds(praxs1,['p','k','dw','R20500','R2mult'],inclfilt=[])
        # Attempt u uses the u-th starting vector and its bounds.
        par1=a[u];boundsl=b1[u];boundsh=b2[u]
        if evalmode == 1:
            # Report starting values that sit on or outside their bounds.
            for i in zip(par1,boundsl,boundsh):
                if i[0] <= i[1] or i[0] >= i[2]:
                    print 'problem!', i[0], i[1], i[2]
        if precalcatt > 0:
            try:
                for k in np.arange(precalcatt):
                    if evalmode ==1:
                        for i in zip(par1,boundsl,boundsh):
                            print i[0],i[1],i[2]
                    # print par1,par2, par3, par6, par7, errvalpar, gpar, fl
                    # print 'here'
                    # print gpar, 'gparold'
                    # The precalculation passes every equation-type code
                    # shifted by +1000; the meaning of the shifted codes is
                    # defined by errfunctg3 / multifunctg2 (not shown here).
                    gparn=[]
                    for ggg in gpar:
                        gparn.append(list([gggg+1000 for gggg in ggg]))
                    #gpar=np.array([ggg+1000 for ggg in gpar])
                    # print gparn, 'gparnew'
                    res=optimize.least_squares(errfunctg3,par1,max_nfev=precalclen,\
                    bounds=(boundsl,boundsh),args=(par2,par3,par6,par7,\
                    errvalpar,gparn,fl),method='trf',jac='3-point',x_scale='jac') #,
                    # Continue the next step from the current optimum.
                    par1=res.x
                    allrescoll.append(res)
                    print 'attempt ', u, ' precalculation step ', u, k, par1,res.cost,filenamsav, allrescoll[-1].cost
                    hkio.savstatus2b(savstatdir,filenamsav,resnam,poscoll,allrescoll,allconditions)
            except:
                print 'well this one didnt work'
            # NOTE(review): if the very first least_squares call raised,
            # ``res`` is unbound here -- confirm nesting and intent.
            par1coll.append(res.x)
            costcoll.append(res.cost)
    if precalcatt > 0:
        # Start the main calculation from the best precalculation attempt.
        try:
            par1=par1coll[np.argmin(costcoll)]
        except:
            print "unfeasable result"
    # try:
    # The main calculation reuses boundsl/boundsh of the LAST attempt above.
    for k in np.arange(maincalcatt):
        print len(par1), len(par2), len(flatten(par2)), 'well'
        res=optimize.least_squares(errfunctg3,par1,max_nfev=maincalclen,\
        bounds=(boundsl,boundsh),args=(par2,par3,par6,par7,errvalpar,gpar,\
        fl),method='trf',jac='3-point',x_scale='jac')
        allrescoll.append(res)
        # Progress is written to disk after every major step.
        hkio.savstatus2b(savstatdir,filenamsav,resnam,poscoll,allrescoll,allconditions)
        par1=res.x
        print 'mainalculation step ', u, k, par1, res.cost,filenamsav, allrescoll[-1].cost
    for i in res.x:
        print i
    print 'final cost', res.cost
    # except:
    # res=[0]
    # print 'late fitting error'
    return res, allrescoll #xtol=1e-9
def reshuffle(ss,reslalmall,shuffletype):
    """Resample data for error estimation by re-adding shuffled residuals.

    For every selected spin system, error-scaled residuals between measured
    and fitted values are computed per dataset, residuals are redistributed
    within pools, and synthetic data ``fit + residual * error`` are stored on
    each dataset as ``reshufy``.

    Arguments:
        ss: list of spin systems; each has ``name`` and ``datasets``.  The
            datasets are read via ``datatype``, ``fit``, ``setselect`` and the
            type-specific fields used below, and are modified in place
            (``resid`` and ``reshufy`` are set).
        reslalmall: names of the spin systems to include (matched against
            ``spinsyst.name[0]``).
        shuffletype: list of ``[datatype, method]`` pairs; the methods are
            described in the string block further down.

    Returns:
        ``ss`` (the same list object).
    """
    # Fixed switches: no random sign flip, residuals scaled by the error,
    # sampling "with replacement".
    signrnd=1
    heteroskedacity=1
    withoutreplacement=0
    allresid={}
    alldsref={}
    dplcoll=[]
    datatypes=[i[0] for i in shuffletype]
    for dt in datatypes:
        allresid[dt]=[]
        alldsref[dt]=[]
    dsnum=0
    """calculating residuals"""
    for spinsyst in ss:
        selnam = spinsyst.name[0]
        if selnam in reslalmall:
            dpl=[]
            resid={}
            dsref={}
            # Track the largest dataset number (taken from the last dataset).
            if spinsyst.datasets[-1].setselect > dsnum:
                dsnum=spinsyst.datasets[-1].setselect
            for dt in datatypes:
                resid[dt]=[]
                dsref[dt]=[]
            # setparameters2=[dataname,'/home/hanskoss/data/Cadherin/nmrCad/procandcoll/TSnewsort/2020Feb/',[selnam],conditions,namresults]
            for ds in spinsyst.datasets:
                # Never taken while signrnd is initialised to 1 above.
                if signrnd != 1:
                    signrnd=np.random.choice([-1,1],size=len(ds.fit))
                if ds.datatype == 'cpmg':
                    if heteroskedacity == 1:
                        # RMS of each point's error entries.
                        errlist=np.array([np.sqrt(np.average(np.array(xx)**2)) for xx in ds.rcpmgerr])
                    else:
                        errlist=1
                    ds.resid=signrnd*(ds.rcpmg-ds.fit)/errlist
                elif ds.datatype == 'cest':
                    if heteroskedacity == 1:
                        errlist=(ds.ymax-ds.ymin)/2
                    else:
                        errlist=1
                    ds.resid=signrnd*(ds.y-ds.fit)/errlist
                elif ds.datatype == 'Rex':
                    if heteroskedacity == 1:
                        errlist=[np.average([ds.yerr1,ds.yerr2]),np.average([ds.yerr1,ds.yerr2])]
                    else:
                        errlist=1
                    ds.resid=signrnd*(ds.yval-ds.fit)/np.array(errlist)
                # print ds.resid
                # print np.average(ds.resid), np.std(ds.resid)
                dsref[ds.datatype].append(ds.setselect)
                resid[ds.datatype].append(ds.resid)
            for dt in datatypes:
                allresid[dt].append(resid[dt])
                alldsref[dt].append(dsref[dt])
            dplcoll.append(dpl)
    dspool={}
    sspool={}
    """for each data point, a random residual from a set of eligible residuals is selected.
    This ways, the residuals for a certain group of data points are mixed (with replacement).
    method "dataset": include all points belonging to a certain datatype and dataset (across different residues)
    method "spinsyst": include all points belonging to a certain data type and residue (across different datasets)
    method "any": include all points belonging to a certain data type (any dataset and any spin system)
    method "each": include all points belonging to a certain dataset, data type and residue
    """
    for dt,method in shuffletype:
        dspool[dt]=[[] for i in np.arange(dsnum+1)]
        sspool[dt]=[]
        for xx,x in enumerate(allresid[dt]):
            for z,y in enumerate(x):
                # NOTE(review): compares the string literal 'y' (not the
                # variable y) with [], so this test is always true.
                if 'y' != []:
                    dspool[dt][alldsref[dt][xx][z]].append(y)
            sspool[dt].append(flatten(x))
        dspool[dt]=[flatten(i) for i in dspool[dt]]
        for q,x in enumerate(allresid[dt]):
            for z,y in enumerate(x):
                if method == 'dataset':
                    pool=dspool[dt][alldsref[dt][q][z]]
                elif method == 'spinsyst':
                    pool=sspool[dt][q]
                elif method == 'any':
                    pool=flatten(allresid[dt])
                elif method == 'each':
                    pool=allresid[dt][q][z]
                np.random.seed()
                if pool != []:
                    if withoutreplacement == 0:
                        """with replacement works always"""
                        chosenwere=np.random.randint(len(pool),size=len(allresid[dt][q][z]))
                        """without replacement is not implemented for the 'any' method at this time."""
                    else:
                        chosenwere=np.random.choice(np.arange(len(pool)),size=len(allresid[dt][q][z]),replace=False)
                    allresid[dt][q][z]=np.array([pool[i] for i in chosenwere])
                    # NOTE(review): as laid out here the chosen entries are
                    # removed from the pool even in the "with replacement"
                    # case, and np.delete's return value is discarded --
                    # confirm nesting and intent.
                    for delthis in np.sort(chosenwere)[::-1]:
                        try:
                            del(pool[delthis])
                        except:
                            np.delete(pool,delthis)
    #z=0
    """calculate resampled data points, used as experimental data for error estimation"""
    for dt in datatypes:
        # q counts selected spin systems, z counts datasets of type dt within
        # one spin system; together they index allresid[dt].
        q=0
        for ssn,spinsyst in enumerate(ss):
            selnam = spinsyst.name[0]
            if selnam in reslalmall:
                z=0
                for dsn,ds in enumerate(spinsyst.datasets):
                    if ds.datatype == dt:
                        if heteroskedacity == 1:
                            if ds.datatype == 'cpmg':
                                errlist=np.array([np.sqrt(np.average(np.array(xx)**2)) for xx in ds.rcpmgerr])
                            elif ds.datatype == 'cest':
                                errlist=(ds.ymax-ds.ymin)/2
                            elif ds.datatype == 'Rex':
                                errlist=np.array([np.average([ds.yerr1,ds.yerr2]),np.average([ds.yerr1,ds.yerr2])])
                        else:
                            errlist = 1
                        print ds.fit, allresid[dt][q][z], errlist, ds.fit+allresid[dt][q][z]*errlist, 'reshuffled'
                        ss[ssn].datasets[dsn].reshufy=ds.fit+allresid[dt][q][z]*errlist
                        if ds.datatype == 'Rex':
                            # For 'Rex' every entry is replaced by the first
                            # resampled value.
                            ss[ssn].datasets[dsn].reshufy=[ss[ssn].datasets[dsn].reshufy[0] for i in np.arange(len(ss[ssn].datasets[dsn].reshufy))]
                        z+=1
                q+=1
    return ss
def getnforfit(self,pr1):
    """Return the index array 0..n-1 for property axis ``pr1``.

    ``n`` is the number of items stored under ``self[pr1]['num']``.
    """
    entry_count = len(self[pr1]['num'])
    return np.arange(entry_count)
def fastaddlists(self,pr1,*args):
    """Call ``self.fastaddlist(pr1, a, b)`` for each consecutive pair of ``args``.

    ``args`` is consumed two at a time: (args[0], args[1]), (args[2],
    args[3]), ...  An odd number of arguments raises IndexError when the
    incomplete final pair is reached, after the complete pairs have been
    processed.
    """
    for first in range(0, len(args), 2):
        self.fastaddlist(pr1, args[first], args[first + 1])
resultcoll=[resultcoll[np.argsort(costcoll)[0]]] paramsxy=mainfuncts.resc2param(praxs1,paramsxx,resultcoll,5) list2.append(costcoll[0]) list1.append(list2) return list1 listx=[] listx.append(readoutresults('indiv_free_')) listx.append(readoutresults('indiv_free2_')) listx.append(readoutresults('indiv_fix_1_')) listx.append(readoutresults('indiv_fix_0_')) #%% dwblist=[] dwclist=[] for pickthis in np.arange(15): reslall=['A29','A30','A31','A32','A33','A34','A52','A53','A54','A78','A81','A82','A83','A84','A86'] resnall=[29,30,31,32,33,34,52,53,54,78,81,82,83,84,86] reslall=[reslall[pickthis]] resnall=[resnall[pickthis]] pickthese=[0] praxs1=mainfuncts.praxismake([x for y,x in enumerate(reslall) if y in pickthese],[x for y,x in enumerate(resnall) if y in pickthese]) parbdslistaxallf=[[[-1, 1], [-26000,26000], [-13000, 13000]],\ [[-1, 1], [-26000,26000], [-13000, 13000]],\ [[-1, 1], [-26000,26000], [-13000, 13000]],\ [[-1, 1], [-26000,26000], [-13000, 13000]],\ [[-1, 1], [-26000,26000], [-13000, 13000]],\ [[-1,-1,],[-26000,26000],[-13000, 13000]],\ #out [[-1, 1], [-389.0, -5341], [-13000, 13000]],\ [[-1, 1], [-26000,26000], [-13000, 13000]],\
#namresults='multix13_NEW_pfr_STU_FINAL_' ###namresults='multix13_NEW_pfr_STU_err3g_' #namresults='multix13_NEW_pfr_S2_' #namresults='multix13_NEW_pfr_Sdeb2_' #namresults='multix13_NEW_pfr_M2_err_' #%% namresultslist = [ 'FINAL_stage3_indiv_', 'FINAL_stage3_23_indiv_', 'FINAL_stage3_indiv_restrkp_', 'FINAL_stage3_indiv_restrall_' ] zx = [] for x in namresultslist: z = [] for y in np.arange(15): namresults = x + str(y) + '_0_' #print namresults poscoll,resnam,allsetcoll,resultcoll,relaxrat0,relaxrat,lookatratio,\ results,relaxrat1,relaxrat2,relaxrat_1,relaxrat_2,intdiffcorr, intcorr,\ intmin,ac,oc,rateconstpre,cond=hkio2.loadeverything(savstatdir,[namresults],0,decoupl=0) z.append(np.round(resultcoll[0].cost, 2)) zx.append(z) print np.average(z) from matplotlib import pyplot as plt reslall = [ 'A29', 'A30', 'A31', 'A32', 'A33', 'A34', 'A52', 'A53', 'A54', 'A78', 'A81', 'A82', 'A83', 'A84', 'A86' ] #fullnamlist=['Ser','Gly', 'Trp', 'Val', 'Trp', 'Asn', 'Gln', 'Phe', 'Phe', 'Val', 'Ile', 'Glu', 'Glu', 'Tyr', 'Thr', 'Gly', 'Pro', 'Asp', 'Pro', 'Val', 'Leu', 'Val', 'Gly', 'Arg', 'Leu', 'His', 'Ser', 'Asp', 'Ile', 'Asp', 'Ser', 'Gly', 'Asp', 'Gly', 'Asn', 'Ile', 'Lys', 'Tyr', 'Ile', 'Leu', 'Ser', 'Gly', 'Glu', 'Gly', 'Ala', 'Gly', 'Thr', 'Ile', 'Phe', 'Val', 'Ile', 'Asp', 'Asp', 'Lys', 'Ser', 'Gly', 'Asn', 'Ile', 'His', 'Ala', 'Thr', 'Lys', 'Thr', 'Leu', 'Asp', 'Arg', 'Glu', 'Glu', 'Arg', 'Ala', 'Gln', 'Tyr', 'Thr', 'Leu', 'Met', 'Ala', 'Gln', 'Ala', 'Val', 'Asp', 'Arg', 'Asp', 'Thr', 'Asn', 'Arg', 'Pro', 'Leu', 'Glu', 'Pro', 'Pro', 'Ser', 'Glu', 'Phe', 'Ile', 'Val', 'Lys', 'Val', 'Gln', 'Asp']
def runfit4(praxs1,ctd,selresidues,precalc,resnam,conditions,path2020,savstatdir,files,filenamsav,paramsx,drawonly,cond):
    """Prepare the global fit and convert data structures where appropriate.

    Arguments:
        praxs1: property axis collection.
        ctd: not used (only stored in ``moreconditions``).
        selresidues: list of residue selections; the loop below processes one
            selection at a time.
        precalc: selects the data source.
            0: regular fit, data are read with ``prepro.launch``.
            any value other than 0 or 1: name of a saved spinsystems object
            containing pre-calculated theoretical data; it is loaded and
            resampled with ``reshuffle``.
            NOTE(review): for precalc == 1 neither branch assigns
            ``spinsystems`` -- confirm that value is never passed.
        resnam, files: stored in ``moreconditions``.
        conditions, savstatdir, filenamsav: forwarded to ``fitcpmg4``.
        path2020: data directory passed to ``prepro.launch``.
        paramsx: parameter collection.
        drawonly: 0 runs the fit; any other value only recalculates curves
            with ``printrd`` and stores them as ``fit`` on the datasets.
        cond: unused in this function.

    Returns:
        drawonly == 0: ``(resultcolll, resnaml, poscolll, spinsystems,
        allresultcoll)``; otherwise ``spinsystems``.
    """
    moreconditions=[ctd,selresidues,precalc,resnam,files]
    if precalc != 0 and precalc != 1:
        spinsystems=hkio.loadss(savstatdir,precalc)
        reslalmall=selresidues[0]#[reslall[i] for i in pickthese]
        shuffletype=[['cpmg','dataset'],['Rex','dataset'],['cest','each']]
        spinsystems=reshuffle(spinsystems,reslalmall,shuffletype)
    elif precalc == 0:
        spinsystems,setlabels=prepro.launch(path2020,files)
        # Debug output with a hard-coded spin-system index.
        print spinsystems[35].name, spinsystems[35].datasets[0].rcpmg
    # print spinsystems[57].datasets[13].xlabel
    resultcolll=[]; poscolll=[]
    resnaml=[]
    allresultcoll=[]
    """This is a loop allowing to test various residue combination sets."""
    for selectdatn in selresidues:
        """The experimental data are stored in the spinsystems object; the
        parameters are stored in the parameters object, using certain property axes.
        For the fitting engine, all data are flattened form. For each data point,
        there is a corresponding pointer set in a list of equal lengths which
        selects the appropriate parameters. All flattened lists, including pointer
        lists, are generated here.
        """
        resnaml,timedat,rawdata,errd,field,field2,field3,tr,equationtype,poscoll,expcnd=prepro.passdatatofitn(spinsystems,selectdatn,precalc)
        """hard-coded filters, has to be modified for other experimental combinations"""
        # Local switch (not the module-level one): always 1 here, so only the
        # first pair of filter tables is ever used.
        setprotoncpmg=1
        if setprotoncpmg == 1:
            filters=[[['residues','name'],[[rn] for rn in resnaml]],[['conc','value'],\
            [['2.475'],['9.9']]],[['TR','name'],[['T'],['X']]],\
            [['B1field','rounded'],[[500],[600],[800],[900]]],\
            [['type','name'],[['cpmg'],['Rex'],['cest']]]]
            filters2=[[['conc','value'],[['2.475'],['9.9']]],\
            [['TR','name'],[['T'],['X']]],\
            [['B1field','rounded'],[[500],[600],[800],[900]]],\
            [['type','name'],[['cpmg'],['Rex'],['cest']]]]
        else:
            filters=[[['residues','name'],[[rn] for rn in resnaml]],[['conc','value'],\
            [['2.475'],['9.9']]],[['TR','name'],[['T'],['X']]],\
            [['B1field','rounded'],[[50],[70],[80],[90]]],\
            [['type','name'],[['cpmg'],['Rex'],['cest']]]]
            filters2=[[['conc','value'],[['2.475'],['9.9']]],\
            [['TR','name'],[['T'],['X']]],\
            [['B1field','rounded'],[[50],[70],[80],[90]]],\
            [['type','name'],[['cpmg'],['Rex'],['cest']]]]
        # For every experiment build [residue index, conc index, TR index,
        # B1field index, type index], where each index is the position of the
        # experiment's value within the matching filters2 option list.
        selset=[]
        q=0
        explist=[]
        for j,i in enumerate(expcnd):
            for l,k in enumerate(i):
                selsetx=[j]
                for n,m in enumerate(filters2):
                    selsetx.append([p for p,o in enumerate(m[1]) if k[m[0][0]] in o][0])
                selset.append(selsetx)
                explist.append(q)
                q+=1
        filt=[]
        aa=selset
        # The set built here is immediately overwritten by the next line.
        bb=set(tuple(ix) for ix in selset)
        bb=[list(b) for b in aa]
        seldatasets=list(np.arange(len(aa)))
        inclfx=[]
        for l,i in enumerate(bb):
            # Translate the index tuple into [axis, attribute, allowed values]
            # filters for getallparandbnds.
            inclf=[]
            for k,j in enumerate(i):
                inclf.append([filters[k][0][0],filters[k][0][1],filters[k][1][j]])
            inclfx.append(inclf)
            a,b1,b2,c,e=paramsx.getallparandbnds(praxs1,['p','k','dw','R20500','R2mult'],inclfilt=inclf)
            # Pointer list for this dataset: per-parameter-type positions
            # (all but the last element of each e[m]) shifted by the summed
            # last elements of the preceding types.
            # NOTE(review): under Python 2 the comprehension rebinds the loop
            # names l, i, j, k and m -- harmless here, but keep in mind when
            # editing.
            f=flatten([np.array(e[m][:-1])+np.sum([k for k in [0]+[e[j][-1] for \
            j,i in enumerate(e) if j < len(e)-1]][0:(l+1)]) for m,l in \
            enumerate(np.arange(len(e)))])
            filt.append(f)
        # Flatten the per-residue nesting so every list has one entry per
        # dataset.
        timedat=[flatten(timedat,levels=1)[i] for i in seldatasets]
        rawdata=[flatten(rawdata,levels=1)[i] for i in seldatasets]
        errd=[flatten(errd,levels=1)[i] for i in seldatasets]
        field=[flatten(field,levels=1)[i] for i in seldatasets]
        field2=[flatten(field2,levels=1)[i] for i in seldatasets]
        field3=[flatten(field3,levels=1)[i] for i in seldatasets]
        tr=[flatten(tr,levels=1)[i] for i in seldatasets]
        equationtype=[flatten(equationtype,levels=1)[i] for i in seldatasets]
        a,b1,b2,c,e=paramsx.getallparandbnds(praxs1,['p','k','dw','R20500','R2mult'],inclfilt=[])
        f=flatten([np.array(e[m][:-1])+np.sum([k for k in [0]+[e[j][-1] for \
        j,i in enumerate(e) if j < len(e)-1]][0:(l+1)]) for m,l \
        in enumerate(np.arange(len(e)))])
        poscolll.append(poscoll)
        if drawonly == 0:
            sojetzt,allrescoll=fitcpmg4(praxs1,timedat,rawdata,field,errd,\
            precalc,equationtype,conditions,savstatdir,filenamsav,resnaml, poscolll,\
            moreconditions,paramsx,filt)
        else:
            # One dataset index per data point, as in fitcpmg4.
            dwbsetp=[]
            setdwb=0
            for i in np.arange(len(timedat)):
                for j in np.arange(len(flatten(timedat[i]))):
                    dwbsetp.append(setdwb)
                setdwb+=1
            par7=np.array(np.array(dwbsetp).astype('int'))
            fittedcurve,chsq0,chsq1,chsq2,chsq3=printrd(praxs1,timedat,\
            rawdata,field,errd,equationtype,par7,paramsx,filt)
            for j,i in enumerate(seldatasets):
                print i, 'datasetno', chsq0[j], equationtype[j][0]
        if drawonly == 0:
            try:
                resultcolll.append(sojetzt)
                allresultcoll.append(allrescoll)
            except:
                print 'ugh1'
        else:
            # Store the recalculated curves on the datasets.
            for j in poscoll:
                for k,i in enumerate(seldatasets): #fittedcurve:
                    spinsystems[j].datasets[i].fit=fittedcurve[k]
    if drawonly == 0:
        return resultcolll, resnaml, poscolll, spinsystems, allresultcoll
    else:
        return spinsystems#else:
def produceindivplots(path2020, savstatdir, datafile, namresults, reslallx,
                      resnallx, parbdslistaxallfx, namout, mode, k121, k122,
                      k131, k132, k231, k232, errruns):
    """Fit, evaluate and save each entry of ``reslallx`` on its own.

    ``reslallx``, ``resnallx`` and ``parbdslistaxallfx`` are indexed in
    parallel.  For every index the function

    1. builds a property-axes collection from the single selected entry
       (``mainfuncts.GeneratePropertyAxesCollection``),
    2. creates parameter sets with ``mainfuncts.parammake``,
    3. if ``namresults`` is not ``[]``, loads the stored results of that
       name, keeps the one with the lowest ``cost`` and passes it through
       ``mainfuncts.resc2param``; the returned collection replaces the
       freshly created one,
    4. calls ``hkfit2.parallelfit3`` once, then ``hkfit2.evaluaterdfit``,
       and stores the latter's return value with ``hkio2.savss`` under the
       name ``namout + '0_'``,
    5. calls ``hkfit2.parallelfit3`` another ``errruns`` times with the
       names ``namout + '0_err_<n>_'``.

    Args:
        path2020, savstatdir: Forwarded unchanged to the ``hkfit2`` and
            ``hkio2`` helpers (presumably data and save directories --
            confirm against those modules).
        datafile: First element of every ``setparameters`` list handed to
            ``hkfit2``.
        namresults: ``[]`` to start from the freshly generated parameters;
            anything else is passed (wrapped in a list) to
            ``hkio2.loadeverything``.
        reslallx, resnallx, parbdslistaxallfx: Parallel sequences; entry
            ``i`` of each is processed together.
        namout: Prefix of all output names.
        mode: Forwarded to ``mainfuncts.resc2param``.
        k121, k122, k131, k132, k231, k232: Forwarded to
            ``mainfuncts.parammake`` as ``k12min``, ``k12max``,
            ``k13min``, ``k13max``, ``k23min``, ``k23max``.
        errruns: Number of additional ``parallelfit3`` runs per entry.

    Returns:
        None.  Nothing is done when ``reslallx`` is empty.
    """
    for pickthis in np.arange(len(reslallx)):
        # Exactly one entry is handled per pass; the helpers expect lists,
        # so every entry is wrapped in a one-element list.  A fresh copy is
        # handed to each callee so none of them can alias another's input.
        reslall = [reslallx[pickthis]]
        resnall = [resnallx[pickthis]]
        PropAxesColl = mainfuncts.GeneratePropertyAxesCollection(
            list(reslall), list(resnall))
        parbdslistaxallf = [parbdslistaxallfx[pickthis]]

        # parammake(PropAxesColl, parbdslistb, parbdslista, k12min, k12max,
        #           k13min, k13max, k23min, k23max, pmin, pmax)
        paramsxx = mainfuncts.parammake(
            PropAxesColl, 0, list(parbdslistaxallf), k121, k122, k131, k132,
            k231, k232, 0.01, 0.02)

        # [0, [1, 0, 0, 5, 5]] was used for individual nitrogen fits; the
        # value below is the setting for individual proton fits.
        conditions = [0, [1, 0, 0, 10, 5]]

        if namresults == []:
            ParameterColl6 = paramsxx
        else:
            # Only resultcoll is used.  The full unpack is kept so that a
            # change in the number of returned values still fails loudly.
            (poscoll, resnam, allsetcoll, resultcoll, relaxrat0, relaxrat,
             lookatratio, results, relaxrat1, relaxrat2, relaxrat_1,
             relaxrat_2, intdiffcorr, intcorr, intmin, ac, oc, rateconstpre,
             cond) = hkio2.loadeverything(
                 savstatdir, [namresults], 0, decoupl=0)
            costcoll = [i.cost for i in resultcoll]
            # Continue from the stored result with the lowest cost.
            resultcoll = [resultcoll[np.argsort(costcoll)[0]]]
            ParameterColl6 = mainfuncts.resc2param(PropAxesColl, paramsxx,
                                                   resultcoll, mode)

        # Only the first parameter set of the collection is fitted.
        nn = 0
        namresultsOUT = namout + str(nn) + '_'

        setparameters3 = [
            datafile, path2020, [list(reslall)], conditions, namresultsOUT, 0
        ]
        hkfit2.parallelfit3(path2020, savstatdir, setparameters3, 0,
                            ParameterColl6[nn], PropAxesColl)

        setparameters2 = [
            datafile, path2020, list(reslall), conditions, namresultsOUT
        ]
        ss = hkfit2.evaluaterdfit(path2020, savstatdir, PropAxesColl,
                                  setparameters2, 1, ParameterColl6[nn], 0)
        hkio2.savss(savstatdir, ss, namresultsOUT)

        # Additional runs: same call as the main fit, but with its own
        # output name and with the main fit's name as the last
        # setparameters element (0 in the main fit).
        # NOTE(review): presumably these are the error-estimation runs that
        # read the main fit's result -- confirm in hkfit2.parallelfit3.
        for errc in np.arange(errruns):
            setparameters3 = [
                datafile, path2020, [list(reslall)], conditions,
                namresultsOUT + 'err_' + str(errc) + '_', namresultsOUT
            ]
            hkfit2.parallelfit3(path2020, savstatdir, setparameters3, 0,
                                ParameterColl6[nn], PropAxesColl)
""" from the 90 runs, the results of the last step from each of the three precalculation attempts is collected. The best 90 (lowest cost) of the 270 collected results are then selected to continue the fit (stage 3b), with 10 documented steps of 5 undocumented fitting substeps. """ resultcoll = [] allcostcoll = [] for i in allsetcoll: allcostcoll.append([i[j].cost for j in [2, 5, 8]]) allcostcoll = flatten(allcostcoll) sortedcosts = np.argsort(allcostcoll) for sel in np.arange(90): pos2 = sortedcosts[sel] % 3 * 3 + 2 pos1 = int(np.floor(sortedcosts[sel] / 3)) resultcoll.append(allsetcoll[pos1][pos2]) namresults = 'FINAL_stage2b' ParameterColl2 = mainfuncts.resc2param(PropAxesColl, ParameterColl, resultcoll, 2) conditions = [0, [1, 0, 0, 10, 5]] setparameters3 = [ 'combo10l.dat', path2020, [[x for y, x in enumerate(reslall) if y in pickthese]], conditions, namresults, 0 ] mainfuncts.parallelmultifit4(path2020, savstatdir, setparameters3, 3, 30,