def generateIdealisedFlatFieldSurface(numdata=2066, floor=1e5, xsize=2048, ysize=2066):
    """
    Generate an idealised, smoothly varying flat field surface, plot it and save it to a FITS file.

    A quadratic surface is evaluated on a regular grid whose limits are the extrema of *numdata*
    uniform random draws centred on zero. The maximum, minimum and mean of the surface are printed,
    a 3D plot is saved to ``flatfield.pdf`` and the surface is written to
    ``VIScalibrationUnitflux.fits``.

    :param numdata: number of random draws used to set the limits of the grid
    :type numdata: int
    :param floor: base level of the surface
    :type floor: float
    :param xsize: number of grid points in the x direction
    :type xsize: int
    :param ysize: number of grid points in the y direction
    :type ysize: int

    :return: None
    """
    #random draws in [-0.5, 0.5); only their extrema are used to set the grid limits
    xdraws = np.random.random(numdata) - 0.5
    ydraws = np.random.random(numdata) - 0.5
    xaxis = np.linspace(xdraws.min(), xdraws.max(), xsize)
    yaxis = np.linspace(ydraws.min(), ydraws.max(), ysize)
    xx, yy = np.meshgrid(xaxis, yaxis)

    #quadratic bowl on top of the floor level, about 9-10 per cent range
    bowl = -1.4*xx*xx - 1.6*yy*yy - 1.5*xx*yy
    surface = bowl*floor*0.09 + floor

    #cut to the requested size and build pixel coordinate grids for the plot
    surface = surface[:ysize, :xsize]
    xpix, ypix = np.meshgrid(np.arange(0, xsize, 1), np.arange(0, ysize, 1))

    print('%s %s %s' % (np.max(surface), np.min(surface), np.mean(surface)))

    #3D surface plot
    fig = plt.figure()
    plt.title('VIS Flat Fielding: Idealised Calibration Unit Flux')
    ax = Axes3D(fig)
    ax.plot_surface(xpix, ypix, surface, rstride=100, cstride=100, alpha=0.5, cmap=cm.jet)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('electrons')
    plt.savefig('flatfield.pdf')
    plt.close()

    #store the surface
    files.writeFITS(surface, 'VIScalibrationUnitflux.fits')
def simplePSFexample():
    """
    Train a self-organising map on all ``cutout*.fits`` postage stamps found in the working directory.

    Every cutout is flattened to 1D and stacked. The peak-normalised mean of the stack is written to
    ``mean.fits``; a ``SimpleSOMMapper((100, 100), 100)`` is then trained on the stack and its map
    (``som.K``) is saved both as an image (``mapped.pdf``) and as a FITS file (``mapped.fits``).

    The mean image is reshaped using the first axis length of the first cutout, so the cutouts are
    expected to be square and of equal size.

    :return: None
    """
    files = glob.glob('cutout*.fits')
    if not files:
        #nothing to train on; without this guard the reshape below fails with an IndexError
        print('No cutout*.fits files found, nothing to do')
        return

    stamps = []
    sides = []
    for f in files:
        #load data
        data = pf.getdata(f)
        sides.append(data.shape[0])
        #flatten to a 1D array and save the info
        stamps.append(np.ravel(data))

    stamps = np.asarray(stamps)

    #save the mean, normalised to a peak of unity
    mean = np.mean(stamps, axis=0).reshape(sides[0], sides[0])
    mean /= np.max(mean)
    fileIO.writeFITS(mean, 'mean.fits', int=False)

    som = SimpleSOMMapper((100, 100), 100)
    som.train(stamps)

    plt.figure()
    plt.title('Color SOM')
    plt.imshow(som.K, origin='lower')
    plt.savefig('mapped.pdf')

    #written next to the other products; the previous '/mapped.fits' pointed to the filesystem root
    fileIO.writeFITS(som.K, 'mapped.fits', int=False)
def readBinaryFiles(file, dimensions=(4196, 2072), saveFITS=True, output='tmp.fits'):
    """
    This simple function reads data from a given file that is in the binary format in which
    the CCD lab measurements have been stored in. It reads in the data and optionally saves it to a
    FITS file format. The function also returns the data.

    The file is read as a flat stream of 16bit unsigned integers. The last four values are dropped,
    the remainder is reshaped to *dimensions* and transposed, and the first four rows of the
    transposed image are removed.

    :param file: name of the .bin file to read
    :type file: str
    :param dimensions: dimensions of the image before the transpose
    :type dimensions: tuple
    :param saveFITS: to control whether a FITS file should be written or not
    :type saveFITS: bool
    :param output: name of the output FITS file if saveFITS = True
    :type output: str

    :return: image, or None if the data cannot be reshaped to the given dimensions
    :rtype: ndarray or None
    """
    #use numpy to read the binary format, the data is 16bit unsigned int
    #the context manager releases the file handle even if the read fails
    with open(file, 'rb') as fh:
        a = np.fromfile(fh, dtype=np.uint16)

    #four last numbers are zeros, throwing these out allows to reshape to the dimensions
    #there are still some unwanted rows in the data, so we should remove the first four
    try:
        image = a[:-4].reshape(dimensions).T[4:, :]
    except ValueError:
        #reshape raises ValueError when the number of values does not match the dimensions
        print('Image shape as not expected')
        print(a[:-4].shape)
        return None

    if saveFITS:
        fileIO.writeFITS(image, output)

    return image
def dilation(data):
    """
    Estimate the background with a morphological reconstruction by dilation.

    The data are smoothed with a Gaussian filter (sigma = 1), the smoothed image lowered by one is
    used as the seed and the smoothed image itself as the mask of the reconstruction. The
    reconstructed image is subtracted from the input data; the difference is written to
    ``dilation.fits`` and plotted next to the data in ``dilation.png``.

    Note: according to the original author this does not work too well.

    :param data: input image
    :type data: ndarray

    :return: flattened difference between the data and the reconstructed image
    :rtype: ndarray
    """
    smoothed = gaussian_filter(data, 1)

    #seed is the smoothed image lowered by h = 1, the smoothed image acts as the mask
    seed = smoothed - 1
    reconstructed = reconstruction(seed, smoothed, method='dilation')
    dilated = data - reconstructed

    fileIO.writeFITS(dilated, 'dilation.fits', int=False)

    #plot the data and the result side by side
    fig = plt.figure(figsize=(18, 10))
    left = fig.add_subplot(121)
    right = fig.add_subplot(122)

    left.set_title('Data')
    right.set_title('Background')

    imdata = left.imshow(np.log10(data), origin='lower', vmin=2., vmax=3.5, interpolation='none')
    imdilated = right.imshow(dilated, origin='lower', interpolation='none')
    cbdata = plt.colorbar(imdata, ax=left, orientation='horizontal')
    cbdilated = plt.colorbar(imdilated, ax=right, orientation='horizontal')
    cbdata.set_label('$\log_{10}$(Counts [ADU])')
    cbdilated.set_label('Dilation')
    plt.savefig('dilation.png')
    plt.close()

    return dilated.ravel()
def readBinaryFiles(file, dimensions=(100, 100), saveFITS=True, output='tmp.fits'):
    """
    This simple function reads data from a given file that is in the binary format in which
    the CCD lab measurements have been stored in. It reads in the data and optionally saves it to a
    FITS file format. The function also returns the data.

    The file is read as a flat stream of 16bit unsigned integers, reshaped to *dimensions*
    and transposed.

    :param file: name of the file to read
    :type file: str
    :param dimensions: dimensions of the image before the transpose
    :type dimensions: tuple
    :param saveFITS: to control whether a FITS file should be written or not
    :type saveFITS: bool
    :param output: name of the output FITS file if saveFITS = True
    :type output: str

    :return: image, or None if the data cannot be reshaped to the given dimensions
    :rtype: ndarray or None
    """
    #use numpy to read the binary format, the data is 16bit unsigned int
    #the context manager releases the file handle even if the read fails
    with open(file, 'rb') as fh:
        a = np.fromfile(fh, dtype=np.uint16)

    try:
        #note the transpose
        image = a.reshape(dimensions).T
    except ValueError:
        #reshape raises ValueError when the number of values does not match the dimensions
        print('Image shape as not expected')
        print(a.shape)
        return None

    if saveFITS:
        fileIO.writeFITS(image, output)

    return image
def combineToFullCCD(fileEF, fileGH, output, evm2=False):
    """
    Combines imaging data from files containing EF and GH image areas.

    The GH image is flipped along both axes, two rows are trimmed from each image (the last two
    of EF, the first two of GH), statistics of both images are printed and the images are stacked
    vertically (EF first) and written to a FITS file.

    :param fileEF: name of the FITS file that contains EF image section
    :type fileEF: str
    :param fileGH: name of the FITS file that contains GH image section
    :type fileGH: str
    :param output: name of the output FITS file
    :type output: str
    :param evm2: if EVM2 ROE board was used then the data need to be scaled
    :type evm2: bool

    :return: None
    """
    def _report(name, frame):
        #print plain statistics and the standard deviation of the pixels within 10 per cent of the mean
        print('Statistics from %s' % name)
        halves = (('Q0 median mean max min std clipped', frame[:, :2099]),
                  ('Q1 median mean max min std clipped', frame[:, 2098:]))
        for header, quad in halves:
            average = quad.mean()
            near = (quad < 1.1*average) & (quad > 0.9*average)
            print(header)
            print('%s %s %s %s %s %s' % (np.median(quad), average, quad.max(), quad.min(),
                                         quad.std(), quad[near].std()))

    #GH data needs to be rotated because of how the data have been recorded;
    #two rows are removed from each image
    dataEF = pf.getdata(fileEF)[:-2, :]
    dataGH = pf.getdata(fileGH)[::-1, ::-1][2:, :]

    #calculate some statistics
    _report(fileEF, dataEF)
    _report(fileGH, dataGH)

    if evm2:
        #this bias level is higher than anticipated with DM
        dataEF -= 2400
        dataGH -= 2400

    #stitch together and write out a FITS file
    fileIO.writeFITS(np.vstack((dataEF, dataGH)), output)
def generaPostageStamps(filename, catalog, maglimit=22., output='galaxy'):
    """
    Generates postage stamp images from an input file given the input catalog position.
    The output files are saved to FITS files.

    The half-size of each stamp depends on the magnitude of the object. Pixels below 1e-4 are set
    to zero, stamps whose peak value exceeds 5 are skipped and the remaining stamps are normalised
    to a unit sum before they are written out. Catalogue positions whose cutout falls completely
    outside the image are skipped.

    :param filename: name of the FITS file from which the postage stamps are extracted
    :type filename: str
    :param catalog: name of the catalogue with x and y positions and magnitudes (columns 0, 1 and 2)
    :type catalog: str
    :param maglimit: brighter galaxies than the given magnitude limit are extracted
    :type maglimit: float
    :param output: name of the postage stamp prefix (will add a running number to this)
    :type output: str

    :return: None
    """
    #ndmin=2 keeps a catalogue with a single object two dimensional
    cat = np.loadtxt(catalog, ndmin=2)
    xcoord = cat[:, 0]
    ycoord = cat[:, 1]
    mags = cat[:, 2]
    msk = mags < maglimit

    fh = pf.open(filename, mmap=True, memmap=True)
    try:
        ysize, xsize = fh[0].data.shape

        i = 0
        for x, y, mag in zip(xcoord[msk], ycoord[msk], mags[msk]):

            #postage stamp size
            sz = 0.2 ** ((mag - 22.) / 7.) * 50
            #cutout
            xmin = int(max(x - sz, 0))
            ymin = int(max(y - sz, 0))
            xmax = int(min(x + sz, xsize))
            ymax = int(min(y + sz, ysize))
            data = fh[0].data[ymin:ymax, xmin:xmax].copy()

            if data.size == 0:
                #the catalogue position is outside the image, there is nothing to extract
                continue

            #renormalize the flux, try to cope with background
            data[data < 1e-4] = 0.0

            print('%s %s%i.fits' % (data.max(), output, i))
            if data.max() > 5:
                continue

            data /= data.sum()

            #savedata
            fileIO.writeFITS(data, '%s%i.fits' % (output, i), int=False)
            i += 1
    finally:
        #close the input file also when a cutout or a write fails
        fh.close()
def generateBessel(radius=1.5, oversample=500, size=1000, cx=None, cy=None, debug=False):
    """
    Generates a 2D Bessel function by taking a Fourier transform of a disk with a given radius. The real image
    and the subsequent power spectrum is oversampled with a given factor. The peak value of the generated
    Bessel function is normalized to unity.

    The disc drawn on the pixel grid has a radius of radius*oversample pixels.

    :param radius: radius of the disc [default=1.5]
    :param oversample: oversampling factor [default=500]
    :param size: size of the output array
    :param cx: centre of the disc in x direction [default: size // 2]
    :param cy: centre of the disc in y direction [default: size // 2]
    :param debug: whether or not to generate FITS files

    :return: power spectrum of the disc, shifted so that the zero frequency is in the centre
             and normalised to a peak of unity
    :rtype: ndarray
    """
    pupil = np.zeros((size, size))

    #centroid of the disc; floor division keeps the default centre on a pixel
    #regardless of the Python version (true division would give a half pixel offset for odd sizes)
    if cx is None:
        cx = np.shape(pupil)[1] // 2
    if cy is None:
        cy = np.shape(pupil)[0] // 2

    y, x = np.indices(pupil.shape)
    xc = x - cx
    yc = y - cy
    rad = np.sqrt(xc**2 + yc**2)
    mask = rad < (radius*oversample)
    pupil[mask] = 1.

    if debug:
        fileIO.writeFITS(pupil, 'disc.fits', int=False)

    F1 = fftpack.fft2(pupil)
    # Now shift the quadrants around so that low spatial frequencies are in
    # the center of the 2D fourier transformed image.
    F2 = fftpack.fftshift(F1)
    # Calculate a 2D power spectrum
    psd2D = np.abs(F2)**2
    #normalize it
    psd2D /= np.max(psd2D)

    if debug:
        fileIO.writeFITS(psd2D, 'besselOversampled.fits', int=False)

    return psd2D
def medianCombineAllFiles(folder='fits/'):
    """
    Median combine all FITS files of a folder to form a "composite spectrum" that can be
    subtracted from the data to remove more or less the background and any cosmic rays.

    The data are read from extension 1 of each file. The shape of the stack is printed and
    the median is written to ``medianCombined.fits``.

    :param folder: folder (with a trailing slash) from which all ``*.fits`` files are read
    :type folder: str

    :return: None
    """
    stack = []
    for name in g.glob(folder + '*.fits'):
        hdus = pf.open(name, memmap=False)
        stack.append(hdus[1].data)
        hdus.close()

    stack = np.asarray(stack)
    print(stack.shape)

    #median over the files
    fileIO.writeFITS(np.median(stack, axis=0), 'medianCombined.fits', int=False)
def convertFilesToFITS(folder='CFSData_16/', outFolder='fits/'):
    """
    Convert all ``*.dat`` text files of a folder to FITS files.

    Each file is read with the first (header) line skipped and the columns with the zero-based
    indices 1, 7, 9, 11, 13, 15, 17, 19, 21 and 23 are stacked to a 2D array (one row per column)
    that is written to *outFolder* under the same name with a ``.fits`` extension.

    Notes supplied with the data (not verified by this function):

    - each file is a transit/spectrum, so processing every file separately gives
      about 13000 spectra/images;
    - a file is expected to have 1260 lines, although counting the lines is safer;
    - the fluxes were described as columns 1, 7, 9, 11, 13, 15, 17, 19, 21 and 23 and the flux
      errors as columns 2, 10, 12, ...; the errors are not converted here;
    - the presence of column 25 hints at the location of a cosmic ray.

    :param folder: folder (with a trailing slash) from which the ``*.dat`` files are read
    :type folder: str
    :param outFolder: folder (with a trailing slash) to which the FITS files are written
    :type outFolder: str

    :return: None
    """
    columns = (1, 7, 9, 11, 13, 15, 17, 19, 21, 23)

    for filename in g.glob(folder + '*.dat'):
        print('Converting %s to FITS format' % filename)
        table = np.loadtxt(filename, skiprows=1)

        #one row per selected column, i.e. a 2D array
        stacked = np.asarray([table[:, column] for column in columns])

        #write to fits
        output = outFolder + filename.replace(folder, '').replace('.dat', '.fits')
        print('Saving the data to %s' % output)
        fileIO.writeFITS(stacked, output, int=False)
def sigmaClippedOpening(data):
    """
    Perform Gaussian filtering, sigma clipping and binary opening to define
    a mask for the background pixels.

    The data and the mask are plotted to ``opening.png`` and the masked data are written to
    ``opening.fits``. The minimum, maximum, mean, standard deviation and number of the returned
    values are printed.

    :param data: input image
    :type data: ndarray

    :return: 1D array of the positive values of the masked data
    :rtype: ndarray
    """
    smoothed = gaussian_filter(data, 0.5)

    #sigma clip, the pixels that survive the clipping are refined with a binary opening
    clipped = sigma_clip(smoothed, sig=5., iters=None)
    mask = binary_opening(~clipped.mask, disk(8))

    #plot the image
    fig = plt.figure(figsize=(18, 10))
    left = fig.add_subplot(121)
    right = fig.add_subplot(122)

    left.set_title('Data, Single Quadrant')
    right.set_title('Background Mask')

    imdata = left.imshow(np.log10(data), origin='lower', vmin=2., vmax=3.5, interpolation='none')
    immask = right.imshow(mask, origin='lower', interpolation='none')
    cbdata = plt.colorbar(imdata, ax=left, orientation='horizontal')
    cbmask = plt.colorbar(immask, ax=right, orientation='horizontal', ticks=[0, 1])
    cbmask.ax.set_xticklabels(['False', 'True'])
    cbdata.set_label('$\log_{10}$(Counts [ADU])')
    cbmask.set_label('Mask')
    plt.savefig('opening.png')
    plt.close()

    #the mask is applied to the unsmoothed data
    out = data*mask
    fileIO.writeFITS(out, 'opening.fits', int=False)

    values = out.ravel()
    values = values[values > 0.]

    print('%s %s %s %s %s' % (values.min(), values.max(), values.mean(), values.std(), len(values)))

    return values
def analyseSpotsDeconvolution(files):
    """
    Analyse spot measurements using deconvolutions.

    Every input image is first deconvolved with a single-pixel kernel (saved as ``tophat.fits``)
    and the result is written to ``<name>deconv1.fits``. Each of these results is then magnified
    by a factor of four, median subtracted and deconvolved with a Bessel function (saved as
    ``bessel.fits``); the outcome is written to ``<name>deconv2.fits``.

    Note: does not really work... perhaps an issue with sizes.

    :param files: a list of input files
    :type files: list

    :return: None
    """
    data = np.asarray([readData(filename, crop=False) for filename in files])

    #sanity check plots
    #stackData(data)

    #deconvolve with top hat (a single pixel set to unity in the middle of the frame)
    dec1 = {}
    y, x = data[0].shape
    top = np.zeros((y, x))
    #floor division: array indices must be integers under true division as well
    top[y // 2, x // 2] = 1.
    fileIO.writeFITS(top, 'tophat.fits', int=False)
    for filename, im in zip(files, data):
        deconv = weinerFilter(im, top, normalize=False)
        f = filename.replace('.fits', 'deconv1.fits')
        fileIO.writeFITS(deconv, f, int=False)
        dec1[f] = deconv

    print("Tophat deconvolution done")

    #deconvolve with a Bessel
    bes = generateBessel(radius=0.13)
    bes = ndimage.zoom(bes, 1./2.5, order=0)
    bes /= np.max(bes)
    fileIO.writeFITS(bes, 'bessel.fits', int=False)
    #items() is available in both Python 2 and 3, unlike iteritems()
    for key, value in dec1.items():
        value = ndimage.zoom(value, 4., order=0)
        value -= np.median(value)
        deconv = weinerFilter(value, bes, reqularization=2.0, normalize=False)
        f = key.replace('deconv1.fits', 'deconv2.fits')
        fileIO.writeFITS(deconv, f, int=False)

    print('Bessel deconvolution done')
def deriveBasisSetsICA(data, cut, outfolder, components=10):
    """
    Derives a basis set from input data using Fast Independent Component Analysis.
    Saves the basis sets to FITS files for further processing.

    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.fastica.html#sklearn.decomposition.fastica

    :param data: input data from which the basis set is derived; the code fits the transpose and
                 reshapes each resulting component to (cut, cut), so each row is expected to be a
                 flattened cutout of cut*cut values
    :type data: ndarray
    :param cut: size of the cutout region that has been used
    :type cut: int
    :param outfolder: name of the output folder e.g. 'output'
    :type outfolder: str
    :param components: the number of basis set function components to derive
    :type components: int

    :return: ICA components, one flattened component per row
    :rtype: ndarray
    """
    ica = decomposition.FastICA(n_components=components)
    # ICA treats sequential observations as related.  Because of this, we need
    # to fit with the transpose of the spectra
    ica.fit(data.T)
    basis = ica.transform(data.T).T

    #save each component to a FITS file; a separate name is used for the reshaped component so that
    #the full basis set is returned (previously only the last reshaped component was returned)
    for i, component in enumerate(basis):
        img = component.reshape(cut, cut)
        #to compare IDL results
        #img = -img
        fileIO.writeFITS(img, outfolder + '/ICAbasis%03d.fits' % (i+1),  int=False)
    return basis
def maskObjects(data, sigma=4., iterations=None):
    """
    Mask objects using sigma clipping.

    The minimum, maximum, mean and standard deviation of the clipped data are printed, the data and
    the mask are plotted to ``masking.png`` and the masked data are written to ``masking.fits``.

    :param data: input image
    :type data: ndarray
    :param sigma: clipping threshold passed to the sigma clipping routine
    :type sigma: float
    :param iterations: number of clipping iterations passed to the sigma clipping routine
    :type iterations: int or None

    :return: 1D array of the positive values that were not clipped
    :rtype: ndarray
    """
    #sigma clip a copy of the data
    clipped = sigma_clip(data.copy(), sig=sigma, iters=iterations)
    print('%s %s %s %s' % (clipped.min(), clipped.max(), clipped.mean(), clipped.std()))

    #True for the pixels that survived the clipping
    keep = ~clipped.mask

    #plot the image
    fig = plt.figure(figsize=(18, 10))
    left = fig.add_subplot(121)
    right = fig.add_subplot(122)

    left.set_title('Data')
    right.set_title('Mask')

    imdata = left.imshow(np.log10(data), origin='lower', vmin=2., vmax=3.5, interpolation='none')
    immask = right.imshow(keep, origin='lower', interpolation='none')
    cbdata = plt.colorbar(imdata, ax=left, orientation='horizontal')
    cbmask = plt.colorbar(immask, ax=right, orientation='horizontal', ticks=[0, 1])
    cbmask.ax.set_xticklabels(['False', 'True'])
    cbdata.set_label('$\log_{10}$(Counts [ADU])')
    cbmask.set_label('Mask')
    plt.savefig('masking.png')
    plt.close()

    out = clipped.data*keep
    fileIO.writeFITS(out, 'masking.fits', int=False)

    values = out.ravel()
    return values[values > 0.]
def testNonlinearityModel(file='data/psf12x.fits', oversample=12.0, sigma=0.75,
                          scale=2e5, amp=1e-3, phase=0.98, multiplier=1.5, outdir='.'):
    """
    Apply the sinusoidal CCD non-linearity model to a scaled PSF and compare the measured shapes.

    The reference and the distorted measurements and the differences in ellipticity and R2
    (reference minus distorted) are printed. The scaled PSF, the distorted PSF and their ratio are
    written to ``scaledPSF.fits``, ``nonlinearData.fits`` and ``nonlinearRatio.fits`` in *outdir*.

    Note: ``log`` is not an argument, it must be available at module level.

    :param file: name of the FITS file containing the PSF
    :type file: str
    :param oversample: oversampling factor of the PSF, the shape measurement sampling is 1/oversample
    :type oversample: float
    :param sigma: sigma passed to the shape measurement
    :type sigma: float
    :param scale: peak value to which the PSF is scaled
    :type scale: float
    :param amp: amplitude passed to the non-linearity model
    :type amp: float
    :param phase: phase passed to the non-linearity model
    :type phase: float
    :param multiplier: passed to the non-linearity model as ``multi``
    :type multiplier: float
    :param outdir: output directory
    :type outdir: str

    :return: None
    """
    #read in the PSF and rescale it so that the peak equals the requested scale
    psf = pf.getdata(file)
    psf /= np.max(psf)
    psf *= scale

    #derive reference values from the clean PSF
    options = dict(sampling=1.0 / oversample, sigma=sigma)
    reference = shape.shapeMeasurement(psf, log, **options).measureRefinedEllipticity()
    print(reference)

    #apply the nonlinearity model to the scaled PSF and remove negative values
    distorted = VISinstrumentModel.CCDnonLinearityModelSinusoidal(psf.copy(), amp, phase=phase, multi=multiplier)
    distorted[distorted < 0.] = 0.

    #measure e and R2 from the distorted postage stamp image
    results = shape.shapeMeasurement(distorted.copy(), log, **options).measureRefinedEllipticity()
    print(results)

    deltaE = reference['ellipticity'] - results['ellipticity']
    deltaR2 = reference['R2'] - results['R2']
    print('%s %s' % (deltaE, deltaR2))

    fileIO.writeFITS(psf, outdir + '/scaledPSF.fits', int=False)
    fileIO.writeFITS(distorted, outdir + '/nonlinearData.fits', int=False)
    fileIO.writeFITS(distorted / psf, outdir + '/nonlinearRatio.fits', int=False)
def test(log, file="data/psf1x.fits", oversample=1.0, sigma=0.75, scale=1e2, level=10, covering=1.4, single=False):
    """
    Add cosmic rays to a scaled PSF and compare the measured shape against the clean PSF.

    The reference and the contaminated measurements are printed together with the differences
    (contaminated minus reference) in e1, e2, ellipticity and R2. The contaminated image is
    written to ``example.fits``.

    :param log: logger instance passed to the shape measurement and cosmic ray classes
    :param file: name of the FITS file containing the PSF
    :type file: str
    :param oversample: oversampling factor of the PSF, the shape measurement sampling is 1/oversample
    :type oversample: float
    :param sigma: sigma passed to the shape measurement
    :type sigma: float
    :param scale: factor with which the peak-normalised PSF is multiplied
    :type scale: float
    :param level: passed to the cosmic ray routines as ``limit``; reported as deposited energy in electrons
    :type level: int
    :param covering: covering fraction passed to ``addUpToFraction`` when *single* is False
    :type covering: float
    :param single: if True a single cosmic ray event is added instead
    :type single: bool

    :return: None
    """
    # read in the PSF and normalise the peak to unity
    psf = pf.getdata(file)
    psf /= np.max(psf)

    # derive reference values from the clean, scaled PSF
    options = dict(sampling=1.0 / oversample, sigma=sigma, iterations=10)
    scaled = psf.copy() * scale
    reference = shape.shapeMeasurement(scaled.copy(), log, **options).measureRefinedEllipticity()
    print("Reference:")
    pprint.pprint(reference)

    # an instance built on a dummy image is used only to read the cosmic ray information
    crInfo = cosmicrays.cosmicrays(log, np.zeros((2, 2)))._readCosmicrayInformation()

    print("Deposited Energy of Cosmic Rays: %i electrons" % level)
    # add cosmic rays to the scaled image
    cosmics = cosmicrays.cosmicrays(log, scaled, crInfo=crInfo)
    if not single:
        # x cosmic ray events to reach a covering fraction, say 1.4 per cent
        newdata = cosmics.addUpToFraction(covering, limit=level)
    else:
        # only one cosmic with a given energy level, length drawn from a distribution
        newdata = cosmics.addSingleEvent(limit=level)

    # write out new data for inspection
    fileIO.writeFITS(newdata, "example.fits", int=False)

    # measure e and R2 from the postage stamp image
    results = shape.shapeMeasurement(newdata.copy(), log, **options).measureRefinedEllipticity()
    print("Results:")
    pprint.pprint(results)

    deltas = (("delta e_1: ", "e1"),
              ("delta e_2: ", "e2"),
              ("delta e: ", "ellipticity"),
              ("delta R**2: ", "R2"))
    for label, key in deltas:
        print("%s %s" % (label, results[key] - reference[key]))
def scaleAndSubtract(files, combined='medianCombined.fits'):
    """
    Scale the combined image to the peak of each file and subtract it from the data.
    The result is stored to a new FITS file with "sub" appended to the name.

    The peak-normalised combined image is also written out with "Normalized" appended to its name.

    :param files: a list of filenames to process
    :type files: list
    :param combined: name of the file to subtract from the raw data
    :type combined: str

    :return: None
    """
    #normalise the template to a peak of unity and keep a copy on disk
    template = pf.getdata(combined)
    template /= np.max(template)
    fileIO.writeFITS(template, combined.replace('.fits', 'Normalized.fits'), int=False)

    for filename in files:
        frame = pf.getdata(filename)
        peak = np.max(frame)
        #template scaled to the peak of this frame
        frame -= template*peak
        print('Processing %s with a peak of %.1f' % (filename, peak))
        fileIO.writeFITS(frame, filename.replace('.fits', 'sub.fits'), int=False)
def _simpleExample(CCDx=10, CCDy=10):
    """
    Build an example spot model on a 21 x 21 pixel grid and save the intermediate products.

    An Airy disc is convolved with a Gaussian focus term (saved to ``TESTfocus.fits``) and the
    result is convolved with a 3 x 3 cross-shaped CCD kernel (saved to ``TESTkernel.fits``).

    :param CCDx: passed to ``_amplitudeFromPeak`` as ``x_0``
    :type CCDx: int
    :param CCDy: passed to ``_amplitudeFromPeak`` as ``y_0``
    :type CCDy: int

    :return: None
    """
    stamp = (21, 21)
    #pixel coordinates in a mesh
    xx, yy = np.meshgrid(np.arange(0, stamp[1]), np.arange(0, stamp[0]))

    #model parameters
    peak = 200000
    center_x, center_y = 10.1, 9.95
    radius = 0.5
    focus = 0.5
    width_x, width_y = 0.03, 0.06

    #Airy disc
    amplitude = _amplitudeFromPeak(peak, center_x, center_y, radius, x_0=CCDx, y_0=CCDy)
    airy = models.AiryDisk2D(amplitude, center_x, center_y, radius)
    adata = airy.eval(xx, yy, amplitude, center_x, center_y, radius).reshape(stamp)

    #Gaussian focus term
    gauss = models.Gaussian2D(1., center_x, center_y, focus, focus, 0.)
    focusdata = gauss.eval(xx, yy, 1., center_x, center_y, focus, focus, 0.).reshape(stamp)
    foc = signal.convolve2d(adata, focusdata, mode='same')
    fileIO.writeFITS(foc, 'TESTfocus.fits', int=False)

    #cross-shaped CCD kernel, the elements sum to unity
    centre = (1.-width_y-width_y-width_x-width_x)
    CCDdata = np.array([[0.0, width_y, 0.0],
                        [width_x, centre, width_x],
                        [0.0, width_y, 0.0]])

    #save model
    fileIO.writeFITS(signal.convolve2d(foc, CCDdata, mode='same'), 'TESTkernel.fits', int=False)
def testCTIinclusion(log, sigma=0.75, iterations=3, weighted=True,
                                   fixedPosition=False, fixedX=None, fixedY=None):
    """
    Compare shapes measured from an externally CTI-damaged image against shapes measured after
    adding CTI to the undamaged image with ``addCTI`` (default settings and ``thibautCDM03=True``).

    The undamaged image is scaled to a peak of 420 before CTI is added. For every measured
    quantity the difference to both ``addCTI`` variants is printed; for keys containing
    'Gaussian' the maximum absolute difference is printed instead. ``tmp2.fits`` and
    ``tmp3.fits`` are written for inspection.

    :param log: logger instance passed to the shape measurement class
    :param sigma: sigma passed to the shape measurement
    :type sigma: float
    :param iterations: number of iterations passed to the shape measurement
    :type iterations: int
    :param weighted: passed to the shape measurement
    :type weighted: bool
    :param fixedPosition: passed to the shape measurement
    :type fixedPosition: bool
    :param fixedX: passed to the shape measurement
    :param fixedY: passed to the shape measurement

    :return: None
    """
    #shape measurement settings
    options = dict(sigma=sigma, iterations=iterations, weighted=weighted, fixedX=fixedX, fixedY=fixedY,
                   fixedPosition=fixedPosition)

    #Thibaut's data
    folder = '//Users/sammy/EUCLID/CTItesting/Reconciliation/'
    damaged = pf.getdata(folder + 'damaged_image_parallel.fits')
    clean = pf.getdata(folder +
                       'galaxy_100mas_dist2_q=0.9568_re=22.2670_theta=-1.30527_norm=1000_dx=0.2274_dy=0.2352.fits')

    #scale the undamaged image to a peak of 420
    clean /= np.max(clean)
    clean *= 420.

    wctiresults = shape.shapeMeasurement(damaged, log, **options).measureRefinedEllipticity()

    #include CTI with my recipe, note the transposes
    ctiMSSL = addCTI(clean.copy()).T
    ctiThibault = addCTI(clean.copy(), thibautCDM03=True).T

    wMSSLctiresults = shape.shapeMeasurement(ctiMSSL, log, **options).measureRefinedEllipticity()
    wThibautctiresults = shape.shapeMeasurement(ctiThibault, log, **options).measureRefinedEllipticity()

    fileIO.writeFITS(ctiThibault, 'tmp2.fits', int=False)
    fileIO.writeFITS(damaged/ctiThibault, 'tmp3.fits', int=False)

    for key in wctiresults:
        diffMSSL = wctiresults[key] - wMSSLctiresults[key]
        diffThibaut = wctiresults[key] - wThibautctiresults[key]
        if 'Gaussian' in key:
            #array valued entries are summarised with the largest absolute difference
            diffMSSL = np.max(np.abs(diffMSSL))
            diffThibaut = np.max(np.abs(diffThibaut))
        print('%s %s %s' % (key, diffMSSL, diffThibaut))
def deriveBasisSetsRandomizedPCA(data, cut, outfolder, components=10, whiten=False):
    """
    Derives a basis set from input data using Randomized Principal component analysis (PCA).
    Saves the basis sets to a FITS file for further processing.

    Information about PCA can be found from the scikit-learn website:
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.RandomizedPCA.html#sklearn.decomposition.RandomizedPCA

    :param data: input data from which the basis set are derived from. The input data must be an array of arrays.
                 Each array should describe an independent data set that has been flatted to 1D.
    :type data: ndarray
    :param cut: size of the cutout region that has been used
    :type cut: int
    :param outfolder: name of the output folder e.g. 'output'
    :type outfolder: str
    :param components: the number of basis set function components to derive
    :type components: int
    :param whiten: When True (False by default) the components_ vectors are divided by n_samples times
                   singular values to ensure uncorrelated outputs with unit component-wise variances.
    :type whiten: bool

    :return: Randomized PCA components, one flattened component per row
    :rtype: ndarray
    """
    pca = decomposition.RandomizedPCA(n_components=components, whiten=whiten)
    pca.fit(data)
    basis = pca.components_

    #output the variance ratio
    print('Variance Ratio: %s' % (pca.explained_variance_ratio_*100.,))

    #save each component to a FITS file; a separate name is used for the reshaped component so that
    #the full basis set is returned (previously only the last reshaped component was returned)
    for i, component in enumerate(basis):
        img = component.reshape(cut, cut)
        #to compare IDL results
        #img = -img
        fileIO.writeFITS(img, outfolder + '/RandomPCAbasis%03d.fits' % (i+1),  int=False)
    return basis
def convertMatlabToFITS(suffix='.mat'):
    """
    Converts all matlab files with a given suffix within the current working directory
    (including its subdirectories) to the FITS format.
    Uses SciPy.io to read the matlab files.

    Only the variable named 'PSF' is converted. Files for which the FITS file already exists and
    files that do not contain a 'PSF' variable are skipped. An image of every converted
    PSF is also plotted to a PDF file.

    :param suffix: suffix of the files to convert
    :type suffix: str

    :return: None
    """
    import scipy.io

    for root, dirs, files in os.walk(os.getcwd()):
        print('Root directory to process is %s \n' % root)
        for f in files:
            #only process files that end with the suffix
            if not f.endswith(suffix):
                continue

            #only process if the FITS file does not exist
            target = os.path.join(root, f.replace(' ', '').replace(suffix, '.fits'))
            if os.path.isfile(target):
                continue

            source = os.path.join(root, f)
            print('Processing file %s' % source)

            #get() returns None for a missing variable so that such files can be skipped;
            #indexing with ['PSF'] raised a KeyError and stopped the whole conversion
            psf = scipy.io.loadmat(source).get('PSF')
            if psf is None:
                print('Skipping %s: no PSF variable found' % source)
                continue

            fileIO.writeFITS(psf, target, int=False)
            plotImage(psf, target.replace('.fits', '.pdf'))
def simpleTest(log, sigma=0.75, iterations=50):
    """
    Compare shapes measured from an externally CTI-damaged image against shapes measured after
    adding CTI to the undamaged image with ``addCTI`` (default settings and ``thibautCDM03=True``).

    For every measured quantity the difference to both ``addCTI`` variants is printed; for keys
    containing 'Gaussian' the maximum absolute difference is printed instead. ``tmp1.fits``,
    ``tmp2.fits`` and ``tmp3.fits`` are written for inspection.

    :param log: logger instance passed to the shape measurement class
    :param sigma: sigma passed to the shape measurement
    :type sigma: float
    :param iterations: number of iterations passed to the shape measurement
    :type iterations: int

    :return: None
    """
    #Thibaut's data
    folder = '/Users/sammy/EUCLID/CTItesting/uniform/'
    damaged = pf.getdata(folder +
                         'galaxy_100mas_dist2_q=0.5078_re=6.5402_theta=0.91895_norm=1000_dx=0.3338_dy=0.0048CTI.fits')
    clean = pf.getdata(folder +
                       'galaxy_100mas_dist2_q=0.5078_re=6.5402_theta=0.91895_norm=1000_dx=0.3338_dy=0.0048noCTI.fits')

    #shape measurement settings
    options = dict(sigma=sigma, iterations=iterations)

    #calculate shapes; the result of the undamaged image is measured but not used further
    wctiresults = shape.shapeMeasurement(damaged, log, **options).measureRefinedEllipticity()
    woctiresults = shape.shapeMeasurement(clean, log, **options).measureRefinedEllipticity()

    #include CTI with my recipe
    ctiMSSL = addCTI(clean.copy())
    ctiThibault = addCTI(clean.copy(), thibautCDM03=True)

    wMSSLctiresults = shape.shapeMeasurement(ctiMSSL, log, **options).measureRefinedEllipticity()
    wThibautctiresults = shape.shapeMeasurement(ctiThibault, log, **options).measureRefinedEllipticity()

    fileIO.writeFITS(ctiMSSL, 'tmp1.fits', int=False)
    fileIO.writeFITS(ctiThibault, 'tmp2.fits', int=False)
    fileIO.writeFITS(damaged/ctiMSSL, 'tmp3.fits', int=False)

    for key in wctiresults:
        diffMSSL = wctiresults[key] - wMSSLctiresults[key]
        diffThibaut = wctiresults[key] - wThibautctiresults[key]
        if 'Gaussian' in key:
            #array valued entries are summarised with the largest absolute difference
            diffMSSL = np.max(np.abs(diffMSSL))
            diffThibaut = np.max(np.abs(diffThibaut))
        print('%s %s %s' % (key, diffMSSL, diffThibaut))
def forwardModel(file, out='Data', wavelength=None, gain=3.1, size=10, burn=500, spotx=2888, spoty=3514, run=700,
                 simulation=False, truths=None):
    """
    Forward models the spot data found from the input file. Can be used with simulated and real data.

    A spot cutout is extracted around the brightest pixel and a bias level is removed. A 2D Gaussian
    is fitted with least squares to obtain a starting position, after which a seven parameter model
    (peak, x, y, radius, focus, width_x, width_y) is sampled with emcee. The spot, the models, the
    residuals and a corner plot are written to files whose names start with *out*; the best-fit
    CCD kernel is written to ``kernel.fits``.

    Notes:
    - the original note stated that emcee is run three times to have a good starting point for the
      final run; the code below calls ``run_mcmc`` twice (a burn-in followed by the main run).
    - ``cores``, ``log_posterior``, ``_amplitudeFromPeak``, ``_printResults`` and ``_peakFromTruth``
      are not defined in this function and must be available at module level.
    - when *simulation* is True, *truths* is indexed and its first element is overwritten in place,
      so it must be a mutable sequence and not None.

    :param file: name of the FITS file to read
    :type file: str
    :param out: prefix of the output file names
    :type out: str
    :param wavelength: not used by this function
    :param gain: factor with which the input data are multiplied (conversion to electrons)
    :type gain: float
    :param size: half-width used when cutting out the spot around its peak pixel
    :type size: int
    :param burn: number of steps of the burn-in run
    :type burn: int
    :param spotx: approximate x position of the spot, used only when simulation is False
    :type spotx: int
    :param spoty: approximate y position of the spot, used only when simulation is False
    :type spoty: int
    :param run: number of steps of the main MCMC run
    :type run: int
    :param simulation: if True the whole input image is searched for the spot and fixed values are
                       used for the bias (9000) and the readout noise (4.5)
    :type simulation: bool
    :param truths: true parameter values passed to the corner plot
    :type truths: list or None

    :return: None
    """
    print '\n\n\n'
    print '_'*120
    print 'Processing:', file
    #get data and convert to electrons
    o = pf.getdata(file)*gain

    if simulation:
        data = o
    else:
        #roughly the correct location - to avoid identifying e.g. cosmic rays
        data = o[spoty-(size*3):spoty+(size*3)+1, spotx-(size*3):spotx+(size*3)+1].copy()

    #maximum position within the cutout
    y, x = m.maximum_position(data)

    #spot and the peak pixel within the spot, this is also the CCD kernel position
    spot = data[y-size:y+size+1, x-size:x+size+1].copy()
    CCDy, CCDx = m.maximum_position(spot)
    print 'CCD Kernel Position (within the postage stamp):', CCDx, CCDy

    #bias estimate
    if simulation:
        bias = 9000.
        rn = 4.5
    else:
        #median and scatter of a region 20 to 220 pixels to the left of the nominal spot position
        bias = np.median(o[spoty-size: spoty+size, spotx-220:spotx-20]) #works for read o
        rn = np.std(o[spoty-size: spoty+size, spotx-220:spotx-20])

    print 'Readnoise (e):', rn
    if rn < 2. or rn > 6.:
        print 'NOTE: suspicious readout noise estimate...'
    print 'ADC offset (e):', bias

    #remove bias
    spot -= bias

    #save to file
    fileIO.writeFITS(spot, out+'small.fits', int=False)

    #make a copy to generate error array
    data = spot.copy().flatten()
    #assume that uncertainties scale as sqrt of the values + readnoise
    #sigma = np.sqrt(data/gain + rn**2)
    tmp = data.copy()
    tmp[tmp + rn**2 < 0.] = 0.  #set highly negative values to zero
    #var is used for the goodness of fit and the residual image below,
    #the sampler is given rn**2 instead
    var = tmp.copy() + rn**2
    #Gary B. said that actually this should be from the model or is biased,
    #so I only pass the readout noise part now

    #fit a simple model
    print 'Least Squares Fitting...'
    gaus = models.Gaussian2D(spot.max(), size, size, x_stddev=0.5, y_stddev=0.5)
    gaus.theta.fixed = True  #fix angle
    p_init = gaus
    fit_p = fitting.LevMarLSQFitter()
    stopy, stopx = spot.shape
    X, Y = np.meshgrid(np.arange(0, stopx, 1), np.arange(0, stopy, 1))
    p = fit_p(p_init, X, Y, spot)
    print p
    model = p(X, Y)
    fileIO.writeFITS(model, out+'BasicModel.fits', int=False)
    fileIO.writeFITS(model - spot, out+'BasicModelResidual.fits', int=False)

    #goodness of fit, five is subtracted from the number of pixels for the fitted parameters
    gof = (1./(np.size(data) - 5.)) * np.sum((model.flatten() - data)**2 / var)
    print 'GoF:', gof
    print 'Done\n\n'

    #maximum value
    #NOTE(review): max and sum shadow the builtins for the rest of the function
    max = np.max(spot)
    peakrange = (0.9*max, 1.7*max)
    sum = np.sum(spot)

    print 'Maximum Value:', max
    print 'Sum of the values:', sum
    print 'Peak Range:', peakrange

    #MCMC based fitting
    print 'Bayesian Model Fitting...'
    nwalkers = 1000

    # Initialize the sampler with the chosen specs.
    #Create the coordinates x and y
    x = np.arange(0, spot.shape[1])
    y = np.arange(0, spot.shape[0])
    #Put the coordinates in a mesh
    xx, yy = np.meshgrid(x, y)

    #Flatten the arrays
    xx = xx.flatten()
    yy = yy.flatten()

    print 'Fitting full model...'
    ndim = 7

    #Choose an initial set of positions for the walkers - fairly large area not to bias the results
    p0 = np.zeros((nwalkers, ndim))
    #peak, center_x, center_y, radius, focus, width_x, width_y = theta
    #the position is started from the least squares fit, the other parameters from fixed guesses
    p0[:, 0] = np.random.normal(max, max/100., size=nwalkers)                 # peak value
    p0[:, 1] = np.random.normal(p.x_mean.value, 0.1, size=nwalkers)           # x
    p0[:, 2] = np.random.normal(p.y_mean.value, 0.1, size=nwalkers)           # y
    print 'Using initial guess [radius, focus, width_x, width_y]:', [1.5, 0.6, 0.02, 0.03]
    p0[:, 3] = np.random.normal(1.5, 0.01, size=nwalkers)                   # radius
    p0[:, 4] = np.random.normal(0.6, 0.01, size=nwalkers)                    # focus
    p0[:, 5] = np.random.normal(0.02, 0.0001, size=nwalkers)                   # width_x
    p0[:, 6] = np.random.normal(0.03, 0.0001, size=nwalkers)                   # width_y

    #initiate sampler
    #NOTE(review): the pool is closed only at the very end of the function, an exception
    #raised in between leaves the worker processes running
    pool = Pool(cores) #A hack Dan gave me to not have ghost processes running as with threads keyword
    #sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[xx, yy, data, var, peakrange, spot.shape],
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior,
                                    args=[xx, yy, data, rn**2, peakrange, spot.shape],
                                    pool=pool)

    # Run a burn-in and set new starting position
    print "Burning-in..."
    pos, prob, state = sampler.run_mcmc(p0, burn)
    maxprob_index = np.argmax(prob)
    params_fit = pos[maxprob_index]
    print "Mean acceptance fraction:", np.mean(sampler.acceptance_fraction)
    print 'Estimate:', params_fit
    sampler.reset()

    #the main run continues from the final walker positions and random state of the burn-in
    print "Running MCMC..."
    pos, prob, state = sampler.run_mcmc(pos, run, rstate0=state)
    print "Mean acceptance fraction:", np.mean(sampler.acceptance_fraction)

    #Get the index with the highest probability
    maxprob_index = np.argmax(prob)

    #Get the best parameters and their respective errors and print best fits
    #the errors are the standard deviations of the flattened chain
    params_fit = pos[maxprob_index]
    errors_fit = [sampler.flatchain[:,i].std() for i in xrange(ndim)]
    _printResults(params_fit, errors_fit)

    #Best fit model: an Airy disc convolved with a Gaussian focus term and a 3x3 CCD kernel
    peak, center_x, center_y, radius, focus, width_x, width_y = params_fit
    amplitude = _amplitudeFromPeak(peak, center_x, center_y, radius, x_0=CCDx, y_0=CCDy)
    airy = models.AiryDisk2D(amplitude, center_x, center_y, radius)
    adata = airy.eval(xx, yy, amplitude, center_x, center_y, radius).reshape(spot.shape)
    f = models.Gaussian2D(1., center_x, center_y, focus, focus, 0.)
    focusdata = f.eval(xx, yy, 1., center_x, center_y, focus, focus, 0.).reshape(spot.shape)
    foc = signal.convolve2d(adata, focusdata, mode='same')
    #cross-shaped kernel whose elements sum to unity
    CCDdata = np.array([[0.0, width_y, 0.0],
                        [width_x, (1.-width_y-width_y-width_x-width_x), width_x],
                        [0.0, width_y, 0.0]])
    fileIO.writeFITS(CCDdata, 'kernel.fits', int=False)
    model = signal.convolve2d(foc, CCDdata, mode='same')
    #save model
    fileIO.writeFITS(model, out+'model.fits', int=False)

    #residuals
    fileIO.writeFITS(model - spot, out+'residual.fits', int=False)
    fileIO.writeFITS(((model - spot)**2 / var.reshape(spot.shape)), out+'residualSQ.fits', int=False)

    # a simple goodness of fit
    gof = (1./(np.size(data) - ndim)) * np.sum((model.flatten() - data)**2 / var)
    maxdiff = np.max(np.abs(model - spot))
    print 'GoF:', gof, ' Maximum difference:', maxdiff
    if maxdiff > 2e3 or gof > 4.:
        print '\nFIT UNLIKELY TO BE GOOD...\n'
    print 'Amplitude estimate:', amplitude

    #plot
    samples = sampler.chain.reshape((-1, ndim))
    #extents are computed for simulations but are currently not passed to the corner plot
    extents = None
    if simulation:
        extents = [(0.91*truth, 1.09*truth) for truth in truths]
        extents[1] = (truths[1]*0.995, truths[1]*1.005)
        extents[2] = (truths[2]*0.995, truths[2]*1.005)
        extents[3] = (0.395, 0.425)
        extents[4] = (0.503, 0.517)
        #note: this overwrites the first element of the list given by the caller
        truths[0] = _peakFromTruth(truths)
        print truths
    fig = triangle.corner(samples,
                          labels=['peak', 'x', 'y', 'radius', 'focus', 'width_x', 'width_y'],
                          truths=truths)#, extents=extents)
    fig.savefig(out+'Triangle.png')
    plt.close()
    pool.close()
def forwardModelTest(file, CCDPSFmodel='Gaus', out='Data', gain=3.1, size=10, spotx=2888, spoty=3514,
                     burn=100, run=200, nwalkers=1000):
    """
    A single file to quickly test if the method works
    """
    #get data and convert to electrons
    print '\n\n\n'
    print '_'*120
    print 'Processing:', file

    o = pf.getdata(file)*gain

    #roughly the correct location - to avoid identifying e.g. cosmic rays
    data = o[spoty-(size*3):spoty+(size*3)+1, spotx-(size*3):spotx+(size*3)+1].copy()

    #maximum position within the cutout
    y, x = m.maximum_position(data)

    #spot and the peak pixel within the spot, this is also the CCD kernel position
    spot = data[y-size:y+size+1, x-size:x+size+1].copy()
    CCDy, CCDx = m.maximum_position(spot)

    bias = np.median(o[spoty-size: spoty+size, spotx-220:spotx-20]) #works for read o
    rn = np.std(o[spoty-size: spoty+size, spotx-220:spotx-20])

    print 'Readnoise (e):', rn
    if rn < 2. or rn > 6.:
        print 'NOTE: suspicious readout noise estimate...'
    print 'ADC offset (e):', bias

    #remove bias
    spot -= bias

    #save to file
    fileIO.writeFITS(spot, out+'small.fits', int=False)

    #make a copy ot generate error array
    data = spot.copy().flatten()
    data[data + rn**2 < 0.] = 0.  #set highly negative values to zero
    #assume errors scale as sqrt of the values + readnoise
    sigma = np.sqrt(data + rn**2)
    #variance is the true noise model
    var = sigma**2

    #maximum value
    max = np.max(spot)
    print 'Maximum Value:', max

    #fit a simple model
    print 'Least Squares Fitting...'
    gaus = models.Gaussian2D(spot.max(), size, size, x_stddev=0.5, y_stddev=0.5)
    gaus.theta.fixed = True  #fix angle
    p_init = gaus
    fit_p = fitting.LevMarLSQFitter()
    stopy, stopx = spot.shape
    X, Y = np.meshgrid(np.arange(0, stopx, 1), np.arange(0, stopy, 1))
    p = fit_p(p_init, X, Y, spot)
    print p
    model = p(X, Y)

    fileIO.writeFITS(model, out+'BasicModelG.fits', int=False)
    fileIO.writeFITS(model - spot, out+'BasicModelResidualG.fits', int=False)

    airy = models.AiryDisk2D(spot.max(), size, size, 0.6)
    p_init = airy
    fit_p = fitting.LevMarLSQFitter()
    a = fit_p(p_init, X, Y, spot)
    print a
    model = p(X, Y)

    fileIO.writeFITS(model, out+'BasicModelA.fits', int=False)
    fileIO.writeFITS(model - spot, out+'BasicModelResidualA.fits', int=False)

    #goodness of fit
    gof = (1./(len(data)-5.)) * np.sum((model.flatten() - data)**2 / var)
    print 'GoF:', gof
    print 'Done'

    #MCMC based fitting
    if 'Gaus' in CCDPSFmodel:
        ndim = 7
        print 'Model with a Gaussian CCD PSF, %i dimensions' % ndim

        #Choose an initial set of positions for the walkers - fairly large area not to bias the results
        #amplitude, center_x, center_y, radius, focus, width_x, width_y = theta
        p0 = np.zeros((nwalkers, ndim))
        p0[:, 0] = np.random.uniform(max, 2.*max, size=nwalkers)     # amplitude
        p0[:, 1] = np.random.uniform(7., 14., size=nwalkers)         # x
        p0[:, 2] = np.random.uniform(7., 14., size=nwalkers)         # y
        p0[:, 3] = np.random.uniform(.1, 1., size=nwalkers)          # radius
        p0[:, 4] = np.random.uniform(.1, 1., size=nwalkers)          # focus
        p0[:, 5] = np.random.uniform(.1, 0.5, size=nwalkers)         # width_x
        p0[:, 6] = np.random.uniform(.1, 0.5, size=nwalkers)         # width_y

        # Initialize the sampler with the chosen specs.
        #Create the coordinates x and y
        x = np.arange(0, spot.shape[1])
        y = np.arange(0, spot.shape[0])
        #Put the coordinates in a mesh
        xx, yy = np.meshgrid(x, y)

        #Flatten the arrays
        xx = xx.flatten()
        yy = yy.flatten()

        #initiate sampler
        pool = Pool(7) #A hack Dan gave me to not have ghost processes running as with threads keyword
        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posteriorG, args=[xx, yy, data, var], pool=pool)

        # Run a burn-in and set new starting position
        print "Burning-in..."
        pos, prob, state = sampler.run_mcmc(p0, burn)
        best_pos = sampler.flatchain[sampler.flatlnprobability.argmax()]
        pos = emcee.utils.sample_ball(best_pos, best_pos/100., size=nwalkers)
        # Reset the chain to remove the burn-in samples.
        sampler.reset()

        # Starting from the final position in the burn-in chain
        print "Running MCMC..."
        pos, prob, state = sampler.run_mcmc(pos, burn)
        sampler.reset()
        pos, prob, state = sampler.run_mcmc(pos, run, rstate0=state)

        # Print out the mean acceptance fraction
        print "Mean acceptance fraction:", np.mean(sampler.acceptance_fraction)

        #Get the index with the highest probability
        maxprob_index = np.argmax(prob)

        #Get the best parameters and their respective errors and print best fits
        params_fit = pos[maxprob_index]
        errors_fit = [sampler.flatchain[:,i].std() for i in xrange(ndim)]
        _printResults2(params_fit, errors_fit, model=CCDPSFmodel)

        #Best fit model
        amplitude, center_x, center_y, radius, focus, width_x, width_y = params_fit
        airy = models.AiryDisk2D(amplitude, center_x, center_y, radius)
        adata = airy.eval(xx, yy, amplitude, center_x, center_y, radius).reshape(spot.shape)
        f = models.Gaussian2D(1., center_x, center_y, focus, focus, 0.)
        focusdata = f.eval(xx, yy, 1., center_x, center_y, focus, focus, 0.).reshape(spot.shape)
        foc = signal.convolve2d(adata, focusdata, mode='same')
        CCD = models.Gaussian2D(1., CCDx, CCDy, width_x, width_y, 0.)
        CCDdata = CCD.eval(xx, yy, 1., CCDx, CCDy, width_x, width_y, 0.).reshape(spot.shape)
        model = signal.convolve2d(foc, CCDdata, mode='same')
        #save model
        fileIO.writeFITS(model, out+'model.fits', int=False)

        #residuals
        fileIO.writeFITS(model - spot, out+'residual.fits', int=False)
        fileIO.writeFITS(((model-spot)**2 / var.reshape(spot.shape)), out+'residualSQ.fits', int=False)

        #results
        _printFWHM(width_x, width_y, errors_fit[5], errors_fit[6])

        #plot
        samples = sampler.chain[:, burn:, :].reshape((-1, ndim))
        fig = triangle.corner(samples,
                              labels=['amplitude', 'center_x', 'center_y', 'radius', 'focus', 'width_x', 'width_y'])
        fig.savefig(out+'Triangle.png')

    elif 'Cross' in CCDPSFmodel:
        ndim = 8
        print 'Model with a Cross CCD PSF, %i dimensions' % ndim

        #amplitude, center_x, center_y, radius, focus, width_x, width_y, width_d = theta
        # Choose an initial set of positions for the walkers using the Gaussian fit
        p0 = [np.asarray([1.3*max,#p.amplitude.value,
                          p.x_mean.value,
                          p.y_mean.value,
                          np.max([p.x_stddev.value, p.y_stddev.value]),
                          0.5,
                          0.08,
                          0.1,
                          0.01]) + 1e-3*np.random.randn(ndim) for i in xrange(nwalkers)]

        # Initialize the sampler with the chosen specs.
        #Create the coordinates x and y
        x = np.arange(0, spot.shape[1])
        y = np.arange(0, spot.shape[0])
        #Put the coordinates in a mesh
        xx, yy = np.meshgrid(x, y)

        #Flatten the arrays
        xx = xx.flatten()
        yy = yy.flatten()

        #initiate sampler
        pool = Pool(7) #A hack Dan gave me to not have ghost processes running as with threads keyword
        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posteriorC, args=[xx, yy, data, var], pool=pool)


        # Run a burn-in and set new starting position
        print "Burning-in..."
        pos, prob, state = sampler.run_mcmc(p0, burn)
        best_pos = sampler.flatchain[sampler.flatlnprobability.argmax()]
        pos = emcee.utils.sample_ball(best_pos, best_pos/100., size=nwalkers)
        # Reset the chain to remove the burn-in samples.
        sampler.reset()

        # Starting from the final position in the burn-in chain
        print "Running MCMC..."
        pos, prob, state = sampler.run_mcmc(pos, burn)
        sampler.reset()
        pos, prob, state = sampler.run_mcmc(pos, run, rstate0=state)

        # Print out the mean acceptance fraction
        print "Mean acceptance fraction:", np.mean(sampler.acceptance_fraction)

        #Get the index with the highest probability
        maxprob_index = np.argmax(prob)

        #Get the best parameters and their respective errors and print best fits
        params_fit = pos[maxprob_index]
        errors_fit = [sampler.flatchain[:,i].std() for i in xrange(ndim)]
        _printResults2(params_fit, errors_fit, model=CCDPSFmodel)

        #Best fit model
        amplitude, center_x, center_y, radius, focus, width_x, width_y, width_d = params_fit
        airy = models.AiryDisk2D(amplitude, center_x, center_y, radius)
        adata = airy.eval(xx, yy, amplitude, center_x, center_y, radius).reshape(spot.shape)
        f = models.Gaussian2D(1., center_x, center_y, focus, focus, 0.)
        focusdata = f.eval(xx, yy, 1., center_x, center_y, focus, focus, 0.).reshape(spot.shape)
        foc = signal.convolve2d(adata, focusdata, mode='same')

        #3)Apply CCD diffusion kernel
        kernel = np.array([[width_d, width_y, width_d],
                           [width_x, 1., width_x],
                           [width_d, width_y, width_d]])
        kernel /= kernel.sum()
        model = signal.convolve2d(foc, kernel, mode='same')

        #save model
        fileIO.writeFITS(model, out+'model.fits', int=False)

        #residuals
        fileIO.writeFITS(model - spot, out+'residual.fits', int=False)
        fileIO.writeFITS(((model-spot)**2 / var.reshape(spot.shape)), out+'residualSQ.fits', int=False)

        #results
        print kernel
        gaus = models.Gaussian2D(kernel.max(), 1.5, 1.5, x_stddev=0.3, y_stddev=0.3)
        gaus.theta.fixed = True
        p_init = gaus
        fit_p = fitting.LevMarLSQFitter()
        stopy, stopx = kernel.shape
        X, Y = np.meshgrid(np.arange(0, stopx, 1), np.arange(0, stopy, 1))
        p = fit_p(p_init, X, Y, kernel)
        #print p
        _printFWHM(p.x_stddev.value, p.y_stddev.value, errors_fit[5], errors_fit[6])

        #plot
        samples = sampler.chain[:, burn:, :].reshape((-1, ndim))
        fig = triangle.corner(samples,
                              labels=['amplitude', 'center_x', 'center_y', 'radius', 'focus', 'width_x', 'width_y', 'width_d'])
        fig.savefig(out+'Triangle.png')

    # a simple goodness of fit
    gof = (1./(len(data)-ndim)) * np.sum((model.flatten() - data)**2 / var)
    print 'GoF:', gof, ' Maximum difference:', np.max(np.abs(model - spot))
def forwardModel(file, out='Data', gain=3.1, size=10, burn=20, spotx=2888, spoty=3514, run=50,
                 simulation=False, truths=None):
    """
    Forward models the spot data found from the input file. Can be used with simulated and real data.

    Notes:
    - The emcee is run three times as it is important to have a good starting point for the final run.
    - It is very important to have the amplitude well estimated, otherwise it is difficult to get good parameter estimates.
    """
    print '\n\n\n'
    print '_'*120
    print 'Processing:', file
    #get data and convert to electrons
    o = pf.getdata(file)*gain

    if simulation:
        data = o
    else:
        #roughly the correct location - to avoid identifying e.g. cosmic rays
        data = o[spoty-(size*3):spoty+(size*3)+1, spotx-(size*3):spotx+(size*3)+1].copy()

    #maximum position within the cutout
    y, x = m.maximum_position(data)

    #spot and the peak pixel within the spot, this is also the CCD kernel position
    spot = data[y-size:y+size+1, x-size:x+size+1].copy()
    CCDy, CCDx = m.maximum_position(spot)
    print 'CCD Kernel Position (within the postage stamp):', CCDx, CCDy

    #bias estimate
    if simulation:
        bias = 9000.
        rn = 4.5
    else:
        bias = np.median(o[spoty-size: spoty+size, spotx-220:spotx-20]) #works for read o
        rn = np.std(o[spoty-size: spoty+size, spotx-220:spotx-20])

    print 'Readnoise (e):', rn
    if rn < 2. or rn > 6.:
        print 'NOTE: suspicious readout noise estimate...'
    print 'ADC offset (e):', bias

    #remove bias
    spot -= bias

    #save to file
    fileIO.writeFITS(spot, out+'small.fits', int=False)

    #make a copy ot generate error array
    data = spot.copy().flatten()
    data[data + rn**2 < 0.] = 0.  #set highly negative values to zero
    #assume errors scale as sqrt of the values + readnoise
    #sigma = np.sqrt(data/gain + rn**2)
    var = data.copy() + rn**2

    #maximum value
    max = np.max(spot)
    print 'Maximum Value:', max

    #MCMC based fitting
    print 'Bayesian Fitting...'
    ndim = 7
    nwalkers = 1000

    #Choose an initial set of positions for the walkers - fairly large area not to bias the results
    #amplitude, center_x, center_y, radius, focus, width_x, width_y = theta
    p0 = np.zeros((nwalkers, ndim))
    p0[:, 0] = np.random.uniform(max, 2.*max, size=nwalkers)     # amplitude
    p0[:, 1] = np.random.uniform(7., 14., size=nwalkers)         # x
    p0[:, 2] = np.random.uniform(7., 14., size=nwalkers)         # y
    p0[:, 3] = np.random.uniform(.1, 1., size=nwalkers)          # radius
    p0[:, 4] = np.random.uniform(.1, 1., size=nwalkers)          # focus
    p0[:, 5] = np.random.uniform(.1, 0.5, size=nwalkers)         # width_x
    p0[:, 6] = np.random.uniform(.1, 0.5, size=nwalkers)         # width_y

    # Initialize the sampler with the chosen specs.
    #Create the coordinates x and y
    x = np.arange(0, spot.shape[1])
    y = np.arange(0, spot.shape[0])
    #Put the coordinates in a mesh
    xx, yy = np.meshgrid(x, y)

    #Flatten the arrays
    xx = xx.flatten()
    yy = yy.flatten()

    #initiate sampler
    pool = Pool(7) #A hack Dan gave me to not have ghost processes running as with threads keyword
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[xx, yy, data, var], pool=pool)

    # Run a burn-in and set new starting position
    print "Burning-in..."
    pos, prob, state = sampler.run_mcmc(p0, burn)
    best_pos = sampler.flatchain[sampler.flatlnprobability.argmax()]
    pos = emcee.utils.sample_ball(best_pos, best_pos/100., size=nwalkers)
    # Reset the chain to remove the burn-in samples.
    sampler.reset()

    # Starting from the final position in the burn-in chain
    print "Running MCMC..."
    pos, prob, state = sampler.run_mcmc(pos, burn)
    sampler.reset()
    pos, prob, state = sampler.run_mcmc(pos, run, rstate0=state)

    # Print out the mean acceptance fraction
    print "Mean acceptance fraction:", np.mean(sampler.acceptance_fraction)

    #Get the index with the highest probability
    maxprob_index = np.argmax(prob)

    #Get the best parameters and their respective errors and print best fits
    params_fit = pos[maxprob_index]
    errors_fit = [sampler.flatchain[:,i].std() for i in xrange(ndim)]
    amplitudeE, center_xE, center_yE, radiusE, focusE, width_xE, width_yE = errors_fit
    _printResults(params_fit, errors_fit)

    #Best fit model
    amplitude, center_x, center_y, radius, focus, width_x, width_y = params_fit
    airy = models.AiryDisk2D(amplitude, center_x, center_y, radius)
    adata = airy.eval(xx, yy, amplitude, center_x, center_y, radius).reshape(spot.shape)
    f = models.Gaussian2D(1., center_x, center_y, focus, focus, 0.)
    focusdata = f.eval(xx, yy, 1., center_x, center_y, focus, focus, 0.).reshape(spot.shape)
    foc = signal.convolve2d(adata, focusdata, mode='same')
    CCD = models.Gaussian2D(1., CCDx, CCDy, width_x, width_y, 0.)
    CCDdata = CCD.eval(xx, yy, 1., CCDx, CCDy, width_x, width_y, 0.).reshape(spot.shape)
    model = signal.convolve2d(foc, CCDdata, mode='same')
    #save model
    fileIO.writeFITS(model, out+'model.fits', int=False)

    #residuals
    fileIO.writeFITS(model - spot, out+'residual.fits', int=False)
    fileIO.writeFITS(((model - spot)**2 / var.reshape(spot.shape)), out+'residualSQ.fits', int=False)

    # a simple goodness of fit
    gof = (1./(np.size(data) - ndim)) * np.sum((model.flatten() - data)**2 / var)
    print 'GoF:', gof, ' Maximum difference:', np.max(np.abs(model - spot))

    #results and save results
    _printFWHM(width_x, width_y, errors_fit[5], errors_fit[6])
    res = dict(wx=width_x, wy=width_y, wxerr=width_xE, wyerr=width_yE, out=out,
               peakvalue=max, CCDmodel=CCD, CCDmodeldata=CCDdata, GoF=gof)
    fileIO.cPickleDumpDictionary(res, out+'.pkl')

    #plot
    samples = sampler.chain.reshape((-1, ndim))
    extents = None
    if simulation:
        extents = [(0.91*truth, 1.09*truth) for truth in truths]
        extents[1] = (truths[1]*0.995, truths[1]*1.005)
        extents[2] = (truths[2]*0.995, truths[2]*1.005)
        extents[3] = (0.395, 0.425)
        extents[4] = (0.503, 0.517)
    fig = triangle.corner(samples,
                          labels=['amplitude', 'x', 'y', 'radius', 'focus', 'width_x', 'width_y'],
                          truths=truths)#, extents=extents)
    fig.savefig(out+'Triangle.png')

    pool.close()
def forwardModelJointFit(files, out, wavelength, gain=3.1, size=10, burn=50, run=100,
                         spotx=2888, spoty=3514, simulated=False, truths=None):
    """
    Forward models the spot data found from the input files. Models all data simultaneously so that the Airy
    disc centroid and shift from file to file. Assumes that the spot intensity, focus, and the CCD PSF kernel
    are the same for each file. Can be used with simulated and real data.
    """
    print '\n\n\n'
    print '_'*120

    images = len(files)
    orig = []
    image = []
    noise = []
    peakvalues = []
    for file in files:
        print file
        #get data and convert to electrons
        o = pf.getdata(file)*gain

        if simulated:
            data = o
        else:
            #roughly the correct location - to avoid identifying e.g. cosmic rays
            data = o[spoty-(size*3):spoty+(size*3)+1, spotx-(size*3):spotx+(size*3)+1].copy()

        #maximum position within the cutout
        y, x = m.maximum_position(data)

        #spot and the peak pixel within the spot, this is also the CCD kernel position
        spot = data[y-size:y+size+1, x-size:x+size+1].copy()
        orig.append(spot.copy())

        #bias estimate
        if simulated:
            bias = 9000.
            rn = 4.5
        else:
            bias = np.median(o[spoty-size: spoty+size, spotx-220:spotx-20])
            rn = np.std(o[spoty-size: spoty+size, spotx-220:spotx-20])

        print 'Readnoise (e):', rn
        if rn < 2. or rn > 6.:
            print 'NOTE: suspicious readout noise estimate...'
        print 'ADC offset (e):', bias

        #remove bias
        spot -= bias

        #set highly negative values to zero
        spot[spot + rn**2 < 0.] = 0.

        max = np.max(spot)
        print 'Maximum Value:', max
        peakvalues.append(max)

        #noise model
        variance = spot.copy() + rn**2

        #save to a list
        image.append(spot)
        noise.append(variance)

    #sensibility test, try to check if all the files in the fit are of the same dataset
    if np.std(peakvalues) > 5*np.sqrt(np.median(peakvalues)):
        #check for more than 5sigma outliers, however, this is very sensitive to the centroiding of the spot...
        print 'POTENTIAL OUTLIER, please check the input files...'
        print np.std(peakvalues), 5*np.sqrt(np.median(peakvalues))

    #MCMC based fitting
    ndim = 2*images + 5  #xpos, ypos for each image and single amplitude, radius, focus, and sigmaX and sigmaY
    nwalkers = 1000
    print 'Bayesian Fitting, model has %i dimensions' % ndim

    # Choose an initial set of positions for the walkers using the Gaussian fit
    p0 = np.zeros((nwalkers, ndim))
    for x in xrange(images):
        p0[:, 2*x] = np.random.uniform(7., 14., size=nwalkers)      # x
        p0[:, 2*x+1] = np.random.uniform(7., 14., size=nwalkers)    # y
    p0[:, -5] = np.random.uniform(max, 2.*max, size=nwalkers)       # amplitude
    p0[:, -4] = np.random.uniform(.1, 1., size=nwalkers)            # radius
    p0[:, -3] = np.random.uniform(.1, 1., size=nwalkers)            # focus
    p0[:, -2] = np.random.uniform(.1, 0.5, size=nwalkers)           # width_x
    p0[:, -1] = np.random.uniform(.1, 0.5, size=nwalkers)           # width_y

    # Initialize the sampler with the chosen specs.
    #Create the coordinates x and y
    x = np.arange(0, spot.shape[1])
    y = np.arange(0, spot.shape[0])
    #Put the coordinates in a mesh
    xx, yy = np.meshgrid(x, y)

    #Flatten the arrays
    xx = xx.flatten()
    yy = yy.flatten()

    #initiate sampler
    pool = Pool(7) #A hack Dan gave me to not have ghost processes running as with threads keyword
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posteriorJoint, args=[xx, yy, image, noise], pool=pool)

    # Run a burn-in and set new starting position
    print "Burning-in..."
    pos, prob, state = sampler.run_mcmc(p0, burn)
    best_pos = sampler.flatchain[sampler.flatlnprobability.argmax()]
    pos = emcee.utils.sample_ball(best_pos, best_pos/100., size=nwalkers)
    # Reset the chain to remove the burn-in samples.
    sampler.reset()

    # Starting from the final position in the burn-in chain
    print "Running MCMC..."
    pos, prob, state = sampler.run_mcmc(pos, burn)
    sampler.reset()
    pos, prob, state = sampler.run_mcmc(pos, run, rstate0=state)

    # Print out the mean acceptance fraction
    print "Mean acceptance fraction:", np.mean(sampler.acceptance_fraction)

    #Get the index with the highest probability
    maxprob_index = np.argmax(prob)

    #Get the best parameters and their respective errors and print best fits
    params_fit = pos[maxprob_index]
    errors_fit = [sampler.flatchain[:,i].std() for i in xrange(ndim)]
    print params_fit

    #unpack the fixed parameters
    amplitude, radius, focus, width_x, width_y = params_fit[-5:]
    amplitudeE, radiusE, focusE, width_xE, width_yE = errors_fit[-5:]

    #print results
    _printFWHM(width_x, width_y, width_xE, width_yE)

    #save the best models per file
    size = size*2 + 1
    gofs = []
    for index, file in enumerate(files):
        #path, file = os.path.split(file)
        id = 'test/' + out + str(index)
        #X and Y are always in pairs
        center_x = params_fit[2*index]
        center_y = params_fit[2*index+1]

        #1)Generate a model Airy disc
        airy = models.AiryDisk2D(amplitude, center_x, center_y, radius)
        adata = airy.eval(xx, yy, amplitude, center_x, center_y, radius).reshape((size, size))

        #2)Apply Focus
        f = models.Gaussian2D(1., center_x, center_y, focus, focus, 0.)
        focusdata = f.eval(xx, yy, 1., center_x, center_y, focus, focus, 0.).reshape((size, size))
        model = signal.convolve2d(adata, focusdata, mode='same')

        #3)Apply CCD diffusion, approximated with a Gaussian
        CCD = models.Gaussian2D(1., size/2.-0.5, size/2.-0.5, width_x, width_y, 0.)
        CCDdata = CCD.eval(xx, yy, 1., size/2.-0.5, size/2.-0.5, width_x, width_y, 0.).reshape((size, size))
        model = signal.convolve2d(model, CCDdata, mode='same')

        #save the data, model and residuals
        fileIO.writeFITS(orig[index], id+'data.fits', int=False)
        fileIO.writeFITS(image[index], id+'datafit.fits', int=False)
        fileIO.writeFITS(model, id+'model.fits', int=False)
        fileIO.writeFITS(model - image[index], id+'residual.fits', int=False)
        fileIO.writeFITS(((model - image[index])**2 / noise[index]), id+'residualSQ.fits', int=False)

        #a simple goodness of fit
        gof = (1./(np.size(image[index])*images - ndim)) * np.sum((model - image[index])**2 / noise[index])
        print 'GoF:', gof, ' Max difference', np.max(np.abs(model - image[index]))
        gofs.append(gof)

    #save results
    res = dict(wx=width_x, wy=width_y, wxerr=width_xE, wyerr=width_yE, files=files, out=out,
               wavelength=wavelength, peakvalues=np.asarray(peakvalues), CCDmodel=CCD, CCDmodeldata=CCDdata,
               GoFs=gofs)
    fileIO.cPickleDumpDictionary(res, 'test/' + out + '.pkl')

    #plot
    samples = sampler.chain.reshape((-1, ndim))
    #extents = None
    #if simulated:
    #    extents = [(0.9*truth, 1.1*truth) for truth in truths]
    #    print extents
    fig = triangle.corner(samples, labels=['x', 'y']*images + ['amplitude', 'radius', 'focus', 'width_x', 'width_y'],
                          truths=truths)#, extents=extents)
    fig.savefig('test/' + out + 'Triangle.png')

    pool.close()
def fullQuadrantTestMultiTrapSpecies():
    """

    """
    #Thibauts data
    folder = '//Users/sammy/EUCLID/CTItesting/Reconciliation/multitrap/'
    wocti = pf.getdata(folder + 'no_cti.fits')

    #include CTI with my recipe
    ctiMSSLp = addCTI(wocti.copy(), locx=None, bcgr=None, parallel=1, serial=-1, quadrant=2, test=True, single=False)
    ctiMSSLs = addCTI(wocti.copy(), locx=None, bcgr=None, parallel=-1, serial=1, quadrant=2, test=True, single=False)
    ctiMSSLps = addCTI(wocti.copy(), locx=None, bcgr=None, parallel=1, serial=1, quadrant=2, test=True, single=False)

    #save images
    fileIO.writeFITS(ctiMSSLp, 'multitrap/ctiMSSLp.fits', int=False)
    fileIO.writeFITS(ctiMSSLs, 'multitrap/ctiMSSLs.fits', int=False)
    fileIO.writeFITS(ctiMSSLps, 'multitrap/ctiMSSLps.fits', int=False)

    #load Thibaut's data
    Tp = pf.getdata('multitrap/p_cti.fits')
    Ts = pf.getdata('multitrap/s_cti.fits')
    Tps = pf.getdata('multitrap/ps_cti.fits')

    #ratio images
    rp = ctiMSSLp/Tp
    rs = ctiMSSLs/Ts
    rps = ctiMSSLps/Tps
    fileIO.writeFITS(rp, 'multitrap/ctiMSSLdivTp.fits', int=False)
    fileIO.writeFITS(rs, 'multitrap/ctiMSSLdivTs.fits', int=False)
    fileIO.writeFITS(rps, 'multitrap/ctiMSSLdivTps.fits', int=False)
    print 'Parallel Ratio [max, min]:', rp.max(), rp.min()
    print 'Serial Ratio [max, min]:', rs.max(), rs.min()
    print 'Serial+Parallel Ratio [max, min]:', rps.max(), rps.min()

    print 'Checking arrays, parallel'
    np.testing.assert_array_almost_equal(ctiMSSLp, Tp, decimal=6, err_msg='', verbose=True)
    print 'Checking arrays, serial'
    np.testing.assert_array_almost_equal(ctiMSSLs, Ts, decimal=6, err_msg='', verbose=True)
    print 'Checking arrays, serial + parallel'
    np.testing.assert_array_almost_equal(ctiMSSLps, Tps, decimal=6, err_msg='', verbose=True)
def analyseSpotsFitting(files, gaussian=False, pixelvalues=False, bessel=True, maxfev=10000):
    """
    Analyse spot measurements using different fitting methods.

    :param files: names of the FITS files to analyse (should match the IDs)
    :param gaussian: whether or not to do a simple Gaussian fitting analysis
    :param pixelvalues: whether or not to plot pixel values on a grid
    :param bessel: whether or not to do a Bessel + Gaussian convolution analysis
    :param maxfev: maximum number of iterations in the least squares fitting

    :return: None
    """
    log = lg.setUpLogger('spots.log')
    log.info('Starting...')
    over = 24
    settings = dict(itereations=8)
    ids = fileIDs()

    d = {}
    for filename in files:
        tmp = readData(filename, crop=False)
        f = filename.replace('small.fits', '')
        d[f] = tmp

    if pixelvalues:
        #plot differrent pixel values
        plotPixelValues(d, ids)

    if gaussian:
        #fit simple Gaussians
        Gaussians = {}
        for f, im in d.iteritems():
            #horizontal direction
            sumH = np.sum(im, axis=0)
            Hfit = gaussianFit(sumH, initials=[np.max(sumH) - np.median(sumH), 8., 0.4, np.median(sumH)])
            plotLineFits(sumH, Hfit, f)

            #vertical direction
            sumV = np.sum(im, axis=1)
            Vfit = gaussianFit(sumV, initials=[np.max(sumV) - np.median(sumV), 8., 0.4, np.median(sumV)])
            plotLineFits(sumH, Hfit, f, horizontal=False)

            #2D gaussian
            tmp = im.copy() - np.median(im)
            twoD = fit.Gaussian2D(tmp, intials=[np.max(tmp), 7, 7, 0.4, 0.4])

            print f, Hfit['sigma'], twoD[4], Vfit['sigma'], twoD[3], int(np.max(im))
            Gaussians[f] = [Hfit['sigma'], twoD[4], Vfit['sigma'], twoD[3]]

        fileIO.cPickleDumpDictionary(Gaussians, 'SpotmeasurementsGaussian.pk')

        plotGaussianResults(Gaussians, ids, output='line')
        plotGaussianResults(Gaussians, ids, output='twoD', vals=[1, 3])

    if bessel:
        Gaussians = {}
        #Bessel + Gaussian
        hf = 8 * over
        for f, im in d.iteritems():
            #if '21_59_31s' not in f:
            #    continue

            #over sample the data, needed for convolution
            oversampled = ndimage.zoom(im.copy(), over, order=0)
            fileIO.writeFITS(oversampled, f+'block.fits', int=False)

            #find the centre in oversampled frame, needed for bessel and gives a starting point for fitting
            tmp = oversampled.copy() - np.median(oversampled)
            sh = shape.shapeMeasurement(tmp, log, **settings)
            results = sh.measureRefinedEllipticity()
            midx = results['centreX'] - 1.
            midy = results['centreY'] - 1.

            #generate 2D bessel and re-centre using the above centroid, normalize to the maximum image value and
            #save to a FITS file.
            bes = generateBessel(radius=0.45, oversample=over, size=16*over)
            shiftx = -midx + hf
            shifty = -midy + hf
            bes = ndimage.interpolation.shift(bes, [-shifty, -shiftx], order=0)
            bes /= np.max(bes)
            fileIO.writeFITS(bes, f+'bessel.fits', int=False)

            #check the residual with only the bessel and save to a FITS file
            t = ndimage.zoom(bes.copy(), 1./over, order=0)
            t /= np.max(t)
            fileIO.writeFITS(im.copy() - np.median(oversampled) - t*np.max(tmp), f+'residual.fits', int=False)
            fileIO.writeFITS(oversampled - bes.copy()*np.max(tmp), f+'residualOversampled.fits', int=False)

            #best guesses for fitting parameters
            params = [1., results['centreX'], results['centreY'], 0.5, 0.5]

            biassubtracted = im.copy() - np.median(oversampled)
            #error function is a convolution between a bessel function and 2D gaussian - data
            #note that the error function must be on low-res grid because it is the pixel values we try to match
            errfunc = lambda p: np.ravel(ndimage.zoom(signal.fftconvolve(fitf(*p)(*np.indices(tmp.shape)), bes.copy(), mode='same'), 1./over, order=0)*np.max(tmp) - biassubtracted.copy())

            #fit
            res = sp.optimize.leastsq(errfunc, params, full_output=True, maxfev=maxfev)

            #save the fitted residuals
            t = signal.fftconvolve(fitf(*res[0])(*np.indices(tmp.shape)), bes.copy(), mode='same')
            fileIO.writeFITS(res[2]['fvec'].reshape(im.shape), f+'residualFit.fits', int=False)
            fileIO.writeFITS(fitf(*res[0])(*np.indices(tmp.shape)), f+'gaussian.fits', int=False)
            fileIO.writeFITS(t, f+'BesselGausOversampled.fits', int=False)
            fileIO.writeFITS(ndimage.zoom(t, 1./over, order=0), f+'BesselGaus.fits', int=False)

            #print out the results and save to a dictionary
            print results['centreX'], results['centreY'], res[2]['nfev'], res[0]

            #sigmas are symmetric as the width of the fitting function is later squared...
            sigma1 = np.abs(res[0][3])
            sigma2 = np.abs(res[0][4])
            Gaussians[f] = [sigma1, sigma2]

        fileIO.cPickleDumpDictionary(Gaussians, 'SpotmeasurementsBesselGaussian.pk')

        #plot the findings
        plotGaussianResults(Gaussians, ids, output='Bessel', vals=[0, 1])
    xsize, ysize = 300, 300
    xcen, ycen = 150, 150
    sigmax = 27.25
    sigmay = 14.15

    #calculate ellipticity from Sigmas
    e = ellipticityFromSigmas(sigmax, sigmay)

    #generate a 2D gaussian with given properties...
    gaussian2d = Gaussian2D(xcen, ycen, xsize, ysize, sigmax, sigmay)

    #plot
    plot3D(gaussian2d)

    #write FITS file
    files.writeFITS(gaussian2d['Gaussian'], 'gaussian.fits')

    #calculate shape and printout results
    settings = dict(sigma=15., weighted=False)
    sh = shape.shapeMeasurement(gaussian2d['Gaussian'], log, **settings)
    results = sh.measureRefinedEllipticity()
    print
    pprint.pprint(results)
    print e, (e - results['ellipticity']) / e * 100.

    #generate a plot sigma vs ellipticity for a given Gaussian
    plotEllipticityDependency(gaussian2d['Gaussian'], e, log)

    #measureGaussianR2
    measureGaussianR2(log)
        #interpolate to new location
        #note however that sinc interpolation should probably be used instead of spline...
        shiftx = -midx + cenx
        shifty = -midy + ceny
        cutout = interpolation.shift(data, [shifty, shiftx], order=3)

        #take a cutout to match size
        my, mx = cutout.shape
        mx /= 2
        my /= 2
        cutout = cutout[my - cut:my + cut, mx - cut:mx + cut]

        #write output
        print 'Image %i shift' %(i+1), shiftx, shifty, np.argmax(cutout), cutout.shape
        fileIO.writeFITS(cutout, 'cutout'+file.replace('.fits.gz', '.fits'), int=False)

        all.append(cutout)

    #calculate the average PSF
    all = np.asarray(all)
    mean = np.mean(all, axis=0)
    fileIO.writeFITS(mean, 'averagePSF.fits', int=False)

    #write out residuals
    for file in g.glob('cutout*.fits'):
        data = pf.getdata(file)
        residual = data - mean
        fileIO.writeFITS(residual, file.replace('cutout', 'residual'), int=False)
        print file, np.max(residual), np.min(residual), np.std(residual)