Ejemplo n.º 1
0
def bi_linear_T_to_r_factory(T_cuts, lr_1, lr_2):
    """Return a function mapping T -> r built from two polynomial fits.

    ``lr_1`` and ``lr_2`` are polynomial coefficients (highest power first)
    used below ``T_cuts[0]`` and from ``T_cuts[1]`` upwards respectively.
    Between the two cuts a connecting polynomial is used, whose coefficients
    come from ``smooth_connect(*(T_cuts + lr_1 + lr_2))``.
    """
    lower_fit = nlp.poly1d(lr_1)
    upper_fit = nlp.poly1d(lr_2)
    connect_args = T_cuts + lr_1 + lr_2
    bridge = nlp.poly1d(smooth_connect(*connect_args))

    def _piecewise(t):
        """Evaluate the piecewise fit at a single value ``t``."""
        if t < T_cuts[0]:
            return lower_fit(t)
        if t < T_cuts[1]:
            return bridge(t)
        return upper_fit(t)

    def tmp(T):
        """Map T -> r; accepts either a scalar or an iterable of values."""
        values = np.array(T)
        # A 0-d array means the caller passed a scalar.
        if values.ndim == 0:
            return _piecewise(values)
        return np.array([_piecewise(t) for t in values])

    return tmp
Ejemplo n.º 2
0
def Tchebichev_coeffs(ordre):
    """Return a dict mapping degree -> Chebyshev polynomial (as ``poly1d``).

    The entries for degrees 0 and 1 are always present; higher degrees up to
    ``ordre`` are filled in with the recurrence
    ``T_n = 2 * x * T_(n-1) - T_(n-2)``.
    """
    x = poly1d([1, 0])
    polys = {0: poly1d([1]), 1: poly1d([1, 0])}

    degree = 2
    while degree <= ordre:
        polys[degree] = 2 * x * polys[degree - 1] - polys[degree - 2]
        degree += 1

    return polys
def lagrange(x, w, verbose=False):
    """Build the Lagrange interpolating polynomial through ``(x[j], w[j])``.

    Parameters:
        x: sequence of interpolation nodes.
        w: sequence of values at the nodes (same length as ``x``).
        verbose: when true the polynomial is printed with a heading,
            otherwise it is printed without one (the polynomial is printed
            in both cases, as in the original implementation).

    Returns:
        poly1d: the interpolating polynomial (the zero polynomial when ``x``
        is empty).
    """
    M = len(x)
    p = poly1d(0.0)
    # ``range`` instead of the Python-2-only ``xrange`` so the function also
    # runs on Python 3.
    for j in range(M):
        # Start from the constant w[j] and multiply in every basis factor
        # (X - x[k]) / (x[j] - x[k]) for k != j.
        pt = poly1d(w[j])
        for k in range(M):
            if k == j:
                continue
            fac = x[j] - x[k]
            pt *= poly1d([1.0, -x[k]]) / fac
        p += pt

    if verbose:
        print("Lagrangerov interpolacny polynom je:\n{}".format(p))
    else:
        print(p)
    return p
Ejemplo n.º 4
0
def seprate(numberator, roots):
    """Separate a fraction into per-root terms.

    For every root ``r`` in ``roots`` the numerator evaluated at ``r`` is
    divided by the product of ``(z - other)`` over all roots whose value
    differs from ``r``, evaluated at ``r``.

    Parameters:
        numberator: numerator polynomial (anything ``P.polyval`` accepts).
        roots: sequence of denominator roots.

    Returns:
        list: one ``[coefficient, root]`` pair per entry of ``roots``, in the
        same order.
    """
    res = []
    for root in roots:
        # Product of (z - other) for every root with a different value.
        others = P.poly1d([complex(1, 0)])
        for other in roots:
            if not root == other:
                others = others * P.poly1d([complex(1, 0), -other])
        coefficient = P.polyval(numberator, root) / P.polyval(others, root)
        res.append([coefficient, root])
    return res
Ejemplo n.º 5
0
def calcError(h,x,y,size_list):
    """Return the mean squared error of the polynomial ``h`` on the data.

    ``h`` holds polynomial coefficients (highest power first); the first
    ``size_list`` pairs ``(x[j], y[j])`` are compared against it.
    """
    model = poly1d(np.array(h))
    squared_residuals = (
        math.pow(model(x[idx]) - y[idx], 2) for idx in range(0, size_list)
    )
    return sum(squared_residuals) / size_list
Ejemplo n.º 6
0
def calcError(h, x, y, size_list):
    """Compute the mean squared error of a polynomial fit.

    The polynomial with coefficients ``h`` (highest power first) is evaluated
    at the first ``size_list`` entries of ``x`` and compared with the matching
    entries of ``y``.
    """
    fit = poly1d(np.array(h))
    total = 0
    for idx in range(size_list):
        residual = fit(x[idx]) - y[idx]
        total += math.pow(residual, 2)
    return total / size_list
Ejemplo n.º 7
0
def mypolyfit(x, y, order=1, verbose=1):
    """
    coeffs, yfit, polyModel = mypolyfit(x, y, order=1, verbose=1)

    Least-squares polynomial fit of ``y`` against ``x``.

    Parameters:
        x, y: data to fit.
        order: degree of the fitted polynomial.
        verbose: when truthy, print the fitted coefficients.

    Returns:
        tuple: ``(coeffs, yfit, polyModel)`` where ``coeffs`` are the fitted
        coefficients (highest power first), ``yfit`` is the model evaluated
        at ``x`` and ``polyModel`` is the ``poly1d`` model itself.
    """
    # Import from the public numpy namespace: ``numpy.lib.polynomial`` is not
    # a public import path on NumPy 2.x.
    from numpy import polyfit, poly1d
    coeffs = polyfit(x, y, order)
    polyModel = poly1d(coeffs)
    if verbose:
        print("Fit coeffs:", coeffs)
    return coeffs, polyModel(x), polyModel
Ejemplo n.º 8
0
def getQ():
    """Interactively read the numerator coefficients and return a poly1d.

    The user is first asked for the number of coefficients (degree + 1) and
    then, from the highest power of z down to z**0, for the integer real and
    imaginary part of each coefficient.
    """
    n = int(input("numberator degree +1 :"))
    coefficients = []
    for power in range(n - 1, -1, -1):
        re = int(input("enter real part for z**" + str(power) + ":"))
        im = int(input("enter imaginary part for z**" + str(power) + ":"))
        coefficients.append(complex(re, im))
    return P.poly1d(coefficients)
Ejemplo n.º 9
0
def interpolation_Lagrange(listeX, listeY):
    """Return the Lagrange interpolating polynomial for the given points.

    For each node in ``listeX`` the basis polynomial returned by
    ``pi_Lagrange`` is weighted by the matching entry of ``listeY`` and the
    weighted terms are summed.
    """
    total = poly1d([])
    for index, node in enumerate(listeX):
        basis = pi_Lagrange(node, listeX)
        total = total + (listeY[index] * basis)
    return total
Ejemplo n.º 10
0
def primitive(polynome):
    """Return an antiderivative of ``polynome`` with zero constant term.

    The coefficient of ``x**i`` in the input is divided by ``i + 1`` and
    stored as the coefficient of ``x**(i + 1)`` in the result.
    """
    result = poly1d([])
    for power in range(polynome.order + 1):
        result[power + 1] = polynome[power] * 1 / (power + 1)
    return result
Ejemplo n.º 11
0
def removeZero(denominator, roots):
    """Strip every zero root (factor ``z``) from the denominator.

    For each root equal to zero the denominator is divided by ``z`` and the
    module-level counter ``zeroCount`` is incremented.

    Parameters:
        denominator: denominator polynomial.
        roots: sequence of the denominator's roots.

    Returns:
        list: ``[denominator, roots]`` where ``denominator`` is the reduced
        polynomial and ``roots`` is a new list holding the non-zero roots in
        their original order.
    """
    global zeroCount
    # Collect the surviving roots in a fresh list. Removing by position while
    # iterating shifts later indices and ends up dropping the wrong element
    # once a second zero root is encountered.
    remaining = []
    for root in roots:
        if root == complex(0, 0):
            quotient = P.polydiv(denominator, P.poly1d([complex(1, 0), 0]))
            denominator = quotient[0]
            zeroCount += 1
        else:
            remaining.append(root)
    return [denominator, remaining]
Ejemplo n.º 12
0
def pi_Lagrange(xi, listeX):
    """Return the Lagrange basis polynomial attached to the node ``xi``.

    The result is the product of ``(X - element) / (xi - element)`` over
    every ``element`` of ``listeX`` that differs from ``xi``. When no such
    element exists the plain integer ``1`` is returned.
    """
    X = poly1d([1, 0])
    produit = 1
    for noeud in listeX:
        if noeud == xi:
            continue
        produit = produit * ((X - noeud) / (xi - noeud))
    return produit
Ejemplo n.º 13
0
    def get_level_parameters(cls, level):
        """
        Compute the Newton-Cotes (NC) weights for ``level`` equally spaced
        points.

        :param int level: Integer greater than one (number of points).
        :return: List with one weight per point. Each weight is the integral
                 over ``[0, level - 1]`` of the Lagrange basis polynomial of
                 the nodes ``0, 1, ..., level - 1``. For example ``level=2``
                 gives ``[0.5, 0.5]`` and ``level=3`` gives
                 ``[1/3, 4/3, 1/3]``.
        :rtype: List of floats
        """
        weights = []
        last = level - 1
        for elem in range(level):
            # Numerator of the basis polynomial: product of (x - node) over
            # every node other than ``elem``.
            param = 1
            for node in range(level):
                if node == elem:
                    continue
                param = param * poly1d([1, -node])
            antiderivative = polyint(param)
            integral = polyval(antiderivative, last) - polyval(antiderivative, 0)
            # Reciprocal of the basis denominator prod(elem - node):
            # (-1)**(last - elem) / (elem! * (last - elem)!).
            a = math.pow(-1, last - elem) / math.factorial(elem) / math.factorial(last - elem)
            weights.append(integral * a)
        return weights
Ejemplo n.º 14
0
def polyFromRoot(xs):
    """Return the monic polynomial whose roots are the entries of ``xs``.

    Starts from the constant complex polynomial 1 and multiplies in one
    ``(z - root)`` factor per entry.
    """
    product = P.poly1d([complex(1, 0)])
    for root in xs:
        factor = P.poly1d([1, -root])
        product = product * factor
    return product
Ejemplo n.º 15
0
def extinction(spec, red, coord):
    """
    De-redden each input spectrum with a CCM-style extinction curve.

    For every spectrum an E(B-V) value is queried from the Bayestar dust map
    at the object's sky position and comoving distance (WMAP9 cosmology), and
    the flux is multiplied by ``10**(0.4 * A_lambda)`` with R_V fixed at 3.1.

    :param spec: (numpy array) XSpectrum1D objects: use clamato_read.py
    :param red: (numpy array) redshift values
    :param coord: (numpy array) coordinates; ``coord[i][0]`` and
        ``coord[i][1]`` are passed to SkyCoord in degrees (FK5 frame)

    :return:
        unred_spec: (numpy array) de-reddened spec
    """

    import numpy as np
    # Public import path; ``numpy.lib.polynomial`` is not importable on
    # NumPy 2.x.
    from numpy import poly1d
    import astropy.units as u
    from astropy.coordinates import SkyCoord
    from dustmaps.bayestar import BayestarQuery
    from astropy.cosmology import WMAP9 as cosmo
    from linetools.spectra.xspectrum1d import XSpectrum1D

    r = range(len(spec))

    Mpc = cosmo.comoving_distance(red)
    bayestar = BayestarQuery()
    coords = [
        SkyCoord(coord[i][0] * u.deg,
                 coord[i][1] * u.deg,
                 distance=Mpc[i],
                 frame='fk5') for i in r
    ]
    ebv = [bayestar(i) for i in coords]  # E(B-V) value for each galaxy

    unred_spec = []

    for i in r:

        # Convert to inverse microns (assumes wavelengths are in Angstroms —
        # TODO confirm against the reader that produces ``spec``).
        x = 10000. / np.array(spec[i].wavelength)
        npts = x.size
        # Builtin ``float``: the ``np.float`` alias was removed in NumPy 1.24.
        a = np.zeros(npts, dtype=float)
        b = np.zeros(npts, dtype=float)
        r_v = 3.1

        # 0.3 <= x < 1.1
        good = np.where((x >= 0.3) & (x < 1.1))
        if len(good[0]) > 0:
            a[good] = 0.574 * x[good]**(1.61)
            b[good] = -0.527 * x[good]**(1.61)

        # 1.1 <= x < 3.3
        good = np.where((x >= 1.1) & (x < 3.3))
        if len(good[0]) > 0:  # Use new constants from O'Donnell (1994)
            y = x[good] - 1.82

            # Coefficients are listed from the constant term upwards.
            c1 = np.array([
                1., 0.104, -0.609, 0.701, 1.137, -1.718, -0.827, 1.647, -0.505
            ])  # from O'Donnell
            c2 = np.array([
                0., 1.952, 2.908, -3.989, -7.985, 11.102, 5.491, -10.805, 3.347
            ])

            # poly1d expects the highest power first, hence the reversal.
            a[good] = poly1d(c1[::-1])(y)
            b[good] = poly1d(c2[::-1])(y)

        # 3.3 <= x < 8
        good = np.where((x >= 3.3) & (x < 8))
        if len(good[0]) > 0:
            y = x[good]

            # NOTE(review): the original marks the extra "+ f_a" / "+ f_b"
            # terms as commented out; they are still omitted here — confirm
            # this is intended for the wavelength range of the input spectra.
            a[good] = 1.752 - 0.316 * y - (0.104 /
                                           ((y - 4.67)**2 + 0.341))
            b[good] = -3.090 + 1.825 * y + (1.206 /
                                            ((y - 4.62)**2 + 0.263))

        # 8 <= x <= 11
        good = np.where((x >= 8) & (x <= 11))
        if len(good[0]) > 0:
            y = x[good] - 8.

            c1 = np.array([-1.073, -0.628, 0.137, -0.070])
            c2 = np.array([13.670, 4.257, -0.420, 0.374])
            a[good] = poly1d(c1[::-1])(y)
            b[good] = poly1d(c2[::-1])(y)

        # Now apply extinction correction to input flux vector

        a_v = r_v * ebv[i]
        a_lambda = a_v * (a + b / r_v)

        funred = spec[i].flux * 10.**(0.4 * a_lambda)  # Derive unreddened flux
        funred = np.asarray(funred)
        unred_spec.append(XSpectrum1D(spec[i].wavelength, funred, spec[i].sig))

    return np.asarray(unred_spec)
Ejemplo n.º 16
0
def ccm_unred(wave, flux, a_v=None, ebv=None, r_v=3.1):
    """
    Deredden a flux vector using the CCM 1989 parameterization.

    The reddening curve is that of Cardelli, Clayton, & Mathis (1989 ApJ.
    345, 245), including the update for the near-UV given by O'Donnell
    (1994, ApJ, 422, 158).  Parameterization is valid from the IR to the
    far-UV (3.5 microns to 0.1 microns).  This is a Python port of the IDL
    procedure CCM_UNRED (W. Landsman, Hughes/STX, January 1992; far-UV
    extrapolation added Dec. 1993, O'Donnell coefficients Feb. 1994).

    Users might wish to consider using the alternate procedure FM_UNRED
    which uses the extinction curve of Fitzpatrick (1999).

    Parameters
    ----------
    wave : array-like
        Wavelength vector (Angstroms).
    flux : array-like
        Calibrated flux vector, same number of elements as ``wave``.  It is
        not modified; the dereddened flux is returned.
    a_v : float, optional
        Total extinction A(V).  When None it is computed as ``r_v * ebv``.
    ebv : float, optional
        Color excess E(B-V), scalar; only used when ``a_v`` is None.  If a
        negative value is supplied, fluxes are reddened rather than
        dereddened.
    r_v : float, optional
        Ratio of total to selective extinction R(V) = A(V) / E(B - V),
        default 3.1.  Extreme values of R(V) range from 2.75 to 5.3.

    Returns
    -------
    funred
        Unreddened flux vector, same units and number of elements as
        ``flux``.  Points whose inverse wavelength falls outside
        0.3 - 11 um^-1 are returned unchanged.

    Raises
    ------
    TypeError
        If neither ``a_v`` nor ``ebv`` is given.

    Notes
    -----
    (1) The CCM curve shows good agreement with the Savage & Mathis (1979)
        ultraviolet curve shortward of 1400 A, but is probably preferable
        between 1200 & 1400 A.
    (2) Many sightlines with peculiar ultraviolet interstellar extinction
        can be represented with a CCM curve, if the proper value of R(V) is
        supplied.
    (3) Curve is extrapolated between 912 & 1000 A as suggested by Longo
        et al. (1989, ApJ, 339, 474).
    (4) Valencic et al. (2004, ApJ, 616, 912) revise the ultraviolet CCM
        curve (3.3 -- 8.0 um-1).  But since their revised curve does not
        connect smoothly with longer & shorter wavelengths, it is not
        included here.

    Example
    -------
    Redden a flat spectrum between 1200 A and 3200 A by E(B-V) = 0.1::

        w = 1200 + numpy.arange(40) * 50
        f = w * 0 + 1
        fnew = ccm_unred(w, f, ebv=-0.1)
    """
    if a_v is None and ebv is None:
        # Fail early with a clear message instead of "float * NoneType".
        raise TypeError("ccm_unred() requires either a_v or ebv")

    x = 10000. / numpy.array(wave)  # Convert to inverse microns
    npts = x.size
    # Builtin ``float``: the ``numpy.float`` alias was removed in NumPy 1.24.
    a = numpy.zeros(npts, dtype=float)
    b = numpy.zeros(npts, dtype=float)

    # Infrared
    good = numpy.where((x >= 0.3) & (x < 1.1))
    if len(good[0]) > 0:
        a[good] = 0.574 * x[good]**(1.61)
        b[good] = -0.527 * x[good]**(1.61)

    # Optical/NIR
    good = numpy.where((x >= 1.1) & (x < 3.3))
    if len(good[0]) > 0:  # Use new constants from O'Donnell (1994)
        y = x[good] - 1.82
        # Coefficients are listed from the constant term upwards (the IDL
        # poly() convention of the original procedure).
        c1 = numpy.array(
            [1., 0.104, -0.609, 0.701, 1.137, -1.718, -0.827, 1.647,
             -0.505])  # from O'Donnell
        c2 = numpy.array(
            [0., 1.952, 2.908, -3.989, -7.985, 11.102, 5.491, -10.805, 3.347])

        # poly1d wants the highest power first, so reverse them.
        a[good] = poly1d(c1[::-1])(y)
        b[good] = poly1d(c2[::-1])(y)

    # Mid-UV
    good = numpy.where((x >= 3.3) & (x < 8))
    if len(good[0]) > 0:
        y = x[good]
        # Extra curvature terms, non-zero only for y > 5.9.
        f_a = numpy.zeros(y.size, dtype=float)
        f_b = numpy.zeros(y.size, dtype=float)
        beyond = y > 5.9
        if beyond.any():
            y1 = y[beyond] - 5.9
            f_a[beyond] = -0.04473 * y1**2 - 0.009779 * y1**3
            f_b[beyond] = 0.2130 * y1**2 + 0.1207 * y1**3

        a[good] = 1.752 - 0.316 * y - (0.104 / ((y - 4.67)**2 + 0.341)) + f_a
        b[good] = -3.090 + 1.825 * y + (1.206 / ((y - 4.62)**2 + 0.263)) + f_b

    # Far-UV
    good = numpy.where((x >= 8) & (x <= 11))
    if len(good[0]) > 0:
        y = x[good] - 8.
        c1 = numpy.array([-1.073, -0.628, 0.137, -0.070])
        c2 = numpy.array([13.670, 4.257, -0.420, 0.374])
        a[good] = poly1d(c1[::-1])(y)
        b[good] = poly1d(c2[::-1])(y)

    # Now apply extinction correction to input flux vector

    if a_v is None:
        a_v = r_v * ebv

    a_lambda = a_v * (a + b / r_v)
    funred = flux * 10.**(0.4 * a_lambda)  # Derive unreddened flux

    return funred
Ejemplo n.º 17
0
def ccm_unred(wave, flux, a_v=None, ebv=None, r_v=3.1):
    """
    Deredden a flux vector using the CCM 1989 parameterization.

    The reddening curve is that of Cardelli, Clayton, & Mathis (1989 ApJ.
    345, 245), including the update for the near-UV given by O'Donnell
    (1994, ApJ, 422, 158).  Parameterization is valid from the IR to the
    far-UV (3.5 microns to 0.1 microns).  This is a Python port of the IDL
    procedure CCM_UNRED (W. Landsman, Hughes/STX, January 1992; far-UV
    extrapolation added Dec. 1993, O'Donnell coefficients Feb. 1994).

    Users might wish to consider using the alternate procedure FM_UNRED
    which uses the extinction curve of Fitzpatrick (1999).

    Parameters
    ----------
    wave : array-like
        Wavelength vector (Angstroms).
    flux : array-like
        Calibrated flux vector, same number of elements as ``wave``.  It is
        not modified; the dereddened flux is returned.
    a_v : float, optional
        Total extinction A(V).  When None it is computed as ``r_v * ebv``.
    ebv : float, optional
        Color excess E(B-V), scalar; only used when ``a_v`` is None.  If a
        negative value is supplied, fluxes are reddened rather than
        dereddened.
    r_v : float, optional
        Ratio of total to selective extinction R(V) = A(V) / E(B - V),
        default 3.1.  Extreme values of R(V) range from 2.75 to 5.3.

    Returns
    -------
    funred
        Unreddened flux vector, same units and number of elements as
        ``flux``.  Points whose inverse wavelength falls outside
        0.3 - 11 um^-1 are returned unchanged.

    Raises
    ------
    TypeError
        If neither ``a_v`` nor ``ebv`` is given.

    Notes
    -----
    (1) The CCM curve shows good agreement with the Savage & Mathis (1979)
        ultraviolet curve shortward of 1400 A, but is probably preferable
        between 1200 & 1400 A.
    (2) Many sightlines with peculiar ultraviolet interstellar extinction
        can be represented with a CCM curve, if the proper value of R(V) is
        supplied.
    (3) Curve is extrapolated between 912 & 1000 A as suggested by Longo
        et al. (1989, ApJ, 339, 474).
    (4) Valencic et al. (2004, ApJ, 616, 912) revise the ultraviolet CCM
        curve (3.3 -- 8.0 um-1).  But since their revised curve does not
        connect smoothly with longer & shorter wavelengths, it is not
        included here.

    Example
    -------
    Redden a flat spectrum between 1200 A and 3200 A by E(B-V) = 0.1::

        w = 1200 + numpy.arange(40) * 50
        f = w * 0 + 1
        fnew = ccm_unred(w, f, ebv=-0.1)
    """
    if a_v is None and ebv is None:
        # Fail early with a clear message instead of "float * NoneType".
        raise TypeError("ccm_unred() requires either a_v or ebv")

    x = 10000.0 / numpy.array(wave)  # Convert to inverse microns
    npts = x.size
    # Builtin ``float``: the ``numpy.float`` alias was removed in NumPy 1.24.
    a = numpy.zeros(npts, dtype=float)
    b = numpy.zeros(npts, dtype=float)

    # Infrared
    good = numpy.where((x >= 0.3) & (x < 1.1))
    if len(good[0]) > 0:
        a[good] = 0.574 * x[good] ** (1.61)
        b[good] = -0.527 * x[good] ** (1.61)

    # Optical/NIR
    good = numpy.where((x >= 1.1) & (x < 3.3))
    if len(good[0]) > 0:  # Use new constants from O'Donnell (1994)
        y = x[good] - 1.82
        # Coefficients are listed from the constant term upwards (the IDL
        # poly() convention of the original procedure).
        c1 = numpy.array([1.0, 0.104, -0.609, 0.701, 1.137, -1.718, -0.827, 1.647, -0.505])  # from O'Donnell
        c2 = numpy.array([0.0, 1.952, 2.908, -3.989, -7.985, 11.102, 5.491, -10.805, 3.347])

        # poly1d wants the highest power first, so reverse them.
        a[good] = poly1d(c1[::-1])(y)
        b[good] = poly1d(c2[::-1])(y)

    # Mid-UV
    good = numpy.where((x >= 3.3) & (x < 8))
    if len(good[0]) > 0:
        y = x[good]
        # Extra curvature terms, non-zero only for y > 5.9.
        f_a = numpy.zeros(y.size, dtype=float)
        f_b = numpy.zeros(y.size, dtype=float)
        beyond = y > 5.9
        if beyond.any():
            y1 = y[beyond] - 5.9
            f_a[beyond] = -0.04473 * y1 ** 2 - 0.009779 * y1 ** 3
            f_b[beyond] = 0.2130 * y1 ** 2 + 0.1207 * y1 ** 3

        a[good] = 1.752 - 0.316 * y - (0.104 / ((y - 4.67) ** 2 + 0.341)) + f_a
        b[good] = -3.090 + 1.825 * y + (1.206 / ((y - 4.62) ** 2 + 0.263)) + f_b

    # Far-UV
    good = numpy.where((x >= 8) & (x <= 11))
    if len(good[0]) > 0:
        y = x[good] - 8.0
        c1 = numpy.array([-1.073, -0.628, 0.137, -0.070])
        c2 = numpy.array([13.670, 4.257, -0.420, 0.374])
        a[good] = poly1d(c1[::-1])(y)
        b[good] = poly1d(c2[::-1])(y)

    # Now apply extinction correction to input flux vector

    if a_v is None:
        a_v = r_v * ebv

    a_lambda = a_v * (a + b / r_v)
    funred = flux * 10.0 ** (0.4 * a_lambda)  # Derive unreddened flux

    return funred
Ejemplo n.º 18
0
    def extract_features(self, line, unigrams, text_stats):
        """Extract features from a given line

        The raw counts are normalised into ratios, reordered, passed through
        a degree-1 polynomial expansion and stored in ``self.features``.

        Args:
            line (Line): Line to get features from
            unigrams (Unigrams): Unigrams for the given line
            text_stats (Statistics): Statistics of the text the line is coming from

        Returns:
            list: List of the features (the same list kept in ``self.features``)
        """
        # Simple features (indices 0-4 of the raw list)
        features = [
            float(line.stats["orig"].get_stat("lw_char")),
            float(line.stats["orig"].get_stat("up_char")),
            float(line.stats["orig"].get_stat("sp_char")),
            float(line.stats["orig"].get_stat("nb_char")),
            float(len(line.tokens)),
        ]

        # Additional features (indices 5-11 of the raw list)
        fappend = features.append
        fappend(line.get_clean_stats().get_stat("lw_char"))
        fappend(line.get_clean_stats().get_stat("up_char"))
        fappend(line.get_clean_stats().get_stat("sp_char"))
        fappend(line.get_clean_stats().get_stat("nb_char"))
        fappend(line.get_line_score())
        fappend(len(line.get_orig_line()))
        fappend(len(line.get_clean_line()))

        u = unigrams

        # Index 12: mean length of the first element of each token
        tk_len = [len(token[0]) for token in line.tokens]
        word_avg_len = 0

        if len(tk_len) > 0:
            word_avg_len = mean(tk_len)

        fappend(float(word_avg_len))

        # Index 13: mean unigram value of the first element of each token
        t0 = [u[tk[0]] for tk in line.tokens]
        s0 = 0

        if len(t0) != 0:
            s0 = mean(t0)

        fappend(float(s0))

        # Index 14: mean unigram value of the second token element, when set
        t1 = [u[tk[1]] for tk in line.tokens if tk[1] is not None]
        s1 = 0

        if len(t1) != 0:
            s1 = mean(t1)

        fappend(float(s1))

        # Index 15: mean unigram value over the keys of the third token element
        t2 = [u[t] for tk in line.tokens if tk[2] is not None for t in tk[2].keys()]
        s2 = 0

        if len(t2) != 0:
            s2 = mean(t2)

        fappend(float(s2))

        # Regularization
        orig_chars = sum(features[:4])
        clean_chars = sum(features[5:9])

        # NOTE(review): unlike clean_chars below, orig_chars is not guarded
        # against zero, so a line whose four "orig" counts are all zero raises
        # ZeroDivisionError — confirm callers never pass such a line.
        f = [
            features[0] / orig_chars,
            features[1] / orig_chars,
            features[2] / orig_chars,
            features[3] / orig_chars
        ]

        if clean_chars != 0:
            f += [features[5] / clean_chars,
                  features[6] / clean_chars,
                  features[7] / clean_chars,
                  features[8] / clean_chars]
        else:
            f += [0, 0, 0, 0]

        f += [features[9],
              features[4] / text_stats.get_stat("word_avg_nb"),
              features[12] / text_stats.get_stat("word_avg_length"),
              features[10] / text_stats.get_stat("line_avg_length"),
              features[11] / text_stats.get_stat("line_avg_length")]

        if features[13] != 0:
            f.append(features[14] / features[13])
            f.append(features[15] / features[13])
        else:
            f.append(0)
            f.append(0)

        features = f

        # Ordering the data set (the line score at index 8 is left out)
        features = [
            features[11],  # Original line length / "line_avg_length"
            features[12],  # Clean line length / "line_avg_length"
            features[9],  # Token count / "word_avg_nb"
            features[10],  # Average token length / "word_avg_length"
            features[13],  # s1 / s0 unigram ratio
            features[14],  # s2 / s0 unigram ratio
            features[0],  # Original "lw_char" ratio
            features[1],  # Original "up_char" ratio
            features[2],  # Original "sp_char" ratio
            features[3],  # Original "nb_char" ratio
            features[4],  # Clean "lw_char" ratio
            features[5],  # Clean "up_char" ratio
            features[6],  # Clean "sp_char" ratio
            features[7],  # Clean "nb_char" ratio
        ]

        # Polynomial features
        degree = 1
        poly_feat = []
        # NOTE(review): poly1d drops leading zero coefficients, so when the
        # first ordered feature is exactly 0 the resulting list is shorter and
        # the index removed below shifts — verify this cannot happen upstream.
        p_feat = poly1d(features)

        # ``range`` instead of the Python-2-only ``xrange``.
        for d in range(degree):
            poly_feat += (p_feat ** (d+1)).coeffs.tolist()

        # Drops the entry at position 5 (the s2 / s0 ratio in the usual case).
        del poly_feat[5]

        self.features = poly_feat

        return self.features
Ejemplo n.º 19
0
    def extract_features(self, line, unigrams, text_stats):
        """Extract features from a given line

        The raw counts are normalised into ratios, reordered, passed through
        a degree-1 polynomial expansion and stored in ``self.features``.

        Args:
            line (Line): Line to get features from
            unigrams (Unigrams): Unigrams for the given line
            text_stats (Statistics): Statistics of the text the line is coming from

        Returns:
            list: List of the features (the same list kept in ``self.features``)
        """
        # Simple features (indices 0-4 of the raw list)
        features = [
            float(line.stats["orig"].get_stat("lw_char")),
            float(line.stats["orig"].get_stat("up_char")),
            float(line.stats["orig"].get_stat("sp_char")),
            float(line.stats["orig"].get_stat("nb_char")),
            float(len(line.tokens)),
        ]

        # Additional features (indices 5-11 of the raw list)
        fappend = features.append
        fappend(line.get_clean_stats().get_stat("lw_char"))
        fappend(line.get_clean_stats().get_stat("up_char"))
        fappend(line.get_clean_stats().get_stat("sp_char"))
        fappend(line.get_clean_stats().get_stat("nb_char"))
        fappend(line.get_line_score())
        fappend(len(line.get_orig_line()))
        fappend(len(line.get_clean_line()))

        u = unigrams

        # Index 12: mean length of the first element of each token
        tk_len = [len(token[0]) for token in line.tokens]
        word_avg_len = 0

        if len(tk_len) > 0:
            word_avg_len = mean(tk_len)

        fappend(float(word_avg_len))

        # Index 13: mean unigram value of the first element of each token
        t0 = [u[tk[0]] for tk in line.tokens]
        s0 = 0

        if len(t0) != 0:
            s0 = mean(t0)

        fappend(float(s0))

        # Index 14: mean unigram value of the second token element, when set
        t1 = [u[tk[1]] for tk in line.tokens if tk[1] is not None]
        s1 = 0

        if len(t1) != 0:
            s1 = mean(t1)

        fappend(float(s1))

        # Index 15: mean unigram value over the keys of the third token element
        t2 = [
            u[t] for tk in line.tokens if tk[2] is not None
            for t in tk[2].keys()
        ]
        s2 = 0

        if len(t2) != 0:
            s2 = mean(t2)

        fappend(float(s2))

        # Regularization
        orig_chars = sum(features[:4])
        clean_chars = sum(features[5:9])

        # NOTE(review): unlike clean_chars below, orig_chars is not guarded
        # against zero, so a line whose four "orig" counts are all zero raises
        # ZeroDivisionError — confirm callers never pass such a line.
        f = [
            features[0] / orig_chars, features[1] / orig_chars,
            features[2] / orig_chars, features[3] / orig_chars
        ]

        if clean_chars != 0:
            f += [
                features[5] / clean_chars, features[6] / clean_chars,
                features[7] / clean_chars, features[8] / clean_chars
            ]
        else:
            f += [0, 0, 0, 0]

        f += [
            features[9], features[4] / text_stats.get_stat("word_avg_nb"),
            features[12] / text_stats.get_stat("word_avg_length"),
            features[10] / text_stats.get_stat("line_avg_length"),
            features[11] / text_stats.get_stat("line_avg_length")
        ]

        if features[13] != 0:
            f.append(features[14] / features[13])
            f.append(features[15] / features[13])
        else:
            f.append(0)
            f.append(0)

        features = f

        # Ordering the data set (the line score at index 8 is left out)
        features = [
            features[11],  # Original line length / "line_avg_length"
            features[12],  # Clean line length / "line_avg_length"
            features[9],  # Token count / "word_avg_nb"
            features[10],  # Average token length / "word_avg_length"
            features[13],  # s1 / s0 unigram ratio
            features[14],  # s2 / s0 unigram ratio
            features[0],  # Original "lw_char" ratio
            features[1],  # Original "up_char" ratio
            features[2],  # Original "sp_char" ratio
            features[3],  # Original "nb_char" ratio
            features[4],  # Clean "lw_char" ratio
            features[5],  # Clean "up_char" ratio
            features[6],  # Clean "sp_char" ratio
            features[7],  # Clean "nb_char" ratio
        ]

        # Polynomial features
        degree = 1
        poly_feat = []
        # NOTE(review): poly1d drops leading zero coefficients, so when the
        # first ordered feature is exactly 0 the resulting list is shorter and
        # the index removed below shifts — verify this cannot happen upstream.
        p_feat = poly1d(features)

        # ``range`` instead of the Python-2-only ``xrange``.
        for d in range(degree):
            poly_feat += (p_feat**(d + 1)).coeffs.tolist()

        # Drops the entry at position 5 (the s2 / s0 ratio in the usual case).
        del poly_feat[5]

        self.features = poly_feat

        return self.features