def bi_linear_T_to_r_factory(T_cuts, lr_1, lr_2):
    """Build a callable mapping T -> r from two fits joined by a connector.

    Parameters
    ----------
    T_cuts : sequence
        ``T_cuts[0]`` and ``T_cuts[1]`` delimit the transition region.
    lr_1 : sequence
        ``poly1d`` coefficients of the fit used for ``T < T_cuts[0]``.
    lr_2 : sequence
        ``poly1d`` coefficients of the fit used for ``T >= T_cuts[1]``.

    ``T_cuts + lr_1 + lr_2`` is concatenated and unpacked into
    ``smooth_connect``, so the three arguments must be sequences that
    support ``+`` with one another (e.g. all tuples or all lists).

    Returns
    -------
    callable
        Function of ``T`` (scalar or 1-D iterable) returning ``r``.
    """
    low_fit = nlp.poly1d(lr_1)
    high_fit = nlp.poly1d(lr_2)
    # Connecting polynomial for the region between the two cuts; its
    # coefficients come from ``smooth_connect`` (a cubic, per the original
    # description of this function).
    bridge = nlp.poly1d(smooth_connect(*(T_cuts + lr_1 + lr_2)))

    def tmp(T):
        """Map T -> r using a bi linear fit with a cubic interpolation between them"""

        def evaluate_one(value):
            # Pick the polynomial by the region the value falls in.
            if value < T_cuts[0]:
                return low_fit(value)
            if value < T_cuts[1]:
                return bridge(value)
            return high_fit(value)

        values = np.array(T)
        if values.ndim == 0:
            return evaluate_one(values)
        return np.array([evaluate_one(item) for item in values])

    return tmp
def Tchebichev_coeffs(ordre):
    """Return the Chebyshev polynomials T_0 .. T_ordre as ``poly1d`` objects.

    The polynomials are stored in a dictionary keyed by degree and built
    with the recurrence ``T_n = 2 * x * T_(n-1) - T_(n-2)``.  The entries
    for degrees 0 and 1 are always present, even when ``ordre`` is below 1.
    """
    x = poly1d([1, 0])
    polys = {0: poly1d([1]), 1: poly1d([1, 0])}
    for degre in range(2, ordre + 1):
        precedent = polys[degre - 1]
        avant_dernier = polys[degre - 2]
        polys[degre] = (2 * x * precedent) - avant_dernier
    return polys
def lagrange(x, w, verbose=False):
    """Return the Lagrange interpolating polynomial through the points (x, w).

    Parameters
    ----------
    x : sequence
        Abscissae of the data points; they must be pairwise distinct,
        otherwise a division by zero occurs.
    w : sequence
        Ordinates of the data points, same length as ``x``.
    verbose : bool, optional
        When true the polynomial is printed with a descriptive header;
        otherwise the bare polynomial is printed.  The polynomial is
        printed in both cases.

    Returns
    -------
    poly1d
        Polynomial of degree ``len(x) - 1`` passing through every point.
    """
    M = len(x)
    p = poly1d(0.0)
    # ``range`` instead of ``xrange``: works on both Python 2 and Python 3.
    for j in range(M):
        # Basis polynomial j, scaled by the ordinate w[j].
        pt = poly1d(w[j])
        for k in range(M):
            if k == j:
                continue
            fac = x[j] - x[k]
            pt *= poly1d([1.0, -x[k]]) / fac
        p += pt
    if verbose:
        print("Lagrangerov interpolacny polynom je:\n{}".format(p))
    else:
        print(p)
    return p
def seprate(numberator, roots):
    """Split ``numberator / prod(z - r for r in roots)`` into simple fractions.

    For every root ``r`` the coefficient of ``1 / (z - r)`` is computed as
    the numerator evaluated at ``r`` divided by the product of the
    remaining ``(z - other)`` factors evaluated at ``r``.  Factors whose
    root compares equal to ``r`` are all left out, so repeated roots are
    not treated as higher-order poles.

    Parameters
    ----------
    numberator : poly1d or sequence
        Numerator polynomial (anything accepted by ``P.polyval``).
    roots : sequence
        Roots of the denominator; iterated several times.

    Returns
    -------
    list
        One ``[coefficient, root]`` pair per entry of ``roots``, in order.
    """
    res = []
    for root in roots:
        # Denominator with the (z - root) factor(s) left out.
        others = P.poly1d([complex(1, 0)])
        for other in roots:
            if not root == other:
                others = others * P.poly1d([complex(1, 0), -other])
        coefficient = P.polyval(numberator, root) / P.polyval(others, root)
        res.append([coefficient, root])
    return res
def calcError(h, x, y, size_list):
    """Return the mean squared error of the polynomial ``h`` on the data.

    ``h`` holds polynomial coefficients (highest power first, as taken by
    ``poly1d``).  The first ``size_list`` entries of ``x`` and ``y`` are
    compared; ``size_list == 0`` raises ``ZeroDivisionError``.
    """
    model = poly1d(np.array(h))
    total = sum(
        math.pow(model(x[idx]) - y[idx], 2) for idx in range(size_list)
    )
    return total / size_list
def calcError(h, x, y, size_list):
    """Mean squared residual of the polynomial with coefficients ``h``.

    Evaluates ``poly1d(h)`` at ``x[0] .. x[size_list - 1]``, squares the
    difference to the matching ``y`` values and averages the result.
    A ``size_list`` of zero raises ``ZeroDivisionError``.
    """
    polynomial = poly1d(np.array(h))
    squared_residuals = [
        math.pow(polynomial(x[k]) - y[k], 2) for k in range(size_list)
    ]
    accumulated = 0
    for term in squared_residuals:
        accumulated += term
    return accumulated / size_list
def mypolyfit(x, y, order=1, verbose=1):
    """Least-squares polynomial fit of ``y`` against ``x``.

    Usage: ``coeffs, yfit, model = mypolyfit(x, y, order=1, verbose=1)``

    Parameters
    ----------
    x, y : array_like
        Sample points and values, passed straight to ``numpy.polyfit``.
    order : int, optional
        Degree of the fitted polynomial.
    verbose : optional
        When truthy the fitted coefficients are printed.

    Returns
    -------
    coeffs : ndarray
        Fitted coefficients, highest power first.
    yfit : ndarray
        The fitted polynomial evaluated at ``x``.
    model : poly1d
        The fitted polynomial itself.
    """
    # Import from the top-level namespace: ``numpy.lib.polynomial`` is no
    # longer a public module in NumPy 2.0.
    from numpy import polyfit, poly1d
    coeffs = polyfit(x, y, order)
    polyModel = poly1d(coeffs)
    if verbose:
        print("Fit coeffs:", coeffs)
    return coeffs, polyModel(x), polyModel
def getQ():
    """Read the numerator coefficients interactively and return a ``poly1d``.

    The user is first asked for the number of coefficients (degree + 1),
    then for the integer real and imaginary part of each coefficient,
    starting with the highest power of ``z``.
    """
    n = int(input("numberator degree +1 :"))
    coefficients = []
    for power in range(n - 1, -1, -1):
        label = str(power)
        real_part = int(input("enter real part for z**" + label + ":"))
        imag_part = int(input("enter imaginary part for z**" + label + ":"))
        coefficients.append(complex(real_part, imag_part))
    return P.poly1d(coefficients)
def interpolation_Lagrange(listeX, listeY):
    """Return the Lagrange interpolation polynomial for the given points.

    Sums ``listeY[k] * pi_Lagrange(listeX[k], listeX)`` over every
    abscissa, starting from the zero polynomial ``poly1d([])``.
    ``listeY`` must provide at least as many values as ``listeX``.
    """
    somme = poly1d([])
    for rang, abscisse in enumerate(listeX):
        base = pi_Lagrange(abscisse, listeX)
        somme = somme + (listeY[rang] * base)
    return somme
def primitive(polynome):
    """Return the antiderivative of ``polynome`` with a zero constant term.

    The coefficient of ``x**i`` is divided by ``i + 1`` and stored as the
    coefficient of ``x**(i + 1)`` in a fresh ``poly1d``.
    """
    resultat = poly1d([])
    for puissance in range(polynome.order + 1):
        diviseur = puissance + 1
        resultat[diviseur] = polynome[puissance] * 1 / diviseur
    return resultat
def removeZero(denominator, roots):
    """Strip every ``(z - 0)`` factor from the denominator and count them.

    Each root equal to zero divides ``denominator`` by ``z`` once (keeping
    the quotient of ``P.polydiv``), is dropped from the returned root list
    and increments the module-level counter ``zeroCount``.

    The previous implementation removed entries by their position in the
    original list while the list was being shortened, so from the second
    zero root onwards it dropped the wrong element.  The surviving roots
    are now collected in a new list instead.

    Parameters
    ----------
    denominator : poly1d or sequence
        Denominator polynomial.
    roots : iterable
        Roots of the denominator; not modified.

    Returns
    -------
    list
        ``[denominator, roots]`` with the zero roots factored out; the
        roots are returned as a new list in their original order.
    """
    global zeroCount
    z_factor = P.poly1d([complex(1, 0), 0])
    remaining = []
    for root in roots:
        if root == complex(0, 0):
            # polydiv returns (quotient, remainder); keep the quotient.
            denominator = P.polydiv(denominator, z_factor)[0]
            zeroCount += 1
        else:
            remaining.append(root)
    return [denominator, remaining]
def pi_Lagrange(xi, listeX):
    """Return the Lagrange basis polynomial associated with the node ``xi``.

    This is the product of ``(X - e) / (xi - e)`` over every element ``e``
    of ``listeX`` that differs from ``xi``.  When no element differs from
    ``xi`` the plain integer ``1`` is returned.
    """
    monome = poly1d([1, 0])
    facteurs = [
        (monome - noeud) / (xi - noeud)
        for noeud in listeX
        if noeud != xi
    ]
    produit = 1
    for facteur in facteurs:
        produit = produit * facteur
    return produit
def get_level_parameters(cls, level):
    """
    :param int level: Integer greater than one (number of nodes).
    :return: List of the coefficients of the individual points in the NC
        method.  For each node ``elem`` in ``0 .. level - 1`` the product of
        ``(x - i)`` over the other nodes is integrated from ``0`` to
        ``level - 1`` and scaled by
        ``(-1)**(level-elem-1) / (elem! * (level-elem-1)!)``.
        For example ``level=2`` gives ``[0.5, 0.5]`` and ``level=3`` gives
        approximately ``[1/3, 4/3, 1/3]``.
    :rtype: List of floats
    """
    upper = level - 1
    weights = []
    for node in range(level):
        # Numerator of the basis polynomial of this node.
        basis = 1
        for other in range(level):
            if other == node:
                continue
            basis = basis * poly1d([1, -other])
        antiderivative = polyint(basis)
        area = polyval(antiderivative, upper) - polyval(antiderivative, 0)
        # Reciprocal of the basis denominator prod(node - other).
        sign = math.pow(-1, upper - node)
        scale = sign / math.factorial(node) / math.factorial(upper - node)
        weights.append(area * scale)
    return weights
def polyFromRoot(xs):
    """Build the monic polynomial whose roots are the elements of ``xs``.

    Starts from the constant complex polynomial ``1`` and multiplies by
    ``(z - root)`` for each root in turn.
    """
    unit = complex(1, 0)
    product = P.poly1d([unit])
    for root in xs:
        factor = P.poly1d([1, -root])
        product = product * factor
    return product
def extinction(spec, red, coord):
    """De-redden spectra with the CCM extinction curve and the Bayestar map.

    For every spectrum the reddening is queried from ``BayestarQuery`` at
    the object's sky position and comoving distance (WMAP9 cosmology), and
    the flux is multiplied by ``10**(0.4 * A_lambda)`` with ``R_V = 3.1``.

    :param spec: (numpy array) XSpectrum1D objects: use clamato_read.py.
        ``wavelength`` is converted with ``10000 / wavelength`` to inverse
        microns, i.e. it is assumed to be in Angstroms -- TODO confirm.
    :param red: (numpy array) redshift values
    :param coord: (numpy array) coordinates; ``coord[i][0]`` and
        ``coord[i][1]`` are taken in degrees in the ``fk5`` frame
        (presumably RA and Dec -- verify against caller).
    :return: unred_spec: (numpy array) de-reddened spec
    """
    import numpy as np
    # ``numpy.lib.polynomial`` is not public in NumPy 2.0; the top-level
    # name is available in every NumPy version.
    from numpy import poly1d
    import astropy.units as u
    from astropy.coordinates import SkyCoord
    from dustmaps.bayestar import BayestarQuery
    from astropy.cosmology import WMAP9 as cosmo
    from linetools.spectra.xspectrum1d import XSpectrum1D

    r_v = 3.1

    # Polynomial coefficients, lowest power first (reversed for poly1d).
    # Optical/NIR: constants from O'Donnell (1994).
    opt_c1 = np.array([
        1., 0.104, -0.609, 0.701, 1.137, -1.718, -0.827, 1.647, -0.505
    ])
    opt_c2 = np.array([
        0., 1.952, 2.908, -3.989, -7.985, 11.102, 5.491, -10.805, 3.347
    ])
    # Far-UV.
    fuv_c1 = np.array([-1.073, -0.628, 0.137, -0.070])
    fuv_c2 = np.array([13.670, 4.257, -0.420, 0.374])

    r = range(len(spec))
    Mpc = cosmo.comoving_distance(red)
    bayestar = BayestarQuery()
    coords = [
        SkyCoord(coord[i][0] * u.deg,
                 coord[i][1] * u.deg,
                 distance=Mpc[i],
                 frame='fk5') for i in r
    ]
    ebv = [bayestar(c) for c in coords]  # reddening value for each galaxy

    unred_spec = []
    for i in r:
        x = 10000. / np.array(spec[i].wavelength)  # Convert to inverse microns
        npts = x.size
        # Builtin ``float``: the ``np.float`` alias was removed in NumPy 1.24.
        a = np.zeros(npts, dtype=float)
        b = np.zeros(npts, dtype=float)

        # Infrared
        good = np.where((x >= 0.3) & (x < 1.1))
        if len(good[0]) > 0:
            a[good] = 0.574 * x[good]**(1.61)
            b[good] = -0.527 * x[good]**(1.61)

        # Optical/NIR
        good = np.where((x >= 1.1) & (x < 3.3))
        if len(good[0]) > 0:
            y = x[good] - 1.82
            a[good] = poly1d(opt_c1[::-1])(y)
            b[good] = poly1d(opt_c2[::-1])(y)

        # Mid-UV
        # NOTE(review): the f_a / f_b terms that the CCM curve adds for
        # x > 5.9 were left out by the original author and are still
        # omitted here -- confirm whether that is intended.
        good = np.where((x >= 3.3) & (x < 8))
        if len(good[0]) > 0:
            y = x[good]
            a[good] = 1.752 - 0.316 * y - (0.104 / ((y - 4.67)**2 + 0.341))
            b[good] = -3.090 + 1.825 * y + (1.206 / ((y - 4.62)**2 + 0.263))

        # Far-UV
        good = np.where((x >= 8) & (x <= 11))
        if len(good[0]) > 0:
            y = x[good] - 8.
            a[good] = poly1d(fuv_c1[::-1])(y)
            b[good] = poly1d(fuv_c2[::-1])(y)

        # Now apply extinction correction to input flux vector
        a_v = r_v * ebv[i]
        a_lambda = a_v * (a + b / r_v)
        funred = spec[i].flux * 10.**(0.4 * a_lambda)  # Derive unreddened flux
        funred = np.asarray(funred)
        unred_spec.append(XSpectrum1D(spec[i].wavelength, funred, spec[i].sig))

    return np.asarray(unred_spec)
def ccm_unred(wave, flux, a_v=None, ebv=None, r_v=3.1):
    """Deredden a flux vector using the CCM 1989 parameterization.

    Python port of the IDL Astronomy Library routine CCM_UNRED.  The
    reddening curve is that of Cardelli, Clayton, & Mathis (1989 ApJ. 345,
    245), including the update for the near-UV given by O'Donnell (1994,
    ApJ, 422, 158).  The parameterization is applied for
    0.3 <= 1/lambda <= 11 inverse microns (IR to far-UV, roughly 3.3 to
    0.09 microns); wavelengths outside that range receive no correction.
    Users might wish to consider the alternative extinction curve of
    Fitzpatrick (1999) (FM_UNRED in the IDL library).

    Parameters
    ----------
    wave : array_like
        Wavelength vector in Angstroms.  A scalar is treated as a
        one-element vector.
    flux : array_like
        Calibrated flux vector, same number of elements as ``wave``.
        It is not modified.
    a_v : float, optional
        Total extinction A(V).  When omitted it is computed as
        ``r_v * ebv``.
    ebv : float, optional
        Color excess E(B-V), scalar; only used when ``a_v`` is None.  If a
        negative value is supplied the fluxes are reddened rather than
        dereddened.
    r_v : float, optional
        Ratio of total to selective extinction R(V) = A(V) / E(B - V).
        Defaults to 3.1; extreme values of R(V) range from 2.75 to 5.3.

    Returns
    -------
    funred : ndarray
        Unreddened flux vector, same units and number of elements as
        ``flux``.

    Raises
    ------
    TypeError
        If neither ``a_v`` nor ``ebv`` is given.

    Notes
    -----
    (1) The CCM curve shows good agreement with the Savage & Mathis (1979)
        ultraviolet curve shortward of 1400 A, but is probably preferable
        between 1200 & 1400 A.
    (2) Many sightlines with peculiar ultraviolet interstellar extinction
        can be represented with a CCM curve, if the proper value of R(V)
        is supplied.
    (3) Curve is extrapolated between 912 & 1000 A as suggested by Longo
        et al. (1989, ApJ, 339, 474).
    (4) Valencic et al. (2004, ApJ, 616, 912) revise the ultraviolet CCM
        curve (3.3 -- 8.0 um-1).  But since their revised curve does not
        connect smoothly with longer & shorter wavelengths, it is not
        included here.

    Revision history of the IDL original: written W. Landsman, Hughes/STX,
    January 1992; curve extrapolated for wavelengths between 900 & 1000 A,
    Dec. 1993; updated near-UV coefficients from O'Donnell, Feb 1994;
    3 parameter calling sequence allowed, April 1998; converted to
    IDL V5.0, April 1998.
    """
    if a_v is None:
        if ebv is None:
            # Previously this surfaced as an opaque ``float * None`` error.
            raise TypeError("ccm_unred() requires either a_v or ebv")
        a_v = r_v * ebv

    # Convert to inverse microns.
    x = numpy.atleast_1d(10000.0 / numpy.asarray(wave, dtype=float))
    # Builtin ``float``: the ``numpy.float`` alias was removed in NumPy 1.24.
    a = numpy.zeros(x.shape, dtype=float)
    b = numpy.zeros(x.shape, dtype=float)

    # Infrared
    sel = (x >= 0.3) & (x < 1.1)
    if sel.any():
        a[sel] = 0.574 * x[sel] ** 1.61
        b[sel] = -0.527 * x[sel] ** 1.61

    # Optical/NIR, using the constants from O'Donnell (1994) instead of
    # the original CCM89 ones.
    sel = (x >= 1.1) & (x < 3.3)
    if sel.any():
        y = x[sel] - 1.82
        # Coefficients are listed from the constant term upwards (the IDL
        # poly() convention); poly1d wants the opposite order, hence [::-1].
        c1 = numpy.array(
            [1.0, 0.104, -0.609, 0.701, 1.137, -1.718, -0.827, 1.647, -0.505])
        c2 = numpy.array(
            [0.0, 1.952, 2.908, -3.989, -7.985, 11.102, 5.491, -10.805, 3.347])
        a[sel] = numpy.poly1d(c1[::-1])(y)
        b[sel] = numpy.poly1d(c2[::-1])(y)

    # Mid-UV
    sel = (x >= 3.3) & (x < 8)
    if sel.any():
        y = x[sel]
        # Extra curvature terms, non-zero only for y > 5.9.
        f_a = numpy.zeros(y.shape, dtype=float)
        f_b = numpy.zeros(y.shape, dtype=float)
        bump = y > 5.9
        if bump.any():
            y1 = y[bump] - 5.9
            f_a[bump] = -0.04473 * y1 ** 2 - 0.009779 * y1 ** 3
            f_b[bump] = 0.2130 * y1 ** 2 + 0.1207 * y1 ** 3
        a[sel] = 1.752 - 0.316 * y - (0.104 / ((y - 4.67) ** 2 + 0.341)) + f_a
        b[sel] = -3.090 + 1.825 * y + (1.206 / ((y - 4.62) ** 2 + 0.263)) + f_b

    # Far-UV
    sel = (x >= 8) & (x <= 11)
    if sel.any():
        y = x[sel] - 8.0
        c1 = numpy.array([-1.073, -0.628, 0.137, -0.070])
        c2 = numpy.array([13.670, 4.257, -0.420, 0.374])
        a[sel] = numpy.poly1d(c1[::-1])(y)
        b[sel] = numpy.poly1d(c2[::-1])(y)

    # Now apply extinction correction to input flux vector.
    a_lambda = a_v * (a + b / r_v)
    funred = flux * 10.0 ** (0.4 * a_lambda)  # Derive unreddened flux
    return funred
def ccm_unred(wave, flux, a_v=None, ebv=None, r_v=3.1):
    """Deredden a flux vector using the CCM 1989 parameterization.

    Python port of the IDL Astronomy Library routine CCM_UNRED.  The
    reddening curve is that of Cardelli, Clayton, & Mathis (1989 ApJ. 345,
    245), including the update for the near-UV given by O'Donnell (1994,
    ApJ, 422, 158).  The parameterization is applied for
    0.3 <= 1/lambda <= 11 inverse microns (IR to far-UV, roughly 3.3 to
    0.09 microns); wavelengths outside that range receive no correction.
    Users might wish to consider the alternative extinction curve of
    Fitzpatrick (1999) (FM_UNRED in the IDL library).

    Parameters
    ----------
    wave : array_like
        Wavelength vector in Angstroms.  A scalar is treated as a
        one-element vector.
    flux : array_like
        Calibrated flux vector, same number of elements as ``wave``.
        It is not modified.
    a_v : float, optional
        Total extinction A(V).  When omitted it is computed as
        ``r_v * ebv``.
    ebv : float, optional
        Color excess E(B-V), scalar; only used when ``a_v`` is None.  If a
        negative value is supplied the fluxes are reddened rather than
        dereddened.
    r_v : float, optional
        Ratio of total to selective extinction R(V) = A(V) / E(B - V).
        Defaults to 3.1; extreme values of R(V) range from 2.75 to 5.3.

    Returns
    -------
    funred : ndarray
        Unreddened flux vector, same units and number of elements as
        ``flux``.

    Raises
    ------
    TypeError
        If neither ``a_v`` nor ``ebv`` is given.

    Notes
    -----
    (1) The CCM curve shows good agreement with the Savage & Mathis (1979)
        ultraviolet curve shortward of 1400 A, but is probably preferable
        between 1200 & 1400 A.
    (2) Many sightlines with peculiar ultraviolet interstellar extinction
        can be represented with a CCM curve, if the proper value of R(V)
        is supplied.
    (3) Curve is extrapolated between 912 & 1000 A as suggested by Longo
        et al. (1989, ApJ, 339, 474).
    (4) Valencic et al. (2004, ApJ, 616, 912) revise the ultraviolet CCM
        curve (3.3 -- 8.0 um-1).  But since their revised curve does not
        connect smoothly with longer & shorter wavelengths, it is not
        included here.

    Revision history of the IDL original: written W. Landsman, Hughes/STX,
    January 1992; curve extrapolated for wavelengths between 900 & 1000 A,
    Dec. 1993; updated near-UV coefficients from O'Donnell, Feb 1994;
    3 parameter calling sequence allowed, April 1998; converted to
    IDL V5.0, April 1998.
    """
    if a_v is None:
        if ebv is None:
            # Previously this surfaced as an opaque ``float * None`` error.
            raise TypeError("ccm_unred() requires either a_v or ebv")
        a_v = r_v * ebv

    # Convert to inverse microns.
    x = numpy.atleast_1d(10000.0 / numpy.asarray(wave, dtype=float))
    # Builtin ``float``: the ``numpy.float`` alias was removed in NumPy 1.24.
    a = numpy.zeros(x.shape, dtype=float)
    b = numpy.zeros(x.shape, dtype=float)

    # Infrared
    sel = (x >= 0.3) & (x < 1.1)
    if sel.any():
        a[sel] = 0.574 * x[sel] ** 1.61
        b[sel] = -0.527 * x[sel] ** 1.61

    # Optical/NIR, using the constants from O'Donnell (1994) instead of
    # the original CCM89 ones.
    sel = (x >= 1.1) & (x < 3.3)
    if sel.any():
        y = x[sel] - 1.82
        # Coefficients are listed from the constant term upwards (the IDL
        # poly() convention); poly1d wants the opposite order, hence [::-1].
        c1 = numpy.array(
            [1.0, 0.104, -0.609, 0.701, 1.137, -1.718, -0.827, 1.647, -0.505])
        c2 = numpy.array(
            [0.0, 1.952, 2.908, -3.989, -7.985, 11.102, 5.491, -10.805, 3.347])
        a[sel] = numpy.poly1d(c1[::-1])(y)
        b[sel] = numpy.poly1d(c2[::-1])(y)

    # Mid-UV
    sel = (x >= 3.3) & (x < 8)
    if sel.any():
        y = x[sel]
        # Extra curvature terms, non-zero only for y > 5.9.
        f_a = numpy.zeros(y.shape, dtype=float)
        f_b = numpy.zeros(y.shape, dtype=float)
        bump = y > 5.9
        if bump.any():
            y1 = y[bump] - 5.9
            f_a[bump] = -0.04473 * y1 ** 2 - 0.009779 * y1 ** 3
            f_b[bump] = 0.2130 * y1 ** 2 + 0.1207 * y1 ** 3
        a[sel] = 1.752 - 0.316 * y - (0.104 / ((y - 4.67) ** 2 + 0.341)) + f_a
        b[sel] = -3.090 + 1.825 * y + (1.206 / ((y - 4.62) ** 2 + 0.263)) + f_b

    # Far-UV
    sel = (x >= 8) & (x <= 11)
    if sel.any():
        y = x[sel] - 8.0
        c1 = numpy.array([-1.073, -0.628, 0.137, -0.070])
        c2 = numpy.array([13.670, 4.257, -0.420, 0.374])
        a[sel] = numpy.poly1d(c1[::-1])(y)
        b[sel] = numpy.poly1d(c2[::-1])(y)

    # Now apply extinction correction to input flux vector.
    a_lambda = a_v * (a + b / r_v)
    funred = flux * 10.0 ** (0.4 * a_lambda)  # Derive unreddened flux
    return funred
def extract_features(self, line, unigrams, text_stats):
    """Extract features from a given line

    The returned vector always holds 13 floats, in this order:

    0. length of the original line / ``line_avg_length``
    1. length of the clean line / ``line_avg_length``
    2. number of tokens / ``word_avg_nb``
    3. average length of ``token[0]`` / ``word_avg_length``
    4. mean unigram value of ``token[1]`` / mean unigram value of
       ``token[0]`` (0 when the latter is 0)
    5-8. ``lw_char``, ``up_char``, ``sp_char``, ``nb_char`` of the original
       line, each divided by their sum (0 when the sum is 0)
    9-12. the same four statistics of the clean line, each divided by
       their sum (0 when the sum is 0)

    The line score and the ``token[2]`` unigram ratio, which the previous
    implementation computed and then discarded, are not part of the vector.

    Args:
        line (Line): Line to get features from
        unigrams (Unigrams): Unigrams for the given line
        text_stats (Statistics): Statistics of the text the line is coming from

    Returns:
        list: List of the features (also stored in ``self.features``)
    """
    char_keys = ("lw_char", "up_char", "sp_char", "nb_char")

    def shares(counts):
        # Fraction of each count in the total; all zeros for an empty total
        # so that an empty line cannot trigger a division by zero.
        total = sum(counts)
        if total == 0:
            return [0.0] * len(counts)
        return [count / total for count in counts]

    def average(values):
        # Mean of the values, 0.0 for an empty list.
        if len(values) == 0:
            return 0.0
        return float(mean(values))

    # Character statistics of the original and of the cleaned line.
    orig_stats = line.stats["orig"]
    orig_counts = [float(orig_stats.get_stat(key)) for key in char_keys]
    token_count = float(len(line.tokens))
    clean_stats = line.get_clean_stats()
    clean_counts = [float(clean_stats.get_stat(key)) for key in char_keys]
    orig_len = float(len(line.get_orig_line()))
    clean_len = float(len(line.get_clean_line()))

    # Token statistics.
    word_avg_len = average([len(token[0]) for token in line.tokens])
    s0 = average([unigrams[tk[0]] for tk in line.tokens])
    s1 = average([unigrams[tk[1]] for tk in line.tokens
                  if tk[1] is not None])

    # Normalization against the statistics of the whole text.
    line_avg_length = text_stats.get_stat("line_avg_length")
    features = [
        orig_len / line_avg_length,
        clean_len / line_avg_length,
        token_count / text_stats.get_stat("word_avg_nb"),
        word_avg_len / text_stats.get_stat("word_avg_length"),
        s1 / s0 if s0 != 0 else 0.0,
    ]
    features += shares(orig_counts)
    features += shares(clean_counts)

    # The degree-1 polynomial expansion used previously is the identity on
    # the coefficients, except that poly1d strips leading zeros, which
    # changed the length of the vector; the values are used directly.
    self.features = [float(value) for value in features]
    return self.features
def extract_features(self, line, unigrams, text_stats):
    """Extract features from a given line

    The returned vector always holds 13 floats, in this order:

    0. length of the original line / ``line_avg_length``
    1. length of the clean line / ``line_avg_length``
    2. number of tokens / ``word_avg_nb``
    3. average length of ``token[0]`` / ``word_avg_length``
    4. mean unigram value of ``token[1]`` / mean unigram value of
       ``token[0]`` (0 when the latter is 0)
    5-8. ``lw_char``, ``up_char``, ``sp_char``, ``nb_char`` of the original
       line, each divided by their sum (0 when the sum is 0)
    9-12. the same four statistics of the clean line, each divided by
       their sum (0 when the sum is 0)

    The line score and the ``token[2]`` unigram ratio, which the previous
    implementation computed and then discarded, are not part of the vector.

    Args:
        line (Line): Line to get features from
        unigrams (Unigrams): Unigrams for the given line
        text_stats (Statistics): Statistics of the text the line is coming from

    Returns:
        list: List of the features (also stored in ``self.features``)
    """
    char_keys = ("lw_char", "up_char", "sp_char", "nb_char")

    def shares(counts):
        # Fraction of each count in the total; all zeros for an empty total
        # so that an empty line cannot trigger a division by zero.
        total = sum(counts)
        if total == 0:
            return [0.0] * len(counts)
        return [count / total for count in counts]

    def average(values):
        # Mean of the values, 0.0 for an empty list.
        if len(values) == 0:
            return 0.0
        return float(mean(values))

    # Character statistics of the original and of the cleaned line.
    orig_stats = line.stats["orig"]
    orig_counts = [float(orig_stats.get_stat(key)) for key in char_keys]
    token_count = float(len(line.tokens))
    clean_stats = line.get_clean_stats()
    clean_counts = [float(clean_stats.get_stat(key)) for key in char_keys]
    orig_len = float(len(line.get_orig_line()))
    clean_len = float(len(line.get_clean_line()))

    # Token statistics.
    word_avg_len = average([len(token[0]) for token in line.tokens])
    s0 = average([unigrams[tk[0]] for tk in line.tokens])
    s1 = average([unigrams[tk[1]] for tk in line.tokens
                  if tk[1] is not None])

    # Normalization against the statistics of the whole text.
    line_avg_length = text_stats.get_stat("line_avg_length")
    features = [
        orig_len / line_avg_length,
        clean_len / line_avg_length,
        token_count / text_stats.get_stat("word_avg_nb"),
        word_avg_len / text_stats.get_stat("word_avg_length"),
        s1 / s0 if s0 != 0 else 0.0,
    ]
    features += shares(orig_counts)
    features += shares(clean_counts)

    # The degree-1 polynomial expansion used previously is the identity on
    # the coefficients, except that poly1d strips leading zeros, which
    # changed the length of the vector; the values are used directly.
    self.features = [float(value) for value in features]
    return self.features