Ejemplo n.º 1
0
 def __init__(self, n, time):
     """
     Start a ``QuadraticSieve`` child process and send it ``n``.

     INPUT:
         n -- the value handed to the program; its ``str()`` form is sent
         time -- if true, the program is launched as ``time QuadraticSieve``
     """
     self._n = n
     # Only the command line depends on ``time``.
     cmd = 'time QuadraticSieve' if time else 'QuadraticSieve'
     tmpdir()
     self._p = pexpect.spawn(cmd)
     # The cleaner is always given the bare program name, even under ``time``.
     cleaner.cleaner(self._p.pid, 'QuadraticSieve')
     number_text = str(self._n)
     self._p.sendline(number_text + '\n\n\n')
     self._done = False
     self._out = self._time = ''
     self._do_time = time
Ejemplo n.º 2
0
 def __init__(self, n, time):
     """
     Start a ``QuadraticSieve`` child through ``SageSpawn`` and send it ``n``.

     INPUT:
         n -- the value handed to the program; its ``str()`` form is sent
         time -- if true, the program is launched as ``time QuadraticSieve``
     """
     self._n = n
     # Only the command line depends on ``time``.
     cmd = 'time QuadraticSieve' if time else 'QuadraticSieve'
     tmpdir()
     self._p = SageSpawn(cmd)
     # The cleaner is always given the bare program name, even under ``time``.
     cleaner.cleaner(self._p.pid, 'QuadraticSieve')
     number_text = str(self._n)
     self._p.sendline(number_text + '\n\n\n')
     self._done = False
     self._out = self._time = ''
     self._do_time = time
Ejemplo n.º 3
0
def setup(config, pool, acc, user):
    total = 20
    rows = {}
    for _ in xrange(total):
        uid, r = db_txn(pool, partial(do_setup, acc, user))
        print uid, r
        rows[uid] = r

    fetch = fetcher(config, pool, Condition(Lock()), Condition(Lock()))
    clean = cleaner(config, pool, Condition(Lock()), Condition(Lock()))
    ev = Event()
    ev.clear()
    load = Thread(target=load_database, args=(config, pool, ev, [fetch, clean]))  # @IgnorePep8
    load.start()
    ev.wait()
    fetch.start()

    tasks = []
    while len(tasks) < total:
        print 'already fetched: ', len(tasks), 'tasks'
        print 'to fetch tasks from db'
        fetch.request(acc)
        for r in fetch.replies(True):
            ts = r[1]
            print 'fetched', len(ts), 'tasks'
            for t in ts:
                if t.uuid in rows:
                    tasks.append((t, rows[t.uuid]))

    return (clean, tasks)
Ejemplo n.º 4
0
 def __init__(self, n, time):
     """
     Start a ``QuadraticSieve`` child process and send it ``n``.

     INPUT:
         n -- the value handed to the program; its ``str()`` form is sent
         time -- if true, the program is launched as ``time QuadraticSieve``

     Raises ``ValueError`` when ``time`` is requested and ``cygwin`` is true.
     """
     self._n = n
     if time:
         if cygwin:
             # Call form of ``raise``: same behaviour on Python 2, and unlike
             # the ``raise E, "msg"`` statement form it also parses on Python 3.
             raise ValueError("qsieve time not supported on Cygwin")
         cmd = 'time QuadraticSieve'
     else:
         cmd = 'QuadraticSieve'
     tmpdir()
     self._p = pexpect.spawn(cmd)
     # The cleaner is always given the bare program name, even under ``time``.
     cleaner.cleaner(self._p.pid, 'QuadraticSieve')
     self._p.sendline(str(self._n) + '\n\n\n')
     self._done = False
     self._out = ''
     self._time = ''
     self._do_time = time
Ejemplo n.º 5
0
    def __init__(self):
        """
        Create the cleaner and open a session on the ``statuses.db`` SQLite file.

        The engine is also bound to ``Base.metadata``.
        """
        self.clean = cleaner()
        db_engine = create_engine('sqlite:///statuses.db')
        Base.metadata.bind = db_engine

        # Build the session factory and instantiate one session from it.
        self.session = sessionmaker(bind=db_engine)()
Ejemplo n.º 6
0
 def __init__(self, n, time):
     """
     Start a ``QuadraticSieve`` child process and send it ``n``.

     INPUT:
         n -- the value handed to the program; its ``str()`` form is sent
         time -- if true, the program is launched as ``time QuadraticSieve``

     Raises ``ValueError`` when ``time`` is requested and ``cygwin`` is true.
     """
     self._n = n
     if time:
         if cygwin:
             # Call form of ``raise``: same behaviour on Python 2, and unlike
             # the ``raise E, "msg"`` statement form it also parses on Python 3.
             raise ValueError("qsieve time not supported on Cygwin")
         cmd = 'time QuadraticSieve'
     else:
         cmd = 'QuadraticSieve'
     tmpdir()
     self._p = pexpect.spawn(cmd)
     # The cleaner is always given the bare program name, even under ``time``.
     cleaner.cleaner(self._p.pid, 'QuadraticSieve')
     self._p.sendline(str(self._n) + '\n\n\n')
     self._done = False
     self._out = ''
     self._time = ''
     self._do_time = time
Ejemplo n.º 7
0
 def __init__(self):
     """
     Create the cleaner, five empty lookup dicts, and load everything from the db.

     ``self.all`` holds whatever ``dbContainer().get_all()`` returns.
     """
     self.clean = cleaner()
     # Each table starts out as its own, separate empty dict.
     for table_name in ('allWords', 'tfDictTrain', 'tfDictNew',
                        'sharedTermsDict', 'simDict'):
         setattr(self, table_name, {})
     self.db = dbContainer()
     self.all = self.db.get_all()
Ejemplo n.º 8
0
def test_clean():
    """
    Check the sums of the two sequences ``cleaner`` returns for test_data30.csv.

    The CSV is read with the column names ``time`` and ``volt``; the sums must
    be within 0.2 of 199976.1 and within 0.01 of 443.105 respectively.
    """
    import sys
    sys.path.append('../')
    import glob
    import numpy as np
    import pandas as pd
    from cleaner import cleaner, isfloat

    frame = pd.read_csv('data/test_data30.csv', names=['time', 'volt'])
    nptime, npvolt = cleaner(frame)
    # Checked in the same order as before: time first, then voltage.
    assert abs(sum(nptime) - 199976.1) < 0.2
    assert abs(sum(npvolt) - 443.105) < 0.01
Ejemplo n.º 9
0
def run_TLS(fn, min_period=0.2, target=None):
    """
    Load a light curve from ``fn``, clean and flatten it, and run ``the_TLS``.

    Columns 0, 1 and 2 of the file are read as time, flux and flux error.
    Samples where ``cleaner(t, f)`` is true are dropped before flattening
    (assumes the mask is a boolean array -- confirm against ``cleaner``).

    Returns whatever ``the_TLS`` returns.
    """
    t, f, e = np.genfromtxt(fn, usecols=(0, 1, 2), unpack=True)

    # Invert the mask once and reuse it for all three columns.
    keep = ~cleaner(t, f)
    t, f, e = t[keep], f[keep], e[keep]

    raw_curve = TessLightCurve(time=t, flux=f, flux_err=e)
    lc = raw_curve.flatten(window_length=51, polyorder=2, niters=5)

    return the_TLS(fn, lc.time.value, lc.flux, lc.flux_err, min_period, target)
Ejemplo n.º 10
0
def start(algo, auto, print_unsorted, print_sorted):
    """
    Obtain a list of numbers and run the selected sorting algorithm(s) on it.

    The list comes from ``cleaner.cleaner(auto)`` when ``auto`` is truthy and
    from ``generator.main()`` otherwise.  ``algo`` selects ``'bubble_sort'``,
    ``'insert_sort'`` or ``'all'``; each algorithm receives its own copy of
    the list together with the two print flags.
    """
    unsortedList = cleaner.cleaner(auto) if auto else generator.main()

    if print_unsorted:
        print('unsorted list: ', unsortedList)
    # The count line gets a leading newline only after the list was printed.
    count_prefix = '\nFor ' if print_unsorted else 'For '
    print(count_prefix, len(unsortedList), ' numbers:')

    if algo in ('bubble_sort', 'all'):
        bubble_algorithm(unsortedList[:], print_unsorted, print_sorted)
    if algo in ('insert_sort', 'all'):
        insert_algorithm(unsortedList[:], print_unsorted, print_sorted)
Ejemplo n.º 11
0
 def __init__(self, text, document, pos, comp=False):
     """
     Build the token, vocabulary and parse data for one piece of text.

     text -- the raw text; stored unchanged and given to ``ner_model``
     document -- stored as ``self.document``
     pos -- stored as ``self.pos``
     comp -- stored as ``self.is_compressed`` (default False)

     ``self.clean_text`` is ``cleaner.cleaner(text)``; tokens are derived from
     it by whitespace splitting, lower-casing and stripping punctuation from
     both ends.  ``self.tokens`` drops empty strings and English stop words.
     """
     self.pos = pos
     self.document = document
     self.clean_text = cleaner.cleaner(text)
     self.text = text
     self.full_tokens = [
         w.lower().strip(string.punctuation)
         for w in self.clean_text.split()
     ]
     # Fetch the stop-word list once and test membership against a set; the
     # comprehension used to call stopwords.words('english') for every token
     # and scan the resulting list each time.
     english_stopwords = set(stopwords.words('english'))
     self.tokens = [
         w for w in self.full_tokens
         if w and w not in english_stopwords
     ]
     self.vocab = self.get_vocab()
     # Entities are parsed from the raw text, dependencies from the cleaned text.
     self.ent_parse = ner_model(self.text)
     self.dep_parse = dep_model(self.clean_text)
     self.is_compressed = comp
Ejemplo n.º 12
0
def terrorist_finder(query_string,
                     source_data,
                     blacklist_data_file,
                     noise_data_file=[]):
    query = query_string

    if source_data.upper() == "FILE":
        blacklist = clean.cleaner(blacklist_data_file, noise_data_file)
    elif source_data.upper() == "EUROPA":
        blacklist = blacklist_data_file
    else:
        raise ValueError("Source data isn't file or europa")

    # Strict matching
    if (strict_match(query, blacklist)):
        print "TERRORIST MATCHED!"
        print "Certainty: Strict match"
        print "Name: ", query
        return ""
    else:
        print "No strict match, looking for partial matches..."

    # Partial matching
    matches = partial_match(query, blacklist)
    match_name = matches[0]
    match_certainty = matches[1]
    if (len(match_name) > 0):
        for index, name in enumerate(match_name):
            print "TERRORIST MATCHED!"
            print "Certainty: ", match_certainty[index], "%"
            print "Name: ", name
            print ""
        return ""
    else:
        print "No partial match"

    # Fuzzy matching
    # Not implemented yet

    return ""
Ejemplo n.º 13
0
        indirafs = yamldir + args.version
        checkereos = chkeos.checker_eos(indirafs, indireos, args.process)
        checkereos.check()

    elif args.merge:
        print 'running the merger'
        if args.process != '':
            print 'using a specific process ', args.process
        import merger as mgr
        merger = mgr.merger(args.process, yamldir, args.version)
        merger.merge(args.force)

    elif args.clean:
        print 'clean the dictionnary and eos'
        import cleaner as clf
        clean = clf.cleaner(indir, yamldir, args.process, args.version)
        clean.clean()

    elif args.cleanold:
        print 'clean the dictionnary from old jobs that have not been checked'
        import cleaner as clf
        clean = clf.cleaner(indir, yamldir, args.process, args.version)
        clean.cleanoldjobs()

    elif args.web:
        import printer as prt
        webfile = webfile.replace('VERSION', args.version)
        printdic = prt.printer(yamldir, indir, webfile, args.version)
        printdic.run()

    elif args.remove:
Ejemplo n.º 14
0
def data_analysis():

	"""
	Run the interactive analysis of the *.log files in a user-chosen directory.

	The user is asked for a directory, which becomes the working directory.
	The steps then run in this order, each one iterating over the current
	directory listing:

	1. check for a tmp folder and unpack the tar/tgz archives into it;
	2. search the log files for error terminations and report the count;
	3. search the log files for imaginary frequencies;
	4. collect the route cards of the files with imaginary frequencies;
	5. extract the calculated energies ("EnergyEx") and write them to a
	   csv file with the csvwriter module;
	6. separate the files containing errors or imaginary frequencies from
	   the rest;
	7. if any imaginary frequencies were found, process those logs with the
	   "xvibs" program, generate new input files, and optionally clean up
	   the intermediate files.

	The function takes no arguments and returns nothing; all input is read
	interactively with input().
	"""
	
	import os, sys, time

	import tmp_unrar as tu
	from ErrorCatcher import ErrorCatcher as EC
	from ImagCatcher import ImagCatcher as IC
	from RouteCard import RouteCard as RC
	from EnergyEx import EnergyEx  
	from csvwriter import csvwriter
	from FileSeparator import FileSeparator
	from xawker import xawker
	from ImagInput import ImagInput
	from cleaner import cleaner	
	

	global direct

	path=input('Enter the path to the directory where you have the tgz/tar folders: ')
	# NOTE(review): os.chdir() returns None, so the global ``direct`` is None
	# from here on and that is what tmp_check()/unrar() receive below.
	# Confirm those helpers are meant to work on the current directory.
	direct=os.chdir(os.path.expanduser(path)) #changing the directory to where I will work

	####### Checking for a tmp folder and unzipping the tar/tgz files into tmp #######
	tu.tmp_check(direct)
	tu.unrar(direct)
	###################################################################################
	
	####### Searching for Error termination in the *.log files ########################
	er = EC()

	# os.listdir() with no argument lists the current working directory.
	for file in os.listdir():
		er.e_catch(file)

	print('You have {} calculation(s) with ERROR termination!'.format(er.error_counter))
	####################################################################################
	
	####### Searching for imaginary frequencies in the *.log files #####################
	imag=IC()

	for file in os.listdir():
		imag.imag_catcher(file)
	#####################################################################################	

	########### Selecting the route cards for the files with imaginary freqs ############
	route = RC(imag.imag_dict)
	
	for file in os.listdir():
		route.route_card(file)

	#####################################################################################
	
	####### Extracting the Energies from the *.log files and saving them in energies.csv file #######
	energy=EnergyEx()

	for file in os.listdir():
		# print(file)
		energy.sampler(file)


	csvwriter(energy.energy_dict)
	##################################################################################################

	####### Separating the files containing Errors from those containing imaginary frequencies ########
	fs = FileSeparator(imag.imag_dict)

	for file in os.listdir():
		fs.file_separator(file)
	##################################################################################################

	####### Generating the new input files from the *.log files containing imaginary freqs #######
	# Only done when at least one imaginary frequency was recorded.
	if imag.imag_dict != {}:

		xawker(imag.imag_dict)

		im = ImagInput()
		
		chrg_spin=input('Please enter the molecules new charge and spin value [ex.: 0 1 ]: ')
		ext=input('Please enter the generated files extension [ex: com]: ')

		# route.rd is passed along with the user-supplied charge/spin and extension.
		for file in os.listdir():
			im.imag_xyz_gen(file, chrg_spin, ext, route.rd)

		################### Cleaning up the mess ###################	
		answer = input("Do you need the old xyz/xyz-/ginp file(s)? ")

		# The raw answer string is handed to cleaner(), which decides what to do.
		cleaner(answer)
		#############################################################

	else:
		pass
	##################################################################################################

	################################### Exiting the data analysis mode ###############################
	print('Data analysis terminated')
	time.sleep(1)
Ejemplo n.º 15
0
Archivo: ecm.py Proyecto: shrutig/sage
    def time(self, n, factor_digits, verbose=0):
        """
        Gives an approximation for the amount of time it will take to find a factor
        of size factor_digits in a single process on the current computer.
        This estimate is provided by GMP-ECM's verbose option on a single run of a curve.

        The result is printed ("Expected curves: ... Expected time: ..."), not
        returned.

        INPUT:
            n -- a positive integer
            factor_digits -- the (estimated) number of digits of the smallest factor
            verbose -- (default 0) if true, also echo the child's output as it
                       is consumed

        EXAMPLES::

            sage: n = next_prime(11^23)*next_prime(11^37)

            sage: ecm.time(n, 20)                  # not tested
            Expected curves: 77     Expected time: 7.21s
            sage: ecm.time(n, 25)                  # not tested
            Expected curves: 206    Expected time: 1.56m
            sage: ecm.time(n, 30, verbose=1)       # not tested
            GMP-ECM 6.1.3 [powered by GMP 4.2.1] [ECM]

            Input number is 304481639541418099574459496544854621998616257489887231115912293 (63 digits)
            Using MODMULN
            Using B1=250000, B2=128992510, polynomial Dickson(3), sigma=2307628716
            dF=2048, k=3, d=19110, d2=11, i0=3
            Expected number of curves to find a factor of n digits:
            20      25      30      35      40      45      50      55      60      65
            8       50      430     4914    70293   1214949 2.5e+07 5.9e+08 1.6e+10 2.7e+13
            Step 1 took 6408ms
            Using 16 small primes for NTT
            Estimated memory usage: 3862K
            Initializing tables of differences for F took 16ms
            Computing roots of F took 128ms
            Building F from its roots took 408ms
            Computing 1/F took 608ms
            Initializing table of differences for G took 12ms
            Computing roots of G took 120ms
            Building G from its roots took 404ms
            Computing roots of G took 120ms
            Building G from its roots took 412ms
            Computing G * H took 328ms
            Reducing  G * H mod F took 348ms
            Computing roots of G took 120ms
            Building G from its roots took 408ms
            Computing G * H took 328ms
            Reducing  G * H mod F took 348ms
            Computing polyeval(F,G) took 1128ms
            Step 2 took 5260ms
            Expected time to find a factor of n digits:
            20      25      30      35      40      45      50      55      60      65
            1.58m   9.64m   1.39h   15.93h  9.49d   164.07d 9.16y   218.68y 5825y   1e+07y
            Expected curves: 4914   Expected time: 1.39h

        """
        self._validate(n)
        B1 = self.recommended_B1(factor_digits)
        # Start the program with the 'v' (verbose) option so it prints the
        # expected-curves and expected-time tables parsed below.
        self.__cmd = self._ECM__startup_cmd(B1, None, {'v': ' '})
        child = pexpect.spawn(self.__cmd)
        cleaner.cleaner(child.pid, self.__cmd)
        child.timeout = None
        # NOTE(review): expect() takes a regular expression, so '[ECM]' is a
        # character class matching a single E, C or M rather than the literal
        # text "[ECM]" -- confirm this is intended.
        child.expect('[ECM]')
        child.sendline(str(n))
        try:
            child.sendeof()
        except Exception:
            # Best effort: a failure to send EOF is deliberately ignored.
            pass
        # First table header: "Expected number of curves ..." digit columns.
        child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65')
        if verbose:
            print child.before,
            print child.after,
        # First table row: ten whitespace-separated values, one per column.
        child.expect(
            '(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s',
            timeout=None)
        # Column index for factor_digits: columns run 20, 25, ..., 65.
        # Presumably __B1_table_value maps factor_digits onto that grid -- confirm.
        offset = (self.__B1_table_value(factor_digits, 20, 65) - 20) / 5
        curve_count = child.match.groups()[int(offset)]
        if verbose:
            print child.before,
            print child.after,
        # Second table header: "Expected time ..." uses the same digit columns.
        child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65',
                     timeout=None)
        if verbose:
            print child.before,
            print child.after,
        # Second table row: the expected times, again one value per column.
        child.expect(
            '(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s',
            timeout=None)
        if verbose:
            print child.before,
            print child.after
        # Note: this local shadows the method name only inside this call.
        time = child.match.groups()[int(offset)]
        # NOTE(review): kill(0) sends signal 0, which on POSIX only probes the
        # process; confirm the child is actually reaped elsewhere (cleaner?).
        child.kill(0)
        print "Expected curves:", curve_count, "\tExpected time:", time
Ejemplo n.º 16
0
# Search options; ``parser`` is created earlier in the script.
parser.add_argument('--nplanets',
                    type=int,
                    default=5,
                    help='Number of planets to search')
parser.add_argument('--ncpu',
                    type=int,
                    default=4,
                    help='Number of CPUs to use')
parser.add_argument('--min-period', type=float, default=0.2)
parser.add_argument('--max-period', type=float, default=20)
parser.add_argument('--method', type=str, default='TLS')

args = parser.parse_args()

# Columns 0, 1 and 2 of the input file are read as time, flux and flux error.
t, f, e = np.genfromtxt(args.File, usecols=(0, 1, 2), unpack=True)
# Entries where the mask is true are dropped
# (assumes cleaner returns a boolean array -- confirm).
ma = cleaner(t, f)

t, f, e = t[~ma], f[~ma], e[~ma]
# Flattening window: about 1/21 of the samples, forced to an odd length.
wl = 2 * (len(t) // 42) + 1
lc = TessLightCurve(time=t, flux=f, flux_err=e).flatten(window_length=wl,
                                                        polyorder=3,
                                                        niters=5)

# Figure layout: 3 rows by 5 columns, with a taller first row.
fig = plt.figure(constrained_layout=True, figsize=[15, 6])
gs = GridSpec(ncols=5, nrows=3, figure=fig, height_ratios=[3, 2, 2])

# Light curve: spans the whole first row.
axlc = fig.add_subplot(gs[0, :])
axlc.plot(lc.time.value, lc.flux, '.k', ms=1)

# One colour per planet searched.
color = plt.cm.rainbow(np.linspace(0, 1, args.nplanets))
Ejemplo n.º 17
0
 def start(self):
     """
     Spawn the command stored in ``self.__cmd`` and keep the child process.

     The child is registered with the cleaner, given no timeout, and stored
     as ``self.child``; ``self._ready`` is reset to False.
     """
     command = self.__cmd
     proc = pexpect.spawn(command)
     cleaner.cleaner(proc.pid, command)
     proc.timeout = None
     self.child, self._ready = proc, False
Ejemplo n.º 18
0
        lc = TessLightCurve(time=tt, flux=ff,
                            flux_err=ee).flatten(window_length=51,
                                                 polyorder=2,
                                                 niters=5)
        t.append(lc.time)
        f.append(lc.flux)
        e.append(lc.flux_err)

    t = np.concatenate(t)
    a = np.argsort(t)

    t = t[a]
    f = np.concatenate(f)[a]
    e = np.concatenate(e)[a]

    cm = cleaner(t, f)
    t = t[~cm]
    f = f[~cm]
    e = e[~cm]
    results = the_TLS(fn, t, f, e, target=args.target)

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=[10, 3])
    ax.plot(t, f, '-k', lw=0.5)

    fig, ax = plt.subplots(figsize=[10, 3])
    ax.plot(results.periods, results.power, '-k', lw=0.5)

    fig, ax = plt.subplots()
    phase = (results.model_lightcurve_time - results.T0 +
Ejemplo n.º 19
0
    def _start(self, alt_message=None, block_during_init=True):
        """
        Start the child process for this interface and wait for its first prompt.

        Any running session is quit first.  The child is spawned from the
        interface's own directory (``self.__path``); the caller's working
        directory is restored afterwards, whether or not the spawn succeeded.

        INPUT:
            alt_message -- not used in this method body
            block_during_init -- not used in this method body

        Raises ``RuntimeError`` if the command cannot be spawned or if the
        prompt does not appear (timeout or EOF).  In both cases
        ``self._expect`` is reset to None, the session number is set to
        ``BAD_SESSION`` and the interface name is appended to the global
        ``failed_to_start`` list.
        """
        from sage.misc.misc import sage_makedirs
        global failed_to_start

        self.quit()  # in case one is already running

        self._session_number += 1
        current_path = os.path.abspath('.')
        work_dir = self.__path
        sage_makedirs(work_dir)
        os.chdir(work_dir)

        # Everything up to the spawn runs inside work_dir.  The try/finally
        # guarantees the caller's directory is restored even when the spawn
        # fails; previously a failure left the process in work_dir.
        try:
            #If the 'SAGE_PEXPECT_LOG' environment variable is set and
            #the current logfile is None, then set the logfile to be one
            #in .sage/pexpect_logs/
            if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
                from sage.misc.all import DOT_SAGE
                logs = '%s/pexpect_logs'%DOT_SAGE
                sage_makedirs(logs)

                filename = '%s/%s-%s-%s-%s.log'%(logs, self.name(), os.getpid(), id(self), self._session_number)
                self.__logfile = open(filename, 'w')

            cmd = self.__command

            if self.__verbose_start:
                # Single-argument parenthesised prints behave the same as the
                # Python 2 print statement.
                print(cmd)
                print("Starting %s"%cmd.split()[0])

            try:
                if self.__remote_cleaner and self._server:
                    c = 'sage-native-execute  ssh %s "nohup sage -cleaner"  &'%self._server
                    os.system(c)

                # Unset $TERM for the children to reduce the chances they do
                # something complicated breaking the terminal interface.
                # See Trac #12221.
                pexpect_env = dict(os.environ)
                try:
                    del pexpect_env["TERM"]
                except KeyError:
                    pass
                self._expect = pexpect.spawn(cmd, logfile=self.__logfile, env=pexpect_env)
                if self._do_cleaner():
                    cleaner.cleaner(self._expect.pid, cmd)

            except (ExceptionPexpect, pexpect.EOF, IndexError):
                self._expect = None
                self._session_number = BAD_SESSION
                failed_to_start.append(self.name())
                raise RuntimeError("Unable to start %s because the command '%s' failed.\n%s"%(
                    self.name(), cmd, self._install_hints()))
        finally:
            os.chdir(current_path)

        self._expect.timeout = self.__max_startup_time

        # maxread is set as an attribute (formerly via setmaxread()).
        self._expect.maxread = self.__maxread
        self._expect.delaybeforesend = 0
        try:
            self._expect.expect(self._prompt)
        except (pexpect.TIMEOUT, pexpect.EOF):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError("Unable to start %s"%self.name())
Ejemplo n.º 20
0
 def tag_changer(self):
     """
     Write the song and artist tags into the named mp3, then clean up.

     The file name is the text of ``entry_name_of_the_file`` plus ".mp3".
     ``cleaner`` is then called with the name without extension, followed
     by ``self.clean()``.
     """
     mp3_name = self.entry_name_of_the_file.get() + ".mp3"
     title = self.song.get()
     performer = self.artist.get()
     metadata_changer(mp3_name, title, performer)
     # The entry is read again here, exactly as before.
     cleaner(self.entry_name_of_the_file.get())
     self.clean()
Ejemplo n.º 21
0
    def find_factor(self, n, factor_digits=None, B1=2000, **kwds):
        """
        Splits off a single factor of n.
        See ECM.factor()

        INPUT:
            n -- the integer to split (converted with Integer())
            factor_digits -- if given, B1 is replaced by
                             recommended_B1(factor_digits)
            B1 -- stage 1 bound (default 2000)
            kwds -- extra program options; 'c' and 'I' get defaults when
                    missing, and 'one' and 'cofdec' are always set

        OUTPUT:
            list of integers whose product is n

        As a side effect ``self.primality`` is set to a list of booleans
        parallel to the returned list, and ``self.last_params`` records the
        parameters of the last curve reported by the program.

        EXAMPLES:
            sage: f = ECM()
            sage: n = 508021860739623467191080372196682785441177798407961
            sage: f.find_factor(n)
            [79792266297612017, 6366805760909027985741435139224233]

        Note that the input number can't have more than 4095 digits:
            sage: f=2^2^14+1
            sage: ecm.find_factor(f)
            Traceback (most recent call last):
            ...
            ValueError: n must have at most 4095 digits        
        """
        n = Integer(n)
        self._validate(n)
        # Defaults for options the caller did not supply.
        if not 'c' in kwds: kwds['c'] = 1000000000
        if not 'I' in kwds: kwds['I'] = 1
        if not factor_digits is None: 
            B1 = self.recommended_B1(factor_digits)
        kwds['one'] = ''
        kwds['cofdec'] = ''
        self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
        self.last_params = { 'B1' : B1 }
        child = pexpect.spawn(self.__cmd)        
        cleaner.cleaner(child.pid, self.__cmd)
        child.timeout = None
        child.__del__ = nothing   # work around an "exception ignored" error on deletion
        # NOTE(review): as a regular expression '[ECM]' is a character class
        # (one of E, C, M), not the literal text "[ECM]" -- confirm intended.
        child.expect('[ECM]')
        child.sendline(str(n))
        # A deliberately invalid second input is sent instead of EOF;
        # presumably the "Error - invalid number" reply then marks the end of
        # the work on n -- confirm.
        child.sendline("bad") # child.sendeof()
        while True: 
        
            try: 
                # Three alternatives: a "Using B1=..." parameter line, a
                # "Factor found" line, or the invalid-number error.
                child.expect('(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)')
                # groups(): [0] whole parameter line, [1] B1, [2] B2, [3] poly,
                # [4] sigma, [5] whole "Factor found" text, [6] the factor,
                # [7] the error text.
                info = child.match.groups()
                if not info[0] is None: 
                    # Parameter line: remember the curve parameters and keep reading.
                    self.last_params = { 'B1' : child.match.groups()[1], 
                                         'B2' : child.match.groups()[2], 
                                         'poly' : child.match.groups()[3], 
                                         'sigma' : child.match.groups()[4] }
                elif info[7] != None:
                    # Error reply reached without any factor: return n unsplit.
                    child.kill(0)
                    self.primality = [False]
                    return [n]
                else:
                    # "Factor found": group 6 holds its digits.
                    p = Integer(info[6])
                    child.expect('(input number)|(prime factor)|(composite factor)')
                    if not child.match.groups()[0] is None: 
                        # The program reported "input number" for the factor:
                        # retry with a smaller B1 (4 + floor(B1/2)).
                        child.kill(0)
                        return self.find_factor(n, B1=4+floor(float(B1)/2), **kwds)
                    else: 
                        # primality testing is cheap compared to factoring, but has already been done
                        # return [p, n/p]
                        # groups()[1] is set only for "prime factor".
                        self.primality = [not child.match.groups()[1] is None]
                        child.expect('((prime cofactor)|(Composite cofactor)) (\d+)\D')
                        # Here groups()[1] is "prime cofactor" and groups()[3]
                        # holds the cofactor's digits.
                        q = Integer(child.match.groups()[3])
                        self.primality += [not child.match.groups()[1] is None]
                        child.kill(0)
                        return [p, q]
                    
                        
            except pexpect.EOF:
                # The child exited before reporting anything conclusive.
                child.kill(0)
                self.primality = [False]
                return [n]
        # Not reachable: the loop above has no break and only exits via return.
        child.kill(0)
Ejemplo n.º 22
0
    def one_curve(self, n, factor_digits=None, B1=2000, algorithm="ECM", **kwds):
        """
        Run one single ECM (or P-1/P+1) curve on input n.
        
        INPUT:
            n -- a positive integer
            factor_digits -- decimal digits estimate of the wanted factor
            B1 -- stage 1 bound (default 2000)
            algorithm -- either "ECM" (default), "P-1" or "P+1"
            kwds -- further options passed on to the program's command line
        OUTPUT:
            a list [p,q] where p and q are integers and n = p * q.
            If no factor was found, then p = 1 and q = n.
            WARNING: neither p nor q is guaranteed to be prime.

        Raises ValueError if algorithm is not one of the three names above.

        EXAMPLES:
            sage: f = ECM()
            sage: n = 508021860739623467191080372196682785441177798407961
            sage: f.one_curve(n, B1=10000, sigma=11)
            [1, 508021860739623467191080372196682785441177798407961]
            sage: f.one_curve(n, B1=10000, sigma=1022170541)
            [79792266297612017, 6366805760909027985741435139224233]
            sage: n = 432132887883903108009802143314445113500016816977037257
            sage: f.one_curve(n, B1=500000, algorithm="P-1")
            [67872792749091946529, 6366805760909027985741435139224233]
            sage: n = 2088352670731726262548647919416588631875815083
            sage: f.one_curve(n, B1=2000, algorithm="P+1", x0=5)
            [328006342451, 6366805760909027985741435139224233]
        """
        n = Integer(n)
        self._validate(n)
        if not factor_digits is None: 
            B1 = self.recommended_B1(factor_digits)
        # P-1 and P+1 are selected through an extra command-line option.
        if algorithm == "P-1":
            kwds['pm1'] = ''
        elif algorithm == "P+1":
            kwds['pp1'] = ''
        else:
           if not algorithm == "ECM":
              err = "unexpected algorithm: " + algorithm
              raise ValueError, err
        self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
        child = pexpect.spawn(self.__cmd)        
        cleaner.cleaner(child.pid, self.__cmd)
        child.timeout = None
        child.__del__ = nothing   # work around an "exception ignored" error on deletion
        # NOTE(review): as a regular expression '[ECM]' is a character class
        # (one of E, C, M), not the literal text "[ECM]" -- confirm intended.
        child.expect('[ECM]')
        child.sendline(str(n))
        # A deliberately invalid second input is sent instead of EOF; see the
        # "Error - invalid number" branch below.
        child.sendline("bad") # child.sendeof()
        while True:
            try: 
               child.expect('(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)')
               info = child.match.groups()
               # info[0] is the whole "Using B1=..." line, B1 is info[1],
               # B2 is info[2], poly is info[3], sigma is info[4],
               # info[5] is the whole "Factor found..." text, the factor is
               # info[6], and info[7] is the "Error - invalid number" text.
               if not info[0] is None:
                  # got Using B1=... line
                  self.last_params = { 'B1' : child.match.groups()[1], 
                                       'B2' : child.match.groups()[2], 
                                       'poly' : child.match.groups()[3], 
                                       'sigma' : child.match.groups()[4] }
               elif info[7] != None:
                  # got Error - invalid number, which means the curve did
                  # end without finding any factor, and the next input 'bad'
                  # was given to GMP-ECM
                  child.kill(0)
                  return [1, n]
               else:
                  # got Factor found...
                  p = Integer(info[6])
                  child.kill(0)
                  return [p, n/p]

            except pexpect.EOF:
               # The child exited without reporting a factor.
               child.kill(0)
               return [1, n]
            # Reached only after a "Using B1=..." line (the other branches
            # return).  NOTE(review): kill(0) sends signal 0, which on POSIX
            # only probes the process, so the loop keeps reading -- confirm
            # this is intended.
            child.kill(0)
Ejemplo n.º 23
0
import argparse
from cleaner import cleaner

def parsing(parser):
    """
    Register the five required command-line options on ``parser``.

    The options are ``--train``, ``--test``, ``--pred``, ``--max_feat`` (int)
    and ``--num_folds`` (int).  The same parser object is returned.
    """
    # (flag, extra keyword arguments) in registration order.
    option_specs = (
        ("--train", {"help": "Training data"}),
        ("--test", {"help": "Testing data"}),
        ("--pred", {"help": "Name of the output .csv file with predictions"}),
        ("--max_feat", {"type": int,
                        "help": "Maximum number of features for TfidfVectorizer"}),
        ("--num_folds", {"type": int,
                         "help": "Number of folds for k-fold cross-validation"}),
    )
    for flag, extra in option_specs:
        parser.add_argument(flag, required=True, **extra)
    return parser

# Build the command-line parser and read the arguments.
parser = argparse.ArgumentParser()
parser = parsing(parser)
args = parser.parse_args()

# Hand all five options to cleaner.
text = cleaner(args.train, args.test, args.pred, args.max_feat, args.num_folds)

# --pred is declared required, so args.pred is falsy only when an empty
# string is passed explicitly; otherwise this guard always holds.
if args.pred:
    text.out()

# Report the F1 score.
# NOTE(review): report_f1 is referenced without being called -- if it is a
# method rather than an attribute/property this prints the bound method; confirm.
print("F1 Score is:", text.report_f1)


from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
from mlxtend.preprocessing import DenseTransformer

pipeline = Pipeline([
  ("vectorizer", TfidfVectorizer()),
Ejemplo n.º 24
0
        '''
        from lightkurve.correctors import PLDCorrector
        tpf.hdu[1].data['FLUX'][ma] -= bkgs[:,None,None]
        tpf.hdu[1].data['FLUX_ERR'][ma] -= np.sqrt(tpf.hdu[1].data['FLUX_ERR'][ma]**2 + berr[:,None,None]**2)
        corr = PLDCorrector(tpf)
        lkf   = corr.correct(aperture_mask = dap[bidx], pld_aperture_mask='threshold', pld_order=3, use_gp=True)


#NORM
if args.norm:
    lkf.flux_err /= np.nanmedian(lkf.flux)
    lkf.flux /= np.nanmedian(lkf.flux)

if args.cleaner:
    from cleaner import cleaner
    omask = cleaner(lkf.time, lkf.flux)
    ntime = lkf.time.value[~omask]
    nflux = lkf.flux.value[~omask]
    nflux_err = lkf.flux_err.value[~omask]

    lkf = TessLightCurve(time=ntime, flux=nflux, flux_err=nflux_err)
   
    
#Gaia sources and dilution factor
if args.gaia:
    from astroquery.gaia import Gaia
    Gaia.ROW_LIMIT = -1

    gaiawh = u.Quantity(21*args.size*np.sqrt(2)/2, u.arcsec)
    gaiar  = Gaia.cone_search_async(coord, gaiawh).get_results()
    #gaiar  = Gaia.query_object_async(coord, width=gaiawh, height=gaiawh)
Ejemplo n.º 25
0
def datacleaner(df):
    """Pass ``df`` through ``cleaner.cleaner`` and return whatever it gives back.

    The ``cleaner`` module is imported on each call rather than at module
    load time.
    """
    from cleaner import cleaner as _clean

    cleaned = _clean(df)
    return cleaned
Ejemplo n.º 26
0
    def _start(self, alt_message=None, block_during_init=True):
        """
        Start the child process behind this interface and wait for its prompt.

        Any session that is already running is quit first.  The command is
        spawned from the interface's configured working directory with
        ``TERM`` and ``COLUMNS`` removed from its environment; the caller's
        working directory is restored afterwards whether or not the spawn
        succeeded.

        INPUT:

        - ``alt_message`` -- not used by this method

        - ``block_during_init`` -- boolean (default: ``True``); if true each
          piece of initialization code is run through ``self.eval``,
          otherwise it is only passed to ``self._send``

        Raises ``RuntimeError`` when the command cannot be spawned, or when
        waiting for the prompt ends in a timeout or end-of-file.  In both
        cases ``self._expect`` is reset to ``None``, the session number is
        set to ``BAD_SESSION`` and the interface name is appended to the
        global ``failed_to_start`` list.
        """
        from sage.misc.misc import sage_makedirs
        self.quit()  # in case one is already running
        global failed_to_start

        self._session_number += 1

        #If the 'SAGE_PEXPECT_LOG' environment variable is set and
        #the current logfile is None, then set the logfile to be one
        #in .sage/pexpect_logs/
        if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
            from sage.env import DOT_SAGE
            logs = os.path.join(DOT_SAGE, 'pexpect_logs')
            sage_makedirs(logs)

            filename = '%s/%s-%s-%s-%s.log'%(logs, self.name(), os.getpid(), id(self), self._session_number)
            self.__logfile = open(filename, 'w')

        cmd = self.__command

        if self.__verbose_start:
            # Single-argument parenthesised form prints the same text on
            # Python 2 and Python 3.
            print(cmd)
            print("Starting %s"%cmd.split()[0])

        try:
            if self.__remote_cleaner and self._server:
                c = 'sage-native-execute  ssh %s "nohup sage -cleaner"  &'%self._server
                os.system(c)

            # Unset some environment variables for the children to
            # reduce the chances they do something complicated breaking
            # the terminal interface.
            # See Trac #12221 and #13859.
            pexpect_env = dict(os.environ)
            pexpect_del_vars = ['TERM', 'COLUMNS']
            for i in pexpect_del_vars:
                try:
                    del pexpect_env[i]
                except KeyError:
                    pass

            # Run child from self.__path
            currentdir = os.getcwd()
            os.chdir(self.__path)
            try:
                self._expect = pexpect.spawn(cmd, logfile=self.__logfile, env=pexpect_env)
            finally:
                # Go back even when the spawn raises; otherwise a failed
                # start would leave the whole process in self.__path.
                os.chdir(currentdir)

            if self._do_cleaner():
                cleaner.cleaner(self._expect.pid, cmd)

        except (ExceptionPexpect, pexpect.EOF, IndexError):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError("unable to start %s because the command %r failed\n%s" % (
                self.name(), cmd, self._install_hints()))

        # Bound the wait for the first prompt by the startup limit.
        self._expect.timeout = self.__max_startup_time

        self._expect.maxread = self.__maxread
        self._expect.delaybeforesend = 0
        try:
            self._expect.expect(self._prompt)
        except (pexpect.TIMEOUT, pexpect.EOF):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError("unable to start %s" % self.name())
        # The startup limit only applies to the first prompt.
        self._expect.timeout = None

        # Calling tcsetattr earlier exposes bugs in various pty
        # implementations, see :trac:`16474`. Since we haven't
        # **written** anything so far it is safe to wait with
        # switching echo off until now.
        if not self._terminal_echo:
            self._expect.setecho(0)

        with gc_disabled():
            if block_during_init:
                for X in self.__init_code:
                    self.eval(X)
            else:
                for X in self.__init_code:
                    self._send(X)
Ejemplo n.º 27
0
Archivo: ecm.py Proyecto: shrutig/sage
    def one_curve(self,
                  n,
                  factor_digits=None,
                  B1=2000,
                  algorithm="ECM",
                  **kwds):
        """
        Run one single ECM (or P-1/P+1) curve on input n.

        INPUT:
            n -- a positive integer
            factor_digits -- decimal digits estimate of the wanted factor;
                when given, B1 is replaced by self.recommended_B1(factor_digits)
            B1 -- stage 1 bound (default 2000)
            algorithm -- either "ECM" (default), "P-1" or "P+1"
            kwds -- further options forwarded to the command line builder
        OUTPUT:
            a list [p,q] where p and q are integers and n = p * q.
            If no factor was found, then p = 1 and q = n.
            WARNING: neither p nor q is guaranteed to be prime.

            As a side effect, self.last_params is updated with the B1, B2,
            polynomial and sigma values reported by the child process.
        RAISES:
            ValueError -- if algorithm is not one of the three names above
        EXAMPLES:
            sage: f = ECM()
            sage: n = 508021860739623467191080372196682785441177798407961
            sage: f.one_curve(n, B1=10000, sigma=11)
            [1, 508021860739623467191080372196682785441177798407961]
            sage: f.one_curve(n, B1=10000, sigma=1022170541)
            [79792266297612017, 6366805760909027985741435139224233]
            sage: n = 432132887883903108009802143314445113500016816977037257
            sage: f.one_curve(n, B1=500000, algorithm="P-1")
            [67872792749091946529, 6366805760909027985741435139224233]
            sage: n = 2088352670731726262548647919416588631875815083
            sage: f.one_curve(n, B1=2000, algorithm="P+1", x0=5)
            [328006342451, 6366805760909027985741435139224233]
        """
        n = Integer(n)
        self._validate(n)
        if factor_digits is not None:
            B1 = self.recommended_B1(factor_digits)
        if algorithm == "P-1":
            kwds['pm1'] = ''
        elif algorithm == "P+1":
            kwds['pp1'] = ''
        elif algorithm != "ECM":
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError("unexpected algorithm: " + algorithm)
        self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
        child = pexpect.spawn(self.__cmd)
        cleaner.cleaner(child.pid, self.__cmd)
        child.timeout = None
        child.__del__ = nothing  # work around stupid exception ignored error
        # NOTE: '[ECM]' is a regular-expression character class, so this
        # returns as soon as a single 'E', 'C' or 'M' has been read.  It is
        # deliberately left as is; a literal match on "[ECM]" could behave
        # differently for the P-1/P+1 modes.
        child.expect('[ECM]')
        child.sendline(str(n))
        child.sendline("bad")  # child.sendeof()
        # Raw string so that \d, \s and \D reach the regex engine without
        # relying on Python leaving unknown string escapes untouched.
        progress = r'(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)'
        while True:
            try:
                child.expect(progress)
                info = child.match.groups()
                # info[0] is the whole "Using B1=..." alternative, with
                # B1 = info[1], B2 = info[2], poly = info[3], sigma = info[4];
                # info[5] is the whole "Factor found..." alternative and
                # info[6] the factor; info[7] is "Error - invalid number".
                if info[0] is not None:
                    # got Using B1=... line
                    self.last_params = {
                        'B1': info[1],
                        'B2': info[2],
                        'poly': info[3],
                        'sigma': info[4]
                    }
                elif info[7] is not None:
                    # got Error - invalid number, which means the curve did
                    # end without finding any factor, and the next input 'bad'
                    # was given to GMP-ECM
                    child.kill(0)
                    return [1, n]
                else:
                    # got Factor found...
                    p = Integer(info[6])
                    child.kill(0)
                    return [p, n / p]

            except pexpect.EOF:
                child.kill(0)
                return [1, n]
            # Reached after a "Using B1=..." line; the loop then waits for
            # the next message.
            # NOTE(review): kill(0) passes signal number 0, which presumably
            # does not terminate the child -- confirm against pexpect docs.
            child.kill(0)
Ejemplo n.º 28
0
 def start(self):
     """Spawn the stored command and keep the process handle on ``self.child``.

     The new process is registered with ``cleaner.cleaner``, its pexpect
     timeout is disabled, and ``self._ready`` is reset to ``False``.
     """
     proc = pexpect.spawn(self.__cmd)
     cleaner.cleaner(proc.pid, self.__cmd)
     proc.timeout = None
     self._ready = False
     self.child = proc
Ejemplo n.º 29
0
Archivo: ecm.py Proyecto: shrutig/sage
    def find_factor(self, n, factor_digits=None, B1=2000, **kwds):
        """
        Splits off a single factor of n.
        See ECM.factor()

        INPUT:
            n -- integer to split (converted with Integer and validated)
            factor_digits -- if not None, B1 is replaced by
                self.recommended_B1(factor_digits)
            B1 -- stage 1 bound (default 2000)
            kwds -- further command line options; 'c' and 'I' receive
                defaults when absent, 'one' and 'cofdec' are always set

        OUTPUT:
            list of integers whose product is n

            As side effects, self.last_params is updated from the child's
            "Using B1=..." line and self.primality is set to a list of
            booleans matching the returned list (True where the child
            reported the entry as prime).

        EXAMPLES:
            sage: f = ECM()
            sage: n = 508021860739623467191080372196682785441177798407961
            sage: f.find_factor(n)
            [79792266297612017, 6366805760909027985741435139224233]

        Note that the input number can't have more than 4095 digits:
            sage: f=2^2^14+1
            sage: ecm.find_factor(f)
            Traceback (most recent call last):
            ...
            ValueError: n must have at most 4095 digits
        """
        n = Integer(n)
        self._validate(n)
        # Only fill in 'c' and 'I' when the caller did not choose them.
        if not 'c' in kwds: kwds['c'] = 1000000000
        if not 'I' in kwds: kwds['I'] = 1
        if not factor_digits is None:
            B1 = self.recommended_B1(factor_digits)
        kwds['one'] = ''
        kwds['cofdec'] = ''
        self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
        self.last_params = {'B1': B1}
        child = pexpect.spawn(self.__cmd)
        cleaner.cleaner(child.pid, self.__cmd)
        child.timeout = None
        child.__del__ = nothing  # work around stupid exception ignored error
        # NOTE: '[ECM]' is a regex character class, so this returns as soon
        # as a single 'E', 'C' or 'M' has been read from the child.
        child.expect('[ECM]')
        child.sendline(str(n))
        child.sendline("bad")  # child.sendeof()
        while True:

            try:
                # Three alternatives: groups()[0] is the "Using B1=..." line
                # (with B1, B2, poly, sigma in [1]..[4]), [5] is the
                # "Factor found..." line with the factor in [6], and [7] is
                # "Error - invalid number".
                child.expect(
                    '(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)'
                )
                info = child.match.groups()
                if not info[0] is None:
                    # Parameter line: remember it and keep waiting.
                    self.last_params = {
                        'B1': child.match.groups()[1],
                        'B2': child.match.groups()[2],
                        'poly': child.match.groups()[3],
                        'sigma': child.match.groups()[4]
                    }
                elif info[7] != None:
                    # The child rejected the follow-up input "bad" without
                    # having reported a factor: return n unsplit.
                    child.kill(0)
                    self.primality = [False]
                    return [n]
                else:
                    p = Integer(info[6])
                    # groups()[0] -> "input number", [1] -> "prime factor",
                    # [2] -> "composite factor".
                    child.expect(
                        '(input number)|(prime factor)|(composite factor)')
                    if not child.match.groups()[0] is None:
                        # Presumably the factor found is n itself -- confirm.
                        # Retry with a smaller B1; the recursion reuses kwds,
                        # which already holds the defaults set above.
                        child.kill(0)
                        return self.find_factor(n,
                                                B1=4 + floor(float(B1) / 2),
                                                **kwds)
                    else:
                        # Primality testing is cheap compared to factoring
                        # and has already been done by the child, so its
                        # verdicts are recorded instead of returning
                        # [p, n/p] directly.
                        self.primality = [not child.match.groups()[1] is None]
                        # groups()[1] -> "prime cofactor", [2] -> "Composite
                        # cofactor", [3] -> the cofactor's digits.
                        child.expect(
                            '((prime cofactor)|(Composite cofactor)) (\d+)\D')
                        q = Integer(child.match.groups()[3])
                        self.primality += [not child.match.groups()[1] is None]
                        child.kill(0)
                        return [p, q]

            except pexpect.EOF:
                # Child ended without a usable answer: return n unsplit.
                child.kill(0)
                self.primality = [False]
                return [n]
        # Not reachable: the loop above only ends through a return or an
        # exception that propagates.
        child.kill(0)
Ejemplo n.º 30
0
def run_BLS(fl):
    """
    Run a periodic transit search on the light curve stored in ``fl``.

    The file is read with ``np.genfromtxt`` (columns 0, 1 and 2 are taken
    as time, flux and flux error), the points flagged by ``cleaner`` are
    dropped, and the remaining light curve is flattened before the search.

    Returns the tuple ``(fl, period, t0, dur, depth, snr, depth_even,
    depth_odd, depth_half, t1, ntra)``.  When ``args.target`` is not
    ``None`` the tuple is extended with ``result.period, result.power,
    lc.time, lc.flux, diffs``, where ``diffs`` holds the differences
    between consecutive time stamps of the flattened light curve.

    If ``model.compute_stats`` fails, the even/odd/half depths, ``t1`` and
    ``ntra`` are reported as ``0`` and ``t0`` keeps the value taken from
    the search result.
    """
    t, f, e = np.genfromtxt(fl, usecols=(0,1,2), unpack=True)
    mask    = cleaner(t,f)

    # Keep only the points that cleaner did not flag.
    keep = ~mask
    lc   = TessLightCurve(time=t[keep], flux=f[keep], flux_err=e[keep]).flatten(window_length=51, polyorder=2, niters=5)

    # An explicit period/duration grid used to be built here, but it was
    # never passed to the search below, so it has been removed.
    model     = BLS(lc.time,lc.flux) if not args.TLS else transitleastsquares(lc.time.value, lc.flux, lc.flux_err)

    # NOTE(review): these keyword arguments look like the
    # transitleastsquares interface, while the result is indexed below as
    # per-period arrays -- confirm both model types support this.
    result    = model.power(period_min=args.min_period, oversampling_factor=2, n_transits_min=1, use_threads=1, show_progress_bar=False)
    idx       = np.argmax(result.power)

    period = result.period[idx]
    t0     = result.transit_time[idx]
    dur    = result.duration[idx]
    depth  = result.depth[idx]
    snr    = result.depth_snr[idx]

    try:
        stats  = model.compute_stats(period, dur, t0)
        depth_even = stats['depth_even'][0]
        depth_odd  = stats['depth_odd'][0]
        depth_half = stats['depth_half'][0]
        t0, t1     = stats['transit_times'][:2]
        ntra       = len(stats['transit_times'])
    except Exception:
        # Best effort: fall back to neutral values when the statistics
        # cannot be computed, but let KeyboardInterrupt/SystemExit through.
        depth_even = 0
        depth_odd  = 0
        depth_half = 0
        t1         = 0
        ntra       = 0

    if args.target is not None:
        # diffs was previously only assigned inside a disabled code block,
        # so this branch raised NameError.  Work on the plain values when
        # the time axis is a wrapper object exposing .value.
        diffs = np.diff(getattr(lc.time, 'value', lc.time))
        return fl, period, t0, dur, depth, snr, depth_even, depth_odd, depth_half, t1, ntra, result.period, result.power, lc.time, lc.flux, diffs
    else:
        return fl, period, t0, dur, depth, snr, depth_even, depth_odd, depth_half, t1, ntra
Ejemplo n.º 31
0
    def _start(self, alt_message=None, block_during_init=True):
        """
        Start the child process behind this interface and wait for its prompt.

        Any session that is already running is quit first.  The configured
        working directory is created if needed and the command is spawned
        from it with ``TERM`` and ``COLUMNS`` removed from its environment.
        The caller's working directory is restored afterwards, also when
        starting the command fails.

        INPUT:

        - ``alt_message`` -- not used by this method

        - ``block_during_init`` -- not used by this method

        Raises ``RuntimeError`` when the command cannot be spawned, or when
        waiting for the prompt ends in a timeout or end-of-file.  In both
        cases ``self._expect`` is reset to ``None``, the session number is
        set to ``BAD_SESSION`` and the interface name is appended to the
        global ``failed_to_start`` list.
        """
        from sage.misc.misc import sage_makedirs
        self.quit()  # in case one is already running
        global failed_to_start

        self._session_number += 1
        current_path = os.path.abspath('.')
        workdir = self.__path
        sage_makedirs(workdir)
        os.chdir(workdir)
        try:
            #If the 'SAGE_PEXPECT_LOG' environment variable is set and
            #the current logfile is None, then set the logfile to be one
            #in .sage/pexpect_logs/
            if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
                from sage.env import DOT_SAGE
                logs = '%s/pexpect_logs'%DOT_SAGE
                sage_makedirs(logs)

                filename = '%s/%s-%s-%s-%s.log'%(logs, self.name(), os.getpid(), id(self), self._session_number)
                self.__logfile = open(filename, 'w')

            cmd = self.__command

            if self.__verbose_start:
                # Single-argument parenthesised form prints the same text
                # on Python 2 and Python 3.
                print(cmd)
                print("Starting %s"%cmd.split()[0])

            try:
                if self.__remote_cleaner and self._server:
                    c = 'sage-native-execute  ssh %s "nohup sage -cleaner"  &'%self._server
                    os.system(c)

                # Unset some environment variables for the children to
                # reduce the chances they do something complicated breaking
                # the terminal interface.
                # See Trac #12221 and #13859.
                pexpect_env = dict(os.environ)
                pexpect_del_vars = ['TERM', 'COLUMNS']
                for i in pexpect_del_vars:
                    try:
                        del pexpect_env[i]
                    except KeyError:
                        pass
                self._expect = pexpect.spawn(cmd, logfile=self.__logfile, env=pexpect_env)
                if self._do_cleaner():
                    cleaner.cleaner(self._expect.pid, cmd)

            except (ExceptionPexpect, pexpect.EOF, IndexError):
                self._expect = None
                self._session_number = BAD_SESSION
                failed_to_start.append(self.name())
                raise RuntimeError("Unable to start %s because the command '%s' failed.\n%s"%(
                    self.name(), cmd, self._install_hints()))
        finally:
            # Restore the caller's directory on every path; previously a
            # failed start left the whole process inside the work directory.
            os.chdir(current_path)

        # Bound the wait for the first prompt by the startup limit.
        self._expect.timeout = self.__max_startup_time

        #self._expect.setmaxread(self.__maxread)
        self._expect.maxread = self.__maxread
        self._expect.delaybeforesend = 0
        try:
            self._expect.expect(self._prompt)
        except (pexpect.TIMEOUT, pexpect.EOF):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError("Unable to start %s"%self.name())
Ejemplo n.º 32
0
    def time(self, n, factor_digits, verbose=0):
        """
        Gives an approximation for the amount of time it will take to find a factor
        of size factor_digits in a single process on the current computer.
        This estimate is provided by GMP-ECM's verbose option on a single run of a curve.

        INPUT:
            n -- a positive integer
            factor_digits -- the (estimated) number of digits of the smallest factor
            verbose -- (default: 0) if true, also echo the child's output
                that was consumed while looking for the two tables

        OUTPUT:
            Nothing is returned; the expected number of curves and the
            expected time are printed.

        EXAMPLES:

            sage: n = next_prime(11^23)*next_prime(11^37)

            sage: ecm.time(n, 20)                  # not tested
            Expected curves: 77     Expected time: 7.21s
            sage: ecm.time(n, 25)                  # not tested
            Expected curves: 206    Expected time: 1.56m
            sage: ecm.time(n, 30, verbose=1)       # not tested
            GMP-ECM 6.1.3 [powered by GMP 4.2.1] [ECM]

            Input number is 304481639541418099574459496544854621998616257489887231115912293 (63 digits)
            Using MODMULN
            Using B1=250000, B2=128992510, polynomial Dickson(3), sigma=2307628716
            dF=2048, k=3, d=19110, d2=11, i0=3
            Expected number of curves to find a factor of n digits:
            20      25      30      35      40      45      50      55      60      65
            8       50      430     4914    70293   1214949 2.5e+07 5.9e+08 1.6e+10 2.7e+13
            Step 1 took 6408ms
            Using 16 small primes for NTT
            Estimated memory usage: 3862K
            Initializing tables of differences for F took 16ms
            Computing roots of F took 128ms
            Building F from its roots took 408ms
            Computing 1/F took 608ms
            Initializing table of differences for G took 12ms
            Computing roots of G took 120ms
            Building G from its roots took 404ms
            Computing roots of G took 120ms
            Building G from its roots took 412ms
            Computing G * H took 328ms
            Reducing  G * H mod F took 348ms
            Computing roots of G took 120ms
            Building G from its roots took 408ms
            Computing G * H took 328ms
            Reducing  G * H mod F took 348ms
            Computing polyeval(F,G) took 1128ms
            Step 2 took 5260ms
            Expected time to find a factor of n digits:
            20      25      30      35      40      45      50      55      60      65
            1.58m   9.64m   1.39h   15.93h  9.49d   164.07d 9.16y   218.68y 5825y   1e+07y
            Expected curves: 4914   Expected time: 1.39h

        """
        self._validate(n)
        B1 = self.recommended_B1(factor_digits)
        # The 'v' option requests the verbose output that contains the two
        # tables parsed below.
        self.__cmd = self._ECM__startup_cmd(B1, None, {'v': ' '})
        child = pexpect.spawn(self.__cmd)
        cleaner.cleaner(child.pid, self.__cmd)
        child.timeout = None
        # NOTE: '[ECM]' is a regex character class, so this returns as soon
        # as a single 'E', 'C' or 'M' has been read from the child.
        child.expect('[ECM]')
        child.sendline(str(n))
        # Best effort: any failure while sending end-of-file is ignored.
        try:
            child.sendeof()
        except:
            pass
        # Header row of the first table (digit sizes 20, 25, ..., 65).
        child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65')
        if verbose:
            print child.before,
            print child.after,
        # Value row of the first table: ten whitespace-separated entries,
        # one capture group per digit-size column.
        child.expect('(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s', timeout=None)
        # Columns are 5 digits apart starting at 20, so (value - 20) / 5 is
        # the column index (presumably the helper returns one of the listed
        # sizes between 20 and 65 -- confirm).
        offset = (self.__B1_table_value(factor_digits, 20, 65)-20)/5
        curve_count = child.match.groups()[int(offset)]
        if verbose:
            print child.before,
            print child.after,
        # Header row of the second table.
        child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65', timeout=None)
        if verbose:
            print child.before,
            print child.after,
        # Value row of the second table, read at the same column.
        child.expect('(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s', timeout=None)
        if verbose:
            print child.before,
            print child.after
        # Local name; it shadows this method's own name inside the body only.
        time = child.match.groups()[int(offset)]
        child.kill(0)
        print "Expected curves:", curve_count, "\tExpected time:", time
Ejemplo n.º 33
0
from reader import csv_reader
from cleaner import cleaner
import pandas as pd

# Read, clean and load the input while the file is open.  Building the
# DataFrame inside the with-block keeps this correct even if csv_reader or
# cleaner consume the file lazily, and the handle is always closed afterwards.
with open('receita.csv', 'r') as csv_file:
    dict_list = csv_reader(csv_file)
    file_cleaned = cleaner(dict_list)
    imports = pd.DataFrame(file_cleaned)

# Write the cleaned table next to the project sources.
export_csv = imports.to_csv(
    r'C:\Users\CalebeLadis\PycharmProjects\csv-reader\receita2.csv',
    index=None,
    header=True)
Ejemplo n.º 34
0
    def _start(self, alt_message=None, block_during_init=True):
        """
        Start the child process behind this interface and wait for its prompt.

        Any session that is already running is quit first.  The command is
        spawned from the interface's configured working directory with
        ``TERM`` and ``COLUMNS`` removed from its environment; the caller's
        working directory is restored in all cases.

        INPUT:

        - ``alt_message`` -- not used by this method

        - ``block_during_init`` -- boolean (default: ``True``); if true each
          piece of initialization code is run through ``self.eval``,
          otherwise it is only passed to ``self._send``

        Raises ``RuntimeError`` when the command cannot be spawned, or when
        waiting for the prompt ends in a timeout or end-of-file.  On any
        failure ``self._expect`` is reset to ``None`` and the session number
        is set to ``BAD_SESSION``.
        """
        from sage.misc.misc import sage_makedirs
        self.quit()  # in case one is already running

        self._session_number += 1

        if self.__logfile is None:
            # If the 'SAGE_PEXPECT_LOG' environment variable is set and
            # there is no logfile already defined, then create a
            # logfile in .sage/pexpect_logs/
            if self.__logfilename is None and 'SAGE_PEXPECT_LOG' in os.environ:
                from sage.env import DOT_SAGE
                logs = os.path.join(DOT_SAGE, 'pexpect_logs')
                sage_makedirs(logs)

                self.__logfilename = '%s/%s-%s-%s-%s.log'%(logs, self.name(), os.getpid(), id(self), self._session_number)
            # Open the log whenever a file name is known, whether it was
            # set beforehand or derived just above.
            if self.__logfilename is not None:
                self.__logfile = open(self.__logfilename, 'w')

        cmd = self.__command

        if self.__verbose_start:
            print cmd
            print "Starting %s"%cmd.split()[0]

        # Launch the cleaner on the remote machine in the background.
        if self.__remote_cleaner and self._server:
            c = 'sage-native-execute  ssh %s "nohup sage -cleaner"  &'%self._server
            os.system(c)

        # Unset some environment variables for the children to
        # reduce the chances they do something complicated breaking
        # the terminal interface.
        # See Trac #12221 and #13859.
        pexpect_env = dict(os.environ)
        pexpect_del_vars = ['TERM', 'COLUMNS']
        for i in pexpect_del_vars:
            try:
                del pexpect_env[i]
            except KeyError:
                pass

        # Run child from self.__path
        currentdir = os.getcwd()
        os.chdir(self.__path)
        try:
            try:
                self._expect = SageSpawn(cmd,
                        logfile=self.__logfile,
                        timeout=None,  # no timeout
                        env=pexpect_env,
                        name=self._repr_(),
                        quit_string=self._quit_string())
            except (ExceptionPexpect, pexpect.EOF) as e:
                # Change pexpect errors to RuntimeError
                raise RuntimeError("unable to start %s because the command %r failed: %s\n%s" %
                        (self.name(), cmd, e, self._install_hints()))
        except BaseException:
            # Whatever went wrong (including the RuntimeError raised just
            # above), mark the session as bad before re-raising.
            self._expect = None
            self._session_number = BAD_SESSION
            raise
        finally:
            # Always return to the caller's working directory.
            os.chdir(currentdir)

        if self._do_cleaner():
            cleaner.cleaner(self._expect.pid, cmd)

        self._expect.maxread = self.__maxread
        self._expect.delaybeforesend = 0
        # Wait for the first prompt; failing to see it counts as a failed
        # start.
        try:
            self._expect.expect(self._prompt)
        except (pexpect.TIMEOUT, pexpect.EOF):
            self._expect = None
            self._session_number = BAD_SESSION
            raise RuntimeError("unable to start %s" % self.name())
        self._expect.timeout = None

        # Calling tcsetattr earlier exposes bugs in various pty
        # implementations, see :trac:`16474`. Since we haven't
        # **written** anything so far it is safe to wait with
        # switching echo off until now.
        if not self._terminal_echo:
            self._expect.setecho(0)

        # Run the initialization code with the garbage collector disabled.
        with gc_disabled():
            if block_during_init:
                for X in self.__init_code:
                    self.eval(X)
            else:
                for X in self.__init_code:
                    self._send(X)
# Weather-station code to query (7480 is Lyon-Bron).
cityCode = 7480

# Initialisation of the accumulators and counter used by the scraping code.
body = []
header = []
i = 0

for d in range(0, deltaDate.days+1):
    sourcingDate = startingDate + timedelta(days=d)

    # Get webpage source code to scrap data from it with our parameters
    rawSource = requests.get("http://www.meteociel.fr/temps-reel/obs_villes.php?jour2=%s&mois2=%s&annee2=%s&code2=%s"                              % (sourcingDate.day, sourcingDate.month-1, sourcingDate.year, cityCode)) 

    # Cleaning raw source code
    source = cleaner(rawSource.text)

    # Transforming into a tree
    tree = html.fromstring(source)
    # Browsing tree, seeking for tr 
    tables = tree.xpath('//tr')
    
    # Initializing counter
    k = 0
        
    # Get header
    header = tables[0].xpath('td/text()')
    
    # Getting values and refactoring date/time
    for td in tables:
        if k > 0:
Ejemplo n.º 36
0
    def _start(self, alt_message=None, block_during_init=True):
        """
        Start the child process behind this interface and wait for its prompt.

        Any session that is already running is quit first.  The configured
        working directory is created if needed and the command is spawned
        from it with ``TERM`` and ``COLUMNS`` removed from its environment.
        The caller's working directory is restored afterwards, also when
        starting the command fails.

        INPUT:

        - ``alt_message`` -- not used by this method

        - ``block_during_init`` -- boolean (default: ``True``); if true each
          piece of initialization code is run through ``self.eval``,
          otherwise it is only passed to ``self._send``

        Raises ``RuntimeError`` when the command cannot be spawned, or when
        waiting for the prompt ends in a timeout or end-of-file.  In both
        cases ``self._expect`` is reset to ``None``, the session number is
        set to ``BAD_SESSION`` and the interface name is appended to the
        global ``failed_to_start`` list.
        """
        from sage.misc.misc import sage_makedirs
        self.quit()  # in case one is already running
        global failed_to_start

        self._session_number += 1
        current_path = os.path.abspath('.')
        workdir = self.__path
        sage_makedirs(workdir)
        os.chdir(workdir)
        try:
            #If the 'SAGE_PEXPECT_LOG' environment variable is set and
            #the current logfile is None, then set the logfile to be one
            #in .sage/pexpect_logs/
            if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
                from sage.env import DOT_SAGE
                logs = '%s/pexpect_logs'%DOT_SAGE
                sage_makedirs(logs)

                filename = '%s/%s-%s-%s-%s.log'%(logs, self.name(), os.getpid(), id(self), self._session_number)
                self.__logfile = open(filename, 'w')

            cmd = self.__command

            if self.__verbose_start:
                # Single-argument parenthesised form prints the same text
                # on Python 2 and Python 3.
                print(cmd)
                print("Starting %s"%cmd.split()[0])

            try:
                if self.__remote_cleaner and self._server:
                    c = 'sage-native-execute  ssh %s "nohup sage -cleaner"  &'%self._server
                    os.system(c)

                # Unset some environment variables for the children to
                # reduce the chances they do something complicated breaking
                # the terminal interface.
                # See Trac #12221 and #13859.
                pexpect_env = dict(os.environ)
                pexpect_del_vars = ['TERM', 'COLUMNS']
                for i in pexpect_del_vars:
                    try:
                        del pexpect_env[i]
                    except KeyError:
                        pass
                self._expect = pexpect.spawn(cmd, logfile=self.__logfile, env=pexpect_env)
                if self._do_cleaner():
                    cleaner.cleaner(self._expect.pid, cmd)

            except (ExceptionPexpect, pexpect.EOF, IndexError):
                self._expect = None
                self._session_number = BAD_SESSION
                failed_to_start.append(self.name())
                raise RuntimeError("Unable to start %s because the command '%s' failed.\n%s"%(
                    self.name(), cmd, self._install_hints()))
        finally:
            # Restore the caller's directory on every path; previously a
            # failed start left the whole process inside the work directory.
            os.chdir(current_path)

        # Bound the wait for the first prompt by the startup limit.
        self._expect.timeout = self.__max_startup_time

        #self._expect.setmaxread(self.__maxread)
        self._expect.maxread = self.__maxread
        self._expect.delaybeforesend = 0
        try:
            self._expect.expect(self._prompt)
        except (pexpect.TIMEOUT, pexpect.EOF):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError("Unable to start %s"%self.name())
        # The startup limit only applies to the first prompt.
        self._expect.timeout = None

        # Calling tcsetattr earlier exposes bugs in various pty
        # implementations, see :trac:`16474`. Since we haven't
        # **written** anything so far it is safe to wait with
        # switching echo off until now.
        if not self._terminal_echo:
            self._expect.setecho(0)

        with gc_disabled():
            if block_during_init:
                for X in self.__init_code:
                    self.eval(X)
            else:
                for X in self.__init_code:
                    self._send(X)
Ejemplo n.º 37
0
import storingwebpages
import trials
import unifier
import cleaner
# Search term and number of result files shared by every stage below.
query = "husband"
number_of_files = 200

# Stage 1: run the search and store the result pages.
storingwebpages.results(
    query,
    "advanced",
    "1674",
    "00",
    "1913",
    "99",
    number_of_files,
)

# Stages 2-4 take the same two arguments and run strictly in this order.
for _stage in (trials.trials, unifier.unifier, cleaner.cleaner):
    _stage(query, number_of_files)