def __init__(self, n, time):
    """
    Launch the ``QuadraticSieve`` program and hand it the number ``n``.

    INPUT:

    - ``n`` -- the number to send; its ``str()`` form is written to the process
    - ``time`` -- if true, the program is started through the ``time`` command
    """
    self._n = n
    # Optionally wrap the program in ``time``.
    cmd = 'time QuadraticSieve' if time else 'QuadraticSieve'
    tmpdir()
    process = pexpect.spawn(cmd)
    self._p = process
    # Register the child so the cleaner can track it by pid.
    cleaner.cleaner(process.pid, 'QuadraticSieve')
    process.sendline(str(self._n) + '\n\n\n')
    # Bookkeeping for the not-yet-finished run.
    self._done = False
    self._out = ''
    self._time = ''
    self._do_time = time
def __init__(self, n, time):
    """
    Launch the ``QuadraticSieve`` program via ``SageSpawn`` and send it ``n``.

    INPUT:

    - ``n`` -- the number to send; its ``str()`` form is written to the process
    - ``time`` -- if true, the program is started through the ``time`` command
    """
    self._n = n
    # Optionally wrap the program in ``time``.
    cmd = 'time QuadraticSieve' if time else 'QuadraticSieve'
    tmpdir()
    process = SageSpawn(cmd)
    self._p = process
    # Register the child so the cleaner can track it by pid.
    cleaner.cleaner(process.pid, 'QuadraticSieve')
    process.sendline(str(self._n) + '\n\n\n')
    # Bookkeeping for the not-yet-finished run.
    self._done = False
    self._out = ''
    self._time = ''
    self._do_time = time
def setup(config, pool, acc, user): total = 20 rows = {} for _ in xrange(total): uid, r = db_txn(pool, partial(do_setup, acc, user)) print uid, r rows[uid] = r fetch = fetcher(config, pool, Condition(Lock()), Condition(Lock())) clean = cleaner(config, pool, Condition(Lock()), Condition(Lock())) ev = Event() ev.clear() load = Thread(target=load_database, args=(config, pool, ev, [fetch, clean])) # @IgnorePep8 load.start() ev.wait() fetch.start() tasks = [] while len(tasks) < total: print 'already fetched: ', len(tasks), 'tasks' print 'to fetch tasks from db' fetch.request(acc) for r in fetch.replies(True): ts = r[1] print 'fetched', len(ts), 'tasks' for t in ts: if t.uuid in rows: tasks.append((t, rows[t.uuid])) return (clean, tasks)
def __init__(self, n, time):
    """
    Start a ``QuadraticSieve`` process and send it the number ``n``.

    INPUT:

    - ``n`` -- the number to send; its ``str()`` form is written to the process
    - ``time`` -- if true, the program is started through the ``time`` command

    Raises ``ValueError`` if ``time`` is requested while ``cygwin`` is true.
    """
    self._n = n
    if time:
        if cygwin:
            # Call-form raise: valid on both Python 2 and Python 3.
            raise ValueError("qsieve time not supported on Cygwin")
        cmd = 'time QuadraticSieve'
    else:
        cmd = 'QuadraticSieve'
    tmpdir()
    self._p = pexpect.spawn(cmd)
    # Register the child so the cleaner can track it by pid.
    cleaner.cleaner(self._p.pid, 'QuadraticSieve')
    self._p.sendline(str(self._n) + '\n\n\n')
    self._done = False
    self._out = ''
    self._time = ''
    self._do_time = time
def __init__(self):
    """
    Build the cleaner helper and open a session on ``sqlite:///statuses.db``.

    The engine is also bound to ``Base.metadata``.
    """
    self.clean = cleaner()
    db_engine = create_engine('sqlite:///statuses.db')
    Base.metadata.bind = db_engine
    # sessionmaker returns a factory; calling it yields the session we keep.
    session_factory = sessionmaker(bind=db_engine)
    self.session = session_factory()
def __init__(self, n, time):
    """
    Start a ``QuadraticSieve`` process and send it the number ``n``.

    INPUT:

    - ``n`` -- the number to send; its ``str()`` form is written to the process
    - ``time`` -- if true, the program is started through the ``time`` command

    Raises ``ValueError`` if ``time`` is requested while ``cygwin`` is true.
    """
    self._n = n
    if time:
        if cygwin:
            # Call-form raise: valid on both Python 2 and Python 3.
            raise ValueError("qsieve time not supported on Cygwin")
        cmd = 'time QuadraticSieve'
    else:
        cmd = 'QuadraticSieve'
    tmpdir()
    self._p = pexpect.spawn(cmd)
    # Register the child so the cleaner can track it by pid.
    cleaner.cleaner(self._p.pid, 'QuadraticSieve')
    self._p.sendline(str(self._n) + '\n\n\n')
    self._done = False
    self._out = ''
    self._time = ''
    self._do_time = time
def __init__(self):
    """
    Create the cleaner, five empty dictionaries, and load every record
    returned by the database container.
    """
    self.clean = cleaner()
    # Each attribute gets its own fresh, empty dict.
    for attr_name in ('allWords', 'tfDictTrain', 'tfDictNew',
                      'sharedTermsDict', 'simDict'):
        setattr(self, attr_name, {})
    container = dbContainer()
    self.db = container
    self.all = container.get_all()
def test_clean():
    """
    Check ``cleaner`` on ``data/test_data30.csv``: the sums of the returned
    time and voltage sequences must match the reference totals.
    """
    import sys
    sys.path.append('../')
    import glob
    import numpy as np
    import pandas as pd
    from cleaner import cleaner, isfloat

    csv_path = 'data/test_data30.csv'
    frame = pd.read_csv(csv_path, names=['time', 'volt'])
    times, volts = cleaner(frame)
    # Two sequential asserts keep the short-circuit order of the combined check.
    assert abs(sum(times) - 199976.1) < 0.2
    assert abs(sum(volts) - 443.105) < 0.01
def run_TLS(fn, min_period=0.2, target=None):
    """
    Load a three-column light curve from ``fn``, drop the points flagged by
    ``cleaner``, flatten it, and pass the result to ``the_TLS``.

    Returns whatever ``the_TLS`` returns.
    """
    t, f, e = np.genfromtxt(fn, usecols=(0, 1, 2), unpack=True)
    # cleaner returns a mask of points to discard; invert it once.
    keep = ~cleaner(t, f)
    raw = TessLightCurve(time=t[keep], flux=f[keep], flux_err=e[keep])
    flat = raw.flatten(window_length=51, polyorder=2, niters=5)
    return the_TLS(fn, flat.time.value, flat.flux, flat.flux_err,
                   min_period, target)
def start(algo, auto, print_unsorted, print_sorted):
    """
    Obtain a list of numbers and run the selected sorting algorithm(s) on it.

    ``auto`` (when truthy) is passed to ``cleaner.cleaner`` to produce the
    list; otherwise ``generator.main()`` supplies it. ``algo`` is
    ``'bubble_sort'``, ``'insert_sort'`` or ``'all'``. Each algorithm
    receives its own copy of the list.
    """
    numbers = cleaner.cleaner(auto) if auto else generator.main()

    # The header gets a leading newline only when the list itself was printed.
    header = 'For '
    if print_unsorted:
        print('unsorted list: ', numbers)
        header = '\nFor '
    print(header, len(numbers), ' numbers:')

    if algo in ('bubble_sort', 'all'):
        bubble_algorithm(numbers[:], print_unsorted, print_sorted)
    if algo in ('insert_sort', 'all'):
        insert_algorithm(numbers[:], print_unsorted, print_sorted)
def __init__(self, text, document, pos, comp=False):
    """
    Build the token, vocabulary and parse views of one piece of text.

    - ``text`` -- the raw text; stored as ``self.text`` and given to ``ner_model``
    - ``document`` -- stored as ``self.document``
    - ``pos`` -- stored as ``self.pos``
    - ``comp`` -- stored as ``self.is_compressed`` (default ``False``)
    """
    self.pos = pos
    self.document = document
    self.clean_text = cleaner.cleaner(text)
    self.text = text
    # Lower-case every whitespace-separated word and strip edge punctuation.
    self.full_tokens = [
        w.lower().strip(string.punctuation) for w in self.clean_text.split()
    ]
    # Fetch the stop-word list once and use a set: the original re-read the
    # list and scanned it linearly for every token. It is only loaded when
    # there is at least one non-empty token, as before.
    if any(self.full_tokens):
        english_stopwords = set(stopwords.words('english'))
    else:
        english_stopwords = set()
    self.tokens = [
        w for w in self.full_tokens if w and w not in english_stopwords
    ]
    self.vocab = self.get_vocab()
    # The entity parse uses the raw text, the dependency parse the cleaned text.
    self.ent_parse = ner_model(self.text)
    self.dep_parse = dep_model(self.clean_text)
    self.is_compressed = comp
def terrorist_finder(query_string, source_data, blacklist_data_file,
                     noise_data_file=None):
    """
    Look up ``query_string`` in a blacklist and print any match found.

    Parameters:
        query_string: the name to look for.
        source_data: ``"file"`` or ``"europa"`` (case-insensitive). For
            ``"file"`` the blacklist is built with ``clean.cleaner``; for
            ``"europa"`` ``blacklist_data_file`` is used as the blacklist.
        blacklist_data_file: blacklist source, interpreted per ``source_data``.
        noise_data_file: passed to ``clean.cleaner``; defaults to an empty list.

    Returns:
        Always the empty string; results are reported on stdout.

    Raises:
        ValueError: if ``source_data`` is neither ``"file"`` nor ``"europa"``.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if noise_data_file is None:
        noise_data_file = []
    query = query_string

    source = source_data.upper()
    if source == "FILE":
        blacklist = clean.cleaner(blacklist_data_file, noise_data_file)
    elif source == "EUROPA":
        blacklist = blacklist_data_file
    else:
        raise ValueError("Source data isn't file or europa")

    # Strict matching
    if strict_match(query, blacklist):
        print("TERRORIST MATCHED!")
        print("Certainty: Strict match")
        print("Name:  %s" % (query,))
        return ""
    print("No strict match, looking for partial matches...")

    # Partial matching: partial_match returns (names, certainties).
    matches = partial_match(query, blacklist)
    match_name = matches[0]
    match_certainty = matches[1]
    if len(match_name) > 0:
        for index, name in enumerate(match_name):
            print("TERRORIST MATCHED!")
            print("Certainty:  %s %%" % (match_certainty[index],))
            print("Name:  %s" % (name,))
            print("")
        return ""
    print("No partial match")

    # Fuzzy matching
    # Not implemented yet
    return ""
indirafs = yamldir + args.version checkereos = chkeos.checker_eos(indirafs, indireos, args.process) checkereos.check() elif args.merge: print 'running the merger' if args.process != '': print 'using a specific process ', args.process import merger as mgr merger = mgr.merger(args.process, yamldir, args.version) merger.merge(args.force) elif args.clean: print 'clean the dictionnary and eos' import cleaner as clf clean = clf.cleaner(indir, yamldir, args.process, args.version) clean.clean() elif args.cleanold: print 'clean the dictionnary from old jobs that have not been checked' import cleaner as clf clean = clf.cleaner(indir, yamldir, args.process, args.version) clean.cleanoldjobs() elif args.web: import printer as prt webfile = webfile.replace('VERSION', args.version) printdic = prt.printer(yamldir, indir, webfile, args.version) printdic.run() elif args.remove:
def data_analysis():
    """
    Run the interactive data-analysis workflow.

    The tar/tgz archives are first unpacked into a separate folder. The
    ``*.log`` files are then searched for error terminations and imaginary
    frequencies, and the logs containing errors or imaginary frequencies
    are separated from the rest. The ``EnergyEx`` module extracts the
    calculated energies, which are written to a csv file by the
    ``csvwriter`` module. Logs containing imaginary frequencies are
    processed by the "xvibs" program.

    All input is read interactively with ``input()``; nothing is returned.
    """
    import os, sys, time
    import tmp_unrar as tu
    from ErrorCatcher import ErrorCatcher as EC
    from ImagCatcher import ImagCatcher as IC
    from RouteCard import RouteCard as RC
    from EnergyEx import EnergyEx
    from csvwriter import csvwriter
    from FileSeparator import FileSeparator
    from xawker import xawker
    from ImagInput import ImagInput
    from cleaner import cleaner
    global direct
    path=input('Enter the path to the directory where you have the tgz/tar folders: ')
    # Change into the working directory.
    # NOTE(review): os.chdir() returns None, so ``direct`` is always None and
    # None is what tmp_check/unrar receive below -- confirm this is intended.
    direct=os.chdir(os.path.expanduser(path))

    # ---- Check for a tmp folder and unpack the tar/tgz files into it ----
    tu.tmp_check(direct)
    tu.unrar(direct)

    # ---- Search for error terminations in the *.log files ----
    er = EC()
    for file in os.listdir():
        er.e_catch(file)
    print('You have {} calculation(s) with ERROR termination!'.format(er.error_counter))

    # ---- Search for imaginary frequencies in the *.log files ----
    imag=IC()
    for file in os.listdir():
        imag.imag_catcher(file)

    # ---- Select the route cards for the files with imaginary frequencies ----
    route = RC(imag.imag_dict)
    for file in os.listdir():
        route.route_card(file)

    # ---- Extract the energies from the *.log files and save them via csvwriter ----
    energy=EnergyEx()
    for file in os.listdir():
        # print(file)
        energy.sampler(file)
    csvwriter(energy.energy_dict)

    # ---- Separate the files containing errors from those with imaginary frequencies ----
    fs = FileSeparator(imag.imag_dict)
    for file in os.listdir():
        fs.file_separator(file)

    # ---- Generate new input files from the *.log files with imaginary frequencies ----
    # Only done when at least one imaginary frequency was recorded.
    if imag.imag_dict != {}:
        xawker(imag.imag_dict)
        im = ImagInput()
        chrg_spin=input('Please enter the molecules new charge and spin value [ex.: 0 1 ]: ')
        ext=input('Please enter the generated files extension [ex: com]: ')
        for file in os.listdir():
            im.imag_xyz_gen(file, chrg_spin, ext, route.rd)
        # ---- Clean up: the user's answer is handed to cleaner() ----
        answer = input("Do you need the old xyz/xyz-/ginp file(s)? ")
        cleaner(answer)
    else:
        pass

    # ---- Leave the data-analysis mode ----
    print('Data analysis terminated')
    time.sleep(1)
def time(self, n, factor_digits, verbose=0):
    """
    Gives an approximation for the amount of time it will take to find
    a factor of size factor_digits in a single process on the current
    computer. This estimate is provided by GMP-ECM's verbose option on
    a single run of a curve.

    The result is printed, not returned.

    INPUT:
        n -- a positive integer
        factor_digits -- the (estimated) number of digits of the smallest factor
        verbose -- (default: 0) if true, also print the raw program output

    EXAMPLES::

        sage: n = next_prime(11^23)*next_prime(11^37)
        sage: ecm.time(n, 20)                  # not tested
        Expected curves: 77     Expected time: 7.21s
        sage: ecm.time(n, 25)                  # not tested
        Expected curves: 206    Expected time: 1.56m
        sage: ecm.time(n, 30, verbose=1)       # not tested
        GMP-ECM 6.1.3 [powered by GMP 4.2.1] [ECM]
        Input number is 304481639541418099574459496544854621998616257489887231115912293 (63 digits)
        Using MODMULN
        Using B1=250000, B2=128992510, polynomial Dickson(3), sigma=2307628716
        dF=2048, k=3, d=19110, d2=11, i0=3
        Expected number of curves to find a factor of n digits:
        20      25      30      35      40      45      50      55      60      65
        8       50      430     4914    70293   1214949 2.5e+07 5.9e+08 1.6e+10 2.7e+13
        Step 1 took 6408ms
        Using 16 small primes for NTT
        Estimated memory usage: 3862K
        Initializing tables of differences for F took 16ms
        Computing roots of F took 128ms
        Building F from its roots took 408ms
        Computing 1/F took 608ms
        Initializing table of differences for G took 12ms
        Computing roots of G took 120ms
        Building G from its roots took 404ms
        Computing roots of G took 120ms
        Building G from its roots took 412ms
        Computing G * H took 328ms
        Reducing G * H mod F took 348ms
        Computing roots of G took 120ms
        Building G from its roots took 408ms
        Computing G * H took 328ms
        Reducing G * H mod F took 348ms
        Computing polyeval(F,G) took 1128ms
        Step 2 took 5260ms
        Expected time to find a factor of n digits:
        20      25      30      35      40      45      50      55      60      65
        1.58m   9.64m   1.39h   15.93h  9.49d   164.07d 9.16y   218.68y 5825y   1e+07y
        Expected curves: 4914   Expected time: 1.39h
    """
    self._validate(n)
    B1 = self.recommended_B1(factor_digits)
    # Build the command line with the 'v' option set.
    self.__cmd = self._ECM__startup_cmd(B1, None, {'v': ' '})
    child = pexpect.spawn(self.__cmd)
    cleaner.cleaner(child.pid, self.__cmd)
    child.timeout = None
    # NOTE(review): pexpect treats this as a regular expression, so '[ECM]'
    # is a character class matching a single E, C or M -- confirm intent.
    child.expect('[ECM]')
    child.sendline(str(n))
    # Closing the input is best effort; a failure here is ignored.
    try:
        child.sendeof()
    except Exception:
        pass
    # Header row of the first ten-column table (20 ... 65 digits).
    child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65')
    if verbose:
        print child.before,
        print child.after,
    # The row of ten values under that header (expected curve counts in the
    # docstring example).
    child.expect(
        '(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s',
        timeout=None)
    # Column index: the table value for factor_digits, mapped from the
    # 20..65 digit range in steps of 5 onto 0..9.
    offset = (self.__B1_table_value(factor_digits, 20, 65) - 20) / 5
    curve_count = child.match.groups()[int(offset)]
    if verbose:
        print child.before,
        print child.after,
    # Header row of the second table.
    child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65', timeout=None)
    if verbose:
        print child.before,
        print child.after,
    # The row of ten values under the second header (expected times in the
    # docstring example).
    child.expect(
        '(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s',
        timeout=None)
    if verbose:
        print child.before,
        print child.after
    # Local name only; it shadows nothing outside this method.
    time = child.match.groups()[int(offset)]
    # NOTE(review): kill(0) sends signal 0, which presumably does not
    # terminate the child -- confirm that the cleaner is what ends it.
    child.kill(0)
    print "Expected curves:", curve_count, "\tExpected time:", time
parser.add_argument('--nplanets', type=int, default=5, help='Number of planets to search') parser.add_argument('--ncpu', type=int, default=4, help='Number of CPUs to use') parser.add_argument('--min-period', type=float, default=0.2) parser.add_argument('--max-period', type=float, default=20) parser.add_argument('--method', type=str, default='TLS') args = parser.parse_args() t, f, e = np.genfromtxt(args.File, usecols=(0, 1, 2), unpack=True) ma = cleaner(t, f) t, f, e = t[~ma], f[~ma], e[~ma] wl = 2 * (len(t) // 42) + 1 lc = TessLightCurve(time=t, flux=f, flux_err=e).flatten(window_length=wl, polyorder=3, niters=5) fig = plt.figure(constrained_layout=True, figsize=[15, 6]) gs = GridSpec(ncols=5, nrows=3, figure=fig, height_ratios=[3, 2, 2]) #Light curve axlc = fig.add_subplot(gs[0, :]) axlc.plot(lc.time.value, lc.flux, '.k', ms=1) color = plt.cm.rainbow(np.linspace(0, 1, args.nplanets))
def start(self):
    """
    Spawn the stored command, register it with the cleaner, and keep the
    child process on ``self.child`` with no timeout.
    """
    command = self.__cmd
    spawned = pexpect.spawn(command)
    # Register the child so the cleaner can track it by pid.
    cleaner.cleaner(spawned.pid, command)
    spawned.timeout = None
    self.child = spawned
    self._ready = False
lc = TessLightCurve(time=tt, flux=ff, flux_err=ee).flatten(window_length=51, polyorder=2, niters=5) t.append(lc.time) f.append(lc.flux) e.append(lc.flux_err) t = np.concatenate(t) a = np.argsort(t) t = t[a] f = np.concatenate(f)[a] e = np.concatenate(e)[a] cm = cleaner(t, f) t = t[~cm] f = f[~cm] e = e[~cm] results = the_TLS(fn, t, f, e, target=args.target) import matplotlib.pyplot as plt fig, ax = plt.subplots(figsize=[10, 3]) ax.plot(t, f, '-k', lw=0.5) fig, ax = plt.subplots(figsize=[10, 3]) ax.plot(results.periods, results.power, '-k', lw=0.5) fig, ax = plt.subplots() phase = (results.model_lightcurve_time - results.T0 +
def _start(self, alt_message=None, block_during_init=True):
    """
    Start the interpreter process and wait for its first prompt.

    Any running session is quit first. The child is spawned from inside
    ``self.__path``; the previous working directory is restored afterwards,
    also when starting the command fails.

    INPUT:

    - ``alt_message`` -- not used in the code shown here
    - ``block_during_init`` -- not used in the code shown here

    Raises ``RuntimeError`` if the command cannot be spawned or the prompt
    never appears. In both cases the interface name is appended to the
    global ``failed_to_start`` list.
    """
    global failed_to_start
    from sage.misc.misc import sage_makedirs
    self.quit()  # in case one is already running

    self._session_number += 1
    current_path = os.path.abspath('.')
    workdir = self.__path
    sage_makedirs(workdir)
    os.chdir(workdir)
    try:
        # If the 'SAGE_PEXPECT_LOG' environment variable is set and
        # the current logfile is None, then set the logfile to be one
        # in .sage/pexpect_logs/
        if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
            from sage.misc.all import DOT_SAGE
            logs = '%s/pexpect_logs' % DOT_SAGE
            sage_makedirs(logs)
            filename = '%s/%s-%s-%s-%s.log' % (
                logs, self.name(), os.getpid(), id(self),
                self._session_number)
            self.__logfile = open(filename, 'w')

        cmd = self.__command

        if self.__verbose_start:
            print(cmd)
            print("Starting %s" % cmd.split()[0])

        try:
            if self.__remote_cleaner and self._server:
                c = 'sage-native-execute  ssh %s "nohup sage -cleaner"  &' % self._server
                os.system(c)

            # Unset $TERM for the children to reduce the chances they do
            # something complicated breaking the terminal interface.
            # See Trac #12221.
            pexpect_env = dict(os.environ)
            pexpect_env.pop("TERM", None)
            self._expect = pexpect.spawn(cmd, logfile=self.__logfile,
                                         env=pexpect_env)
            if self._do_cleaner():
                cleaner.cleaner(self._expect.pid, cmd)
        except (ExceptionPexpect, pexpect.EOF, IndexError):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError(
                "Unable to start %s because the command '%s' failed.\n%s" % (
                    self.name(), cmd, self._install_hints()))
    finally:
        # Restore the caller's working directory on success and on failure;
        # previously a failed start left the process inside self.__path.
        os.chdir(current_path)

    self._expect.timeout = self.__max_startup_time

    #self._expect.setmaxread(self.__maxread)
    self._expect.maxread = self.__maxread
    self._expect.delaybeforesend = 0
    try:
        self._expect.expect(self._prompt)
    except (pexpect.TIMEOUT, pexpect.EOF):
        self._expect = None
        self._session_number = BAD_SESSION
        failed_to_start.append(self.name())
        raise RuntimeError("Unable to start %s" % self.name())
def tag_changer(self):
    """
    Write the entered song and artist into the named ``.mp3`` file, run the
    cleaner on the entered name, then call ``self.clean()``.
    """
    mp3_name = "{}{}".format(self.entry_name_of_the_file.get(), ".mp3")
    song_title = self.song.get()
    artist_name = self.artist.get()
    metadata_changer(mp3_name, song_title, artist_name)
    # The cleaner receives the entered name without the extension.
    cleaner(self.entry_name_of_the_file.get())
    self.clean()
def find_factor(self, n, factor_digits=None, B1=2000, **kwds):
    """
    Splits off a single factor of n.
    See ECM.factor()

    INPUT:
        n -- a positive integer
        factor_digits -- if given, B1 is replaced by the recommended
                         value for factors of that many digits
        B1 -- stage 1 bound (default 2000)
        kwds -- extra options for the command line; 'c' and 'I' get
                defaults if absent, 'one' and 'cofdec' are always set

    OUTPUT:
        list of integers whose product is n

        ``self.primality`` is set to a parallel list of booleans and
        ``self.last_params`` to the parameters last reported by the program.

    EXAMPLES:
        sage: f = ECM()
        sage: n = 508021860739623467191080372196682785441177798407961
        sage: f.find_factor(n)
        [79792266297612017, 6366805760909027985741435139224233]

    Note that the input number can't have more than 4095 digits:
        sage: f=2^2^14+1
        sage: ecm.find_factor(f)
        Traceback (most recent call last):
        ...
        ValueError: n must have at most 4095 digits
    """
    n = Integer(n)
    self._validate(n)
    kwds.setdefault('c', 1000000000)
    kwds.setdefault('I', 1)
    if factor_digits is not None:
        B1 = self.recommended_B1(factor_digits)
    kwds['one'] = ''
    kwds['cofdec'] = ''
    self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
    self.last_params = {'B1': B1}
    child = pexpect.spawn(self.__cmd)
    cleaner.cleaner(child.pid, self.__cmd)
    child.timeout = None
    child.__del__ = nothing   # program around stupid exception ignored error
    child.expect('[ECM]')
    child.sendline(str(n))
    # A second, invalid input: once the curves for n are exhausted the
    # program reports "Error - invalid number", which ends the loop below.
    child.sendline("bad")  # child.sendeof()
    # The loop is left only through one of the return statements.
    while True:
        try:
            # groups: 0 = "Using ..." line (1 B1, 2 B2, 3 poly, 4 sigma),
            #         5 = "Factor found ..." line (6 = the factor),
            #         7 = "Error - invalid number"
            child.expect(r'(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)')
            info = child.match.groups()
            if info[0] is not None:
                self.last_params = {'B1': info[1],
                                    'B2': info[2],
                                    'poly': info[3],
                                    'sigma': info[4]}
            elif info[7] is not None:
                # No factor was found for n.
                child.kill(0)
                self.primality = [False]
                return [n]
            else:
                p = Integer(info[6])
                child.expect(r'(input number)|(prime factor)|(composite factor)')
                found = child.match.groups()
                if found[0] is not None:
                    # The "factor" is the input number itself: retry with
                    # a smaller B1.
                    child.kill(0)
                    return self.find_factor(n, B1=4 + floor(float(B1) / 2), **kwds)
                # primality testing is cheap compared to factoring, but has already been done
                # return [p, n/p]
                self.primality = [found[1] is not None]
                child.expect(r'((prime cofactor)|(Composite cofactor)) (\d+)\D')
                cofactor = child.match.groups()
                q = Integer(cofactor[3])
                self.primality += [cofactor[1] is not None]
                child.kill(0)
                return [p, q]
        except pexpect.EOF:
            child.kill(0)
            self.primality = [False]
            return [n]
def one_curve(self, n, factor_digits=None, B1=2000, algorithm="ECM", **kwds):
    """
    Run one single ECM (or P-1/P+1) curve on input n.

    INPUT:
        n -- a positive integer
        factor_digits -- decimal digits estimate of the wanted factor
        B1 -- stage 1 bound (default 2000)
        algorithm -- either "ECM" (default), "P-1" or "P+1"
    OUTPUT:
        a list [p,q] where p and q are integers and n = p * q.
        If no factor was found, then p = 1 and q = n.
    WARNING: neither p nor q is guaranteed to be prime.

    Raises ValueError if algorithm is not one of the three names above.

    EXAMPLES:
        sage: f = ECM()
        sage: n = 508021860739623467191080372196682785441177798407961
        sage: f.one_curve(n, B1=10000, sigma=11)
        [1, 508021860739623467191080372196682785441177798407961]
        sage: f.one_curve(n, B1=10000, sigma=1022170541)
        [79792266297612017, 6366805760909027985741435139224233]
        sage: n = 432132887883903108009802143314445113500016816977037257
        sage: f.one_curve(n, B1=500000, algorithm="P-1")
        [67872792749091946529, 6366805760909027985741435139224233]
        sage: n = 2088352670731726262548647919416588631875815083
        sage: f.one_curve(n, B1=2000, algorithm="P+1", x0=5)
        [328006342451, 6366805760909027985741435139224233]
    """
    n = Integer(n)
    self._validate(n)
    if factor_digits is not None:
        B1 = self.recommended_B1(factor_digits)
    if algorithm == "P-1":
        kwds['pm1'] = ''
    elif algorithm == "P+1":
        kwds['pp1'] = ''
    elif algorithm != "ECM":
        # Call-form raise: valid on both Python 2 and Python 3.
        raise ValueError("unexpected algorithm: " + algorithm)
    self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
    child = pexpect.spawn(self.__cmd)
    cleaner.cleaner(child.pid, self.__cmd)
    child.timeout = None
    child.__del__ = nothing   # work around stupid exception ignored error
    child.expect('[ECM]')
    child.sendline(str(n))
    child.sendline("bad")  # child.sendeof()
    # The loop is left only through one of the return statements.
    while True:
        try:
            child.expect(r'(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)')
        except pexpect.EOF:
            child.kill(0)
            return [1, n]
        info = child.match.groups()
        # info[0] is the whole "Using ..." line: B1 is info[1], B2 is
        # info[2], poly is info[3], sigma is info[4]; info[5] is the whole
        # "Factor found ..." line with the factor in info[6]; info[7] is
        # "Error - invalid number".
        if info[0] is not None:
            # got Using B1=... line
            self.last_params = {'B1': info[1],
                                'B2': info[2],
                                'poly': info[3],
                                'sigma': info[4]}
        elif info[7] is not None:
            # got Error - invalid number, which means the curve did
            # end without finding any factor, and the next input 'bad'
            # was given to GMP-ECM
            child.kill(0)
            return [1, n]
        else:
            # got Factor found...
            p = Integer(info[6])
            child.kill(0)
            return [p, n/p]
import argparse from cleaner import cleaner def parsing(parser): parser.add_argument("--train", help="Training data", required=True) parser.add_argument("--test", help="Testing data", required=True) parser.add_argument("--pred", help="Name of the output .csv file with predictions", required=True) parser.add_argument("--max_feat", type=int, help="Maximum number of features for TfidfVectorizer", required=True) parser.add_argument("--num_folds", type=int, help="Number of folds for k-fold cross-validation", required=True) return parser parser = argparse.ArgumentParser() parser = parsing(parser) args = parser.parse_args() text = cleaner(args.train, args.test, args.pred, args.max_feat, args.num_folds) if args.pred: #do i even need the if statement if it is a required argument text.out() # #report F1 score print("F1 Score is:", text.report_f1) from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.pipeline import Pipeline from xgboost import XGBClassifier from mlxtend.preprocessing import DenseTransformer pipeline = Pipeline([ ("vectorizer", TfidfVectorizer()),
''' from lightkurve.correctors import PLDCorrector tpf.hdu[1].data['FLUX'][ma] -= bkgs[:,None,None] tpf.hdu[1].data['FLUX_ERR'][ma] -= np.sqrt(tpf.hdu[1].data['FLUX_ERR'][ma]**2 + berr[:,None,None]**2) corr = PLDCorrector(tpf) lkf = corr.correct(aperture_mask = dap[bidx], pld_aperture_mask='threshold', pld_order=3, use_gp=True) #NORM if args.norm: lkf.flux_err /= np.nanmedian(lkf.flux) lkf.flux /= np.nanmedian(lkf.flux) if args.cleaner: from cleaner import cleaner omask = cleaner(lkf.time, lkf.flux) ntime = lkf.time.value[~omask] nflux = lkf.flux.value[~omask] nflux_err = lkf.flux_err.value[~omask] lkf = TessLightCurve(time=ntime, flux=nflux, flux_err=nflux_err) #Gaia sources and dilution factor if args.gaia: from astroquery.gaia import Gaia Gaia.ROW_LIMIT = -1 gaiawh = u.Quantity(21*args.size*np.sqrt(2)/2, u.arcsec) gaiar = Gaia.cone_search_async(coord, gaiawh).get_results() #gaiar = Gaia.query_object_async(coord, width=gaiawh, height=gaiawh)
def datacleaner(df):
    """Pass ``df`` to ``cleaner.cleaner`` and return its result.

    The import is done at call time, inside the function.
    """
    from cleaner import cleaner as clean_frame
    cleaned = clean_frame(df)
    return cleaned
def _start(self, alt_message=None, block_during_init=True):
    """
    Start the interpreter process, wait for its first prompt, and run the
    initialisation code.

    Any running session is quit first. The child is spawned from inside
    ``self.__path``; the previous working directory is restored right after
    the spawn, also when the spawn itself fails.

    INPUT:

    - ``alt_message`` -- not used in the code shown here
    - ``block_during_init`` -- (default: ``True``) if true, each line of
      init code is run with ``self.eval``; otherwise it is only sent with
      ``self._send``

    Raises ``RuntimeError`` if the command cannot be spawned or the prompt
    never appears. In both cases the interface name is appended to the
    global ``failed_to_start`` list.
    """
    global failed_to_start
    from sage.misc.misc import sage_makedirs
    self.quit()  # in case one is already running

    self._session_number += 1

    # If the 'SAGE_PEXPECT_LOG' environment variable is set and
    # the current logfile is None, then set the logfile to be one
    # in .sage/pexpect_logs/
    if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
        from sage.env import DOT_SAGE
        logs = os.path.join(DOT_SAGE, 'pexpect_logs')
        sage_makedirs(logs)
        filename = '%s/%s-%s-%s-%s.log' % (
            logs, self.name(), os.getpid(), id(self), self._session_number)
        self.__logfile = open(filename, 'w')

    cmd = self.__command

    if self.__verbose_start:
        print(cmd)
        print("Starting %s" % cmd.split()[0])

    try:
        if self.__remote_cleaner and self._server:
            c = 'sage-native-execute  ssh %s "nohup sage -cleaner"  &' % self._server
            os.system(c)

        # Unset some environment variables for the children to
        # reduce the chances they do something complicated breaking
        # the terminal interface.
        # See Trac #12221 and #13859.
        pexpect_env = dict(os.environ)
        for var in ('TERM', 'COLUMNS'):
            pexpect_env.pop(var, None)

        # Run child from self.__path
        currentdir = os.getcwd()
        os.chdir(self.__path)
        try:
            self._expect = pexpect.spawn(cmd, logfile=self.__logfile,
                                         env=pexpect_env)
        finally:
            # Previously a failing spawn left the process in self.__path.
            os.chdir(currentdir)

        if self._do_cleaner():
            cleaner.cleaner(self._expect.pid, cmd)

    except (ExceptionPexpect, pexpect.EOF, IndexError):
        self._expect = None
        self._session_number = BAD_SESSION
        failed_to_start.append(self.name())
        raise RuntimeError("unable to start %s because the command %r failed\n%s" % (
            self.name(), cmd, self._install_hints()))

    self._expect.timeout = self.__max_startup_time

    self._expect.maxread = self.__maxread
    self._expect.delaybeforesend = 0
    try:
        self._expect.expect(self._prompt)
    except (pexpect.TIMEOUT, pexpect.EOF):
        self._expect = None
        self._session_number = BAD_SESSION
        failed_to_start.append(self.name())
        raise RuntimeError("unable to start %s" % self.name())
    self._expect.timeout = None

    # Calling tcsetattr earlier exposes bugs in various pty
    # implementations, see :trac:`16474`. Since we haven't
    # **written** anything so far it is safe to wait with
    # switching echo off until now.
    if not self._terminal_echo:
        self._expect.setecho(0)

    with gc_disabled():
        # Blocking mode evaluates each init line; otherwise it is only sent.
        run_init_line = self.eval if block_during_init else self._send
        for X in self.__init_code:
            run_init_line(X)
def one_curve(self, n, factor_digits=None, B1=2000, algorithm="ECM", **kwds):
    """
    Run one single ECM (or P-1/P+1) curve on input n.

    INPUT:
        n -- a positive integer
        factor_digits -- decimal digits estimate of the wanted factor
        B1 -- stage 1 bound (default 2000)
        algorithm -- either "ECM" (default), "P-1" or "P+1"
    OUTPUT:
        a list [p,q] where p and q are integers and n = p * q.
        If no factor was found, then p = 1 and q = n.
    WARNING: neither p nor q is guaranteed to be prime.

    Raises ValueError if algorithm is not one of the three names above.

    EXAMPLES:
        sage: f = ECM()
        sage: n = 508021860739623467191080372196682785441177798407961
        sage: f.one_curve(n, B1=10000, sigma=11)
        [1, 508021860739623467191080372196682785441177798407961]
        sage: f.one_curve(n, B1=10000, sigma=1022170541)
        [79792266297612017, 6366805760909027985741435139224233]
        sage: n = 432132887883903108009802143314445113500016816977037257
        sage: f.one_curve(n, B1=500000, algorithm="P-1")
        [67872792749091946529, 6366805760909027985741435139224233]
        sage: n = 2088352670731726262548647919416588631875815083
        sage: f.one_curve(n, B1=2000, algorithm="P+1", x0=5)
        [328006342451, 6366805760909027985741435139224233]
    """
    n = Integer(n)
    self._validate(n)
    if factor_digits is not None:
        B1 = self.recommended_B1(factor_digits)
    if algorithm == "P-1":
        kwds['pm1'] = ''
    elif algorithm == "P+1":
        kwds['pp1'] = ''
    elif algorithm != "ECM":
        # Call-form raise: valid on both Python 2 and Python 3.
        raise ValueError("unexpected algorithm: " + algorithm)
    self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
    child = pexpect.spawn(self.__cmd)
    cleaner.cleaner(child.pid, self.__cmd)
    child.timeout = None
    child.__del__ = nothing  # work around stupid exception ignored error
    child.expect('[ECM]')
    child.sendline(str(n))
    child.sendline("bad")  # child.sendeof()
    # The loop is left only through one of the return statements.
    while True:
        try:
            child.expect(
                r'(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)'
            )
        except pexpect.EOF:
            child.kill(0)
            return [1, n]
        info = child.match.groups()
        # info[0] is the whole "Using ..." line: B1 is info[1], B2 is
        # info[2], poly is info[3], sigma is info[4]; info[5] is the whole
        # "Factor found ..." line with the factor in info[6]; info[7] is
        # "Error - invalid number".
        if info[0] is not None:
            # got Using B1=... line
            self.last_params = {
                'B1': info[1],
                'B2': info[2],
                'poly': info[3],
                'sigma': info[4]
            }
        elif info[7] is not None:
            # got Error - invalid number, which means the curve did
            # end without finding any factor, and the next input 'bad'
            # was given to GMP-ECM
            child.kill(0)
            return [1, n]
        else:
            # got Factor found...
            p = Integer(info[6])
            child.kill(0)
            return [p, n / p]
def find_factor(self, n, factor_digits=None, B1=2000, **kwds):
    """
    Splits off a single factor of n.
    See ECM.factor()

    INPUT:
        n -- a positive integer
        factor_digits -- if given, B1 is replaced by the recommended
                         value for factors of that many digits
        B1 -- stage 1 bound (default 2000)
        kwds -- extra options for the command line; 'c' and 'I' get
                defaults if absent, 'one' and 'cofdec' are always set

    OUTPUT:
        list of integers whose product is n

        ``self.primality`` is set to a parallel list of booleans and
        ``self.last_params`` to the parameters last reported by the program.

    EXAMPLES:
        sage: f = ECM()
        sage: n = 508021860739623467191080372196682785441177798407961
        sage: f.find_factor(n)
        [79792266297612017, 6366805760909027985741435139224233]

    Note that the input number can't have more than 4095 digits:
        sage: f=2^2^14+1
        sage: ecm.find_factor(f)
        Traceback (most recent call last):
        ...
        ValueError: n must have at most 4095 digits
    """
    n = Integer(n)
    self._validate(n)
    kwds.setdefault('c', 1000000000)
    kwds.setdefault('I', 1)
    if factor_digits is not None:
        B1 = self.recommended_B1(factor_digits)
    kwds['one'] = ''
    kwds['cofdec'] = ''
    self.__cmd = self._ECM__startup_cmd(B1, None, kwds)
    self.last_params = {'B1': B1}
    child = pexpect.spawn(self.__cmd)
    cleaner.cleaner(child.pid, self.__cmd)
    child.timeout = None
    child.__del__ = nothing  # program around stupid exception ignored error
    child.expect('[ECM]')
    child.sendline(str(n))
    # A second, invalid input: once the curves for n are exhausted the
    # program reports "Error - invalid number", which ends the loop below.
    child.sendline("bad")  # child.sendeof()
    # The loop is left only through one of the return statements.
    while True:
        try:
            # groups: 0 = "Using ..." line (1 B1, 2 B2, 3 poly, 4 sigma),
            #         5 = "Factor found ..." line (6 = the factor),
            #         7 = "Error - invalid number"
            child.expect(
                r'(Using B1=(\d+), B2=(\d+), polynomial ([^,]+), sigma=(\d+)\D)|(Factor found in step \d:\s+(\d+)\D)|(Error - invalid number)'
            )
            info = child.match.groups()
            if info[0] is not None:
                self.last_params = {
                    'B1': info[1],
                    'B2': info[2],
                    'poly': info[3],
                    'sigma': info[4]
                }
            elif info[7] is not None:
                # No factor was found for n.
                child.kill(0)
                self.primality = [False]
                return [n]
            else:
                p = Integer(info[6])
                child.expect(
                    r'(input number)|(prime factor)|(composite factor)')
                found = child.match.groups()
                if found[0] is not None:
                    # The "factor" is the input number itself: retry with
                    # a smaller B1.
                    child.kill(0)
                    return self.find_factor(n,
                                            B1=4 + floor(float(B1) / 2),
                                            **kwds)
                # primality testing is cheap compared to factoring, but has already been done
                # return [p, n/p]
                self.primality = [found[1] is not None]
                child.expect(
                    r'((prime cofactor)|(Composite cofactor)) (\d+)\D')
                cofactor = child.match.groups()
                q = Integer(cofactor[3])
                self.primality += [cofactor[1] is not None]
                child.kill(0)
                return [p, q]
        except pexpect.EOF:
            child.kill(0)
            self.primality = [False]
            return [n]
def run_BLS(fl):
    """
    Run a periodic-transit search on one light-curve file.

    Columns 0, 1 and 2 of ``fl`` are read with ``np.genfromtxt`` as
    time, flux and flux error.  Points selected by ``cleaner(t, f)`` are
    dropped, the light curve is flattened, and a ``BLS`` or (when
    ``args.TLS`` is set) ``transitleastsquares`` model is searched.

    Reads the module-level ``args`` (``min_period``, ``max_period``,
    ``TLS``, ``target``).

    Returns
    -------
    tuple
        ``(fl, period, t0, dur, depth, snr, depth_even, depth_odd,
        depth_half, t1, ntra)``.  When ``args.target`` is not ``None``
        the tuple is extended with ``result.period``, ``result.power``,
        ``lc.time``, ``lc.flux`` and the successive differences of the
        light-curve times.
    """
    t, f, e = np.genfromtxt(fl, usecols=(0, 1, 2), unpack=True)

    # ``mask`` marks the points to discard (presumably a boolean array).
    mask = cleaner(t, f)
    t = t[~mask]
    f = f[~mask]
    e = e[~mask]

    lc = TessLightCurve(time=t, flux=f, flux_err=e).flatten(
        window_length=51, polyorder=2, niters=5)

    # NOTE(review): this grid is only consumed by the explicit-grid call
    # ``model.power(periods, durations, oversample=20)``, which is not
    # the call made below; kept so that call can be restored.
    periods = np.exp(np.linspace(np.log(args.min_period),
                                 np.log(args.max_period), 5000))
    durations = np.linspace(0.05, 0.15, 20)

    model = BLS(lc.time, lc.flux) if not args.TLS else transitleastsquares(lc.time.value, lc.flux, lc.flux_err)

    # NOTE(review): the keyword arguments below look like the
    # transitleastsquares interface, while the per-index result fields
    # read afterwards (transit_time, depth_snr) look like a box-least-
    # squares result.  Confirm both code paths actually run.
    result = model.power(period_min=args.min_period,
                         oversampling_factor=2,
                         n_transits_min=1,
                         use_threads=1,
                         show_progress_bar=False)

    # Parameters at the highest peak of the power spectrum.
    idx = np.argmax(result.power)
    period = result.period[idx]
    t0 = result.transit_time[idx]
    dur = result.duration[idx]
    depth = result.depth[idx]
    snr = result.depth_snr[idx]

    try:
        stats = model.compute_stats(period, dur, t0)
        depth_even = stats['depth_even'][0]
        depth_odd = stats['depth_odd'][0]
        depth_half = stats['depth_half'][0]
        t0, t1 = stats['transit_times'][:2]
        ntra = len(stats['transit_times'])
    except Exception:
        # Best effort: fall back to zeros when the statistics are
        # unavailable (e.g. fewer than two transit times to unpack).
        depth_even = 0
        depth_odd = 0
        depth_half = 0
        t1 = 0
        ntra = 0

    if args.target is not None:
        # ``diffs`` was returned but never assigned (its computation
        # lived in dead code), so this branch raised NameError.
        # ``lc.time`` may be a plain array or an object exposing
        # ``.value`` (the TLS branch above already relies on ``.value``).
        diffs = np.diff(np.asarray(getattr(lc.time, 'value', lc.time)))
        return fl, period, t0, dur, depth, snr, depth_even, depth_odd, depth_half, t1, ntra, result.period, result.power, lc.time, lc.flux, diffs
    else:
        return fl, period, t0, dur, depth, snr, depth_even, depth_odd, depth_half, t1, ntra
def _start(self, alt_message=None, block_during_init=True):
    """
    Start the child process for this interface and wait for its prompt.

    Any running session is quit first.  The child is spawned from
    ``self.__path`` (created if needed) and the previous working
    directory is restored afterwards, whether or not spawning succeeds.

    ``alt_message`` and ``block_during_init`` are accepted but not used
    in this body.

    Raises ``RuntimeError`` if the command cannot be spawned or if the
    prompt does not appear; in both cases ``self._expect`` is reset to
    ``None``, the session number is set to ``BAD_SESSION`` and the
    interface name is appended to ``failed_to_start``.
    """
    from sage.misc.misc import sage_makedirs
    global failed_to_start

    self.quit()  # in case one is already running

    self._session_number += 1
    original_dir = os.path.abspath('.')
    workdir = self.__path
    sage_makedirs(workdir)
    os.chdir(workdir)
    try:
        # If the 'SAGE_PEXPECT_LOG' environment variable is set and
        # the current logfile is None, then set the logfile to be one
        # in .sage/pexpect_logs/
        if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
            from sage.env import DOT_SAGE
            logs = '%s/pexpect_logs' % DOT_SAGE
            sage_makedirs(logs)
            filename = '%s/%s-%s-%s-%s.log' % (logs, self.name(), os.getpid(), id(self), self._session_number)
            self.__logfile = open(filename, 'w')

        cmd = self.__command

        if self.__verbose_start:
            print(cmd)
            print("Starting %s" % cmd.split()[0])

        try:
            if self.__remote_cleaner and self._server:
                c = 'sage-native-execute ssh %s "nohup sage -cleaner" &' % self._server
                os.system(c)

            # Unset some environment variables for the children to
            # reduce the chances they do something complicated breaking
            # the terminal interface.
            # See Trac #12221 and #13859.
            pexpect_env = dict(os.environ)
            for var in ('TERM', 'COLUMNS'):
                pexpect_env.pop(var, None)

            self._expect = pexpect.spawn(cmd, logfile=self.__logfile, env=pexpect_env)
            if self._do_cleaner():
                cleaner.cleaner(self._expect.pid, cmd)
        except (ExceptionPexpect, pexpect.EOF, IndexError):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError("Unable to start %s because the command '%s' failed.\n%s" % (
                self.name(), cmd, self._install_hints()))
    finally:
        # Restore the caller's working directory even when startup
        # fails; previously a failure left the process in ``workdir``.
        os.chdir(original_dir)

    self._expect.timeout = self.__max_startup_time

    self._expect.maxread = self.__maxread
    self._expect.delaybeforesend = 0
    try:
        self._expect.expect(self._prompt)
    except (pexpect.TIMEOUT, pexpect.EOF):
        self._expect = None
        self._session_number = BAD_SESSION
        failed_to_start.append(self.name())
        raise RuntimeError("Unable to start %s" % self.name())
def time(self, n, factor_digits, verbose=0): """ Gives an approximation for the amount of time it will take to find a factor of size factor_digits in a single process on the current computer. This estimate is provided by GMP-ECM's verbose option on a single run of a curve. INPUT: n -- a positive integer factor_digits -- the (estimated) number of digits of the smallest factor EXAMPLES: sage: n = next_prime(11^23)*next_prime(11^37) sage: ecm.time(n, 20) # not tested Expected curves: 77 Expected time: 7.21s sage: ecm.time(n, 25) # not tested Expected curves: 206 Expected time: 1.56m sage: ecm.time(n, 30, verbose=1) # not tested GMP-ECM 6.1.3 [powered by GMP 4.2.1] [ECM] Input number is 304481639541418099574459496544854621998616257489887231115912293 (63 digits) Using MODMULN Using B1=250000, B2=128992510, polynomial Dickson(3), sigma=2307628716 dF=2048, k=3, d=19110, d2=11, i0=3 Expected number of curves to find a factor of n digits: 20 25 30 35 40 45 50 55 60 65 8 50 430 4914 70293 1214949 2.5e+07 5.9e+08 1.6e+10 2.7e+13 Step 1 took 6408ms Using 16 small primes for NTT Estimated memory usage: 3862K Initializing tables of differences for F took 16ms Computing roots of F took 128ms Building F from its roots took 408ms Computing 1/F took 608ms Initializing table of differences for G took 12ms Computing roots of G took 120ms Building G from its roots took 404ms Computing roots of G took 120ms Building G from its roots took 412ms Computing G * H took 328ms Reducing G * H mod F took 348ms Computing roots of G took 120ms Building G from its roots took 408ms Computing G * H took 328ms Reducing G * H mod F took 348ms Computing polyeval(F,G) took 1128ms Step 2 took 5260ms Expected time to find a factor of n digits: 20 25 30 35 40 45 50 55 60 65 1.58m 9.64m 1.39h 15.93h 9.49d 164.07d 9.16y 218.68y 5825y 1e+07y Expected curves: 4914 Expected time: 1.39h """ self._validate(n) B1 = self.recommended_B1(factor_digits) self.__cmd = self._ECM__startup_cmd(B1, None, {'v': ' '}) child = 
pexpect.spawn(self.__cmd) cleaner.cleaner(child.pid, self.__cmd) child.timeout = None child.expect('[ECM]') child.sendline(str(n)) try: child.sendeof() except: pass child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65') if verbose: print child.before, print child.after, child.expect('(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s', timeout=None) offset = (self.__B1_table_value(factor_digits, 20, 65)-20)/5 curve_count = child.match.groups()[int(offset)] if verbose: print child.before, print child.after, child.expect('20\s+25\s+30\s+35\s+40\s+45\s+50\s+55\s+60\s+65', timeout=None) if verbose: print child.before, print child.after, child.expect('(\d\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s', timeout=None) if verbose: print child.before, print child.after time = child.match.groups()[int(offset)] child.kill(0) print "Expected curves:", curve_count, "\tExpected time:", time
from reader import csv_reader
from cleaner import cleaner
import pandas as pd

# Read 'receita.csv', clean its rows and build the DataFrame while the
# file is still open (csv_reader may read lazily); the context manager
# then closes the handle, which the script previously never did.
with open('receita.csv', 'r') as csv_file:
    dict_list = csv_reader(csv_file)
    file_cleaned = cleaner(dict_list)
    imports = pd.DataFrame(file_cleaned)

# Write the cleaned table out without the index column, with a header.
export_csv = imports.to_csv(
    r'C:\Users\CalebeLadis\PycharmProjects\csv-reader\receita2.csv',
    index=None, header=True)
def _start(self, alt_message=None, block_during_init=True):
    """
    Start the child process for this interface, wait for its prompt and
    run the stored initialisation code.

    Any running session is quit first.  The child is spawned from
    ``self.__path`` and the previous working directory is restored
    afterwards.  If ``block_during_init`` is true each init statement is
    passed to ``self.eval``, otherwise to ``self._send``.
    ``alt_message`` is not used in this body.

    Raises ``RuntimeError`` if the command cannot be spawned or the
    prompt does not appear; ``self._expect`` is then ``None`` and the
    session number is ``BAD_SESSION``.
    """
    from sage.misc.misc import sage_makedirs

    self.quit()  # in case one is already running

    self._session_number += 1

    if self.__logfile is None:
        # With 'SAGE_PEXPECT_LOG' set and no log file name chosen yet,
        # pick one inside .sage/pexpect_logs/
        if self.__logfilename is None and 'SAGE_PEXPECT_LOG' in os.environ:
            from sage.env import DOT_SAGE
            log_dir = os.path.join(DOT_SAGE, 'pexpect_logs')
            sage_makedirs(log_dir)
            name_parts = (log_dir, self.name(), os.getpid(), id(self), self._session_number)
            self.__logfilename = '%s/%s-%s-%s-%s.log' % name_parts
        if self.__logfilename is not None:
            self.__logfile = open(self.__logfilename, 'w')

    cmd = self.__command

    if self.__verbose_start:
        print(cmd)
        print("Starting %s" % cmd.split()[0])

    if self.__remote_cleaner and self._server:
        remote_cmd = 'sage-native-execute ssh %s "nohup sage -cleaner" &' % self._server
        os.system(remote_cmd)

    # Give the child a copy of the environment without the variables
    # that could make it do something complicated to the terminal.
    # See Trac #12221 and #13859.
    pexpect_env = dict(os.environ)
    for var in ['TERM', 'COLUMNS']:
        pexpect_env.pop(var, None)

    # Spawn from self.__path, then go back to where we were.
    saved_cwd = os.getcwd()
    os.chdir(self.__path)
    try:
        try:
            self._expect = SageSpawn(
                cmd,
                logfile=self.__logfile,
                timeout=None,  # no timeout
                env=pexpect_env,
                name=self._repr_(),
                quit_string=self._quit_string())
        except (ExceptionPexpect, pexpect.EOF) as err:
            # Report pexpect failures as RuntimeError
            details = (self.name(), cmd, err, self._install_hints())
            raise RuntimeError("unable to start %s because the command %r failed: %s\n%s" % details)
    except BaseException:
        self._expect = None
        self._session_number = BAD_SESSION
        raise
    finally:
        os.chdir(saved_cwd)

    if self._do_cleaner():
        cleaner.cleaner(self._expect.pid, cmd)

    self._expect.maxread = self.__maxread
    self._expect.delaybeforesend = 0
    try:
        self._expect.expect(self._prompt)
    except (pexpect.TIMEOUT, pexpect.EOF):
        self._expect = None
        self._session_number = BAD_SESSION
        raise RuntimeError("unable to start %s" % self.name())
    self._expect.timeout = None

    # Calling tcsetattr earlier exposes bugs in various pty
    # implementations, see :trac:`16474`.  Nothing has been written to
    # the child so far, so echo can safely be switched off only now.
    if not self._terminal_echo:
        self._expect.setecho(0)

    with gc_disabled():
        run = self.eval if block_during_init else self._send
        for stmt in self.__init_code:
            run(stmt)
# Defines which weather station code (For Lyon-Bron it is 7480) cityCode = 7480 # Initialisation body = [] header = [] i = 0 for d in range(0, deltaDate.days+1): sourcingDate = startingDate + timedelta(days=d) # Get webpage source code to scrap data from it with our parameters rawSource = requests.get("http://www.meteociel.fr/temps-reel/obs_villes.php?jour2=%s&mois2=%s&annee2=%s&code2=%s" % (sourcingDate.day, sourcingDate.month-1, sourcingDate.year, cityCode)) # Cleaning raw source code source = cleaner(rawSource.text) # Transforming into a tree tree = html.fromstring(source) # Browsing tree, seeking for tr tables = tree.xpath('//tr') # Initializing counter k = 0 # Get header header = tables[0].xpath('td/text()') # Getting values and refactoring date/time for td in tables: if k > 0:
def _start(self, alt_message=None, block_during_init=True):
    """
    Start the child process for this interface, wait for its prompt and
    run the stored initialisation code.

    Any running session is quit first.  The child is spawned from
    ``self.__path`` (created if needed) and the previous working
    directory is restored afterwards, whether or not spawning succeeds.
    If ``block_during_init`` is true each init statement is passed to
    ``self.eval``, otherwise to ``self._send``.  ``alt_message`` is not
    used in this body.

    Raises ``RuntimeError`` if the command cannot be spawned or if the
    prompt does not appear; in both cases ``self._expect`` is reset to
    ``None``, the session number is set to ``BAD_SESSION`` and the
    interface name is appended to ``failed_to_start``.
    """
    from sage.misc.misc import sage_makedirs
    global failed_to_start

    self.quit()  # in case one is already running

    self._session_number += 1
    original_dir = os.path.abspath('.')
    workdir = self.__path
    sage_makedirs(workdir)
    os.chdir(workdir)
    try:
        # If the 'SAGE_PEXPECT_LOG' environment variable is set and
        # the current logfile is None, then set the logfile to be one
        # in .sage/pexpect_logs/
        if self.__logfile is None and 'SAGE_PEXPECT_LOG' in os.environ:
            from sage.env import DOT_SAGE
            logs = '%s/pexpect_logs' % DOT_SAGE
            sage_makedirs(logs)
            filename = '%s/%s-%s-%s-%s.log' % (logs, self.name(), os.getpid(), id(self), self._session_number)
            self.__logfile = open(filename, 'w')

        cmd = self.__command

        if self.__verbose_start:
            print(cmd)
            print("Starting %s" % cmd.split()[0])

        try:
            if self.__remote_cleaner and self._server:
                c = 'sage-native-execute ssh %s "nohup sage -cleaner" &' % self._server
                os.system(c)

            # Unset some environment variables for the children to
            # reduce the chances they do something complicated breaking
            # the terminal interface.
            # See Trac #12221 and #13859.
            pexpect_env = dict(os.environ)
            for var in ('TERM', 'COLUMNS'):
                pexpect_env.pop(var, None)

            self._expect = pexpect.spawn(cmd, logfile=self.__logfile, env=pexpect_env)
            if self._do_cleaner():
                cleaner.cleaner(self._expect.pid, cmd)
        except (ExceptionPexpect, pexpect.EOF, IndexError):
            self._expect = None
            self._session_number = BAD_SESSION
            failed_to_start.append(self.name())
            raise RuntimeError("Unable to start %s because the command '%s' failed.\n%s" % (
                self.name(), cmd, self._install_hints()))
    finally:
        # Restore the caller's working directory even when startup
        # fails; previously a failure left the process in ``workdir``.
        os.chdir(original_dir)

    # Bound the wait for the first prompt, then lift the limit again.
    self._expect.timeout = self.__max_startup_time

    self._expect.maxread = self.__maxread
    self._expect.delaybeforesend = 0
    try:
        self._expect.expect(self._prompt)
    except (pexpect.TIMEOUT, pexpect.EOF):
        self._expect = None
        self._session_number = BAD_SESSION
        failed_to_start.append(self.name())
        raise RuntimeError("Unable to start %s" % self.name())
    self._expect.timeout = None

    # Calling tcsetattr earlier exposes bugs in various pty
    # implementations, see :trac:`16474`. Since we haven't
    # **written** anything so far it is safe to wait with
    # switching echo off until now.
    if not self._terminal_echo:
        self._expect.setecho(0)

    with gc_disabled():
        if block_during_init:
            for X in self.__init_code:
                self.eval(X)
        else:
            for X in self.__init_code:
                self._send(X)
import storingwebpages
import trials
import unifier
import cleaner

# Search term and number of result files shared by every stage below.
query = "husband"
number_of_files = 200
stage_args = (query, number_of_files)

# Stage 1: fetch and store the result pages for the query.
# NOTE(review): the four string arguments look like a start/end date
# range; confirm against storingwebpages.results.
storingwebpages.results(
    query,
    "advanced",
    "1674",
    "00",
    "1913",
    "99",
    number_of_files,
)

# Stages 2-4 each take the same (query, number_of_files) pair and run
# in this order.
trials.trials(*stage_args)
unifier.unifier(*stage_args)
cleaner.cleaner(*stage_args)