def test_clean_with_article(self):
    """Check that clean() keeps only the <article> content of a page.

    Three inputs are cleaned: the bare article page, the same page with an
    extra <div> before the article, and the article with an embedded
    <video>. After condensing, each must equal the condensed bare page.
    """
    expected = '<html><head></head><body><article>Hello! I am a test</article></body></html>'
    variants = (
        expected,
        '<html><head></head><body><div>dsfasfadfasdfasdf</div><article>Hello! I am a test</article></body></html>',
        '<html><head></head><body><article><video></video>Hello! I am a test</article></body></html>',
    )
    for markup in variants:
        self.assertEqual(condense(clean(markup)), condense(expected))
def run():
    """Run the (Summer 2019) scoring, matching and merging pipeline.

    The PRE and POST CSV files go through the same stages in order: score,
    clean, ID generation and repeat splitting. The two results are then
    matched and merged, and the merged data is written to a CSV whose name
    is built from the season and year.
    """
    from repeats import repeats
    from score import score
    from clean import clean
    from match import match
    from merge import merge
    from id_gen import id_gen
    import pandas as pd  # not referenced below; kept so import behaviour is unchanged

    year = "19"
    season = "Sp"
    stu_DB_name = "Student_ID_Database.csv"
    instr_DB_name = "Instr_ID_Database.csv"
    pre = 'QuaRCSLt2_S19_PRE.csv'
    post = pre[:-7] + "POST.csv"
    merge_name = 'QuaRCSLt2_' + season + year + '_merged.csv'

    # Each stage returns the value that is handed to the next stage.
    print("Scoring...")
    pre = score(pre, 'PRE', year, season, 'answ.csv', pre[:-4])
    post = score(post, 'PST', year, season, 'answ.csv', post[:-4])

    print("Cleaning...")
    pre = clean(pre, 'PRE')
    post = clean(post, 'PST')

    print("Generating student and instructor IDs...")
    pre = id_gen(pre, 'PRE', year, season, stu_DB_name, instr_DB_name)
    post = id_gen(post, 'PST', year, season, stu_DB_name, instr_DB_name)

    print("Splitting...")
    pre = repeats(pre, 'PRE')
    post = repeats(post, 'PST')

    print("Matching...")
    pre_unmatched, post_unmatched, pairs, instructor_change = match(pre, post)

    print("Merging...")
    merged = merge(pre_unmatched, post_unmatched, pre, post, pairs)
    merged.to_csv(merge_name, encoding='utf-8', index=False)
    print("Merged dataset saved to {0}".format(merge_name))
def parse_line(sen, count):
    """Annotate selected words of a sentence with numbered markers.

    Newlines in ``sen`` are treated as spaces. Every word whose cleaned form
    makes ``find(words, ...)`` return 0 is a candidate. If there are no
    candidates the sentence is returned untouched. Otherwise ``detect(sen)``
    is consulted, and each candidate found among its (cleaned) keys gets a
    ``{n}`` marker, numbered from ``count`` upwards.

    Args:
        sen: The sentence text.
        count: Number to use for the first marker.

    Returns:
        ``[sentence, totranslate]`` where ``sentence`` has a red ``<font>``
        marker appended to each annotated word and ``totranslate`` is a list
        of ``(cleaned_word, detected_value)`` pairs in marker order.
    """
    flat = ' '.join(sen.split('\n'))

    # Clean each word once (the cleaned form is both tested and stored).
    candidates = []
    for raw in flat.split(' '):
        cleaned = clean(raw)
        if find(words, cleaned) == 0:
            candidates.append(cleaned)
    if not candidates:
        return [sen, []]

    par = detect(sen)
    detected = {clean(key): par[key] for key in par}

    markers = {}
    totranslate = []
    for key in candidates:
        if key in detected:
            markers[key] = '{' + str(count) + '}'
            totranslate.append((key, detected[key]))
            count += 1

    tokens = flat.split(' ')
    for i, token in enumerate(tokens):
        marker = markers.get(clean(token))
        if marker is not None:
            tokens[i] = token + '<font color="#ff0000">' + marker + '</font>'
    return [' '.join(tokens), totranslate]
def do_cube(in_dirs, cube_dir, min_freq, max_freq, step_freq, width_freq):
    """Image the given visibilities into one cube with ``clean``.

    The output image name is ``cube_<min_freq>~<max_freq>`` inside
    ``cube_dir``. The job is always announced on stdout; the actual
    ``clean`` call is skipped when the module-level ``debug`` flag is set.
    Exceptions raised by ``clean`` are printed rather than propagated.

    Args:
        in_dirs: Value passed to ``clean`` as ``vis``.
        cube_dir: Directory in which the cube image is created.
        min_freq, max_freq: Used only to build the output name.
        step_freq, width_freq: Accepted but not used by this function.
    """
    outfile = os.path.join(cube_dir, 'cube_{0}~{1}'.format(min_freq, max_freq))
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(''' Job {0}: clean(vis={1}, imagename={2})'''.format(job_id, str(in_dirs), outfile))
    if debug:
        return
    try:
        clean(vis=in_dirs,
              imagename=outfile,
              field='deepfield',
              spw='',
              mode='frequency',
              restfreq='1420.405752MHz',
              nchan=-1,
              start='',
              width='',
              interpolation='nearest',
              niter=1000,
              gain=0.1,
              threshold='0.0mJy',
              imsize=[2048],
              cell=['1.25arcsec'],
              weighting='natural',
              usescratch=True)
    except Exception as clEx:
        # Best effort: report the failure and let the caller carry on.
        print('*********\nClean exception: %s\n***********' % str(clEx))
def check(article):
    """Re-clean an article where needed and save it if anything was done.

    ``clean.clean`` is run when the article is not yet marked clean, and
    again when it is marked clean but ``utils.is_prose`` rejects its text.
    The article is stored with ``put()`` only if at least one pass ran.

    Returns:
        True if the article was cleaned (and therefore saved), else False.
    """
    # TODO (from the original notes): also backfill a missing sentiment /
    # title sentiment using a simpler, word-pair-only classifier.
    first_pass = not article.clean
    if first_pass:
        clean.clean(article)

    # Evaluated after the first pass, which may have changed the article.
    second_pass = bool(article.clean) and not utils.is_prose(article.text)
    if second_pass:
        clean.clean(article)

    changed = first_pass or second_pass
    if changed:
        article.put()
    return changed
def setCleanedIVDFS(self, fwin = 1500):
    '''Generate cleaned (FFT-zeroed) IVDFs from the dye and diode files.

    Each file named by ``self.dye_ivdf`` / ``self.di_ivdf`` is loaded, the
    magnitude of columns 2 and 3 is cleaned against column 0, and every
    spectral component with ``|f| > fwin`` is set to zero before
    transforming back. Results are stored as ``(wavelength, data)`` tuples
    in ``self.dye_ivdf_c`` and ``self.diode_ivdf_c``. The diode result is
    not set when ``self.di_ivdf`` is the empty string.

    Optional Inputs:
    fwin = 1500 : Wavenumber outside which to set the spec to 0.
    '''
    def _cleaned_ivdf(path):
        # Load one IVDF file and return its low-pass filtered, cleaned form.
        d = np.loadtxt(path, skiprows=1)
        R = np.sqrt(d[:,2]**2 + d[:,3]**2)
        wl = d[:,0]
        (wlc, rc) = clean.clean(wl, R)
        [f, g] = spec.spec(rc, wlc[1] - wlc[0])
        g[np.where(np.abs(f) > fwin)] = 0
        [t, rcf] = spec.ispec(g, f[1] - f[0])
        return (wlc, rcf)

    # First the dye.
    self.dye_ivdf_c = _cleaned_ivdf(self.dye_ivdf)

    # Then the diode, if a diode file was given.
    if self.di_ivdf == '':
        return
    # Previously this re-read self.dye_ivdf, so the "diode" result was just
    # a copy of the dye data.
    self.diode_ivdf_c = _cleaned_ivdf(self.di_ivdf)
    return
def main():
    """Fetch images, then publish and clean up, as selected by the CLI flags.

    ``upload_only`` skips both fetch steps; ``download_only`` skips the
    publish and clean steps.
    """
    args = utils.get_args()
    if not args.upload_only:
        for fetch in (fetch_spacex, fetch_hubble):
            fetch(args)
    if args.download_only:
        return
    publish_images(args)
    # NOTE(review): clean() is assumed to belong to the publish branch
    # (nothing is removed after a download-only run) -- confirm.
    clean(args)
def encode(encoder, trainpath=TRAINPATH, testpath=TESTPATH):
    '''Encode the cleaned train and test CSVs with one encoder.

    Pass a fresh encoder instance from the ce library. The encoder is
    fitted on the training features only and then applied to the test
    features, so both sets share the same learned encoding.

    Args:
        encoder: Object exposing ``fit_transform`` and ``transform``.
        trainpath: Path of the training CSV.
        testpath: Path of the test CSV.

    Returns:
        Dict with the encoded ``'train'`` and ``'test'`` features and
        ``'TEST_IDs'`` (element 1 of the cleaned test result).
    '''
    df_test = clean(pd.read_csv(testpath))
    X_train = encoder.fit_transform(clean(pd.read_csv(trainpath))[0])
    # transform, not fit_transform: re-fitting on the test set would
    # discard the mapping learned from the training data.
    X_test = encoder.transform(df_test[0])
    return {'train': X_train, 'test': X_test, 'TEST_IDs': df_test[1]}
def experiment(datasets, log=False, n_jobs=1, nosave=False, error_type=None, arg_seeds=None):
    """Run the split experiments on every dataset.

    Split seeds and one experiment seed are drawn from NumPy's RNG after
    seeding it with ``config.root_seed``. For each dataset and split the
    data is initialised, cleaned and passed to ``one_split_experiment``.
    Splits already reported complete by ``utils.check_completed`` are
    skipped.

    Args:
        datasets: Iterable of dataset dicts (``'data_dir'`` is read for messages).
        log: When true, log progress and timings to a timestamped file.
        n_jobs, nosave, error_type: Forwarded to the experiment / clean steps.
        arg_seeds: Optional collection of split indices to run; others are skipped.
    """
    if log:
        logging.captureWarnings(False)
        log_name = 'logging_{}.log'.format(datetime.datetime.now())
        logging.basicConfig(filename=log_name, level=logging.DEBUG)

    # Seeds are drawn in a fixed order so runs are reproducible.
    np.random.seed(config.root_seed)
    split_seeds = np.random.randint(10000, size=config.n_resplit)
    experiment_seed = np.random.randint(10000)
    n_splits = len(split_seeds)

    for dataset in datasets:
        if log:
            logging.debug("{}: Experiment on {}".format(
                datetime.datetime.now(), dataset['data_dir']))

        for idx, seed in enumerate(split_seeds):
            if arg_seeds is not None and idx not in arg_seeds:
                continue

            if utils.check_completed(dataset, seed, experiment_seed):
                print(
                    "Ignore {}-th experiment on {} that has been completed before."
                    .format(idx, dataset['data_dir']))
                continue

            started = time.time()
            init(dataset, seed=seed, max_size=config.max_size)
            clean(dataset, error_type)
            one_split_experiment(dataset,
                                 n_retrain=config.n_retrain,
                                 n_jobs=n_jobs,
                                 nosave=nosave,
                                 seed=experiment_seed,
                                 error_type=error_type)
            finished = time.time()

            minutes = (finished - started) / 60
            remaining = minutes * (n_splits - idx - 1)
            if log:
                logging.debug(
                    "{}: {}-th experiment takes {} min. Estimated remaining time: {} min"
                    .format(datetime.datetime.now(), idx, minutes, remaining))
def main():
    """Re-segment an untokenized file using a segmented, tokenized one.

    Every cleaned line of the tokenized file is located in the
    concatenated, cleaned original text, allowing arbitrary whitespace
    between characters. The matching slice of the original is written as
    one output line. The program exits with status 1 if a line cannot be
    found, if non-space text had to be skipped, or if non-space text is
    left over at the end.
    """
    parser = argparse.ArgumentParser(
        description="given unsegmented, untokenized file and segmented, tokenized file, return segmented untokenized file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--origfile", "-r", nargs="?", type=argparse.FileType("r"), default=sys.stdin, help="unseg, untok file"
    )
    parser.add_argument(
        "--tokfile", "-t", nargs="?", type=argparse.FileType("r"), default=sys.stdin, help="seg, tok file"
    )
    parser.add_argument(
        "--outfile", "-o", nargs="?", type=argparse.FileType("w"), default=sys.stdout, help="output (seg, untok) file"
    )

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))

    origfile = prepfile(args.origfile, "r")
    tokfile = prepfile(args.tokfile, "r")
    outfile = prepfile(args.outfile, "o")

    # Concatenate all usable original lines into one searchable string.
    origlines = []
    for line in origfile:
        line = clean.clean(line)
        if line is None:
            continue
        origlines.append(line)
    orig = "".join(origlines)

    for ln, line in enumerate(tokfile, start=1):
        line = clean.clean(line)
        if line is None:
            continue
        # Match the characters of the line with optional whitespace between
        # any two of them (raw string: "\s" is a regex class, not an escape).
        squashline = "".join(line.split())
        rex = r"\s*".join(map(re.escape, squashline))
        match = re.search(rex, orig, re.UNICODE)
        if match is None:
            sys.stderr.write("Couldn't find [[[%s]]] in [[[%s]]] at line %d\n" % (line, orig[: len(line)], ln))
            sys.exit(1)
        # Only whitespace may precede the match; anything else is lost text.
        prefix = orig[: match.start(0)]
        if prefix and not prefix.isspace():
            sys.stderr.write("Found %s but skipping prefix %s\n" % (line, prefix))
            sys.exit(1)
        outfile.write(orig[match.start(0) : match.end(0)] + "\n")
        orig = orig[match.end(0) :]

    if orig and not orig.isspace():
        sys.stderr.write("Leftover: " + orig + "\n")
        sys.exit(1)
def make_sweep_plots(filename, freqs, fignum=1, npoints = 2000, sweep=False):
    '''
    Makes plots for a sweep over antenna frequency, where p is the voltage
    for current sweep frequency.

    Four panels are drawn: the raw sweep data, either the raw phase or the
    constructed-vs-actual sweep ramp (when ``sweep`` is true), the cleaned
    lock-in response and the cleaned, unwrapped phase.

    :param filename: File to read out.
    :param freqs: [low frequency, high frequency]
    :param fignum: Figure number to draw into.
    :param npoints: Passed to gen_freq_axis when building the frequency axis.
    :param sweep: Selects what the second panel shows.
    :return: Tuple containing (x, y, p, x_clean, y_clean, f_clean)
    '''
    (wl, x, y, p) = read_lockin(filename)
    figure(fignum, figsize=(8,8))
    clf()

    subplot(221)
    f = gen_freq_axis(p, npoints, freqs[0], freqs[1])
    plot(f, x,'.')
    plot(f, y, '.')
    plot(f, np.abs(x + 1j * y), '.')
    xlabel('Sweep voltage (V)')
    ylabel('Lock in signal')
    title('Raw Sweep Data')
    legend(['Real', 'Imag', 'Rvec'])

    subplot(222)
    if sweep == False:
        plot(f, np.angle(x + 1j * y))
        xlabel('Sweep Voltage (V)')
        ylabel('Phase (rad)')
        title('Raw Phase Data')
    elif sweep == True:
        plot(p)
        plot(f * np.max(p) / freqs[1])
        xlabel('Sweep #')
        ylabel('Sweep Ramp Voltage')
        legend(['Raw Voltage', 'Constructed Sweep'])
        title('Constructed Sweep and Actual Sweep')

    subplot(223)
    # xc is the cleaned real part (x) and yc the cleaned imaginary part (y).
    # These were previously swapped, which mislabelled the 'Real'/'Imag'
    # curves and made the cleaned phase disagree with the raw phase panel.
    (fc, xc) = clean.clean(f, x)
    (fc, yc) = clean.clean(f, y)
    plot(fc, xc)
    plot(fc, yc)
    xlabel('Frequency')
    ylabel('Lockin response(v)')
    title('Cleaned Lock in Response')
    legend(['Real', 'Imag'])

    subplot(224)
    phac = np.angle(xc + 1j * yc)
    plot(fc, np.unwrap(phac))
    xlabel('Frequency')
    ylabel('Phase (rad)')
    title('Cleaned Phase')
    return(x, y, p, xc, yc, fc)
def _main():
    """Parse the command line and dispatch to the selected sub-command.

    Sets the module-level ``_debug`` flag from the parsed arguments. A
    ``KeyboardInterrupt`` during the command prints a blank line and exits
    with status 0.
    """
    global _debug
    args = _parse_arguments()
    _debug = args.debug
    command = args.command
    try:
        if command in ('capture', 'cap'):
            capture(args)
        elif command == 'clean':
            clean(args)
        elif command in ('convert', 'con'):
            convert(args)
    except KeyboardInterrupt:
        # TODO: Maybe track some statistics and print them on exit.
        print()
        sys.exit(0)
def runall(filename):
    """Time ``filename`` under every entry of ``brainfucks`` and print a table.

    Each entry is run twice (second argument False, then True). A
    ``KeyboardInterrupt`` stops the timing loop early; whatever was
    collected so far is still reported. ``clean()`` is then called and the
    results are printed sorted by the first timing. Rows whose first timing
    is falsy are omitted.
    """
    results = []
    try:
        for lang in brainfucks:
            t, bf_name, _ = run(filename, False, lang)
            t_opt = run(filename, True, lang)[0]
            results.append((t, t_opt, bf_name))
    except KeyboardInterrupt:
        # Blank line after the interrupt; print('') works on Python 2 and 3.
        print('')
    clean()
    print('-' * 40)
    print('t\tt opt\ttitle')
    for result in sorted(results, key=lambda a: a[0]):
        # Test this row's own timing. The old check used the loop variable
        # `t` left over from the timing loop, which is undefined when no
        # run completed.
        if result[0]:
            print('%.3f\t%.3f\t%s' % result)
def plot_clean(t, x, p0 = 0.0, axis= None):
    """Plot the CLEAN spectrum of a time series against period.

    The upper half of the spectrum returned by ``clean`` is normalised by
    twice the variance of ``x``, converted to period and restricted to
    0.5 <= p < 23. Vertical lines mark 1.0, ``p0`` and the highest peak.

    Args:
        t: Sample times.
        x: Sample values.
        p0: Reference period drawn as a blue line.
        axis: Axes whose ``transAxes`` positions the 'CLEAN' label;
            defaults to the current axes.

    Returns:
        The period of the highest peak among periods >= 1.1.
    """
    from clean import clean  # @UnresolvedImport
    f, cleaned, _ = clean(t, x, threshold=1e-3)
    n2 = len(f) // 2  # integer index on both Python 2 and 3
    cf = cleaned[n2+1:]/(2.0*np.var(x))
    p = 1./f[n2+1:]
    window = (p>=0.5) & (p<23.0)
    cf = cf[window]
    p = p[window]
    period = p[np.argmax(cf)]
    plt.axvline(1, color='r', alpha=0.5, label='1.0 day')
    plt.axvline(p0, color='b', alpha=0.5, label='%.2f days' % p0)
    plt.axvline(period, color='g', alpha=0.5, label='max. peak')
    plt.xlim(0.0,max(t)/3)
    plt.plot(p, cf, 'k')
    plt.minorticks_on()
    plt.ylabel('S(p)')
    if axis is None:
        # The default used to crash on None.transAxes.
        axis = plt.gca()
    plt.text(0.95, 0.9, 'CLEAN', verticalalignment='top', horizontalalignment='right', transform=axis.transAxes)
    # The returned period ignores everything below 1.1.
    keep = p>=1.1
    cf = cf[keep]
    p = p[keep]
    return p[np.argmax(cf)]
def getCleanedDyeIVDFS(fwin = 1500):
    '''
    Acquires and returns cleaned dye IVDFs.

    For every index returned by getDyeIVDFRels(), the matching dye file is
    loaded, the magnitude of columns 2 and 3 is cleaned against column 0,
    and all spectral components with |f| > fwin are zeroed before
    transforming back.

    INPUTS:
    fwin=1500 : Chooses the range outside of which to zero the spectrum.

    OUTPUTS: (dye_days, dye_cleanedivdfs)
    dye_days : Corresponding data file for each entry of the output data.
    dye_cleanedivdfs : List of (wavelength, data) tuples.
    '''
    rels = getDyeIVDFRels()
    files = getDyeIVDFFiles()
    dye_days = []
    dye_cleanedivdfs = []
    for idx in rels:
        path = files[idx]
        dye_days.append(path)
        raw = np.loadtxt(path, skiprows=1)
        magnitude = np.sqrt(raw[:,2]**2 + raw[:,3]**2)
        wavelength = raw[:,0]
        (wlc, rc) = clean.clean(wavelength, magnitude)
        [f, g] = spec.spec(rc, wlc[1]- wlc[0])
        # Zero everything outside the requested window.
        g[np.where(np.abs(f) > fwin)] = 0
        [t, rcf] = spec.ispec(g, f[1]- f[0])
        dye_cleanedivdfs.append((wlc, rcf))
    return (dye_days, dye_cleanedivdfs)
def getCleanedDiodeIVDFS(fwin = 1500):
    '''
    Acquires and returns cleaned diode IVDFs.

    The diode file name is derived from each dye file name by replacing
    'DYE' with 'DIODE'. Files whose name contains 'Apr8' are not loaded;
    an empty 2-row array is stored in their place.

    INPUTS:
    fwin=1500 : Chooses the range outside of which to zero the spectrum.

    OUTPUTS:
    diode_cleanedivdfs : List with one (wavelength, data) tuple per file.
    '''
    rels = getDyeIVDFRels()
    di_civdfs = []
    fl = getDyeIVDFFiles()
    for x in rels:
        if 'Apr8' in fl[x]:
            di_civdfs.append(np.array([[],[]]))
            continue
        d = np.loadtxt(fl[x].replace('DYE','DIODE'), skiprows=1)
        R = np.sqrt(d[:,2]**2 + d[:,3]**2)
        wl = d[:,0]
        (wlc, rc) = clean.clean(wl, R)
        # Get rid of high freq noise too. The cut-off now honours fwin; it
        # was hard-coded to 1500, so the argument had no effect.
        [f, g] = spec.spec(rc, wlc[1] - wlc[0])
        fi = np.where(np.abs(f) > fwin)
        g[fi] = 0
        [t, rcf] = spec.ispec(g, f[1] - f[0])
        di_civdfs.append((wlc, rcf))
    return di_civdfs
def mutual_information_title(string):
    """Build a keyword-presence feature vector for a headline.

    The headline is passed through ``clean`` and each keyword below is
    tested with ``in`` against the result.

    Returns:
        A list holding one feature list of 0/1 flags, in keyword order.
    """
    _key_words = [
        # reportedly
        'according', 'said', 'reported', 'told',
        # claim
        'claimed', 'said', 'would', 'false',
        # hoax
        'culkin', 'macaulay', 'internet', 'story',
        # fake
        'facebook', 'site', 'real', 'website',
    ]
    cleaned = clean(string)
    flags = [int(keyword in cleaned) for keyword in _key_words]
    return [flags]
def refuting_features_body(string):
    """Build a refuting/discussion-word presence vector for a text.

    The text is passed through ``clean`` and each word below is tested
    with ``in`` against the result.

    Returns:
        A list holding one feature list of 0/1 flags, in word order.
    """
    _refuting_words = [
        # refuting words
        'fake', 'fraud', 'hoax', 'hoaxer', 'false', 'deny', 'denies',
        'despite', 'nope', 'doubt', 'bogus', 'debunk', 'pranks', 'retract',
        'lie',
        # discussion words
        'reportedly', 'report', 'likely', 'probably', 'according', 'might',
        # key word
        'update',
    ]
    cleaned = clean(string)
    flags = [int(term in cleaned) for term in _refuting_words]
    return [flags]
def cleaned_data_to_database(log):
    """Flush the collected log to the database once per interval.

    Nothing happens until at least ``int_time`` seconds have passed since
    ``prev_time``. Then the log is cleaned, file types are resolved, one
    row per record is written through ``db.update`` and the rows are
    echoed to stdout.

    Returns:
        An empty list after a flush, otherwise ``log`` unchanged.
    """
    global prev_time
    global arg
    if time.time() - prev_time < int_time:
        return log

    prev_time = time.time()
    data = file_type.get_file_type(clean.clean(log))
    rows = [
        [502, data['time'][i], data['clicks'][i], data['file cat'][i],
         data['file name'][i]]
        for i in range(len(data))
    ]
    # Send the rows to the database.
    db.update(db.get_overall("502"), rows, datetime.now().date())
    print('------')
    print(pd.DataFrame(rows))
    print('------')
    return []
def insertion(counter, postid, message):
    """Count the keywords of a message under its post id.

    The message is cleaned, tokenized and stripped of stop words. When the
    post id differs from the first key already in ``counter``, that
    previous post is ranked, printed, serialized, sent and deleted before
    a fresh entry is started. Messages without keywords leave ``counter``
    untouched.

    Returns:
        The (mutated) ``counter`` mapping post id -> {word: count}.
    """
    keywords = clean.remove_stopwords(clean.tokenize(clean.clean(message)))
    has_entries = bool(counter)
    current_postid = next(iter(counter)) if has_entries else None

    if not keywords:
        return counter

    if postid != current_postid:
        if has_entries:
            # Flush the post collected so far before starting a new one.
            ranked = rank(counter, current_postid)
            print(ranked)
            send_keywords(serialize(ranked))
            deletion(counter, current_postid)
        counter[postid] = {}

    for word in keywords:
        tally = counter[postid]
        tally[word] = tally[word] + 1 if word in tally else 1
    return counter
def model(dataset_filename):
    """Explore the dataset and write contiguous anomaly ranges to results.csv.

    ``clean`` supplies the anomalous row numbers. Consecutive rows are
    merged into one ``start, end`` range. A range is written when a gap
    follows it or when its end row is repeated in the list.

    Returns:
        Always 0.
    """
    explore(dataset_filename)
    anomaly_rows = clean(dataset_filename)

    with open("results.csv", "w+") as results:
        results.write("Anomaly Start Sample, Anomaly End Sample\n")
        start = None
        end = None

        def emit():
            # Write the range currently being tracked.
            results.write(str(start) + ", " + str(end) + "\n")

        for row in anomaly_rows:
            if start is None:
                # First anomaly: open a range on this row.
                start = end = int(row)
            elif end == row - 1:
                # Continuation of the current range.
                end = int(row)
            elif end < row - 1:
                # Gap: close the current range and open a new one.
                emit()
                start = end = int(row)
            elif end == row:
                # Repeated entry: treated as the end of the last anomaly.
                emit()
    return 0
def fix_excel_sheet(self,path):
    """Run the ``clean`` tool on ``path`` and return the cleaned CSV path.

    Both the temporary file (``tmp.csv``) and the result (``clean.csv``)
    are placed in ``self.target_dir``.
    """
    from clean import clean
    out_dir = self.target_dir
    clean_path = os.path.join(out_dir, "clean.csv")
    tmp_path = os.path.join(out_dir, "tmp.csv")
    clean().run(path, tmp_path, clean_path)
    return clean_path
def train(self, train_file, n, m):
    '''Build the dictionary (vocabulary) from a training file.

    The pre-processed file ``'dealed_' + train_file`` is used when it can
    be opened; otherwise ``clean.clean(train_file)`` is called and the file
    it names is read instead. The raw decoded text is kept in
    ``self.sentence``. Line breaks are then turned into spaces and ``#`` /
    ``$`` removed before the text is split on spaces.

    Args:
        train_file: Name of the training file.
        n: n-gram order, stored as an int in ``self.n_gram``.
        m: Maximum length, stored as an int in ``self.max_len``.
    '''
    file_name = 'dealed_' + train_file
    try:
        file_open = open(file_name, 'r')
    except IOError:
        # Only a failed open triggers regeneration; other errors propagate.
        file_name = clean.clean(train_file)
        file_open = open(file_name, 'r')

    # The handle is closed even if reading or decoding fails.
    with file_open:
        file_read = file_open.read().decode('utf-8')

    self.sentence = file_read
    for old, new in (('\r\n', ' '), ('\n', ' '), ('#', ''), ('$', '')):
        file_read = file_read.replace(old, new)

    tokens = file_read.split(" ")
    self.words = set(tokens)
    self.n_gram = int(n)
    self.length = len(tokens)
    self.count = len(self.words)
    self.max_len = int(m)
def find():
    """Interactively look up a user's server and QQ by name.

    Prompts in a loop until ``q`` or ``Q`` is entered. Empty names are
    rejected before any query is issued. A failed or empty query is
    reported as "user does not exist"; database errors are also logged at
    critical level. On success the row is passed through ``clean`` and
    printed.
    """
    print("""尊敬的用户,您好!!!欢迎使用查询功能 请输入您的选择!!! q & Q)退出本页面 """)
    while True:
        name = input(""" 输入查询用户名称,不允许为空:""").strip()
        if name == "q" or name == "Q":
            break
        if not name:
            # Validate first: no point in hitting the database (and logging
            # a critical error) for an empty name.
            print("用户名为空,请重新输入")
            continue

        # NOTE(review): the name is interpolated straight into the SQL text,
        # which is open to SQL injection. Switch to a parameterized query
        # once connect() is confirmed to accept parameters.
        sql = """select server,qq from user where name = '{}'""".format(name)
        try:
            result = connect(sql, 'find')
            server = result[0][0]
            qq = result[0][1]
        except Exception:
            # Exception rather than a bare except, so Ctrl-C still works.
            logging.critical("\033[31m在查询数据库时发生了严重的错误\033[0m")
            result = None

        if not result:
            print("该用户不存在,请重新输入")
        else:
            print(""" 查询用户成功!!!! """)
            table = clean([[name, server, qq]])
            print(table)
def fix_excel_sheet(self, path):
    """Clean the sheet at ``path`` and return the path of the cleaned CSV.

    The ``clean`` tool writes ``clean.csv`` into ``self.target_dir`` and
    is also given ``tmp.csv`` in the same directory as a scratch file.
    """
    from clean import clean
    out_dir = self.target_dir
    clean_path = os.path.join(out_dir, "clean.csv")
    scratch_path = os.path.join(out_dir, "tmp.csv")
    cleaner = clean()
    cleaner.run(path, scratch_path, clean_path)
    return clean_path
def main():
    """Re-import the todo and question files into the database when changed.

    For each file, a modification check decides whether ``clean.clean`` is
    run for its table and a fresh MD5 is saved. The outcome is logged
    either way.
    """
    def sync(filename, tablename, updated_msg, unchanged_msg):
        # Re-clean one file only if it was modified since the stored MD5.
        if fileWasModified(filename):
            clean.clean(db_name, tablename, filename)
            saveMD5(filename)
            logging.info(updated_msg)
        else:
            logging.info(unchanged_msg)

    # Get the file names to process for the current platform.
    (todo_filename, question_filename) = getFilenames()

    # Check whether each file has been modified.
    sync(todo_filename, todo_tablename, "clean todo up!", "todo is clean!")
    sync(question_filename, question_tablename,
         "clean question up!", "question is clean!")
def create(path):
    """Build the article table and the two gensim dictionaries from ``path``.

    The links and articles returned by ``clean`` are saved to
    ``./data/data.csv``. A unigram dictionary and a combined
    unigram+bigram dictionary are built from the articles and saved under
    ``./data``.
    """
    links, articles = clean(path)
    table = pd.DataFrame()
    table["articles"] = articles
    table["links"] = links
    table.to_csv('./data/data.csv', index=False)

    unigram_docs = table["articles"].map(preproc)  # unigrams per document
    bigram_docs = table["articles"].map(bigram)    # bigrams per document

    # Dictionary including only unigrams.
    corpora.Dictionary(unigram_docs).save("./data/dictionary_uni.pkl")

    # Dictionary including unigrams and bigrams.
    combined_docs = pd.concat([unigram_docs, bigram_docs], ignore_index=True)
    corpora.Dictionary(combined_docs).save("./data/dictionary_uni+bi.pkl")
def update(driver):
    """Scrape the league standings and team pages into Team objects.

    The standings page is loaded first and parsed with
    ``clean.cleanstandings``. Team pages 1 to 9 are then visited; each
    table row becomes a ``classes.Player`` (or ``classes.Goalie`` when the
    position is 'G'). Scraped values are echoed to stdout as a trace.

    Args:
        driver: Browser driver providing ``get`` and the
            ``find_element(s)_by_*`` lookups used below.

    Returns:
        Dict mapping team name -> ``classes.Team``.
    """
    teams = {}
    league = "https://hockey.fantasysports.yahoo.com/hockey/58622"
    fullstandings = league + '/standings'
    driver.get(fullstandings)
    cleanfull = clean.cleanstandings([str(i.text.encode('ascii', 'ignore')).split('\n') for i in driver.find_elements_by_tag_name('tr')])
    for i in range(1,10):
        teamvalues = []
        players = {}
        driver.get(league + "/" + str(i) + "?stat1=S&stat2=S_2015")
        teamcard = clean.cleanteam(str(driver.find_element_by_id("team-card-info").text.encode('ascii', 'ignore')).split('\n'))
        teamname = clean.cleanname(teamcard[0])
        teamowner = teamcard[1].split('Since')[0]
        teamrank = teamcard[2]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(teamname)
        print(teamowner)
        print(teamrank)
        teamvalues.extend([teamname, teamowner, teamrank])
        rows = driver.find_elements_by_tag_name('tr')
        rows = [str(a.text.encode('ascii', 'ignore')).split('\n') for a in rows]
        rows = clean.clean(rows)
        for r in rows:
            print(r)
            playercard = clean.cleancard(r)
            print(playercard)
            # The first card field is split on spaces: items 0-1 give the
            # name, item 2 the team and item 4 the position.
            header = playercard[0].split(' ')
            playernames = header[0:2]
            playername = playernames[0] + ' ' + playernames[1]
            playerteam = header[2]
            playerpos = header[4]
            playervalues = [playername, playerteam, playerpos]
            print(playername)
            print(playerteam)
            print(playerpos)
            playerstats = playercard[1:]
            print(playerstats)
            playervalues.extend(playerstats)
            if playerpos != 'G':
                players[playername] = classes.Player(*playervalues)
            else:
                players[playername] = classes.Goalie(*playervalues)
        teamvalues.append(players)
        teamvalues.extend(cleanfull[teamname][0])
        teamvalues.extend(cleanfull[teamname][1])
        teams[teamname] = classes.Team(*teamvalues)
    return teams
def opendocs():
    """Read every file in ``docs`` and return all their tokens in one list.

    Each file's text is passed through ``clean.clean`` and split on runs
    of spaces. Tokens are concatenated in file order.
    """
    all_docs = []
    for doc in docs:
        # Close each file promptly instead of leaking the handle.
        with open(doc) as handle:
            text = handle.read()
        all_docs.extend(re.split(r"[ ]+", clean.clean(text)))
    return all_docs
def get_reviews(self, quantity=3):
    """Return ``quantity`` random reviews drawn from both labels.

    ``clean(self.file)`` supplies the two review collections, which are
    stored on ``self.label1`` and ``self.label2``. ``quantity`` reviews
    are sampled from each, and the result is sampled from that pool.
    """
    labelled = clean(self.file)
    self.label1 = labelled[0]
    self.label2 = labelled[1]
    pool = (random.sample(self.label1, k=quantity)
            + random.sample(self.label2, k=quantity))
    return random.sample(pool, k=quantity)
def creating_conversion_table():
    '''Create a table that re-numbers store ids by ascending mean sales.

    The cleaned training data is averaged per ``Store`` and sorted by
    ``Sales``. Rows are then numbered 1..N in that order.

    Returns a single-column DataFrame (``new_id``) indexed by store.
    '''
    training = clean('./data/train.csv')
    mean_by_store = training.groupby('Store').agg('mean')
    ranked = mean_by_store.sort_values(by='Sales')
    n_stores = ranked.shape[0]
    ranked['new_id'] = np.array(range(1, n_stores + 1))
    return pd.DataFrame(ranked.new_id)
def run_all_for_file(tbt_file, main_input, clean_input, harpy_input):
    """Run the raw-write, cleaning and harmonic-analysis steps for one file.

    The raw file is written when ``main_input.write_raw`` is set. If
    cleaning or harmonic analysis is requested, both planes ("x", "y") are
    processed in turn: optional filtering and SVD cleaning, optional
    harmonic analysis, then the bad-BPM list for that plane is written.
    The cleaned data is written once, after both planes, when
    ``clean_input.write_clean`` is set.

    Args:
        tbt_file: Source of ``bpm_names_<plane>``, ``samples_matrix_<plane>``
            and ``date``.
        main_input: General options (``write_raw``, ``file``, ``outputdir``).
        clean_input: Cleaning options, or None to skip cleaning.
        harpy_input: Harmonic-analysis options, or None to skip it.
    """
    if main_input.write_raw:
        output_handler.write_raw_file(tbt_file, main_input)

    if clean_input is not None or harpy_input is not None:
        clean_writer = output_handler.CleanedAsciiWritter(
            main_input, tbt_file.date)
        for plane in ("x", "y"):
            bpm_names = np.array(getattr(tbt_file, "bpm_names_" + plane))
            bpm_data = getattr(tbt_file, "samples_matrix_" + plane)
            all_bad_bpms = []
            usv = None
            if clean_input is not None:
                with timeit(lambda time: LOGGER.debug("Time for filtering: %s", time)):
                    bpm_names, bpm_data, bad_bpms_clean = clean.clean(
                        bpm_names, bpm_data, clean_input, tbt_file.date,
                    )
                with timeit(lambda time: LOGGER.debug("Time for SVD clean: %s", time)):
                    bpm_names, bpm_data, bpm_res, bad_bpms_svd, usv = clean.svd_clean(
                        bpm_names, bpm_data, clean_input,
                    )
                all_bad_bpms.extend(bad_bpms_clean)
                all_bad_bpms.extend(bad_bpms_svd)

            setattr(clean_writer, "bpm_names_" + plane, bpm_names)
            setattr(clean_writer, "samples_matrix_" + plane, bpm_data)

            if plane == "x":
                # dp/p is computed once, from the x-plane data.
                computed_dpp = calc_dp_over_p(main_input, bpm_names, bpm_data)

            if harpy_input is not None:
                with timeit(lambda time: LOGGER.debug(
                        "Time for harmonic_analysis: %s", time)):
                    drive_results, bad_bpms_fft = harmonic_analysis(
                        bpm_names, bpm_data, usv,
                        plane, main_input, harpy_input,
                    )
                all_bad_bpms.extend(bad_bpms_fft)
                #TODO: Writing of harpy should be done in output_handler
                drive_results.write_full_results()
                # NOTE(review): bpm_res is only assigned by svd_clean above,
                # so this call fails with a NameError when clean_input is
                # None -- decide what resolution to pass in that case.
                lin_frame = get_orbit_data(bpm_names, bpm_data, bpm_res)

            output_handler.write_bad_bpms(main_input.file, all_bad_bpms,
                                          main_input.outputdir, plane)

        # clean_input is None when only harmonic analysis was requested;
        # dereferencing it unconditionally raised AttributeError.
        if clean_input is not None and clean_input.write_clean:
            clean_writer.dpp = computed_dpp
            clean_writer.write()
def firePCA():
    """Reduce the cleaned feature matrix with PCA, keeping 95% of variance.

    Features from ``clean.clean()`` are min-max scaled and projected onto
    the principal components selected by ``n_components=0.95``.

    Returns:
        The transformed (reduced) data.
    """
    # Read in the data; only the second element (features) is used here.
    _, x = clean.clean()

    # Scale the data with MinMaxScaler's default range.
    data = MinMaxScaler().fit_transform(x)

    # Fit and project in one step. A separate fit() pass and an
    # explained-variance matrix used to be computed here, but neither
    # result was ever used.
    pca = PCA(n_components=0.95)
    return pca.fit_transform(data)
def update_data(event):
    """Classify the text in ``textin`` and show the prediction in ``p``.

    The input text is cleaned, joined with spaces, vectorised with
    ``transformer`` and scored by ``model``. A result of 1 is shown as
    'Male', anything else as 'Female'.

    Args:
        event: Callback argument; not used.
    """
    tokens = clean(str(textin.value))
    vector = transformer.transform([' '.join(tokens)])
    result = model.predict(vector)
    pred_text = 'Male' if int(result) == 1 else 'Female'
    p.text = "{}".format({'prediction': pred_text})
def do_thing(inst, interval):
    """Run the rotation calculation chain for one instrument.

    The ``'mid'`` entry of the cleaned data is passed through
    ``get_sign``, ``get_rotations`` and ``final`` in turn.

    Returns:
        Whatever ``final(inst, rotations)`` returns.
    """
    print("Running calculations: {}".format(inst))
    mid = clean(inst, interval)['mid']
    rotations = get_rotations(get_sign(mid))
    return final(inst, rotations)
def render_track(track, track_cfg):
    """Render every notebook of a track into its ``rendered`` directory.

    Notebooks are read from ``<track>/raw`` and exported with the lesson
    preprocessor. Output goes to ``<track>/<tag>/rendered``, which is
    created if missing. When the module-level ``CLEAN`` flag is set, each
    raw notebook is passed to ``clean`` before export.
    """
    meta = utils.get_track_meta(track, track_cfg)

    cfg = Config()
    cfg.Exporter.preprocessors = ['nb_utils.lesson_preprocessor.LearnLessonPreprocessor']
    exporter = NotebookExporter(config=cfg)

    resources = {'track_meta': meta, 'track_cfg': track_cfg}
    outdir = os.path.join(track, track_cfg['tag'], 'rendered')
    os.makedirs(outdir, exist_ok=True)

    for nb_meta in meta.notebooks:
        source_path = os.path.join(track, 'raw', nb_meta.filename)
        # Per-notebook entries are refreshed on the shared resources dict.
        resources.update(lesson=nb_meta.lesson, nb_meta=nb_meta)
        if CLEAN:
            clean(source_path)
        rendered, _ = exporter.from_filename(source_path, resources)
        target_path = os.path.join(outdir, nb_meta.filename)
        with open(target_path, 'w') as out:
            out.write(rendered)
def write(self, text):
    """Append the crawled text to the topic's file.

    The text is cleaned first. The target file lives under
    ``topic/<topic>/`` and its name combines the topic with
    ``self.now_time`` formatted to the hour.
    """
    stamp = time.strftime('%Y-%m-%d-%H', self.now_time)
    target = 'topic/' + self.topic + '/' + self.topic + stamp + '.txt'
    with open(target, 'a') as out:
        out.write(clean(text))
        # Newline after each record, so records written from a loop do not
        # run together.
        out.write('\n')
def pooltest():
    """Compare the CLEAN peak of star 284 against scrambled trial runs.

    The light curve is shifted to start at t=0, mean-subtracted and
    linearly detrended. Its strongest non-negative-frequency CLEAN peak is
    printed. 10000 ``_worker`` runs are then executed in a process pool;
    their results are saved to a text file and plotted (scatter plus
    histogram) into a PDF.
    """
    from m48star import M48Star
    star = M48Star(None, tab=284)
    lc = star.lightcurve()
    t = lc.hjd
    t -= t[0]
    y = lc.mag
    y -= np.mean(y)
    # Remove a linear trend.
    par = np.polyfit(t, y, 1)
    y -= par[0]*t + par[1]

    f, px, _, sigma0 = clean(t, y)
    px = px[f>=0.0]
    f = f[f>=0.0]
    i = np.argmax(px)
    fi = f[i]
    pxi = px[i]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('%.2f %.1f' % (1./fi, pxi/sigma0))

    runs = 10000
    from multiprocessing import Pool
    w = np.arange(runs)
    pool = Pool(initializer=_init, initargs=(t,y))
    p = pool.map(_worker, w)
    pool.close()  # no more tasks
    pool.join()   # wrap up current tasks

    f = np.array([pi[0] for pi in p])
    px = np.array([pi[1] for pi in p])
    plt.subplot(211)
    k = np.argsort(f)
    f = f[k]
    px = px[k]
    np.savetxt('/work2/jwe/SOCS/M48/data/scrambler.txt', np.c_[f,px], fmt='%6.3f',header='period sigma')
    px1 = px[(f>3.07) & (f<3.33)]
    print('%s %s %s' % (px1.shape, 1./fi, pxi))
    plt.scatter(f, px, edgecolor='none', alpha=0.5)
    plt.scatter(1./fi, pxi/sigma0, c='r', edgecolor='none', s=80)
    plt.minorticks_on()
    plt.xlim(0.0, t[-1]/2)
    plt.title('star 284: %d runs' % runs)
    plt.xlabel('period [days]')
    plt.ylabel(r'$\sigma$')

    plt.subplot(212)
    # The bin count must be an integer (np.sqrt returns a float).
    # NOTE(review): `normed` was removed in newer matplotlib in favour of
    # `density`; switch once the minimum matplotlib version is confirmed.
    plt.hist(f, bins=int(np.sqrt(runs)), range=[0.0,t[-1]/2], normed=True)
    plt.axvline(1./fi, color='r')
    plt.savefig('/work2/jwe/SOCS/M48/plots/scrambler1.pdf')
    plt.close()
def main(num, speed=0.1, acceleration=None):
    """Teach the bucket positions interactively, then run the clean routine.

    The operator is prompted to place the tool over each bucket and at a
    working height. The recorded poses are handed to ``clean`` and the
    robot is afterwards moved back to the working-height pose. The robot
    connection is always closed.

    Args:
        num: Last octet of the robot address ``192.168.1.<num>``.
        speed: Speed used for the final move.
        acceleration: Acceleration for the final move; defaults to ``speed``.

    Returns:
        The ``(robot, gripper)`` pair.
    """
    # A default cannot refer to another parameter, so resolve it here.
    if acceleration is None:
        acceleration = speed

    ip = f'192.168.1.{num}'
    robot = getRobot(ip)
    gripper = Gripper(robot)
    buckets = {}
    prompts = (
        ("Red", "Move tool over the RED bucket, then press Enter: "),
        ("Green", "Move tool over the GREEN bucket, then press Enter: "),
        ("Yellow", "Move tool over the YELLOW bucket, then press Enter: "),
    )
    try:
        for colour, prompt in prompts:
            input(prompt)
            buckets[colour] = robot.get_pose()
        input("Move tool to a good height and press enter: ")
        position = robot.get_pose()
        clean(num, robot, gripper, buckets)
        robot.set_pose(position, speed, acceleration)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C is not swallowed.
        print('Something went wrong')
    finally:
        robot.close()
    return robot, gripper
def extract(image, predictor):
    """Read the digits of a sudoku from an image.

    Returns:
        A 9x9 array of predictions, or None when ``clean`` finds no sudoku
        or any step raises (the error is logged).
    """
    try:
        grid, _ = clean(image)
        if grid is None:
            return None
        # 81 cells, each fed to the predictor as a 28x28x1 input.
        batch = np.reshape(get_cells(grid), (81, 28, 28, 1))
        return predictor.predict(batch).reshape(9, 9)
    except Exception as e:
        logging.error(e)
        return None
def main():
    """Train and evaluate every configured technique per cleaning option set.

    For each entry of ``cleaning_options_list`` the parsed transcripts are
    loaded, cleaned, balanced and split. Every technique in ``config`` is
    then embedded ("elmo" or "tfidf"), classified and evaluated. Test
    results and coefficients are pickled under the report path.
    """
    log('starting with: ' + str(config))
    for cleaning_options in cleaning_options_list:
        log('cleaning options: ' + str(cleaning_options))

        # data/ sits next to the directory that contains this script.
        script_path = os.path.abspath(__file__)
        script_dir = os.path.split(script_path)[0]
        directory_path = os.path.dirname(script_dir)
        data_path = os.path.join(directory_path, "data/parsed_transcripts.csv")

        log('reading data..')
        data = pd.read_csv(data_path, index_col=0)
        log('cleaning data..')
        data = balance_down(clean(data, cleaning_options))
        train, test = split(data)
        log("training size: " + str(len(train)))
        log("testing size: " + str(len(test)))

        for technique, options in config.items():
            log('starting: ' + technique)
            feature_names = []
            if technique == "elmo":
                embedded_train, embedded_test = elmo_embed(
                    train, test, options)
                feature_length = len(
                    np.array(embedded_train["embedding"].iloc[1]))
                log("elmo feature length: " + str(feature_length))
                # ELMo has no named features; zeros fill the name column.
                feature_names = np.zeros(feature_length)
            elif technique == 'tfidf':
                embedded_train, embedded_test, feature_names = tfidf_vectorize(
                    train, test, options)
                tfidf_length = len(np.array(embedded_train["embedding"].iloc[1]))
                log("tfidf feature length: " + str(tfidf_length))

            train_res, test_res, coef = classify(embedded_train,
                                                 embedded_test,
                                                 options,
                                                 technique=technique)
            test_res = evaluate(test_res)
            test_res.to_pickle(report_path(technique + '_test_results.pkl'))

            coefficients = pd.DataFrame({
                "feature_name": feature_names,
                "coef": coef
            })
            coefficients.to_pickle(report_path(technique + '_coefficients.pkl'))
    log('done')
    return
def apply_selfcal(rawvis, field, spwn_source, spwn_target, calnum=0):
    """Apply a source window's self-cal tables to a target window and image it.

    The target spectral window is split out of ``rawvis``, the
    amplitude/phase table and the numbered self-cal table of the source
    window are applied, and the result is cleaned and exported to FITS.
    Existing outputs with the same names are removed first.

    Args:
        rawvis: Measurement set to split from.
        field: Field name; spaces are dropped when building file names.
        spwn_source: Spectral window whose calibration tables are used.
        spwn_target: Spectral window to calibrate and image.
        calnum: Index of the self-cal iteration table to apply.
    """
    field_tag = field.replace(" ","")
    noavg_data = '%s_spw%i_split.ms' % (field_tag,spwn_target)
    aptable = 'selfcal_ap_%s_spw%i.gcal' % (field_tag,spwn_source)
    caltable = 'selfcal%i_%s_spw%i.gcal' % (calnum,field_tag,spwn_source)

    os.system('rm -rf '+noavg_data)
    # Split from the caller's measurement set. This used to read a name
    # `vis` that is not defined in this function, ignoring `rawvis`.
    split(vis=rawvis,
          outputvis=noavg_data,
          datacolumn='corrected', # was 'data'...
          spw=str(spwn_target))
    applycal(vis=noavg_data,
             gaintable=[aptable,caltable],
             interp='linear',
             flagbackup=True) # was False when flagmanager was used

    selfcal_image = 'spw%i_C_C_selfcal%i_final_cube' % (spwn_target,calnum)
    for suffix in clean_output_suffixes:
        os.system("rm -rf "+selfcal_image+suffix)
    clean(vis=noavg_data,imagename=selfcal_image,field=field, mode='frequency',# mask=cleanboxes,
          multiscale=[0,5,10,25], psfmode='hogbom',
          weighting='briggs', robust=robust, niter=10000, imsize=512)
    exportfits(imagename=selfcal_image+".image", fitsimage=selfcal_image+".fits", overwrite=True)
def test_clean(self):
    """Exercise clean() with match/ignore, recursive and whole-tree options.

    After each call the files or directories expected to be removed are
    checked to no longer exist.
    """
    root = '/tmp/test_fab_build/'

    # Pattern match with an ignore list.
    clean(files={'path': root, 'match': ['*test.*'], 'ignore': ['*.txt']})
    self.assertFalse(os.path.exists('/tmp/test_fab_build/test.py'))

    # Recursive match on every .py file.
    clean(files={'path': root, 'recursive': True, 'match': ['*.py']})
    for path in self.files:
        if path.endswith(".py"):
            self.assertFalse(os.path.exists(path))

    # Recursive match on a nested directory name.
    clean(files={'path': root, 'recursive': True, 'match': ['*3']})
    self.assertFalse(os.path.exists('/tmp/test_fab_build/1/2/3'))

    # No pattern: everything under the path goes.
    clean(files={'path': root})
    for path in self.dirs:
        self.assertFalse(os.path.exists(path))
    for path in self.files:
        self.assertFalse(os.path.exists(path))
def ZeemanFitFunc_VaryV(RSi, RSf, Ei, Ef, P, V, tls, wavelengths, s):
    """
    Function which takes a Zeeman spectrum, deconvolves it with the natural
    Zeeman line shape computed for B = 1 kG, polarization P and flow V, and
    returns the deconvolved spectrum. This is to be used with an
    optimization problem.

    See also: ZeemanFit_VaryB, which postulates that there is a magnetic
    field other than 1kG in the plasma.

    INPUTS:
    RSi - Russel Saunders term for the initial energy level.
    RSf - Russel Saunders term for the final energy level.
    Ei - Energy for the initial energy level.
    Ef - Energy for the final energy level.
    P - Polarization of Zeeman lines to fit (-1, 0, 1).
    V - Streaming flow. cm/s defined to be positive towards the observer (laser).
    tls - Tuple of (ti, tf) for the mean lifetimes of the starting and
          ending state. Just one entry in the tuple is fine.
    wavelengths - Wavelengths of data.
    s - Measured LIF spectrum.

    OUTPUTS:
    x - Cleaned wavelengths.
    yn - Cleaned and normalized spectrum.
    lns_clnn - Cleaned and normalized natural lines.
    ivdf - Resulting deconvolved spectrum.
    errs - Error estimate derived from the spectrum above freq_end.
    """
    # Clean, then normalize to unit L2 norm.
    (x, y) = clean.clean(wavelengths, s)
    yn = y / np.sqrt(np.dot(y, y))

    B = 1e3
    lns_cln = Zeeman_Lines(RSi, RSf, Ei, Ef, x, P, B, tls, velocity=V)
    lns_clnn = lns_cln / np.sqrt(np.dot(lns_cln, lns_cln))

    [f, gl] = spec.spec(lns_clnn, x[1] - x[0])
    [f, gs] = spec.spec(yn, x[1] - x[0])

    # fi: frequencies treated as noise (zeroed); fwi: the retained window.
    # An unused noise-band index was dropped here: it subtracted boolean
    # arrays, which current NumPy rejects.
    fi = np.where(np.abs(f) > freq_end)
    fwi = np.where(np.abs(f) <= freq_end)

    # Deconvolve by dividing the magnitudes, then discard the noise region.
    gdc = np.abs(gs) / np.abs(gl)
    gdc[fi] = 0
    [trash, ivdf] = spec.ispec(gdc, f[1] - f[0])
    ivdf = sp.fftpack.fftshift(ivdf)

    # Get the errors too.
    err = np.mean(np.abs(gs[fi]))
    sig_real = np.sqrt(err**2 / np.size(fwi) * np.sum(1 / gl[fwi]**2) * np.size(fwi) / np.size(f))
    errs = np.abs(sig_real)
    return (x, yn, lns_clnn, ivdf, errs)
def test_clean_empty_img(self):
    """Cleaning a page with extra empty <img> tags must give the same
    condensed result as cleaning the page with a single ``<img src="">``."""
    single_img = u''' <!DOCTYPE html> <html> <head> </head> <body> <img src=""></img> </body> </html> '''
    several_imgs = u''' <!DOCTYPE html> <html> <head> </head> <body> <img src=""></img> <img></img> <img/> </body> </html> '''
    actual = condense(clean(several_imgs))
    expected = condense(clean(single_img))
    self.assertEqual(actual, expected)
def test_html_to_xhtml(self):
    """Cleaned HTML converted with ``html_to_xhtml`` and condensed must equal
    the expected XHTML string exactly."""
    expected_xhtml = u'<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div id="Test">Hello</div><br /><br /></body></html>'
    loose_html = u''' <!DOCTYPE html> <html> <head> </head> <body> <DIV ID="Test">Hello</div> <br> <br> </body> </html> '''
    converted = html_to_xhtml(clean(loose_html))
    self.assertEqual(condense(converted), expected_xhtml)
def isValid(self, wbcontent):
    """Score *wbcontent* against the weighted patterns in ``self.d_Attribute``.

    The content is first passed through ``clean.seg`` and ``clean.clean``.
    For every key whose pattern list passes ``self.lookup``, each pattern's
    number of regex matches is multiplied by ``float(key)`` and summed.

    Returns 1.0 when the total reaches ``self.threshold``, otherwise 0.0.
    """
    # clean the content
    text = clean.clean(clean.seg(wbcontent))

    # calculate the weight
    total = 0.0
    for weight in self.d_Attribute:
        patterns = self.d_Attribute[weight]
        if not self.lookup(patterns, text):
            continue
        for pattern in patterns:
            hits = re.compile(pattern).findall(text)
            total = total + len(hits) * float(weight)

    return 1.0 if total >= self.threshold else 0.0
def __init__(self):
    """
    Here we'll load settings and set up us the brain.

    Loads (or creates with defaults) ``conf/brain.cfg`` and
    ``brain/answers.dat``, then starts the autosave and auto-rebuild timers.
    """
    self.version = '0.2.1'
    self.barf = barf.Barf
    self.clean = clean.clean()
    self.cfg = cfg
    self.settings = self.cfg.set()

    # Load brain config (or create with these defaults).
    self.settings.load('conf/brain.cfg', {
        'debug': 0,
        'symbol': '!',
        'learning': 1,
        'censored': [],
        'num_words': 0,
        'num_contexts': 0,
        'num_aliases': 0,
        'max_words': 1000000,
        'aliases': {},
        'optimum': 0,
        'ignore_list': []
    })

    self.static_answers = self.cfg.set()
    self.static_answers.load("brain/answers.dat", {
        "sentences": {}
    })

    self.unfilterd = {}
    self.timers_started = False

    # Starts the timers:
    if self.timers_started is False:
        try:
            self.autosave = threading.Timer(self.to_sec("125m"), self.__save)
            self.autosave.start()
            self.autorebuild = threading.Timer(self.to_sec("71h"), self.auto_rebuild)
            self.autorebuild.start()
            # Record the state on the instance; the original assigned a
            # throw-away local, so the flag never became True.
            self.timers_started = True
        except SystemExit:
            # Cancel only the timers that were actually created, so the
            # handler itself cannot fail with AttributeError.
            for timer_name in ('autosave', 'autorebuild'):
                timer = getattr(self, timer_name, None)
                if timer is not None:
                    timer.cancel()
def test_coffee(self):
    """Run ``coffee`` four ways: plain, with a match filter, with source
    maps, and joined into one file. Generated files are checked on disk and
    ``clean`` is called between runs, ignoring the ``*.coffee`` sources."""
    base = '/tmp/test_fab_build/coffee'
    js_files = [base + '/test1.js', base + '/test2.js', base + '/test3.js']
    map_files = [base + '/test1.map', base + '/test2.map', base + '/test3.map']

    # Plain compile: one .js per source.
    coffee(files={'path': base})
    for produced in js_files:
        self.assertTrue(os.path.exists(produced))
    clean(files={'path': base, 'ignore': ['*.coffee']})

    # Match filter: only test1 is compiled.
    coffee(files={'path': base, 'match': ['*/test1*']})
    self.assertTrue(os.path.exists(js_files[0]))
    self.assertFalse(os.path.exists(js_files[1]))
    self.assertFalse(os.path.exists(js_files[2]))
    clean({'path': base, 'ignore': ['*.coffee']})

    # Source maps requested: .js and .map for every source.
    coffee({'path': base}, map=True)
    for produced in js_files + map_files:
        self.assertTrue(os.path.exists(produced))
    clean({'path': base, 'ignore': ['*.coffee']})

    # Joined output with a source map.
    coffee({'path': base}, join='joined.js',
           output="/tmp/test_fab_build/coffee", map=True)
    self.assertTrue(os.path.exists(base + '/joined.js'))
    self.assertTrue(os.path.exists(base + '/joined.map'))
    clean({'path': base, 'ignore': ['*.coffee']})
def selfcal(vis, spwn=6, doplots=True, INTERACTIVE=False, reclean=True,
            field='W51 Ku', outdir_template="spw%i_selfcal_iter/",
            statsbox='170,50,229,97', ant1list=['ea14','ea05'],
            ant2list=['ea16','ea07'], avgchannel_wide='128',
            avgchannel_narrow='8', cleanboxes=cleanboxes, refant='ea27',
            solint='30s', niter=2, multiscale=[0,5,10,15,25,50], imsize=512,
            robust=0.0):
    """
    Iteratively phase self-calibrate one spectral window, then do a final
    amplitude+phase calibration and two deep cleans (masked, and unmasked
    multiscale).

    vis               -- measurement set to calibrate (used as ``avg_data``).
    spwn              -- spectral window number; used with ``%i`` in file
                         names and titles, so it must be integer-like.
    doplots           -- make the per-iteration calibration diagnostic plots.
    INTERACTIVE       -- show plotcal GUIs instead of writing figure files.
    reclean           -- when true, (re)run clean/gaincal/applycal inside the
                         iterations; when false those steps are skipped.
    field             -- field name passed to clean/plotms.
    outdir_template   -- ``%``-template (takes spwn) for the plot directory.
    statsbox          -- box passed to imstat for the rms measurement.
    ant1list/ant2list -- antenna names; every ant1&ant2 pair is plotted.
    avgchannel_wide / avgchannel_narrow -- channel averaging for plotms.
    cleanboxes        -- clean mask.
    refant, solint    -- passed to gaincal.
    niter             -- number of phase self-cal iterations.  The final
                         calibration reuses the last iteration's table, so
                         at least one iteration is required.
    multiscale, imsize, robust -- passed to clean.

    Returns the list of image rms values (initial shallow clean first, then
    one per iteration); the list is empty when ``reclean`` is false.

    NOTE(review): the nesting of the ``if doplots:`` / ``if reclean:`` bodies
    was reconstructed from whitespace-collapsed source -- confirm it against
    the upstream script.
    """
    spw = int(spwn)
    outdir = outdir_template % spwn
    try:
        os.mkdir(outdir)
    except OSError:
        pass

    # you're supposed to pass in avg_data as input
    avg_data = vis

    # Always defined, so `return imrms` cannot raise NameError when
    # reclean is False.
    imrms = []

    def _remove_products(imagename):
        """Delete every clean output of *imagename* left by an earlier run."""
        for suffix in clean_output_suffixes:
            os.system("rm -rf "+imagename+suffix)

    def _plot_residuals(algorithm, file_tag):
        """Plot corrected-model amplitude against baseline, time and uvdist."""
        plotms(vis=avg_data, spw='0', xaxis='baseline', yaxis='amp',
               avgtime='1e8', ydatacolumn='corrected-model', avgscan=T,
               coloraxis='baseline', iteraxis='', xselfscale=T, yselfscale=T,
               title='Residual vs. Baseline after %s iter %i' % (algorithm, calnum),
               plotfile=outdir+'post_selfcal%i_spw%i_residVSbaseline%s.png' % (calnum,spwn,file_tag),
               field=field, overwrite=True)
        plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp',
               avgtime='5s', ydatacolumn='corrected-model',
               coloraxis='baseline', iteraxis='', xselfscale=T, yselfscale=T,
               title='Residual vs. Time after %s iter %i' % (algorithm, calnum),
               plotfile=outdir+'post_selfcal%i_spw%i_residVStime%s.png' % (calnum,spwn,file_tag),
               field=field, overwrite=True)
        plotms(vis=avg_data, spw='0', xaxis='uvdist', yaxis='amp',
               avgtime='1e8', ydatacolumn='corrected-model', avgscan=T,
               coloraxis='baseline', iteraxis='', xselfscale=T, yselfscale=T,
               title='Residual vs. UVDIST after %s iter %i' % (algorithm, calnum),
               plotfile=outdir+'post_selfcal%i_spw%i_residVSuvdist%s.png' % (calnum,spwn,file_tag),
               field=field, overwrite=True)

    # The antenna names are read but not used below; close the table again
    # so the tool does not keep the ANTENNA subtable open.
    mytb.open(vis+"/ANTENNA")
    antnames = mytb.getcol("NAME")
    mytb.close()

    # plot each antenna's ampl vs time for flagging purposes
    for ant2 in ant2list:
        for ant in ant1list:
            plotms(vis=vis, spw=str(spwn), xaxis='time', yaxis='amp',
                   avgchannel=avgchannel_wide, avgscan=F,
                   coloraxis='baseline', iteraxis='', xselfscale=T,
                   yselfscale=T, antenna=ant+"&"+ant2,
                   title='Amp vs Time before averaging for spw %i ant %s-%s' % (spwn,ant,ant2),
                   plotfile=outdir+'ampvstime_spw%i_ant%s-%s.png' % (spwn,ant,ant2),
                   field=field, overwrite=True)
            plotms(vis=vis, spw=str(spwn), xaxis='freq', yaxis='phase',
                   avgtime='1e8', avgscan=T, coloraxis='corr',
                   iteraxis='baseline', xselfscale=T, yselfscale=T,
                   antenna=ant+"&"+ant2,
                   title='Phase vs Freq with time averaging for spw %i ant %s-%s' % (spwn,ant,ant2),
                   plotfile=outdir+'phasevsfreq_spw%i_ant%s-%s.png' % (spwn,ant,ant2),
                   field=field, overwrite=True)
            plotms(vis=vis, spw=str(spwn), xaxis='amp', yaxis='phase',
                   avgtime='1e8', avgscan=T, coloraxis='corr',
                   iteraxis='baseline', xselfscale=T, yselfscale=T,
                   antenna=ant+"&"+ant2,
                   title='Phase vs Amp with time averaging for spw %i ant %s-%s' % (spwn,ant,ant2),
                   plotfile=outdir+'phasevsamp_spw%i_ant%s-%s.png' % (spwn,ant,ant2),
                   field=field, overwrite=True)

    # (0) Using your split-off, calibrated data, plot the "model" in this MS
    # using plotms.  It should be unit-valued for all data.  If not, run
    # delmod to get rid of any model that might still be lurking in the
    # header, and/or clearcal to set to 1 any MODEL data.
    plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp',
           avgchannel=avgchannel_wide, xdatacolumn='model',
           ydatacolumn='model', avgscan=F, coloraxis='baseline', iteraxis='',
           xselfscale=T, yselfscale=T,
           title='Model Amp vs Time after split for spw %i.  Should be all 1s' % spwn,
           plotfile=outdir+'ampvstime_model_shouldbe1.png',
           field=field, overwrite=True)
    delmod(vis=avg_data)
    plotms(vis=avg_data, spw='0', xaxis='phase', yaxis='amp',
           avgchannel=avgchannel_wide, xdatacolumn='data',
           ydatacolumn='data', avgscan=F, coloraxis='baseline', iteraxis='',
           xselfscale=T, yselfscale=T,
           title='Corrected Phase vs Amp after split',
           plotfile=outdir+'ampvsphase_corrected_avg_spw%i.png' % spwn,
           field=field, overwrite=True)

    # (1) Shallow, masked clean of a single SPW (100 iterations).  Keep this
    # model in the header -- it's what you'll use for the first round of
    # self-calibration.
    if reclean:
        imagename = "average_spw%i_shallowclean_masked" % spwn
        _remove_products(imagename)
        clean(vis=avg_data, field=field, imagename=imagename, mode='mfs',
              psfmode='hogbom', multiscale=multiscale, weighting='briggs',
              robust=robust, niter=100, imsize=imsize, mask=cleanboxes,
              nterms=2, usescratch=True)
        viewer(imagename+".image.tt0",
               outfile=outdir+imagename+".image.tt0.png",
               outformat='png', gui=False)
        exportfits(imagename=imagename+".image.tt0",
                   fitsimage=imagename+".fits", overwrite=True)
        imrms.append(imstat(imagename+".image.tt0",box=statsbox)['rms'])
        # (a plotms of the model column at this point was noted as failing
        # by the original author and stays disabled)

    for calnum in xrange(niter):

        # for Ku D W51 Ku spw 2
        if reclean:
            first_image = 'spw%i_C_C_firstim_selfcal%i' % (spwn,calnum)
            _remove_products(first_image)
            clean(vis=avg_data, imagename=first_image, field=field,
                  mode='mfs', psfmode='hogbom', multiscale=multiscale,
                  weighting='briggs', robust=robust, niter=100,
                  imsize=imsize, mask=cleanboxes, nterms=2, usescratch=True)
            exportfits(imagename=first_image+".image.tt0",
                       fitsimage=first_image+".fits", overwrite=True)
            viewer(first_image+".image.tt0",
                   outfile=outdir+first_image+".image.tt0.png",
                   outformat='png', gui=False)

        caltable = 'selfcal%i_%s_spw%i.gcal' % (calnum,field.replace(" ",""),spwn)
        if reclean:
            os.system('rm -rf '+caltable)
            # gaintype = 'T' could reduce failed fit errors by averaging
            # pols; 'G' from
            # http://casaguides.nrao.edu/index.php?title=EVLA_Advanced_Topics_3C391
            gaincal(vis=avg_data, field='', caltable=caltable, spw='',
                    gaintype='G', solint=solint, refant=refant, calmode='p',
                    combine='scan', minblperant=4)
            # Watch out for failed solutions noted in the terminal during
            # this solution.  If a large fraction of the antennas fail to
            # converge in many time intervals, lengthen the solution
            # interval.

        # INSPECT THE CALIBRATION: look at the overall magnitude of the
        # correction and at how quickly it changes with time.
        if doplots:
            for ant2 in ant2list:
                for ant in ant1list:
                    # (4) Have a look at the gain solutions by antenna.
                    # Before applying the calibration, display phase vs. amp
                    # for these antennas, to compare with *after* the
                    # correction is applied.
                    plotcal(caltable=caltable, xaxis='time', yaxis='phase',
                            showgui=False, antenna=ant+'&'+ant2,
                            figfile=outdir+'selfcal%i_spw%i_phasevstime_ant%s-%s.png' % (calnum,spwn,ant,ant2),
                            iteration='')
                    # The first iteration plots the raw data column; later
                    # iterations plot the corrected column.
                    if calnum == 0:
                        datacol = 'data'
                    else:
                        datacol = 'corrected'
                    plotms(vis=avg_data, xaxis='time', yaxis='phase',
                           xdatacolumn=datacol, ydatacolumn=datacol,
                           avgtime='15s', avgchannel=avgchannel_narrow,
                           coloraxis='corr', antenna=ant+'&'+ant2,
                           overwrite=True,
                           title='Iteration %i for spw %i and ant %s-%s.  datacol=%s' % (calnum,spwn,ant,ant2,datacol),
                           plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phasetime.png' % (calnum,spwn,ant,ant2))
                    plotms(vis=avg_data, xaxis='time', yaxis='amp',
                           xdatacolumn=datacol, ydatacolumn=datacol,
                           avgtime='15s', avgchannel=avgchannel_narrow,
                           coloraxis='corr', antenna=ant+'&'+ant2,
                           overwrite=True,
                           title='Iteration %i for spw %i and ant %s-%s.  datacol=%s' % (calnum,spwn,ant,ant2,datacol),
                           plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_amptime.png' % (calnum,spwn,ant,ant2))
                    plotms(vis=avg_data, xaxis='phase', yaxis='amp',
                           xdatacolumn=datacol, ydatacolumn=datacol,
                           avgtime='60s', avgchannel=avgchannel_narrow,
                           coloraxis='corr', antenna=ant+'&'+ant2,
                           overwrite=True,
                           title='Iteration %i for spw %i and ant %s-%s.  datacol=%s' % (calnum,spwn,ant,ant2,datacol),
                           plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phaseamp.png' % (calnum,spwn,ant,ant2))

            plotcal(caltable=caltable, xaxis='time', yaxis='phase',
                    plotrange=[0,0,-180,180], showgui=INTERACTIVE,
                    figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_phasevstime.png' % (calnum,spwn),
                    iteration='spw' if INTERACTIVE else '')
            plotcal(caltable=caltable, xaxis='antenna', yaxis='phase',
                    showgui=INTERACTIVE,
                    figfile=outdir+'selfcal%i_spw%i_phasevsantenna.png' % (calnum,spwn),
                    iteration='')
            plotcal(caltable=caltable, xaxis='time', yaxis='amp',
                    plotrange=[0,0,0.5,1.5], showgui=INTERACTIVE,
                    figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_ampvstime.png' % (calnum,spwn),
                    iteration='spw' if INTERACTIVE else '')

            # THERE WILL BE WEIRD "LUSTRE" ERRORS GENERATED BY THE FILE
            # SYSTEM.  DO NOT FREAK OUT.  Plotcal will still work.

            # It can be useful to plot the X-Y solutions (i.e., differences
            # between polarizations) as an indicator of the noise in the
            # solutions.
            plotcal(caltable=caltable, xaxis='time', yaxis='phase',
                    plotrange=[0,0,-25, 25], poln='/',
                    showgui=INTERACTIVE,
                    iteration='spw,antenna' if INTERACTIVE else '',
                    figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_poldiff.png' % (calnum,spwn),
                    subplot=221 if INTERACTIVE else 111)

            plotms(vis=avg_data, xaxis='uvdist', yaxis='amp',
                   xdatacolumn='corrected', ydatacolumn='corrected',
                   avgtime='1e8s', avgchannel=avgchannel_narrow,
                   coloraxis='baseline', overwrite=True,
                   title='Iteration %i for spw %i' % (calnum,spw),
                   plotfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_uvdistamp.png' % (calnum,spwn))
            plotms(vis=avg_data, xaxis='time', yaxis='amp',
                   xdatacolumn='corrected', ydatacolumn='corrected',
                   avgtime='10s', avgchannel=avgchannel_narrow,
                   coloraxis='baseline', overwrite=True,
                   title='Iteration %i for spw %i' % (calnum,spw),
                   plotfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_amptime.png' % (calnum,spwn))

        # APPLY THE CALIBRATION.  applycal flags data without good
        # solutions; flagbackup=True saves the previous flags first (an
        # explicit flagmanager save failed with a FLAG-column shape error
        # in the original author's runs).
        if reclean:
            applycal(vis=avg_data, gaintable=caltable, interp='linear',
                     flagbackup=True)

        # (6) Plot corrected phase vs. amp for the antennas picked out in
        # (4), to check that the corrections have been applied as expected.
        for ant2 in ant2list:
            for ant in ant1list:
                plotms(vis=avg_data, xaxis='time', yaxis='phase',
                       xdatacolumn='corrected', ydatacolumn='corrected',
                       avgtime='15s', avgchannel=avgchannel_narrow,
                       coloraxis='corr', antenna=ant+'&'+ant2,
                       overwrite=True,
                       title='Iteration %i for spw %i and ant %s-%s' % (calnum,spwn,ant,ant2),
                       plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phasetime_applied.png' % (calnum,spwn,ant,ant2))
                plotms(vis=avg_data, xaxis='time', yaxis='amp',
                       xdatacolumn='corrected', ydatacolumn='corrected',
                       avgtime='60s', avgchannel=avgchannel_narrow,
                       coloraxis='corr', antenna=ant+'&'+ant2,
                       overwrite=True,
                       title='Iteration %i for spw %i and ant %s-%s' % (calnum,spwn,ant,ant2),
                       plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_amptime_applied.png' % (calnum,spwn,ant,ant2))
                plotms(vis=avg_data, xaxis='phase', yaxis='amp',
                       xdatacolumn='corrected', ydatacolumn='corrected',
                       avgtime='60s', avgchannel=avgchannel_narrow,
                       coloraxis='corr', antenna=ant+'&'+ant2,
                       overwrite=True,
                       title='Iteration %i for spw %i and ant %s-%s' % (calnum,spwn,ant,ant2),
                       plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phaseamp_applied.png' % (calnum,spwn,ant,ant2))
                plotms(vis=vis, spw='0', xaxis='freq', yaxis='phase',
                       avgtime='1e8', avgscan=T, coloraxis='corr',
                       iteraxis='baseline', xselfscale=T, yselfscale=T,
                       antenna=ant+'&'+ant2,
                       title='Phase vs Freq with time averaging for spw %i ant %s-%s iter %i' % (spwn,ant,ant2,calnum),
                       plotfile=outdir+'phasevsfreq_spw%i_ant%s-%s_selfcal%i.png' % (spwn,ant,ant2,calnum),
                       field=field, overwrite=True)

        if reclean:
            selfcal_image = 'spw%i_C_C_selfcal%i' % (spwn,calnum)
            _remove_products(selfcal_image)
            clean(vis=avg_data, imagename=selfcal_image, field=field,
                  mode='mfs', psfmode='hogbom', multiscale=multiscale,
                  weighting='briggs', robust=robust, niter=1000,
                  imsize=imsize, nterms=2, mask=cleanboxes, usescratch=True)
            exportfits(imagename=selfcal_image+".image.tt0",
                       fitsimage=selfcal_image+".fits", overwrite=True)
            _plot_residuals('CSCLEAN', '')
            imrms.append(imstat(selfcal_image+".image.tt0",box=statsbox)['rms'])
            viewer(selfcal_image+".image.tt0",
                   outfile=outdir+selfcal_image+".image.tt0.png",
                   outformat='png', gui=False)

        print("FINISHED ITERATION %i" % calnum)

    print("FINISHED ITERATING!!!  YAY!")

    # final phase + gain cal:
    # http://casaguides.nrao.edu/index.php?title=Calibrating_a_VLA_5_GHz_continuum_survey#One_Last_Iteration:_Amplitude_.26_Phase_Self_Calibration
    # (uses calnum/caltable from the last loop iteration)
    aptable = 'selfcal_ap_%s_spw%i.gcal' % (field.replace(" ",""),spwn)
    gaincal(vis=avg_data, field='', caltable=aptable, gaintable=caltable,
            spw='', solint='inf', refant=refant, calmode='ap', combine='',
            minblperant=4)

    plotcal(caltable=aptable, xaxis='phase', yaxis='amp',
            plotrange=[-50,50,0.5,1.5], showgui=INTERACTIVE,
            figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_ampvsphase_final.png' % (calnum,spwn),
            iteration='spw' if INTERACTIVE else '')

    applycal(vis=avg_data, gaintable=[aptable,caltable], interp='linear',
             flagbackup=True)

    # Deep masked clean.  Its residual plots reuse the file names of the
    # last iteration's plots (same calnum), as in the original script.
    selfcal_image = 'spw%i_C_C_selfcal%i_final' % (spwn,calnum)
    _remove_products(selfcal_image)
    clean(vis=avg_data, imagename=selfcal_image, field=field, mode='mfs',
          mask=cleanboxes, weighting='briggs', robust=robust, niter=10000,
          imsize=imsize, nterms=2, usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0",
               fitsimage=selfcal_image+".fits", overwrite=True)
    _plot_residuals('CSCLEAN', '')

    # Deep unmasked multiscale clean.
    selfcal_image = 'spw%i_C_C_selfcal%i_final_multiscale' % (spwn,calnum)
    _remove_products(selfcal_image)
    clean(vis=avg_data, imagename=selfcal_image, field=field, mode='mfs',
          imagermode='csclean', multiscale=multiscale, psfmode='hogbom',
          nterms=2, weighting='briggs', robust=robust, niter=10000,
          imsize=imsize, usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0",
               fitsimage=selfcal_image+".fits", overwrite=True)
    _plot_residuals('multiscale CLEAN', '_multiscale')

    return imrms
def stack(coords, vis, outvis='', imagename='', cell='1arcsec', stampsize=32,
          primarybeam='guess', datacolumn='corrected', use_cuda = False):
    """
    Stack visibilities in the uv domain at the given target positions.

    coords      -- coordList object holding all target coordinates.
    vis         -- input uv data file.
    outvis      -- output uv data file; '' means the stacked visibilities
                   are not saved.
    imagename   -- optional image name; when given, the stacked data are
                   imaged.
    cell        -- pixel size of that image.
    stampsize   -- size of that image in pixels.
    primarybeam -- 'guess' (casa builtin model) or 'constant' (i.e. no
                   correction); any other value is used as a primary beam
                   model as-is.
    datacolumn  -- 'corrected' or 'data': the column stacking is applied to.
    use_cuda    -- forwarded to the stacking routine as a C bool.

    returns: Estimate of stacked flux assuming point source.
    """
    import shutil
    import os

    try:
        from taskinit import casalog
    except ImportError:
        casalog = None
    logging_on = casalog is not None
    save_output = outvis != ''

    if logging_on:
        casalog.origin('stacker')
        casalog.post('#'*42, 'INFO')
        casalog.post('#'*5 + ' {0: <31}'.format("Begin Task: Stacker")+'#'*5,
                     'INFO')
        casalog.post('Number of stacking positions: {0}'.format(len(coords)),
                     'INFO')

    # Seed the output file with a copy of the input unless it already exists.
    if save_output and not os.access(outvis, os.F_OK):
        shutil.copytree(vis, outvis)

    infiletype, infilename, infileoptions = stacker._checkfile(vis, datacolumn)
    if save_output:
        checked = stacker._checkfile(outvis, datacolumn)
        outfiletype, outfilename, outfileoptions = checked
    else:
        outfiletype = stacker.FILE_TYPE_NONE
        outfilename = ''
        outfileoptions = 0

    if logging_on:
        casalog.post('Input uv file: \'{0}\' of type {1}'.format(
            infilename, stacker.FILETYPENAME[infiletype]), 'INFO')
        if save_output:
            casalog.post('Output uv file: \'{0}\' of type {1}'.format(
                outfilename, stacker.FILETYPENAME[outfiletype]), 'INFO')
        else:
            message = 'No output uv file given, will not write stacked visibility'
            casalog.post(message, 'INFO')

    # primary beam
    if primarybeam == 'guess':
        primarybeam = stacker.pb.guesspb(vis)
    elif primarybeam in ['constant', 'none'] or primarybeam is None:
        primarybeam = stacker.pb.PrimaryBeamModel()
    pbtype, pbfile, pbnpars, pbpars = primarybeam.cdata()

    # Pack positions and weights into C double arrays for the C call.
    npos = len(coords)
    xs = [p.x for p in coords]
    ys = [p.y for p in coords]
    weights = [p.weight for p in coords]
    x = (c_double*len(xs))(*xs)
    y = (c_double*len(ys))(*ys)
    weight = (c_double*len(weights))(*weights)

    import time
    start = time.time()
    flux = c_stack(infiletype, c_char_p(infilename), infileoptions,
                   outfiletype, c_char_p(outfilename), outfileoptions,
                   pbtype, c_char_p(pbfile), pbpars, pbnpars,
                   x, y, weight, c_int(npos), c_bool(use_cuda))
    stop = time.time()
    print("Time used to stack: {0}".format(stop-start))

    # Optionally image the stacked visibilities.
    if imagename != '':
        import clean
        import clearcal
        clearcal.clearcal(vis=outvis)
        clean.clean(vis=outvis, imagename=imagename, field='0', mode='mfs',
                    cell=cell, imsize=stampsize, weighting='natural')

    if logging_on:
        casalog.post('#'*5 + ' {0: <31}'.format("End Task: stacker")+'#'*5)
        casalog.post('#'*42)

    return flux
exit(1) # let's begin os.mkdir(path) # copy some required files for f in [u"clean.py", u"ExpPlan.txt", u"ExpSel.txt", bin_name]: copy(f, path) # prepare the arborescence for d in [u"TRACES", u"DETAILS", u"TIMINGS", u"RESULTS", u"FILES"]: os.makedirs(os.path.join(path, d)) # required files, again copy(u"empty.csv", os.path.join(path, u"FILES")) copy(u"create.py", os.path.join(path, u"RESULTS")) copy(u"process.py", os.path.join(path, u"RESULTS")) copy(u"_Vizu_V3.xls", os.path.join(path, u"RESULTS")) # do some cleaning import clean clean.clean(path) print(u"Setup finished") print(u"Now, you should run FlashIO to generate the prepare batch:") print(u"FlashIO GenPrepare Dev <your_device> IOS 64 IOC 10000 IOC2 50000") print(u"then, run the batch Prepare.{bat,sh} (see readme.pdf)") print(u"For having help, type FlashIO help") raw_input(u"Press RETURN.") exit(0)
import numpy as np
import logging
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    logging.info('Loading datasets...')
    train = pd.read_csv("../train.csv")
    test = pd.read_csv("../test.csv")
    # print(train.head(3))

    logging.info('Cleaning train dataset...')
    train_x = clean(train)

    # Encode the OutcomeType labels as integers and wrap them in a DataFrame.
    train_y = train.loc[:, "OutcomeType"]
    enc = LabelEncoder()
    enc.fit(train_y)
    train_yt = enc.transform(train_y)
    train_yt = pd.DataFrame(train_yt)

    logging.info('Cleaning test dataset...')
    test_x = clean(test)

    # Give both frames the same set of columns: a column present on one side
    # only is added to the other side filled with 0.
    for diff in train_x.columns.difference(test_x.columns):
        test_x[diff] = 0
    # The original wrote these zeros into test_x, which overwrote real test
    # columns and still left train_x without them.
    for diff in test_x.columns.difference(train_x.columns):
        train_x[diff] = 0
for spwn in [ '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '0', '1', '2','3','4','5', ]: print "Beginning calibration and mapping of spw ", spwn prefix = rootvis.replace(".ms","") prefix = prefix+"_spw"+spwn vis=prefix+".ms" imagename = prefix+"_mfs_uni" clean(vis=vis, imagename=imagename, field=target,spw='', mode='mfs', # use channel to get cubes niter=5000, gain=0.1, threshold='1.0mJy', psfmode='clark', multiscale=[0], interactive=False, imsize=[2560,2560], cell=['0.1arcsec','0.1arcsec'], stokes='I', weighting='uniform', allowchunk=True, mask=[[1041,1271,1100,1394],[1500,1750,1600,1800],[1014,1150,1525,1701]], usescratch=True) exportfits(imagename=imagename+".image", fitsimage=imagename+".fits") # imagename = prefix+"_mfs_uni" # clean(vis=vis, # imagename=imagename, # field=target,spw='', # mode='mfs', # use channel to get cubes # nterms=2,
""" Run the clean task stand alone for debugging """ import datetime from clean import clean #casalog.filter('DEBUGGING') sp = "/mnt/output/Chiles/split_vis" vf = "vis_1136~1140" start_time = datetime.datetime.now() clean(vis=['{0}/20131116-946-6/{1}'.format(sp, vf),'{0}/20131117-941-6/{1}'.format(sp, vf), '{0}/20131118-946-6/{1}'.format(sp, vf),'{0}/20131119-941-6/{1}'.format(sp, vf), '{0}/20131121-946-6/{1}'.format(sp, vf),'{0}/20131123-951-6/{1}'.format(sp, vf), '{0}/20131126-946-6/{1}'.format(sp, vf),'{0}/20131203-941-6/{1}'.format(sp, vf)], imagename="/mnt/output/Chiles/cube_1136~1140", outlierfile="",field="deepfield",spw="",selectdata=True,timerange="",uvrange="",antenna="",scan="",observation="",intent="",mode="frequency",resmooth=False,gridmode="", wprojplanes=1,facets=1,cfcache="cfcache.dir",rotpainc=5.0,painc=360.0,aterm=True,psterm=False,mterm=True,wbawp=False,conjbeams=True,epjtable="",interpolation="nearest", niter=0,gain=0.1,threshold="0.0mJy",psfmode="clark",imagermode="csclean",ftmachine="mosaic",mosweight=False,scaletype="SAULT",multiscale=[0],negcomponent=-1, smallscalebias=0.6,interactive=False,mask=[],nchan=-1,start="",width="",outframe="BARY",veltype="optical",imsize=[2048],cell=['1.5arcsec'],phasecenter="", restfreq="1420.405752MHz",stokes="I",weighting="natural",robust=0.0,uvtaper=False,outertaper=[''],innertaper=['1.0'],modelimage="",restoringbeam=[''],pbcor=False, minpb=0.2,usescratch=True,noise="1.0Jy",npixels=0,npercycle=100,cyclefactor=1.5,cyclespeedup=-1,nterms=1,reffreq="",chaniter=False,flatnoise=True,allowchunk=False) end_time = datetime.datetime.now() print 'Time taken:', end_time, start_time, end_time - start_time
def _make_revision_dir(base_dir, branch_name, revision):
    """Create (if missing) and return base_dir/regression_tests/<branch>-<rev>."""
    rev_dir = os.path.join(base_dir, 'regression_tests')
    rev_dir = os.path.join(rev_dir, '%s-%d' % (branch_name, revision))
    for path in (os.path.dirname(rev_dir), rev_dir):
        try:
            os.mkdir(path)
        except OSError:
            # Best effort: usually the directory already exists. A real
            # failure surfaces when the results file is opened.
            pass
    return rev_dir


def main(argv):
    """Run the regression tests described by ``.regression.yml``.

    Command-line arguments (``argv``, without the program name):

    * ``-j<N>``      forwarded to the test runner as ``-j<N>``
    * ``-h...``      print usage and exit with status 1
    * ``-i...``      incremental run: skip the initial repository clean
    * ``-valgrind``  add ``launcher=valgrind`` and multiply the time limit by 7
    * ``key=value``  appended verbatim to the runner options
    * anything else  a toolset name; at least one is required

    Configuration keys read from ``.regression.yml`` in the current
    directory: ``test_dirs``, ``build_dirs``, ``features``,
    ``build_features``, ``clean``, ``branch`` and ``time_limit``.

    For every toolset the collected results are written as JSON to
    ``regression_tests/<branch>-<revision>/<platform>#<toolset>.json`` under
    the directory the script was started from. That working directory is
    restored before returning, even on error.

    Calls ``sys.exit(1)`` on usage errors, on a missing configuration file
    and when the configuration names no test or build directory.
    """
    toolsets = []
    incremental = False
    options = ['preserve-test-targets=on']
    time_limit = 1200

    for arg in argv:
        if not arg:
            # An empty argument carries no information; indexing into it
            # used to raise IndexError.
            continue
        if arg.startswith('-'):
            flag = arg[1:]
            if flag.startswith('j'):
                num_processes = int(flag[1:])
                options.append('-j%d' % num_processes)
            elif flag.startswith('h'):
                print_usage()
                sys.exit(1)
            elif flag.startswith('i'):
                incremental = True
            elif flag == 'valgrind':
                options.append('launcher=valgrind')
            else:
                # Also reached for a bare "-".
                print('unknown option: %s' % arg)
                print_usage()
                sys.exit(1)
        elif '=' in arg:
            options.append(arg)
        else:
            toolsets.append(arg)

    if not toolsets:
        print_usage()
        sys.exit(1)

    if not incremental:
        print('cleaning repo')
        clean.clean()

    try:
        cfg_file = open('.regression.yml', 'r')
    except IOError:
        print('.regression.yml not found in current directory')
        sys.exit(1)
    with cfg_file:
        # NOTE(review): yaml.load can construct arbitrary Python objects and
        # this file comes from the repository under test, which the cleanup
        # code below already treats as potentially malicious. Consider
        # yaml.safe_load.
        # An empty file parses to None; treat that as "no settings".
        cfg = yaml.load(cfg_file.read()) or {}

    test_dirs = []
    build_dirs = []
    if 'test_dirs' in cfg:
        test_dirs.extend(os.path.abspath(d) for d in cfg['test_dirs'])
    if 'build_dirs' in cfg:
        build_dirs.extend(os.path.abspath(d) for d in cfg['build_dirs'])
        # Build directories are run as test directories as well.
        test_dirs.extend(build_dirs)

    if not build_dirs and not test_dirs:
        print('no test or build directory specified by .regression.yml')
        sys.exit(1)

    # Without a "features" entry run once with the empty feature string.
    configs = list(cfg['features']) if 'features' in cfg else ['']

    build_configs = []
    if 'build_features' in cfg:
        build_configs = list(cfg['build_features'])

    clean_files = cfg['clean'] if 'clean' in cfg else []
    branch_name = cfg['branch'] if 'branch' in cfg else 'trunk'

    if 'time_limit' in cfg:
        time_limit = int(cfg['time_limit'])

    # it takes a bit longer to run in valgrind
    if 'launcher=valgrind' in options:
        time_limit *= 7

    build_platform = platform.system() + '-' + platform.release()

    revision, author = svn_info()
    timestamp = datetime.now()

    print('%s-%d - %s - %s' % (branch_name, revision, author, timestamp))
    print('toolsets: %s' % ' '.join(toolsets))

    current_dir = os.getcwd()

    try:
        rev_dir = _make_revision_dir(current_dir, branch_name, revision)

        for toolset in toolsets:
            results = {}
            for test_dir in test_dirs:
                print('running tests from "%s" in %s' % (test_dir, branch_name))
                os.chdir(test_dir)
                test_dir = os.getcwd()

                # figure out which tests are exported by this Jamfile
                p = subprocess.Popen(
                    ['bjam', '--dump-tests', 'non-existing-target'],
                    stdout=subprocess.PIPE, cwd=test_dir)

                tests = []
                for line in p.stdout:
                    if 'boost-test(RUN)' not in line:
                        continue
                    # Second space-separated field, minus its first and last
                    # character, is a path; its last component is the name.
                    test_name = os.path.split(line.split(' ')[1][1:-1])[1]
                    tests.append(test_name)
                # stdout is exhausted; reap the child so it does not linger.
                p.wait()

                print('found %d tests' % len(tests))
                if not tests:
                    tests = ['']

                additional_configs = []
                if test_dir in build_dirs:
                    additional_configs = build_configs

                for features in configs + additional_configs:
                    _, r = run_tests(toolset, tests, features, options,
                                     test_dir, time_limit)
                    results.update(r)

                print('')

                if len(clean_files) > 0:
                    # The explicit writes reproduce, byte for byte, what the
                    # previous trailing-comma print statements emitted.
                    sys.stdout.write('deleting ')
                    # test_dir with a trailing separator, so that a sibling
                    # such as "<test_dir>_other" is not mistaken for a child.
                    inside_prefix = os.path.join(test_dir, '')
                    for filt in clean_files:
                        for match in glob.glob(os.path.join(test_dir, filt)):
                            # a precaution to make sure a malicious repo
                            # won't clean things outside of the test directory
                            target = os.path.abspath(match)
                            if (target != test_dir
                                    and not target.startswith(inside_prefix)):
                                continue
                            sys.stdout.write(' %s ' % match)
                            try:
                                shutil.rmtree(match)
                            except OSError:
                                # Best effort, as before: entries that cannot
                                # be removed as a directory tree are skipped.
                                pass
                    sys.stdout.write(' \n')

            # each file contains a full set of tests for one specific
            # toolset and platform
            results_name = build_platform + '#' + toolset + '.json'
            try:
                f = open(os.path.join(rev_dir, results_name), 'w+')
            except IOError as e:
                print(e)
                # Re-create the output directory and retry once (presumably
                # it was removed while the tests ran).
                rev_dir = _make_revision_dir(current_dir, branch_name,
                                             revision)
                f = open(os.path.join(rev_dir, results_name), 'w+')
            with f:
                f.write(json.dumps(results) + '\n')
    finally:
        # always restore current directory
        try:
            os.chdir(current_dir)
        except OSError:
            pass
def _remove_clean_products(imagename):
    """Delete every previous clean output whose name starts with *imagename*."""
    for suffix in clean_output_suffixes:
        os.system("rm -rf "+imagename+suffix)


def _plot_selfcal_residuals(vis, field, outdir, calnum, spw, clean_label,
                            file_suffix=''):
    """Save corrected-minus-model amplitude plots vs. baseline, time, uvdist."""
    # (x axis, word used in the title, time averaging, average over scans?)
    panels = (
        ('baseline', 'Baseline', '1e8', True),
        ('time', 'Time', '5s', False),
        ('uvdist', 'UVDIST', '1e8', True),
    )
    for xaxis, axis_title, avgtime, average_scans in panels:
        # The time plot does not pass avgscan at all, so omit the keyword
        # there and leave the task default in effect.
        extra = {'avgscan': T} if average_scans else {}
        plotms(vis=vis, spw='0', xaxis=xaxis, yaxis='amp', avgtime=avgtime,
               ydatacolumn='corrected-model',
               coloraxis='baseline', iteraxis='',
               xselfscale=T, yselfscale=T,
               title='Residual vs. %s after %s iter %i'
                     % (axis_title, clean_label, calnum),
               plotfile=outdir + 'post_selfcal%i_spw%i_residVS%s%s.png'
                        % (calnum, spw, xaxis, file_suffix),
               field=field,
               overwrite=True,
               **extra)


def selfcal(vis, spwn=6, doplots=True, INTERACTIVE=False, reclean=True,
            field='W51 Ku', outdir_template="spw%i_selfcal_iter/",
            statsbox='170,50,229,97', ant1list=['ea14','ea05'],
            ant2list=['ea16','ea07'], avgchannel_wide='128',
            avgchannel_narrow='8', cleanboxes=cleanboxes, refant='ea27',
            solint='30s', niter=2, multiscale=[0,5,10,15,25,50], imsize=512,
            ):
    """Phase self-calibrate one spectral window, then do a final amp+phase pass.

    Steps, in order:

    1. (``reclean`` only) shallow masked clean of ``vis`` and an RMS
       measurement of the result inside ``statsbox``.
    2. ``niter`` rounds of: (``reclean`` only) shallow clean; phase-only
       ``gaincal`` into ``selfcal<N>_<field>_spw<spw>.pcal``; (``reclean``
       only) ``applycal``, a deeper clean and another RMS measurement.
    3. One amplitude+phase ``gaincal`` on top of the last phase table,
       ``plotcal`` of the solution, ``applycal`` of both tables, a final
       masked clean and a final multiscale clean, each followed by residual
       plots against baseline, time and uv distance.

    Parameters
    ----------
    vis : str
        Measurement set to calibrate; it is modified in place by applycal.
    spwn : int or str
        Spectral-window number used in all output names. It is converted
        with ``int()``, so ``'6'`` and ``6`` behave the same.
    INTERACTIVE : bool
        If true, plotcal shows its GUI and iterates over spw instead of
        writing a figure file.
    reclean : bool
        If false, all imaging, table removal and applycal inside steps 1-2
        are skipped; gaincal still runs.
    field, statsbox, cleanboxes, refant, solint, multiscale, imsize
        Passed through to the CASA tasks (clean, imstat, gaincal, ...).
    outdir_template : str
        ``%i`` template for the directory that receives PNG output.
    niter : int
        Number of phase self-calibration rounds; must be at least 1.
    doplots, ant1list, ant2list, avgchannel_wide, avgchannel_narrow
        Accepted for backward compatibility; not used by this function.
        The list defaults are never mutated.

    Returns
    -------
    list
        The ``'rms'`` entries returned by imstat: one for the initial image
        and one per self-calibration round. Empty when ``reclean`` is false.

    Raises
    ------
    ValueError
        If ``niter`` is smaller than 1 (the final amp+phase pass needs the
        calibration table produced by the last round).
    """
    # Fail before any expensive imaging rather than with a NameError on
    # `caltable`/`calnum` after the loop.
    if niter < 1:
        raise ValueError("niter must be at least 1, got %r" % (niter,))

    # Use the integer form everywhere so a string spwn works with "%i".
    spw = int(spwn)

    outdir = outdir_template % spw
    try:
        os.mkdir(outdir)
    except OSError:
        pass

    # you're supposed to pass in avg_data as input
    avg_data = vis

    # Reading the antenna names doubles as a check that `vis` has an
    # ANTENNA subtable; close the table tool again so it is not left open.
    mytb.open(vis+"/ANTENNA")
    try:
        mytb.getcol("NAME")
    finally:
        mytb.close()

    imrms = []

    # (1) Clean a single SPW *interactively*, boxing the brightest regions and
    # not cleaning very deeply (maybe 100 iterations).  Keep this model in the
    # header -- it's what you'll use for the first round of self-calibration.
    if reclean:
        imagename = "average_spw%i_shallowclean_masked" % spw
        _remove_clean_products(imagename)

        clean(vis=avg_data, field=field, imagename=imagename, mode='mfs',
              psfmode='hogbom', multiscale=multiscale,
              weighting='briggs', robust=0.0, niter=100, imsize=imsize,
              mask=cleanboxes,
              nterms=2,
              usescratch=True)
        viewer(imagename+".image.tt0",
               outfile=outdir+imagename+".image.tt0.png",
               outformat='png', gui=False)
        exportfits(imagename=imagename+".image.tt0",
                   fitsimage=imagename+".fits", overwrite=True)

        imrms.append(imstat(imagename+".image.tt0", box=statsbox)['rms'])

    for calnum in range(niter):

        # for Ku D W51 Ku spw 2
        if reclean:
            first_image = 'spw%i_ku_d_firstim_selfcal%i' % (spw, calnum)
            _remove_clean_products(first_image)

            clean(vis=avg_data, imagename=first_image, field=field,
                  mode='mfs', psfmode='hogbom', multiscale=multiscale,
                  weighting='briggs', robust=0.0, niter=100, imsize=imsize,
                  mask=cleanboxes,
                  nterms=2,
                  usescratch=True)
            exportfits(imagename=first_image+".image.tt0",
                       fitsimage=first_image+".fits", overwrite=True)
            viewer(first_image+".image.tt0",
                   outfile=outdir+first_image+".image.tt0.png",
                   outformat='png', gui=False)

        caltable = 'selfcal%i_%s_spw%i.pcal' % (calnum,
                                                field.replace(" ", ""),
                                                spw)
        if reclean:
            os.system('rm -rf '+caltable)
        gaincal(vis=avg_data,
                field='',
                caltable=caltable,
                spw='',
                # gaintype = 'T' could reduce failed fit errors by averaging pols...
                gaintype='G',  # 'G' from http://casaguides.nrao.edu/index.php?title=EVLA_Advanced_Topics_3C391
                solint=solint,
                refant=refant,
                calmode='p',
                combine='scan',
                minblperant=4)

        if reclean:
            applycal(vis=avg_data,
                     gaintable=caltable,
                     interp='linear',
                     flagbackup=True)  # was False when flagmanager was used

        if reclean:
            selfcal_image = 'spw%i_ku_d_selfcal%i' % (spw, calnum)
            _remove_clean_products(selfcal_image)

            clean(vis=avg_data, imagename=selfcal_image, field=field,
                  mode='mfs', psfmode='hogbom', multiscale=multiscale,
                  weighting='briggs', robust=0.5, niter=1000, imsize=imsize,
                  nterms=2,
                  mask=cleanboxes,
                  usescratch=True)
            exportfits(imagename=selfcal_image+".image.tt0",
                       fitsimage=selfcal_image+".fits", overwrite=True)

            imrms.append(imstat(selfcal_image+".image.tt0",
                                box=statsbox)['rms'])

            viewer(selfcal_image+".image.tt0",
                   outfile=outdir+selfcal_image+".image.tt0.png",
                   outformat='png', gui=False)

        print("FINISHED ITERATION %i" % calnum)

    print("FINISHED ITERATING!!! YAY!")

    # From here on `calnum` and `caltable` refer to the LAST phase round.

    # final phase + gain cal:
    # http://casaguides.nrao.edu/index.php?title=Calibrating_a_VLA_5_GHz_continuum_survey#One_Last_Iteration:_Amplitude_.26_Phase_Self_Calibration
    aptable = 'selfcal_ap_%s_spw%i.gcal' % (field.replace(" ", ""), spw)
    gaincal(vis=avg_data, field='', caltable=aptable, gaintable=caltable,
            spw='', solint='inf', refant=refant, calmode='ap', combine='',
            minblperant=4)

    plotcal(caltable=aptable,
            xaxis='phase', yaxis='amp',
            plotrange=[-50, 50, 0.5, 1.5],
            showgui=INTERACTIVE,
            figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_ampvsphase_final.png' % (calnum, spw),
            iteration='spw' if INTERACTIVE else '')  # , subplot = 221)

    applycal(vis=avg_data,
             gaintable=[aptable, caltable],
             interp='linear',
             flagbackup=True)  # was False when flagmanager was used

    # Final masked clean on the fully calibrated data.
    selfcal_image = 'spw%i_ku_d_selfcal%i_final' % (spw, calnum)
    _remove_clean_products(selfcal_image)
    clean(vis=avg_data, imagename=selfcal_image, field=field, mode='mfs',
          mask=cleanboxes,
          weighting='briggs', robust=0.5, niter=10000, imsize=imsize,
          nterms=2,
          usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0",
               fitsimage=selfcal_image+".fits", overwrite=True)

    _plot_selfcal_residuals(avg_data, field, outdir, calnum, spw, 'CSCLEAN')

    # Final multiscale clean, deliberately without the clean-box mask.
    selfcal_image = 'spw%i_ku_d_selfcal%i_final_multiscale' % (spw, calnum)
    _remove_clean_products(selfcal_image)
    clean(vis=avg_data, imagename=selfcal_image, field=field, mode='mfs',
          imagermode='csclean',  # mask=cleanboxes,
          multiscale=multiscale, psfmode='hogbom',
          nterms=2,
          weighting='briggs', robust=0.5, niter=10000, imsize=imsize,
          usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0",
               fitsimage=selfcal_image+".fits", overwrite=True)

    _plot_selfcal_residuals(avg_data, field, outdir, calnum, spw,
                            'multiscale CLEAN', '_multiscale')

    return imrms