Python clean 예제들, clean.clean Python 예제들

예제 #1

0

파일 보기

파일: unit_tests_clean.py 프로젝트: masdude/pypub

 def test_clean_with_article(self):
     """Check that clean() output, once condensed, equals the condensed bare-article page.

     Three inputs are tried: the bare article page itself, the same page
     with an extra <div> before the article, and the same page with a
     <video> element inside the article.
     """
     expected = '<html><head></head><body><article>Hello! I am a test</article></body></html>'
     inputs = (
         expected,
         '<html><head></head><body><div>dsfasfadfasdfasdf</div><article>Hello! I am a test</article></body></html>',
         '<html><head></head><body><article><video></video>Hello! I am a test</article></body></html>',
     )
     for html in inputs:
         self.assertEqual(condense(clean(html)), condense(expected))

예제 #2

0

파일 보기

def run():
    """Run the scoring -> cleaning -> ID generation -> repeat splitting -> matching -> merging pipeline.

    This is the new (Summer 2019) implementation of scoring, matching, and merging.

    Each stage receives the value returned by the previous stage for the PRE
    and PST data sets. The merged result is written to a CSV file whose name
    is built from the season and year.
    """
    # Imports are kept local and in their original order.
    from repeats import repeats
    from score import score
    from clean import clean
    from match import match
    from merge import merge
    from id_gen import id_gen
    import pandas as pd

    year = "19"
    season = "Sp"
    stu_DB_name = "Student_ID_Database.csv"
    instr_DB_name = "Instr_ID_Database.csv"

    pre_path = 'QuaRCSLt2_S19_PRE.csv'
    # Swap the trailing "PRE.csv" (7 characters) for "POST.csv".
    post_path = pre_path[:-7] + "POST.csv"
    merged_path = 'QuaRCSLt2_{0}{1}_merged.csv'.format(season, year)

    # Score PRE and PST; the last argument is the file name without ".csv".
    print("Scoring...")
    pre = score(pre_path, 'PRE', year, season, 'answ.csv', pre_path[:-4])
    post = score(post_path, 'PST', year, season, 'answ.csv', post_path[:-4])

    # Clean PRE and PST.
    print("Cleaning...")
    pre = clean(pre, 'PRE')
    post = clean(post, 'PST')

    # Generate IDs for PRE and PST.
    print("Generating student and instructor IDs...")
    pre = id_gen(pre, 'PRE', year, season, stu_DB_name, instr_DB_name)
    post = id_gen(post, 'PST', year, season, stu_DB_name, instr_DB_name)

    # Split repeats.
    print("Splitting...")
    pre = repeats(pre, 'PRE')
    post = repeats(post, 'PST')

    # Match.
    print("Matching...")
    unmatched_pre, unmatched_post, pairs, instructor_change = match(pre, post)

    # Merge and save.
    print("Merging...")
    merged = merge(unmatched_pre, unmatched_post, pre, post, pairs)
    merged.to_csv(merged_path, encoding='utf-8', index=False)
    print("Merged dataset saved to {0}".format(merged_path))

예제 #3

0

파일 보기

파일: parse_line.py 프로젝트: erokhins/periscope

def parse_line(sen,count):
	"""Annotate words of ``sen`` with numbered ``{n}`` placeholders.

	Newlines in ``sen`` are treated as spaces and the text is split on single
	spaces. A word is a candidate when ``find(words, clean(word))`` equals 0.
	Candidates whose cleaned form is also a (cleaned) key of ``detect(sen)``
	get the next placeholder number, starting at ``count``.

	Returns ``[sentence, totranslate]``. ``totranslate`` is a list of
	``(cleaned_word, detect_value)`` pairs. When there are no candidates the
	original ``sen`` is returned unchanged with an empty list.
	"""
	flat = ' '.join(sen.split('\n'))

	candidates = []
	for token in flat.split(' '):
		if find(words, clean(token)) == 0:
			candidates.append(clean(token))
	if not candidates:
		return [sen, []]

	detected = detect(sen)
	by_clean_key = {}
	for raw_key in detected:
		by_clean_key[clean(raw_key)] = detected[raw_key]

	placeholders = {}
	totranslate = []
	for key in candidates:
		if key in by_clean_key:
			placeholders[key] = '{' + str(count) + '}'
			totranslate.append((key, by_clean_key[key]))
			count += 1

	# Append the red-coloured placeholder to every token that got one.
	pieces = flat.split(' ')
	for idx, piece in enumerate(pieces):
		word = clean(piece)
		if word in placeholders:
			pieces[idx] = piece + '<font color="#ff0000">' + placeholders[word] + '</font>'
	return [' '.join(pieces), totranslate]

예제 #4

0

파일 보기

파일: makecube_defines.py 프로젝트: ICRAR/chiles_pipeline

def do_cube(in_dirs, cube_dir, min_freq, max_freq, step_freq, width_freq):
    """Call ``clean`` on ``in_dirs`` to produce an image named after the frequency range.

    The image name is ``cube_<min_freq>~<max_freq>`` inside ``cube_dir``.
    ``step_freq`` and ``width_freq`` are accepted but not used in this body.
    The names ``job_id`` and ``debug`` are read from outside this function;
    when ``debug`` is truthy only the banner is printed and ``clean`` is not
    called.

    NOTE(review): this is Python 2 code (print statement, ``except X, name``).
    """
    outfile = os.path.join(cube_dir, 'cube_{0}~{1}'.format(min_freq, max_freq))
    print '''
Job {0}: clean(vis={1}, imagename={2})'''.format(job_id, str(in_dirs), outfile)
    if not debug:
        try:
            # dump_all()
            # NOTE(review): ``clean`` is presumably the CASA imaging task -- confirm.
            clean(vis=in_dirs,
                  imagename=outfile,
                  field='deepfield',
                  spw='',
                  mode='frequency',
                  restfreq='1420.405752MHz',
                  nchan=-1,
                  start='',
                  width='',
                  interpolation='nearest',
                  niter=1000,
                  gain=0.1,
                  threshold='0.0mJy',
                  imsize=[2048],
                  cell=['1.25arcsec'],
                  weighting='natural',
                  usescratch=True)
        except Exception, clEx:
            # Failures are reported on stdout and otherwise ignored.
            print '*********\nClean exception: %s\n***********' % str(clEx)

예제 #5

0

파일 보기

 def test_clean_with_article(self):
     """Check that clean() output, once condensed, equals the condensed bare-article page.

     Three inputs are tried: the bare article page itself, the same page
     with an extra <div> before the article, and the same page with a
     <video> element inside the article.
     """
     expected = '<html><head></head><body><article>Hello! I am a test</article></body></html>'
     inputs = (
         expected,
         '<html><head></head><body><div>dsfasfadfasdfasdf</div><article>Hello! I am a test</article></body></html>',
         '<html><head></head><body><article><video></video>Hello! I am a test</article></body></html>',
     )
     for html in inputs:
         self.assertEqual(condense(clean(html)), condense(expected))

예제 #6

0

파일 보기

파일: janitor.py 프로젝트: memius/market-analyzer

def check(article):
    """Re-run ``clean.clean`` on ``article`` where needed and persist it if anything ran.

    ``clean.clean(article)`` is called when ``article.clean`` is falsy, and
    called (again) when ``article.clean`` is truthy but
    ``utils.is_prose(article.text)`` is falsy. If either call happened,
    ``article.put()`` is invoked.

    Returns True when the article was cleaned and stored, otherwise False.
    """
    first_pass = not article.clean
    if first_pass:
        clean.clean(article)

    # ``article.clean`` is deliberately re-read: the first pass may have set it.
    second_pass = bool(article.clean and not utils.is_prose(article.text))
    if second_pass:
        clean.clean(article)

    # TODO: also make sure a title probability and title sentiment exist; if
    # article.sentiment or article.title_sentiment is None, classify the
    # article with a simpler, key-less version of classify.classify() that
    # takes only word pairs.

    changed = first_pass or second_pass
    if changed:
        article.put()
    return changed

예제 #7

0

파일 보기

파일: makecube_defines.py 프로젝트: astrowork/aws-chiles02

def do_cube(in_dirs, cube_dir, min_freq, max_freq, step_freq, width_freq):
    """Call ``clean`` on ``in_dirs`` to produce an image named after the frequency range.

    The image name is ``cube_<min_freq>~<max_freq>`` inside ``cube_dir``.
    ``step_freq`` and ``width_freq`` are accepted but not used in this body.
    The names ``job_id`` and ``debug`` are read from outside this function;
    when ``debug`` is truthy only the banner is printed and ``clean`` is not
    called.

    NOTE(review): this is Python 2 code (print statement, ``except X, name``).
    """
    outfile = os.path.join(cube_dir, 'cube_{0}~{1}'.format(min_freq, max_freq))
    print '''
Job {0}: clean(vis={1}, imagename={2})'''.format(job_id, str(in_dirs), outfile)
    if not debug:
        try:
            # dump_all()
            # NOTE(review): ``clean`` is presumably the CASA imaging task -- confirm.
            clean(vis=in_dirs,
                  imagename=outfile,
                  field='deepfield',
                  spw='',
                  mode='frequency',
                  restfreq='1420.405752MHz',
                  nchan=-1,
                  start='',
                  width='',
                  interpolation='nearest',
                  niter=1000,
                  gain=0.1,
                  threshold='0.0mJy',
                  imsize=[2048],
                  cell=['1.25arcsec'],
                  weighting='natural',
                  usescratch=True)
        except Exception, clEx:
            # Failures are reported on stdout and otherwise ignored.
            print '*********\nClean exception: %s\n***********' % str(clEx)

예제 #8

0

파일 보기

파일: vcorrs.py 프로젝트: Smattacus/data-analysis

    def setCleanedIVDFS(self, fwin = 1500):
        '''Build cleaned, low-pass filtered IVDFs from the dye and diode files.

        Reads the files named by ``self.dye_ivdf`` and ``self.di_ivdf`` and
        stores the results as ``(wavelength, data)`` tuples in
        ``self.dye_ivdf_c`` and ``self.diode_ivdf_c``. When ``self.di_ivdf``
        is the empty string the diode file is treated as missing and
        ``self.diode_ivdf_c`` is not set.

        Optional Inputs:
        fwin = 1500     :   Wavenumber outside which to set the spec to 0.

        '''
        def _cleaned_ivdf(path):
            # Column 0 is used as the wavelength axis; columns 2 and 3 are
            # combined into a magnitude.
            d = np.loadtxt(path, skiprows=1)
            R = np.sqrt(d[:,2]**2 + d[:,3]**2)
            wl = d[:,0]
            (wlc, rc) = clean.clean(wl, R)
            # Zero every spectral component with |f| > fwin, then transform
            # back (spec.spec / spec.ispec are presumably a forward/inverse
            # transform pair -- confirm).
            [f, g] = spec.spec(rc, wlc[1] - wlc[0])
            g[np.where(np.abs(f) > fwin)] = 0
            [t, rcf] = spec.ispec(g, f[1] - f[0])
            return (wlc, rcf)

        #First the dye.
        self.dye_ivdf_c = _cleaned_ivdf(self.dye_ivdf)
        #Then the diode.
        if self.di_ivdf == '':
            #The Diode file is missing.
            return
        # Fix: the diode result was previously computed from the dye file.
        self.diode_ivdf_c = _cleaned_ivdf(self.di_ivdf)
        return

예제 #9

0

파일 보기

파일: main.py 프로젝트: lissizza/instaspace

def main():
    """Fetch images, publish them, then clean up, honouring the CLI flags.

    Fetching is skipped when ``args.upload_only`` is set, publishing is
    skipped when ``args.download_only`` is set, and ``clean(args)`` always
    runs last.
    """
    args = utils.get_args()

    if not args.upload_only:
        for fetch in (fetch_spacex, fetch_hubble):
            fetch(args)

    if not args.download_only:
        publish_images(args)

    clean(args)

예제 #10

0

파일 보기

파일: add_encoder.py 프로젝트: quinn-dougherty/DS-Unit-4-Sprint-1-Tree-Ensembles

def encode(encoder, trainpath=TRAINPATH, testpath=TESTPATH):
    '''Encode the cleaned train and test CSVs with ``encoder``.

    Pass a fresh encoder instance from the ce library. The encoder is fitted
    on the training features only and then applied to the test features, so
    both sets share one encoding.

    ``clean`` is applied to each loaded frame; element 0 of its result is
    encoded and element 1 of the test result is returned as ``TEST_IDs``.

    Returns a dict with keys ``'train'``, ``'test'`` and ``'TEST_IDs'``.
    '''

    df_test = clean(pd.read_csv(testpath))

    X_train = encoder.fit_transform(clean(pd.read_csv(trainpath))[0])
    # Fix: use transform (not fit_transform) so the test set is encoded with
    # the mapping learned from the training data rather than a re-fitted one.
    X_test = encoder.transform(df_test[0])

    return {'train': X_train, 'test': X_test, 'TEST_IDs': df_test[1]}

예제 #11

0

파일 보기

def experiment(datasets,
               log=False,
               n_jobs=1,
               nosave=False,
               error_type=None,
               arg_seeds=None):
    """Run experiments on all datasets for all splits.

    Split seeds and the experiment seed are drawn from NumPy's global RNG
    after seeding it with ``config.root_seed``. When ``arg_seeds`` is given,
    only the split indices it contains are run. Splits for which
    ``utils.check_completed`` returns true are skipped. With ``log`` set,
    progress and timing are written to a timestamped log file.
    """
    # set logger for experiments
    if log:
        logging.captureWarnings(False)
        log_file = 'logging_{}.log'.format(datetime.datetime.now())
        logging.basicConfig(filename=log_file, level=logging.DEBUG)

    # set seeds for experiments
    np.random.seed(config.root_seed)
    split_seeds = np.random.randint(10000, size=config.n_resplit)
    experiment_seed = np.random.randint(10000)
    n_splits = len(split_seeds)

    # run experiments
    for dataset in datasets:
        if log:
            logging.debug("{}: Experiment on {}".format(
                datetime.datetime.now(), dataset['data_dir']))

        for i, seed in enumerate(split_seeds):
            if arg_seeds is not None and i not in arg_seeds:
                continue

            if utils.check_completed(dataset, seed, experiment_seed):
                message = "Ignore {}-th experiment on {} that has been completed before."
                print(message.format(i, dataset['data_dir']))
                continue

            started = time.time()
            init(dataset, seed=seed, max_size=config.max_size)
            clean(dataset, error_type)
            one_split_experiment(dataset,
                                 n_retrain=config.n_retrain,
                                 n_jobs=n_jobs,
                                 nosave=nosave,
                                 seed=experiment_seed,
                                 error_type=error_type)
            finished = time.time()

            # Minutes spent on this split, extrapolated over the splits left.
            minutes = (finished - started) / 60
            remaining = minutes * (n_splits - i - 1)
            if log:
                template = "{}: {}-th experiment takes {} min. Estimated remaining time: {} min"
                logging.debug(template.format(datetime.datetime.now(), i,
                                              minutes, remaining))

예제 #12

0

파일 보기

파일: getseguntok.py 프로젝트: isi-nlp/unitok

def main():
    """Re-segment an untokenized file using the line breaks of a tokenized one.

    Both inputs are passed line by line through ``clean.clean``; lines for
    which it returns None are dropped. The original lines are concatenated
    into one string. For every tokenized line, its characters (whitespace
    removed) are searched for in the remaining original text, allowing
    arbitrary whitespace between characters, and the matching original span
    is written as one output line.

    Exits with status 1 when a line cannot be found, when non-whitespace
    text would have to be skipped before a match, or when non-whitespace
    text is left over at the end.
    """
    parser = argparse.ArgumentParser(
        description="given unsegmented, untokenized file and segmented, tokenized file, return segmented untokenized file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--origfile", "-r", nargs="?", type=argparse.FileType("r"), default=sys.stdin, help="unseg, untok file"
    )
    parser.add_argument(
        "--tokfile", "-t", nargs="?", type=argparse.FileType("r"), default=sys.stdin, help="seg, tok file"
    )
    parser.add_argument(
        "--outfile", "-o", nargs="?", type=argparse.FileType("w"), default=sys.stdout, help="output (seg, untok) file"
    )

    try:
        args = parser.parse_args()
    except IOError as msg:
        # parser.error() prints the message and exits, so args is always
        # bound below.
        parser.error(str(msg))

    origfile = prepfile(args.origfile, "r")
    tokfile = prepfile(args.tokfile, "r")
    outfile = prepfile(args.outfile, "o")

    origlines = []
    for line in origfile:
        line = clean.clean(line)
        if line is None:
            continue
        origlines.append(line)
    orig = "".join(origlines)
    for ln, line in enumerate(tokfile, start=1):
        line = clean.clean(line)
        if line is None:
            continue
        squashline = "".join(line.split())
        # One escaped character at a time, with optional whitespace between
        # them. Raw string: "\s" in a normal literal is an invalid escape.
        rex = r"\s*".join(map(re.escape, list(squashline)))
        match = re.search(rex, orig, re.UNICODE)
        if match is None:
            sys.stderr.write("Couldn't find [[[%s]]] in [[[%s]]] at line %d\n" % (line, orig[: len(line)], ln))
            sys.exit(1)
        prefix = orig[: match.start(0)]
        if prefix is not None and not prefix.isspace() and prefix != "":
            sys.stderr.write("Found %s but skipping prefix %s\n" % (line, prefix))
            sys.exit(1)
        outfile.write(orig[match.start(0) : match.end(0)] + "\n")
        # Consume the matched text so the next line is searched after it.
        orig = orig[match.end(0) :]
    if orig is not None and not orig.isspace() and orig != "":
        sys.stderr.write("Leftover: " + orig + "\n")
        sys.exit(1)

예제 #13

0

파일 보기

파일: lockin.py 프로젝트: Smattacus/data-analysis

def make_sweep_plots(filename, freqs, fignum=1, npoints = 2000, sweep=False):
    '''
    Makes plots for a sweep over antenna frequency, where p is the voltage for
    current sweep frequency.

    Four panels are drawn: raw lock-in data, raw phase (or, with sweep=True,
    the raw ramp voltage against the constructed sweep), the cleaned lock-in
    response, and the unwrapped cleaned phase.

    :param filename: File to read out.
    :param freqs: [low frequency, high frequency]
    :param fignum: Number of the figure to draw into.
    :param npoints: Passed to gen_freq_axis when building the frequency axis.
    :param sweep: Selects what the second panel shows (see above).
    :return: Tuple containing (x, y, p, x_clean, y_clean, f_clean)
    '''
    (wl, x, y, p) = read_lockin(filename)
    figure(fignum, figsize=(8,8))
    clf()
    subplot(221)
    f = gen_freq_axis(p, npoints, freqs[0], freqs[1])
    plot(f, x,'.')
    plot(f, y, '.')
    plot(f, np.abs(x + 1j * y), '.')
    xlabel('Sweep voltage (V)')
    ylabel('Lock in signal')
    title('Raw Sweep Data')
    legend(['Real', 'Imag', 'Rvec'])
    subplot(222)
    if sweep == False:
        plot(f, np.angle(x + 1j * y))
        xlabel('Sweep Voltage (V)')
        ylabel('Phase (rad)')
        title('Raw Phase Data')
    elif sweep == True:
        plot(p)
        plot(f * np.max(p) / freqs[1])
        xlabel('Sweep #')
        ylabel('Sweep Ramp Voltage')
        legend(['Raw Voltage', 'Constructed Sweep'])
        title('Constructed Sweep and Actual Sweep')
    subplot(223)
    # Fix: xc is now the cleaned x (real) and yc the cleaned y (imag), so the
    # 'Real'/'Imag' legend and the phase below agree with the raw-data panels.
    # The x call comes last so the returned fc is still the one from clean(f, x).
    (fc, yc) = clean.clean(f, y)
    (fc, xc) = clean.clean(f, x)
    plot(fc, xc)
    plot(fc, yc)
    xlabel('Frequency')
    ylabel('Lockin response(v)')
    title('Cleaned Lock in Response')
    legend(['Real', 'Imag'])
    subplot(224)
    phac = np.angle(xc + 1j * yc)
    plot(fc, np.unwrap(phac))
    xlabel('Frequency')
    ylabel('Phase (rad)')
    title('Cleaned Phase')
    return(x, y, p, xc, yc, fc)

예제 #14

0

파일 보기

파일: timelapse.py 프로젝트: khoulihan/capture-timelapse

def _main():
    """Parse the command line and dispatch to the requested sub-command.

    Sets the module-level ``_debug`` flag from the parsed arguments. A
    KeyboardInterrupt during the command prints a blank line and exits with
    status 0.
    """
    global _debug
    args = _parse_arguments()
    _debug = args.debug
    try:
        command = args.command
        if command in ('capture', 'cap'):
            capture(args)
        elif command in ('clean',):
            clean(args)
        elif command in ('convert', 'con'):
            convert(args)
    except KeyboardInterrupt:
        # TODO: Maybe track some statistics and print them on exit.
        print()
        sys.exit(0)

예제 #15

0

파일 보기

파일: run.py 프로젝트: griftah/emojicode-xakep

def runall(filename):
    """Time ``filename`` for every key of ``brainfucks`` and print a results table.

    For each key, ``run`` is called twice: with ``False`` and then with
    ``True`` as its second argument. The first element of each result is
    kept as ``t`` / ``t_opt``. Rows are printed sorted by ``t``.

    NOTE(review): this is Python 2 code (``iterkeys``, print statement).
    """
    results = []
    try:
        for lang in brainfucks.iterkeys():
            t, bf_name, optimize = run(filename, False, lang)
            t_opt = run(filename, True, lang)[0]
            results.append((t, t_opt, bf_name))
    except KeyboardInterrupt:
        # Interrupt stops the measurements; results gathered so far are
        # still printed below after clean() runs.
        print
        clean()
    print '-'*40
    print 't\tt opt\ttitle'
    for result in sorted(results, key=lambda a: a[0]):
        # NOTE(review): ``t`` here is the value left over from the loop
        # above, not this row's time -- ``result[0]`` was probably intended.
        if t:
            print '%.3f\t%.3f\t%s'%result

예제 #16

0

파일 보기

def plot_clean(t, x, p0 = 0.0, axis= None):
    """Plot the CLEAN spectrum of ``x`` sampled at ``t`` against period.

    ``clean(t, x, threshold=1e-3)`` is evaluated and the second half of its
    output, normalised by ``2 * var(x)``, is plotted for periods in
    [0.5, 23). Vertical lines mark 1.0, ``p0`` and the highest peak.
    (Assumes the first value returned by ``clean`` is a frequency axis, so
    ``1/f`` is a period -- confirm against the ``clean`` module.)

    Parameters:
        t, x: sample times and values passed to ``clean``.
        p0: reference period to mark on the plot.
        axis: axes whose ``transAxes`` positions the 'CLEAN' label; the
            current axes are used when omitted.

    Returns the period of the highest peak among periods >= 1.1.
    """
    from clean import clean  # @UnresolvedImport
    #f, cleaned, _ = clean(t, x, gain=0.9, threshold=2e-3)
    f, cleaned, _ = clean(t, x, threshold=1e-3)
    # Fix: floor division keeps n2 an int on Python 3 (a float cannot be
    # used as a slice index).
    n2 = len(f) // 2
    cf = cleaned[n2+1:]/(2.0*np.var(x))
    p = 1./f[n2+1:]
    in_range = (p>=0.5) & (p<23.0)
    cf = cf[in_range]
    p = p[in_range]
    i = np.argmax(cf)
    period = p[i]

    # Fix: the default axis=None used to crash on axis.transAxes.
    if axis is None:
        axis = plt.gca()

    plt.axvline(1, color='r', alpha=0.5, label='1.0 day')
    plt.axvline(p0, color='b', alpha=0.5, label='%.2f days' % p0)
    plt.axvline(period, color='g', alpha=0.5, label='max. peak')
    plt.xlim(0.0,max(t)/3)
    plt.plot(p, cf, 'k')
    plt.minorticks_on()
    plt.ylabel('S(p)')
    plt.text(0.95, 0.9, 'CLEAN', verticalalignment='top', horizontalalignment='right', transform=axis.transAxes)

    # The returned period ignores everything below 1.1.
    above = p>=1.1
    cf = cf[above]
    p = p[above]
    i = np.argmax(cf)
    period = p[i]
    return period

예제 #17

0

파일 보기

파일: sroh.py 프로젝트: Smattacus/data-analysis

def getCleanedDyeIVDFS(fwin = 1500):
    '''
        Acquires and returns cleaned Dye IVDFs.

        For every index returned by getDyeIVDFRels(), the matching file from
        getDyeIVDFFiles() is loaded, passed through clean.clean, and has all
        spectral components with |f| > fwin set to zero.

        INPUTS:
        fwin=1500    :  Chooses the range outside of which to zero the
        spectrum.
        OUTPUTS:
        (dye_days, dye_cleanedivdfs)
        dye_days    :   Corresponding data file for the output data.
        dye_cleanedivdfs        :   List of (wavelength, data) tuples.
    '''
    selected = getDyeIVDFRels()
    files = getDyeIVDFFiles()
    dye_days = []
    dye_cleanedivdfs = []
    for idx in selected:
        path = files[idx]
        dye_days.append(path)
        table = np.loadtxt(path, skiprows=1)
        # Column 0 is the wavelength axis; columns 2 and 3 give the magnitude.
        magnitude = np.sqrt(table[:,2]**2 + table[:,3]**2)
        wavelength = table[:,0]
        (wlc, rc) = clean.clean(wavelength, magnitude)
        [f, g] = spec.spec(rc, wlc[1]- wlc[0])
        g[np.where(np.abs(f) > fwin)] = 0
        [t, rcf] = spec.ispec(g, f[1]- f[0])
        dye_cleanedivdfs.append((wlc, rcf))
    return (dye_days, dye_cleanedivdfs)

예제 #18

0

파일 보기

파일: sroh.py 프로젝트: Smattacus/data-analysis

def getCleanedDiodeIVDFS(fwin = 1500):
    '''
        Acquires and returns cleaned Diode IVDFs. Default f window size of
        1500.

        The diode file names are derived from the dye file names by replacing
        'DYE' with 'DIODE'. Files whose name contains 'Apr8' are not loaded;
        an empty 2-row array is stored in their place.

        INPUTS:
        fwin=1500    :  Chooses the range outside of which to zero the
        spectrum.
        OUTPUTS:
        diode_cleanedivdfs        :   List of (wavelength, data) tuples, one
        per entry of getDyeIVDFRels().
    '''
    rels = getDyeIVDFRels()
    di_civdfs = []
    fl = getDyeIVDFFiles()
    for x in rels:
        if 'Apr8' in fl[x]:
            # Placeholder keeps the list aligned with rels.
            di_civdfs.append(np.array([[],[]]))
            continue
        d = np.loadtxt(fl[x].replace('DYE','DIODE'), skiprows=1)
        R = np.sqrt(d[:,2]**2 + d[:,3]**2)
        wl = d[:,0]
        (wlc, rc) = clean.clean(wl, R)
        #Get rid of high freq noise too
        [f, g] = spec.spec(rc, wlc[1] - wlc[0])
        # Fix: honour the fwin argument instead of a hard-coded 1500.
        fi = np.where(np.abs(f) > fwin)
        g[fi] = 0
        [t, rcf] = spec.ispec(g, f[1] - f[0])
        di_civdfs.append((wlc, rcf))
    return di_civdfs

예제 #19

0

파일 보기

파일: refuting.py 프로젝트: amazingclaude/Fake_News_Stance_Detection

def mutual_information_title(string):
    """Return a one-row 0/1 feature matrix of key-word hits in the cleaned text.

    Each column corresponds to one key word, in list order, and is 1 when
    that word is ``in`` the result of ``clean(string)``.
    """
    # Key words are grouped by the topic word they were selected for; the
    # topic words themselves are not part of the list.
    key_words_for_reportedly = ['according', 'said', 'reported', 'told']
    key_words_for_claim = ['claimed', 'said', 'would', 'false']
    key_words_for_hoax = ['culkin', 'macaulay', 'internet', 'story']
    key_words_for_fake = ['facebook', 'site', 'real', 'website']
    _key_words = (key_words_for_reportedly + key_words_for_claim
                  + key_words_for_hoax + key_words_for_fake)

    clean_headline = clean(string)
    row = []
    for word in _key_words:
        row.append(int(word in clean_headline))
    return [row]

예제 #20

0

파일 보기

파일: refuting.py 프로젝트: amazingclaude/Fake_News_Stance_Detection

def refuting_features_body(string):
    """Return a one-row 0/1 feature matrix of refuting/discussion word hits.

    Each column corresponds to one word, in list order, and is 1 when that
    word is ``in`` the result of ``clean(string)``.
    """
    refuting = [
        'fake', 'fraud', 'hoax', 'hoaxer', 'false', 'deny', 'denies',
        'despite', 'nope', 'doubt', 'bogus', 'debunk', 'pranks', 'retract',
        'lie',
    ]
    discussion = [
        'reportedly', 'report', 'likely', 'probably', 'according', 'might',
    ]
    key_word = ['update']
    _refuting_words = refuting + discussion + key_word

    clean_headline = clean(string)
    row = []
    for word in _refuting_words:
        row.append(int(word in clean_headline))
    return [row]

예제 #21

0

파일 보기

파일: keylog.py 프로젝트: saiivarma/Teleos_rep

def cleaned_data_to_database(log):
    """Flush ``log`` to the database once ``int_time`` seconds have passed.

    When at least ``int_time`` seconds have elapsed since ``prev_time``, the
    log is passed through ``clean.clean`` and ``file_type.get_file_type``,
    turned into rows, sent to ``db.update`` and printed; an empty list is
    then returned. Otherwise ``log`` is returned unchanged.

    Updates the module-level ``prev_time``.
    """
    global prev_time
    global arg
    
    if time.time()-prev_time>= int_time:
        prev_time=time.time()
        
        data = clean.clean(log)
        # The caller receives a fresh, empty log after a flush.
        log = []

        data = file_type.get_file_type(data)

        #print(data)
        
        # One row per record: [502, time, clicks, file category, file name].
        # NOTE(review): 502 looks like a fixed user/device id -- confirm.
        oplist=[]
        for i in range(0,len(data)):

            oplist.append([502,data['time'][i],data['clicks'][i],data['file cat'][i], data['file name'][i]])
        db.update(db.get_overall("502"), oplist, datetime.now().date())
        
        ## send data to database
        
        print('------')
        print(pd.DataFrame(oplist))
       # print(data['file name'][0])
        print('------')

        '''if time.time() - total_time >= terminate_time:
            key_Listener.stop()##
            listener.stop()##'''
    return log

예제 #22

0

파일 보기

def insertion(counter, postid, message):
    """Add the keywords of ``message`` to the per-post word counts in ``counter``.

    ``counter`` maps a post id to a ``{word: count}`` dict. When ``message``
    yields keywords and ``postid`` differs from the first post id already in
    ``counter``, that earlier post is ranked, printed, serialized, sent and
    deleted before counting starts for ``postid``.

    Returns ``counter`` (mutated in place).
    """
    keywords = clean.remove_stopwords(clean.tokenize(clean.clean(message)))

    # First key currently held, or None when nothing is being counted.
    current_postid = next(iter(counter)) if counter else None

    if not keywords:
        return counter

    if postid != current_postid:
        if counter:
            # A new post started: flush the one counted so far.
            ranked = rank(counter, current_postid)
            print(ranked)
            send_keywords(serialize(ranked))
            deletion(counter, current_postid)
        counter[postid] = {}

    tally = counter[postid]
    for word in keywords:
        tally[word] = tally.get(word, 0) + 1

    return counter

예제 #23

0

파일 보기

def model(dataset_filename):
    """Run ``explore`` and ``clean`` on the dataset and write anomaly ranges to results.csv.

    ``clean(dataset_filename)`` is iterated as a sequence of row numbers.
    Runs of consecutive row numbers are collapsed into
    "start, end" lines written to "results.csv" in the working directory.

    Always returns 0.
    """
    # Run the exploratory data analysis.
    explore(dataset_filename)

    # Get list of anomalies from clean function.
    anomaly_list = clean(dataset_filename)

    # Create "results.csv"
    with open("results.csv", "w+") as results:
        results.write(str("Anomaly Start Sample, Anomaly End Sample\n"))

        anomaly_start_position = None
        anomaly_end_position = None
        for current_row in anomaly_list:
            # If anomaly start and end positions are empty, set them both equal to the current row.
            if anomaly_start_position is None:
                anomaly_start_position = int(current_row)
                anomaly_end_position = int(current_row)

            # If current_row is a continuation of the anomaly, update end position with current_row.
            elif anomaly_end_position == current_row-1:
                anomaly_end_position = int(current_row)

            # If current_row is NOT a continuation of the anomaly, update start and end positions with current_row.
            elif anomaly_end_position < current_row-1:
                # Add current start and end positions to "results.csv".
                results.write(str(anomaly_start_position) + ", " + str(anomaly_end_position) + "\n")
                anomaly_start_position = int(current_row)
                anomaly_end_position = int(current_row)

            # If duplicate entry, it's the last anomaly. Add to "results.csv".
            # NOTE(review): the final range is only written by this branch, so
            # this relies on anomaly_list ending with a repeated last row --
            # confirm that clean() guarantees it.
            elif anomaly_end_position == current_row:
                results.write(str(anomaly_start_position) + ", " + str(anomaly_end_position) + "\n")

    return 0

예제 #24

0

파일 보기

파일: radio_census.py 프로젝트: hdoupe/radio_census

	def fix_excel_sheet(self,path):
		"""Run the ``clean`` helper on ``path`` and return the path of the cleaned CSV.

		The output ("clean.csv") and a scratch file ("tmp.csv") both live in
		``self.target_dir``.
		"""
		from clean import clean
		target_dir = self.target_dir
		output_csv = os.path.join(target_dir, "clean.csv")
		scratch_csv = os.path.join(target_dir, "tmp.csv")
		clean().run(path, scratch_csv, output_csv)
		return output_csv

예제 #25

0

파일 보기

파일: Segmentation.py 프로젝트: sunflowerlyb/segmentation

	def train(self, train_file, n, m):
		'''Build the dictionary from the training file.

		The preprocessed file 'dealed_<train_file>' is used when it can be
		opened; otherwise clean.clean(train_file) is called and the file it
		names is opened instead (presumably it creates the preprocessed
		file -- confirm).

		Sets self.sentence (the raw decoded text), self.words (set of
		space-separated tokens after removing line breaks, '#' and '$'),
		self.length (token count), self.count (distinct token count),
		self.n_gram (int(n)) and self.max_len (int(m)).

		NOTE(review): str.decode on the result of read() is Python 2 usage.
		'''
		file_name = 'dealed_' + train_file

		try:
			file_open = open(file_name, 'r')
		except IOError:
			# Only a failed open triggers the fallback; other errors propagate.
			file_name = clean.clean(train_file)
			file_open = open(file_name, 'r')

		# Close the handle even if reading or decoding fails.
		try:
			file_read = file_open.read().decode('utf-8')
		finally:
			file_open.close()
		self.sentence = file_read

		file_read = file_read.replace('\r\n',' ')
		file_read = file_read.replace('\n',' ')
		file_read = file_read.replace('#', '')
		file_read = file_read.replace('$', '')
		tokens = file_read.split(" ")
		self.words = set(tokens)

		self.n_gram = int(n)
		self.length = len(tokens)
		self.count = len(self.words)
		self.max_len = int(m)

예제 #26

0

파일 보기

파일: lol.py 프로젝트: yt287922464/actual-19-homework

def find():
    """Interactively look up a user's server and qq by name until 'q'/'Q' is entered.

    Each entered name is stripped. An empty name is rejected without
    querying the database. A query that raises is logged as critical and
    treated like "user not found". On success the row is rendered with
    ``clean`` and printed.
    """
    print("""尊敬的用户，您好！！！欢迎使用查询功能
    请输入您的选择！！！
    q & Q)退出本页面   """)
    while True:
        name = input(""" 输入查询用户名称,不允许为空:""").strip()
        if name == "q" or name == "Q":
            break
        # Validate before touching the database.
        if not name:
            print("用户名为空，请重新输入")
            continue
        # NOTE(review): the query is built by string formatting from user
        # input and is open to SQL injection; switch to a parameterized
        # query if connect() supports one.
        sql = """select server,qq from user where name = '{}'""".format(
            name)
        try:
            result = connect(sql, 'find')
            # An empty result is a normal "not found", not a database error.
            if result:
                server = result[0][0]
                qq = result[0][1]
        except Exception:
            logging.critical("\033[31m在查询数据库时发生了严重的错误\033[0m")
            result = None
        if not result:
            print("该用户不存在，请重新输入")
            continue
        print("""
                    查询用户成功！！！！                 
                    """)
        table = clean([[name, server, qq]])
        print(table)

예제 #27

0

파일 보기

 def fix_excel_sheet(self, path):
     """Run the ``clean`` helper on ``path`` and return the path of the cleaned CSV.

     The output ("clean.csv") and a scratch file ("tmp.csv") both live in
     ``self.target_dir``.
     """
     from clean import clean
     target_dir = self.target_dir
     output_csv = os.path.join(target_dir, "clean.csv")
     scratch_csv = os.path.join(target_dir, "tmp.csv")
     clean().run(path, scratch_csv, output_csv)
     return output_csv

예제 #28

0

파일 보기

파일: main.py 프로젝트: QianChenglong/clean

def main():
    """Re-run ``clean.clean`` for the todo and question files that have changed.

    For each file reported as modified, the matching table is cleaned and
    the file's MD5 is saved; otherwise a message is logged saying the file
    is already clean.
    """
    # Get the file names to process for the current platform.
    (todo_filename, question_filename) = getFilenames()

    jobs = (
        (todo_tablename, todo_filename, "clean todo up!", "todo is clean!"),
        (question_tablename, question_filename, "clean question up!", "question is clean!"),
    )
    for tablename, filename, cleaned_msg, untouched_msg in jobs:
        # Check whether the file has been modified.
        if not fileWasModified(filename):
            logging.info(untouched_msg)
            continue
        clean.clean(db_name, tablename, filename)
        saveMD5(filename)
        logging.info(cleaned_msg)

예제 #29

0

파일 보기

def create(path):
    """Build the article table and the unigram / unigram+bigram dictionaries.

    ``clean(path)`` supplies the links and articles. They are saved to
    ./data/data.csv, and two ``corpora.Dictionary`` objects are saved under
    ./data/: one built from unigrams only, one from unigrams and bigrams.
    """
    links, articles = clean(path)

    table = pd.DataFrame()
    table["articles"] = articles
    table["links"] = links
    table.to_csv('./data/data.csv', index=False)

    article_column = table["articles"]
    # list of unigrams from all documents
    unigram_docs = article_column.map(preproc)
    # list of bigrams from all documents
    bigram_docs = article_column.map(bigram)

    # create and save dictionary including only unigrams
    corpora.Dictionary(unigram_docs).save("./data/dictionary_uni.pkl")

    # create and save dictionary including unigrams and bigrams
    combined_docs = pd.concat([unigram_docs, bigram_docs], ignore_index=True)
    corpora.Dictionary(combined_docs).save("./data/dictionary_uni+bi.pkl")

예제 #30

0

파일 보기

파일: update.py 프로젝트: davidlipson/TheLeague

def update(driver):
	"""Scrape the league standings and team pages 1-9 and return Team objects by team name.

	``driver`` is used to load pages and read element text (the calls used
	are ``get``, ``find_elements_by_tag_name`` and ``find_element_by_id``).
	For each team, the team card and every table row are parsed with helpers
	from ``clean``; each row becomes a ``classes.Player`` or, for position
	'G', a ``classes.Goalie``.

	NOTE(review): this is Python 2 code (print statement).
	"""
	teams = {}
	league = "https://hockey.fantasysports.yahoo.com/hockey/58622"
	fullstandings = league + '/standings'
	driver.get(fullstandings)
	# Every <tr> on the standings page, as a list of its text lines.
	cleanfull = clean.cleanstandings([str(i.text.encode('ascii', 'ignore')).split('\n') for i in driver.find_elements_by_tag_name('tr')])
	for i in range(1,10):
		teamvalues = []
		players = {}
		# NOTE(review): ``goalies`` is never filled; goalies are stored in ``players``.
		goalies = {}

		driver.get(league + "/" + str(i) + "?stat1=S&stat2=S_2015")

		teamcard = clean.cleanteam(str(driver.find_element_by_id("team-card-info").text.encode('ascii', 'ignore')).split('\n'))
		teamname = clean.cleanname(teamcard[0])
		teamowner = teamcard[1].split('Since')[0]
		teamrank = teamcard[2]
		print teamname
		print teamowner
		print teamrank
		teamvalues.append(teamname)
		teamvalues.append(teamowner)
		teamvalues.append(teamrank)


		rows = driver.find_elements_by_tag_name('tr')
		rows = [str(a.text.encode('ascii', 'ignore')).split('\n') for a in rows]
		rows = clean.clean(rows)

		for r in rows:
			print r
			playervalues = []
			playercard = clean.cleancard(r)
			print playercard
			# The first card field is split on spaces: tokens 0-1 are the
			# player name, token 2 the team and token 4 the position.
			playernames = playercard[0].split(' ')[0:2]
			playername = playernames[0] + ' ' + playernames[1]
			playerteam = playercard[0].split(' ')[2]
			playerpos = playercard[0].split(' ')[4]
			playervalues.append(playername)
			playervalues.append(playerteam)
			playervalues.append(playerpos)
			print playername
			print playerteam
			print playerpos
			playerstats = playercard[1:]
			print playerstats
			for x in playerstats:
				playervalues.append(x)
			if playerpos != 'G':
				players[playername] = classes.Player(*playervalues)
			else:
				players[playername] = classes.Goalie(*playervalues)
		teamvalues.append(players)
		# Append both groups of standings values recorded for this team.
		for c in cleanfull[teamname][0]:
			teamvalues.append(c)
		for c in cleanfull[teamname][1]:
			teamvalues.append(c)
		teams[teamname] = classes.Team(*teamvalues)
	return teams

예제 #31

0

파일 보기

파일: main.py 프로젝트: ggaudreault/python_workshop

def opendocs():
	"""Return the cleaned words of every file listed in ``docs`` as one flat list.

	Each file is read in full, passed through ``clean.clean`` and split on
	runs of spaces.
	"""
	all_docs = []
	for doc in docs:
		# Fix: close each file once it has been read instead of leaking the handle.
		with open(doc) as handle:
			text = handle.read()
		ctext = clean.clean(text)
		all_docs.extend(re.split(r"[ ]+", ctext))
	return all_docs

예제 #32

0

파일 보기

 def get_reviews(self, quantity=3):
     """Return ``quantity`` reviews sampled at random from both label groups.

     ``clean(self.file)`` provides the two groups, which are stored on
     ``self.label1`` and ``self.label2``. ``quantity`` reviews are sampled
     from each group and ``quantity`` are then sampled from the combined
     picks.
     """
     labelled = clean(self.file)
     self.label1 = labelled[0]
     self.label2 = labelled[1]
     picks = (random.sample(self.label1, k=quantity)
              + random.sample(self.label2, k=quantity))
     return random.sample(picks, k=quantity)

예제 #33

0

파일 보기

def creating_conversion_table():
    '''Create a table that renumbers store ids by their mean sales.

    Stores from ./data/train.csv are grouped by 'Store', averaged, ordered
    by 'Sales' ascending and numbered 1..N in that order. Returns a
    DataFrame with the single column 'new_id'.
    '''
    training = clean('./data/train.csv')
    mean_by_store = training.groupby('Store').agg('mean')
    ranked = mean_by_store.sort_values(by='Sales')
    ranked['new_id'] = np.arange(1, ranked.shape[0] + 1)
    return pd.DataFrame(ranked['new_id'])

예제 #34

0

파일 보기

파일: hole_in_one.py 프로젝트: michi42/Beta-Beat.src

def run_all_for_file(tbt_file, main_input, clean_input, harpy_input):
    """Run the optional raw-write, cleaning and harmonic-analysis steps for one file.

    If ``main_input.write_raw`` is set, the raw file is written first. When
    either ``clean_input`` or ``harpy_input`` is given, both planes ("x" and
    "y") are processed: cleaning and SVD cleaning (if ``clean_input`` is
    given), then harmonic analysis (if ``harpy_input`` is given). Bad BPMs
    found along the way are written out per plane. The cleaned data is
    written at the end when ``clean_input.write_clean`` is set.
    """
    if main_input.write_raw:
        output_handler.write_raw_file(tbt_file, main_input)

    if clean_input is not None or harpy_input is not None:
        clean_writer = output_handler.CleanedAsciiWritter(
            main_input, tbt_file.date)
        for plane in ("x", "y"):
            bpm_names = np.array(getattr(tbt_file, "bpm_names_" + plane))
            bpm_data = getattr(tbt_file, "samples_matrix_" + plane)
            all_bad_bpms = []
            usv = None
            # Fix: bpm_res was unbound below when no cleaning was requested.
            # NOTE(review): confirm get_orbit_data accepts None here.
            bpm_res = None
            if clean_input is not None:
                with timeit(lambda time: LOGGER.debug("Time for filtering: %s",
                                                      time)):
                    bpm_names, bpm_data, bad_bpms_clean = clean.clean(
                        bpm_names,
                        bpm_data,
                        clean_input,
                        tbt_file.date,
                    )
                with timeit(lambda time: LOGGER.debug("Time for SVD clean: %s",
                                                      time)):
                    bpm_names, bpm_data, bpm_res, bad_bpms_svd, usv = clean.svd_clean(
                        bpm_names,
                        bpm_data,
                        clean_input,
                    )
                all_bad_bpms.extend(bad_bpms_clean)
                all_bad_bpms.extend(bad_bpms_svd)
                setattr(clean_writer, "bpm_names_" + plane, bpm_names)
                setattr(clean_writer, "samples_matrix_" + plane, bpm_data)

            # Computed from the x plane only.
            if plane == "x":
                computed_dpp = calc_dp_over_p(main_input, bpm_names, bpm_data)

            if harpy_input is not None:
                with timeit(lambda time: LOGGER.debug(
                        "Time for harmonic_analysis: %s", time)):
                    drive_results, bad_bpms_fft = harmonic_analysis(
                        bpm_names,
                        bpm_data,
                        usv,
                        plane,
                        main_input,
                        harpy_input,
                    )
                    all_bad_bpms.extend(bad_bpms_fft)
                    #TODO: Writing of harpy should be done in output_handler
                    drive_results.write_full_results()

            lin_frame = get_orbit_data(bpm_names, bpm_data, bpm_res)
            output_handler.write_bad_bpms(main_input.file, all_bad_bpms,
                                          main_input.outputdir, plane)

        # Fix: clean_input may be None when only harpy_input was given.
        if clean_input is not None and clean_input.write_clean:
            clean_writer.dpp = computed_dpp
            clean_writer.write()

예제 #35

0

파일 보기

파일: firePCA.py 프로젝트: Jcampbell301/Jcampbell301.github.io

def firePCA():
    """Return the cleaned feature data reduced with PCA.

    The second element returned by ``clean.clean()`` is scaled with
    ``MinMaxScaler`` (data between 0 and 1) and then reduced by a ``PCA``
    configured with ``n_components=0.95`` (95% expected variance).

    Returns:
        The array produced by ``PCA.fit_transform`` on the scaled data.
    """
    # clean.clean() returns a pair; only its second element is used here.
    _, features = clean.clean()

    # Scale data between 0 and 1
    scaled = MinMaxScaler().fit_transform(features)

    # fit_transform fits the model and applies the reduction in one call,
    # so fitting separately beforehand would only repeat the work.
    pca = PCA(n_components=0.95)
    return pca.fit_transform(scaled)

예제 #36

0

파일 보기

 def update_data(event):
     """Predict a label for the current text input and display it.

     Reads ``textin.value``, cleans and vectorises it with ``transformer``,
     asks ``model`` for a prediction and writes the formatted result to
     ``p.text``. The ``event`` argument is not used.
     """
     raw_text = str(textin.value)
     features = transformer.transform([' '.join(clean(raw_text))])
     predicted = model.predict(features)
     # A prediction of 1 is shown as 'Male'; anything else as 'Female'.
     pred_text = 'Male' if int(predicted) == 1 else 'Female'
     p.text = "{}".format({'prediction': pred_text})

예제 #37

0

파일 보기

파일: run_all.py 프로젝트: samchaaa/avg_rotations_dash

def do_thing(inst, interval):
    """Run the rotation calculation pipeline for one instrument.

    Prints a progress line, takes the ``'mid'`` entry of the cleaned data
    for ``inst`` / ``interval``, and passes it through ``get_sign``,
    ``get_rotations`` and ``final``.

    Returns:
        Whatever ``final(inst, rotations)`` returns.
    """
    print("Running calculations: {}".format(inst))
    mid_values = clean(inst, interval)['mid']
    rotations = get_rotations(get_sign(mid_values))
    return final(inst, rotations)

예제 #38

0

파일 보기

def render_track(track, track_cfg):
    """Render every notebook of a track into its ``rendered`` directory.

    Each notebook listed in the track metadata is read from
    ``<track>/raw/``, exported through a ``NotebookExporter`` configured with
    the lesson preprocessor, and written to
    ``<track>/<track_cfg['tag']>/rendered/`` under the same filename.

    Args:
        track: Path of the track directory.
        track_cfg: Track configuration mapping; its ``'tag'`` entry names the
            output subdirectory.
    """
    meta = utils.get_track_meta(track, track_cfg)
    cfg = Config()
    cfg.Exporter.preprocessors = ['nb_utils.lesson_preprocessor.LearnLessonPreprocessor']
    exporter = NotebookExporter(config=cfg)
    resources = {'track_meta': meta, 'track_cfg': track_cfg}

    outdir = os.path.join(track, track_cfg['tag'], 'rendered')
    os.makedirs(outdir, exist_ok=True)
    for nb_meta in meta.notebooks:
        in_path = os.path.join(track, 'raw', nb_meta.filename)
        # The resources dict is shared across iterations; the per-notebook
        # entries are overwritten before each export.
        resources['lesson'] = nb_meta.lesson
        resources['nb_meta'] = nb_meta
        if CLEAN:
            clean(in_path)
        nb, _ = exporter.from_filename(in_path, resources)
        out_path = os.path.join(outdir, nb_meta.filename)
        # Write explicitly as UTF-8 so the output does not depend on the
        # platform's default encoding when a notebook has non-ASCII text.
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(nb)

예제 #39

0

파일 보기

 def write(self, text):
     """Write the crawled information to a file."""
     # The text needs to be cleaned before it is written.
     stamp = time.strftime('%Y-%m-%d-%H', self.now_time)
     out_path = 'topic/' + self.topic + '/' + self.topic + stamp + '.txt'
     with open(out_path, 'a') as out:
         out.write(clean(text))  # write the cleaned text
         # When called inside a loop, each record needs its own line break;
         # otherwise it would run into the previous record.
         out.write('\n')

예제 #40

0

파일 보기

파일: scambler.py 프로젝트: weingrill/SOCS

def pooltest():
    """Compare the strongest ``clean`` peak of star 284 with pooled worker runs.

    Loads the light curve of M48 star 284, removes its mean and a linear
    trend, runs ``clean`` on it and prints the strongest peak at
    non-negative frequency. It then maps ``_worker`` over ``runs`` indices in
    a ``multiprocessing.Pool``, saves the sorted results to a text file and
    writes a scatter plot plus a histogram to a PDF.
    """
    from m48star import M48Star
    
    star = M48Star(None, tab=284)
    lc = star.lightcurve()
    t  = lc.hjd
    # Shift the time axis in place so that it starts at zero.
    t -= t[0]
    y  = lc.mag
    # Remove the mean magnitude in place.
    y -= np.mean(y)
    
    # Fit a first-degree polynomial and subtract it to remove a linear trend.
    par = np.polyfit(t, y, 1)
    
    y -= par[0]*t + par[1]

    # clean returns four values; the third one is not used here.
    f, px, _, sigma0 = clean(t, y)
    # Keep only the non-negative frequencies. px must be filtered first
    # because the mask is built from the still-unfiltered f.
    px = px[f>=0.0]
    f = f[f>=0.0]
    
    # Locate the strongest remaining peak.
    i = np.argmax(px)
    fi = f[i]
    pxi = px[i]
    # Print the peak as 1/frequency and as its power divided by sigma0.
    print '%.2f %.1f' % (1./fi, pxi/sigma0)
    
    runs = 10000
    
    from multiprocessing import Pool
    w = np.arange(runs)
    # Every pool process is initialised by _init with the detrended (t, y).
    # NOTE(review): presumably _worker scrambles the light curve (the output
    # files are named "scrambler") -- confirm against _worker.
    pool = Pool(initializer=_init, initargs=(t,y))
    p = pool.map(_worker, w)
    pool.close() # no more tasks
    pool.join()  # wrap up current tasks
    # From here on f and px hold the first and second element of each
    # worker result, replacing the arrays from the clean call above.
    f = np.array([pi[0] for pi in p])
    px = np.array([pi[1] for pi in p])
    plt.subplot('211')
    # Sort both result arrays by f.
    k = np.argsort(f)
    f = f[k] 
    px = px[k]
    np.savetxt('/work2/jwe/SOCS/M48/data/scrambler.txt', np.c_[f,px], fmt='%6.3f',header='period sigma')
    # Worker results whose f lies strictly between 3.07 and 3.33.
    px1 = px[(f>3.07) & (f<3.33)]
    print px1.shape, 1./fi, pxi
    # Upper panel: all worker results, with the measured peak marked in red.
    plt.scatter(f, px, edgecolor='none', alpha=0.5)
    plt.scatter(1./fi, pxi/sigma0, c='r', edgecolor='none', s=80)
    plt.minorticks_on()
    #plt.show()
    plt.xlim(0.0, t[-1]/2)
    plt.title('star 284: %d runs' % runs)
    plt.xlabel('period [days]')
    plt.ylabel('$\sigma$')
    # Lower panel: histogram of the workers' f values over the same x range,
    # with the measured 1/fi marked by a red vertical line.
    plt.subplot('212')
    plt.hist(f, bins=np.sqrt(runs), range=[0.0,t[-1]/2], normed=True)
    plt.axvline(1./fi, color='r')
    plt.savefig('/work2/jwe/SOCS/M48/plots/scrambler1.pdf')
    
    plt.close()

예제 #41

0

파일 보기

def main(num, speed=0.1, acceleration=speed):
    """Record the bucket poses interactively, run ``clean`` and return.

    Connects to the robot at ``192.168.1.<num>``, prompts the operator to
    position the tool over each bucket and at a working height, runs
    ``clean`` and then moves the robot back to the recorded height pose.
    The robot connection is closed in all cases.

    NOTE(review): the default of ``acceleration`` is evaluated once, when the
    function is defined, from a module-level name ``speed`` -- not from the
    ``speed`` argument of the call. Confirm that this is intended.

    Args:
        num: Last octet of the robot's IP address; also passed to ``clean``.
        speed: Speed passed to ``robot.set_pose``.
        acceleration: Acceleration passed to ``robot.set_pose``.

    Returns:
        The ``(robot, gripper)`` pair; the robot has already been closed.
    """
    ip = f'192.168.1.{num}'
    robot = getRobot(ip)
    gripper = Gripper(robot)
    buckets = {}
    try:
        for colour in ("Red", "Green", "Yellow"):
            input(f"Move tool over the {colour.upper()} bucket, then press Enter: ")
            buckets[colour] = robot.get_pose()
        input("Move tool to a good height and press enter: ")
        position = robot.get_pose()
        clean(num, robot, gripper, buckets)
        robot.set_pose(position, speed, acceleration)
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C at a prompt and
        # SystemExit still propagate (after the robot is closed below), and
        # report what actually failed.
        print(f'Something went wrong: {exc}')
    finally:
        robot.close()
    return robot, gripper

예제 #42

0

파일 보기

def extract(image, predictor):
    """Return the 9x9 grid of predicted digits for a sudoku image.

    Returns ``None`` when ``clean`` yields no sudoku, and also when any
    exception is raised along the way (the exception is logged as an error).
    """
    try:
        grid, _unused = clean(image)
        if grid is not None:
            # One 28x28 single-channel cell for each of the 81 squares.
            cell_batch = np.reshape(get_cells(grid), (81, 28, 28, 1))
            return predictor.predict(cell_batch).reshape(9, 9)
        return None
    except Exception as err:
        logging.error(err)

예제 #43

0

파일 보기

파일: main.py 프로젝트: David-Happel/scene-location-NLP

def main():
    """Run every cleaning configuration through every embedding technique.

    For each entry of ``cleaning_options_list`` the parsed transcripts are
    read, cleaned, balanced and split. For each technique in ``config`` the
    split is embedded, classified and evaluated, and the test results and
    coefficients are pickled via ``report_path``.

    Raises:
        ValueError: If ``config`` names a technique other than ``"elmo"`` or
            ``"tfidf"``.
    """
    log('starting with: ' + str(config))

    # The data file location does not depend on the cleaning options, so it
    # is resolved once: <parent of this script's directory>/data/...
    script_path = os.path.abspath(__file__)  # path to python script
    directory_path = os.path.dirname(os.path.split(script_path)[0])
    data_path = os.path.join(directory_path, "data/parsed_transcripts.csv")

    for cleaning_options in cleaning_options_list:
        log('cleaning options: ' + str(cleaning_options))

        log('reading data..')
        # Re-read for every configuration so each cleaning pass starts from
        # the data as stored on disk.
        data = pd.read_csv(data_path, index_col=0)

        log('cleaning data..')
        data = clean(data, cleaning_options)
        data = balance_down(data)

        train, test = split(data)
        log("training size: " + str(len(train)))
        log("testing size: " + str(len(test)))

        for technique, options in config.items():
            log('starting: ' + technique)
            if technique == "elmo":
                embedded_train, embedded_test = elmo_embed(
                    train, test, options)
                feature_length = len(
                    np.array(embedded_train["embedding"].iloc[1]))
                log("elmo feature length: " + str(feature_length))
                # Placeholder feature names: one zero per embedding
                # dimension.
                feature_names = np.zeros(feature_length)
            elif technique == 'tfidf':
                embedded_train, embedded_test, feature_names = tfidf_vectorize(
                    train, test, options)
                log("tfidf feature length: " +
                    str(len(np.array(embedded_train["embedding"].iloc[1]))))
            else:
                # Without this guard an unknown technique would classify the
                # previous technique's embeddings (or fail with NameError on
                # the first iteration).
                raise ValueError('unknown embedding technique: ' +
                                 str(technique))

            train_res, test_res, coef = classify(embedded_train,
                                                 embedded_test,
                                                 options,
                                                 technique=technique)
            test_res = evaluate(test_res)
            test_res.to_pickle(report_path(technique + '_test_results.pkl'))
            coefficients = pd.DataFrame({
                "feature_name": feature_names,
                "coef": coef
            })
            coefficients.to_pickle(report_path(technique +
                                               '_coefficients.pkl'))

    log('done')

예제 #44

0

파일 보기

파일: selfcal.py 프로젝트: adamginsburg/w51evlareductionscripts

def apply_selfcal(rawvis, field, spwn_source, spwn_target, calnum=0):
    """Apply existing self-calibration tables to one spectral window and image it.

    Splits spectral window ``spwn_target`` out of ``rawvis``, applies the two
    gain tables named after ``field`` and ``spwn_source``, cleans the result
    into ``spw<spwn_target>_C_C_selfcal<calnum>_final_cube`` and exports the
    image as FITS.

    Args:
        rawvis: Measurement set to split the target spectral window from.
        field: Field name; spaces are removed when building table and file
            names.
        spwn_source: Spectral window number whose gain tables are applied.
        spwn_target: Spectral window number to split, calibrate and image.
        calnum: Self-calibration iteration whose table is applied.

    NOTE(review): ``robust`` and ``clean_output_suffixes`` are not defined in
    this function and are read from the enclosing scope -- confirm they are
    set before calling.
    """
    field_tag = field.replace(" ", "")
    noavg_data = '%s_spw%i_split.ms' % (field_tag, spwn_target)
    aptable = 'selfcal_ap_%s_spw%i.gcal' % (field_tag, spwn_source)
    caltable = 'selfcal%i_%s_spw%i.gcal' % (calnum, field_tag, spwn_source)

    # Remove any previous split output before recreating it.
    os.system('rm -rf '+noavg_data)
    # Split from the measurement set the caller passed in, not from whatever
    # ``vis`` happens to be bound to outside this function.
    split(vis=rawvis,
          outputvis=noavg_data,
          datacolumn='corrected', # was 'data'...
          spw=str(spwn_target))
    applycal(vis=noavg_data,
             gaintable=[aptable,caltable],
             interp='linear',
             flagbackup=True) # was False when flagmanager was used

    selfcal_image = 'spw%i_C_C_selfcal%i_final_cube' % (spwn_target,calnum)
    # Remove the products of an earlier clean run with the same image name.
    for suffix in clean_output_suffixes:
        os.system("rm -rf "+selfcal_image+suffix)
    clean(vis=noavg_data,imagename=selfcal_image,field=field, mode='frequency',# mask=cleanboxes,
            multiscale=[0,5,10,25], psfmode='hogbom',
            weighting='briggs', robust=robust, niter=10000, imsize=512)
    exportfits(imagename=selfcal_image+".image", fitsimage=selfcal_image+".fits", overwrite=True)

예제 #45

0

파일 보기

파일: test.py 프로젝트: johnnadratowski/fabuild

    def test_clean(self):
        """Check ``clean`` with match/ignore patterns, recursion and no filter."""
        # Match '*test.*' while ignoring '*.txt': test.py must be gone.
        clean(files={'path': '/tmp/test_fab_build/',
                     'match': ['*test.*'],
                     'ignore': ['*.txt']})
        self.assertFalse(os.path.exists('/tmp/test_fab_build/test.py'))

        # Recursive match on '*.py': no fixture .py file may remain.
        clean(files={'path': '/tmp/test_fab_build/',
                     'recursive': True,
                     'match': ['*.py']})
        for py_path in self.files:
            if py_path.endswith(".py"):
                self.assertFalse(os.path.exists(py_path))

        # Recursive match on '*3': the nested entry 1/2/3 must be gone.
        clean(files={'path': '/tmp/test_fab_build/',
                     'recursive': True,
                     'match': ['*3']})
        self.assertFalse(os.path.exists('/tmp/test_fab_build/1/2/3'))

        # No patterns at all: every fixture directory and file must be gone.
        clean(files={'path': '/tmp/test_fab_build/'})
        for dir_path in self.dirs:
            self.assertFalse(os.path.exists(dir_path))
        for file_path in self.files:
            self.assertFalse(os.path.exists(file_path))

예제 #46

0

파일 보기

파일: zeeman.py 프로젝트: Smattacus/data-analysis

def ZeemanFitFunc_VaryV(RSi, RSf, Ei, Ef, P, V, tls, wavelengths, s):
    """
        Function which takes a Zeeman spectrum, deconvolves it with the natural
        Zeeman linewidth according to B and P, and returns the deconvolved
        spectrum. This is to be used with an optimization problem.

        The magnetic field is fixed at B = 1e3 in this function.

        See also: ZeemanFit_VaryB, which postulates that there is a magnetic
        field other than 1kG in the plasma.

        INPUTS:
        RSi     -   Russel Saunders term for the initial energy level.
        RSf     -   Russel Saunders term for the final energy level.
        Ei      -   Energy for the initial energy level.
        Ef      -   Energy for the final energy level.
        P       -   Polarization of Zeeman lines to fit (-1, 0, 1).
        V       -   Streaming flow. cm/s defined to be positive towards the
                    observer (laser).
        tls     -   Tuple of (ti, tf) for the mean lifetimes of the starting
                    and ending state. Just one entry in the tuple is fine.
        wavelengths -   Wavelengths of data.
        s       -   Measured LIF spectrum.
        OUTPUTS
        x       -   Cleaned wavelengths.
        yn      -   Cleaned and normalized spectrum.
        lns_clnn-   Cleaned and normalized natural lines.
        ivdf    -   Resulting deconvolved spectrum.
        errs    -   Absolute value of the error estimate, derived from the
                    mean magnitude of the spectrum's transform at
                    frequencies above freq_end.
    """
    #Try deconvolving using the usual process.
    #Normalize:
    (x, y) = clean.clean(wavelengths, s)
    yn = y / np.sqrt(np.dot(y, y))
    B = 1e3
    lns_cln = Zeeman_Lines(RSi, RSf, Ei, Ef, x, P, B, tls, velocity=V)
    lns_clnn = lns_cln / np.sqrt(np.dot(lns_cln, lns_cln))
    # Transform both the natural lines and the measured spectrum; the
    # wavelength grid spacing is taken from the first two cleaned samples.
    [f, gl] = spec.spec(lns_clnn, x[1] - x[0])
    [f, gs] = spec.spec(yn, x[1] - x[0])
    # fi: frequencies above the cutoff (zeroed below); fwi: those kept.
    # An unused noise-band index was dropped here: it subtracted two boolean
    # arrays, which current NumPy rejects with a TypeError.
    fi = np.where(np.abs(f) > freq_end)
    fwi = np.where(np.abs(f) <= freq_end)
    # Deconvolve by dividing the magnitudes, then discard everything above
    # the cutoff frequency.
    gdc = np.abs(gs) / np.abs(gl)
    gdc[fi] = 0
    [_, ivdf] = spec.ispec(gdc, f[1] - f[0])
    ivdf = sp.fftpack.fftshift(ivdf)
    #Get the errors too.
    err = np.mean(np.abs(gs[fi]))
    sig_real = np.sqrt(err**2 / np.size(fwi) * np.sum(1 / gl[fwi]**2) *
        np.size(fwi) / np.size(f))
    errs = np.abs(sig_real)
    return (x, yn, lns_clnn, ivdf, errs)

예제 #47

0

파일 보기

파일: unit_tests_clean.py 프로젝트: masdude/pypub

 def test_clean_empty_img(self):
     """Extra ``<img>`` tags without a ``src`` must clean to the same output."""
     one_empty_src = u'''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <img src=""></img>
              </body>
             </html>
             '''
     with_srcless_imgs = u'''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <img src=""></img>
               <img></img>
               <img/>
              </body>
             </html>
             '''
     # Clean the longer document first, matching the original call order.
     actual = condense(clean(with_srcless_imgs))
     expected = condense(clean(one_empty_src))
     self.assertEqual(actual, expected)

예제 #48

0

파일 보기

파일: unit_tests_clean.py 프로젝트: vasudevram/pypub

 def test_html_to_xhtml(self):
     """Cleaned HTML converted with ``html_to_xhtml`` must match the XHTML."""
     expected_xhtml = u'<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div id="Test">Hello</div><br /><br /></body></html>'
     # Input with an upper-case tag and attribute and unclosed <br> tags.
     loose_html = u'''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <DIV ID="Test">Hello</div>
               <br>
               <br>
              </body>
             </html>
             '''
     converted = condense(html_to_xhtml(clean(loose_html)))
     self.assertEqual(converted, expected_xhtml)

예제 #49

0

파일 보기

파일: predict.py 프로젝트: everal/text_classification

 def isValid(self,wbcontent):
     """Return 1.0 when the weighted keyword score reaches the threshold.

     The content is segmented and cleaned first. For every key of
     ``self.d_Attribute`` whose word list passes ``self.lookup``, each word
     is used as a regular expression and its match count, multiplied by the
     key converted to float, is added to the score.

     Returns:
         1.0 if the score is at least ``self.threshold``, otherwise 0.0.
     """
     # Clean the content.
     wbcontent = clean.clean(clean.seg(wbcontent))
     # Calculate the weight.
     score = 0.0
     for weight_key in self.d_Attribute:
         if not self.lookup(self.d_Attribute[weight_key], wbcontent):
             continue
         for word in self.d_Attribute[weight_key]:
             hits = len(re.compile(word).findall(wbcontent))
             score = score + hits*float(weight_key)
     return 1.0 if score >= self.threshold else 0.0

예제 #50

0

파일 보기

파일: brain.py 프로젝트: RobertTheMagnificent/scrib

	def __init__(self):
		"""
		Here we'll load settings and set up us the brain.

		Loads the brain configuration and the static answers (creating them
		with the defaults below when needed) and starts the autosave and
		auto-rebuild timers.
		"""
		self.version = '0.2.1'
		self.barf = barf.Barf
		self.clean = clean.clean()
		self.cfg = cfg
		self.settings = self.cfg.set()
		
		# Load brain config (or create with these defaults).
		self.settings.load('conf/brain.cfg', {
			'debug': 0,
			'symbol': '!',
			'learning': 1,
			'censored': [],
			'num_words': 0,
			'num_contexts': 0,
			'num_aliases': 0,
			'max_words': 1000000,
			'aliases': {},
			'optimum': 0,
			'ignore_list': []
			})

		self.static_answers = self.cfg.set()
		self.static_answers.load("brain/answers.dat", {
			"sentences": {}
			})
		self.unfilterd = {}
		self.timers_started = False
		
		# Starts the timers:
		if self.timers_started is False:
			try:
				self.autosave = threading.Timer(self.to_sec("125m"), self.__save)
				self.autosave.start()
				self.autorebuild = threading.Timer(self.to_sec("71h"), self.auto_rebuild)
				self.autorebuild.start()
				# Record the state on the instance; assigning a bare local
				# here would leave self.timers_started False forever.
				self.timers_started = True
			except SystemExit:
				self.autosave.cancel()
				self.autorebuild.cancel()

예제 #51

0

파일 보기

파일: test.py 프로젝트: johnnadratowski/fabuild

    def test_coffee(self):
        """Check ``coffee`` output for plain, matched, mapped and joined builds."""
        js_outputs = [
            '/tmp/test_fab_build/coffee/test1.js',
            '/tmp/test_fab_build/coffee/test2.js',
            '/tmp/test_fab_build/coffee/test3.js',
        ]
        map_outputs = [
            '/tmp/test_fab_build/coffee/test1.map',
            '/tmp/test_fab_build/coffee/test2.map',
            '/tmp/test_fab_build/coffee/test3.map',
        ]

        # Plain build: every source gets a .js file.
        coffee(files=dict(path='/tmp/test_fab_build/coffee'))
        for built in js_outputs:
            self.assertTrue(os.path.exists(built))

        clean(files=dict(path='/tmp/test_fab_build/coffee', ignore=['*.coffee']))

        # Build restricted by a match pattern: only test1 is produced.
        coffee(files=dict(path='/tmp/test_fab_build/coffee', match=['*/test1*']))
        self.assertTrue(os.path.exists('/tmp/test_fab_build/coffee/test1.js'))
        for skipped in ('/tmp/test_fab_build/coffee/test2.js',
                        '/tmp/test_fab_build/coffee/test3.js'):
            self.assertFalse(os.path.exists(skipped))

        clean(dict(path='/tmp/test_fab_build/coffee', ignore=['*.coffee']))

        # Build with map=True: a .map file accompanies each .js file.
        coffee(dict(path='/tmp/test_fab_build/coffee'), map=True)
        for built in js_outputs + map_outputs:
            self.assertTrue(os.path.exists(built))

        clean(dict(path='/tmp/test_fab_build/coffee', ignore=['*.coffee']))

        # Joined build: a single joined.js with its map.
        coffee(dict(path='/tmp/test_fab_build/coffee'),
               join='joined.js', output="/tmp/test_fab_build/coffee", map=True)
        for joined in ('/tmp/test_fab_build/coffee/joined.js',
                       '/tmp/test_fab_build/coffee/joined.map'):
            self.assertTrue(os.path.exists(joined))

        clean(dict(path='/tmp/test_fab_build/coffee', ignore=['*.coffee']))

예제 #52

0

파일 보기

파일: selfcal.py 프로젝트: adamginsburg/w51evlareductionscripts

def selfcal(vis, spwn=6, doplots=True, INTERACTIVE=False, reclean=True, field='W51 Ku',
        outdir_template="spw%i_selfcal_iter/", statsbox='170,50,229,97', ant1list=['ea14','ea05'],
        ant2list=['ea16','ea07'], avgchannel_wide='128', avgchannel_narrow='8',
        cleanboxes=cleanboxes, refant='ea27', solint='30s', niter=2,
        multiscale=[0,5,10,15,25,50], imsize=512, robust=0.0 ):
    """
    Docstring incomplete
    """

    spw = int(spwn)
    outdir = outdir_template % spwn
    try:
        os.mkdir(outdir)
    except OSError:
        pass

    # you're supposed to pass in avg_data as input
    avg_data = vis

    mytb.open(vis+"/ANTENNA")
    antnames = mytb.getcol("NAME")

    # plot each antenna's ampl vs time for flagging purposes
    for ant2 in ant2list:
        for ant in ant1list:
            plotms(vis=vis, spw=str(spwn), xaxis='time', yaxis='amp', avgchannel=avgchannel_wide,
                    avgscan=F, coloraxis='baseline', iteraxis='', xselfscale=T,
                    yselfscale=T,
                    antenna=ant+"&"+ant2,
                    title='Amp vs Time before averaging for spw %i ant %s-%s' % (spwn,ant,ant2),
                    plotfile=outdir+'ampvstime_spw%i_ant%s-%s.png' % (spwn,ant,ant2),
                    field=field,
                    overwrite=True,
                    )

            plotms(vis=vis, spw=str(spwn), xaxis='freq', yaxis='phase', avgtime='1e8',
                    avgscan=T, coloraxis='corr', iteraxis='baseline', xselfscale=T,
                    yselfscale=T,
                    antenna=ant+"&"+ant2,
                    title='Phase vs Freq with time averaging for spw %i ant %s-%s' % (spwn,ant,ant2),
                    plotfile=outdir+'phasevsfreq_spw%i_ant%s-%s.png' % (spwn,ant,ant2),
                    field=field,
                    overwrite=True,
                    )

            plotms(vis=vis, spw=str(spwn), xaxis='amp', yaxis='phase', avgtime='1e8',
                    avgscan=T, coloraxis='corr', iteraxis='baseline', xselfscale=T,
                    yselfscale=T,
                    antenna=ant+"&"+ant2,
                    title='Phase vs Amp with time averaging for spw %i ant %s-%s' % (spwn,ant,ant2),
                    plotfile=outdir+'phasevsamp_spw%i_ant%s-%s.png' % (spwn,ant,ant2),
                    field=field,
                    overwrite=True,
                    )

    # imagename = "noaverage_spw%i" % spwn
    # os.system("rm -rf "+imagename+".image")
    # os.system("rm -rf "+imagename+".model")
    # os.system("rm -rf "+imagename+".flux")
    # os.system("rm -rf "+imagename+".psf")
    # os.system("rm -rf "+imagename+".residual")
    # clean(vis=vis, field=field, imagename=imagename, mode='mfs', 
    #         weighting='briggs', robust=robust, niter=500, imsize=512)
    # viewer(imagename+".image",
    #         outfile=outdir+imagename+".image.png",
    #         outformat='png',
    #         gui=False)
    # exportfits(imagename=imagename+".image", fitsimage=imagename+".fits", overwrite=True)


    #width = 10 # for TW Hydra
    # width = 4 # for NGC 3256


    # (0) Using your split-off, calibrated data, plot the "model" in this MS using
    # plotms.  It should be unit-valued for all data.  If not, run delmod to get
    # rid of any model that might still be lurking in the header, and/or clearcal
    # to set to 1 any MODEL data.
    plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp',
            avgchannel=avgchannel_wide, xdatacolumn='model', ydatacolumn='model', avgscan=F,
            coloraxis='baseline', iteraxis='', xselfscale=T, yselfscale=T,
            title='Model Amp vs Time after split for spw %i.  Should be all 1s' % spwn,
            plotfile=outdir+'ampvstime_model_shouldbe1.png', field=field,
            overwrite=True,)
    delmod(vis=avg_data)

    plotms(vis=avg_data, spw='0', xaxis='phase', yaxis='amp',
            avgchannel=avgchannel_wide, xdatacolumn='data', ydatacolumn='data', avgscan=F,
            coloraxis='baseline', iteraxis='', xselfscale=T, yselfscale=T,
            title='Corrected Phase vs Amp after split',
            plotfile=outdir+'ampvsphase_corrected_avg_spw%i.png' % spwn, field=field,
            overwrite=True,)

    # (0.5) Run clean non-interactively with some set number of iterations, and be
    # sure to keep the image around for comparison later.  Run delmod to get rid of
    # the model it saved to the MS header.
    #if reclean:
    #    imagename="average_spw%i_shallowclean" % spwn

    #    for suffix in clean_output_suffixes:
    #        os.system("rm -rf "+imagename+suffix)

    #    clean(vis=avg_data, field=field, imagename=imagename, mode='mfs', 
    #            weighting='briggs', robust=robust, niter=100, imsize=512)
    #    viewer(imagename+".image",
    #            outfile=outdir+imagename+".image.png",
    #            outformat='png',
    #            gui=False)
    #    exportfits(imagename=imagename+".image", fitsimage=imagename+".fits", overwrite=True)
    #    delmod(avg_data,scr=True)


    # (1) Clean a single SPW *interactively*, boxing the brightest regions and not
    # cleaning very deeply (maybe 100 iterations).  Keep this model in the header
    # -- it's what you'll use for the first round of self-calibration.
    if reclean:
        imagename="average_spw%i_shallowclean_masked" % spwn

        for suffix in clean_output_suffixes:
            os.system("rm -rf "+imagename+suffix)

        clean(vis=avg_data, field=field, imagename=imagename, mode='mfs', 
                psfmode='hogbom',multiscale=multiscale,
                weighting='briggs', robust=robust, niter=100, imsize=imsize,
                mask=cleanboxes,
                nterms=2,
                usescratch=True)
        viewer(imagename+".image.tt0",
                outfile=outdir+imagename+".image.tt0.png",
                outformat='png',
                gui=False)
        exportfits(imagename=imagename+".image.tt0", fitsimage=imagename+".fits", overwrite=True)

    imrms = [imstat(imagename+".image.tt0",box=statsbox)['rms']]

    # FAILS!!!!
    #plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp',
    #        avgchannel='128', xdatacolumn='model', ydatacolumn='model', avgscan=F,
    #        coloraxis='baseline', iteraxis='', xselfscale=T, yselfscale=T,
    #        title='Model Amp vs Time after shallow clean for spw %i.' % spwn,
    #        plotfile=outdir+'ampvstime_model_shallowclean_spw%i.png' % spwn, field=field,
    #        overwrite=True,)


    for calnum in xrange(niter):

        # for Ku D W51 Ku spw 2
        if reclean:

            first_image = 'spw%i_C_C_firstim_selfcal%i' % (spwn,calnum)

            for suffix in clean_output_suffixes:
                os.system("rm -rf "+first_image+suffix)

            clean(vis=avg_data,imagename=first_image,field=field, mode='mfs', 
                    psfmode='hogbom',multiscale=multiscale,
                    weighting='briggs', robust=robust, niter=100, imsize=imsize,
                    mask=cleanboxes,
                    nterms=2,
                    usescratch=True)
            exportfits(imagename=first_image+".image.tt0", fitsimage=first_image+".fits", overwrite=True)

        viewer(first_image+".image.tt0",
                outfile=outdir+first_image+".image.tt0.png",
                outformat='png',
                gui=False)

        # this fails?
        #plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp',
        #    avgchannel='128', xdatacolumn='model', ydatacolumn='model', avgscan=F,
        #    coloraxis='baseline', iteraxis='', xselfscale=T, yselfscale=T,
        #    title='Model Amp vs Time after shallow clean for spw %i iter %i.' % (spwn,calnum),
        #    plotfile=outdir+'ampvstime_model_shallowclean_spw%i_iter%i.png' % (spwn,calnum), field=field,
        #    overwrite=True,)

        # DONE avg/split ing

        caltable = 'selfcal%i_%s_spw%i.gcal' % (calnum,field.replace(" ",""),spwn)
        if reclean:
            os.system('rm -rf '+caltable)
            gaincal(vis=avg_data,
                    field='',
                    caltable=caltable,
                    spw='',
                    # gaintype = 'T' could reduce failed fit errors by averaging pols...
                    gaintype='G', #  'G' from http://casaguides.nrao.edu/index.php?title=EVLA_Advanced_Topics_3C391
                    solint=solint,
                    refant=refant,
                    calmode='p',
                    combine='scan',
                    minblperant=4)

        #
        # Watch out for failed solutions noted in the terminal during this
        # solution. If you see a large fraction (really more than 1 or 2) of
        # your antennas failing to converge in many time intervals then you
        # may need to lengthen the solution interval.
        #

        # =%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%
        # INSPECT THE CALIBRATION
        # =%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%

        #
        # After you have run the gaincal, you want to inspect the
        # solution. Use PLOTCAL to look at the solution (here broken into
        # panels by SPW with individual antennas mapped to colors). Look at
        # the overall magnitude of the correction to get an idea of how
        # important the selfcal is and at how quickly it changes with time to
        # get an idea of how stable the instrument and atmosphere were.
        #

        if doplots:

            for ant2 in ant2list:
                for ant in ant1list:
                    # (4) Have a look at the gain solutions by antenna.  Which antennas
                    # have the largest phase corrections?  Before applying the
                    # calibration, use plotms to display the corrected phase vs. amp
                    # for these antennas, to compare with *after* the correction is
                    # applied.
                    plotcal(caltable=caltable,
                            xaxis='time', yaxis='phase',
                            showgui=False,
                            antenna=ant+'&'+ant2,
                            figfile=outdir+'selfcal%i_spw%i_phasevstime_ant%s-%s.png' % (calnum,spwn,ant,ant2),
                            iteration='')#, subplot = 221)
                    #plotcal(caltable=caltable, xaxis='amp', yaxis='phase',
                    #        showgui=False,
                    #        antenna=ant,
                    #        figfile=outdir+'selfcal%i_spw%i_phasevsamp_ant%s.png' % (calnum,spwn,ant),
                    #        iteration='')#, subplot = 221)
                    if calnum == 0:
                        datacol='data'
                    else:
                        datacol='corrected'
                    plotms(vis=avg_data, xaxis='time', yaxis='phase',
                            xdatacolumn=datacol, ydatacolumn=datacol,
                            avgtime='15s', avgchannel=avgchannel_narrow, coloraxis='corr',
                            antenna=ant+'&'+ant2,
                            overwrite=True, title='Iteration %i for spw %i and ant %s-%s.  datacol=%s' % (calnum,spwn,ant,ant2,datacol), 
                            plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phasetime.png' % (calnum,spwn,ant,ant2),)
                    plotms(vis=avg_data, xaxis='time', yaxis='amp',
                            xdatacolumn=datacol, ydatacolumn=datacol,
                            avgtime='15s', avgchannel=avgchannel_narrow, coloraxis='corr',
                            antenna=ant+'&'+ant2,
                            overwrite=True, title='Iteration %i for spw %i and ant %s-%s.  datacol=%s' % (calnum,spwn,ant,ant2,datacol), 
                            plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_amptime.png' % (calnum,spwn,ant,ant2),)
                    plotms(vis=avg_data, xaxis='phase', yaxis='amp',
                            xdatacolumn=datacol, ydatacolumn=datacol,
                            avgtime='60s', avgchannel=avgchannel_narrow, coloraxis='corr',
                            antenna=ant+'&'+ant2,
                            overwrite=True, title='Iteration %i for spw %i and ant %s-%s.  datacol=%s' % (calnum,spwn,ant,ant2,datacol), 
                            plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phaseamp.png' % (calnum,spwn,ant,ant2),)

            plotcal(caltable=caltable,
                    xaxis='time', yaxis='phase',
                    plotrange=[0,0,-180,180],
                    showgui=INTERACTIVE,
                    figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_phasevstime.png' % (calnum,spwn),
                    iteration='spw' if INTERACTIVE else '')#, subplot = 221)

            plotcal(caltable=caltable,
                    xaxis='antenna', yaxis='phase',
                    showgui=INTERACTIVE,
                    figfile=outdir+'selfcal%i_spw%i_phasevsantenna.png' % (calnum,spwn),
                    iteration='')

            plotcal(caltable=caltable,
                    xaxis='time', yaxis='amp',
                    plotrange=[0,0,0.5,1.5],
                    showgui=INTERACTIVE,
                    figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_ampvstime.png' % (calnum,spwn),
                    iteration='spw' if INTERACTIVE else '')#, subplot = 221)

            #plotcal(caltable=caltable,
            #        xaxis='phase', yaxis='amp',
            #        plotrange=[-50,50,0.5,1.5],
            #        showgui=INTERACTIVE,
            #        figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_ampvsphase.png' % (calnum,spwn),
            #        iteration='spw' if INTERACTIVE else '')#, subplot = 221)

            # THERE WILL BE WEIRD "LUSTRE" ERRORS GENERATED BY THE FILE SYSTEM. DO
            # NOT FREAK OUT. These are just a feature of our fast file
            # system. Plotcal will still work.

            # It can be useful useful to plot the X-Y solutions (i.e., differences
            # between polarizations) as an indicator of the noise in the
            # solutions.

            plotcal(caltable=caltable,
                    xaxis='time', 
                    yaxis='phase',
                    plotrange=[0,0,-25, 25], 
                    poln = '/',
                    showgui=INTERACTIVE,
                    iteration='spw,antenna' if INTERACTIVE else '', 
                    figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_poldiff.png' % (calnum,spwn),
                    subplot = 221 if INTERACTIVE else 111)

            plotms(vis=avg_data,
                    xaxis='uvdist',
                    yaxis='amp',
                    xdatacolumn='corrected',
                    ydatacolumn='corrected',
                    avgtime='1e8s',
                    avgchannel=avgchannel_narrow,
                    coloraxis='baseline',
                    overwrite=True,
                    title='Iteration %i for spw %i' % (calnum,spw),
                    plotfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_uvdistamp.png' % (calnum,spwn),
                    )

            #plotms(vis=avg_data,
            #        xaxis='phase',
            #        yaxis='amp',
            #        xdatacolumn='corrected',
            #        ydatacolumn='corrected',
            #        avgtime='60s',
            #        avgchannel=avgchannel_narrow,
            #        coloraxis='corr',
            #        overwrite=True,
            #        title='Iteration %i for spw %i' % (calnum,spw),
            #        plotfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_phaseamp.png' % (calnum,spwn),
            #        )

            plotms(vis=avg_data,
                    xaxis='time',
                    yaxis='amp',
                    xdatacolumn='corrected',
                    ydatacolumn='corrected',
                    avgtime='10s',
                    avgchannel=avgchannel_narrow,
                    coloraxis='baseline',
                    overwrite=True,
                    title='Iteration %i for spw %i' % (calnum,spw),
                    plotfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_amptime.png' % (calnum,spwn),
                    )


        # The rms noise is about 4 to 8 deg, depending on antenna, but the
        # phase changes are considerably larger.  This indicates that the
        # application of this solution will improve the image.

        # =%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%
        # APPLY THE CALIBRATION
        # =%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%=%

        #
        # If you are satisfied with your solution, you can now apply it to the
        # data to generate a new corrected data column, which you can then
        # image. Be sure to save the previous flags before you do so because
        # applycal will flag data without good solutions. The commented
        # command after the applycal will roll back to the saved solution in
        # case you get in trouble.
        #

        # flagmanager(vis=avg_data,
        #             mode='save',
        #             versionname='before_selfcal_apply')
        # 2013-03-04 19:53:37     SEVERE  agentflagger:: (file /opt/casa/stable-2013-02/gcwrap/tools/flagging/agentflagger_cmpt.cc, line 37)      Exception Reported: Invalid Table operation: ArrayColumn::setShape; shape cannot be changed for row 0 column FLAG
        # *** Error *** Invalid Table operation: ArrayColumn::setShape; shape cannot be changed for row 0 column FLAG

        if reclean:
            applycal(vis=avg_data,
                     gaintable=caltable,
                     interp='linear',
                     flagbackup=True) # was False when flagmanager was used

        # (6) Plot corrected phase vs. amp for the antennas you picked out in (4),
        # to check that in fact the corrections have been applied as expected.
        for ant2 in ant2list:
            for ant in ant1list:
                plotms(vis=avg_data, xaxis='time', yaxis='phase',
                        xdatacolumn='corrected', ydatacolumn='corrected',
                        avgtime='15s', avgchannel=avgchannel_narrow, coloraxis='corr',
                        antenna=ant+'&'+ant2,
                        overwrite=True, title='Iteration %i for spw %i and ant %s-%s' % (calnum,spwn,ant,ant2), 
                        plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phasetime_applied.png' % (calnum,spwn,ant,ant2),)
                plotms(vis=avg_data, xaxis='time', yaxis='amp',
                        xdatacolumn='corrected', ydatacolumn='corrected',
                        avgtime='60s', avgchannel=avgchannel_narrow, coloraxis='corr',
                        antenna=ant+'&'+ant2,
                        overwrite=True, title='Iteration %i for spw %i and ant %s-%s' % (calnum,spwn,ant,ant2), 
                        plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_amptime_applied.png' % (calnum,spwn,ant,ant2),)
                plotms(vis=avg_data, xaxis='phase', yaxis='amp',
                        xdatacolumn='corrected', ydatacolumn='corrected',
                        avgtime='60s', avgchannel=avgchannel_narrow, coloraxis='corr',
                        antenna=ant+'&'+ant2,
                        overwrite=True, title='Iteration %i for spw %i and ant %s-%s' % (calnum,spwn,ant,ant2), 
                        plotfile=outdir+'selfcal%i_spw%i_ant%s-%s_phaseamp_applied.png' % (calnum,spwn,ant,ant2),)
                plotms(vis=vis, spw='0', xaxis='freq', yaxis='phase', avgtime='1e8',
                        avgscan=T, coloraxis='corr', iteraxis='baseline', xselfscale=T,
                        yselfscale=T,
                        antenna=ant+'&'+ant2,
                        title='Phase vs Freq with time averaging for spw %i ant %s-%s iter %i' % (spwn,ant,ant2,calnum),
                        plotfile=outdir+'phasevsfreq_spw%i_ant%s-%s_selfcal%i.png' % (spwn,ant,ant2,calnum),
                        field=field,
                        overwrite=True,
                        )
        

        # Use this command to roll back to the previous flags in the event of
        # an unfortunate applycal.

        #flagmanager(vis=avg_data,
        #            mode='restore',
        #            versionname='before_selfcal_apply')


        if reclean:
            selfcal_image = 'spw%i_C_C_selfcal%i' % (spwn,calnum)
            for suffix in clean_output_suffixes:
                os.system("rm -rf "+selfcal_image+suffix)
            clean(vis=avg_data,imagename=selfcal_image,field=field, mode='mfs',
                    psfmode='hogbom',multiscale=multiscale,
                    weighting='briggs', robust=robust, niter=1000, imsize=imsize,
                    nterms=2,
                    mask=cleanboxes,
                    usescratch=True)
            exportfits(imagename=selfcal_image+".image.tt0", fitsimage=selfcal_image+".fits", overwrite=True)

            plotms(vis=avg_data, spw='0', xaxis='baseline', yaxis='amp', avgtime='1e8',
                    ydatacolumn='corrected-model',
                    avgscan=T, coloraxis='baseline', iteraxis='', xselfscale=T,
                    yselfscale=T,
                    title='Residual vs. Baseline after CSCLEAN iter %i' % calnum,
                    plotfile=outdir+'post_selfcal%i_spw%i_residVSbaseline.png' % (calnum,spwn),
                    field=field,
                    overwrite=True,
                    )
                
            plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp', avgtime='5s',
                    ydatacolumn='corrected-model', 
                    coloraxis='baseline', iteraxis='', xselfscale=T,
                    yselfscale=T,
                    title='Residual vs. Time after CSCLEAN iter %i' % (calnum),
                    plotfile=outdir+'post_selfcal%i_spw%i_residVStime.png' % (calnum,spwn),
                    field=field,
                    overwrite=True,
                    )

            plotms(vis=avg_data, spw='0', xaxis='uvdist', yaxis='amp', avgtime='1e8',
                    ydatacolumn='corrected-model', 
                    avgscan=T, coloraxis='baseline', iteraxis='', xselfscale=T,
                    yselfscale=T,
                    title='Residual vs. UVDIST after CSCLEAN iter %i' % (calnum) ,
                    plotfile=outdir+'post_selfcal%i_spw%i_residVSuvdist.png' % (calnum,spwn),
                    field=field,
                    overwrite=True,
                    )

        imrms.append(imstat(selfcal_image+".image.tt0",box=statsbox)['rms'])

        viewer(selfcal_image+".image.tt0",
                outfile=outdir+selfcal_image+".image.tt0.png",
                outformat='png',
                gui=False)

        print "FINISHED ITERATION %i" % calnum

    print "FINISHED ITERATING!!! YAY!"

    # final phase + gain cal:
    # http://casaguides.nrao.edu/index.php?title=Calibrating_a_VLA_5_GHz_continuum_survey#One_Last_Iteration:_Amplitude_.26_Phase_Self_Calibration
    aptable = 'selfcal_ap_%s_spw%i.gcal' % (field.replace(" ",""),spwn)
    gaincal(vis=avg_data, field='', caltable=aptable, gaintable=caltable, spw='',
            solint='inf', refant=refant, calmode='ap', combine='', minblperant=4)

    plotcal(caltable=aptable,
            xaxis='phase', yaxis='amp',
            plotrange=[-50,50,0.5,1.5],
            showgui=INTERACTIVE,
            figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_ampvsphase_final.png' % (calnum,spwn),
            iteration='spw' if INTERACTIVE else '')#, subplot = 221)

    applycal(vis=avg_data,
             gaintable=[aptable,caltable],
             interp='linear',
             flagbackup=True) # was False when flagmanager was used

    selfcal_image = 'spw%i_C_C_selfcal%i_final' % (spwn,calnum)
    for suffix in clean_output_suffixes:
        os.system("rm -rf "+selfcal_image+suffix)
    clean(vis=avg_data,imagename=selfcal_image,field=field, mode='mfs', mask=cleanboxes,
            weighting='briggs', robust=robust, niter=10000, imsize=imsize,
            nterms=2,
            usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0", fitsimage=selfcal_image+".fits", overwrite=True)

    plotms(vis=avg_data, spw='0', xaxis='baseline', yaxis='amp', avgtime='1e8',
            ydatacolumn='corrected-model',
            avgscan=T, coloraxis='baseline', iteraxis='', xselfscale=T,
            yselfscale=T,
            title='Residual vs. Baseline after CSCLEAN iter %i' % calnum,
            plotfile=outdir+'post_selfcal%i_spw%i_residVSbaseline.png' % (calnum,spwn),
            field=field,
            overwrite=True,
            )
        
    plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp', avgtime='5s',
            ydatacolumn='corrected-model', 
            coloraxis='baseline', iteraxis='', xselfscale=T,
            yselfscale=T,
            title='Residual vs. Time after CSCLEAN iter %i' % (calnum),
            plotfile=outdir+'post_selfcal%i_spw%i_residVStime.png' % (calnum,spwn),
            field=field,
            overwrite=True,
            )

    plotms(vis=avg_data, spw='0', xaxis='uvdist', yaxis='amp', avgtime='1e8',
            ydatacolumn='corrected-model', 
            avgscan=T, coloraxis='baseline', iteraxis='', xselfscale=T,
            yselfscale=T,
            title='Residual vs. UVDIST after CSCLEAN iter %i' % (calnum) ,
            plotfile=outdir+'post_selfcal%i_spw%i_residVSuvdist.png' % (calnum,spwn),
            field=field,
            overwrite=True,
            )

    selfcal_image = 'spw%i_C_C_selfcal%i_final_multiscale' % (spwn,calnum)
    for suffix in clean_output_suffixes:
        os.system("rm -rf "+selfcal_image+suffix)
    clean(vis=avg_data,imagename=selfcal_image,field=field, mode='mfs', imagermode='csclean',# mask=cleanboxes,
            multiscale=multiscale, psfmode='hogbom',
            nterms=2,
            weighting='briggs', robust=robust, niter=10000, imsize=imsize,
            usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0", fitsimage=selfcal_image+".fits", overwrite=True)

    plotms(vis=avg_data, spw='0', xaxis='baseline', yaxis='amp', avgtime='1e8',
            ydatacolumn='corrected-model',
            avgscan=T, coloraxis='baseline', iteraxis='', xselfscale=T,
            yselfscale=T,
            title='Residual vs. Baseline after multiscale CLEAN iter %i' % (calnum),
            plotfile=outdir+'post_selfcal%i_spw%i_residVSbaseline_multiscale.png' % (calnum,spwn),
            field=field,
            overwrite=True,
            )
        
    plotms(vis=avg_data, spw='0', xaxis='time', yaxis='amp', avgtime='5s',
            ydatacolumn='corrected-model', 
            coloraxis='baseline', iteraxis='', xselfscale=T,
            yselfscale=T,
            title='Residual vs. Time after multiscale CLEAN iter %i' % (calnum),
            plotfile=outdir+'post_selfcal%i_spw%i_residVStime_multiscale.png' % (calnum,spwn),
            field=field,
            overwrite=True,
            )

    plotms(vis=avg_data, spw='0', xaxis='uvdist', yaxis='amp', avgtime='1e8',
            ydatacolumn='corrected-model', 
            avgscan=T, coloraxis='baseline', iteraxis='', xselfscale=T,
            yselfscale=T,
            title='Residual vs. UVDIST after multiscale CLEAN iter %i' % (calnum),
            plotfile=outdir+'post_selfcal%i_spw%i_residVSuvdist_multiscale.png' % (calnum,spwn),
            field=field,
            overwrite=True,
            )

    return imrms

예제 #53

0

파일 보기

파일: __init__.py 프로젝트: centowen/stacker

def stack(coords, vis, outvis='', imagename='', cell='1arcsec', stampsize=32,
          primarybeam='guess', datacolumn='corrected', use_cuda = False):
    """
    Stack visibilities in the uv domain.

    coords      -- A coordList object of all target coordinates. Each entry
                   is read for its ``x``, ``y`` and ``weight`` attributes.
    vis         -- Input uv data file.
    outvis      -- Output uv data file. If it does not exist yet it is first
                   created as a copy of ``vis``. Set to '' to not save
                   stacked visibilities.
    imagename   -- If not '', ``outvis`` is run through clearcal and then
                   imaged with clean under this name.
    cell        -- Pixel size handed to clean.
    stampsize   -- Image size in pixels handed to clean.
    primarybeam -- 'guess' asks stacker.pb.guesspb(vis) for a model;
                   'constant', 'none' or None use a default
                   stacker.pb.PrimaryBeamModel(). Any other value is used
                   as given and must provide a ``cdata()`` method.
    datacolumn  -- Either 'corrected' or 'data'. Forwarded to
                   stacker._checkfile for both input and output files.
    use_cuda    -- Forwarded to c_stack as a C bool.

    returns: The value produced by c_stack (an estimate of the stacked flux
             assuming a point source).
    """
    import os
    import shutil
    import time
    try:
        from taskinit import casalog
    except ImportError:
        # Not running inside CASA: simply skip all log messages.
        casalog = None

    have_log = casalog is not None
    save_output = outvis != ''

    if have_log:
        casalog.origin('stacker')
        casalog.post('#'*42, 'INFO')
        casalog.post('#'*5 + ' {0: <31}'.format("Begin Task: Stacker")+'#'*5,
                     'INFO')
        casalog.post('Number of stacking positions: {0}'.format(len(coords)),
                     'INFO')

    # Seed the output data set from the input when it is not there yet.
    if save_output and not os.access(outvis, os.F_OK):
        shutil.copytree(vis, outvis)

    infiletype, infilename, infileoptions = stacker._checkfile(vis, datacolumn)
    if save_output:
        outfiletype, outfilename, outfileoptions = stacker._checkfile(
            outvis, datacolumn)
    else:
        outfiletype, outfilename, outfileoptions = (
            stacker.FILE_TYPE_NONE, '', 0)

    if have_log:
        casalog.post('Input uv file: \'{0}\' of type {1}'.format(
            infilename, stacker.FILETYPENAME[infiletype]), 'INFO')
        if save_output:
            casalog.post('Output uv file: \'{0}\' of type {1}'.format(
                outfilename, stacker.FILETYPENAME[outfiletype]), 'INFO')
        else:
            casalog.post(
                'No output uv file given, will not write stacked visibility',
                'INFO')

    # Resolve the primary beam selector into a model object.
    if primarybeam == 'guess':
        primarybeam = stacker.pb.guesspb(vis)
    elif primarybeam in ('constant', 'none') or primarybeam is None:
        primarybeam = stacker.pb.PrimaryBeamModel()

    pbtype, pbfile, pbnpars, pbpars = primarybeam.cdata()

    # Collect the per-position values, then pack each list into a C array
    # of doubles for the native stacking routine.
    xs = [pos.x for pos in coords]
    ys = [pos.y for pos in coords]
    weights = [pos.weight for pos in coords]
    c_x = (c_double*len(xs))(*xs)
    c_y = (c_double*len(ys))(*ys)
    c_weight = (c_double*len(weights))(*weights)

    t_begin = time.time()
    flux = c_stack(infiletype, c_char_p(infilename), infileoptions,
                   outfiletype, c_char_p(outfilename), outfileoptions,
                   pbtype, c_char_p(pbfile), pbpars, pbnpars,
                   c_x, c_y, c_weight, c_int(len(coords)), c_bool(use_cuda))
    t_end = time.time()
    print("Time used to stack: {0}".format(t_end-t_begin))

    # Optionally image the stacked visibilities.
    if imagename != '':
        import clean
        import clearcal
        clearcal.clearcal(vis=outvis)
        clean.clean(vis=outvis, imagename=imagename, field='0', mode='mfs',
                    cell=cell, imsize=stampsize, weighting='natural')

    if have_log:
        casalog.post('#'*5 + ' {0: <31}'.format("End Task: stacker")+'#'*5)
        casalog.post('#'*42)
    return flux

예제 #54

0

파일 보기

파일: setup.py 프로젝트: blusjune/.bsrc

    exit(1)

# Create the target directory itself.
os.mkdir(path)

# Files that have to sit directly inside the target directory.
for required in (u"clean.py", u"ExpPlan.txt", u"ExpSel.txt", bin_name):
    copy(required, path)

# Lay out the sub-directory tree.
for subdir in (u"TRACES", u"DETAILS", u"TIMINGS", u"RESULTS", u"FILES"):
    os.makedirs(os.path.join(path, subdir))

# Files that belong in specific sub-directories.
copy(u"empty.csv", os.path.join(path, u"FILES"))
results_dir = os.path.join(path, u"RESULTS")
for required in (u"create.py", u"process.py", u"_Vizu_V3.xls"):
    copy(required, results_dir)

# Run the project's clean step on the freshly created tree.
import clean
clean.clean(path)

# Tell the user what to do next, then wait for a key press before exiting.
for message in (
        u"Setup finished",
        u"Now, you should run FlashIO to generate the prepare batch:",
        u"FlashIO GenPrepare Dev <your_device> IOS 64 IOC 10000 IOC2 50000",
        u"then, run the batch Prepare.{bat,sh} (see readme.pdf)",
        u"For having help, type FlashIO help"):
    print(message)
raw_input(u"Press RETURN.")
exit(0)

예제 #55

0

파일 보기

파일: create_model.py 프로젝트: kernelmode/shelter-animals

import numpy as np
import logging
from heamy.dataset import Dataset
from heamy.estimator import Regressor, Classifier
from heamy.pipeline import ModelsPipeline

if __name__ == '__main__':
    # Script entry point: load the raw train/test CSVs, clean them, encode
    # the training labels and make both feature frames share one column set.
    logging.basicConfig(level=logging.INFO)
    logging.info('Loading datasets...')
    train = pd.read_csv("../train.csv")
    test = pd.read_csv("../test.csv")

    logging.info('Cleaning train dataset...')
    train_x = clean(train)
    train_y = train.loc[:, "OutcomeType"]

    # Fit the encoder on the training labels and keep the transformed
    # labels as a DataFrame.
    # NOTE(review): presumably sklearn's LabelEncoder -- confirm import.
    enc = LabelEncoder()
    enc.fit(train_y)
    train_yt = enc.transform(train_y)
    train_yt = pd.DataFrame(train_yt)

    logging.info('Cleaning test dataset...')
    test_x = clean(test)

    # Columns that exist only in the training frame are added to the test
    # frame, filled with zeros.
    for diff in train_x.columns.difference(test_x.columns):
        test_x[diff] = 0

    # Columns that exist only in the test frame are added to the training
    # frame, filled with zeros.  (Previously this loop assigned to test_x,
    # which wiped the real test values and still left train_x without the
    # column, so the two frames never ended up with the same columns.)
    for diff in test_x.columns.difference(train_x.columns):
        train_x[diff] = 0

예제 #56

0

파일 보기

파일: remap_nov24.py 프로젝트: keflavich/w51evlareductionscripts_2012

for spwn in [ '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '0', '1', '2','3','4','5',  ]:
    print "Beginning calibration and mapping of spw ", spwn
    prefix = rootvis.replace(".ms","")
    prefix = prefix+"_spw"+spwn
    vis=prefix+".ms"
    
    imagename = prefix+"_mfs_uni"
    clean(vis=vis,
          imagename=imagename,
          field=target,spw='',
          mode='mfs', # use channel to get cubes
          niter=5000,
          gain=0.1, threshold='1.0mJy',
          psfmode='clark',
          multiscale=[0], 
          interactive=False,
          imsize=[2560,2560], cell=['0.1arcsec','0.1arcsec'],
          stokes='I',
          weighting='uniform',
          allowchunk=True,
          mask=[[1041,1271,1100,1394],[1500,1750,1600,1800],[1014,1150,1525,1701]],
          usescratch=True)
    exportfits(imagename=imagename+".image", fitsimage=imagename+".fits")

    # imagename = prefix+"_mfs_uni"
    # clean(vis=vis,
    #       imagename=imagename,
    #       field=target,spw='',
    #       mode='mfs', # use channel to get cubes
    #       nterms=2,

예제 #57

0

파일 보기

파일: standalone_clean.py 프로젝트: ICRAR/chiles_pipeline

"""
Run the clean task stand alone for debugging
"""
import datetime
from clean import clean

#casalog.filter('DEBUGGING')
sp = "/mnt/output/Chiles/split_vis"
vf = "vis_1136~1140"

start_time = datetime.datetime.now()
clean(vis=['{0}/20131116-946-6/{1}'.format(sp, vf),'{0}/20131117-941-6/{1}'.format(sp, vf),
           '{0}/20131118-946-6/{1}'.format(sp, vf),'{0}/20131119-941-6/{1}'.format(sp, vf),
           '{0}/20131121-946-6/{1}'.format(sp, vf),'{0}/20131123-951-6/{1}'.format(sp, vf),
           '{0}/20131126-946-6/{1}'.format(sp, vf),'{0}/20131203-941-6/{1}'.format(sp, vf)],
      imagename="/mnt/output/Chiles/cube_1136~1140",
      outlierfile="",field="deepfield",spw="",selectdata=True,timerange="",uvrange="",antenna="",scan="",observation="",intent="",mode="frequency",resmooth=False,gridmode="",
      wprojplanes=1,facets=1,cfcache="cfcache.dir",rotpainc=5.0,painc=360.0,aterm=True,psterm=False,mterm=True,wbawp=False,conjbeams=True,epjtable="",interpolation="nearest",
      niter=0,gain=0.1,threshold="0.0mJy",psfmode="clark",imagermode="csclean",ftmachine="mosaic",mosweight=False,scaletype="SAULT",multiscale=[0],negcomponent=-1,
      smallscalebias=0.6,interactive=False,mask=[],nchan=-1,start="",width="",outframe="BARY",veltype="optical",imsize=[2048],cell=['1.5arcsec'],phasecenter="",
      restfreq="1420.405752MHz",stokes="I",weighting="natural",robust=0.0,uvtaper=False,outertaper=[''],innertaper=['1.0'],modelimage="",restoringbeam=[''],pbcor=False,
      minpb=0.2,usescratch=True,noise="1.0Jy",npixels=0,npercycle=100,cyclefactor=1.5,cyclespeedup=-1,nterms=1,reffreq="",chaniter=False,flatnoise=True,allowchunk=False)

end_time = datetime.datetime.now()
print 'Time taken:', end_time, start_time, end_time - start_time

예제 #58

0

파일 보기

파일: run_tests.py 프로젝트: correa/libtorrent

def main(argv):

	toolsets = []

	incremental = False

	test_dirs = []
	build_dirs = []
	configs = []
	options = ['preserve-test-targets=on']
	time_limit = 1200

	for arg in argv:
		if arg[0] == '-':
			if arg[1] == 'j':
				num_processes = int(arg[2:])
				options.append('-j%d' % num_processes)
			elif arg[1] == 'h':
				print_usage()
				sys.exit(1)
			elif arg[1] == 'i':
				incremental = True
			elif arg[1:] == 'valgrind':
				options.append('launcher=valgrind')
			else:
				print 'unknown option: %s' % arg
				print_usage()
				sys.exit(1)
		elif '=' in arg:
			options.append(arg)
		else:
			toolsets.append(arg)

	if toolsets == []:
		print_usage()
		sys.exit(1)

	if not incremental:
		print 'cleaning repo'
		clean.clean()

	try:
		cfg = open('.regression.yml', 'r')
	except:
		print '.regression.yml not found in current directory'
		sys.exit(1)

	cfg = yaml.load(cfg.read())

	if 'test_dirs' in cfg:
		for d in cfg['test_dirs']:
			test_dirs.append(os.path.abspath(d))

	if 'build_dirs' in cfg:
		for d in cfg['build_dirs']:
			build_dirs.append(os.path.abspath(d))
			test_dirs.append(os.path.abspath(d))

	if len(build_dirs) == 0 and len(test_dirs) == 0:
		print 'no test or build directory specified by .regression.yml'
		sys.exit(1)

	configs = []
	if 'features' in cfg:
		for d in cfg['features']:
			configs.append(d)
	else:
		configs = ['']

	build_configs = []
	if 'build_features' in cfg:
		for d in cfg['build_features']:
			build_configs.append(d)

	clean_files = []
	if 'clean' in cfg:
		clean_files = cfg['clean']

	branch_name = 'trunk'
	if 'branch' in cfg:
		branch_name = cfg['branch']

	if 'time_limit' in cfg:
		time_limit = int(cfg['time_limit'])

	# it takes a bit longer to run in valgrind
	if 'launcher=valgrind' in options:
		time_limit *= 7

	architecture = platform.machine()
	build_platform = platform.system() + '-' + platform.release()

	revision, author = svn_info()

	timestamp = datetime.now()

	print '%s-%d - %s - %s' % (branch_name, revision, author, timestamp)

	print 'toolsets: %s' % ' '.join(toolsets)
#	print 'configs: %s' % '|'.join(configs)

	current_dir = os.getcwd()

	try:
		rev_dir = os.path.join(current_dir, 'regression_tests')
		try: os.mkdir(rev_dir)
		except: pass
		rev_dir = os.path.join(rev_dir, '%s-%d' % (branch_name, revision))
		try: os.mkdir(rev_dir)
		except: pass

		for toolset in toolsets:
			results = {}
			for test_dir in test_dirs:
				print 'running tests from "%s" in %s' % (test_dir, branch_name)
				os.chdir(test_dir)
				test_dir = os.getcwd()

				# figure out which tests are exported by this Jamfile
				p = subprocess.Popen(['bjam', '--dump-tests', 'non-existing-target'], stdout=subprocess.PIPE, cwd=test_dir)

				tests = []

				output = ''
				for l in p.stdout:
					output += l
					if not 'boost-test(RUN)' in l: continue
					test_name = os.path.split(l.split(' ')[1][1:-1])[1]
					tests.append(test_name)
				print 'found %d tests' % len(tests)
				if len(tests) == 0:
					tests = ['']

				additional_configs = []
				if test_dir in build_dirs:
					additional_configs = build_configs

				futures = []
				for features in configs + additional_configs:
					(compiler, r) = run_tests(toolset, tests, features, options, test_dir, time_limit)
					results.update(r)

				print ''

				if len(clean_files) > 0:
					print 'deleting ',
					for filt in clean_files:
						for f in glob.glob(os.path.join(test_dir, filt)):
							# a precaution to make sure a malicious repo
							# won't clean things outside of the test directory
							if not os.path.abspath(f).startswith(test_dir): continue
							print '%s ' % f,
							try: shutil.rmtree(f)
							except: pass
					print ''

			# each file contains a full set of tests for one speific toolset and platform
			try:
				f = open(os.path.join(rev_dir, build_platform + '#' + toolset + '.json'), 'w+')
			except IOError, e:
				print e
				rev_dir = os.path.join(current_dir, 'regression_tests')
				try: os.mkdir(rev_dir)
				except: pass
				rev_dir = os.path.join(rev_dir, '%s-%d' % (branch_name, revision))
				try: os.mkdir(rev_dir)
				except: pass
				f = open(os.path.join(rev_dir, build_platform + '#' + toolset + '.json'), 'w+')

			print >>f, json.dumps(results)
			f.close()


	finally:
		# always restore current directory
		try:
			os.chdir(current_dir)
		except: pass

예제 #59

0

파일 보기

파일: selfcal_noplots.py 프로젝트: adamginsburg/w51evlareductionscripts

def selfcal(vis, spwn=6, doplots=True, INTERACTIVE=False, reclean=True, field='W51 Ku',
        outdir_template="spw%i_selfcal_iter/", statsbox='170,50,229,97', ant1list=['ea14','ea05'],
        ant2list=['ea16','ea07'], avgchannel_wide='128', avgchannel_narrow='8',
        cleanboxes=cleanboxes, refant='ea27', solint='30s', niter=2,
        multiscale=[0,5,10,15,25,50], imsize=512, ):
    """
    Run an iterative phase self-calibration on ``vis`` without diagnostic plots.

    Sequence of operations:

    1. Make a shallow, masked image (100 clean iterations) and record its RMS.
    2. For each of ``niter`` iterations: image, solve for phase gains
       (``gaincal`` with ``calmode='p'``), apply them, re-image more deeply
       and record the RMS of the new image.
    3. Solve one final amplitude+phase table (``calmode='ap'``) on top of the
       last phase table, apply both, and make two final images (one without
       and one with multiscale) followed by residual ``plotms`` plots.

    Parameters
    ----------
    vis : str
        Path of the (already averaged) measurement set to calibrate.
    spwn : int or value accepted by ``int()``
        Spectral-window number; used only to build output names.
    doplots, ant1list, ant2list, avgchannel_wide, avgchannel_narrow :
        Accepted for interface compatibility; not used by this function.
    INTERACTIVE : bool
        Passed to ``plotcal`` as ``showgui``; when false the plot is written
        to a PNG file in the output directory instead.
    reclean : bool
        When true, delete and regenerate every image and calibration table
        inside the iteration loop.  When false, those steps are skipped and
        the images with the standard names are expected to exist already.
    field : str
        Field selection passed to ``clean`` / ``plotms``; also embedded (with
        spaces removed) in calibration-table names.
    outdir_template : str
        ``%``-template taking the spw number; names the PNG output directory.
    statsbox : str
        ``box`` argument handed to ``imstat`` for the RMS measurement.
    cleanboxes :
        Mask passed to the masked ``clean`` calls.
    refant, solint :
        Reference antenna and solution interval passed to ``gaincal``.
    niter : int
        Number of phase self-calibration iterations; must be at least 1.
    multiscale : list
        Multiscale scales passed to ``clean`` (not modified here).
    imsize : int
        Image size passed to ``clean``.

    Returns
    -------
    list
        The ``'rms'`` entry of ``imstat`` for the shallow image followed by
        one entry per self-calibration iteration.

    Raises
    ------
    ValueError
        If ``niter`` is smaller than 1 (the final amplitude+phase solve needs
        the phase table of the last iteration).
    """

    # Normalise once so that every '%i' format below also works when the
    # spectral window was passed as a string such as "6".
    spwn = int(spwn)

    # Fail before any expensive imaging: with no iteration there is no phase
    # table to pre-apply in the final gaincal.
    if niter < 1:
        raise ValueError("niter must be >= 1, got %r" % (niter,))

    outdir = outdir_template % spwn
    try:
        os.mkdir(outdir)
    except OSError:
        # best effort: the directory usually exists from a previous run
        pass

    # you're supposed to pass in avg_data as input
    avg_data = vis

    def remove_products(prefix):
        # Delete every output that an earlier clean run left for this prefix.
        for suffix in clean_output_suffixes:
            os.system("rm -rf "+prefix+suffix)

    def plot_residuals(clean_label, file_tag):
        # Residual (corrected-model) amplitude against baseline, time and
        # uv-distance; the time plot is the only one without scan averaging.
        for xaxis, axis_label, avgtime, extra in (
                ('baseline', 'Baseline', '1e8', {'avgscan': True}),
                ('time', 'Time', '5s', {}),
                ('uvdist', 'UVDIST', '1e8', {'avgscan': True}),
                ):
            plotms(vis=avg_data, spw='0', xaxis=xaxis, yaxis='amp', avgtime=avgtime,
                    ydatacolumn='corrected-model',
                    coloraxis='baseline', iteraxis='', xselfscale=True,
                    yselfscale=True,
                    title='Residual vs. %s after %s iter %i' % (axis_label, clean_label, calnum),
                    plotfile=outdir+'post_selfcal%i_spw%i_residVS%s%s.png' % (calnum, spwn, xaxis, file_tag),
                    field=field,
                    overwrite=True,
                    **extra)

    # The antenna names are read as before but are not used further in this
    # no-plots variant; the table is now always closed again.
    mytb.open(vis+"/ANTENNA")
    try:
        antnames = mytb.getcol("NAME")
    finally:
        mytb.close()

    # (1) Clean a single SPW, boxing the brightest regions and not cleaning
    # very deeply (100 iterations).  Keep this model in the header -- it's
    # what you'll use for the first round of self-calibration.
    # The name is set outside the "if" so reclean=False can reuse the image.
    imagename = "average_spw%i_shallowclean_masked" % spwn
    if reclean:
        remove_products(imagename)

        clean(vis=avg_data, field=field, imagename=imagename, mode='mfs',
                psfmode='hogbom',multiscale=multiscale,
                weighting='briggs', robust=0.0, niter=100, imsize=imsize,
                mask=cleanboxes,
                nterms=2,
                usescratch=True)
        viewer(imagename+".image.tt0",
                outfile=outdir+imagename+".image.tt0.png",
                outformat='png',
                gui=False)
        exportfits(imagename=imagename+".image.tt0", fitsimage=imagename+".fits", overwrite=True)

    imrms = [imstat(imagename+".image.tt0",box=statsbox)['rms']]

    for calnum in range(niter):

        # Names are computed unconditionally so that reclean=False can pick
        # up the products of an earlier run instead of raising NameError.
        first_image = 'spw%i_ku_d_firstim_selfcal%i' % (spwn,calnum)
        selfcal_image = 'spw%i_ku_d_selfcal%i' % (spwn,calnum)
        caltable = 'selfcal%i_%s_spw%i.pcal' % (calnum,field.replace(" ",""),spwn)

        # for Ku D W51 Ku spw 2
        if reclean:
            remove_products(first_image)

            clean(vis=avg_data,imagename=first_image,field=field, mode='mfs',
                    psfmode='hogbom',multiscale=multiscale,
                    weighting='briggs', robust=0.0, niter=100, imsize=imsize,
                    mask=cleanboxes,
                    nterms=2,
                    usescratch=True)
            exportfits(imagename=first_image+".image.tt0", fitsimage=first_image+".fits", overwrite=True)

        viewer(first_image+".image.tt0",
                outfile=outdir+first_image+".image.tt0.png",
                outformat='png',
                gui=False)

        if reclean:
            os.system('rm -rf '+caltable)
            gaincal(vis=avg_data,
                    field='',
                    caltable=caltable,
                    spw='',
                    # gaintype = 'T' could reduce failed fit errors by averaging pols...
                    gaintype='G', #  'G' from http://casaguides.nrao.edu/index.php?title=EVLA_Advanced_Topics_3C391
                    solint=solint,
                    refant=refant,
                    calmode='p',
                    combine='scan',
                    minblperant=4)

            applycal(vis=avg_data,
                     gaintable=caltable,
                     interp='linear',
                     flagbackup=True) # was False when flagmanager was used

            remove_products(selfcal_image)
            clean(vis=avg_data,imagename=selfcal_image,field=field, mode='mfs',
                    psfmode='hogbom',multiscale=multiscale,
                    weighting='briggs', robust=0.5, niter=1000, imsize=imsize,
                    nterms=2,
                    mask=cleanboxes,
                    usescratch=True)
            exportfits(imagename=selfcal_image+".image.tt0", fitsimage=selfcal_image+".fits", overwrite=True)

        imrms.append(imstat(selfcal_image+".image.tt0",box=statsbox)['rms'])

        viewer(selfcal_image+".image.tt0",
                outfile=outdir+selfcal_image+".image.tt0.png",
                outformat='png',
                gui=False)

        print("FINISHED ITERATION %i" % calnum)

    print("FINISHED ITERATING!!! YAY!")

    # From here on calnum and caltable refer to the last loop iteration.

    # final phase + gain cal:
    # http://casaguides.nrao.edu/index.php?title=Calibrating_a_VLA_5_GHz_continuum_survey#One_Last_Iteration:_Amplitude_.26_Phase_Self_Calibration
    aptable = 'selfcal_ap_%s_spw%i.gcal' % (field.replace(" ",""),spwn)
    gaincal(vis=avg_data, field='', caltable=aptable, gaintable=caltable, spw='',
            solint='inf', refant=refant, calmode='ap', combine='', minblperant=4)

    plotcal(caltable=aptable,
            xaxis='phase', yaxis='amp',
            plotrange=[-50,50,0.5,1.5],
            showgui=INTERACTIVE,
            figfile='' if INTERACTIVE else outdir+'selfcal%i_spw%i_ampvsphase_final.png' % (calnum,spwn),
            iteration='spw' if INTERACTIVE else '')#, subplot = 221)

    applycal(vis=avg_data,
             gaintable=[aptable,caltable],
             interp='linear',
             flagbackup=True) # was False when flagmanager was used

    # Final image with the clean mask and without multiscale.
    selfcal_image = 'spw%i_ku_d_selfcal%i_final' % (spwn,calnum)
    remove_products(selfcal_image)
    clean(vis=avg_data,imagename=selfcal_image,field=field, mode='mfs', mask=cleanboxes,
            weighting='briggs', robust=0.5, niter=10000, imsize=imsize,
            nterms=2,
            usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0", fitsimage=selfcal_image+".fits", overwrite=True)

    plot_residuals('CSCLEAN', '')

    # Final image with multiscale and without a mask.
    selfcal_image = 'spw%i_ku_d_selfcal%i_final_multiscale' % (spwn,calnum)
    remove_products(selfcal_image)
    clean(vis=avg_data,imagename=selfcal_image,field=field, mode='mfs', imagermode='csclean',# mask=cleanboxes,
            multiscale=multiscale, psfmode='hogbom',
            nterms=2,
            weighting='briggs', robust=0.5, niter=10000, imsize=imsize,
            usescratch=True)
    exportfits(imagename=selfcal_image+".image.tt0", fitsimage=selfcal_image+".fits", overwrite=True)

    plot_residuals('multiscale CLEAN', '_multiscale')

    return imrms