Example #1
def melanoma_metastatic_oncogenes(source_files):
    cli, seq, graph, onco_genes = source_files[:4]
    print("----- reading raw datasets")
    cli = read_files(cli, index_col=0)
    seq = read_files(seq, index_col=0)
    graph = read_files(graph)
    onco_genes = read_files(onco_genes)

    print("----- preprocessing raw datasets")
    # get primary(0) vs metastatic(1)
    cli = cli.loc[:, ["sample_type.samples"]]
    cli = cli.loc[cli.iloc[:, 0].isin(["Primary Tumor", "Metastatic"]), :]
    cli = cli.replace({"Primary Tumor": 0, "Metastatic": 1})

    # seq
    common_genes = list(
        set(seq.index).intersection(set(onco_genes.iloc[:, 0])))
    seq = seq.loc[common_genes, :].T
    # remove duplicated, choose the largest var
    seq = remove_duplicated_columns_byVar(seq).loc[:, common_genes]

    # graph
    graph_gene_mask = graph.iloc[:, -2:].isin(common_genes).all(axis=1)
    graph = graph[graph_gene_mask]
    # gene names --> number
    graph = trans_graph_toNumber(common_genes, graph, [1, 2])

    # same samples
    cli, seq = samples_intersection(cli, seq)

    # to ndarray, return
    return cli.values, seq.values, graph.values
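
A hedged usage sketch for the function above; the four file paths are placeholders for the clinical, expression, gene-graph, and oncogene-list inputs it expects, not files from the original project:

# Hypothetical call; the four paths below are placeholders.
labels, expression, edges = melanoma_metastatic_oncogenes(
    ["clinical.tsv", "expression.tsv", "graph.tsv", "oncogenes.tsv"])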
Example #2
def run_test(model, train, test):
    train = utils.read_files(train)
    test = utils.read_files(test)
    distr = model.distribution(train)
    print("-Train:-")
    model.neg_log_prob(train, distr)
    model.predict(train, distr)
    print("\n")
    print("-Test:-")
    model.neg_log_prob(test, distr)
    model.predict(test, distr)
    print("\n")
Example #3
def run_test(model, train, test):
    train_pitch, train_rhythm = utils.read_files(train, True)
    test_pitch, test_rhythm = utils.read_files(test, True)

    p_distr = model.distribution(train_pitch)
    r_distr = model.distribution(train_rhythm)
    print("-Train:-")
    model.neg_log_prob(train_pitch, train_rhythm, p_distr, r_distr)
    model.predict(train_pitch, train_rhythm, p_distr, r_distr)
    print("\n")
    print("-Test:-")
    model.neg_log_prob(test_pitch, test_rhythm, p_distr, r_distr)
    model.predict(test_pitch, test_rhythm, p_distr, r_distr)
    print("\n")
Example #4
def test_model(obs_file=OBS_FILE, length_file=LENGTH_FILE):
    obs, length = utils.read_files(obs_file, length_file)
    obs = utils.preprocess_data(obs)
    # model = joblib.load(MODEL_NAME+str(0)+".pkl")
    model = joblib.load("../interval_model/model_mbs.pkl")
    model._print_info()
    validate_model(model, obs[:, 0].reshape(-1, 1), length)
Example #5
    def discover(self, coverage, input_dir, mode='full'):
        tracer = pathfinder.Tracer(self, self.verbose)

        def run(inputs):
            cov = coverage == "full"
            traces = self.generate_traces(inputs)  # dynamorio traces
            etraces = self.encode_traces(traces)  # edge traces
            args = self.fix_args(self.args, inputs)  # replace placeholders
            self.msg("[+] Tracing with: " + str(args))
            concrete_traces = tracer.trace_concrete(args, traces, etraces, inputs)
            if len(concrete_traces) == 0:
                return
            length = tracer.trace_symbolic(args, concrete_traces, inputs)
            self.msg("[+] Done Tracing, discovered " + str(len(self.new)) + " new edges")
            self.__discover(length, cov)

        all_inputs = utils.read_files(input_dir)  # inputs
        # Trade a complete coverage map for quicker feedback: instead of tracing
        # everything and then running discovery once, trace single input files
        # and run discovery on each of them.
        if mode == 'full':
            run(all_inputs)
        else:
            # pick a single random (input name, data) pair
            k, v = random.choice(list(all_inputs.items()))
            one_input = dict()
            one_input[k] = v
            run(one_input)
Example #6
def main():
    test, emission, transition, output = read_files()
    emission, transition = get_nested_dictionaries(emission, transition)
    initial = transition["START"]
    prediction = []
    print(emission)

    trellis = {}

    # initialize the trellis
    for tag in initial:
        trellis[tag] = trellis_cell(tag,
                                    initial[tag] * emission[tag][test[0][0]],
                                    None)

    last_cell = None
    i = 1
    while i < len(test[0]):

        # Initialize next layer
        next_layer = {}
        # Loop through layer
        for next_tag in trellis:
            max_prob = 0
            prev_cell = None
            # Loop through the current node layer
            for tag in trellis:
                # Calculate the edge probability
                prob = trellis[tag].val*transition[tag][next_tag] * \
                    emission[next_tag][test[0][i]]
                # Keep track of max probability
                if prob > max_prob:
                    max_prob = prob
                    prev_cell = trellis[tag]
            # Update node layer with the maximum probability
            next_layer[next_tag] = trellis_cell(next_tag, max_prob, prev_cell)

        # Find the max node value in the layer
        max_val = 0
        tag_choice = ""
        cell_choice = None
        for tag in next_layer:
            if next_layer[tag].val > max_val:
                max_val = next_layer[tag].val
                tag_choice = tag
                cell_choice = next_layer[tag]

        trellis = next_layer
        last_cell = cell_choice
        i += 1

    # Backtrace to find the optimal route
    i -= 1
    while last_cell is not None:
        prediction.insert(0, (test[0][i], last_cell.tag))
        last_cell = last_cell.ptr
        i -= 1

    print('Your Output is:', prediction, '\n Expected Output is:', output)
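
The trellis_cell helper used above is not shown in this example. A minimal sketch that matches how the code constructs and reads it (a tag, a path probability, and a back-pointer); this is an assumption, not the original definition:

from collections import namedtuple

# Assumed shape of the helper (name kept lowercase to match the call sites above):
# a tag, the path probability so far, and a back-pointer to the previous cell.
trellis_cell = namedtuple("trellis_cell", ["tag", "val", "ptr"])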
Example #7
def main():
    test, emission, transition, output = read_files()
    emission, transition = get_nested_dictionaries(emission, transition)
    initial = transition["START"]
    prediction = []
    """WRITE YOUR VITERBI IMPLEMENTATION HERE"""

    print('Your Output is:', prediction, '\n Expected Output is:', output)
Example #8
def get_stack_traces_for_signature(fnames, signature, traces_num=100):
    traces = download_stack_traces_for_signature(signature, traces_num)

    for line in utils.read_files(fnames):
        data = json.loads(line)
        if data['signature'] == signature:
            traces.add(data['proto_signature'])

    return list(traces)
Example #9
def build_one_tree_dataset_from_xml(path, classe, max_depth):
    onlyfiles = read_files(path)
    data = []
    for f in onlyfiles:
        G = Graph()
        G.build_Xml_tree(path + '/' + f, max_depth)
        data.append((G, classe))

    return data
Example #10
def get_patients_data():
    OBS_FILE, LENGTH_FILE = utils.set_filename(LABEL, TYPE)
    obses, lengths = utils.read_files(OBS_FILE, LENGTH_FILE)
    assert len(obses) == len(lengths)

    patients = dict()
    Patient.Patient.vol = len(TYPE)
    print("vol: ", Patient.Patient.vol)
    Patient.Patient.trend_types = TREND_TYPE
    return pair_X_and_y_for_patients(obses, lengths, patients)
Example #11
def main(parameters_file, bird_params, predict_dir, modelfile=None):
    with open(parameters_file, 'r') as f:
        p = yaml.safe_load(f)
    with open(bird_params, 'r') as f:
        p.update(yaml.safe_load(f))
    sampled_dsets = read_files(predict_dir, load_events=False)
    spa = create_spectra(p)
    if modelfile is None:
        modelfile = default_model_filename(parameters_file, bird_params)
    model = keras.models.load_model(modelfile)
    print(model.summary())
    for sampled_dset in sampled_dsets:
        predict(model, sampled_dset, spa, p)
Example #12
def train_and_test_models(obs_file, length_file, n_split=N_SPLIT):
    # Read data
    obses, lengths = utils.read_files(obs_file, length_file)
    print(obs_file, length_file)
    assert len(obses) == len(lengths)
    for i in range(len(obses)):
        obs = obses[i]
        length = lengths[i][:, 1:].flatten()
        print("There are ", obs.shape[0], " observations and ", length.shape,
              " patients.")
        length = np.array_split(length, n_split)
        train_and_test_one_model(obs, length, i)
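
For reference, np.array_split (used above to build the folds) differs from np.split in that it tolerates unequal chunk sizes:

import numpy as np

parts = np.array_split(np.arange(10), 3)
# -> [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([8, 9])]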
Example #13
def get_path(path, children):
    files = read_files(path)
    if path:
        children.append(html.P(f"files found : {len(files)}"))
        embedding = Embedding()
        embs = np.array(embedding.embeddings(files))
        matrix = similarity_matrix(embs, embs)
        index_pair = sort_matrix(matrix)
        np.save('index_pair.npy', index_pair)
        df = new_df(files)
        save_df(df, './files.csv')
        return children
    return []
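
Embedding, similarity_matrix, sort_matrix, new_df, and save_df are project helpers that are not shown. A minimal sketch of what similarity_matrix might look like, assuming it computes pairwise cosine similarity between embedding rows:

import numpy as np

def similarity_matrix(a, b):
    # Pairwise cosine similarity between the rows of a and b (assumed behaviour).
    a_norm = a / np.linalg.norm(a, axis=1, keepdims=True)
    b_norm = b / np.linalg.norm(b, axis=1, keepdims=True)
    return a_norm @ b_norm.T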
Example #14
def get_generator(trainingdir, spa, params):
    encoder = sampling_encoder(params)
    sampled_dsets, event_dsets = read_files(trainingdir, params)
    for sampled_dset, event_dset in zip(sampled_dsets, event_dsets):
        data_gen = data_generator(
            spa,
            sampled_dset.data,
            window_len=params['window_len'],
            labels=event_dset.data,
            encoder=encoder,
            batch_size=1,  # this is required for saving images
            amplitude_norm=params['amplitude_norm'],
            loop=False)
        yield from data_gen
Example #15
def main():
    root = Path("corpus-20090418")
    files = list(root.iterdir())
    # files = files[:2]

    lsh = LSH(read_files(files))
    # plagiarism_table = lsh.rough_jaccard_test()
    # task_format_print(files, plagiarism_table)

    # plagiarism_table_min_hash = lsh.min_hashing()
    # task_format_print(files, plagiarism_table_min_hash)

    plagiarism_table_min_hash = lsh.min_hashing(shingles_mode=True)
    task_format_print(files, plagiarism_table_min_hash)
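
The LSH class is not shown. As background for min_hashing(shingles_mode=True), a minimal character k-shingling helper of the kind such pipelines usually build on (hypothetical, not the repository's implementation):

def shingles(text, k=5):
    # Set of overlapping character k-grams ("shingles") for one document.
    return {text[i:i + k] for i in range(len(text) - k + 1)}

shingles("plagiarism detection", k=5)  # {'plagi', 'lagia', 'agiar', ...}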
Example #16
def main():
    if len(sys.argv) != 2:
        print("Usage: folder containing mel files")
        return 1

    mels, keys = utils.read_files(sys.argv[1], "key")

    print("_______Key_ID_______")

    for l in range(1, 25):
        cv_test(mels, keys, l)

    print("Done.")
    return 0
Example #17
 def get_genotype_phenotype(self):
     '''
     get the relevant genotype phenotype data
     '''
     data = utils.read_files(self.gene_file)
     # remove nc?
     if self.closeness:
         nc_variants = utils.extract_nc_from_vcf(self.vep_vcf_file,
                                                 self.closeness)
         utils.remove_noncoding(data, nc_variants)
     utils.cleanse_variants(data)
     # remove batch effect?
     if self.binom_cutoff:
         batch_artefacts = self.get_batch_artefacts(data, )
         data = self.remove_batch_artefacts(data, batch_artefacts)
     return data
Example #18
def read_corpus(fnames):
    elems = []
    already_selected = set()
    for line in utils.read_files(fnames):
        data = json.loads(line)
        proto_signature = data['proto_signature']

        if should_skip(proto_signature):
            continue

        processed = preprocess(proto_signature)

        if frozenset(processed) not in already_selected:
            elems.append((processed, data['signature']))
        already_selected.add(frozenset(processed))

    return [gensim.models.doc2vec.TaggedDocument(trace, [i, signature]) for i, (trace, signature) in enumerate(elems)]
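
One way the returned TaggedDocument list might be consumed, assuming a recent gensim; the input file name and hyperparameters below are placeholders:

import gensim

corpus = read_corpus(["crashes.json"])  # hypothetical input file
model = gensim.models.doc2vec.Doc2Vec(vector_size=100, min_count=2, epochs=20)
model.build_vocab(corpus)
model.train(corpus, total_examples=model.corpus_count, epochs=model.epochs)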
Example #19
def main():
    test, emission, transition, output = read_files()
    emission, transition = get_nested_dictionaries(emission, transition)
    initial = transition["S"]
    prediction = []
    """WRITE YOUR VITERBI IMPLEMENTATION HERE"""
    # print("emission: ",emission)
    # print()
    # print("transition: ",transition)
    # print()
    backtrack = {}
    probs = {}
    for tag in initial:
        probs[(0, tag)] = math.log(initial[tag]) + math.log(
            emission[tag][test[0][0]])
        backtrack[(0, tag)] = 0
    print(probs)
    for message in test:
        for i in range(1, len(message)):
            word = message[i]
            for tag in emission:
                rates = float("-inf")
                best = ""
                for prev in transition[tag]:
                    likelihood = math.log(transition[prev][tag]) + probs[
                        (i - 1, prev)] + math.log(emission[tag][word])
                    if rates < likelihood:
                        rates = likelihood
                        best = prev
                backtrack[(i, tag)] = (i - 1, best)
                probs[(i, tag)] = rates
        best = ""
        rate = float("-inf")
        for tag in emission:
            if rate < probs[len(message) - 1, tag]:
                best = tag
                rate = probs[len(message) - 1, tag]
        i = len(message) - 1
        node = (i, best)  # avoid shadowing the built-in name "tuple"
        while i >= 0:
            prediction.insert(0, (message[i], node[1]))
            node = backtrack[node]
            i -= 1
    print('Your Output is:', prediction, '\n Expected Output is:', output)
Example #20
def get_stack_traces_for_signature(fnames, signature, traces_num=100):
    traces = set()

    # query stack traces online
    url = 'https://crash-stats.mozilla.com/api/SuperSearch'
    params = {
        'signature': '=' + signature,
        '_facets': ['proto_signature'],
        '_facets_size': traces_num,
        '_results_number': 0
    }
    res = utils.get_with_retries(url, params)
    records = res.json()['facets']['proto_signature']
    for record in records:
        traces.add(record['term'])

    # query stack traces from downloaded data
    for line in utils.read_files(fnames):
        data = json.loads(line)
        if data['signature'] == signature:
            traces.add(data['proto_signature'])

    return list(traces)
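
utils.get_with_retries is not shown. A minimal sketch of what such a helper could look like, assuming it wraps requests.get with a simple retry and backoff; this is an assumption, not the project's implementation:

import time
import requests

def get_with_retries(url, params, retries=3, backoff=2.0):
    # Retry transient HTTP failures with a linear backoff (assumed behaviour).
    for attempt in range(retries):
        try:
            res = requests.get(url, params=params)
            res.raise_for_status()
            return res
        except requests.RequestException:
            if attempt == retries - 1:
                raise
            time.sleep(backoff * (attempt + 1))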
Example #21
 def dump_maybe(self, s, prefix="pathfinder"):
     ''' Concretize inputs and write to file, given a state. This only dumps the outputs when they are a) different from an existing
     output/input and b) the path depended on the input (there have to be constraints on it).
     '''
     data = []
     if not self.stdin:
         try:
             cur_data = s.posix.dump_file_by_path(self.sym_file_name)
         except BaseException:
             print("Could not dump file")
             return False
     else:
         cur_data = s.posix.dumps(0)
     if len(cur_data) > 0:
         data.append(cur_data)
     if len(data) > 0:
         queue = utils.read_files(self.queue_dir)
         # compare to inputs to avoid doublettes
         data = self.filter_compare(queue, data)
         if len(data) > 0:
             #print("[+] Found "+ str(len(data)) + " unique! (Writing to disk..)"
             self.write_outputs(data, prefix)
             return True
     return False
Example #22
def main():

    """
    Main program for ADI data reduction, configured with a call to
    adiparam.GetConfig(), which brings up a GUI to set parameters.

    The pipeline is currently designed for SEEDS data taken without
    an occulting mask.  
    
    You must have scipy, numpy, pyephem, multiprocessing, and matplotlib
    installed to use this pipeline.
    """

    parser = optparse.OptionParser(usage=__doc__)
    parser.add_option("-p", "--prefix", dest="prefix", default="HICA",
                      help="Specify raw file name prefix (default=%default)")
    opts, args = parser.parse_args()

    exec_path = os.path.dirname(os.path.realpath(__file__))
    filesetup, adipar, locipar = GetConfig(prefix=opts.prefix)

    nframes = len(filesetup.framelist)
    ngroup = 1 + int((nframes - 1) / locipar.max_n)
    flat = pyf.open(filesetup.flat)
    if filesetup.pixmask is not None:
        hotpix = pyf.open(filesetup.pixmask)
    else:
        hotpix = None

    dimy, dimx = pyf.open(filesetup.framelist[0])[-1].data.shape
    mem, ncpus, storeall = utils.config(nframes, dimy * dimx)
    
    if filesetup.scale_phot:
        x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3)
        window = (x**2 + y**2 < 2.51**2) * 1.0
        window /= np.sum(window)
        ref_phot, ref_psf = photometry.calc_phot(filesetup, adipar, flat,
                                                 hotpix, mem, window)
    else:
        ref_psf = None
        ref_phot = None
    
    ################################################################
    # WCS coordinates are not reliable in HiCIAO data with the image
    # rotator off.  Compute parallactic angle.  Otherwise, trust the
    # WCS coordinates.
    ################################################################

    if 'HICA' in filesetup.framelist[0]:
        pa = np.asarray([transform.get_pa(frame) * -1 * np.pi / 180
                         for frame in filesetup.framelist])
    else:
        pa = np.ones(len(filesetup.framelist))
        for i in range(len(filesetup.framelist)):
            cd2_1 = pyf.open(filesetup.framelist[i])[0].header['cd2_1']
            cd2_2 = pyf.open(filesetup.framelist[i])[0].header['cd2_2']
            pa[i] = -np.arctan2(cd2_1, cd2_2)
            
    fullframe = re.sub("-C.*fits", ".fits", filesetup.framelist[0])
    try:
        objname = pyf.open(fullframe)[0].header['OBJECT']
    except:
        objname = "Unknown_Object"
    objname = re.sub(' ', '_', objname)
    np.savetxt(filesetup.output_dir + '/' + objname + '_palist.dat', pa)
    dr_rms = None

    ####################################################################
    # Default save/resume points: destriping, recentering, final files
    # Configuration gives the option to skip the destriping step (only
    # performing a flat-field), the dewarping, and the centering.
    ####################################################################
    
    if np.all(utils.check_files(filesetup, ext="_r")):
        print "\nResuming reduction from recentered files."
        if ngroup == 1:
            flux = utils.read_files(filesetup, ext="_r")
        else:
            flux = utils.read_files(filesetup, ext="_r")
    else:
        if storeall and np.all(utils.check_files(filesetup, ext="_ds")):
            flux = utils.read_files(filesetup, ext="_ds")
        elif not np.all(utils.check_files(filesetup, ext="_ds")):
            flux = parallel._destripe(filesetup, flat, hotpix, mem, adipar,
                                      write_files=True, storeall=storeall,
                                      full_destripe=adipar.full_destripe,
                                      do_horiz=adipar.full_destripe)
        else:
            flux = None
            
        if adipar.dewarp:
            flux = parallel._dewarp(filesetup, mem, flux=flux, storeall=storeall)

        if adipar.do_centroid:
            centers, dr_rms = centroid.fit_centroids(filesetup, flux, pa,
                                                     storeall=storeall,
                                                     objname=objname,
                                                     method=adipar.center,
                                                     psf_dir=exec_path+'/psfref', ref_psf=ref_psf)
            #centers = np.ndarray((nframes, 2))
            #centers[:, 0] = 1026 - 128
            #centers[:, 1] = 949 + 60

            #dr_rms = 30
            np.savetxt(filesetup.output_dir + '/' + objname +
                       '_centers.dat', centers)

        ####################################################################
        # Recenter the data onto a square array of the largest dimension
        # such that the entire array has data
        ####################################################################

            mindim = min(dimy - centers[:, 0].max(), centers[:, 0].min(),
                         dimx - centers[:, 1].max(), centers[:, 1].min())
            mindim = int(mindim) * 2 - 1
            flux = parallel._rotate_recenter(filesetup, flux, storeall=storeall,
                                             centers=centers, newdimen=mindim,
                                             write_files=True)
            nframes = len(filesetup.framelist)

    ####################################################################
    # Perform scaled PCA on the flux array; alternatively, read in an
    # array of principal components.  Neither is currently used.
    ####################################################################
    
    if False:
        pcapath = '/scr/wakusei1/users/tbrandt'
        flux, pca_arr = pca.pca(flux, ncomp=20, nread=2, dosub=True,
                                pcadir=pcapath + '/psfref')
        for i in range(nframes):
            out = pyf.HDUList(pyf.PrimaryHDU(flux[i].astype(np.float32),
                                             pyf.open(filesetup.framelist[i])[0].header))
            rootfile = re.sub('.*/', '', filesetup.framelist[i])
            out.writeto(filesetup.reduce_dir + '/' + re.sub('.fits', '_r.fits', rootfile), clobber=True)
        if dr_rms is None:
            dr_rms = 20
    elif False:
        pca_dir = '.'
        npca = 40
        pca_arr = np.zeros((npca, flux.shape[1], flux.shape[2]), np.float32)
        for i in range(npca):
            tmp = pyf.open(pca_dir + '/pcacomp_' + str(i) + '.fits')[0].data
            dy, dx = [tmp.shape[0] // 2, tmp.shape[1] // 2]
            pca_arr[i, yc - dy:yc + dy + 1, xc - dx:xc + dx + 1] = tmp
    else:
        pca_arr = None

    ####################################################################
    # Find the n closest matches to each frame.  Not currently used.
    ####################################################################

    if False:
        corr = pca.allcorr(range(int(locipar.rmax)), flux, n=80)
        ngroup = 1
    else:
        corr = None
        
    ####################################################################
    # Subtract a radial profile from each frame.  Not currently used.
    ####################################################################

    if False:
        flux = parallel._radialsub(filesetup, flux, mode='median', 
                                   center=None, rmax=None, smoothwidth=0)

    ####################################################################
    # Run LOCI if that ADI reduction method is chosen
    ####################################################################

    partial_sub = None
    full_pa = pa.copy()
    full_framelist = [frame for frame in filesetup.framelist]
    for igroup in range(ngroup):

        if ngroup > 1:
            filesetup.framelist = full_framelist[igroup::ngroup]
            if np.all(utils.check_files(filesetup, ext="_r")):
                flux = utils.read_files(filesetup, ext="_r")
            else:
                print "Unable to read recentered files for LOCI."
                sys.exit()
            pa = full_pa[igroup::ngroup]
        
        x = np.arange(flux.shape[1]) - flux.shape[1] // 2
        x, y = np.meshgrid(x, x)
        r = np.sqrt(x**2 + y**2)
        
        if adipar.adi == 'LOCI':

            ################################################################
            # Set the maximum radius at which to perform LOCI
            ################################################################
        
            deltar = np.sqrt(np.pi * locipar.fwhm**2 / 4 * locipar.npsf)
            rmax = int(flux.shape[1] // 2 - deltar - 50)
            locipar.rmax = min(locipar.rmax, rmax)
                        
            if dr_rms is None:
                nf, dy, dx = flux.shape
                fluxmed = np.median(flux, axis=0)[dy // 2 - 100:dy // 2 + 101,
                                                  dx // 2 - 100:dx // 2 + 101]
                sat = fluxmed > 0.7 * fluxmed.max()
                r2 = r[dy//2 - 100:dy//2 + 101, dx//2 - 100:dx//2 + 101]**2
                dr_rms = np.sqrt(np.sum(r2 * sat) / np.sum(sat))

            ################################################################
            # This is regular LOCI
            ################################################################
        
            if locipar.feedback == 0:
                partial_sub = loci.loci(flux, pa, locipar, mem, mode='LOCI',
                                        pca_arr=None, r_ex=dr_rms, corr=corr,
                                        method='matrix', do_partial_sub=True,
                                        sub_dir=exec_path)
                
            ################################################################
            # The next block runs LOCI once, de-rotates, takes the median,
            # and re-rotates to each frame's position angle.  It then runs
            # LOCI again to over-correct the result.  Not recommended for
            # SEEDS data with AO188.
            ################################################################
        
            else:
                fluxref = np.ndarray(flux.shape, np.float32)
                fluxref[:] = flux
            
                loci.loci(fluxref, pca_arr, pa, locipar, mem, mode='LOCI',
                          r_ex=dr_rms, pca_arr=pca_arr,
                          corr=corr, method='matrix', do_partial_sub=False)
                
                for i in range(flux.shape[0]):
                    np.putmask(fluxref[i], r > locipar.rmax - 1, 0)
                    np.putmask(fluxref[i], r < dr_rms + 1, 0)
                locipar.rmax -= 100
                fluxref = parallel._rotate_recenter(filesetup, fluxref, theta=pa)
            
                for i in range(flux.shape[0]):
                    np.putmask(fluxref[i], r > locipar.rmax - 1, 0)
                    np.putmask(fluxref[i], r < dr_rms + 1, 0)
                locipar.rmax -= 100
                fluxmed = np.median(fluxref, axis=0)
                for i in range(flux.shape[0]):
                    fluxref[i] = fluxmed * locipar.feedback
                fluxref = parallel._rotate_recenter(filesetup, fluxref, theta=-pa)
            
                loci.loci(flux, pa, locipar, mem, mode='refine', fluxref=fluxref,
                          pca_arr=pca_arr, rmin=dr_rms, r_ex=dr_rms)

            ################################################################
            # Mask saturated areas (< dr_rms), do median subtraction at radii
            # beyond the limit of the LOCI reduction
            ################################################################

            fluxmed = np.median(flux, axis=0)
            for i in range(flux.shape[0]):
                np.putmask(flux[i], r < dr_rms + 2, 0)
                np.putmask(flux[i], r > locipar.rmax - 1, flux[i] - fluxmed)
                
         ####################################################################
         # Alternative to LOCI: median PSF subtraction
         ####################################################################

        elif adipar.adi == 'median':
            medpsf = np.median(flux, axis=0)
            for i in range(flux.shape[0]):
                flux[i] -= medpsf

        else:
            print "Error:  ADI reduction method " + adipar.adi + " not recognized."
            #sys.exit(1)

        ####################################################################
        # Derotate, combine flux array using mean/median hybrid (see
        # Brandt+ 2012), measure standard deviation at each radius
        ####################################################################

        if igroup == 0:
            newhead = utils.makeheader(flux[0], pyf.open(fullframe)[0].header,
                                       full_framelist, adipar, locipar)
            
            flux = parallel._rotate_recenter(filesetup, flux, theta=pa)
            fluxtmp, noise = combine.meanmed(flux)
            fluxbest = fluxtmp / ngroup
            if partial_sub is not None:
                partial_sub_tot = partial_sub / ngroup
        else:
            flux = parallel._rotate_recenter(filesetup, flux, theta=pa)
            fluxtmp, noise = combine.meanmed(flux)
            fluxbest += fluxtmp / ngroup
            if partial_sub is not None:
                partial_sub_tot += partial_sub / ngroup
            
    filesetup.framelist = full_framelist
    if partial_sub is not None:
        partial_sub = partial_sub_tot            
    
    ####################################################################
    # Rescale all arrays to 2001x2001 so that the center is pixel number
    # (1000, 1000) indexed from 0.  Use NaN to pad arrays.
    ####################################################################

    fluxbest = utils.arr_resize(fluxbest)
    if partial_sub is not None:
        partial_sub = utils.arr_resize(partial_sub, newdim=fluxbest.shape[0]).astype(np.float32)
        fluxbest /= partial_sub
        out = pyf.HDUList(pyf.PrimaryHDU(partial_sub))
        out.writeto('partial_sub2.fits', clobber=True)
        
    x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3)
    window = (x**2 + y**2 < 2.51**2) * 1.0
    window /= np.sum(window)
    fluxbest = signal.convolve2d(fluxbest, window, mode='same')
    noise = combine.radprof(fluxbest, mode='std', smoothwidth=2, sigrej=4.5)[0]

    r = utils.arr_resize(r)
    if dr_rms is not None:
        np.putmask(fluxbest, r < dr_rms + 3, np.nan)
    np.putmask(fluxbest, r > locipar.rmax - 2, np.nan)
    
    fluxsnr = (fluxbest / noise).astype(np.float32)

    ####################################################################
    # 5-sigma sensitivity maps--just multiply by the scaled aperture
    # photometry of the central star
    ####################################################################
    
    if partial_sub is not None:
        sensitivity = noise * 5 / partial_sub

        ####################################################################
        # Photometry of the central star
        ####################################################################

        if filesetup.scale_phot:
            #ref_phot = photometry.calc_phot(filesetup, adipar, flat,
            #                                    hotpix, mem, window)[0]
            sensitivity /= ref_phot
            fluxbest /= ref_phot
            noise /= ref_phot
        
        sig_sens = combine.radprof(sensitivity, mode='std', smoothwidth=0)[0]
        outfile = open(filesetup.output_dir + '/' + objname +
                       '_5sigma_sensitivity.dat', 'w')
        for i in range(sig_sens.shape[0] // 2, sig_sens.shape[0]):
            iy = sig_sens.shape[0] // 2
            if np.isfinite(sensitivity[iy, i]):
                outfile.write('%8d  %12.5e  %12.5e  %12e\n' %
                              (i - iy, sensitivity[iy, i], sig_sens[iy, i],
                               partial_sub[iy, i]))
        outfile.close()
        
    else:
        np.savetxt(filesetup.output_dir + '/' + objname + '_noiseprofile.dat',
                   noise[noise.shape[0] // 2, noise.shape[1] // 2:].T)
            
    ####################################################################
    # Write the output fits files. 
    ####################################################################

    snr = pyf.HDUList(pyf.PrimaryHDU(fluxsnr.astype(np.float32), newhead))
    final = pyf.HDUList(pyf.PrimaryHDU(fluxbest.astype(np.float32), newhead))
    if partial_sub is not None:
        contrast = pyf.HDUList(pyf.PrimaryHDU(sensitivity.astype(np.float32), newhead))

    name_base = filesetup.output_dir + '/' + objname
    snr.writeto(name_base + '_snr.fits', clobber=True)
    final.writeto(name_base + '_final.fits', clobber=True)
    if partial_sub is not None:
        contrast.writeto(name_base + '_5sigma_sensitivity.fits', clobber=True)
Example #23
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                h_l,
                                gamma1,
                                gamma2,
                                beta1=1.0,
                                beta2=1.0,
                                type_algo=1):
    ''' Finds an initial seed set S using a general greedy heuristic.
    Input: G -- networkx Graph object
    budget -- number of seed nodes to select
    h_l, gamma1, gamma2, beta1, beta2 -- parameters forwarded to the
        seed-selection strategy chosen by type_algo
    Output: S -- initial set of seed nodes to propagate
    '''
    # import time
    # start = time.time()
    # R = 200 # number of times to run Random Cascade
    S = []  # set of selected nodes
    influenced = []
    influenced_a = []
    influenced_b = []
    influenced_c = []
    seeds_a = []
    seeds_b = []
    seeds_c = []
    seed_range = []
    if type_algo == 1:
        filename = filename + '_greedy_'

    elif type_algo == 2:
        filename = filename + f'_log_gamma_{gamma1}_{gamma2}_'

    elif type_algo == 3:
        filename = filename + f'_root_gamma_{gamma1}_beta_{beta1}_{beta2}_'

    elif type_algo == 4:
        filename = filename + f'_root_majority_gamma_{gamma1}_beta_{beta1}_{beta2}_'

    stats = ut.graph_stats(G, print_stats=False)

    try:

        influenced, influenced_a, influenced_b, influenced_c, seeds_a, seeds_b, seeds_c = ut.read_files(
            filename)
        S = seeds_a[-1] + seeds_b[-1] + seeds_c[-1]

        if len(S) >= budget:
            # ut.write_files(filename,influenced, influenced_a, influenced_b, seeds_a, seeds_b)
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print("\n\n")
            print(influenced_c)
            print(" Seed length ", len(S))

            ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                              filename, stats['group_a'], stats['group_b'],
                              stats['group_c'], [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b],
                              [len(S_c) for S_c in seeds_c])

            return (influenced, influenced_a, influenced_b, influenced_c,
                    seeds_a, seeds_b, seeds_c)
        else:
            seed_range = range(budget - len(S))

    except FileNotFoundError:
        print(f'{filename} not found')

        seed_range = range(budget)

    # add node to S if achieves maximum propagation for current chosen + this node
    for i in seed_range:  # cannot parallellize

        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        # results = None
        if type_algo == 1:
            results = pool.starmap(
                map_select_next_seed_set_cover,
                zip(repeat(G), repeat(S), list(G.nodes()), repeat(h_l)))
            # results = pool.map(map_select_next_seed_greedy, ((G, S, v,h_l) for v in G.nodes()))
        elif type_algo == 2:
            results = pool.map(map_select_next_seed_log_greedy,
                               ((G, S, v, gamma1, gamma2) for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_select_next_seed_root_greedy,
                               ((G, S, v, gamma1, beta1, beta2)
                                for v in G.nodes()))
        elif type_algo == 4:
            results = pool.map(map_select_next_seed_root_majority_greedy,
                               ((G, S, v, gamma1) for v in G.nodes()))

        pool.close()
        pool.join()

        s = PQ()  # priority queue
        # if results == None:

        for v, priority, p_a, p_b, p_c in results:
            # Run Random Cascade R times; parallelizing gains little here because a
            # single runIC call is cheap, though it may help for very large graphs.
            s.add_task(v, -priority)

        node, priority = s.pop_item()
        S.append(node)
        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        influenced_c.append(I_c)
        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']
        print(
            str(i + 1) + ' Selected Node is ' + str(node) + ' group ' +
            str(group) + ' Ia = ' + str(I_a) + ' Ib = ' + str(I_b) + ' Ic = ' +
            str(I_c))
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        seeds_a.append(
            S_red)  # id's of the seeds so the influence can be recreated
        seeds_b.append(S_blue)
        seeds_c.append(S_purple)
        # print(i, k, time.time() - start)
    # print ( "\n \n  I shouldn't be here.   ********* \n \n ")
    ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                      filename, stats['group_r'], stats['group_b'],
                      stats['group_n'], [len(S_a) for S_a in seeds_a],
                      [len(S_b)
                       for S_b in seeds_b], [len(S_c) for S_c in seeds_c])

    ut.write_files(filename, influenced, influenced_a, influenced_b,
                   influenced_c, seeds_a, seeds_b, seeds_c)

    return (influenced, influenced_a, influenced_b, influenced_c, seeds_a,
            seeds_b, seeds_c)
Example #24
def run_classifier():
    u.create_required_directories()
    """
    Get the data
    """
    x_train, train_paths = u.read_files(train_data_path, training_subjects,
                                        folder_name)
    x_test, test_paths = u.read_files(test_data_path, testing_subjects,
                                      folder_name)
    x_validation, val_paths = u.read_files(val_data_path, testing_subjects,
                                           folder_name)

    x_train = np.array(x_train)
    x_test = np.array(x_test)
    x_validation = np.array(x_validation)
    print(x_train.shape, x_test.shape, x_validation.shape)

    np.random.shuffle(x_train)

    x_train, y_train, _ = u.split_x_y(x_train)
    x_test, y_test, y_test_labeled = u.split_x_y(x_test)
    x_validation, y_validation, _ = u.split_x_y(x_validation)

    # x_train = u.pad_rows(x_train, 60, True)
    # x_test = u.pad_rows(x_test, 60, True)
    # x_validation = u.pad_rows(x_validation, 60, True)

    print(x_train.shape, x_test.shape)
    print(y_train.shape, y_test.shape)
    """
    Get the model
    """

    # model = m.simple_rnn_model(len(labels), x_train.shape)
    model = m.simple_rnn_model(len(labels), len(x_train[0]))

    print(model.summary())

    adam = Adam(lr=learning_rate)

    # Compile model
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])

    # Fit the model
    model = u.train_model(model, x_train, y_train, x_validation, y_validation,
                          time_stamp, epochs, batch_size)
    """
    Evaluate the model
    """
    prediction_y = model.predict(x_test)
    pred = np.array(prediction_y)
    pred = pred.argmax(1)
    pred = np.array(pred).astype(int)

    y_test_labeled = np.array(y_test_labeled).astype(int)

    cf = confusion_matrix(y_test_labeled, pred)
    print(cf)
    with open("confusion_matrix/{}".format(time_stamp), "w") as file:
        sentences = []
        for line in cf:
            s = ""
            for item in line:
                s += "{},".format(item)
            sentences.append(s + "\n")
        file.writelines(sentences)
    print(labels)
    print(classification_report(y_test_labeled, pred))

    scores = model.evaluate(x_test, y_test)
    print(scores)
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    if load_checked and model_path != "":
        print("Initiating Model: " + model_path)
    print(train_paths)
    print(test_paths)
    print(folder_name, epochs, batch_size, learning_rate, time_stamp)
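
The manual loop above that writes the confusion matrix to CSV can be expressed with numpy directly; a small self-contained illustration with example data and a hypothetical output path:

import numpy as np

cf = np.array([[5, 1], [2, 7]])  # example confusion matrix
np.savetxt("confusion_matrix_example.csv", cf, fmt="%d", delimiter=",")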
Example #25
    def __init__(self, args):

        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

        self.input_path = args.input_path
        self.lr = args.lr
        self.l2 = args.l2
        self.dr = args.dr
        self.momentum = args.momentum
        self.train_portion = args.train_portion
        self.data_type = args.data_type
        self.n_epoch = args.n_epoch
        self.batch_size = args.batch_size
        self.thr = args.thr
        self.gpu = args.gpu
        self.mode = args.mode
        self.net_type = args.net
        self.output = Output(args)
        self.sprint = self.output.sprint
        self.dprint = self.output.dprint
        self.best_loss = np.inf

        files = ut.read_files(self.input_path, self.data_type)
        if self.mode == 'debug':
            files = files[:min(100, len(files))]
        np.random.shuffle(files)
        self.n_file = len(files)
        self.n_train = int(self.train_portion * self.n_file)
        self.n_test = self.n_file - self.n_train
        self.train_files = files[:self.n_train]
        self.test_files = files[self.n_train:]
        self.train_data = Data(args.input_path, self.train_files,
                               args.batch_size, args.thr)
        self.test_data = Data(args.input_path, self.test_files,
                              args.batch_size, args.thr)
        print('Training set: %i photos\nTest set: %i photos' %
              (self.n_train, self.n_test))

        if args.act_fn == 'relu':
            self.act_fn = F.relu
        elif args.act_fn == 'sigmoid':
            self.act_fn = F.sigmoid
        else:  # none
            self.act_fn = lambda x: x

        if self.net_type == 'cnn':
            self.net_ = CNN_Net(self.act_fn, self.dr)
        else:  # feature
            self.net_ = Feature_Net(self.thr)
        self.criterion_ = nn.MSELoss()

        if self.gpu > -1:
            self.net = self.net_.cuda(self.gpu)
            self.criterion = self.criterion_.cuda(self.gpu)
        else:
            self.net = self.net_
            self.criterion = self.criterion_

        if args.optim == 'adagrad':
            self.optim = torch.optim.Adagrad(self.net.parameters(),
                                             lr=self.lr,
                                             weight_decay=self.l2,
                                             initial_accumulator_value=args.x)
        elif args.optim == 'adam':
            self.optim = torch.optim.Adam(self.net.parameters(),
                                          lr=self.lr,
                                          weight_decay=self.l2)
        else:  # sgd
            self.optim = torch.optim.SGD(self.net.parameters(),
                                         lr=self.lr,
                                         weight_decay=self.l2,
                                         momentum=self.momentum)

        print(self.net)
Example #26
def main():
    """
    Main program for ADI data reduction, configured with a call to
    adiparam.GetConfig(), which brings up a GUI to set parameters.

    The pipeline is currently designed for SEEDS data taken without
    an occulting mask.  
    
    You must have scipy, numpy, pyephem, multiprocessing, and matplotlib
    installed to use this pipeline.
    """

    parser = optparse.OptionParser(usage=__doc__)
    parser.add_option("-p",
                      "--prefix",
                      dest="prefix",
                      default="HICA",
                      help="Specify raw file name prefix (default=%default)")
    opts, args = parser.parse_args()

    exec_path = os.path.dirname(os.path.realpath(__file__))
    filesetup, adipar, locipar = GetConfig(prefix=opts.prefix)

    nframes = len(filesetup.framelist)
    ngroup = 1 + int((nframes - 1) / locipar.max_n)
    flat = pyf.open(filesetup.flat)
    if filesetup.pixmask is not None:
        hotpix = pyf.open(filesetup.pixmask)
    else:
        hotpix = None

    dimy, dimx = pyf.open(filesetup.framelist[0])[-1].data.shape
    mem, ncpus, storeall = utils.config(nframes, dimy * dimx)

    if filesetup.scale_phot:
        x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3)
        window = (x**2 + y**2 < 2.51**2) * 1.0
        window /= np.sum(window)
        ref_phot, ref_psf = photometry.calc_phot(filesetup, adipar, flat,
                                                 hotpix, mem, window)
    else:
        ref_psf = None
        ref_phot = None

    ################################################################
    # WCS coordinates are not reliable in HiCIAO data with the image
    # rotator off.  Compute parallactic angle.  Otherwise, trust the
    # WCS coordinates.
    ################################################################

    if 'HICA' in filesetup.framelist[0]:
        pa = np.asarray([
            transform.get_pa(frame) * -1 * np.pi / 180
            for frame in filesetup.framelist
        ])
    else:
        pa = np.ones(len(filesetup.framelist))
        for i in range(len(filesetup.framelist)):
            cd2_1 = pyf.open(filesetup.framelist[i])[0].header['cd2_1']
            cd2_2 = pyf.open(filesetup.framelist[i])[0].header['cd2_2']
            pa[i] = -np.arctan2(cd2_1, cd2_2)

    fullframe = re.sub("-C.*fits", ".fits", filesetup.framelist[0])
    try:
        objname = pyf.open(fullframe)[0].header['OBJECT']
    except:
        objname = "Unknown_Object"
    objname = re.sub(' ', '_', objname)
    np.savetxt(filesetup.output_dir + '/' + objname + '_palist.dat', pa)
    dr_rms = None

    ####################################################################
    # Default save/resume points: destriping, recentering, final files
    # Configuration gives the option to skip the destriping step (only
    # performing a flat-field), the dewarping, and the centering.
    ####################################################################

    if np.all(utils.check_files(filesetup, ext="_r")):
        print "\nResuming reduction from recentered files."
        if ngroup == 1:
            flux = utils.read_files(filesetup, ext="_r")
        else:
            flux = utils.read_files(filesetup, ext="_r")
    else:
        if storeall and np.all(utils.check_files(filesetup, ext="_ds")):
            flux = utils.read_files(filesetup, ext="_ds")
        elif not np.all(utils.check_files(filesetup, ext="_ds")):
            flux = parallel._destripe(filesetup,
                                      flat,
                                      hotpix,
                                      mem,
                                      adipar,
                                      write_files=True,
                                      storeall=storeall,
                                      full_destripe=adipar.full_destripe,
                                      do_horiz=adipar.full_destripe)
        else:
            flux = None

        if adipar.dewarp:
            flux = parallel._dewarp(filesetup,
                                    mem,
                                    flux=flux,
                                    storeall=storeall)

        if adipar.do_centroid:
            centers, dr_rms = centroid.fit_centroids(filesetup,
                                                     flux,
                                                     pa,
                                                     storeall=storeall,
                                                     objname=objname,
                                                     method=adipar.center,
                                                     psf_dir=exec_path +
                                                     '/psfref',
                                                     ref_psf=ref_psf)
            #centers = np.ndarray((nframes, 2))
            #centers[:, 0] = 1026 - 128
            #centers[:, 1] = 949 + 60

            #dr_rms = 30
            np.savetxt(filesetup.output_dir + '/' + objname + '_centers.dat',
                       centers)

            ####################################################################
            # Recenter the data onto a square array of the largest dimension
            # such that the entire array has data
            ####################################################################

            mindim = min(dimy - centers[:, 0].max(), centers[:, 0].min(),
                         dimx - centers[:, 1].max(), centers[:, 1].min())
            mindim = int(mindim) * 2 - 1
            flux = parallel._rotate_recenter(filesetup,
                                             flux,
                                             storeall=storeall,
                                             centers=centers,
                                             newdimen=mindim,
                                             write_files=True)
            nframes = len(filesetup.framelist)

    ####################################################################
    # Perform scaled PCA on the flux array; alternatively, read in an
    # array of principal components.  Neither is currently used.
    ####################################################################

    if False:
        pcapath = '/scr/wakusei1/users/tbrandt'
        flux, pca_arr = pca.pca(flux,
                                ncomp=20,
                                nread=2,
                                dosub=True,
                                pcadir=pcapath + '/psfref')
        for i in range(nframes):
            out = pyf.HDUList(
                pyf.PrimaryHDU(flux[i].astype(np.float32),
                               pyf.open(filesetup.framelist[i])[0].header))
            rootfile = re.sub('.*/', '', filesetup.framelist[i])
            out.writeto(filesetup.reduce_dir + '/' +
                        re.sub('.fits', '_r.fits', rootfile),
                        clobber=True)
        if dr_rms is None:
            dr_rms = 20
    elif False:
        pca_dir = '.'
        npca = 40
        pca_arr = np.zeros((npca, flux.shape[1], flux.shape[2]), np.float32)
        for i in range(npca):
            tmp = pyf.open(pca_dir + '/pcacomp_' + str(i) + '.fits')[0].data
            dy, dx = [tmp.shape[0] // 2, tmp.shape[1] // 2]
            pca_arr[i, yc - dy:yc + dy + 1, xc - dx:xc + dx + 1] = tmp
    else:
        pca_arr = None

    ####################################################################
    # Find the n closest matches to each frame.  Not currently used.
    ####################################################################

    if False:
        corr = pca.allcorr(range(int(locipar.rmax)), flux, n=80)
        ngroup = 1
    else:
        corr = None

    ####################################################################
    # Subtract a radial profile from each frame.  Not currently used.
    ####################################################################

    if False:
        flux = parallel._radialsub(filesetup,
                                   flux,
                                   mode='median',
                                   center=None,
                                   rmax=None,
                                   smoothwidth=0)

    ####################################################################
    # Run LOCI if that ADI reduction method is chosen
    ####################################################################

    partial_sub = None
    full_pa = pa.copy()
    full_framelist = [frame for frame in filesetup.framelist]
    for igroup in range(ngroup):

        if ngroup > 1:
            filesetup.framelist = full_framelist[igroup::ngroup]
            if np.all(utils.check_files(filesetup, ext="_r")):
                flux = utils.read_files(filesetup, ext="_r")
            else:
                print "Unable to read recentered files for LOCI."
                sys.exit()
            pa = full_pa[igroup::ngroup]

        x = np.arange(flux.shape[1]) - flux.shape[1] // 2
        x, y = np.meshgrid(x, x)
        r = np.sqrt(x**2 + y**2)

        if adipar.adi == 'LOCI':

            ################################################################
            # Set the maximum radius at which to perform LOCI
            ################################################################

            deltar = np.sqrt(np.pi * locipar.fwhm**2 / 4 * locipar.npsf)
            rmax = int(flux.shape[1] // 2 - deltar - 50)
            locipar.rmax = min(locipar.rmax, rmax)

            if dr_rms is None:
                nf, dy, dx = flux.shape
                fluxmed = np.median(flux, axis=0)[dy // 2 - 100:dy // 2 + 101,
                                                  dx // 2 - 100:dx // 2 + 101]
                sat = fluxmed > 0.7 * fluxmed.max()
                r2 = r[dy // 2 - 100:dy // 2 + 101,
                       dx // 2 - 100:dx // 2 + 101]**2
                dr_rms = np.sqrt(np.sum(r2 * sat) / np.sum(sat))

            ################################################################
            # This is regular LOCI
            ################################################################

            if locipar.feedback == 0:
                partial_sub = loci.loci(flux,
                                        pa,
                                        locipar,
                                        mem,
                                        mode='LOCI',
                                        pca_arr=None,
                                        r_ex=dr_rms,
                                        corr=corr,
                                        method='matrix',
                                        do_partial_sub=True,
                                        sub_dir=exec_path)

            ################################################################
            # The next block runs LOCI once, de-rotates, takes the median,
            # and re-rotates to each frame's position angle.  It then runs
            # LOCI again to over-correct the result.  Not recommended for
            # SEEDS data with AO188.
            ################################################################

            else:
                fluxref = np.ndarray(flux.shape, np.float32)
                fluxref[:] = flux

                loci.loci(fluxref,
                          pca_arr,
                          pa,
                          locipar,
                          mem,
                          mode='LOCI',
                          r_ex=dr_rms,
                          pca_arr=pca_arr,
                          corr=corr,
                          method='matrix',
                          do_partial_sub=False)

                for i in range(flux.shape[0]):
                    np.putmask(fluxref[i], r > locipar.rmax - 1, 0)
                    np.putmask(fluxref[i], r < dr_rms + 1, 0)
                locipar.rmax -= 100
                fluxref = parallel._rotate_recenter(filesetup,
                                                    fluxref,
                                                    theta=pa)

                for i in range(flux.shape[0]):
                    np.putmask(fluxref[i], r > locipar.rmax - 1, 0)
                    np.putmask(fluxref[i], r < dr_rms + 1, 0)
                locipar.rmax -= 100
                fluxmed = np.median(fluxref, axis=0)
                for i in range(flux.shape[0]):
                    fluxref[i] = fluxmed * locipar.feedback
                fluxref = parallel._rotate_recenter(filesetup,
                                                    fluxref,
                                                    theta=-pa)

                loci.loci(flux,
                          pa,
                          locipar,
                          mem,
                          mode='refine',
                          fluxref=fluxref,
                          pca_arr=pca_arr,
                          rmin=dr_rms,
                          r_ex=dr_rms)

            ################################################################
            # Mask saturated areas (< dr_rms), do median subtraction at radii
            # beyond the limit of the LOCI reduction
            ################################################################

            fluxmed = np.median(flux, axis=0)
            for i in range(flux.shape[0]):
                np.putmask(flux[i], r < dr_rms + 2, 0)
                np.putmask(flux[i], r > locipar.rmax - 1, flux[i] - fluxmed)

        ####################################################################
        # Alternative to LOCI: median PSF subtraction
        ####################################################################

        elif adipar.adi == 'median':
            medpsf = np.median(flux, axis=0)
            for i in range(flux.shape[0]):
                flux[i] -= medpsf

        else:
            print "Error:  ADI reduction method " + adipar.adi + " not recognized."
            #sys.exit(1)

        ####################################################################
        # Derotate, combine flux array using mean/median hybrid (see
        # Brandt+ 2012), measure standard deviation at each radius
        ####################################################################

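        # Each group's derotated combination is added with weight 1/ngroup, so
        # fluxbest ends up as the average over all groups.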
        if igroup == 0:
            newhead = utils.makeheader(flux[0],
                                       pyf.open(fullframe)[0].header,
                                       full_framelist, adipar, locipar)

            flux = parallel._rotate_recenter(filesetup, flux, theta=pa)
            fluxtmp, noise = combine.meanmed(flux)
            fluxbest = fluxtmp / ngroup
            if partial_sub is not None:
                partial_sub_tot = partial_sub / ngroup
        else:
            flux = parallel._rotate_recenter(filesetup, flux, theta=pa)
            fluxtmp, noise = combine.meanmed(flux)
            fluxbest += fluxtmp / ngroup
            if partial_sub is not None:
                partial_sub_tot += partial_sub / ngroup

    filesetup.framelist = full_framelist
    if partial_sub is not None:
        partial_sub = partial_sub_tot

    ####################################################################
    # Rescale all arrays to 2001x2001 so that the center is pixel number
    # (1000, 1000) indexed from 0.  Use NaN to pad arrays.
    ####################################################################

    fluxbest = utils.arr_resize(fluxbest)
    if partial_sub is not None:
        partial_sub = utils.arr_resize(
            partial_sub, newdim=fluxbest.shape[0]).astype(np.float32)
        fluxbest /= partial_sub
        out = pyf.HDUList(pyf.PrimaryHDU(partial_sub))
        out.writeto('partial_sub2.fits', clobber=True)

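    # Smooth with a circular aperture of radius ~2.5 pixels (kernel normalized
    # to unit sum) before measuring the radial noise profile.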
    x, y = np.meshgrid(np.arange(7) - 3, np.arange(7) - 3)
    window = (x**2 + y**2 < 2.51**2) * 1.0
    window /= np.sum(window)
    fluxbest = signal.convolve2d(fluxbest, window, mode='same')
    noise = combine.radprof(fluxbest, mode='std', smoothwidth=2, sigrej=4.5)[0]

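    # Blank out the saturated core (inside dr_rms) and everything beyond the
    # outer LOCI radius with NaN.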
    r = utils.arr_resize(r)
    if dr_rms is not None:
        np.putmask(fluxbest, r < dr_rms + 3, np.nan)
    np.putmask(fluxbest, r > locipar.rmax - 2, np.nan)

    fluxsnr = (fluxbest / noise).astype(np.float32)

    ####################################################################
    # 5-sigma sensitivity maps--just multiply by the scaled aperture
    # photometry of the central star
    ####################################################################

    if partial_sub is not None:
        sensitivity = noise * 5 / partial_sub

        ####################################################################
        # Photometry of the central star
        ####################################################################

        if filesetup.scale_phot:
            #ref_phot = photometry.calc_phot(filesetup, adipar, flat,
            #                                    hotpix, mem, window)[0]
            sensitivity /= ref_phot
            fluxbest /= ref_phot
            noise /= ref_phot

        sig_sens = combine.radprof(sensitivity, mode='std', smoothwidth=0)[0]
        outfile = open(
            filesetup.output_dir + '/' + objname + '_5sigma_sensitivity.dat',
            'w')
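        # One row per pixel along +x from the image center: separation (pixels),
        # 5-sigma sensitivity, radial scatter of the sensitivity, and the
        # partial-subtraction correction.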
        for i in range(sig_sens.shape[0] // 2, sig_sens.shape[0]):
            iy = sig_sens.shape[0] // 2
            if np.isfinite(sensitivity[iy, i]):
                outfile.write('%8d  %12.5e  %12.5e  %12e\n' %
                              (i - iy, sensitivity[iy, i], sig_sens[iy, i],
                               partial_sub[iy, i]))
        outfile.close()

    else:
        np.savetxt(filesetup.output_dir + '/' + objname + '_noiseprofile.dat',
                   noise[noise.shape[0] // 2, noise.shape[1] // 2:].T)

    ####################################################################
    # Write the output fits files.
    ####################################################################

    snr = pyf.HDUList(pyf.PrimaryHDU(fluxsnr.astype(np.float32), newhead))
    final = pyf.HDUList(pyf.PrimaryHDU(fluxbest.astype(np.float32), newhead))
    if partial_sub is not None:
        contrast = pyf.HDUList(
            pyf.PrimaryHDU(sensitivity.astype(np.float32), newhead))

    name_base = filesetup.output_dir + '/' + objname
    snr.writeto(name_base + '_snr.fits', clobber=True)
    final.writeto(name_base + '_final.fits', clobber=True)
    if partial_sub is not None:
        contrast.writeto(name_base + '_5sigma_sensitivity.fits', clobber=True)
Beispiel #27
0
        qque.extend(re.findall(r"\b(q)\b", clean_tweet, re.IGNORECASE))  # q = que
        xpor.extend(re.findall(r"\b(x)\b", clean_tweet, re.IGNORECASE))  # x = por
        dde.extend(re.findall(r"\b(d)\b", clean_tweet, re.IGNORECASE))  # d = de
        xqs.extend(re.findall(r"\b(xq)\b", clean_tweet, re.IGNORECASE))  # xq = porque
        pqs.extend(re.findall(r"\b(pq)\b", clean_tweet, re.IGNORECASE))  # pq = porque
        # clean_tweet = clean_tweet.translate(str.maketrans('', '', string.punctuation + '¡'))  # PUNCTUATION


    return hashtags, urls, usernames, letReps, laughters, numbers, emojis, xpor, qque, dde, xqs, pqs



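# Build a punctuation string that leaves out question and exclamation marks;
# sc also lists the inverted Spanish forms, although string.punctuation only
# contains ASCII characters.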
sc = {'¡', '!', '?', '¿'}
punctuation = ''.join([c for c in string.punctuation if c not in sc])

train_data, dev_data, test_data, valid_data = utils.read_files('all')
hashtags, urls, usernames, letReps, laughters, numbers,emojis, xpor, qque, dde, xqs, pqs = print_preprocess(train_data['content'])
hashtags_d, urls_d, usernames_d, letReps_d, laughters_d, numbers_d,emojis_d, xpor_d, qque_d, dde_d, xqs_d, pqs_d = print_preprocess(dev_data['content'])
hashtags_t, urls_t, usernames_t, letReps_t, laughters_t, numbers_t,emojis_t, xpor_t, qque_t, dde_t, xqs_t, pqs_t = print_preprocess(test_data['content'])

print('Hashtag intersection')
counter = 0
train_hash = dict.fromkeys(hashtags)
train_hash.update(dict.fromkeys(hashtags_d))
print('ht:{}        hd:{}       htest:{}'.format(len(hashtags), len(hashtags_d), len(hashtags_t)))
for hash in hashtags_t:
    if hash not in train_hash:
        counter += 1
print(counter)
print()
Beispiel #28
0
import logging
import random
import sys

import numpy as np
import scipy as scp
import scipy.misc
import tensorflow as tf

import fcn16_vgg
import loss
import utils

RESOURCE = '../dataset'
MODEL_PATH = "./models/model.ckpt"

logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                    level=logging.INFO,
                    stream=sys.stdout)

dataset = utils.read_files(RESOURCE)
random.shuffle(dataset)
input_set, output_set = utils.split_dataset(dataset)

np.save("input_set.npy", input_set)
np.save("output_set.npy", output_set)
# input_set = np.load("input_set.npy")
# output_set = np.load("output_set.npy")

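# Split off a 0.1 fraction for the test set, then 0.1 of the remainder for
# validation (assuming the last argument of utils.train_test_split is the
# held-out fraction).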
train_input_set, train_output_set, test_input_set, test_output_set \
    = utils.train_test_split(input_set, output_set, 0.1)

train_input_set, train_output_set, valid_input_set, valid_output_set \
    = utils.train_test_split(train_input_set, train_output_set, 0.1)

height = input_set.shape[1]
Beispiel #29
0
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Finds an initial seed set S using a general greedy set-cover heuristic
    Input: filename -- prefix for the cached/output result files
    G -- networkx Graph object
    budget -- fraction of each group that needs to be influenced
    gamma_a, gamma_b -- weights passed to the timing-based variants (type_algo 2 and 3)
    type_algo -- 1: plain set cover, 2: set cover with per-group timings, 3: same timing weight for both groups
    Output: influenced, influenced_a, influenced_b, seeds_a, seeds_b (per-iteration lists; the seed lists hold node ids)
    '''
    #import time
    #start = time.time()
    #R = 200 # number of times to run Random Cascade

    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + f'_set_cover_reach_{budget}_'
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    reach = 0.0
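    # reach = coverage fraction of group A plus group B, each capped at `budget`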
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    influenced = []
    influenced_a = []
    influenced_b = []
    seeds_a = []
    seeds_b = []

    try:

        influenced, influenced_a, influenced_b, seeds_a, seeds_b = ut.read_files(
            filename)
        reach = min(influenced_a[-1] / stats['group_a'], budget) + min(
            influenced_b[-1] / stats['group_b'], budget)
        S = seeds_a[-1] + seeds_b[-1]
        if reach >= budget:
            #ut.write_files(filename,influenced, influenced_a, influenced_b, seeds_a, seeds_b)
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print(f" reach: {reach}")
            ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])
            return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)

    except FileNotFoundError:
        print(f'{filename} not Found ')

    i = 0
    while reach < 2 * budget:  # cannot parallelize

        pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)

        if type_algo == 1:
            results = pool.map(map_select_next_seed_set_cover,
                               ((G, S, v) for v in G.nodes()))
        elif type_algo == 2:
            results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_b)
                                               for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_a)
                                               for v in G.nodes()))

        pool.close()
        pool.join()

        s = PQ()  # priority queue
        for v, p, p_a, p_b in results:  #
            s.add_task(
                v, -(min(p_a / stats['group_a'], budget) +
                     min(p_b / stats['group_b'], budget)))

        node, priority = s.pop_item()
        #priority = -priority # as the current priority is negative fraction
        S.append(node)

        I, I_a, I_b = map_fair_IC((G, S))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        S_red = []
        S_blue = []
        group = G.nodes[node]['color']

        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            else:
                S_blue.append(n)

        seeds_a.append(
            S_red)  # id's of the seeds so the influence can be recreated
        seeds_b.append(S_blue)

        #reach += -priority both are fine
        reach_a = I_a / stats['group_a']
        reach_b = I_b / stats['group_b']
        reach = (min(reach_a, budget) + min(reach_b, budget))

        print(
            f'{i+1} Node ID {node} group {group} Ia = {I_a} Ib {I_b} reach: {reach} reach_a {reach_a} reach_b {reach_b}'
        )
        #print(i, k, time.time() - start)
        i += 1

    ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                      stats['group_a'], stats['group_b'],
                      [len(S_a)
                       for S_a in seeds_a], [len(S_b) for S_b in seeds_b])

    ut.write_files(filename, influenced, influenced_a, influenced_b, seeds_a,
                   seeds_b)

    return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
Beispiel #30
0
def run_classifier():
    print("Using Model: {}".format(training_model))
    u.create_required_directories()

    """
    Step 1: Get the data
    """
    x_train, train_paths = u.read_files(train_data_path, training_subjects, folder_name)
    x_test, test_paths = u.read_files(test_data_path, testing_subjects, folder_name)
    x_validation, val_paths = u.read_files(val_data_path, testing_subjects, folder_name)

    x_train = np.array(x_train)
    x_test = np.array(x_test)
    x_validation = np.array(x_validation)
    print(x_train.shape, x_test.shape, x_validation.shape)

    """
    Step 1.1: Shuffle the training data 
    """
    np.random.shuffle(x_train)

    """
    Step 1.2: Split x and y values
    """
    x_train, y_train, _ = u.split_x_y(x_train)
    x_test, y_test, y_test_labeled = u.split_x_y(x_test)
    x_validation, y_validation, _ = u.split_x_y(x_validation)

    """
    Step 2: Normalize the data
    """
    mean_array = np.mean(x_train, axis=0)
    x_train -= mean_array
    x_test -= mean_array
    x_validation -= mean_array

    max_value = np.max(x_train)
    x_train /= float(max_value)
    x_test /= float(max_value)
    x_validation /= float(max_value)

    normalizing_values = {
        "mean_array": mean_array,
        "max_value": max_value
    }

    """
    Step 2.2: Save the normalizing values for analysis
    """
    u.save_object(normalizing_values, "normalizers/{}".format(time_stamp))

    print(x_train.shape, x_test.shape)
    print(y_train.shape, y_test.shape)

    if training_model == "NN" or training_model == "LR":
        """
        For Fully Connected Neural Nets and Logistic Regression, the input must be flattened to a single dimension
        """
        dimension = x_train.shape[1] * x_train.shape[2] * x_train.shape[3]
        x_train = x_train.reshape(x_train.shape[0], dimension).astype('float32')
        x_test = x_test.reshape(x_test.shape[0], dimension).astype('float32')
        x_validation = x_validation.reshape(x_validation.shape[0], dimension).astype('float32')

    """
    Step 3: Get the model.
    """
    model = u.get_model(x_train.shape, load_checked, model_path, len(labels), training_model)

    """
    Step 3.1: Compile model
    """
    adam = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=u.get_metrics())
    print(model.summary())

    """
    Step 4: Train the model.
    """
    if not predict_only:
        model = u.train_model(model, x_train, y_train, x_validation, y_validation, time_stamp, epochs, batch_size)

    """
    Step 4.1: After training, reload the checkpoint that performed best on the validation dataset
    """
    if reload_model:
        model = u.load_checked_model(model_path, time_stamp)

    """
    Step 5: Evaluate the model
    """
    # Get the predictions on test set
    prediction_y = model.predict(x_test)

    pred = np.array(prediction_y)
    pred = pred.argmax(1)
    pred = np.array(pred).astype(int)

    y_test_labeled = np.array(y_test_labeled).astype(int)

    # Get and save the confusion matrix
    cf = confusion_matrix(y_test_labeled, pred)
    with open("confusion_matrix/{}".format(time_stamp), "w") as file:
        sentences = []
        for line in cf:
            s = ""
            for item in line:
                s += "{},".format(item)
            sentences.append(s + "\n")
        file.writelines(sentences)
    print(labels)

    # Show the precision, recall and F1 measure for all the classes
    print(classification_report(y_test_labeled, pred))

    # Get and save the top-5 predictions for each word in test
    p_array = []
    for i in range(len(prediction_y)):
        p = prediction_y[i]
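        # Indices of the 5 highest-probability classes (argpartition leaves them unsorted)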
        top5 = np.argpartition(p, -5)[-5:]
        out = [labels[y_test_labeled[i]]]
        for j in top5:
            probability = p[j]
            prediction_label = labels[j]
            out.append((prediction_label, probability))
        p_array.append(out)
    with open("model_plots/output_{}.csv".format(time_stamp), 'w') as resultFile:
        wr = csv.writer(resultFile)
        wr.writerows(p_array)

    # Calculate the accuracy on the test dataset
    scores = model.evaluate(x_test, y_test)
    print(scores)
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    if load_checked and model_path != "":
        print("Initiating Model: " + model_path)

    # Print some of the parameters related to the process
    print(train_paths)
    print(test_paths)
    print(folder_name, epochs, batch_size, learning_rate, time_stamp)
Beispiel #31
0
    def test_read_files(self):
        paths = ['tests/test_utils.json']
        for line in utils.read_files(paths):
            assert 'proto_signature' in line
            assert 'signature' in line
            assert 'uuid' in line