Example #1
def make_val_set(df, split_percentage=0.2, drop_percentage=0.):
    # Find the product_ids for each category.
    category_dict = defaultdict(list)
    for ir in tqdm(df.itertuples()):
        category_dict[ir[4]].append(ir[0])
    train_list = []
    val_list = []
    with tqdm(total=len(df)) as pbar:
        for category_id, product_ids in category_dict.items():
            category_idx = cat2idx[category_id]
            # Randomly remove products to make the dataset smaller.
            keep_size = int(len(product_ids) * (1. - drop_percentage))
            if keep_size < len(product_ids):
                product_ids = np.random.choice(product_ids, keep_size, replace=False)
            # Randomly choose the products that become part of the validation set.
            val_size = int(len(product_ids) * split_percentage)
            if val_size > 0:
                val_ids = np.random.choice(product_ids, val_size, replace=False)
            else:
                val_ids = []
            # Create a new row for each image.
            for product_id in product_ids:
                row = [product_id, category_idx]
                for img_idx in range(df.loc[product_id, "num_imgs"]):
                    if product_id in val_ids:
                        val_list.append(row + [img_idx])
                    else:
                        train_list.append(row + [img_idx])
                pbar.update()
    columns = ["product_id", "category_idx", "img_idx"]
    train_df = pd.DataFrame(train_list, columns=columns)
    val_df = pd.DataFrame(val_list, columns=columns)
    return train_df, val_df
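Example #1 drives its bar manually: the work per product does not map one-to-one onto a single iterable, so the bar is created with an explicit total and advanced by hand. A minimal standalone sketch of that pattern (the loop body is a placeholder):

from tqdm import tqdm

with tqdm(total=1000) as pbar:
    for batch in range(10):
        # ... process one batch of 100 items here ...
        pbar.update(100)  # advance the bar by the number of items handled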
Example #2
def maxent_motifs(N, L, desired_ic, num_motifs, tolerance=10**-10, A=4, beta=None, countses=None,
                  entropies=None, log_cols=None, verbose=False):
    ### computational
    if countses is None:
        logger("countses", verbose)
        countses = enumerate_counts(N, A, verbose=verbose)
    if entropies is None:
        logger("entropies", verbose)
        entropies = np.array(map(entropy_from_counts, tqdm(countses)))
    if log_cols is None:
        iterator = tqdm(countses) if verbose else countses
        logger("log_cols", verbose)
        log_cols = np.array([log_counts_to_cols(counts, A=A) for counts in iterator])
    if beta is None:
        correction_per_col = (A-1)/(2*log(2)*N)
        desired_ic += L * correction_per_col
        beta = find_beta_for_mean_motif_ic(N,L,desired_ic,tolerance=tolerance,verbose=verbose, A=A,
                                           countses=countses, entropies=entropies, log_cols=log_cols)
        logger("beta: %s" % beta, verbose)
    logger("computing count ps from beta", verbose)
    ps = count_ps_from_beta(N,beta, A=A, verbose=verbose,
                            log_cols=log_cols, entropies=entropies)
    count_sampler = inverse_cdf_sampler(countses, ps)
    def sample():
        counts = [count_sampler() for i in range(L)]
        cols = [sample_col_from_count(count, A=A) for count in counts]
        return map(lambda site:"".join(site),transpose(cols))
    iterator = trange if verbose else xrange
    if verbose:
        print "sampling"
    return [sample() for _ in iterator(num_motifs)]
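maxent_motifs gates its progress output on a verbose flag via iterator = tqdm(countses) if verbose else countses. Because tqdm wraps an iterable transparently, the downstream code is identical either way. A distilled sketch of the idiom:

from tqdm import tqdm

def process(items, verbose=False):
    # Wrap only when progress output is wanted; the loop is unchanged.
    iterator = tqdm(items) if verbose else items
    return [item * 2 for item in iterator]  # placeholder work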
def experiment4(L=10):
    """do grid search to determine whether linear or apw models fair better under different regimes"""
    def apw_fit(sigma, mu, Ne):
        code = sample_code(L, sigma)
        def apw_phat(site):
            ep = score(code, site)
            return 1/(1+exp(ep-mu))**(Ne-1)
        chain = mh(lambda s:apw_phat(s), proposal=mutate_site, x0=random_site(L),
                   capture_state = lambda s:apw_occ(code, mu, s))[25000:]
        return mean(chain)
    def linear_fit(sigma, mu, Ne):
        pssm = sample_matrix(L, sigma)
        def linear_phat(site):
            ep = score_seq(pssm, site)
            return 1/(1+exp(ep-mu))**(Ne-1)
        chain = mh(lambda s:linear_phat(s), proposal=mutate_site, x0=random_site(L),
                   capture_state = lambda s:linear_occ(pssm, mu, s))[25000:]
        return mean(chain)
    def apw_occ(code, mu, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))
    def linear_occ(pssm, mu, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))
    sigmas = np.linspace(0,5,5)
    mus = np.linspace(-10,10,5)
    Nes = np.linspace(0,5,5)
    apws = [apw_fit(sigma, mu, Ne) for sigma in tqdm(sigmas) for mu in mus for Ne in Nes]
    linears = [linear_fit(sigma, mu, Ne) for sigma in tqdm(sigmas) for mu in mus for Ne in Nes]
Example #4
def find_beta_for_mean_col_ic(n, desired_ic_per_col,tolerance=10**-10,verbose=False, A=4,
                              countses=None, entropies=None, log_cols=None):
    """find beta such that entropy*exp(-beta*entropy)/Z = des_ent"""
    if countses is None:
        if verbose:
            print "enumerating countses"
        countses = enumerate_counts(n, A, verbose=verbose)
    if entropies is None:
        if verbose:
            print "enumerating entropies"
            entropies = np.array(map(entropy_from_counts, tqdm(countses)))
        else:
            entropies = np.array(map(entropy_from_counts, countses))
    #cols = np.array(map(countses_to_cols, countses))
    if log_cols is None:
        if verbose:
            print "enumerating cols"
            #cols = np.exp(np.array(map(log_counts_to_cols, countses)))
        iterator = tqdm(countses) if verbose else countses
        log_cols = np.array([log_counts_to_cols(counts, A=A) for counts in iterator])
    def f2(beta):
        log_phats = np_log_normalize(log_cols + -beta*entropies)
        expected_entropy = np.exp(log_phats).dot(entropies)
        return log2(A) - expected_entropy - desired_ic_per_col
    lb = -1
    while f2(lb) > 0:
        lb *= 2
        if lb < -1000:
            print "Warning, failed to find lower bound on beta"
            raise Exception("Couldn't find beta")
    ub = 1000
    while f2(ub) < 0:
        ub *= 2
        print "raising upper bound to:",ub
    return secant_interval(f2,lb,ub,verbose=verbose,tolerance=tolerance)
def sigma_Ne_contour_plot(filename=None):
    sigmas = np.linspace(0,5,20)
    Nes = np.linspace(1,20,20)
    L = 10
    n = 50
    copies = 10*n
    trials = 100
    motifss = [[[(sample_motif(sigma, Ne, L, copies, n))
               for i in range(trials)]
          for sigma in sigmas] for Ne in tqdm(Nes)]
    occ_M = [[expected_occupancy(sigma, Ne, L, copies)
          for sigma in sigmas] for Ne in tqdm(Nes)]
    print "ic_M"
    ic_M = mmap(lambda ms:mean(map(motif_ic,ms)),motifss)
    print "gini_M"
    gini_M = mmap(lambda ms:mean(map(motif_gini,ms)),motifss)
    print "mi_M"
    mi_M = mmap(lambda ms:mean(map(total_motif_mi,ms)),tqdm(motifss))
    plt.subplot(2,2,1)
    plt.contourf(sigmas,Nes,occ_M,cmap='jet')
    plt.colorbar()
    plt.subplot(2,2,2)
    plt.contourf(sigmas,Nes,ic_M,cmap='jet')
    plt.colorbar()
    plt.subplot(2,2,3)
    plt.contourf(sigmas,Nes,gini_M,cmap='jet')
    plt.colorbar()
    plt.subplot(2,2,4)
    plt.contourf(sigmas,Nes,mi_M,cmap='jet')
    plt.colorbar()
    maybesave(filename)
    def make_api_call(crest_url_list):

        @RateLimited(150)
        def get_data(session, url):
            try:
                response = session.get(url.full_url)
                return response

            except requests.ConnectionError:
                print "Connection Aborted - BadStatusLine"

        session = FuturesSession(max_workers=10)

        # Accumulators for pending requests, completed responses, and parsed orders.
        futures, res, sell_orders_list = [], [], []

        for url in tqdm(crest_url_list, desc="Downloading", leave=False):
            # futures.append(get_data(session, url))
            futures.append(SellOrder(get_data(session, url), url.region))

        for request in tqdm(futures, desc="Completing Requests", leave=False):
            # res.append(request.result())
            res.append(SellOrder(request.data.result(), request.region))

        for x in res:
            # sell_orders_list.append(json.loads(x.content))
            sell_orders_list.append(SellOrder(json.loads(x.data.content), x.region))

        return sell_orders_list
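make_api_call labels each pass with desc= and passes leave=False so finished bars are erased rather than left stacked on screen. A self-contained sketch of those two keywords (time.sleep stands in for the real request):

import time
from tqdm import tqdm

for item in tqdm(range(20), desc="Downloading", leave=False):
    time.sleep(0.05)  # leave=False clears this bar once the loop ends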
def validate_sample_motif_neglect_fg2(iterations=50000):
    """compare fg_neglect sampling to MCMC"""
    bio_motif = Escherichia_coli.LexA
    n = len(bio_motif)
    L = len(bio_motif[0])
    matrix = [[-ep for ep in row] for row in make_pssm(bio_motif)]
    ringer = ringer_motif(matrix,n)
    Ne = 2.375 
    random_motifs = [sample_motif_neglect_fg(matrix,n,Ne) for i in trange(iterations)]
    random_rhos = [motif_hamming_distance(ringer,motif) for motif in tqdm(random_motifs)]
    random_log_fs = [log_fitness(matrix,motif,G) for motif in tqdm(random_motifs)]
    random_ics = map(motif_ic,random_motifs)
    _, chain = sella_hirsch_mh(matrix=matrix,init="ringer",Ne=Ne,n=n,iterations=iterations)
    chain_rhos = [motif_hamming_distance(ringer,motif) for motif in tqdm(chain)]
    chain_log_fs = [log_fitness(matrix,motif,G) for motif in tqdm(chain)]
    chain_ics = map(motif_ic,chain)
    plt.subplot(1,2,1)
    plt.scatter(random_rhos,random_log_fs)
    plt.scatter(chain_rhos,chain_log_fs,color='g')
    plt.xlabel("rho")
    plt.ylabel("log fitness")
    plt.subplot(1,2,2)
    plt.scatter(random_rhos,random_ics)
    plt.scatter(chain_rhos,chain_ics,color='g')
    plt.xlabel("rho")
    plt.ylabel("IC")
Example #8
def memeExt(id_list,file_name):
    dir_PATH = 'D:\\KimSS-NAS\\LFG\\Works\\2016.04 Yeast HD LD Rho0\\(Archive) FIMO analysis\\'
    PATH = dir_PATH+file_name
    
    # 1. Read file
    meme_f = open(PATH,'r')
    meme_fc = meme_f.readlines()
    meme_f.close()
    print('1. Read file done: %d lines' % (len(meme_fc)))
    
    # 2. Search motif information and their index
    motifs = [s for s in meme_fc if 'motif' in s.lower()]
    motifs_id = [i for i, s in enumerate(meme_fc) if 'motif' in s.lower()]
    print('2. Search motifs done: %d motifs\n' % (len(motifs_id)))
    
    # 3. Search id_list in motif list
    my_motifs_id = []
    for sch in tqdm(id_list):
        my_id = [i for i, s in enumerate(motifs) if sch.lower() in s.lower()]
        my_motifs_id.append(my_id)
    print('3. Extract my motifs done: %d motifs\n'% len(my_motifs_id))
    
    result = []
    for my in tqdm(my_motifs_id):
        if len(my)>0:
            my_motif = meme_fc[motifs_id[my[0]]:
                motifs_id[my[0]+1]-1]
            my_motif_str = ''.join(my_motif)
            result.append(my_motif_str)
    print('4. Summarize my motifs done: %d motifs\n' % len(result))

    return result
Example #9
def test(test_indexes, BG_img, params):
    folder = params["folder"]
    marginX = params["marginX"]
    marginY = params["marginY"]
    neg_weight = params["neg_weight"]
    method = params["method"]
    feature = params["feature"]

    test_features, test_labels = [], []
    test_feature_count = 0

    print "Extracting positive test features..."

    # Read positive test examples
    for i in tqdm(range(len(test_indexes))):
        img = img_read(folder, test_indexes[i])
        #motion_img = read_motion_image(folder, test_indexes[i], BG_img)

        height, width = img.shape
        bboxes = add_bbox_margin(read_bboxes(folder, test_indexes[i]), marginX, marginY, height, width)

        for j in bboxes:
            img_cut = img[j[0]:j[1], j[2]:j[3]]
            #motion_img_cut = motion_img[j[0]:j[1], j[2]:j[3]]
            test_feature_count += 1
            #test_features.append(extract(img_cut, motion_img_cut, method, feature))
            test_features.append(extract(img_cut, None, method, feature))
            test_labels.append(1)

    pos_test_feature_count = test_feature_count

    print "Positive test features are extracted."
    print "Extracting negative test features..."

    # Read negative test examples
    for j in tqdm(range(pos_test_feature_count*neg_weight)):
        i = sample(test_indexes, 1)[0]

        img = img_read(folder, i)

        height, width = img.shape
        bboxes = add_bbox_margin(read_bboxes(folder, i), marginX, marginY, height, width)

        neg_bb = rand_bbox(bboxes, height, width)

        if overlaps(neg_bb, bboxes) != -1:
            continue

        #motion_img = read_motion_image(folder, i, BG_img)

        img_cut = img[neg_bb[0]:neg_bb[1], neg_bb[2]:neg_bb[3]]
        #motion_img_cut = motion_img[neg_bb[0]:neg_bb[1], neg_bb[2]:neg_bb[3]]
        test_feature_count += 1
        #test_features.append(extract(img_cut, motion_img_cut, method, feature))
        test_features.append(extract(img_cut, None, method, feature))
        test_labels.append(-1)
    
    print "Negative test features are extracted."

    return test_features, test_labels, test_feature_count
def on_off_experiment2(num_motifs=100,filename="gini-vs-mi-correlation-in-on-off-spoofs.pdf"):
    """compare MI vs Gini on biological_motifs"""
    bio_motifs = [getattr(tfdf,tf) for tf in tfdf.tfs]
    Ns = map(len, bio_motifs)
    spoofses = [spoof_on_off_motif(motif,num_motifs=num_motifs,trials=1) for motif in bio_motifs]
    spoof_ginises = mmap(motif_gini,tqdm(spoofses))
    spoof_mises = mmap(total_motif_mi,tqdm(spoofses))
    cors, ps = [],[]
    for ginis, mis in zip(spoof_ginises, spoof_mises):
        cor, p = pearsonr(ginis,mis)
        cors.append(cor)
        ps.append(p)
    q = fdr(ps)
    
    plt.scatter(cors,ps)
    plt.plot([-1,1],[q,q],linestyle='--',label="FDR-Adjusted Significance Level")
    plt.semilogy()
    plt.legend()
    plt.xlabel("Pearson Correlation Coefficient")
    plt.ylabel("P value")
    plt.xlim([-1,1])
    plt.ylim([10**-4,1+1])
    cor_ps = zip(cors,ps)
    sig_negs = [(c,p) for (c,p) in cor_ps if c < 0 and p < q]
    sig_poses = [(c,p) for (c,p) in cor_ps if c > 0 and p < q]
    insigs = [(c,p) for (c,p) in cor_ps if p > q]
    def weighted_correlation(cor_p_Ns):
        cors,ps,Ns = transpose(cor_p_Ns)
        return sum([cor*N for (cor,N) in zip (cors,Ns)])/sum(Ns)
    plt.title("Gini-MI Correlation Coefficient vs. P-value for On-Off Simulations from Prokaryotic Motifs")
    maybesave(filename)
Example #11
def simple_eval(opts):
    """
    Simple evaluation of the program; it can and will be merged with the other evaluation in the future.
    """

    if not check_conf([opts.simple], opts):
        return

    values = list()
    print("Running simple evaluation.\tArguments: {0}\tRepeats: {1}.".format(1, opts.repeats))
    for x in tqdm(range(opts.repeats)):
        values.append(get_simulation([opts.simple]))

    print("Extracting values..")
    for i in tqdm(range(len(values))):
        values[i] = extract(values[i], opts)

    mean = get_mean(values)
    print("Mean: {0}".format(mean))
    median = get_median(values)
    print("Median: {0}".format(median))

    if opts.graph:
        print("Generating graph..")
        graph([values], mean, median)

    sys.exit(0)
Example #12
def parseAffiliationsToCSV(aff_filename):

	global AFF_OUT

	aff_file = open(aff_filename, 'r')
	dblp_aff = open(AFF_OUT,"w")
	aff_wr = csv.writer(dblp_aff, quoting=csv.QUOTE_ALL)

	aff_set = Set()

	for rline in tqdm(aff_file):
		line = rline.decode("utf-8").rstrip().replace("\n","")

		if line.startswith("#index"):
			continue
		if line.startswith("#n"):
			continue
		if line.startswith("#a"):
			affs = line[2:].split(";")  # drop the "#a" prefix
			for a in affs:
				aff_set.add(a)

		if line.startswith("#t"):
			continue
		if line == "":
			continue

	AFF_INDEX = 0

	for aff in tqdm(aff_set):
		aff_wr.writerow([AFF_INDEX,aff])
		AFF_INDEX += 1

	dblp_aff.close()
	aff_file.close()
def experiment3(trials=10):
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in xrange(10000)]
    apw_site_sigmas = [sd([score(code,site) for site in sites]) for code in codes]
    linear_site_sigmas = [sd([score_seq(pssm,site) for site in sites]) for pssm in pssms]
    def apw_phat(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))**(Ne-1)
    def apw_occ(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))
    def linear_phat(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))**(Ne-1)
    def linear_occ(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))
    apw_mean_fits = [exp(mean(map(log10, mh(lambda s:apw_phat(code, s), proposal=mutate_site, x0=random_site(L),
                                          capture_state = lambda s:apw_occ(code, s))[1:])))
                         for code in tqdm(codes)]
    linear_mean_fits = [exp(mean(map(log10, mh(lambda s:linear_phat(pssm, s), proposal=mutate_site, x0=random_site(L),
                                             capture_state = lambda s:linear_occ(pssm, s))[1:])))
                        for pssm in tqdm(pssms)]
    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas, linear_mean_fits, color='g',label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
def get_image_feat(feat_type, image_folder, orig_split, indices, real_split):

    feats = defaultdict(int)
    prefix = 'abstract_v002_%s2015_' % (orig_split)
    if 'fc7' in feat_type:
        # set some parameters
        folder = os.path.join(image_folder, 'scene_img', 'img_%s2015' % (orig_split)) + '/'
        print "Preparing the VGG 19 Net"
        net = demo.build_convnet()
        print "Extracting Features"
        with open('temp_{}.txt'.format(orig_split), 'w') as image_file:
            for item in tqdm(indices):
                image_file.write(imname(prefix, item) + '\n')
        feats = demo.compute_fromfile(net, 'temp_{}.txt'.format(orig_split),
                                      base_path=folder)
    elif 'hdf5' in feat_type:
        try:
            folder = os.path.join(image_folder, 'scene_img', 'img_%s2015' % (orig_split)) + '/'
            images = np.zeros((len(indices), 3, 224, 224)) # TODO: Low Priority, make general
            for index, item in tqdm(enumerate(indices)):
                images[index] = demo.load_abstract_image(folder + imname(prefix, item))
            with h5py.File('/ssd_local/rama/datasets/abstract-hdf5/{}.h5'.format(real_split), 'w') as outfile:
                outfile['images'] = images
            return True
        except:
            print "problem"
            return False
    else:
        folder = os.path.join(image_folder, 'scene_json', 'scene_%s2015_indv' % (orig_split))

        # create the abstract feature instance
        AF = pickle.load(open('extract_features/af_dump.p', 'r'))
        # TODO: Figure out a better place to initialize all this
        out_dir = '/srv/share/vqa/release_data/abstract_v002/scene_json/features_v2/'
        keep_or_remove = 'keep'
        get_names = False
        tags = feat_type
        # path to metafeature directory
        metafeat_dir = af.dir_path(os.path.join(out_dir, 'metafeatures'))

        for item in tqdm(indices):
            metafeat_fn = '{}_instances-{}.cpickle'.format(item,
                                                        AF.instance_ordering)

            cur_metafeat_fn = os.path.join(metafeat_dir,
                                        metafeat_fn)

            with open(cur_metafeat_fn, 'rb') as fp:
                cur_metafeats = pickle.load(fp)

            cur_feats, _ = AF.scene_metafeatures_to_features(cur_metafeats,
                                                            tags,
                                                            keep_or_remove,
                                                            get_names)

            feats[item] = cur_feats

    return feats
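One caveat visible in get_image_feat above: tqdm(enumerate(indices)) cannot infer a length, since enumerate objects expose no __len__, so the bar degrades to a bare counter with no percentage or ETA. Passing total explicitly restores the full display; a minimal sketch:

from tqdm import tqdm

indices = list(range(500))
for index, item in tqdm(enumerate(indices), total=len(indices)):
    pass  # per-item work goes here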
def experiment1():
    """Does downsampling preserve percentile statistics?"""
    motif = (prok_motifs[11])
    downsamples = [sample(int(len(motif)/10), motif,replace=False) for i in range(100)]
    maxent_spoofs = spoof_maxent_motifs(motif, 1000, verbose=True)
    down_spoofs = [spoof_maxent_motifs(dm, 100) for dm in tqdm(downsamples)]
    true_mi, spoof_mis = motif_mi(motif), map(motif_mi, tqdm(maxent_spoofs))
    down_mis, down_spoof_mis = map(motif_mi, downsamples), [map(motif_mi, spoofs) for spoofs in tqdm(down_spoofs)]
    true_percentile = percentile(true_mi, spoof_mis)
    down_percentiles = [percentile(down_mi, ds_mis) for (down_mi, ds_mis) in zip (down_mis, down_spoof_mis)]
Example #16
def download_github_files(token, org, repo, branch, to_path, is_dogweb=False):
    """
    Using the github api downloads manifest files to a temporary location for processing

    :param token: string of github token
    :param org: string of organization
    :param repo: string of git repository
    :param branch: string of branchname
    :param to_path: where to extract
    :param is_dogweb: if dogweb repo we need to get nested data
    """
    directory = 'integration' if is_dogweb else ''
    url = 'https://api.github.com/repos/{0}/{1}/contents/{3}?ref={2}'.format(org, repo, branch, directory)
    headers = {'Authorization': 'token {}'.format(token)} if token else {}
    excludes = ['LICENSE', 'Rakefile', 'Gemfile']
    print('Downloading files from {}/{}..'.format(repo, branch))
    response = requests.get(url, headers=headers)
    
    """
    Downloading manifest.json for integrations core repo only
    """
    if response.status_code == requests.codes.ok:
        if not is_dogweb:
            for obj in tqdm(response.json()):
                name = obj.get('name', '')
                if not name.startswith('.') and not splitext(name)[1] and name not in excludes:
                    to_manifest = '{}/manifest.json'.format(name)
                    response_manifest = requests.get('https://raw.githubusercontent.com/{0}/{1}/{2}/{3}'.format(org, repo, branch, to_manifest), headers=headers)
                    if response_manifest.status_code == requests.codes.ok:
                        with open('{}{}_manifest.json'.format(to_path, name), mode='wb+') as f:
                            f.write(response_manifest.content)
    else:
        print('There was an error ({}) listing {}/{} contents..'.format(response.status_code, repo, branch))
        exit(1)

    """
    Downloading readme.md for integrations core repo only
    """
    if response.status_code == requests.codes.ok:
        if not is_dogweb:
            for obj in tqdm(response.json()):
                name = obj.get('name', '')
                if not name.startswith('.') and not splitext(name)[1] and name not in excludes:
                    to_manifest = '{}/README.md'.format(name)
                    response_manifest = requests.get('https://raw.githubusercontent.com/{0}/{1}/{2}/{3}'.format(org, repo, branch, to_manifest), headers=headers)
                    if response_manifest.status_code == requests.codes.ok:
                        with open('{}{}_readme.md'.format(to_path, name), mode='wb+') as f:
                            f.write(response_manifest.content)
    else:
        print('There was an error ({}) listing {}/{} contents..'.format(response.status_code, repo, branch))
        exit(1)
def visualize_stationary_sum(matrix,n,Ne,T,samples_per_bin=100):
    L = len(matrix)
    nu = Ne - 1
    ringer = ringer_motif(matrix,n)
    motifss = [[mutate_motif_k_times(ringer,k) for i in range(samples_per_bin)] for k in trange(n*L)]
    log_fss = mmap(lambda motif:log_fitness(matrix,motif,G),tqdm(motifss))
    Tss = mmap(T,tqdm(motifss))
    log_ws = [log_rho_weight(rho,n,L) for rho in range(n*L)]
    terms = [mean(exp(nu*log_f + log_w)*T for log_f,T in zip(log_fs,Ts))
             for log_w,log_fs,Ts in zip(log_ws,log_fss,Tss)]
    Z = sum([mean(exp(nu*log_f + log_w) for log_f,T in zip(log_fs,Ts))
             for log_w,log_fs,Ts in zip(log_ws,log_fss,Tss)])
    print sum(terms)/Z
    plt.plot(range(n*L),terms)
Example #18
    def get_data(self, data_type, color_images=True, mat_label_images=True, obj_label_images=True, calibrations=False, depth=False):
        file_list = []
        for t in data_type:
            list_type = t + '_images'
            if list_type in self.config:
                file_list += self.config[list_type]
            else:
                raise Exception('The config does not contain a list for the entry: \'{0}_images\' \nConfig file located at: {1}'.format(t, self.config_filename))

        return_list = []
        if color_images:
            images = []
            for fn in tqdm(file_list):
                i_n = os.path.join(self.image_folder, fn+self.image_extension)
                images.append(self.load_color(i_n))
            return_list.append(images)

        if mat_label_images:
            mat_labels = []
            for fn in tqdm(file_list):
                mat_l_n = os.path.join(self.mat_label_folder, fn+self.mat_label_extension)
                mat_labels.append(self.load_labels(mat_l_n, 'mat'))
            return_list.append(mat_labels)

        if obj_label_images:
            obj_labels = []
            for fn in tqdm(file_list):
                obj_l_n = os.path.join(self.obj_label_folder, fn+self.obj_label_extension)
                obj_labels.append(self.load_labels(obj_l_n, 'obj'))
            return_list.append(obj_labels)

        if calibrations:
            calibration_data = []
            for fn in tqdm(file_list):
                c_n = os.path.join(self.calibration_folder, fn+self.calibration_extension)
                calibration_data.append(self.load_calibration(c_n))
            return_list.append(calibration_data)

        if depth:
            depth_data = []
            for fn in tqdm(file_list):
                d_n = os.path.join(self.depth_folder, fn+self.depth_extension)
                depth_data.append(self.load_depth(d_n))
            return_list.append(depth_data)

        if len(return_list) == 1:
            return return_list[0]
        else:
            return return_list
def analyze_collection(prok_motifs, euk_motifs):
    prok_correlated_pairses = map(analyze_motif,tqdm(prok_motifs,desc='motifs'))
    with open("prok_correlated_pairses.pkl",'w') as f:
        cPickle.dump(prok_correlated_pairses, f)
    euk_correlated_pairses = map(analyze_motif,tqdm(euk_motifs,desc='motifs'))
    with open("euk_correlated_pairses.pkl",'w') as f:
        cPickle.dump(euk_correlated_pairses, f)
    prok_corrs = np.array(map(len,prok_correlated_pairses))
    euk_corrs = np.array(map(len,euk_correlated_pairses))
    prok_depths = np.array([len(motif) for motif in prok_motifs])
    euk_depths = np.array([len(motif) for motif in euk_motifs])
    prok_lens = np.array([len(motif[0]) for motif in prok_motifs])
    euk_lens = np.array([len(motif[0]) for motif in euk_motifs])
    prok_lc2s = np.array([choose(L,2) for L in prok_lens])
    euk_lc2s = np.array([choose(L,2) for L in euk_lens])
 def download_from_repo(self, org, repo, branch, globs):
     """
     Takes github info and file globs and downloads files from github using multiple processes
     :param org: github organization or person
     :param repo: github repo name
     :param branch: the branch name
     :param globs: list of strings in glob format of what to extract
     :return:
     """
     with GitHub(self.options.token) as gh:
         listing = gh.list(org, repo, branch, globs)
         dest = "{0}{1}{2}".format(
             self.extract_dir, repo, sep
         )
         with Pool(processes=self.pool_size) as pool:
             with requests.Session() as s:
                 r = [
                     x
                     for x in tqdm(
                         pool.imap_unordered(
                             partial(
                                 gh.raw,
                                 request_session=s,
                                 org=org,
                                 repo=repo,
                                 branch=branch,
                                 dest_dir=dest,
                             ),
                             listing,
                         )
                     )
                 ]
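download_from_repo wraps Pool.imap_unordered in tqdm so the bar ticks as each worker finishes rather than in submission order. As written above no total is shown, because imap_unordered yields a plain iterator; a minimal runnable sketch with an explicit total supplied:

from multiprocessing import Pool
from tqdm import tqdm

def work(x):
    return x * x

if __name__ == "__main__":
    tasks = list(range(100))
    with Pool(processes=4) as pool:
        # imap_unordered yields results as they complete; tqdm needs
        # total= because the iterator itself has no known length.
        results = list(tqdm(pool.imap_unordered(work, tasks), total=len(tasks)))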
def process_glove(args, vocab_list, save_path, size=4e5, random_init=True):
    """
    :param vocab_list: [vocab]
    :return:
    """
    if not gfile.Exists(save_path + ".npz"):
        glove_path = os.path.join(args.glove_dir, "glove.6B.{}d.txt".format(args.glove_dim))
        if random_init:
            glove = np.random.randn(len(vocab_list), args.glove_dim)
        else:
            glove = np.zeros((len(vocab_list), args.glove_dim))
        found = 0
        with open(glove_path, 'r') as fh:
            for line in tqdm(fh, total=size):
                array = line.lstrip().rstrip().split(" ")
                word = array[0]
                vector = list(map(float, array[1:]))
                if word in vocab_list:
                    idx = vocab_list.index(word)
                    glove[idx, :] = vector
                    found += 1
                if word.capitalize() in vocab_list:
                    idx = vocab_list.index(word.capitalize())
                    glove[idx, :] = vector
                    found += 1
                if word.upper() in vocab_list:
                    idx = vocab_list.index(word.upper())
                    glove[idx, :] = vector
                    found += 1

        print("{}/{} of word vocab have corresponding vectors in {}".format(found, len(vocab_list), glove_path))
        np.savez_compressed(save_path, glove=glove)
        print("saved trimmed glove matrix at: {}".format(save_path))
def estremo_lite_vs_maxent_motifs():
    n = 20
    L = 10
    Ns = np.linspace(10,10000,100)
    pss = [sella_hirsch_predictions(n=n,L=L,G=1000,N=N) for N in tqdm(Ns)]
    ics = np.array([mean_ic_from_eps(eps,n,L) for eps in enumerate_eps(N,L)])
    expected_ics = [ics.dot(ps) for ps in pss]
Example #23
    def run(self, progress=True, verbose=False):
        """Compute all steps of the simulation. Be careful: if tmax is not set,
        this function will result in an infinit loop.

        Returns
        -------

        (t, fields):
            last time and result fields.
        """
        total_iter = int(self.tmax // self.user_dt) if self.tmax else None
        log = logging.info if verbose else logging.debug
        if progress:
            with tqdm(initial=(self.i if self.i < total_iter else total_iter),
                      total=total_iter) as pbar:
                for t, fields in self:
                    pbar.update(1)
                    log("%s running: t: %g" % (self.id, t))
                try:
                    return t, fields
                except UnboundLocalError:
                    warnings.warn("Simulation already ended")
        for t, fields in self:
            log("%s running: t: %g" % (self.id, t))
        try:
            return t, fields
        except UnboundLocalError:
            warnings.warn("Simulation already ended")
Example #24
def ic_log_pvalue(N, L, des_ic, verbose=False, trials=100, method="ub"):
    print des_ic
    correction_per_col = 3/(2*log(2)*N)
    K = L * correction_per_col # correction per motif
    ic_for_beta = des_ic + K
    tolerance = 10**-10
    beta = find_beta_for_mean_motif_ic(N,L,ic_for_beta,tolerance=tolerance,verbose=verbose) # correct val of beta
    countses = enumerate_counts(N)
    entropies = np.array(map(entropy_from_counts, countses))
    iterator = tqdm(countses) if verbose else countses
    log_cols = np.array(map(log_counts_to_cols, iterator))
    log_Zq = log_sum(log_cols + -beta*entropies)*L
    log_Zp = N*L*log(4)
    #log_prefactor = log_Zq - log_Zp + beta*2*L
    log_prefactor = log_Zq - log_Zp + beta*(2*L-K)
    if method == "UB":
        log_expectation_ub = (-beta*(des_ic))
        log_pval_ub = log_prefactor + log_expectation_ub
        return log_pval_ub - log(2)
    elif method == "analytic":
        mu, sigma = calc_params(N, L, beta)
        log_expectation = log(compute_expectation_spec(beta, mu, sigma))
        log_pval = log_prefactor + log_expectation
        return log_pval
    else:
        ms = maxent_motifs(N, L, des_ic, trials, beta=beta)
        ics = map(motif_ic, ms)
        print "des_ic, mean ics:", des_ic, mean(ics)
        log_expectation = log_sum([-beta*ic for ic in ics if ic > des_ic]) - log(trials) # Xxx loss of precision
        log_pval = log_prefactor + log_expectation
        return log_pval
Example #25
    def plot_DO(self, dos, target='stream', name='T0076', ymax=30000):
        '''
        Plot target by do.

        target has to be either 'stream' or 'mail'
        '''

        if type(dos) == str:
            dos = [dos]

        fig = plt.figure(figsize=(20, 30))
        colors = sns.color_palette('deep', len(dos))
        for j, year in tqdm(enumerate(['2014', '2015', '2016']), total=3):
            ax = plt.subplot(3, 1, j + 1)
            for i, do in enumerate(dos):
                tmp = self.get_DO(do, target=target, name=name)
                try:
                    tmp[year].cnt.plot(ax=ax,
                                       label=do,
                                       lw=2,
                                       color=colors[i], marker='o')

                except:
                    pass
            ax.legend()

            ax.legend(fontsize=18)
            ax.tick_params(axis='both', which='major', labelsize=30)
Example #26
def update_integration_pre_build(from_path=None, to_path=None):
    """
    All modifications that may happen to an integration's content are here

    :param from_path:   the input path where we scrape data from
    :param to_path:     the output path to integration md files
    """
    
    if exists(from_path):
        pattern = '**/*_manifest.json'
        for file_name in tqdm(sorted(glob.glob('{}{}'.format(from_path, pattern), recursive=True))):
            key_name = basename(file_name.replace('_manifest.json', ''))
            
            """
            Scraping all sections that we can find
            """
            data_array = readme_get_section(from_path, key_name)

            """
            Gathering the manifest short description and adding the right token
            """

            data_array.append([DESC_TOKEN,manifest_get_data(from_path,key_name,DESC_ATTRIBUTE)])

            """
            Inlining the data in the doc file
            """
            file_update_content(to_path, key_name, data_array)
    else:
        print('Path does not exist: {}'.format(from_path))
        exit(1)
Example #27
 def sync(self):
     """sync up doi and materials collections (needed after doicoll reset)"""
     existing_mp_ids = self.ad.matcoll.find(
         {'doi': {'$exists': True}}, {'_id': 0, 'task_id': 1}
     ).distinct('task_id')
     if existing_mp_ids:
         num_bibtex_errors = 0
         docs = self.ad.doicoll.find(
             {'_id': {'$in': existing_mp_ids}}, {'doi': 1, 'bibtex': 1}
         ).limit(0 if self.show_pbar else 5)
         ndocs = docs.count()
         if self.show_pbar:
             pbar = tqdm(total=ndocs)
         for doc in docs:
             if num_bibtex_errors > 2:
                 logger.error('abort bibtex generation (too many request errors)')
                 return None
             doc['bibtex'] = self.save_bibtex_item(doc)
             if not doc['bibtex']:
                 num_bibtex_errors += 1
                 continue
             self.build_item(doc)
             if self.show_pbar:
                 pbar.update()
         if self.show_pbar:
             pbar.close()
             logger.info('{} materials synced'.format(ndocs))
     else:
         logger.info('no materials with DOIs exist')
Example #28
def build_directory(dirnm, force=False):
    # print 'Loading Variable to Summary Table Mapping'
    if force or not os.path.isfile('directory.json'):
        directory = {}
        for fname in tqdm(os.listdir(dirnm)):
            typ = fname[0:1]
            st_nm = fname[1:3]
            seq_nm = fname[4:8]
            with open(dirnm + fname, 'rb') as f:
                read_flg = False
                for ln in f:
                    if ln.strip() not in ('', ';', 'RUN;') and read_flg:
                        varnm = re.split(r'\s+', ln.strip())[0]
                        try:
                            directory[varnm].append([typ, st_nm, seq_nm])
                        except KeyError:
                            directory[varnm] = [[typ, st_nm, seq_nm]]
                    read_flg = True if 'INPUT' in ln else read_flg
        for key in ['FILEID', 'FILETYPE', 'STUSAB', 'CHARITER', 'SEQUENCE', 'LOGRECNO']:
            directory.pop(key)
        with open('directory.json', 'wb') as f:
            json.dump(directory, f)
    with open('directory.json', 'rb') as f:
        directory = json.load(f)
    return directory
Example #29
 def build(self, mpids=None):
     """build DOIs into matcoll"""
     # get mp-id's
     #     - w/ valid doi & bibtex keys in doicoll
     #     - but w/o doi & doi_bibtex keys in matcoll
     query = {'doi': {'$exists': True}, 'bibtex': {'$exists': True}}
     if mpids is not None:
         query['_id'] = {'$in': mpids}
     valid_mp_ids = self.ad.doicoll.find(query).distinct('_id')
     if valid_mp_ids:
         missing_mp_ids = self.ad.matcoll.find(
             {
                 'task_id': {'$in': valid_mp_ids},
                 'doi': {'$exists': False}, 'doi_bibtex': {'$exists': False}
             },
             {'_id': 0, 'task_id': 1}
         ).distinct('task_id')
         items = self.ad.doicoll.find(
             {'_id': {'$in': missing_mp_ids}}, {'doi': 1, 'bibtex': 1}
         ).sort('bibtexed_on', pymongo.ASCENDING)
         if self.show_pbar:
             pbar = tqdm(total=items.count())
         for item in items:
             self.build_item(item)
             if self.show_pbar:
                 pbar.update()
         if self.show_pbar:
             pbar.close()
             logger.info('all available DOIs built into matcoll')
     else:
         logger.info('no valid DOIs available for build')
Example #30
def read_bson(bson_path, num_records, with_categories):
    rows = {}
    with open(bson_path, "rb") as f, tqdm(total=num_records) as pbar:
        offset = 0
        while True:
            item_length_bytes = f.read(4)
            if len(item_length_bytes) == 0:
                break
            length = struct.unpack("<i", item_length_bytes)[0]
            f.seek(offset)
            item_data = f.read(length)
            assert len(item_data) == length
            item = bson.BSON.decode(item_data)
            product_id = item["_id"]
            num_imgs = len(item["imgs"])
            row = [num_imgs, offset, length]
            if with_categories:
                row += [item["category_id"]]
            rows[product_id] = row
            offset += length
            f.seek(offset)
            pbar.update()
    columns = ["num_imgs", "offset", "length"]
    if with_categories:
        columns += ["category_id"]
    df = pd.DataFrame.from_dict(rows, orient="index")
    df.index.name = "product_id"
    df.columns = columns
    df.sort_index(inplace=True)
    return df
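read_bson stacks the file handle and the bar in a single with-statement, which guarantees the bar is closed cleanly even if decoding raises. The same shape in miniature, with an in-memory stream standing in for the BSON file:

import io
from tqdm import tqdm

records = [b"a", b"b", b"c"]
with io.BytesIO(b"".join(records)) as f, tqdm(total=len(records)) as pbar:
    while f.read(1):
        pbar.update()  # one tick per record consumed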
Example #31
    # Load the test set data
    m = 10000
    X = extract_data('./data/t10k-images-idx3-ubyte.gz', m, 28)
    y_dash = extract_labels('./data/t10k-labels-idx1-ubyte.gz', m).reshape(m, 1)
    # Normalize the data
    X -= int(np.mean(X))  # subtract mean
    X /= int(np.std(X))  # divide by standard deviation
    test_data = np.hstack((X, y_dash))

    X = test_data[:, 0:-1]
    X = X.reshape(len(test_data), 1, 28, 28)
    y = test_data[:, -1]

    t = tqdm(range(len(X)), leave=True)

    # Compute precision and recall
    corr = 0
    digit_count = [0 for i in range(10)]
    digit_correct = [0 for i in range(10)]

    for i in t:
        x = X[i]
        pred, prob = predict(x, f1, f2, w3, w4, b1, b2, b3, b4)
        digit_count[int(y[i])] += 1
        if pred == y[i]:
            corr += 1
            digit_correct[pred] += 1

        t.set_description("Acc:%0.2f%%" % (float(corr / (i + 1)) * 100))
Example #32
                    default='bolorspeech',
                    help='dataset name')
args = parser.parse_args()

if args.dataset == 'mbspeech':
    from datasets.mb_speech import MBSpeech
    dataset = MBSpeech()
elif args.dataset == 'librispeech':
    from datasets.libri_speech import LibriSpeech
    dataset = ConcatDataset([
        LibriSpeech(name='train-clean-100'),
        LibriSpeech(name='train-clean-360'),
        LibriSpeech(name='train-other-500'),
        LibriSpeech(name='dev-clean', )
    ])
else:
    from datasets.bolor_speech import BolorSpeech
    dataset = ConcatDataset([
        BolorSpeech(name='train'),
        BolorSpeech(name='test'),
        BolorSpeech(name='demo'),
        BolorSpeech(name='annotation')
    ])

transform = Compose([LoadAudio(), ComputeMagSpectrogram()])
for data in tqdm(dataset):
    fname = data['fname']
    data = transform(data)
    mel_spectrogram = data['input']
    np.save(fname.replace('.wav', '.npy'), mel_spectrogram)
Example #33
def download(url):
    with open('./sougou/{name}.scel'.format(name=j), 'wb') as f:
        f.write(requests.get(url).content)


if __name__ == '__main__':
    first_urls = get_first(key_word)
    second_urls = get_second(first_urls)
    third_urls = get_third(second_urls)
    print "开始下载......"
    j = 0
    counts = len(third_urls)

    # Decorating any iterator with tqdm displays the progress of the current iteration
    for url in tqdm(third_urls):
        time.sleep(0.01)
        download(url)
        j += 1
        #print "下载{name}.scel完成,当前进度为{status} %".format(name = j,status = str(float(j)/counts *100))
    print "下载完毕,开始导入词典.....\n"
    commands.getoutput(
        "./extract-sougou-dict.py sougou/*.scel -o sougou-dict.txt -mmseg")

    print "\t1. 生成词库成功,合并新词典....."
    if not os.path.exists("/usr/local/mmseg3/etc/unigram.txt"):
        print "\t请检查文件 /usr/local/mmseg3/etc/unigram.txt 是否存在!"
        sys.exit()
    os.system("cp /usr/local/mmseg3/etc/unigram.txt ./")
    commands.getoutput(
        "./merge-mmseg-dict.py -a unigram.txt -b sougou-dict.txt -o merged.txt"
Example #34
		#s_to_file = open(api+'.api', "w", encoding="utf-8")
		#s_to_file.write(str(todos))
		#s_to_file.close()
	except:
		pass

# read the Excel file
wb = openpyxl.load_workbook("100bighh/firms.xlsx")

# print the list of sheet names
sheets = wb.sheetnames
for sheet in sheets:
	print(sheet)

# get the active sheet
sheet = wb.active
rows = sheet.max_row
print(rows)

for i in tqdm(range(2, rows + 1)):
	cell = sheet.cell(row=i, column=1)
	firm_no = str(cell.value)
	cell = sheet.cell(row=i, column=2)
	firm_name = str(cell.value)
	cell = sheet.cell(row=i, column=3)
	firm_id = str(cell.value)
	print(firm_no, firm_name, firm_id)
	apitofile(firm_id)


Example #35
def main():
    '''
    main
    '''

    # Command Line Stuff...
    myCommandLine = CommandLine()
    bed = myCommandLine.args['input_bed']
    gtf = myCommandLine.args['gtf']
    otherJuncs = myCommandLine.args['junctionsBed']
    wiggle = myCommandLine.args['wiggleWindow']
    threads = myCommandLine.args['threads']
    outFile = myCommandLine.args['output_fname']
    cleanup = myCommandLine.args['keepTemp']
    resolveStrand = myCommandLine.args['correctStrand']

    # There are a few functions that evaluate what verbose is defined as.
    # Instead of passing it around, just global it.
    global verbose
    verbose = myCommandLine.args['quiet']

    global progress
    progress = myCommandLine.args['progress']
    # Convert gtf to bed and split by chromosome.
    juncs, chromosomes = gtfToSSBed(gtf)

    # Do the same for the other juncs file.
    if otherJuncs is not None:
        juncs, chromosomes = addOtherJuncs(juncs, otherJuncs, chromosomes)

    annotations = dict()
    for chrom, data in tqdm(
            juncs.items(),
            desc=
            "Step 3/5: Preparing annotated junctions to use for correction",
            total=len(list(juncs.keys())),
            dynamic_ncols=True,
            position=1) if progress else juncs.items():
        annotations[chrom] = "%s_known_juncs.bed" % chrom
        with open("%s_known_juncs.bed" % chrom, "w") as out:
            for k, v in data.items():
                annotation = v
                c1, c2, strand = k
                print(chrom,
                      c1,
                      c2,
                      annotation,
                      ".",
                      strand,
                      sep="\t",
                      file=out)

    skippedChroms = set()
    readDict = dict()
    with open(bed) as lines:
        outDict = dict()
        for line in tqdm(lines,
                         desc="Step 4/5: Preparing reads for correction",
                         dynamic_ncols=True,
                         position=1) if progress else lines:
            cols = line.rstrip().split()
            chrom = cols[0]
            if chrom not in chromosomes:
                if chrom not in skippedChroms:
                    skippedChroms.add(chrom)
                    if verbose:
                        tqdm.write(
                            "Reference sequence not found in annotations, skipping: %s"
                            % (chrom),
                            file=sys.stderr)
                    continue
            else:
                if chrom not in outDict:
                    readDict[chrom] = "%s_temp_reads.bed" % chrom
                    outDict[chrom] = open("%s_temp_reads.bed" % chrom, 'w')
                print(line.rstrip(), file=outDict[chrom])

    cmds = list()
    for chrom in readDict:
        juncs = annotations[chrom]
        reads = readDict[chrom]

        outDict[chrom].close()

        cmds.append((chrom, juncs, reads, resolveStrand))

    p = Pool(threads)
    for i in tqdm(p.imap(runCMD, cmds),
                  total=len(cmds),
                  desc="Step 5/5: Correcting Splice Sites",
                  dynamic_ncols=True,
                  position=1) if progress else p.imap(runCMD, cmds):
        pass

    with open("%s_all_inconsistent.bed" % outFile, 'wb') as inconsistent:
        for chrom in readDict:
            with open("%s_inconsistent.bed" % chrom, 'rb') as fd:
                shutil.copyfileobj(fd, inconsistent, 1024 * 1024 * 10)
            if cleanup:
                os.remove(annotations[chrom])
                os.remove(readDict[chrom])
                os.remove("%s_inconsistent.bed" % chrom)

    with open("%s_all_corrected.bed" % outFile, 'wb') as corrected:
        for chrom in readDict:
            with open("%s_corrected.bed" % chrom, 'rb') as fd:
                shutil.copyfileobj(fd, corrected, 1024 * 1024 * 10)
            if cleanup:
                os.remove("%s_corrected.bed" % chrom)

    print("\n")
Example #36
                        default='v1.0-simplified-train.jsonl.gz')
    parser.add_argument('--devfile', default='v1.0-simplified-dev.jsonl.gz')
    parser.add_argument('--passagefile', default='all_passages.jsonl')
    parser.add_argument('--queries_trainfile', default='train_queries.json')
    parser.add_argument('--answers_trainfile', default='train_answers.json')
    parser.add_argument('--queries_devfile', default='dev_queries.json')
    parser.add_argument('--answers_devfile', default='dev_answers.json')
    parser.add_argument('--qrelsfile', default='all_qrels.txt')

    args = parser.parse_args()

    traindata = []

    with gzip.open(args.trainfile, 'rb') as fp:
        for i, line in enumerate(
                tqdm(fp, total=307373, desc='Reading trainset')):
            item = json.loads(line.strip())
            eid = item['example_id']
            doc = item.pop('document_text')
            item.pop('long_answer_candidates')
            pids = []
            paras = []
            for ans in item.pop('annotations'):
                lans = ans['long_answer']
                pid = lans['candidate_index']
                st = lans['start_token']
                et = lans['end_token']
                if pid not in pids:
                    pids.append(pid)
                    para = doc.split(" ")[st:et]
                    paras.append(para)
Example #37
    def predict(self, X, y, refit=False, n_iter=None, alpha=None, L=None):
        if isinstance(X, pd.DataFrame):
            self.pdflag = True
            self.pd_index = X.index.values
            self.pd_columns = X.columns
            self.pd_yname = y.name
            X = X.values
            y = y.values
        if alpha is None:
            alpha = self.alpha
        if L is None:
            L = self.L
        if n_iter is None:
            n_iter = self.n_iter

        X_imput = X.copy()
        init_idx = 0
        if not np.allclose(self.X, X, equal_nan=True):
            init_idx, _ = self.update_x_y(X, y, refit=refit)

        # Mark which entries of X are missing (True where NaN).
        self.nan_matrix = np.isnan(self.X)
        X_di = self.X_di
        for n in range(n_iter):
            print('ITERATION # ', n)
            update_count = 0
            for i in tqdm(range(init_idx, X_di.shape[0])):
                ncolumns = np.random.permutation(
                    X_di.shape[1])  # To enhance imputation variance
                for col in ncolumns.tolist():
                    if self.nan_matrix[i, col] == True:
                        idx_y = self.target_dict[self.y[i]]
                        xi = X_di[i]
                        sgraph = self.make_subgraph(xi, col)
                        n_interval = self.estimate_interval(col, sgraph, idx_y)
                        imput_val = self.value_by_node((col, n_interval))
                        self.X_di[i, col] = imput_val
                        if update_count > self.update_step:
                            self.update_weights_all()
                            update_count = 0
                update_count = update_count + 1
        # categorical = self.disc.categorical
        for i in tqdm(range(init_idx, X_di.shape[0])):
            for col in range(X_di.shape[1]):
                if self.nan_matrix[i, col] == True:
                    yyi = self.y[i]
                    val = self.X_di[i, col]
                    node = self.find_node_by_value(col, val)
                    xj, yi = self.real_interval(node)

                    notnan = np.invert(np.isnan(xj))
                    mu0 = xj[notnan].mean()
                    std0 = xj[notnan].std()

                    imput = None
                    c_idx = np.where(yi == yyi)[0]
                    xjc = xj[c_idx]
                    notnan2 = np.invert(np.isnan(xjc))
                    mu1 = xjc[notnan2].mean()
                    std1 = xjc[notnan2].std()

                    if mu1 / mu0 > alpha:
                        if self.random_state is not None:
                            np.random.seed(self.random_state + i + col)
                        imput = self.f_sample(mu1, L * std1)

                    else:
                        if self.random_state is not None:
                            np.random.seed(self.random_state + i + col)
                        imput = self.f_sample(mu0, L * std0)

                    X_imput[i - init_idx, col] = imput
        if self.pdflag:
            X_imput = pd.DataFrame(X_imput,
                                   columns=self.pd_columns,
                                   index=self.pd_index)

        return X_imput, y
Example #38
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    InceptionResnet_model_1 = InceptionResnetV1(
        pretrained='vggface2').eval().to(device)
    print('load InceptionResnet-vggface2.pt successfully')

    InceptionResnet_model_2 = InceptionResnetV1(
        pretrained='casia-webface').eval().to(device)
    print('load InceptionResnet-casia-webface.pt successfully')

    IR_50_model_1 = IR_50([112, 112])
    IR_50_model_1.load_state_dict(
        torch.load(
            '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/irse/model/backbone_ir50_asia.pth'
        ))
    IR_50_model_1.eval().to(device)
    print('load IR_50 successfully')

    IR_152_model_1 = IR_152([112, 112])
    IR_152_model_1.load_state_dict(
        torch.load(
            '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/irse/model/Backbone_IR_152_Epoch_112_Batch_2547328_Time_2019-07-13-02-59_checkpoint.pth'
        ))
    IR_152_model_1.eval().to(device)
    print('load IR_152 successfully')

    # IR_152_model_2 = IR_152([112, 112])
    # IR_152_model_2.load_state_dict(
    #     torch.load(
    #         '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/irse/model/Head_ArcFace_Epoch_112_Batch_2547328_Time_2019-07-13-02-59_checkpoint.pth'))
    # IR_152_model_2.eval().to(device)
    # print('load IR_152_ArcFace successfully')

    import insightface

    Insightface_iresnet34 = insightface.iresnet34(pretrained=True)
    Insightface_iresnet34.eval().to(device)
    print('load Insightface_iresnet34 successfully')

    Insightface_iresnet50 = insightface.iresnet50(pretrained=True)
    Insightface_iresnet50.eval().to(device)
    print('load Insightface_iresnet50 successfully')

    Insightface_iresnet100 = insightface.iresnet100(pretrained=True)
    Insightface_iresnet100.eval().to(device)
    print('load Insightface_iresnet100 successfully')

    ###########################vgg16
    from Face_recognition.vgg16.vgg16 import CenterLossModel, loadCheckpoint
    vgg16_checkpoint = loadCheckpoint(
        '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/vgg16/model'
    )

    VGG16 = CenterLossModel(embedding_size=512,
                            num_classes=712,
                            checkpoint=vgg16_checkpoint).eval().to(device)
    print('load VGG16 successfully')

    ###########################resnet34
    from Face_recognition.resnet34_triplet.resnet34 import Resnet34Triplet

    checkpoint = torch.load(
        '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/Face_recognition/resnet34_triplet/model/model_resnet34_triplet.pt'
    )
    Resnet34 = Resnet34Triplet(
        embedding_dimension=checkpoint['embedding_dimension']).to(device)
    Resnet34.load_state_dict(checkpoint['model_state_dict'])
    print('load Resnet34 successfully')

    criterion = nn.MSELoss()
    # cpu
    # collect all images to attack
    paths = []
    picpath = '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/images'
    for root, dirs, files in os.walk(picpath):
        for f in files:
            paths.append(os.path.join(root, f))
    random.shuffle(paths)

    # paras
    eps = 1
    steps = 30
    output_path = './output_img'
    momentum = 1.0

    for path in tqdm(paths):

        start = time.time()
        print('processing ' + path + '  ===============>')
        image = Image.open(path)

        # define paras
        # in_tensor is origin tensor of image
        # in_variable changes with gradient

        in_tensor = img2tensor(np.array(image))
        # print(in_tensor.shape)
        in_variable = in_tensor.detach().to(device)
        in_tensor = in_tensor.squeeze().to(device)
        adv = None

        # in_tensor= img2tensor_224(image)
        # # print(in_tensor.shape)
        # in_variable = in_tensor.to(device)
        # in_tensor = in_tensor.squeeze().to(device)
        # adv = None

        #
        # # origin feature

        origin_InceptionResnet_model_1 = InceptionResnet_model_1(in_variable)
        origin_InceptionResnet_model_2 = InceptionResnet_model_2(in_variable)
        origin_IR_50_model_1 = IR_50_model_1(in_variable)
        origin_IR_152_model_1 = IR_152_model_1(in_variable)
        # # origin_IR_152_model_2 = IR_152_model_2(in_variable)
        origin_Insightface_iresent34 = Insightface_iresnet34(in_variable)
        origin_Insightface_iresent50 = Insightface_iresnet50(in_variable)
        origin_Insightface_iresent100 = Insightface_iresnet100(in_variable)
        #######
        origin_VGG16 = VGG16.forward_GetFeature(in_variable)
        ########Resnet34
        origin_Resnet34 = Resnet34(in_variable)

        # 1. untarget attack -> random noise
        # 2. target attack -> x = alpha * target + (1 - alpha) * x
        perturbation = torch.Tensor(3, 112, 112).uniform_(-0.1, 0.1).to(device)
        in_variable = in_variable + perturbation
        in_variable.data.clamp_(-1.0, 1.0)
        in_variable.requires_grad = True
        g_noise = 0.0

        #  sum gradient
        for i in range(steps):
            # print('step: ' + str(i))
            # in_variable = in_variable.to(device)

            out_InceptionResnet_model_1 = InceptionResnet_model_1(in_variable)
            out_InceptionResnet_model_2 = InceptionResnet_model_2(in_variable)
            out_IR_50_model_1 = IR_50_model_1(in_variable)
            out_IR_152_model_1 = IR_152_model_1(in_variable)
            # out_IR_152_model_2 = IR_152_model_2(in_variable)
            out_Insightface_iresnet34 = Insightface_iresnet34(in_variable)
            out_Insightface_iresnet50 = Insightface_iresnet50(in_variable)
            out_Insightface_iresnet100 = Insightface_iresnet100(in_variable)
            out_VGG16 = VGG16.forward_GetFeature(in_variable)
            out_Resnet34 = Resnet34(in_variable)


            loss = criterion(origin_InceptionResnet_model_1, out_InceptionResnet_model_1) + \
                   criterion(origin_InceptionResnet_model_2, out_InceptionResnet_model_2) + \
                   criterion(origin_IR_50_model_1, out_IR_50_model_1) + \
                   criterion(origin_IR_152_model_1, out_IR_152_model_1) + \
                   criterion(origin_Insightface_iresnet34, out_Insightface_iresnet34) + \
                   criterion(origin_Insightface_iresnet50, out_Insightface_iresnet50) + \
                   criterion(origin_Insightface_iresnet100, out_Insightface_iresnet100) + \
                   criterion(origin_VGG16, out_VGG16) + \
                   criterion(origin_Resnet34, out_Resnet34)

            # print('loss : %f' % loss)
            # compute gradients
            loss.backward(retain_graph=True)

            g_noise = momentum * g_noise + (in_variable.grad /
                                            in_variable.grad.data.norm(1))
            g_noise = g_noise / g_noise.data.norm(1)

            if i % 2 == 0:
                kernel = gkern(3, 2).astype(np.float32)
                gaussian_blur1 = GaussianBlur(kernel).to(device)
                g_noise = gaussian_blur1(g_noise)
                g_noise = torch.clamp(g_noise, -0.1, 0.1)
            else:
                addition = TVLoss()
                g_noise = addition(g_noise)

            in_variable.data = in_variable.data + (
                (eps / 255.) * torch.sign(g_noise)
            )  # * torch.from_numpy(mat).unsqueeze(0).float()

            in_variable.grad.data.zero_()  # reset gradients so they do not accumulate across steps

        # deprocess image
        adv = in_variable.data.cpu().numpy()[0]  # (3, 112, 112)
        perturbation = (adv - in_tensor.cpu().numpy())

        adv = adv * 128.0 + 127.0
        adv = adv.swapaxes(0, 1).swapaxes(1, 2)
        adv = adv[..., ::-1]
        adv = np.clip(adv, 0, 255).astype(np.uint8)

        sample_dir = '/notebooks/Workspace/tmp/pycharm_project_314/TianChi/main_5_output-8-29/'
        if not os.path.exists(sample_dir):
            os.makedirs(sample_dir)

        advimg = sample_dir + path.split('/')[-1].split('.')[-2] + '.jpg'

        cv2.imwrite(advimg, adv)
        print("save path is " + advimg)
        print('cost time is %.2f s ' % (time.time() - start))
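The attack loop above is essentially a momentum iterative FGSM in feature space: the gradient of the feature-matching loss is L1-normalized, accumulated with momentum, periodically smoothed, and applied through its sign with step eps/255. A minimal single-model sketch of that core update (the model name and the [-1, 1] input range are assumptions, not part of the code above):

import torch
import torch.nn as nn

def mi_fgsm_feature_attack(model, x, eps=1.0, steps=30, momentum=1.0):
    # Sketch: push the embedding of `x` away from its original value.
    criterion = nn.MSELoss()
    with torch.no_grad():
        origin_feat = model(x)             # feature of the clean image
    adv = x.clone().detach().requires_grad_(True)
    g = torch.zeros_like(x)                # momentum-accumulated gradient
    for _ in range(steps):
        loss = criterion(model(adv), origin_feat)
        loss.backward()
        g = momentum * g + adv.grad / adv.grad.norm(1)  # L1-normalize, as above
        adv.data = (adv.data + (eps / 255.) * g.sign()).clamp_(-1.0, 1.0)
        adv.grad.zero_()
    return adv.detach()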
Example #39
0
def data_loader_for_combined_model(file_list, dataset, config, isVenation):
    shape_x = []
    texture_x = []
    img_x = []
    vein_x = []
    y = []
    maxVal = config['max_val']
    regx_str = config['regx_str']
    regx = re.compile(regx_str)
    for path in tqdm(file_list):
        path = path.strip()
        strs = str.split(path, '/')
        f_name = regx.findall(strs[-1])[0]
        if strs[-2].startswith("yd"):
            d = strs[-2][2:]
        else:
            d = strs[-2]
        if dataset == 'soybean':
            period = strs[-3]
            shape_parent_path = os.path.join(config['shape_data_path'], d,
                                             period)
            texture_parent_path = os.path.join(config['texture_data_path'], d,
                                               period)
            vein_parent_path = os.path.join(config['vein_data_path'], d,
                                            period)
        else:
            shape_parent_path = os.path.join(config['shape_data_path'], d)
            texture_parent_path = os.path.join(config['texture_data_path'], d)
            if isVenation:
                vein_parent_path = os.path.join(config['vein_data_path'], d)

        shape_multiview_x = []
        texture_multiview_x = []
        if isVenation:
            vein_multiview_x = []

        for i in range(config['shape_views']):
            channel_1 = np.loadtxt(
                os.path.join(
                    shape_parent_path,
                    f_name + '_' + str(view_combination[i][0]) + '.txt'))

            if channel_1.size < 4:
                channel_1 = np.reshape(channel_1, [1, 2])
                channel_1 = np.repeat(channel_1, 100, axis=0)

            index1 = get_persistence(channel_1, shape_point_num)
            vec1 = channel_1[index1]
            vector1 = pht['shape'](vec1)

            channel_2 = np.loadtxt(
                os.path.join(
                    shape_parent_path,
                    f_name + '_' + str(view_combination[i][1]) + '.txt'))

            if channel_2.size < 4:
                channel_2 = np.reshape(channel_2, [1, 2])
                channel_2 = np.repeat(channel_2, 100, axis=0)

            index2 = get_persistence(channel_2, shape_point_num)
            vec2 = channel_2[index2]
            vector2 = pht['shape'](vec2)

            channel_3 = np.loadtxt(
                os.path.join(
                    shape_parent_path,
                    f_name + '_' + str(view_combination[i][2]) + '.txt'))

            if channel_3.size < 4:
                channel_3 = np.reshape(channel_3, [1, 2])
                channel_3 = np.repeat(channel_3, 100, axis=0)

            index3 = get_persistence(channel_3, shape_point_num)
            vec3 = channel_3[index3]
            vector3 = pht['shape'](vec3)

            feature = np.dstack([vector1, vector2, vector3])
            flag = np.sum(np.isinf(feature).astype(int))
            if flag > 0:
                print("Inf Error: {}".format(f_name))
            shape_multiview_x.append(feature)

        for j in range(config['texture_views']):

            if dataset == 'cherry':
                texture_pairs = np.loadtxt(
                    os.path.join(texture_parent_path,
                                 f_name + '_pd' + str(j) + '.txt'))
            else:
                texture_pairs = np.loadtxt(
                    os.path.join(texture_parent_path,
                                 f_name + '-pd' + str(j) + '.txt'))

            if texture_pairs.size < 4:
                texture_pairs = np.reshape(texture_pairs, [1, 2])
                texture_pairs = np.repeat(texture_pairs, 100, axis=0)

            index4 = get_persistence(texture_pairs, texture_and_vein_point_num)
            vec_texture = texture_pairs[index4]
            vec_texture = pht['texture'](vec_texture)
            texture_multiview_x.append(vec_texture)

        if isVenation:
            for m in range(config['vein_views']):

                if dataset == 'cherry':
                    vein_pairs = np.loadtxt(
                        os.path.join(vein_parent_path,
                                     f_name + '_pd' + str(m) + '.txt'))
                else:
                    vein_pairs = np.loadtxt(
                        os.path.join(vein_parent_path,
                                     f_name + '-pd' + str(m) + '.txt'))

                if vein_pairs.size < 4:
                    vein_pairs = np.reshape(vein_pairs, [1, 2])
                    vein_pairs = np.repeat(vein_pairs, 100, axis=0)

                index5 = get_persistence(vein_pairs,
                                         texture_and_vein_point_num)
                vec_vein = vein_pairs[index5]
                vec_vein = pht['venation'](vec_vein) / maxVal
                vein_multiview_x.append(vec_vein)

        img_f_path = os.path.join(path)
        img = skio.imread(img_f_path)
        img = resize(img,
                     [config['image_size'][0], config['image_size'][1], 3])

        # print(np.max(img))
        # img = img/255
        shape_x.append(shape_multiview_x)
        texture_x.append(texture_multiview_x)
        img_x.append(img)
        y.append(int(d))
        if isVenation:
            vein_x.append(vein_multiview_x)

    # result_map = dict()
    # result_map['img_x'] = img_x
    # result_map['shape_x'] = shape_x
    # result_map['texture_x'] = texture_x
    # if isVenation:
    #     result_map['vein_x'] = vein_x
    # result_map['y'] = y
    #
    # return result_map

    if isVenation:
        return img_x, shape_x, texture_x, vein_x, y

    return img_x, shape_x, texture_x, y
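get_persistence, pht, view_combination and the *_point_num globals come from elsewhere in this project. As a reading aid only, a plausible sketch of get_persistence, under the assumption that it returns the indices of the num most persistent (birth, death) pairs:

import numpy as np

def get_persistence(pairs, num):
    """Hypothetical sketch: indices of the `num` pairs with the largest persistence.

    `pairs` is an (n, 2) array of (birth, death) values; persistence = death - birth.
    """
    persistence = pairs[:, 1] - pairs[:, 0]
    order = np.argsort(persistence)[::-1]   # most persistent first
    return order[:num]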
Example #40
0
from tqdm import *

n = 10
bar = tqdm(total=n)
bar.set_description("You're in a progress bar...")

for i in range(n):
    bar.update()

bar.close()
Example #41
0
def train(load_model, learning_rate, num_epochs):
    """ Trains the autoencoder.

	Keyword arguments:
	load_model -- load previous model
	learning_rate -- learning rate of the algorithm
	num_epochs -- training epochs (optional)
	"""

    if not os.path.exists('../autoencoder_images'):
        os.mkdir('../autoencoder_images')

    batch_size = 128
    code_size = 1024
    linear_input = 2304
    linear_output = 5760
    vis = visdom.Visdom()

    img_transform = transforms.Compose([
        transforms.Resize((60, 108), Image.ANTIALIAS),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    dataset = datasets.ImageFolder(root='../training_set/',
                                   transform=img_transform)

    dataset_length = len(dataset)

    #Training
    n_training_samples = (dataset_length // 5) * 3
    train_sampler = SubsetRandomSampler(
        np.arange(n_training_samples, dtype=np.int64))

    #Validation
    n_val_samples = dataset_length // 5
    val_sampler = SubsetRandomSampler(
        np.arange(n_training_samples,
                  n_training_samples + n_val_samples,
                  dtype=np.int64))
    print(n_training_samples, n_training_samples + n_val_samples)

    #Test
    n_test_samples = dataset_length // 5
    test_sampler = SubsetRandomSampler(
        np.arange(n_training_samples + n_val_samples,
                  n_training_samples + n_val_samples + n_test_samples,
                  dtype=np.int64))
    print(n_training_samples + n_val_samples,
          n_training_samples + n_val_samples + n_test_samples)

    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=2,
                                               drop_last=True)
    validation_loader = torch.utils.data.DataLoader(dataset,
                                                    batch_size=batch_size,
                                                    sampler=val_sampler,
                                                    num_workers=2,
                                                    drop_last=True)
    test_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
                                              sampler=test_sampler,
                                              num_workers=2,
                                              drop_last=True)

    train_loss_vector = []
    val_loss_vector = []
    epoch_vector = []

    if load_model:
        model = torch.load('../autoencoder.pth')
    else:
        model = autoencoder(linear_input, linear_output, code_size).cuda()

    criterion = nn.MSELoss()
    #criterion = nn.BCELoss()
    m = nn.Sigmoid()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        print('Training')
        for data in tqdm(train_loader):
            img, _ = data
            img = Variable(img).cuda()
            # ===================forward=====================
            output, _ = model(img)
            loss = criterion(output, img)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # ===================log========================
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs,
                                                  loss.item()))

        if epoch % 1 == 0:
            pic = to_img(output.cpu().data)
            save_image(pic, '../autoencoder_images/image_{}.png'.format(epoch))

        train_loss_vector.append(loss.item())

        print('Validation')
        # evaluate only: no backward pass or optimizer step on held-out data
        with torch.no_grad():
            for data in tqdm(validation_loader):
                img, _ = data
                img = Variable(img).cuda()
                # ===================forward=====================
                output, _ = model(img)
                val_loss = criterion(output, img)
        # ===================log========================
        print('Validation loss:{:.4f}'.format(val_loss.item()))

        val_loss_vector.append(val_loss.item())
        epoch_vector.append(epoch)
        validation = dict(x=epoch_vector,
                          y=val_loss_vector,
                          mode="markers+lines",
                          type='custom',
                          marker={
                              'color': 'red',
                              'symbol': 104,
                              'size': "10"
                          })
        train = dict(x=epoch_vector,
                     y=train_loss_vector,
                     mode="markers+lines",
                     type='custom',
                     marker={
                         'color': 'blue',
                         'symbol': 104,
                         'size': "10"
                     })
        layout = dict(title="Loss function",
                      xaxis={'title': 'epochs'},
                      yaxis={'title': 'loss'})

        vis._send({
            'data': [validation, train],
            'layout': layout,
            'win': 'aelosswin'
        })
        torch.save(model, '../autoencoder.pth')

    print('Testing')
    # evaluate only: no backward pass or optimizer step on test data
    with torch.no_grad():
        for data in tqdm(test_loader):
            img, _ = data
            img = Variable(img).cuda()
            # ===================forward=====================
            output, _ = model(img)
            test_loss = criterion(output, img)
    # ===================log========================
    print('Testing loss:{:.4f}'.format(test_loss.item()))
    pic = to_img(output.cpu().data)
    save_image(pic, '../autoencoder_images/testing.png')

    torch.save(model, '../autoencoder.pth')
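The three samplers above split the ImageFolder into contiguous 60/20/20 index ranges, so the split inherits whatever ordering ImageFolder produces (typically class by class). If that matters, a randomized alternative is torch.utils.data.random_split; a sketch, not part of the original code, assuming a recent PyTorch:

import torch

def split_dataset(dataset, seed=0):
    """Sketch of a randomized 60/20/20 split (alternative to the range-based samplers)."""
    n = len(dataset)
    n_train = (n // 5) * 3
    n_val = n // 5
    n_test = n - n_train - n_val
    generator = torch.Generator().manual_seed(seed)
    return torch.utils.data.random_split(dataset, [n_train, n_val, n_test],
                                         generator=generator)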
Example #42
0
    stopwords = open(stopwordFile, 'r').read().split()
    queries = dict([row for row in csv.reader(open(queryFile, 'r'))][1:])
    titles = json.load(open(titleJson, "r"))

    trim = lambda f: [t.strip() for t in f if t.strip()]
    token = trim(open(tokenFile).read().split('\n'))#[:5000]#[:301]
    tokey = trim(open(tokeyFile).read().split('\n'))#[:5000]#[:301]

    # append title to doc
    print("""
appending title to document...
""")

    title_weight = 2

    for i, key in enumerate(tqdm(tokey)):
        title = retain_chinese(titles.get(key, '')).strip()
        if title and title != "Non":
            title_token = ' {}'.format(' '.join([w for w
                in cut_method(title) if w not in stopwords])) * title_weight
            token[i] += title_token
            #print('+= ' + title_token)

    if len(token) != len(tokey):
        print('token length should equal tokey length')
        exit(1)

    bm25 = BM25Transformer()
    vectorizer = TfidfVectorizer()

    print("""
Example #43
0
def get_time_evolution(N, wmin, wmax, total_time):

    #N must be odd in order to locate the central oscillator
    if N % 2 == 0:
        raise ValueError('N must be odd')
    else:
        N = N // 2 + 3
        centre = -2

    t = 0
    dt = 0.01  #Somewhat optimal value
    w = wmin
    dw = (wmax - wmin) / total_time * dt
    u = 0.01

    num_steps = round(total_time / dt)

    #Empty lists to collect data
    frequencies = []
    shifts = []

    z = np.zeros((N, N))
    v = np.zeros((N, N))

    for step in tqdm(range(num_steps)):
        #Increase w and t, set position of the central oscillator
        w += dw
        t += dt
        z[centre, centre] = np.sin(w * t)

        #Find estimates of next v and z
        a = np.roll(z, 1, 1) + np.roll(z, -1, 1) + np.roll(z, 1, 0) + np.roll(
            z, -1, 0) - 4 * z - u * v
        vE_next = v + a * dt
        zE_next = z + v * dt

        #Clean up boundaries and correct position of the central oscillator
        zE_next[0] = zE_next[1]
        zE_next[-1] = zE_next[:, -3]
        zE_next[:, 0] = zE_next[:, 1]
        zE_next[:, -1] = zE_next[-3]
        zE_next[centre, centre] = np.sin(w * (t + dt))

        #Find next v and z using Heun method
        aE_next = (np.roll(zE_next, 1, 1) + np.roll(zE_next, -1, 1) +
                   np.roll(zE_next, 1, 0) + np.roll(zE_next, -1, 0) -
                   4 * zE_next - u * vE_next)
        vH_next = v + 0.5 * (a + aE_next) * dt
        zH_next = z + 0.5 * (v + vE_next) * dt

        #Set v and z for the next step
        v = vH_next
        z = zH_next

        #Clean up boundaries
        z[0] = z[1]
        z[-1] = z[:, -3]
        z[:, 0] = z[:, 1]
        z[:, -1] = z[-3]

        #Calculate and write zero plane displacement
        shift = np.mean(z[1:-1, 1:-1]**2) / (N - 2)**2
        shifts.append(shift)
        frequencies.append(w)

    return frequencies, shifts
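The predictor/corrector pair above (vE_next/zE_next, then vH_next/zH_next) is Heun's method: an Euler predictor followed by a trapezoidal corrector that averages the slopes at both ends of the step. For reference, the same scheme on a scalar ODE y' = f(t, y):

def heun_step(f, t, y, dt):
    """One step of Heun's (explicit trapezoidal) method for y' = f(t, y)."""
    k1 = f(t, y)                      # slope at the start of the step
    y_euler = y + dt * k1             # Euler predictor
    k2 = f(t + dt, y_euler)           # slope at the predicted endpoint
    return y + 0.5 * dt * (k1 + k2)   # corrector: average the two slopes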
Example #44
0
        ws = wb.worksheets[0]
        row = ws.max_row
        if row == 1:
            ws.cell(row=row, column=1).value = 'user_id'
            ws.cell(row=row, column=2).value = 'user_name'
            ws.cell(row=row, column=3).value = 'portrait'
            ws.cell(row=row, column=4).value = 'post_id'
            ws.cell(row=row, column=5).value = 'content_text'
            ws.cell(row=row, column=6).value = 'lou_num'
            ws.cell(row=row, column=7).value = 'post_time'
            ws.cell(row=row, column=8).value = 'url'
            ws.cell(row=row, column=9).value = 'crawl_time'
            row += 1

        need_update = 0
        with tqdm(total=len(tiezi_list)) as pbar:
            for url in tiezi_list:
                # tiezi = Tiezi(url, is_check_tiezi, all_tiezi_address)
                tiezi = Tiezi(url)
                contents = tiezi.get_content()
                if len(contents) == 0:
                    print("the length of contents is 0!")
                for content in contents:
                    if content['post_id'] in all_post_id_ori:
                        continue
                    if content['post_id'] in all_post_id_new:
                        continue
                    if len(content['content_text']) == 0:
                        continue
                    ws.cell(row=row, column=1).value = content['user_id']
                    ws.cell(row=row, column=2).value = content['user_name']
Example #45
0
    def train(self,
              imsize,
              batch_size,
              input_ch,
              epoch_nbr,
              net_weights_init,
              dir_images,
              saver_directory,
              images_root,
              label_nbr,
              learning_rate,
              variable_scope="s"):

        with tf.Graph().as_default() as g:

            # create placeholders
            images = tf.placeholder(tf.float32,
                                    [None, imsize, imsize, input_ch],
                                    name="images")
            labels = tf.placeholder(tf.int32, [None, imsize, imsize],
                                    name="labels")

            with tf.variable_scope(variable_scope) as scope:

                # create model
                deconv_net, net = self.model_function(images, label_nbr)
                predictions = deconv_net[-1]

            # create saver
            saver = tf.train.Saver(
                [v for v in tf.global_variables() if variable_scope in v.name])

            # error
            reshaped_labels = tf.reshape(labels, [-1])
            reshaped_predictions = tf.reshape(predictions, [-1, label_nbr])
            loss = tf.contrib.losses.sparse_softmax_cross_entropy(
                reshaped_predictions, reshaped_labels)

            # optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate)
            train_step = optimizer.minimize(loss)

            # create session
            sess = tf.Session()
            init = tf.global_variables_initializer()
            sess.run(init)

            # load net weights if needed
            if net is not None:
                net.load(net_weights_init, sess)

            # create the list of images in the folder
            directory = os.path.join(dir_images, images_root)
            directory_labels = os.path.join(dir_images, "labels/")
            files = []
            for file in os.listdir(directory_labels):
                if file.endswith(".npz"):
                    file = file.split(".")[:-1]
                    file = ".".join(file)
                    files.append(file)

            # load to get the size
            imsize = scipy.misc.imread(
                os.path.join(directory, files[0] + ".png")).shape

            # create directory
            if os.path.exists(saver_directory):
                shutil.rmtree(saver_directory)
            os.makedirs(saver_directory)

            # open file for loss
            f = open(os.path.join(saver_directory, "loss.txt"), 'w')

            # iterate
            for epoch in range(epoch_nbr):
                print("epoch " + str(epoch))

                total_loss = 0

                # create batches
                shuffle(files)
                batches = [
                    files[i:i + batch_size]
                    for i in range(0, len(files), batch_size)
                ]
                batches = batches[:-1]  # drop the last batch (it may be smaller)

                batch_ = np.zeros(
                    (batch_size, imsize[0], imsize[1], imsize[2]), dtype=float)
                labels_ = np.zeros((batch_size, imsize[0], imsize[1]),
                                   dtype=int)
                for batch_files in tqdm(batches):
                    for im_id in range(len(batch_files)):
                        batch_[im_id] = scipy.misc.imread(
                            os.path.join(directory,
                                         batch_files[im_id] + ".png"))
                        labels_[im_id] = np.load(
                            os.path.join(directory_labels,
                                         batch_files[im_id] + ".npz"))["arr_0"]
                    batch_ /= 255

                    fd = {images: batch_, labels: labels_}

                    [l, tr_] = sess.run([loss, train_step], fd)
                    total_loss += l

                print(total_loss / (len(batches) * batch_size))

                f.write(str(total_loss / (len(batches) * batch_size)) + " \n")
                f.flush()

                if ((epoch + 1) % 10 == 0):
                    # save the model
                    saver.save(sess, os.path.join(saver_directory,
                                                  "model.ckpt"))

            # save the model
            saver.save(sess, os.path.join(saver_directory, "model.ckpt"))

            # close file
            f.close()

            # close session
            del sess
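One detail worth flagging in the example above: batches[:-1] keeps every feed_dict the same shape, but it also discards a full batch whenever len(files) is an exact multiple of batch_size. The chunking idiom in isolation:

files = list(range(10))
batch_size = 3
batches = [files[i:i + batch_size] for i in range(0, len(files), batch_size)]
batches = batches[:-1]   # drops [9]; with 9 files it would drop the full [6, 7, 8]
# batches == [[0, 1, 2], [3, 4, 5], [6, 7, 8]]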
Example #46
0
def load_patch_data(data_dir,
                    preprocess_dir,
                    patch_size,
                    labels_known=True,
                    num_patches=100):
    '''
    Loads in datasets and returns the labeled preprocessed patches for use in the model.

    Determines the number of classes for the problem and assigns labels to each class,
    sorted alphabetically.

    Params:
        - data_dir: string, path to all training class directories
        - preprocess_dir: string, path to destination for robustfov files
        - patch_size: 3-element tuple of integers, size of patches to use for training
        - labels_known: boolean, True if we know the labels, such as for training or
                                 validation.  False if we do not know the labels, such
                                 as loading in data to classify in production
        - num_patches: integer, number of patches to draw from each image
    Returns:
        - data: list of 3D ndarrays, the patches of images to use for training
        - labels: list of 1D ndarrays, one-hot encoding corresponding to classes
        - all_filenames: list of strings, corresponding filenames for use in validation/test
    '''

    data = []
    labels = []
    all_filenames = []

    #################### CLASSIFICATION OF UNKNOWN DATA ####################

    if not labels_known:
        print("*** CALLING 3DRESAMPLE ***")
        orient_dir = orient(data_dir, preprocess_dir)

        print("*** CALLING ROBUSTFOV ***")
        robustfov_dir = robust_fov(orient_dir, preprocess_dir)

        filenames = [
            x for x in os.listdir(robustfov_dir)
            if not os.path.isdir(os.path.join(robustfov_dir, x))
        ]
        filenames.sort()

        for f in filenames:
            img = nib.load(os.path.join(robustfov_dir, f)).get_data()
            #normalized_img = normalize_data(img)
            patches = get_patches(img, patch_size, num_patches)

            for patch in tqdm(patches):
                data.append(patch)
                all_filenames.append(f)

        print("A total of {} patches collected.".format(len(data)))

        data = np.array(data)

        return data, all_filenames

    #################### TRAINING OR VALIDATION ####################

    # determine number of classes
    class_directories = [
        os.path.join(data_dir, x) for x in os.listdir(data_dir)
    ]
    class_directories.sort()
    num_classes = len(class_directories)

    # write the mapping of class to a local file in the following space-separated format:
    # CLASS_NAME integer_category
    class_encodings_file = os.path.join(data_dir, "..", "..",
                                        "class_encodings.txt")
    if not os.path.exists(class_encodings_file):
        with open(class_encodings_file, 'w') as f:
            for i in range(len(class_directories)):
                f.write(
                    os.path.basename(class_directories[i]) + " " + str(i) +
                    '\n')

    print("*** GATHERING PATCHES ***")
    for i in range(len(class_directories)):
        filenames = os.listdir(class_directories[i])
        filenames.sort()

        for f in tqdm(filenames):
            img = nib.load(os.path.join(class_directories[i], f)).get_data()
            #normalized_img = normalize_data(img)
            patches = get_patches(img, patch_size, num_patches)

            for patch in patches:
                data.append(patch)
                labels.append(to_categorical(i, num_classes=num_classes))
                all_filenames.append(f)

    print("A total of {} patches collected.".format(len(data)))

    data = np.array(data, dtype=np.float16)
    data = np.reshape(data, (data.shape + (1, )))
    labels = np.array(labels, dtype=np.float16)

    return data, labels, all_filenames
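get_patches is imported from elsewhere in this project; as a reading aid, a minimal sketch consistent with the docstring, assuming it draws num_patches random crops of size patch_size from each volume:

import numpy as np

def get_patches(img, patch_size, num_patches):
    """Hypothetical sketch: random 3D patches of shape `patch_size` from `img`."""
    patches = []
    for _ in range(num_patches):
        corner = [np.random.randint(0, img.shape[d] - patch_size[d] + 1)
                  for d in range(3)]
        patches.append(img[corner[0]:corner[0] + patch_size[0],
                           corner[1]:corner[1] + patch_size[1],
                           corner[2]:corner[2] + patch_size[2]])
    return patches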
Example #47
0
    def run(self):

        # If this is the first data assimilation window, we can just run the model as normal
        if self.start_day == 0:
            assert self.current_particle_pop_df is None  # Shouldn't have any previously-created particles
            # load snapshot
            snapshot = Snapshot.load_full_snapshot(path=self.snapshot_file)
            # set params
            snapshot.update_params(self.params)
            # Can set the random seed to make it deterministic (None means np will choose one randomly)
            snapshot.seed_prngs(seed=None)

            # Create a simulator and upload the snapshot data to the OpenCL device
            simulator = Simulator(snapshot,
                                  opencl_dir=self.opencl_dir,
                                  gpu=self.use_gpu)
            simulator.upload_all(snapshot.buffers)

            if not self.quiet:
                # print(f"Running simulation {sim_number + 1}.")
                print(f"Running simulation")

            params = Params.fromarray(
                snapshot.buffers.params
            )  # XX Why extract Params? Can't just use PARAMS?

            summary = Summary(
                snapshot,
                store_detailed_counts=self.store_detailed_counts,
                max_time=self.run_length  # Total length of the simulation
            )

            # only show the progress bar when not in quiet mode
            timestep_iterator = range(self.run_length) if self.quiet \
                else tqdm(range(self.run_length), desc="Running simulation")

            iter_count = 0  # Count the total number of iterations
            # Run for iterations days
            for _ in timestep_iterator:
                # Update parameters based on lockdown
                params.set_lockdown_multiplier(snapshot.lockdown_multipliers,
                                               iter_count)
                simulator.upload("params", params.asarray())

                # Step the simulator
                simulator.step()
                iter_count += 1

            # Update the statuses
            simulator.download("people_statuses",
                               snapshot.buffers.people_statuses)
            summary.update(iter_count, snapshot.buffers.people_statuses)

            if not self.quiet:
                for i in range(self.run_length):
                    print(f"\nDay {i}")
                    summary.print_counts(i)

            if not self.quiet:
                print("\nFinished")

            # Download the snapshot from OpenCL to host memory
            # XX This is 'None'.
            final_state = simulator.download_all(snapshot.buffers)

        else:  # Otherwise we need to restart previous models stored in the current_particle_pop_df
            # XXXX CAN GET OLD MODEL STATES, WITH ALL DISEASE STATUSES, FROM THE DF. TWO ISSUES
            # 1. But need to work out how to draw these appropriately; can't assume they are each as good as
            # each other. THIS SHOULD BE OK, surely there's a way to go from the final particles and weights
            # to the DF of state vectors. Particle ID? Just try it out.
            # 2. Also: what to do about stochasticity. For a given (global) parameter combination, we will
            # get quite different results depending on the model state. - I DON'T THINK THIS IS A PROBLEM.
            # ABC Commonly used with stochastic models. E.g. https://eprints.lancs.ac.uk/id/eprint/80439/1/mainR1.pdf
            #
            raise Exception("Not implemented yet")

        # Return the current state of the model in a dictionary describing what it is
        #return {"simulator": simulator}
        return {"simulator": snapshot}
Example #48
0
def analyze_track(model, waveform, sample_rate):
    global cuts, mergelist, pbar, audio_len, labels, stats

    # state vars for analysis loop
    lastc = 1  # last seen class
    lasts = 0  # last visited second
    lastts = "00:00:00.000"  # last cut was at this timestamp
    count = 0  # number of subtitle records
    lastwf = 0  # last frame of the last analyzed window; e.g. a 0 s --> 1 s window at 16000 Hz ends at frame 16000
    stats = [[0, 0] for _ in range(len(labels))]

    if args.srt:
        sub = open(video_path[:-4] + ".srt", 'w',
                   encoding='utf-8')  # subtitle track name
    else:
        sub = io.StringIO()  # in-memory file when no subtitle file needs to be generated

    window_size = int(sample_rate / args.window_size_divide)  # 1s by default
    window_slide = int(window_size / args.window_slide_divide)

    # slide the window of size window_size by window_slide per iteration.
    # overlap may occur.
    print("analyzing track...")
    last_i = window_slide * int(audio_len / window_slide)
    pbar = tqdm(total=last_i)
    for i in range(0, audio_len, window_slide):
        pbar.update(n=window_slide)
        spectrogram = get_spectrogram(waveform, i, window_size)
        spectrogram = tf.expand_dims(spectrogram, axis=0)

        prediction = model(spectrogram)
        cls = int(tf.math.argmax(prediction[0]))
        conf = float(tf.nn.softmax(prediction[0])[cls])

        # generate cut when we know the end of it (or the track is at its end)
        if cls != lastc or i == last_i:
            s = i / sample_rate
            if i == last_i:
                s += (audio_len - i) / sample_rate

            ts = "0" + str(datetime.timedelta(seconds=s))[:11]
            if len(ts) <= 8:
                ts += ".000"
            # if the window slide is overlapping the previous analyzed window
            # and prediction has changed, don't generate a new cut until we are over it
            # ...unless an "emh" is detected! [don't truncate last detected ehm]
            if labels[cls] != "emh" and i < lastwf and i < last_i:
                continue
            # generate subtitles
            record = str(count) + "\n" + lastts.replace('.',',') + " --> " + \
                     ts.replace('.',',') + "\n" + labels[lastc] + \
                     "\n[" + str(conf * 100)[:4] + "]" +"\n\n"
            count += 1
            sub.write(record)
            stats[lastc][0] += 1
            stats[lastc][1] += s - lasts
            lasts = s
            # generate cut
            if labels[lastc] == "speech":
                generate_cut(lastts, ts, count)
            elif args.generate_training_data:
                generate_tdata(lastts, ts, count, labels[lastc])
            lastts = ts
            lastc = cls
        # slide the right hand side of the window detection.
        # This allows to cut segments > than window size
        lastwf = i + window_size

        if not args.spectrogram:
            continue

        img = spectrogram.numpy().T
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cv2.putText(img, labels[cls], (5, 120), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 255, 255), 2)
        cv2.imshow("spectrogram", img)
        cv2.waitKey(1) & 0xFF
        time.sleep(0.2)

    sub.close()
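The window arithmetic above in isolation: window_size is sample_rate / window_size_divide samples and the window start advances by window_slide per iteration, so consecutive windows overlap whenever window_slide_divide > 1. A toy trace with assumed parameter values:

sample_rate = 16000
window_size_divide, window_slide_divide = 1, 4          # assumed CLI defaults
window_size = int(sample_rate / window_size_divide)     # 1 s of samples
window_slide = int(window_size / window_slide_divide)   # 75% overlap
audio_len = sample_rate * 3                             # a 3 s track

last_i = window_slide * int(audio_len / window_slide)   # final window start
for i in range(0, audio_len, window_slide):
    start, end = i, i + window_size   # the slice fed to get_spectrogram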
Example #49
0
def preprocess_images(imageIds, dst, threeBandImagesDir):
    for imageId in tqdm(imageIds, leave=False):
        band_paths = [threeBandImagesDir / f"{imageId}.tif"]
        stack(band_paths=band_paths, out_path=dst / f"{imageId}.tif")
Example #50
0
            width=params.width,
            array_noise_level=params.array_noise_level,
            array_noise_seed=seeds_for_noise[CESnumber],
            mapping_perpair=params.mapping_perpair)

        ## Initialise map containers for each processor
        if pos_CES == 0:
            sky_out_tot = OutputSkyMap(projection=tod.projection,
                                       nside=tod.nside_out,
                                       obspix=tod.obspix,
                                       npixsky=tod.npixsky,
                                       pixel_size=tod.pixel_size)

        ## Scan input map to get TODs
        d = []
        for det in tqdm(range(inst.focal_plane.nbolometer)):
            d.append(tod.map2tod(det))

        ## Project TOD to maps
        tod.tod2map(np.array(d), sky_out_tot)

    MPI.COMM_WORLD.barrier()

    ## Coaddition over all processors.
    ## Note that all processors will then have the coadded data.
    ## If you want informations at the level of each CES (or group of),
    ## use instead:
    ## final_map = OutputSkyMap(nside=nside_out, obspix=tod.obspix)
    ## final_map.coadd_MPI(sky_out_tot, MPI=MPI)
    sky_out_tot.coadd_MPI(sky_out_tot, MPI=MPI)
Example #51
0
def read_us_user_like_page(input_file_path):
    """Reading data "us_user_like_page" into a python dictionary.

    First check the structure of the input file, then calculate the number
    of shared users between pages and store it in a dictionary.

    Args:
        input_file_path: A string, path of the input file: us_user_like_page.

    Returns:
        page_page_dict: A dictionary of dictionaries that stores the number
            of shared users between two pages, using page ids as keys and
            shared user counts as values, e.g.
            {
                pageid1: {pageid2: (shared users of pageid1 & pageid2),
                          pageid3: (shared users of pageid1 & pageid3), ...},
                pageid2: {pageid3: (shared users of pageid2 & pageid3), ...},
            }
    
    Raises:
        incorrect_file_type: Contradiction of correct input file structure.
            Example of correct structure:
            {
                user_id,like_pages,like_times
                1000000736695525,21785951839,1
                1000001070029820,"44473416732,50978409031,630067593722141","2,1,2"
            }
    """

    page_page_dict = {}

    try:
        with open(input_file_path, "r") as inputfile:
            reader = csv.DictReader(inputfile)
            for test in reader:
                # touch the expected columns; a missing key means wrong structure
                test["user_id"]
                test["like_pages"]
                test["like_times"]
                break
    except Exception:
        raise incorrect_file_type("input should be an us_user_like_page data")

    with open(input_file_path, "r") as inputfile:
        reader = csv.DictReader(inputfile)

        for row in tqdm(reader, total=get_num_lines(input_file_path)):
            pageid_list = row['like_pages'].split(',')

            for j, p in enumerate(pageid_list):
                if p not in page_page_dict:
                    page_page_dict[p] = {}

                for k, p1 in enumerate(pageid_list):
                    if k < j:
                        continue
                    elif k == j:
                        page_page_dict[p][p] = page_page_dict[p].get(p,0) + 1
                    else:
                        if p1 not in page_page_dict:
                            page_page_dict[p1] = {}
                        page_page_dict[p][p1] = page_page_dict[p].get(p1,0) + 1
                        page_page_dict[p1][p] = page_page_dict[p1].get(p,0) + 1

    return page_page_dict
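The nested loop counts co-occurrences symmetrically: diagonal entries end up as the total number of likers of a page, off-diagonal entries as the number of shared likers. A toy check (the file name and contents are hypothetical):

# toy check, assuming a CSV with the documented header
#   user_id,like_pages,like_times
#   u1,"A,B","1,1"
#   u2,"A,B","2,1"
#   u3,A,1
d = read_us_user_like_page("toy_us_user_like_page.csv")
assert d["A"]["A"] == 3      # three users like page A
assert d["A"]["B"] == 2      # two users like both A and B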
Example #52
0
import csv
from csv import writer

# assumed imports for this fragment: requests, BeautifulSoup, numpy, tqdm and
# notify_run's Notify are used below but were never imported in the original
import numpy as np
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from notify_run import Notify

notify = Notify()
notify.register()

url_names = 'https://www.maxpreps.com/rankings/football-fall-17/{}/state/texas.htm'
url_scores = 'https://www.maxpreps.com/high-schools/{})/football-fall-17/schedule.htm'
url_contact_info = 'https://www.maxpreps.com/high-schools/{})/home.htm'
#state_set = {'texas'}
state_set = [
    'indiana', 'maine', 'minnesota', 'north-dakota',
    'nebraska', 'nevada', 'ohio', 'oregon', 'texas', 'virginia'
]

school_name = []  # never initialised in the original fragment

for x in tqdm(range(0, 50, 1)):
    names = url_names.format(x)
    r = requests.get(names)
    sopa = BeautifulSoup(r.text, 'html.parser')
    for item in sopa.find_all('tr'):
        try:
            school_name.append(
                item.find('th', attrs={
                    'class': 'school',
                    'scope': 'row'
                }))
        except:
            school_name.append(np.nan)
new_list = []
for i in school_name:
    i = str(i)
Example #53
0
            unfreeze_model_param = list(
                model.module.model.embedding.parameters()) + list(
                    criterion.parameters())

        if epoch == 0:
            for param in list(
                    set(model.parameters()).difference(
                        set(unfreeze_model_param))):
                param.requires_grad = False
        if epoch == args.warm:
            for param in list(
                    set(model.parameters()).difference(
                        set(unfreeze_model_param))):
                param.requires_grad = True

    pbar = tqdm(enumerate(dl_tr))

    for batch_idx, (x, y) in pbar:
        m = model(x.squeeze().cuda())
        loss = criterion(m, y.squeeze().cuda())

        opt.zero_grad()
        loss.backward()

        torch.nn.utils.clip_grad_value_(model.parameters(), 10)
        if args.loss == 'Proxy_Anchor':
            torch.nn.utils.clip_grad_value_(criterion.parameters(), 10)

        losses_per_epoch.append(loss.data.cpu().numpy())
        opt.step()
Example #54
0
from time import sleep
from tqdm import *
from multiprocessing import Pool, freeze_support, RLock

text = '123124555'  # renamed from `str`, which shadowed the built-in
for ch in tqdm(text):
    sleep(0.1)
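Pool, freeze_support and RLock are imported above but unused in this fragment; they belong to tqdm's multiprocessing recipe. A minimal sketch of that pattern:

from multiprocessing import Pool, freeze_support
from tqdm import tqdm

def work(x):
    return x * x

if __name__ == '__main__':
    freeze_support()   # needed for frozen Windows executables
    with Pool(4) as pool:
        # imap yields results as they finish, so tqdm can track progress
        results = list(tqdm(pool.imap(work, range(100)), total=100))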
Example #55
0
    def main(self):

        if self.mode == 'train':
            print("Training in epoch %s" % self.epoch)
            print(self.cfg.actions_weights)
            for batch_data in tqdm(self.data_loader):

                self.baseprocess(batch_data)

                # Optim
                self.optimizer.zero_grad()
                if self.cfg.center_loss_weight > 0:
                    self.lossOpti.zero_grad()
                self.total_loss.backward()
                self.optimizer.step()
                # multiply by (1. / alpha) to remove the effect of alpha when updating the centers
                if self.cfg.center_loss_weight > 0:
                    for param in self.centerlossModel.parameters():
                        param.grad.data *= (1. / self.cfg.center_loss_weight)
                    self.lossOpti.step()
                
            # renew the action loss weight by accuracy
            if self.cfg.renew_weight:
                new_weight = torch.nn.functional.softmin(self.actions_meter.correct_rate_each, dim=0)
                new_weight = new_weight * 9.
                old_weight = torch.tensor(self.cfg.actions_weights)
                new_weight = old_weight * (1 - self.cfg.weight_renew_rate) + self.cfg.weight_renew_rate * new_weight
                self.cfg.actions_weights = new_weight.tolist()
            info = {
                'mode': self.mode,
                'time': self.epoch_timer.timeit(),
                'epoch': self.epoch,
                'loss': self.loss_meter.avg,
                'actions_acc': self.actions_meter.correct_rate,
                'actions_ave_acc': self.actions_meter.ave_rate,
                'actions_each_acc': self.actions_meter.correct_rate_each.numpy().round(3),
                'actions_each_num': self.actions_meter.all_num_each,
                'actions_loss_weights': self.actions_loss_weight.correct_rate_each.numpy().round(3),
                'actions_confusion': self.confuMatrix.class_acc.numpy().round(3),
                'oriens_acc': self.oriens_meter.correct_rate,
                'oriens_ave_acc': self.oriens_meter.ave_rate,
                'oriens_each_acc': self.oriens_meter.correct_rate_each.numpy().round(3),
                'oriens_each_num': self.oriens_meter.all_num_each,
                'oriens_confusion': self.confuMatrix2.class_acc.numpy().round(3)
            }
        elif self.mode == 'test':
            print("Testing in test dataset")
            with torch.no_grad():
                for batch_data in tqdm(self.data_loader):
                    self.baseprocess(batch_data)

            info = {
                'mode': self.mode,
                'time': self.epoch_timer.timeit(),
                'epoch': self.epoch,
                'loss': self.loss_meter.avg,
                'actions_acc': self.actions_meter.correct_rate,
                'actions_ave_acc': self.actions_meter.ave_rate,
                'actions_each_acc': self.actions_meter.correct_rate_each.numpy().round(3),
                'actions_each_num': self.actions_meter.all_num_each,
                'actions_confusion': self.confuMatrix.class_acc.numpy().round(3),
                'oriens_acc': self.oriens_meter.correct_rate,
                'oriens_ave_acc': self.oriens_meter.ave_rate,
                'oriens_each_acc': self.oriens_meter.correct_rate_each.numpy().round(3),
                'oriens_each_num': self.oriens_meter.all_num_each,
                'oriens_confusion': self.confuMatrix2.class_acc.numpy().round(3)
            }
        else:
            assert False, "mode name incorrect"

        return info
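The renew_weight branch above maps per-class accuracy through softmin, so poorly recognized actions receive larger loss weights, then blends old and new weights at weight_renew_rate. The reweighting in isolation (the factor 9. above matches the number of action classes; 3 classes and a rate of 0.2 are assumed here):

import torch

acc = torch.tensor([0.9, 0.5, 0.1])              # per-class accuracy
new_w = torch.nn.functional.softmin(acc, dim=0)  # lower accuracy -> larger weight
new_w = new_w * 3.                               # rescale by the class count
rate = 0.2                                       # assumed weight_renew_rate
old_w = torch.tensor([1.0, 1.0, 1.0])
weights = old_w * (1 - rate) + rate * new_w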
Example #56
0
    def __init__(self,
                 root=None,
                 train=False,
                 transform=None,
                 mist_transform=None,
                 loader=default_loader,
                 seqlen=5,
                 debug=False,
                 dist_filter=None,
                 off_3d=True,
                 off_pc_render=True,
                 overwrite_fofn=False,
                 semantic_transform=np.array,
                 env=None):
        print('Processing the data:')
        if not root:
            self.root = os.path.join(
                os.path.dirname(os.path.abspath(assets.__file__)), "dataset")
        else:
            self.root = root
        self.train = train
        self.env = env
        self.loader = loader
        self.seqlen = seqlen
        self.transform = transform
        self.target_transform = transform
        self.depth_trans = mist_transform
        self.semantic_trans = semantic_transform
        self._require_semantics = "SEMANTICS" in self.env.config[
            "ui_components"]
        self.off_3d = off_3d
        self.select = []
        self.fofn = self.root + '_fofn' + str(int(train)) + '.pkl'
        self.off_pc_render = off_pc_render
        self.dll = None

        if not self.off_pc_render:
            self.dll = np.ctypeslib.load_library('render', '.')

        if overwrite_fofn or not os.path.isfile(self.fofn):
            self.scenes = sorted([
                d for d in (os.listdir(self.root))
                if os.path.isdir(os.path.join(self.root, d)) and
                os.path.isfile(os.path.join(self.root, d, 'camera_poses.csv'))
                and os.path.isdir(os.path.join(self.root, d, 'pano'))
            ])

            num_scenes = len(self.scenes)
            num_train = int(num_scenes * 0.9)
            print("Total %d scenes %d train %d test" %
                  (num_scenes, num_train, num_scenes - num_train))
            if train:
                self.scenes = self.scenes[:num_train]

            self.meta = {}
            if debug:
                last = 35
            else:
                last = len(self.scenes)

            for scene in self.scenes[:last]:
                posefile = os.path.join(self.root, scene, 'camera_poses.csv')
                with open(posefile) as f:
                    for line in f:
                        l = line.strip().split(',')
                        uuid = l[0]
                        xyz = list(map(float, l[1:4]))
                        quat = list(map(float, l[4:8]))
                        if not scene in self.meta:
                            self.meta[scene] = {}
                        metadata = (uuid, xyz, quat)
                        # print(uuid, xyz)

                        if os.path.isfile(
                                os.path.join(self.root, scene, 'pano',
                                             'points',
                                             'point_' + uuid + '.json')):
                            self.meta[scene][uuid] = metadata
            print("Indexing")

            for scene, meta in tqdm(list(self.meta.items())):
                if len(meta) < self.seqlen:
                    continue
                for uuid, v in list(meta.items()):
                    dist_list = [
                        (uuid2,
                         np.linalg.norm(np.array(v2[1]) - np.array(v[1])))
                        for uuid2, v2 in list(meta.items())
                    ]
                    dist_list = sorted(dist_list, key=lambda x: x[-1])

                    if dist_filter is not None:
                        if dist_list[1][-1] < dist_filter:
                            self.select.append(
                                [[scene, dist_list[i][0], dist_list[i][1]]
                                 for i in range(self.seqlen)])

                    else:
                        self.select.append(
                            [[scene, dist_list[i][0], dist_list[i][1]]
                             for i in range(self.seqlen)])

            with open(self.fofn, 'wb') as fp:
                pickle.dump([
                    self.scenes, self.meta, self.select, num_scenes, num_train
                ], fp)

        else:
            with open(self.fofn, 'rb') as fp:
                self.scenes, self.meta, self.select, num_scenes, num_train = pickle.load(
                    fp)
                print("Total %d scenes %d train %d test" %
                      (num_scenes, num_train, num_scenes - num_train))
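The fofn pickle above is a build-once cache: the directory scan and nearest-neighbour indexing run only when the file is missing or overwrite_fofn is set. The same pattern in general form, as a sketch:

import os
import pickle

def cached(path, build, overwrite=False):
    """Sketch: run build() once, pickle the result to `path`, reload on later calls."""
    if overwrite or not os.path.isfile(path):
        result = build()                 # the expensive computation
        with open(path, 'wb') as fp:
            pickle.dump(result, fp)
        return result
    with open(path, 'rb') as fp:
        return pickle.load(fp)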
Example #57
0
def searem(X, string=''):
    """Return the column names of X that do not contain `string`."""
    clean = []
    for nm in tqdm(list(X.columns)):
        if string not in nm:
            clean.append(nm)
    return clean
Example #58
0
def CNN(epoch=100,
        batch_size=128,
        save_period=10,
        load_period=100,
        optimizer="sgd",
        learning_rate=0.01,
        dataset="MNIST",
        ctx=mx.gpu(0),
        method=1):

    #data selection
    if dataset == "MNIST":
        train_data, test_data = MNIST(batch_size)
        path = "weights/MNIST-{}.params".format(load_period)
    elif dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
        path = "weights/CIFAR10-{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        path = "weights/FashionMNIST-{}.params".format(load_period)
    else:
        return "The dataset does not exist."
    '''Follow these steps:

    •Define network
    •Initialize parameters
    •Loop over inputs
    •Forward input through network to get output
    •Compute loss with output and label
    •Backprop gradient
    •Update parameters with gradient descent.
    '''

    #Convolution Neural Network
    # formula : output_size=((input−weights+2*Padding)/Stride)+1
    # data size
    # MNIST,FashionMNIST = (batch size , 1 , 28 ,  28)
    # CIFAR = (batch size , 3 , 32 ,  32)
    '''note!!!
    To compile and optimize the HybridSequential, we can then call its hybridize method. 
    Only HybridBlocks, e.g. HybridSequential, can be compiled. But you can still call hybridize on normal Block 
    and its HybridBlock children will be compiled instead. We will talk more about HybridBlocks 
    '''

    if method == 1:
        #method 1 : HybridBlock
        net = HybridBlockNetwork()
    elif method == 2:
        #method 2 : Block
        net = BlockNetwork()
    else:
        #method 3 : using Sequential()
        net = gluon.nn.HybridSequential()  # stacks 'Block's sequentially

        with net.name_scope():
            net.add(
                gluon.nn.Conv2D(channels=60,
                                kernel_size=(3, 3),
                                strides=(1, 1),
                                use_bias=True,
                                activation="relu")
            )  # MNIST : result = ( batch size , 60 , 26 , 26) , CIFAR10 : : result = ( batch size , 60 , 30 , 30)
            net.add(
                gluon.nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
            )  # MNIST : result = (batch size , 60 , 13 , 13) , CIFAR10 : result = (batch size , 60 , 15 , 15)
            net.add(
                gluon.nn.Conv2D(channels=30,
                                kernel_size=(6, 6),
                                strides=(1, 1),
                                use_bias=True,
                                activation="relu")
            )  # MNIST :  result = ( batch size , 30 , 8 , 8), CIFAR10 :  result = ( batch size , 30 , 10 , 10)
            net.add(
                gluon.nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
            )  # MNIST : result = (batch size , 30 , 4 , 4) , CIFAR10 : result = (batch size , 30 , 5 , 5)
            net.add(
                gluon.nn.Dense(units=120,
                               activation="sigmoid",
                               use_bias=True,
                               flatten=True))
            net.add(gluon.nn.Dropout(0.2))
            net.add(
                gluon.nn.Dense(units=64, activation="sigmoid", use_bias=True))
            net.add(gluon.nn.Dropout(0.2))
            net.add(gluon.nn.Dense(10, use_bias=True))

    net.hybridize()  # hybridize!!!! for faster learning - only for hybrid

    #weights initialization
    if os.path.exists(path):
        print("loading weights")
        net.load_params(filename=path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        net.collect_params().initialize(mx.init.Normal(sigma=0.1),
                                        ctx=ctx)  # weights initialization
        #net.initialize(mx.init.Normal(sigma=0.1),ctx=ctx) # weights initialization

    #optimizer
    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {"learning_rate": learning_rate})

    #learning
    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:

            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)

            with autograd.record(train_mode=True):
                output = net(data)

                #loss definition
                '''Why construct the loss here?
                answer: gluon losses such as SoftmaxCrossEntropyLoss are classes and must be
                instantiated before being called on (output, label).'''
                loss = gluon.loss.SoftmaxCrossEntropyLoss(axis=-1,
                                                          sparse_label=True)(
                                                              output, label)
                cost = nd.mean(loss).asscalar()
            loss.backward()
            trainer.step(batch_size, ignore_stale_grad=True)

        print(" epoch : {} , last batch cost : {}".format(i, cost))

        #weight_save
        if i % save_period == 0:

            if not os.path.exists("weights"):
                os.makedirs("weights")

            print("saving weights")
            if dataset == "MNIST":
                net.save_params("weights/MNIST-{}.params".format(i))

            if dataset == "FashionMNIST":
                net.save_params("weights/FashionMNIST-{}.params".format(i))

            elif dataset == "CIFAR10":
                net.save_params("weights/CIFAR10-{}.params".format(i))

    test_accuracy = evaluate_accuracy(test_data, net, ctx)
    print("Test_acc : {}".format(test_accuracy))

    return "optimization completed"
Example #59
0
            pixel_size=params.pixel_size,
            width=params.width,
            array_noise_level=params.array_noise_level,
            array_noise_seed=seeds_for_noise[CESnumber],
            mapping_perpair=True)

        ## Initialise map containers for each processor
        if pos_CES == 0:
            sky_out_tot = OutputSkyMap(projection=tod.projection,
                                       nside=tod.nside_out,
                                       obspix=tod.obspix,
                                       npixsky=tod.npixsky,
                                       pixel_size=tod.pixel_size,
                                       demodulation=True)

        for pair in tqdm(tod.pair_list):
            ## Demodulated TS
            d_demod = np.array([tod.map2tod(det) for det in pair])
            d_demod = tod.demodulate_timestreams(d_demod)
            tod.tod2map(d_demod, sky_out_tot)

    MPI.COMM_WORLD.barrier()

    ## Coaddition over all processors.
    ## Note that all processors will then have the coadded data.
    ## If you want informations at the level of each CES (or group of),
    ## use instead:
    ## final_map = OutputSkyMap(nside=nside_out, obspix=tod.obspix)
    ## final_map.coadd_MPI(sky_out_tot, MPI=MPI)
    sky_out_tot.coadd_MPI(sky_out_tot, MPI=MPI)
Example #60
0
# To evaluate on the validation set:
#model.evaluate_generator(val_gen, steps=num_val_images // batch_size, workers=8)

#------------------------PREDICTIONS--------------------------------------------------#

from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

submission_df = pd.read_csv(data_dir + 'sample_submission.csv')
submission_df.head()

test_datagen = ImageDataGenerator()
data = bson.decode_file_iter(open(test_bson_path, 'rb'))

with tqdm(total=num_test_products) as pbar:
    for c, d in enumerate(data):
        product_id = d['_id']
        num_imgs = len(d['imgs'])

        batch_x = np.zeros((num_imgs, 180, 180, 3), dtype=K.floatx())

        for i in range(num_imgs):
            bson_img = d['imgs'][i]['picture']

            #Load and preprocess the image
            img = load_img(io.BytesIO(bson_img), target_size=(180, 180))
            x = img_to_array(img)
            x = test_datagen.random_transform(x)
            x = test_datagen.standardize(x)