Example #1
def il_max_avg_obstacle_clearance():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    sampler.sample(N=10000, alpha=0.1, prior_file='samples/il_prior.pkl', N_sigma=1000,
                   behavior_func=behavior.obstacle_clearance,
                   env=environment.RBF2dGymEnv(use_lidar=True), env_kernel=kernel.RBF2dEnvKernelNormal(),
                   controller=controller.ILController(), controller_kernel=kernel.TransitionKernel(),
                   target_type='maximal', save=name)
Example #2
def rrt_max_avg_obstacle_clearance(prior_file='samples/rrt_prior.pkl'):
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    sampler.sample(N=10000, alpha=0.1, prior_file=prior_file, N_sigma=1000,
                   behavior_func=behavior.obstacle_clearance,
                   env=environment.RBF2dGymEnv(use_lidar=False), env_kernel=kernel.RBF2dEnvKernelNormal(),
                   controller=controller.RRTController(), controller_kernel=kernel.RRTKernelNormal([-1, -1], [1, 1]),
                   target_type='maximal', save=name)
Example #3
    def test_sample(self):
        settings.DJANGO_SAMPLER_USE_COST = True
        sampler.sample('sql', 'SELECT 1', 1, [])

        query = models.Query.objects.get()
        self.assertEqual('sql', query.query_type)
        self.assertEqual('SELECT 1', query.query)
Example #4
def il_min_legibility(prior_file='samples/il_prior.pkl'):
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    sampler.sample(N=10000, alpha=0.1, prior_file=prior_file, N_sigma=1000,
                   behavior_func=behavior.neg_behavior(behavior.legibility),
                   env=environment.RBF2dGymEnv(use_lidar=True), env_kernel=kernel.RBF2dEnvKernelNormal(),
                   controller=controller.ILController(), controller_kernel=kernel.TransitionKernel(),
                   target_type='maximal', save=name)
Example #5
def rrt_min_abs_jerkiness():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    sampler.sample(N=10000, alpha=0.1, prior_file='samples/rrt_prior.pkl', N_sigma=1000,
                   env=environment.RBF2dGymEnv(use_lidar=False), controller=controller.RRTController(),
                   behavior_func=behavior.min_jerkiness, env_kernel=kernel.RBF2dEnvKernelNormal(),
                   controller_kernel=kernel.RRTKernelNormal([-1, -1], [1, 1]),
                   target_type='match', target_behavior=0, save=name)
Example #6
def create_sample_deltas(sample_size=20000):
    '''
    Read the delta data and save a sample of it.
    '''
    nodelta_file = "/mnt/h/FeatureData/all_nodelta_feature_data.csv"
    nodelta_sample_file = "../data/sampled_nodelta_feature_data.csv"
    sampler.sample(nodelta_file, nodelta_sample_file, sample_size)
Example #7
def rrt_min_legibility():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    sampler.sample(N=10000, alpha=0.1, prior_file='samples/rrt_prior.pkl', N_sigma=1000,
                   behavior_func=behavior.neg_behavior(behavior.legibility),
                   env=environment.RBF2dGymEnv(use_lidar=False), env_kernel=kernel.RBF2dEnvKernelNormal(),
                   controller=controller.RRTController(), controller_kernel=kernel.RRTKernelNormal([-1, -1], [1, 1]),
                   target_type='maximal', save=name)
Example #8
def il_min_abs_jerkiness():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    sampler.sample(N=10000, alpha=0.1, prior_file='samples/il_prior.pkl', N_sigma=1000,
                   behavior_func=behavior.min_jerkiness,
                   env=environment.RBF2dGymEnv(use_lidar=True), env_kernel=kernel.RBF2dEnvKernelNormal(),
                   controller=controller.ILController(), controller_kernel=kernel.TransitionKernel(),
                   target_type='match', target_behavior=0, save=name)
Example #9
def sample_preview():
    if len(sys.argv) < 3:
        print(
            'Please specify the path to the WikiNews pages-articles XML to sample from.'
        )
        return

    sampler.sample(sys.argv[2], preview=True)
Example #10
def ds_min_avg_obstacle_clearance():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    sampler.sample(N=10000, alpha=0.1, prior_file='samples/ds_prior.pkl', N_sigma=1000,
                   behavior_func=behavior.obstacle_clearance,
                   env=environment.RBF2dGymEnv(time_limit=500, oob_termination=False, use_lidar=False),
                   env_kernel=kernel.RBF2dEnvKernelNormal(),
                   controller=controller.DSController(), controller_kernel=kernel.TransitionKernel(),
                   target_type='match', target_behavior=0, save=name)
Example #11
def _find_FC(var_type,
             sd,
             pauroc_goal,
             start_fc,
             test,
             max_iter=500,
             precision=0.003,
             scale=0.05):
    """
    gradient descent based method for finding the smallest fold change, given the standard deviation of
    the sample noise, that produces the desired partial AUROC score

    :param var_type:       either "uniform", "gamma" or "trend". describes how variance is sampled
    :param sd:             mean standard deviation of sample noise
    :param pauroc_goal:    desired partial AUROC value
    :param start_fc:       where to start looking
    :param test:           statistical tests that has the shape test(ctrl, exp) and returns an array of p-values
    :param max_iter:       maximum number of iterations
    :param precision:      how close the pAUROC with the estimated fold change should be to the desired
                           pAUROC
    :param scale:          multiply the step size by
    :return:               approximate fold change that produces the the desired pAUROC score given the SD
                           None if max_iter is exceeded
    """

    fc = start_fc
    if var_type == "uniform":
        simulator = lambda fold_change: sample(
            var_type, pep_var=sd**2, fold_change=fold_change)
    else:
        simulator = lambda fold_change: sample(
            var_type, beta=(ALPHA - 1) * sd**2, fold_change=fold_change)

    for x in range(max_iter):
        # simulate data
        ctrl, exp, is_changed = simulator(fc)
        p_vals = test(ctrl, exp)

        # calculate pauroc
        predicted = -np.log(p_vals)
        fpr, tpr, _ = roc_curve(is_changed, predicted)
        try:
            idx = next(i for i, v in enumerate(fpr) if v > FDR)
        except StopIteration:
            idx = len(fpr) - 1

        t_fpr, t_tpr = fpr[:idx + 1], tpr[:idx + 1]
        t_fpr[-1] = FDR

        pauroc = auc(t_fpr, t_tpr) / FDR

        if abs(pauroc_goal - pauroc) < precision:
            return fc

        # update fold change, stepping toward the target pAUROC
        fc += scale * (pauroc_goal - pauroc)

    # no fold change within precision found after max_iter iterations
    return None
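A minimal usage sketch for _find_FC (hypothetical: it assumes the module-level constants FDR and ALPHA referenced above are defined, and supplies a Welch t-test as the test callable):

from scipy.stats import ttest_ind

def welch_test(ctrl, exp):
    # test(ctrl, exp) must return one p-value per row (peptide)
    return ttest_ind(ctrl, exp, axis=1, equal_var=False).pvalue

# find the fold change whose pAUROC is ~0.8 for gamma-sampled variance
fc = _find_FC("gamma", sd=0.5, pauroc_goal=0.8, start_fc=1.0, test=welch_test)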
Example #12
def density_scatter(ctrl_data):
    """
    Scatterplot of variance vs log2 mean intensity
    used for figure 1ABCD

    :param ctrl_data: pd.DataFrame containing control data
    :return:          plot
    """

    matplotlib.rc("font", size=15)
    f, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(15, 5))

    def do_plot(data, ax):
        means = np.mean(data.values, axis=1)
        vars = np.var(data.values, axis=1)
        plot_density_scatter(means, vars, ax)

    # Real
    rs = np.random.choice(ctrl_data.shape[0], NUM_PEPTIDES, replace=False)
    slct_data = ctrl_data.iloc[rs]

    # Simulated
    # only NUM_ROWS avg_ctrl data provided -> will be based on real data (reordered but none omitted)
    ctrl_u, _, _ = sample("uniform", num_changed=0)
    ctrl_g, _, _ = sample("gamma", num_changed=0)
    ctrl_t, _, _ = sample("trend", num_changed=0)

    do_plot(ctrl_u, ax1)
    do_plot(ctrl_g, ax2)
    do_plot(ctrl_t, ax3)
    do_plot(slct_data, ax4)

    ax1.set_ylabel("Peptide variance")
    ax1.set_title("Uniform variance", fontsize=15)
    ax2.set_title("Inverse gamma variance", fontsize=15)
    ax3.set_title("Intensity dependent \ninverse gamma variance", fontsize=15)
    ax4.set_title("Empirical Data", fontsize=15)
    ax1.set_xlabel("Mean $log2$ peptide intensity")
    ax2.set_xlabel("Mean $log2$ peptide intensity")
    ax3.set_xlabel("Mean $log2$ peptide intensity")
    ax4.set_xlabel("Mean $log2$ peptide intensity")

    f.tight_layout()
    f.subplots_adjust(right=0.94)

    # Legend
    make_colorbar(ax4)

    return f
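Hypothetical usage: density_scatter returns a matplotlib figure, so it can be saved like any other figure (the file name here is illustrative):

f = density_scatter(ctrl_data)
f.savefig("figure1_abcd.pdf", bbox_inches="tight")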
Example #13
def triangulate(path):
    mesh = sampler.sample(path)

    print('>> mesh samples with chew93_Surface')
    for f_index in range(mesh.number_of_faces()):
        global dihedral_min, dihedral_max
        dihedral_max = 0.0
        dihedral_min = np.pi

        print('face', f_index + 1, 'of', mesh.number_of_faces(), '. . .')
        face_mesh = mesh.face_meshes[f_index]
        meta_block = mesh.meta_blocks[f_index]
        meta_block[MeshkD.MM_PRIO] = PRIORITY_FACTOR
        meta_block[MeshkD.MM_SIZE] = USE_SIZE_TEST
        meta_block[MeshkD.MM_ADMS] = APPROX_DIST_MULTI_SAMPLING
        meta_block[MeshkD.MV_ADTH] = DISTANCE_THRESHOLD
        chew93_Surface(face_mesh, meta_block)
        print('\rrefining mesh - done (after', meta_block[MeshkD.NV_REFI],
              'iterations)')
        if meta_block[MeshkD.NV_REFI] > 0:
            print('dihedral range: (',
                  np.rad2deg(dihedral_min),
                  ', ',
                  np.rad2deg(dihedral_max),
                  ')',
                  sep='')

    mesh.reset_bounding_boxes()

    return mesh
Example #14
def err_bars_peptide_roc(var_type, **kwargs):
    """
    Runs multiple rounds of simulations
    Summarizes overall ROC scores

    :param var_type:  either "uniform", "gamma" or "trend". describes how variance is sampled
    :param kwargs:    optional arguments to be passed to sampler
    :return:          numpy.ndarray (N_RUNS x number of tests x 3), arr[i][j] = (AUC, PRC, pAUC)
    """

    start = time.time()
    res = np.zeros((N_RUNS, len(TESTS), 3), dtype=np.float32)

    for i in range(N_RUNS):
        if i % 50 == 0:
            print("At iteration %d" % i)
        ctrl, exp, is_changed = sample(var_type, **kwargs)
        p_vals = do_all_tests(ctrl, exp)
        res[i, :, 0], res[i, :, 1], res[i, :, 2] = roc_prc_scores(is_changed, p_vals)

    end = time.time()
    print(end - start)

    return res
Example #15
def err_bars_peptide_fdr(var_type, **kwargs):
    """
    Runs multiple rounds of simulations
    Summarizes overall FDR scores

    :param var_type:  either "uniform", "gamma" or "trend". describes how variance is sampled
    :param kwargs:    optional arguments to be passed to sampler
    :return:          numpy.ndarray (N_RUNS x number of tests x 4), arr[i][j] = (FP_raw, TP_raw, FP_adj, TP_adj)
    """

    start = time.time()
    res = np.zeros((N_RUNS, len(TESTS), 4), dtype=np.float32)

    for i in range(N_RUNS):
        if i % 50 == 0:
            print("At iteration %d" % i)
        ctrl, exp, is_changed = sample(var_type, **kwargs)
        p_vals = do_all_tests(ctrl, exp)
        res[i, :, 0], res[i, :, 1], res[i, :, 2], res[i, :, 3] = \
            power_analysis(is_changed, p_vals)
        # avoid memory filling up with unused variables
        del ctrl, exp, is_changed

    end = time.time()
    print(end - start)

    return res
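Both err_bars_peptide_* helpers return an array of shape (N_RUNS, len(TESTS), k); a hypothetical post-processing sketch that reduces the ROC variant to per-test means and run-to-run spreads:

res = err_bars_peptide_roc("gamma")
auc_mean = res[:, :, 0].mean(axis=0)  # mean AUROC per test across runs
auc_std = res[:, :, 0].std(axis=0)    # run-to-run spread per test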
Example #16
	def test_2_sample_ten(self):
		"Test size after 1 sample() run"
		print()
		mon_crit = 'corpusName'
		# (allow +/- 1 result length for roundup error)
		hit_index = sampler.sample(10, [mon_crit])
		self.assertTrue(len(hit_index) <= 11)
		self.assertTrue(len(hit_index) >= 9)
Example #17
    def get(self, page=None):
        if page in METALS:
            point = sample()
            level = point[METALS.index(page)]
            level = int((level * 20) + 80) / 100.0  # maps level into 0.8 - 1.0
            log.info("%s: %s" % (page, level))
            return self.text(str(level))
            # return self.text(str(1.0))
        return self.text("ANIMAS: %s" % (METALS,))
Example #19
def run_LSPI():
    epsilon = 0.1  #convergence criterion, should this be dynamic?
    iteration = 0
    max_iterations = 100
    distance = float('Inf')
    first_time = True
    pi = []
    distances = []

    sample_n = 10  # num of episodes to simulate

    while iteration < max_iterations and distance > epsilon:
        # get samples from simulation (need to sample from these according to prob. dist.?)
        samples = sampler.sample(sample_n, pi)

        phi = basis.calculate_bases(samples)
        k = np.size(phi, 1)  # dimensions of basis phi (needs to be altered for matrix)

        if first_time:
            pi = policy.zero_policy(k)  # initial policy with initial weights zero

        old_pi = copy.copy(pi)  # old weights of pi

        # least-squares approximation of the Q function: calculate and set
        # new weights for pi based on the samples
        LSQ.calc_weights(samples, k, phi, pi)

        l1 = len(old_pi.weights)
        l2 = len(pi.weights)

        # compare weights of old_pi and new weights of pi (based on new samples)
        if l1 == l2:
            diff = old_pi.weights - pi.weights
            Linf_norm = np.linalg.norm(diff, np.inf)
            L2_norm = np.linalg.norm(diff)
        else:
            Linf_norm = np.absolute(
                np.linalg.norm(old_pi.weights, np.inf) -
                np.linalg.norm(pi.weights, np.inf))
            L2_norm = np.absolute(
                np.linalg.norm(old_pi.weights) - np.linalg.norm(pi.weights))

        distance = L2_norm

        distances.append(distance)

        print("Distance between weights is " + str(distance) +
              " at iteration " + str(iteration))

        iteration += 1
        first_time = False

    return pi, distances
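A hypothetical driver for run_LSPI, plotting the recorded weight distances to inspect convergence:

import matplotlib.pyplot as plt

pi, distances = run_LSPI()
plt.plot(distances)
plt.xlabel("LSPI iteration")
plt.ylabel("L2 distance between successive weights")
plt.show()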
Example #20
	def test_3_query_trace(self):
		"Check if we kept coherent trace of query 'crit:val'"
		print()
		mon_crit = 'corpusName'
		hit_index = sampler.sample(1, [mon_crit])
		# read index[some_id]['_q'] 
		id0 = next(iter(hit_index))
		first_query = hit_index[id0]['_q']
		left_side = first_query.split(':')[0]
		self.assertEqual(left_side, mon_crit)
Example #21
def plot_example_roc_curves(var_type):
    """
    Figure 1EFG, example curves

    :param var_type:  type of distribution to use to sample peptide variance, either "uniform", "gamma" or "trend"
    :return:          plot
    """
    matplotlib.rc("font", size=11)
    ctrl, exp, is_changed = sample(var_type)
    pvals = do_all_tests(ctrl, exp)
    return plot_roc_curves(is_changed, pvals)
Example #22
def rrt_max_illegibility():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    samples = sample(N=10000,
                     alpha=0.1,
                     prior_file='samples/rrt_prior.pkl',
                     N_sigma=1000,
                     behavior_func=behavior.illegibility_behavior,
                     env=PandaEnv(),
                     env_kernel=ReachingEnvKernelNormal(),
                     controller=RRTController(),
                     controller_kernel=RRTKernelNormal(),
                     target_type='maximal',
                     save=name)
Example #23
def volcano_multipanel_example(var_type):
    """
    Generate panel comparing volcano plots
    Compare uniform and inverse gamma
    Corresponds to Figure 4B of manuscript

    :param var_type:  type of distribution to use to sample peptide variance, either "uniform", "gamma" or "trend"
    :return:          plot
    """
    matplotlib.rc("font", size=20)
    ctrl, exp, is_changed = sample(var_type)
    pvals = do_all_tests(ctrl, exp)

    return volcano_multipanel(pvals, ctrl, exp, is_changed)
Example #24
def mesher(path):
    mesh = sampler.sample(path)

    print('>> mesh samples with chew93_Surface')
    for face_index in range(mesh.number_of_faces()):
        print('face', face_index + 1, 'of', mesh.number_of_faces(), '. . .')
        vertices, wire_meshes, triangles, _ = mesh.face_meshes[face_index]

        triangles.clear()
        triangles += chew93_Surface(vertices, wire_meshes)

    mesh.reset_bounding_boxes()

    return mesh
Example #25
def ds_improved_min_end_distance():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    samples = sample(N=10000,
                     alpha=0.1,
                     prior_file='samples/ds_improved_prior.pkl',
                     N_sigma=1000,
                     behavior_func=behavior.ee_distance_behavior,
                     env=PandaEnv(),
                     env_kernel=ReachingEnvKernelNormal(),
                     controller=DSController(typ='improved'),
                     controller_kernel=TransitionKernel(),
                     target_type='match',
                     target_behavior=0,
                     save=name)
Example #26
def rrt_min_ee_distance():
    name = f'samples/{inspect.currentframe().f_code.co_name}.pkl'
    samples = sample(N=10000,
                     alpha=0.1,
                     prior_file='samples/rrt_prior.pkl',
                     N_sigma=1000,
                     behavior_func=behavior.ee_distance_behavior,
                     env=PandaEnv(),
                     env_kernel=ReachingEnvKernelNormal(),
                     controller=RRTController(),
                     controller_kernel=RRTKernelNormal(),
                     target_type='match',
                     target_behavior=0,
                     save=name)
Example #27
def barplot_multipanel(var_type):
    """
    Compute FP, TP, FN, TN for raw and adj p-values
    Corresponds to Figure 4C in manuscript

    :param var_type:  type of distribution to use to sample peptide variance, either "uniform", "gamma" or "trend"
    :return:          plot
    """
    matplotlib.rc("font", size=15)
    f, axarr = plt.subplots(2, 2, sharey="row", sharex=True, figsize=(5, 7))

    ctrl, exp, is_changed = sample(var_type)
    pvals = do_all_tests(ctrl, exp)
    pvals.drop("fold change", axis=1, inplace=True)

    # Edit column titles for pretty printing
    pvals.columns = [
        (LABEL_MAPPING[l] if l in LABEL_MAPPING else l).replace('\n', ' ')
        for l in list(pvals.columns)
    ]

    # Adjust pvals
    pvals_a = pd.DataFrame.from_dict(
        OrderedDict([(col, multipletests(pvals[col], 0.05, method="fdr_bh")[1])
                     for col in pvals.columns]))

    # Plot
    barplot_accuracy(pvals, is_changed, axarr[0][0], axarr[1][0])
    barplot_accuracy(pvals_a, is_changed, axarr[0][1], axarr[1][1])
    axarr[1][0].set_ylabel("Count")
    axarr[0][0].set_title("Raw p-values")
    axarr[0][1].set_title("BH adjusted")

    f.tight_layout()

    # Resize ax
    for ax in axarr[0]:
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width, box.height * 0.5])

    # Add legend
    handles, labels = axarr[0][1].get_legend_handles_labels()
    plt.figlegend(handles, labels, loc="upper center", ncol=2)

    return f
Example #28
def main(n, Jth):
    # V = set([xinit])
    V = set()

    print("Sampling...")
    COST = dict()
    V |= sample(n, (0., 0., 0.), XDIM, YDIM, ZDIM, COST, ax=None)

    # sample_pairs: a set with elements (state0, state1)
    Npair = int(n*(n-1)/20)
    sample_pairs = sample_data(V, Npair)

    ## --- COST roadmap --- ##

    # create a dictionary COST with entries:  (state0, state1) : (opt_cost, opt_time) 
    print("Constructing (sampled) COST roadmap...")
    for p in sample_pairs:
        tau = find_tau(*p)
        J_opt = Jstar(*p, tau)
        COST[p] = (J_opt, tau)

    # create svm.SVC object which classifies pairs of states with C and Jth
    classifier = trainsvm(COST, C, Jth)

    # add every possible edge to COST
    print("Completing COST roadmap...")
    sys.setrecursionlimit(2*n*n)
    N = len(V)*(len(V)-1)
    i = 1
    for u in V:
        for v in V - set([u]):
            if i % 1000 == 1:
                print("  Processing {} / {}".format(i, N))
            cost(u, v, COST)  # Computes and stores cost in COST
            if i % 50000 == 0:
                save_object(COST, 'COST_full_n{}_J{}.pkl'.format(int(n), int(Jth)))
            i += 1

    V = precompute_reachable_sets(V, COST, classifier, Jth)

    print("Saving objects.")
    save_object(V, 'V_n{}_J{}.pkl'.format(int(n), int(Jth)))
    save_object(COST, 'COST_full_n{}_J{}.pkl'.format(int(n), int(Jth)))
    save_object(classifier, 'classifier{}node_J{}.pkl'.format(n, int(Jth)))
    print("Success! n: {}, Jth: {}".format(n, int(Jth)))
Example #29
    def batchProcess(self, batch, showSeg):
        self.lm.cleargrads()

        # reduce
        for b in batch:
            self.ds.reduceSeg(b)

        # inVoc and V
        V = len([w for w in self.ds.cObs.uni if self.ds.cObs.uni[w] > 0])
        inVoc = self.ds.getInVoc(self.samplingSizeK, mode='uniform')

        # set ngram in batch
        sampler.setNgram([self.ds.idData[b][1:-1] for b in batch], self.lm,
                         self.ds, inVoc)

        for b in batch:
            if showSeg:
                print('red<', '_'.join(self.ds.getSegedLine(b)))

            self.ds.segData[b] = sampler.sample(self.ds.idData[b][1:-1],
                                                self.lm, self.ds)

            if showSeg:
                print('add>', '_'.join(self.ds.getSegedLine(b)))

        # add
        for b in batch:
            self.ds.addSeg(b)

        # recalc inVoc and V
        V = len([w for w in self.ds.cObs.uni if self.ds.cObs.uni[w] > 0])

        # calc loss
        segedLines = [
            module.segmentIdLine(self.ds.idData[b],
                                 [1] + self.ds.segData[b] + [1]) for b in batch
        ]
        # BOS Padding
        segedLines = [[segedLine[0] for _ in range(n - 1)] + segedLine
                      for segedLine in segedLines]

        loss = self.lm.getLoss(segedLines, self.ds.getInVoc())
        loss.backward()
        self.opt.update()
Example #30
def pvalue_multipanel():
    """
    Generate panel comparing p-value distributions
    Compare uniform, inverse gamma and trend
    Corresponds to Figure 2BDF of manuscript

    :return:  plot
    """
    matplotlib.rc("font", size=15)
    f, axarr = plt.subplots(3,
                            len(TESTS) + 1,
                            sharex="col",
                            sharey="row",
                            figsize=(20, 15))

    for i, var_type in enumerate(["uniform", "gamma", "trend"]):
        ctrl, exp, _ = sample(var_type, num_changed=0)
        pvals = do_all_tests(ctrl, exp)
        pvals.drop("fold change", axis=1, inplace=True)

        # Edit column titles for pretty printing
        pvals.columns = [
            LABEL_MAPPING[l] if l in LABEL_MAPPING else l
            for l in list(pvals.columns)
        ]
        pvals["ModT \n(2-s., robust)"] = modT(ctrl,
                                              exp,
                                              robust=True,
                                              run_2sample=True)
        pvals["ModT \n(1-s., robust)"] = modT(ctrl, exp, robust=True)
        plot_pvalue_dist(pvals, axarr[i])
        for ax in axarr[i]:
            ax.set_xlabel("" if i < 2 else "p-value")

    f.tight_layout()
    plt.subplots_adjust(hspace=0.4)

    return f
Example #31
def main(args):
    # If args['hetero'] is True, g would be a heterogeneous graph.
    # Otherwise, it will be a list of homogeneous graphs.
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
    val_mask, test_mask = load_data(args['dataset'])

    if hasattr(torch, 'BoolTensor'):
        train_mask = train_mask.bool()
        val_mask = val_mask.bool()
        test_mask = test_mask.bool()

    # features = features.to(args['device'])
    features = [f.to(args['device']) for f in features]
    labels = labels.to(args['device'])
    train_mask = train_mask.to(args['device'])
    val_mask = val_mask.to(args['device'])
    test_mask = test_mask.to(args['device'])

    if args['hetero']:
        from model_hetero import SS_HAN
        model = SS_HAN(muti_meta_paths=
                    [[['pa', 'ap'], ['pf', 'fp']],
                    [['ap', 'pa']],
                    [['fp', 'pf']]],
                    in_size=features[0].shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])

        g = g.to(args['device'])
    else:
        from model import HAN
        model = HAN(num_meta_paths=len(g),
                    in_size=features.shape[1],
                    hidden_size=args['hidden_units'],
                    out_size=num_classes,
                    num_heads=args['num_heads'],
                    dropout=args['dropout']).to(args['device'])
        g = [graph.to(args['device']) for graph in g]

    stopper = EarlyStopping(patience=args['patience'])
    # loss_fcn = F.binary_cross_entropy_with_logits
    loss_fcn = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    # lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)

    print('*****************************Pre-training Starting*************************************')
    for epoch in range(args['pretrain_epochs']):
        model.train()

        for idx in range(args['batch_size']):
            embeddings = model(g, features)
            pos_edge_index, neg_edge_index = sample(g, 1)
            link_logits = model.calculate_loss(embeddings, pos_edge_index, neg_edge_index)
            link_labels = get_link_labels(pos_edge_index, neg_edge_index)
            loss = loss_fcn(link_logits, link_labels)
            link_probs = link_logits.sigmoid().detach().numpy()
            acc = roc_auc_score(link_labels, link_probs)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print('link_labels : {}'.format(link_labels))
            # print('link_probs : {}'.format(link_probs))
            print('epoch: {} || batch_size : {} || loss: {} || accuracy: {}'.format(epoch, idx, loss, acc))
        # lr_scheduler.step()
        early_stop = stopper.step(model, epoch, loss.item(), acc)
        if early_stop:
            break
    filename = './model/ss-han_{}_{:02f}_{:02f}'.format(epoch, loss, acc)
    torch.save(model.state_dict(), filename)

    print('*****************************Pre-training Ending*************************************')
    print('\n')
    print('*****************************Fine-tuning Starting*************************************')

    # freeze the pretrained parameter
    for param in model.parameters():
        param.requires_grad = False

    from model_hetero import Classifier
    classifier = Classifier(in_size=args['hidden_units']*args['num_heads'][-1],
                            hidden_size=128,
                            out_size=num_classes)

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['fine-tuning_epochs']):
        model.train()

        embeddings = model(g, features)
        output = classifier(embeddings[0])
        loss = loss_fcn(output[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(output[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 \
            = evaluate(model, classifier, g, features, labels, val_mask, loss_fcn)
        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
            epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1))

    print('*****************************Fine-tuning Ending*************************************')

    test_loss, test_acc, test_micro_f1, test_macro_f1 \
        = evaluate(model, classifier, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
Example #33
import entropy
import numpy as np
import sampler
import threshold

def postfn(params):
#    print('postfn, params:',params) 
    entropy.loadCosts('../data/costMatrixSea.csv')
    sites = '../data/cities_weights.csv'

    experiment = entropy.Experiment(0, params[0], params[1], params[2])
    simSites = entropy.runEntropy(experiment, sites, False)
    return simSites

sites = '../data/cities_weights.csv'
data = entropy.loadHistoricalSites(sites)

eps = threshold.LinearEps(15, 200, 150)
priors = sampler.TophatPrior([0,0,0],[2,2,10])

sampler = sampler.Sampler(N=200, Y=data, postfn=postfn, dist=entropy.distRelative, threads=16)

for pool in sampler.sample(priors, eps):
    print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(pool.t, pool.eps, pool.ratio))
    for i, (mean, std) in enumerate(zip(np.mean(pool.thetas, axis=0), np.std(pool.thetas, axis=0))):
        print(u"    theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean,std))
    np.savetxt("result_"+str('%.2f')%pool.eps+'.csv', pool.thetas, delimiter=";", fmt='%1.5f')

print(pool.thetas)
np.savetxt("foo.csv", pool.thetas, delimiter=";", fmt='%1.5f')

Example #34
def generate0(charset, policy, length, f):
    n = len(charset)
    while True:
        password = ''.join(charset[sample(n, f)] for _ in range(length))
        if policy is None or policy.accept(password):
            return password
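For context, a self-contained sketch of the same rejection loop, with uniform random indices standing in for the opaque sample(n, f) draw (hypothetical; the real call presumably samples charset indices according to f):

import random
import string

def generate0_uniform(charset, policy, length):
    # same accept/reject structure as generate0, minus the custom sampler
    n = len(charset)
    while True:
        password = ''.join(charset[random.randrange(n)] for _ in range(length))
        if policy is None or policy.accept(password):
            return password

print(generate0_uniform(string.ascii_letters + string.digits, None, 12))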