def calculate_random_walk_features(graph, train_nodes, test_nodes,
                                   train_targets, test_targets):
    """Build random-walk features for the train and test nodes.

    For every node, ``random_walk(adjacency, node_index)`` is evaluated and
    the entries that belong to the training nodes are used as weights for
    the training targets, producing one feature per target column.

    Args:
        graph: networkx graph that contains every train and test node.
        train_nodes: node names of the training set, in the same order as
            the rows of ``train_targets``.
        test_nodes: node names of the test set.
        train_targets: array of shape ``(len(train_nodes), n_classes)``.
        test_targets: array whose shape defines the shape of the returned
            test features; its values are not read.

    Returns:
        Tuple ``(train_new_features, test_new_features)`` with the shapes of
        ``train_targets`` and ``test_targets`` respectively.
    """
    # Freeze the node order once so the adjacency rows and the index lookup
    # are guaranteed to agree.
    node_order = list(graph.nodes())
    adjacency = nx.adjacency_matrix(graph, nodelist=node_order,
                                    weight=None).toarray()
    indices = {name: position for position, name in enumerate(node_order)}
    train_indices = [indices[name] for name in train_nodes]
    test_indices = [indices[name] for name in test_nodes]

    # (n_classes, M): a real transpose. The previous reshape(7, -1) only
    # reinterpreted the buffer and mixed targets of different nodes.
    train_targets_CxM = np.asarray(train_targets).T

    train_new_features = np.zeros(train_targets.shape)
    for i, train_node in enumerate(train_indices):
        probabilities = random_walk(adjacency, train_node)
        probabilities_Mx1 = probabilities[train_indices].reshape(-1, 1)
        # A training node must not see its own label. Row i of the slice is
        # this node; its graph-wide index would address the wrong row.
        probabilities_Mx1[i] = 0
        multiplication = np.matmul(train_targets_CxM, probabilities_Mx1)
        train_new_features[i, :] = np.asarray(multiplication).ravel()

    test_new_features = np.zeros(test_targets.shape)
    for i, test_node in enumerate(test_indices):
        probabilities = random_walk(adjacency, test_node)
        probabilities_Mx1 = probabilities[train_indices].reshape(-1, 1)
        multiplication = np.matmul(train_targets_CxM, probabilities_Mx1)
        test_new_features[i, :] = np.asarray(multiplication).ravel()

    return train_new_features, test_new_features
def classification_by_random_walk(graph, train_nodes, test_nodes,
                                  train_targets, test_targets):
    """Classification using only the values from Random Walk.

    Each test node is assigned the class whose training targets receive the
    largest total weight from ``random_walk(adj, node_index)``. The one-hot
    predictions are passed to ``calculate_metrics`` together with
    ``test_targets``.

    Args:
        graph: networkx graph that contains every train and test node.
        train_nodes: node names of the training set, in the same order as
            the rows of ``train_targets``.
        test_nodes: node names to classify.
        train_targets: array of shape ``(len(train_nodes), n_classes)``.
        test_targets: ground truth forwarded to ``calculate_metrics``.

    Returns:
        None.
    """
    node_order = list(graph.nodes())
    adj = nx.adjacency_matrix(graph, nodelist=node_order,
                              weight=None).toarray()
    indices = {name: position for position, name in enumerate(node_order)}

    train_ind = [indices[node] for node in train_nodes]
    test_ind = [indices[node] for node in test_nodes]

    # (n_classes, M): a real transpose instead of reshape(7, -1), which
    # scrambled the targets and fixed the class count at 7.
    targets_CxM = np.asarray(train_targets).T
    n_classes = targets_CxM.shape[0]

    predictions = np.zeros((len(test_ind), n_classes))
    # Start the walk at each test node's own graph index. The previous loop
    # used a counter over range(813), i.e. the first 813 graph nodes.
    for row, node_index in enumerate(test_ind):
        walk = random_walk(adj, node_index)
        scores = np.matmul(targets_CxM, walk[train_ind].reshape(-1, 1))
        predictions[row, np.argmax(scores)] = 1

    calculate_metrics(test_targets, predictions)
Exemple #3
0
def createTrainingSet(n_sequences):
    """Generate ``n_sequences`` episodes and convert each to an (x, y) pair.

    A world is created with ``random_walk(N_STATES)``; every episode it
    generates is passed through ``convertEpisodeToStateRep``.

    Returns:
        ``(x_train, y_train, world)`` where the two lists hold the first and
        second element of every converted episode, in generation order.
    """
    world = random_walk(N_STATES)
    converted = [
        convertEpisodeToStateRep(world.generateEpisode())
        for _ in range(n_sequences)
    ]
    x_train = [features for features, _ in converted]
    y_train = [labels for _, labels in converted]
    return x_train, y_train, world
Exemple #4
0
def calculate_random_walk_features(graph, train_nodes, test_nodes,
                                   train_targets, test_targets):
    """Weight the training targets by a single random-walk result.

    ``random_walk`` is evaluated once on the dense adjacency matrix with
    second argument 1. Its first ``len(train_nodes)`` entries are taken as a
    column vector and left-multiplied by the transposed training targets.

    ``test_nodes`` and ``test_targets`` are accepted but not used.

    Returns:
        ``numpy.matrix`` holding ``train_targets.T * p[:len(train_nodes)]``.
    """
    dense_adjacency = nx.adjacency_matrix(graph, nx.nodes(graph)).toarray()
    walk = random_walk(dense_adjacency, 1)
    n_train = len(train_nodes)
    train_column = walk[:n_train].reshape(-1, 1)
    targets_transposed = np.matrix(train_targets.transpose())
    return targets_transposed * train_column
Exemple #5
0
# Apply the previously computed train mask, then keep only the test samples
# whose weight is at least filter_num.
train_weight = train_weight[train_mask]
test_mask = (test_weight >= filter_num)
test_data = test_data[test_mask]
test_weight = test_weight[test_mask]

# The "dict" arrays are the filtered train and test sets concatenated.
dict_data = np.concatenate([train_data, test_data])
dict_weight = np.concatenate([train_weight, test_weight])


# At this stage, the index still starts from zero

# Generate walk paths from the training data only; node_list enumerates the
# ids 0 .. num_list[-1] - 1 and is only needed by the 'hyper' walk.
node_list = np.arange(num_list[-1]).astype('int')
if args.walk == 'hyper':
	walk_path = random_walk_hyper(args, node_list, train_data)
else:
	walk_path = random_walk(args, num, train_data)
del node_list

# Add 1 for the padding index
# (presumably add_padding_idx shifts every id by one so 0 is free for
# padding -- confirm against its definition)
print("adding pad idx")
dict_data = add_padding_idx(dict_data)
train_data = add_padding_idx(train_data)
test_data = add_padding_idx(test_data)

# Note that, no matter how many node types are here, make sure the
# hyperedge (N1,N2,N3,...) has id, N1 < N2 < N3...

compress = True
# Note that, no matter how many node types are here, make sure the
# hyperedge (N1,N2,N3,...) has id, N1 < N2 < N3...
if not dynamic_dict:
Exemple #6
0
def calc(sign, lock):
    """Estimate 6-node motif statistics from random walks and accumulate them.

    For each of ``iter_num`` iterations a walk of ``step_num + 1000`` steps
    is drawn with ``rdmW.random_walk(step_num + 1000, G, 50)``; the first
    1000 positions are skipped. At every later position whose last three
    walk nodes are pairwise distinct, three more nodes are sampled, each
    from the neighbours of one of the nodes chosen so far (which node to
    extend from is picked with ``rdm.randint`` against cumulative degrees).
    If the six nodes are distinct, the induced subgraph is matched against
    ``Graph_list`` by isomorphism and the product of the cumulative degree
    sums is added to that motif's tally.

    After each walk the tallies are scaled with ``W_constant`` and
    ``step_num`` and added to the module-level shared counters
    ``counter0`` .. ``counter111`` while holding ``lock``.

    Args:
        sign: unused; kept for interface compatibility.
        lock: object with ``acquire()`` / ``release()`` guarding the
            shared counters.
    """
    n_motifs = 112
    burn_in = 1000

    for indx in range(iter_num):
        lst_motif = [0] * n_motifs
        w = rdmW.random_walk(step_num + burn_in, G, 50)

        for i in range(burn_in, step_num + burn_in):
            window = w[(i - 2):i + 1]
            # A window that revisits a node can never yield six distinct
            # nodes, so skip it entirely. Previously only the draw of `m`
            # was guarded, leaving `m` unbound or stale in that case.
            if len(set(window)) != 3:
                continue

            deg_prev = G.degree(w[i - 1])
            deg_cur = G.degree(w[i])

            # NOTE(review): if rdm is the stdlib `random` module, randint
            # includes its upper bound -- confirm this is intended.
            m = rdm.randint(0, deg_prev + deg_cur)
            if m < deg_prev:
                node4 = rdm.choice(G.neighbors(w[i - 1]))
            else:
                node4 = rdm.choice(G.neighbors(w[i]))
            deg4 = G.degree(node4)

            n = rdm.randint(0, deg_prev + deg_cur + deg4)
            if n < deg_prev:
                node5 = rdm.choice(G.neighbors(w[i - 1]))
            elif n < deg_prev + deg_cur:
                node5 = rdm.choice(G.neighbors(w[i]))
            else:
                node5 = rdm.choice(G.neighbors(node4))
            deg5 = G.degree(node5)

            r = rdm.randint(0, deg_prev + deg_cur + deg4 + deg5)
            if r < deg_prev:
                node6 = rdm.choice(G.neighbors(w[i - 1]))
            elif r < deg_prev + deg_cur:
                node6 = rdm.choice(G.neighbors(w[i]))
            elif r < deg_prev + deg_cur + deg4:
                node6 = rdm.choice(G.neighbors(node4))
            else:
                node6 = rdm.choice(G.neighbors(node5))

            sampled = window + [node4] + [node5] + [node6]
            if len(set(sampled)) != 6:
                continue

            dg_prod = (deg_prev
                       * (deg_prev + deg_cur)
                       * (deg_prev + deg_cur + deg4)
                       * (deg_prev + deg_cur + deg4 + deg5))
            temp = G.subgraph(sampled)
            for graph_in in range(n_motifs):
                if temp.isomorphic(Graph_list[graph_in]):
                    lst_motif[graph_in] += dg_prod
                    break

        x = [31525 * 2 * (a / (b * step_num))
             for a, b in zip(lst_motif, W_constant)]

        # Release the lock even if a counter update fails, so other workers
        # are not blocked forever.
        lock.acquire()
        try:
            # counter0 .. counter111 are module-level objects; look them up
            # by name, in the same descending order as before.
            for k in reversed(range(n_motifs)):
                globals()['counter%d' % k].value += x[k]
        finally:
            lock.release()

        # NOTE(review): these closes run on every iteration, as in the
        # original; nothing in this function writes to either handle.
        fwrite.close()
        f.close()
Exemple #7
0
# print (train_weight, np.min(train_weight), np.max(train_weight))
# train_weight_mean = np.mean(train_weight)
# train_weight = train_weight / train_weight_mean * neg_num
# test_weight = test_weight / train_weight_mean * neg_num

# Convert the node-count bookkeeping to tensors and report the walk mode.
num = torch.as_tensor(num)
num_list = torch.as_tensor(num_list)
print(num, num_list)
print("walk type", args.walk)

# Walk paths are only generated when walk-based features are requested.
if args.feature == 'walk':
    # node_list enumerates the ids 0 .. num_list[-1] - 1; only the 'hyper'
    # walk receives it.
    node_list = np.arange(num_list[-1]).astype('int')
    if args.walk == 'hyper':
        walk_path = random_walk_hyper(args, node_list, data)
    else:
        walk_path = random_walk(args, num, data)
    del node_list

compress = True
# Note that, no matter how many node types are here, make sure the
# hyperedge (N1,N2,N3,...) has id, N1 < N2 < N3...
if not dynamic_dict:
    # Static mode: one hash built from `data` is shared by train and test.
    test_dict = build_hash(data,
                           compress=compress,
                           max_size=max_size,
                           min_size=min_size,
                           fname="test")
    train_dict = test_dict
    # train_dict = build_hash(train_data, compress = compress, max_size=max_size, min_size = min_size, fname="test")
else:
    # Dynamic mode: one BloomFilter per index 0 .. max_size.
    train_dict = [BloomFilter(1e8, 1e-3) for i in range(max_size + 1)]
Exemple #8
0
def main():
    """Interactively run an ICP-style registration of the source cloud
    against four target clouds (cloud_icp_target0..3.csv).

    For each target the source and target clouds are shown, up to 2000
    alignment iterations are run, the accumulated transform is printed and
    applied to the source cloud, and the per-iteration error is plotted.
    The user is prompted (raw_input) before and after each target.
    """
    #Import the cloud
    pc_source = utils.load_pc('cloud_icp_source.csv')

    ###YOUR CODE HERE###
    #pc_target = utils.load_pc('cloud_icp_target3.csv') # Change this to load in a different target

    for tg in range(4):
        # pc_source is overwritten with the aligned cloud at the end of each
        # pass, so targets 1-3 reload the original source from disk.
        if tg == 0:
            pc_target = utils.load_pc('cloud_icp_target0.csv')
            utils.view_pc([pc_source, pc_target], None, ['b', 'r'], ['o', '^'])
            print 'test target 0:\n\n'
        elif tg == 1:
            pc_source = utils.load_pc('cloud_icp_source.csv')
            pc_target = utils.load_pc('cloud_icp_target1.csv')
            utils.view_pc([pc_source, pc_target], None, ['b', 'r'], ['o', '^'])
            print 'test target 1:\n\n'
        elif tg == 2:
            pc_source = utils.load_pc('cloud_icp_source.csv')
            pc_target = utils.load_pc('cloud_icp_target2.csv')
            utils.view_pc([pc_source, pc_target], None, ['b', 'r'], ['o', '^'])
            print 'test target 2:\n\n'
        elif tg == 3:
            pc_source = utils.load_pc('cloud_icp_source.csv')
            pc_target = utils.load_pc('cloud_icp_target3.csv')
            utils.view_pc([pc_source, pc_target], None, ['b', 'r'], ['o', '^'])
            print 'test target 3:\n\n'

        # p / q hold the source / target points as matrix columns.
        p = utils.convert_pc_to_matrix(pc_source)
        q = utils.convert_pc_to_matrix(pc_target)
        T_list = []  # 4x4 transform of every step, in application order
        iteration = []
        error_all = []
        success = 0
        print 'stop criterion: distance error converges to the threshold or not able to converge within 2000 iterations. So please wait for at most 2000 iterations, which takes only a few minutes'
        raw_input('\npress enter to start\n')

        for num in range(2000):
            print 'iteration', num + 1, ':\n'
            iteration.append(num + 1)
            pf = numpy.matrix([[], [], []])
            qf = numpy.matrix([[], [], []])

            # Correspondence step: visit the source points in random order
            # and pair each with its nearest *remaining* target point; both
            # are removed from the pools so every target point is used at
            # most once.
            while p.shape[1] > 0:
                i = random.choice(range(p.shape[1]))
                j = numpy.argmin(numpy.linalg.norm(q - p[:, i], axis=0))
                pf = numpy.hstack((pf, p[:, i]))
                p = numpy.delete(p, i, 1)
                qf = numpy.hstack((qf, q[:, j]))
                q = numpy.delete(q, j, 1)

            # From here on column k of p corresponds to column k of q.
            p = pf.copy()
            q = qf.copy()

            # Centre both sets on their centroids and take the SVD of the
            # product of the centred coordinates.
            p_avg = p.sum(axis=1) / (p.shape[1] * 1.0)
            q_avg = q.sum(axis=1) / (q.shape[1] * 1.0)
            X = numpy.subtract(p, p_avg)
            Y = numpy.subtract(q, q_avg)
            u, s, w = numpy.linalg.svd(X * Y.T)
            # The last diagonal entry is det(w.T * u.T); presumably this
            # keeps R a proper rotation rather than a reflection.
            m = numpy.matrix([[1., 0., 0.], [0., 1., 0.],
                              [0., 0., numpy.linalg.det(w.T * u.T)]])
            R = w.T * m * u.T
            t = q_avg - R * p_avg

            # Store the step as a homogeneous 4x4 transform.
            T = numpy.concatenate((R, t), axis=1)
            T = numpy.concatenate((T, numpy.matrix([[0., 0., 0., 1.]])))
            T_list.append(T)

            fit_error = numpy.add(R * p, t) - q
            error_all.append(numpy.linalg.norm(fit_error)**2)
            print 'distance least square error:', numpy.linalg.norm(
                fit_error)**2, '\n\n'
            p = R * p + t

            # Target 3 only: with probability 1/20, while the squared error
            # is still above 0.1, apply a random rotation about p_avg and
            # record it as an extra step.
            # NOTE(review): p_avg is the centroid from before p was updated
            # on the line above -- confirm this is intended.
            if tg == 3 and random.randint(
                    1, 20) == 1 and numpy.linalg.norm(fit_error)**2 > 0.1:
                R_random = random_walk.random_walk()
                p = R_random * (p - p_avg) + p_avg
                R = R_random
                t = p_avg - R_random * p_avg
                T = numpy.concatenate((R, t), axis=1)
                T = numpy.concatenate((T, numpy.matrix([[0., 0., 0., 1.]])))
                T_list.append(T)

            # Convergence test uses the (unsquared) norm of the residual.
            if numpy.linalg.norm(fit_error) < 0.1:
                # Compose all steps; later steps are applied on the left.
                for i in range(len(T_list)):
                    if i == 0:
                        T_final = T_list[i]
                    else:
                        T_final = T_list[i] * T_final
                    success = 1
                break

        pc = utils.convert_pc_to_matrix(pc_source)
        # Not converged within 2000 iterations: still compose what we have.
        if success == 0:
            for i in range(len(T_list)):
                if i == 0:
                    T_final = T_list[i]
                else:
                    T_final = T_list[i] * T_final

        print 'transformation from source to target point cloud:\n'
        print 'R =\n', T_final[:3, :3], '\n\nt =\n', T_final[:3, 3]
        # Apply the accumulated transform to the source cloud and show it.
        pc = T_final[:3, :3] * pc + T_final[:3, 3]
        pc_source = utils.convert_matrix_to_pc(pc)
        utils.view_pc([pc_source], None, ['b'], ['o'])
        plt.axis([-0.15, 0.15, -0.15, 0.15])
        plt.figure()
        plt.title('ICP Error vs Iteration')
        plt.plot(iteration, error_all, 'ro-')
        plt.xlabel('Iteration')
        plt.ylabel('Least squares error')
        raw_input('press enter and test the next target\n')
        # One close per figure opened for this target (presumably the two
        # view_pc figures plus the error plot -- confirm against view_pc).
        plt.close()
        plt.close()
        plt.close()
    ###YOUR CODE HERE###

    raw_input("\nPress enter to end:")
Exemple #9
0
def usps_test(l=20, n_runs=50):
    """Compare four semi-supervised classifiers on the USPS data.

    The data returned by ``usps.get_data()`` comes as two groups (named
    0-4 and 5-9). In every run both groups are shuffled, the first ``l``
    samples of each group are used as labeled data and the rest as
    unlabeled data, and four methods are evaluated: marginalized kernel,
    "polyStep" cluster kernel, "linear" cluster kernel with an SVM, and
    random walk. Per-run accuracies and the final mean / standard deviation
    of each method are printed.

    Args:
        l: number of labeled samples taken from each group.
        n_runs: number of shuffled repetitions (previously fixed at 50).
    """
    x_0_4, x_5_9, y_0_4, y_5_9 = usps.get_data()  # Get usps data

    # NOTE(review): the test split and the labels are taken once, before
    # any shuffling, while x_0_4 / x_5_9 are reshuffled in place on every
    # run. x_test can therefore contain samples that later end up in
    # x_labeled, and the labels only stay aligned if every sample of a
    # group shares one label -- confirm against usps.get_data().
    y_test = np.hstack((y_0_4[l:], y_5_9[l:]))
    x_test = np.vstack((x_0_4[l:], x_5_9[l:]))
    y_labeled = np.hstack((y_0_4[:l], y_5_9[:l]))

    def kernel_gauss(x):
        return mixturemodel_kernels.marginalized_kernel(x, k=10)

    def kernel_cluster(x):
        return cluster_kernel.kernel(x, 10, "polyStep", 16, gamma=5)

    def kernel_standard(x):  # use with evaluate_kernel_SVM
        return cluster_kernel.kernel(x, 10, "linear", 16, gamma=5)

    acc_gauss = np.array([None] * n_runs)
    acc_polyStep = np.array([None] * n_runs)
    acc_linear = np.array([None] * n_runs)
    acc_random_walk = np.array([None] * n_runs)

    for test in range(n_runs):
        print(test)
        np.random.shuffle(x_0_4)
        np.random.shuffle(x_5_9)
        x_labeled = np.vstack((x_0_4[:l], x_5_9[:l]))
        x_unlabeled = np.vstack((x_0_4[l:], x_5_9[l:]))

        acc_gauss[test] = evaluate_kernel(x_labeled, x_unlabeled, x_test,
                                          y_labeled, y_test, kernel_gauss)
        acc_polyStep[test] = evaluate_kernel(x_labeled, x_unlabeled, x_test,
                                             y_labeled, y_test, kernel_cluster)
        acc_linear[test] = evaluate_kernel_SVM(x_labeled, x_unlabeled, x_test,
                                               y_labeled, y_test,
                                               kernel_standard)
        acc_random_walk[test] = random_walk.random_walk(x_labeled,
                                                        x_unlabeled,
                                                        x_test,
                                                        y_labeled,
                                                        y_test,
                                                        sigma=5)
        print(f'accuracy = {acc_random_walk[test] * 100}% () Random Walk')
        print(
            f'accuracy = {acc_gauss[test] * 100}% () Marginalized kernel Walk')
        print(f'accuracy = {acc_polyStep[test] * 100}% () Polystep Walk')
        print(f'accuracy = {acc_linear[test] * 100}% () Linear Walk')

    print(
        f'Marginalized Kernel: accuracy = {acc_gauss.mean() * 100}% (±{acc_gauss.std() * 100:.2})'
    )
    print(
        f'PolyStep: accuracy = {acc_polyStep.mean() * 100}% (±{acc_polyStep.std() * 100:.2})'
    )
    print(
        f'Linear: accuracy = {acc_linear.mean() * 100}% (±{acc_linear.std() * 100:.2})'
    )
    print(
        f'Random Walk: accuracy = {acc_random_walk.mean() * 100}% (±{acc_random_walk.std() * 100:.2})'
    )
Exemple #10
0
def _add_random_goal(maze, target_radius):
    """Place one new goal on the maze at a position from create_target."""
    col, row = create_target(maze, target_radius)
    maze.add_goal(row, col)


def run(params):
    """
    Runs the program with parameters specified from command line
    :param params: dict of settings. Keys read here: 'output_dir',
        'target_radius', 'max_targets', 'diagonals', 'number_trials',
        'max_steps', 'method' ('astar' or 'random'), and optionally
        'random_seed' and 'world'.
    :return: None. One file 'log<trial>.log' is written to the output
        directory per trial.
    :raises ValueError: if params['method'] is neither 'astar' nor 'random'.
    """
    # sets random seed
    if 'random_seed' in params:
        random.seed(params['random_seed'])

    # creates output directory
    outdir = params['output_dir']
    if not os.path.exists(outdir):
        os.mkdir(outdir)

    worldfile = None
    if 'world' in params:
        worldfile = params['world']

    # sets target radius
    target_radius = params['target_radius']

    # sets max targets
    max_targets = params['max_targets']

    maze = pacmaze.PacMaze(worldfile)

    # Every setting is read from `params`; the function previously mixed in
    # an undefined/global `args`.
    # sets diagonal moves as specified by user
    maze.set_diagonal_moves(params['diagonals'])

    max_steps = params['max_steps']

    for num in range(params['number_trials']):
        cont_steps = 0

        for _ in range(max_targets):
            _add_random_goal(maze, target_radius)

        # `with` guarantees the log is closed even if a step raises.
        with open(os.path.join(outdir, 'log%d.log' % num), 'w') as outfile:
            start = maze.pacman_position()
            outfile.write('%d, %d, %s' %
                          (start[0], start[1],
                           maze.query(start[0], start[1])))
            outfile.write('\n')

            while cont_steps < max_steps:
                if len(maze._goals) == 0:
                    break

                # determines which method will be used for walking
                method = params['method']
                if method == 'astar':
                    possible_directions = search.astar(
                        maze, maze.pacman_position())
                elif method == 'random':
                    possible_directions = random_walk.random_walk(
                        maze, maze.pacman_position())
                else:
                    # An unknown method used to yield no moves, so the step
                    # counter never advanced and the loop never ended.
                    raise ValueError(
                        "unknown method %r; expected 'astar' or 'random'"
                        % (method,))

                # Take moves only while the step budget allows.
                real_directions = []
                for direction in possible_directions:
                    print(cont_steps)
                    if cont_steps >= max_steps:
                        break
                    real_directions.append(direction)
                    cont_steps = cont_steps + 1

                path = maze.walk(maze.pacman_position(), real_directions)
                # agent might have reached some goals, let's re-add them
                while len(maze._goals) < max_targets:
                    _add_random_goal(maze, target_radius)

                outfile.write('\n'.join(
                    ['%d, %d, %s' % (x[0], x[1], x[2]) for x in path]))
                outfile.write('\n')

        print('File ' + str(num) + ' written.')
def theoretical_criterion_sampling(first_node,
                                   sample_size,
                                   directed,
                                   successors,
                                   predecessors=None,
                                   count_type='nodes',
                                   weight_feature=None,
                                   random_walk_init=0.5,
                                   random_walk_type='rw',
                                   patience=math.inf,
                                   leaderboard_size=100,
                                   neigh_eval_frac=0.1,
                                   alpha=None,
                                   neighs_type='incoming',
                                   verbose=False):
    """
    Search with iterative evaluation of best criterion value, as from reference paper.
    Initialize search with a random walk of size random_walk_init.
    INPUT:
        - first_node: the node from where to start the search. Compliant with networkx, a node can be any hashable
          object.
        - sample_size: int, the minimum number of nodes or edges (see count_type) of the final sampled subgraph
        - directed: bool, if the sampled graph (and therefore, the returned subgraph) is directed
        - successors: a function of type
          f(node) --> adj_dict
          that, given a node i as input, returns an adjacency dictionary specified as follows.
          The keys are all the nodes j pointed from edges i-->j. The values are (possibly empty) dictionaries
          containing values attached to the edge i-->j.
          Therefore adjacency dictionaries look like:
          adj_dict = {
            1: {'weight': 0.3, 'count': 2, ...},
            2: {'weight': 1.2},         # for example the edge i-->2 has attached the attribute 'weight' with value 1.2
            3: dict(),
            ...
          }
          Notice that a node can be any hashable object, but they need to be uniquely identifiable.
          IMPORTANT NOTE: the function successors (as well as predecessors) is called very often. According to the cost
          of the call, one may prefer to store results in memory instead of calling on already seen nodes. This choice
          is left to the final user, which can decide to memoize successors and predecessors functions, at an increased
          memory cost but avoiding repeated calls.
        - predecessors: like successors, but the adjacency dictionary must contain as keys all the nodes j that are
          contained in edges like j-->i. predecessors defaults to None, but an error is raised if it is not provided
          when directed=True
        - count_type: one in ['nodes', 'edges']. If 'nodes', then sample_size is computed as the number of nodes
          visited. If 'edges' the same is done counting the number of edges.
        - weight_feature: the weight feature that defines the adjacency matrix. Must be present for all edges of G, with
          numeric type and non negative
        - random_walk_init: the size of the initial random-walk exploration. If a float with 0<random_walk_init<1, it
          is the fraction of sample_size (rounded up) to explore. Otherwise it is the number of nodes or edges to
          explore; the random walk is only performed when this number is greater than 1
        - random_walk_type: the type of random walk used for exploration if random_walk_init > 0
        - patience: patience of initial random walk
        - leaderboard_size: the maximum size allowed for the leaderboard
        - neigh_eval_frac: float, 0 < neigh_eval_frac <=1. The fraction of neighbours to choose at random for criterion
          evaluation (randomization parameter p in reference paper)
        - alpha: float in [0, 1], parameter used in the theoretical criterion computation. If None, 1 is used for
          undirected graphs and 0.5 for directed ones
        - neighs_type: one in ['incoming', 'outgoing'], if to define neighbours like incoming or outgoing connections
        - verbose: bool, forwarded to the initial random walk
    OUTPUT:
        - subG: the sampled subgraph, a networkx DiGraph if directed else a Graph (when no initial random walk is
          performed; otherwise the graph returned by random_walk, grown in place). Each sampled node carries an
          'in_deg_weight' attribute maintained during the search.
    """
    # check correctness of the inputs
    _generic_input_check(directed, count_type, predecessors)

    # if random_walk_init is a fraction, find the expected sample size from random walk
    if 0 < random_walk_init < 1:
        random_walk_init = np.ceil(random_walk_init * sample_size)
    if alpha is None:
        alpha = 0.5 if directed else 1

    # perform random walk initialization if required
    current_node = first_node
    if random_walk_init > 1:
        subG = random_walk(first_node,
                           random_walk_init,
                           directed,
                           successors,
                           predecessors,
                           walk_type=random_walk_type,
                           count_type=count_type,
                           weight_feature=weight_feature,
                           patience=patience,
                           verbose=verbose)
        # border = every neighbour of a sampled node that is not sampled yet
        border = {
            neigh
            for node in subG for neigh in _neighbourhood(
                node, successors, predecessors, directed, neighs_type)
            if neigh not in subG
        }
    else:
        # no random walk: the sample starts as the single node first_node
        subG = nx.DiGraph() if directed else nx.Graph()
        subG.add_node(first_node)
        border = set(
            _neighbourhood(current_node, successors, predecessors, directed,
                           neighs_type).keys())

    # add custom_weight to every node in sample.
    # It is the sum of weights of connections coming from border nodes into a sampled node
    # Only a random fraction neigh_eval_frac of the border is kept for evaluation;
    # int() truncation can leave the border (and thus the leaderboard) empty,
    # which the main loop below handles by re-seeding.
    border = set(
        np.random.choice(list(border),
                         int(len(border) * neigh_eval_frac),
                         replace=False))
    for node in subG:
        subG.nodes[node]['in_deg_weight'] = sum(
            _adj_val(neigh, node, successors, weight_feature)
            for neigh in (predecessors(node) if directed else successors(node))
            if neigh in border)
    # NOTE(review): the initial in_deg_weight above sums raw _adj_val values,
    # whereas the incremental updates in the loop below add/subtract squared
    # values -- confirm which is intended.
    leaderboard = TopNHeapq(n=leaderboard_size,
                            data=[(node,
                                   _theoretical_criterion(
                                       node, successors, predecessors, subG,
                                       weight_feature, alpha))
                                  for node in border])

    # start influence increment sampling
    while not _sample_size_is_reached(subG, sample_size, count_type):
        # empty leaderboard: re-seed it from the unsampled neighbours of a
        # random selection of already sampled nodes, repeating until at
        # least one candidate is found
        if len(leaderboard) == 0:
            while len(leaderboard) == 0:
                current_nodes = np.random.choice(
                    list(subG.nodes),
                    max(int(subG.number_of_nodes() * neigh_eval_frac), 1))
                neighs = {
                    node
                    for current_node in current_nodes for node in
                    _neighbourhood(current_node, successors, predecessors,
                                   directed, neighs_type) if node not in subG
                }
                leaderboard = TopNHeapq(n=leaderboard_size,
                                        data=[(node,
                                               _theoretical_criterion(
                                                   node, successors,
                                                   predecessors, subG,
                                                   weight_feature, alpha))
                                              for node in neighs])
        # select node and add to the sampled graph
        selected_node = leaderboard.pop_max()[0]
        # the selected node stops being a border node: remove its (squared)
        # contribution from the sampled nodes it points to
        for node in successors(selected_node):
            if node in subG:
                subG.nodes[node]['in_deg_weight'] -= _adj_val(
                    selected_node, node, successors, weight_feature)**2
        subG.add_node(selected_node)
        subG.nodes[selected_node]['in_deg_weight'] = 0
        # connect the new node to the already sampled nodes, keeping the
        # edge attribute dictionaries
        subG.add_edges_from(
            (selected_node, neigh, edge_attr)
            for neigh, edge_attr in successors(selected_node).items()
            if neigh in subG)
        if directed:
            subG.add_edges_from(
                (in_neigh, selected_node, edge_attr)
                for in_neigh, edge_attr in predecessors(selected_node).items()
                if in_neigh in subG)

        # update the leaderboard with newly discovered neighbours
        neighs = [
            neigh for neigh in _neighbourhood(
                selected_node, successors, predecessors, directed, neighs_type)
            if neigh not in subG
        ]
        # evaluate only a random fraction of them, but at least one
        if len(neighs) > 0:
            neighs = np.random.choice(neighs,
                                      max(int(len(neighs) * neigh_eval_frac),
                                          1),
                                      replace=False)
        # update in_deg_weight and border info
        for neigh in neighs:
            subG.nodes[selected_node]['in_deg_weight'] += _adj_val(
                neigh, selected_node, successors, weight_feature)**2
            if neigh not in border:
                border.add(neigh)
                # a node new to the border also contributes to every other
                # sampled node it points to
                for node in successors(neigh):
                    if node in subG and node != selected_node:
                        subG.nodes[node]['in_deg_weight'] += _adj_val(
                            neigh, node, successors, weight_feature)**2
        # update leaderboard
        for neigh in neighs:
            infl_neigh = _theoretical_criterion(neigh, successors,
                                                predecessors, subG,
                                                weight_feature, alpha)
            leaderboard.add(neigh, infl_neigh)
    return subG
Exemple #12
0
def generate_random_walk(N=10):
    """Return ``[path, []]`` for a random walk whose length scales with N.

    N is mapped linearly from the range [1, 100] onto [10, 2000] steps
    (truncated to an int); the walk starts at ``[0, 0]`` with ``speed=10``.
    """
    steps_for = interp1d([1, 100], [10, 2000])
    n_steps = int(steps_for(N))
    origin = [0, 0]
    return [random_walk(origin, speed=10, steps=n_steps), []]
Exemple #13
0
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append("..")
import random_walk

# Plot two random walks of n samples stacked vertically: the first with
# constant term 0, the second with constant term `const`.
n = 100
x0 = 10
sigma = 9
const = 2
t = np.arange(n)

plt.figure()
for panel, drift, title in (
        (1, 0, 'x(t)=x(t-1)+a(t), x0=10'),
        (2, const, 'x(t)=2+x(t-1)+a(t), x0=10'),
):
    plt.subplot(2, 1, panel)
    plt.plot(t, random_walk.random_walk(x0, sigma, drift, n))
    plt.title(title)
plt.show()
Exemple #14
0
def sac():
    """Run the configured optimizers on a 2-D objective and compare the results.

    Settings (start point, iteration budget, domain limits, objective and the
    plotting switch) are read from ``cf``. ``optimize.shgo`` with Sobol
    sampling is run alongside the project's random walk, Levy flight and pure
    random search. When plotting is enabled, three figures are shown (3-D
    surface, minima map, convergence curves) and every optimizer's result is
    printed.

    Returns:
        dict: optimizer results keyed by ``'shgo_sobol'``, ``'random_walk'``,
        ``'levy_flight'`` and ``'pure_random_search'``.
    """
    # CONFIGURATIONS
    start_coordinates = cf.get_random_coordinates()
    iterations = cf.get_iteration()
    min_domain = cf.get_min_domain()
    max_domain = cf.get_max_domain()
    function = cf.get_function()
    show_plots = cf.get_show_plots()

    # BOUNDS FOR SHGO_SOBOL
    bounds = [(min_domain, max_domain), (min_domain, max_domain)]

    # FIG 1. SHOW FUNCTION IN 3D
    if show_plots:
        x = np.arange(min_domain, max_domain + 1)
        y = np.arange(min_domain, max_domain + 1)
        xgrid, ygrid = np.meshgrid(x, y)
        xy = np.stack([xgrid, ygrid])
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.view_init(45, -45)
        ax.plot_surface(xgrid, ygrid, function(xy), cmap='terrain')
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_zlabel('goldstein-price(x, y)')
        plt.show()

    # OPTIMIZATION ALGORITHMS
    results = {}
    # run shgo_sobol for better minimas visualization
    results['shgo_sobol'] = optimize.shgo(function, bounds, n=500, iters=5,
                                          sampling_method='sobol')
    results['random_walk'] = r_w.random_walk(
        function=function, iterations=iterations,
        start_coordinates=start_coordinates, show_plots=show_plots,
        bounds=bounds)
    results['levy_flight'] = l_f.levy_flight(
        function=function, iterations=iterations,
        start_coordinates=start_coordinates, show_plots=show_plots,
        bounds=bounds)
    results['pure_random_search'] = p_r_s.pure_random_search(
        function=function, iterations=iterations,
        start_coordinates=start_coordinates, show_plots=show_plots,
        bounds=bounds)

    # SHOW LOCAL MINIMAS AND RESULTS OF OPT ALGS
    if show_plots:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        # 'lower' puts row 0 at the bottom, matching the ascending y-limits
        # set below; imshow only accepts 'upper' or 'lower' for origin.
        ax.imshow(function(xy), interpolation='bilinear', origin='lower',
                  cmap='gray')
        ax.set_xlabel('x')
        ax.set_ylabel('y')

        # The image grid starts at min_domain with unit spacing, so a domain
        # coordinate v sits at pixel index v - min_domain.
        def plot_point(res, marker='o', color=None):
            return ax.plot(res.x[0] - min_domain, res.x[1] - min_domain,
                           marker=marker, color=color, ms=10)

        plot_point(results['random_walk'], color='y')
        plot_point(results['levy_flight'], color='c')
        plot_point(results['pure_random_search'], color='b')
        ax.plot(start_coordinates[0] - min_domain,
                start_coordinates[1] - min_domain,
                marker='+', color='g', ms=10)
        plot_point(results['shgo_sobol'], color='r', marker='x')

        # PRINT RESULTS
        for name in ('random_walk', 'levy_flight', 'pure_random_search'):
            print(name + '\n ', 'fun: ' + str(results[name].fun) + '\n ',
                  'x: ' + str(results[name].x))
        print('shgo_sobol\n ',
              'success: ' + str(results['shgo_sobol'].success) + '\n ',
              'fun: ' + str(results['shgo_sobol'].fun) + '\n ',
              'x: ' + str(results['shgo_sobol'].x))

        # FIG 2. PRINT ALL MINIMAS
        local_minima = results['shgo_sobol'].xl
        for i in range(local_minima.shape[0]):
            ax.plot(local_minima[i, 0] - min_domain,
                    local_minima[i, 1] - min_domain,
                    'ro', ms=2)

        # PRINT OBTAINED MINIMAS
        red_x = mpatches.Patch(color='r', label='global')
        cyan_dot = mpatches.Patch(color='c', label='l_f')
        yellow_dot = mpatches.Patch(color='y', label='r_w')
        blue_dot = mpatches.Patch(color='b', label='p_r_s')
        green_plus = mpatches.Patch(color='g', label='start')
        plt.legend(handles=[cyan_dot, yellow_dot, blue_dot, green_plus, red_x])

        ax.set_xlim([0, max_domain - min_domain])
        ax.set_ylim([0, max_domain - min_domain])
        fig.suptitle('Minimas', fontsize=10)
        plt.show()

        # FIG 3. PLOT MINIMA IMPROVEMENT
        fig = plt.figure()
        ax = fig.add_subplot(111)
        prs = results['pure_random_search']
        levy = results['levy_flight']
        ax.plot(prs.iter_to_best, prs.f_points, '.-', label='p_r_s')
        ax.plot(levy.iter_to_best, levy.f_points, '.-', label='l_f')

        # Tick marking each run's final value.
        # NOTE(review): x uses iter_to_best[-2] while y uses f_points[-1];
        # kept as in the original -- confirm this pairing is intended.
        plt.plot(prs.iter_to_best[-2], prs.f_points[-1], '|', color='b', ms=15)
        plt.plot(levy.iter_to_best[-2], levy.f_points[-1], '|', color='red',
                 ms=15)

        ax.set(xlabel='iterations', ylabel='f_val',
               title='towards the global minima')
        plt.legend(loc='best')
        ax.grid()
        plt.show()

    return results
def run_algorithm(algorithms, algorithm, protein_string, dimension):
    '''
    Run the user-selected folding algorithm on a protein in a given dimension.

    The user is prompted for the matrix size and for the parameters of the
    chosen algorithm; only the call to the algorithm itself is timed.

    Args:
        algorithms: sequence of algorithm identifiers; positions 0-4 select
            random walk, greedy with look-ahead, beam search, probabilistic
            branch and bound, and hill climber respectively.
        algorithm: the identifier chosen by the user.
        protein_string: the protein to fold.
        dimension: dimension selector; "1D" only plots the protein and exits.

    Returns:
        A tuple ``(protein, energies, matrix_sizes, start_time, elapsed_time,
        parameters)`` where ``parameters`` maps each parameter label to the
        value used, as a string ("" when not applicable).

    Raises:
        SystemExit: if ``dimension`` is "1D".
        ValueError: if ``algorithm`` matches none of the five known entries.
    '''
    # Handle a 1D dimension selection
    if dimension == "1D":

        plot1D(protein_string)
        plt.show()

        # Same effect as exit(msg) without relying on the site module.
        raise SystemExit("Error: Couldn't fold protein in only one dimension")

    # dictionary for keeping track of the used parameters
    parameters = {
        "Matrix size": "",
        "Iterations": "",
        "Look-aheads": "",
        "Prob. below": "",
        "Prob. above": "",
        "Beam width": "",
        "Cut acids": ""
    }

    # Ask the user for the matrix size to use
    matrix_size = ask_matrix_size(len(protein_string))
    parameters["Matrix size"] = f"{matrix_size}"

    # Each branch gathers its parameters and selects the call to make; the
    # call is then executed and timed once below.

    # Run a random walk
    if algorithm == algorithms[0]:

        # Ask the user for the number of runs
        N_runs = ask_number(1, 1E100, "integer",
                            "How many proteins to fold [1-∞]?: ")
        parameters["Iterations"] = f"{N_runs}"

        fold = random_walk
        fold_args = (protein_string, N_runs, dimension, matrix_size)

    # Run a greedy search with look-ahead
    elif algorithm == algorithms[1]:

        # Ask the user for the number of runs and look-aheads
        N_runs = ask_number(1, 1E100, "integer",
                            "How many proteins to fold [1-∞]?: ")
        parameters["Iterations"] = f"{N_runs}"

        look_aheads = ask_number(0, 1E100, "integer",
                                 "How many steps to look ahead [0-∞]?: ")
        parameters["Look-aheads"] = f"{look_aheads}"

        fold = greedy
        fold_args = (protein_string, look_aheads, N_runs, dimension,
                     matrix_size)

    # Run a beam search
    elif algorithm == algorithms[2]:

        # Ask the user for the beam width
        beam_width = ask_number(1, 1E100, "integer",
                                "What is the beam width [1-∞]?: ")
        parameters["Beam width"] = f"{beam_width}"

        fold = beamsearch
        fold_args = (protein_string, beam_width, dimension, matrix_size)

    # Run a probability-based branch and bound algorithm
    elif algorithm == algorithms[3]:

        # Ask the user for the probabilities for pruning
        prob_below_average = ask_number(
            0.0, 1.0, "float",
            "Choose a probability to discard proteins with energy below the average [0.0-1.0]?: "
        )
        parameters["Prob. below"] = f"{prob_below_average}"

        prob_above_average = ask_number(
            0.0, 1.0, "float",
            "Choose a probability to discard proteins with energy above the average [0.0-1.0]?: "
        )
        parameters["Prob. above"] = f"{prob_above_average}"

        fold = branch_n_bound
        fold_args = (protein_string, prob_above_average, prob_below_average,
                     dimension, matrix_size)

    # Run a hill climber
    elif algorithm == algorithms[4]:

        # Ask the user for the number of acids they want to cut out
        cut_acids = ask_number(
            4, 6, "integer",
            "How many acids will be cut out of the protein each time[4-6]: ")
        parameters["Cut acids"] = f"{cut_acids}"

        # Ask the user for the number of iterations that they want to re-fold a part of the protein
        iterations = ask_number(
            1, 1E100, "integer",
            "How many times will the protein be cut and re-folded(iterations): [1-∞]: "
        )
        parameters["Iterations"] = f"{iterations}"

        fold = hillclimber
        fold_args = (protein_string, dimension, matrix_size, iterations,
                     cut_acids)

    else:
        # Previously this fell through to the return statement and failed
        # with an UnboundLocalError on the unset result variables.
        raise ValueError(f"Unknown algorithm: {algorithm!r}")

    # Run the algorithm and keep track of the time
    start_time = time.time()
    protein, energies, matrix_sizes = fold(*fold_args)
    elapsed_time = time.time() - start_time

    return protein, energies, matrix_sizes, start_time, elapsed_time, parameters
Exemple #16
0
def calc(sign, lock):
    """Accumulate random-walk estimates for 21 five-node motifs into shared counters.

    For each of ``iter_num`` rounds a walk is drawn with
    ``rdmW.random_walk(step_num + 1000, G, 50)`` and positions 1000 up to
    ``step_num + 999`` are scanned (the first 1000 are skipped, presumably as
    warm-up). Whenever the last three walk nodes are distinct, two further
    nodes are sampled from the neighbourhoods of the nodes chosen so far; if
    all five are distinct, the induced subgraph is matched against
    ``Graph_list`` and the matching motif's tally grows by the product of the
    degree sums used while sampling. The per-round estimates are then added to
    the module-level ``counter0`` .. ``counter20`` values under ``lock``.

    Args:
        sign: not used by this function.
        lock: lock guarding the shared counters; must support ``with``.
    """
    for _ in range(iter_num):
        motif_weights = [0] * 21
        walk = rdmW.random_walk(step_num + 1000, G, 50)

        for i in range(1000, step_num + 1000):
            window = walk[(i - 2):i + 1]
            if len(set(window)) != 3:
                continue

            prev_node = walk[i - 1]
            cur_node = walk[i]
            deg_prev = G.degree(prev_node)
            deg_pair = deg_prev + G.degree(cur_node)

            # NOTE(review): if rdm is the stdlib ``random`` module, randint
            # includes its upper bound, which slightly favours the last
            # branch -- confirm against the intended sampling weights.
            m = rdm.randint(0, deg_pair)
            if m < deg_prev:
                node4 = rdm.choice(G.neighbors(prev_node))
            else:
                node4 = rdm.choice(G.neighbors(cur_node))

            deg_all = deg_pair + G.degree(node4)
            n = rdm.randint(0, deg_all)
            if n < deg_prev:
                node5 = rdm.choice(G.neighbors(prev_node))
            elif n < deg_pair:
                node5 = rdm.choice(G.neighbors(cur_node))
            else:
                node5 = rdm.choice(G.neighbors(node4))

            nodes = window + [node4] + [node5]
            if len(set(nodes)) != 5:
                continue

            # Only build the induced subgraph once it is known to be needed.
            candidate = G.subgraph(nodes)
            dg_prod = deg_prev * deg_pair * deg_all
            for graph_in in range(21):
                if candidate.isomorphic(Graph_list[graph_in]):
                    motif_weights[graph_in] += dg_prod
                    break

        edge_factor = G.ecount() * 2
        x = [
            edge_factor * (a / (b * step_num))
            for a, b in zip(motif_weights, W_constant)
        ]

        counters = (counter0, counter1, counter2, counter3, counter4,
                    counter5, counter6, counter7, counter8, counter9,
                    counter10, counter11, counter12, counter13, counter14,
                    counter15, counter16, counter17, counter18, counter19,
                    counter20)

        # The context manager releases the lock even if an update raises.
        with lock:
            # Same order as before: counter20 down to counter0.
            for idx in range(20, -1, -1):
                counters[idx].value += x[idx]
Exemple #17
0
 def rebuild(self, nlines):
     """Record the line count and rebuild the VBO from a new random walk."""
     self.nlines = nlines
     # The generated walk is multiplied by 5 before being wrapped in a VBO.
     geometry = random_walk(nlines) * 5
     self.vbo = vbo.VBO(geometry)
Exemple #18
0
def experemint_2(l=8):
    """Compare four classifiers over 100 random labeled/unlabeled splits.

    The last 500 rows of each class returned by ``get_data()`` form a fixed
    test set. In each of 100 rounds the remaining rows are shuffled, the first
    ``l`` rows per class are treated as labeled, and accuracy is recorded for
    ``LabelPropagation`` (named "tSVM" here), ``random_walk.random_walk``,
    ``evaluate_kernel`` with the "polyStep" cluster kernel and
    ``evaluate_kernel_SVM`` with the "linear" cluster kernel. Mean and
    standard deviation of each accuracy are printed at the end.

    Args:
        l: number of labeled points taken from each class.
    """

    # Experiment comparing random walk, tSVM, SVM and our cluster kernel:
    tSVM = LabelPropagation(max_iter=5000)

    np.random.seed(133769)  # reproducibility
    x_mac, x_win, y_mac, y_win = get_data()
    # Hold out the last 500 rows of each class as the test set.
    x_test = np.vstack((x_mac[-500:], x_win[-500:]))
    y_test = np.hstack((y_mac[-500:], y_win[-500:]))

    # Test labels for LabelPropagation: the mac half is zeroed out.
    y_test_tsvm = np.hstack((0.0 * y_mac[-500:], y_win[-500:]))

    # Keep everything except the held-out rows for training.
    x_mac, x_win, y_mac, y_win = x_mac[:
                                       -500], x_win[:
                                                    -500], y_mac[:
                                                                 -500], y_win[:
                                                                              -500]

    y_mac_tsvm = np.zeros((y_mac.shape))  # change -1 to zero

    x_labeled = np.vstack((x_mac[:l], x_win[:l]))
    x_unlabeled = np.vstack((x_mac[l:], x_win[l:]))

    X = np.vstack((x_labeled, x_unlabeled))
    y_labeled = np.hstack((y_mac[:l], y_win[:l]))
    y_labeled_tsvm = np.hstack((y_mac_tsvm[:l], y_win[:l]))
    y_unlabeled = np.hstack((y_mac[l:], y_win[l:]))
    y_unlabeled_tsvm = -np.ones((y_unlabeled.shape))  # Set unlabeled points
    # Label vector for LabelPropagation, laid out like X: labeled rows first,
    # then -1 for every unlabeled row. Built once and reused in every round.
    labels_tsvm = np.hstack((y_labeled_tsvm, y_unlabeled_tsvm))

    # One accuracy slot per round for each method (object arrays of None).
    acc_tSVM = np.array([None] * 100)
    acc_random_walk = np.array([None] * 100)
    acc_polyStep = np.array([None] * 100)
    acc_linear = np.array([None] * 100)

    kernel1 = lambda x: cluster_kernel.kernel(x, 10, "polyStep", 16)
    kernel2 = lambda x: cluster_kernel.kernel(x, 10, "linear", 16)

    for test in range(100):
        # NOTE(review): only the feature rows are shuffled; y_mac / y_win are
        # not. Presumably each holds a single constant class label -- confirm.
        np.random.shuffle(x_mac)
        np.random.shuffle(x_win)
        x_labeled = np.vstack((x_mac[:l], x_win[:l]))
        x_unlabeled = np.vstack((x_mac[l:], x_win[l:]))

        y_labeled = np.hstack((y_mac[:l], y_win[:l]))
        X = np.vstack((x_labeled, x_unlabeled))

        tSVM.fit(X, labels_tsvm)

        acc_tSVM[test] = tSVM.score(x_test, y_test_tsvm)
        print(f'accuracy = {acc_tSVM[test] * 100}% () tSVM')

        acc_random_walk[test] = random_walk.random_walk(
            x_labeled, x_unlabeled, x_test, y_labeled, y_test)
        print(f'accuracy = {acc_random_walk[test] * 100}% () Random Walk')

        acc_polyStep[test] = evaluate_kernel(x_labeled, x_unlabeled, x_test,
                                             y_labeled, y_test, kernel1)
        print(f'accuracy = {acc_polyStep[test] * 100}% () Poly Step')

        acc_linear[test] = evaluate_kernel_SVM(x_labeled, x_unlabeled, x_test,
                                               y_labeled, y_test, kernel2)
        print(f'accuracy = {acc_linear[test] * 100}% () Linear')

        # acc[test] = evaluate_kernel_2(x_labeled_i, x_test, y_labeled, y_test, k)
        #acc[test] = evaluate_kernel(x_labeled, x_unlabeled, x_test, y_labeled, y_test, kernel)
        # acc[test] = random_walk.random_walk(x_labeled, x_unlabeled, x_test, y_labeled, y_test)

    # Summary: mean accuracy and standard deviation over the 100 rounds.
    print(
        f'normal SVM: accuracy = {acc_linear.mean() * 100}% (±{acc_linear.std() * 100:.2})'
    )
    print(
        f'tSVM: accuracy = {acc_tSVM.mean() * 100}% (±{acc_tSVM.std() * 100:.2})'
    )
    print(
        f'random walk: accuracy = {acc_random_walk.mean() * 100}% (±{acc_random_walk.std() * 100:.2})'
    )
    print(
        f'Cluster kernel: accuracy = {acc_polyStep.mean() * 100}% (±{acc_polyStep.std() * 100:.2})'
    )