Example #1
class Dummy(Symbol):
    """Dummy symbols are each unique, even if they have the same name:

    >>> from sympy import Dummy
    >>> Dummy("x") == Dummy("x")
    False

    If a name is not supplied then a string value of an internal count will be
    used. This is useful when a temporary variable is needed and the name
    of the variable used in the expression is not important.

    >>> Dummy() #doctest: +SKIP
    _Dummy_10

    """

    # In the rare event that a Dummy object needs to be recreated, both the
    # `name` and `dummy_index` should be passed.  This is used by `srepr` for
    # example:
    # >>> d1 = Dummy()
    # >>> d2 = eval(srepr(d1))
    # >>> d2 == d1
    # True
    #
    # If a new session is started between `srepr` and `eval`, there is a very
    # small chance that `d2` will be equal to a previously-created Dummy.

    _count = 0
    _prng = random.Random()
    _base_dummy_index = _prng.randint(10**6, 9*10**6)

    __slots__ = ('dummy_index',)

    is_Dummy = True

    def __new__(cls, name=None, dummy_index=None, **assumptions):
        if dummy_index is not None:
            assert name is not None, "If you specify a dummy_index, you must also provide a name"

        if name is None:
            name = "Dummy_" + str(Dummy._count)

        if dummy_index is None:
            dummy_index = Dummy._base_dummy_index + Dummy._count
            Dummy._count += 1

        cls._sanitize(assumptions, cls)
        obj = Symbol.__xnew__(cls, name, **assumptions)

        obj.dummy_index = dummy_index

        return obj

    def __getstate__(self):
        return {'_assumptions': self._assumptions, 'dummy_index': self.dummy_index}

    @cacheit
    def sort_key(self, order=None):
        return self.class_key(), (
            2, (self.name, self.dummy_index)), S.One.sort_key(), S.One

    def _hashable_content(self):
        return Symbol._hashable_content(self) + (self.dummy_index,)
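
A small, self-contained sketch of the behaviour described in the docstring and comment above (Dummy uniqueness and the srepr round-trip); it assumes a standard SymPy installation and is purely illustrative:

# Illustrative only: every Dummy is unique, and srepr records enough
# information (name plus dummy_index) to rebuild an equal object.
from sympy import Dummy, srepr

a, b = Dummy("x"), Dummy("x")
print(a == b)         # False: same name, different dummy_index

d1 = Dummy("t")
d2 = eval(srepr(d1))  # round-trip through srepr within the same session
print(d1 == d2)       # True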
Example #2
def main(args):
    def worker_init_fn(worker_id):
        np.random.seed(args.random_seed + worker_id)

    n_gpu = 0
    if torch.cuda.is_available():
        n_gpu = torch.cuda.device_count()
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)
    rng = random.Random(args.random_seed)
    torch.manual_seed(args.random_seed)
    if n_gpu > 0:
        torch.cuda.manual_seed(args.random_seed)
        torch.cuda.manual_seed_all(args.random_seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    ontology = json.load(open(args.ontology_data))
    slot_meta, ontology = make_slot_meta(ontology)
    op2id = OP_SET[args.op_code]
    # print(op2id)
    tokenizer = BertTokenizer(args.vocab_path, do_lower_case=True)

    train_data_raw = prepare_dataset(data_path=args.train_data_path,
                                     tokenizer=tokenizer,
                                     slot_meta=slot_meta,
                                     n_history=args.n_history,
                                     max_seq_length=args.max_seq_length,
                                     op_code=args.op_code,
                                     turn_weight=args.turn_weight,
                                     seq_num=args.seq_num)

    train_data = MultiWozDataset(train_data_raw,
                                 tokenizer,
                                 slot_meta,
                                 args.max_seq_length,
                                 rng,
                                 ontology,
                                 args.word_dropout,
                                 args.shuffle_state,
                                 args.shuffle_p)
    print("# train examples %d" % len(train_data_raw))

    dev_data_raw = prepare_dataset(data_path=args.dev_data_path,
                                   tokenizer=tokenizer,
                                   slot_meta=slot_meta,
                                   n_history=args.n_history,
                                   max_seq_length=args.max_seq_length,
                                   op_code=args.op_code)
    print("# dev examples %d" % len(dev_data_raw))

    test_data_raw = prepare_dataset(data_path=args.test_data_path,
                                    tokenizer=tokenizer,
                                    slot_meta=slot_meta,
                                    n_history=args.n_history,
                                    max_seq_length=args.max_seq_length,
                                    op_code=args.op_code)
    print("# test examples %d" % len(test_data_raw))

    model_config = BertConfig.from_json_file(args.bert_config_path)
    model_config.dropout = args.dropout
    model_config.attention_probs_dropout_prob = args.attention_probs_dropout_prob
    model_config.hidden_dropout_prob = args.hidden_dropout_prob

    model = SomDST(model_config, len(op2id), len(domain2id), op2id['update'], args.exclude_domain)

    if not os.path.exists(args.bert_ckpt_path):
        args.bert_ckpt_path = download_ckpt(args.bert_ckpt_path, args.bert_config_path, 'assets')

    ckpt = torch.load(args.bert_ckpt_path, map_location='cpu')
    model.encoder.bert.load_state_dict(ckpt)

    # re-initialize added special tokens ([SLOT], [NULL], [EOS])
    model.encoder.bert.embeddings.word_embeddings.weight.data[1].normal_(mean=0.0, std=0.02)
    model.encoder.bert.embeddings.word_embeddings.weight.data[2].normal_(mean=0.0, std=0.02)
    model.encoder.bert.embeddings.word_embeddings.weight.data[3].normal_(mean=0.0, std=0.02)
    model.to(device)

    num_train_steps = int(len(train_data_raw) / args.batch_size * args.n_epochs)

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    enc_param_optimizer = list(model.encoder.named_parameters())
    enc_optimizer_grouped_parameters = [
        {'params': [p for n, p in enc_param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in enc_param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    enc_optimizer = AdamW(enc_optimizer_grouped_parameters, lr=args.enc_lr)
    enc_scheduler = WarmupLinearSchedule(enc_optimizer, int(num_train_steps * args.enc_warmup),
                                         t_total=num_train_steps)

    dec_param_optimizer = list(model.decoder.parameters())
    dec_optimizer = AdamW(dec_param_optimizer, lr=args.dec_lr)
    dec_scheduler = WarmupLinearSchedule(dec_optimizer, int(num_train_steps * args.dec_warmup),
                                         t_total=num_train_steps)

    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data,
                                  sampler=train_sampler,
                                  batch_size=args.batch_size,
                                  collate_fn=train_data.collate_fn,
                                  num_workers=args.num_workers,
                                  worker_init_fn=worker_init_fn)

    loss_fnc = nn.CrossEntropyLoss(reduction="none")
    best_score = {'epoch': 0, 'joint_acc': float("-inf"), 'op_acc': 0, 'final_slot_f1': 0}
    for epoch in range(args.n_epochs):
        batch_loss = []
        model.train()
        for step, batch in enumerate(train_dataloader):
            batch = [b.to(device) if not isinstance(b, int) else b for b in batch]
            input_ids, input_mask, segment_ids, state_position_ids, op_ids, \
            domain_ids, gen_ids, max_value, max_update, turn_weights = batch

            if rng.random() < args.decoder_teacher_forcing:  # teacher forcing
                teacher = gen_ids
            else:
                teacher = None

            domain_scores, state_scores, gen_scores = model(input_ids=input_ids,
                                                            token_type_ids=segment_ids,
                                                            state_positions=state_position_ids,
                                                            attention_mask=input_mask,
                                                            max_value=max_value,
                                                            op_ids=op_ids,
                                                            max_update=max_update,
                                                            teacher=teacher)

            # print(op_ids.view(-1))
            # print(gen_scores)

            loss_s = loss_fnc(state_scores.view(-1, len(op2id)), op_ids.view(-1))
            loss_g = masked_cross_entropy_for_value(gen_scores.contiguous(),
                                                    gen_ids.contiguous(), turn_weights,
                                                    tokenizer.vocab['[PAD]'])
            # there are 30 slots
            loss_s = loss_s.reshape(turn_weights.size()[0], 30) * turn_weights.repeat_interleave(30).reshape(
                turn_weights.size()[0], 30)
            loss_s = loss_s.mean()

            loss = loss_s + loss_g
            if args.exclude_domain is not True:
                loss_d = loss_fnc(domain_scores.view(-1, len(domain2id)), domain_ids.view(-1)).mean()
                # print(turn_weights)
                # print(loss_s.size(), loss_d.size(), loss_g.size())
                loss = loss + loss_d

            batch_loss.append(loss.item())

            loss.backward()
            enc_optimizer.step()
            enc_scheduler.step()
            dec_optimizer.step()
            dec_scheduler.step()
            model.zero_grad()

            if step % 100 == 0:
                if args.exclude_domain is not True:
                    print("[%d/%d] [%d/%d] mean_loss : %.3f, state_loss : %.3f, gen_loss : %.3f, dom_loss : %.3f" \
                          % (epoch + 1, args.n_epochs, step,
                             len(train_dataloader), np.mean(batch_loss),
                             loss_s.item(), loss_g.item(), loss_d.item()))
                else:
                    print("[%d/%d] [%d/%d] mean_loss : %.3f, state_loss : %.3f, gen_loss : %.3f" \
                          % (epoch + 1, args.n_epochs, step,
                             len(train_dataloader), np.mean(batch_loss),
                             loss_s.item(), loss_g.item()))
                batch_loss = []

        if (epoch + 1) % args.eval_epoch == 0:
            eval_res = model_evaluation(model, dev_data_raw, tokenizer, slot_meta, epoch + 1, args.op_code, mode="dev")
            if eval_res['joint_acc'] > best_score['joint_acc']:
                best_score = eval_res
                model_to_save = model.module if hasattr(model, 'module') else model
                save_path = os.path.join(args.save_dir, 'model_best.bin')
                torch.save(model_to_save.state_dict(), save_path)
            print("Best Score : ", best_score)
            print("\n")

    print("Test using best model...")
    best_epoch = best_score['epoch']
    ckpt_path = os.path.join(args.save_dir, 'model_best.bin')
    model = SomDST(model_config, len(op2id), len(domain2id), op2id['update'], args.exclude_domain)
    ckpt = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(ckpt)
    model.to(device)

    model_evaluation(model, test_data_raw, tokenizer, slot_meta, best_epoch, args.op_code,
                     is_gt_op=False, is_gt_p_state=False, is_gt_gen=False, save_dir=args.save_dir, mode="test")
Example #3
        # Fragment of a test method: move the input tensors to GPU 0, then run
        # each model class wrapped in torch.nn.DataParallel.
        for k, v in inputs_dict.items():
            if torch.is_tensor(v):
                inputs_dict[k] = v.to(0)

        for model_class in self.all_model_classes:
            model = model_class(config=config)
            model.to(0)
            model.eval()

            # Wrap model in nn.DataParallel
            model = torch.nn.DataParallel(model)
            with torch.no_grad():
                _ = model(**self._prepare_for_class(inputs_dict, model_class))


global_rng = random.Random()


def ids_tensor(shape, vocab_size, rng=None, name=None):
    #  Creates a random int32 tensor of the shape within the vocab size
    if rng is None:
        rng = global_rng

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    # assemble the sampled ids into a tensor of the requested shape
    return torch.tensor(values, dtype=torch.long).view(shape).contiguous()
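
For context, a hypothetical usage sketch of the helper above; it assumes torch is imported at module level (as the surrounding snippet implies) and shows how a seeded random.Random makes the fake ids reproducible:

import random

rng = random.Random(0)
fake_ids = ids_tensor((2, 5), vocab_size=100, rng=rng)  # values drawn from [0, vocab_size - 1]
print(fake_ids.shape)  # torch.Size([2, 5])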
Example #4
def main(args):
    # ----- solver and variable declaration -----
    (nb_slabs, capacity, max_capacity, weights, colors, loss, color_orders) =\
        ReadData(args.data)
    nb_colors = len(color_orders)
    solver = pywrapcp.Solver('Steel Mill Slab')
    x = [solver.IntVar(0, nb_slabs - 1, 'x' + str(i)) for i in range(nb_slabs)]
    load_vars = [
        solver.IntVar(0, max_capacity - 1, 'load_vars' + str(i))
        for i in range(nb_slabs)
    ]

    # ----- post of the constraints -----

    # Bin Packing.
    BinPacking(solver, x, weights, load_vars)
    # At most two colors per slab.
    for s in range(nb_slabs):
        solver.Add(
            solver.SumLessOrEqual([
                solver.Max([solver.IsEqualCstVar(x[c], s) for c in o])
                for o in color_orders
            ], 2))

    # ----- Objective -----

    objective_var = \
        solver.Sum([load_vars[s].IndexOf(loss) for s in range(nb_slabs)]).Var()
    objective = solver.Minimize(objective_var, 1)

    # ----- start the search and optimization -----

    assign_db = SteelDecisionBuilder(x, nb_slabs, weights, loss, load_vars)
    first_solution = solver.Assignment()
    first_solution.Add(x)
    first_solution.AddObjective(objective_var)
    store_db = solver.StoreAssignment(first_solution)
    first_solution_db = solver.Compose([assign_db, store_db])
    print('searching for initial solution,', end=' ')
    solver.Solve(first_solution_db)
    print('initial cost =', first_solution.ObjectiveValue())

    # To search a fragment, we use a basic randomized decision builder.
    # We can also use assign_db instead of inner_db.
    inner_db = solver.Phase(x, solver.CHOOSE_RANDOM, solver.ASSIGN_MIN_VALUE)
    # The most important aspect is to limit the time exploring each fragment.
    inner_limit = solver.FailuresLimit(args.lns_fail_limit)
    continuation_db = solver.SolveOnce(inner_db, [inner_limit])

    # Now, we create the LNS objects.
    rand = random.Random()
    rand.seed(args.lns_random_seed)
    local_search_operator = SteelRandomLns(x, rand, args.lns_fragment_size)
    # This is in fact equivalent to the following predefined LNS operator:
    # local_search_operator = solver.RandomLNSOperator(x,
    #                                                  args.lns_fragment_size,
    #                                                  args.lns_random_seed)
    local_search_parameters = solver.LocalSearchPhaseParameters(
        local_search_operator, continuation_db)
    local_search_db = solver.LocalSearchPhase(first_solution,
                                              local_search_parameters)
    global_limit = solver.TimeLimit(args.time_limit)

    print('using LNS to improve the initial solution')

    search_log = solver.SearchLog(100000, objective_var)
    solver.NewSearch(local_search_db, [objective, search_log, global_limit])
    while solver.NextSolution():
        print('Objective:', objective_var.Value(),\
            'check:', sum(loss[load_vars[s].Min()] for s in range(nb_slabs)))
    solver.EndSearch()
Example #5
from snake_game import SnakeGame
import pygame
import time
import random

rand = random.Random()


class SnakeGameGUI(SnakeGame):
    def __init__(self, headless_mode=False):
        super().__init__()
        self.BLUE = (0, 0, 255)
        self.PURPLE = (255, 0, 255)
        self.BLACK = (0, 0, 0)
        self.RED = (255, 0, 0)
        self.SQUARESIZE = 10
        self.WIDTH = self.SQUARESIZE * self.width
        self.HEIGHT = self.SQUARESIZE * self.height
        self.SIZE = (self.WIDTH + 400, self.HEIGHT)

        if headless_mode == False:
            self.SCREEN = pygame.display.set_mode(self.SIZE)
            pygame.init()

    def draw_board(self):
        myfont = pygame.font.SysFont("monospace", 50)
        self.SCREEN.fill(self.BLACK)
        for i in range(self.height):
            for j in range(self.width):
                # check for head, body, food
                if self.board[i, j] == 1:
Example #6
def test_contract_receive_channelnew_must_be_idempotent():
    block_number = 10
    pseudo_random_generator = random.Random()

    token_network_id = factories.make_address()
    token_id = factories.make_address()
    token_network_state = TokenNetworkState(token_network_id, token_id)
    payment_network_identifier = factories.make_payment_network_identifier()

    amount = 30
    our_balance = amount + 50
    channel_state1 = factories.make_channel(our_balance=our_balance)
    channel_state2 = copy.deepcopy(channel_state1)

    state_change1 = ContractReceiveChannelNew(
        factories.make_transaction_hash(),
        token_network_id,
        channel_state1,
    )

    token_network.state_transition(
        payment_network_identifier,
        token_network_state,
        state_change1,
        pseudo_random_generator,
        block_number,
    )

    # change the existing channel
    payment_identifier = 1
    message_identifier = random.randint(0, UINT64_MAX)
    channel.send_directtransfer(
        channel_state1,
        amount,
        message_identifier,
        payment_identifier,
    )

    state_change2 = ContractReceiveChannelNew(
        factories.make_transaction_hash(),
        token_network_id,
        channel_state2,
    )

    # replay the ContractReceiveChannelNew state change
    iteration = token_network.state_transition(
        payment_network_identifier,
        token_network_state,
        state_change2,
        pseudo_random_generator,
        block_number,
    )

    msg = 'the channel must not be overwritten'
    channelmap_by_id = iteration.new_state.channelidentifiers_to_channels
    assert channelmap_by_id[channel_state1.identifier] == channel_state1, msg

    channelmap_by_address = iteration.new_state.partneraddresses_to_channels
    partner_channels = channelmap_by_address[
        channel_state1.partner_state.address]
    assert partner_channels[channel_state1.identifier] == channel_state1, msg
Example #7
def test_routing_updates(
    token_network_state,
    our_address,
):
    open_block_number = 10
    pseudo_random_generator = random.Random()
    pkey1, address1 = factories.make_privkey_address()
    pkey2, address2 = factories.make_privkey_address()
    pkey3, address3 = factories.make_privkey_address()

    amount = 30
    our_balance = amount + 50
    channel_state = factories.make_channel(
        our_balance=our_balance,
        our_address=our_address,
        partner_balance=our_balance,
        partner_address=address1,
    )
    payment_network_identifier = factories.make_payment_network_identifier()

    # create a new channel as participant, check graph update
    channel_new_state_change = ContractReceiveChannelNew(
        transaction_hash=factories.make_transaction_hash(),
        token_network_identifier=token_network_state.address,
        channel_state=channel_state,
    )

    channel_new_iteration1 = token_network.state_transition(
        payment_network_identifier=payment_network_identifier,
        token_network_state=token_network_state,
        state_change=channel_new_state_change,
        pseudo_random_generator=pseudo_random_generator,
        block_number=open_block_number,
    )

    graph_state = channel_new_iteration1.new_state.network_graph
    assert channel_state.identifier in graph_state.channel_identifier_to_participants
    assert len(graph_state.channel_identifier_to_participants) == 1
    assert graph_state.network[our_address][address1] is not None
    assert len(graph_state.network.edges()) == 1

    # create a new channel without being participant, check graph update
    new_channel_identifier = factories.make_channel_identifier()
    channel_new_state_change = ContractReceiveRouteNew(
        transaction_hash=factories.make_transaction_hash(),
        token_network_identifier=token_network_state.address,
        channel_identifier=new_channel_identifier,
        participant1=address2,
        participant2=address3,
    )

    channel_new_iteration2 = token_network.state_transition(
        payment_network_identifier=payment_network_identifier,
        token_network_state=channel_new_iteration1.new_state,
        state_change=channel_new_state_change,
        pseudo_random_generator=pseudo_random_generator,
        block_number=open_block_number + 10,
    )

    graph_state = channel_new_iteration2.new_state.network_graph
    assert channel_state.identifier in graph_state.channel_identifier_to_participants
    assert new_channel_identifier in graph_state.channel_identifier_to_participants
    assert len(graph_state.channel_identifier_to_participants) == 2
    assert graph_state.network[our_address][address1] is not None
    assert graph_state.network[address2][address3] is not None
    assert len(graph_state.network.edges()) == 2

    # close the channel the node is a participant of, check edge is removed from graph
    closed_block_number = open_block_number + 20
    channel_close_state_change1 = ContractReceiveChannelClosed(
        transaction_hash=factories.make_transaction_hash(),
        transaction_from=channel_state.partner_state.address,
        token_network_identifier=token_network_state.address,
        channel_identifier=channel_state.identifier,
        closed_block_number=closed_block_number,
    )

    channel_closed_iteration1 = token_network.state_transition(
        payment_network_identifier=payment_network_identifier,
        token_network_state=channel_new_iteration2.new_state,
        state_change=channel_close_state_change1,
        pseudo_random_generator=pseudo_random_generator,
        block_number=closed_block_number,
    )

    # Check that a second ContractReceiveChannelClosed events is handled properly
    # This might have been sent from the other participant of the channel
    # See issue #2449
    channel_close_state_change2 = ContractReceiveChannelClosed(
        transaction_hash=factories.make_transaction_hash(),
        transaction_from=channel_state.our_state.address,
        token_network_identifier=token_network_state.address,
        channel_identifier=channel_state.identifier,
        closed_block_number=closed_block_number,
    )

    channel_closed_iteration2 = token_network.state_transition(
        payment_network_identifier=payment_network_identifier,
        token_network_state=channel_closed_iteration1.new_state,
        state_change=channel_close_state_change2,
        pseudo_random_generator=pseudo_random_generator,
        block_number=closed_block_number,
    )

    graph_state = channel_closed_iteration2.new_state.network_graph
    assert channel_state.identifier not in graph_state.channel_identifier_to_participants
    assert new_channel_identifier in graph_state.channel_identifier_to_participants
    assert len(graph_state.channel_identifier_to_participants) == 1
    assert graph_state.network[address2][address3] is not None
    assert len(graph_state.network.edges()) == 1

    # close the channel the node is not a participant of, check edge is removed from graph
    channel_close_state_change3 = ContractReceiveRouteClosed(
        transaction_hash=factories.make_transaction_hash(),
        token_network_identifier=token_network_state.address,
        channel_identifier=new_channel_identifier,
    )

    channel_closed_iteration3 = token_network.state_transition(
        payment_network_identifier=payment_network_identifier,
        token_network_state=channel_closed_iteration2.new_state,
        state_change=channel_close_state_change3,
        pseudo_random_generator=pseudo_random_generator,
        block_number=closed_block_number + 10,
    )

    # Check that a second ContractReceiveRouteClosed events is handled properly.
    # This might have been sent from the second participant of the channel
    # See issue #2449
    channel_close_state_change4 = ContractReceiveRouteClosed(
        transaction_hash=factories.make_transaction_hash(),
        token_network_identifier=token_network_state.address,
        channel_identifier=new_channel_identifier,
    )

    channel_closed_iteration4 = token_network.state_transition(
        payment_network_identifier=payment_network_identifier,
        token_network_state=channel_closed_iteration3.new_state,
        state_change=channel_close_state_change4,
        pseudo_random_generator=pseudo_random_generator,
        block_number=closed_block_number + 10,
    )

    graph_state = channel_closed_iteration4.new_state.network_graph
    assert channel_state.identifier not in graph_state.channel_identifier_to_participants
    assert new_channel_identifier not in graph_state.channel_identifier_to_participants
    assert len(graph_state.channel_identifier_to_participants) == 0
    assert len(graph_state.network.edges()) == 0
Example #8
def addition(G, num_walks, walklength, walks, es, edge, ind):
    G.add_edge(*edge, weight=1)
    node_i = str(edge[0])
    node_j = str(edge[1])
    allwalks_i = helpers.scan(
        client=es,
        query={"query": {
            "match_phrase": {
                "wlks": {
                    "query": node_i
                }
            }
        }},
        index=ind,
        size=10000,
        scroll='1m')
    allwalks_i = list(allwalks_i)
    degree_i = G.degree(edge[0])
    degree_j = G.degree(edge[1])
    sampled_i = random.sample(allwalks_i, int(len(allwalks_i) / degree_i))
    blk = []
    if len(sampled_i) > 0:
        for wk in sampled_i:
            wk_id = int(wk['_id'])
            wk_pos = random.choice(
                es.termvectors(index=ind,
                               doc_type='walk',
                               id=wk_id,
                               field_statistics=False,
                               term_statistics=False,
                               offsets=False,
                               positions=True,
                               fields=['wlks'])['term_vectors']['wlks']
                ['terms'][node_i]['tokens'])['position']
            wk_pos = wk_pos % 100
            walks[wk_id][wk_pos + 1:] = random_walk(G,
                                                    walklength - wk_pos - 1,
                                                    alpha=0,
                                                    rand=random.Random(),
                                                    start=edge[1])
            action = {
                "_op_type": 'update',
                "_index": ind,
                "_type": "walk",
                "_id": int(wk['_id']),
                "_source": {
                    "doc": {
                        "wlks": ' '.join((walks[wk_id]))
                    }
                }
            }
            blk.append(action)
        helpers.bulk(es, blk)
    es.indices.refresh(index=ind)
    allwalks_j = helpers.scan(
        client=es,
        query={"query": {
            "match_phrase": {
                "wlks": {
                    "query": node_j
                }
            }
        }},
        index=ind,
        size=10000,
        scroll='1m')
    allwalks_j = list(allwalks_j)
    sampled_j = random.sample(allwalks_j, int(len(allwalks_j) / degree_j))
    blk = []
    if len(sampled_j) > 0:
        for wk in sampled_j:
            wk_id = int(wk['_id'])
            wk_pos = random.choice(
                es.termvectors(index=ind,
                               doc_type='walk',
                               id=wk_id,
                               field_statistics=False,
                               term_statistics=False,
                               offsets=False,
                               positions=True,
                               fields=['wlks'])['term_vectors']['wlks']
                ['terms'][node_j]['tokens'])['position']
            wk_pos = wk_pos % 100
            walks[wk_id][wk_pos + 1:] = random_walk(G,
                                                    walklength - wk_pos - 1,
                                                    alpha=0,
                                                    rand=random.Random(),
                                                    start=edge[0])
            action = {
                "_op_type": 'update',
                "_index": ind,
                "_type": "walk",
                "_id": int(wk['_id']),
                "_source": {
                    "doc": {
                        "wlks": ' '.join((walks[wk_id]))
                    }
                }
            }

            blk.append(action)
        helpers.bulk(es, blk)

    blk = []
    if len(allwalks_i) == 0:
        print('node addition')
        for kk in range(num_walks):
            random.seed(kk)
            walks.append(
                random_walk(G,
                            walklength,
                            alpha=0,
                            rand=random.Random(),
                            start=edge[0]))
            action = {
                "_op_type": 'create',
                "_index": ind,
                "_type": "walk",
                "_id": len(walks),
                "_source": {
                    "doc": {
                        "wlks": ' '.join((walks[-1]))
                    }
                }
            }

            blk.append(action)

    if len(allwalks_j) == 0:
        print('node addition')
        for kk in range(num_walks):
            random.seed(kk)
            walks.append(
                random_walk(G,
                            walklength,
                            alpha=0,
                            rand=random.Random(),
                            start=edge[1]))
            action = {
                "_op_type": 'create',
                "_index": ind,
                "_type": "walk",
                "_id": len(walks),
                "_source": {
                    "doc": {
                        "wlks": ' '.join((walks[-1]))
                    }
                }
            }

            blk.append(action)
    helpers.bulk(es, blk)
    es.indices.refresh(index=ind)
    print('updated!')
    return walks, G
Example #9
    def pre_mainloop(self):

        ## logging
        assert len(self.log_filename) != 0, "'log_filename' must not be empty string!"
        logger.setLevel(logging.ERROR)
        handler = logging.FileHandler(self.log_filename, 'w')
        handler.setLevel(logging.INFO)
        formatter = logging.Formatter("%(asctime)s: %(message)s")
        handler.setFormatter(formatter)
        self.logger.setLevel(logging.INFO)
        self.logger.addHandler(handler)

        self._nr_elements = 6
        self._idx_backdoor = 5
        self._init_classifier_output()
        self._classified_element = -1
        self._classified_letter = -1
        for s in self.desired_phrase:
            assert s in [l for ls in self.letter_set
                         for l in ls], "invalid letters in desired phrase!"
        self._spelled_phrase = ""
        self._spelled_letters = ""
        self._desired_letters = self.desired_phrase
        self._copyspelling_finished = False
        #        if self.offline:
        #            self.__idle()                # In offline mode: set the first to-be-spelled letter

        self._spellerHeight = self.geometry[3] - self.letterbox_size[1]
        self._centerPos = (self.geometry[2] / 2., self._spellerHeight / 2.)

        self._nr_letters = 0
        for i in range(len(self.letter_set)):
            self._nr_letters += len(self.letter_set[i])

        self._current_level = 1  # Index of current level
        self._current_sequence = 0  # Index of current sequence
        self._current_stimulus = 0  # Index of current stimulus
        self._current_countdown = self.nCountdown
        self.random = random.Random(clock())
        self._debug_classified = None

        ## init states:
        self._state_countdown = True
        if not self.countdown_level1:
            self._state_countdown = False
            self._state_trial = True
        else:
            #self._state_countdown = not self.offline
            self._state_trial = False

        self._state_classify = False
        self._state_feedback = False
        self._state_abort = False

        ## init containers for VE elements:
        self._ve_elements = []

        ## oscillator state:
        if not self.use_oscillator:
            self.osc_color = self.bg_color
            self.osc_size = 0

        ## call subclass-specific pre_mainloop:
        self.prepare_mainloop()

        ## build screen elements:
        self.__init_screen()
        if self.offline:
            self.__idle()
        if self.abort_trial:
            '''
            Start listener for abort_trial event eg.
            '''

        ## delay after play (might be useful for filters...)
        pygame.time.wait(int(self.wait_after_start * 1000))
        self.logger.info("waiting %d seconds after play." %
                         self.wait_after_start)

        ## send start trigger:
        self.send_parallel(marker.RUN_START)
        self.logger.info("[TRIGGER] %d" % marker.RUN_START)

        ## error potential classifier:
        self._ErrP_classifier = None
Example #10
    spread_function_name = "monte_carlo"
    models = ["IC", "WC"]

    # output directories
    degree_dist_dir = "../experiments/datasets/degree_distributions/"
    datasets_dir = "../experiments/datasets/"
    ground_truth_dir = "../experiments/ground_truth/"

    # compute the ground truth for each seed set size
    for k in K:

        for dataset_name in dataset_names:

            max_trials = 100
            G = load_graph(g_type=dataset_name)
            prng = random.Random(seed)
            if community:
                G_sampled1 = sampler.random_walk_sampling_with_fly_back(
                    G, nodes / 2, 0.15, prng)
                G_sampled2 = sampler.random_walk_sampling_with_fly_back(
                    G, nodes / 2, 0.15, prng)
                # compose two graphs together
                G_sampled = nx.compose(G_sampled1, G_sampled2)
                # while nodes in common keep sampling
                while len(G_sampled) < nodes and max_trials > 0:
                    G_sampled1 = sampler.random_walk_sampling_with_fly_back(
                        G, nodes / 2, 0.15, prng)
                    G_sampled2 = sampler.random_walk_sampling_with_fly_back(
                        G, nodes / 2, 0.15, prng)
                    # compose two graphs together
                    G_sampled = nx.compose(G_sampled1, G_sampled2)
Example #11
def deletion(G, num_walks, walklength, walks, es, edge, ind):
    G.remove_edge(*edge)
    node_i = str(edge[0])
    node_j = str(edge[1])
    es.indices.refresh(index=ind)
    allwalks_i = helpers.scan(client=es,
                              query={
                                  "query": {
                                      "match_phrase": {
                                          "wlks": {
                                              "query": node_i + " " + node_j
                                          }
                                      }
                                  }
                              },
                              index=ind,
                              size=10000,
                              scroll='1m')
    degree_i = G.degree(edge[0])
    degree_j = G.degree(edge[1])
    blk = []
    for wk in allwalks_i:
        wk_id = int(wk['_id'])
        wk_pos = (es.termvectors(index=ind,
                                 doc_type='walk',
                                 id=wk_id,
                                 field_statistics=False,
                                 term_statistics=False,
                                 offsets=False,
                                 positions=True,
                                 fields=['wlks'])['term_vectors']['wlks']
                  ['terms'][node_i]['tokens'][0]['position'])
        change = int(wk_pos) % 100
        #node deletion change
        if degree_i == 0 and change == 0:
            action = {
                "_op_type": 'delete',
                "_index": ind,
                "_type": "walk",
                "_id": wk_id
            }
            blk.append(action)
            helpers.bulk(es, blk)
            es.indices.refresh(index=ind)
            blk = []
        elif degree_i == 0 and change != 0:
            walks[wk_id][(change):] = random_walk(G,
                                                  walklength - change,
                                                  alpha=0,
                                                  rand=random.Random(),
                                                  start=edge[1])
            action = {
                "_op_type": 'update',
                "_index": ind,
                "_type": "walk",
                "_id": wk_id,
                "_source": {
                    "doc": {
                        "wlks": " ".join(walks[wk_id])
                    }
                }
            }
            blk.append(action)
            helpers.bulk(es, blk)
            es.indices.refresh(index=ind)
            blk = []
        else:
            walks[wk_id][(change):] = random_walk(G,
                                                  walklength - change,
                                                  alpha=0,
                                                  rand=random.Random(),
                                                  start=edge[0])
            action = {
                "_op_type": 'update',
                "_index": ind,
                "_type": "walk",
                "_id": wk_id,
                "_source": {
                    "doc": {
                        "wlks": " ".join(walks[wk_id])
                    }
                }
            }
            blk.append(action)
            helpers.bulk(es, blk)
            es.indices.refresh(index=ind)
            blk = []
    es.indices.refresh(index=ind)
    allwalks_j = helpers.scan(client=es,
                              query={
                                  "query": {
                                      "match_phrase": {
                                          "wlks": {
                                              "query": node_j + " " + node_i
                                          }
                                      }
                                  }
                              },
                              index=ind,
                              size=10000,
                              scroll='1m')
    for wk in allwalks_j:
        wk_id = int(wk['_id'])
        wk_pos = (es.termvectors(index=ind,
                                 doc_type='walk',
                                 id=wk_id,
                                 field_statistics=False,
                                 term_statistics=False,
                                 offsets=False,
                                 positions=True,
                                 fields=['wlks'])['term_vectors']['wlks']
                  ['terms'][node_j]['tokens'][0]['position'])
        change = int(wk_pos) % 100
        if degree_j == 0 and change == 0:
            action = {
                "_op_type": 'delete',
                "_index": ind,
                "_type": "walk",
                "_id": wk_id
            }
            blk.append(action)
            helpers.bulk(es, blk)
            es.indices.refresh(index=ind)
            blk = []
        elif degree_j == 0 and change != 0:
            walks[wk_id][int(change):] = random_walk(G,
                                                     walklength - change,
                                                     alpha=0,
                                                     rand=random.Random(),
                                                     start=edge[0])
            action = {
                "_op_type": 'update',
                "_index": ind,
                "_type": "walk",
                "_id": wk_id,
                "_source": {
                    "doc": {
                        "wlks": " ".join(walks[wk_id])
                    }
                }
            }
            blk.append(action)
            helpers.bulk(es, blk)
            es.indices.refresh(index=ind)
            blk = []
        else:
            walks[wk_id][int(change):] = random_walk(G,
                                                     walklength - change,
                                                     alpha=0,
                                                     rand=random.Random(),
                                                     start=edge[1])
            action = {
                "_op_type": 'update',
                "_index": ind,
                "_type": "walk",
                "_id": wk_id,
                "_source": {
                    "doc": {
                        "wlks": " ".join(walks[wk_id])
                    }
                }
            }
            blk.append(action)
            helpers.bulk(es, blk)
            es.indices.refresh(index=ind)
            blk = []
    return walks, G
Example #12
gamma = 0.0002
lembda = 0.0003




utility_matrix = np.zeros((no_of_users,no_of_movies))
user_lfm =  np.random.random((no_of_users,no_of_latent_factors))
movie_lfm =  np.random.random((no_of_movies,no_of_latent_factors))


df = pd.read_csv('ratings10k.csv',  delim_whitespace=False, sep=',', header=None)
df = df.drop(df.index[0])
df = df.values.tolist()
# shuffle
random.Random(4).shuffle(df)
# random.Random(4).shuffle(df2)

# divide in test and train
train, test = train_test_split(df, test_size=0.2)
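
The fixed seed passed to random.Random(4) above is what makes the shuffle order reproducible between runs; a minimal standalone illustration of that behaviour:

# Seeding a dedicated Random instance reproduces the same permutation every time,
# without disturbing the global random module state.
import random

rows_a, rows_b = list(range(10)), list(range(10))
random.Random(4).shuffle(rows_a)
random.Random(4).shuffle(rows_b)
assert rows_a == rows_b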







def createUtilityMatrix():
    for each in train:               # creating rating matrix
        userId = int(int(each[0]) - 1)
        movieId = int(int(each[1]) - 1)
Example #13
import random
import string


def get_random_string(seed):
    """Generate a deterministic pseudo-random 16-character string from the given seed"""
    choices = string.ascii_letters + string.digits + string.punctuation
    seed = seed.encode("utf-8")
    rand = random.Random(seed)
    # join the sampled characters so the function actually returns a string, as documented
    return "".join(rand.choice(choices) for _ in range(16))
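
A quick, illustrative check of the determinism this relies on (the seed value below is made up): the same seed always yields the same 16 characters.

token_a = get_random_string("alice@example.com")  # hypothetical seed value
token_b = get_random_string("alice@example.com")
assert token_a == token_b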
Example #14
	def train(self, train_file, lexicon_file=None, freq_file=None, test_prop=0.1, output_importances=False, dump_model=False,
			  cross_val_test=False, output_errors=False, ablations=None, dump_transformed_data=False, do_shuffle=True, conf=None):
		"""

		:param train_file: File with segmentations to train on in one of the two formats described in make_prev_next()
		:param lexicon_file: Tab delimited lexicon file with full forms in first column and POS tag in second column (multiple rows per form possible)
		:param freq_file: Tab delimited file with segment forms and their frequencies as integers in two columns
		:param conf: configuration file for training (by default: <MODELNAME>.conf)
		:param test_prop: (0.0 -- 0.99) Proportion of shuffled data to test on
		:param output_importances: Whether to print feature importances (only if test proportion > 0.0)
		:param dump_model: Whether to dump trained model to disk via joblib
		:param cross_val_test: Whether to perform cross-validation for hyper parameter optimization
		:param output_errors: Whether to output prediction errors to a file 'errs.txt'
		:param ablations: Comma separated string of feature names to ablate, e.g. "freq_ratio,prev_grp_pos,next_grp_pos"
		:param dump_transformed_data: If true, transform data to a pandas dataframe and write to disk, then quit
				(useful to train other approaches on the same features, e.g. a DNN classifier)
		:param do_shuffle: Whether training data is shuffled after context extraction but before test partition is created
				(this has no effect if training on whole training corpus)
		:return: None
		"""
		import timing

		pos_lookup = read_lex(self.short_pos,lexicon_file)
		self.read_conf_file(file_name=conf)
		self.pos_lookup = pos_lookup
		conf_file_parser = self.conf_file_parser
		letter_config = LetterConfig(self.letters, self.conf["vowels"], self.pos_lookup)

		np.random.seed(42)

		if lexicon_file is None:
			print("i WARN: No lexicon file provided, learning purely from examples")

		seg_table = io.open(train_file,encoding="utf8").read()
		seg_table = seg_table.replace("\r","").strip()
		for c in self.conf["diacritics"]:  # TODO: configurable diacritic removal
			pass
			#seg_table = seg_table.replace(c,"")
		seg_table = seg_table.split("\n")

		sys.stderr.write("o Encoding Training data\n")

		# Validate training data
		non_tab_lines = 0
		non_tab_row = 0
		for r, line in enumerate(seg_table):
			if line.count("\t") < 1:
				non_tab_lines += 1
				non_tab_row = r
		if non_tab_lines > 0:
			sys.stderr.write("FATAL: found " + str(non_tab_lines) + " rows in training data not containing tab\n")
			sys.stderr.write("       Last occurrence at line: " + str(non_tab_row) + "\n")
			sys.exit()

		# Make into four cols: prev \t next \t current \t segmented (unless already receiving such a table, for shuffled datasets)
		if seg_table[0].count("\t") == 1:
			seg_table = make_prev_next(seg_table)

		# Ensure OOV symbol is in data
		seg_table = ["_\t_\t_\t_"] + seg_table

		data_y = []
		words = []
		all_encoded_groups = []

		encoding_cache = {}
		non_ident_segs = 0

		shuffle_mapping = list(range(len(seg_table)))
		zipped = list(zip(seg_table, shuffle_mapping))

		# Shuffle table to sample across entire dataset if desired
		if do_shuffle and False:
			random.Random(24).shuffle(zipped)

		seg_table, shuffle_mapping = zip(*zipped)

		headers = bg2array("_________",prev_group="_",next_group="_",print_headers=True,is_test=1,grp_id=1,config=letter_config)

		word_idx = -1
		bug_rows = []

		freqs = defaultdict(float)
		total_segs = 0.0
		flines = io.open(freq_file,encoding="utf8").read().replace("\r","").split("\n") if freq_file is not None else []
		for l in flines:
			if l.count("\t")==1:
				w, f = l.split("\t")
				freqs[w] += float(f)
				total_segs += float(f)

		for u in freqs:
			freqs[u] = freqs[u]/total_segs

		# Don't use freqs if they're empty
		if len(freqs) == 0:
			sys.stderr.write("o No segment frequencies provided, adding 'freq_ratio' to ablated features\n")
			if ablations is None:
				ablations = "freq_ratio"
			else:
				if "freq_ratio" not in ablations:
					ablations += ",freq_ratio"

		step = int(1/test_prop) if test_prop > 0 else 0
		test_indices = list(range(len(seg_table)))[0::step] if step > 0 else []
		test_rows = []

		for row_idx, row in enumerate(seg_table):
			is_test = 1 if row_idx in test_indices else 0

			prev_group, next_group, bound_group, segmentation = row.split("\t")
			if bound_group != "|":
				if len(bound_group) != len(segmentation.replace("|","")):  # Ignore segmentations that also normalize
					non_ident_segs += 1
					bug_rows.append(row_idx)
					continue

			###
			if dump_transformed_data:
				if is_test:
					test_rows.append(bound_group + "\t" + segmentation)
			###

			word_idx += 1
			words.append(bound_group)
			group_type = "_".join([x for x in [prev_group, next_group, bound_group] if x != ""])
			if group_type in encoding_cache:  # No need to encode, an identical featured group has already been seen
				encoded_group = encoding_cache[group_type]
				for c in encoded_group:
					c[headers.index("is_test")] = is_test  # Make sure that this group's test index is correctly assigned
			else:
				encoded_group = bg2array(bound_group,prev_group=prev_group,next_group=next_group,is_test=is_test,grp_id=word_idx,config=letter_config,train=True,freqs=freqs)
				encoding_cache[group_type] = encoded_group
			all_encoded_groups += encoded_group
			data_y += segs2array(segmentation)

		sys.stderr.write("o Finished encoding " + str(len(data_y)) + " chars (" + str(len(seg_table)) + " groups, " + str(len(encoding_cache)) + " group types)\n")

		if non_ident_segs > 0:
			with open("bug_rows.txt",'w') as f:
				f.write("\n".join([str(r) for r in sorted([shuffle_mapping[x] for x in bug_rows])]) + "\n")

			sys.stderr.write("i WARN: found " + str(non_ident_segs) + " rows in training data where left column characters not identical to right column characters\n")
			sys.stderr.write("        Row numbers dumped to: bug_rows.txt\n")
			sys.stderr.write("        " + str(non_ident_segs) + " rows were ignored in training\n\n")

		data_y = np.array(data_y)

		cat_labels = ['group_in_lex','current_letter', 'prev_prev_letter', 'prev_letter', 'next_letter', 'next_next_letter',
					 'mns4_coarse', 'mns3_coarse', 'mns2_coarse',
					 'mns1_coarse', 'pls1_coarse', 'pls2_coarse',
					 'pls3_coarse', 'pls4_coarse', "so_far_pos", "remaining_pos","prev_grp_pos","next_grp_pos",
					  "remaining_pos_mns1","remaining_pos_mns2",
					  "prev_grp_first", "prev_grp_last","next_grp_first","next_grp_last"]

		num_labels = ['idx','len_bound_group',"current_vowel","prev_prev_vowel","prev_vowel","next_vowel","next_next_vowel",
					  "prev_grp_len","next_grp_len","freq_ratio"]

		# Remove features switched off in .conf file
		for label in self.conf["unused"]:
			if label in cat_labels:
				cat_labels.remove(label)
			if label in num_labels:
				num_labels.remove(label)

		# Handle temporary ablations if specified in option -a
		if ablations is not None:
			if len(ablations) > 0 and ablations != "none":
				abl_feats = ablations.split(",")
				sys.stderr.write("o Ablating features:\n")
				for feat in abl_feats:
					found = False
					if feat in cat_labels:
						cat_labels.remove(feat)
						found = True
					elif feat in num_labels:
						num_labels.remove(feat)
						found = True
					if found:
						sys.stderr.write("\t"+feat+"\n")
					else:
						sys.stderr.write("\tERR: can't find ablation feature " + feat + "\n")
						sys.exit()

		data_x = pd.DataFrame(all_encoded_groups, columns=headers)

		###
		if dump_transformed_data:
			data_x["resp"] = data_y
			import csv
			to_remove = ["is_test","grp_id"]  # Columns to remove from transformed data dump
			out_cols = [col for col in headers if col not in to_remove] + ["resp"]  # Add the response column as 'resp'
			data_x.iloc[data_x.index[data_x["is_test"] == 0]].to_csv("rftokenizer_train_featurized.tab",sep="\t",quotechar="",quoting=csv.QUOTE_NONE,encoding="utf8",index=False,columns=out_cols)
			data_x.iloc[data_x.index[data_x["is_test"] == 1]].to_csv("rftokenizer_test_featurized.tab",sep="\t",quotechar="",quoting=csv.QUOTE_NONE,encoding="utf8",index=False,columns=out_cols)
			# Dump raw test rows to compare gold solution
			with io.open("rftokenizer_test_gold.tab","w",encoding="utf8") as gold:
				gold.write("\n".join(test_rows) + "\n")
			sys.stderr.write("o Wrote featurized train/test set and gold test to rftokenizer_*.tab\n")
			sys.exit()
		###

		encoder = MultiColumnLabelEncoder(pd.Index(cat_labels))
		data_x_enc = encoder.fit_transform(data_x)

		if test_prop > 0:
			sys.stderr.write("o Generating train/test split with test proportion "+str(test_prop)+"\n")

		data_x_enc["boundary"] = data_y
		strat_train_set = data_x_enc.iloc[data_x_enc.index[data_x_enc["is_test"] == 0]]
		strat_test_set = data_x_enc.iloc[data_x_enc.index[data_x_enc["is_test"] == 1]]

		cat_pipeline = Pipeline([
			('selector', DataFrameSelector(cat_labels)),
		])

		num_pipeline = Pipeline([
			('selector', DataFrameSelector(num_labels))
		])

		preparation_pipeline = FeatureUnion(transformer_list=[
			("cat_pipeline", cat_pipeline),
			("num_pipeline", num_pipeline),
		])

		sys.stderr.write("o Transforming data to numerical array\n")
		train_x = preparation_pipeline.fit_transform(strat_train_set)

		train_y = strat_train_set["boundary"]
		train_y_bin = np.where(strat_train_set['boundary'] == 0, 0, 1)

		if test_prop > 0:
			test_x = preparation_pipeline.transform(strat_test_set)
			test_y_bin = np.where(strat_test_set['boundary'] == 0, 0, 1)
			bound_grp_idx = np.array(strat_test_set['grp_id'])

			from sklearn.dummy import DummyClassifier
			d = DummyClassifier(strategy="most_frequent")
			d.fit(train_x,train_y_bin)
			pred = d.predict(test_x)
			print("o Majority baseline:")
			print("\t" + str(accuracy_score(test_y_bin, pred)))

		forest_clf = ExtraTreesClassifier(n_estimators=250, max_features=None, n_jobs=3, random_state=42)

		if cross_val_test:
			# Modify code to tune hyperparameters/use different estimators

			from sklearn.model_selection import GridSearchCV
			sys.stderr.write("o Running CV...\n")

			params = {"n_estimators":[300,400,500],"max_features":["auto",None]}#,"class_weight":["balanced",None]}
			grid = GridSearchCV(RandomForestClassifier(n_jobs=-1,random_state=42,warm_start=True),param_grid=params,refit=False)
			grid.fit(train_x,train_y_bin)
			print("\nGrid search results:\n" + 30 * "=")
			for key in grid.cv_results_:
				print(key + ": " + str(grid.cv_results_[key]))

			print("\nBest parameters:\n" + 30 * "=")
			print(grid.best_params_)
			sys.exit()

		sys.stderr.write("o Learning...\n")
		forest_clf.fit(train_x, train_y_bin)

		if test_prop > 0:
			pred = forest_clf.predict(test_x)
			j=-1
			for i, row in strat_test_set.iterrows():
				j+=1
				if row["idx"] +1 == row["len_bound_group"]:
					pred[j] = 0

			print("o Binary clf accuracy:")
			print("\t" + str(accuracy_score(test_y_bin, pred)))

			group_results = defaultdict(lambda : 1)
			for i in range(len(pred)):
				grp = bound_grp_idx[i]
				if test_y_bin[i] != pred[i]:
					group_results[grp] = 0

			correct = 0
			total = 0
			for grp in set(bound_grp_idx):
				if group_results[grp] == 1:
					correct +=1
				total +=1
			print("o Perfect bound group accuracy:")
			print("\t" + str(float(correct)/total))

			errs = defaultdict(int)
			for i, word in enumerate(words):
				if i in group_results:
					if group_results[i] == 0:
						errs[word] += 1

			if output_errors:
				print("o Writing prediction errors to errs.txt")
				with io.open("errs.txt",'w',encoding="utf8") as f:
					for err in errs:
						f.write(err + "\t" + str(errs[err])+"\n")

			if output_importances:
				feature_names = cat_labels + num_labels

				zipped = zip(feature_names, forest_clf.feature_importances_)
				sorted_zip = sorted(zipped, key=lambda x: x[1], reverse=True)
				print("o Feature importances:\n")
				for name, importance in sorted_zip:
					print(name, "=", importance)
		else:
			print("o Test proportion is 0%, skipping evaluation")

		if dump_model:
			joblib.dump((forest_clf, num_labels, cat_labels, encoder, preparation_pipeline, pos_lookup, freqs, conf_file_parser), self.lang + ".sm" + str(sys.version_info[0]), compress=3)
			print("o Dumped trained model to " + self.lang + ".sm" + str(sys.version_info[0]))
Example #15
def _test_honeybadger(N=4, f=1, seed=None):
    sid = 'sidA'
    # Generate threshold sig keys
    sPK, sSKs = dealer(N, f + 1, seed=seed)
    # Generate threshold enc keys
    ePK, eSKs = tpke.dealer(N, f + 1)

    rnd = random.Random(seed)
    #print 'SEED:', seed
    router_seed = rnd.random()
    sends, recvs = simple_router(N, seed=router_seed)

    B = N  # 1 tx per node, per round

    badgers = [None] * N
    threads = [None] * N
    input_queues = [Queue() for _ in range(N)]  # to submit lists of txes
    output_queues = [Queue() for _ in range(N)]  # to read lists of txes

    for i in range(N):
        input_queues[i].put(['<[HBBFT Input %d]>' % i])

    for i in range(N):
        badgers[i] = HoneyBadgerBFT(sid,
                                    i,
                                    B,
                                    N,
                                    f,
                                    sPK,
                                    sSKs[i],
                                    ePK,
                                    eSKs[i],
                                    sends[i],
                                    recvs[i],
                                    input_queues[i].get,
                                    output_queues[i].put,
                                    encode=repr,
                                    decode=ast.literal_eval,
                                    max_rounds=3)
        threads[i] = gevent.spawn(badgers[i].run)

    for i in range(N):
        input_queues[i].put(['<[HBBFT Input %d]>' % (i + 10)])

    for i in range(N):
        input_queues[i].put(['<[HBBFT Input %d]>' % (i + 20)])

    try:
        # Wait for each badger to finish running
        for i in range(N):
            threads[i].get()
            output_queues[i].put(StopIteration)

        outs = [tuple(output_queues[i]) for i in range(N)]

        # Consistency check
        assert len(set(outs)) == 1

    except KeyboardInterrupt:
        gevent.killall(threads)
        raise
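# A hedged sketch of the seeding pattern used above: one master seed drives a
# random.Random instance, and per-component seeds (the router seed here) are
# drawn from it, so the whole test can be replayed deterministically.
# `derive_component_seeds` is an illustrative helper, not part of the test suite.
import random

def derive_component_seeds(master_seed, n_components):
    rnd = random.Random(master_seed)
    return [rnd.random() for _ in range(n_components)]

assert derive_component_seeds(7, 3) == derive_component_seeds(7, 3)
assert derive_component_seeds(7, 3) != derive_component_seeds(8, 3)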
Ejemplo n.º 16
0
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    validate_flags_or_throw(bert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case
    )

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project
        )

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host,
        ),
    )

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = read_squad_examples(input_file=FLAGS.train_file, is_training=True)
        num_train_steps = int(len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        # Pre-shuffle the input to avoid having to make a very large shuffle
        # buffer in the `input_fn`.
        rng = random.Random(12345)
        rng.shuffle(train_examples)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
    )

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
    )

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant tensors
        # in memory.
        train_writer = FeatureWriter(
            filename=os.path.join(FLAGS.output_dir, "train.tf_record"), is_training=True
        )
        convert_examples_to_features(
            examples=train_examples,
            tokenizer=tokenizer,
            max_seq_length=FLAGS.max_seq_length,
            doc_stride=FLAGS.doc_stride,
            max_query_length=FLAGS.max_query_length,
            is_training=True,
            output_fn=train_writer.process_feature,
        )
        train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num orig examples = %d", len(train_examples))
        tf.logging.info("  Num split examples = %d", train_writer.num_features)
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        del train_examples

        train_input_fn = input_fn_builder(
            input_file=train_writer.filename,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
        )
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        eval_examples = read_squad_examples(input_file=FLAGS.predict_file, is_training=False)

        eval_writer = FeatureWriter(
            filename=os.path.join(FLAGS.output_dir, "eval.tf_record"), is_training=False
        )
        eval_features = []

        def append_feature(feature):
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=FLAGS.max_seq_length,
            doc_stride=FLAGS.doc_stride,
            max_query_length=FLAGS.max_query_length,
            is_training=False,
            output_fn=append_feature,
        )
        eval_writer.close()

        tf.logging.info("***** Running predictions *****")
        tf.logging.info("  Num orig examples = %d", len(eval_examples))
        tf.logging.info("  Num split examples = %d", len(eval_features))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        all_results = []

        predict_input_fn = input_fn_builder(
            input_file=eval_writer.filename,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
        )

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        all_results = []
        for result in estimator.predict(predict_input_fn, yield_single_examples=True):
            if len(all_results) % 1000 == 0:
                tf.logging.info("Processing example: %d" % (len(all_results)))
            unique_id = int(result["unique_ids"])
            start_logits = [float(x) for x in result["start_logits"].flat]
            end_logits = [float(x) for x in result["end_logits"].flat]
            all_results.append(
                RawResult(unique_id=unique_id, start_logits=start_logits, end_logits=end_logits)
            )

        output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json")
        output_nbest_file = os.path.join(FLAGS.output_dir, "nbest_predictions.json")
        output_null_log_odds_file = os.path.join(FLAGS.output_dir, "null_odds.json")

        write_predictions(
            eval_examples,
            eval_features,
            all_results,
            FLAGS.n_best_size,
            FLAGS.max_answer_length,
            FLAGS.do_lower_case,
            output_prediction_file,
            output_nbest_file,
            output_null_log_odds_file,
        )
Ejemplo n.º 17
0
 def start(self):
     rand = random.Random()
     self.__v = rand.choice([15])
     self.__ang = 0  # TODO: random good angle
Ejemplo n.º 18
0
    def __init__(self,
                 args=None,
                 topology=None,
                 results=None,
                 seed=None,
                 load=None,
                 policy=None,
                 id_simulation=None):

        if args is not None and hasattr(args, 'mean_service_holding_time'):
            self.mean_service_holding_time = args.mean_service_holding_time
        else:
            self.mean_service_holding_time = 86400.0  # default service holding time in seconds (86400 s = 24 h)

        self.load = 0.0
        self.mean_service_inter_arrival_time = 0.0
        if args is not None and hasattr(args, 'load') and load is None:
            self.set_load(load=args.load)
        elif load is not None:  # load through parameter has precedence over argument
            self.set_load(load=load)
        else:
            self.set_load(load=50)

        # num_seeds defines the number of seeds (simulations) to be run for each configuration
        if args is not None and hasattr(args, "num_seeds"):
            self.num_seeds = args.num_seeds
        else:
            self.num_seeds = 25

        if args is not None and hasattr(args, "num_arrivals"):
            self.num_arrivals = args.num_arrivals
        else:
            self.num_arrivals = 10000

        if args is not None and hasattr(args, "k_paths"):
            self.k_paths = args.k_paths
        else:
            self.k_paths = 5

        if args is not None and hasattr(args, 'threads'):
            self.threads = args.threads
        else:
            self.threads = 6

        if args is not None and hasattr(args, 'topology_file'):
            self.topology_file = args.topology_file
            self.topology_name = args.topology_file.split('.')[0]
        else:
            self.topology_file = "nobel-us.xml"  #"nobel-us.xml" #"test-topo.xml"
            self.topology_name = 'nobel-us'
            # self.topology_file = "simple"  # "nobel-us.xml" #"test-topo.xml"
            # self.topology_name = 'simple'

        if args is not None and hasattr(args, "resource_units_per_link"):
            self.resource_units_per_link = args.resource_units_per_link
        else:
            self.resource_units_per_link = 80

        if args is not None and hasattr(args, "policy") and policy is None:
            self.policy = args.policy
        elif policy is not None:
            self.policy = policy  # parameter has precedence over argument
        else:
            self.policy = 'SP'  # shortest path by default

        if topology is not None:
            self.topology = topology

        if seed is not None:
            self.seed = seed
            self.rng = random.Random(seed)
        else:
            self.seed = 42
            self.rng = random.Random(42)

        if results is not None:
            self.results = results
        else:
            self.results = []  # initiates with an empty local results vector

        if id_simulation is not None:
            self.id_simulation = id_simulation
        else:
            self.id_simulation = 0

        self.track_stats_every = 100  # frequency at which results are saved
        self.plot_tracked_stats_every = 1000  # frequency at which results are plotted
        self.tracked_results = {}
        self.tracked_statistics = [
            'request_blocking_ratio', 'average_link_usage'
        ]
        for obs in self.tracked_statistics:
            self.tracked_results[obs] = []

        self.events = []  # event queue
        self._processed_arrivals = 0
        self._rejected_services = 0
        self.current_time = 0.0

        self.output_folder = 'data'

        self.plot_formats = [
            'pdf'
        ]  # you can configure this to other formats such as PNG, SVG
Ejemplo n.º 19
0
def test_multiple_channel_states(
    chain_state,
    token_network_state,
    our_address,
):
    open_block_number = 10
    pseudo_random_generator = random.Random()
    pkey, address = factories.make_privkey_address()

    amount = 30
    our_balance = amount + 50
    channel_state = factories.make_channel(
        our_balance=our_balance,
        our_address=our_address,
        partner_balance=our_balance,
        partner_address=address,
        token_network_identifier=token_network_state.address,
    )
    payment_network_identifier = factories.make_payment_network_identifier()

    channel_new_state_change = ContractReceiveChannelNew(
        factories.make_transaction_hash(),
        token_network_state.address,
        channel_state,
    )

    channel_new_iteration = token_network.state_transition(
        payment_network_identifier,
        token_network_state,
        channel_new_state_change,
        pseudo_random_generator,
        open_block_number,
    )

    lock_amount = 30
    lock_expiration = 20
    lock_secret = sha3(b'test_end_state')
    lock_secrethash = sha3(lock_secret)
    lock = HashTimeLockState(
        lock_amount,
        lock_expiration,
        lock_secrethash,
    )

    mediated_transfer = make_receive_transfer_mediated(
        channel_state=channel_state,
        privkey=pkey,
        nonce=1,
        transferred_amount=0,
        lock=lock,
    )

    from_route = factories.route_from_channel(channel_state)
    init_target = ActionInitTarget(
        from_route,
        mediated_transfer,
    )

    node.state_transition(chain_state, init_target)

    closed_block_number = open_block_number + 10
    channel_close_state_change = ContractReceiveChannelClosed(
        factories.make_transaction_hash(),
        channel_state.partner_state.address,
        token_network_state.address,
        channel_state.identifier,
        closed_block_number,
    )

    channel_closed_iteration = token_network.state_transition(
        payment_network_identifier,
        channel_new_iteration.new_state,
        channel_close_state_change,
        pseudo_random_generator,
        closed_block_number,
    )

    settle_block_number = closed_block_number + channel_state.settle_timeout + 1
    channel_settled_state_change = ContractReceiveChannelSettled(
        factories.make_transaction_hash(),
        token_network_state.address,
        channel_state.identifier,
        settle_block_number,
    )

    channel_settled_iteration = token_network.state_transition(
        payment_network_identifier,
        channel_closed_iteration.new_state,
        channel_settled_state_change,
        pseudo_random_generator,
        closed_block_number,
    )

    token_network_state_after_settle = channel_settled_iteration.new_state
    ids_to_channels = token_network_state_after_settle.channelidentifiers_to_channels
    assert len(ids_to_channels) == 1
    assert channel_state.identifier in ids_to_channels

    # Create new channel while the previous one is pending unlock
    new_channel_state = factories.make_channel(
        our_balance=our_balance,
        partner_balance=our_balance,
        partner_address=address,
    )
    channel_new_state_change = ContractReceiveChannelNew(
        factories.make_transaction_hash(),
        token_network_state.address,
        new_channel_state,
    )

    channel_new_iteration = token_network.state_transition(
        payment_network_identifier,
        token_network_state,
        channel_new_state_change,
        pseudo_random_generator,
        open_block_number,
    )

    token_network_state_after_new_open = channel_new_iteration.new_state
    ids_to_channels = token_network_state_after_new_open.channelidentifiers_to_channels

    assert len(ids_to_channels) == 2
    assert channel_state.identifier in ids_to_channels
Ejemplo n.º 20
0
 def setUp(self):
     self.random = random.Random(42)
Ejemplo n.º 21
0
def test_channel_settle_must_properly_cleanup():
    open_block_number = 10
    pseudo_random_generator = random.Random()

    token_network_id = factories.make_address()
    token_id = factories.make_address()
    token_network_state = TokenNetworkState(token_network_id, token_id)
    payment_network_identifier = factories.make_payment_network_identifier()

    amount = 30
    our_balance = amount + 50
    channel_state = factories.make_channel(our_balance=our_balance)

    channel_new_state_change = ContractReceiveChannelNew(
        factories.make_transaction_hash(),
        token_network_id,
        channel_state,
    )

    channel_new_iteration = token_network.state_transition(
        payment_network_identifier,
        token_network_state,
        channel_new_state_change,
        pseudo_random_generator,
        open_block_number,
    )

    closed_block_number = open_block_number + 10
    channel_close_state_change = ContractReceiveChannelClosed(
        factories.make_transaction_hash(),
        channel_state.partner_state.address,
        token_network_id,
        channel_state.identifier,
        closed_block_number,
    )

    channel_closed_iteration = token_network.state_transition(
        payment_network_identifier,
        channel_new_iteration.new_state,
        channel_close_state_change,
        pseudo_random_generator,
        closed_block_number,
    )

    settle_block_number = closed_block_number + channel_state.settle_timeout + 1
    channel_settled_state_change = ContractReceiveChannelSettled(
        factories.make_transaction_hash(),
        token_network_id,
        channel_state.identifier,
        settle_block_number,
    )

    channel_settled_iteration = token_network.state_transition(
        payment_network_identifier,
        channel_closed_iteration.new_state,
        channel_settled_state_change,
        pseudo_random_generator,
        closed_block_number,
    )

    token_network_state_after_settle = channel_settled_iteration.new_state
    ids_to_channels = token_network_state_after_settle.channelidentifiers_to_channels
    assert channel_state.identifier not in ids_to_channels
Ejemplo n.º 22
0
    def prepare_file_path_queue(self):
        """Generate more file paths to process. Result are saved in _file_path_queue."""
        self._parsing_start_time = time.perf_counter()
        # If the file path is already being processed, or if a file was
        # processed recently, wait until the next batch
        file_paths_in_progress = self._processors.keys()
        now = timezone.utcnow()

        # Sort the file paths by the parsing order mode
        list_mode = conf.get("scheduler", "file_parsing_sort_mode")

        files_with_mtime = {}
        file_paths = []
        is_mtime_mode = list_mode == "modified_time"

        file_paths_recently_processed = []
        for file_path in self._file_paths:

            if is_mtime_mode:
                try:
                    files_with_mtime[file_path] = os.path.getmtime(file_path)
                except FileNotFoundError:
                    self.log.warning("Skipping processing of missing file: %s",
                                     file_path)
                    continue
                file_modified_time = timezone.make_aware(
                    datetime.fromtimestamp(files_with_mtime[file_path]))
            else:
                file_paths.append(file_path)
                file_modified_time = None

            # Find file paths that were recently processed to exclude them
            # from being added to file_path_queue
            # unless they were modified recently and parsing mode is "modified_time"
            # in which case we don't honor "self._file_process_interval" (min_file_process_interval)
            last_finish_time = self.get_last_finish_time(file_path)
            if (last_finish_time is not None
                    and (now - last_finish_time).total_seconds() <
                    self._file_process_interval
                    and not (is_mtime_mode and file_modified_time and
                             (file_modified_time > last_finish_time))):
                file_paths_recently_processed.append(file_path)

        # Sort file paths via last modified time
        if is_mtime_mode:
            file_paths = sorted(files_with_mtime,
                                key=files_with_mtime.get,
                                reverse=True)
        elif list_mode == "alphabetical":
            file_paths = sorted(file_paths)
        elif list_mode == "random_seeded_by_host":
            # Shuffle the list seeded by hostname so multiple schedulers can work on different
            # sets of files. Since we set the seed, the sort order will remain the same per host
            random.Random(get_hostname()).shuffle(file_paths)

        files_paths_at_run_limit = [
            file_path for file_path, stat in self._file_stats.items()
            if stat.run_count == self._max_runs
        ]

        file_paths_to_exclude = set(file_paths_in_progress).union(
            file_paths_recently_processed, files_paths_at_run_limit)

        # Do not convert the following list to set as set does not preserve the order
        # and we need to maintain the order of file_paths for `[scheduler] file_parsing_sort_mode`
        files_paths_to_queue = [
            file_path for file_path in file_paths
            if file_path not in file_paths_to_exclude
        ]

        for file_path, processor in self._processors.items():
            self.log.debug(
                "File path %s is still being processed (started: %s)",
                processor.file_path,
                processor.start_time.isoformat(),
            )

        self.log.debug("Queuing the following files for processing:\n\t%s",
                       "\n\t".join(files_paths_to_queue))

        for file_path in files_paths_to_queue:
            if file_path not in self._file_stats:
                self._file_stats[file_path] = DagFileStat(
                    num_dags=0,
                    import_errors=0,
                    last_finish_time=None,
                    last_duration=None,
                    run_count=0)

        self._file_path_queue.extend(files_paths_to_queue)
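# A hedged sketch of the "random_seeded_by_host" ordering above: seeding
# random.Random with the host name makes the shuffle deterministic per host,
# so repeated runs on one machine queue files in the same order while other
# hosts get a different order. The file paths below are illustrative only.
import random

file_paths = ["dags/a.py", "dags/b.py", "dags/c.py", "dags/d.py"]

first = list(file_paths)
random.Random("scheduler-host-1").shuffle(first)

second = list(file_paths)
random.Random("scheduler-host-1").shuffle(second)

assert first == second  # same host name -> same deterministic order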
Ejemplo n.º 23
0
 def __init__(self, playable_encounter, encounter_display):
     self.rnd = random.Random()
     self.ended = False
     self.__inital_done = False
     self.encounter = playable_encounter
     self.display = encounter_display
Ejemplo n.º 24
0
def start(father="data/", changeRate=0.1, filename='twitter.txt'):
    fx = open(father + filename)
    fc = open(father + 'changey.txt', 'w')
    fy = open(father + 'y2.txt', 'w')
    biggestNode = 1
    nowIter = 0
    nodeSet = set()
    nodeSet2 = set()

    # 1. Copy the edge list to changey.txt and collect every node id
    for line in fx.readlines():
        lineArr = re.split(' |,|\t', line.strip())
        lenth = len(lineArr)
        if lenth < 2:
            break

        fc.write('%s %s\n' % (lineArr[0], lineArr[1]))
        nodeSet.add(int(lineArr[0]))
        nodeSet.add(int(lineArr[1]))
        nodeSet2.add(int(lineArr[0]))
        nodeSet2.add(int(lineArr[1]))

        nowIter += 1
    biggestNode = max(nodeSet2)
    print "biggestNode=", biggestNode
    #2.
    fc.close()
    fc = open(father + 'changey.txt', 'r')
    removeSet = set()
    lenSet = int(len(nodeSet))
    nowIter = 0
    nodeSet = list(nodeSet)
    random.Random(0).shuffle(nodeSet)

    #delete randomly
    for nowIter in range(int(changeRate * lenSet)):
        node = random.Random().choice(nodeSet)
        removeSet.add(node)
        nodeSet2.remove(node)
        nodeSet.remove(node)
    print "len(removeSet)=", len(removeSet)
    #
    for line in fc.readlines():
        lineArr = re.split(' |,|\t', line.strip())
        lenth = len(lineArr)
        if lenth < 2:
            break
        if int(lineArr[0]) not in removeSet and int(
                lineArr[1]) not in removeSet:
            fy.write('a%s a%s\n' % (lineArr[0], lineArr[1]))
    #print "removeSet=",removeSet
    fremove = open(father + 'remove.txt', 'w')
    for ra in removeSet:
        fremove.write('%s ' % ra)
    fremove.close()
    # 3. Add new nodes, each connected to `du` randomly chosen existing nodes
    #biggestNode=int(5000)
    newNode = int(biggestNode)
    alllen, du, xDict = cal.start(father + filename)
    #alllen=11765;du=947;
    for j in range(int(changeRate * alllen)):
        newNode += 1
        i = 0
        nowIter = 0
        nodeSet2 = list(nodeSet2)
        random.Random(0).shuffle(nodeSet2)
        addset = set()
        while (i < du):
            nodeY = random.Random().choice(nodeSet2)
            if (nodeY not in addset):
                addset.add(nodeY)
                fy.write('a%s a%s\n' % (newNode, nodeY))
                #print "i=",i,":",newNode,"+",nodeY," "
                i += 1

    ff = open(father + 'connectY2.txt', 'w')
    for node in nodeSet2:
        ff.write('%s a%s\n' % (node, node))

    ff.close()
    fy.close()
    fc.close()
    fx.close()
    return xDict
Ejemplo n.º 25
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
from nltk.cluster import KMeansClusterer
import nltk
import json
import random
from nltk.cluster.util import cosine_distance

config = json.load(open('../config.json', 'r'))
INPUT_DIR = config['INPUT_DIR']
OUTPUT_DIR = config['OUTPUT_DIR']
TWEET_DIR = config['TWEET_DIR']
NUM_CLUSTERS = config['NUM_CLUSTERS']
RNG = random.Random()
RNG.seed(config['SEED'])
events = open(INPUT_DIR + 'event_names.txt', 'r').read().splitlines()

print('loading...')


# use a joint sample of embeddings from each event to determine the cluster centroids
def get_samples():
    tweet_embeds = ''
    for event in events:
        embeds = np.load(TWEET_DIR + event + '/' + event +
                         '_embeddings_partisan.npy')
        indices = np.load(TWEET_DIR + event + '/' + event +
                          '_indices_among_embeddings_for_getting_topics.npy')
        embeds = embeds[indices, :]
        if len(tweet_embeds) == 0:
Ejemplo n.º 26
0
import random
from eth_keys import keys
from hexbytes import HexBytes
from web3.datastructures import MutableAttributeDict
from monitor.blocks import get_canonicalized_block, calculate_block_signature

random_generator = random.Random(0)


def random_hash():
    return bytes(random_generator.randint(0, 255) for _ in range(32))


def random_address():
    return bytes(random_generator.randint(0, 255) for _ in range(20))


def random_private_key():
    return keys.PrivateKey(random_hash())


def random_step():
    return random_generator.randint(0, 2**32)


def make_block(*,
               block_hash=None,
               parent_hash=None,
               proposer_privkey=None,
               step=None):
    if proposer_privkey is None:
Ejemplo n.º 27
0
    def Train(self):
        fixedRandomSeed = 0
        trainToValidationChance = 0.2
        includeEvaluationInValidation = True
        stepMultiplier = 1.0
        stepCount = 1000
        showInputs = False
        augmentationLevel = 0
        detNMSThresh = 0.35
        rpnNMSThresh = 0.55
        trainDir = os.path.join(os.curdir, self.__mParams["train_dir"])
        evalDir = os.path.join(os.curdir, self.__mParams["eval_dir"])
        inModelPath = os.path.join(os.curdir, self.__mParams["input_model"])
        os.makedirs(name=self.__mParams["output_dir"], exist_ok=True)
        outModelPath = os.path.join(self.__mParams["output_dir"],
                                    self.__mParams["model_name"] + ".h5")

        blankInput = True

        if "eval_dir" in self.__mParams:
            evalDir = os.path.join(os.curdir, self.__mParams["eval_dir"])

        if "image_size" in self.__mParams:
            maxdim = self.__mParams["image_size"]

        if "train_to_val_ratio" in self.__mParams:
            trainToValidationChance = self.__mParams["train_to_val_ratio"]

        if "step_num" in self.__mParams:
            stepCount = self.__mParams["step_num"]

        if "show_inputs" in self.__mParams:
            showInputs = self.__mParams["show_inputs"]

        if "random_augmentation_level" in self.__mParams:
            augmentationLevel = self.__mParams["random_augmentation_level"]

        if "detection_nms_threshold" in self.__mParams:
            detNMSThresh = self.__mParams["detection_nms_threshold"]

        if "rpn_nms_threshold" in self.__mParams:
            rpnNMSThresh = self.__mParams["rpn_nms_threshold"]

        rnd = random.Random()
        rnd.seed(fixedRandomSeed)
        trainImagesAndMasks = {}
        validationImagesAndMasks = {}

        # iterate through train set
        imagesDir = os.path.join(trainDir, "images")
        masksDir = os.path.join(trainDir, "masks")

        # adding evaluation data into validation
        if includeEvaluationInValidation and evalDir is not None:

            # iterate through test set
            imagesValDir = os.path.join(evalDir, "images")
            masksValDir = os.path.join(evalDir, "masks")

            imageValFileList = [
                f for f in os.listdir(imagesValDir)
                if os.path.isfile(os.path.join(imagesValDir, f))
            ]
            for imageFile in imageValFileList:
                baseName = os.path.splitext(os.path.basename(imageFile))[0]
                imagePath = os.path.join(imagesValDir, imageFile)
                if os.path.exists(os.path.join(masksValDir,
                                               baseName + ".png")):
                    maskPath = os.path.join(masksValDir, baseName + ".png")
                elif os.path.exists(
                        os.path.join(masksValDir, baseName + ".tif")):
                    maskPath = os.path.join(masksValDir, baseName + ".tif")
                elif os.path.exists(
                        os.path.join(masksValDir, baseName + ".tiff")):
                    maskPath = os.path.join(masksValDir, baseName + ".tiff")
                else:
                    sys.exit(
                        "The image " + imageFile +
                        " does not have a corresponding mask file ending with png, tif or tiff"
                    )
                if not os.path.isfile(imagePath) or not os.path.isfile(
                        maskPath):
                    continue
                validationImagesAndMasks[imagePath] = maskPath
            imageFileList = [
                f for f in os.listdir(imagesDir)
                if os.path.isfile(os.path.join(imagesDir, f))
            ]
            for imageFile in imageFileList:
                baseName = os.path.splitext(os.path.basename(imageFile))[0]
                imagePath = os.path.join(imagesDir, imageFile)
                if os.path.exists(os.path.join(masksDir, baseName + ".png")):
                    maskPath = os.path.join(masksDir, baseName + ".png")
                elif os.path.exists(os.path.join(masksDir, baseName + ".tif")):
                    maskPath = os.path.join(masksDir, baseName + ".tif")
                elif os.path.exists(os.path.join(masksDir,
                                                 baseName + ".tiff")):
                    maskPath = os.path.join(masksDir, baseName + ".tiff")
                else:
                    sys.exit(
                        "The image " + imageFile +
                        " does not have a corresponding mask file ending with png, tif or tiff"
                    )
                if not os.path.isfile(imagePath) or not os.path.isfile(
                        maskPath):
                    continue
                trainImagesAndMasks[imagePath] = maskPath
        # splitting train data into train and validation
        else:
            imageFileList = [
                f for f in os.listdir(imagesDir)
                if os.path.isfile(os.path.join(imagesDir, f))
            ]
            for imageFile in imageFileList:
                baseName = os.path.splitext(os.path.basename(imageFile))[0]
                imagePath = os.path.join(imagesDir, imageFile)
                if os.path.exists(os.path.join(masksDir, baseName + ".png")):
                    maskPath = os.path.join(masksDir, baseName + ".png")
                elif os.path.exists(os.path.join(masksDir, baseName + ".tif")):
                    maskPath = os.path.join(masksDir, baseName + ".tif")
                elif os.path.exists(os.path.join(masksDir,
                                                 baseName + ".tiff")):
                    maskPath = os.path.join(masksDir, baseName + ".tiff")
                else:
                    sys.exit(
                        "The image " + imageFile +
                        " does not have a corresponding mask file ending with png, tif or tiff"
                    )
                if not os.path.isfile(imagePath) or not os.path.isfile(
                        maskPath):
                    continue
                if rnd.random() > trainToValidationChance:
                    trainImagesAndMasks[imagePath] = maskPath
                else:
                    validationImagesAndMasks[imagePath] = maskPath

        if len(trainImagesAndMasks) < 1:
            sys.exit("Empty train image list")

        # ensure the validation set is non-empty
        if len(validationImagesAndMasks) < 1:
            for key, value in trainImagesAndMasks.items():
                validationImagesAndMasks[key] = value
                break

        # Training dataset
        dataset_train = mask_rcnn_additional.NucleiDataset()
        dataset_train.initialize(pImagesAndMasks=trainImagesAndMasks,
                                 pAugmentationLevel=augmentationLevel)
        dataset_train.prepare()

        # Validation dataset
        dataset_val = mask_rcnn_additional.NucleiDataset()
        dataset_val.initialize(pImagesAndMasks=validationImagesAndMasks,
                               pAugmentationLevel=0)
        dataset_val.prepare()

        print("training images (with augmentation):", dataset_train.num_images)
        print("validation images (with augmentation):", dataset_val.num_images)

        config = mask_rcnn_additional.NucleiConfig()
        config.IMAGE_MAX_DIM = maxdim
        config.IMAGE_MIN_DIM = maxdim
        config.STEPS_PER_EPOCH = stepCount
        if stepMultiplier is not None:
            steps = int(float(dataset_train.num_images) * stepMultiplier)
            config.STEPS_PER_EPOCH = steps

        config.VALIDATION_STEPS = dataset_val.num_images
        config.DETECTION_NMS_THRESHOLD = detNMSThresh
        config.RPN_NMS_THRESHOLD = rpnNMSThresh
        config.MAX_GT_INSTANCES = 512
        config.BATCH_SIZE = 5000
        config.__init__()

        # Create model in training mode
        mdl = mrcnn_model.MaskRCNN(mode="training",
                                   config=config,
                                   model_dir=os.path.dirname(outModelPath))

        if blankInput:
            mdl.load_weights(inModelPath,
                             by_name=True,
                             exclude=[
                                 "mrcnn_class_logits", "mrcnn_bbox_fc",
                                 "mrcnn_bbox", "mrcnn_mask"
                             ])
        else:
            mdl.load_weights(inModelPath, by_name=True)

        allcount = 0
        logdir = "logs/scalars/" + self.__mParams["model_name"]
        for epochgroup in self.__mParams["epoch_groups"]:
            epochs = int(epochgroup["epochs"])
            if epochs < 1:
                continue
            allcount += epochs
            mdl.train(dataset_train,
                      dataset_val,
                      learning_rate=float(epochgroup["learning_rate"]),
                      epochs=allcount,
                      layers=epochgroup["layers"])

        mdl.keras_model.save_weights(outModelPath)
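# A hedged sketch of the train/validation split used above: each image is sent
# to validation with probability `val_chance`, and the fixed-seed random.Random
# makes the split reproducible across runs. `split_items` is an illustrative
# helper, not part of the training class.
import random

def split_items(items, val_chance=0.2, seed=0):
    rnd = random.Random(seed)
    train, val = [], []
    for item in items:
        (train if rnd.random() > val_chance else val).append(item)
    return train, val

train_a, val_a = split_items(range(10), seed=0)
train_b, val_b = split_items(range(10), seed=0)
assert (train_a, val_a) == (train_b, val_b)  # same seed -> same split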
Ejemplo n.º 28
0
def configuration_model_label_z_score_mixing_matrix(Data,
                                                    runs=1000,
                                                    label="type",
                                                    shuffle_label=False,
                                                    force_simple_graph=False,
                                                    seed_config_mat=None,
                                                    seed_label=None):
    r"""Creates a contact matrix based on the configuration model

        Creates a contact matrix of z-scores for the chosen attribute. The null model can randomize the node
        attributes and/or the node degrees.

        Parameters
        ----------
        Data: Data
            Data object that contains the tij data and metadata for a data set.
        runs: int
            The number of times the null model is sampled. The approach is heuristic, so more runs generally
            yield a more stable result.
        label: str
            The node attribute for which the contact matrix should be built.
        shuffle_label: bool
            If True, extends the null model by also randomizing the node attributes.
        force_simple_graph: bool
            If True, removes the parallel edges and self-loops that the networkx "configuration_model" function
            can produce.
        seed_config_mat : list, default None
            Seeds that make the generated configuration models, and therefore the output, reproducible. Mainly
            intended for tests.
        seed_label : list, default None
            Seeds that make the "randomized" labels, and therefore the output, reproducible. Mainly intended
            for tests.

        Returns
        -------
        contact_matrix : list
            A matrix-like list of lists that contains the z-scores for the given attribute

        References
        ----------
        .. [1] Génois, Mathieu & Zens, Maria & Lechner, Clemens & Rammstedt, Beatrice & Strohmaier, Markus. (2019).
               Building connections: How scientists meet each other during a conference.

        Examples
        ---------
        >>> contact_matrix = configuration_model_label_z_score_mixing_matrix(test_network, test_df, runs=1000,
        >>>                                                                  label="Age", shuffle_label=True,
        >>>                                                                  force_simple_graph=True)
        >>> print(contact_matrix)
        [[1.0320936930842797, -0.7717976357301974, -0.5],
        [-0.7717976357301974, -0.0667601413575786, -1.2927763604862383],
        [-0.5, -1.2927763604862383, 2.632147318581194]]

        See Also
        ---------
        face2face.statistics.null_modell.shuffle_label_z_score_mixing_matrix
        """

    network = create_network_from_data(Data, replace_attr=True, label=label)
    mapping = mapping_function(Data, label)
    data_mixing_matrix = nx.assortativity.attribute_mixing_matrix(
        network, label, mapping=mapping)
    degree_sequence = [v[1] for v in network.degree]
    type_sequence = [network.nodes[n][label] for n in network.nodes]
    matrices = []
    matrices_abs = []
    for _ in range(runs):
        if seed_config_mat is None:
            null_model = nx.configuration_model(degree_sequence)
        else:
            null_model = nx.configuration_model(degree_sequence,
                                                seed=seed_config_mat[_])
        if force_simple_graph:
            null_model = to_simple_graph(null_model)
        if shuffle_label:
            if seed_label is None:
                np.random.shuffle(type_sequence)
            else:
                random.Random(seed_label[_]).shuffle(type_sequence)
        for n, t in zip(null_model.nodes, type_sequence):
            null_model.nodes[n][label] = t
        matrices.append(
            nx.assortativity.attribute_mixing_matrix(null_model,
                                                     label,
                                                     mapping=mapping))
        matrices_abs.append(
            nx.assortativity.attribute_mixing_matrix(null_model,
                                                     label,
                                                     mapping=mapping,
                                                     normalized=False))

    return (data_mixing_matrix - np.array(matrices).mean(axis=0))/np.array(matrices).std(axis=0), matrices, \
        data_mixing_matrix, matrices_abs
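# A hedged sketch of the z-score step in the return statement above: the observed
# mixing matrix is compared against the element-wise mean and standard deviation
# of the null-model samples. `observed` and `null_samples` are illustrative
# stand-ins for `data_mixing_matrix` and `matrices`.
import numpy as np

observed = np.array([[0.30, 0.10],
                     [0.10, 0.50]])
null_samples = observed + np.random.RandomState(0).normal(0.0, 0.05, size=(1000, 2, 2))
z_scores = (observed - null_samples.mean(axis=0)) / null_samples.std(axis=0)
print(z_scores)  # entries near 0 mean the observed value is close to the null-model average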
Ejemplo n.º 29
0
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Random utils"""

import random
import numpy as np
rng = random.Random()
nprng = np.random.RandomState()


def set_seeds(seed):
    """Sets the seeds of both random number generators used by Foolbox.

    Parameters
    ----------
    seed : int
        The seed for both random number generators.

    """
    rng.seed(seed)
    nprng.seed(seed)
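# A hedged usage sketch: after calling set_seeds, draws from both module-level
# generators replay identically. The import path `rngs` is an assumption made
# for this example and may not match the real package layout.
from rngs import rng, nprng, set_seeds

set_seeds(22)
first = (rng.random(), float(nprng.rand()))

set_seeds(22)
second = (rng.random(), float(nprng.rand()))

assert first == second  # both generators are re-seeded to the same state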
Ejemplo n.º 30
0
#ATS:test(SELF, label="PolyClipper 2D (polygon) tests")

import unittest
from math import *
import time
from PolyhedronFileUtilities import *

from Spheral2d import *
from SpheralTestUtilities import fuzzyEqual

# Create a global random number generator.
import random

rangen = random.Random()

#-------------------------------------------------------------------------------
# Make a square
#   3    2
#   |----|
#   |    |
#   |----|
#   0    1
square_points = vector_of_Vector()
for coords in [(0, 0), (10, 0), (10, 10), (0, 10)]:
    square_points.append(Vector(*coords))

#-------------------------------------------------------------------------------
# Make a non-convex notched thingy.
#    6           5      3          2
#    |------------\    /-----------|
#    |             \  /            |