Code Example #1
def _get_normalized_density(data, tag_groups, discretizing_divisor, begin_time,
                            end_time):
    def data_producer():
        l = len(tag_groups)
        for i, tag_group in enumerate(tag_groups):
            if type(tag_group) is str:
                tag_group = {tag_group}
            print("{}/{} ({})".format(i + 1, l, str(tag_group)))
            try:
                T, R = extract_rating_by_time(
                    data,
                    lambda v: any([tag in v["tags"] for tag in tag_group]))
                yield T, R, tag_group
            except RuntimeError as re:
                warnings.warn("No such tags: {}".format(str(tag_group)),
                              RuntimeWarning)

    def frequency_sorter_function(X):
        return np.max(X) - np.min(X)

    tag_time_rating = []
    for T, R, tag_group in data_producer():
        T = discretize(T,
                       discretizing_divisor=discretizing_divisor,
                       begin_x=begin_time,
                       end_x=end_time,
                       normalize=True)
        swing = frequency_sorter_function(T)
        tag_time_rating.append([swing, tag_group, T])

    return sorted(tag_time_rating, key=lambda x: -x[0])
Code Example #2
File: game.py Project: LeendersR/MClone
    def hit_test(self, position, direction, max_distance=8):
        """Tests whether a block is hit.

        We draw a line from the position in the given direction for up to
        max_distance block-lengths. If at any point we hit a block we return
        that block together with the previous block, i.e. the block we pass
        through just before hitting it.

        Args:
            position: The position from which we draw a line.
            direction: The direction we draw the line in.
            max_distance: The maximum length of the line, in blocks.

        Returns:
            A tuple (prev, curr) with the previous and current block if a block
            has been hit, (None, None) otherwise.
        """
        x, y, z = position
        x_dir, y_dir, z_dir = direction
        num_steps = 10
        x_step = x_dir/num_steps
        y_step = y_dir/num_steps
        z_step = z_dir/num_steps
        prev_pos = None
        for step in xrange(num_steps*max_distance):
            block_pos = discretize((x, y, z))
            if prev_pos != block_pos and self.world.occupied(block_pos):
                return prev_pos, block_pos
            prev_pos = block_pos
            x, y, z = x+x_step, y+y_step, z+z_step
        return None, None
Code Example #3
File: game.py Project: LeendersR/MClone
    def position_intersects_object(self, position, obj):
        """Checks whether a position intersects with an object."""
        x, y, z = discretize(obj.position)
        for dy in xrange(obj.height):
            if position == (x, y - dy, z):
                return True
        return False
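Neither of the game.py excerpts shows the discretize helper itself. In a block world it usually just snaps a continuous position to the coordinates of the block containing it; the one-liner below is only a sketch of that assumed behaviour, not the project's actual code:

def discretize(position):
    """Snap a continuous (x, y, z) position to integer block coordinates (assumed behaviour)."""
    return tuple(int(round(coord)) for coord in position)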
Code Example #4
File: markov_q_rl.py Project: Proch92/SIR-exam
    def action(self, state, train=True):
        if train and (random.random() < self.exploration_p):
            return self.action_space.sample()

        self.exploration_p -= EXPLORATION_DEC
        if self.exploration_p < MIN_EXPLORATION:
            self.exploration_p = MIN_EXPLORATION

        discrete = utils.discretize(state, self.observation_space, QUANTA)
        return np.argmax(self.q[tuple(discrete)])
Code Example #5
def get_wheel_bbox(points, shifted_threshold, dim=500):
    heightmap = np.zeros((dim, dim)).astype('uint8')
    x_co, y_co, z_co = utils.discretize(points)
    x_co = x_co[z_co > shifted_threshold]
    y_co = y_co[z_co > shifted_threshold]
    heightmap[x_co, y_co] = 255
    kernel = np.ones((5, 5), np.uint8)
    heightmap = cv2.dilate(heightmap, kernel, iterations=2)
    _, cnt, _ = cv2.findContours(heightmap, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    c = max(cnt, key=cv2.contourArea)
    rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rect)
    bbox = np.int0(box)
    return bbox, heightmap
Code Example #6
def generate_heightmap(points, threshold, dim=500, mask=False):
    heightmap = np.zeros((dim, dim, 3)).astype('float')
    x_co, y_co, z_co = utils.discretize(points)
    x_co -= np.amin(x_co)
    y_co -= np.amin(y_co)
    min_val = np.amin(z_co)
    z_co -= min_val
    threshold -= min_val
    utils.publish_threshold_frame(threshold)
    heightmap[x_co, y_co, 0] = z_co * 20
    heightmap[x_co, y_co, 1] = z_co * 20
    heightmap[x_co, y_co, 2] = z_co * 20
    if mask:
        masked_x_co = x_co[z_co < threshold]
        masked_y_co = y_co[z_co < threshold]
        heightmap[masked_x_co, masked_y_co, 1] = 1
    return heightmap, threshold
Code Example #7
def generate_heightmap(points, dim=500):
    '''
    Create a 2D representation of the point cloud.
    Input: Nx3 array of points in the cloud
           Dimension of the 2D image
    Output: 2D image of size (dim x dim) representing the point cloud data
    '''
    heightmap = np.zeros((dim, dim)).astype('float')
    x_co, y_co, z_co = utils.discretize(points)
    x_co -= np.amin(x_co)
    y_co -= np.amin(y_co)
    min_val = np.amin(z_co)
    z_co -= min_val
    heightmap[x_co, y_co] = z_co * 10
    return heightmap
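Code Examples #5 to #7 all call utils.discretize(points) to turn metric point-cloud coordinates into integer heightmap indices. The project's helper is not reproduced on this page; the following is only a minimal sketch of one plausible implementation, assuming a fixed metres-per-pixel scale (the RESOLUTION constant is hypothetical):

import numpy as np

RESOLUTION = 0.01  # hypothetical metres-per-pixel scale

def discretize(points, resolution=RESOLUTION):
    """Quantize an Nx3 point array into integer (x, y) grid indices (assumed behaviour).

    The height (z) is returned unchanged so callers can threshold or scale it,
    as the heightmap examples above do.
    """
    points = np.asarray(points, dtype=float)
    x_co = np.floor(points[:, 0] / resolution).astype(int)
    y_co = np.floor(points[:, 1] / resolution).astype(int)
    z_co = points[:, 2]
    return x_co, y_co, z_co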
Code Example #8
File: train_search.py Project: nightstorm0909/EvNAS
def train(model, train_queue, criterion, optimizer, gen):
    model.train()
    for step, (inputs, targets) in enumerate(train_queue):
        #model.copy_arch_parameters(population.get_population()[step % args.pop_size].arch_parameters)
        #assert utils.check_equality(model, population.get_population()[step % args.pop_size].arch_parameters)
        discrete_alphas = utils.discretize(
            population.get_population()[step % args.pop_size].arch_parameters,
            device)
        model.copy_arch_parameters(discrete_alphas)
        assert utils.check_equality(model, discrete_alphas)
        n = inputs.size(0)
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
        population.get_population()[step % args.pop_size].objs.update(
            loss.data, n)
        population.get_population()[step % args.pop_size].top1.update(
            prec1.data, n)
        population.get_population()[step % args.pop_size].top5.update(
            prec5.data, n)

        #population.get_population()[step % args.pop_size].accumulate()

        #print(step)
        if (step + 1) % 100 == 0:
            #	break
            logging.info("[{} Generation]".format(gen))
            logging.info(
                "Using Training batch #{} for {}/{} architecture with loss: {}, prec1: {}, prec5: {}"
                .format(
                    step, step % args.pop_size,
                    len(population.get_population()),
                    population.get_population()[step % args.pop_size].objs.avg,
                    population.get_population()[step % args.pop_size].top1.avg,
                    population.get_population()[step %
                                                args.pop_size].top5.avg))
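Code Examples #8 and #9 rely on utils.discretize(arch_parameters, device) to turn a candidate's continuous architecture weights into hard operation choices before copying them into the shared model. The EvNAS helper itself is not shown here; below is only a sketch of the usual idea, assuming arch_parameters is an iterable of 2-D tensors with one row per edge and one column per candidate operation:

import torch

def discretize(arch_parameters, device):
    """One-hot encode the argmax operation of every edge (sketch, not EvNAS's code)."""
    discrete = []
    for alpha in arch_parameters:
        hard = torch.zeros_like(alpha).to(device)
        rows = torch.arange(alpha.size(0), device=device)
        hard[rows, alpha.argmax(dim=1).to(device)] = 1.0
        discrete.append(hard)
    return discrete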
Code Example #9
File: train_search.py Project: nightstorm0909/EvNAS
def validation(model, valid_queue, criterion, gen):
    #model.eval()
    for i in range(len(population.get_population())):
        valid_start = time.time()
        #model.copy_arch_parameters(population.get_population()[i].arch_parameters)
        #assert utils.check_equality(model, population.get_population()[i].arch_parameters)
        discrete_alphas = utils.discretize(
            population.get_population()[i].arch_parameters, device)
        model.copy_arch_parameters(discrete_alphas)
        assert utils.check_equality(model, discrete_alphas)
        population.get_population()[i].objs.reset()
        population.get_population()[i].top1.reset()
        population.get_population()[i].top5.reset()
        with torch.no_grad():
            for step, (inputs, targets) in enumerate(valid_queue):
                n = inputs.size(0)
                inputs = inputs.to(device)
                targets = targets.to(device)
                logits = model(inputs)
                loss = criterion(logits, targets)

                prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
                population.get_population()[i].objs.update(loss.data, n)
                population.get_population()[i].top1.update(prec1.data, n)
                population.get_population()[i].top5.update(prec5.data, n)

                #print(step)
                #if (step + 1) % 10 == 0:
                #	break
        #print("Finished in {} seconds".format((time.time() - valid_start) ))

        logging.info(
            "[{} Generation] {}/{} finished with validation loss: {}, prec1: {}, prec5: {}"
            .format(gen, i + 1, len(population.get_population()),
                    population.get_population()[i].objs.avg,
                    population.get_population()[i].top1.avg,
                    population.get_population()[i].top5.avg))
Code Example #10
def analyze_density_by_time(data):

    print("Analyze density by time")
    clean_folders([RESULT_FOLDER])

    T, R = extract_rating_by_time(data, lambda x: True)

    print("Analyze density by time: hourly for weekdays")
    rolled_timestamps = _roll_timestamps(T, _TimestampFormat.HOURLY_WEEKDAY)
    N_rolled, R_rolled = discretize(X=rolled_timestamps,
                                    Y=R,
                                    bins=2 * 24,
                                    normalize=False)
    corresponding_time_ticks = np.arange(0, 24 * 60 * 60, 30 * 60)
    path_to_save = os.path.join(RESULT_FOLDER, "hourly_weekday")
    draw_rating_hourly(corresponding_time_ticks,
                       R_rolled,
                       N_rolled,
                       path_to_save=path_to_save)
    path_to_save = os.path.join(RESULT_FOLDER, "lores_hourly_weekday")
    draw_rating_hourly(corresponding_time_ticks,
                       R_rolled,
                       N_rolled,
                       path_to_save=path_to_save,
                       figsize=(12, 6))

    print("Analyze density by time: daily for weeks")
    rolled_timestamps = _roll_timestamps(T, _TimestampFormat.DAILY)
    N_rolled, R_rolled = discretize(X=rolled_timestamps,
                                    Y=R,
                                    bins=7 * 24,
                                    normalize=False)
    corresponding_time_ticks = np.arange(0, 7 * 24 * 60 * 60, 60 * 60)
    path_to_save = os.path.join(RESULT_FOLDER, "daily")
    draw_rating_daily(corresponding_time_ticks,
                      R_rolled,
                      N_rolled,
                      path_to_save=path_to_save)
    path_to_save = os.path.join(RESULT_FOLDER, "lores_daily")
    draw_rating_daily(corresponding_time_ticks,
                      R_rolled,
                      N_rolled,
                      path_to_save=path_to_save,
                      figsize=(12, 6))

    print("Analyze density by time: monthly")
    rolled_timestamps = _roll_timestamps(T, _TimestampFormat.MONTHLY)
    N_rolled, R_rolled = discretize(X=rolled_timestamps,
                                    Y=R,
                                    bins=30 * 12,
                                    normalize=False)
    corresponding_time_ticks = np.arange(0, 30 * 24 * 60 * 60, 2 * 60 * 60)
    path_to_save = os.path.join(RESULT_FOLDER, "monthly")
    draw_rating_monthly(corresponding_time_ticks,
                        R_rolled,
                        N_rolled,
                        path_to_save=path_to_save)
    path_to_save = os.path.join(RESULT_FOLDER, "lores_monthly")
    draw_rating_monthly(corresponding_time_ticks,
                        R_rolled,
                        N_rolled,
                        path_to_save=path_to_save,
                        figsize=(12, 6))
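The discretize used in Code Examples #1, #10 and #13 appears to come from the same analysis project and behaves like a histogram binner. Its real implementation is not included here; the sketch below is an assumption that only covers the keyword combinations used in Examples #10 and #13 (per-bin counts of X, plus per-bin sums of Y when Y is given):

import numpy as np

def discretize(X, Y=None, bins=48, normalize=False):
    """Bin X into `bins` equal-width intervals (assumed behaviour).

    Returns the per-bin counts of X and, when Y is given, also the per-bin
    sums of Y, matching how Examples #10 and #13 unpack the result.
    """
    counts, edges = np.histogram(X, bins=bins)
    if normalize:
        counts = counts / max(counts.sum(), 1)
    if Y is None:
        return counts
    sums, _ = np.histogram(X, bins=edges, weights=Y)
    return counts, sums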
Code Example #11
def main():
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-b", "--biom-file", help="An input biom file", required=True)
    parser.add_argument("-m", "--mapping-file", help="A mapping file", required=True)
    parser.add_argument("-c", "--class-label", help="Which data are we trying to analyze", required=True)
    parser.add_argument(
        "-d",
        "--subclass",
        action="append",
        help="Subselect only some of the data - if specified, this should appear at least twice with the adequate options. ex: -c SEX -d male -d female",
        required=False,
    )
    parser.add_argument("-o", "--output-folder", help="The folder to output our data to", required=True)
    parser.add_argument("-p", "--min-features", help="Minimum number of features to test", default=50, required=False)
    parser.add_argument("-q", "--max-features", help="Maximum number of features to test", default=150, required=False)
    parser.add_argument(
        "-s",
        "--step-size",
        help="Step size within the range of the number of features to be tested",
        default=1,
        required=False,
    )
    # parser.add_argument("-p", "--predictor", help="Classifier/Predictor used", default="nbc", required=False) # As of today, contains only nbc
    parser.add_argument(
        "-j",
        "--objective-function",
        help="Objective function for the feature selection algorithm",
        default="mim",
        required=False,
    )
    parser.add_argument(
        "-t",
        "--output-type",
        help="data output format. default: CSV options: csv, matlab, r, numpy",
        default="csv",
        required=False,
    )
    parser.add_argument(
        "-f",
        "--select-field",
        help="Field to extract a subset of the data. e.g. EN_BIOME, COUNTRY. The default considers the whole dataset",
        default=None,
        required=False,
    )
    parser.add_argument(
        "-g",
        "--value-field",
        action="append",
        help="When used with -f specifies the value of the field to filter - THIS IS REQUIRED IF -f if present",
        default=None,
        required=False,
    )
    parser.add_argument(
        "-k",
        "--cluster",
        action="append",
        help="Allows to subgroup some of the labels. Ex: -k 'Vegan Vegan+Seafood'. The different values are separated with semi colon. Requires at least two appearances. This cannot be used in conjunction with the -d option",
        default=None,
        required=False,
    )
    ## Need to be continued!!!!
    print "Definition of the arguments done"
    global output_type
    print "Start of the program"

    args = parser.parse_args()

    output_type = args.output_type.lower()

    # if our folder doesn't exist create it
    if not os.path.isdir(args.output_folder):
        os.mkdir(args.output_folder)

    nb_features = range(int(args.min_features), int(args.max_features) + 1, int(args.step_size))
    print "nb_features prepared"

    matrix, site_names, otu_ids, otu_phylo = utils.load_biom(args.biom_file)
    metadata = utils.load_map(args.mapping_file)
    class_labels = []
    for sample in site_names:
        class_labels.append(metadata[sample][args.class_label])

    print "class_labels loaded"

    interesting_samples = range(0, len(site_names))

    if args.select_field is not None:
        interesting_fields = [it.lower() for it in args.value_field]
        print interesting_fields
        subsample_habitat = [
            i
            for i, sample in enumerate(site_names)
            if metadata[sample][args.select_field].lower() in interesting_fields
        ]
        interesting_samples = list(set(interesting_samples).intersection(set(subsample_habitat)))

    if args.subclass is not None:
        target_labels = [it.lower() for it in args.subclass]
        subsamples = [i for i in xrange(0, len(class_labels)) if class_labels[i].lower() in target_labels]
        interesting_samples = list(set(interesting_samples).intersection(set(subsamples)))

    if (args.cluster is not None) and (args.subclass is None):
        print "In da cluster separation"
        clusters = [it for it in args.cluster]
        clusters_dict = {}
        print "Initial Dictionary created"
        for idx, a_cluster in enumerate(clusters):
            print "In da loop"
            # keys = a_cluster.split()
            keys = a_cluster.split(";")
            print keys
            for a_key in keys:
                clusters_dict[a_key.lower()] = idx

        subsamples = [i for i in xrange(0, len(class_labels)) if class_labels[i].lower() in clusters_dict]
        interesting_samples = list(set(interesting_samples).intersection(set(subsamples)))
        for i in subsamples:
            class_labels[i] = "cluster" + str(clusters_dict[class_labels[i].lower()])

    matrix = matrix[interesting_samples, :]
    class_labels = [class_labels[i] for i in interesting_samples]

    class_labels, labels_key = utils.discretize(class_labels)

    matrix = matrix + 1
    row_sums = matrix.sum(axis=1)

    matrix = matrix / row_sums[:, np.newaxis]
    matrix = np.ceil(matrix / matrix.min())

    # So far, we have the biom file open and the environment parameters
    # We can now launch our feature selection algorithm
    further_param = []  # This has to be adapted to the case we are using other objective functions

    nb_tests = 10
    nb_folds = 5

    launch_tests_feature_selection(
        matrix,
        np.array(map(int, class_labels)),
        site_names,
        otu_ids,
        otu_phylo,
        args.objective_function,
        nb_features,
        nb_tests,
        args.output_folder,
        nb_folds,
    )
    avg_consistency, max_consistency, min_consistency, std_consistency = get_consistencies(
        nb_features, len(otu_ids), nb_tests, args.output_folder
    )
    save_results(
        "consistency",
        os.path.join(args.output_folder, "consistencyresults.txt"),
        avg_consistency,
        max_consistency,
        min_consistency,
        std_consistency,
    )

    avg_accuracy_g, max_accuracy_g, min_accuracy_g, std_accuracy_g, avg_accuracy, max_accuracy, min_accuracy, std_accuracy = get_accuracies(
        nb_features, len(otu_ids), nb_tests, args.output_folder
    )
    save_results(
        "Accuracy",
        os.path.join(args.output_folder, "accuracyGaussianresults.txt"),
        avg_accuracy_g,
        max_accuracy_g,
        min_accuracy_g,
        std_accuracy_g,
    )
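In this script, utils.discretize(class_labels) encodes string class labels as integers and returns the mapping alongside. That helper is not part of the listing; a minimal sketch of the assumed behaviour:

def discretize(labels):
    """Map arbitrary labels to integer codes (assumed behaviour, not the project's code).

    Returns the recoded list and a key from each original label to its code,
    matching `class_labels, labels_key = utils.discretize(class_labels)` above.
    """
    labels_key = {}
    coded = []
    for label in labels:
        if label not in labels_key:
            labels_key[label] = len(labels_key)
        coded.append(labels_key[label])
    return coded, labels_key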
Code Example #12
File: markov_q_rl.py Project: Proch92/SIR-exam
    def reward(self, state, action, reward, new_state):
        state = utils.discretize(state, self.observation_space, QUANTA)
        new_state = utils.discretize(new_state, self.observation_space, QUANTA)

        self.q[tuple(
            state)][action] = reward + GAMMA * max(self.q[tuple(new_state)])
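Code Examples #4 and #12 index the Q-table with utils.discretize(state, self.observation_space, QUANTA). The actual helper from Proch92/SIR-exam is not reproduced here; the following is only a sketch, assuming observation_space is a Gym Box and QUANTA is the number of bins per state dimension:

import numpy as np

QUANTA = 10  # assumed number of bins per state dimension

def discretize(state, observation_space, quanta=QUANTA):
    """Map a continuous state to one integer bin index per dimension (assumed behaviour)."""
    low, high = observation_space.low, observation_space.high
    # Fractional position of the state inside each dimension's range, clipped to [0, 1).
    ratios = np.clip((np.asarray(state) - low) / (high - low), 0.0, 1.0 - 1e-9)
    return (ratios * quanta).astype(int)

The resulting integer vector is what Example #12 wraps in tuple(...) before indexing self.q.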
Code Example #13
def analyze_rating_density(data):

    print("Analyze rating overall density")
    clean_folders([RESULT_FOLDER])

    ratings = sorted([v["rating"] for v in data.values()], reverse=True)
    names = []
    hlines = []
    idxes = list(range(8)) + [11, 15, 20, 30, 40, 60, 80]
    for i in idxes:
        r = ratings[i]
        try:
            record = next(record for record in data.values()
                          if record["rating"] == r)
            hlines.append([
                0.61 + 0.285 * (i % 2), r, record["title"] + " (р:" + str(r) +
                ", " + timestamp_to_date(record["timestamp"]) + ")"
            ])
            names.append(record["title"])
        except StopIteration as si:
            warnings.warn(
                "Could not find record with such rating: {}".format(r),
                RuntimeWarning)

    gini = _compute_gini_coefficient(ratings)
    p999 = np.percentile(ratings, 99.9)
    p99 = np.percentile(ratings, 99)
    p95 = np.percentile(ratings, 95)
    mean = np.mean(ratings)
    median = np.median(ratings)

    hlines.append([1.0, p999, "99.9 перцентиль ({0:.2f})".format(p999)])
    hlines.append([1.0, p99, "99 перцентиль ({0:.2f})".format(p99)])
    hlines.append([1.0, p95, "95 перцентиль ({0:.2f})".format(p95)])
    hlines.append([
        0.61, 0.0,
        "Индекс Джини: {0:.4f}, среднее: {1:.2f}, медиана: {2:.2f}".format(
            gini, mean, median)
    ])
    scatter_top_posts = list(zip([1.0] * 80, ratings[:80]))
    name = os.path.join(RESULT_FOLDER, "rating_violinplot.png")
    draw_rating_violinplot(ratings,
                           hlines=hlines,
                           scatter=scatter_top_posts,
                           path_to_save=name)
    name = os.path.join(RESULT_FOLDER, "lores_rating_violinplot.png")
    hlines = [hlines[0], hlines[2], hlines[4], hlines[6]] + hlines[-4:]
    draw_rating_violinplot(ratings,
                           hlines=hlines,
                           scatter=scatter_top_posts[:25],
                           path_to_save=name,
                           figsize=(8, 10))

    n_bins_for_logplot = 100
    N = discretize([r for r in ratings if 100 < r <= 10000],
                   bins=n_bins_for_logplot,
                   normalize=False)
    name = os.path.join(RESULT_FOLDER, "logplot.png")
    draw_post_number_logplot([n_bins_for_logplot * i for i in range(len(N))],
                             [N], [u"Количество постов"],
                             path_to_save=name)
    name = os.path.join(RESULT_FOLDER, "lores_logplot.png")
    draw_post_number_logplot([n_bins_for_logplot * i for i in range(len(N))],
                             [N], [u"Количество постов"],
                             path_to_save=name,
                             figsize=(14, 8))
Code Example #14
def preprocess_state(state, state_grid):
    """Map a continuous state to its discretized representation."""
    return discretize(state, state_grid)
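Here (and in Code Example #16) discretize(state, state_grid) maps a continuous state onto a tuple of bin indices that can be used as a dictionary key. A minimal sketch, assuming state_grid is a list with one array of interior bin edges per state dimension (create_uniform_grid below is a hypothetical helper for building such a grid):

import numpy as np

def create_uniform_grid(low, high, bins=(10, 10)):
    """One array of interior bin edges per dimension (hypothetical helper)."""
    return [np.linspace(low[d], high[d], bins[d] + 1)[1:-1] for d in range(len(bins))]

def discretize(state, state_grid):
    """Return the tuple of bin indices the continuous state falls into (assumed behaviour)."""
    return tuple(int(np.digitize(s, edges)) for s, edges in zip(state, state_grid))

With this, discretize([0.25, -1.2], create_uniform_grid([0.0, -2.0], [1.0, 2.0])) yields (2, 2), a hashable key of the kind used to index Q in Example #16.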
Code Example #15
def eval(context, question):
    with open(os.path.join(config.data_dir, "train", "word2idx.pkl"), "rb") as wi, \
         open(os.path.join(config.data_dir, "train", "char2idx.pkl"), "rb") as ci, \
         open(os.path.join(config.data_dir, "train", "word_embeddings.pkl"), "rb") as wb, \
         open(os.path.join(config.data_dir, "train", "char_embeddings.pkl"), "rb") as cb:
        word2idx = pickle.load(wi)
        char2idx = pickle.load(ci)
        word_embedding_matrix = pickle.load(wb)
        char_embedding_matrix = pickle.load(cb)

    # transform them into Tensors
    word_embedding_matrix = torch.from_numpy(
        np.array(word_embedding_matrix)).type(torch.float32)
    char_embedding_matrix = torch.from_numpy(
        np.array(char_embedding_matrix)).type(torch.float32)
    idx2word = dict([(y, x) for x, y in word2idx.items()])

    context = clean_text(context)
    context = [w for w in word_tokenize(context) if w]

    question = clean_text(question)
    question = [w for w in word_tokenize(question) if w]

    if len(context) > config.max_len_context:
        print("The context is too long. Maximum accepted length is",
              config.max_len_context, "words.")
    if max([len(w) for w in context]) > config.max_len_word:
        print("Some words in the context are longer than", config.max_len_word,
              "characters.")
    if len(question) > config.max_len_question:
        print("The question is too long. Maximum accepted length is",
              config.max_len_question, "words.")
    if max([len(w) for w in question]) > config.max_len_word:
        print("Some words in the question are longer than",
              config.max_len_word, "characters.")
    if len(question) < 3:
        print(
            "The question is too short. It needs to be at least a three words question."
        )

    context_idx = np.zeros([config.max_len_context], dtype=np.int32)
    question_idx = np.zeros([config.max_len_question], dtype=np.int32)
    context_char_idx = np.zeros([config.max_len_context, config.max_len_word],
                                dtype=np.int32)
    question_char_idx = np.zeros(
        [config.max_len_question, config.max_len_word], dtype=np.int32)

    # replace 0 values with word and char IDs
    for j, word in enumerate(context):
        if word in word2idx:
            context_idx[j] = word2idx[word]
        else:
            context_idx[j] = 1
        for k, char in enumerate(word):
            if char in char2idx:
                context_char_idx[j, k] = char2idx[char]
            else:
                context_char_idx[j, k] = 1

    for j, word in enumerate(question):
        if word in word2idx:
            question_idx[j] = word2idx[word]
        else:
            question_idx[j] = 1
        for k, char in enumerate(word):
            if char in char2idx:
                question_char_idx[j, k] = char2idx[char]
            else:
                question_char_idx[j, k] = 1

    model = BiDAF(word_vectors=word_embedding_matrix,
                  char_vectors=char_embedding_matrix,
                  hidden_size=config.hidden_size,
                  drop_prob=config.drop_prob)
    try:
        if config.cuda:
            model.load_state_dict(
                torch.load(os.path.join(config.squad_models,
                                        "model_final.pkl"))["state_dict"])
        else:
            model.load_state_dict(
                torch.load(
                    os.path.join(config.squad_models, "model_final.pkl"),
                    map_location=lambda storage, loc: storage)["state_dict"])
        print("Model weights successfully loaded.")
    except:
        print(
            "Model weights not found, initialized model with random weights.")
    model.to(device)
    model.eval()
    with torch.no_grad():
        context_idx = torch.tensor(context_idx, dtype=torch.int64).unsqueeze(0).to(device)
        context_char_idx = torch.tensor(context_char_idx, dtype=torch.int64).unsqueeze(0).to(device)
        question_idx = torch.tensor(question_idx, dtype=torch.int64).unsqueeze(0).to(device)
        question_char_idx = torch.tensor(question_char_idx, dtype=torch.int64).unsqueeze(0).to(device)

        pred1, pred2 = model(context_idx, context_char_idx, question_idx,
                             question_char_idx)
        starts, ends = discretize(pred1.exp(), pred2.exp(), 15, False)
        prediction = " ".join(context[starts.item():ends.item() + 1])

    return prediction
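In this last context, discretize has yet another meaning: it converts BiDAF's start and end probability distributions into concrete token indices. The project's own implementation is not shown on this page; below is a minimal sketch of the standard approach, picking the (start, end) pair with the highest joint probability among spans of at most max_len tokens (the no_answer flag of the original call is ignored in this sketch):

import torch

def discretize(p_start, p_end, max_len=15, no_answer=False):
    """Pick the most probable answer span from two probability rows (sketch, assumption).

    p_start and p_end are (batch, seq_len) tensors of probabilities, as
    produced by pred1.exp() and pred2.exp() above.
    """
    batch, seq_len = p_start.shape
    # joint[b, i, j] = p_start[b, i] * p_end[b, j]
    joint = torch.bmm(p_start.unsqueeze(2), p_end.unsqueeze(1))
    # Keep only spans with start <= end < start + max_len.
    ones = torch.ones(seq_len, seq_len, device=joint.device)
    band = torch.triu(ones) - torch.triu(ones, diagonal=max_len)
    joint = joint * band
    flat = joint.view(batch, -1).argmax(dim=1)
    return flat // seq_len, flat % seq_len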
Code Example #16
def sarsa(env, num_episodes, state_grid, alpha, gamma=1.0):
    np.random.seed(928)

    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    nA = brain.vector_action_space_size
    Q = defaultdict(lambda: np.zeros(nA))

    epsilon = 0.7
    min_epsilon = 0.05
    decay_epsilon = 0.999

    num_episodes_concluded = 0

    scores = []
    max_avg_score = -np.inf

    for i_episode in range(1, num_episodes + 1):
        # monitor progress

        if i_episode % 100 == 0:
            print("\rEpisode {}/{} - Epsilon: {} Max avg score: {}".format(
                i_episode, num_episodes, epsilon, max_avg_score),
                  end="")
            sys.stdout.flush()

        env_info = env.reset(train_mode=True)[brain_name]
        state = env_info.vector_observations[0]
        state = discretize(state, state_grid)

        epsilon = max(min_epsilon, decay_epsilon * epsilon)
        action = np.random.choice(np.arange(nA),
                                  p=epsilon_greedy_probs(
                                      Q[state], epsilon, nA))

        total_reward = 0

        while True:

            env_info = env.step(action)[brain_name]

            next_state = env_info.vector_observations[0]
            next_state = discretize(next_state, state_grid)
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            total_reward += reward

            if done:
                num_episodes_concluded += 1
                break

            next_action = np.random.choice(np.arange(nA),
                                           p=get_probs(Q[next_state], epsilon,
                                                       nA))

            Q[state + (action, )] = Q[state + (action, )] + alpha * (
                reward + gamma * Q[next_state +
                                   (next_action, )] - Q[state + (action, )])
            state = next_state
            action = next_action

        scores.append(total_reward)
        if len(scores) > 100:
            avg_score = np.mean(scores[-100:])
            if avg_score > max_avg_score:
                max_avg_score = avg_score

        if max_avg_score >= 13:
            print("The expect average score was bet. avg score: {}".format(
                max_avg_score))
            break

    print("\n\n{}/{} were completly finished".format(num_episodes_concluded,
                                                     num_episodes))

    return Q, scores