Example no. 1
0
def train(model, iterator, optimizer, criterion, train_loader, visual_path, trg2idx, savepath):
    loss_for_save = float("inf")
    for epoch in range(iterator):
        for k, (src_batch, trg_batch) in enumerate(train_loader):
            src_tensor = torch.LongTensor(src_batch).to(device)
            trg_tensor = torch.LongTensor(trg_batch).to(device)

            optimizer.zero_grad()

            outputs = model(src=src_tensor, trg=trg_tensor, teacher_force=True)

            outputs = outputs[1:].contiguous().view(-1, outputs.shape[-1])
            trg_tensor = trg_tensor[1:].contiguous().view(-1)
            loss = criterion(outputs, trg_tensor)
            visualize(loss,epoch,visual_path,model,src_tensor,trg_tensor,trg2idx=trg2idx)
            loss.backward()

            optimizer.step()
            if loss.item() < loss_for_save:
                loss_for_save = loss.item()
                torch.save(model.state_dict(),savepath)
                print("save model at Epoch {:d}".format(epoch+1))


            print("Epoch: {:d} batch step: [{:d}/{:d}] Loss: {:.4f}".format(epoch + 1, k + 1, len(train_loader), loss))
Example no. 2
0
def try_two_objects_interaction():
    orange_sphere = download_point_cloud.download_to_object(
        "models/orange sphere.ply", 1000)
    orange_sphere.scale(0.3)
    orange_sphere.shift([0, 0.18, 0])
    # visualization.visualize_object([orange_sphere])
    # moving_orange_sphere = PotentialFieldObject(orange_sphere)

    grey_plane = download_point_cloud.download_to_object(
        "models/grey plane.ply", 6000)
    grey_plane.scale(0.1)
    grey_plane.rotate([1, 0, 0], math.radians(90))
    # visualization.visualize(objects=[grey_plane, orange_sphere])
    # moving_orange_sphere.interaction(grey_plane)

    # blue_conus = download_point_cloud.download_to_object("models/blue conus.ply", 3000)
    # blue_conus.scale(0.4)
    # blue_conus.rotate([1, 0, 0], math.radians(30))
    # blue_conus.rotate([0, 1, 0], math.radians(60))
    # blue_conus.shift([0, -0.3, 0])
    # visualization.visualize(objects=[blue_conus, orange_sphere])
    # moving_orange_sphere.interaction(blue_conus)

    # brown_cylinder = download_point_cloud.download_to_object("models/brown cylinder.ply", 3000)
    # brown_cylinder.scale(0.4)
    # brown_cylinder.rotate([1, 0, 0], math.radians(60))
    # brown_cylinder.rotate([0, 1, 0], math.radians(30))
    # brown_cylinder.shift([-0.3, -0.6, 0])
    # visualization.visualize(objects=[brown_cylinder, orange_sphere])
    # moving_orange_sphere.interaction(brown_cylinder)

    violet_thor = download_point_cloud.download_to_object(
        "models/violet thor.ply")
    visualization.visualize(objects=[violet_thor])
Example no. 3
0
def get_focus_of_attention(videopath, frames):
    path = videopath[:videopath.rfind('/')]
    mean_path = join(videopath[:videopath.rfind('/')], "mean_frame.png")

    outputfolder = join(path, "output")
    if not os.path.exists(outputfolder):
        os.mkdir(outputfolder)
    print("///////////////////")
    print("getFrames Begin")
    print("///////////////////")
    get_frames(frames, videopath, join(outputfolder, 'Frames'))
    print("///////////////////")
    print("OpticalFlow Begin")
    print("///////////////////")
    optical_flow(outputfolder, frames)
    print("///////////////////")
    print("Segmantention Begin")
    print("///////////////////")
    segment(outputfolder, frames)
    print("///////////////////")
    print("Prediction Begin")
    print("///////////////////")
    predict(outputfolder, mean_path, frames)
    print("///////////////////")
    print("Visualization Begin")
    print("///////////////////")
    visualize(outputfolder, frames)
    print("///////////////////")
    print("generateVideo Begin")
    print("///////////////////")
    generate_video(outputfolder, frames)
Example no. 4
0
def scatter_plot_molecular_weight(x2d, data_matrix, visualization_options):
    """Plots the 2D PCA with the molecules colored by their molecular weight

    Args:
        x2d (numpy array): 2d matrix containing the points given by a 2D PCA on the data matrix
        data_matrix (numpy array): non normalized data matrix of all the features
        visualization_options (dict): contains the options for visualization (plotting)
    """
    molecular_weight_fig = plt.figure(
        figsize=viz.get_option(visualization_options, 'figsize'))
    ax = molecular_weight_fig.add_subplot(111)

    ax.set_title(r"2D PCA with molecules colored by their molecular volume",
                 fontsize=viz.get_option(visualization_options, 'fontsize'))
    scatter = ax.scatter(x2d[:, 0],
                         x2d[:, 1],
                         c=data_matrix[:, 2],
                         alpha=0.05,
                         s=10,
                         cmap=viz.get_option(visualization_options,
                                             'colormap'))
    colorbar = plt.colorbar(scatter, label=r"Molecular volume [m$^3$/mol]")
    colorbar.set_alpha(0.5)
    colorbar.draw_all()

    molecular_weight_fig.tight_layout()

    if viz.do_plot(visualization_options):
        viz.visualize(molecular_weight_fig, 'molecular_weights',
                      visualization_options)
Example no. 5
0
 def test_linear_classifier_moons(self):
     target = 'label'
     df = moons_data()
     X = df.loc[:, df.columns != target].values
     y = df[target].values
     clf = linear_model.LogisticRegressionCV()
     clf.fit(X, y)
     visualize(X, y, lambda x: clf.predict(x))
Example no. 6
0
 def test_linear_classifier_iris_all(self):
     target = 'species'
     df = iris_data()
     mappings = enumerate_strings(df)
     X = df[['sepal_length', 'sepal_width']].values
     y = df[target].values
     clf = linear_model.LogisticRegressionCV()
     clf.fit(X, y)
     visualize(X, y, lambda x: clf.predict(x))
Example no. 7
0
 def test_backprop_standard_moons(self):
     """
     Check moons data with backprop classifier
     Moons has 2 features
     """
     target = 'label'
     df = normalize_data(moons_data(), target)
     X = df.loc[:, df.columns != target].values
     y = df[target].values
     model = build_model(X, y, 3)
     visualize(X, y, lambda x: predict(model, x))
Example no. 8
0
 def test_backprop_standard_flights_months(self):
     """
     Check flights data with backprop weight optimizer
     Flights data uses the month as the classification target
     """
     target = 'month'
     df = normalize_data(flights_data(), target)
     mappings = enumerate_strings(df)
     X = df.loc[:, df.columns != target].values
     y = df[target].values
     model = build_model(X, y, 3)
     visualize(X, y, lambda x: predict(model, x))
Example no. 9
0
 def test_backprop_standard_iris_all_species(self):
     """
     Check iris data with backprop weight optimizer
     Iris here uses 2 features (sepal length and width) and the three species as classes
     """
     target = 'species'
     df = normalize_data(iris_data(), target)
     mappings = enumerate_strings(df)
     X = df[['sepal_length', 'sepal_width']].values
     y = df[target].values
     model = build_model(X, y, 3)
     visualize(X, y, lambda x: predict(model, x))
Example no. 10
0
def main():
    dataset = Dataset(TRAIN_DATASET_PATH, batch_size=10)
    dataset.shuffle()

    print("Training model...")
    model: Model = train(Model(), dataset)
    print("Done!")

    dataset_test = Dataset(TEST_DATASET_PATH, batch_size=500)
    labels: List[int] = label_test_dataset(model, dataset_test)
    dataset_test.y = labels

    visualize(dataset, dataset_test)
Example no. 11
0
def check_data_generation():
    data_generation.save_images_from_VREP()
    depth_im = image_processing.load_image("3d_map/", "room_depth0.png",
                                           "depth")
    rgb_im = image_processing.load_image("3d_map/", "room_rgb0.png")

    xyz, rgb = image_processing.calculate_point_cloud(rgb_im / 255,
                                                      depth_im / 255)

    temp = PointsObject()
    temp.add_points(xyz, rgb)
    temp.save_all_points("3d_map/", "room")
    visualization.visualize([temp])
Example no. 12
0
def BattleFortune(turns,
                  max_threads,
                  game,
                  province,
                  dom_path,
                  game_path,
                  temp_path,
                  dump_log=False):
    """
    Runs BattleFortune, simulate battles, and return results.
    :param temp_path: OS path to dominions temporary files
    :param turns: Number of Turns to be simulated.
    :param max_threads: Maximum number of simultaneous threads.
    :param game: Game to be simulated.
    :param province: Province where battle occurs.
    :param dom_path: dominions OS path.
    :param game_path: game OS path.
    :param dump_log: If true, created log files.
    :return: True when simulation is completed.
    """

    setup(dom_path=dom_path,
          game_path=game_path,
          max_threads=max_threads,
          temp_path=temp_path)

    logs = batchrun(turns, game, province)
    n = logs['nations']
    w = logs['winners']
    b = logs['battles']

    if dump_log:

        logpath = './battlefortune/logs/' + game + '/'
        if not os.path.exists(logpath):
            os.makedirs(logpath)

        yaml.dump(data=w, stream=open(logpath + 'winlog.yaml', 'w'))
        yaml.dump(data=b, stream=open(logpath + 'battlelog.yaml', 'w'))

        with open(logpath + 'battlelog.json', 'w') as outfile:
            json.dump(b, outfile)

        with open(logpath + 'winlog.json', 'w') as outfile:
            json.dump(w, outfile)

    visualize(nations=n, win_log=w, battle_log=b, rounds=turns)

    return True
Example no. 13
0
def load_many_objects():
    models_list = []

    models_list.append(
        download_point_cloud.download_to_object("models/blue conus.ply"))
    models_list.append(
        download_point_cloud.download_to_object("models/grey plane.ply"))
    models_list.append(
        download_point_cloud.download_to_object("models/red cube.ply"))

    models_list[0].scale(0.1)
    models_list[0].clear()
    visualization.visualize(models_list[0].get_points()[0],
                            models_list[0].get_points()[1])
    visualization.visualize_object(models_list)
Example no. 14
0
def check_normals_estimation():
    stable_object = download_point_cloud.download_to_object("3d_map/room.pcd")
    points = stable_object.get_points()[0]
    normals = stable_object.get_normals() / 100
    normals_object = PointsObject(points + normals)
    visualization.visualize([stable_object, normals_object])

    d_x = 0.1

    new_points, new_normals, _ = data_generation.reduce_environment_points(
        stable_object.get_points()[0], stable_object.get_normals(), d_x)
    new_points_object = PointsObject(new_points,
                                     np.full(new_points.shape, 0.3))
    new_normals_object = PointsObject(new_points + new_normals / 100)
    visualization.visualize([new_points_object, new_normals_object])
Example no. 15
0
    def run(self, frame, EM):
        self.current_container = FrameContainer(frame)
        candidates, auxiliary = self.run_part1()
        self.current_container.traffic_light, self.current_container.auxiliary = self.run_part2(candidates,
                                                                                                auxiliary)
        try:
            # sanity: make sure part2 returns not more than part1 candidates
            assert len(self.current_container.traffic_light) <= len(candidates)
        except AssertionError:
            self.current_container.traffic_light, self.current_container.auxiliary = candidates, auxiliary
        if EM is not None:
            self.run_part3(EM)

        visualize(candidates, auxiliary, self.prev_container, self.current_container, self.focal, self.pp)
        self.prev_container = self.current_container
        self.current_container = None
Example no. 16
0
def run():
    args = argparser()

    path = utils.create_log_dir(sys.argv)
    utils.start(args.http_port)

    env = Env(args)
    agents = [Agent(args) for _ in range(args.n_agent)]
    master = Master(args)

    for agent in agents:
        master.add_agent(agent)
    master.add_env(env)

    success_list = []
    time_list = []

    for idx in range(args.n_episode):
        print('=' * 80)
        print("Episode {}".format(idx + 1))
        # reset the server's stack and timer
        print("Resetting the server...")
        master.reset(path)

        # start the episode
        master.start()
        # train the agents
        master.train()
        print('=' * 80)
        success_list.append(master.infos["is_success"])
        time_list.append(master.infos["end_time"] - master.infos["start_time"])

        if (idx + 1) % args.print_interval == 0:
            print("=" * 80)
            print("EPISODE {}: Avg. Success Rate / Time: {:.2} / {:.2}".format(
                idx + 1, np.mean(success_list), np.mean(time_list)))
            success_list.clear()
            time_list.clear()
            print("=" * 80)

        if (idx + 1) % args.checkpoint_interval == 0:
            utils.save_checkpoints(path, agents, idx + 1)

    if args.visual:
        visualize(path, args)
    print("끝")
    utils.close()
Example no. 17
0
def plot_combination_colors(x2d, data_matrix, visualization_options):
    """Tries to color code the last four features in the data set and apply it to the each point of the 2D PCA.

    Args:
        x2d (numpy array): 2d matrix containing the points given by a 2D PCA on the data matrix
        data_matrix (numpy array): data matrix of all the features
        visualization_options (dict): contains the options for visualization (plotting)
    """

    combination_colors_fig = plt.figure(
        figsize=viz.get_option(visualization_options, 'figsize'))
    ax = combination_colors_fig.add_subplot(111)

    ax.set_title(
        r"Coloring each point with the combination of the last four features",
        fontsize=20)

    # === Generate color space from data ===
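    # Standard CMYK -> RGB conversion: R = (1-C)(1-K), G = (1-M)(1-K), B = (1-Y)(1-K); alpha is set below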
    cyan = data_matrix[:, 4]
    magenta = data_matrix[:, 5]
    yellow = data_matrix[:, 6]
    key = data_matrix[:, 7]
    colors = np.ones((x2d.shape[0], 4))
    colors[:, 0] = (1 - cyan) * (1 - key)
    colors[:, 1] = (1 - magenta) * (1 - key)
    colors[:, 2] = (1 - yellow) * (1 - key)

    # Min-max normalize each of the RGB channels
    for channel in range(3):
        channel_min = np.min(colors[:, channel])
        channel_max = np.max(colors[:, channel])
        colors[:, channel] = (colors[:, channel] - channel_min) / (channel_max - channel_min)

    colors[:, 3] = 0.7

    scatter = ax.scatter(x2d[:, 0], x2d[:, 1], c=colors, s=10)

    combination_colors_fig.tight_layout()

    if viz.do_plot(visualization_options):
        viz.visualize(combination_colors_fig, 'combination_colors',
                      visualization_options)
Example no. 18
0
def run_program():
    """
    Driver function for the text collection, processing,
    topic modeling, and visualization scripts.
    """
    era = input("Select '19th' or '20th' as era for analysis: ")
    for i in tqdm(range(6), desc="Generating and visualizing topics..."):
        # this loop is necessary for the progress bar
        if i == 0:
            source_and_split()
        elif i == 1:
            gather_text(era)
        elif i == 2:
            generate_corpus()
        elif i == 3:
            utility_year()
        elif i == 4:
            # n_topics is 8 by default, n_iterations is 300 by default
            model_topics(era=era)
        elif i == 5:
            # visualization
            visualize(era)
    print("\nProcess complete.")
def run(initial_lettice, rules, max_t):
    """
    Run cellular automaton

    Parameters
    ----------
    initial_lettice: list
        a two dimensional array of states (0, 1)
    rules: list
        the first row states the probabilities for getting sick,
        the second one states the probabilities for becoming healthy.
    max_t: int
        the maximum number of steps

    Returns
    -------
    percentage_of_dead_list: percentage of dead in each checkpoint
    """
    lettice = initial_lettice
    infected_list = rules[0]
    # infected_list[0] = 0
    # healthy_list = rules[1]
    healthy_list = [0 for i in range(9)]

    interval = int(max_t / 40)
    max_t = 40 * interval

    num_dead_list = []

    for t in range(max_t):
        for i in range(len(lettice)):
            for j in range(len(lettice[0])):
                ret = __find(lettice, i, j)
                if lettice[i][j] == 0:
                    # Healthy
                    if random.random() < infected_list[ret]:
                        lettice[i][j] = 1
                else:
                    # Infected
                    if random.random() < healthy_list[ret]:
                        lettice[i][j] = 0
        if t % interval == interval - 1:
            # Count Dead
            f = visualization.visualize(lettice)
            plt.savefig("{0}".format(t))
            plt.close()
            num_dead_list.append(__cnt_dead(lettice))

    return [p / (len(lettice) * len(lettice[0])) for p in num_dead_list]
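# A minimal usage sketch (hypothetical lattice size and probabilities, assuming
# `__find` returns the number of infected neighbours, 0..8, and that the
# module-level imports used by `run` are in place):
if __name__ == "__main__":
    size = 50
    initial = [[1 if random.random() < 0.05 else 0 for _ in range(size)]
               for _ in range(size)]
    rules = [
        [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],  # P(healthy -> infected) per infected-neighbour count
        [0.05] * 9,                                      # P(infected -> healthy) per infected-neighbour count
    ]
    print(run(initial, rules, max_t=200))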
Example no. 20
0
def validate(net, loader, writer):
    global global_step

    net.eval()
    loader.reset()
    res = evaluate(net, loader, max_aabbs=1000)

    for i, (img, aabbs) in enumerate(zip(res.batch_imgs, res.batch_aabbs)):
        vis = visualize(img, aabbs)
        writer.add_image(f'img{i}', vis.transpose((2, 0, 1)), global_step)
        writer.add_scalar('val_loss', res.loss, global_step)
        writer.add_scalar('val_recall', res.metrics.recall(), global_step)
        writer.add_scalar('val_precision', res.metrics.precision(), global_step)
        writer.add_scalar('val_f1', res.metrics.f1(), global_step)

    return res.metrics.f1()
Example no. 21
0
def tab_content(identifier, annotation_types, view, text):
    meta_id = "%s:Metadata" % identifier
    anno_id = "%s:Annotations" % identifier
    content = div(
        {
            'id': identifier,
            'class': 'tab_c1',
            'style': "display: none;"
        }, [])
    sub_tabs = content.add(
        div({'class': 'tab2'},
            [tab_button_sub(meta_id),
             tab_button_sub(anno_id)]))
    content.add_all([
        tab_text_sub(meta_id, dump(view.get('metadata'))),
        tab_text_sub(anno_id, dump(view.get('annotations')))
    ])
    for annotation_type in annotation_types:
        id_sub = identifier + ':' + annotation_type
        sub_tabs.add(tab_button_sub(id_sub))
        content.add(tab_text_sub(id_sub, visualize(id_sub, view, text)))
    return content
Example no. 22
0
from data.make_clusters import *
from visualization.visualize import *
from models.recommender import *
import pandas as pd

print("Loading datasets...")
df_aisles = pd.read_csv("../data/raw/aisles.csv")
df_orders = pd.read_csv("../data/raw/orders.csv")
df_products = pd.read_csv("../data/raw/products.csv")
df_departments = pd.read_csv("../data/raw/departments.csv")
df_order_products__prior = pd.read_csv("../data/raw/order_products__prior.csv")
df_order_products__train = pd.read_csv("../data/raw/order_products__train.csv")

df_orders = createClusters(df_aisles, df_orders, df_products,
                           df_order_products__prior)

visualize(df_aisles, df_departments, df_orders, df_products,
          df_order_products__train, df_order_products__prior)

product_recommender(df_order_products__prior, df_order_products__train,
                    df_orders, df_products)
Example no. 23
0

class DatasetIAMSplit:
    """wrapper which provides a dataset interface for a split of the original dataset"""
    def __init__(self, dataset, start_idx, end_idx):
        assert start_idx >= 0 and end_idx <= len(dataset)

        self.dataset = dataset
        self.start_idx = start_idx
        self.end_idx = end_idx

    def __getitem__(self, idx):
        return self.dataset[self.start_idx + idx]

    def __len__(self):
        return self.end_idx - self.start_idx


if __name__ == '__main__':
    from visualization import visualize
    from coding import encode, decode
    import matplotlib.pyplot as plt

    dataset = DatasetIAM(Path('../data'), (350, 350), (350, 350), caching=False)
    img, gt = dataset[0]
    gt_map = encode(img.shape, gt)
    gt = decode(gt_map)

    plt.imshow(visualize(img / 255 - 0.5, gt))
    plt.show()
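    # A minimal split sketch (hypothetical 90/10 indices, assuming DatasetIAM
    # implements __len__ as the wrapper's own assert implies): the wrapper exposes
    # the same __getitem__/__len__ interface over a slice without copying samples.
    split_at = int(0.9 * len(dataset))
    train_split = DatasetIAMSplit(dataset, 0, split_at)
    val_split = DatasetIAMSplit(dataset, split_at, len(dataset))
    print(f'train split: {len(train_split)} samples, validation split: {len(val_split)} samples')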
Example no. 24
0
    stats.reset()
    agent.play(args.play_games, args)
    stats.write(0, "play")
    if args.visualization_file:
        from visualization import visualize
        # use states recorded during gameplay. NB! Check buffer size, that it can accommodate one game!
        states = [
            agent.mem.getState(i)
            for i in range(agent.history_length, agent.mem.current -
                           agent.random_starts)
        ]
        logger.info("Collected %d game states" % len(states))
        import numpy as np
        states = np.array(states)
        states = states / 255.
        visualize(net.model, states, args.visualization_filters,
                  args.visualization_file)
    sys.exit()

if args.random_steps:
    # populate replay memory with random steps
    logger.info("Populating replay memory with %d random moves" %
                args.random_steps)
    # Set env mode to train so that loss of life is considered terminal
    env.setMode('train')
    stats.reset()
    agent.play_random(args.random_steps, args)
    stats.write(0, "random")

# loop over epochs
for epoch in range(args.start_epoch, args.epochs):
    logger.info("Epoch #%d" % (epoch + 1))
Example no. 25
0

    model.eval()
    if args.cuda:
        model.cuda()
    # read the image
    img = cv2.imread('examples/' + args.img)
    if args.model_type == 'inception':
        # the input image's size is different
        img = cv2.resize(img, (299, 299))
    img = img.astype(np.float32) 
    img = img[:, :, (2, 1, 0)]
    # calculate the gradient and the label index
    gradients, label_index = calculate_outputs_and_gradients([img], model, None, args.cuda)
    gradients = np.transpose(gradients[0], (1, 2, 0))
    img_gradient_overlay = visualize(gradients, img, clip_above_percentile=99, clip_below_percentile=0, overlay=True, mask_mode=True)
    img_gradient = visualize(gradients, img, clip_above_percentile=99, clip_below_percentile=0, overlay=False)

    # calculate the integrated gradients
    attributions = random_baseline_integrated_gradients(img, model, label_index, calculate_outputs_and_gradients, \
                                                        steps=100, num_random_trials=25, cuda=args.cuda)
    img_integrated_gradient_overlay = visualize(attributions, img, clip_above_percentile=99, clip_below_percentile=0, \
                                                overlay=True, mask_mode=True)
    img_integrated_gradient = visualize(attributions, img, clip_above_percentile=99, clip_below_percentile=0, overlay=False)
    output_img = generate_entrie_images(img, img_gradient, img_gradient_overlay, img_integrated_gradient, \
                                        img_integrated_gradient_overlay)
    cv2.imwrite('results/' + args.model_type + '/' + os.path.splitext(args.img)[0] + "_img.jpg", np.uint8(img)[:, :, (2, 1, 0)])
    cv2.imwrite('results/' + args.model_type + '/' + os.path.splitext(args.img)[0] + "_exp.jpg", np.uint8(img_integrated_gradient[:, :, (2, 1, 0)]))
    cv2.imwrite('results/' + args.model_type + '/' + args.img, np.uint8(output_img))

    print(np.uint8(np.max(img_integrated_gradient, 2)))
        d_map[i][j] = 2
    for (i, j) in [(0, 1), (1, 0), (n - 2, 0), (n - 1, 1), (0, n - 2),
                   (1, n - 1), (n - 2, n - 1), (n - 1, n - 2)]:
        d_map[i][j] = 3
    for (i, j) in [(1, 1), (1, n - 2), (n - 2, 1), (n - 2, n - 2)]:
        d_map[i][j] = 4
    dic_position_degree = {(x, y): d_map[x][y]
                           for x in range(n)
                           for y in range(n)}  #key=position,value=degree
    dic_position_steps = {}  #key=position,value=which step
    next = (start_x, start_y)
    for step in range(n * n):
        (x, y) = next  #stand at (x,y) now
        dic_position_steps[(x, y)] = step  #update dic_position_steps
        update_dic_position_degree(x, y, dic_position_degree)
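        # Warnsdorff's rule: jump to the reachable square with the fewest onward moves (lowest degree)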
        tmp = 999999  #find the next move
        for (i, j) in [(1, -2), (2, -1), (2, 1), (1, 2), (-1, 2), (-2, 1),
                       (-2, -1), (-1, -2)]:
            if (x + i) >= 0 and (x + i) < n and (y + j) >= 0 and (
                    y + j) < n and dic_position_degree[(x + i), (y + j)] < tmp:
                tmp = dic_position_degree[(x + i), (y + j)]
                next = ((x + i), (y + j))
        if dic_position_degree[
                next] > 8 and step != n * n - 1:  #The KnightTour couldn't be finished
            return n, dic_position_steps, 2
    return n, dic_position_steps, 0


(N, dic, errCode) = knightTour(20, 4, 5)  # 20*20 matrix, start at (4, 5)
visualize(N, dic, errCode)
Example no. 27
0
  net.load_weights(args.load_weights)

if args.play_games:
  logger.info("Playing for %d game(s)" % args.play_games)
  stats.reset()
  agent.play(args.play_games)
  stats.write(0, "play")
  if args.visualization_file:
    from visualization import visualize
    # use states recorded during gameplay. NB! Check buffer size, that it can accommodate one game!
    states = [agent.mem.getState(i) for i in xrange(agent.history_length, agent.mem.current - agent.random_starts)]
    logger.info("Collected %d game states" % len(states))
    import numpy as np
    states = np.array(states)
    states = states / 255.
    visualize(net.model, states, args.visualization_filters, args.visualization_file)
  sys.exit()

if args.random_steps:
  # populate replay memory with random steps
  logger.info("Populating replay memory with %d random moves" % args.random_steps)
  stats.reset()
  agent.play_random(args.random_steps)
  stats.write(0, "random")

# loop over epochs
for epoch in xrange(args.epochs):
  logger.info("Epoch #%d" % (epoch + 1))

  if args.train_steps:
    logger.info(" Training for %d steps" % args.train_steps)
def generate_chunk(start_time, end_time, raw_audio, truth_labels, boundary_mask_vec, spectrogram_info):
    '''
        Generate a data chunk around a given elephant call. The data
        chunk is of size "chunk_length" seconds and has the call
        of interest randomly placed inside the window

        Parameters:
        - start_time and end_time in seconds
        - truth_labels: Gives the ground truth elephant call labelings
        - boundary_mask_vec: Gives the location of the "fuzzy" boundary regions around each call
        where we want to allow for flexibility in prediction
    '''
    window_size = spectrogram_info['window']
    # Flag for whether to sample oversized windows
    oversize = spectrogram_info['oversize_windows']    

    # Convert the times to .wav frames to help ensure
    # robustness of approach
    start_frame = int(math.floor(start_time * spectrogram_info['samplerate']))
    end_frame = int(math.ceil(end_time * spectrogram_info['samplerate']))

    # Generate oversized windows to allow for random location sampling of the calls.
    if oversize:
        # Formula is: spect_frames = floor((wav_frames - overlap) / hop)
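        # e.g. (hypothetical settings) NFFT=256, hop=64 => overlap=192; a call spanning
        # 12000 .wav frames gives floor((12000 - 192) / 64) = 184 spectrogram frames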
        len_call_spect_frames = math.floor(((end_frame - start_frame) - (spectrogram_info['NFFT'] - spectrogram_info['hop'])) / spectrogram_info['hop'])
        window_size = 2 * window_size - len_call_spect_frames
        
        # Calculate the start first based on true window size
        true_chunk_size = (window_size - 1) * spectrogram_info['hop'] + spectrogram_info['NFFT'] 
        chunk_start = end_frame - true_chunk_size

        # Add the size of the new window
        chunk_size = ((window_size  - 1) * spectrogram_info['hop'] + spectrogram_info['NFFT'])
        chunk_end = chunk_start + chunk_size
        #chunk_end = start_frame + true_chunk_size # Somehow off by one?

        # For now skip if at edges
        if chunk_start < 0 or (chunk_end >= raw_audio.shape[0]):
            print ("skipping too long of call") # Maybe don't need this let us se
            return None, None, None
    else:
        # Convert from window size in spectrogram frames to raw audio size
        # Note we use the -1 term to force the correct number of frames
        # wav = frames * hop - hop + window ==> wav = frames * hop + overlap
        chunk_size = (window_size - 1) * spectrogram_info['hop'] + spectrogram_info['NFFT'] 
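        # e.g. (hypothetical settings) window_size=256 spectrogram frames, hop=64, NFFT=256
        # => chunk_size = 255 * 64 + 256 = 16576 .wav frames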

        # Padding to call
        call_length = end_frame - start_frame # In .wav frames
        padding_length = chunk_size - call_length
        # if padding_length is negative, skip the call
        # but still want to go to the next!!
        if padding_length < 0:
            print ("skipping too long of call") # Maybe don't need this let us se
            return None, None, None
        
        # Randomly split the pad to before and after
        pad_front = np.random.randint(0, padding_length + 1)

        # Do some stuff to avoid the front and end!
        chunk_start = start_frame - pad_front
        chunk_end  = start_frame + call_length + (padding_length - pad_front)
        
        # Do some quick voodoo - assume we can't have the issue where
        # the window of 64 frames is larger than the sound file!
        if (chunk_start < 0):
            # Amount to transfer to end
            chunk_start = 0
            chunk_end = chunk_size
        # See if we have passed the end of the sound file.
        # Note divide by sr to get sound file length in seconds
        if (chunk_end >= raw_audio.shape[0]):
            chunk_end = raw_audio.shape[0]
            chunk_start = raw_audio.shape[0] - chunk_size

        assert(chunk_end - chunk_start == chunk_size)
        # Make sure the call is fully in the region
        assert(chunk_start <= start_frame and chunk_end >= end_frame)

    NFFT = spectrogram_info['NFFT']
    samplerate = spectrogram_info['samplerate']
    hop = spectrogram_info['hop']
    max_freq = spectrogram_info['max_freq']
    pad_to = spectrogram_info['pad_to']
    # Extract the spectrogram
    [spectrum, freqs, t] = ml.specgram(raw_audio[chunk_start: chunk_end], 
                NFFT=NFFT, Fs=samplerate, noverlap=(NFFT - hop), window=ml.window_hanning, pad_to=pad_to)

    # Check our math
    assert(spectrum.shape[1] == window_size)
    
    # Cutout the high frequencies that are not of interest
    spectrum = spectrum[(freqs <= max_freq)]
    # Get the corresponding labels
    # Calculate the relative start time w.r.t.
    # the entire spectrogram for the given chunk
    start_spec = max(math.ceil((chunk_start - spectrogram_info['NFFT'] / 2.) / spectrogram_info['hop']), 0)
    end_spec = start_spec + spectrum.shape[1] 
    
    data_labels = truth_labels[start_spec: end_spec]
    boundary_mask = boundary_mask_vec[start_spec: end_spec]

    if VERBOSE:
        new_features = 10*np.log10(spectrum)
        visualize(new_features.T, labels=data_labels, boundaries=boundary_mask)

    # We want spectrograms to be time x freq
    spectrum = spectrum.T
    return spectrum, data_labels, boundary_mask
def generate_empty_chunks(n, raw_audio, label_vec, boundary_mask_vec, spectrogram_info):
    """
        Generate n empty data chunks by uniformly sampling
        time sections with no elephant calls present
    """
    # Step through the labels vector and collect the indeces from
    # which we can define a window with no elephant call
    # i.e. all start indeces such that the window (start, start + window_sz)
    # does not contain an elephant call
    # In the case where we are considering uncertainty around boundaries,
    # we add the label_vec and boundary_mask_vec to prevent having negative
    # samples including uncertain boundaries

    valid_starts = []
    window_size = spectrogram_info['window']
    updated_labels = label_vec + boundary_mask_vec
    # Flag for whether to sample oversized windows
    oversize = spectrogram_info['oversize_windows']
    if oversize:
        window_size *= 2
    # Step backwards and keep track of how far away the
    # last elephant call was
    last_elephant = 0  # For now is the size of the window
    for i in range(label_vec.shape[0] - 1, -1, -1):
        last_elephant += 1

        # Check if we encounter an elephant call
        # Note: do >= in case where boundary + label = 2
        if (updated_labels[i] >= 1):
            last_elephant = 0

        # If we haven't seen an elephant call
        # for a chunk size, then record this index
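        # i.e. index i is recorded exactly when updated_labels[i : i + window_size] is all zero (no call, no boundary)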
        if (last_elephant >= window_size):
            valid_starts.append(i)

    # Generate n uniformly random
    # empty chunks
    empty_features = []
    empty_labels = []
    empty_boundary_masks = []
    NFFT = spectrogram_info['NFFT']
    samplerate = spectrogram_info['samplerate']
    hop = spectrogram_info['hop']
    max_freq = spectrogram_info['max_freq']
    pad_to = spectrogram_info['pad_to']

    for i in range(n):
        # Generate a valid empty start chunk
        # index by randomly sampling from our
        # ground truth labels
        start = np.random.choice(valid_starts)

        # Now we have to do a little back conversion to get
        # the raw audio index in raw audio frames
        # The number of hops in marks the first raw audio frame to use
        chunk_start = start * spectrogram_info['hop']
        chunk_size = (window_size - 1) * spectrogram_info['hop'] + spectrogram_info['NFFT'] 
        chunk_end = int(chunk_start + chunk_size)

        # Get the spectrogram chunk
        # Extract the spectrogram
        [spectrum, freqs, t] = ml.specgram(raw_audio[chunk_start: chunk_end], 
                    NFFT=NFFT, Fs=samplerate, noverlap=(NFFT - hop), window=ml.window_hanning, pad_to=pad_to)
        
        # Cutout the high frequencies that are not of interest
        spectrum = spectrum[(freqs <= max_freq)]
        assert(spectrum.shape[1] == window_size)

        data_labels = label_vec[start : start + spectrum.shape[1]]
        # Make sure that no call exists in the chunk
        assert(np.sum(data_labels) == 0)
        boundary_mask = boundary_mask_vec[start : start + spectrum.shape[1]]
        assert(np.sum(boundary_mask) == 0)

        if VERBOSE:
            new_features = 10*np.log10(spectrum)
            visualize(new_features.T, labels=data_labels)

        # We want spectrograms to be time x freq
        spectrum = spectrum.T
        empty_features.append(spectrum)
        empty_labels.append(data_labels)
        empty_boundary_masks.append(boundary_mask)

    return empty_features, empty_labels, empty_boundary_masks
Example no. 30
0
        centers = kmeans.cluster_centers_

        sse[k] = kmeans.inertia_
        wcss.append(kmeans.inertia_)
        print(sse[k])
        plt.figure()
        plt.plot(list(sse.keys()), list(sse.values()))
        plt.xlabel('Cluster')
        plt.ylabel('Sum of squared Errors of prediction')
        outfile = 'results/elbow-plot/kmeans-elbowmethod-result' + '-' + file_code + '.jpg'
        plt.savefig(outfile)

    kneedle = KneeLocator(range_n_clusters,
                          wcss,
                          S=1.0,
                          invert=False,
                          direction='decreasing')
    print('Optimal number of clusters: ', kneedle.knee)

    # optimal = int(input('Enter optimal number of clusters: '))
    kmeans = KMeans(kneedle.knee, random_state=42)
    labels = kmeans.fit_predict(X.values)
    visualize(df, labels, file_code)
    cluster_labels = pd.DataFrame(labels,
                                  index=X.index,
                                  columns=['Cluster_Labels'])
    cluster_labels.to_csv('results/labels/labels' + '-' + file_code + '.csv',
                          sep=',',
                          encoding='utf-8',
                          index=True)
Example no. 31
0
def demo():
	##########################
	# 1. GET NEW DATASET     #
	# 2. ADD LOCATIONS       #
	# 3. TRAIN CLASSIFIERS   #
	# 4. MAKE PREDICTIONS    #
	# 5. FILTER, SORT, GROUP #
	# 6. VISUALIZE           #
	##########################
	print()

	######################
	# 1. GET NEW DATASET #
	######################
	print('\n1. GET NEW DATASET')
	# read Twitter tokens
	consumer_key, consumer_secret, access_token, access_token_secret = read_twitter_tokens('tokens/twitter_tokens.txt')
	# connect with the Twitter API
	twitter_api: tweepy.API = connect_to_twitter_api(consumer_key, consumer_secret, access_token, access_token_secret)
	# define keywords
	# COVID_KEYWORDS: List[str] = [
	# 	'corona', 'covid', 'quaranteen', 'home', 'stay', 'inside', 'virology', 'doctor', 'nurse', 'virus', 'grandma',
	# 	'vaccin', 'sars', 'alone', 'strongtogether', 'elbow', 'mouth mask', 'protective equipment', 'hospitalization',
	# 	'increas', 'death', 'dead', 'impact', 'ICU', 'intensive care', 'applause', 'stay healthy', 'take care', 'risk',
	# 	'risk group', 'environment',
	# 	'U+1F637',  # Medical Mask Emoji
	# 	'U+1F691',  # Ambulance Emoji
	# 	'U+1F92E',  # Vomiting Emoji
	# 	'U+1F912',  # Thermometer Emoji
	# ]
	# COVID_FAKE_KEYWORDS: List[str] = [
	# 	'coronascam', 'fakecorona', 'fake', 'coronahoax', 'hoaxcorona', 'gooutside', 'donotstayhome', 'fuckvirology',
	# 	'donttrustvirologists', 'coronadoesntexist', 'chinesevirushoax',
	# ]
	keywords: Dict[str, int] = {
		'covid': 100,  # get 100 tweets with 'covid' in it
		'corona': 100,  # get 100 tweets with 'corona' in it
		'coronahoax': 100,  # get 100 tweets with 'coronahoax' in it
	}
	# get new dataset
	new_dataset: List[Tweet] = get_new_tweets(twitter_api, keywords)
	print(f'First tweet:\n{new_dataset[0]}')
	# save new dataset
	save_tweets(new_dataset, 'tweets/new_dataset.pickle')

	####################
	# 2. ADD LOCATIONS #
	####################
	print('\n2. ADD LOCATION TO THOSE TWEETS')
	# read Google token
	geocoding_api_key: str = read_google_token('tokens/google_token.txt')
	# initialize Google API
	google_api: GoogleV3 = GoogleV3(api_key=geocoding_api_key)
	# add location to tweets when possible
	num_tweets_with_location_before: int = 0
	num_tweets_with_location_after: int = 0
	for tweet in new_dataset:
		if tweet.country_code is not None and tweet.continent is not None:
			num_tweets_with_location_before += 1
		tweet.add_location(google_api)
		if tweet.country_code is not None and tweet.continent is not None:
			num_tweets_with_location_after += 1
	print(f'Number of tweets with location before: {num_tweets_with_location_before}')
	print(f'Number of tweets with location after: {num_tweets_with_location_after}')
	# save new dataset with locations included
	save_tweets(new_dataset, 'tweets/new_dataset.pickle')

	########################
	# 3. TRAIN CLASSIFIERS #
	########################
	print('\n3. TRAIN CLASSIFIERS')
	# load train dataset
	train_dataset = load_tweets('tweets/train_dataset.pickle')
	# pre-process train dataset
	X: List[str] = [tweet.text for tweet in train_dataset]
	X: List[str] = preprocess_corpus(X)
	labels: List[bool] = [tweet.denier for tweet in train_dataset]

	# train on part of the data
	# train, validation split
	X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2)
	# vectorize
	vectorizer: CountVectorizer = CountVectorizer()
	X_train = vectorizer.fit_transform(X_train)
	X_test = vectorizer.transform(X_test)

	# create Complement Naive Bayes classifier
	naive_bayes_classifier = ComplementNB()
	# train Complement Naive Bayes classifier
	naive_bayes_classifier = naive_bayes_classifier.fit(X_train, y_train)
	# validate Complement Naive Bayes classifier
	naive_bayes_accuracy: float = naive_bayes_classifier.score(X_test, y_test)
	print(f'Naive Bayes accuracy:\t{naive_bayes_accuracy * 100:>3.2f}%')
	# save Naive Bayes classifier
	save_model(naive_bayes_classifier, 'models/naive_bayes.pickle')

	# create Decision Tree classifier
	decision_tree_classifier = DecisionTreeClassifier()
	# train Decision Tree classifier
	decision_tree_classifier = decision_tree_classifier.fit(X_train, y_train)
	# validate Decision Tree classifier
	decision_tree_accuracy: float = decision_tree_classifier.score(X_test, y_test)
	print(f'Decision Tree accuracy:\t{decision_tree_accuracy * 100:>3.2f}%')
	# save Decision Tree classifier
	save_model(decision_tree_classifier, 'models/decision_tree.pickle')

	# retrain best model on all of the data
	# vectorize
	vectorizer: CountVectorizer = CountVectorizer()
	X = vectorizer.fit_transform(X)
	best_model = ComplementNB().fit(X, labels) \
		if naive_bayes_accuracy >= decision_tree_accuracy \
		else DecisionTreeClassifier().fit(X, labels)
	# save best model
	save_model(best_model, 'models/best_model.pickle')

	#######################
	# 4. MAKE PREDICTIONS #
	#######################
	print('\n4. USE CLASSIFIERS')
	# load test dataset
	test_dataset = load_tweets('tweets/test_dataset.pickle')

	# pre-processing
	X: List[str] = [tweet.text for tweet in test_dataset]
	X: List[str] = preprocess_corpus(X)
	# vectorize
	X = vectorizer.transform(X)
	# make predictions
	y = best_model.predict(X)

	# add predictions to tweet
	for tweet, label in zip(test_dataset, y):
		tweet.denier = label

	##########################
	# 5. FILTER, SORT, GROUP #
	##########################
	print('\n5. USE VARIOUS FILTERS')
	# use filters
	tweets_filtered_by_hashtag: List[Tweet] = filter_by_hashtag(test_dataset, '#coronahoax')
	tweets_filtered_by_hashtags_all: List[Tweet] = filter_by_hashtags_all(test_dataset, ['#corona', '#coronahoax'])
	tweets_filtered_by_hashtags_any: List[Tweet] = filter_by_hashtags_any(test_dataset, ['#corona', '#coronahoax', '#coronavirus', '#covid19'])
	tweets_filtered_before: List[Tweet] = filter_before(test_dataset, datetime(2020, 4, 19, 18, 58, 46))
	tweets_filtered_at: List[Tweet] = filter_at(test_dataset, datetime(2020, 4, 19, 18, 58, 46))
	tweets_filtered_after: List[Tweet] = filter_after(test_dataset, datetime(2020, 4, 19, 18, 58, 46))
	tweets_filtered_between: List[Tweet] = filter_between(test_dataset, datetime(2020, 4, 19, 18, 0, 0), datetime(2020, 4, 19, 19, 0, 0))
	tweets_filtered_by_country_code: List[Tweet] = filter_by_country_code(test_dataset, 'US')
	tweets_filtered_by_country_codes: List[Tweet] = filter_by_country_codes(test_dataset, ['US', 'GB'])
	tweets_filtered_by_continent: List[Tweet] = filter_by_continent(test_dataset, 'Europe')
	tweets_filtered_by_continents: List[Tweet] = filter_by_continents(test_dataset, ['Europe', 'North America'])
	tweets_sorted_by_date_ascending: List[Tweet] = sort_by_date_ascending(test_dataset)
	tweets_sorted_by_date_descending: List[Tweet] = sort_by_date_descending(test_dataset)
	tweets_grouped_by_country_code: defaultdict = group_by_country_code(test_dataset)
	tweets_grouped_by_continent: defaultdict = group_by_continent(test_dataset)

	################
	# 6. VISUALIZE #
	################
	print('\n6. VISUALIZE')
	# continents
	CONTINENTS: Dict[str, str] = {
		'Asia': 'asia',
		'Europe': 'europe',
		'Africa': 'africa',
		'North America': 'north_america',
		'South America': 'south_america',
		'Oceania': 'oceania',
		'Antarctica': 'antartica',
	}

	# create series to plot
	num_tweets_per_country_per_continent_absolute = defaultdict(lambda: defaultdict(int))
	num_tweets_per_country_absolute = defaultdict(lambda: defaultdict(int))
	num_tweets_per_continent_absolute = defaultdict(lambda: defaultdict(int))
	for tweet in test_dataset:
		if tweet.has_location():
			country_code: str = tweet.country_code.lower()
			continent: str = CONTINENTS[tweet.continent]

			num_tweets_per_country_per_continent_absolute[tweet.continent][country_code] += 1
			num_tweets_per_country_absolute['World'][country_code] += 1
			num_tweets_per_continent_absolute['World'][continent] += 1

	# visualize plots
	title = 'Absolute number of tweets per country and per continent'
	series = num_tweets_per_country_per_continent_absolute
	filename = 'num_tweets_per_country_per_continent_absolute'
	visualize(title, series, filename, per_continent=False)

	title = 'Absolute number of tweets per country'
	series = num_tweets_per_country_absolute
	filename = 'num_tweets_per_country_absolute'
	visualize(title, series, filename, per_continent=False)

	title = 'Absolute number of tweets per continent'
	series = num_tweets_per_continent_absolute
	filename = 'num_tweets_per_continent_absolute'
	visualize(title, series, filename, per_continent=True)
#print constellationNames, len(constellationNames)
#print len(starsNeedClustering)
# if the user runs kmeans 
if args.algorithm == 'Kmeans':
	K = args.K
	# run K-means++ with K centroids on the stars that need clustering
	standardKMeans = algorithms.KMeansPlusPlus(starsNeedClustering, K)
	#standardKMeans.randInitCentroid()
	#standardKMeans.decisiveInitCentroid()
	#standardKMeans.runStandardKmeansWithIter(2000)
	#standardKMeans.runStandardKmeansWithoutIter()
	standardKMeans.runKmeansPlusPlus()
	# output the stars that belong to centroid 1
	# cluster_1 = algorithms.getCluster(1, assignments)

	visualization.visualize(standardKMeans.assignments, 'Kmeans')
	#print len(assignments), len(cluster_1), cluster_1
	# print centroids, assignments

# if the user runs DBSCAN
elif args.algorithm == 'DBSCAN':
	Eps = args.Eps
	minDist = args.minDist
	#print Eps, minDist, len(starsNeedClustering)
	standardDBS = algorithms.densityBasedClustering(starsNeedClustering, Eps, minDist) 
	standardDBS.runDBA()
	#print standardDBS.getNumOfClusters()
	noise = standardDBS.getNoise()
	#print 'Number of noise stars: ', len(noise)
	visualization.visualize(standardDBS.assignments, 'DBSCAN')