Example #1
def search_engine_3(query_match):
    print('In which year was the movie released?')
    year_user = int(input())

    # Rank the results by closeness to a given year
    years = utils.year_docs(query_match)
    sim_years = utils.sim_docs(years, year_user)

    df = pd.DataFrame(columns=['Title','Intro','Wikipedia Url', 'Similarity'])

    # Keep the 5 documents whose year is closest to the requested one
    for doc_id, score in heapq.nlargest(5, sim_years.items(), key=lambda i: i[1]):
        with open('webpages/tsv/output_%d.tsv' % doc_id) as f:
            fields = f.read().split('\n\n')[1].split('\t')
        title = fields[3].encode('utf8').decode('unicode_escape')
        intro = fields[1].encode('utf8').decode('unicode_escape')
        link = urls[str(doc_id + 1)]  # urls: module-level doc id -> Wikipedia URL map
        new_row = {'Title': title, 'Intro': intro, 'Wikipedia Url': link, 'Similarity': score}
        # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    # Visualization of the top 5 documents related to the query
    d = dict(selector='th', props=[('text-align', 'center')])
    df1 = df.sort_values(by=['Similarity'], ascending=False)
    # hide_index() was removed in pandas 2.0; keep a handle so the Styler can be displayed
    styled = (df1.style.format({'Wikipedia Url': utils.make_clickable}).hide(axis='index')
              .set_table_styles([d]).set_properties(**{'text-align': 'center'})
              .set_properties(subset=['Title'], **{'width': '130px'}))
    
    # Bonus: CO-STARDOM NETWORK
    movies = [movie[0] for movie in heapq.nlargest(10, sim_years.items(), key=lambda i: i[1])]
    G = utils.add_nodes(movies)
    G = utils.add_edges(G)
    network = utils.draw_graph(G)
    return df, network
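utils.year_docs and utils.sim_docs are this project's own helpers and are not part of the listing. A minimal sketch of what sim_docs might compute, assuming it maps each document id to a score that decays with distance from the requested year (hypothetical, not the project's actual code):

def sim_docs(years, year_user):
    # Hypothetical scoring: an exact year match scores 1.0 and the
    # score decays as the gap to the requested year grows.
    return {doc_id: 1.0 / (1.0 + abs(year - year_user))
            for doc_id, year in years.items()}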
Example #2
def main():
    from pascal.pascal_helpers import load_pascal
    from datasets.pascal import PascalSegmentation
    from utils import add_edges
    from imageio import imwrite as imsave  # scipy.misc.imsave was removed in SciPy 1.2
    from skimage.segmentation import mark_boundaries

    ds = PascalSegmentation()
    data = load_pascal("train1")

    data = add_edges(data, independent=False)
    # X, Y, image_names, images, all_superpixels = load_data(
    # "train", independent=False)
    for x, name, sps in zip(data.X, data.file_names, data.superpixels):
        # get_km_segments (a clustering helper from this project) is assumed
        # to be imported at module level in the original file
        segments = get_km_segments(x, ds.get_image(name), sps, n_segments=25)
        boundary_image = mark_boundaries(mark_boundaries(ds.get_image(name), sps),
                                         segments[sps], color=[1, 0, 0])
        imsave("hierarchy_sp_own_25/%s.png" % name, boundary_image)
Example #3
# (Snippet truncated above: patents, patent_classification and
# uspatentcitations are loaded and merged as in Example #8 below.)
patents = utils.merge_patents_and_classification(patents,
                                                 patent_classification)

delta = 10
train_percentage = 0.8

my_range = utils.Range(delta, train_percentage, patents['date'].min(),
                       patents['date'].max())
my_range.print()
G = igraph.Graph(directed=True)

global_assigned_patents = dict()

while my_range.range_end <= my_range.max_date:
    range_patents, range_train_patents, range_test_patents, range_uspatentcitations = utils.find_range_dataframes(
        my_range, patents, uspatentcitations)
    G = utils.add_edges(G, range_uspatentcitations)
    print("finding components")
    connected_components = leidenalg.find_partition(
        G, leidenalg.ModularityVertexPartition)
    subgraphs = connected_components.subgraphs()
    num_subgraphs = len(subgraphs)
    range_assigned_patents = utils.igraph_classify_train_test_graph(
        subgraphs, num_subgraphs, range_patents, range_train_patents,
        range_test_patents)
    global_assigned_patents.update(range_assigned_patents)

    my_range.increase_proportionally(delta, train_percentage)
    my_range.print()
    G = igraph.Graph(directed=True)  # restore the initial graph
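utils.add_edges in this example wraps igraph's bulk edge insertion. One plausible sketch, assuming the citation frame has patent_id and citation_id columns (hypothetical names; the real ones live in utils):

import pandas as pd

def add_edges(G, citations):
    # Register every patent id as a named vertex, then add all
    # citing -> cited edges in a single bulk call.
    ids = pd.unique(citations[['patent_id', 'citation_id']].values.ravel())
    G.add_vertices([str(i) for i in ids])
    G.add_edges(list(zip(citations['patent_id'].astype(str),
                         citations['citation_id'].astype(str))))
    return G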
Example #4
import itertools as it

import networkx as nx
import pandas as pd

import utils as u

# Create empty graph
graph = nx.Graph()

# Load edge and node lists
edgelist = pd.read_csv('edgelist_wmata.csv')
nodelist = pd.read_csv('nodelist_wmata.csv')

# Add edges, nodes, and their attributes
graph = u.add_edges(graph, edgelist)
graph = u.add_nodes(graph, nodelist)

# Find nodes of odd degree and odd node pairs
odd_degree_nodes = u.find_odd_degree_nodes(graph)
odd_node_pairs = list(it.combinations(odd_degree_nodes, 2))

# Compute shortest distance between each pair of nodes in graph
distances = u.find_shortest_distances(odd_node_pairs, graph)

# Create complete graph
graph_complete = u.build_complete_graph(distances)

# Compute minimum weight matching, removing duplicates
matches = u.compute_min_weight_matches(graph_complete)
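For reference, the two loader helpers at the top might look roughly like this, assuming the edge list carries node1/node2 columns and the node list an id column (assumed names; the CSV headers define the real ones):

def add_edges(graph, edgelist):
    # One edge per row; every remaining column becomes an edge attribute.
    for _, row in edgelist.iterrows():
        graph.add_edge(row['node1'], row['node2'],
                       **row.drop(['node1', 'node2']).to_dict())
    return graph

def add_nodes(graph, nodelist):
    # One node per row, keyed by id, with the other columns as attributes.
    for _, row in nodelist.iterrows():
        graph.add_node(row['id'], **row.drop(['id']).to_dict())
    return graph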
Example #5
from graph import Graph
import utils

if __name__ == "__main__":
    v, adjacency_matrix = utils.read_from_file("matrix")
    edges = utils.get_edges(adjacency_matrix)
    g = Graph(v)
    utils.add_edges(g, edges)
    g.dfs(2)
    print("-----------------------------")
    g1 = Graph(v)
    utils.add_edges(g1, edges)
    g1.bfs(4)
    print("-----------------------------")
Example #6
def crazy_visual():
    dataset = NYUSegmentation()
    # load training data
    data = load_nyu(n_sp=500)
    data = add_edges(data)

    for x, image_name, superpixels, y in zip(data.X, data.file_names,
                                             data.superpixels, data.Y):
        print(image_name)
        if int(image_name) != 11:
            continue
        image = dataset.get_image(image_name)
        plt.figure(figsize=(20, 20))
        boundary_image = mark_boundaries(image, superpixels)
        plt.imshow(boundary_image)
        gridx, gridy = np.mgrid[:superpixels.shape[0], :superpixels.shape[1]]

        edges = x[1]
        points_normals = dataset.get_pointcloud_normals(image_name)
        centers2d = get_superpixel_centers(superpixels)
        centers3d = [
            np.bincount(superpixels.ravel(), weights=c.ravel())
            for c in points_normals[:, :, :3].reshape(-1, 3).T
        ]
        centers3d = (np.vstack(centers3d) / np.bincount(superpixels.ravel())).T
        sp_normals = get_sp_normals(points_normals[:, :, 3:], superpixels)
        offset = centers3d[edges[:, 0]] - centers3d[edges[:, 1]]
        offset = offset / np.sqrt(np.sum(offset**2, axis=1))[:, np.newaxis]
        #mean_normal = (sp_normals[edges[:, 0]] + sp_normals[edges[:, 1]]) / 2.
        mean_normal = sp_normals[edges[:, 0]]
        #edge_features = np.arccos(np.abs((offset * mean_normal).sum(axis=1))) * 2. / np.pi
        edge_features = 1 - np.abs((offset * mean_normal).sum(axis=1))
        no_normals = (np.all(sp_normals[edges[:, 0]] == 0, axis=1) +
                      np.all(sp_normals[edges[:, 1]] == 0, axis=1))
        edge_features[no_normals] = 0  # nan normals

        if True:
            coords = points_normals[:, :, :3].reshape(-1, 3)
            perm = np.random.permutation(superpixels.max() + 1)
            # mv here is presumably mayavi.mlab, imported at module level
            mv.points3d(coords[:, 0],
                        coords[:, 1],
                        coords[:, 2],
                        perm[superpixels.ravel()],
                        mode='point')
            #mv.points3d(centers3d[:, 0], centers3d[:, 1], centers3d[:, 2], scale_factor=.04)
            mv.quiver3d(centers3d[:, 0], centers3d[:, 1], centers3d[:, 2],
                        sp_normals[:, 0], sp_normals[:, 1], sp_normals[:, 2])
            mv.show()
        # IPython's Tracer was removed; set_trace is the modern equivalent
        from IPython.core.debugger import set_trace
        set_trace()

        for i, edge in enumerate(edges):
            e0, e1 = edge
            #color = (dataset.colors[y[e0]] + dataset.colors[y[e1]]) / (2. * 255.)
            #f = edge_features[i]
            #if f < 0:
            #e0, e1 = e1, e0
            #f = -f

            #plt.arrow(centers[e0][0], centers[e0][1],
            #centers[e1][0] - centers[e0][0], centers[e1][1] - centers[e0][1],
            #width=f * 5
            #)
            color = "black"
            plt.plot([centers2d[e0][0], centers2d[e1][0]],
                     [centers2d[e0][1], centers2d[e1][1]],
                     c=color,
                     linewidth=edge_features[i] * 5)
        plt.scatter(centers2d[:, 0], centers2d[:, 1], s=100)
        plt.tight_layout()
        plt.xlim(0, superpixels.shape[1])
        plt.ylim(superpixels.shape[0], 0)
        plt.axis("off")
        plt.savefig("figures/normal_relative/%s.png" % image_name,
                    bbox_inches="tight")
        plt.close()
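get_superpixel_centers is not shown in this listing; it presumably follows the same bincount averaging used for centers3d above. A sketch under that assumption:

import numpy as np

def get_superpixel_centers(superpixels):
    # Mean (x, y) pixel coordinate of each superpixel label.
    gridy, gridx = np.mgrid[:superpixels.shape[0], :superpixels.shape[1]]
    counts = np.bincount(superpixels.ravel())
    cx = np.bincount(superpixels.ravel(), weights=gridx.ravel()) / counts
    cy = np.bincount(superpixels.ravel(), weights=gridy.ravel()) / counts
    return np.c_[cx, cy]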
Example #7
def main():
    argv = sys.argv
    print("loading %s ..." % argv[1])
    ssvm = SaveLogger(file_name=argv[1]).load()
    if hasattr(ssvm, 'problem'):
        ssvm.model = ssvm.problem
    print(ssvm)
    if hasattr(ssvm, 'base_ssvm'):
        ssvm = ssvm.base_ssvm
    print("Iterations: %d" % len(ssvm.objective_curve_))
    print("Objective: %f" % ssvm.objective_curve_[-1])
    inference_run = None
    if hasattr(ssvm, 'cached_constraint_'):
        inference_run = ~np.array(ssvm.cached_constraint_)
        print("Gap: %f" %
              (np.array(ssvm.primal_objective_curve_)[inference_run][-1] -
               ssvm.objective_curve_[-1]))

    if len(argv) <= 2:
        argv.append("acc")

    if len(argv) <= 3:
        dataset = 'nyu'
    else:
        dataset = argv[3]

    if argv[2] == 'acc':

        ssvm.n_jobs = 1

        for data_str, title in zip(["train", "val"],
                                   ["TRAINING SET", "VALIDATION SET"]):
            print(title)
            edge_type = "pairwise"

            if dataset == 'msrc':
                ds = MSRC21Dataset()
                data = msrc_helpers.load_data(data_str, which="piecewise_new")
                #data = add_kraehenbuehl_features(data, which="train_30px")
                data = msrc_helpers.add_kraehenbuehl_features(data, which="train")
            elif dataset == 'pascal':
                ds = PascalSegmentation()
                data = pascal_helpers.load_pascal(data_str, sp_type="cpmc")
                #data = pascal_helpers.load_pascal(data_str)
            elif dataset == 'nyu':
                ds = NYUSegmentation()
                data = nyu_helpers.load_nyu(data_str, n_sp=500, sp='rgbd')
            else:
                raise ValueError("Excepted dataset to be 'nyu', 'pascal' or 'msrc',"
                                 " got %s." % dataset)

            if type(ssvm.model).__name__ == "LatentNodeCRF":
                print("making data hierarchical")
                data = pascal_helpers.make_cpmc_hierarchy(ds, data)
                #data = make_hierarchical_data(
                    #ds, data, lateral=True, latent=True, latent_lateral=False,
                    #add_edge_features=False)
            else:
                data = add_edges(data, edge_type)

            if type(ssvm.model).__name__ == 'EdgeFeatureGraphCRF':
                data = add_edge_features(ds, data, depth_diff=True, normal_angles=True)

            if type(ssvm.model).__name__ == "EdgeFeatureLatentNodeCRF":
                data = add_edge_features(ds, data)
                data = make_hierarchical_data(
                    ds, data, lateral=True, latent=True, latent_lateral=False,
                    add_edge_features=True)
            #ssvm.model.inference_method = "qpbo"
            Y_pred = ssvm.predict(data.X)

            if isinstance(ssvm.model, LatentNodeCRF):
                Y_pred = [ssvm.model.label_from_latent(h) for h in Y_pred]
            Y_flat = np.hstack(data.Y)

            print("superpixel accuracy: %.2f"
                  % (np.mean((np.hstack(Y_pred) == Y_flat)[Y_flat != ds.void_label]) * 100))

            if dataset == 'msrc':
                res = msrc_helpers.eval_on_pixels(data, Y_pred,
                                                  print_results=True)
                print("global: %.2f, average: %.2f" % (res['global'] * 100,
                                                       res['average'] * 100))
                #msrc_helpers.plot_confusion_matrix(res['confusion'])
            else:
                hamming, jaccard = eval_on_sp(ds, data, Y_pred,
                                              print_results=True)
                print("Jaccard: %.2f, Hamming: %.2f" % (jaccard.mean(),
                                                        hamming.mean()))

        plt.show()

    elif argv[2] == 'plot':
        data_str = 'val'
        if len(argv) <= 4:
            raise ValueError("Need a folder name for plotting.")
        if dataset == "msrc":
            ds = MSRC21Dataset()
            data = msrc_helpers.load_data(data_str, which="piecewise")
            data = add_edges(data, independent=False)
            data = msrc_helpers.add_kraehenbuehl_features(
                data, which="train_30px")
            data = msrc_helpers.add_kraehenbuehl_features(
                data, which="train")

        elif dataset == "pascal":
            ds = PascalSegmentation()
            data = pascal_helpers.load_pascal("val")
            data = add_edges(data)

        elif dataset == "nyu":
            ds = NYUSegmentation()
            data = nyu_helpers.load_nyu("test")
            data = add_edges(data)

        if type(ssvm.model).__name__ == 'EdgeFeatureGraphCRF':
            data = add_edge_features(ds, data, depth_diff=True, normal_angles=True)
        Y_pred = ssvm.predict(data.X)

        plot_results(ds, data, Y_pred, argv[4])
Example #8
import igraph
import utils
import leidenalg
import pandas as pd

patents = utils.read_patents()
patent_classification = utils.read_patent_classification()
uspatentcitations = utils.read_uspatentcitation()
patents = utils.merge_patents_and_classification(patents, patent_classification)
patents = patents.set_index("id")

print("read graph")
g = igraph.Graph()
g = utils.add_edges(g, uspatentcitations)
# g = g.as_undirected()

print("read components")
connected_components = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)

print("find subgraphs")
subgraphs = connected_components.subgraphs()
num_subgraphs = len(subgraphs)

print("start to classify")
range_assigned_patents = utils.igraph_classify_whole_graph(
    subgraphs, num_subgraphs, patents
)

# data frame test
forecasted_patents = pd.DataFrame.from_dict(
    range_assigned_patents, orient='index',
    columns=['number', 'section_id', 'forecast_section_id'])
utils.write_to_csv(forecasted_patents, 'forecasted_patents')
Example #9
def main():
    argv = sys.argv
    print("loading %s ..." % argv[1])
    ssvm1 = SaveLogger(file_name=argv[1]).load()
    ssvm2 = SaveLogger(file_name=argv[2]).load()

    data_str = 'val'
    if len(argv) <= 3:
        raise ValueError("Need a folder name for plotting.")
    print("loading data...")
    data = load_nyu(data_str, n_sp=500)
    dataset = NYUSegmentation()
    print("done")
    data1 = add_edges(data, kind="pairwise")
    data2 = add_edges(data, kind="pairwise")
    data1 = add_edge_features(dataset, data1)
    data2 = add_edge_features(dataset, data2, depth_diff=True)
    Y_pred1 = ssvm1.predict(data1.X)
    Y_pred2 = ssvm2.predict(data2.X)
    folder = argv[3]

    if not os.path.exists(folder):
        os.mkdir(folder)

    np.random.seed(0)
    for image_name, superpixels, y_pred1, y_pred2 in zip(data.file_names,
                                                         data.superpixels,
                                                         Y_pred1, Y_pred2):
        if np.all(y_pred1 == y_pred2):
            continue
        gt = dataset.get_ground_truth(image_name)
        perf1 = eval_on_pixels(dataset, [gt], [y_pred1[superpixels]],
                               print_results=False)[0]
        perf1 = np.mean(perf1[np.isfinite(perf1)])

        perf2 = eval_on_pixels(dataset, [gt], [y_pred2[superpixels]],
                               print_results=False)[0]
        perf2 = np.mean(perf2[np.isfinite(perf2)])
        if np.abs(perf1 - perf2) < 2:
            continue

        image = dataset.get_image(image_name)
        fig, axes = plt.subplots(2, 3, figsize=(12, 6))
        axes[0, 0].imshow(image)
        axes[0, 0].imshow((y_pred1 != y_pred2)[superpixels], vmin=0, vmax=1,
                          alpha=.7)

        axes[0, 1].set_title("ground truth")
        axes[0, 1].imshow(image)
        axes[0, 1].imshow(gt, alpha=.7, cmap=dataset.cmap, vmin=0,
                          vmax=dataset.cmap.N)
        axes[1, 0].set_title("%.2f" % perf1)
        axes[1, 0].imshow(image)
        axes[1, 0].imshow(y_pred1[superpixels], vmin=0, vmax=dataset.cmap.N,
                          alpha=.7, cmap=dataset.cmap)

        axes[1, 1].set_title("%.2f" % perf2)
        axes[1, 1].imshow(image)
        axes[1, 1].imshow(y_pred2[superpixels], alpha=.7, cmap=dataset.cmap,
                          vmin=0, vmax=dataset.cmap.N)
        present_y = np.unique(np.hstack([y_pred1, y_pred2, np.unique(gt)]))
        present_y = np.array([y_ for y_ in present_y if y_ !=
                              dataset.void_label])
        axes[0, 2].imshow(present_y[:, np.newaxis], interpolation='nearest',
                          cmap=dataset.cmap, vmin=0, vmax=dataset.cmap.N)
        for i, c in enumerate(present_y):
            axes[0, 2].text(1, i, dataset.classes[c])
        for ax in axes.ravel():
            ax.set_xticks(())
            ax.set_yticks(())
        axes[1, 2].set_visible(False)
        fig.savefig(folder + "/%s.png" % image_name, bbox_inches="tight")
        plt.close(fig)