Code Example #1
import sys

import util  # project-local helper module (assumed available)
from sklearn.decomposition import TruncatedSVD


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4
    genome_tags = util.read_genome_tags()

    # Map the tag ids in the TF-IDF matrix columns to readable tag names
    tf_idf_matrix = util.get_tf_idf_matrix(genre)
    tagid_list = list(tf_idf_matrix.columns.values)
    tag_list = genome_tags[genome_tags['tagId'].isin(tagid_list)]['tag'].tolist()

    svd = TruncatedSVD(n_components=no_of_components, n_iter=100, random_state=None)
    svd.fit(tf_idf_matrix)

    # For each latent concept, pair every tag with its component weight,
    # sorted by absolute weight
    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(svd.components_[i]):
            concept.append((tag_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(genre, concepts)
    # output_file is assumed to be defined at module level
    util.write_output_file(genre, concepts, output_file)
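For reference, the same TruncatedSVD pattern in self-contained form, with a hypothetical toy TF-IDF matrix standing in for util.get_tf_idf_matrix (TruncatedSVD, fit and components_ are real scikit-learn API):

import pandas as pd
from sklearn.decomposition import TruncatedSVD

# Hypothetical 4-movie x 3-tag TF-IDF matrix
tf_idf = pd.DataFrame([[0.9, 0.1, 0.0], [0.8, 0.2, 0.1],
                       [0.0, 0.7, 0.9], [0.1, 0.6, 0.8]],
                      columns=['funny', 'dark', 'tense'])
svd = TruncatedSVD(n_components=2, n_iter=100, random_state=0)
svd.fit(tf_idf)
for i, row in enumerate(svd.components_):
    # Tags sorted by absolute weight, as in the example above
    concept = sorted(zip(tf_idf.columns, row), key=lambda t: abs(t[1]), reverse=True)
    print('Concept', i, concept)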
Code Example #2
import sys

import util  # project-local helper module (assumed available)
from sklearn.decomposition import PCA


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    genre = sys.argv[1]
    no_of_components = 4
    imdb_actor_info = util.read_imdb_actor_info()

    # Map actor ids in the TF-IDF matrix columns to readable actor names
    tf_idf_matrix = util.get_tf_idf_matrix(genre)
    actor_ids = list(tf_idf_matrix.columns.values)
    actor_list = imdb_actor_info[imdb_actor_info['id'].isin(actor_ids)]['name'].tolist()

    pca = PCA(n_components=no_of_components)
    pca.fit(tf_idf_matrix)

    # For each latent concept, pair every actor with its component weight,
    # sorted by absolute weight
    concepts = []
    for i in range(no_of_components):
        concept = []
        for j, component in enumerate(pca.components_[i]):
            concept.append((actor_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(genre, concepts)
    # output_file is assumed to be defined at module level
    util.write_output_file(genre, concepts, output_file)
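One difference worth keeping in mind between this example and the previous one: PCA mean-centers the matrix before decomposing it, while TruncatedSVD works on the raw values, so the two can surface different concepts from the same TF-IDF input. A quick toy comparison (both classes are real scikit-learn API):

import numpy as np
from sklearn.decomposition import PCA, TruncatedSVD

X = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]])
print(PCA(n_components=1).fit(X).components_)           # direction of max variance after centering
print(TruncatedSVD(n_components=1).fit(X).components_)  # direction of max energy in the raw matrix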
Code Example #3
import sys

import pandas as pd

import util  # project-local helper module (assumed available)


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    movieid = int(sys.argv[1])
    mlmovies = util.read_mlmovies()
    movie_actors = util.read_movie_actor()
    imdb_actor_info = util.read_imdb_actor_info()

    input_movie = mlmovies[mlmovies['movieid'] == movieid]['moviename'].values[0]
    # Actors already cast in the input movie are excluded from the ranking
    actors_of_movie = movie_actors.where(movie_actors['movieid'] == movieid).dropna().loc[:, 'actorid'].unique()

    movie_matrix = util.get_movie_tf_idf_matrix()
    actor_matrix = util.get_actor_tf_idf_matrix()
    input_movie_vector = pd.DataFrame(movie_matrix.loc[movieid])
    # Similarity of every actor to the movie: each actor's TF-IDF row
    # dotted with the movie's TF-IDF vector
    similarity_matrix = actor_matrix.dot(input_movie_vector)
    similarity_matrix = similarity_matrix[~similarity_matrix.index.isin(actors_of_movie)]

    actors = []
    for index, row in similarity_matrix.iterrows():
        actor_name = imdb_actor_info[imdb_actor_info['id'] == index]['name'].values[0]
        actors.append((index, actor_name, similarity_matrix.loc[index][movieid]))
    actors.sort(key=lambda tup: tup[2], reverse=True)

    # no_of_actors and output_file are assumed to be defined at module level
    util.print_output(movieid, input_movie, actors[:no_of_actors])
    util.write_output_file(movieid, input_movie, actors[:no_of_actors], output_file)
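The ranking here is a plain matrix-vector product. A minimal sketch with hypothetical shapes mirroring what the util helpers are assumed to return (DataFrame.dot is real pandas API):

import pandas as pd

# Two actors (ids 101, 102) over two tags; one movie (id 7) over the same tags
actor_matrix = pd.DataFrame([[0.2, 0.8], [0.9, 0.1]], index=[101, 102], columns=['t1', 't2'])
input_movie_vector = pd.DataFrame([[0.5], [0.5]], index=['t1', 't2'], columns=[7])
print(actor_matrix.dot(input_movie_vector))  # one similarity score per actor, in column 7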
Code Example #4
import sys

import util  # project-local helper module (assumed available)
from scipy.spatial.distance import cosine


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])
    imdb_actor_info = util.read_imdb_actor_info()
    input_actor = imdb_actor_info[imdb_actor_info['id'] ==
                                  actorid]['name'].values[0]

    tf_idf_matrix = util.get_tf_idf_matrix()
    input_actor_tf_idf = tf_idf_matrix.loc[actorid]

    # Rank every other actor by cosine similarity to the input actor
    actors = []
    for index, row in tf_idf_matrix.iterrows():
        actor_name = imdb_actor_info[imdb_actor_info['id'] ==
                                     index]['name'].values[0]
        actors.append((index, actor_name, 1 - cosine(row, input_actor_tf_idf)))
    other_actors = list(filter(lambda tup: tup[0] != actorid, actors))
    other_actors.sort(key=lambda tup: tup[2], reverse=True)

    # no_of_actors and output_file are assumed to be defined at module level
    util.print_output(actorid, input_actor, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor, other_actors[:no_of_actors],
                           output_file)
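Note that scipy.spatial.distance.cosine returns the cosine distance, so the 1 - cosine(...) above is the cosine similarity:

from scipy.spatial.distance import cosine

u, v = [1.0, 0.0], [1.0, 1.0]
print(1 - cosine(u, v))  # ~0.7071, the cosine of the 45-degree angle between u and v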
Code Example #5
import sys

import pandas as pd

import util  # project-local helper module (assumed available)
from scipy.spatial.distance import cosine
# SVD below is assumed to be scikit-learn's TruncatedSVD, aliased on import
from sklearn.decomposition import TruncatedSVD as SVD


def main():
    if len(sys.argv) < 2:
        print('Expected arguments are not provided.')
        return
    actorid = int(sys.argv[1])
    imdb_actor_info = util.read_imdb_actor_info()
    input_actor_name = imdb_actor_info[imdb_actor_info['id'] == actorid]['name'].values[0]

    tf_idf_matrix = util.get_tf_idf_matrix()

    # Project all actors into a lower-dimensional latent space
    # (no_of_components is assumed to be defined at module level)
    svd = SVD(n_components=no_of_components)
    svd.fit(tf_idf_matrix)
    svd_df = pd.DataFrame(svd.transform(tf_idf_matrix), index=tf_idf_matrix.index)

    input_actor_row = svd_df.loc[actorid]

    # Rank every other actor by cosine similarity in the latent space
    actors = []
    for index, row in svd_df.iterrows():
        name = imdb_actor_info[imdb_actor_info['id'] == index]['name'].values[0]
        actors.append((index, name, 1 - cosine(row, input_actor_row)))
    other_actors = list(filter(lambda tup: tup[0] != actorid, actors))
    other_actors.sort(key=lambda tup: tup[2], reverse=True)
    # no_of_actors and output_file are assumed to be defined at module level
    util.print_output(actorid, input_actor_name, other_actors[:no_of_actors])
    util.write_output_file(actorid, input_actor_name, other_actors[:no_of_actors], output_file)
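The only change from Example #4 is that similarity is computed in the reduced latent space rather than on raw TF-IDF rows. The reduction step in isolation, on hypothetical toy data (fit_transform is real scikit-learn API):

import pandas as pd
from sklearn.decomposition import TruncatedSVD

X = pd.DataFrame([[1.0, 0.0, 0.0], [0.9, 0.1, 0.0], [0.0, 0.1, 1.0]], index=[101, 102, 103])
svd = TruncatedSVD(n_components=2, random_state=0)
reduced = pd.DataFrame(svd.fit_transform(X), index=X.index)
print(reduced)  # three actors projected onto two latent dimensions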
Code Example #6
import numpy as np

import util  # project-local helper module (assumed available)


def latent_rating_semantics(rating_matrix):
    mlratings = util.read_mlratings()
    ratings_list = mlratings.rating.unique()
    # For each latent concept, pair every rating value with its weight,
    # sorted by absolute weight
    concepts = []
    for i in range(no_of_components):  # no_of_components: module-level global
        concept = []
        for j, component in enumerate(np.transpose(rating_matrix)[i]):
            concept.append((ratings_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(concepts, 'Rating')
    util.write_output_file(concepts, output_file, 'Rating')  # output_file: module-level global
Code Example #7
import numpy as np

import util  # project-local helper module (assumed available)


def latent_year_semantics(year_matrix):
    mlmovies = util.read_mlmovies()
    year_list = mlmovies.year.unique()
    # Same pattern as Example #6, with movie release years as labels
    concepts = []
    for i in range(no_of_components):  # module-level global
        concept = []
        for j, component in enumerate(np.transpose(year_matrix)[i]):
            concept.append((year_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(concepts, 'Year')
    util.write_output_file(concepts, output_file, 'Year')  # module-level global
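Examples #6 and #7 (and #9, #10, and #12 below) are the same routine with different label sources. Under the same assumed globals (no_of_components, output_file) and util module, they could be collapsed into one hypothetical helper:

import numpy as np

def latent_semantics(matrix, labels, title):
    # Generic version of the latent_*_semantics functions in this listing
    concepts = []
    for i in range(no_of_components):
        concept = sorted(zip(labels, np.transpose(matrix)[i]),
                         key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(concepts, title)
    util.write_output_file(concepts, output_file, title)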
Code Example #8
import json
import random

from cryptography.fernet import Fernet

import ot    # project-local oblivious-transfer module (assumed available)
import util  # project-local helper module (assumed available)
import yao   # project-local garbled-circuit module (assumed available)


def alice(filename):
    socket = util.ClientSocket()

    with open(filename) as json_file:
        json_circuits = json.load(json_file)
    print()

    for json_circuit in json_circuits['circuits']:
        circuit = yao.Circuit()
        #Parse and fill circuit
        circuit.parseJson(json_circuit)
        print("======= "+ circuit.Name + " =======")

        #Create random p values
        p_values = {}
        for wire in circuit.Wires:
            p_values[wire] = random.randint(0,1)

        #Generate keys for each wire
        keys = {}
        for wire in circuit.Wires:
            keys[wire] = (Fernet.generate_key(), Fernet.generate_key())

        #Create table
        create_garble_tables(circuit, p_values, keys)

        #Generate value and key for all possible bob values for circuit
        all_bob_values = ot.generate_all_bob_values(circuit.Bob, p_values, keys)

        #Send all combinations to Bob
        for Alice_values in util.create_all_combination(len(circuit.Alice)):
            Alice_pvalues = list(map(lambda x, y: x ^ p_values[y],
                                                Alice_values, circuit.Alice))
            Alice_pvalues = list(map(lambda x, y: (x, keys[y][x]),
                                                Alice_pvalues, circuit.Alice))
            output_pvalues = list(map(lambda x: p_values[x], circuit.Outputs))

            socket.send_wait((circuit, Alice_pvalues, output_pvalues))

            #Check if bob values exist
            if (not len(circuit.Bob)):
                #Get output from bob
                output = socket.send_wait("Get output")
                util.print_output(circuit, Alice_values, [], output)
            else:
                for Bob_values in util.create_all_combination(len(circuit.Bob)):
                    #send bob p value via OT
                    ot.send_bob_values(circuit.Bob, all_bob_values, socket)
                    #Get output from bob
                    output = socket.send_wait("Get output")
                    util.print_output(circuit, Alice_values, Bob_values, output)
        print()
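The wire keys above are Fernet keys, and create_garble_tables (not shown) presumably encrypts each output-wire key under the corresponding pair of input-wire keys. The underlying primitive, in isolation (cryptography.fernet is real API; the double encryption is an assumption about the helper):

from cryptography.fernet import Fernet

k_in0, k_in1, k_out = Fernet.generate_key(), Fernet.generate_key(), Fernet.generate_key()
# One garbled-table entry: the output key encrypted under both input keys
entry = Fernet(k_in0).encrypt(Fernet(k_in1).encrypt(k_out))
# Only a holder of both input keys can peel the layers back off
assert Fernet(k_in1).decrypt(Fernet(k_in0).decrypt(entry)) == k_out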
Code Example #9
import numpy as np

import util  # project-local helper module (assumed available)


def latent_movie_semantics(movie_matrix):
    mlmovies = util.read_mlmovies()
    movie_ids = mlmovies.movieid.unique()
    # Resolve movie ids to display names (row order of mlmovies)
    movies_list = mlmovies[mlmovies['movieid'].isin(movie_ids)]['moviename'].tolist()
    concepts = []
    for i in range(no_of_components):  # module-level global
        concept = []
        for j, component in enumerate(np.transpose(movie_matrix)[i]):
            concept.append((movies_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(concepts, 'Movie')
    util.write_output_file(concepts, output_file, 'Movie')  # module-level global
Code Example #10
import numpy as np

import util  # project-local helper module (assumed available)


def latent_actor_semantics(actor_matrix):
    imdb_actor_info = util.read_imdb_actor_info()
    actor_ids = imdb_actor_info.id.unique()
    # Resolve actor ids to display names (row order of imdb_actor_info)
    actor_list = imdb_actor_info[imdb_actor_info['id'].isin(actor_ids)]['name'].tolist()
    concepts = []
    for i in range(no_of_components):  # module-level global
        concept = []
        for j, component in enumerate(np.transpose(actor_matrix)[i]):
            concept.append((actor_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(concepts, 'Actor')
    util.write_output_file(concepts, output_file, 'Actor')  # module-level global
Code Example #11
import json
import random

from cryptography.fernet import Fernet

import util  # project-local helper module (assumed available)
import yao   # project-local garbled-circuit module (assumed available)


def local_test(filename):
    with open(filename) as json_file:
        json_circuits = json.load(json_file)
    print()

    for json_circuit in json_circuits['circuits']:
        circuit = yao.Circuit()
        circuit.parseJson(json_circuit)

        print("======= "+ circuit.Name + " =======")

        #Create random p values
        p_values = {}
        for wire in circuit.Wires:
            p_values[wire] = random.randint(0,1)

        #Generate keys for each wire
        keys = {}
        for wire in circuit.Wires:
            keys[wire] = (Fernet.generate_key(), Fernet.generate_key())

        #Create table
        create_garble_tables(circuit, p_values, keys)

        #Try evaluate
        for Alice_values in util.create_all_combination(len(circuit.Alice)):
            Alice_pval = list(map(lambda x, y: x ^ p_values[y],
                                                   Alice_values, circuit.Alice))
            Alice_pval = list(map(lambda x, y: (x, keys[y][x]),
                                                   Alice_pval, circuit.Alice))
            output_pval = list(map(lambda x: p_values[x], circuit.Outputs))

            if (not len(circuit.Bob)):
                outputs = evaluate(Alice_pval, [], circuit, output_pval)
                util.print_output(circuit, Alice_values, [], outputs)
                continue  # no Bob inputs to enumerate; avoid re-evaluating below

            for Bob_values in util.create_all_combination(len(circuit.Bob)):
                Bob_pval = list(map(lambda x, y: x ^ p_values[y],
                                                      Bob_values, circuit.Bob))
                Bob_pval = list(map(lambda x, y: (x, keys[y][x]),
                                                      Bob_pval, circuit.Bob))
                outputs = evaluate(Alice_pval, Bob_pval, circuit, output_pval)

                # Write output
                util.print_output(circuit, Alice_values, Bob_values, outputs)

        print()
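util.create_all_combination is not shown anywhere in this listing; judging from its use here and in Example #8, it enumerates every bit assignment for a given number of wires. A plausible (hypothetical) implementation:

from itertools import product

def create_all_combination(n):
    # All n-bit assignments, e.g. n=2 -> (0, 0), (0, 1), (1, 0), (1, 1)
    return list(product((0, 1), repeat=n))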
Code Example #12
import numpy as np
import pandas as pd

import util  # project-local helper module (assumed available)


def latent_tag_semantics(tag_matrix):
    mltags = util.read_mltags()
    genome_tags = util.read_genome_tags()
    # Join tag ids in mltags to their readable names in genome_tags
    mltags = pd.merge(mltags,
                      genome_tags,
                      left_on='tagid',
                      right_on='tagId',
                      how='inner')
    tag_ids = mltags.tagid.unique()
    tags_list = mltags[mltags['tagid'].isin(tag_ids)]['tag'].tolist()
    concepts = []
    for i in range(no_of_components):  # module-level global
        concept = []
        for j, component in enumerate(np.transpose(tag_matrix)[i]):
            concept.append((tags_list[j], component))
        concept.sort(key=lambda tup: abs(tup[1]), reverse=True)
        concepts.append(concept)
    util.print_output(concepts, 'Tag')
    util.write_output_file(concepts, output_file, 'Tag')  # module-level global
Code Example #13
File: manager.py Project: mcmartins/broccoli
import logging
import multiprocessing

import util  # project-local helper module (assumed available)


def monitor(thread_util, sub_tasks):
    unique_id = util.short_unique_id()
    exit_event = multiprocessing.Event()
    waiting_tasks = []
    logging.debug('Manager - Starting ID [%s].', str(unique_id))
    # run until either the local exit event or the global kill event is set
    while not exit_event.is_set() and not thread_util.is_kill_event_set():
        non_finished_tasks = []
        if sub_tasks:
            for (sub_task, process) in sub_tasks:
                return_code = process.poll()
                if return_code is not None:
                    # process finished
                    thread_util.remove_process(sub_task.get_parent().id, process.pid)
                    (std_out, std_err) = process.communicate()
                    if return_code == 0:
                        # sub_task finished successfully
                        logging.info('Manager - FINISHED - Task [%s], SubTask [%s].', str(sub_task.get_parent().name),
                                     str(sub_task.id))
                        util.print_output(std_err, std_out)
                        if sub_task.get_parent().wait:
                            # should we wait for others to finish?
                            logging.info('Manager - Waiting for other Tasks to finish.')
                            waiting_tasks.append((sub_task, process))
                            continue
                        else:
                            # good to go
                            if sub_task.get_parent().has_children():
                                logging.info('Manager - No need to wait for other processes to finish.')
                                for (s, p) in sub_tasks:
                                    util.kill_process(p.pid)
                                    thread_util.remove_process(s.get_parent().id, p.pid)
                                logging.info('Manager - Task has Children. Sending Tasks to Processing Queue.')
                                for task in sub_task.get_parent().get_children():
                                    thread_util.add_task(task)
                                exit_event.set()
                                break
                            else:
                                util.print_task_tree(sub_task.get_parent())
                                logging.info('Manager - Job Finished with success.')
                                exit_event.set()
                                thread_util.kill(0)
                                exit(0)
                    else:
                        # failed tasks goes here
                        logging.info('Manager - FINISHED - Task Failure [%s], SubTask [%s].',
                                     str(sub_task.get_parent().name), str(sub_task.id))
                        util.print_output(std_err, std_out)
                        if sub_task.get_parent().fail_tolerant:
                            logging.info('Manager - The Task is Fail Tolerant.')
                            if thread_util.has_running_processes(sub_task.get_parent().id):
                                # should we wait for others to finish?
                                logging.info('Manager - Waiting for other Tasks to finish.')
                                continue
                            else:
                                # good to go
                                if sub_task.get_parent().has_children():
                                    logging.info('Manager - No need to wait for other processes to finish.')
                                    for (s, p) in sub_tasks:
                                        util.kill_process(p.pid)
                                        thread_util.remove_process(s.get_parent().id, p.pid)
                                    logging.info('Manager - Task has Children. Sending Tasks to Processing Queue.')
                                    for task in sub_task.get_parent().get_children():
                                        thread_util.add_task(task)
                                    exit_event.set()
                                else:
                                    util.print_task_tree(sub_task.get_parent())
                                    logging.info(
                                        "Manager - Job Finished with success, but Fail Tolerance has been applied.")
                                    exit_event.set()
                                    thread_util.kill(0)
                                    exit(0)
                        elif sub_task.get_parent().wait:
                            # hum this task failed and it seems to be waiting for
                            # the output of another at the same level, the most probable scenario
                            # is that it won't work from here on. Better to kill the Job now.
                            logging.info('Manager - Job Finished with errors.')
                            exit_event.set()
                            thread_util.kill(1)
                            exit(1)
                        else:
                            # hum we cannot proceed to the children tasks because this one failed
                            # lets see if the Job has still tasks running
                            if thread_util.has_running_processes(sub_task.get_parent().id):
                                # OK fine, there are still other tasks at the same level running
                                continue
                            else:
                                # seems we were waiting for this one to complete
                                # better to kill this now
                                logging.info('Manager - Job Finished with errors.')
                                exit_event.set()
                                thread_util.kill(2)
                                exit(2)
                else:
                    non_finished_tasks.append((sub_task, process))
        else:
            # are there tasks waiting for others to finish?
            if waiting_tasks:
                for (sub_task, process) in waiting_tasks:
                    if sub_task.get_parent().has_children():
                        logging.info('Manager - Task has Children. Sending Tasks to self.manager.')
                        # queue each child task individually, matching the pattern used above
                        for task in sub_task.get_parent().get_children():
                            thread_util.add_task(task)
                        exit_event.set()
                        break
                    else:
                        util.print_task_tree(sub_task.get_parent())
                        logging.info('Manager - Job Finished with success.')
                        exit_event.set()
                        thread_util.kill(0)
                        exit(0)
            else:
                exit_event.set()
                break
        # remove elements that were already processed
        sub_tasks = non_finished_tasks

    logging.debug('Manager - Finished ID [%s].', str(unique_id))
    return
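The supervision loop rests on subprocess.Popen.poll(), which returns None while the child is still running and its return code once it has exited. The pattern stripped of the task-tree logic (standard library; 'sleep' assumes a POSIX system):

import subprocess
import time

procs = [subprocess.Popen(['sleep', '1']), subprocess.Popen(['sleep', '2'])]
while procs:
    procs = [p for p in procs if p.poll() is None]  # keep only unfinished children
    time.sleep(0.1)
print('all children finished')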
Code Example #14
import random

import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

# DataLoader, Encoder, Decoder, Sample, step, evaluate, load_weights,
# save_weights, print_output and device are project-local (assumed available)


def main(args):
    data = DataLoader(pca=args.PCA, norm=args.norm)

    train_captions, train_feature, train_url, train_len = data.get_Training_data(
        args.training)
    test_captions, test_feature, test_url, test_len = data.get_val_data(
        args.testing)
    f, c, _ = data.eval_data()

    writer = SummaryWriter()

    encoder = Encoder(input_size=train_feature.shape[1],
                      hidden_size=args.hidden_size) \
        .to(device)

    decoder = Decoder(embed_size=args.embed_size,
                      hidden_size=args.hidden_size, attention_dim=args.attention_size,
                      vocab_size=len(data.word_to_idx)) \
        .to(device)

    if args.load_weight:
        load_weights(encoder, args.model_path + "Jul28_10-04-57encoder")
        load_weights(decoder, args.model_path + "Jul28_10-04-57decoder")

    for epoch in range(args.num_epochs):
        # Rebuilding the optimizer each epoch picks up the decayed learning
        # rate applied at the end of the loop, but it also resets Adam's
        # moment estimates every epoch
        params = list(decoder.parameters()) + list(encoder.parameters())
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(params=params, lr=args.learning_rate)

        # if epoch >= 100:
        training_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(train_captions, train_feature, train_len),
                             optimizer=optimizer)
        # if epoch + 1 % 5 == 0:
        #     a = evaluate(encoder, decoder, train_feature[0:2], train_captions[0:2], 5, data.word_to_idx)
        #     print("bleu4 ", a)

        with torch.no_grad():
            test_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(test_captions, test_feature, test_len))

        # if epoch > 1:
        b1, b2, b3, b4 = evaluate(encoder, decoder, f, c, 5, data.word_to_idx,
                                  data.idx_to_word)
        writer.add_scalars('BLEU', {
            'BLEU1': b1,
            'BLEU2': b2,
            'BLEU3': b3,
            'BLEU4': b4
        }, epoch + 1)
        if (epoch % 30) == 0:
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))

        writer.add_scalars('loss', {
            'train': training_loss,
            'val': test_loss
        }, epoch + 1)

        print(
            'Epoch [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}, TestLoss: {:.4f}, TestPerplexity: {:5.4f}'
            .format(epoch + 1, args.num_epochs, training_loss,
                    np.exp(training_loss), test_loss, np.exp(test_loss)))

        args.learning_rate *= 0.995
        if args.save_weight:
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))

    if args.save_weight:
        save_weights(encoder, args.model_path + "encoder")
        save_weights(decoder, args.model_path + "decoder")

    if args.predict:

        sample = Sample(encoder=encoder, decoder=decoder, device=device)

        train_mask = [
            random.randint(0, train_captions.shape[0] - 1)
            for _ in range(args.numOfpredection)
        ]
        test_mask = [
            random.randint(0, test_captions.shape[0] - 1)
            for _ in range(args.numOfpredection)
        ]

        train_featur = torch.from_numpy(train_feature[train_mask])
        train_featur = train_featur.to(device)
        train_encoder_out = encoder(train_featur)

        test_featur = torch.from_numpy(test_feature[test_mask])
        test_featur = test_featur.to(device)
        test_encoder_out = encoder(test_featur)

        train_output = []
        test_output = []

        for i in range(len(test_mask)):
            print(i)
            pre = sample.caption_image_beam_search(
                train_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 2)
            train_output.append(pre)
            pre = sample.caption_image_beam_search(
                test_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 50)
            test_output.append(pre)

        print_output(output=test_output,
                     sample=0,
                     gt=test_captions[test_mask],
                     img=test_url[test_mask],
                     title="val",
                     show_image=args.show_image,
                     idx_to_word=data.idx_to_word)

        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("")

        print_output(output=train_output,
                     sample=0,
                     gt=train_captions[train_mask],
                     img=train_url[train_mask],
                     title="traning",
                     show_image=args.show_image,
                     idx_to_word=data.idx_to_word)
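As noted in the comment above, rebuilding the Adam optimizer every epoch makes the manual learning-rate decay work but throws away Adam's moment estimates each time. A more conventional sketch keeps one optimizer and decays through a scheduler (torch.optim.lr_scheduler.ExponentialLR is real PyTorch API; the linear model is a stand-in for the encoder/decoder):

import torch

model = torch.nn.Linear(4, 2)  # stand-in for the encoder + decoder parameters
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)
for epoch in range(3):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 4)).pow(2).mean()
    loss.backward()
    optimizer.step()
    scheduler.step()  # multiplies the learning rate by 0.995 once per epoch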
Code Example #15
# The first statement is reconstructed from the identical call in Code
# Example #1; the original excerpt begins mid-statement. cc_matrix,
# actor_list, task, no_of_components and output_file are assumed to be
# defined earlier in the file.
import util  # project-local helper module (assumed available)
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

svd = TruncatedSVD(n_components=no_of_components, n_iter=100, random_state=None)
svd.fit(cc_matrix)
normalizer = Normalizer(copy=False)
lsa = make_pipeline(svd, normalizer)

dataC = lsa.fit_transform(cc_matrix)

concepts = []
for i in range(no_of_components):
    concept = []
    for j, component in enumerate(svd.components_[i]):
        concept.append((actor_list[j], component))
    concept.sort(key=lambda tup: tup[1], reverse=True)
    concepts.append(concept)
util.print_output(task, concepts)
util.write_output_file(task, concepts, output_file)

# Cluster the actors in the normalized latent space
cluster_rule = KMeans(n_clusters=3)
cluster_rule.fit(dataC)

labels = cluster_rule.predict(dataC)
centroids = cluster_rule.cluster_centers_

print('\n')
print("Centroids of 3 new clusters: \n")
print(centroids)
print('\n')
print("Clustered actors into the 3 groups (0 : 1 : 2) \n")
for i, j in zip(actor_list, labels):
    print(i, ':', j)  # assumed loop body; the original excerpt ends at the loop header
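For the data KMeans was fitted on, predict returns the same assignments the estimator already stores, so the actor/label pairing could also read from labels_ directly (real scikit-learn API; toy data):

import numpy as np
from sklearn.cluster import KMeans

X = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0], [9.0, 0.1], [9.1, 0.0]])
km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
print(km.labels_)           # per-point cluster ids; matches km.predict(X) here
print(km.cluster_centers_)  # one centroid per cluster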