Ejemplo n.º 1
0
 def __init__(self):
     """Load the configuration and the data sets this object works on.

     The data set location is read from the "data_set_loc" entry of the
     "filePath" config section and handed to a DataExtractor, which is
     then used to fetch the mlmovies, mltags and mlratings data.
     """
     self.conf = config_parser.ParseConfig()
     self.data_set_loc = self.conf.config_section_mapper("filePath").get("data_set_loc")
     self.data_extractor = data_extractor.DataExtractor(self.data_set_loc)
     # Fetch each data set exactly once; a second get_mlmovies_data() call
     # would only repeat the same load and overwrite the same attribute.
     self.mlmovies = self.data_extractor.get_mlmovies_data()
     self.mltags = self.data_extractor.get_mltags_data()
     self.mlratings = self.data_extractor.get_mlratings_data()
     # Called after the three loads above; presumably it combines them --
     # TODO confirm against get_combined_data().
     self.combined_data = self.get_combined_data()
     self.util = Util()
     self.reshuffle = False
Ejemplo n.º 2
0
 def __init__(self, user_id):
     """Initialise the state kept for ``user_id``.

     Resolves the data set location from the "filePath" config section,
     builds the DataExtractor, then loads the feedback data and the
     movie-tag matrix through this object's own getters.
     """
     self.user_id = user_id
     parsed_config = config_parser.ParseConfig()
     self.conf = parsed_config
     file_paths = parsed_config.config_section_mapper("filePath")
     self.data_set_loc = file_paths.get("data_set_loc")
     self.data_extractor = data_extractor.DataExtractor(self.data_set_loc)
     self.feedback_data = self.get_feedback_data()
     self.util = Util()
     # Both lookup tables start empty and exist before the matrix is built;
     # presumably get_movie_tag_matrix() fills them -- TODO confirm.
     self.movies_dict, self.tag_dict = {}, {}
     self.movie_tag_matrix = self.get_movie_tag_matrix()
     self.feedback_metadata_dict = {}
 def __init__(self):
     """Prepare the movie-tag matrix, the query vector and the LSH index.

     The LSH settings come from ``DataExtractor.get_lsh_details()``; its
     "num_layers" and "num_hashs" entries configure the MovieLSH instance
     and its "movie_list" entry is what the index structure is built from.
     """
     parsed_config = config_parser.ParseConfig()
     self.conf = parsed_config
     file_paths = parsed_config.config_section_mapper("filePath")
     self.data_set_loc = file_paths.get("data_set_loc")
     self.data_extractor = data_extractor.DataExtractor(self.data_set_loc)
     self.util = Util()
     self.movies_dict = dict()
     self.movie_tag_matrix = self.get_movie_tag_matrix()
     lsh_details = self.data_extractor.get_lsh_details()
     self.task_3_input = lsh_details
     self.movieLSH = MovieLSH(lsh_details["num_layers"], lsh_details["num_hashs"])
     # The query vector is fetched before the index structure is created,
     # keeping the same call order the constructor has always used.
     self.query_df, self.query_vector = self.fetch_query_vector_from_csv()
     self.movieLSH.create_index_structure(lsh_details["movie_list"])
Ejemplo n.º 4
0
# Script: traverse the TIMIT training data for the speakers named in
# SPEAKER_LIST and collect their spectrograms into X with labels in y.
SENTENCES_PER_SPEAKER = 10
FREQ_ELEMENTS = 128
MAX_SPEAKERS = 5
WITH_SPLIT = True
SPEAKER_LIST = '../../data/speaker_lists/speakers_5_clustering_vs_reynolds_v3.txt'
#OUTPUT_1 = '../../data/training/TIMIT_extracted/train_speakers_5_clustering_vs_reynolds_v3.pickle'
#OUTPUT_2 = '../../data/training/TIMIT_extracted/test_speakers_5_clustering_vs_reynolds_v3.pickle'
OUTPUT_1 = '../../data/training/TIMIT_extracted/train_speakers_5_clustering_vs_reynolds.pickle'
OUTPUT_2 = '../../data/training/TIMIT_extracted/test_speakers_5_clustering_vs_reynolds.pickle'

ONE_SEC = 100  # array elements corresponding to one sec
STEP_SIZE = 100
SPECT_DIMENSION = (FREQ_ELEMENTS, ONE_SEC)
MAX_AUDIO_LENGTH = 100

extractor = de.DataExtractor(MAX_SPEAKERS, ONE_SEC, STEP_SIZE, SPECT_DIMENSION)
# Pre-allocated buffers handed to the extractor: MAX_SPEAKERS * 20 entries,
# each 1 x FREQ_ELEMENTS x MAX_AUDIO_LENGTH (the 20 is presumably an upper
# bound on items per speaker -- TODO confirm).
#X = np.zeros((MAX_SPEAKERS*1000, 1, SPECT_DIMENSION[0], SPECT_DIMENSION[1]), dtype=np.float32)
X = np.zeros((MAX_SPEAKERS * 20, 1, SPECT_DIMENSION[0], MAX_AUDIO_LENGTH),
             dtype=np.float32)
y = np.zeros(MAX_SPEAKERS * 20, dtype=np.int32)

# Read one speaker name per line. The file is opened in text mode so the
# lines are str objects on both Python 2 and Python 3; in binary mode
# Python 3 yields bytes and the str-based newline removal raises TypeError.
with open(SPEAKER_LIST, 'r') as f:
    valid_speakers = [line.replace('\n', '') for line in f]

X, y, speaker_names = extractor.traverse_TIMIT_data(
    '../../data/training/TIMIT/', X, y, valid_speakers)

# Parenthesised form prints the same tuple under Python 2 and is valid Python 3.
print(X.shape)
Ejemplo n.º 5
0
import data_extractor as data_extractor
import data_transformer as data_transformer
import data_loader as data_loader
import config

# This component is the main program for the requested task
# It uses a configuration file (config.py) for input, output and transformation configurations

# defining multiple configurations is possible in config.py.
# runs the transfer operation for each configuration.
# Run one extract -> transform -> load pass per configuration in config.py.
for configNo in range(config.CONFIGURATIONS_COUNT):
    # Pick the extractor, transformer and loader implementations from the
    # settings stored at index configNo of each config list.
    extractor = data_extractor.DataExtractor(
        config.INPUT_TYPE[configNo], config.INPUT_DATA_CONNECTION[configNo])
    transformer = data_transformer.DataTransformer(
        config.INPUT_TYPE[configNo], config.OUTPUT_TYPE[configNo],
        config.INPUT_TO_OUTPUT_MAPPING[configNo])
    loader = data_loader.DataLoader(config.OUTPUT_TYPE[configNo],
                                    config.OUTPUT_DATA_CONNECTION[configNo],
                                    config.OUTPUT_DB[configNo],
                                    config.OUTPUT_COLLECTION[configNo])

    # Items are numbered from 1 within each configuration; enumerate keeps
    # the numbering in step with the loop without a hand-maintained counter.
    for counter, item in enumerate(extractor.extract_data(), 1):
        # transform the item into the desired output format
        out_item = transformer.transform_data(item)
        # insert the transformed item into the output destination
        insert_id = loader.load_data(out_item)
        print("Created item {0} with id {1}".format(counter, insert_id))
Ejemplo n.º 6
0
import argparse
import operator
from collections import Counter

import config_parser
import data_extractor
import numpy
from scipy.spatial import distance as dist
from sklearn.svm import SVC
from util import *

# Module-level setup: these statements run once at import time and the
# resulting globals (e.g. util, movie_latent_matrix) are picked up by the
# Classifier constructor below.
util = Util()
conf = config_parser.ParseConfig()
# The data set location is the "data_set_loc" entry of the "filePath" config section.
data_set_loc = conf.config_section_mapper("filePath").get("data_set_loc")
data_extractor_obj = data_extractor.DataExtractor(data_set_loc)
# presumably a pandas DataFrame with one row per movie and one column per
# tag -- TODO confirm against Util.get_movie_tag_matrix()
movie_tag_frame = util.get_movie_tag_matrix()

movie_tag_matrix_value = movie_tag_frame.values
# Decompose the matrix and keep only the first 10 columns of U as the
# latent representation (assumes rows of U line up with `movies` -- TODO confirm).
(U, s, Vh) = util.SVD(movie_tag_matrix_value)
movie_latent_matrix = U[:, :10]

# `movies` holds the frame's index values; `tags` is whatever iterating the
# frame yields (column labels, if this is a DataFrame -- TODO confirm).
movies = list(movie_tag_frame.index.values)
tags = list(movie_tag_frame)
label_movies_json_data = data_extractor_obj.get_json()


class Classifier(object):
    def __init__(self, r=0):
        self.util = util
        self.r = r
        self.movie_latent_matrix = movie_latent_matrix