Example no. 1
	def train_data(self, filename):
		print(Style.BRIGHT + 'Training model for data in ' + Fore.MAGENTA + filename + Fore.RESET + Style.RESET_ALL)
		data_analyzer = DataAnalyzer(filename)
		self.__sample_count = data_analyzer.sample_count
		Y_labeled = data_analyzer.Y_labeled
		self.__X = data_analyzer.X[:, 9:]
		self.__mean_list = [data_analyzer.all_descriptions[i].mean for i in range(9, len(data_analyzer.all_descriptions))]
		self.__stdev_list = [data_analyzer.all_descriptions[i].standard_deviation for i in range(9, len(data_analyzer.all_descriptions))]

		# Apply feature scaling
		for i in range(self.__X.shape[1]):
			self.__X[:, i] = (self.__X[:, i] - self.__mean_list[i]) / self.__stdev_list[i]
		self.__X = np.c_[np.ones(self.__sample_count), self.__X]

		# Find theta for four one-vs-all models (each house vs. NOT that house)
		thetas = []
		for house in ('Gryffindor', 'Hufflepuff', 'Ravenclaw', 'Slytherin'):
			print(Style.BRIGHT + Fore.BLUE + 'Running gradient descent to determine ' + house.upper() + ' or NOT...' + Style.RESET_ALL + Fore.RESET)
			thetas.append(self.__run_gradient_descent(np.where(Y_labeled == house, 1, 0)))
		# Merge all theta in one matrix, one column per house
		self.__theta = np.column_stack(thetas)
		self.__save_weights_file()
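
The merged theta has one column per house, in the order stacked above. A minimal sketch of how prediction could work against it, assuming a sigmoid hypothesis and an input row that is already scaled and bias-augmented (predict_house is a hypothetical helper, not part of the original class):

def predict_house(x_row, theta):
	# One-vs-all: score the row against each house's classifier and
	# return the house with the highest sigmoid confidence.
	houses = ['Gryffindor', 'Hufflepuff', 'Ravenclaw', 'Slytherin']
	scores = 1 / (1 + np.exp(-x_row @ theta))  # one score per theta column
	return houses[int(np.argmax(scores))]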
Example no. 2
    def __separate_data(self, data_filename):
        data_analyzer = DataAnalyzer(data_filename)
        self.mean_list = data_analyzer.get_mean_list()
        self.stdev_list = data_analyzer.get_stdev_list()

        # feature scale, and add column of 1s to X
        all_X = data_analyzer.X
        all_X = self.__apply_feature_scaling(all_X)
        all_X = np.c_[np.ones(all_X.shape[0]), all_X]
        all_Y = data_analyzer.Y
        all_data = np.c_[all_X, all_Y]

        np.random.shuffle(all_data)
        # top 80% of rows will be for training
        split_row_index = int(all_data.shape[0] * 0.8)

        training_data = all_data[:split_row_index, :]
        validation_data = all_data[split_row_index:, :]

        self.training_X = training_data[:, :-1]
        self.training_Y = training_data[:, -1]
        self.training_Y = self.training_Y.reshape(self.training_Y.shape[0], 1)

        self.validation_X = validation_data[:, :-1]
        self.validation_Y = validation_data[:, -1]
        self.validation_Y = self.validation_Y.reshape(
            self.validation_Y.shape[0], 1)
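
Neither this snippet nor Example no. 5 shows __apply_feature_scaling; a minimal sketch of what it presumably does, assuming z-score scaling with the stored mean_list and stdev_list (mirroring the in-line loop in Example no. 1):

    def __apply_feature_scaling(self, X):
        # Hypothetical implementation: z-score each feature column
        # using the statistics collected by DataAnalyzer.
        for i in range(X.shape[1]):
            X[:, i] = (X[:, i] - self.mean_list[i]) / self.stdev_list[i]
        return X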
Example no. 3
 def __init__(self,
              path,
              start_date,
              interested_symbols,
              interested_start_date,
              interested_end_date,
              num_std=3,
              frequency=FrequencyMap.Minute,
              forward=True,
              model_type='CloseToClose',
              clean=True):
     """
     :param path: str
     :param start_date: datetime.date
     :param interested_symbols: list[str]
     :param interested_start_date: datetime.date
     :param interested_end_date: datetime.date
     :param num_std: int
     :param frequency: FrequencyMap
     :param forward: boolean
     :param model_type: str
     :param clean: boolean
     """
     self.interested_symbols = interested_symbols
     self.interested_start_date = interested_start_date
     self.interested_end_date = interested_end_date
     self.frequency = frequency
     self.analysis_window = 30 if frequency is not FrequencyMap.Month else 10
     self.loader = DataLoader(path, start_date)
     self.pre_process = DataPreProcessor(num_std, frequency, forward)
     self.estimator = VolatilityEstimator(model_type, clean, frequency)
     self.data_analyzer = DataAnalyzer()
     self.model = Garch11('Constant', 'Garch')
     self.error_estimator = ErrorEstimator(self.model, self.estimator,
                                           self.frequency)
Example no. 4
 def on_pre_enter(self):
     try:
         self.Dm = DataAnalyzer('data/tweets.json')
         self.Dm.create_dataframe()
         self.Dm.sentiment_analysis()
         self.Dm.create_csv()
         self.Dm.create_graphs()
     except Exception:
         # Errors are deliberately swallowed; 'except Exception' avoids
         # also trapping SystemExit and KeyboardInterrupt the way the
         # original bare except did.
         pass
Example no. 5
 def __init_training_data(self, training_filename):
     data_analyzer = DataAnalyzer(training_filename)
     self.mean_list = data_analyzer.get_mean_list()
     self.stdev_list = data_analyzer.get_stdev_list()
     # feature scale, and add column of 1s to X
     self.training_X = data_analyzer.X
     self.training_X = self.__apply_feature_scaling(self.training_X)
     self.training_X = np.c_[np.ones(self.training_X.shape[0]),
                             self.training_X]
     self.training_Y = data_analyzer.Y
Example no. 6
def main():
	# check argv
	if len(sys.argv) != 2:
		print('usage: ' + Fore.RED + 'python3' + Fore.BLUE + ' histogram.py ' + Fore.RESET + 'data_file.csv')
		sys.exit(-1)
	data_file = sys.argv[1]
	try:
		data_analyzer = DataAnalyzer(data_file)
		data_analyzer.show_histograms()
	except IOError as e:
		print(Style.BRIGHT + Fore.RED + 'I/O Error: ' + Style.RESET_ALL + Fore.RESET + str(e))
	except ParserException as e:
		print(Style.BRIGHT + Fore.RED + 'ParserException: ' + Style.RESET_ALL + Fore.RESET + str(e))
Example no. 7
def make_la_figs(stock_data):
    da = DataAnalyzer(stock_data)
    data = da.normalize_dataframe()

    # making the figures
    # normalized data
    da.plot_data(data,
                 'months', 
                 'stock data', 
                 'Stock Data',
                 '../data/figures/no_all.png')
    # mean of normalized data
    da.plot_data(data.mean(axis=1),
                 'months', 
                 'stock data', 
                 'Stock Data',
                 '../data/figures/no_mean.png')
Example no. 8
def make_graphs(stock_data):
    da = DataAnalyzer(stock_data.data)
    data = da.normalize_dataframe()

    # making the figures
    # norms
    da.plot_data(data,
                 'months', 
                 'stock data',
                 'Test',
                 '../data/figures/all.png')
    # mean of the norms             
    da.plot_data(data.mean(axis=1),
                 'months', 
                 'stock data mean', 
                 'Price Means', 
                 '../data/figures/mean.png') 
Example no. 9
import click
from data_analyzer import DataAnalyzer

data_analyzer = DataAnalyzer()


@click.group()
def analyzer():
    """This is the cli for analyzing the data of the rated movies by the users"""


@analyzer.command()
def get_ratings_movies():
    """Get a joined table of ratings and movies"""
    click.echo(data_analyzer.ratings_movies)


@analyzer.command()
def get_ratings_users():
    """Get a joined table of ratings and users"""
    click.echo(data_analyzer.ratings_users)


@analyzer.command()
def get_ratings_movies_users():
    """Get a joined table of ratings, movies and users"""
    click.echo(data_analyzer.ratings_movies_users)
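
As excerpted, nothing ever invokes the analyzer group, so running this file would do nothing; a minimal entry-point sketch:

if __name__ == '__main__':
    analyzer()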

Example no. 10
 def make_graph(self, data):
     analyzer = DataAnalyzer(data)
     data_norm = analyzer.normalize_dataframe()
     analyzer.plot_data(data_norm, 'month', 'stuff', 'title stuff',
                        '../data/figures/local_eco.png')
Example no. 11
class ArmAnalyzer(Analyzer):
    """Analyzer 30: in-line assembling of low-level code and data into the gdb's arm simulator"""

    # definition of subanalyzers
    address_analyzer = AddressAnalyzer()
    data_analyzer = DataAnalyzer()
    instdat_analyzer = InstdatAnalyzer()
    instmul_analyzer = InstmulAnalyzer()
    instjmp_analyzer = InstjmpAnalyzer()
    instmem_analyzer = InstmemAnalyzer()
    instmsc_analyzer = InstmscAnalyzer()

    # definition of internal helper functions (methods)
    def update_address(self, match):
        override = 0
        if match:  # new address = specified address + size of elements * number of elements
            result = self.result  # helper variable to avoid using 'self.' so many times
            rid = 0 if len(result) == 2 else 2  # reference index (2 in case of relative PC addressing mode)
            if result[rid] % result[rid + 1][0] != 0:  # if current address is misaligned
                previous_address = result[rid]  # compute the remainder positions up to the next aligned address
                remainder_shift = result[rid + 1][0] - (previous_address % self.result[rid + 1][0])
                alignment_type = 'halfword' if (result[rid + 1][0] == 2) else 'word'
                if self.state == 2:  # in case of implicit address setting
                    result[rid] = previous_address + remainder_shift  # update current address
                    if result[rid] >= 2 ** 32:
                        override = -4006  # overriding error: too big address error after alignment
                    if rid == 2:  # if there is a second address + data entry
                        result[0] = result[0] + remainder_shift  # also move forward the second address
                        if result[0] >= 2 ** 32:
                            override = -4006  # second overriding error: too big address error after alignment
                    print "WARNING: implicit %s address automatically aligned skipping %d position%s,\n\tfrom 0x%X to 0x%X" \
                          % (alignment_type, remainder_shift, 's' if remainder_shift > 1 else '',
                             previous_address, result[rid])
                else:  # in case of explicit address (self.state == 1)
                    print "WARNING: explicit %s address misaligned by %d position%s" \
                          % (alignment_type, result[rid + 1][0] - remainder_shift,
                             's' if (result[rid + 1][0] - remainder_shift) > 1 else '')
                    # update the implicit address for next instructions
            self.next_address = result[rid] + result[rid + 1][0] * (len(result[rid + 1]) - 1)
        return override

    # definition of internal transition actions (methods)
    def implicit_address(self, match, sub_result, sub_state, super_result):
        if match:  # use the current address as starting address
            self.result.append(self.next_address)
        return 0

    def stack_info(self, match, sub_result, sub_state, super_result):
        override = 0
        if match:
            self.result.append(list(sub_result))
            sub_result *= 0
            override = self.update_address(match)
        else:
            override = self.error_spring(match, sub_result, sub_state, super_result)
        return override

    def __init__(self):
        Analyzer.__init__(self)
        self.next_address = 0x8000
        # definition of error spring list
        self.error_list = [-1002, -1003, -1004, -1005, -1006,
                           -1102, -1103, -1104, -1105, -1202, -1203, -1204,
                           -1301, -1302, -1303, -1304, -1403, -1502, -1503, -1504,
                           -1603, -1604, -1605, -1606, -1607, -1702, -1703, -1704, -1705, -1706,
                           -2002, -2003, -2004,
                           -2102, -2103, -2104, -2105, -2106, -2107,
                           -2204, -2205, -2207, -2302, -2303, -2304, -2306, -2308, -2310, -2311,
                           -2402, -2403, -2404, -2405, -2406, -2407, -2408, -2409, -2410, -2411, -2412,
                           -2502, -2503, -2504, -2505, -2506, -2510, -2511, -2512, -2513,
                           -3102, -3104, -3105,
                           -3202, -3204, -3205, -3207, -3208,
                           -3302, -3304, -3305, -3307, -3308,
                           -3403, -3404, -3405, -3406, -3407, -3408, -3409, -3410,
                           -3502, -3504, -3505
                           ]
        # definition of the (instance) parsing graph
        self.graph = {0:  # initial state
                          ([(None, None, -4001, None),  # T40.0.0 EOSeq -> missing hex address
                            (' ', None, 0, None),  # T40.0.1 skip initial spaces
                            ('>', ' ', -4003, None,  # T40.0.2a found '>' at end of sequence
                             2, self.implicit_address,  # T40.0.2b found '> ', stack address and go to 2
                             -4003, None),  # T40.0.2c found '>' followed by strange char
                            (self.address_analyzer, None, 1, self.error_spring)],  # T40.0.3 get the address
                           -4002),  # T40.0.4 wrong initial hex address
                      1:  # decoder state after hex address
                          ([(None, None, -4004, None),  # T40.1.0 EOSeq -> missing info
                            (self.data_analyzer, None, 1000, self.stack_info),  # T40.1.1 get the data
                            (self.instdat_analyzer, None, 1000, self.stack_info),  # T40.1.2 get data instr.
                            (self.instmul_analyzer, None, 1000, self.stack_info),  # T40.1.3 get multiply instr.
                            (self.instjmp_analyzer, None, 1000, self.stack_info),  # T40.1.4 get branch instr.
                            (self.instmem_analyzer, None, 1000, self.stack_info),  # T40.1.5 get mem transfer instr.
                            (self.instmsc_analyzer, None, 1000, self.stack_info)],  # T40.1.6 get miscellaneous instr.
                           -4005),  # T40.1.7 unrecognized instruction or directive
                      2:  # decoder state after '>' symbol
                          ([(None, None, -4004, None),  # T40.2.0 EOSeq -> missing info
                            (self.data_analyzer, None, 1000, self.stack_info),  # T40.2.1 get the data
                            (self.instdat_analyzer, None, 1000, self.stack_info),  # T40.2.2 get data instr.
                            (self.instmul_analyzer, None, 1000, self.stack_info),  # T40.2.3 get multiply instr.
                            (self.instjmp_analyzer, None, 1000, self.stack_info),  # T40.2.4 get branch instr.
                            (self.instmem_analyzer, None, 1000, self.stack_info),  # T40.2.5 get mem transfer instr.
                            (self.instmsc_analyzer, None, 1000, self.stack_info)],  # T40.2.6 get miscellaneous instr.
                           -4005)  # T40.2.7 unrecognized instruction or directive
                      }
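
The halfword/word alignment arithmetic inside update_address (shift to the next boundary by size - address % size) is easy to lose among the state handling. A minimal standalone sketch of the same computation, with align_up as a hypothetical name:

def align_up(address, size):
    # Next address aligned to a `size`-byte boundary (2 = halfword, 4 = word).
    remainder = address % size
    if remainder == 0:
        return address
    return address + (size - remainder)

# e.g. align_up(0x8001, 4) -> 0x8004; align_up(0x8002, 2) -> 0x8002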
Example no. 12
import csv

import numpy as np

# GeneticAlgorithm and DataAnalyzer come from this project's own modules (imports not shown in the original)
evolution = GeneticAlgorithm(population_size=10000, chromosome_size=65)
evolution.createPopulation()
total_generations = evolution.evolve(file_training,
                                     crossing_probability=1,
                                     mutation_rate=0.1,
                                     plague_max_percent=0.4,
                                     plague_probability=0.1,
                                     selection_exp_const=1,
                                     min_num_vars=3,
                                     max_generations=200,
                                     const_num_digits=3,
                                     satisfactory_mse=0.01)

print("\n\n\nBest subject: ", evolution.best_expr)
print("sqrt(MSE): ", np.sqrt(min(evolution.mse_list)))
print("In ", total_generations, "generations")

analysis = DataAnalyzer('testing.csv')
strength = analysis.strength(evolution.best_expr)

with open('sub0.csv', mode='w') as f:
    sample_writer = csv.writer(f,
                               delimiter=',',
                               quotechar='"',
                               quoting=csv.QUOTE_MINIMAL)
    sample_writer.writerow(['ID', 'strength'])
    for i, value in enumerate(strength, start=722):
        sample_writer.writerow([i, value])
Example no. 13
        elif prefix is None or prefix == '':
            print('Prefix must be specified!')
        else:
            break
    method = 'metric'
    model_type = 'resnet'  # should be in ['resnet', 'inception3']
    train_root = '/home/ubuntu/Program/Tableware/DataArgumentation/dataset/o_train/'
    test_root = '/home/ubuntu/Program/Tableware/DataArgumentation/dataset/n_test/'
    sample_file_dir = '/home/ubuntu/Program/Tableware/DataArgumentation/dataset/n_base_sample_5'

    load_model_path = None
    # load_model_path = './model/pretrained/inception_v3_google-1a9a5a14.pth'
    trainer = Trainer(model_type=model_type, load_model_path=load_model_path)
    trainer.set_super_training_parameters(train_root=train_root,
                                          test_root=test_root,
                                          sample_file_dir=sample_file_dir,
                                          prefix=prefix,
                                          batch_size=128)
    save_dir, maxacc = trainer.metric_training(balance_testset=False)

    best_model_path = './model/keep/resnet_%s_%s_conv0.05_%.2f.tar' % (
        prefix, method, maxacc * 100)
    shutil.copy(os.path.join(save_dir, '%s_%s.pth.tar' % (prefix, method)),
                best_model_path)

    analyzer = DataAnalyzer(sample_file_dir=sample_file_dir,
                            test_dir=test_root,
                            num_of_classes=42,
                            prefix=prefix)
    analyzer.analysis_for_inter_exter_acc(model_path=best_model_path)
Example no. 14
def plot_data():
    da = DataAnalyzer()
    # da.show()
    da.plot_data()
    da.plot_ratio()
    da.plot_order()