Example #1
0
    def run(self, Train=True, Test=False):
        """Build the datasets and the model, then optionally train and/or evaluate.

        Args:
            Train: when true, back up the config and model sources into the
                output directory, create the summary writers and call
                ``trainer.train``.
            Test: when true, call ``trainer.eval`` on the test set.
        """
        print("Running on", self.arg.device)
        self.set_device(self.arg.device)

        # seed numpy and torch from the same configured value
        np.random.seed(self.arg.seed)
        torch.manual_seed(self.arg.seed)

        # create training set
        if self.arg.data_path:
            log('loading corpus from %s' % self.arg.data_path)
        # Always make sure the full output tree exists. Previously the
        # sub-directories were only created together with output_path, so a
        # pre-existing output directory without "code/models" made the backup
        # copy below fail.
        os.makedirs(self.arg.output_path + "/code/models", exist_ok=True)

        self.define_input_field()  # define the fields of several inputs
        print(self.arg)
        print(self.bertConfig)
        print(self.bertTokenizer)

        consts.TOKEN_MASK_TYPE = self.arg.token_mask_type
        # load the data files and convert them into datasets
        self.train_set = self.construct_dataset(self.train, keep_events=1, skip_sample=self.arg.skip_sample,
                                                tokenizer=self.bertTokenizer)
        self.dev_set = self.construct_dataset(self.dev, tokenizer=self.bertTokenizer)
        self.test_set = self.construct_dataset(self.test, tokenizer=self.bertTokenizer)
        self.buil_field_vocab()  # build vocab on train and dev set
        tester = self.get_tester()

        if self.arg.restart > 0:
            # resume from a stored checkpoint
            log('init model from ' + self.arg.demo_model)
            self.model = self.load_model(self.arg.demo_model)
            log('model loaded, there are %i sets of params' % len(self.model.parameters_requires_grad_clipping()))
        else:
            self.model = self.load_model(None)
            log('model created from scratch, there are %i sets of params' % len(self.model.parameters_requires_grad_clipping()))

        self.arg.word_i2s = self.WordsField.vocab.itos
        self.arg.trigger_label_i2s = self.TriggerLabelField.vocab.itos
        optimizer_constructor, bert_optimizer_constructor = self.get_otimizer_constructor(self.model)
        trainer = Trainer(model=self.model, args=self.arg, word_i2s=self.arg.word_i2s, EERuner=self,
                          optimizer_constructor=optimizer_constructor,
                          bert_optimizer_constructor=bert_optimizer_constructor, tester=tester)
        if Train:
            print("backup codes")
            os.system("cp config.cfg {}".format(self.arg.output_path))
            os.system("cp network/models/*.py {}".format(self.arg.output_path + "/code/models"))
            self.store_vec()
            train_writer = SummaryWriter(os.path.join(self.arg.output_path, "train"))
            detection_writer = SummaryWriter(os.path.join(self.arg.output_path, "detection"))
            classification_writer = SummaryWriter(os.path.join(self.arg.output_path, "classification"))
            self.arg.writer = {"train": train_writer, "detect": detection_writer, "cls": classification_writer}
            try:
                trainer.train(train_set=self.train_set, dev_set=self.dev_set, test_set=self.test_set,
                              epochs=self.arg.epochs, other_testsets={})
            finally:
                # close the writers even when training raises
                for writer_key in ("train", "detect", "cls"):
                    self.arg.writer[writer_key].close()
        if Test:
            trainer.eval(test_set=self.test_set)

        log('Done!')
Example #2
0
def dual_ensemble_classifier_performance_statistics(
        data_directory, multiple_runs_directory, output_directory,
        create_simple_plots, create_distribution_plots, widget):
    """Produce the requested performance plots for the dual ensemble classifier.

    All results go into the ``DualEnsembleClassifierPerformanceStatistics``
    sub-folder of ``output_directory``, which is created when missing.
    ``create_simple_plots`` and ``create_distribution_plots`` select which of
    the two plotting routines run; a start and a finish message are logged
    to ``widget``.
    """
    log(f"Started Dual Ensemble Classifier Performance Statistics: {datetime.datetime.now()}",
        None, widget)

    # results live in a dedicated sub-folder
    results_directory = os.path.join(
        output_directory, 'DualEnsembleClassifierPerformanceStatistics')
    if not os.path.exists(results_directory):
        os.makedirs(results_directory)

    if create_simple_plots:
        plot_initial_performance(data_directory, results_directory, widget)

    if create_distribution_plots:
        plot_distribution_for_multiple_runs(
            multiple_runs_directory, results_directory, widget)

    log(f"Finished Dual Ensemble Classifier Performance Statistics: {datetime.datetime.now()}",
        None, widget)
Example #3
0
def train(models, it_train, it_val, params):
    """
    Train the model.

    Parameters:
    - models: a dictionary with all the models.
        - atob: a model that goes from A to B.
        - d: the discriminator model.
        - p2p: a Pix2Pix model.
    - it_train: the iterator of the training data.
    - it_val: the iterator of the validation data.
    - params: parameters of the training procedure.
    """
    # Create the experiment folder and save the parameters
    create_expt_dir(params)

    # Get the output shape of the discriminator. It is read from `models`
    # (like `models.atob` below) rather than from a free variable `d`, which
    # is not defined in this function.
    dout_size = models.d.output_shape[-2:]
    # Define the data generators
    generators = generators_creation(it_train, it_val, models, dout_size)

    # Define the number of samples to use on each training epoch
    # (-1 means "use every sample of the iterator")
    train_samples = params.train_samples
    if train_samples == -1:
        train_samples = it_train.N
    batches_per_epoch = train_samples // params.samples_per_batch

    # Define the number of samples to use for validation
    val_samples = params.val_samples
    if val_samples == -1:
        val_samples = it_val.N

    losses = {'p2p': [], 'd': [], 'p2p_val': [], 'd_val': []}
    if params.continue_train:
        # resume the loss history of a previous run
        losses = load_losses(log_dir=params.log_dir,
                             expt_name=params.expt_name)

    for e in tqdm(range(params.epochs)):

        for _ in range(batches_per_epoch):
            train_iteration(models, generators, losses, params)

        # Evaluate how the models is doing on the validation set.
        evaluate(models, generators, losses, val_samples=val_samples)

        # Periodically checkpoint the weights and log the losses
        if (e + 1) % params.save_every == 0:
            save_weights(models,
                         log_dir=params.log_dir,
                         expt_name=params.expt_name)
            log(losses,
                models.atob,
                it_val,
                log_dir=params.log_dir,
                expt_name=params.expt_name,
                is_a_binary=params.is_a_binary,
                is_b_binary=params.is_b_binary)
Example #4
0
def model_analytics(multiple_runs_path, output_path, plot_weight_heatmaps,
                    plot_collapsed_weight_heatmaps,
                    plot_collapsed_weight_heatmaps_aligned, widget):
    """Run ``plot_heatmaps`` for the given runs, logging before and after.

    The three ``plot_*`` flags are forwarded unchanged, in order, to
    ``plot_heatmaps`` together with the input and output paths.
    """
    heatmap_flags = (
        plot_weight_heatmaps,
        plot_collapsed_weight_heatmaps,
        plot_collapsed_weight_heatmaps_aligned,
    )

    log("Started plotting weight heatmaps", None, widget)
    plot_heatmaps(multiple_runs_path, *heatmap_flags, output_path, widget)
    log("Finished plotting weight heatmaps", None, widget)
Example #5
0
    def handle(self):
        """Answer the request with ``<track uri>|<progress ms>`` of the playing track."""
        playing = sp.current_user_playing_track()
        # (uri, progress) pair, both converted through util.propertyToString
        fields = (
            util.propertyToString(playing["item"]["uri"]),
            util.propertyToString(playing["progress_ms"]),
        )

        util.log("Request made, responding with: %s|%s" % fields)
        self.request.sendall("%s|%s\n" % fields)
def dataset_statistics(data_directory, output_directory, widget):
    """Read the trial metadata and compute the dataset statistics.

    Results and the ``DatasetStatistic.txt`` log are written to the
    ``DatasetStatistics`` sub-folder of ``output_directory`` (created when
    missing). Start and finish timestamps are logged to the file and widget.
    """
    # results folder and its log file
    statistics_directory = os.path.join(output_directory, 'DatasetStatistics')
    if not os.path.exists(statistics_directory):
        os.makedirs(statistics_directory)
    statistics_log = os.path.join(statistics_directory, 'DatasetStatistic.txt')

    log(f"Started dataset statistics: {datetime.datetime.now()}",
        file=statistics_log, widget=widget)

    (subject_count,
     lengths_per_subject,
     responses_per_subject,
     all_trial_lengths) = read_trial_metadata(data_directory)

    compute_statistics(subject_count, statistics_directory, statistics_log,
                       lengths_per_subject, responses_per_subject,
                       all_trial_lengths, widget)

    log(f"Finished dataset statistics: {datetime.datetime.now()}",
        file=statistics_log, widget=widget)
Example #7
0
def read_trial_data(channel_list, data_directory, log_file, only_two_subjects, widget):
    """Parse the per-subject binary files found under ``data_directory``.

    Each file name is expected to start with the 1-based subject number
    (``<number>.<extension>``). For every subject a list with one entry per
    channel is built; only channels whose index is in ``channel_list`` get
    their trials stored, the other channel entries stay empty. When
    ``only_two_subjects`` is true, directory walking and parsing both stop
    after two items.

    Returns:
        A list indexed by ``subject number - 1`` holding, per channel, the
        list of trial value lists.
    """
    # collect the file names
    file_names = []
    for visited, (_, _, names) in enumerate(os.walk(data_directory), start=1):
        file_names.extend(names)
        if only_two_subjects and visited == 2:
            break

    # one (initially empty) slot per discovered file
    subjects_data = [[] for _ in file_names]

    log(f'Start time of parsing: {datetime.datetime.now()}', file=log_file, widget=widget)

    for parsed, file_name in enumerate(file_names, start=1):
        subject_path = os.path.join(data_directory, file_name)

        # the file name prefix is the 1-based subject number
        subject_index = int(file_name.split('.')[0]) - 1

        log(f'Started reading filtered data for subject {subject_index + 1}',
            file=log_file, widget=widget)

        # the file stores, channel after channel, NUMBER_OF_TRIALS records
        # made of a length followed by that many values
        with open(subject_path, 'rb') as handle:
            for channel_index in range(NUMBER_OF_CHANNELS):
                subjects_data[subject_index].append([])

                for _ in range(NUMBER_OF_TRIALS):
                    trial_length = read_value_from_binary_file(handle, 'f', 4)
                    trial_values = read_array_from_binary_file(handle, 'f', 4, int(trial_length))

                    # every trial must be consumed, but only requested channels are kept
                    if channel_index in channel_list:
                        subjects_data[subject_index][channel_index].append(list(trial_values))

        log(f'Finished reading filtered data for subject {subject_index + 1}',
            file=log_file, widget=widget)

        if only_two_subjects and parsed == 2:
            break

    log(f'End time of parsing: {datetime.datetime.now()}', file=log_file, widget=widget)
    return subjects_data
def graph_regions_plot_individual(matrices_directory, output_directory, trial_index, window_index, is_trial=False,
                                  widget=None, normalize=True, should_filter=True):
    """Plot the region graph of one trial (or one window of a trial).

    The plot is written below
    ``<output_directory>/GraphWavenetAdjacency/<Trial|Window>/<trial_index>/Individual``
    and the matrices are read from
    ``<matrices_directory>/<Trial|Window>/<trial_index>``. Every missing
    level of the output path is created.
    """
    scope = 'Trial' if is_trial else 'Window'

    # descend level by level, creating each missing directory
    for level in ('GraphWavenetAdjacency', scope, f'{trial_index}', 'Individual'):
        output_directory = os.path.join(output_directory, level)
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

    log(f"Started graph regions plot individual: {datetime.datetime.now()}", file=None, widget=widget)

    # locate the input matrices
    matrices_directory = os.path.join(matrices_directory, scope, f'{trial_index}')

    node_size, node_edges = aggregate_channels(matrices_directory, trial_index, window_index, is_trial, normalize,
                                               should_filter)

    # window plots carry the window index in their title
    title = f'{trial_index}' if is_trial else f'{trial_index} {window_index}'

    plot_graph(node_edges, node_size, title, output_directory, plt.cm.Blues, INTERVAL_START, INTERVAL_END)

    log(f"Finished graph regions plot individual: {datetime.datetime.now()}", file=None, widget=widget)
Example #9
0
def _log_threshold_counts(values, log_file, prefix=''):
    """Log how many of ``values`` are >= each threshold 0.0, 0.1, ..., 1.0."""
    for step in range(11):
        # Derive the threshold from an integer step. Repeatedly adding 0.1
        # accumulates float error (0.30000000000000004, 0.7999999999999999,
        # ...), which skews both the logged value and the comparison.
        threshold = step / 10
        count = sum(1 for value in values if value >= threshold)
        log(f'{prefix}Number connections for threshold {threshold}: {count}',
            file=log_file,
            widget=None)


def compute_histogram(input_matrix, widget, output_directory, is_trial):
    """Log threshold counts and write a weight histogram as an HTML plot.

    Args:
        input_matrix: for a trial, one iterable of weights; otherwise an
            iterable of such iterables, one per window.
        widget: unused here; log calls are made with ``widget=None``.
        output_directory: parent folder; results go to its ``Histogram``
            sub-folder (created when missing) as ``histogram_details.txt``
            and ``WeightHistogram.html``.
        is_trial: selects the single-histogram (trial) or the overlaid
            per-window layout.
    """
    output_directory = os.path.join(output_directory, 'Histogram')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    log_file = os.path.join(output_directory, 'histogram_details.txt')
    bins = dict(start=0.0, end=1.0, size=0.1)

    if is_trial:
        _log_threshold_counts(input_matrix, log_file)
        fig = go.Figure(data=[go.Histogram(x=input_matrix, xbins=bins)])
    else:
        fig = go.Figure()

        for window, matrix in enumerate(input_matrix):
            log(f'Window {window}', file=log_file, widget=None)
            _log_threshold_counts(matrix, log_file, prefix='\t')

            fig.add_trace(
                go.Histogram(x=matrix, xbins=bins, name=f'Window {window}'))

        fig.update_layout(barmode='overlay')
        # Reduce opacity to see both histograms
        fig.update_traces(opacity=0.75)

    plotly.offline.plot(fig,
                        filename=os.path.join(output_directory,
                                              'WeightHistogram.html'),
                        auto_open=False)
Example #10
0
def graph_minimum_spanning_arborescence(output_directory, is_trial, graphs,
                                        adjacency_matrices, widget,
                                        properties_dict):
    """Compute the minimum spanning arborescence of every graph and record its weight.

    For each graph the edge weights are looked up in the matching adjacency
    matrix, summed, logged (total and per-edge average) and stored under
    ``MSA_WEIGHT`` in ``properties_dict`` (per window unless ``is_trial``).
    The edge lists of all arborescences are pickled to ``MSAList.bin`` in the
    ``MaximumSpanningArborescence`` sub-folder of ``output_directory``.
    """
    def channel_key(node):
        # first CHANNELS_DICT key mapped to this node, 0 when there is none
        return next(
            (key for key in CHANNELS_DICT if CHANNELS_DICT[key] == node), 0)

    output_directory = os.path.join(output_directory,
                                    'MaximumSpanningArborescence')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    log_file = os.path.join(output_directory,
                            'MaximumSpanningArborescence.txt')

    msa_list = []
    for window, graph in enumerate(graphs):
        arborescence = nx.minimum_spanning_arborescence(graph)
        msa_list.append(arborescence.edges())

        edge_count = 0
        weight = 0
        for edge in arborescence.edges():
            row = channel_key(edge[0])
            column = channel_key(edge[1])
            weight += adjacency_matrices[window][row][column]
            edge_count += 1

        if not is_trial:
            log(f'Window {window}', file=log_file)
        log(f'\t Weight: {weight}', file=log_file)
        log(f'\t Weight average: {weight / (edge_count * 1.0)}', file=log_file)

        # trials keep a flat property dict, windows one dict per window
        target = properties_dict if is_trial else properties_dict[window]
        target[MSA_WEIGHT] = weight

    with open(os.path.join(output_directory, 'MSAList.bin'), 'wb+') as f:
        pickle.dump(msa_list, f)
Example #11
0
def raw_data_filter(data_directory, output_directory, degree_of_parallelism,
                    trial_filter_length, widget):
    """Filter the raw data of every subject directory with worker threads.

    One ``ParsingThread`` is started per directory found below
    ``data_directory``; threads run in batches of ``degree_of_parallelism``.
    Start time, elapsed seconds and end time are logged to ``widget``.
    """
    # find the subject's directories; the first os.walk entry is
    # data_directory itself, so drop it
    subjects_directories = [x[0] for x in os.walk(data_directory)][1:]

    running_threads = []

    log("Start time: " + str(datetime.datetime.now()), None, widget)
    start_time = time.time()

    # subject numbers are 1-based positions in the directory list
    for subject_number, subject_directory in enumerate(subjects_directories,
                                                       start=1):
        # create thread
        # specify the subject directory and subject number
        subject_thread = ParsingThread(subject_number,
                                       "thread-" + str(subject_number),
                                       subject_directory, subject_number,
                                       output_directory, trial_filter_length,
                                       widget)

        # start thread
        subject_thread.start()
        running_threads.append(subject_thread)

        # once a full batch is running, wait for it before starting more
        if subject_number % degree_of_parallelism == 0:
            for running_thread in running_threads:
                running_thread.join()
            running_threads = []

    # Wait for the last, partial batch too. Without this the timings below
    # were logged (and the function returned) while workers were still
    # running.
    for running_thread in running_threads:
        running_thread.join()

    log("--- %s seconds ---" % (time.time() - start_time), None, widget)
    log("End time: " + str(datetime.datetime.now()), None, widget)
Example #12
0
def graph_shortest_path(output_directory, is_trial, graphs, widget,
                        properties_dict):
    """Log and store shortest-path statistics for every graph.

    Per graph two values are produced: the ``nx.average_shortest_path_length``
    result, and the mean over all ordered node pairs of the sum of
    ``1 - weight`` along the weighted shortest path between them. Both are
    written to ``ShortestPath.txt`` in the ``ShortestPath`` sub-folder and
    saved in ``properties_dict`` (per window unless ``is_trial``).
    """
    output_directory = os.path.join(output_directory, 'ShortestPath')
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    log_file = os.path.join(output_directory, 'ShortestPath.txt')
    pair_count = NUMBER_OF_CHANNELS * (NUMBER_OF_CHANNELS - 1.0)

    for window, graph in enumerate(graphs):
        all_paths = nx.shortest_path(graph, weight='weight')

        average = 0
        for source, paths_from_source in all_paths.items():
            for target, path in paths_from_source.items():
                if source == target:
                    continue

                # accumulate (1 - weight) over consecutive nodes of the path
                path_weight = 0
                for previous, current in zip(path, path[1:]):
                    path_weight = path_weight + 1 - graph.get_edge_data(
                        previous, current)['weight']

                average += path_weight
        average /= pair_count

        shortest_path = nx.average_shortest_path_length(graph)

        if not is_trial:
            log(f'Window {window}: ', file=log_file)
        log(f'\tAverage shortest path length: {shortest_path}', file=log_file)
        log(f'\tAverage maximum weight path: {average}', file=log_file)

        # trials keep a flat property dict, windows one dict per window
        target_properties = properties_dict if is_trial else properties_dict[window]
        target_properties[AVG_SHORTEST_PATH] = shortest_path
        target_properties[AVG_MAX_WEIGHT_PATH] = average
Example #13
0
def initialize_model(channel_list, dual_dataset_cross_loader, dual_dataset_test_loader, dual_dataset_train_loader,
                     dual_log_file_channels_txt, dual_log_file_html, dual_log_file_response_csv,
                     dual_log_file_response_txt, dual_log_file_stimulus_csv, dual_log_file_stimulus_txt,
                     dual_model_path, example_length, log_file, number_of_subjects, response_classes, stimulus_classes,
                     widget, with_visdom):
    """Create, initialise, fit, evaluate and save the dual ensemble classifier.

    The model gets one layer-size list per task (stimulus and response) and
    the number of channels. Weights are initialised with ``xavier_normal_``,
    the model is fitted for 5 epochs on the train/cross loaders, predictions
    on the test loader are written to the given log files and the model is
    saved to ``dual_model_path``.
    """
    # optional VISDOM plotting
    viz = Visdom(port=8097, server='http://localhost', base_url='/') if with_visdom else None

    # layer sizes: input, hidden, output for each of the two classifiers
    stimulus_layers = [example_length,
                       int(example_length * 2 / 3 + STIMULUS_OUTPUT_SIZE),
                       STIMULUS_OUTPUT_SIZE]
    response_layers = [example_length,
                       int(example_length * 2 / 3 + RESPONSE_OUTPUT_SIZE),
                       RESPONSE_OUTPUT_SIZE]
    dual_model = DualEnsembleClassifierModel((stimulus_layers, response_layers), len(channel_list))
    log(dual_model, file=log_file, widget=None)

    initializer = WeightInitializer()
    initializer.init_weights(dual_model, 'xavier_normal_', {'gain': 0.02})

    log(f"Started dual training: {datetime.datetime.now()}", file=log_file, widget=widget)

    dual_model.fit(viz, "dual", dual_dataset_train_loader, dual_dataset_cross_loader, dual_log_file_html,
                   number_epochs=5,
                   learning_rate=0.001,
                   widget=widget)

    dual_model.predict(dual_dataset_test_loader,
                       dual_log_file_stimulus_csv, dual_log_file_stimulus_txt,
                       dual_log_file_response_csv, dual_log_file_response_txt,
                       dual_log_file_channels_txt,
                       stimulus_classes, response_classes, number_of_subjects)

    dual_model.save_model_to_file(dual_model_path)

    log(f"End dual training: {datetime.datetime.now()}", file=log_file, widget=widget)
def _next_run_number(experiment_root):
    """Return one more than the highest ``N`` among sub-folders named ``<word> N``.

    Folders whose second whitespace-separated token is missing or not an
    integer are ignored; a directory without any numbered folder yields 1.
    """
    run_numbers = []
    for folder in next(os.walk(experiment_root))[1]:
        try:
            run_numbers.append(int(folder.split()[1]))
        except (IndexError, ValueError):
            continue
    return max(run_numbers, default=0) + 1


def recurrent_graph_wavenet(dots_folder_path,
                            trial_division_file_path,
                            output_path,
                            subject_number,
                            trial_index,
                            window_index,
                            input_length,
                            output_length,
                            batch_size,
                            loader_option,
                            widget,
                            blocks,
                            layers,
                            number_of_epochs,
                            initial_train_percentage,
                            increase_train_percentage,
                            use_functional_network,
                            functional_network_path,
                            use_previous_weight_matrix,
                            previous_weight_matrix_path,
                            include_cross,
                            use_gpu,
                            is_experiment=True):
    """Configure and run one graph wavenet training.

    The run writes its arguments, log and results below
    ``<output_path>/GraphWavenet``: in a fresh ``Run N`` folder when
    ``is_experiment`` is true, otherwise in a folder named after the trial
    (and window, for the ``'Window'`` loader). Optional support matrices are
    loaded from the functional network and/or a previous weight matrix, the
    data loaders are built according to ``loader_option`` (``'Window'`` or
    ``'Trial'``) and a ``TrainEngine`` is trained. Without ``include_cross``
    a full training is done and the resulting weight matrix is saved below
    ``previous_weight_matrix_path``.
    """
    # set device
    if use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cpu")

    # use about 75% of the cores, but never fewer than one thread:
    # os.cpu_count() may return None and int(1 * 0.75) is 0
    torch.set_num_threads(max(1, int((os.cpu_count() or 1) * 0.75)))

    # set the highest priority to the process (if unix); this is best effort
    # because lowering the niceness fails without sufficient privileges
    if platform.uname().system == 'Linux':
        try:
            os.nice(-40)
        except OSError as error:
            log(f'Could not raise process priority: {error}', None, widget)

    output_path = os.path.join(output_path, 'GraphWavenet')

    if is_experiment:
        # every experiment gets its own, consecutively numbered run folder
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        output_path = os.path.join(output_path,
                                   f'Run {_next_run_number(output_path)}')
        os.makedirs(output_path)
    else:
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        if loader_option == 'Window':
            output_path = os.path.join(output_path,
                                       f'{trial_index}_{window_index}')
        else:
            output_path = os.path.join(output_path, f'{trial_index}')
        os.makedirs(output_path)

    arguments_file_path = os.path.join(output_path, 'arguments.json')
    save_running_parameters(
        batch_size, blocks, dots_folder_path, increase_train_percentage,
        initial_train_percentage, input_length, layers, loader_option,
        number_of_epochs, output_length, output_path, subject_number,
        trial_division_file_path, trial_index, window_index,
        arguments_file_path, use_functional_network, functional_network_path,
        use_previous_weight_matrix, previous_weight_matrix_path, include_cross)

    log_file = os.path.join(output_path, 'log.txt')
    log(f'Graph wavenet start: {str(datetime.datetime.now())}', log_file,
        widget)

    # collect the optional support matrices and move them to the device
    supports = None
    if use_functional_network:
        supports = load_functional_network(functional_network_path,
                                           subject_number, trial_index)

    if use_previous_weight_matrix:
        if supports is None:
            supports = []
        supports.extend(
            load_previous_weight_matrix(previous_weight_matrix_path,
                                        loader_option, trial_index,
                                        window_index))

    if supports is not None:
        supports = [x.to(device) for x in supports]

    # stays None for any loader option other than 'Window' / 'Trial'
    loader_splits = None

    if loader_option == 'Window':
        loader_splits = create_loader_window(
            dots_folder_path=dots_folder_path,
            subject_number=subject_number,
            trial_index=trial_index,
            window_index=window_index,
            input_length=input_length,
            output_length=output_length,
            batch_size=batch_size,
            shuffle=True,
            trial_division_file_path=trial_division_file_path,
            output_path=output_path,
            initial_train_percentage=initial_train_percentage,
            increase_train_percentage=increase_train_percentage,
            include_cross=include_cross)
    elif loader_option == 'Trial':
        loader_splits = create_loader_trial(
            dots_folder_path=dots_folder_path,
            subject_number=subject_number,
            trial_index=trial_index,
            input_length=input_length,
            output_length=output_length,
            batch_size=batch_size,
            shuffle=True,
            trial_division_file_path=trial_division_file_path,
            output_path=output_path,
            initial_train_percentage=initial_train_percentage,
            increase_train_percentage=increase_train_percentage,
            include_cross=include_cross)

    train_engine = TrainEngine(number_of_nodes=NUMBER_OF_CHANNELS,
                               blocks=blocks,
                               layers=layers,
                               loader_splits=loader_splits,
                               log_file=log_file,
                               widget=widget,
                               output_directory=output_path,
                               number_of_epochs=number_of_epochs,
                               use_previous_model=False,
                               input_length=input_length,
                               output_length=output_length,
                               supports=supports,
                               device=device)

    if include_cross:
        train_engine.train()
    else:
        train_engine.full_train()

        # store the learned weight matrix under Trial/<trial> or
        # Window/<trial>/<window>, creating the folders as needed
        matrix_path = previous_weight_matrix_path
        if loader_option == 'Trial':
            matrix_path = os.path.join(matrix_path, 'Trial', f'{trial_index}')
            if not os.path.exists(matrix_path):
                os.makedirs(matrix_path)

        else:
            matrix_path = os.path.join(matrix_path, 'Window', f'{trial_index}')
            if not os.path.exists(matrix_path):
                os.makedirs(matrix_path)
            matrix_path = os.path.join(matrix_path, f'{window_index}')
            if not os.path.exists(matrix_path):
                os.makedirs(matrix_path)

        train_engine.save_weight_matrix(matrix_path)

    log(f'Graph wavenet end: {str(datetime.datetime.now())}', log_file, widget)
Example #15
0
    def __new__(cls, *args, **kwargs):
        """Log the construction of ``cls`` and allocate the new instance.

        The positional arguments are only used for the log message;
        initialization itself is left to the regular ``__init__`` call.
        """
        log('created %s with params %s' % (str(cls), str(args)))

        # Do not call __init__ here: after __new__ returns an instance of
        # cls, Python invokes __init__ with the same arguments by itself, so
        # an explicit call made every object get initialized twice.
        return super(Model, cls).__new__(cls)
    def reconstruct_signal_from_loader(self, split_index, loader, mean, std):
        """Evaluate the best model of a split on ``loader`` and plot the signals.

        For every batch the loss is logged to ``cross_loss_<split>.txt`` and
        the first real/predicted value of each node is collected. The MASE
        averaged over the batches is logged per channel and overall to
        ``cross_mase_<split>.txt``. When ``self.reconstruct_signal`` is set,
        the collected values are rescaled with ``std`` and ``mean`` and added
        to the shared figure, which is saved as ``CrossSignal_<split>.html``.

        Raises:
            NotImplementedError: if ``self.output_length`` is not 1.
        """
        cross_mase_file = os.path.join(self.output_directory,
                                       f'cross_mase_{split_index}.txt')
        cross_loss_file = os.path.join(self.output_directory,
                                       f'cross_loss_{split_index}.txt')

        if self.output_length != 1:
            raise NotImplementedError(
                'GW with output greater than 1 not implemented.')

        real_list = [[] for _ in range(self.number_of_nodes)]
        predicted_list = [[] for _ in range(self.number_of_nodes)]

        mase = np.zeros(self.number_of_nodes)
        batch_count = 0

        for batch_input, batch_real in loader:
            batch_input = batch_input.float().to(self.device)
            batch_real = batch_real.float().to(self.device)

            # pad one zero at the beginning
            batch_input = nn.functional.pad(batch_input, (1, 0, 0, 0))

            # get prediction, then swap the 2nd and 4th dimension
            predicted = self.best_model[split_index](batch_input)
            predicted = predicted.transpose(1, 3)

            loss = self.loss_function(batch_real, predicted)
            log(f'Loss: {loss}', file=cross_loss_file)

            for node in range(self.number_of_nodes):
                real_list[node].append(float(batch_real[0][0][node][0]))
                predicted_list[node].append(float(predicted[0][0][node][0]))

            mase += self.mean_absolute_scaled_error(batch_input, batch_real,
                                                    predicted)
            batch_count += 1

        mase = mase / batch_count

        for node in range(self.number_of_nodes):
            log(f'Channel {node}. MASE: {mase[node]}',
                file=cross_mase_file,
                widget=None)

        log(f'Overall MASE: {mase.mean()}',
            file=cross_mase_file,
            widget=None)

        if self.reconstruct_signal:
            # undo the normalisation before plotting
            real_list = (np.array(real_list) * std) + mean
            predicted_list = (np.array(predicted_list) * std) + mean

            if self.cross_signal_reconstruction_figure is None:
                self.cross_signal_reconstruction_figure = go.Figure()
                self.index_list = list(range(len(real_list[0])))

            figure = self.cross_signal_reconstruction_figure
            for node in range(self.number_of_nodes):
                # align the traces with the tail of the shared x axis
                x_values = self.index_list[-len(real_list[node]):]

                # the real signal is only drawn on the first call
                if not self.was_real_plotted:
                    figure.add_trace(
                        go.Scatter(x=x_values,
                                   y=real_list[node],
                                   mode='lines',
                                   name=f'Real ch. {node}_{split_index}'))

                figure.add_trace(
                    go.Scatter(x=x_values,
                               y=predicted_list[node],
                               mode='lines',
                               name=f'Predict ch. {node}_{split_index}'))

            plotly.offline.plot(figure,
                                filename=os.path.join(
                                    self.output_directory,
                                    f'CrossSignal_{split_index}.html'),
                                auto_open=False)

        self.was_real_plotted = True
Example #17
0
import socket
import sys
import time

import spotipy
from util import util

# address of the conductor server
HOST = "localhost"
PORT = 8000

# exactly one command-line argument, the username, is required
if len(sys.argv) != 2:
    util.log("Usage: python %s username" % sys.argv[0])
    sys.exit()
username = sys.argv[1]

# gather the required scope and authorize the user
scope = util.gatherScope()
sp = util.promptAuth(username, scope)

while (True):
	#make request to conductor server
	sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
	try:
	    sock.connect((HOST, PORT))
	    responseArr = sock.recv(1024).strip().split("|")
	    conductorTrackURI = responseArr[0]
	    conductorTrackProgressMS = int(responseArr[1])
	except KeyboardInterrupt:
    def train(self):
        """Train one model per loader split with validation-based early stopping.

        Each entry of ``self.loader_splits`` is indexed as
        ``(train_loader, cross_loader, mean, std)``. For every split the
        method:

        * calls ``self.create_model()``, or, when ``self.use_previous_model``
          is set and this is not the first split, continues from a deep copy
          of the previous entry of ``self.best_model``;
        * trains for at most ``self.number_of_epochs`` epochs, logging every
          batch loss and stepping ``self.scheduler`` once per epoch;
        * after each epoch computes the mean loss over ``cross_loader`` and
          stores a deep copy of the model whenever that loss does not exceed
          the best one seen so far; training stops after 10 consecutive
          epochs without such an update;
        * plots the losses, saves the stored model as
          ``model_<split_index>.model`` in ``self.output_directory`` and
          calls ``self.reconstruct_signal_from_loader`` on the cross loader.

        Side effects: appends to ``self.best_model``, rebinds ``self.model``
        and sets ``self.actual_epochs`` to the number of epochs actually run
        for the last split.
        """
        # consecutive epochs without a better cross loss before stopping early
        patience = 10

        log(f'Start training time: {str(datetime.datetime.now())}',
            self.log_file, self.widget)

        number_of_splits = len(self.loader_splits)

        for split_index in range(number_of_splits):

            log(
                f'Train {split_index + 1}/{number_of_splits}. Start time: {str(datetime.datetime.now())}',
                self.log_file, self.widget)
            loader_split = self.loader_splits[split_index]
            train_loader = loader_split[0]
            cross_loader = loader_split[1]
            mean = loader_split[2]
            std = loader_split[3]

            # either warm-start from the previous split's stored model or
            # build a new one; in both cases a snapshot becomes the initial
            # "best" model of this split
            if split_index != 0 and self.use_previous_model:
                self.model = copy.deepcopy(self.best_model[-1])
            else:
                self.create_model()
            self.best_model.append(copy.deepcopy(self.model))

            # infinity guarantees the first finite cross loss is accepted,
            # whatever the scale of the loss function
            min_cross_error = float('inf')
            last_update = 0

            self.create_optimizer()

            losses = []
            cross_epoch_loss = []

            self.actual_epochs = 0

            for epoch in range(self.number_of_epochs):
                self.actual_epochs += 1

                self.model.train()

                log(
                    f'Epoch: {epoch + 1}/{self.number_of_epochs}. Start time: {str(datetime.datetime.now())}',
                    self.log_file, self.widget)

                for inputs, real in train_loader:
                    inputs = inputs.float().to(self.device)
                    real = real.float().to(self.device)

                    self.optimizer.zero_grad()

                    # pad one zero at the beginning of the last dimension
                    inputs = nn.functional.pad(inputs, (1, 0, 0, 0))

                    # get prediction
                    predicted = self.model(inputs)

                    # swap dimensions 1 and 3 so the prediction lines up
                    # with ``real``
                    predicted = predicted.transpose(1, 3)

                    # compute loss
                    loss = self.loss_function(predicted, real)

                    # compute gradient
                    loss.backward()

                    # clip gradient
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   self.clipping_gradient)

                    # update model
                    self.optimizer.step()

                    # log loss
                    loss_value = float(loss)
                    log(
                        f'\tLoss: {loss_value}; Time: {str(datetime.datetime.now())}',
                        self.log_file, self.widget)
                    losses.append(loss_value)

                self.scheduler.step()

                cross_loss = 0
                count = 0

                self.model.eval()

                # validation needs no gradients; disabling autograd avoids
                # building a graph for every cross-validation batch
                with torch.no_grad():
                    for inputs, real in cross_loader:
                        inputs = inputs.float().to(self.device)
                        real = real.float().to(self.device)

                        # pad one zero at the beginning of the last dimension
                        inputs = nn.functional.pad(inputs, (1, 0, 0, 0))

                        # get prediction
                        predicted = self.model(inputs)
                        predicted = predicted.transpose(1, 3)

                        # same argument order as in training, so that an
                        # asymmetric loss measures the same quantity
                        loss = self.loss_function(predicted, real)

                        cross_loss += float(loss)
                        count += 1

                cross_epoch_loss.append(cross_loss / count)
                log(
                    f'Cross Loss: {cross_epoch_loss[-1]}; Last update: {last_update}; '
                    f'Time: {str(datetime.datetime.now())}', self.log_file,
                    self.widget)

                if cross_epoch_loss[-1] <= min_cross_error:
                    self.best_model[-1] = copy.deepcopy(self.model)
                    last_update = 0
                    min_cross_error = cross_epoch_loss[-1]

                    log('New best model!', self.log_file, self.widget)

                else:
                    last_update += 1

                if last_update >= patience:
                    log("EARLY STOP", self.log_file, self.widget)
                    break

            # NOTE(review): when training ends without an early stop, the
            # final model replaces the stored best one even if an earlier
            # epoch had a lower cross loss - confirm this is intended.
            if last_update < patience:
                self.best_model[split_index] = copy.deepcopy(self.model)

            self.best_model[split_index].eval()

            self.plot_train_loss(losses, cross_epoch_loss, split_index)

            self.best_model[split_index].save_model_to_file(
                os.path.join(self.output_directory,
                             f'model_{split_index}.model'))

            with torch.no_grad():
                self.reconstruct_signal_from_loader(split_index, cross_loader,
                                                    mean, std)

        log(f'End training time: {str(datetime.datetime.now())}',
            self.log_file, self.widget)
    def full_train(self):
        """Train a single model on the first split's training loader.

        No cross-validation is done here: the model created by
        ``self.create_model()`` is trained for exactly
        ``self.number_of_epochs`` epochs, then stored as the only entry of
        ``self.best_model``, switched to eval mode, its batch losses are
        plotted and it is saved as ``model.model`` in
        ``self.output_directory``.
        """
        log(f'Train start time: {str(datetime.datetime.now())}', self.log_file,
            self.widget)

        # only the training loader of the first split is used
        train_loader = self.loader_splits[0][0]

        self.create_model()
        self.create_optimizer()

        losses = []
        self.actual_epochs = 0

        for epoch_number in range(1, self.number_of_epochs + 1):
            self.actual_epochs += 1

            self.model.train()

            log(
                f'Epoch: {epoch_number}/{self.number_of_epochs}. Start time: {str(datetime.datetime.now())}',
                self.log_file, self.widget)

            for batch_input, batch_real in train_loader:
                batch_input = batch_input.float().to(self.device)
                batch_real = batch_real.float().to(self.device)

                self.optimizer.zero_grad()

                # one zero is prepended along the last dimension before the
                # forward pass; dimensions 1 and 3 of the output are then
                # swapped to line up with the target
                padded_input = nn.functional.pad(batch_input, (1, 0, 0, 0))
                predicted = self.model(padded_input).transpose(1, 3)

                loss = self.loss_function(predicted, batch_real)
                loss.backward()

                # keep the gradient norm bounded before the update
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.clipping_gradient)
                self.optimizer.step()

                loss_value = float(loss)
                log(
                    f'\tLoss: {loss_value}; Time: {str(datetime.datetime.now())}',
                    self.log_file, self.widget)
                losses.append(loss_value)

            self.scheduler.step()

        # the model from the last epoch is the one kept, plotted and saved
        trained_model = self.model
        self.best_model = [trained_model]
        trained_model.eval()

        self.plot_train_loss(losses, None, 0)

        model_path = os.path.join(self.output_directory, 'model.model')
        self.best_model[0].save_model_to_file(model_path)

        log(f'End training time: {str(datetime.datetime.now())}',
            self.log_file, self.widget)
def plot_heatmaps(multiple_runs_path, plot_weight_heatmaps,
                  plot_collapsed_weight_heatmaps,
                  plot_collapsed_weight_heatmaps_aligned, output_path, widget):
    """Plot heatmaps of the run-averaged input weights of every configuration.

    Every sub-directory of ``multiple_runs_path`` is treated as one
    configuration whose name ends in two ``_``-separated integers (passed to
    ``get_example_length``). All directories found below it by ``os.walk``
    are treated as individual runs, each holding a
    ``<configuration>_DUAL.model`` file. The stimulus and response input
    weights of all runs are averaged, standardized with their global mean and
    standard deviation, and plotted below ``<output_path>/ModelAnalytics``.

    Args:
        multiple_runs_path: directory containing one folder per configuration.
        plot_weight_heatmaps: when truthy, save the per-channel heatmaps.
        plot_collapsed_weight_heatmaps: when truthy, call
            ``plot_collapsed_heatmaps`` into a ``CollapsedHeatmaps`` folder.
        plot_collapsed_weight_heatmaps_aligned: same, into a
            ``CollapsedHeatmapsAligned`` folder with the last flag set.
        output_path: directory in which ``ModelAnalytics`` is created.
        widget: forwarded to ``log`` for progress messages.
    """
    data_directory = multiple_runs_path

    # create output directory
    heatmap_output_directory = os.path.join(output_path, 'ModelAnalytics')
    os.makedirs(heatmap_output_directory, exist_ok=True)

    # for each configuration
    for division_name in os.listdir(data_directory):

        # The listing entry itself is the configuration name; splitting the
        # joined path on a backslash only worked on Windows.
        directory = os.path.join(data_directory, division_name)

        # stray files next to the configuration folders cannot hold runs
        if not os.path.isdir(directory):
            continue

        # create configuration heatmap directory
        division_directory_heatmap = os.path.join(heatmap_output_directory,
                                                  division_name)
        os.makedirs(division_directory_heatmap, exist_ok=True)

        # the two trailing integers of the name parameterize the configuration
        name_fields = division_name.split('_')
        first_argument = int(name_fields[-2])
        second_argument = int(name_fields[-1])

        # compute example length
        example_length = get_example_length(DIVISION_LENGTH, first_argument,
                                            second_argument)

        # compute stimulus and response hidden size
        stimulus_hidden_size = get_hidden_size(example_length,
                                               STIMULUS_OUTPUT_SIZE)
        response_hidden_size = get_hidden_size(example_length,
                                               RESPONSE_OUTPUT_SIZE)

        # create a dummy model in which we will load the actual models
        model = DualEnsembleClassifierModel(
            ([example_length, stimulus_hidden_size, STIMULUS_OUTPUT_SIZE
              ], [example_length, response_hidden_size, RESPONSE_OUTPUT_SIZE]),
            NUMBER_OF_CHANNELS)

        # Every directory below the configuration is a run; the first walk
        # entry is the configuration folder itself. ``directory`` already
        # contains ``data_directory``, so it must not be joined to it again.
        runs_directories = [x[0] for x in os.walk(directory)][1:]

        # find the number of individual runs
        number_of_models = len(runs_directories)
        if number_of_models == 0:
            log(f'No runs found in {directory}', file=None, widget=widget)
            continue

        model_tensor_stimulus = None
        model_tensor_response = None

        # for each run
        for runs_directory in runs_directories:

            # load model
            model.load_model_from_file(
                os.path.join(runs_directory, division_name + '_DUAL.model'))

            tensor_list_stimulus, tensor_list_response = \
                _stack_input_weights(model)

            # the first run initializes the sums, later runs add to them
            if model_tensor_stimulus is None:
                model_tensor_response = tensor_list_response
                model_tensor_stimulus = tensor_list_stimulus
            else:
                model_tensor_response += tensor_list_response
                model_tensor_stimulus += tensor_list_stimulus

            log(f'Finished {runs_directory}', file=None, widget=widget)

        # average
        model_tensor_stimulus = model_tensor_stimulus / number_of_models
        model_tensor_response = model_tensor_response / number_of_models

        # compute std
        std_stimulus = torch.std(model_tensor_stimulus,
                                 unbiased=False).numpy().tolist()
        std_response = torch.std(model_tensor_response,
                                 unbiased=False).numpy().tolist()

        # compute mean
        mean_stimulus = torch.mean(model_tensor_stimulus).numpy().tolist()
        mean_response = torch.mean(model_tensor_response).numpy().tolist()

        # standardize response and stimulus input
        response_array = (model_tensor_response.numpy() -
                          mean_response) / std_response
        stimulus_array = (model_tensor_stimulus.numpy() -
                          mean_stimulus) / std_stimulus

        if plot_weight_heatmaps:
            # create a diverging palette (0 - white, extremities - red)
            cmap = sns.diverging_palette(10, 10, as_cmap=True)

            _plot_channel_heatmaps(response_array, 'response', cmap,
                                   division_directory_heatmap, division_name,
                                   widget)
            _plot_channel_heatmaps(stimulus_array, 'stimulus', cmap,
                                   division_directory_heatmap, division_name,
                                   widget)

        # Both collapsed variants differ only in their output folder and in
        # the last flag handed to plot_collapsed_heatmaps.
        collapsed_variants = (
            (plot_collapsed_weight_heatmaps, 'CollapsedHeatmaps', False),
            (plot_collapsed_weight_heatmaps_aligned,
             'CollapsedHeatmapsAligned', True),
        )
        for enabled, folder_name, last_flag in collapsed_variants:
            if not enabled:
                continue

            collapsed_folder = os.path.join(division_directory_heatmap,
                                            folder_name)
            os.makedirs(collapsed_folder, exist_ok=True)

            plot_collapsed_heatmaps(stimulus_array, collapsed_folder,
                                    'stimulus_input', first_argument,
                                    second_argument, DIVISION_LENGTH, True,
                                    True, last_flag)
            plot_collapsed_heatmaps(response_array, collapsed_folder,
                                    'response_input', first_argument,
                                    second_argument, DIVISION_LENGTH, False,
                                    True, last_flag)


def _stack_input_weights(model):
    """Return the stacked stimulus and response input weights of ``model``.

    Parameters whose name contains ``0.0.weight`` go to the stimulus stack;
    otherwise those containing ``1.0.weight`` go to the response stack.
    ``torch.stack`` copies the data, so the results never alias the model's
    parameters and survive loading the next run into the same model.
    """
    stimulus_weights = []
    response_weights = []

    for name, param in model.named_parameters():
        if name.find('0.0.weight') != -1:
            stimulus_weights.append(param.data)
        elif name.find('1.0.weight') != -1:
            response_weights.append(param.data)

    return torch.stack(stimulus_weights, 0), torch.stack(response_weights, 0)


def _plot_channel_heatmaps(weights, label, cmap, output_directory,
                           division_name, widget):
    """Save 8 figures of 4x4 per-channel heatmaps sharing one color range.

    Files are named ``<division_name>_<label>_heatmap_<figure index>.png``.
    """
    # compute the heatmap limits over all channels so figures are comparable
    lowest = highest = weights[0][0][0]
    for channel in range(NUMBER_OF_CHANNELS):
        lowest = min(lowest, weights[channel].min())
        highest = max(highest, weights[channel].max())

    channel = 0

    for count in range(8):
        figure, axes = plt.subplots(4, 4)

        # plot each channel, row by row
        for axis in axes.flat:
            sns.heatmap(weights[channel],
                        cmap=cmap,
                        center=0.0,
                        ax=axis,
                        cbar=False,
                        vmin=lowest,
                        vmax=highest)
            axis.set_ylabel('')
            axis.set_xlabel('')
            axis.set_xticks([])
            axis.set_yticks([])
            axis.set_title(f'{channel}', fontdict={'fontsize': 7}, pad=0)
            log(f'Plotted channel {channel}', file=None, widget=widget)
            channel += 1

        figure.savefig(
            os.path.join(output_directory,
                         f"{division_name}_{label}_heatmap_{count}.png"))
        plt.close(figure)
 def run(self):
     """Log a start message, call ``self.thread_function`` and log an exit message.

     The exit message is only logged when ``thread_function`` returns
     normally; an exception raised by it propagates to the caller.
     """
     log("Starting " + self.name, None, self.widget)
     self.thread_function()
     log("Exiting " + self.name, None, self.widget)
def graph_regions_plot_window_difference(matrices_directory, output_directory, trial_index,
                                         widget = None, normalize = True, should_filter = True):
    """Plot the change of the aggregated graph between consecutive windows.

    The window folders (named by integers) found in
    ``<matrices_directory>/Window/<trial_index>`` are aggregated with
    ``aggregate_channels``. For every pair of consecutive windows the
    difference of node sizes and of node edges is plotted with a symmetric
    color range, and a similarity score (one minus the mean absolute
    difference) is written to ``similarity.txt``. Everything is stored in
    ``<output_directory>/GraphWavenetAdjacency/Window/<trial_index>/Differences``.

    Args:
        matrices_directory: root directory of the input matrices.
        output_directory: root directory for the produced files.
        trial_index: trial whose windows are compared.
        widget: forwarded to ``log`` for the start/finish messages.
        normalize: forwarded to ``aggregate_channels``.
        should_filter: forwarded to ``aggregate_channels``.
    """
    # create output directory (intermediate folders are created as needed)
    output_directory = os.path.join(output_directory, 'GraphWavenetAdjacency', 'Window',
                                    f'{trial_index}', 'Differences')
    os.makedirs(output_directory, exist_ok = True)

    log("Started graph regions plot window difference: " + str(datetime.datetime.now()), file = None, widget = widget)

    # find input matrix
    matrices_directory = os.path.join(matrices_directory, 'Window', f'{trial_index}')

    # window folders are named by their integer index; process them in order
    folder_list = sorted(int(x) for x in next(os.walk(matrices_directory))[1])

    node_sizes_list = []
    node_edges_list = []

    for window_index in folder_list:
        node_size, node_edges = aggregate_channels(matrices_directory, trial_index, window_index, False, normalize,
                                                   should_filter)

        node_sizes_list.append(np.array(node_size))
        node_edges_list.append(np.array(node_edges))

    # difference of every window with respect to the one before it
    node_sizes_differences = [current - previous
                              for previous, current in zip(node_sizes_list, node_sizes_list[1:])]
    node_edges_differences = [current - previous
                              for previous, current in zip(node_edges_list, node_edges_list[1:])]

    # similarity = 1 - mean absolute difference
    node_similarity = [1 - abs(x).sum() / x.size for x in node_sizes_differences]
    edge_similarity = [1 - abs(x).sum() / x.size for x in node_edges_differences]

    similarity_file = os.path.join(output_directory, "similarity.txt")

    # start from a clean file so results of earlier calls are not mixed in
    if os.path.exists(similarity_file):
        os.remove(similarity_file)

    for index, (node_score, edge_score) in enumerate(zip(node_similarity, edge_similarity)):
        log(f'Window {index + 1}-{index}', file = similarity_file, widget = None)
        log(f'\tNode similarity: {node_score}', file = similarity_file, widget = None)
        log(f'\tEdge similarity: {edge_score}', file = similarity_file, widget = None)

    # with fewer than two windows there is nothing to compare; the color
    # range below would otherwise fail on an empty sequence
    if not node_sizes_differences:
        log("Finished graph regions plot window difference: " + str(datetime.datetime.now()), file = None,
            widget = widget)
        return

    maximum = max(
        max(x.max() for x in node_sizes_differences),
        max(x.max() for x in node_edges_differences)
    )
    minimum = min(
        min(x.min() for x in node_sizes_differences),
        min(x.min() for x in node_edges_differences)
    )

    # symmetric range so that zero sits at the center of the color map
    maximum = max(maximum, abs(minimum))
    minimum = -maximum

    for index, (edges_difference, sizes_difference) in enumerate(
            zip(node_edges_differences, node_sizes_differences)):
        title = f'{trial_index} {index + 1}-{index}'

        plot_graph(edges_difference, sizes_difference, title, output_directory, plt.cm.bwr,
                   minimum, maximum)

    log("Finished graph regions plot window difference: " + str(datetime.datetime.now()), file = None, widget = widget)
def dynamic_time_warping(metrics, output_path, stimulus_pairs,
                         trial_dictionary):
    """Compare stimulus pairs metric by metric with dynamic time warping.

    For every metric and every ``(first, second)`` stimulus pair, the metric
    values of all windows of the stimuli's trials are collected into two
    series. The shorter series is the query and the longer one the template.
    Both are min-max normalized and aligned with ``dtw.warping_paths``. The
    similarity score, the matched pairs of the best path and a plot of the
    warping paths are written to ``<output_path>/DynamicTimeWarping/<metric>``.

    Args:
        metrics: metric names used as keys of the per-window dictionaries.
        output_path: directory in which ``DynamicTimeWarping`` is created.
        stimulus_pairs: iterable of ``(first_stimulus, second_stimulus)``
            keys of ``TRIALS_FOR_STIMULUS``.
        trial_dictionary: mapping ``trial -> window -> metric -> value``.
    """
    output_path = os.path.join(output_path, "DynamicTimeWarping")
    os.makedirs(output_path, exist_ok=True)

    for metric in metrics:

        metric_dir = os.path.join(output_path, metric)
        os.makedirs(metric_dir, exist_ok=True)

        for first_stimulus, second_stimulus in stimulus_pairs:
            first_values, first_text = _collect_metric_series(
                first_stimulus, metric, trial_dictionary)
            second_values, second_text = _collect_metric_series(
                second_stimulus, metric, trial_dictionary)

            first_name = first_stimulus.split(' ')[0]
            second_name = second_stimulus.split(' ')[0]

            # the shorter series is always the query
            if len(first_values) <= len(second_values):
                query, template = first_values, second_values
                query_text, template_text = first_text, second_text
                title = first_name + "+" + second_name
            else:
                query, template = second_values, first_values
                query_text, template_text = second_text, first_text
                title = second_name + "+" + first_name

            query = np.array(query)
            template = np.array(template)

            query_normalized = _min_max_normalize(query)
            template_normalized = _min_max_normalize(template)

            _, paths = dtw.warping_paths(query_normalized,
                                         template_normalized,
                                         window=10,
                                         psi=0)
            best_path = dtw.best_path(paths)

            # accumulated cost at the end of the best path (the paths matrix
            # is indexed with an offset of one), averaged over the path length
            last_pair = best_path[-1]
            similarity = 1 - paths[last_pair[0] + 1][last_pair[1] +
                                                     1] / len(best_path)

            metric_file = os.path.join(metric_dir, f'{title}.txt')

            log(f'Similarity: {similarity}', file=metric_file)

            for pair in best_path:
                log(f'\tPair: {pair}. Match: {query_text[pair[0]]} {template_text[pair[1]]}',
                    file=metric_file)

            fig, axes = dtwvis.plot_warpingpaths(query, template, paths,
                                                 best_path)
            # replace the text drawn by the library with the similarity score
            axes[0].texts[0].set_visible(False)
            axes[0].text(0, 0, "Similarity = {:.4f}".format(similarity))

            # save and close exactly the figure that was just created
            fig.savefig(os.path.join(metric_dir, f'{title}.png'))
            plt.close(fig)


def _collect_metric_series(stimulus, metric, trial_dictionary):
    """Return the metric values and ``<trial>_<window>`` labels of a stimulus.

    The first entry of ``TRIALS_FOR_STIMULUS[stimulus]`` is skipped
    (presumably it is not a trial - TODO confirm).
    """
    values = []
    labels = []

    for trial in TRIALS_FOR_STIMULUS[stimulus][1:]:
        for window in trial_dictionary[trial]:
            values.append(float(trial_dictionary[trial][window][metric]))
            labels.append(f'{trial}_{window}')

    return values, labels


def _min_max_normalize(values):
    """Scale ``values`` to [0, 1]; a constant series maps to all zeros."""
    lowest = values.min()
    value_range = values.max() - lowest

    # a constant series would divide by zero and fill the result with NaN
    if value_range == 0:
        return np.zeros_like(values, dtype=float)

    return (values - lowest) / value_range
    def fit(self, viz, viz_name, train_loader, cross_loader, html_file, number_epochs, learning_rate, widget):
        """
        Fits the model based on the train dataset and number of epochs.

        Every batch holds, for each channel ``c``, the two model inputs at
        positions ``2 * c`` and ``2 * c + 1``; the targets of the first and
        second output are read from ``batch[-3]`` and ``batch[-2]``. The
        per-channel outputs are summed before each loss is computed.

        Args:
            viz: plotting handle; when truthy the epoch losses are also sent
                to ``self.plot_to_vizdom``.
            viz_name: forwarded to ``self.plot_to_vizdom``.
            train_loader: iterable of training batches.
            cross_loader: iterable of cross-validation batches.
            html_file: forwarded to ``self.plot_elt_and_elcv``.
            number_epochs: number of passes over ``train_loader``.
            learning_rate: learning rate of the AdamW optimizer.
            widget: forwarded to ``log`` for progress messages.
        """

        # define cost
        first_criterion = nn.CrossEntropyLoss()

        # weighted classes for response
        weights_class = torch.FloatTensor([1.0, 2.5, 1.5])
        second_criterion = nn.CrossEntropyLoss(weight = weights_class)

        # define optimizer
        optimizer = torch.optim.AdamW(self.parameters(), lr = learning_rate)

        def ensemble_outputs(batch, track_input_gradients):
            # Sum the two outputs of the model over all channels ("vote").
            first_agg_output = None
            second_agg_output = None

            for channel in range(self.number_of_channels):
                first_input = batch[channel * 2]
                second_input = batch[channel * 2 + 1]

                if track_input_gradients:
                    first_input.requires_grad = True
                    second_input.requires_grad = True

                first_output, second_output = self(first_input.float(), second_input.float(), channel)

                if channel == 0:
                    first_agg_output = first_output
                    second_agg_output = second_output
                else:
                    # out-of-place sum: the output of channel 0 is left intact
                    first_agg_output = first_agg_output + first_output
                    second_agg_output = second_agg_output + second_output

            return first_agg_output, second_agg_output

        epochs_number = list(range(1, number_epochs + 1))
        el_array = []
        cv_array = []
        first_array = []
        second_array = []
        first_cross_array = []
        second_cross_array = []

        # for each pass through the examples
        for epoch in range(number_epochs):
            epoch_loss = 0
            first_loss_epoch = 0
            second_loss_epoch = 0
            train_count = 0

            log(f'Epoch {epoch + 1}/{number_epochs}', file = None, widget = widget)

            # switch to train mode (Dropout used)
            self.train()

            # adjust the model one batch at a time
            for batch in train_loader:
                first_agg_output, second_agg_output = ensemble_outputs(batch, True)

                # reset the gradients
                optimizer.zero_grad()

                # compute loss
                first_loss = 1 + first_criterion(first_agg_output, batch[-3].long())
                second_loss = 1 + second_criterion(second_agg_output, batch[-2].long())

                loss = self.dual_loss_aggregation(first_loss, second_loss)

                # backward propagate through the network
                loss.backward()

                # update weights
                optimizer.step()

                # compute epoch loss
                epoch_loss += loss.item()
                first_loss_epoch += first_loss.item()
                second_loss_epoch += second_loss.item()
                train_count += 1

            # compute cross validation loss

            # set to eval mode (Dropout not used)
            self.eval()
            cv_loss = 0
            first_cross_loss = 0
            second_cross_loss = 0
            cross_count = 0

            # no weights are updated here, so gradients are not tracked
            with torch.no_grad():
                for batch in cross_loader:
                    first_agg_output, second_agg_output = ensemble_outputs(batch, False)

                    # compute loss
                    first_loss = 1 + first_criterion(first_agg_output, batch[-3].long())
                    second_loss = 1 + second_criterion(second_agg_output, batch[-2].long())

                    loss = self.dual_loss_aggregation(first_loss, second_loss)

                    first_cross_loss += first_loss.item()
                    second_cross_loss += second_loss.item()

                    cv_loss += loss.item()

                    cross_count += 1

            # Training losses are averaged over the training batches and
            # cross-validation losses over the cross-validation batches.
            # An empty training loader keeps yielding 0 instead of failing.
            train_batches = max(train_count, 1)
            el_array.append(epoch_loss / train_batches)
            first_array.append(first_loss_epoch / train_batches)
            second_array.append(second_loss_epoch / train_batches)
            cv_array.append(cv_loss / cross_count)
            first_cross_array.append(first_cross_loss / cross_count)
            second_cross_array.append(second_cross_loss / cross_count)

            # print to VISDOM if available
            # NOTE(review): plot_to_vizdom receives the cross-validation batch
            # count together with the summed training losses - confirm how it
            # uses that count.
            if viz:
                self.plot_to_vizdom(cross_count, cv_loss, epoch, epoch_loss, first_cross_array, first_loss_epoch,
                                    second_cross_array, second_loss_epoch, viz, viz_name)

        # plot losses
        self.plot_elt_and_elcv(epochs_number, el_array, cv_array, first_array, second_array, first_cross_array,
                               second_cross_array, html_file)
Example #25
0
def trial_window_configuration(dots_folder_path, output_directory, window_size, window_offset, threshold, widget):
    """
    Write, for every subject and trial, the timestamp ranges used by the split configuration.

    Two text files named ``<subject>_<trial>.txt`` are produced per trial under
    ``<output_directory>/TrialWindowConfiguration``:

    * ``Window/`` - filled by ``split_trial`` for the kept range(s) of the trial;
    * ``Trial/``  - one ``"<start> <end>"`` line per kept range.

    A trial whose length is at most ``threshold`` is kept whole. A longer trial is
    reduced to its first and its last ``threshold // 2`` timestamps.

    :param dots_folder_path: folder holding one sub-folder per subject
    :param output_directory: folder in which ``TrialWindowConfiguration`` is created
    :param window_size: forwarded unchanged to ``split_trial``
    :param window_offset: forwarded unchanged to ``split_trial``
    :param threshold: maximum trial length that is kept without trimming
    :param widget: forwarded unchanged to ``log``
    """
    output_directory = os.path.join(output_directory, 'TrialWindowConfiguration')
    window_output_directory = os.path.join(output_directory, 'Window')
    trial_output_directory = os.path.join(output_directory, 'Trial')
    for directory in (output_directory, window_output_directory, trial_output_directory):
        os.makedirs(directory, exist_ok = True)

    log_file = os.path.join(output_directory, "log.txt")

    log("Started creating files for split configuration for each subject and trial: " + str(datetime.datetime.now()),
        log_file, widget)

    # Subject folders are addressed directly below dots_folder_path, so only the
    # immediate sub-directories are counted (a recursive count would be inflated
    # by any folder nested inside a subject folder).
    number_of_subjects = len(next(os.walk(dots_folder_path), (None, [], []))[1])

    for subject_number in range(1, number_of_subjects + 1):
        subject_name = SUBJECT_FILE_PREFIX + get_string_from_number(subject_number)
        subject_directory = os.path.join(dots_folder_path, subject_name)

        # read the timestamps for the current subject
        event_timestamp_file_path = os.path.join(
            subject_directory, subject_name + SUBJECT_FILE_EVENT_TIMESTAMPS + SUBJECT_FILE_EXTENSION)
        timestamps = read_values_from_binary_file_one_by_one(event_timestamp_file_path, 'i', 4)

        # read the event codes for the current subject
        event_codes_file_path = os.path.join(
            subject_directory, subject_name + SUBJECT_FILE_EVENT_CODES + SUBJECT_FILE_EXTENSION)
        event_codes = read_values_from_binary_file_one_by_one(event_codes_file_path, 'i', 4)

        # pair every event code with its timestamp and keep only the relevant events
        # structure: [..., (timestamp, event), ...]
        event_code_timestamps = [(timestamp, event_code)
                                 for timestamp, event_code in zip(timestamps, event_codes)
                                 if event_code in EVENT_CODES_FILTER]

        for trial_number in range(NUMBER_OF_TRIALS):
            # the filtered events are consumed in consecutive (start, end) pairs
            trial_start_timestamp = event_code_timestamps[2 * trial_number][0]
            trial_end_timestamp = event_code_timestamps[2 * trial_number + 1][0]

            trial_length = trial_end_timestamp - trial_start_timestamp + 1

            if trial_length <= threshold:
                kept_ranges = [(trial_start_timestamp, trial_end_timestamp)]
            else:
                # keep only the head and the tail of an over-long trial
                kept_ranges = [(trial_start_timestamp, trial_start_timestamp + threshold // 2 - 1),
                               (trial_end_timestamp - threshold // 2 + 1, trial_end_timestamp)]

            file_name = f'{subject_number}_{trial_number + 1}.txt'

            # context managers guarantee both files are closed even if split_trial raises
            with open(os.path.join(window_output_directory, file_name), 'w+') as window_file, \
                    open(os.path.join(trial_output_directory, file_name), 'w+') as trial_file:
                for range_start, range_end in kept_ranges:
                    split_trial(window_file, range_start, range_end, window_size, window_offset)
                for range_start, range_end in kept_ranges:
                    print(f'{range_start} {range_end}', file = trial_file)

    log("Finished creating files for split configuration for each subject and trial: " + str(datetime.datetime.now()),
        log_file, widget)
    def predict(self, test_loader, first_file_csv, first_file_txt, second_file_csv, second_file_txt, channel_file,
                first_class_names, second_class_names, number_of_subjects):

        """
        Evaluates the model on the test dataset and writes one classification report per output head.

        For every batch the per-channel outputs are summed ("voting" ensemble) and the
        class with the largest aggregated score is taken as the prediction. The reports
        are written as CSV (``first_file_csv`` / ``second_file_csv``) and logged as text
        (``first_file_txt`` / ``second_file_txt``).

        :param test_loader: iterable of batches; inputs of channel ``c`` are read from
            ``batch[2 * c]`` and ``batch[2 * c + 1]``, the labels of the two heads from
            ``batch[-3]`` and ``batch[-2]``
        :param first_file_csv: destination of the first head's report (CSV)
        :param first_file_txt: file passed to ``log`` for the first head's text report
        :param second_file_csv: destination of the second head's report (CSV)
        :param second_file_txt: file passed to ``log`` for the second head's text report
        :param channel_file: unused, kept for interface compatibility
        :param first_class_names: class names of the first head
        :param second_class_names: class names of the second head
        :param number_of_subjects: unused, kept for interface compatibility
        """

        # predictions and labels collected over the whole dataset
        first_actual_output = []
        first_expected_output = []

        second_actual_output = []
        second_expected_output = []

        # set to eval mode (Dropout not used)
        self.eval()

        # inference only: gradients are never used here, so do not track them
        with torch.no_grad():
            # for each batch
            for batch in test_loader:
                first_agg_output = None
                second_agg_output = None

                # for each channel
                for channel in range(self.number_of_channels):
                    # compute output
                    first_output, second_output = self(batch[channel * 2].float(),
                                                       batch[channel * 2 + 1].float(), channel)

                    # "vote" = ensemble
                    if channel == 0:
                        first_agg_output = first_output
                        second_agg_output = second_output
                    else:
                        first_agg_output += first_output
                        second_agg_output += second_output

                # Get predictions from the maximum value
                _, first_predicted = torch.max(first_agg_output, 1)
                _, second_predicted = torch.max(second_agg_output, 1)

                # extend the arrays with the predicted values and corresponding labels
                first_actual_output.extend(first_predicted.tolist())
                first_expected_output.extend(batch[-3].tolist())
                second_actual_output.extend(second_predicted.tolist())
                second_expected_output.extend(batch[-2].tolist())

        # convert once; the same arrays feed both the CSV and the text report
        first_true = np.array(first_expected_output)
        first_pred = np.array(first_actual_output)
        second_true = np.array(second_expected_output)
        second_pred = np.array(second_actual_output)

        # save the classification reports as CSV
        first_report = classification_report(y_true = first_true, y_pred = first_pred,
                                             target_names = first_class_names, output_dict = True)
        first_df = pandas.DataFrame(first_report).transpose()
        first_df.to_csv(first_file_csv, index = False)

        second_report = classification_report(y_true = second_true, y_pred = second_pred,
                                              target_names = second_class_names, output_dict = True)
        second_df = pandas.DataFrame(second_report).transpose()
        second_df.to_csv(second_file_csv, index = False)

        # log the human-readable classification reports
        log(
            classification_report(y_true = first_true, y_pred = first_pred,
                                  target_names = first_class_names)
            , file = first_file_txt,
            widget = None
        )

        log(
            classification_report(y_true = second_true, y_pred = second_pred,
                                  target_names = second_class_names)
            , file = second_file_txt,
            widget = None
        )
def _mean_class_f1_score(report_csv_file):
    """Return the mean of the per-class ``f1-score`` values of a report CSV.

    The last three rows of the report are not per-class scores and are ignored
    (presumably the accuracy / macro avg / weighted avg rows of a
    classification report - confirm against the writer of the CSV).
    """
    f1_scores = pd.read_csv(report_csv_file)['f1-score']
    class_f1_scores = f1_scores.iloc[:-3]
    return sum(class_f1_scores) / len(class_f1_scores)


def _mean_and_std(values):
    """Return the mean and the population standard deviation of ``values``."""
    mean = sum(values) / len(values)
    std = sqrt(sum((value - mean)**2 for value in values) / len(values))
    return mean, std


def _add_distribution_traces(fig, distributions, multiple_runs_dict, metric,
                             row):
    """Add the curve, mean marker and std marker of every configuration.

    ``metric`` is ``'stimulus'`` or ``'response'`` and selects the
    ``mean_<metric>`` / ``std_<metric>`` entries; ``row`` is the subplot row.
    """
    names = list(multiple_runs_dict.keys())
    distribution_plot = ff.create_distplot(distributions,
                                           names,
                                           show_hist=False)

    for count, name in enumerate(names):
        # short label: the last two '_'-separated tokens of the folder name
        label = name.split('_')[-2] + '_' + name.split('_')[-1]
        color = COLOR_LIST_DISTRIBUTION_PLOTS[count]
        mean = multiple_runs_dict[name]['mean_' + metric]
        std = multiple_runs_dict[name]['std_' + metric]
        curve = distribution_plot['data'][count]

        # plot distribution
        fig.add_trace(go.Scatter(curve, name=label, line=dict(color=color)),
                      row=row,
                      col=1)

        # plot distribution mean (vertical line up to the top of the curve)
        fig.add_trace(go.Scatter(x=[mean, mean],
                                 y=[0, max(curve.y)],
                                 mode='lines+markers',
                                 name='Mean ' + label,
                                 line=dict(color=color)),
                      row=row,
                      col=1)

        # plot distribution std (horizontal segment, one height per configuration)
        fig.add_trace(go.Scatter(x=[mean - std, mean + std],
                                 y=[count * 0.5, count * 0.5],
                                 mode='lines+markers',
                                 name='Std ' + label,
                                 line=dict(color=color)),
                      row=row,
                      col=1)


def plot_distribution_for_multiple_runs(multiple_runs_directory,
                                        output_directory, widget):
    """Plot and log the f1-score distribution of every configuration.

    ``multiple_runs_directory`` holds one folder per configuration; every
    folder below a configuration folder is treated as one run and must
    contain ``<configuration>_DUAL_RESPONSE.csv`` and
    ``<configuration>_DUAL_STIMULUS.csv``. For each configuration the mean
    and population standard deviation of the per-run average f1-score are
    computed, plotted to ``Distribution_plots.html`` in ``output_directory``
    and written through ``log``.

    :param multiple_runs_directory: root folder with one folder per configuration
    :param output_directory: folder receiving the plot and the log file
    :param widget: forwarded unchanged to ``log``
    """
    # create log file (file name kept as-is so existing outputs keep their name)
    log_file = os.path.join(output_directory,
                            'Multiple_runs_statistics.txt.txt')

    multiple_runs_dict = {}

    # for each configuration
    for division_name in os.listdir(multiple_runs_directory):
        # The listing entry is the configuration name on every platform;
        # splitting the joined path on '\\' only worked on Windows.
        division_directory = os.path.join(multiple_runs_directory,
                                          division_name)

        # stray files next to the configuration folders are not configurations
        if not os.path.isdir(division_directory):
            continue

        avg_response_list = []
        avg_stimulus_list = []

        # for each run (every folder below the configuration folder)
        runs_directories = [x[0] for x in os.walk(division_directory)][1:]
        for runs_directory in runs_directories:
            avg_response_list.append(
                _mean_class_f1_score(
                    os.path.join(runs_directory,
                                 division_name + '_DUAL_RESPONSE.csv')))
            avg_stimulus_list.append(
                _mean_class_f1_score(
                    os.path.join(runs_directory,
                                 division_name + '_DUAL_STIMULUS.csv')))

        mean_response, std_response = _mean_and_std(avg_response_list)
        mean_stimulus, std_stimulus = _mean_and_std(avg_stimulus_list)

        # save parameters
        multiple_runs_dict[division_name] = {
            'avg_response_list': avg_response_list,
            'avg_stimulus_list': avg_stimulus_list,
            'mean_response': mean_response,
            'std_response': std_response,
            'mean_stimulus': mean_stimulus,
            'std_stimulus': std_stimulus,
        }

    # generate distributions to be plotted
    response_distributions = []
    stimulus_distributions = []
    for parameters in multiple_runs_dict.values():
        response_distributions.append(
            generate_distribution(parameters['std_response'],
                                  parameters['mean_response']))
        stimulus_distributions.append(
            generate_distribution(parameters['std_stimulus'],
                                  parameters['mean_stimulus']))

    # create a figure
    fig = make_subplots(rows=2,
                        cols=1,
                        subplot_titles=('Stimulus', 'Response'))

    # stimulus distributions on the first row, response on the second
    _add_distribution_traces(fig, stimulus_distributions, multiple_runs_dict,
                             'stimulus', 1)
    _add_distribution_traces(fig, response_distributions, multiple_runs_dict,
                             'response', 2)

    # Add figure title
    fig.update_layout(title_text="Performance distribution plots")

    # save figure
    plotly.offline.plot(fig,
                        filename=os.path.join(output_directory,
                                              'Distribution_plots.html'),
                        auto_open=False)

    # log distribution parameters
    for name, parameters in multiple_runs_dict.items():
        log(f'{name} :', log_file, widget)
        log(f'- Mean response: {parameters["mean_response"]}', log_file,
            widget)
        log(f'- Std response: {parameters["std_response"]}', log_file, widget)
        log(f'- Mean stimulus: {parameters["mean_stimulus"]}', log_file,
            widget)
        log(f'- Std stimulus: {parameters["std_stimulus"]}', log_file, widget)
Example #28
0
import SocketServer
import sys

import spotipy
from util import util

# constants
HOST = "localhost"
PORT = 8000

# parse arguments: exactly one command-line argument (the username) is expected;
# anything else prints the usage line and exits
if len(sys.argv) != 2:
    util.log("Usage: python %s username" % sys.argv[0])
    sys.exit()
username = sys.argv[1]

# determine necessary scope and authorize; `sp` is the client the request
# handler queries for the currently playing track
scope = util.gatherScope()
sp = util.promptAuth(username, scope)


#define request handler
class Handler(SocketServer.BaseRequestHandler):
    def handle(self):
        currentTrack = sp.current_user_playing_track()
        currentTrackURI = util.propertyToString(currentTrack["item"]["uri"])
        currentTrackProgressMS = util.propertyToString(
            currentTrack["progress_ms"])

        util.log("Request made, responding with: %s|%s" %
def _load_int_array(directory, file_name):
    """Read ``file_name`` from ``directory`` as a flat integer array."""
    return np.fromfile(os.path.join(directory, file_name), dtype=int)


def _load_channel_examples(model_directory, file_names, kind, split,
                           example_length):
    """Load one ``channel_<kind>_<split>_*.dat`` file as a 3-D array.

    The number of examples is taken from the second-to-last ``_``-separated
    token of the matching entry in ``file_names`` (the last match wins). The
    data is reshaped to ``(NUMBER_OF_CHANNELS, examples, example_length)``.

    :raises FileNotFoundError: if no file name contains the expected marker
    """
    marker = f"channel_{kind}_{split}"

    number_of_examples = None
    for file_name in file_names:
        if marker in file_name:
            number_of_examples = int(file_name.split('_')[-2])

    if number_of_examples is None:
        raise FileNotFoundError(
            f"No '{marker}_*' file found in {model_directory}")

    examples = np.fromfile(
        os.path.join(
            model_directory,
            f"{marker}_{NUMBER_OF_CHANNELS}_{number_of_examples}_{example_length}.dat"
        ))
    return np.reshape(examples,
                      (NUMBER_OF_CHANNELS, number_of_examples, example_length))


def model_classification_statistics(model_path, trial_lengths_path,
                                    output_path, median_value,
                                    generate_from_train, generate_from_cross,
                                    generate_from_test, widget):
    """Run a saved dual model over its datasets and plot classification histograms.

    The window size and offset are parsed from the last two ``_``-separated
    tokens of ``model_path``. The train / cross / test datasets are loaded
    from the ``.dat`` files stored next to the model, the requested splits are
    passed to ``model.predict_for_classification_statistics`` and the
    correctly / incorrectly classified results of all requested splits are
    aggregated into two histograms under
    ``<output_path>/ClassificationStatistics/Aggregated``.

    :param model_path: directory holding the saved model and its ``.dat`` files
    :param trial_lengths_path: directory holding ``trial_lengths.dat``
    :param output_path: directory in which ``ClassificationStatistics`` is created
    :param median_value: forwarded to the prediction and plotting helpers
    :param generate_from_train: whether to include the training split
    :param generate_from_cross: whether to include the cross-validation split
    :param generate_from_test: whether to include the test split
    :param widget: forwarded unchanged to ``log``
    """
    log("Started classification statistics: " + str(datetime.datetime.now()),
        file=None,
        widget=widget)

    model_directory = model_path
    trial_lengths_directory = trial_lengths_path

    output_directory = os.path.join(output_path, "ClassificationStatistics")
    os.makedirs(output_directory, exist_ok=True)

    window_size = int(model_directory.split('_')[-2])
    window_offset = int(model_directory.split('_')[-1])

    example_length = get_example_length(DIVISION_LENGTH, window_size,
                                        window_offset)
    stimulus_hidden_size = get_hidden_size(example_length,
                                           STIMULUS_OUTPUT_SIZE)
    response_hidden_size = get_hidden_size(example_length,
                                           RESPONSE_OUTPUT_SIZE)

    model = DualEnsembleClassifierModel(
        ([example_length, stimulus_hidden_size, STIMULUS_OUTPUT_SIZE
          ], [example_length, response_hidden_size, RESPONSE_OUTPUT_SIZE]),
        NUMBER_OF_CHANNELS)

    model.load_model_from_file(
        os.path.join(
            model_directory,
            f"Training_with_{window_size}_{window_offset}_DUAL.model"))

    # names of every file below the model directory; used to discover the
    # number of examples encoded in the channel file names
    files_list = []
    for (dirpath, dirnames, filenames) in os.walk(model_directory):
        files_list.extend(filenames)

    trial_lengths = np.fromfile(os.path.join(trial_lengths_directory,
                                             "trial_lengths.dat"),
                                dtype=int)
    trial_lengths = np.reshape(trial_lengths, (11, 180))

    # dual dataset creation, one loader per split
    # (the test split reads its own "channel_response_test_*" file; it was
    # previously read from the cross-validation file by mistake)
    loaders = {}
    for split in ("train", "cross", "test"):
        dataset = DatasetForClassificationStatistics(
            _load_channel_examples(model_directory, files_list, "stimulus",
                                   split, example_length),
            _load_channel_examples(model_directory, files_list, "response",
                                   split, example_length),
            _load_int_array(model_directory, f"stimulus_labels_{split}.dat"),
            _load_int_array(model_directory, f"response_labels_{split}.dat"),
            _load_int_array(model_directory, f"subjects_{split}.dat"),
            _load_int_array(model_directory, f"trial_index_{split}.dat"))
        loaders[split] = DataLoader(dataset, batch_size=1, shuffle=True)

    stimulus_classified = []
    stimulus_misclassified = []
    response_classified = []
    response_misclassified = []

    requested_splits = (
        (generate_from_train, 'Train', 'train'),
        (generate_from_cross, 'Cross', 'cross'),
        (generate_from_test, 'Test', 'test'),
    )
    for enabled, folder_name, split in requested_splits:
        if not enabled:
            continue

        split_output_directory = os.path.join(output_directory, folder_name)
        os.makedirs(split_output_directory, exist_ok=True)

        (split_stimulus_classified, split_stimulus_misclassified,
         split_response_classified, split_response_misclassified
         ) = model.predict_for_classification_statistics(
             loaders[split], trial_lengths, STIMULUS_OUTPUT_SIZE,
             RESPONSE_OUTPUT_SIZE, split_output_directory, split,
             median_value)

        stimulus_classified.extend(split_stimulus_classified)
        stimulus_misclassified.extend(split_stimulus_misclassified)
        response_classified.extend(split_response_classified)
        response_misclassified.extend(split_response_misclassified)

    aggregated_output_directory = os.path.join(output_directory, 'Aggregated')
    os.makedirs(aggregated_output_directory, exist_ok=True)
    plot_histogram(stimulus_classified, stimulus_misclassified,
                   'Stimulus Correctly Classified',
                   'Stimulus Incorrectly Classified',
                   aggregated_output_directory, "stimulus_classified",
                   median_value)
    plot_histogram(response_classified, response_misclassified,
                   'Response Correctly Classified',
                   'Response Incorrectly Classified',
                   aggregated_output_directory, "response_classified",
                   median_value)

    log("Finished classification statistics: " + str(datetime.datetime.now()),
        file=None,
        widget=widget)
def graph_metrics(matrices_directory, output_directory, trial_index, is_trial=False,
                  widget=None, histogram=False, percentage=0.05):
    """Build channel graphs from stored adjacency matrices and run the graph analyses.

    Input matrices are read from
    ``<matrices_directory>/<Trial|Window>/<trial_index>`` and every result is
    written below
    ``<output_directory>/GraphWavenetAdjacency/<Trial|Window>/<trial_index>``.

    In window mode (``is_trial`` false) every sub-folder of the input directory
    is read as one matrix, in ascending numeric order of the folder names; the
    folder names must therefore parse as integers. In trial mode the input
    directory itself holds the single matrix.

    For each matrix three directed graphs over the channels are built:

    * a filtered graph with the original weights, keeping only entries that
      are ``>=`` the threshold;
    * a filtered graph with inverted weights (``1.0 - weight``);
    * an unfiltered graph with inverted weights, containing every entry.

    The threshold is the value found at position ``int(percentage * n)`` of
    the matrix values sorted in descending order, so roughly the top
    ``percentage`` fraction of the entries survives the filter.

    If ``property_dict.json`` already exists in the output directory it is
    loaded (keys converted to ``int``) and deleted; the possibly updated
    dictionary is written back once all analyses have finished.

    Args:
        matrices_directory: Root directory holding the stored matrices.
        output_directory: Root directory for the analysis results.
        trial_index: Identifier of the trial; used as a folder name on both
            the input and the output side.
        is_trial: Selects the ``Trial`` sub-tree when true, ``Window`` otherwise.
        widget: Forwarded unchanged to ``log`` and to every analysis helper.
        histogram: When true, ``compute_histogram`` is run on the raw matrices
            before the graphs are built.
        percentage: Fraction used to pick the edge threshold (see above).
            Values of ``1.0`` or more keep every edge.
    """
    mode_folder = 'Trial' if is_trial else 'Window'

    # create output directory (makedirs also creates the intermediate levels)
    output_directory = os.path.join(output_directory, 'GraphWavenetAdjacency',
                                    mode_folder, f'{trial_index}')
    os.makedirs(output_directory, exist_ok=True)

    # find input matrix
    matrices_directory = os.path.join(matrices_directory, mode_folder, f'{trial_index}')

    log_file = os.path.join(output_directory, 'log.txt')

    log("Started graph analysis: " + str(datetime.datetime.now()), file=log_file, widget=widget)

    properties_dict_file_path = os.path.join(output_directory, 'property_dict.json')
    init_properties_dict = not os.path.exists(properties_dict_file_path)
    if init_properties_dict:
        properties_dict = {}
    else:
        # The keys are converted back to int after loading; the file is
        # removed here and rewritten at the end of the run.
        stored_properties = load_dictionary_from_file(properties_dict_file_path)
        properties_dict = {int(key): value for key, value in stored_properties.items()}
        os.remove(properties_dict_file_path)

    input_matrix = []
    if is_trial:
        input_matrix.append(read_and_normalize_matrix(matrices_directory))
    else:
        # Only the immediate sub-folders are used; sort numerically, not lexically.
        folder_list = sorted(int(name) for name in next(os.walk(matrices_directory))[1])
        for folder in folder_list:
            matrix_directory = os.path.join(matrices_directory, f'{folder}')
            input_matrix.append(read_and_normalize_matrix(matrix_directory))
            if init_properties_dict:
                properties_dict[folder] = {}

    if histogram:
        compute_histogram(input_matrix, widget, output_directory, is_trial)

    inversed_filtered_graphs = []
    filtered_graphs = []
    inversed_unfiltered_graphs = []
    adjacency_matrices = []
    for matrix in input_matrix:
        inversed_unfiltered = nx.DiGraph()
        inversed_filtered = nx.DiGraph()
        filtered = nx.DiGraph()

        # NOTE(review): the matrix is presumably still flat at this point (it
        # is reshaped to a square right below) - confirm against
        # read_and_normalize_matrix.
        values = sorted(matrix, reverse=True)
        # Clamp the index so that percentage >= 1.0 selects the smallest value
        # (keep every edge) instead of raising IndexError.
        threshold_index = min(int(percentage * len(values)), len(values) - 1)
        threshold = values[threshold_index]

        matrix = matrix.reshape(NUMBER_OF_CHANNELS, NUMBER_OF_CHANNELS)
        adjacency_matrices.append(matrix)

        for start in range(NUMBER_OF_CHANNELS):
            source = CHANNELS_DICT[start]
            for end in range(NUMBER_OF_CHANNELS):
                target = CHANNELS_DICT[end]
                weight = matrix[start][end]
                if weight >= threshold:
                    inversed_filtered.add_edge(source, target, weight=1.0 - weight)
                    filtered.add_edge(source, target, weight=weight)
                inversed_unfiltered.add_edge(source, target, weight=1.0 - weight)

        inversed_unfiltered_graphs.append(inversed_unfiltered)
        inversed_filtered_graphs.append(inversed_filtered)
        filtered_graphs.append(filtered)

    log("Started graph clique: " + str(datetime.datetime.now()), file=log_file, widget=widget)
    graph_clique(output_directory, is_trial, inversed_filtered_graphs, widget, properties_dict)

    log("Started graph strongly connected components: " + str(datetime.datetime.now()), file=log_file,
        widget=widget)
    graph_strongly_connected_components(output_directory, is_trial, inversed_filtered_graphs, widget, properties_dict)

    log("Started graph MSA: " + str(datetime.datetime.now()), file=log_file, widget=widget)
    graph_minimum_spanning_arborescence(output_directory, is_trial, inversed_unfiltered_graphs, adjacency_matrices,
                                        widget, properties_dict)

    log("Started graph shortest path: " + str(datetime.datetime.now()), file=log_file, widget=widget)
    graph_shortest_path(output_directory, is_trial, inversed_filtered_graphs, widget, properties_dict)

    log("Started graph clustering: " + str(datetime.datetime.now()), file=log_file, widget=widget)
    clustering(output_directory, is_trial, filtered_graphs, widget, properties_dict)

    log("Started graph centrality: " + str(datetime.datetime.now()), file=log_file, widget=widget)
    centrality(output_directory, is_trial, inversed_filtered_graphs, widget, properties_dict)

    save_dictionary_to_file(properties_dict, properties_dict_file_path)

    log("Finished graph analysis: " + str(datetime.datetime.now()), file=log_file, widget=widget)