Ejemplo n.º 1
0
    def evaluate(self, alpha_script, parent_parser):
        """
        Run an alfarvis script line by line through the parent parser.

        Parameters:
            alpha_script: object whose ``data`` exposes ``data_type`` and
                ``path``; the type must be ``DataType.alpha_script``
            parent_parser: object whose ``data`` is the parser that every
                script line is handed to

        Returns:
            ResultObject with CommandStatus.Error when the file has the wrong
            type or cannot be read; CommandStatus.Success otherwise (also
            when the script stops early on an ambiguous command).
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        if alpha_script.data.data_type is not DataType.alpha_script:
            Printer.Print("File not of alpha script type: ",
                          alpha_script.data.data_type)
            return result_object
        # Get the lines. The context manager closes the file handle even if
        # reading fails part way through.
        try:
            with open(alpha_script.data.path) as script_file:
                lines = [line.rstrip('\n') for line in script_file]
        except (OSError, TypeError, ValueError):
            # OSError: missing/unreadable file; TypeError: path is not a
            # path-like value; ValueError: undecodable content or bad path.
            Printer.Print("Alpha script not found")
            return ResultObject(None, None, None, CommandStatus.Error)
        # Update parent parser state
        parent_parser.data.clearCommandSearchResults()
        for i, line in enumerate(lines):
            line = line.lstrip()
            print("Line: ", line)
            if not line or line[0] == '#':
                continue  # Ignore blank lines and comments
            parent_parser.data.parse(line)
            if parent_parser.data.currentState == ParserStates.command_known_data_unknown:
                # i is the zero-based index of the line within the script
                Printer.Print("Ambiguous command at line: ", i)
                Printer.Print("Exiting script")
                break
        parent_parser.data.clearCommandSearchResults()
        result_object = ResultObject(None, None, None, CommandStatus.Success)

        return result_object
Ejemplo n.º 2
0
    def evaluate(self, array_datas):
        """
        Create a box plot between multiple variables

        Parameters:
            array_datas: arrays handed to
                DataGuru.transformArray_to_dataFrame to build the data frame

        Returns:
            ResultObject with CommandStatus.Error when the conversion to a
            data frame fails, otherwise the result created by
            VizContainer.createResult for the new window.
        """
        sns.set(color_codes=True)
        command_status, df, _, _ = DataGuru.transformArray_to_dataFrame(
            array_datas)
        # Check for failure before creating the window so that a failed
        # conversion does not leave an empty window/subplot behind.
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)

        win = Window.window()
        f = win.gcf()
        ax = f.add_subplot(111)

        if StatContainer.ground_truth is None or len(
                StatContainer.ground_truth.data) != df.shape[0]:
            # No usable ground truth: one box per column
            df.dropna(inplace=True)
            df.boxplot(ax=ax)
        else:
            # Group the boxes by the ground truth column
            ground_truth = StatContainer.ground_truth.name
            df[ground_truth] = StatContainer.filterGroundTruth()
            df.dropna(inplace=True)
            df.boxplot(by=ground_truth, ax=ax)
            # Clear the figure-level title after the grouped box plot
            f.suptitle("")
        win.show()

        return VizContainer.createResult(win, array_datas, ['box'])
Ejemplo n.º 3
0
 def createResult(self, out, keyword_list):
     """
     Wrap ``out`` in a successful logical-array ResultObject and name it
     from ``keyword_list`` using the 'between' command name.
     """
     between_result = ResultObject(
         out, [], DataType.logical_array, CommandStatus.Success, True)
     between_result.createName(
         keyword_list, command_name='between', set_keyword_list=True)
     return between_result
Ejemplo n.º 4
0
    def evaluate(self, image):
        """
        Display the image specified

        Parameters:
            image: result whose ``data_type`` is either
                ``DataType.file_name`` (the image is read from
                ``image.data.path``) or anything else (``image.data`` is
                shown directly); ``image.keyword_list[0]`` is used as the
                displayed name

        Returns:
            ResultObject with CommandStatus.Success (carrying the loaded
            image when it was read from a file), or CommandStatus.Error when
            the file is missing or displaying fails.
        """
        try:
            if image.data_type is DataType.file_name:
                file_path = image.data.path
                if not os.path.isfile(file_path):
                    Printer.Print("Cannot find image file: ", file_path)
                    # Return directly instead of raising just to reach the
                    # generic handler below.
                    return ResultObject(None, None, None, CommandStatus.Error)
                curr_image = imread(file_path)
                result_object = ResultObject(
                    curr_image, image.keyword_list, DataType.image,
                    CommandStatus.Success)
            else:
                curr_image = image.data
                result_object = ResultObject(
                    None, None, None, CommandStatus.Success)
            image_name = image.keyword_list[0]
            win = Window.window()
            plt.imshow(curr_image)
            plt.gca().axis('off')
            win.show()
            # Space added so the name is not glued to the word "image"
            Printer.Print("Displaying image " + image_name)
        except Exception:
            # Best effort: any failure while loading or showing the image is
            # reported as an error result. KeyboardInterrupt/SystemExit are
            # no longer swallowed.
            result_object = ResultObject(None, None, None, CommandStatus.Error)

        return result_object
Ejemplo n.º 5
0
    def evaluate(self, array_datas):
        """
        Show a clustered heatmap (seaborn clustermap) of the given arrays.

        When a ground truth is set, its values are mapped to row colors.
        Returns an error ResultObject if the arrays cannot be converted to a
        data frame, otherwise the result from VizContainer.createResult.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        sns.set(color_codes=True)
        status, frame, kl1, _ = DataGuru.transformArray_to_dataFrame(
            array_datas, remove_nan=True)
        if status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)

        Printer.Print("Displaying heatmap")
        win = Window.window()
        f = win.gcf()
        if StatContainer.ground_truth is not None:
            labels = pd.Series(StatContainer.ground_truth.data)
            # zip stops at the shorter input, so only the first three
            # distinct labels get a color from "rbg"
            palette = dict(zip(labels.unique(), "rbg"))
            label_colors = labels.map(palette)
            sns.clustermap(frame,
                           standard_scale=1,
                           row_colors=label_colors,
                           cmap="jet")
        else:
            sns.clustermap(frame,
                           cbar=True,
                           square=False,
                           annot=False,
                           cmap='jet',
                           standard_scale=1)

        win.show()
        return VizContainer.createResult(win, array_datas, ['heatmap'])
Ejemplo n.º 6
0
    def evaluate(self, array_datas):
        """
        Draw the given arrays as a line plot in a new window.

        Returns an error ResultObject when the conversion to a data frame
        fails, when no rows are left, or when the only column comes from a
        non-numeric first array; otherwise the result from
        VizContainer.createResult.
        """
        sns.set(color_codes=True)
        status, frame, kl1, title = DataGuru.transformArray_to_dataFrame(
            array_datas, useCategorical=True, expand_single=True,
            remove_nan=True)
        if status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)

        n_rows, n_cols = frame.shape[0], frame.shape[1]
        if n_rows == 0 or (
                n_cols == 1 and
                not np.issubdtype(array_datas[0].data.dtype, np.number)):
            Printer.Print("No data left to plot after cleaning up!")
            return ResultObject(None, None, None, CommandStatus.Error)

        win = Window.window()
        axes = win.gcf().add_subplot(111)
        axes.set_title(title)
        frame.plot(ax=axes)

        win.show()

        return VizContainer.createResult(win, array_datas, ['line'])
Ejemplo n.º 7
0
    def evaluate(self, array_datas):
        """
        Draw a violin plot of the given arrays in a new window.

        When a ground truth is set and its length equals the number of rows,
        the data is melted and the violins are colored by ground truth value;
        otherwise one violin per column is drawn.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        sns.set(color_codes=True)
        status, frame, kl1, cname = DataGuru.transformArray_to_dataFrame(
            array_datas, remove_nan=True)
        if status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)
        win = Window.window()
        figure = win.gcf()
        axes = figure.add_subplot(111)
        truth = StatContainer.ground_truth
        if truth is not None and len(truth.data) == frame.shape[0]:
            hue_column = " ".join(StatContainer.ground_truth.keyword_list)
            frame[hue_column] = StatContainer.filterGroundTruth()
            frame.dropna(inplace=True)
            melted = pd.melt(frame, id_vars=hue_column)
            sns.violinplot(data=melted, ax=axes, x='variable', y='value',
                           hue=hue_column)
        else:
            frame.dropna(inplace=True)
            sns.violinplot(data=frame, ax=axes)

        win.show()

        return VizContainer.createResult(win, array_datas, ['violin'])
Ejemplo n.º 8
0
    def read(self, file_path, keyword_list):
        """
        Create a model and its properties from the given file.

        Parameters:
            file_path: location passed to self.createProperties
            keyword_list: keywords used to store and name the result

        Returns:
            ResultObject with CommandStatus.Error when the properties or the
            model cannot be created; otherwise a cached
            DataType.algorithm_arg result whose data is
            [model, property_data, model_name, self.updateModel].
        """
        try:
            property_data, model_name = self.createProperties(file_path)
            model = DataGuru.createModel(property_data, model_name)
        except Exception:
            # Any failure while building the properties/model is reported
            # with the same message; KeyboardInterrupt/SystemExit are no
            # longer swallowed.
            Printer.Print("File not found")
            return ResultObject(None, None, None, CommandStatus.Error)

        command_status = CommandStatus.Success
        result_data = [model, property_data, model_name, self.updateModel]
        result_object = ResultObject(result_data,
                                     keyword_list,
                                     DataType.algorithm_arg,
                                     command_status,
                                     add_to_cache=True)
        result_object.createName(keyword_list)

        # A property editor is only attached when both GUI hooks are set
        if (PropertyEditor.parent_widget is None
                or PropertyEditor.property_editor_class is None):
            Printer.Print("Cannot modify algorithm properties in non-GUI mode")
        else:
            property_editor = PropertyEditor.property_editor_class(
                result_object)
            PropertyEditor.addPropertyEditor(property_editor)

        return result_object
Ejemplo n.º 9
0
    def evaluate(self, data_frame, array_datas, classifier_algo, pre_evaluate_results=None):
        """
        Show the cross validation results computed in the pre-evaluation step

        Parameters:
            data_frame: data frame result, used to name the figure when given
            array_datas: arrays used to name the figure when no data frame
                is given
            classifier_algo: not used by this method
            pre_evaluate_results: list of two results,
                [cv_output, aux_output]; ``aux_output.data`` must unpack to
                (properties, model_data)

        Returns:
            ResultObject with CommandStatus.Error when the pre-evaluation
            results are missing or malformed or when no data is provided;
            otherwise the figure result whose data is
            [win, properties, model_data, self.processkFoldCV].
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        # Accept list subclasses as well, and make sure the two-element
        # unpacking below cannot fail.
        if (not isinstance(pre_evaluate_results, list)
                or len(pre_evaluate_results) != 2):
            Printer.Print("Pre evaluation results failed! Attach bug report!")
            return result_object

        # Validate the inputs before a window is created so that an error
        # return does not leave an empty window behind.
        if data_frame is not None:
            source = data_frame
        elif array_datas is not None:
            source = array_datas
        else:
            Printer.Print("Provide one of data frame or array datas")
            return result_object
        cv_output, aux_output = pre_evaluate_results
        properties, model_data = aux_output.data

        win = Window.window()
        result_object = VizContainer.createResult(win, source, ['cval'])

        result_object.data = [win, properties, model_data, self.processkFoldCV]
        self.printkValueMessage(cv_output.data[0])
        self.updateWindow(win, cv_output.data[1], cv_output.data[2], model_data[1], properties["title"])
        self.modify_figure.evaluate(result_object)
        return result_object
Ejemplo n.º 10
0
    def evaluate(self, data_frame, classifier_model):
        """
        Run a trained classifier on a data frame, print its accuracy against
        the ground truth and plot the confusion matrix.

        Returns an error ResultObject when no ground truth is set, otherwise
        a success ResultObject without data.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        sns.set(color_codes=True)
        features = data_frame.data

        # A ground truth is required to score the predictions
        if StatContainer.ground_truth is None:
            Printer.Print("Please set a feature vector to ground truth by",
                          "typing set ground truth to",
                          "to get the prediction accuracy")
            result_object = ResultObject(None, None, None, CommandStatus.Error)
            return result_object

        features = DataGuru.removeGT(features, StatContainer.ground_truth)
        labels = StatContainer.ground_truth.data

        # Drop rows containing nans from both the features and the labels
        features, labels = DataGuru.removenan(features, labels)

        # The stored model holds the fitted estimator and the scaler that
        # was fitted on the training data
        stored = classifier_model.data
        estimator = stored['Model']
        training_scaler = stored['Scaler']
        X = training_scaler.transform(features.values)

        # Window and axes for the confusion matrix
        win = Window.window()
        figure = win.gcf()
        axes = figure.add_subplot(111)
        Printer.Print('Running the trained classifier...')

        predicted = estimator.predict(X)
        accuracy = metrics.accuracy_score(predicted, labels)
        Printer.Print("Accuracy : %s" % "{0:.3%}".format(accuracy))
        confusion = metrics.confusion_matrix(labels, predicted)
        DataGuru.plot_confusion_matrix(confusion,
                                       np.unique(labels),
                                       axes,
                                       title="confusion matrix")
        win.show()

        # TODO Need to save the result
        return ResultObject(None, None, None, CommandStatus.Success)
Ejemplo n.º 11
0
    def evaluate(self, data_frame, array_datas):
        """
        Run Isomap on a data frame or on a set of arrays.

        The data is standardized, stored together with the (optional) ground
        truth in the figure result, and drawn through self.updateFigure.
        Returns an error ResultObject when neither input is given or the
        arrays cannot be converted to a data frame.
        """
        # Build the data frame from whichever input was provided
        if data_frame is not None:
            df = DataGuru.convertStrCols_toNumeric(data_frame.data)
            cname = data_frame.name
        elif array_datas is not None:
            status, df, kl1, cname = DataGuru.transformArray_to_dataFrame(
                array_datas, useCategorical=True)
            if status == CommandStatus.Error:
                return ResultObject(None, None, None, CommandStatus.Error)
        else:
            Printer.Print("Please provide data frame or arrays to analyze")
            return ResultObject(None, None, None, CommandStatus.Error)

        # Remove nans, keeping the ground truth aligned when there is one
        if StatContainer.ground_truth is None:
            Y = None
            df.dropna(inplace=True)
        else:
            df = DataGuru.removeGT(df, StatContainer.ground_truth)
            Y = StatContainer.filterGroundTruth()
            df, Y = DataGuru.removenan(df, Y)

        # Standardize the extracted values
        raw_values = df.values
        X = preprocessing.StandardScaler().fit(raw_values).transform(
            raw_values)

        win = Window.window()

        properties = self.createDefaultProperties()
        properties['title'] = cname

        # Name the figure after the input that was actually used
        source = data_frame if data_frame is not None else array_datas
        result_object = VizContainer.createResult(win, source, ['ismp'])

        result_object.data = [win, properties, [X, Y], self.updateFigure]
        self.updateFigure(result_object.data)
        self.modify_figure.evaluate(result_object)
        return result_object
Ejemplo n.º 12
0
 def createResult(self, out, keyword_list, create_name=True):
     """
     Wrap ``out`` in a successful logical-array ResultObject.

     With ``create_name`` set, the result is named from ``keyword_list``
     using the first entry of ``self._condition`` as command name;
     otherwise ``keyword_list`` is stored on the result unchanged.
     """
     condition_result = ResultObject(
         out, [], DataType.logical_array, CommandStatus.Success, True)
     if not create_name:
         condition_result.keyword_list = keyword_list
         return condition_result
     condition_result.createName(keyword_list,
                                 command_name=self._condition[0],
                                 set_keyword_list=True)
     return condition_result
Ejemplo n.º 13
0
 def evaluate(self, input1):
     """
     Handle a string or an array input.

     Strings are returned unchanged, arrays are returned with 1 added; any
     other data type yields an error ResultObject.
     """
     result_keywords = ["overload", "result"]
     if input1.data_type == DataType.string:
         print("I received a string")
         return ResultObject(input1.data, result_keywords, DataType.string)
     if input1.data_type == DataType.array:
         print("I received a number")
         return ResultObject(input1.data + 1, result_keywords,
                             DataType.array)
     return ResultObject(None, None, None, CommandStatus.Error)
Ejemplo n.º 14
0
    def evaluate(self, array_data):
        """
        Create a pie plot

        Parameters:
            array_data: result whose ``data`` array is counted per unique
                value and whose ``keyword_list`` forms the plot title

        The conditional array is applied as a filter when its length matches
        the array size. When there are more than ``self.max_unique`` unique
        values, string data is restricted to the most frequent values and
        numeric data is cut into 10 bins; other types cannot be plotted.

        Returns:
            ResultObject with CommandStatus.Error when the unique values
            cannot be computed or there are too many of them; otherwise the
            result from VizContainer.createResult.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        sns.set(color_codes=True)
        stTitle = " ".join(array_data.keyword_list)
        if StatContainer.conditional_array is not None and len(
                StatContainer.conditional_array.data) == array_data.data.size:
            inds = StatContainer.conditional_array.data
            Printer.Print("Nfiltered: ", np.sum(inds))
        else:
            inds = np.full(array_data.data.size, True)
        col_data = pd.Series(array_data.data[inds], name='array')
        col_data.dropna(inplace=True)
        try:
            uniqVals, inv, counts = np.unique(col_data,
                                              return_inverse=True,
                                              return_counts=True)
        except Exception:
            # e.g. values that cannot be compared with each other
            return result_object

        can_plot = True
        if len(uniqVals) > self.max_unique:
            if isinstance(uniqVals[0], str):
                # Keep only the rows belonging to the most frequent values
                best_idx = np.argpartition(counts,
                                           -self.max_unique)[-self.max_unique:]
                idx = np.isin(inv, best_idx)
                col_data = col_data[idx]
            elif np.issubdtype(col_data.dtype, np.number):
                # Convert to categorical
                col_data = pd.cut(col_data, 10)
            else:
                can_plot = False

        if not can_plot:
            Printer.Print("Too many unique values to plot on a pie chart\n")
            Printer.Print("Please select another chart type")
            return result_object

        # The counts must share col_data's index: after dropna/filtering the
        # index has gaps, and concat(axis=1) aligns rows on the index, so a
        # fresh 0..n-1 index would pair counts with the wrong rows.
        counts = pd.Series(np.ones(col_data.size),
                           index=col_data.index,
                           name='count')
        concat_df = pd.concat([counts, col_data], axis=1)
        ds = concat_df.groupby(col_data.name).sum()['count']

        win = Window.window()
        f = win.gcf()
        ax = f.add_subplot(111)

        ds.plot.pie(figsize=(8, 8), ax=ax)
        ax.set_title(stTitle)
        ax.set_xlabel('')
        ax.set_aspect('equal')

        win.show()
        return VizContainer.createResult(win, array_data, ['pie'])
Ejemplo n.º 15
0
    def evaluate(self, array_datas):
        """
        Create a histogram for multiple variables

        Parameters:
            array_datas: arrays handed to
                DataGuru.transformArray_to_dataFrame; the first column
                decides between a count plot and a numeric histogram

        A count plot of the first column is drawn when it has at most
        ``self.max_unique`` unique values (string data with more values is
        restricted to the most frequent ones). Numeric data with more unique
        values is drawn as a stacked histogram of the whole data frame.

        Returns:
            ResultObject with CommandStatus.Error when the conversion fails,
            no data is left, the unique values cannot be computed, or
            non-numeric data has too many unique values; otherwise the
            result from VizContainer.createResult.
        """

        sns.set(color_codes=True)
        command_status, df, kl1, _ = DataGuru.transformArray_to_dataFrame(
            array_datas, useCategorical=True, remove_nan=True)
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)
        dCol = df[df.columns[0]]
        # Without this guard an empty column fails below on uniqVals[0]
        if dCol.size == 0:
            Printer.Print("No data left to plot after cleaning up!")
            return ResultObject(None, None, None, CommandStatus.Error)
        try:
            uniqVals, inv, counts = np.unique(dCol,
                                              return_inverse=True,
                                              return_counts=True)
        except Exception:
            # e.g. values that cannot be compared with each other
            return ResultObject(None, None, None, CommandStatus.Error)
        if len(uniqVals) > self.max_unique:
            if isinstance(uniqVals[0], str):
                # Keep only the rows belonging to the most frequent values
                best_idx = np.argpartition(counts,
                                           -self.max_unique)[-self.max_unique:]
                idx = np.isin(inv, best_idx)
                dCol = dCol[idx]
            else:
                # Marks "too many unique values for a count plot"
                uniqVals = None
        if uniqVals is not None and isinstance(uniqVals[0], str):
            max_len = max(len(uniqVal) for uniqVal in uniqVals)
        else:
            max_len = 0

        if (uniqVals is None and not np.issubdtype(dCol.dtype, np.number)):
            Printer.Print("Too many unique values in non-numeric type data")
            return ResultObject(None, None, None, CommandStatus.Error)

        win = Window.window()
        f = win.gcf()
        ax = f.add_subplot(111)

        # TODO Create an argument for setting number of bins
        if uniqVals is not None:
            df = dCol.to_frame(name=kl1[0])
            if len(uniqVals) > 5 and max_len > 8:
                # Many long labels: put the categories on the y axis
                sns.countplot(y=kl1[0], data=df, ax=ax)
            else:
                sns.countplot(x=kl1[0], data=df, ax=ax)
        elif np.issubdtype(dCol.dtype, np.number):
            df.plot.hist(stacked=True, ax=ax)

        win.show()

        return VizContainer.createResult(win, array_datas,
                                         ['histogram', 'hist'])
Ejemplo n.º 16
0
 def read(self, file_path, keyword_list):
     """
     Load the file name specified and store it in history
     Parameters:
         file_path file location which is expected to be of type csv
         keyword_list keywords used to describe the database
             (not used by this method)
     Returns:
         An error ResultObject when the csv file cannot be located,
         otherwise a list with one ResultObject per row that could be
         loaded. The csv is read for the columns file_type, file_name,
         keywords and description.
     """
     result_object = ResultObject(None, None, None, CommandStatus.Error)
     mod_file_path = self.findFilePath(file_path)
     if mod_file_path is None:
         return result_object

     data_frame = pd.read_csv(mod_file_path)
     self.checkHeaders(data_frame.columns.values)
     result_list = []
     for idx, row in data_frame.iterrows():
         try:
             file_type = DataType[row['file_type']]
         except KeyError:
             # Depending on verbosity
             Printer.Print("file type in line ", idx,
                           " not understood in", row['file_name'])
             Printer.Print("Skipping file ...")
             continue
         if file_type == DataType.folder:
             Printer.Print("Loading folder: ", row['file_name'])
             read_folder = ReadFolder()
             result = read_folder.read(
                 row['file_name'], row['keywords'].split(),
                 'recursive' == row['description'])
             if result.command_status == CommandStatus.Success:
                 result_list.append(result)
             else:
                 Printer.Print("Failed to load folder: ",
                               row['file_name'])
             continue
         row_file_path = self.findFilePath(row['file_name'])
         if row_file_path is None:
             Printer.Print("Cannot find file: ", row['file_name'])
             continue
         file_object = FileObject(row_file_path, file_type,
                                  row['description'], False)
         # Split on any whitespace, as done for folders above, so that
         # repeated spaces do not produce empty keywords
         keywords = row['keywords'].split()
         file_res = ResultObject(file_object, keywords,
                                 DataType.file_name)
         file_res.createName(keywords)
         result_list.append(file_res)
     return result_list
Ejemplo n.º 17
0
 def evaluate(self, array_data, target):
     """
     Build a logical array telling, for every entry of ``array_data``,
     what self.containsWordList returns for the split target words.
     """
     words = splitPattern(target.data)
     matches = [
         self.containsWordList(entry, words) for entry in array_data.data
     ]
     contains_result = ResultObject(np.array(matches), [],
                                    DataType.logical_array,
                                    CommandStatus.Success, True)
     contains_result.createName(array_data.keyword_list,
                                words,
                                command_name='contains',
                                set_keyword_list=True)
     return contains_result
Ejemplo n.º 18
0
    def evaluate(self, array_data):
        """
        Calculate max value of the array and store it to history
        Parameters:
            array_data: result whose ``data`` is a numeric or datetime64
                array; nan/nat entries and entries rejected by the
                conditional array (when its size matches) are ignored

        Returns:
            An error ResultObject when the array type is unsupported or no
            valid entry is left; otherwise a list of ResultObjects holding
            the row label of the maximum (only when row labels are set)
            followed by the maximum value.
        """
        result_objects = []
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        array = array_data.data

        if numpy.issubdtype(array.dtype, numpy.number):
            valid = numpy.logical_not(numpy.isnan(array))
        elif numpy.issubdtype(array.dtype, numpy.datetime64):
            valid = numpy.logical_not(numpy.isnat(array))
        else:
            Printer.Print("The array is not supported type so cannot find max")
            return result_object
        if StatContainer.conditional_array is not None and StatContainer.conditional_array.data.size == array.size:
            valid = numpy.logical_and(valid, StatContainer.conditional_array.data)
        # numpy.max raises on an empty selection, so report it instead
        if not numpy.any(valid):
            Printer.Print("No valid values left so cannot find max")
            return result_object
        filtered = array[valid]
        max_val = numpy.max(filtered)
        # argmax is relative to the filtered values; map it back to the
        # position in the full array so it can be used on the row labels.
        # NOTE(review): assumes row labels line up with the flattened array
        max_pos = numpy.flatnonzero(valid)[numpy.argmax(filtered)]
        if StatContainer.row_labels is not None:
            rl = StatContainer.row_labels.data
            max_rl = rl[max_pos]
            # Result for max index
            result_object = ResultObject(max_rl, [], DataType.array,
                                         CommandStatus.Success)

            result_object.createName(StatContainer.row_labels.name,
                                     command_name=self.commandTags()[0],
                                     set_keyword_list=True)
            result_objects.append(result_object)
        # Result for max value
        result_object = ResultObject(max_val, [], DataType.array,
                                     CommandStatus.Success)
        result_object.createName(array_data.keyword_list,
                                 command_name=self.commandTags()[0],
                                 set_keyword_list=True)
        result_objects.append(result_object)

        # Create a dataframe to store the results
        df_new = pd.DataFrame()
        df_new['Feature'] = [array_data.name]
        df_new['Maximum'] = [max_val]
        if StatContainer.row_labels is not None:
            df_new[StatContainer.row_labels.name] = [max_rl]
        TablePrinter.printDataFrame(df_new)
        return result_objects
Ejemplo n.º 19
0
    def evaluate(self, history, user_conv, name=None):
        """
        Save the notebook/chat, a table, or the last object from history
        under the given name, depending on the words in ``user_conv.data``.

        Returns a success ResultObject when something was saved and an error
        ResultObject otherwise.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        if 'notebook' in user_conv.data or 'chat' in user_conv.data:
            Printer.save(name)
            return ResultObject(None, None, None, CommandStatus.Success)
        elif 'table' in user_conv.data:
            if save_table(name, user_conv):
                return ResultObject(None, None, None, CommandStatus.Success)
            return result_object

        # Saving the last history object needs a name to store it under
        if name is None:
            return result_object
        try:
            last_object = history.data.getLastObject()
            keyword_list = name.data.lower().split(' ')
            result_object = ResultObject(last_object.data, keyword_list,
                                         history.data.last_data_type,
                                         CommandStatus.Success)
            result_object.createName(keyword_list)
            Printer.Print("Saving ", ' '.join(last_object.keyword_list),
                          ' as ', result_object.name)
        except RuntimeError:
            Printer.Print("Cannot find last object from history")

        return result_object
Ejemplo n.º 20
0
    def evaluate(self, array_datas):
        """
        Create a categorical array and a filter from logical arrays

        Parameters:
            array_datas: one result or an iterable of results; each ``data``
                is used as an index into the output arrays and each ``name``
                becomes the category written at those positions

        Returns:
            [result, result_filter]: the categorical array ('Unknown' where
            no input selects an entry) and a logical array marking the
            entries selected by at least one input. Inputs whose size
            differs from the first one are ignored.
        """
        # collections.Iterable was removed in Python 3.10; the abstract
        # base classes live in collections.abc
        from collections.abc import Iterable

        if not isinstance(array_datas, Iterable):
            array_datas = [array_datas]
        N = array_datas[0].data.size
        # Category names longer than 40 characters are truncated (dtype U40)
        out = np.full(N, 'Unknown', dtype='U40')
        out_filter = np.full(N, False)
        Printer.Print("Creating a categorical array from: ")
        for array_data in array_datas:
            Printer.Print(array_data.name)
            if array_data.data.size == N:
                out[array_data.data] = array_data.name
                out_filter[array_data.data] = True
        kl1 = [" ".join(array_data.keyword_list) for array_data in array_datas]
        truncated_kl1, common_name = StatContainer.removeCommonNames(kl1)
        if common_name == '':
            # Fall back to the keywords of the first input (the original
            # indexed the loop variable instead of the list)
            common_name_list = array_datas[0].keyword_list
        else:
            common_name_list = common_name.split(' ')

        result = ResultObject(out, [], DataType.array,
                              CommandStatus.Success)
        result.createName(common_name_list, command_name='categorical',
                          set_keyword_list=True)
        result_filter = ResultObject(out_filter, [], DataType.logical_array,
                                     CommandStatus.Success, True)
        result_filter.createName(common_name_list, command_name='filter',
                                 set_keyword_list=True)
        Printer.Print('Saving categorical array as', result.name)
        Printer.Print('Saving filter as', result_filter.name)
        return [result, result_filter]
Ejemplo n.º 21
0
    def evaluate(self, data_frame, classifier_algo):
        """
        Train a classifier on a data frame using the ground truth as target.

        The features are standardized before fitting. Returns an error
        ResultObject when no ground truth is set, otherwise a
        DataType.trained_model result holding {'Scaler': ..., 'Model': ...}.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        features = data_frame.data

        # Training needs a ground truth to fit against
        if StatContainer.ground_truth is None:
            Printer.Print("Please set a feature vector to ground truth by",
                          "typing set ground truth before using this command")
            result_object = ResultObject(None, None, None, CommandStatus.Error)
            return result_object

        features = DataGuru.removeGT(features, StatContainer.ground_truth)
        labels = StatContainer.filterGroundTruth()

        # Drop rows containing nans from both the features and the labels
        features, labels = DataGuru.removenan(features, labels)

        # First entry of the algorithm data is the model to fit
        model = classifier_algo.data[0]

        # Standardize the feature values
        raw_values = features.values
        scaler = preprocessing.StandardScaler().fit(raw_values)
        X = scaler.transform(raw_values)

        # Show which features are used, then fit
        Printer.Print("Training the classifier")
        feature_table = pd.DataFrame()
        feature_table['Features'] = features.columns

        TablePrinter.printDataFrame(feature_table)
        model.fit(X, labels)

        Printer.Print("The classifier", classifier_algo.name,
                      "has been trained")

        # Accuracy on the data the model was just trained on
        predicted = model.predict(X)
        accuracy = metrics.accuracy_score(predicted, labels)
        Printer.Print("Accuracy on training set : %s" % "{0:.3%}".format(accuracy))

        result_object = ResultObject({'Scaler': scaler, 'Model': model}, [],
                                     DataType.trained_model,
                                     CommandStatus.Success)

        result_object.createName(
            data_frame.keyword_list,
            command_name=classifier_algo.name.replace('.', ' '),
            set_keyword_list=True)

        return result_object
Ejemplo n.º 22
0
    def evaluate(self, array_datas):
        """
        Show a heatmap of a pivot table built from categorical arrays.

        The first array forms the rows and the second the columns of the
        pivot table. Returns an error ResultObject as soon as one of the
        arrays is numeric.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        sns.set(color_codes=True)
        table = pd.DataFrame()
        for array_data in array_datas:
            if np.issubdtype(array_data.data.dtype, np.number):
                Printer.Print("The data to plot is not categorical, Please use scatter plot")
                return result_object
            column_name = " ".join(array_data.keyword_list)
            table[column_name] = array_data.data

        table.dropna(inplace=True)
        row_key = table.columns[0]
        column_key = table.columns[1]
        table = table.pivot_table(index=row_key,
                                  columns=column_key,
                                  aggfunc=np.size,
                                  fill_value=0)

        Printer.Print("Displaying heatmap")
        win = Window.window()
        axes = win.gcf().add_subplot(111)
        sns.heatmap(table, ax=axes)

        win.show()
        return VizContainer.createResult(win, array_datas, ['heatmap'])
Ejemplo n.º 23
0
    def createResult(self, window, array_datas, in_keywords):
        """
        Create a cached figure result for the given window.

        Parameters:
            window: window whose current figure (``gcf()``) is registered
            array_datas: one result or an iterable of results; the keywords
                shared by all of them are added to the figure keywords
            in_keywords: list of keywords describing the kind of figure

        Returns:
            ResultObject of type DataType.figure holding the window, named
            from 'figure', the figure number, ``in_keywords`` and the common
            keywords (in sorted order).
        """
        # collections.Iterable was removed in Python 3.10; the abstract
        # base classes live in collections.abc
        from collections.abc import Iterable

        figure = window.gcf()
        fig_keywords = ['figure', str(figure.number)] + in_keywords
        if not isinstance(array_datas, Iterable):
            array_datas = [array_datas]
        # TODO Later try adding some room for error like its there in 70% of the arrays
        keyword_sets = [set(array_data.keyword_list)
                        for array_data in array_datas]
        # set.intersection needs at least one set, so an empty input simply
        # contributes no common keywords
        common_kl = set.intersection(*keyword_sets) if keyword_sets else set()
        # Sorted so the keyword order does not depend on set iteration order
        fig_keywords = fig_keywords + sorted(common_kl)

        result_object = ResultObject(window, fig_keywords, DataType.figure,
                                     CommandStatus.Success, add_to_cache=True)
        result_object.createName(fig_keywords)
        self.current_figure = figure
        return result_object
Ejemplo n.º 24
0
    def evaluate(self, array_datas):
        """
        Draw a scatter matrix of the given arrays in a new window.

        When a ground truth is set and its length equals the number of rows,
        the points are colored by ground truth value. Returns an error
        ResultObject when the conversion to a data frame fails or fewer than
        two columns are available.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        sns.set(color_codes=True)
        status, frame, kl1, cname = DataGuru.transformArray_to_dataFrame(
            array_datas)
        if status == CommandStatus.Error:
            return result_object
        if len(frame.columns) <= 1:
            Printer.Print("There needs to be atleast two variables to perform multiscatter plot!")
            return result_object

        win = Window.window()
        figure = win.gcf()
        axes = figure.add_subplot(111)

        truth = StatContainer.ground_truth
        if truth is not None and len(truth.data) == frame.shape[0]:
            labels = pd.Series(StatContainer.filterGroundTruth())
            frame, labels = DataGuru.removenan(frame, labels)
            # Spread the distinct labels evenly over [0, 1] for the colormap
            color_of = dict(zip(labels.unique(),
                                np.linspace(0, 1, labels.unique().size)))
            point_colors = labels.map(color_of)
            pd.plotting.scatter_matrix(frame, alpha=0.2, diagonal='kde',
                                       c=point_colors, cmap="jet", ax=axes)
        else:
            frame.dropna(inplace=True)
            pd.plotting.scatter_matrix(frame, alpha=0.2, diagonal='kde',
                                       ax=axes)

        figure.suptitle(cname)

        win.show()

        return VizContainer.createResult(win, array_datas, ['multiscatter'])
Ejemplo n.º 25
0
    def read(self, file_path, keyword_list):
        """
        Read an image file, display it and return it as a cached result

        Parameters:
            file_path: path handed to imread
            keyword_list: keywords stored on the result and used for its name
        Returns:
            ResultObject of type DataType.image holding the image data, or an
            Error ResultObject if the image could not be read
        """
        try:
            data = imread(file_path)
        except Exception:
            # Any read/decoding failure becomes an error result. Unlike a bare
            # except, KeyboardInterrupt and SystemExit still propagate.
            return ResultObject(None, None, None,
                                command_status=CommandStatus.Error)

        win = Window.window()
        # Drawing goes through pyplot's current figure/axes
        # NOTE(review): presumably Window.window() makes its figure current - confirm
        plt.imshow(data)
        plt.gca().axis('off')
        win.show()
        # Store the raw image data (not the window) in the cache
        result = ResultObject(data, keyword_list, DataType.image,
                              CommandStatus.Success, add_to_cache=True)
        result.createName(keyword_list)
        return result
Ejemplo n.º 26
0
    def evaluate(self, data_frame, target):
        """
        Rank the features of a data frame with a random forest and keep the
        most important ones

        Parameters:
            data_frame: entry whose data is the feature data frame and whose
                name is used to name the result
            target: entry whose data is searched for a number giving how many
                features to keep (defaults to 10)
        Returns:
            Error ResultObject if no ground truth is set, otherwise a
            ResultObject of type DataType.csv holding the selected columns
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        features = data_frame.data

        if StatContainer.ground_truth is None:
            Printer.Print("Please set a feature vector to ground truth by",
                          "typing set ground truth before using this command")
            return ResultObject(None, None, None, CommandStatus.Error)

        # Separate the ground truth from the features and drop nan rows
        features = DataGuru.removeGT(features, StatContainer.ground_truth)
        labels = StatContainer.filterGroundTruth()
        features, labels = DataGuru.removenan(features, labels)

        # Number of features to keep: first positive number found in the
        # target, otherwise the top 10
        requested = findNumbers(target.data, 1)
        top_n = (int(requested[0].data)
                 if requested != [] and requested[0].data > 0
                 else 10)

        # Standardize the raw values before fitting the forest
        raw_values = features.values
        scaled = preprocessing.StandardScaler().fit(raw_values).transform(
            raw_values)

        forest = RandomForestClassifier(n_estimators=100)
        forest.fit(scaled, labels)

        # Importance per column, most important first
        ranking = pd.Series(forest.feature_importances_,
                            index=features.columns)
        ranking = ranking.sort_values(ascending=False)
        top_names = ranking.index[0:top_n]

        summary = pd.DataFrame()
        summary['top features'] = top_names
        summary['feature importance'] = ranking.values[0:top_n]
        TablePrinter.printDataFrame(summary)

        result_object = ResultObject(features[top_names], [], DataType.csv,
                                     CommandStatus.Success)
        result_object.createName(data_frame.name,
                                 command_name='top.predictors',
                                 set_keyword_list=True)

        return result_object
Ejemplo n.º 27
0
    def evaluate(self, dummy):
        """
        Return a string result that is added to the cache

        Parameters:
            dummy: entry whose data becomes the result data; when None the
                text "default string" is used instead
        """
        res_string = "default string" if dummy is None else dummy.data
        return ResultObject(res_string, ["dummy", "result"], DataType.string,
                            add_to_cache=True)
Ejemplo n.º 28
0
    def evaluate(self):
        """
        Reset StatContainer.conditional_array to None and report it

        Returns:
            ResultObject without data and with CommandStatus.Success
        """
        StatContainer.conditional_array = None
        Printer.Print("clearing conditional array")
        cleared = ResultObject(None, None, None, CommandStatus.Success)
        return cleared
Ejemplo n.º 29
0
    def evaluate(self, array_data):
        """
        Store the given array as StatContainer.conditional_array (the filter)

        Parameters:
            array_data: entry to store; its name is printed
        Returns:
            ResultObject without data and with CommandStatus.Success
        """
        StatContainer.conditional_array = array_data
        filter_name = array_data.name
        Printer.Print("Setting filter to ", filter_name)
        applied = ResultObject(None, None, None, CommandStatus.Success)
        return applied
Ejemplo n.º 30
0
    def evaluate(self, file_name, pre_evaluate_results=None):
        """
        Load the given file with the reader registered for its type, or show
        the figure again when a figure entry is passed

        Parameters:
            file_name: either a figure entry (data is a window or a list
                whose first element is a window) or a file entry whose data
                provides path, data_type and loaded
            pre_evaluate_results: forwarded to readers that have
                read_in_background set
        Returns:
            Success ResultObject for figures and already loaded files, the
            result of the reader for a readable file, otherwise an Error
            ResultObject
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        # Figures are not read from disk: make them current and show them
        if file_name.data_type is DataType.figure:
            Printer.Print("Loading figure ", ' '.join(file_name.keyword_list))
            win = (file_name.data[0] if type(file_name.data) == list
                   else file_name.data)
            VizContainer.current_figure = win.gcf()
            win.show()
            return ResultObject(None, None, None, CommandStatus.Success)

        file_info = file_name.data
        # Algorithm arguments are read again even when flagged as loaded
        if file_info.loaded and file_info.data_type is not DataType.algorithm_arg:
            Printer.Print("File already loaded!")
            return ResultObject(None, None, None, CommandStatus.Success)

        if not os.path.isfile(file_info.path):
            Printer.Print("File not found.\n Please make sure the file exists "
                          "in the specified location")
            return result_object

        data_type = file_info.data_type
        if data_type not in self.reader_dictionary:
            Printer.Print("We cannot load ", data_type,
                          " yet! Please try again later")
            return result_object

        reader = self.reader_dictionary[data_type]
        if reader.read_in_background:
            result_object = reader.read(file_info.path,
                                        file_name.keyword_list,
                                        pre_evaluate_results)
        else:
            result_object = reader.read(file_info.path,
                                        file_name.keyword_list)
        Printer.Print("Loaded file: ", os.path.basename(file_info.path))
        file_info.loaded = True
        return result_object