Beispiel #1
0
 def evaluate(self, array_data, user_conv):
     """
     Extract date/time components from an array of dates.

     If the first element of ``array_data.data`` is a string, the whole
     array is parsed with ``pd.to_datetime`` and written back to
     ``array_data.data``. For each of day, year, month, hour and minute
     whose name (or its plural) occurs in ``user_conv.data``, the matching
     attribute of the date-time data is saved as a new array result.

     Parameters:
         array_data: data object holding the dates in ``.data``
         user_conv: data object whose ``.data`` holds the user input
     Returns:
         A list of ResultObject (one per requested component), an Error
         ResultObject when the data cannot be interpreted as date time,
         or an empty Success ResultObject when no component was requested.
     """
     # ``pd.datetime`` was only an alias of ``datetime.datetime`` and has
     # been removed from pandas; with the old spelling every call ended in
     # the error branch on current pandas versions.
     from datetime import datetime as datetime_type

     try:
         if isinstance(array_data.data[0], str):
             date_time = pd.to_datetime(array_data.data,
                                        infer_datetime_format=True)
             array_data.data = date_time
         else:
             date_time = array_data.data
         if not isinstance(array_data.data[0], datetime_type):
             raise RuntimeError()
     except Exception:
         # Any parsing/indexing failure means the data is not date time;
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         Printer.Print("Cannot transform data to date time")
         return ResultObject(None, None, None, CommandStatus.Error)
     results = []
     for word in ['day', 'year', 'month', 'hour', 'minute']:
         if word in user_conv.data or word + 's' in user_conv.data:
             out = getattr(date_time, word)
             result = ResultObject(out, [], DataType.array)
             result.createName(array_data.keyword_list,
                               command_name=word,
                               set_keyword_list=True)
             results.append(result)
             Printer.Print('Saving ', word, 'from ', array_data.name, ' as',
                           result.name)
     if results:
         return results
     return ResultObject(None, None, None, CommandStatus.Success)
Beispiel #2
0
    def evaluate(self, history, user_conv, name=None):
        """
        Save the notebook/chat, a table, or the last object from history.

        When the user input mentions 'notebook' or 'chat' the Printer is
        asked to save; when it mentions 'table' the table is saved via
        save_table. Otherwise the last object of the history is stored
        again under the keywords contained in name.
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)
        words = user_conv.data
        if 'notebook' in words or 'chat' in words:
            Printer.save(name)
            return ResultObject(None, None, None, CommandStatus.Success)
        if 'table' in words:
            if save_table(name, user_conv):
                return ResultObject(None, None, None, CommandStatus.Success)
            return failure
        if name is None:
            # Nothing to name the saved object with
            return failure

        outcome = failure
        try:
            last_object = history.data.getLastObject()
            keywords = name.data.lower().split(' ')
            outcome = ResultObject(last_object.data, keywords,
                                   history.data.last_data_type,
                                   CommandStatus.Success)
            outcome.createName(keywords)
            Printer.Print("Saving ", ' '.join(last_object.keyword_list),
                          ' as ', outcome.name)
        except RuntimeError:
            Printer.Print("Cannot find last object from history")
        return outcome
Beispiel #3
0
    def evaluate(self, array_datas):
        """
        Create a categorical array and a matching filter from several arrays.

        Every input whose data has the same size as the first input is used
        to index the output: the indexed entries of the categorical array
        are set to the input's name and the same entries of the filter are
        set to True. Entries never touched stay 'Unknown' / False.
        (The data arrays are presumably boolean masks - confirm with caller.)

        Parameters:
            array_datas: one data object or an iterable of data objects
        Returns:
            [categorical array result, logical filter result]
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc.
        from collections.abc import Iterable

        if not isinstance(array_datas, Iterable):
            array_datas = [array_datas]
        N = array_datas[0].data.size
        # Names longer than 40 characters are truncated by the fixed dtype
        out = np.full(N, 'Unknown', dtype='U40')
        out_filter = np.full(N, False)
        Printer.Print("Creating a categorical array from: ")
        for array_data in array_datas:
            Printer.Print(array_data.name)
            if array_data.data.size == N:
                out[array_data.data] = array_data.name
                out_filter[array_data.data] = True
        kl1 = [" ".join(array_data.keyword_list) for array_data in array_datas]
        truncated_kl1, common_name = StatContainer.removeCommonNames(kl1)
        if common_name == '':
            # Fall back to the keywords of the first input. The original
            # indexed the loop variable (array_data[0]) by mistake.
            common_name_list = array_datas[0].keyword_list
        else:
            common_name_list = common_name.split(' ')

        result = ResultObject(out, [], DataType.array,
                              CommandStatus.Success)
        result.createName(common_name_list, command_name='categorical',
                          set_keyword_list=True)
        result_filter = ResultObject(out_filter, [], DataType.logical_array,
                                     CommandStatus.Success, True)
        result_filter.createName(common_name_list, command_name='filter',
                                 set_keyword_list=True)
        Printer.Print('Saving categorical array as', result.name)
        Printer.Print('Saving filter as', result_filter.name)
        return [result, result_filter]
Beispiel #4
0
    def evaluate(self, array_data):
        """
        Combine logical arrays with the operator configured on this command.

        '!' negates the first array; '&', '||' and '^' fold the remaining
        arrays into the first one. Entries excluded by the active
        conditional filter are forced to False.

        Parameters:
            array_data: sequence of data objects holding logical arrays
        Returns:
            A logical-array ResultObject, or an Error ResultObject when the
            sequence is empty or the operator cannot combine extra arrays.
        """
        N = len(array_data)
        if N < 1:
            return ResultObject(None, None, None, CommandStatus.Error)
        Printer.Print("Performing logical", self._add_tags[0], "on ")
        Printer.Print(array_data[0].name)
        if self._operator == '!':
            out = np.logical_not(array_data[0].data)
        else:
            # Work on a copy: with a single input the filter masking below
            # would otherwise overwrite the caller's array in place.
            out = np.array(array_data[0].data, copy=True)

        for arr_data in array_data[1:]:
            Printer.Print(", ", arr_data.name)
            if self._operator == '&':
                out = np.logical_and(out, arr_data.data)
            elif self._operator == '||':
                out = np.logical_or(out, arr_data.data)
            elif self._operator == '^':
                out = np.logical_xor(out, arr_data.data)
            else:
                return ResultObject(None, None, None, CommandStatus.Error)
            # NOTE(review): the name is printed a second time here - confirm
            # whether this duplicate output is intended.
            Printer.Print(arr_data.name)
        conditional = StatContainer.conditional_array
        if conditional is not None:
            # Other commands read the filter from `.data`; a bare ndarray is
            # still accepted for backward compatibility.
            if isinstance(conditional, np.ndarray):
                mask = conditional
            else:
                mask = np.asarray(getattr(conditional, 'data', conditional))
            # Ignore a filter whose size does not match, as the statistics
            # commands do, instead of failing with an IndexError.
            if mask.size == out.size:
                out[np.logical_not(mask)] = False
        result = ResultObject(out, [], DataType.logical_array,
                              CommandStatus.Success, True)
        if N > 1:
            keyword_list2 = array_data[1].keyword_list
        else:
            keyword_list2 = []
        result.createName(array_data[0].keyword_list,
                          keyword_list2,
                          command_name=self._add_tags[0],
                          set_keyword_list=True)
        return result
Beispiel #5
0
    def evaluate(self, data_frame, array_datas, classifier_algo, pre_evaluate_results=None):
        """
        Show pre-computed cross validation results in a window.

        pre_evaluate_results must be a list of two result objects: the
        cross validation output and an auxiliary output whose data is a
        (properties, model_data) pair. The data frame is preferred over the
        array datas as the source attached to the visualization result.
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)
        if type(pre_evaluate_results) is not list:
            Printer.Print("Pre evaluation results failed! Attach bug report!")
            return failure
        win = Window.window()

        source = data_frame if data_frame is not None else array_datas
        if source is None:
            Printer.Print("Provide one of data frame or array datas")
            return failure
        viz_result = VizContainer.createResult(win, source, ['cval'])

        cv_output, aux_output = pre_evaluate_results
        properties, model_data = aux_output.data
        viz_result.data = [win, properties, model_data, self.processkFoldCV]

        cv_values = cv_output.data
        self.printkValueMessage(cv_values[0])
        self.updateWindow(win, cv_values[1], cv_values[2], model_data[1],
                          properties["title"])
        self.modify_figure.evaluate(viz_result)
        return viz_result
Beispiel #6
0
    def evaluate(self, array_data):
        """
        Sum the elements of a numeric array and return it as a result.

        NaN entries are skipped. When a conditional filter of matching size
        is active, only the entries it selects are summed.
        Parameters:
            array_data: data object holding the array in ``.data``
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)
        values = array_data.data

        if not numpy.issubdtype(values.dtype, numpy.number):
            Printer.Print("The array is not of numeric type so cannot",
                          "take sum")
            return failure

        selected = numpy.logical_not(numpy.isnan(values))
        if (StatContainer.conditional_array is not None
                and StatContainer.conditional_array.data.size == values.size):
            selected = numpy.logical_and(
                selected, StatContainer.conditional_array.data)
        total = numpy.sum(values[selected])

        outcome = ResultObject(total, [], DataType.array,
                               CommandStatus.Success)
        outcome.createName(array_data.keyword_list,
                           command_name=self.commandTags()[0],
                           set_keyword_list=True)
        Printer.Print("Sum of", array_data.name, "is", total)
        return outcome
Beispiel #7
0
 def DataMineArgFoundResponse(self, arg_name):
     """
     Tell the user which argument has been found.

     Any name other than "data_frame" or "array_datas" is reported as the
     classification models.
     """
     known_messages = {
         "data_frame": "Found the data frame",
         "array_datas": "Found array data",
     }
     Printer.Print(known_messages.get(
         arg_name, "Found the classification models to test"))
Beispiel #8
0
    def evaluate(self, array_datas):
        """
        Visualize the relationship between two categorical variables.

        The arrays are collected into a data frame (one column per array,
        named by its keywords), rows containing NaN are dropped, and a
        pivot table of the first column against the second is drawn as a
        heatmap.

        Parameters:
            array_datas: iterable of data objects with non-numeric data
        Returns:
            A visualization result, or an Error ResultObject when an array
            is numeric or fewer than two columns are available.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)

        sns.set(color_codes=True)
        df = pd.DataFrame()
        for array_data in array_datas:
            if np.issubdtype(array_data.data.dtype, np.number):
                Printer.Print("The data to plot is not categorical, Please use scatter plot")
                return result_object
            df[" ".join(array_data.keyword_list)] = array_data.data

        # The pivot below needs an index column and a column column;
        # previously a single array failed with an IndexError.
        if len(df.columns) < 2:
            Printer.Print("Need two categorical arrays to display a heatmap")
            return result_object

        df.dropna(inplace=True)
        df = df.pivot_table(
            index=df.columns[0], columns=df.columns[1], aggfunc=np.size, fill_value=0)

        Printer.Print("Displaying heatmap")
        win = Window.window()
        f = win.gcf()
        ax = f.add_subplot(111)
        sns.heatmap(df, ax=ax)

        win.show()
        return VizContainer.createResult(win, array_datas, ['heatmap'])
Beispiel #9
0
    def evaluate(self, image):
        """
        Display the image specified.

        When the argument is a file name the image is read from disk and
        returned as an image result; otherwise ``image.data`` is shown
        directly and an empty Success result is returned.

        Parameters:
            image: data object that is either a file name or image data
        Returns:
            ResultObject with Success status, or Error status when the file
            is missing or reading/displaying fails.
        """
        try:
            if image.data_type is DataType.file_name:
                file_path = image.data.path
                if not os.path.isfile(file_path):
                    Printer.Print("Cannot find image file: ", file_path)
                    return ResultObject(None, None, None,
                                        CommandStatus.Error)
                curr_image = imread(file_path)
                result_object = ResultObject(
                    curr_image, image.keyword_list, DataType.image, CommandStatus.Success)
            else:
                curr_image = image.data
                result_object = ResultObject(
                    None, None, None, CommandStatus.Success)
            image_name = image.keyword_list[0]
            win = Window.window()
            plt.imshow(curr_image)
            plt.gca().axis('off')
            win.show()
            # A space was missing between the text and the image name
            Printer.Print("Displaying image " + image_name)
        except Exception:
            # Best effort: any failure is reported as an error result, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            result_object = ResultObject(None, None, None, CommandStatus.Error)

        return result_object
Beispiel #10
0
    def read(self, file_path, keyword_list):
        """
        Load algorithm properties from a file and create the model.

        Parameters:
            file_path: location handed to createProperties
            keyword_list: keywords used to name the result
        Returns:
            ResultObject of type algorithm_arg whose data is
            [model, property_data, model_name, self.updateModel], or an
            Error ResultObject when properties/model cannot be created.
        """
        try:
            property_data, model_name = self.createProperties(file_path)
            model = DataGuru.createModel(property_data, model_name)
        except Exception:
            # Every failure is reported as a missing file (best effort);
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            Printer.Print("File not found")
            return ResultObject(None, None, None, CommandStatus.Error)

        result_data = [model, property_data, model_name, self.updateModel]
        result_object = ResultObject(result_data,
                                     keyword_list,
                                     DataType.algorithm_arg,
                                     CommandStatus.Success,
                                     add_to_cache=True)
        result_object.createName(keyword_list)

        # A property editor can only be attached when both the parent
        # widget and the editor class have been registered.
        if (PropertyEditor.parent_widget is None
                or PropertyEditor.property_editor_class is None):
            Printer.Print("Cannot modify algorithm properties in non-GUI mode")
        else:
            property_editor = PropertyEditor.property_editor_class(
                result_object)
            PropertyEditor.addPropertyEditor(property_editor)

        return result_object
Beispiel #11
0
def save_table_as_csv(table, path):
    """
    Write the header and all cells of a table widget to a csv file.

    Parameters:
        table: table widget providing columnCount, rowCount,
            horizontalHeaderItem and item
        path: data object whose ``.data`` is the target path, or None to
            ask the user through a save-file dialog
    Returns:
        None. Failures to open or write the file are reported through
        Printer.
    """
    if path is None:
        chosen = QFileDialog.getSaveFileName(None, 'Save File', '',
                                             'CSV(*.csv)')
        try:
            path = chosen[0]
        except (TypeError, IndexError, KeyError):
            return
        if not path:
            # Dialog was cancelled: nothing to save
            return
    else:
        path = path.data
    Printer.Print("Saving table as csv: ", path)

    def cell_text(item):
        # Empty cells/headers are written as empty strings
        return str(item.text()) if item is not None else ''

    try:
        # newline='' is required by the csv module, otherwise extra blank
        # lines appear on platforms that translate line endings.
        with open(path, 'w', newline='') as stream:
            writer = csv.writer(stream)
            columns = range(table.columnCount())
            writer.writerow([cell_text(table.horizontalHeaderItem(column))
                             for column in columns])
            for row in range(table.rowCount()):
                writer.writerow([cell_text(table.item(row, column))
                                 for column in columns])
    except OSError:
        # Covers FileNotFoundError as well as permission/directory errors
        Printer.Print("Cannot save to file: ", path)
0
    def evaluate(self, alpha_script, parent_parser):
        """
        Run an alfarvis script line by line through the parent parser.

        Blank lines and lines starting with '#' are skipped. Execution
        stops at the first line that leaves the parser in the
        command_known_data_unknown state.

        Parameters:
            alpha_script: data object wrapping a file object of type
                alpha_script
            parent_parser: data object wrapping the parser that runs the
                lines
        Returns:
            Success ResultObject once the script has been processed (also
            after an early stop), Error when the file has the wrong type or
            cannot be read.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        if alpha_script.data.data_type is not DataType.alpha_script:
            Printer.Print("File not of alpha script type: ",
                          alpha_script.data.data_type)
            return result_object
        # Get the lines; the context manager closes the file, which the
        # original left open.
        try:
            with open(alpha_script.data.path) as script_file:
                lines = [line.rstrip('\n') for line in script_file]
        except Exception:
            Printer.Print("Alpha script not found")
            return ResultObject(None, None, None, CommandStatus.Error)
        # Update parent parser state
        parent_parser.data.clearCommandSearchResults()
        for i, line in enumerate(lines):
            line = line.lstrip()
            print("Line: ", line)
            if len(line) == 0 or line[0] == '#':
                continue  # Ignore blank lines and comments
            parent_parser.data.parse(line)
            if parent_parser.data.currentState == ParserStates.command_known_data_unknown:
                # i is the zero-based index of the line in the file
                Printer.Print("Ambiguous command at line: ", i)
                Printer.Print("Exiting script")
                break
        parent_parser.data.clearCommandSearchResults()
        result_object = ResultObject(None, None, None, CommandStatus.Success)

        return result_object
Beispiel #13
0
    def evaluate(self, array_data):
        """
        Calculate the count of a numeric array and return it as a result.

        When a conditional filter of matching size is active only the
        selected entries are considered. NaN entries are removed before
        counting.
        Parameters:
            array_data: data object holding the array in ``.data``
        Returns:
            ResultObject holding the count, or an Error ResultObject for
            non-numeric arrays.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        array = array_data.data
        conditional = StatContainer.conditional_array
        # Apply the filter only when its size matches, as the other
        # statistics commands do; a stale filter used to raise IndexError.
        if conditional is not None and conditional.data.size == array.size:
            array = array[conditional.data]
        nan_idx = StatContainer.getNanIdx(array)
        if numpy.issubdtype(array.dtype, numpy.number):
            array_filtered = array[numpy.logical_not(nan_idx)]
            # NOTE(review): count_nonzero leaves out entries equal to zero;
            # confirm whether "number of values" should include them.
            count_val = numpy.count_nonzero(array_filtered)

            result_object = ResultObject(count_val, [],
                                         DataType.array,
                                         CommandStatus.Success)
            result_object.createName(
                    array_data.keyword_list,
                    command_name=self.commandTags()[0],
                    set_keyword_list=True)
            Printer.Print("Count of", array_data.name, "is", count_val)
        else:
            Printer.Print("The array is not of numeric type so cannot",
                          "find count")

        return result_object
Beispiel #14
0
    def evaluate(self, data_frame, classifier_algo):
        """
        Train a classifier on the columns of a data frame.

        The ground truth column is removed from the features, rows with
        NaN are dropped, the features are standardized and the model is
        fitted. The result holds the fitted scaler and model.
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)

        if StatContainer.ground_truth is None:
            Printer.Print("Please set a feature vector to ground truth by",
                          "typing set ground truth before using this command")
            return failure

        # Separate features from labels and drop incomplete rows
        features = DataGuru.removeGT(data_frame.data,
                                     StatContainer.ground_truth)
        labels = StatContainer.filterGroundTruth()
        features, labels = DataGuru.removenan(features, labels)

        model = classifier_algo.data[0]

        # Standardize the feature matrix with a scaler fitted on it
        raw_values = features.values
        scaler = preprocessing.StandardScaler().fit(raw_values)
        scaled_values = scaler.transform(raw_values)

        Printer.Print("Training the classifier")
        feature_table = pd.DataFrame()
        feature_table['Features'] = features.columns
        TablePrinter.printDataFrame(feature_table)
        model.fit(scaled_values, labels)

        Printer.Print("The classifier", classifier_algo.name,
                      "has been trained")

        # Accuracy measured on the data the model was trained on
        predicted = model.predict(scaled_values)
        accuracy = metrics.accuracy_score(predicted, labels)
        Printer.Print("Accuracy on training set : %s" % "{0:.3%}".format(accuracy))

        outcome = ResultObject({'Scaler': scaler, 'Model': model}, [],
                               DataType.trained_model,
                               CommandStatus.Success)
        outcome.createName(data_frame.keyword_list,
                           command_name=classifier_algo.name.replace('.', ' '),
                           set_keyword_list=True)
        return outcome
Beispiel #15
0
 def DataMineMultipleArgsFoundResponse(self, arg_name):
     """
     Ask the user to disambiguate when several candidates match.

     Only "data_frame" and "classifier_algo" produce output; any other
     argument name prints nothing.
     """
     prompts = {
         "data_frame": (
             "I found multiple data frames that seem to match your query",
             "Could you please look at the following data frames and tell me which one you want to classify?",
         ),
         "classifier_algo": (
             "I found multiple classification algorithms matching your query",
             "Could you please look at available classification algorithms: ",
         ),
     }
     for prompt in prompts.get(arg_name, ()):
         Printer.Print(prompt)
Beispiel #16
0
 def printArguments(self, args):
     """
     Print the keywords of each argument, one argument per line.

     A single DataObject is treated as a one-element list; None is
     printed as " None".
     """
     if args is None:
         Printer.Print(" None")
         return
     arguments = [args] if type(args) is DataObject else args
     for argument in arguments:
         Printer.Print(" ".join(argument.keyword_list))
Beispiel #17
0
    def FindBestClassifier(self, modelList, num_folds, topFeatures=0):
        """
        Evaluate every model in modelList and report the best one.

        Each entry provides the model under 'Model' and its display name
        under 'Name'. The first value returned by classification_model is
        used as the score; the second is ignored.
        """
        scores = []
        for entry in modelList:
            score, _ = self.classification_model(entry['Model'], num_folds,
                                                 topFeatures)
            scores.append(score)

        winner = modelList[np.argmax(scores)]
        Printer.Print("Best classifier is " + winner['Name'] + " with an accuracy of -  %.2f%% " % max(scores))
        Printer.Print('--------------------------\n--------------------------\n')
Beispiel #18
0
    def evaluate(self, data_frame, classifier_model):
        """
        Run a trained classifier on a data frame and plot the outcome.

        The features are scaled with the scaler stored next to the model,
        the accuracy against the ground truth is printed and the confusion
        matrix is drawn in a new window.
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)
        sns.set(color_codes=True)

        if StatContainer.ground_truth is None:
            Printer.Print("Please set a feature vector to ground truth by",
                          "typing set ground truth to",
                          "to get the prediction accuracy")
            return failure

        # Separate features from labels and drop incomplete rows
        features = DataGuru.removeGT(data_frame.data,
                                     StatContainer.ground_truth)
        labels = StatContainer.ground_truth.data
        features, labels = DataGuru.removenan(features, labels)
        samples = features.values

        # The trained model carries the scaler fitted during training
        bundle = classifier_model.data
        model = bundle['Model']
        samples = bundle['Scaler'].transform(samples)

        win = Window.window()
        ax = win.gcf().add_subplot(111)
        Printer.Print('Running the trained classifier...')

        predicted = model.predict(samples)
        accuracy = metrics.accuracy_score(predicted, labels)
        Printer.Print("Accuracy : %s" % "{0:.3%}".format(accuracy))
        confusion = metrics.confusion_matrix(labels, predicted)
        DataGuru.plot_confusion_matrix(confusion,
                                       np.unique(labels),
                                       ax,
                                       title="confusion matrix")
        win.show()

        # TODO Need to save the result
        return ResultObject(None, None, None, CommandStatus.Success)
Beispiel #19
0
 def read(self, file_path, keyword_list, pre_evaluate_results):
     """
     Return results that were loaded ahead of time.

     The pre-evaluated results must be a list; its first element is
     passed to the list command before the whole list is returned.
     """
     if type(pre_evaluate_results) is list:
         # List the information about csv
         Printer.Print("Loaded " + " ".join(keyword_list))
         empty_query = DataObject([''], [])
         self.list_command.evaluate(pre_evaluate_results[0], empty_query)
         return pre_evaluate_results
     Printer.Print("No preevaluation done!",
                   " Please file a bug report with the chat")
     return ResultObject(None, None, None, CommandStatus.Error)
Beispiel #20
0
 def consumePreEvaluateResults(self, command, arguments,
                               pre_evaluate_results):
     """
     Forward pre-evaluation results to the post-evaluation step.

     A non-list result with Error status aborts the command; its data is
     printed when it is a string. Otherwise the results are stored in
     arguments under 'pre_evaluate_results' and the command continues.
     """
     failed = (type(pre_evaluate_results) is not list
               and pre_evaluate_results.command_status == CommandStatus.Error)
     if not failed:
         arguments['pre_evaluate_results'] = pre_evaluate_results
         self.postEvaluateCommand(command, arguments)
         return
     Printer.Print("Failed to evaluate parallel thread: ")
     details = pre_evaluate_results.data
     if type(details) == str:
         Printer.Print(details)
Beispiel #21
0
    def evaluate(self, array_data):
        """
        Create a pie plot of the value frequencies of an array.

        Entries selected by a conditional filter of matching size are kept
        and NaN values are dropped. When there are more unique values than
        self.max_unique, string data is reduced to the most frequent
        values, numeric data is cut into 10 bins, and any other data is
        rejected.

        Parameters:
            array_data: data object holding the array in ``.data``
        Returns:
            A visualization result, or an Error ResultObject when the
            values cannot be grouped or plotted.
        """
        result_object = ResultObject(None, None, None, CommandStatus.Error)
        sns.set(color_codes=True)
        stTitle = " ".join(array_data.keyword_list)
        if StatContainer.conditional_array is not None and len(
                StatContainer.conditional_array.data) == array_data.data.size:
            inds = StatContainer.conditional_array.data
            Printer.Print("Nfiltered: ", np.sum(inds))
        else:
            inds = np.full(array_data.data.size, True)
        col_data = pd.Series(array_data.data[inds], name='array')
        col_data.dropna(inplace=True)
        try:
            uniqVals, inv, counts = np.unique(col_data,
                                              return_inverse=True,
                                              return_counts=True)
        except Exception:
            # e.g. values that cannot be ordered against each other
            return ResultObject(None, None, None, CommandStatus.Error)

        can_plot = True
        if len(uniqVals) > self.max_unique:
            if isinstance(uniqVals[0], str):
                # Keep only the rows belonging to the most frequent values
                best_idx = np.argpartition(counts,
                                           -self.max_unique)[-self.max_unique:]
                idx = np.isin(inv, best_idx)
                col_data = col_data[idx]
            elif np.issubdtype(col_data.dtype, np.number):
                # Convert to categorical
                col_data = pd.cut(col_data, 10)
            else:
                can_plot = False

        if not can_plot:
            Printer.Print("Too many unique values to plot on a pie chart\n")
            Printer.Print("Please select another chart type")
            return result_object

        # The counter must share col_data's index: after dropna/top-k
        # selection the index has gaps, and a fresh 0..n-1 index made
        # concat misalign the rows and produce wrong slice sizes.
        counts = pd.Series(np.ones(col_data.size), index=col_data.index,
                           name='count')
        concat_df = pd.concat([counts, col_data], axis=1)
        ds = concat_df.groupby(col_data.name).sum()['count']

        win = Window.window()
        f = win.gcf()
        ax = f.add_subplot(111)

        ds.plot.pie(figsize=(8, 8), ax=ax)
        ax.set_title(stTitle)
        ax.set_xlabel('')
        ax.set_aspect('equal')

        win.show()
        return VizContainer.createResult(win, array_data, ['pie'])
Beispiel #22
0
 def checkHeaders(self, headers):
     """
     Check that the data base file has exactly the expected headers.

     Parameters:
         headers: array of column names (its ``size`` is inspected)
     Returns:
         True when the four headers match in order, False otherwise
         (the first mismatch is reported through Printer).
     """
     expected_headers = ('file_name', 'file_type', 'keywords',
                         'description')
     if headers.size != 4:
         Printer.Print("Headers in data base file does not match")
         return False
     for position, (expected, found) in enumerate(
             zip(expected_headers, headers)):
         if expected != found:
             Printer.Print("Header at ", position, " does not match with ",
                           expected)
             return False
     return True
Beispiel #23
0
 def read(self, file_path, keyword_list):
     """
     Load the data base file specified and return one result per entry.

     Rows of type folder are loaded through ReadFolder (recursively when
     the description is 'recursive'); every other row becomes a file_name
     result. Rows with an unknown file type or a file that cannot be
     found are skipped with a message.

     Parameters:
         file_path file location which is expected to be of type csv
         keyword_list keywords used to describe the database
     Returns:
         A list of ResultObject for the loaded entries, or an Error
         ResultObject when the file is not found or its headers do not
         match.
     """
     result_object = ResultObject(None, None, None, CommandStatus.Error)
     mod_file_path = self.findFilePath(file_path)
     if mod_file_path is None:
         return result_object

     data_frame = pd.read_csv(mod_file_path)
     # The header check used to be ignored, so a malformed file failed
     # later with a KeyError on the first row.
     if not self.checkHeaders(data_frame.columns.values):
         return result_object

     result_list = []
     for idx, row in data_frame.iterrows():
         try:
             file_type = DataType[row['file_type']]
         except KeyError:
             # Depending on verbosity
             Printer.Print("file type in line ", idx,
                           " not understood in", row['file_name'])
             Printer.Print("Skipping file ...")
             continue
         if file_type == DataType.folder:
             Printer.Print("Loading folder: ", row['file_name'])
             read_folder = ReadFolder()
             result = read_folder.read(
                 row['file_name'], row['keywords'].split(),
                 'recursive' == row['description'])
             if result.command_status == CommandStatus.Success:
                 result_list.append(result)
             else:
                 Printer.Print("Failed to load folder: ",
                               row['file_name'])
             continue
         row_file_path = self.findFilePath(row['file_name'])
         if row_file_path is None:
             Printer.Print("Cannot find file: ", row['file_name'])
             continue
         file_object = FileObject(row_file_path, file_type,
                                  row['description'], False)
         keywords = row['keywords'].split(' ')
         file_res = ResultObject(file_object, keywords,
                                 DataType.file_name)
         file_res.createName(keywords)
         result_list.append(file_res)
     return result_list
Beispiel #24
0
    def evaluate(self, csv_data):
        """
        Return a standardized copy of a csv data frame.

        Every column that isCategorical does not recognise as categorical,
        that has at least one non-NaN entry and whose first non-NaN entry
        is not a string is shifted by its mean and divided by its standard
        deviation. All other columns are left untouched.
        """
        scaled = csv_data.data.copy()

        for column in scaled.columns:
            present = scaled[column].dropna()
            if StatContainer.isCategorical(present) is not None:
                continue
            if len(present) == 0 or isinstance(present.iloc[0], str):
                continue
            centered = scaled[column] - numpy.mean(present)
            scaled[column] = centered / numpy.std(present)

        Printer.Print("Saving the scaled data...")
        outcome = ResultObject(scaled, [],
                               DataType.csv,
                               CommandStatus.Success)
        outcome.createName(csv_data.keyword_list,
                           command_name=self.commandTags()[0],
                           set_keyword_list=True)
        return outcome
Beispiel #25
0
    def evaluate(self, array_data):
        """
        Compute the standard deviation of a numeric array.

        NaN entries are skipped. When a conditional filter of matching size
        is active, only the entries it selects are used. The value is shown
        as a one-row table.
        Parameters:
            array_data: data object holding the array in ``.data``
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)
        values = array_data.data
        if not numpy.issubdtype(values.dtype, numpy.number):
            Printer.Print("The array is not of numeric type so cannot",
                          "find stdev")
            return failure

        selected = numpy.logical_not(numpy.isnan(values))
        if (StatContainer.conditional_array is not None
                and StatContainer.conditional_array.data.size == values.size):
            selected = numpy.logical_and(
                selected, StatContainer.conditional_array.data)
        std_val = numpy.std(values[selected])

        outcome = ResultObject(std_val, [], DataType.array,
                               CommandStatus.Success)
        outcome.createName(array_data.keyword_list,
                           command_name=self.commandTags()[0],
                           set_keyword_list=True)

        summary = pd.DataFrame()
        summary['Feature'] = [array_data.name]
        summary['Standard Deviation'] = [std_val]
        TablePrinter.printDataFrame(summary)
        return outcome
Beispiel #26
0
    def evaluate(self, array_datas):
        """
        Display a clustered heatmap of the given arrays.

        When a ground truth is set, its values colour the rows using the
        letters of "rbg" for the unique labels in order of appearance.
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)

        sns.set(color_codes=True)
        command_status, df, _, _ = DataGuru.transformArray_to_dataFrame(
            array_datas, remove_nan=True)
        if command_status == CommandStatus.Error:
            return failure

        Printer.Print("Displaying heatmap")
        win = Window.window()
        win.gcf()
        ground_truth = StatContainer.ground_truth
        if ground_truth is not None:
            labels = pd.Series(ground_truth.data)
            colour_of = dict(zip(labels.unique(), "rbg"))
            sns.clustermap(df,
                           standard_scale=1,
                           row_colors=labels.map(colour_of),
                           cmap="jet")
        else:
            sns.clustermap(df,
                           cbar=True,
                           square=False,
                           annot=False,
                           cmap='jet',
                           standard_scale=1)

        win.show()
        return VizContainer.createResult(win, array_datas, ['heatmap'])
Beispiel #27
0
    def extractArgFromUser(self, key_words, argument):
        """
        Extract argument from user input if possible

        Each tag of the argument is looked up in key_words. The words
        after the tag, before it (reversed), or all words are searched
        depending on the tag position. Only the first tag that is present
        is used for user strings; for numbers the search continues with
        the next tag when nothing was found.

        Parameters:
            key_words: list of words typed by the user
            argument: argument description providing tags, argument_type
                and number
        Returns:
            List with the extracted data (empty when nothing was found).
        """
        data_res = []
        for tag in argument.tags:
            try:
                index = key_words.index(tag.name)
            except ValueError:
                # Tag does not occur in the user input
                continue

            if tag.position == Argument.TagPosition.After:
                search_scope = key_words[(index + 1):]
            elif tag.position == Argument.TagPosition.Before:
                # Reverse list to be consistent with search
                # order
                search_scope = key_words[:index][::-1]
            else:
                search_scope = key_words

            if argument.argument_type is DataType.number:
                res = findNumbers(search_scope, argument.number)
                if len(res) != 0:
                    data_res = data_res + res
                    break
            elif argument.argument_type is DataType.user_string:
                res = DataObject(' '.join(search_scope), search_scope)
                data_res.append(res)
                break
            else:
                # The two literals used to join into "usercurrently"
                Printer.Print("Can only extract numbers and strings from user "
                              "currently")
                break
        return data_res
Beispiel #28
0
    def evaluate(self, array_datas):
        """
        Draw the given arrays as a line plot in a new window.

        Returns an Error result when the arrays cannot be turned into a
        data frame, when no rows remain, or when the only column comes
        from a non-numeric array.
        """
        sns.set(color_codes=True)
        command_status, df, kl1, cname = DataGuru.transformArray_to_dataFrame(
                array_datas, useCategorical=True, expand_single=True,
                remove_nan=True)
        if command_status == CommandStatus.Error:
            return ResultObject(None, None, None, CommandStatus.Error)

        n_rows, n_cols = df.shape[0], df.shape[1]
        nothing_to_plot = n_rows == 0
        if not nothing_to_plot and n_cols == 1:
            # A single column is only plottable when it is numeric
            nothing_to_plot = not np.issubdtype(array_datas[0].data.dtype,
                                                np.number)
        if nothing_to_plot:
            Printer.Print("No data left to plot after cleaning up!")
            return ResultObject(None, None, None, CommandStatus.Error)

        win = Window.window()
        axes = win.gcf().add_subplot(111)
        axes.set_title(cname)
        df.plot(ax=axes)

        win.show()

        return VizContainer.createResult(win, array_datas, ['line'])
Beispiel #29
0
    def evaluate(self, array_datas):
        """
        Draw a scatter matrix of two or more variables.

        When a ground truth with one entry per row is available, the
        points are coloured by label; otherwise a plain scatter matrix is
        drawn after dropping rows containing NaN.
        """
        failure = ResultObject(None, None, None, CommandStatus.Error)
        sns.set(color_codes=True)
        command_status, df, kl1, cname = DataGuru.transformArray_to_dataFrame(
                array_datas)
        if command_status == CommandStatus.Error:
            return failure
        if len(df.columns) <= 1:
            Printer.Print("There needs to be atleast two variables to perform multiscatter plot!")
            return failure

        win = Window.window()
        fig = win.gcf()
        axes = fig.add_subplot(111)

        ground_truth = StatContainer.ground_truth
        if ground_truth is not None and len(ground_truth.data) == df.shape[0]:
            labels = pd.Series(StatContainer.filterGroundTruth())
            df, labels = DataGuru.removenan(df, labels)
            # Spread the distinct labels evenly over the colour map range
            classes = labels.unique()
            colour_of = dict(zip(classes, np.linspace(0, 1, classes.size)))
            pd.plotting.scatter_matrix(df, alpha=0.2, diagonal='kde',
                                       c=labels.map(colour_of), cmap="jet",
                                       ax=axes)
        else:
            df.dropna(inplace=True)
            pd.plotting.scatter_matrix(df, alpha=0.2, diagonal='kde', ax=axes)

        fig.suptitle(cname)

        win.show()

        return VizContainer.createResult(win, array_datas, ['multiscatter'])
Beispiel #30
0
    def FindTopPredictors(self, algoType="RF"):
        """
        Rank the features by importance and reorder the feature matrix.

        The importance values come from SortTopFeatures_RF and are stored,
        sorted in descending order, in self.featimp. self.X and
        self.columnList are then rebuilt in that order.

        Parameters:
            algoType: label used in the printed message only; the ranking
                itself always uses SortTopFeatures_RF here (the
                SortTopFeatures_TT / SortTopFeatures_LR alternatives are
                disabled).
        """
        X = self.X
        Y = self.Y
        featImpVals = self.SortTopFeatures_RF(X, Y)

        self.featimp = pd.Series(featImpVals, index=self.columnList).sort_values(ascending=False)
        # A space was missing before "algorithm" (printed e.g. "RFalgorithm")
        Printer.Print("Here is a sorted list of top features found using the " + algoType + " algorithm")

        Printer.Print(self.featimp)
        Printer.Print('--------------------------\n--------------------------\n')
        # Reorder the columns of the data by decreasing importance
        self.X = self.data[self.featimp.index[0:]].values
        self.columnList = self.featimp.index