Python isint Examples

Programming Language: Python

Namespace/Package Name: nl4dv.utils.helpers

Method/Function: isint

Examples at hotexamples.com: 5

Python isint - 5 examples found. These are the top-rated real-world Python examples of nl4dv.utils.helpers.isint, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def set_tasks_to_datatable(self, dim, task):
        """
        Append the filter transform(s) described by ``task`` to every view
        listed under ``self.vl_spec["hconcat"]``.

        Tasks whose ``task["task"]`` is not ``'filter'`` leave the spec
        untouched. ``dim`` is accepted but not used by this method.
        """
        for view in self.vl_spec["hconcat"]:
            if task["task"] != 'filter':
                continue

            operator = task["operator"]

            if operator in ('IN', 'RANGE'):
                # Field predicate: membership list for IN, value range for RANGE.
                predicate_key = "oneOf" if operator == 'IN' else "range"
                for attr in task['attributes']:
                    view['transform'].append({
                        'filter': {
                            "field": attr,
                            predicate_key: task["values"],
                        }
                    })

            elif operator == 'NOT RANGE':
                # Same range predicate, wrapped in a logical "not".
                for attr in task['attributes']:
                    view['transform'].append({
                        "filter": {
                            "not": {
                                "field": attr,
                                "range": task["values"],
                            }
                        }
                    })

            else:
                # Every other operator is rendered as an expression string that
                # compares the datum with the first task value.
                for attr in task['attributes']:
                    symbol = constants.operator_symbol_mapping[operator]
                    operand = task["values"][0]

                    if helpers.isfloat(operand) or helpers.isint(operand):
                        # Numeric operands are embedded unquoted.
                        template = 'lower(datum["{}"]) {} {}'
                    elif helpers.isdate(operand):
                        # NOTE(review): the result of helpers.isdate is used as a
                        # plain truth value here; if it returns a (flag, info)
                        # tuple this branch is always taken -- confirm.
                        template = 'lower(datum["{}"]) {} "{}"'
                    else:
                        # No filter is emitted for any other kind of operand.
                        continue

                    view['transform'].append(
                        {'filter': template.format(attr, symbol, operand)})

Example #2

Show file

File: datagenie.py Project: helt/nl4dv

    def set_data(self, data_url=None):
        # type: (str) -> bool
        """
        Load the local data file at ``data_url`` and infer per-attribute metadata.

        Supported files are ``.csv`` / ``.tsv`` (the first line holds the
        attribute names) and ``.json`` (a list of objects; attribute names are
        taken from the first object). Attribute names that are empty or only
        whitespace are ignored. Every call resets ``self.data``, ``self.rows``
        and ``self.data_attribute_map`` before loading.

        :param data_url: path to a local data file.
        :return: True when the file was loaded and processed; False when
            ``data_url`` is None, is not an existing file, or has an
            unsupported extension.
        """
        self.nl4dv_instance.data_url = data_url

        # initialize values
        self.data_attribute_map = dict()
        self.data = list()
        self.rows = 0

        path = self.nl4dv_instance.data_url
        if path is None or not os.path.isfile(path):
            return False

        # Pick the parser from the (case-insensitive) file extension.
        lowered_path = path.lower()
        if lowered_path.endswith('.csv'):
            delimiter = ','
        elif lowered_path.endswith('.tsv'):
            delimiter = '\t'
        elif lowered_path.endswith('.json'):
            delimiter = None
        else:
            # Unsupported extension: report "not loaded" instead of failing
            # later while iterating over data that was never parsed.
            return False

        # Read the whole file inside a context manager so the handle is closed.
        if delimiter is None:
            with open(path, 'r', encoding='utf-8') as json_file:
                records = json.load(json_file)
            attributes = list(records[0].keys())
        else:
            records = []
            # newline='' lets the csv module handle line endings itself.
            with open(path, 'r', encoding='utf-8', newline='') as table_file:
                reader = csv.reader(table_file, delimiter=delimiter)
                attributes = next(reader)  # assumes headers are in the first line
                for line in reader:
                    # Blank lines parse to an empty list; they carry no values.
                    if not line:
                        continue
                    data_obj = dict()
                    for i, cell in enumerate(line):
                        # Don't consider attribute names that are empty or just whitespaces
                        if attributes[i] and attributes[i].strip():
                            data_obj[attributes[i]] = cell
                    records.append(data_obj)

        # initialize properties in Attribute Map
        for attr in attributes:
            # Don't consider attribute names that are empty or just whitespaces
            if attr and attr.strip():
                self.data_attribute_map[attr] = {
                    'domain': set(),
                    'isLabelAttribute':
                    attr == self.nl4dv_instance.label_attribute,
                    'summary': dict(),
                    'dataTypeList': list(),  # temporary to determine datatype
                    'dataType': '',
                    'aliases': list(),
                }

        self.data.extend(records)
        self.rows = len(self.data)

        # infer attribute datatypes and compute summary (range, domain)
        for datum in self.data:
            for attr in self.data_attribute_map:
                attr_val = datum[attr]

                # Check for Numeric (float, int)
                if helpers.isfloat(attr_val) or helpers.isint(attr_val):
                    attr_datatype = constants.attribute_types['QUANTITATIVE']

                # Check for Datetime
                # ToDo:- Works fine for datetime strings. Not for others like Epochs and Int-only Years (e.g. 2018) which get captured above.
                # ToDo:- It is VERY risky to switch this elif block with the if block above
                elif helpers.isdate(attr_val)[0]:
                    attr_datatype = constants.attribute_types['TEMPORAL']

                # Otherwise set as Nominal
                else:
                    attr_datatype = constants.attribute_types['NOMINAL']

                self.populate_dataset_meta(attr, attr_val, attr_datatype)

                # Irrespective of above assignment, make a list of attribute types for each data row
                # to take best decision on heterogeneous data with multiple datatypes
                self.data_attribute_map[attr]['dataTypeList'].append(
                    attr_datatype)

        # Determine the Datatype based on majority of values.
        # Also Override a few datatypes set above based on rules such as NOMINAL to ORDINAL if all values are unique such as Sr. 1, Sr. 2, ...
        for attr in self.data_attribute_map:
            attr_meta = self.data_attribute_map[attr]

            # most common attribute type
            attr_datatype = Counter(
                attr_meta['dataTypeList']).most_common(1)[0][0]

            # if it's quantitative but with less than or equal to 12 unique values, then it's ordinal.
            # eg. 1, 2, 3, ..., 12 (months of a year)
            # eg. -3, -2, -1, 0, 1, 2, 3 (likert ratings)
            if (attr_datatype == constants.attribute_types['QUANTITATIVE']
                    and len(attr_meta['domain']) <= 12):
                attr_datatype = constants.attribute_types['ORDINAL']
                self.populate_dataset_meta_for_attr(attr, attr_datatype)

            # If an attribute has (almost) no repeating value, then mark it as the label attribute.
            # eg. primary/unique key of the table? Car1 , Car2, Car3, ...
            # Almost == 90% heuristic-based
            if (attr_datatype == constants.attribute_types['NOMINAL']
                    and len(attr_meta['domain']) > 0.9 * self.rows):
                self.nl4dv_instance.label_attribute = attr
                attr_meta['isLabelAttribute'] = True

            # Set the final data type
            attr_meta['dataType'] = attr_datatype

            # Presentation
            self.prepare_output(attr, attr_datatype)

        return True

Example #3

Show file

    def set_data(self, data_url=None, data_value=None):
        # type: (str, object) -> None
        """
        (Re)initialize the dataset and infer a datatype for each attribute.

        Data is read from ``data_url`` (a ``.csv``, ``.tsv`` or ``.json``
        location loaded with pandas) or, when no URL is set, from
        ``data_value`` (a pandas DataFrame, a list of records, or a dict
        accepted by ``pd.DataFrame``). An argument left as None falls back to
        the value already stored on ``self.nl4dv_instance``. A stored URL takes
        precedence over any data value.

        :param data_url: local path or URL of the data file (optional).
        :param data_value: in-memory data (optional).
        """
        self.nl4dv_instance.data_url = data_url if data_url is not None else self.nl4dv_instance.data_url
        self.nl4dv_instance.data_value = data_value if data_value is not None else self.nl4dv_instance.data_value

        # initialize values
        self.data_attribute_map = dict()
        self.data = list()
        self.rows = 0

        # Work from the values stored on the instance, so that a source
        # remembered from an earlier call is honoured when the argument is omitted.
        source_url = self.nl4dv_instance.data_url
        source_value = self.nl4dv_instance.data_value

        if source_url is not None:
            # Possible Local FILE or HTTP URL
            lowered_url = source_url.lower()
            if lowered_url.endswith('.csv'):
                self.data = pd.read_csv(source_url, sep=',').to_dict('records')
            elif lowered_url.endswith('.tsv'):
                self.data = pd.read_csv(source_url, sep='\t').to_dict('records')
            elif lowered_url.endswith('.json'):
                self.data = pd.read_json(source_url).to_dict('records')

        elif source_value is not None:
            if isinstance(source_value, pd.DataFrame):
                self.data = source_value.to_dict('records')
            elif isinstance(source_value, list):
                self.data = source_value
            elif isinstance(source_value, dict):
                self.data = pd.DataFrame(source_value).to_dict('records')

        # Set number of rows in the dataset
        self.rows = len(self.data)

        # initialize properties in Attribute Map
        if len(self.data) > 0:
            for attr in self.data[0].keys():
                # Don't consider attribute names that are empty or just whitespaces
                if attr and attr.strip():
                    self.data_attribute_map[attr] = {
                        'domain': set(),
                        'domainMeta': dict(),
                        'isLabelAttribute':
                        attr == self.nl4dv_instance.label_attribute,
                        'summary': dict(),
                        'dataTypeList':
                        list(),  # temporary to determine datatype
                        'dataType': '',
                        # Used for e.g., temporal attributes when they conform to a certain format
                        'dataTypeMeta': dict(),
                        'aliases': list(),
                    }

        # infer attribute datatypes and compute summary (range, domain)
        for datum in self.data:
            for attr in self.data_attribute_map:
                attr_val = datum[attr]

                # Check for Datetime
                is_date, unformatted_date_obj = helpers.isdate(attr_val)
                if is_date:
                    # Temporal votes carry the id of the matched date regex ("<TEMPORAL>-<id>").
                    attr_datatype_for_majority = constants.attribute_types[
                        'TEMPORAL'] + "-" + str(
                            unformatted_date_obj["regex_id"])
                # Check for Numeric (float, int)
                elif helpers.isfloat(attr_val) or helpers.isint(attr_val):
                    attr_datatype_for_majority = constants.attribute_types[
                        'QUANTITATIVE']
                # Otherwise set as Nominal
                else:
                    attr_datatype_for_majority = constants.attribute_types[
                        'NOMINAL']

                # Append the list of attribute types for each data row to take best decision on heterogeneous data with multiple datatypes
                self.data_attribute_map[attr]['dataTypeList'].append(
                    attr_datatype_for_majority)

        # Determine the Datatype based on majority of values.
        # Also Override a few datatypes set above based on rules such as NOMINAL to ORDINAL if all values are unique such as Sr. 1, Sr. 2, ...
        for attr in self.data_attribute_map:
            attr_meta = self.data_attribute_map[attr]

            # By default, set the attribute datatype to the most common attribute
            attr_datatype = Counter(
                attr_meta['dataTypeList']).most_common(1)[0][0]

            # If attr_datatype is Temporal (e.g., T-1, T-2, T-n where 'n' corresponds to the n'th index of the date_regex array.
            # Then: PROCESS this and eventually strip the '-n' from the datatype
            if attr_datatype not in (
                    constants.attribute_types['QUANTITATIVE'],
                    constants.attribute_types['NOMINAL']):

                # If there is at least one instance of 'T-2' (DD*MM*YY(YY)), in the `dataTypeList`, set the regex_id to this, even if 'T-1' is the majority.
                if 'T-2' in attr_meta['dataTypeList']:
                    attr_datatype = 'T-2'

                # Strip the '-n' from the datatype
                attr_datatype_split = attr_datatype.split("-")
                attr_datatype = attr_datatype_split[0]

                # Remember which date regex the attribute conforms to
                attr_meta['dataTypeMeta'] = {
                    "regex_id": attr_datatype_split[1]
                }

                # Add raw data to the domain's metadata. Only for Temporal Attributes.
                if 'raw' not in attr_meta['domainMeta']:
                    attr_meta['domainMeta']['raw'] = set()

            # Set the final data type
            attr_meta['dataType'] = attr_datatype

            # Update the dataset metadata for each attribute
            self.populate_dataset_meta_for_attr(attr, attr_datatype)

Example #4

Show file

    def set_task(self, dim, task):
        """
        Apply an analytic ``task`` to ``self.vl_spec``.

        * ``find_extremum``: with ``dim`` being ``'x'`` or ``'y'`` (``'y'`` when
          None), set the ``sort`` of the *other* axis' encoding, if that
          encoding exists, to ``dim`` for ``MIN`` or ``'-' + dim`` for ``MAX``.
        * ``filter``: append one filter transform per task attribute to
          ``self.vl_spec['transform']``.

        Any other task type leaves the spec untouched.
        """
        kind = task["task"]

        if kind == 'find_extremum':
            axis = 'y' if dim is None else dim
            operator = task["operator"]

            if operator == 'MIN':
                prefix = ''
            elif operator == 'MAX':
                prefix = '-'
            else:
                return

            # The axis opposite to `axis` is the one that gets sorted.
            if axis == 'x':
                sorted_channel, sort_key = 'y', 'x'
            elif axis == 'y':
                sorted_channel, sort_key = 'x', 'y'
            else:
                return

            encoding = self.vl_spec['encoding']
            if sorted_channel in encoding:
                encoding[sorted_channel]['sort'] = prefix + sort_key
            return

        if kind != 'filter':
            return

        operator = task["operator"]

        if operator in ('IN', 'RANGE'):
            # Field predicate: membership list for IN, value range for RANGE.
            predicate_key = "oneOf" if operator == 'IN' else "range"
            for attr in task['attributes']:
                self.vl_spec['transform'].append({
                    'filter': {
                        "field": attr,
                        predicate_key: task["values"],
                    }
                })

        elif operator == 'NOT RANGE':
            # Same range predicate, wrapped in a logical "not".
            for attr in task['attributes']:
                self.vl_spec['transform'].append({
                    "filter": {
                        "not": {
                            "field": attr,
                            "range": task["values"],
                        }
                    }
                })

        else:
            # Every other operator is rendered as an expression string that
            # compares the datum with the first task value.
            for attr in task['attributes']:
                symbol = constants.operator_symbol_mapping[operator]
                operand = task["values"][0]

                if helpers.isfloat(operand) or helpers.isint(operand):
                    # Numeric operands are embedded unquoted.
                    template = 'lower(datum["{}"]) {} {}'
                elif helpers.isdate(operand):
                    # NOTE(review): the result of helpers.isdate is used as a
                    # plain truth value here; if it returns a (flag, info)
                    # tuple this branch is always taken -- confirm.
                    template = 'lower(datum["{}"]) {} "{}"'
                else:
                    # No filter is emitted for any other kind of operand.
                    continue

                self.vl_spec['transform'].append(
                    {'filter': template.format(attr, symbol, operand)})

Example #5

Show file

    def set_data(self, data_url=None, data_value=None):
        # type: (str, object) -> None
        """
        (Re)initialize the dataset and infer a datatype for each attribute.

        Data is read from ``data_url`` (a ``.csv``, ``.tsv`` or ``.json``
        location loaded with pandas) or, when no URL is set, from
        ``data_value`` (a pandas DataFrame, a list of records, or a dict
        accepted by ``pd.DataFrame``). An argument left as None falls back to
        the value already stored on ``self.nl4dv_instance``. A stored URL takes
        precedence over any data value.

        :param data_url: local path or URL of the data file (optional).
        :param data_value: in-memory data (optional).
        """
        self.nl4dv_instance.data_url = data_url if data_url is not None else self.nl4dv_instance.data_url
        self.nl4dv_instance.data_value = data_value if data_value is not None else self.nl4dv_instance.data_value

        # initialize values
        self.data_attribute_map = dict()
        self.data = list()
        self.rows = 0

        # Work from the values stored on the instance, so that a source
        # remembered from an earlier call is honoured when the argument is omitted.
        source_url = self.nl4dv_instance.data_url
        source_value = self.nl4dv_instance.data_value

        if source_url is not None:
            # Possible Local FILE or HTTP URL
            lowered_url = source_url.lower()
            if lowered_url.endswith('.csv'):
                self.data = pd.read_csv(source_url, sep=',').to_dict('records')
            elif lowered_url.endswith('.tsv'):
                self.data = pd.read_csv(source_url, sep='\t').to_dict('records')
            elif lowered_url.endswith('.json'):
                self.data = pd.read_json(source_url).to_dict('records')

        elif source_value is not None:
            if isinstance(source_value, pd.DataFrame):
                self.data = source_value.to_dict('records')
            elif isinstance(source_value, list):
                self.data = source_value
            elif isinstance(source_value, dict):
                self.data = pd.DataFrame(source_value).to_dict('records')

        # Set number of rows in the dataset
        self.rows = len(self.data)

        # initialize properties in Attribute Map
        if len(self.data) > 0:
            for attr in self.data[0].keys():
                # Don't consider attribute names that are empty or just whitespaces
                if attr and attr.strip():
                    self.data_attribute_map[attr] = {
                        'domain': set(),
                        'isLabelAttribute':
                        attr == self.nl4dv_instance.label_attribute,
                        'summary': dict(),
                        'dataTypeList':
                        list(),  # temporary to determine datatype
                        'dataType': '',
                        'aliases': list(),
                    }

        # infer attribute datatypes and compute summary (range, domain)
        for datum in self.data:
            for attr in self.data_attribute_map:
                attr_val = datum[attr]

                # Check for Numeric (float, int)
                if helpers.isfloat(attr_val) or helpers.isint(attr_val):
                    attr_datatype = constants.attribute_types['QUANTITATIVE']

                # Check for Datetime
                # ToDo:- Works fine for datetime strings. Not for others like Epochs and Int-only Years (e.g. 2018) which get captured above.
                # ToDo:- It is VERY risky to switch this elif block with the if block above
                elif helpers.isdate(attr_val)[0]:
                    attr_datatype = constants.attribute_types['TEMPORAL']

                # Otherwise set as Nominal
                else:
                    attr_datatype = constants.attribute_types['NOMINAL']

                self.populate_dataset_meta(attr, attr_val, attr_datatype)

                # Irrespective of above assignment, make a list of attribute types for each data row
                # to take best decision on heterogeneous data with multiple datatypes
                self.data_attribute_map[attr]['dataTypeList'].append(
                    attr_datatype)

        # Determine the Datatype based on majority of values.
        for attr in self.data_attribute_map:
            attr_meta = self.data_attribute_map[attr]

            # most common attribute type
            attr_datatype = Counter(
                attr_meta['dataTypeList']).most_common(1)[0][0]

            # NOTE: For all practical purposes, let QUANTITATIVE be the determined Data Type.
            # If an attribute is known to be ORDINAL, it can be set using the set_attribute_datatype() API.
            # (The earlier "quantitative with <= 12 unique values => ORDINAL" rule is intentionally not applied.)

            # If an attribute has (almost) no repeating value, then mark it as the label attribute.
            # eg. primary/unique key of the table? Car1 , Car2, Car3, ...
            # Almost == 90% heuristic-based
            if (attr_datatype == constants.attribute_types['NOMINAL']
                    and len(attr_meta['domain']) > 0.9 * self.rows):
                self.nl4dv_instance.label_attribute = attr
                attr_meta['isLabelAttribute'] = True

            # Set the final data type
            attr_meta['dataType'] = attr_datatype

            # Presentation
            self.prepare_output(attr, attr_datatype)