Python isdate 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: nl4dv.utils.helpers

메소드/함수: isdate

hotexamples.com에서의 예제들: 8

Python isdate - 8개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 nl4dv.utils.helpers.isdate에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: datagenie.py 프로젝트: helt/nl4dv

    def populate_dataset_meta(self, attr, attr_val, attr_datatype):
        """
        Fold one raw value into the metadata kept for a single attribute.

        Depending on ``attr_datatype`` the value is added to the attribute's
        ``domain`` set and its ``summary`` dict is updated:

        * QUANTITATIVE: running ``min`` / ``max`` of the float-converted value.
        * TEMPORAL: running ``start`` / ``end`` of the value parsed by
          ``helpers.isdate``.
        * anything else: occurrence count of ``str(attr_val)`` under
          ``group_counts``.

        :param attr: key of the attribute in ``self.data_attribute_map``
        :param attr_val: raw value of the attribute for one data row
        :param attr_datatype: one of the values of ``constants.attribute_types``
        """
        if attr_datatype == constants.attribute_types['QUANTITATIVE']:
            try:
                attr_val = float(attr_val)
            except Exception:
                # Best effort: a value that cannot be converted is recorded as NaN.
                attr_val = float('NaN')
            attr_meta = self.data_attribute_map[attr]
            attr_meta['domain'].add(attr_val)

            # Compute Max and Min of the attribute values.
            # NaN compares False both ways, so it never replaces min / max.
            summary = attr_meta['summary']
            summary.setdefault('min', float("inf"))
            summary.setdefault('max', float("-inf"))
            if attr_val > summary['max']:
                summary['max'] = attr_val
            if attr_val < summary['min']:
                summary['min'] = attr_val

        elif attr_datatype == constants.attribute_types['TEMPORAL']:
            parsed_status, parsed_attr_val = helpers.isdate(attr_val)
            # Only successfully parsed values take part in the domain and in the
            # start / end range. Storing a float NaN placeholder for a failed
            # parse would later be ordered against parsed values, which raises
            # TypeError if those are datetime objects.
            if parsed_status:
                attr_meta = self.data_attribute_map[attr]
                attr_meta['domain'].add(parsed_attr_val)

                # Compute Max and Min of the attribute datetime
                summary = attr_meta['summary']
                summary.setdefault('start', parsed_attr_val)
                summary.setdefault('end', parsed_attr_val)
                if parsed_attr_val > summary['end']:
                    summary['end'] = parsed_attr_val
                if parsed_attr_val < summary['start']:
                    summary['start'] = parsed_attr_val

        else:
            attr_val = str(attr_val)
            attr_meta = self.data_attribute_map[attr]
            attr_meta['domain'].add(attr_val)

            # Compute # occurrences of attribute values
            group_counts = attr_meta['summary'].setdefault(
                'group_counts', dict())
            group_counts[attr_val] = group_counts.get(attr_val, 0) + 1

예제 #2

파일 보기

    def set_tasks_to_datatable(self, dim, task):
        """
        Append the Vega-Lite filter transform(s) described by ``task`` to
        every view listed under ``self.vl_spec["hconcat"]``.

        Only tasks whose ``task["task"]`` equals ``'filter'`` have an effect:

        * ``IN``        -> ``oneOf`` field predicate on ``task["values"]``
        * ``RANGE``     -> ``range`` field predicate on ``task["values"]``
        * ``NOT RANGE`` -> negated ``range`` field predicate
        * any other operator -> expression filter comparing the attribute with
          ``task["values"][0]`` through the symbol looked up in
          ``constants.operator_symbol_mapping``; it is emitted only when that
          value is numeric or recognised as a date.

        :param dim: not used by this method
        :param task: dict with the keys ``task``, ``operator``, ``attributes``
            and ``values``
        """
        for column in self.vl_spec["hconcat"]:
            if task["task"] != 'filter':
                continue

            operator = task["operator"]
            if operator == 'IN':
                for attr in task['attributes']:
                    column['transform'].append({
                        'filter': {
                            "field": attr,
                            "oneOf": task["values"]
                        }
                    })
            elif operator == 'RANGE':
                for attr in task['attributes']:
                    column['transform'].append({
                        "filter": {
                            "field": attr,
                            "range": task["values"]
                        }
                    })
            elif operator == 'NOT RANGE':
                for attr in task['attributes']:
                    column['transform'].append({
                        "filter": {
                            "not": {
                                "field": attr,
                                "range": task["values"]
                            }
                        }
                    })
            else:
                for attr in task['attributes']:
                    symbol = constants.operator_symbol_mapping[operator]
                    value = task["values"][0]
                    if helpers.isfloat(value) or helpers.isint(value):
                        # Numeric operands are embedded unquoted
                        expression = 'lower(datum["{}"]) {} {}'.format(
                            attr, symbol, value)
                    elif helpers.isdate(value)[0]:
                        # isdate() returns a (status, details) pair. The pair
                        # itself is always truthy, so the status element has
                        # to be tested explicitly.
                        expression = 'lower(datum["{}"]) {} "{}"'.format(
                            attr, symbol, value)
                    else:
                        continue
                    column['transform'].append({'filter': expression})

예제 #3

파일 보기

    def get_attributes_values(self, attribute, amount):
        """
        Return ``amount`` converted according to the datatype of ``attribute``.

        * TEMPORAL attribute: when ``helpers.isdate`` recognises ``amount``,
          the first candidate format that parses it is used to return a
          ``"%Y/%m/%d"`` string; otherwise ``amount`` is returned unchanged.
        * QUANTITATIVE attribute: ``float(amount)``.
        * any other datatype: ``amount`` unchanged.

        :param attribute: key into the data genie's ``data_attribute_map``
        :param amount: raw value to convert
        """
        data_type = self.nl4dv_instance.data_genie_instance.data_attribute_map[
            attribute]["dataType"]

        if data_type == constants.attribute_types['TEMPORAL']:
            recognised, date_parts = helpers.isdate(amount)
            if not recognised:
                return amount

            candidate_formats = constants.date_regexes[
                date_parts['regex_id']][0]
            for date_format in candidate_formats:
                # Vega-Lite can understand "%Y/%m/%d" for temporal fields in the filter transforms.
                parsed = helpers.format_str_to_date(
                    "/".join(date_parts["regex_matches"]), date_format)
                if parsed is not None:
                    return parsed.strftime("%Y/%m/%d")
            return amount

        if data_type == constants.attribute_types['QUANTITATIVE']:
            return float(amount)

        return amount

예제 #4

파일 보기

파일: datagenie.py 프로젝트: helt/nl4dv

    def set_data(self, data_url=None):
        # type: (str) -> bool
        """
        Load a local .csv / .tsv / .json dataset and rebuild the attribute map.

        Resets ``self.data``, ``self.rows`` and ``self.data_attribute_map``,
        reads the file, infers a datatype for every attribute from the
        majority of its values and records per-attribute metadata through
        ``populate_dataset_meta``.

        :param data_url: path of a local file; it is stored on
            ``self.nl4dv_instance.data_url`` even when it is ``None``
        :return: ``True`` when the file was read; ``False`` when ``data_url``
            is ``None``, is not an existing file, or has an extension other
            than .csv / .tsv / .json
        """
        self.nl4dv_instance.data_url = data_url

        # initialize values
        self.data_attribute_map = dict()
        self.data = list()
        self.rows = 0

        path = self.nl4dv_instance.data_url
        if path is None or not os.path.isfile(path):
            return False

        # Pick the parser from the (case-insensitive) file extension
        path_lower = path.lower()
        if path_lower.endswith('.csv'):
            delimiter = ','
        elif path_lower.endswith('.tsv'):
            delimiter = '\t'
        elif path_lower.endswith('.json'):
            delimiter = None
        else:
            # Unsupported extension: there is nothing that can be parsed
            return False

        attributes = list()
        if delimiter is not None:
            # The csv module asks for files opened with newline=''; the
            # with-block guarantees the handle is closed again.
            with open(path, 'r', encoding='utf-8', newline='') as handle:
                reader = csv.reader(handle, delimiter=delimiter)
                # assumes headers are in the first line; an empty file has none
                attributes = next(reader, list())
                for line in reader:
                    data_obj = dict()
                    # zip() ignores cells beyond the last header column
                    for name, value in zip(attributes, line):
                        # Don't consider attribute names that are empty or just whitespaces
                        if name and name.strip():
                            data_obj[name] = value
                    self.data.append(data_obj)
        else:
            # JSON file
            with open(path, 'r', encoding='utf-8') as handle:
                json_data = json.load(handle)
            if json_data:
                attributes = json_data[0].keys()
            self.data.extend(json_data)
        self.rows = len(self.data)

        # initialize properties in Attribute Map
        for attr in attributes:
            # Don't consider attribute names that are empty or just whitespaces
            if attr and attr.strip():
                self.data_attribute_map[attr] = {
                    'domain': set(),
                    'isLabelAttribute':
                    attr == self.nl4dv_instance.label_attribute,
                    'summary': dict(),
                    'dataTypeList': list(),  # temporary to determine datatype
                    'dataType': '',
                    'aliases': list(),
                }

        # infer attribute datatypes and compute summary (range, domain)
        for datum in self.data:
            for attr in self.data_attribute_map.keys():
                attr_val = datum[attr]

                # Check for Numeric (float, int)
                if helpers.isfloat(attr_val) or helpers.isint(attr_val):
                    attr_datatype = constants.attribute_types['QUANTITATIVE']
                # Check for Datetime
                # ToDo:- Works fine for datetime strings. Not for others like Epochs and Int-only Years (e.g. 2018) which get captured above.
                # ToDo:- It is VERY risky to switch this elif block with the if block above
                elif helpers.isdate(attr_val)[0]:
                    attr_datatype = constants.attribute_types['TEMPORAL']
                # Otherwise set as Nominal
                else:
                    attr_datatype = constants.attribute_types['NOMINAL']

                self.populate_dataset_meta(attr, attr_val, attr_datatype)

                # Irrespective of above assignment, make a list of attribute types for each data row
                # to take best decision on heterogeneous data with multiple datatypes
                self.data_attribute_map[attr]['dataTypeList'].append(
                    attr_datatype)

        # Determine the Datatype based on majority of values.
        # Also Override a few datatypes set above based on rules such as NOMINAL to ORDINAL if all values are unique such as Sr. 1, Sr. 2, ...
        for attr in self.data_attribute_map:
            attr_meta = self.data_attribute_map[attr]

            # most common attribute type
            attr_datatype = Counter(
                attr_meta['dataTypeList']).most_common(1)[0][0]

            # if it's quantitative but with less than or equal to 12 unique values, then it's ordinal.
            # eg. 1, 2, 3, ..., 12 (months of a year)
            # eg. -3, -2, -1, 0, 1, 2, 3 (likert ratings)
            if attr_datatype == constants.attribute_types[
                    'QUANTITATIVE'] and len(attr_meta['domain']) <= 12:
                attr_datatype = constants.attribute_types['ORDINAL']
                self.populate_dataset_meta_for_attr(attr, attr_datatype)

            # If an attribute has (almost) no repeating value, then mark it as the label attribute.
            # eg. primary/unique key of the table? Car1 , Car2, Car3, ...
            # Almost == 90% heuristic-based
            if attr_datatype == constants.attribute_types[
                    'NOMINAL'] and len(
                        attr_meta['domain']) > 0.9 * self.rows:
                self.nl4dv_instance.label_attribute = attr
                attr_meta['isLabelAttribute'] = True

            # Set the final data type
            attr_meta['dataType'] = attr_datatype

            # Presentation
            self.prepare_output(attr, attr_datatype)

        return True

예제 #5

파일 보기

    def set_task(self, dim, task):
        """
        Apply an analytic task to ``self.vl_spec``.

        * ``find_extremum``: sets the ``sort`` property of the channel opposite
          to ``dim`` (``dim`` defaults to ``'y'``); ``MIN`` sorts by ``dim``
          and ``MAX`` by ``'-' + dim``. Nothing happens when the opposite
          channel is not present in ``self.vl_spec['encoding']``.
        * ``filter``: appends one filter transform per attribute to
          ``self.vl_spec['transform']`` (``IN`` -> ``oneOf``, ``RANGE`` ->
          ``range``, ``NOT RANGE`` -> negated ``range``, any other operator ->
          expression filter on ``task["values"][0]``, emitted only when that
          value is numeric or recognised as a date).

        :param dim: ``'x'``, ``'y'`` or ``None``; only used by ``find_extremum``
        :param task: dict with the keys ``task`` and ``operator`` and, for
            filters, ``attributes`` and ``values``
        """
        if task["task"] == 'find_extremum':
            if dim is None:
                dim = 'y'

            operator = task["operator"]
            if operator == 'MIN':
                sort_prefix = ''
            elif operator == 'MAX':
                sort_prefix = '-'
            else:
                sort_prefix = None

            # The channel that gets sorted is the one opposite to ``dim``
            if dim == 'x':
                sorted_channel = 'y'
            elif dim == 'y':
                sorted_channel = 'x'
            else:
                sorted_channel = None

            if sort_prefix is not None and sorted_channel is not None:
                encoding = self.vl_spec['encoding']
                if sorted_channel in encoding:
                    encoding[sorted_channel]['sort'] = sort_prefix + dim

        elif task["task"] == 'filter':
            operator = task["operator"]
            if operator == 'IN':
                for attr in task['attributes']:
                    self.vl_spec['transform'].append(
                        {'filter': {
                            "field": attr,
                            "oneOf": task["values"]
                        }})
            elif operator == 'RANGE':
                for attr in task['attributes']:
                    self.vl_spec['transform'].append(
                        {"filter": {
                            "field": attr,
                            "range": task["values"]
                        }})
            elif operator == 'NOT RANGE':
                for attr in task['attributes']:
                    self.vl_spec['transform'].append({
                        "filter": {
                            "not": {
                                "field": attr,
                                "range": task["values"]
                            }
                        }
                    })
            else:
                for attr in task['attributes']:
                    symbol = constants.operator_symbol_mapping[operator]
                    value = task["values"][0]
                    if helpers.isfloat(value) or helpers.isint(value):
                        # Numeric operands are embedded unquoted
                        expression = 'lower(datum["{}"]) {} {}'.format(
                            attr, symbol, value)
                    elif helpers.isdate(value)[0]:
                        # isdate() returns a (status, details) pair. The pair
                        # itself is always truthy, so the status element has
                        # to be tested explicitly.
                        expression = 'lower(datum["{}"]) {} "{}"'.format(
                            attr, symbol, value)
                    else:
                        continue
                    self.vl_spec['transform'].append({'filter': expression})

예제 #6

파일 보기

    def set_data(self, data_url=None, data_value=None):
        # type: (str, object) -> None
        """
        (Re)load the dataset and rebuild the attribute map.

        An argument that is ``None`` keeps the value already stored on
        ``self.nl4dv_instance``. The stored ``data_url`` takes precedence; it
        is read with pandas when it ends in .csv / .tsv / .json. Otherwise the
        stored ``data_value`` is used, which may be a ``pandas.DataFrame``, a
        list of records, or a dict accepted by ``pandas.DataFrame``.

        After loading, the datatype of every attribute is decided by majority
        vote over its values and ``populate_dataset_meta_for_attr`` is called
        for each attribute.

        :param data_url: path or URL of a .csv / .tsv / .json file
        :param data_value: in-memory dataset (DataFrame, list or dict)
        """
        if data_url is not None:
            self.nl4dv_instance.data_url = data_url
        if data_value is not None:
            self.nl4dv_instance.data_value = data_value

        # initialize values
        self.data_attribute_map = dict()
        self.data = list()
        self.rows = 0

        source_url = self.nl4dv_instance.data_url
        source_value = self.nl4dv_instance.data_value

        if source_url is not None:
            # Possible Local FILE or HTTP URL
            url_lower = source_url.lower()
            if url_lower.endswith('.csv'):
                self.data = pd.read_csv(source_url,
                                        sep=',').to_dict('records')
            elif url_lower.endswith('.tsv'):
                self.data = pd.read_csv(source_url,
                                        sep='\t').to_dict('records')
            elif url_lower.endswith('.json'):
                self.data = pd.read_json(source_url).to_dict('records')

        elif source_value is not None:
            # Dispatch on the *stored* value, not on the argument, so that a
            # value kept from an earlier call is still honoured when this
            # method is called without ``data_value``.
            if isinstance(source_value, pd.DataFrame):
                self.data = source_value.to_dict('records')
            elif isinstance(source_value, list):
                self.data = source_value
            elif isinstance(source_value, dict):
                self.data = pd.DataFrame(source_value).to_dict('records')

        # Set number of rows in the dataset
        self.rows = len(self.data)

        # initialize properties in Attribute Map
        if len(self.data) > 0:
            for attr in self.data[0].keys():
                # Don't consider attribute names that are empty or just whitespaces
                if attr and attr.strip():
                    self.data_attribute_map[attr] = {
                        'domain': set(),
                        'domainMeta': dict(),
                        'isLabelAttribute':
                        attr == self.nl4dv_instance.label_attribute,
                        'summary': dict(),
                        'dataTypeList':
                        list(),  # temporary to determine datatype
                        'dataType': '',
                        # Used e.g. for temporal attributes when they conform to a certain format
                        'dataTypeMeta': dict(),
                        'aliases': list(),
                    }

        # infer attribute datatypes and compute summary (range, domain)
        for datum in self.data:
            for attr in self.data_attribute_map.keys():
                # Check for Datetime
                is_date, unformatted_date_obj = helpers.isdate(datum[attr])
                if is_date:
                    # Temporal votes carry the id of the regex that matched
                    attr_datatype_for_majority = constants.attribute_types[
                        'TEMPORAL'] + "-" + str(
                            unformatted_date_obj["regex_id"])
                # Check for Numeric (float, int)
                elif helpers.isfloat(datum[attr]) or helpers.isint(
                        datum[attr]):
                    attr_datatype_for_majority = constants.attribute_types[
                        'QUANTITATIVE']
                # Otherwise set as Nominal
                else:
                    attr_datatype_for_majority = constants.attribute_types[
                        'NOMINAL']

                # Append the list of attribute types for each data row to take best decision on heterogeneous data with multiple datatypes
                self.data_attribute_map[attr]['dataTypeList'].append(
                    attr_datatype_for_majority)

        # Determine the Datatype based on majority of values.
        # Also Override a few datatypes set above based on rules such as NOMINAL to ORDINAL if all values are unique such as Sr. 1, Sr. 2, ...
        for attr in self.data_attribute_map:
            attr_meta = self.data_attribute_map[attr]

            # By default, set the attribute datatype to the most common attribute
            attr_datatype = Counter(
                attr_meta['dataTypeList']).most_common(1)[0][0]

            # If attr_datatype is Temporal (e.g., T-1, T-2, T-n where 'n' corresponds to the n'th index of the date_regex array.
            # Then: PROCESS this and eventually strip the '-n' from the datatype
            if attr_datatype not in [
                    constants.attribute_types['QUANTITATIVE'],
                    constants.attribute_types['NOMINAL']
            ]:

                # If there is at least one instance of 'T-2' (DD*MM*YY(YY)), in the `dataTypeList`, set the regex_id to this, even if 'T-1' is the majority.
                if 'T-2' in attr_meta['dataTypeList']:
                    attr_datatype = 'T-2'

                # Strip the '-n' from the datatype
                attr_datatype_split = attr_datatype.split("-")
                attr_datatype = attr_datatype_split[0]

                # Remember which regex the temporal values conform to
                attr_meta['dataTypeMeta'] = {
                    "regex_id": attr_datatype_split[1]
                }

                # Add raw data to the domain's metadata. Only for Temporal Attributes.
                attr_meta['domainMeta'].setdefault('raw', set())

            # Set the final data type
            attr_meta['dataType'] = attr_datatype

            # Update the dataset metadata for each attribute
            self.populate_dataset_meta_for_attr(attr, attr_datatype)

예제 #7

파일 보기

    def populate_dataset_meta(self, attr, attr_val, attr_datatype):
        """
        Fold one raw value into the metadata kept for a single attribute.

        * QUANTITATIVE: the float-converted value (NaN when conversion fails)
          joins ``domain`` and updates ``summary['min'/'max']``.
        * TEMPORAL: when ``helpers.isdate`` recognises the value and one of the
          candidate formats parses it, the parsed value joins ``domain``, the
          raw value joins ``domainMeta['raw']`` and ``summary['start'/'end']``
          are updated.
        * anything else: ``str(attr_val)`` joins ``domain`` and its occurrence
          count under ``summary['group_counts']`` is incremented.

        :param attr: key of the attribute in ``self.data_attribute_map``
        :param attr_val: raw value of the attribute for one data row
        :param attr_datatype: one of the values of ``constants.attribute_types``
        """
        if attr_datatype == constants.attribute_types['QUANTITATIVE']:
            try:
                number = float(attr_val)
            except Exception:
                number = float('NaN')
            self.data_attribute_map[attr]['domain'].add(number)

            # Track the running extremes of the attribute values
            stats = self.data_attribute_map[attr]['summary']
            stats.setdefault('min', float("inf"))
            stats.setdefault('max', float("-inf"))
            if number > stats['max']:
                stats['max'] = number
            if number < stats['min']:
                stats['min'] = number
            return

        if attr_datatype == constants.attribute_types['TEMPORAL']:
            recognised, date_parts = helpers.isdate(attr_val)
            moment = None
            if recognised:
                candidate_formats = constants.date_regexes[
                    date_parts['regex_id']][0]
                for date_format in candidate_formats:
                    moment = helpers.format_str_to_date(
                        "/".join(date_parts["regex_matches"]), date_format)
                    if moment is None:
                        continue
                    # First format that parses wins
                    entry = self.data_attribute_map[attr]
                    entry['domain'].add(moment)
                    entry['domainMeta']['raw'].add(attr_val)
                    break

            if moment is None:
                return

            # Track the earliest and latest parsed datetime
            stats = self.data_attribute_map[attr]['summary']
            stats.setdefault('start', moment)
            stats.setdefault('end', moment)
            if moment > stats['end']:
                stats['end'] = moment
            if moment < stats['start']:
                stats['start'] = moment
            return

        # Every other datatype is treated as a category label
        label = str(attr_val)
        self.data_attribute_map[attr]['domain'].add(label)

        # Count the occurrences of each label
        stats = self.data_attribute_map[attr]['summary']
        if 'group_counts' not in stats:
            stats['group_counts'] = dict()
        tallies = stats['group_counts']
        tallies[label] = tallies.get(label, 0) + 1

예제 #8

파일 보기

    def set_data(self, data_url=None, data_value=None):
        # type: (str, object) -> None
        """
        (Re)load the dataset and rebuild the attribute map.

        An argument that is ``None`` keeps the value already stored on
        ``self.nl4dv_instance``. The stored ``data_url`` takes precedence; it
        is read with pandas when it ends in .csv / .tsv / .json. Otherwise the
        stored ``data_value`` is used, which may be a ``pandas.DataFrame``, a
        list of records, or a dict accepted by ``pandas.DataFrame``.

        Every value is classified as QUANTITATIVE, TEMPORAL or NOMINAL and
        passed to ``populate_dataset_meta``; the final datatype of an
        attribute is the most common classification of its values.

        :param data_url: path or URL of a .csv / .tsv / .json file
        :param data_value: in-memory dataset (DataFrame, list or dict)
        """
        if data_url is not None:
            self.nl4dv_instance.data_url = data_url
        if data_value is not None:
            self.nl4dv_instance.data_value = data_value

        # initialize values
        self.data_attribute_map = dict()
        self.data = list()
        self.rows = 0

        source_url = self.nl4dv_instance.data_url
        source_value = self.nl4dv_instance.data_value

        if source_url is not None:
            # Possible Local FILE or HTTP URL
            url_lower = source_url.lower()
            if url_lower.endswith('.csv'):
                self.data = pd.read_csv(source_url,
                                        sep=',').to_dict('records')
            elif url_lower.endswith('.tsv'):
                self.data = pd.read_csv(source_url,
                                        sep='\t').to_dict('records')
            elif url_lower.endswith('.json'):
                self.data = pd.read_json(source_url).to_dict('records')

        elif source_value is not None:
            # Dispatch on the *stored* value, not on the argument, so that a
            # value kept from an earlier call is still honoured when this
            # method is called without ``data_value``.
            if isinstance(source_value, pd.DataFrame):
                self.data = source_value.to_dict('records')
            elif isinstance(source_value, list):
                self.data = source_value
            elif isinstance(source_value, dict):
                self.data = pd.DataFrame(source_value).to_dict('records')

        # Set number of rows in the dataset
        self.rows = len(self.data)

        # initialize properties in Attribute Map
        if len(self.data) > 0:
            for attr in self.data[0].keys():
                # Don't consider attribute names that are empty or just whitespaces
                if attr and attr.strip():
                    self.data_attribute_map[attr] = {
                        'domain': set(),
                        'isLabelAttribute':
                        attr == self.nl4dv_instance.label_attribute,
                        'summary': dict(),
                        'dataTypeList':
                        list(),  # temporary to determine datatype
                        'dataType': '',
                        'aliases': list(),
                    }

        # infer attribute datatypes and compute summary (range, domain)
        for datum in self.data:
            for attr in self.data_attribute_map.keys():
                attr_val = datum[attr]

                # Check for Numeric (float, int)
                if helpers.isfloat(attr_val) or helpers.isint(attr_val):
                    attr_datatype = constants.attribute_types['QUANTITATIVE']
                # Check for Datetime
                # ToDo:- Works fine for datetime strings. Not for others like Epochs and Int-only Years (e.g. 2018) which get captured above.
                # ToDo:- It is VERY risky to switch this elif block with the if block above
                elif helpers.isdate(attr_val)[0]:
                    attr_datatype = constants.attribute_types['TEMPORAL']
                # Otherwise set as Nominal
                else:
                    attr_datatype = constants.attribute_types['NOMINAL']

                self.populate_dataset_meta(attr, attr_val, attr_datatype)

                # Irrespective of above assignment, make a list of attribute types for each data row
                # to take best decision on heterogeneous data with multiple datatypes
                self.data_attribute_map[attr]['dataTypeList'].append(
                    attr_datatype)

        # Determine the Datatype based on majority of values.
        for attr in self.data_attribute_map:
            attr_meta = self.data_attribute_map[attr]

            # most common attribute type
            attr_datatype = Counter(
                attr_meta['dataTypeList']).most_common(1)[0][0]

            ## NOTE: For all practical purposes, let QUANTITATIVE be the determined Data Type. If an attribute is known to be ORDINAL, it can be set using the set_attribute_datatype() API.

            # If an attribute has (almost) no repeating value, then mark it as the label attribute.
            # eg. primary/unique key of the table? Car1 , Car2, Car3, ...
            # Almost == 90% heuristic-based
            if attr_datatype == constants.attribute_types['NOMINAL'] and len(
                    attr_meta['domain']) > 0.9 * self.rows:
                self.nl4dv_instance.label_attribute = attr
                attr_meta['isLabelAttribute'] = True

            # Set the final data type
            attr_meta['dataType'] = attr_datatype

            # Presentation
            self.prepare_output(attr, attr_datatype)