Exemple #1
0
    def get_CRC_occurrences(self, rows, headers):
        '''Record the earliest CRC (ICPC code D75) date for every known patient.

        For each row whose patient ID is a key of self.id2data and whose ICPC
        code starts with 'D75', the row date is written to
        id2data[patient]['CRC_dates'][0] when that slot still holds
        'negative' or holds a later date. At the same time
        id2data[patient]['data'][0] is set to 'positive'.

        rows    -- iterable of indexable records
        headers -- column names; must contain self.ID_column, 'icpc' and
                   'datum' (headers.index raises ValueError otherwise)

        Returns None; self.id2data is updated in place.
        '''
        # single-argument parenthesised print behaves the same on Python 2 and 3
        print('...getting all target (CRC) occurrences')

        # get the index of the relevant columns
        ID_idx = headers.index(self.ID_column)
        CRC_idx = headers.index('icpc')
        date_idx = headers.index('datum')

        # regex pattern to match = D75 (CRC); match() anchors at the start of the code
        CRC_pattern = re.compile('D75')

        for row in rows:

            # rows of patients that were not registered in id2data are ignored
            key = int(row[ID_idx])
            if key not in self.id2data:
                continue

            # currently recorded value: 'negative' or an earlier found date
            CRC = self.id2data[key]['CRC_dates'][0]

            # get ICPC code; rows without a code cannot be a CRC case
            code = row[CRC_idx]
            if code is None:
                continue
            if isinstance(code, str):
                # normalise to the three-character upper-case code prefix
                code = code.strip().upper()[0:3]

            code_date = str2date(row[date_idx])

            # add CRC case if code matches AND the date is earlier than the one recorded so far
            if CRC_pattern.match(code) and (CRC == 'negative' or CRC > code_date):
                self.id2data[key]['CRC_dates'][0] = code_date
                self.id2data[key]['data'][0] = 'positive'
Exemple #2
0
    def get_IDs(self, rows, headers, reference_year=2011):
        '''Register every patient within the age limits in self.id2data.

        For each accepted row a new entry is stored under the integer
        patient ID:
            'data'      -> ['negative', ID, age, gender]
            'CRC_dates' -> ['negative', registration date, unregistration date]

        rows           -- iterable of indexable records
        headers        -- column names; must contain self.ID_column,
                          'geboortedatum', 'geslacht', 'inschrijfdatum' and
                          'uitschrijfdatums' (headers.index raises ValueError
                          otherwise)
        reference_year -- year from which the 'geboortedatum' value is
                          subtracted to obtain the age (default 2011, the
                          value that used to be hard-coded)

        Returns the header names of the first data columns after the target:
        ['ID', 'age', 'gender'].
        '''
        # single-argument parenthesised print behaves the same on Python 2 and 3
        print('...getting all record IDs')

        # get the index of the relevant columns
        ID_idx = headers.index(self.ID_column)
        age_idx = headers.index('geboortedatum')
        gender_idx = headers.index('geslacht')
        begin_idx = headers.index('inschrijfdatum')
        # NOTE(review): trailing 's' differs from 'inschrijfdatum' -- confirm against the data header
        end_idx = headers.index('uitschrijfdatums')

        for row in rows:

            # key is ID
            key = int(row[ID_idx])

            # 'geboortedatum' is converted with int(), so it is presumably a birth year
            ID_age = reference_year - int(row[age_idx])

            # skip if instance is outside the specified age limits
            if ID_age < self.min_age or ID_age > self.max_age:
                continue

            registration = str2date(row[begin_idx], give_default_begin=True)
            unregistration = str2date(row[end_idx],
                                      ymd=False,
                                      give_default_end=True)

            # processed data and registration dates of this patient
            self.id2data[key] = {
                'data': ['negative', key, ID_age, row[gender_idx]],
                'CRC_dates': ['negative', registration, unregistration],
            }

        return ['ID', 'age', 'gender']
	def insert_data(self, rows, headers, code_column, date_column, regex_string, limit, suffix='', incorporate_SOEP=False):
		'''Count coded events per patient and append the counts to every patient's data.

		Rows are skipped when the code is missing, when generate_code returns
		None, or when marshall_predictor flags the truncated code. A remaining
		row is counted when its patient is in self.id2data, its date lies
		between CRC_dates[3] and CRC_dates[4] of that patient, the truncated
		code matches regex_string and (if requested) its SOEP code is 'E'.

		rows             -- iterable of indexable records
		headers          -- column names of the records
		code_column      -- name of the column holding the code
		date_column      -- sequence whose first element names the date column
		regex_string     -- pattern the truncated code must match (re.match)
		limit            -- passed on to generate_code / generate_attributes
		suffix           -- attribute suffix; '' means "use code_column",
		                    'lab_results' switches to lab value/trend abstraction
		incorporate_SOEP -- False, or the name of the SOEP column

		Returns (attribute names, num_total, num_pos). num_total counts the
		rows that passed the code and Marshall checks; num_pos counts those
		of them belonging to a patient whose CRC_dates[0] is not 'negative'.
		One count per attribute is appended to id2data[ID]['data'] of every
		patient, in the order of the returned names.
		'''
		# make convenient reference to the dictionary
		dct = self.id2data

		# get the index of the relevant columns
		ID_idx = headers.index(self.ID_column)
		code_idx = headers.index(code_column)
		date_idx = headers.index(date_column[0])

		if suffix == 'lab_results':
			val_idx = headers.index('waarde')
			min_idx = headers.index('referentie_minimum')
			max_idx = headers.index('referentie_maximum')
		if incorporate_SOEP:
			SOEP_idx = headers.index(incorporate_SOEP)

		# get the right suffix to append for the attribute name
		if suffix == '':
			suffix = code_column

		# regex pattern to match (ATC/ICPC standards)
		pattern = re.compile(regex_string)

		# keep track of number of times the row is attributed to a positive CRC patient
		num_pos = 0
		num_total = 0

		# attribute name -> {patient ID -> number of occurrences}
		attribute2ids = dict()

		# patient ID -> {lab measurement name -> [(date, value), ...]}
		# created up front: the lazy creation inside the loop left the name
		# undefined (NameError below) when no lab row qualified
		ID2abstractions = dict()

		for row in rows:
			original_code = row[code_idx]
			if original_code is None:
				continue
			truncated_code = self.generate_code(original_code, limit)
			if truncated_code is None:
				continue

			# Marshall predictors are skipped entirely (not even counted in num_total)
			if self.marshall_predictor(truncated_code, code_column):
				continue
			num_total += 1

			# if key is not in the data dictionary, we skip it
			key = int(row[ID_idx])
			if key not in dct:
				continue

			if dct[key]['CRC_dates'][0] != 'negative':
				num_pos += 1

			# init other vars
			date = str2date(row[date_idx], give_default_begin=True)
			begin = dct[key]['CRC_dates'][3]
			end = dct[key]['CRC_dates'][4]
			if code_column == 'specialisme':
				end = end - four_weeks()

			if suffix == 'lab_results':
				val, min_val, max_val = self.make_lab_values(row[val_idx], row[min_idx], row[max_idx])
				if val == '':
					continue

			# only rows in the required interval with a valid code are used
			if not (begin <= date <= end and pattern.match(truncated_code)):
				continue

			# SOEP code only matters when requested (journaal case); then it must be E
			if incorporate_SOEP and row[SOEP_idx] != 'E':
				continue

			if suffix == 'lab_results':
				# collect the measurement for the trend abstraction after the loop
				util.init_key(ID2abstractions, key, dict())
				util.init_key(ID2abstractions[key], original_code, [])
				ID2abstractions[key][original_code].append((date, val))

				# a value abstraction needs the value and both reference bounds
				if '' in [val, min_val, max_val]:
					continue
				attributes = [abstracts.get_value(val, min_val, max_val, original_code)]
			else:
				# regular aggregation; subclasses may generate several attributes per code
				attributes = self.generate_attributes(original_code, limit, suffix, src=code_column)

			for attr in attributes:
				# check if attribute name and ID instance already exist, if not, make them
				util.init_key(attribute2ids, attr, dict())
				util.init_key(attribute2ids[attr], key, 0)

				# add 1 to the occurrence of the attribute in the instance
				attribute2ids[attr][key] += 1

		if suffix == 'lab_results':
			# convert to trends PER lab result
			for ID in ID2abstractions:
				# .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
				for k, points in ID2abstractions[ID].items():

					# duplicates are dropped and the values sorted before abstraction
					points = sorted(set(points))

					# if only 1 measurement was done, we cannot do time series analysis
					if len(points) > 1 and ID in dct:
						for abstraction in abstracts.get_trends(k, points):
							attr = abstraction[0] # get the state
							util.init_key(attribute2ids, attr, dict())
							util.init_key(attribute2ids[attr], ID, 0)
							attribute2ids[attr][ID] += 1

		# add data to each instance: the number of occurrences, or 0 if there are none
		for ID in dct:
			data = dct[ID]['data']
			for id2occurrences in attribute2ids.values():
				data.append(id2occurrences.get(ID, 0))

		# return the keys to be used as headers when writing the processed data
		# (a real list on Python 3 as well, in the same order as the values above)
		return list(attribute2ids), num_total, num_pos
	def insert_data(self, rows, headers, code_column, date_column, regex_string, limit, suffix='', incorporate_SOEP=False):
		'''Count coded events per patient and append the counts to every patient's data.

		A row is counted when its patient is in self.id2data, it has a code
		for which generate_code returns a value, its date lies between
		CRC_dates[3] and CRC_dates[4] of that patient, the truncated code
		matches regex_string and (if requested) its SOEP code is 'E'.

		rows             -- iterable of indexable records
		headers          -- column names of the records
		code_column      -- name of the column holding the code
		date_column      -- sequence whose first element names the date column
		regex_string     -- pattern the truncated code must match (re.match)
		limit            -- passed on to generate_code / generate_attributes
		suffix           -- attribute suffix; '' means "use code_column",
		                    'lab_results' switches to lab value/trend abstraction
		incorporate_SOEP -- False, or the name of the SOEP column

		Returns (attribute names, num_total, num_pos). num_total counts every
		row; num_pos counts the rows of patients present in id2data whose
		CRC_dates[0] is not 'negative'. One count per attribute is appended
		to id2data[ID]['data'] of every patient, in the order of the
		returned names.
		'''
		# make convenient reference to the dictionary
		dct = self.id2data

		# get the index of the relevant columns
		ID_idx = headers.index(self.ID_column)
		code_idx = headers.index(code_column)
		date_idx = headers.index(date_column[0])

		if suffix == 'lab_results':
			val_idx = headers.index('waarde')
			min_idx = headers.index('referentie_minimum')
			max_idx = headers.index('referentie_maximum')
		if incorporate_SOEP:
			SOEP_idx = headers.index(incorporate_SOEP)

		# get the right suffix to append for the attribute name
		if suffix == '':
			suffix = code_column

		# regex pattern to match (ATC/ICPC standards)
		pattern = re.compile(regex_string)

		# keep track of number of times the row is attributed to a positive CRC patient
		num_pos = 0
		num_total = 0

		# attribute name -> {patient ID -> number of occurrences}
		attribute2ids = dict()

		# patient ID -> {lab measurement name -> [(date, value), ...]}
		# created up front: the lazy creation inside the loop left the name
		# undefined (NameError below) when no lab row qualified
		ID2abstractions = dict()

		for row in rows:
			num_total += 1

			# if key is not in the data dictionary, we skip it
			key = int(row[ID_idx])
			if key not in dct:
				continue

			if dct[key]['CRC_dates'][0] != 'negative':
				num_pos += 1

			# init other vars
			date = str2date(row[date_idx], give_default_begin=True)
			begin = dct[key]['CRC_dates'][3]
			end = dct[key]['CRC_dates'][4]
			if code_column == 'specialisme':
				end = end - four_weeks()

			original_code = row[code_idx]
			if original_code is None:
				continue
			truncated_code = self.generate_code(original_code, limit)
			if truncated_code is None:
				continue
			if suffix == 'lab_results':
				val, min_val, max_val = self.make_lab_values(row[val_idx], row[min_idx], row[max_idx])
				if val == '':
					continue

			# only rows in the required interval with a valid code are used
			if not (begin <= date <= end and pattern.match(truncated_code)):
				continue

			# SOEP code only matters when requested (journaal case); then it must be E
			if incorporate_SOEP and row[SOEP_idx] != 'E':
				continue

			if suffix == 'lab_results':
				# collect the measurement for the trend abstraction after the loop
				util.init_key(ID2abstractions, key, dict())
				util.init_key(ID2abstractions[key], original_code, [])
				ID2abstractions[key][original_code].append((date, val))

				# a value abstraction needs the value and both reference bounds
				if '' in [val, min_val, max_val]:
					continue
				attributes = [abstracts.get_value(val, min_val, max_val, original_code)]
			else:
				# regular aggregation; subclasses may generate several attributes per code
				attributes = self.generate_attributes(original_code, limit, suffix, src=code_column)

			for attr in attributes:
				# check if attribute name and ID instance already exist, if not, make them
				util.init_key(attribute2ids, attr, dict())
				util.init_key(attribute2ids[attr], key, 0)

				# add 1 to the occurrence of the attribute in the instance
				attribute2ids[attr][key] += 1

		if suffix == 'lab_results':
			# convert to trends PER lab result
			for ID in ID2abstractions:
				# .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
				for k, points in ID2abstractions[ID].items():

					# duplicates are dropped and the values sorted before abstraction
					points = sorted(set(points))

					# if only 1 measurement was done, we cannot do time series analysis
					if len(points) > 1 and ID in dct:
						for abstraction in abstracts.get_trends(k, points):
							attr = abstraction[0] # get the state
							util.init_key(attribute2ids, attr, dict())
							util.init_key(attribute2ids[attr], ID, 0)
							attribute2ids[attr][ID] += 1

		# add data to each instance: the number of occurrences, or 0 if there are none
		for ID in dct:
			data = dct[ID]['data']
			for id2occurrences in attribute2ids.values():
				data.append(id2occurrences.get(ID, 0))

		# return the keys to be used as headers when writing the processed data
		# (a real list on Python 3 as well, in the same order as the values above)
		return list(attribute2ids), num_total, num_pos
    def insert_data(self,
                    rows,
                    headers,
                    code_column,
                    date_column,
                    regex_string,
                    limit,
                    suffix='',
                    incorporate_SOEP=False):
        '''Count generated attributes per patient and append the counts to every patient's data.

        Every row of a patient present in self.id2data is handed to
        generate_attributes; each returned attribute is counted once for
        that patient. When incorporate_SOEP names a column, only rows whose
        value in that column is 'E' are used.

        rows             -- iterable of indexable records
        headers          -- column names of the records
        code_column      -- name of the column holding the code
        date_column      -- accepted for interface compatibility; not used
        regex_string     -- accepted for interface compatibility; not used
        limit            -- passed on to generate_attributes
        suffix           -- attribute suffix; '' means "use code_column"
        incorporate_SOEP -- False, or the name of the SOEP column

        NOTE(review): the previous body parsed the row date and compiled
        regex_string but never used either, so no date-window or regex
        filtering is applied here -- confirm that this is intended.

        Returns the list of attribute names. One count per attribute is
        appended to id2data[ID]['data'] of every patient, in the order of
        the returned names.
        '''
        # make convenient reference to the dictionary
        dct = self.id2data

        # get the index of the relevant columns
        ID_idx = headers.index(self.ID_column)
        code_idx = headers.index(code_column)
        if incorporate_SOEP:
            SOEP_idx = headers.index(incorporate_SOEP)

        # get the right suffix to append for the attribute name
        if suffix == '':
            suffix = code_column

        # attribute name -> {patient ID -> number of occurrences}
        attribute2counts = dict()
        for row in rows:

            # if key is not in the data dictionary, we skip it
            key = int(row[ID_idx])
            if key not in dct:
                continue

            # SOEP code only matters when requested (journaal case); then it must be E
            if incorporate_SOEP and row[SOEP_idx] != 'E':
                continue

            # subclasses may generate several attributes for one code
            attributes = self.generate_attributes(row[code_idx],
                                                  limit,
                                                  suffix,
                                                  src=code_column)

            for attr in attributes:
                # the counter lives one level down, per patient; incrementing
                # attribute2counts[attr] itself would add an int to a dict
                id2occurrences = attribute2counts.setdefault(attr, dict())
                id2occurrences[key] = id2occurrences.get(key, 0) + 1

        # add data to each instance: the number of occurrences, or 0 if there are none
        for ID in dct:
            data = dct[ID]['data']
            for id2occurrences in attribute2counts.values():
                data.append(id2occurrences.get(ID, 0))

        # return the keys to be used as headers when writing the processed data
        return list(attribute2counts)
	def insert_data(self, rows, headers, code_column, date_column, regex_string, limit, suffix='', incorporate_SOEP=False):
		'''Count generated attributes per patient and append the counts to every patient's data.

		Every row of a patient present in self.id2data is handed to
		generate_attributes; each returned attribute is counted once for
		that patient. When incorporate_SOEP names a column, only rows whose
		value in that column is 'E' are used.

		rows             -- iterable of indexable records
		headers          -- column names of the records
		code_column      -- name of the column holding the code
		date_column      -- accepted for interface compatibility; not used
		regex_string     -- accepted for interface compatibility; not used
		limit            -- passed on to generate_attributes
		suffix           -- attribute suffix; '' means "use code_column"
		incorporate_SOEP -- False, or the name of the SOEP column

		NOTE(review): the previous body parsed the row date and compiled
		regex_string but never used either, so no date-window or regex
		filtering is applied here -- confirm that this is intended.

		Returns the list of attribute names. One count per attribute is
		appended to id2data[ID]['data'] of every patient, in the order of
		the returned names.
		'''
		# make convenient reference to the dictionary
		dct = self.id2data

		# get the index of the relevant columns
		ID_idx = headers.index(self.ID_column)
		code_idx = headers.index(code_column)
		if incorporate_SOEP:
			SOEP_idx = headers.index(incorporate_SOEP)

		# get the right suffix to append for the attribute name
		if suffix == '':
			suffix = code_column

		# attribute name -> {patient ID -> number of occurrences}
		attribute2counts = dict()
		for row in rows:

			# if key is not in the data dictionary, we skip it
			key = int(row[ID_idx])
			if key not in dct:
				continue

			# SOEP code only matters when requested (journaal case); then it must be E
			if incorporate_SOEP and row[SOEP_idx] != 'E':
				continue

			# subclasses may generate several attributes for one code
			attributes = self.generate_attributes(row[code_idx], limit, suffix, src=code_column)

			for attr in attributes:
				# the counter lives one level down, per patient; incrementing
				# attribute2counts[attr] itself would add an int to a dict
				id2occurrences = attribute2counts.setdefault(attr, dict())
				id2occurrences[key] = id2occurrences.get(key, 0) + 1

		# add data to each instance: the number of occurrences, or 0 if there are none
		for ID in dct:
			data = dct[ID]['data']
			for id2occurrences in attribute2counts.values():
				data.append(id2occurrences.get(ID, 0))

		# return the keys to be used as headers when writing the processed data
		return list(attribute2counts)
	def insert_data(self, rows, headers, code_column, date_column, regex_string, limit, suffix='', incorporate_SOEP=False):
		'''Turn coded events into state intervals via insert_state_interval.

		A row is used when its patient is in self.id2data, it has a code for
		which generate_code returns a value, its begin OR end date lies
		between CRC_dates[3] and CRC_dates[4] of that patient, the truncated
		code matches regex_string and (if requested) its SOEP code is 'E'.
		With suffix 'lab_results' the value abstraction of each measurement
		is inserted and, after all rows were read, the trend abstractions of
		every measurement with more than one distinct (date, value) point.

		rows             -- iterable of indexable records
		headers          -- column names of the records
		code_column      -- name of the column holding the code
		date_column      -- sequence naming the begin and end date columns
		regex_string     -- pattern the truncated code must match (re.match)
		limit            -- passed on to generate_code / generate_attributes
		suffix           -- attribute suffix; '' means "use code_column",
		                    'lab_results' switches to lab abstraction
		incorporate_SOEP -- False, or the name of the SOEP column

		Returns ([], -1, -1), placeholders that satisfy the return value
		requirement of the method 'process' in the superclass.
		'''
		# make convenient reference to the dictionary
		dct = self.id2data

		# get the index of the relevant columns
		ID_idx = headers.index(self.ID_column)
		code_idx = headers.index(code_column)
		b_date_idx = headers.index(date_column[0])
		e_date_idx = headers.index(date_column[1])
		if suffix == 'lab_results':
			val_idx = headers.index('waarde')
			min_idx = headers.index('referentie_minimum')
			max_idx = headers.index('referentie_maximum')
		if incorporate_SOEP:
			SOEP_idx = headers.index(incorporate_SOEP)

		# get the right suffix to append for the attribute name
		if suffix == '':
			suffix = code_column

		# regex pattern to match (ATC/ICPC standards)
		pattern = re.compile(regex_string)

		# patient ID -> {lab measurement name -> [(date, value), ...]}
		# created up front: the lazy creation inside the loop left the name
		# undefined (NameError below) when no lab row qualified
		ID2abstractions = dict()

		# iterate over all instances
		for row in rows:

			# if key is not in the data dictionary, we skip it
			key = int(row[ID_idx])
			if key not in dct:
				continue

			# init other vars
			b_date = str2date(row[b_date_idx], give_default_begin=True) # begin of event
			e_date = str2date(row[e_date_idx], give_default_end=True) # end of event
			b_reg = dct[key]['CRC_dates'][3] # beginning of registration
			e_reg = dct[key]['CRC_dates'][4] # ending of registration
			if code_column == 'specialisme':
				e_reg = e_reg - four_weeks()

			original_code = row[code_idx]
			if original_code is None:
				continue
			truncated_code = self.generate_code(original_code, limit)
			if truncated_code is None:
				continue
			if suffix == 'lab_results':
				val, min_val, max_val = self.make_lab_values(row[val_idx], row[min_idx], row[max_idx])
				if val == '':
					continue

			# required interval: either the beginning or the ending date falls inside it
			in_interval = (b_reg <= b_date <= e_reg) or (b_reg <= e_date <= e_reg)
			if not (in_interval and pattern.match(truncated_code)):
				continue

			# if we need to take the SOEP code of consults into account, it must be E
			if incorporate_SOEP and row[SOEP_idx] != 'E':
				continue

			# generate attribute names
			if suffix == 'lab_results':
				# collect the measurement for the trend abstraction after the loop
				util.init_key(ID2abstractions, key, dict())
				util.init_key(ID2abstractions[key], original_code, [])
				ID2abstractions[key][original_code].append((b_date, val))

				# a value abstraction needs the value and both reference bounds
				if '' not in [val, min_val, max_val]:
					attributes = [abstracts.get_value(val, min_val, max_val, original_code)]
				else:
					attributes = []
			else:
				# subclasses may generate several attributes for one code
				attributes = self.generate_attributes(original_code, limit, suffix, src=code_column)

			for attr in attributes:
				# insert a StateInterval object with the specified parameters
				self.insert_state_interval(key, attr, b_date, e_date, original_code, code_column)

		if suffix == 'lab_results':
			# convert to trends PER lab result
			for ID in ID2abstractions:
				# .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
				for k, points in ID2abstractions[ID].items():

					# duplicates are dropped and the values sorted before abstraction
					points = sorted(set(points))

					# if only 1 measurement was done, we cannot do time series analysis
					if len(points) > 1 and ID in dct:
						for abstraction in abstracts.get_trends(k, points):
							# k is the code the points were collected under; the loop
							# variable original_code would be the code of the LAST row read
							self.insert_state_interval(ID, *abstraction, original_code=k, src=code_column)

		# to satisfy return value requirement for the method 'process' in the superclass
		return [], -1, -1
Exemple #8
0
    def insert_data(self,
                    rows,
                    headers,
                    code_column,
                    date_column,
                    regex_string,
                    limit,
                    suffix='',
                    incorporate_SOEP=False):
        '''Turn coded events into state intervals via insert_state_interval.

        Rows are skipped when the code is missing, when generate_code
        returns None, or when marshall_predictor flags the truncated code.
        A remaining row is used when its patient is in self.id2data, its
        begin OR end date lies between CRC_dates[3] and CRC_dates[4] of that
        patient, the truncated code matches regex_string and (if requested)
        its SOEP code is 'E'. With suffix 'lab_results' the value
        abstraction of each measurement is inserted and, after all rows were
        read, the trend abstractions of every measurement with more than one
        distinct (date, value) point.

        rows             -- iterable of indexable records
        headers          -- column names of the records
        code_column      -- name of the column holding the code
        date_column      -- sequence naming the begin and end date columns
        regex_string     -- pattern the truncated code must match (re.match)
        limit            -- passed on to generate_code / generate_attributes
        suffix           -- attribute suffix; '' means "use code_column",
                            'lab_results' switches to lab abstraction
        incorporate_SOEP -- False, or the name of the SOEP column

        Returns ([], -1, -1), placeholders that satisfy the return value
        requirement of the method 'process' in the superclass.
        '''
        # make convenient reference to the dictionary
        dct = self.id2data

        # get the index of the relevant columns
        ID_idx = headers.index(self.ID_column)
        code_idx = headers.index(code_column)
        b_date_idx = headers.index(date_column[0])
        e_date_idx = headers.index(date_column[1])
        if suffix == 'lab_results':
            val_idx = headers.index('waarde')
            min_idx = headers.index('referentie_minimum')
            max_idx = headers.index('referentie_maximum')
        if incorporate_SOEP:
            SOEP_idx = headers.index(incorporate_SOEP)

        # get the right suffix to append for the attribute name
        if suffix == '':
            suffix = code_column

        # regex pattern to match (ATC/ICPC standards)
        pattern = re.compile(regex_string)

        # patient ID -> {lab measurement name -> [(date, value), ...]}
        # created up front: the lazy creation inside the loop left the name
        # undefined (NameError below) when no lab row qualified
        ID2abstractions = dict()

        # iterate over all instances
        for row in rows:

            original_code = row[code_idx]
            if original_code is None:
                continue
            truncated_code = self.generate_code(original_code, limit)
            if truncated_code is None:
                continue

            # Marshall predictors are skipped entirely
            if self.marshall_predictor(truncated_code, code_column):
                continue

            # if key is not in the data dictionary, we skip it
            key = int(row[ID_idx])
            if key not in dct:
                continue

            # init other vars
            b_date = str2date(row[b_date_idx],
                              give_default_begin=True)  # begin of event
            e_date = str2date(row[e_date_idx],
                              give_default_end=True)  # end of event
            b_reg = dct[key]['CRC_dates'][3]  # beginning of registration
            e_reg = dct[key]['CRC_dates'][4]  # ending of registration
            if code_column == 'specialisme':
                e_reg = e_reg - four_weeks()

            if suffix == 'lab_results':
                val, min_val, max_val = self.make_lab_values(
                    row[val_idx], row[min_idx], row[max_idx])
                if val == '':
                    continue

            # required interval: either the beginning or the ending date falls inside it
            in_interval = ((b_reg <= b_date <= e_reg)
                           or (b_reg <= e_date <= e_reg))
            if not (in_interval and pattern.match(truncated_code)):
                continue

            # if we need to take the SOEP code of consults into account, it must be E
            if incorporate_SOEP and row[SOEP_idx] != 'E':
                continue

            # generate attribute names
            if suffix == 'lab_results':
                # collect the measurement for the trend abstraction after the loop
                util.init_key(ID2abstractions, key, dict())
                util.init_key(ID2abstractions[key], original_code, [])
                ID2abstractions[key][original_code].append((b_date, val))

                # a value abstraction needs the value and both reference bounds
                if '' not in [val, min_val, max_val]:
                    attributes = [
                        abstracts.get_value(val, min_val, max_val,
                                            original_code)
                    ]
                else:
                    attributes = []
            else:
                # subclasses may generate several attributes for one code
                attributes = self.generate_attributes(original_code,
                                                      limit,
                                                      suffix,
                                                      src=code_column)

            for attr in attributes:
                # insert a StateInterval object with the specified parameters
                self.insert_state_interval(key, attr, b_date, e_date,
                                           original_code, code_column)

        if suffix == 'lab_results':
            # convert to trends PER lab result
            for ID in ID2abstractions:
                # .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
                for k, points in ID2abstractions[ID].items():

                    # duplicates are dropped and the values sorted before abstraction
                    points = sorted(set(points))

                    # if only 1 measurement was done, we cannot do time series analysis
                    if len(points) > 1 and ID in dct:
                        for abstraction in abstracts.get_trends(k, points):
                            # k is the code the points were collected under; the loop
                            # variable original_code would be the code of the LAST row read
                            self.insert_state_interval(ID,
                                                       *abstraction,
                                                       original_code=k,
                                                       src=code_column)

        # to satisfy return value requirement for the method 'process' in the superclass
        return [], -1, -1