Exemple #1
0
 def __init__(self, config):
     """Read comma-separated data files and slice them into segments.

     Every file named in ``config.data_file`` is parsed with ``csv.reader``
     and only the columns listed in ``config.data_index`` are kept.  The
     rows are cut into segments of ``segment_size`` rows (advancing by
     ``segment_stride``) and each segment into windows of ``window_size``
     rows (advancing by ``window_stride``).  A window is the flat list of
     its rows' selected values converted to ``float``.  Each segment gets a
     single label: the file's base name mapped to the segment length.

     :param config: settings object handed to the parent constructor and
         read back afterwards through ``self.config``.
     """
     super(self.__class__, self).__init__(config)
     self.segments = []
     # A single path is accepted as well as a list of paths.
     if not isinstance(self.config.data_file, list):
         self.config.data_file = [self.config.data_file]
     # -1 means "one window spanning the whole segment".
     if self.config.window_size == -1:
         self.config.window_size = self.config.segment_size
     for filename in self.config.data_file:
         rows = []
         with open(filename, 'r') as data_file:
             for record in csv.reader(data_file, delimiter=','):
                 rows.append(
                     [record[column] for column in self.config.data_index])
         last_segment_start = len(rows) - self.config.segment_size
         for segment_start in range(0, last_segment_start + 1,
                                    self.config.segment_stride):
             segment_end = segment_start + self.config.segment_size
             windows = []
             for window_start in range(
                     segment_start,
                     segment_end - self.config.window_size + 1,
                     self.config.window_stride):
                 window_end = window_start + self.config.window_size
                 # Flatten the window's rows into one list of floats.
                 window = []
                 for row in rows[window_start:window_end]:
                     for item in row:
                         window.append(float(item))
                 windows.append(window)
             # The label is the base name of the file; its weight is the
             # number of rows in the segment.
             labels = dict([(filename.split('/')[-1],
                             segment_end - segment_start)])
             self.segments.append(
                 Segment(windows=windows,
                         segment_start=segment_start,
                         segment_size=self.config.segment_size,
                         window_stride=self.config.window_stride,
                         window_size=self.config.window_size,
                         labels=labels,
                         filename=filename,
                         data_index=self.config.data_index,
                         label_index=self.config.label_index))
Exemple #2
0
    def __init__(self, config):
        """Load a space-delimited data file and cut it into labelled segments.

        The file is read with ``csv.reader``; every ``"NaN"`` cell is replaced
        by the value in the same column of the previous row.  The rows are
        then cut into segments of ``segment_size`` rows (advancing by
        ``segment_stride``) and each segment into windows of ``window_size``
        rows (advancing by ``window_stride``).  Each segment's labels map
        every distinct value found in column ``label_index`` to the number of
        rows in the segment carrying it.

        An empty file yields no segments.

        :param config: settings object; its ``label_index`` is forced to 1
            before it is handed to the parent constructor.  ``data_file`` may
            be a path or a list (only the first entry is used) and
            ``data_index`` may be one column or a list of columns.
        """
        config.label_index = 1
        super(self.__class__, self).__init__(config)
        if isinstance(self.config.data_file, list):
            self.config.data_file = self.config.data_file[0]
        if not isinstance(self.config.data_index, list):
            self.config.data_index = [self.config.data_index]
        with open(self.config.data_file, 'r') as data_file:
            full_data = list(csv.reader(data_file, delimiter=' '))

        # Carry the previous row's value forward into any "NaN" cell.  Rows
        # are patched in place, so a run of NaNs inherits the last real value.
        prev_line = None
        for line in full_data:
            if prev_line is not None:
                for i, (prev_value, value) in enumerate(zip(prev_line, line)):
                    if value == "NaN":
                        line[i] = prev_value
            prev_line = line

        # -1 means "one window spanning the whole segment".
        if self.config.window_size == -1:
            self.config.window_size = self.config.segment_size
        self.segments = []
        for segment_start in range(
                0,
                len(full_data) - self.config.segment_size + 1,
                self.config.segment_stride):
            segment_end = segment_start + self.config.segment_size
            windows = []
            for window_start in range(
                    segment_start, segment_end - self.config.window_size + 1,
                    self.config.window_stride):
                window_end = window_start + self.config.window_size
                # Row-major flattening: with data_index [1, 2] a window is
                # [x_0, y_0, x_1, y_1, ...], i.e. the selected columns are
                # interleaved row by row.
                windows.append([
                    float(row[i])
                    for row in full_data[window_start:window_end]
                    for i in self.config.data_index
                ])

            # Count the rows per label in a single pass over the segment.
            label_dict = {}
            for row in full_data[segment_start:segment_end]:
                label = str(row[self.config.label_index])
                label_dict[label] = label_dict.get(label, 0) + 1

            segment = Segment(windows=windows,
                              segment_start=segment_start,
                              segment_size=self.config.segment_size,
                              window_stride=self.config.window_stride,
                              window_size=self.config.window_size,
                              labels=label_dict,
                              filename=self.config.data_file,
                              data_index=self.config.data_index,
                              label_index=self.config.label_index)
            self.segments.append(segment)
Exemple #3
0
    def __init__(self, config):
        """Load whitespace-separated text files and cut them into segments.

        For every path in ``config.data_file`` the first two lines are
        skipped and the last whitespace-separated field of each remaining
        line is dropped.  Fields that parse as ``float`` are stored as floats,
        the others are kept as text.  Rows are cut into segments of
        ``segment_size`` rows (advancing by ``segment_stride``) and each
        segment into windows of ``window_size`` rows (advancing by
        ``window_stride``); a window is the flat, row-by-row list of the
        columns named in ``data_index``.

        The label of a file is the second ``/``-separated component of what
        is left of its absolute path after removing the prefix common to all
        the absolute paths.

        :param config: settings object handed to the parent constructor;
            ``label_index`` is reset to ``None`` and ``data_index`` is
            converted to a list of ints.
        """
        super(self.__class__, self).__init__(config)
        self.config.label_index = None
        self.config.data_index = [int(i) for i in self.config.data_index]
        # -1 means "one window spanning the whole segment"; without this a
        # negative size would yield malformed windows below.
        if self.config.window_size == -1:
            self.config.window_size = self.config.segment_size
        prefix = os.path.commonprefix(
            [os.path.abspath(f) for f in self.config.data_file])
        self.segments = []
        for f in self.config.data_file:
            p = os.path.abspath(f)[len(prefix):]
            label = p.split('/')[1]

            data = []
            with open(f, 'r') as data_file:
                # skip the first two lines
                for line in itertools.islice(data_file, 2, None):
                    line_data = []
                    # The last field of every line is discarded.
                    for field in line.split()[0:-1]:
                        try:
                            line_data.append(float(field))
                        except ValueError:
                            # Non-numeric fields are kept verbatim.
                            line_data.append(field)
                    data.append(line_data)

            # The file is fully read at this point, so it is already closed
            # while the segments are built.
            columns = self.config.data_index
            for segment_start in range(
                    0,
                    len(data) - self.config.segment_size + 1,
                    self.config.segment_stride):
                segment_end = segment_start + self.config.segment_size
                windows = []
                for window_start in range(
                        segment_start,
                        segment_end - self.config.window_size + 1,
                        self.config.window_stride):
                    window_end = window_start + self.config.window_size
                    windows.append([
                        row[i]
                        for row in data[window_start:window_end]
                        for i in columns
                    ])
                self.segments.append(
                    Segment(windows=windows,
                            segment_start=segment_start,
                            segment_size=self.config.segment_size,
                            window_stride=self.config.window_stride,
                            window_size=self.config.window_size,
                            labels=dict([(label, self.config.segment_size)
                                         ]),
                            filename=p,
                            data_index=self.config.data_index,
                            label_index=self.config.label_index))
            # Single parenthesised argument: prints the same text under both
            # Python 2 and Python 3.  The count is the running total.
            print("File %s segments %s" % (p, len(self.segments)))
Exemple #4
0
    def __init__(self, config):
        """Load the ``@DATA`` section of a file and cut it into segments.

        Lines up to and including the first one starting with ``@DATA`` are
        skipped; the rest of the file is parsed as comma-separated rows.  The
        rows are cut into segments of ``segment_size`` rows (advancing by
        ``segment_stride``) and each segment into windows of ``window_size``
        rows (advancing by ``window_stride``).  Each segment's labels map
        every distinct value found in column ``label_index`` to the number of
        rows in the segment carrying it.

        :param config: settings object handed to the parent constructor;
            ``label_index`` is then forced to 14.  ``data_file`` may be a path
            or a list (only the first entry is used) and ``data_index`` may
            be one column or a list of columns.
        """
        super(self.__class__, self).__init__(config)
        self.config.label_index = 14
        if isinstance(self.config.data_file, list):
            self.config.data_file = self.config.data_file[0]
        with open(self.config.data_file, 'r') as data_file:
            # Consume the header; the reader below continues from the line
            # after the '@DATA' marker.
            for line in data_file:
                if line.startswith('@DATA'):
                    break
            full_data = list(csv.reader(data_file, delimiter=','))
        if not isinstance(self.config.data_index, list):
            self.config.data_index = [self.config.data_index]
        # -1 means "one window spanning the whole segment".
        if self.config.window_size == -1:
            self.config.window_size = self.config.segment_size
        self.segments = []
        for segment_start in range(
                0,
                len(full_data) - self.config.segment_size + 1,
                self.config.segment_stride):
            segment_end = segment_start + self.config.segment_size
            windows = []
            for window_start in range(
                    segment_start, segment_end - self.config.window_size + 1,
                    self.config.window_stride):
                window_end = window_start + self.config.window_size
                # Row-major flattening: with data_index [1, 2] a window is
                # [x_0, y_0, x_1, y_1, ...], i.e. the selected columns are
                # interleaved row by row.
                windows.append([
                    float(row[i])
                    for row in full_data[window_start:window_end]
                    for i in self.config.data_index
                ])

            # Count the rows per label in a single pass over the segment.
            label_dict = {}
            for row in full_data[segment_start:segment_end]:
                label = str(row[self.config.label_index])
                label_dict[label] = label_dict.get(label, 0) + 1

            segment = Segment(windows=windows,
                              segment_start=segment_start,
                              segment_size=self.config.segment_size,
                              window_stride=self.config.window_stride,
                              window_size=self.config.window_size,
                              labels=label_dict,
                              filename=self.config.data_file,
                              data_index=self.config.data_index,
                              label_index=self.config.label_index)
            self.segments.append(segment)
Exemple #5
0
 def __init__(self, config, segments):
     """Build normalized copies of ``segments``.

     Every window of every input segment is passed through ``normalize``;
     the remaining segment attributes are copied unchanged, except
     ``window_size`` (see the note below).

     :param config: settings object handed to the parent constructor.
     :param segments: iterable of segments to transform.
     """
     super(self.__class__, self).__init__(config)

     def normalized_copy(source):
         # Return a new Segment holding the normalized windows of `source`.
         windows = [normalize(window) for window in source.windows]
         return Segment(
             windows=windows,
             segment_start=source.segment_start,
             segment_size=source.segment_size,
             window_stride=source.window_stride,
             # NOTE(review): this is the number of windows in the segment,
             # not the length of one window -- confirm this is intended.
             window_size=len(windows),
             labels=source.labels,
             filename=source.filename,
             data_index=source.data_index,
             label_index=source.label_index,
             learning=source.learning)

     self.segments = [normalized_copy(segment) for segment in segments]
 def __init__(self, config):
     """Load files holding one float per line and cut them into segments.

     Each path in ``config.data_file`` is reduced to what is left of its
     absolute path after removing the prefix common to all the absolute
     paths; that remainder must have the form ``<label>/<file name>``.  The
     values are cut into segments of ``segment_size`` values (advancing by
     ``segment_stride``) and each segment into windows of ``window_size``
     values (advancing by ``window_stride``).

     :param config: settings object handed to the parent constructor;
         ``label_index`` is reset to ``None``, ``data_index`` to 0, and
         ``data_file`` is finally replaced by the common path prefix.
     :raises ValueError: if a remainder does not contain exactly one ``/``
         or a line is not a valid float.
     """
     super(self.__class__, self).__init__(config)
     self.config.label_index = None
     self.config.data_index = 0
     # -1 means "one window spanning the whole segment"; without this a
     # negative size would yield malformed windows below.
     if self.config.window_size == -1:
         self.config.window_size = self.config.segment_size
     prefix = os.path.commonprefix(
         [os.path.abspath(f) for f in self.config.data_file])
     self.segments = []
     for f in self.config.data_file:
         p = os.path.abspath(f)[len(prefix):]
         # The unpacking doubles as a check that the remainder is exactly
         # "<label>/<file name>"; only the label is used.
         (label, _file_name) = p.split('/')
         with open(f, 'r') as data_file:
             data = [float(line.strip()) for line in data_file]
         for segment_start in range(
                 0,
                 len(data) - self.config.segment_size + 1,
                 self.config.segment_stride):
             segment_end = segment_start + self.config.segment_size
             windows = [
                 data[window_start:window_start + self.config.window_size]
                 for window_start in range(
                     segment_start,
                     segment_end - self.config.window_size + 1,
                     self.config.window_stride)
             ]
             labels = dict([(label, self.config.segment_size)])
             self.segments.append(
                 Segment(windows=windows,
                         segment_start=segment_start,
                         segment_size=self.config.segment_size,
                         window_stride=self.config.window_stride,
                         window_size=self.config.window_size,
                         labels=labels,
                         filename=p,
                         data_index=self.config.data_index,
                         label_index=self.config.label_index))
     # Drop the last character of the common prefix, which is expected to
     # be the trailing '/'.
     self.config.data_file = prefix[0:-1]
Exemple #7
0
 def __init__(self, config):
     """Load record files grouped by label and cut them into segments.

     ``config.data_file`` maps a label to a list of paths.  In each file a
     line that parses as an integer starts a new record (stored under the
     key ``"time"``); every other line is split on single spaces into a
     name followed by float values and stored in the current record under
     that name.  Lines before the first integer line are ignored.

     Records are cut into segments of ``segment_size`` records (advancing by
     ``segment_stride``) and each segment into windows of ``window_size``
     records (advancing by ``window_stride``); a window is the flat list of
     the values of the fields named in ``data_index``, record by record.
     A file with no records contributes no segments.

     :param config: settings object handed to the parent constructor;
         ``label_index`` is reset to ``None``.
     """
     super(self.__class__, self).__init__(config)
     self.config.label_index = None
     # -1 means "one window spanning the whole segment"; without this a
     # negative size would yield malformed windows below.
     if self.config.window_size == -1:
         self.config.window_size = self.config.segment_size
     # commonprefix needs one flat list of path strings.  Handing it the
     # per-label lists unflattened makes it return a list, whose length is
     # not a character count and so cannot be used to slice paths.
     prefix = os.path.commonprefix([
         os.path.abspath(f)
         for files in self.config.data_file.values()
         for f in files
     ])
     self.segments = []
     for (key, files) in self.config.data_file.items():
         for f in files:
             p = os.path.abspath(f)[len(prefix):]
             label = key
             data = []
             data_element = None
             with open(f, 'r') as data_file:
                 for line in data_file:
                     text = line.strip()
                     try:
                         index = int(text)
                     except ValueError:
                         # Field line: "<name> v1 v2 ..." belonging to the
                         # current record; skipped if no record is open.
                         if data_element is None:
                             continue
                         key_vals = text.split(' ')
                         data_element[key_vals[0]] = [
                             float(v) for v in key_vals[1:]
                         ]
                     else:
                         # Integer line: close the open record, start a
                         # new one.
                         if data_element is not None:
                             data.append(data_element)
                         data_element = {"time": index}
             if data_element is not None:
                 data.append(data_element)
             if data:
                 # Per-value column names, derived from the first record:
                 # a field with several values expands to "<name>_<n>".
                 data_index = []
                 for i in self.config.data_index:
                     width = len(data[0][i])
                     if width > 1:
                         data_index.extend(
                             "%s_%d" % (i, j) for j in range(width))
                     else:
                         data_index.append(i)
                 for segment_start in range(
                         0,
                         len(data) - self.config.segment_size + 1,
                         self.config.segment_stride):
                     segment_end = (segment_start +
                                    self.config.segment_size)
                     windows = []
                     for window_start in range(
                             segment_start,
                             segment_end - self.config.window_size + 1,
                             self.config.window_stride):
                         window_end = (window_start +
                                       self.config.window_size)
                         windows.append([
                             value
                             for record in data[window_start:window_end]
                             for i in self.config.data_index
                             for value in record[i]
                         ])
                     self.segments.append(
                         Segment(windows=windows,
                                 segment_start=segment_start,
                                 segment_size=self.config.segment_size,
                                 window_stride=self.config.window_stride,
                                 window_size=self.config.window_size,
                                 labels=dict([(label,
                                               self.config.segment_size)]),
                                 filename=p,
                                 data_index=data_index,
                                 label_index=self.config.label_index))
             # Single parenthesised argument: prints the same text under
             # both Python 2 and Python 3.  The count is the running total.
             print("File %s segments %s" % (p, len(self.segments)))