コード例 #1
0
 def parse_mediafragment(target):
     """Extract a ``(media, begin, end)`` tuple from an annotation target.

     ``target`` is a mapping holding a ``selector`` entry, or a list of
     such mappings, in which case only the first one is considered for
     now. The media reference is the target's ``source`` if present,
     else its ``id``, else an empty string.

     Returns ``None`` when the target has no ``selector``, when the
     selector is not a ``FragmentSelector``, or when its ``value`` does
     not start with ``t=``.
     """
     if isinstance(target, list):
         target = target[0]
     try:
         selector = target['selector']
         # First try source, then id, then fall back to an empty string.
         fallback_media = target.get('id', '')
         media = target.get('source', fallback_media)
     except KeyError:
         logger.debug("Invalid target")
         return None

     selector_type = selector.get('@type', selector.get('type'))
     if selector_type != "FragmentSelector":
         logger.debug("No mediafragment selector")
         return None

     # Explicit advene:begin/advene:end properties win over the fragment
     # value and are returned as-is.
     begin = selector.get('advene:begin')
     end = selector.get('advene:end')
     if begin is not None and end is not None:
         return media, begin, end

     # Otherwise parse the "t=begin,end" MediaFragment syntax.
     fragment = selector.get('value', "")
     if not fragment.startswith('t='):
         logger.debug("Invalid mediafragment value %s", fragment)
         return None
     raw_begin, raw_end = fragment[2:].split(',')
     return media, helper.parse_time(raw_begin), helper.parse_time(raw_end)
コード例 #2
0
 def convert_entered_value(self, *p):
     """Parse the entry text as a time and apply it if it differs.

     Extra positional arguments are accepted and ignored. The method
     always returns False, whatever ``set_value`` reports.
     """
     entered = self.entry.get_text()
     parsed = helper.parse_time(entered)
     if parsed is not None and parsed != self.value:
         # The outcome of set_value does not influence the return value.
         self.set_value(parsed)
     return False
コード例 #3
0
ファイル: timeadjustment.py プロジェクト: eamexicano/advene
 def convert_entered_value(self, *p):
     """Parse the entry text as a time and apply it if it differs.

     The entry text is converted with ``unicode`` before parsing. Extra
     positional arguments are accepted and ignored. The method always
     returns False, whatever ``set_value`` reports.
     """
     entered = unicode(self.entry.get_text())
     parsed = helper.parse_time(entered)
     if parsed is not None and parsed != self.value:
         # The outcome of set_value does not influence the return value.
         self.set_value(parsed)
     return False
コード例 #4
0
    def load_transcription(self, filename=None, buffer=None):
        """Replace the text view content with a transcription.

        The text comes from ``buffer`` when it is given; otherwise it is
        fetched from ``filename`` with ``urllib.request.urlopen``. Bytes
        are decoded as UTF-8.

        Marks written as ``[digits:digits:digits]`` (optionally followed
        by a fractional part, and prefixed with ``I`` inside the bracket
        for ignored marks) are turned into timestamp marks through
        ``create_timestamp_mark``; the text following each mark is
        inserted as-is. Data without any mark is inserted verbatim.

        If the resource cannot be opened, the error is reported through
        ``self.message`` and the method returns before touching the
        view.
        """
        if buffer is None:
            try:
                if re.match('[a-zA-Z]:', filename):
                    # Windows drive: notation. Convert it to
                    # a more URI-compatible syntax
                    fname = urllib.request.pathname2url(filename)
                else:
                    fname = filename
                f = urllib.request.urlopen(fname)
            except IOError as e:
                self.message(_("Cannot open %(filename)s: %(error)s") % {'filename': filename,
                                                                         'error': str(e) })
                return
            # The response object yields bytes, so joining its lines with
            # a str separator would raise TypeError. Read everything at
            # once and close the handle even if reading fails.
            try:
                data = f.read()
            finally:
                f.close()
        else:
            data = buffer

        if isinstance(data, bytes):
            data = data.decode('utf-8')

        b = self.textview.get_buffer()
        begin, end = b.get_bounds()
        b.delete(begin, end)

        # Raw string so that the regex escapes are not interpreted (and
        # warned about) as string escapes.
        mark_re = re.compile(r'\[(I?)(\d+:\d+:\d+.?\d*)\]([^\[]*)')

        # 0-mark at the beginning
        self.create_timestamp_mark(0, begin)
        last_time = 0

        m = mark_re.search(data)
        if m:
            # Handle the start case: there may be some text before the
            # first mark
            b.insert_at_cursor(data[:m.start()])
            for m in mark_re.finditer(data):
                # We set the sourcefile if it was already a timestamped
                # transcription: we do not want to overwrite a plain
                # transcription by mistake
                self.sourcefile = filename
                ignore, timestamp, text = m.group(1, 2, 3)
                t = helper.parse_time(timestamp)
                # Consecutive marks with the same time are merged, unless
                # the mark is flagged as ignored.
                if last_time != t or ignore:
                    it = b.get_iter_at_mark(b.get_insert())
                    mark = self.create_timestamp_mark(t, it)
                    if ignore:
                        mark.ignore = True
                        self.update_mark(mark)
                    last_time = t
                b.insert_at_cursor(text)
        else:
            b.insert_at_cursor(data)
        return
コード例 #5
0
ファイル: transcribe.py プロジェクト: oaubert/advene
    def load_transcription(self, filename=None, buffer=None):
        """Replace the text view content with a transcription.

        The text comes from ``buffer`` when it is given; otherwise it is
        fetched from ``filename`` with ``urllib.request.urlopen``. Bytes
        are decoded as UTF-8.

        Marks written as ``[digits:digits:digits]`` (optionally followed
        by a fractional part, and prefixed with ``I`` inside the bracket
        for ignored marks) are turned into timestamp marks through
        ``create_timestamp_mark``; the text following each mark is
        inserted as-is. Data without any mark is inserted verbatim.

        If the resource cannot be opened, the error is reported through
        ``self.message`` and the method returns before touching the
        view.
        """
        if buffer is None:
            try:
                if re.match('[a-zA-Z]:', filename):
                    # Windows drive: notation. Convert it to
                    # a more URI-compatible syntax
                    fname = urllib.request.pathname2url(filename)
                else:
                    fname = filename
                f = urllib.request.urlopen(fname)
            except IOError as e:
                self.message(_("Cannot open %(filename)s: %(error)s") % {'filename': filename,
                                                                         'error': str(e) })
                return
            # The response object yields bytes, so joining its lines with
            # a str separator would raise TypeError. Read everything at
            # once and close the handle even if reading fails.
            try:
                data = f.read()
            finally:
                f.close()
        else:
            data = buffer

        if isinstance(data, bytes):
            data = data.decode('utf-8')

        b = self.textview.get_buffer()
        begin, end = b.get_bounds()
        b.delete(begin, end)

        # Raw string so that the regex escapes are not interpreted (and
        # warned about) as string escapes.
        mark_re = re.compile(r'\[(I?)(\d+:\d+:\d+.?\d*)\]([^\[]*)')

        # 0-mark at the beginning
        self.create_timestamp_mark(0, begin)
        last_time = 0

        m = mark_re.search(data)
        if m:
            # Handle the start case: there may be some text before the
            # first mark
            b.insert_at_cursor(data[:m.start()])
            for m in mark_re.finditer(data):
                # We set the sourcefile if it was already a timestamped
                # transcription: we do not want to overwrite a plain
                # transcription by mistake
                self.sourcefile = filename
                ignore, timestamp, text = m.group(1, 2, 3)
                t = helper.parse_time(timestamp)
                # Consecutive marks with the same time are merged, unless
                # the mark is flagged as ignored.
                if last_time != t or ignore:
                    it = b.get_iter_at_mark(b.get_insert())
                    mark = self.create_timestamp_mark(t, it)
                    if ignore:
                        mark.ignore = True
                        self.update_mark(mark)
                    last_time = t
                b.insert_at_cursor(text)
        else:
            b.insert_at_cursor(data)
        return
コード例 #6
0
    def npt2time(self, npt):
        """Turn a NPT timespec into the value given by ``helper.parse_time``.

        Numbers (int or float) are returned unchanged. For strings, an
        optional leading ``npt:`` prefix is dropped before parsing. When
        parsing raises, the problem is logged through ``self.log`` and 0
        is returned.

        Cf http://www.annodex.net/TR/draft-pfeiffer-temporal-fragments-03.html#anchor5
        """
        if isinstance(npt, (int, float)):
            return npt

        spec = npt[4:] if npt.startswith('npt:') else npt
        try:
            return helper.parse_time(spec)
        except Exception as e:
            # Best effort: report the unparsable value and fall back to 0.
            self.log("Unhandled NPT format: " + spec)
            self.log(str(e))
            return 0
コード例 #7
0
 def iterator(self, f):
     """Yield one chapter dict per pair of consecutive matching lines.

     Every line of ``f`` is right-stripped, decoded with
     ``self.encoding``, re-encoded as UTF-8 and matched against
     ``self.regexp``, which must define the ``begin`` and ``chapter``
     named groups. A chapter is emitted only once the next matching
     line is found, since that line provides its end time; the last
     chapter seen is therefore never emitted.

     NOTE(review): the line is matched as bytes, so ``self.regexp``
     presumably has to be a bytes pattern -- confirm against the class.
     """
     pattern = re.compile(self.regexp)
     start = 1
     current = None
     for raw in f:
         line = str(raw.rstrip(), self.encoding).encode('utf-8')
         found = pattern.search(line)
         if found is None:
             continue
         fields = found.groupdict()
         stop = helper.parse_time(fields['begin'])
         if current is not None:
             # The new chapter's begin time closes the previous chapter.
             yield {'content': "Chapter %s" % current,
                    'begin': start,
                    'end': stop}
         current = fields['chapter']
         start = stop
コード例 #8
0
 def iterator(self, f):
     """Yield one chapter dict per line of ``f`` matching ``self.regexp``.

     ``self.progress`` is called before each line is processed and
     iteration stops as soon as it returns a false value. Matching
     lines must provide the ``chapter`` and ``duration`` named groups.
     Chapters are laid out one after the other: the first begins at 1
     and each following one begins ``duration + 10`` after the
     previous begin.

     NOTE(review): the line is matched as bytes, so ``self.regexp``
     presumably has to be a bytes pattern -- confirm against the class.
     """
     pattern = re.compile(self.regexp)
     position = 1
     step = 0.02
     done = 0.1
     for raw in f:
         done += step
         if not self.progress(done, _("Processing data")):
             break
         line = str(raw.rstrip(), self.encoding).encode('utf-8')
         found = pattern.search(line)
         if found is None:
             continue
         fields = found.groupdict()
         length = helper.parse_time(fields['duration'])
         item = {'content': "Chapter %s" % fields['chapter'],
                 'begin': position,
                 'duration': length}
         # Advance before yielding so the next chapter starts after this one.
         position += length + 10
         yield item
コード例 #9
0
    def convert(self, source):
        """Create one annotation per element provided by ``source``.

        ``source`` is an iterator, or any iterable (most probably a
        list), producing dictionaries. Sources exposing a ``send``
        method are advanced with ``send(None)``, the others with
        ``next()``.

        Mandatory keys:
          - begin
          - end or duration
          - content (non-string values are serialized with json.dumps)

        Optional keys:
          - id
          - type (an annotation-type instance or a type-id)
          - type_title, type_color (used when a new type is created)
          - mimetype or content_type (used when specifying a type-id)
          - author or creator, title, timestamp
          - notify: if true, each annotation creation generates an
            AnnotationCreate notification (when a controller is set)
          - complete: boolean, copied onto the created annotation

        NOTE(review): previous documentation listed a ``send`` key, but
        this code never reads it.

        Raises Exception when begin, or both end and duration, are
        missing, or when the resolved type is not an AnnotationType.
        """
        if self.defaulttype is None:
            self.package, self.defaulttype = self.init_package(annotationtypeid='imported', schemaid='imported-schema')
        if not hasattr(source, '__next__'):
            # Not an iterator, so probably another iterable: wrap it so
            # that its contents can be pulled one by one.
            source = iter(source)

        def fetch():
            # Pull the next element, whatever the flavour of the source.
            if hasattr(source, 'send'):
                return source.send(None)
            return next(source)

        try:
            item = fetch()
        except StopIteration:
            return
        while True:
            try:
                begin = helper.parse_time(item['begin'])
            except KeyError:
                raise Exception("Begin is mandatory")
            if 'end' in item:
                end = helper.parse_time(item['end'])
            elif 'duration' in item:
                end = begin + helper.parse_time(item['duration'])
            else:
                raise Exception("end or duration is missing")

            content = item.get('content', "Default content")
            if not isinstance(content, str):
                content = json.dumps(content)
            ident = item.get('id', None)
            # Support both author and creator keys
            author = item.get('author', item.get('creator', self.author))
            title = item.get('title', content[:20])
            timestamp = item.get('timestamp', self.timestamp)

            type_ = item.get('type')
            if not type_:
                # Either None or an empty string: use the default type.
                type_ = self.defaulttype
            elif isinstance(type_, str):
                # A type id was given: look it up, creating the type
                # when it does not exist yet.
                type_id = type_
                type_ = self.package.get_element_by_id(type_id)

                # mimetype was the key in initial versions of the
                # import API, content_type is used by the FlatJSON
                # export. Both are supported.
                mimetype = item.get('mimetype', item.get('content_type', None))
                if type_ is None:
                    type_ = self.ensure_new_type(prefix=type_id,
                                                 title=item.get('type_title', type_id),
                                                 mimetype=mimetype,
                                                 color=item.get('type_color', None),
                                                 )
            if not isinstance(type_, AnnotationType):
                raise Exception("Error during import: the specified type id %s is not an annotation type" % type_)

            annotation = self.create_annotation(type_=type_,
                                                begin=begin,
                                                end=end,
                                                data=content,
                                                ident=ident,
                                                author=author,
                                                title=title,
                                                timestamp=timestamp)
            self.package._modified = True
            if 'complete' in item:
                annotation.complete = item['complete']
            if 'notify' in item and item['notify'] and self.controller is not None:
                logger.debug("Notifying %s", annotation)
                self.controller.notify('AnnotationCreate', annotation=annotation)

            try:
                item = fetch()
            except StopIteration:
                break
コード例 #10
0
    def iterator(self, f):
        """Yield annotation dicts parsed from a timestamped text file.

        Each line of ``f`` is split on whitespace into at most three
        fields. The first field is the begin timestamp; depending on
        ``self.timestampmode`` ('both', 'begin' or 'auto') the second
        one is read as an end timestamp or as the start of the content.
        Timestamps are shifted by the first timestamp of the file when
        ``self.relative`` is set, and multiplied by 1000 when
        ``self.unit`` is "s". ``self.progress`` is called for every
        line with the fraction of the file already read, and parsing
        stops when it returns a false value.

        When a line only provides a begin time, it is buffered and
        emitted once the following begin time is known (its end being
        that time minus 1). The last buffered line is emitted at end of
        file, ending at ``self.controller.cached_duration`` if set, else
        2000 after its begin. Nothing is flushed when no such line was
        buffered.

        ``f`` must have a ``name`` attribute usable by
        ``os.path.getsize``.
        """
        filesize = float(os.path.getsize(f.name))
        # We cannot simply use string.split() since we want to be able
        # to specify the number of splits() while keeping the
        # flexibility of having any blank char as separator
        whitespace_re = re.compile(r'\s+')
        stored_begin = 0
        stored_data = None
        index = 1
        while True:
            l = f.readline()
            if not l or not self.progress(f.tell() / filesize):
                break
            l = l.strip()
            data = whitespace_re.split(l, 2)

            if not data:
                # Error, cannot do anything with it.
                self.log("invalid data: ", l)
                continue

            try:
                begin = helper.parse_time(data[0])
            except helper.InvalidTimestamp:
                self.log("cannot parse " + data[0] + " as a timestamp.")
                continue
            if self.first_timestamp is None:
                self.first_timestamp = begin
                if not self.relative:
                    stored_begin = begin

            if self.relative:
                begin = begin - self.first_timestamp

            if self.unit == "s":
                begin = begin * 1000

            # We have only a begin time.
            if len(data) == 2:
                if self.timestampmode == 'both':
                    self.log("Cannot find end timestamp: ", l)
                    continue
                if stored_data is None:
                    # First line. Just buffer timestamp
                    stored_data = str(index)
                    stored_begin = begin
                else:
                    # Only 1 time.
                    yield {
                        'begin': stored_begin,
                        'end': max(begin - 1, 0),
                        'content': stored_data,
                        }
                    stored_begin = begin
                index += 1
                continue
            else:
                try:
                    end = helper.parse_time(data[1])
                except helper.InvalidTimestamp:
                    end = None

                if self.timestampmode == 'begin' or (self.timestampmode == 'auto' and end is None):
                    # Invalid timestamp or 'begin' mode - consider
                    # that we have only a begin time, followed by
                    # data.
                    data = whitespace_re.split(l, 1)
                    if stored_data is None:
                        # First line. Just buffer timestamp and data
                        stored_data = data[1]
                        stored_begin = begin
                    else:
                        if self.first_word_is_type:
                            if ' ' in stored_data:
                                type_, content = stored_data.split(" ", 1)
                            else:
                                type_, content = stored_data, ""
                        else:
                            type_, content = "text_import", stored_data
                        yield {
                            'begin': stored_begin,
                            'end': max(begin - 1, 0),
                            'content': content,
                            'type': type_.strip(),
                        }
                        stored_begin = begin
                        stored_data = data[1]
                    index += 1
                    continue
                elif end is None and self.timestampmode == 'both':
                    self.log("Cannot find end timestamp: ", l)
                    continue
                else:
                    # We have valid begin and end times.
                    if self.relative:
                        end = end - self.first_timestamp
                    if self.unit == "s":
                        end = end * 1000
                    if len(data) == 3:
                        content = data[2]
                    else:
                        content = ""
                    if self.first_word_is_type:
                        if ' ' in content:
                            type_, content = content.split(" ", 1)
                        else:
                            type_, content = content, ""
                    else:
                        type_ = "text_import"
                    yield {
                        'begin': begin,
                        'end': end,
                        'content': content,
                        'type': type_.strip(),
                    }
                    stored_begin = begin
                    index += 1
        # End of file. If we are in begin/auto mode, the last line
        # contains a timecode that we should import. This only applies
        # when a line was actually buffered: without the stored_data
        # check, an empty file (or a file holding only begin/end lines)
        # would produce a spurious annotation with no content, or fail
        # on the type/content split below.
        if (self.timestampmode != 'both'
                and stored_begin is not None
                and stored_data is not None):
            if self.first_word_is_type:
                if ' ' in stored_data:
                    type_, content = stored_data.split(" ", 1)
                else:
                    type_, content = stored_data, ""
            else:
                type_, content = "text_import", stored_data
            # end is either the media duration (if we have it), or we
            # add an arbitrary duration
            end = self.controller.cached_duration or (stored_begin + 2000)
            yield {
                'begin': stored_begin,
                'end': end,
                'content': content,
                'type': type_.strip(),
            }