def parse_mediafragment(target):
    """Parse a Web Annotation target into a (media, begin, end) tuple.

    ``target`` is a dict (or a list of dicts, in which case only the
    first one is considered) expected to hold a ``selector`` key with a
    FragmentSelector.  If the selector carries ``advene:begin`` /
    ``advene:end`` properties, they are used as-is; otherwise the
    MediaFragment ``t=begin,end`` syntax of its ``value`` is parsed
    through helper.parse_time.

    Returns (media, begin, end) or None if the target cannot be parsed.
    """
    if isinstance(target, list):
        # List of multiple targets. Let's consider only the first one for now.
        target = target[0]
    try:
        selector = target['selector']
        # First try source, then id, then fail.
        media = target.get('source', target.get('id', ''))
    except KeyError:
        logger.debug("Invalid target")
        return None
    if selector.get('@type', selector.get('type')) != "FragmentSelector":
        logger.debug("No mediafragment selector")
        return None
    # If there are advene:begin/advene:end properties, use
    # them.
    if selector.get('advene:begin') is not None and selector.get('advene:end') is not None:
        begin = selector.get('advene:begin')
        end = selector.get('advene:end')
    else:
        # Else parse the MediaFragment syntax
        val = selector.get('value', "")
        if not val.startswith('t='):
            logger.debug("Invalid mediafragment value %s", val)
            return None
        # FIX: a fragment without both components (e.g. "t=10", which
        # is valid MediaFragment syntax) used to raise an uncaught
        # ValueError here; report it through the usual error path.
        try:
            begin, end = val[2:].split(',')
        except ValueError:
            logger.debug("Invalid mediafragment value %s", val)
            return None
        begin = helper.parse_time(begin)
        end = helper.parse_time(end)
    return media, begin, end
def convert_entered_value(self, *p):
    """Parse the time typed in the entry widget and apply it.

    Always returns False, so the method can be connected directly as a
    GTK signal handler.
    """
    parsed = helper.parse_time(self.entry.get_text())
    if parsed is None or parsed == self.value:
        # Unparseable or unchanged: nothing to apply.
        return False
    self.set_value(parsed)
    return False
def convert_entered_value(self, *p):
    """Parse the time typed in the entry widget and apply it.

    Always returns False, so the method can be connected directly as a
    GTK signal handler.

    FIX: the original wrapped get_text() in unicode(), which does not
    exist in Python 3 (NameError); get_text() already returns str.
    """
    t = self.entry.get_text()
    v = helper.parse_time(t)
    if v is not None and v != self.value:
        if not self.set_value(v):
            return False
    return False
def load_transcription(self, filename=None, buffer=None):
    """Load a transcription into the text view.

    The data comes either from ``buffer`` (str or bytes), or from
    ``filename`` (local path or URL) opened through urllib.  Timestamp
    marks of the form [h:m:s] (or [Ih:m:s] for ignored marks) are
    converted into timestamp mark widgets; text between marks is
    inserted as-is.
    """
    if buffer is None:
        try:
            if re.match('[a-zA-Z]:', filename):
                # Windows drive: notation. Convert it to
                # a more URI-compatible syntax
                fname = urllib.request.pathname2url(filename)
            else:
                fname = filename
            f = urllib.request.urlopen(fname)
        except IOError as e:
            self.message(_("Cannot open %(filename)s: %(error)s") % {
                'filename': filename,
                'error': str(e) })
            return
        # FIX: urlopen returns bytes on Python 3; the original
        # "".join(f.readlines()) joined bytes lines with a str
        # separator (TypeError). Read raw data and let the decoding
        # step below handle bytes.
        data = f.read()
        f.close()
    else:
        data = buffer
    if isinstance(data, bytes):
        data = data.decode('utf-8')
    b = self.textview.get_buffer()
    begin, end = b.get_bounds()
    b.delete(begin, end)
    # FIX: raw string — '\[' and '\d' in a plain string are invalid
    # escape sequences (DeprecationWarning, later SyntaxWarning).
    mark_re = re.compile(r'\[(I?)(\d+:\d+:\d+.?\d*)\]([^\[]*)')
    # 0-mark at the beginning
    self.create_timestamp_mark(0, begin)
    last_time = 0
    m = mark_re.search(data)
    if m:
        # Handle the start case: there may be some text before the
        # first mark
        b.insert_at_cursor(data[:m.start()])
        for m in mark_re.finditer(data):
            # We set the sourcefile if it was already a timestamped
            # transcription: we do not want to overwrite a plain
            # transcription by mistake
            self.sourcefile = filename
            ignore, timestamp, text = m.group(1, 2, 3)
            t = helper.parse_time(timestamp)
            if last_time != t or ignore:
                it = b.get_iter_at_mark(b.get_insert())
                mark = self.create_timestamp_mark(t, it)
                if ignore:
                    mark.ignore = True
                self.update_mark(mark)
                last_time = t
            b.insert_at_cursor(text)
    else:
        # No timestamp mark: plain transcription, insert verbatim.
        b.insert_at_cursor(data)
    return
def npt2time(self, npt):
    """Convert a NPT timespec into a milliseconds time.

    Cf http://www.annodex.net/TR/draft-pfeiffer-temporal-fragments-03.html#anchor5

    Numeric values are returned unchanged; strings are stripped of an
    optional 'npt:' prefix and parsed with helper.parse_time.  On parse
    failure the error is logged and 0 is returned.
    """
    if isinstance(npt, (int, float)):
        # Already a numeric time value.
        return npt
    spec = npt[4:] if npt.startswith('npt:') else npt
    try:
        return helper.parse_time(spec)
    except Exception as e:
        self.log("Unhandled NPT format: " + spec)
        self.log(str(e))
        return 0
def iterator(self, f):
    """Yield chapter annotations from lines matched by self.regexp.

    Each matched line provides a 'begin' timestamp and a 'chapter'
    name; a chapter's end time is the begin time of the next match.
    Yields dicts with content/begin/end keys.
    """
    reg = re.compile(self.regexp)
    begin = 1
    end = 1
    chapter = None
    for l in f:
        l = l.rstrip()
        # FIX: the original did str(l, encoding).encode('utf-8'),
        # producing bytes again — reg.search then fails (str pattern
        # vs bytes) and "Chapter %s" would embed a b'...' repr.
        # Decode to str only.
        if isinstance(l, bytes):
            l = l.decode(self.encoding)
        m = reg.search(l)
        if m is not None:
            d = m.groupdict()
            end = helper.parse_time(d['begin'])
            if chapter is not None:
                # Previous chapter ends where the new one begins.
                res = { 'content': "Chapter %s" % chapter,
                        'begin': begin,
                        'end': end }
                yield res
            chapter = d['chapter']
            begin = end
def iterator(self, f):
    """Yield chapter annotations from lines carrying a duration.

    Each matched line provides 'duration' and 'chapter' groups;
    successive chapters are laid out sequentially with a 10 ms gap.
    Progress is reported through self.progress; a False return aborts.
    """
    reg = re.compile(self.regexp)
    begin = 1
    incr = 0.02
    progress = 0.1
    for l in f:
        progress += incr
        if not self.progress(progress, _("Processing data")):
            break
        l = l.rstrip()
        # FIX: the original did str(l, encoding).encode('utf-8'),
        # producing bytes again — reg.search then fails (str pattern
        # vs bytes) and "Chapter %s" would embed a b'...' repr.
        # Decode to str only.
        if isinstance(l, bytes):
            l = l.decode(self.encoding)
        m = reg.search(l)
        if m is not None:
            d = m.groupdict()
            duration = helper.parse_time(d['duration'])
            res = {'content': "Chapter %s" % d['chapter'],
                   'begin': begin,
                   'duration': duration}
            # Next chapter starts after this one, plus a small gap.
            begin += duration + 10
            yield res
def convert(self, source):
    """Converts the source elements to annotations.

    Source is an iterator or a list returning dictionaries.
    The following keys MUST be defined:
    - begin (in ms)
    - end or duration (in ms)
    - content

    The following keys are optional:
    - id
    - type (can be an annotation-type instance or a type-id)
    - mimetype (used when specifying a type-id)
    - notify: if True, then each annotation creation will generate a AnnotationCreate signal
    - complete: boolean. Used to mark the completeness of the annotation.
    - send: yield should return the created annotation
    """
    if self.defaulttype is None:
        # Lazily create the destination package and a default
        # annotation type on first use.
        self.package, self.defaulttype = self.init_package(annotationtypeid='imported', schemaid='imported-schema')
    if not hasattr(source, '__next__'):
        # It is not an iterator, so it may be another iterable
        # (most probably a list). Replace it by an iterator to
        # access its contents.
        source = iter(source)
    # Fetch the first element. Generators (objects with a send()
    # method) are primed with send(None).
    # NOTE(review): the docstring mentions sending the created
    # annotation back through yield, but both send() calls here pass
    # None — confirm whether source.send(a) was intended.
    try:
        if hasattr(source, 'send'):
            d = source.send(None)
        else:
            d = next(source)
    except StopIteration:
        return
    while True:
        try:
            begin=helper.parse_time(d['begin'])
        except KeyError:
            raise Exception("Begin is mandatory")
        if 'end' in d:
            end=helper.parse_time(d['end'])
        elif 'duration' in d:
            end=begin + helper.parse_time(d['duration'])
        else:
            raise Exception("end or duration is missing")
        content = d.get('content', "Default content")
        if not isinstance(content, str):
            # Structured content (dict/list...) is serialized as JSON.
            content = json.dumps(content)
        ident = d.get('id', None)
        # Support both author and creator keys
        author = d.get('author', d.get('creator', self.author))
        # Default title: a prefix of the content.
        title = d.get('title', content[:20])
        timestamp = d.get('timestamp', self.timestamp)
        type_ = d.get('type')
        if not type_:
            # Either None or an empty string. Set to defaulttype anyway.
            type_ = self.defaulttype
        elif isinstance(type_, str):
            # A type id was specified. Dereference it, and
            # create it if necessary.
            type_id = type_
            type_ = self.package.get_element_by_id(type_id)
            # mimetype was the key in initial versions of the
            # import API. But I used content_type in FlatJSON
            # export. Let's support both.
            mimetype = d.get('mimetype', d.get('content_type', None))
            if type_ is None:
                # Not existing, create it.
                type_ = self.ensure_new_type(prefix=type_id,
                                             title=d.get('type_title', type_id),
                                             mimetype=mimetype,
                                             color=d.get('type_color', None),
                                             )
        if not isinstance(type_, AnnotationType):
            # get_element_by_id may have resolved to a non-annotation
            # element (schema, view...): refuse it explicitly.
            raise Exception("Error during import: the specified type id %s is not an annotation type" % type_)
        a = self.create_annotation(type_=type_,
                                   begin=begin,
                                   end=end,
                                   data=content,
                                   ident=ident,
                                   author=author,
                                   title=title,
                                   timestamp=timestamp)
        # Mark the package as dirty so the change gets saved.
        self.package._modified = True
        if 'complete' in d:
            a.complete=d['complete']
        if 'notify' in d and d['notify'] and self.controller is not None:
            logger.debug("Notifying %s", a)
            self.controller.notify('AnnotationCreate', annotation=a)
        # Fetch the next element, using the same send()/next()
        # distinction as for the first one.
        try:
            if hasattr(source, 'send'):
                d = source.send(None)
            else:
                d = next(source)
        except StopIteration:
            break
def iterator(self, f):
    """Yield annotation dicts parsed from a whitespace-separated text file.

    Each line starts with a begin timestamp, optionally followed by an
    end timestamp and content.  Behaviour depends on
    self.timestampmode ('begin', 'both' or 'auto'), self.relative,
    self.unit and self.first_word_is_type.  In begin/auto mode, lines
    are buffered and emitted when the next timestamp (which provides
    the end time) is read; a final buffered line is flushed at EOF.
    """
    filesize = float(os.path.getsize(f.name))
    # We cannot simply use string.split() since we want to be able
    # to specify the number of splits() while keeping the
    # flexibility of having any blank char as separator
    whitespace_re = re.compile(r'\s+')
    stored_begin = 0
    stored_data = None
    index = 1
    while True:
        l = f.readline()
        if not l or not self.progress(f.tell() / filesize):
            break
        l = l.strip()
        data = whitespace_re.split(l, 2)
        if not data:
            # Error, cannot do anything with it.
            self.log("invalid data: ", l)
            continue
        try:
            begin = helper.parse_time(data[0])
        except helper.InvalidTimestamp:
            self.log("cannot parse " + data[0] + " as a timestamp.")
            continue
        if self.first_timestamp is None:
            self.first_timestamp = begin
            if not self.relative:
                stored_begin = begin
        if self.relative:
            begin = begin - self.first_timestamp
        if self.unit == "s":
            begin = begin * 1000
        # We have only a begin time.
        if len(data) == 2:
            if self.timestampmode == 'both':
                self.log("Cannot find end timestamp: ", l)
                continue
            if stored_data is None:
                # First line. Just buffer timestamp
                stored_data = str(index)
                stored_begin = begin
            else:
                # Only 1 time.
                yield {
                    'begin': stored_begin,
                    'end': max(begin - 1, 0),
                    'content': stored_data,
                }
                stored_begin = begin
                index += 1
            continue
        else:
            try:
                end = helper.parse_time(data[1])
            except helper.InvalidTimestamp:
                end = None
            if self.timestampmode == 'begin' or (self.timestampmode == 'auto' and end is None):
                # Invalid timestamp or 'begin' mode - consider
                # that we have only a begin time, followed by
                # data.
                data = whitespace_re.split(l, 1)
                if stored_data is None:
                    # First line. Just buffer timestamp and data
                    stored_data = data[1]
                    stored_begin = begin
                else:
                    if self.first_word_is_type:
                        if ' ' in stored_data:
                            type_, content = stored_data.split(" ", 1)
                        else:
                            type_, content = stored_data, ""
                    else:
                        type_, content = "text_import", stored_data
                    yield {
                        'begin': stored_begin,
                        'end': max(begin - 1, 0),
                        'content': content,
                        'type': type_.strip(),
                    }
                    stored_begin = begin
                    stored_data = data[1]
                index += 1
                continue
            elif end is None and self.timestampmode == 'both':
                self.log("Cannot find end timestamp: ", l)
                continue
            else:
                # We have valid begin and end times.
                if self.relative:
                    end = end - self.first_timestamp
                if self.unit == "s":
                    end = end * 1000
                if len(data) == 3:
                    content = data[2]
                else:
                    content = ""
                if self.first_word_is_type:
                    if ' ' in content:
                        type_, content = content.split(" ", 1)
                    else:
                        type_, content = content, ""
                else:
                    type_ = "text_import"
                yield {
                    'begin': begin,
                    'end': end,
                    'content': content,
                    'type': type_.strip(),
                }
                stored_begin = begin
                index += 1
    # End of file. If we are in begin/auto mode, the last line
    # contains a timecode that we should import.
    # FIX: guard on stored_data instead of stored_begin —
    # stored_begin is initialized to 0 and never None, so the old
    # test always passed and the flush crashed with a TypeError
    # (' ' in None) on empty input or when every line carried both
    # timestamps and nothing was buffered.
    if self.timestampmode != 'both' and stored_data is not None:
        if self.first_word_is_type:
            if ' ' in stored_data:
                type_, content = stored_data.split(" ", 1)
            else:
                type_, content = stored_data, ""
        else:
            type_, content = "text_import", stored_data
        # end is either the media duration (if we have it), or we
        # add an arbitrary duration
        end = self.controller.cached_duration or (stored_begin + 2000)
        yield {
            'begin': stored_begin,
            'end': end,
            'content': content,
            'type': type_.strip(),
        }