def _parse_value(self, data):
    """
    Parse the next metadata value.
    """
    if data[0] == FLV_DATA_TYPE_NUMBER:
        value = struct.unpack('>d', data[1:9])[0]
        return 9, value
    if data[0] == FLV_DATA_TYPE_BOOL:
        return 2, bool(data[1])
    if data[0] == FLV_DATA_TYPE_STRING:
        length = (data[1] << 8) + data[2]
        # decode so metadata keys/values match the str lookups in __init__
        return length + 3, data[3:length + 3].decode('utf-8', 'replace')
    if data[0] == FLV_DATA_TYPE_ECMARRAY:
        init_length = len(data)
        num = struct.unpack('>I', data[1:5])[0]
        data = data[5:]
        result = {}
        for _ in range(num):
            length = (data[0] << 8) + data[1]
            key = data[2:length + 2].decode('utf-8', 'replace')
            data = data[length + 2:]
            length, value = self._parse_value(data)
            if not length:
                return 0, result
            result[key] = value
            data = data[length:]
        return init_length - len(data), result
    log.info('unknown code: %x. Stop metadata parser' % data[0])
    return 0, None
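# The helper below is illustrative only: the FLV_DATA_TYPE_* constants are
# assumed to follow the AMF0 spec (0x00 number, 0x01 bool, 0x02 string,
# 0x08 ECMA array), which is the byte layout _parse_value walks.
# A minimal, self-contained sketch of decoding one AMF0 number payload:
import struct

def _demo_amf0_number(payload):
    # 1-byte type marker (0x00) followed by an 8-byte big-endian double
    assert payload[0] == 0x00
    return struct.unpack('>d', payload[1:9])[0]

if __name__ == '__main__':
    sample = bytes([0x00]) + struct.pack('>d', 120.5)   # e.g. a 'duration' value
    print(_demo_amf0_number(sample))                     # -> 120.5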
def stop_watching_dir(self, dir_path):
    try:
        self.ignore(twisted.python.filepath.FilePath(dir_path))
    except KeyError:
        log.info("inotify: failed to stop watching directory '%s' "
                 "(not watched?)" % dir_path)
    else:
        log.debug("inotify: stopped watching directory '%s'" % dir_path)
def isPES(self, file):
    log.info('trying mpeg-pes scan')
    file.seek(0, 0)
    buffer = file.read(3)

    # header (also valid for all mpegs)
    if not buffer == b'\x00\x00\x01':
        return 0

    self.sequence_header_offset = 0
    buffer += file.read(10000)

    offset = 0
    while offset + 1000 < len(buffer):
        pos, timestamp = self.ReadPESHeader(offset, buffer[offset:])
        if not pos:
            return 0
        if timestamp is not None and not hasattr(self, 'start'):
            self.get_time = self.ReadPTS
            bpos = buffer[offset + timestamp:offset + timestamp + 5]
            self.start = self.get_time(bpos)
        if self.sequence_header_offset and hasattr(self, 'start'):
            # we have all the information we need
            break

        offset += pos
        if offset + 1000 < len(buffer) and len(buffer) < 1000000 or 1:
            # looks like a pes, read more
            buffer += file.read(10000)

    if not self.video and not self.audio:
        # no video and no audio?
        return 0

    self.type = 'MPEG-PES'

    # fill in values for support functions:
    self.__seek_size__ = 10000000    # 10 MB
    self.__sample_size__ = 500000    # 500 k scanning
    self.__search__ = self._find_timer_PES_
    self.filename = file.name

    # get length of the file
    self.length = self.get_length()
    return 1
def tags_to_attributes(self, obj, tags):
    # Convert tags to core attributes.
    for name, tag in list(tags.items()):
        if isinstance(tag, dict):
            # Nested tags dict, recurse.
            self.tags_to_attributes(obj, tag)
            continue
        elif name not in TAGS_MAP:
            continue

        attr, filter = TAGS_MAP[name]
        if attr not in obj._keys and attr not in self._keys:
            # Tag is not in any core attribute for this object or global,
            # so skip.
            continue

        # Pull value out of Tag object or list of Tag objects.
        value = [item.value for item in tag] if isinstance(tag, list) else tag.value
        if filter:
            try:
                # filter is the conversion callable from TAGS_MAP (not the
                # builtin filter()), so apply it directly.
                value = [filter(item) for item in value] if isinstance(value, list) else filter(value)
            except Exception as e:
                log.info('Failed to convert tag to core attribute: %r' % e)

        # Special handling for tv series recordings. The 'title' tag
        # can be used for both the series and the episode name. The
        # same is true for trackno, which may refer to the season
        # and the episode number. Therefore, if we find these
        # attributes already set we try some guessing.
        if attr == 'trackno' and getattr(self, attr) is not None:
            # delete trackno and save season and episode
            self.season = self.trackno
            self.episode = value
            self.trackno = None
            continue
        if attr == 'title' and getattr(self, attr) is not None:
            # store current value of title as series and use current
            # value of title as title
            self.series = self.title
        if attr in obj._keys:
            setattr(obj, attr, value)
        else:
            setattr(self, attr, value)
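# Hypothetical sketch (not the module's real table) of the shape this method
# expects from TAGS_MAP: tag name -> (core attribute, optional filter callable
# applied to the raw tag value before it is stored on the object).
_EXAMPLE_TAGS_MAP = {
    'TITLE': ('title', None),            # copied as-is
    'PART_NUMBER': ('trackno', int),     # filter converts '7' -> 7
}

if __name__ == '__main__':
    attr, filter = _EXAMPLE_TAGS_MAP['PART_NUMBER']
    print(attr, filter('7'))             # -> trackno 7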
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.samplerate = 1

    self.file = file
    # Read enough that we're likely to get the full seekhead (FIXME: kludge)
    buffer = file.read(2000)
    if len(buffer) == 0:
        # Regular File end
        raise ParseError()

    # Check the Matroska header
    header = EbmlEntity(buffer)
    if header.get_id() != MATROSKA_HEADER_ID:
        raise ParseError()

    log.debug('HEADER ID found %08X' % header.get_id())
    self.mime = 'video/x-matroska'
    self.type = 'Matroska'
    self.has_idx = False
    self.objects_by_uid = {}

    # Now get the segment
    self.segment = segment = EbmlEntity(buffer[header.get_total_len():])
    # Record file offset of segment data for seekheads
    self.segment.offset = header.get_total_len() + segment.get_header_len()
    if segment.get_id() != MATROSKA_SEGMENT_ID:
        log.debug('SEGMENT ID not found %08X' % segment.get_id())
        return

    log.debug('SEGMENT ID found %08X' % segment.get_id())
    try:
        for elem in self.process_one_level(segment):
            if elem.get_id() == MATROSKA_SEEKHEAD_ID:
                self.process_elem(elem)
    except ParseError:
        pass

    if not self.has_idx:
        log.info('File has no index')
        self._set('corrupt', True)
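# Sketch of the EBML element header layout that EbmlEntity decodes from the
# buffer above: an element ID followed by a variable-length size field whose
# width is given by the leading zero bits of its first byte. The helper below
# is a standalone illustration, not the module's parser.
def _demo_read_ebml_size(data):
    first = data[0]
    length = 8 - first.bit_length() + 1   # leading zeros + 1 = field width
    value = first & (0xFF >> length)      # mask out the length marker bit
    for b in data[1:length]:
        value = (value << 8) | b
    return length, value

if __name__ == '__main__':
    print(_demo_read_ebml_size(bytes([0x42, 0x86])))   # -> (2, 646)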
def process_tags(self, tags):
    # Tags spec: http://www.matroska.org/technical/specs/tagging/index.html
    # Iterate over Tags children. Each child is a Tag element (whose children
    # are SimpleTags) plus a Targets element whose children specify what
    # objects the tags apply to.
    for tag_elem in self.process_one_level(tags):
        # Start a new dict to hold all SimpleTag elements.
        tags_dict = Tags()
        # A list of target uids this tags dict applies to. If empty,
        # tags are global.
        targets = []
        for sub_elem in self.process_one_level(tag_elem):
            if sub_elem.get_id() == MATROSKA_SIMPLE_TAG_ID:
                self.process_simple_tag(sub_elem, tags_dict)
            elif sub_elem.get_id() == MATROSKA_TARGETS_ID:
                # Targets element: if there is no uid child (track uid,
                # chapter uid, etc.) then the tags dict applies to the
                # whole file (top-level Media object).
                for target_elem in self.process_one_level(sub_elem):
                    target_elem_id = target_elem.get_id()
                    if target_elem_id in (MATRSOKA_TAGS_TRACK_UID_ID,
                                          MATRSOKA_TAGS_EDITION_UID_ID,
                                          MATRSOKA_TAGS_CHAPTER_UID_ID,
                                          MATRSOKA_TAGS_ATTACHMENT_UID_ID):
                        targets.append(target_elem.get_value())
                    elif target_elem_id == MATROSKA_TARGET_TYPE_VALUE_ID:
                        # Target types not supported for now. (Unclear how
                        # this would fit with kaa.metadata.)
                        pass
        if targets:
            # Assign tags to all listed uids
            for target in targets:
                try:
                    self.objects_by_uid[target].tags.update(tags_dict)
                    self.tags_to_attributes(self.objects_by_uid[target], tags_dict)
                except KeyError:
                    log.info('Tags assigned to unknown/unsupported target uid %d' % target)
        else:
            self.tags.update(tags_dict)
            self.tags_to_attributes(self, tags_dict)
def update_file(self, path, filename):
    file_path = os.path.join(path, filename)
    try:
        file_mtime = os.stat(file_path).st_mtime
    except OSError:
        log.debug("library: file %s is gone, skipping." % file_path)
        return
    folder = self._get_folder(path)
    if folder is not None:
        file_obj = self._get_file(folder, filename)
        if file_obj is None:
            file_obj = self.OBJECT_CLASS(filename=filename, folder=folder,
                                         last_modified=-1)
        if file_mtime > file_obj.last_modified:
            file_obj = self._get_file_info(file_obj)
            if file_obj is None:
                self.update_extrainfo_file(file_path)
                return
            Session.add(file_obj)
    else:
        log.info("inotify: a media file was updated in an unknown folder")
def childDataReceived(self, childFD, data):
    l = data.decode('utf-8').strip()
    if l:
        log.info('ctrl: %s' % l)
def _readatom(self, file):
    s = file.read(8)
    if len(s) < 8:
        return 0

    atomsize, atomtype = struct.unpack('>I4s', s)
    if not atomtype.decode('latin1').isalnum():
        # stop at nonsense data
        return 0

    log.debug('%r [%X]' % (atomtype, atomsize))

    if atomtype == b'udta':
        # Userdata (Metadata)
        pos = 0
        tabl = {}
        i18ntabl = {}
        atomdata = file.read(atomsize - 8)
        while pos < atomsize - 12:
            (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8])
            if datatype[0] == 169:
                # i18n Metadata...
                mypos = 8 + pos
                while mypos + 4 < datasize + pos:
                    # first 4 Bytes are i18n header
                    (tlen, lang) = struct.unpack('>HH', atomdata[mypos:mypos + 4])
                    i18ntabl[lang] = i18ntabl.get(lang, {})
                    l = atomdata[mypos + 4:mypos + tlen + 4]
                    i18ntabl[lang][datatype[1:]] = l
                    mypos += tlen + 4
            elif datatype == b'WLOC':
                # Drop Window Location
                pass
            else:
                if atomdata[pos + 8:pos + datasize][0] > 1:
                    tabl[datatype] = atomdata[pos + 8:pos + datasize]
            pos += datasize

        if len(i18ntabl) > 0:
            for k in i18ntabl:
                if k in QTLANGUAGES and QTLANGUAGES[k] == 'en':
                    self._appendtable('QTUDTA', i18ntabl[k])
                    self._appendtable('QTUDTA', tabl)
        else:
            log.debug('NO i18')
            self._appendtable('QTUDTA', tabl)

    elif atomtype == b'trak':
        atomdata = file.read(atomsize - 8)
        pos = 0
        trackinfo = {}
        tracktype = None
        while pos < atomsize - 8:
            (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8])

            if datatype == b'tkhd':
                tkhd = struct.unpack('>6I8x4H36xII', atomdata[pos + 8:pos + datasize])
                trackinfo['width'] = tkhd[10] >> 16
                trackinfo['height'] = tkhd[11] >> 16
                trackinfo['id'] = tkhd[3]

                try:
                    # XXX Timestamp of Seconds is since January 1st 1904!
                    # XXX 2082844800 is the difference between Unix and
                    # XXX Apple time. FIXME to work on Apple, too
                    self.timestamp = int(tkhd[1]) - 2082844800
                except Exception as e:
                    log.error('There was trouble extracting timestamp')

            elif datatype == b'mdia':
                pos += 8
                datasize -= 8
                log.debug('--> mdia information')

                while datasize:
                    mdia = struct.unpack('>I4s', atomdata[pos:pos + 8])
                    if mdia[1] == b'mdhd':
                        # Parse based on version of mdhd header. See
                        # http://wiki.multimedia.cx/index.php?title=QuickTime_container#mdhd
                        ver = atomdata[pos + 8]
                        if ver == 0:
                            mdhd = struct.unpack('>IIIIIhh', atomdata[pos + 8:pos + 8 + 24])
                        elif ver == 1:
                            mdhd = struct.unpack('>IQQIQhh', atomdata[pos + 8:pos + 8 + 36])
                        else:
                            mdhd = None

                        if mdhd:
                            # duration / time scale
                            trackinfo['length'] = mdhd[4] / mdhd[3]
                            if mdhd[5] in QTLANGUAGES:
                                trackinfo['language'] = QTLANGUAGES[mdhd[5]]
                            # mdhd[6] == quality
                            self.length = max(self.length, mdhd[4] / mdhd[3])
                    elif mdia[1] == b'minf':
                        # minf has only atoms inside
                        pos -= (mdia[0] - 8)
                        datasize += (mdia[0] - 8)
                    elif mdia[1] == b'stbl':
                        # stbl has only atoms inside
                        pos -= (mdia[0] - 8)
                        datasize += (mdia[0] - 8)
                    elif mdia[1] == b'hdlr':
                        hdlr = struct.unpack('>I4s4s', atomdata[pos + 8:pos + 8 + 12])
                        if hdlr[1] == b'mhlr':
                            if hdlr[2] == b'vide':
                                tracktype = 'video'
                            if hdlr[2] == b'soun':
                                tracktype = 'audio'
                    elif mdia[1] == b'stsd':
                        stsd = struct.unpack('>2I', atomdata[pos + 8:pos + 8 + 8])
                        if stsd[1] > 0:
                            codec = atomdata[pos + 16:pos + 16 + 8]
                            codec = struct.unpack('>I4s', codec)
                            trackinfo['codec'] = codec[1]
                            if codec[1] == b'jpeg':
                                tracktype = 'image'
                    elif mdia[1] == b'dinf':
                        dref = struct.unpack('>I4s', atomdata[pos + 8:pos + 8 + 8])
                        log.debug(' --> %r, %r (useless)' % mdia)
                        if dref[1] == b'dref':
                            num = struct.unpack('>I', atomdata[pos + 20:pos + 20 + 4])[0]
                            rpos = pos + 20 + 4
                            for ref in range(num):
                                # FIXME: do something if this references
                                ref = struct.unpack('>I3s', atomdata[rpos:rpos + 7])
                                data = atomdata[rpos + 7:rpos + ref[0]]
                                rpos += ref[0]
                    else:
                        if mdia[1].startswith(b'st'):
                            log.debug(' --> %r, %r (sample)' % mdia)
                        elif mdia[1] == b'vmhd' and not tracktype:
                            # indicates that this track is video
                            tracktype = 'video'
                        elif mdia[1] in [b'vmhd', b'smhd'] and not tracktype:
                            # indicates that this track is audio
                            tracktype = 'audio'
                        else:
                            log.debug(' --> %r, %r (unknown)' % mdia)

                    pos += mdia[0]
                    datasize -= mdia[0]

            elif datatype == b'udta':
                log.debug(str(struct.unpack('>I4s', atomdata[:8])))
            else:
                if datatype == b'edts':
                    log.debug('--> %r [%d] (edit list)' % (datatype, datasize))
                else:
                    log.debug('--> %r [%d] (unknown)' % (datatype, datasize))
            pos += datasize

        info = None
        if tracktype == 'video':
            info = core.VideoStream()
            self.video.append(info)
        if tracktype == 'audio':
            info = core.AudioStream()
            self.audio.append(info)
        if info:
            for key, value in list(trackinfo.items()):
                setattr(info, key, value)

    elif atomtype == b'mvhd':
        # movie header
        mvhd = struct.unpack('>6I2h', file.read(28))
        self.length = max(self.length, mvhd[4] / mvhd[3])
        self.volume = mvhd[6]
        file.seek(atomsize - 8 - 28, 1)

    elif atomtype == b'cmov':
        # compressed movie
        datasize, atomtype = struct.unpack('>I4s', file.read(8))
        if not atomtype == b'dcom':
            return atomsize

        method = struct.unpack('>4s', file.read(datasize - 8))[0]

        datasize, atomtype = struct.unpack('>I4s', file.read(8))
        if not atomtype == b'cmvd':
            return atomsize

        if method == b'zlib':
            data = file.read(datasize - 8)
            try:
                decompressed = zlib.decompress(data)
            except Exception as e:
                try:
                    decompressed = zlib.decompress(data[4:])
                except Exception as e:
                    log.error('There was a problem decompressing atom')
                    return atomsize

            decompressedIO = io.BytesIO(decompressed)
            while self._readatom(decompressedIO):
                pass
        else:
            log.info('unknown compression %r' % method)
            # unknown compression method
            file.seek(datasize - 8, 1)

    elif atomtype == b'moov':
        # decompressed movie info
        while self._readatom(file):
            pass

    elif atomtype == b'mdat':
        pos = file.tell() + atomsize - 8
        # maybe there is data inside the mdat
        log.info('parsing mdat')
        while self._readatom(file):
            pass
        log.info('end of mdat')
        file.seek(pos, 0)

    elif atomtype == b'rmra':
        # reference list
        while self._readatom(file):
            pass

    elif atomtype == b'rmda':
        # reference
        atomdata = file.read(atomsize - 8)
        pos = 0
        url = ''
        quality = 0
        datarate = 0
        while pos < atomsize - 8:
            (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8])
            if datatype == b'rdrf':
                rflags, rtype, rlen = struct.unpack('>I4sI', atomdata[pos + 8:pos + 20])
                if rtype == b'url ':
                    url = atomdata[pos + 20:pos + 20 + rlen]
                    if url.find(b'\0') > 0:
                        url = url[:url.find(b'\0')]
            elif datatype == b'rmqu':
                quality = struct.unpack('>I', atomdata[pos + 8:pos + 12])[0]
            elif datatype == b'rmdr':
                datarate = struct.unpack('>I', atomdata[pos + 12:pos + 16])[0]
            pos += datasize
        if url:
            self._references.append((url, quality, datarate))

    else:
        if not atomtype in [b'wide', b'free']:
            log.info('unhandled base atom %r' % atomtype)

        # Skip unknown atoms
        try:
            file.seek(atomsize - 8, 1)
        except IOError:
            return 0

    return atomsize
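# Sketch of the 8-byte atom header _readatom reads on every iteration: a
# 32-bit big-endian size (which includes these 8 header bytes) followed by a
# four-character type code such as b'moov' or b'trak'. Illustrative only.
import struct

def _demo_read_atom_header(chunk):
    atomsize, atomtype = struct.unpack('>I4s', chunk[:8])
    return atomsize, atomtype

if __name__ == '__main__':
    print(_demo_read_atom_header(struct.pack('>I4s', 8, b'free')))   # -> (8, b'free')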
def isTS(self, file):
    file.seek(0, 0)

    buffer = file.read(TS_PACKET_LENGTH * 2)
    c = 0

    while c + TS_PACKET_LENGTH < len(buffer):
        if buffer[c] == buffer[c + TS_PACKET_LENGTH] == TS_SYNC:
            break
        c += 1
    else:
        return 0

    buffer += file.read(10000)
    self.type = 'MPEG-TS'
    while c + TS_PACKET_LENGTH < len(buffer):
        start = buffer[c + 1] & 0x40
        # maybe load more into the buffer
        if c + 2 * TS_PACKET_LENGTH > len(buffer) and c < 500000:
            buffer += file.read(10000)

        # wait until the ts payload contains a payload header
        if not start:
            c += TS_PACKET_LENGTH
            continue

        tsid = ((buffer[c + 1] & 0x3F) << 8) + buffer[c + 2]
        adapt = (buffer[c + 3] & 0x30) >> 4

        offset = 4
        if adapt & 0x02:
            # meta info present, skip it for now
            adapt_len = buffer[c + offset]
            offset += adapt_len + 1

        if not buffer[c + 1] & 0x40:
            # no new pes or psi in stream payload starting
            pass
        elif adapt & 0x01:
            # PES
            timestamp = self.ReadPESHeader(c + offset, buffer[c + offset:], tsid)[1]
            if timestamp is not None:
                if not hasattr(self, 'start'):
                    self.get_time = self.ReadPTS
                    timestamp = c + offset + timestamp
                    self.start = self.get_time(buffer[timestamp:timestamp + 5])
                elif not hasattr(self, 'audio_ok'):
                    timestamp = c + offset + timestamp
                    start = self.get_time(buffer[timestamp:timestamp + 5])
                    if start is not None and self.start is not None and \
                       abs(start - self.start) < 10:
                        # looks ok
                        self.audio_ok = True
                    else:
                        # timestamp broken
                        del self.start
                        log.info('Timestamp error, correcting')

        if hasattr(self, 'start') and self.start and \
           self.sequence_header_offset and self.video and self.audio:
            break

        c += TS_PACKET_LENGTH

    if not self.sequence_header_offset:
        return 0

    # fill in values for support functions:
    self.__seek_size__ = 10000000    # 10 MB
    self.__sample_size__ = 100000    # 100 k scanning
    self.__search__ = self._find_timer_TS_
    self.filename = file.name

    # get length of the file
    self.length = self.get_length()
    return 1
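# Sketch of the standard 4-byte MPEG-TS packet header fields that isTS works
# with: sync byte 0x47, payload_unit_start_indicator, the 13-bit PID and the
# 2-bit adaptation_field_control. TS_PACKET_LENGTH is assumed to be the usual
# 188 bytes; this helper is a standalone illustration, not the module's code.
def _demo_ts_header(packet):
    assert packet[0] == 0x47                         # sync byte
    pusi = bool(packet[1] & 0x40)                    # payload unit start
    pid = ((packet[1] & 0x1F) << 8) | packet[2]      # 13-bit PID
    adaptation = (packet[3] & 0x30) >> 4             # 01=payload, 10=adapt, 11=both
    return pusi, pid, adaptation

if __name__ == '__main__':
    print(_demo_ts_header(bytes([0x47, 0x41, 0x00, 0x10]) + bytes(184)))   # -> (True, 256, 1)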
def ReadPESHeader(self, offset, buffer, id=0):
    """
    Parse a PES header. Since it starts with 0x00 0x00 0x01 like 'normal'
    mpegs, this function will return (0, None) when it is no PES header or
    (packet length, timestamp position (maybe None))

    http://dvd.sourceforge.net/dvdinfo/pes-hdr.html
    """
    if not buffer[0:3] == b'\x00\x00\x01':
        return 0, None

    packet_length = (buffer[4] << 8) + buffer[5] + 6
    align = buffer[6] & 4
    header_length = buffer[8]

    # PES ID (starting with 001)
    if buffer[3] & 0xE0 == 0xC0:
        id = id or buffer[3] & 0x1F
        for a in self.audio:
            if a.id == id:
                break
        else:
            self.audio.append(core.AudioStream())
            self.audio[-1]._set('id', id)

    elif buffer[3] & 0xF0 == 0xE0:
        id = id or buffer[3] & 0xF
        for v in self.video:
            if v.id == id:
                break
        else:
            self.video.append(core.VideoStream())
            self.video[-1]._set('id', id)

        # new mpeg starting
        if buffer[header_length + 9:header_length + 13] == b'\x00\x00\x01\xB3' and \
           not self.sequence_header_offset:
            # yes, remember offset for later use
            self.sequence_header_offset = offset + header_length + 9

    elif buffer[3] == 189 or buffer[3] == 191:
        # private stream. we don't know, but maybe we can guess later
        id = id or buffer[3] & 0xF
        if align and \
           buffer[header_length + 9:header_length + 11] == b'\x0b\x77':
            # AC3 stream
            for a in self.audio:
                if a.id == id:
                    break
            else:
                self.audio.append(core.AudioStream())
                self.audio[-1]._set('id', id)
                self.audio[-1].codec = 0x2000    # AC3
        else:
            # unknown content
            pass

    ptsdts = buffer[7] >> 6

    if ptsdts and ptsdts == buffer[9] >> 4:
        if buffer[9] >> 4 != ptsdts:
            log.info('WARNING: bad PTS/DTS, please contact us')
            return packet_length, None
        # the PTS bytes start at offset 9 of the PES header
        return packet_length, 9

    return packet_length, None
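# ReadPTS is defined elsewhere in this parser; the helper below is only a
# sketch of the standard 33-bit PTS packing it presumably decodes from the
# 5 bytes at the offset ReadPESHeader returns (90 kHz clock, each bit group
# followed by a marker bit).
def _demo_read_pts(b):
    high = (b[0] >> 1) & 0x07            # PTS[32..30]
    mid = ((b[1] << 8) | b[2]) >> 1      # PTS[29..15]
    low = ((b[3] << 8) | b[4]) >> 1      # PTS[14..0]
    return (high << 30) | (mid << 15) | low

if __name__ == '__main__':
    sample = bytes([0x21, 0x00, 0x37, 0x77, 0x41])   # encodes 900000 ticks
    print(_demo_read_pts(sample) / 90000.0)          # -> 10.0 seconds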
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.mime = 'video/flv'
    self.type = 'Flash Video'
    data = file.read(13)
    if len(data) < 13 or struct.unpack('>3sBBII', data)[0] != b'FLV':
        raise ParseError()

    for _ in range(10):
        if self.audio and self.video:
            break
        data = file.read(11)
        if len(data) < 11:
            break
        chunk = struct.unpack('>BH4BI', data)
        size = (chunk[1] << 8) + chunk[2]

        if chunk[0] == FLV_TAG_TYPE_AUDIO:
            flags = ord(file.read(1))
            if not self.audio:
                a = core.AudioStream()
                a.channels = (flags & FLV_AUDIO_CHANNEL_MASK) + 1
                srate = (flags & FLV_AUDIO_SAMPLERATE_MASK)
                a.samplerate = (44100 << (srate >> FLV_AUDIO_SAMPLERATE_OFFSET) >> 3)
                codec = (flags & FLV_AUDIO_CODECID_MASK) >> FLV_AUDIO_CODECID_OFFSET
                if codec < len(FLV_AUDIO_CODECID):
                    a.codec = FLV_AUDIO_CODECID[codec]
                self.audio.append(a)
            file.seek(size - 1, 1)

        elif chunk[0] == FLV_TAG_TYPE_VIDEO:
            flags = ord(file.read(1))
            if not self.video:
                v = core.VideoStream()
                codec = (flags & FLV_VIDEO_CODECID_MASK) - 2
                if codec < len(FLV_VIDEO_CODECID):
                    v.codec = FLV_VIDEO_CODECID[codec]
                # width and height are in the meta packet, but I have
                # no file with such a packet inside. So maybe we have
                # to decode some parts of the video.
                self.video.append(v)
            file.seek(size - 1, 1)

        elif chunk[0] == FLV_TAG_TYPE_META:
            log.info('metadata %r' % str(chunk))
            metadata = file.read(size)
            try:
                while metadata:
                    length, value = self._parse_value(metadata)
                    if isinstance(value, dict):
                        log.info('metadata: %r' % value)
                        if value.get('creator'):
                            self.copyright = value.get('creator')
                        if value.get('width'):
                            self.width = value.get('width')
                        if value.get('height'):
                            self.height = value.get('height')
                        if value.get('duration'):
                            self.length = value.get('duration')
                        self._appendtable('FLVINFO', value)
                    if not length:
                        # parse error
                        break
                    metadata = metadata[length:]
            except (IndexError, struct.error, TypeError):
                pass

        else:
            log.info('unknown %r' % str(chunk))
            file.seek(size, 1)

        file.seek(4, 1)
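# Sketch of the 11-byte FLV tag header parsed by the '>BH4BI' unpack above:
# TagType (1 byte), DataSize (24 bits), Timestamp (24 bits + 1 extended byte)
# and a 3-byte StreamID that is always zero. Because the unpack splits
# DataSize into a 16-bit high part and an 8-bit low part, the size is rebuilt
# as (chunk[1] << 8) + chunk[2]. Illustrative only.
import struct

def _demo_flv_tag_size(data):
    tag_type, size_hi, size_lo = struct.unpack('>BHB', data[:4])
    return tag_type, (size_hi << 8) | size_lo

if __name__ == '__main__':
    header = bytes([0x12, 0x00, 0x01, 0x2C]) + bytes(7)   # script-data tag, 300 bytes
    print(_demo_flv_tag_size(header))                      # -> (18, 300)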